├── bradata ├── tse │ ├── eleitorado.py │ ├── resultados.py │ ├── prestacao_de_contas.py │ ├── url_relation.yaml │ ├── __init__.py │ ├── utils_tse.py │ ├── header_relation.yaml │ ├── candidatos.py │ └── headersTSE.csv ├── cgu │ ├── __init__.py │ ├── ceis_pedido_LAI.txt │ └── cgu.py ├── __init__.py ├── agencias │ ├── __init__.py │ └── infraero.py ├── connection.py └── utils.py ├── docs ├── _static │ └── .gitignore ├── authors.rst ├── changes.rst ├── contributing.rst ├── license.rst ├── bradata.cgu.rst ├── bradata.agencias.rst ├── bradata.tse.rst ├── bradata.rst ├── index.rst ├── beginner-tutorial.rst ├── Makefile └── conf.py ├── requirements.txt ├── AUTHORS.rst ├── CHANGES.rst ├── test-requirements.txt ├── tests ├── conftest.py └── test_skeleton.py ├── .gitignore ├── menu-de-dados-README ├── .coveragerc ├── setup.py ├── LICENSE.txt ├── README.rst ├── setup.cfg ├── CONTRIBUTING.rst └── menu-de-dados.csv /bradata/tse/eleitorado.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /bradata/tse/resultados.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /bradata/tse/prestacao_de_contas.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/_static/.gitignore: -------------------------------------------------------------------------------- 1 | # Empty directory 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests 2 | bs4 3 | pandas 4 | tqdm 5 | -------------------------------------------------------------------------------- /docs/authors.rst: -------------------------------------------------------------------------------- 1 | .. _authors: 2 | .. include:: ../AUTHORS.rst 3 | -------------------------------------------------------------------------------- /docs/changes.rst: -------------------------------------------------------------------------------- 1 | .. _changes: 2 | .. include:: ../CHANGES.rst 3 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | .. _contributing: 2 | .. include:: ../CONTRIBUTING.rst -------------------------------------------------------------------------------- /bradata/cgu/__init__.py: -------------------------------------------------------------------------------- 1 | from bradata.cgu.cgu import get_ceis, get_cepim, get_cnep, get_ceaf, get_diarias 2 | -------------------------------------------------------------------------------- /docs/license.rst: -------------------------------------------------------------------------------- 1 | .. _license: 2 | 3 | ======= 4 | License 5 | ======= 6 | 7 | .. include:: ../LICENSE.txt 8 | -------------------------------------------------------------------------------- /AUTHORS.rst: -------------------------------------------------------------------------------- 1 | .. 
_bradata-authors: 2 | 3 | Developers 4 | ########## 5 | 6 | - odanoburu 7 | 8 | - Joao Carabetta 9 | 10 | - 11 | -------------------------------------------------------------------------------- /CHANGES.rst: -------------------------------------------------------------------------------- 1 | .. _bradata-changes: 2 | 3 | Changelog 4 | ######### 5 | 6 | Version 0.1 7 | =========== 8 | 9 | - add TSE module 10 | - candidatos 11 | - bens dos candidatos 12 | - add CGU module 13 | - CEIS 14 | - CEPIM 15 | - CNEP 16 | -------------------------------------------------------------------------------- /test-requirements.txt: -------------------------------------------------------------------------------- 1 | # Add requirements only needed for your unittests and during development here. 2 | # They will be installed automatically when running `python setup.py test`. 3 | # ATTENTION: Don't remove pytest-cov and pytest as they are needed. 4 | pytest-cov 5 | pytest 6 | luigi -------------------------------------------------------------------------------- /bradata/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import bradata.agencias 3 | import bradata.cgu 4 | import bradata.tse 5 | from pkg_resources import get_distribution as _get_distribution 6 | 7 | try: 8 | __version__ = _get_distribution(__name__).version 9 | except: 10 | __version__ = 'unknown' 11 | 12 | 13 | __download_dir__ = bradata.utils._set_download_directory() 14 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Dummy conftest.py for bradata. 5 | 6 | If you don't know what this is for, just leave it empty. 7 | Read more about conftest.py under: 8 | https://pytest.org/latest/plugins.html 9 | """ 10 | from __future__ import print_function, absolute_import, division 11 | 12 | import pytest 13 | -------------------------------------------------------------------------------- /bradata/tse/url_relation.yaml: -------------------------------------------------------------------------------- 1 | candidatos: http://agencia.tse.jus.br/estatistica/sead/odsele/consulta_cand/consulta_cand_ 2 | 3 | bens: http://agencia.tse.jus.br/estatistica/sead/odsele/bem_candidato/bem_candidato_ 4 | 5 | legendas: http://agencia.tse.jus.br/estatistica/sead/odsele/consulta_legendas/consulta_legendas_ 6 | 7 | vagas: http://agencia.tse.jus.br/estatistica/sead/odsele/consulta_vagas/consulta_vagas_ 8 | -------------------------------------------------------------------------------- /tests/test_skeleton.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import pytest 5 | from bradata.skeleton import fib 6 | 7 | __author__ = "odanoburu" 8 | __copyright__ = "odanoburu" 9 | __license__ = "none" 10 | 11 | 12 | def test_fib(): 13 | assert fib(1) == 1 14 | assert fib(2) == 1 15 | assert fib(7) == 13 16 | with pytest.raises(AssertionError): 17 | fib(-10) 18 | -------------------------------------------------------------------------------- /bradata/cgu/ceis_pedido_LAI.txt: -------------------------------------------------------------------------------- 1 | ---- pedido de acesso à informação enviado pelo e-SIC em 2017-04-03 por odanoburu. 
2 | Olá, na página http://www.portaltransparencia.gov.br/downloads/snapshot.asp?c=CEIS só consigo obter o cadastro de empresas inidôneas e suspensas do dia atual. gostaria, entretanto, de ter uma cópia do CEIS desde o início da série temporal. 3 | obrigado. 4 | atenciosamente, 5 | ---- protocolo: 00075.000421/2017-13 -------------------------------------------------------------------------------- /bradata/agencias/__init__.py: -------------------------------------------------------------------------------- 1 | import bradata.agencias.infraero 2 | """ 3 | Gets content from infraero website. It provides a mapping to content types. 4 | You can initialize this submodule by: 5 | import bradata.agencias.infraero as infraero 6 | infraero 7 | or 8 | import bradata 9 | bradata.agencias.infraero. 10 | and you'll be ready to use the API on your Python project. 11 | """ -------------------------------------------------------------------------------- /docs/bradata.cgu.rst: -------------------------------------------------------------------------------- 1 | bradata.cgu package 2 | =================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | bradata.cgu.cgu module 8 | ---------------------- 9 | 10 | .. automodule:: bradata.cgu.cgu 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: bradata.cgu 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /docs/bradata.agencias.rst: -------------------------------------------------------------------------------- 1 | bradata.agencias package 2 | ======================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | bradata.agencias.infraero module 8 | -------------------------------- 9 | 10 | .. automodule:: bradata.agencias.infraero 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: bradata.agencias 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /bradata/tse/__init__.py: -------------------------------------------------------------------------------- 1 | from .candidatos import Candidatos 2 | 3 | 4 | class Tse(object): 5 | """ 6 | Gets electoral data from the TSE (Tribunal Superior Eleitoral) open data repository. It provides access to the TSE datasets. 7 | :class:`~bradata.tse.candidatos.Candidatos` 8 | This is the preferred (and only supported) way to get access to those classes and their 9 | methods. 10 | You can initialize this class by: 11 | 12 | tse = bradata.tse.Tse() 13 | 14 | and you'll be ready to use the API on your Python project.
15 | """ 16 | 17 | def __init__(self): 18 | 19 | self.candidatos = Candidatos() -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Temporary and binary files 2 | *~ 3 | *.py[cod] 4 | *.so 5 | *.cfg 6 | !setup.cfg 7 | *.orig 8 | *.log 9 | *.pot 10 | __pycache__/* 11 | .cache/* 12 | .*.swp 13 | .ipynb_checkpoints/ 14 | 15 | # Project files 16 | .ropeproject 17 | .project 18 | .pydevproject 19 | .settings 20 | .idea 21 | 22 | # Package files 23 | *.egg 24 | *.eggs/ 25 | .installed.cfg 26 | *.egg-info 27 | 28 | # Unittest and coverage 29 | htmlcov/* 30 | .coverage 31 | .tox 32 | junit.xml 33 | coverage.xml 34 | 35 | # Build and docs folder/files 36 | build/* 37 | dist/* 38 | sdist/* 39 | docs/api/* 40 | docs/_build/* 41 | cover/* 42 | MANIFEST 43 | -------------------------------------------------------------------------------- /menu-de-dados-README: -------------------------------------------------------------------------------- 1 | first version forked from: https://github.com/dadosgovbr/catalogos-dados-brasil/, under MIT License 2 | 3 | check for ideas: 4 | 5 | * https://www.quandl.com/search?query=brazil 6 | 7 | * https://github.com/Webschool-io/Politicos-brasileiros 8 | 9 | * http://politicos.olhoneles.org/ 10 | 11 | * https://pypi.python.org/pypi/brasil.gov.agenda/1.0.1 12 | 13 | * github.com/ajdamico/adsfree 14 | 15 | * github.com/lucasmation/microdadosBrasil 16 | 17 | * github.com/paulozip/acidentes-aereos 18 | 19 | * https://github.com/dadosgovbr/scripts-dadosgovbr 20 | 21 | * https://github.com/georgeyk/dadosgovbr 22 | 23 | * -------------------------------------------------------------------------------- /docs/bradata.tse.rst: -------------------------------------------------------------------------------- 1 | bradata.tse package 2 | =================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | bradata.tse.candidatos module 8 | ----------------------------- 9 | 10 | .. automodule:: bradata.tse.candidatos 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | bradata.tse.utils_tse module 16 | ---------------------------- 17 | 18 | .. automodule:: bradata.tse.utils_tse 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | Module contents 25 | --------------- 26 | 27 | .. automodule:: bradata.tse 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | # .coveragerc to control coverage.py 2 | [run] 3 | branch = True 4 | source = bradata 5 | # omit = bad_file.py 6 | 7 | [report] 8 | # Regexes for lines to exclude from consideration 9 | exclude_lines = 10 | # Have to re-enable the standard pragma 11 | pragma: no cover 12 | 13 | # Don't complain about missing debug-only code: 14 | def __repr__ 15 | if self\.debug 16 | 17 | # Don't complain if tests don't hit defensive assertion code: 18 | raise AssertionError 19 | raise NotImplementedError 20 | 21 | # Don't complain if non-runnable code isn't run: 22 | if 0: 23 | if __name__ == .__main__.: 24 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Setup file for bradata. 
5 | 6 | This file was generated with PyScaffold 2.5.7, a tool that easily 7 | puts up a scaffold for your new Python project. Learn more under: 8 | http://pyscaffold.readthedocs.org/ 9 | """ 10 | 11 | import sys 12 | from setuptools import setup 13 | 14 | 15 | def setup_package(): 16 | needs_sphinx = {'build_sphinx', 'upload_docs'}.intersection(sys.argv) 17 | sphinx = ['sphinx'] if needs_sphinx else [] 18 | setup(setup_requires=['six', 'pyscaffold>=2.5a0,<2.6a0'] + sphinx, 19 | use_pyscaffold=True) 20 | 21 | 22 | if __name__ == "__main__": 23 | setup_package() 24 | -------------------------------------------------------------------------------- /docs/bradata.rst: -------------------------------------------------------------------------------- 1 | bradata package 2 | =============== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | bradata.agencias 10 | bradata.cgu 11 | bradata.tse 12 | 13 | Submodules 14 | ---------- 15 | 16 | bradata.connection module 17 | ------------------------- 18 | 19 | .. automodule:: bradata.connection 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | 24 | bradata.utils module 25 | -------------------- 26 | 27 | .. automodule:: bradata.utils 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | 32 | 33 | Module contents 34 | --------------- 35 | 36 | .. automodule:: bradata 37 | :members: 38 | :undoc-members: 39 | :show-inheritance: 40 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | bradata 3 | ======= 4 | 5 | This is the documentation of **bradata**. 6 | 7 | bradata means to make easily available **all** Brazilian government data 8 | as a Python package. 9 | 10 | it should be as symple as: 11 | 12 | .. code:: python 13 | 14 | import bradata 15 | 16 | bradata.inep.enem.get() 17 | 18 | and you should have all ENEM microdata in your /bradata\_download directory. 19 | 20 | check our source code at `github `_. 21 | 22 | 23 | Contents 24 | ======== 25 | 26 | .. toctree:: 27 | :maxdepth: 3 28 | 29 | bradata 30 | Contributing 31 | Beginner's tutorial 32 | License 33 | Authors 34 | Changelog 35 | 36 | 37 | 38 | 39 | Indices and tables 40 | ================== 41 | 42 | * :ref:`genindex` 43 | * :ref:`modindex` 44 | * :ref:`search` 45 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2017 AUTHORS 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /bradata/agencias/infraero.py: -------------------------------------------------------------------------------- 1 | import bradata 2 | from bradata.connection import Connection 3 | import requests 4 | from bs4 import BeautifulSoup 5 | import time 6 | import os 7 | 8 | 9 | def _get_links(xml): 10 | """ 11 | Get all links from a html page 12 | :param xml: string of a html page 13 | :return: links: A list of all the link in the page 14 | """ 15 | soup = BeautifulSoup(xml) 16 | links = soup("a") 17 | 18 | return links 19 | 20 | def get(year="2015"): 21 | """ 22 | Get all statistics xls files from Infraero website for a given year 23 | :param year: string year, from 2017 to 2012 24 | :return: links: A list of all the links downloaded 25 | """ 26 | database_links = set() 27 | conn = Connection() 28 | statistics_page = conn.perform_request("http://www.infraero.gov.br/index.php/br/estatisticas/estatisticas.html") 29 | links = _get_links(statistics_page["content"]) 30 | for link in links: 31 | if (('Estatistica' in link['href']) and (year in link['href'])): 32 | file_name = str(link) 33 | file_name = file_name.split('"')[1] 34 | complete_link = "http://www.infraero.gov.br" + str(file_name) 35 | database_links.add(complete_link) 36 | for link in database_links: 37 | name = str.split(link, "/")[-1] 38 | print("Downloading: {}".format(link)) 39 | resp = requests.get(link) 40 | with open(os.path.join(bradata.__download_dir__, "{}-{}".format(year, name)), mode='wb') as f: 41 | f.write(resp.content) 42 | time.sleep(0.05) 43 | 44 | return database_links 45 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | bradata 2 | ####### 3 | 4 | bradata means to make easily available **all** Brazilian government data 5 | as a Python package. 6 | 7 | it should be as symple as: 8 | 9 | .. code-block:: python 10 | 11 | import bradata 12 | 13 | bradata.inep.enem.get() 14 | 15 | and you should have all ENEM microdata in your ``/bradata\_download`` 16 | directory.\* 17 | 18 | documentation is available at 19 | `http://bradata.readthedocs.io/ `_. 20 | 21 | \* except the ENEM module is not ready. we have a lot of work to do, and we 22 | would love to have contributors! 23 | 24 | status 25 | ====== 26 | 27 | this package is in the early stages of development. the only modules we have so 28 | far are those that download data from: 29 | 30 | - Tribunal Superior Eleitoral (TSE) 31 | 32 | - Controladoria-Geral da União (CGU) 33 | 34 | - Infraero 35 | 36 | of which none are complete so far. 37 | 38 | contributing 39 | ============ 40 | 41 | if you wish to contribute, check the issues. all issues are labeled by 42 | difficulty (beginner, intermediate and advanced). 43 | 44 | there is a list of possible data sources in 45 | ``menu-de-dados.csv``. 46 | 47 | everything you do is yours -- it should be licensed under your own name. 48 | 49 | check the contributing guidelines (at ``CONTRIBUTING.rst``). pull requests 50 | not following the guidelines won't be accepted. 51 | 52 | if you have any doubts, contact @odanoburu or @joaocarabetta (project 53 | maintainers). 
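to get a feel for what is already implemented (and where help is wanted), here is
a minimal sketch of the current API. these calls exist in the ``tse`` and ``cgu``
modules today, but treat them as a sketch while the package is in flux:

.. code-block:: python

    import bradata

    # CGU: download today's CEIS register (empresas inidôneas e suspensas)
    bradata.cgu.get_ceis()

    # TSE: download and aggregate candidate data for the 2014 election
    tse = bradata.tse.Tse()
    tse.candidatos.download(type='candidatos', year=2014)

both calls write their csv output under a ``bradata_download`` directory created
in your home folder.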
54 | 55 | License 56 | ======= 57 | 58 | MIT license available at ``LICENSE.txt``. 59 | 60 | Note 61 | ---- 62 | 63 | This project has been set up using PyScaffold 2.5.7. For details and 64 | usage information on PyScaffold see 65 | `here `_. 66 | -------------------------------------------------------------------------------- /bradata/tse/utils_tse.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import pandas as pd 4 | import bradata.connection 5 | from bradata.utils import _unzip, _set_download_directory 6 | 7 | 8 | def unzip_tse(result, current_path): 9 | 10 | if not os.path.exists(current_path): 11 | os.makedirs(current_path) 12 | 13 | with open(current_path + 'temp.zip', 'wb') as f: 14 | f.write(result) 15 | 16 | _unzip(current_path + 'temp.zip', current_path) 17 | 18 | os.remove(current_path + 'temp.zip') 19 | 20 | def aggregate_tse(path, type, year): 21 | 22 | year = year 23 | files = glob.glob("{}/*_{}_*.txt".format(path, year)) 24 | 25 | headers = pd.read_csv(os.getcwd() + '/bradata/tse/headersTSE.csv') 26 | 27 | 28 | 29 | df_list = [] 30 | print(files) 31 | for filename in sorted(files): 32 | if type == 'candidatos': 33 | if year >= 2014: 34 | header = 'CONSULTA_CAND_2014' 35 | elif year == 2012: 36 | header = 'CONSULTA_CAND_2012' 37 | elif year <= 2010: 38 | header = 'CONSULTA_CAND_2010' 39 | df_list.append( 40 | pd.read_csv(filename, sep=';', names=headers[header].dropna().tolist(), encoding='latin1')) 41 | 42 | full_df = pd.concat(df_list) 43 | 44 | full_df.to_csv(path + '/{}/{}_{}.csv'.format(type, type, year)) 45 | 46 | def download_headers(): 47 | 48 | result = bradata.connection.Connection().perform_request('https://gist.github.com/JoaoCarabetta/e2bf8437007efec84c3110cb93941850', 49 | binary = True) 50 | if result['status'] == 'ok': 51 | result = result['content'] 52 | else: 53 | print('File was not dowloaded') 54 | 55 | unzip_tse(result, _set_download_directory()) 56 | -------------------------------------------------------------------------------- /bradata/tse/header_relation.yaml: -------------------------------------------------------------------------------- 1 | candidatos: 2 | columns: 3 | 2010: CONSULTA_CAND_2010 4 | 2012: CONSULTA_CAND_2012 5 | 2014: CONSULTA_CAND_2014 6 | url: http://agencia.tse.jus.br/estatistica/sead/odsele/consulta_cand/consulta_cand_ 7 | 8 | perfil_eleitorado: 9 | columns: 10 | 0: PERFIL_ELEITORADO 11 | url: http://agencia.tse.jus.br/estatistica/sead/odsele/perfil_eleitorado/perfil_eleitorado_ 12 | 13 | bem_candidato: 14 | columns: 15 | 0: BEM_CANDIDATO 16 | url: http://agencia.tse.jus.br/estatistica/sead/odsele/bem_candidato/bem_candidato_ 17 | 18 | legendas: 19 | columns: 20 | 0: CONSULTA_LEGENDAS 21 | url: http://agencia.tse.jus.br/estatistica/sead/odsele/consulta_legendas/consulta_legendas_ 22 | 23 | vagas: 24 | columns: 25 | 0: CONSULTA_VAGAS 26 | url: http://agencia.tse.jus.br/estatistica/sead/odsele/consulta_vagas/consulta_vagas_ 27 | 28 | votacao_candidato_munzona: 29 | columns: 30 | 2012: VOTACAO_CANDIDATO_MUN_ZONA_2012 31 | 2014: VOTACAO_CANDIDATO_MUN_ZONA_2014 32 | url: http://agencia.tse.jus.br/estatistica/sead/odsele/votacao_candidato_munzona/votacao_candidato_munzona_ 33 | 34 | votacao_partido_munzona: 35 | columns: 36 | 2012: VOTACAO_PARTIDO_MUN_ZONA_2012 37 | 2014: VOTACAO_PARTIDO_MUN_ZONA_2014 38 | url: http://agencia.tse.jus.br/estatistica/sead/odsele/votacao_partido_munzona/votacao_partido_munzona_ 39 | 40 | votacao_secao_eleitoral: 41 | columns: 42 | 0: 
VOTO_SECAO 43 | url: http://www.tse.jus.br/hotSites/pesquisas-eleitorais/resultados_anos/votacao/votacao_secao_eleitoral_ 44 | 45 | detalhe_votacao_munzona: 46 | columns: 47 | 2012: DETALHE_VOTACAO_MUN_ZONA_2012 48 | 2014: DETALHE_VOTACAO_MUN_ZONA_2014 49 | url: http://agencia.tse.jus.br/estatistica/sead/odsele/detalhe_votacao_munzona/detalhe_votacao_munzona_ 50 | -------------------------------------------------------------------------------- /docs/beginner-tutorial.rst: -------------------------------------------------------------------------------- 1 | tutorial for beginners 2 | ###################### 3 | 4 | step-by-step 5 | ============ 6 | 7 | let's review the steps to start contributing: 8 | 9 | - Fork the project to your account. 10 | 11 | - Choose a path in your computer to store the project, go to it. 12 | 13 | - Clone the fork that you have just done to this path using the 14 | terminal command 15 | 16 | ``git clone https://github.com/YOUR-USERNAME/bradata`` 17 | 18 | - At this point, you should have an exact copy of the latest version of 19 | the project on your machine. 20 | 21 | Congratulations! Now you have a version of the repository in your 22 | machine. If you want to contribute and help to build this incredible 23 | project, keep reading! 24 | 25 | - do your modifications. 26 | 27 | - now you must check if your version is up-to-date with the original 28 | repository: 29 | 30 | ``git pull https://github.com/labFGV/bradata`` 31 | 32 | - if you have a merge conflict, you must solve it before committing 33 | your work. 34 | 35 | - now you stage and commit your work: 36 | 37 | :: 38 | 39 | git add YOUR_FILES 40 | git commit -m "YOUR COMMIT MESSAGE" 41 | 42 | - now you push the changes to your repo: ``git push origin master`` 43 | 44 | - finally, you go to https://github.com/labFGV/bradata and complete 45 | your pull request. 46 | 47 | how-to's 48 | ======== 49 | 50 | google and stackoverflow are your best friends, but: 51 | 52 | - git: 53 | 54 | - dudler's simple guide: 55 | `en `__, 56 | `pt-br `__ 57 | - `official docs `__ 58 | - `oh, sh\*t, git! `__ 59 | - github `guides `__ & 60 | `help `__ 61 | - 62 | 63 | - markdown: 64 | 65 | - `learn in 60 seconds `__ 66 | - `syntax specs `__ 67 | - `syntax philosophy and some 68 | specs `__ 69 | - 70 | 71 | - Restructured text 72 | 73 | - ` Restructured text and sphinx `_ 74 | - 75 | 76 | - -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = bradata 3 | summary = Python package to get Brazilian government data. 4 | author = LABFGV 5 | author-email = bcclaro@protonmail.ch 6 | license = MIT 7 | home-page = http://bradata.readthedocs.io/en/latest/ 8 | description-file = README.rst 9 | # Add here all kinds of additional classifiers as defined under 10 | # https://pypi.python.org/pypi?%3Aaction=list_classifiers 11 | classifier = 12 | Development Status :: 4 - Beta 13 | Programming Language :: Python 14 | 15 | [entry_points] 16 | # Add here console scripts like: 17 | # console_scripts = 18 | # script_name = bradata.module:function 19 | # For example: 20 | # console_scripts = 21 | # fibonacci = bradata.skeleton:run 22 | # as well as other entry_points. 23 | 24 | 25 | [files] 26 | # Add here 'data_files', 'packages' or 'namespace_packages'. 
27 | # Additional data files are defined as key value pairs of target directory 28 | # and source location from the root of the repository: 29 | packages = 30 | bradata 31 | # data_files = 32 | # share/bradata_docs = docs/* 33 | 34 | [extras] 35 | # Add here additional requirements for extra features, like: 36 | # PDF = 37 | # ReportLab>=1.2 38 | # RXP 39 | 40 | [test] 41 | # py.test options when running `python setup.py test` 42 | addopts = tests 43 | 44 | [tool:pytest] 45 | # Options for py.test: 46 | # Specify command line options as you would do when invoking py.test directly. 47 | # e.g. --cov-report html (or xml) for html/xml output or --junitxml junit.xml 48 | # in order to write a coverage file that can be read by Jenkins. 49 | addopts = 50 | --cov bradata --cov-report term-missing 51 | --verbose 52 | 53 | [aliases] 54 | docs = build_sphinx 55 | 56 | [bdist_wheel] 57 | # Use this option if your package is pure-python 58 | universal = 1 59 | 60 | [build_sphinx] 61 | source_dir = docs 62 | build_dir = docs/_build 63 | 64 | [pbr] 65 | # Let pbr run sphinx-apidoc 66 | autodoc_tree_index_modules = True 67 | # autodoc_tree_excludes = ... 68 | # Let pbr itself generate the apidoc 69 | # autodoc_index_modules = True 70 | # autodoc_exclude_modules = ... 71 | # Convert warnings to errors 72 | # warnerrors = True 73 | 74 | [devpi:upload] 75 | # Options for the devpi: PyPI server and packaging tool 76 | # VCS export must be deactivated since we are using setuptools-scm 77 | no-vcs = 1 78 | formats = bdist_wheel 79 | -------------------------------------------------------------------------------- /bradata/connection.py: -------------------------------------------------------------------------------- 1 | #import tqdm 2 | import time 3 | import requests 4 | 5 | # how to use the warning below: do something equivalent to `from bradata.connection import _stale_url_warning` and then 6 | # `_stale_url_warning.format((req.status_code, req.text, website.name, website.url) 7 | _stale_url_warning = """the request failed with code {}:\n"{}"\nthe {} server may be down, or it may have changed its address or architecture.\nplease report the latter to the maintainers.\nyou can check if the website is online at {}""" 8 | 9 | class Connection: 10 | """ 11 | Class that handle connections 12 | """ 13 | 14 | def perform_request(self, url, nr_tries=5, binary=False): 15 | """ 16 | Perform a request handling exception and server errors printing status 17 | :param url: string 18 | :param nr_tries: int 19 | :return: dict :: status : ok/error, content: xml/url, [error_type, error_desc] if error 20 | """ 21 | count = 0 22 | while True: 23 | try: 24 | print('Fetch {}'.format(url)) 25 | 26 | #fetch_time = time.time() 27 | req = requests.get(url, timeout=1) 28 | #fetch_time = time.time() - fetch_time 29 | 30 | status = req.status_code 31 | if status != 200: 32 | print('ERROR {}. {}'.format(req.status_code, req.text)) 33 | time.sleep(5) 34 | 35 | count += 1 36 | if count > nr_tries: 37 | print('Too many errors in a row. Returned: ERROR {}'.format(req.status_code)) 38 | return {'status': 'error', 'error_type': req.status_code, 'error_desc': req.text, 'content': url} 39 | 40 | print('Trying Again...') 41 | continue 42 | 43 | else: 44 | if binary: 45 | return {'status': 'ok', 'content': req.content} 46 | else: 47 | return {'status': 'ok', 'content': req.text} 48 | 49 | except Exception as e: 50 | 51 | print('EXCEPTION {}'.format(e)) 52 | 53 | count += 1 54 | if count > nr_tries: 55 | print('Too many exceptions in a row. 
Returned: ERROR EXC') 56 | return {'status': 'error', 'error_type': 'exception', 'error_desc': e, 'content': url} 57 | 58 | time.sleep(5) 59 | print('Trying Again...') 60 | continue 61 | -------------------------------------------------------------------------------- /bradata/tse/candidatos.py: -------------------------------------------------------------------------------- 1 | from bradata.connection import Connection 2 | from bradata.utils import _must_contain, _treat_inputs, _unzip, _set_download_directory 3 | from bradata.tse.utils_tse import unzip_tse, aggregate_tse, download_headers 4 | import time 5 | import os 6 | 7 | 8 | class Candidatos: 9 | """ 10 | Download, organize and pre-process candidatos data from TSE 11 | 12 | http://www.tse.jus.br/eleicoes/estatisticas/repositorio-de-dados-eleitorais 13 | """ 14 | 15 | def download(self, type=None, year=None): 16 | """ 17 | Download a certain type of data from a year in the Candidatos option 18 | 19 | You can also get several years or types, just pass a list 20 | 21 | Types can be: 22 | - candidatos 23 | - bens 24 | - legendas 25 | - vagas 26 | 27 | This method covers the following years: 2016, 2014 28 | 29 | So, to download candidatos data from 2014, just put download(type='candidatos', ano=2015) 30 | 31 | Args: 32 | type: str or list with the type of the data 33 | year: str or int or list with a year 34 | 35 | Returns: Saves data to a local data file as ../bradata/tse/[state]/candidatos_[type]_[year].csv 36 | """ 37 | 38 | _must_contain({'type': type, 'year': year}, ['type', 'year']) # raises error if keywords do not exist 39 | 40 | if not isinstance(type, list): 41 | type = [type] 42 | 43 | if not isinstance(year, list): 44 | year = [year] 45 | 46 | conn = Connection() 47 | 48 | download_headers() 49 | 50 | for t in type: 51 | if t == 'candidatos': 52 | base_url = "http://agencia.tse.jus.br/estatistica/sead/odsele/consulta_cand/consulta_cand_" 53 | 54 | elif t == 'bens': 55 | base_url = "http://agencia.tse.jus.br/estatistica/sead/odsele/bem_candidato/bem_candidato_" 56 | 57 | elif t == 'legendas': 58 | base_url = "http://agencia.tse.jus.br/estatistica/sead/odsele/consulta_legendas/consulta_legendas_" 59 | 60 | elif t == 'vagas': 61 | base_url = "http://agencia.tse.jus.br/estatistica/sead/odsele/consulta_vagas/consulta_vagas_" 62 | 63 | else: 64 | 65 | print(t) 66 | raise Exception('Type should be candidatos, bens, legendas or vagas') 67 | 68 | print('Type: ', t) 69 | 70 | for y in year: 71 | 72 | print('Year: ', y) 73 | 74 | url = base_url + _treat_inputs(y) + '.zip' # treat_inputs turn int into str, raises error if diff type 75 | 76 | print('Downloading\n') 77 | 78 | time0 = time.time() 79 | result = conn.perform_request(url, binary=True) 80 | 81 | 82 | if result['status'] == 'ok': 83 | result = result['content'] 84 | else: 85 | print('File was not dowloaded') 86 | continue 87 | print('Download Time: ', time.time() - time0) 88 | 89 | print('Unzip') 90 | unzip_tse(result, _set_download_directory() + '/tse/' + t) 91 | 92 | print('Aggregate') 93 | aggregate_tse(_set_download_directory() + '/tse/', t, y) 94 | print('Done') 95 | 96 | print('Finished') -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | .. _bradata-contributing: 2 | 3 | Contributing 4 | ############ 5 | 6 | **note**: nothing here is set in stone. if you think something here is misguided, speak to the maintainers. 
7 | 8 | general guidelines 9 | ================== 10 | 11 | - OPEN-SOURCE: this is an open-source project. therefore, everything in 12 | it should be open-source (scripts, documentation, file formats, etc). 13 | 14 | - LANGUAGE: this project's language is English, even if most of our 15 | contributors are Brazilian and we're working with Brazilian data. Our 16 | purpose is to make this project welcoming of international 17 | contributors and maybe even spread its idea abroad. 18 | 19 | - STANDARDS: whenever possible use (or convert things to) the 20 | international standard. for most data, this will mean changing the 21 | encoding from latin1 to UTF-8 and changing the date format from 22 | DD/MM/YYYY to YYYY-MM-DD. standardizing will make it easier to work 23 | with several databases together. if you find something that should be 24 | an exception, open an issue or talk to the coordinators. 25 | 26 | - ATTRIBUTION: please be aware when employing third-party software: 27 | check if their license is compatible with your use. (if unsure, ask). 28 | **always** attribute someone else's work to them. similarly, when you 29 | complete any work, you must attribute it to yourself under an 30 | open-source license. check `here `__ if 31 | unsure about a license, or just pick the MIT license which is our 32 | default. all files contributed must be prefixed by their license and 33 | author in a comment. 34 | 35 | - DOCUMENTATION: all code must be thoroughly documented. undocumented 36 | code or incomprehensible code will not be accepted. choose clarity 37 | over performance unless you absolutely have to pick the latter. 38 | (hint: `you almost never 39 | will `__.) 40 | 41 | - 42 | 43 | code guidelines 44 | =============== 45 | 46 | file structure 47 | -------------- 48 | 49 | in the bradata package, every smodule is an institution (data provider). at its directory, its ``__init__.py`` should contain the functions and classes that are to be available to the public, and **nothing else**. that's because the preferred way for a user to use the ``bradata`` package is to explore what it has to offer by tab-completion available at ipython and jupyter notebook, as the package is projected to have a number of functions greater than what a user would like to memorize. 50 | 51 | importing only the public functions in the ``__init__.py`` file prevents the namespace from being crowded with private objects:: 52 | 53 | import bradata.tse as tse 54 | tse.get_candidatos() 55 | 56 | submodules should be divided by similarity or proximity, for instance ``bradata/cgu/_cadastros.py`` has functions to get three different databases, but as the code to get them is mostly the same they reside together. (the three functions are actually only one function and two wrappers, to prevent writing more code than we need to). if the submodule is not meant to be called by the user, it should start with an underscore (\_), so that it doesn't pollute the namespace. 57 | 58 | git workflow 59 | ============ 60 | 61 | so you've forked the repo and added some nice functionality, or correct some bug. thank you very much! but before we can accept your work, you must follow a few simple procedures: 62 | 63 | - document every function, class, module, etc. you create or change, prerrably using `google-style docstrings `_. if you are implementing some tricky part, we'd appreciate if you wrote a tutorial or some kind of extensive documentation. we autogenerate documentation using sphinx, and you may write in .md or .rst, but please write. 
64 | 65 | - always ``git pull [source-repo] master`` before making a pull request! 66 | 67 | - if you created a new public module or submodule, import it in the ``__init__.py`` of the main package. 68 | 69 | - add your name to the :ref:`bradata-authors`; 70 | 71 | contributors 72 | ============ 73 | 74 | contributors are listed under :ref:`bradata-authors`. only people 75 | who have had a pull request accepted are listed as contributors. 76 | -------------------------------------------------------------------------------- /bradata/utils.py: -------------------------------------------------------------------------------- 1 | import zipfile 2 | import os 3 | import bradata 4 | import datetime 5 | 6 | # this function is reinventing the wheel, check requests.get documentation 7 | def _make_url(api_house=None, base_url= None, params=None): 8 | """ 9 | It builds the url based on the house webservice and parameters 10 | Args: 11 | api_house: str:: 'camara' or 'senado' 12 | webservice: str:: specify a webservice such as 'deputados', ' 13 | params: dict:: parameters that compose the url 14 | 15 | Returns: str 16 | The API url 17 | """ 18 | 19 | if api_house == None: 20 | raise ReferenceError ('No API House Specified') 21 | 22 | if base_url == None: 23 | raise ReferenceError ('No Base Url Specified') 24 | 25 | elif api_house == 'camara': 26 | # EndPoints 27 | for i, items in enumerate(params.items()): 28 | 29 | key, value = [_treat_inputs(i) for i in items] 30 | 31 | if value == None: 32 | value = '' 33 | 34 | base_url += key + '=' + value 35 | 36 | if len(params) - i > 1: 37 | 38 | base_url += '&' 39 | 40 | return base_url 41 | 42 | # python has a built-in type checking tool in python 3.5 43 | def _treat_inputs(value): 44 | """ 45 | Make sure that inputs are in the right type 46 | 47 | Ints and floats are converted to strings 48 | 49 | Args: 50 | value: str, int, float 51 | 52 | Returns: Ints and floats are converted to strings 53 | """ 54 | if value is None: 55 | return value 56 | 57 | if not isinstance(value, (int, float, str)): 58 | raise AttributeError('This is a {}.\n' 59 | 'Make sure to insert an int, float or str' 60 | .format(type(value))) 61 | 62 | if isinstance(value, (int, float)): 63 | value = str(value) 64 | 65 | return value 66 | 67 | 68 | def _must_contain(this=None, keys=None): 69 | """ 70 | Check whether the specified values exists on a dict 71 | 72 | This function presumes that all keys are mapped on this dict 73 | Args: 74 | this: dict :: variable names and their values 75 | keys: list :: variable names that must not be None 76 | 77 | Returns: 78 | True if the dict contains the values 79 | Raise error if there are missing values 80 | """ 81 | 82 | result = {k: v is None for k, v in this.items() if k in keys} 83 | 84 | missing_keys = [k for k, v in result.items() if v is True] 85 | 86 | if len(missing_keys) != 0: 87 | raise AttributeError('{} must have a value'.format(','.join(str(p) for p in missing_keys))) 88 | 89 | else: 90 | return True 91 | 92 | def _unzip(source_filename, dest_dir): 93 | with zipfile.ZipFile(source_filename) as zf: 94 | zf.extractall(dest_dir) 95 | 96 | 97 | def _set_download_directory(user_path=None): 98 | """ 99 | sets up a directory where files downloaded by bradata will be stored. it is 100 | usually located in the user's home directory at bradata/, but a personalized 101 | path can be set. there's currently no support for persisting this personal 102 | choice. 
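    for example (illustrative only; the exact result depends on your OS and username):

        _set_download_directory()          # -> '/home/<user>/bradata_download'
        _set_download_directory('/tmp')    # -> '/tmp/bradata_download'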
103 | """ 104 | if user_path is None: 105 | user_path = os.path.expanduser('~') 106 | download_path = os.path.join(user_path, "bradata_download") 107 | try: 108 | os.makedirs(download_path) 109 | except FileExistsError: 110 | pass 111 | except PermissionError: 112 | user_path = input("bradata doesn't seem to have the permission to write to the default download directory. please specify your desired download path:\n ") 113 | download_path = _set_download_directory(user_path) # to check if provided path is writable 114 | return download_path 115 | 116 | def _create_download_subdirectory(submodule_name): 117 | submodule_download_path = os.path.join(bradata.__download_dir__, submodule_name) 118 | if not os.path.exists(submodule_download_path): 119 | os.mkdir(submodule_download_path) 120 | return None 121 | 122 | 123 | def _parse_time(date, freq='d'): 124 | #add docs and make tests (what happens if begin_date>end_date?) 125 | freq_dict = {'d': '%Y-%m-%d', 'm': '%Y-%m', 'y': '%Y', 'a': '%Y'} 126 | freq_str = freq_dict[freq.lower()] 127 | if date is None: 128 | date = datetime.date.today() 129 | elif isinstance(date, str): 130 | date = datetime.datetime.strptime(date, freq_str).date() 131 | if not isinstance(date, datetime.date): 132 | raise Exception("begin_date or end_date not valid input. input must be string in {} format or a valid datetime object.".format(freq_str)) 133 | return date 134 | -------------------------------------------------------------------------------- /bradata/tse/headersTSE.csv: -------------------------------------------------------------------------------- 1 | "PERFIL_ELEITORADO","CONSULTA_CAND_2010","CONSULTA_CAND_2012","CONSULTA_CAND_2014","BEM_CANDIDATO","CONSULTA_LEGENDAS","CONSULTA_VAGAS","VOTACAO_CANDIDATO_MUN_ZONA_2012","VOTACAO_CANDIDATO_MUN_ZONA_2014","VOTACAO_PARTIDO_MUN_ZONA_2012","VOTACAO_PARTIDO_MUN_ZONA_2014","VOTO_SECAO","DETALHE_VOTACAO_MUN_ZONA_2012","DETALHE_VOTACAO_MUN_ZONA_2014" 2 | "PERIODO","DATA_GERACAO","DATA_GERACAO","DATA_GERACAO","DATA_GERACAO","DATA_GERACAO","DATA_GERACAO","DATA_GERACAO","DATA_GERACAO","DATA_GERACAO","DATA_GERACAO","DATA_GERACAO","DATA_GERACAO","DATA_GERACAO" 3 | "UF","HORA_GERACAO","HORA_GERACAO","HORA_GERACAO","HORA_GERACAO","HORA_GERACAO","HORA_GERACAO","HORA_GERACAO","HORA_GERACAO","HORA_GERACAO","HORA_GERACAO","HORA_GERACAO","HORA_GERACAO","HORA_GERACAO" 4 | "MUNICIPIO","ANO_ELEICAO","ANO_ELEICAO","ANO_ELEICAO","ANO_ELEICAO","ANO_ELEICAO","ANO_ELEICAO","ANO_ELEICAO","ANO_ELEICAO","ANO_ELEICAO","ANO_ELEICAO","ANO_ELEICAO","ANO_ELEICAO","ANO_ELEICAO" 5 | "COD_MUNICIPIO_TSE","NUM_TURNO ","NUM_TURNO ","NUM_TURNO ","DESCRICAO_ELEICAO","NUM_TURNO","DESCRICAO_ELEICAO","NUM_TURNO","NUM_TURNO","NUM_TURNO","NUM_TURNO","NUM_TURNO","NUM_TURNO","NUM_TURNO" 6 | "NR_ZONA","DESCRICAO_ELEICAO","DESCRICAO_ELEICAO","DESCRICAO_ELEICAO","SIGLA_UF","DESCRICAO_ELEICAO","SIGLA_UF","DESCRICAO_ELEICAO","DESCRICAO_ELEICAO","DESCRICAO_ELEICAO","DESCRICAO_ELEICAO","DESCRICAO_ELEICAO","DESCRICAO_ELEICAO","DESCRICAO_ELEICAO" 7 | "SEXO","SIGLA_UF","SIGLA_UF","SIGLA_UF","SQ_CANDIDATO","SIGLA_UF","SIGLA_UE","SIGLA_UF","SIGLA_UF","SIGLA_UF","SIGLA_UF","SIGLA_UF","SIGLA_UF","SIGLA_UF" 8 | "FAIXA_ETARIA","SIGLA_UE ","SIGLA_UE ","SIGLA_UE ","CD_TIPO_BEM_CANDIDATO","SIGLA_UE","NOME_UE","SIGLA_UE","SIGLA_UE","SIGLA_UE","SIGLA_UE","SIGLA_UE","SIGLA_UE","SIGLA_UE" 9 | 
"GRAU_DE_ESCOLARIDADE","DESCRICAO_UE","DESCRICAO_UE","DESCRICAO_UE","DS_TIPO_BEM_CANDIDATO","NOME_UE","CODIGO_CARGO","CODIGO_MUNICIPIO","CODIGO_MUNICIPIO","CODIGO_MUNICIPIO","CODIGO_MUNICIPIO","CODIGO_MUNICIPIO","CODIGO_MUNICIPIO","CODIGO_MUNICIPIO" 10 | "QTD_ELEITORES_NO_PERFIL","CODIGO_CARGO","CODIGO_CARGO","CODIGO_CARGO","DETALHE_BEM","CODIGO_CARGO","DESCRICAO_CARGO","NOME_MUNICIPIO","NOME_MUNICIPIO","NOME_MUNICIPIO","NOME_MUNICIPIO","NOME_MUNICIPIO","NOME_MUNICIPIO","NOME_MUNICIPIO" 11 | ,"DESCRICAO_CARGO","DESCRICAO_CARGO","DESCRICAO_CARGO","VALOR_BEM","DESCRICAO_CARGO","QTDE_VAGAS","NUMERO_ZONA","NUMERO_ZONA","NUMERO_ZONA","NUMERO_ZONA","NUM_ZONA","NUMERO_ZONA","NUMERO_ZONA" 12 | ,"NOME_CANDIDATO","NOME_CANDIDATO","NOME_CANDIDATO","DATA_ULTIMA_ATUALIZACAO","TIPO_LEGENDA",,"CODIGO_CARGO","CODIGO_CARGO","CODIGO_CARGO","CODIGO_CARGO","NUM_SECAO","CODIGO_CARGO","CODIGO_CARGO" 13 | ,"SEQUENCIAL_CANDIDATO","SEQUENCIAL_CANDIDATO","SEQUENCIAL_CANDIDATO","HORA_ULTIMA_ATUALIZACAO ","NUM_PARTIDO",,"NUMERO_CAND","NUMERO_CAND","DESCRICAO_CARGO","DESCRICAO_CARGO","CODIGO_CARGO","DESCRICAO_CARGO","DESCRICAO_CARGO" 14 | ,"NUMERO_CANDIDATO","NUMERO_CANDIDATO","NUMERO_CANDIDATO",,"SIGLA_PARTIDO",,"SQ_CANDIDATO","SQ_CANDIDATO","TIPO_LEGENDA","TIPO_LEGENDA","DESCRICAO_CARGO","QTD_APTOS","QTD_APTOS" 15 | ,"CPF_CANDIDATO","CPF_CANDIDATO","CPF_CANDIDATO",,"NOME_PARTIDO",,"NOME_CANDIDATO","NOME_CANDIDATO","NOME_COLIGACAO","NOME_COLIGACAO","NUM_VOTAVEL","QTD_SECOES","QTD_SECOES" 16 | ,"NOME_URNA_CANDIDATO","NOME_URNA_CANDIDATO","NOME_URNA_CANDIDATO",,"SIGLA_COLIGACAO",,"NOME_URNA_CANDIDATO","NOME_URNA_CANDIDATO","COMPOSICAO_LEGENDA","COMPOSICAO_LEGENDA","QTDE_VOTOS","QTD_SECOES_AGREGADAS","QTD_SECOES_AGREGADAS" 17 | ,"COD_SITUACAO_CANDIDATURA","COD_SITUACAO_CANDIDATURA","COD_SITUACAO_CANDIDATURA",,"NOME_COLIGACAO",,"DESCRICAO_CARGO","DESCRICAO_CARGO","SIGLA_PARTIDO","SIGLA_PARTIDO",,"QTD_APTOS_TOT","QTD_APTOS_TOT" 18 | ,"DES_SITUACAO_CANDIDATURA","DES_SITUACAO_CANDIDATURA","DES_SITUACAO_CANDIDATURA",,"COMPOSICAO_COLIGACAO",,"COD_SIT_CAND_SUPERIOR","COD_SIT_CAND_SUPERIOR","NUMERO_PARTIDO","NUMERO_PARTIDO",,"QTD_SECOES_TOT","QTD_SECOES_TOT" 19 | ,"NUMERO_PARTIDO","NUMERO_PARTIDO","NUMERO_PARTIDO",,"SEQUENCIAL_COLIGACAO",,"DESC_SIT_CAND_SUPERIOR","DESC_SIT_CAND_SUPERIOR","NOME_PARTIDO","NOME_PARTIDO",,"QTD_COMPARECIMENTO","QTD_COMPARECIMENTO" 20 | ,"SIGLA_PARTIDO","SIGLA_PARTIDO","SIGLA_PARTIDO",,,,"CODIGO_SIT_CANDIDATO","CODIGO_SIT_CANDIDATO","QTDE_VOTOS_NOMINAIS","QTDE_VOTOS_NOMINAIS",,"QTD_ABSTENCOES","QTD_ABSTENCOES" 21 | ,"NOME_PARTIDO","NOME_PARTIDO","NOME_PARTIDO",,,,"DESC_SIT_CANDIDATO","DESC_SIT_CANDIDATO","QTDE_VOTOS_LEGENDA","QTDE_VOTOS_LEGENDA",,"QTD_VOTOS_NOMINAIS","QTD_VOTOS_NOMINAIS" 22 | ,"CODIGO_LEGENDA","CODIGO_LEGENDA","CODIGO_LEGENDA",,,,"CODIGO_SIT_CAND_TOT","CODIGO_SIT_CAND_TOT",,"TRANSITO ",,"QTD_VOTOS_BRANCOS","QTD_VOTOS_BRANCOS" 23 | ,"SIGLA_LEGENDA","SIGLA_LEGENDA","SIGLA_LEGENDA",,,,"DESC_SIT_CAND_TOT","DESC_SIT_CAND_TOT",,,,"QTD_VOTOS_NULOS","QTD_VOTOS_NULOS" 24 | ,"COMPOSICAO_LEGENDA","COMPOSICAO_LEGENDA","COMPOSICAO_LEGENDA",,,,"NUMERO_PARTIDO","NUMERO_PARTIDO",,,,"QTD_VOTOS_LEGENDA","QTD_VOTOS_LEGENDA" 25 | ,"NOME_LEGENDA","NOME_LEGENDA","NOME_LEGENDA",,,,"SIGLA_PARTIDO","SIGLA_PARTIDO",,,,"QTD_VOTOS_ANULADOS_APU_SEP","QTD_VOTOS_ANULADOS_APU_SEP " 26 | ,"CODIGO_OCUPACAO","CODIGO_OCUPACAO","CODIGO_OCUPACAO",,,,"NOME_PARTIDO","NOME_PARTIDO",,,,"DATA_ULT_TOTALIZACAO","DATA_ULT_TOTALIZACAO " 27 | 
,"DESCRICAO_OCUPACAO","DESCRICAO_OCUPACAO","DESCRICAO_OCUPACAO",,,,"SEQUENCIAL_LEGENDA","SEQUENCIAL_LEGENDA",,,,,"HORA_ULT_TOTALIZACAO " 28 | ,"DATA_NASCIMENTO","DATA_NASCIMENTO","DATA_NASCIMENTO",,,,"NOME_COLIGACAO","NOME_COLIGACAO",,,,,"TRANSITO " 29 | ,"NUM_TITULO_ELEITORAL_CANDIDATO","NUM_TITULO_ELEITORAL_CANDIDATO","NUM_TITULO_ELEITORAL_CANDIDATO",,,,"COMPOSICAO_LEGENDA","COMPOSICAO_LEGENDA",,,,, 30 | ,"IDADE_DATA_ELEICAO ","IDADE_DATA_ELEICAO","IDADE_DATA_ELEICAO",,,,"TOTAL_VOTOS","TOTAL_VOTOS",,,,, 31 | ,"CODIGO_SEXO ","CODIGO_SEXO","CODIGO_SEXO",,,,,"TRANSITO",,,,, 32 | ,"DESCRICAO_SEXO","DESCRICAO_SEXO","DESCRICAO_SEXO",,,,,,,,,, 33 | ,"COD_GRAU_INSTRUCAO","COD_GRAU_INSTRUCAO","COD_GRAU_INSTRUCAO",,,,,,,,,, 34 | ,"DESCRICAO_GRAU_INSTRUCAO","DESCRICAO_GRAU_INSTRUCAO","DESCRICAO_GRAU_INSTRUCAO",,,,,,,,,, 35 | ,"CODIGO_ESTADO_CIVIL","CODIGO_ESTADO_CIVIL","CODIGO_ESTADO_CIVIL",,,,,,,,,, 36 | ,"DESCRICAO_ESTADO_CIVIL","DESCRICAO_ESTADO_CIVIL","DESCRICAO_ESTADO_CIVIL",,,,,,,,,, 37 | ,"CODIGO_NACIONALIDADE","CODIGO_NACIONALIDADE","CODIGO_COR_RACA",,,,,,,,,, 38 | ,"DESCRICAO_NACIONALIDADE","DESCRICAO_NACIONALIDADE","DESCRICAO_COR_RACA",,,,,,,,,, 39 | ,"SIGLA_UF_NASCIMENTO","SIGLA_UF_NASCIMENTO","CODIGO_NACIONALIDADE",,,,,,,,,, 40 | ,"CODIGO_MUNICIPIO_NASCIMENTO","CODIGO_MUNICIPIO_NASCIMENTO","DESCRICAO_NACIONALIDADE",,,,,,,,,, 41 | ,"NOME_MUNICIPIO_NASCIMENTO","NOME_MUNICIPIO_NASCIMENTO","SIGLA_UF_NASCIMENTO",,,,,,,,,, 42 | ,"DESPESA_MAX_CAMPANHA","DESPESA_MAX_CAMPANHA","CODIGO_MUNICIPIO_NASCIMENTO",,,,,,,,,, 43 | ,"COD_SIT_TOT_TURNO","COD_SIT_TOT_TURNO","NOME_MUNICIPIO_NASCIMENTO",,,,,,,,,, 44 | ,"DESC_SIT_TOT_TURNO ","DESC_SIT_TOT_TURNO","DESPESA_MAX_CAMPANHA",,,,,,,,,, 45 | ,,"NM_EMAIL ","COD_SIT_TOT_TURNO",,,,,,,,,, 46 | ,,,"DESC_SIT_TOT_TURNO",,,,,,,,,, 47 | ,,,"NM_EMAIL ",,,,,,,,,, 48 | -------------------------------------------------------------------------------- /bradata/cgu/cgu.py: -------------------------------------------------------------------------------- 1 | from bradata.connection import _stale_url_warning 2 | import bradata.utils 3 | import requests 4 | import os 5 | import zipfile 6 | import io 7 | import bradata 8 | 9 | 10 | def get_cgu_data(date, cadastro, freq, consulta=None): 11 | """ 12 | gets some CGU data at http://www.portaldatransparencia.gov.br/. it is 13 | wrapped by helper functions that make the library more discoverable. it 14 | converts the csv encoding to utf8. 15 | 16 | :param date: a string in YYYY-mm-dd format or a datetime object with year, 17 | month, and day attributes. if not provided, will get current day (be 18 | careful if on other timezone than Brasília). input can be constructed by 19 | importing datetime module and typing `datetime.date(1994, 07, 18)`. 20 | :param cadastro: this is the database to be fetched (e.g., 'ceis') 21 | :param consulta: usually the same as in cadastro, but sometimes the 22 | internal API calls it something else, as in the case of CEAF. 23 | :param freq: 'd' for daily, 'm' for monthly, 'y' or 'a' for annually. 
24 | :return: downloads csv to directory bradata.__download_dir__ 25 | """ 26 | if consulta is None: # because some consultas are not the same as cadastro 27 | consulta=cadastro 28 | freq = freq.lower() 29 | date = bradata.utils._parse_time(date, freq=freq) 30 | time_dict = {'a': ['a', 'consulta'], 'm': ['a', 'm', 'consulta'], 31 | 'd': ['d', 'm', 'a', 'consulta'], 'y': ['a', 'consulta']} 32 | params = {'a': date.year, 'm': '{:02d}'.format(date.month), 33 | 'd': '{:02d}'.format(date.day), 'consulta': consulta} 34 | filtered_params = {key: params[key] for key in time_dict[freq]} # removes day portion if freq='m', e.g. 35 | r = requests.get('http://arquivos.portaldatransparencia.gov.br/downloads.asp', 36 | stream=True, params=filtered_params, timeout=1) 37 | if r.status_code == 200: 38 | request_content = io.BytesIO(r.content) 39 | if zipfile.is_zipfile(request_content): 40 | z = zipfile.ZipFile(request_content) 41 | filename = z.namelist()[0] 42 | with z.open(filename) as f: 43 | latin_db = f.read() 44 | cgu_db = latin_db.decode('cp1252').replace('\x00', '') # http://www.portaldatransparencia.gov.br/faleConosco/perguntas-tema-download-dados.asp 45 | filepath = os.path.join(bradata.__download_dir__, 'CGU', filename) 46 | bradata.utils._create_download_subdirectory('CGU') 47 | with open(filepath, mode='wt', encoding='utf8') as f: 48 | f.write(cgu_db) 49 | return "{} downloaded to {}".format(cadastro, filepath) 50 | else: 51 | print('file from this date is not available. website gave\n"""\n' 52 | '{}\n"""\nas a reply. please try a different date.' 53 | .format(r.text)) 54 | return None 55 | else: 56 | print(_stale_url_warning.format(r.status_code, r.text, 57 | 'Portal da Transparência do Governo Federal', 58 | 'http://www.portaltransparencia.gov.br/downloads/snapshot.asp?c={}' 59 | .format(cadastro))) 60 | r.raise_for_status() 61 | 62 | 63 | def get_ceis(date=None): 64 | """ 65 | gets CEIS (cadastro de empresas inidôneas e suspensas, 66 | http://www.portaldatransparencia.gov.br/ceis) data. it converts the csv 67 | encoding to utf8. 68 | :param date: a string in YYYY-mm-dd format or a datetime object with year, 69 | month, and day attributes. if not provided, will get current day (be 70 | careful if on other timezone than Brasília). input can be constructed by 71 | importing datetime module and typing `datetime.date(1994, 07, 18)`. 72 | :return: downloads csv to directory bradata.__download_dir__ 73 | """ 74 | return get_cgu_data(date=date, cadastro='CEIS', freq='d') 75 | 76 | 77 | def get_cepim(date=None): 78 | """ 79 | gets CEPIM (Cadastro de Entidades sem Fins Lucrativos Impedidas, 80 | http://www.portaldatransparencia.gov.br/cepim) data. it converts the csv 81 | encoding to utf8. 82 | :param date: a string in YYYY-mm-dd format or a datetime object with year, 83 | month, and day attributes. if not provided, will get current day (be 84 | careful if on other timezone than Brasília). input can be constructed by 85 | importing datetime module and typing `datetime.date(1994, 07, 18)`. 86 | :return: downloads csv to directory bradata.__download_dir__ 87 | """ 88 | return get_cgu_data(date=date, cadastro='CEPIM', freq='d') 89 | 90 | 91 | def get_cnep(date=None): 92 | """ 93 | gets CNEP (Cadastro Nacional de Empresas Punidas, 94 | http://www.portaldatransparencia.gov.br/cnep) data. it converts the csv 95 | encoding to utf8. 96 | :param date: a string in YYYY-mm-dd format or a datetime object with year, 97 | month, and day attributes.
if not provided, will get current day (be 98 | careful if on other timezone than Brasília). input can be constructed by 99 | importing datetime module and typing `datetime.date(1994, 07, 18)`. 100 | :return: downloads csv to directory bradata.__download_dir__ 101 | """ 102 | return get_cgu_data(date=date, cadastro='CNEP', freq='d') 103 | 104 | def get_ceaf(date=None): 105 | """ 106 | gets CEAF (Cadastro de Expulsões da Administração Federal, 107 | http://www.transparencia.gov.br/servidores/SaibaMaisPunicoes.asp) data. it 108 | converts the csv encoding to utf8. 109 | :param date: a string in YYYY-mm-dd format or a datetime object with year, 110 | month, and day attributes. if not provided, will get current day (be 111 | careful if on other timezone than Brasília). input can be constructed by 112 | importing datetime module and typing `datetime.date(1994, 07, 18)`. 113 | :return: downloads csv to directory bradata.__download_dir__ 114 | """ 115 | return get_cgu_data(date=date, cadastro='CEAF', consulta='expulsoes', freq='d') 116 | 117 | def get_diarias(date=None): 118 | """ 119 | gets pagamentos de diárias pagas aos servidores e colaboradores eventuais 120 | (http://www.portaltransparencia.gov.br/despesasdiarias/) data. it converts 121 | the csv encoding to utf8. 122 | :param date: a string in YYYY-mm format or a datetime object with year and 123 | month attributes. if not provided, will get current day (be 124 | careful if on other timezone than Brasília). input can be constructed by 125 | importing datetime module and typing `datetime.date(1994, 07, 18)`. 126 | :return: downloads csv to directory bradata.__download_dir__ 127 | """ 128 | return get_cgu_data(date=date, cadastro='Diarias', freq='m') 129 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | help: 25 | @echo "Please use \`make <target>' where <target> is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | 49 | clean: 50 | rm -rf $(BUILDDIR)/* 51 | 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | dirhtml: 58 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 61 | 62 | singlehtml: 63 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 64 | @echo 65 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 66 | 67 | pickle: 68 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 69 | @echo 70 | @echo "Build finished; now you can process the pickle files." 71 | 72 | json: 73 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 74 | @echo 75 | @echo "Build finished; now you can process the JSON files." 76 | 77 | htmlhelp: 78 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 79 | @echo 80 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 81 | ".hhp project file in $(BUILDDIR)/htmlhelp." 82 | 83 | qthelp: 84 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 85 | @echo 86 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 87 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 88 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/bradata.qhcp" 89 | @echo "To view the help file:" 90 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/bradata.qhc" 91 | 92 | devhelp: 93 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 94 | @echo 95 | @echo "Build finished." 96 | @echo "To view the help file:" 97 | @echo "# mkdir -p $HOME/.local/share/devhelp/bradata" 98 | @echo "# ln -s $(BUILDDIR)/devhelp $HOME/.local/share/devhelp/bradata" 99 | @echo "# devhelp" 100 | 101 | epub: 102 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 103 | @echo 104 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub."
105 | 106 | latex: 107 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 108 | @echo 109 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 110 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 111 | "(use \`make latexpdf' here to do that automatically)." 112 | 113 | latexpdf: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo "Running LaTeX files through pdflatex..." 116 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 117 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 118 | 119 | latexpdfja: 120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 121 | @echo "Running LaTeX files through platex and dvipdfmx..." 122 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 123 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 124 | 125 | text: 126 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 127 | @echo 128 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 129 | 130 | man: 131 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 132 | @echo 133 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 134 | 135 | texinfo: 136 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 137 | @echo 138 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 139 | @echo "Run \`make' in that directory to run these through makeinfo" \ 140 | "(use \`make info' here to do that automatically)." 141 | 142 | info: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo "Running Texinfo files through makeinfo..." 145 | make -C $(BUILDDIR)/texinfo info 146 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 147 | 148 | gettext: 149 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 150 | @echo 151 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 152 | 153 | changes: 154 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 155 | @echo 156 | @echo "The overview file is in $(BUILDDIR)/changes." 157 | 158 | linkcheck: 159 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 160 | @echo 161 | @echo "Link check complete; look for any errors in the above output " \ 162 | "or in $(BUILDDIR)/linkcheck/output.txt." 163 | 164 | doctest: 165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 166 | @echo "Testing of doctests in the sources finished, look at the " \ 167 | "results in $(BUILDDIR)/doctest/output.txt." 168 | 169 | xml: 170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 171 | @echo 172 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 173 | 174 | pseudoxml: 175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 176 | @echo 177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 
178 | -------------------------------------------------------------------------------- /menu-de-dados.csv: -------------------------------------------------------------------------------- 1 | Título,Órgão,URL,microdados,frequência,Município,UF,Esfera,Poder,Solução,observações 2 | SNIS - sistema nacional de informações sobre saneamento,ministério das cidades,http://app.cidades.gov.br/serieHistorica/,0,,,,Federal,Executivo,, 3 | ,MS - ministério da saúde,http://dados.saude.gov.br/organization/ministerio-da-saude-ms?page=2,,,,,Federal,Executivo,, 4 | ,BCB - banco central do brasil,http://dadosabertos.bcb.gov.br/,,,,,Federal,Executivo,, 5 | ,CVM,http://sistemas.cvm.gov.br/Port/DownloadArqs/download02.htm,,,,,Federal,Executivo,, 6 | ,CEF- caixa econômica federal,,,,,,Federal,Executivo,, 7 | ,TSE,http://www.tse.jus.br/transparencia/acesso-a-informacao/acesso-a-informacao,,,,,Federal,Judiciário,, 8 | repositório de dados eleitorais,,http://www.tse.jus.br/eleicoes/estatisticas/repositorio-de-dados-eleitorais,,,,,,,, 9 | ,MTE - ministério do trabalho,http://trabalho.gov.br/dados-abertos,,,,,Federal,Executivo,, 10 | RAIS/Caged,MTE - ministério do trabalho,http://pdet.mte.gov.br/microdados-rais-e-caged,1,mês,,,Federal,Executivo,, 11 | ,ANS,http://www.ans.gov.br/acesso-a-informacao/dados-abertos,,,,,Federal,Executivo,,Não implementado 12 | ,ANTT,http://www3.transparencia.gov.br/index.jsp?CodigoOrgao=39250&TipoOrgao=2&consulta=0,,,,,Federal,Executivo,, 13 | ,ANATEL,http://dados.gov.br/organization/5fbc22fd-5842-4bde-b48e-669c4ec95317?tags=Anatel,,,,,Federal,Executivo,, 14 | ,ANAC,http://www.anac.gov.br/assuntos/dados-e-estatisticas,,,,,Federal,Executivo,, 15 | ,INFRAERO,http://www.infraero.gov.br/index.php/br/acesso-a-informacao.html,,,,,Federal,Executivo,, 16 | ,ANP,http://www.anp.gov.br/wwwanp/dados-estatisticos,,,,,Federal,Executivo,, 17 | ,ANVISA,http://portal.anvisa.gov.br/dados-abertos,,,,,Federal,Executivo,, 18 | ,SUSEP,http://www2.susep.gov.br/menuestatistica/SES/principal.aspx,,mês,,,Federal,Executivo,, 19 | ,ANEEL,http://www.aneel.gov.br/dados,,,,,Federal,Executivo,, 20 | ,INEP,http://portal.inep.gov.br/microdados,,,,,Federal,Executivo,, 21 | ,ANTAQ,http://portal.antaq.gov.br/index.php/estatisticas/,,,,,Federal,Executivo,, 22 | ,CGU - Portal da Transparência,http://www.portaltransparencia.gov.br/downloads/,1,dia,,,Federal,Executivo,, 23 | Despesas - gasto direto,CGU - Portal da Transparência,,1,mês,,,Federal,Executivo,, 24 | Despesas - diárias de viagens,CGU - Portal da Transparência,http://www.portaltransparencia.gov.br/downloads/mensal.asp?c=Diarias#exercicios2017,1,mês,,,Federal,Executivo,, 25 | Despesas - CPGF,CGU - Portal da Transparência,,1,mês,,,Federal,Executivo,,fonte é banco central; checar qual é mais apropriado. 
26 | Despesas – Gastos Diretos – Lista de Favorecidos - Pessoa Jurídica,CGU - Portal da Transparência,http://www.portaltransparencia.gov.br/downloads/mensal.asp?c=FavorecidosGastosDiretos,1,mês,,,Federal,Executivo,, 27 | Despesas - Transferências - Pagamentos,CGU - Portal da Transparência,http://www.portaltransparencia.gov.br/downloads/mensal.asp?c=Transferencias,1,mês,,,Federal,Executivo,, 28 | Despesas - Transferências – Cartão de Pagamentos da Defesa Civil (CPDC),CGU - Portal da Transparência,http://www.portaltransparencia.gov.br/downloads/mensal.asp?c=CPDC,1,mês,,,Federal,Executivo,, 29 | Despesas – Tranferências - Lista de Favorecidos - Pessoa Jurídica,CGU - Portal da Transparência,http://www.portaltransparencia.gov.br/downloads/mensal.asp?c=FavorecidosTransferencias,1,mês,,,Federal,Executivo,, 30 | Despesas – Transferências – Detalhamento de Consórcios - Pessoas Jurídicas,CGU - Portal da Transparência,http://www.portaltransparencia.gov.br/downloads/mensal.asp?c=ConsorcioTR,1,mês,,,Federal,Executivo,, 31 | Despesas – Transferências – Programas Sociais – Bolsa Família - Pagamentos,CGU - Portal da Transparência,http://www.portaltransparencia.gov.br/downloads/mensal.asp?c=BolsaFamiliaFolhaPagamento,1,mês,,,Federal,Executivo,, 32 | Despesas – Transferências – Programas Sociais – Bolsa Família - Saques,CGU - Portal da Transparência,http://www.portaltransparencia.gov.br/downloads/mensal.asp?c=BolsaFamiliaSacado,1,mês,,,Federal,Executivo,, 33 | Despesas – Transferências – Programas Sociais – Pescador Artesanal,CGU - Portal da Transparência,http://www.portaltransparencia.gov.br/downloads/mensal.asp?c=SeguroDefeso,1,mês,,,Federal,Executivo,, 34 | Despesas – Transferências – Outros Programas Sociais,CGU - Portal da Transparência,http://www.portaltransparencia.gov.br/downloads/mensal.asp?c=OutrasTransferenciasCidadao,1,mês,,,Federal,Executivo,, 35 | Cadastro de Entidades sem Fins Lucrativos Impedidas (CEPIM),CGU - Portal da Transparência,http://www.portaltransparencia.gov.br/downloads/snapshot.asp?c=CEPIM,1,mês,,,Federal,Executivo,, 36 | Cadastro Nacional de Empresas Punidas (CNEP),CGU - Portal da Transparência,http://www.portaltransparencia.gov.br/downloads/snapshot.asp?c=CNEP,1,dia,,,Federal,Executivo,, 37 | orçamento federal,Ministério do Planejamento,https://www1.siop.planejamento.gov.br/siopdoc/doku.php/acesso_publico:dados_abertos,1,,,,Federal,Planejamento,, 38 | ,tesouro nacional,http://www.tesourotransparente.gov.br/,,,,,Federal,Executivo,, 39 | Alagoas em dados e informações,,http://dados.al.gov.br/,,,,AL,Estadual,Executivo,CKAN, 40 | Fortaleza Dados Abertos,,http://dados.fortaleza.ce.gov.br/,,,Fortaleza,CE,Municipal,Executivo,CKAN, 41 | Dados abertos – TCM-CE,,http://api.tcm.ce.gov.br/,,,,CE,Estadual,Legislativo,Interna, 42 | Dados abertos Distrito Federal,,http://www.dadosabertos.df.gov.br/,,,,DF,Estadual,Executivo,CKAN, 43 | Dados abertos – Governo do ES,,http://www.transparencia.es.gov.br/menu_principal/dados_abertos.asp,,,,ES,Estadual,Executivo,Interna, 44 | Dados abertos – Goiás Transparente,,http://www.transparencia.go.gov.br/pagina.php?id=740,,,,GO,Estadual,Executivo,Interna, 45 | Dados abertos – Assembleia de Minas,,http://dadosabertos.almg.gov.br/ws/ajuda/sobre,,,,MG,Estadual,Legislativo,Interna, 46 | Dados abertos – Estado de MG,,http://www.transparencia.mg.gov.br/dados-abertos,,,,MG,Estadual,Executivo,CKAN, 47 | Dados abertos do SAGRES – TCE/PB,,http://portal.tce.pb.gov.br/dados-abertos-do-sagres-tcepb/,,,,PB,Estadual,Legislativo,Interna, 48 | Dados abertos – Governo de 
Pernambuco,,http://www.dadosabertos.pe.gov.br/,,,,PE,Estadual,Executivo,Interna, 49 | Dados Recife,,http://dados.recife.pe.gov.br,,,Recife,PE,Municipal,Executivo,CKAN, 50 | Dados Abertos Curitiba,,http://www.curitiba.pr.gov.br/dadosabertos/,,,Curitiba,PR,Municipal,Executivo,Interna, 51 | data.rio,,http://data.rio/,,,Rio de Janeiro,RJ,Municipal,Executivo,CKAN, 52 | Desafio Rio Ideias,,http://ideias.rioapps.com.br/,,,Rio de Janeiro,RJ,Municipal,Executivo,Interna, 53 | Dados Geográficos Abertos da Cidade do Rio de Janeiro,,http://portalgeo.pcrj.opendata.arcgis.com/,,,Rio de Janeiro,RJ,Municipal,Executivo,ArcGIS, 54 | Dados RS,,http://dados.rs.gov.br/,,,,RS,Estadual,Executivo,CKAN, 55 | Dados abertos governamentais – Secretaria de Segurança Pública,,http://www.ssp.rs.gov.br/?model=conteudo&menu=196,,,,RS,Estadual,Executivo,Interna, 56 | Dados abertos Novo Hamburgo,,http://dados.novohamburgo.rs.gov.br/,,,Novo Hamburgo,RS,Municipal,Executivo,CKAN, 57 | DataPOA,,http://www.datapoa.com.br/,,,Porto Alegre,RS,Municipal,Executivo,CKAN, 58 | Governo Aberto SP,,http://www.governoaberto.sp.gov.br/,,,,SP,Estadual,Executivo,Interna, 59 | Programa de Dados Abertos do Parlamento,,http://www.camara.sp.gov.br/transparencia/dados-abertos/,,,São Paulo,SP,Municipal,Legislativo,Interna, 60 | Catálogo Municipal de Bases de Dados,,http://transparencia.prefeitura.sp.gov.br/administracao/Paginas/cmbd.aspx,,,São Paulo,SP,Municipal,Executivo,Interna, 61 | Dados abertos – e-cidadania,,http://dadosabertos.senado.gov.br/,,,,,Federal,Legislativo,CKAN, 62 | Dados abertos da Câmara dos Deputados,,http://www2.camara.leg.br/transparencia/dados-abertos/,,,,,Federal,Legislativo,Interna, 63 | FEE dados abertos,,http://dados.fee.tche.br/,,,,RS,Estadual,Executivo,Interna, 64 | Dados Abertos TCE-RS,,http://dados.tce.rs.gov.br/,,,,RS,Estadual,Legislativo,CKAN, 65 | Dados Abertos MPRS,,http://dados.mprs.mp.br/,,,,RS,Estadual,,Interna, 66 | Portal da Transparência Municipal,,http://transparencia.tce.sp.gov.br/,,,,SP,Municipal,Legislativo,Interna,http://www.portaltransparencia.gov.br/ 67 | Universidade Federal do Rio,,,,,,,,,, 68 | Grande do Norte - UFRN,,http://dados.ufrn.br/,,,,RN,Nacional,Executivo,CKAN, 69 | Portal dos Dados Abertos TCE/RN,,http://apidadosabertos.tce.rn.gov.br/,,,,RN,Estadual,Legislativo,Interna, 70 | Portal de Dados Abertos da Cidade de São Paulo,,http://dados.prefeitura.sp.gov.br/,,,São Paulo,SP,Municipal,Executivo,CKAN, -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # This file is execfile()d with the current directory set to its containing dir. 4 | # 5 | # Note that not all possible configuration values are present in this 6 | # autogenerated file. 7 | # 8 | # All configuration values have a default; values that are commented out 9 | # serve to show the default. 10 | 11 | import sys 12 | #from recommonmark.parser import CommonMarkParser 13 | 14 | #this is needed while the package is not installed 15 | sys.path.append('../bradata/') 16 | 17 | # If extensions (or modules to document with autodoc) are in another directory, 18 | # add these directories to sys.path here. If the directory is relative to the 19 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
20 | # sys.path.insert(0, os.path.abspath('.')) 21 | 22 | # -- Hack for ReadTheDocs ------------------------------------------------------ 23 | # This hack is necessary since RTD does not issue `sphinx-apidoc` before running 24 | # `sphinx-build -b html . _build/html`. See Issue: 25 | # https://github.com/rtfd/readthedocs.org/issues/1139 26 | # DON'T FORGET: Check the box "Install your project inside a virtualenv using 27 | # setup.py install" in the RTD Advanced Settings. 28 | import os 29 | on_rtd = os.environ.get('READTHEDOCS', None) == 'True' 30 | if on_rtd: 31 | import inspect 32 | from sphinx import apidoc 33 | 34 | __location__ = os.path.join(os.getcwd(), os.path.dirname( 35 | inspect.getfile(inspect.currentframe()))) 36 | 37 | output_dir = os.path.join(__location__, "../docs/api") 38 | module_dir = os.path.join(__location__, "../bradata") 39 | cmd_line_template = "sphinx-apidoc -f -o {outputdir} {moduledir}" 40 | cmd_line = cmd_line_template.format(outputdir=output_dir, moduledir=module_dir) 41 | apidoc.main(cmd_line.split(" ")) 42 | 43 | # -- General configuration ----------------------------------------------------- 44 | 45 | # If your documentation needs a minimal Sphinx version, state it here. 46 | # needs_sphinx = '1.0' 47 | 48 | # Add any Sphinx extension module names here, as strings. They can be extensions 49 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 50 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.intersphinx', 'sphinx.ext.todo', 51 | 'sphinx.ext.autosummary', 'sphinx.ext.viewcode', 'sphinx.ext.coverage', 52 | 'sphinx.ext.doctest', 'sphinx.ext.ifconfig', 'sphinx.ext.imgmath', 53 | 'sphinx.ext.napoleon'] 54 | # Add any paths that contain templates here, relative to this directory. 55 | templates_path = ['_templates'] 56 | 57 | # to make markdown work. needs commonmark package 58 | """source_parsers = { 59 | '.md': 'recommonmark.parser.CommonMarkParser', 60 | }""" 61 | 62 | # The suffix of source filenames. 63 | source_suffix = ['.rst'] 64 | 65 | # The encoding of source files. 66 | # source_encoding = 'utf-8-sig' 67 | 68 | # The master toctree document. 69 | master_doc = 'index' 70 | 71 | # General information about the project. 72 | project = u'bradata' 73 | copyright = u'2017, AUTHORS' 74 | 75 | # The version info for the project you're documenting, acts as replacement for 76 | # |version| and |release|, also used in various other places throughout the 77 | # built documents. 78 | # 79 | # The short X.Y version. 80 | version = '' # Is set by calling `setup.py docs` 81 | # The full version, including alpha/beta/rc tags. 82 | release = '' # Is set by calling `setup.py docs` 83 | 84 | # The language for content autogenerated by Sphinx. Refer to documentation 85 | # for a list of supported languages. 86 | # language = None 87 | 88 | # There are two options for replacing |today|: either, you set today to some 89 | # non-false value, then it is used: 90 | # today = '' 91 | # Else, today_fmt is used as the format for a strftime call. 92 | # today_fmt = '%B %d, %Y' 93 | 94 | # List of patterns, relative to source directory, that match files and 95 | # directories to ignore when looking for source files. 96 | exclude_patterns = ['_build'] 97 | 98 | # The reST default role (used for this markup: `text`) to use for all documents. 99 | # default_role = None 100 | 101 | # If true, '()' will be appended to :func: etc. cross-reference text. 
102 | # add_function_parentheses = True 103 | 104 | # If true, the current module name will be prepended to all description 105 | # unit titles (such as .. function::). 106 | # add_module_names = True 107 | 108 | # If true, sectionauthor and moduleauthor directives will be shown in the 109 | # output. They are ignored by default. 110 | # show_authors = False 111 | 112 | # The name of the Pygments (syntax highlighting) style to use. 113 | pygments_style = 'sphinx' 114 | 115 | # A list of ignored prefixes for module index sorting. 116 | # modindex_common_prefix = [] 117 | 118 | # If true, keep warnings as "system message" paragraphs in the built documents. 119 | # keep_warnings = False 120 | 121 | 122 | # -- Options for HTML output --------------------------------------------------- 123 | 124 | # The theme to use for HTML and HTML Help pages. See the documentation for 125 | # a list of builtin themes. 126 | html_theme = 'alabaster' 127 | 128 | # Theme options are theme-specific and customize the look and feel of a theme 129 | # further. For a list of options available for each theme, see the 130 | # documentation. 131 | # html_theme_options = {} 132 | 133 | # Add any paths that contain custom themes here, relative to this directory. 134 | # html_theme_path = [] 135 | 136 | # The name for this set of Sphinx documents. If None, it defaults to 137 | # "<project> v<release> documentation". 138 | try: 139 | from bradata import __version__ as version 140 | except ImportError: 141 | pass 142 | else: 143 | release = version 144 | 145 | # A shorter title for the navigation bar. Default is the same as html_title. 146 | # html_short_title = None 147 | 148 | # The name of an image file (relative to this directory) to place at the top 149 | # of the sidebar. 150 | # html_logo = "" 151 | 152 | # The name of an image file (within the static path) to use as favicon of the 153 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 154 | # pixels large. 155 | # html_favicon = None 156 | 157 | # Add any paths that contain custom static files (such as style sheets) here, 158 | # relative to this directory. They are copied after the builtin static files, 159 | # so a file named "default.css" will overwrite the builtin "default.css". 160 | html_static_path = ['_static'] 161 | 162 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 163 | # using the given strftime format. 164 | # html_last_updated_fmt = '%b %d, %Y' 165 | 166 | # If true, SmartyPants will be used to convert quotes and dashes to 167 | # typographically correct entities. 168 | # html_use_smartypants = True 169 | 170 | # Custom sidebar templates, maps document names to template names. 171 | # html_sidebars = {} 172 | 173 | # Additional templates that should be rendered to pages, maps page names to 174 | # template names. 175 | # html_additional_pages = {} 176 | 177 | # If false, no module index is generated. 178 | # html_domain_indices = True 179 | 180 | # If false, no index is generated. 181 | # html_use_index = True 182 | 183 | # If true, the index is split into individual pages for each letter. 184 | # html_split_index = False 185 | 186 | # If true, links to the reST sources are added to the pages. 187 | # html_show_sourcelink = True 188 | 189 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 190 | # html_show_sphinx = True 191 | 192 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
193 | # html_show_copyright = True 194 | 195 | # If true, an OpenSearch description file will be output, and all pages will 196 | # contain a <link> tag referring to it. The value of this option must be the 197 | # base URL from which the finished HTML is served. 198 | # html_use_opensearch = '' 199 | 200 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 201 | # html_file_suffix = None 202 | 203 | # Output file base name for HTML help builder. 204 | htmlhelp_basename = 'bradata-doc' 205 | 206 | 207 | # -- Options for LaTeX output -------------------------------------------------- 208 | 209 | latex_elements = { 210 | # The paper size ('letterpaper' or 'a4paper'). 211 | # 'papersize': 'letterpaper', 212 | 213 | # The font size ('10pt', '11pt' or '12pt'). 214 | # 'pointsize': '10pt', 215 | 216 | # Additional stuff for the LaTeX preamble. 217 | # 'preamble': '', 218 | } 219 | 220 | # Grouping the document tree into LaTeX files. List of tuples 221 | # (source start file, target name, title, author, documentclass [howto/manual]). 222 | latex_documents = [ 223 | ('index', 'user_guide.tex', u'bradata Documentation', 224 | u'odanoburu', 'manual'), 225 | ] 226 | 227 | # The name of an image file (relative to this directory) to place at the top of 228 | # the title page. 229 | # latex_logo = "" 230 | 231 | # For "manual" documents, if this is true, then toplevel headings are parts, 232 | # not chapters. 233 | # latex_use_parts = False 234 | 235 | # If true, show page references after internal links. 236 | # latex_show_pagerefs = False 237 | 238 | # If true, show URL addresses after external links. 239 | # latex_show_urls = False 240 | 241 | # Documents to append as an appendix to all manuals. 242 | # latex_appendices = [] 243 | 244 | # If false, no module index is generated. 245 | # latex_domain_indices = True 246 | 247 | # -- External mapping ------------------------------------------------------------ 248 | python_version = '.'.join(map(str, sys.version_info[0:2])) 249 | intersphinx_mapping = { 250 | 'sphinx': ('http://sphinx.pocoo.org', None), 251 | 'python': ('http://docs.python.org/' + python_version, None), 252 | 'matplotlib': ('http://matplotlib.sourceforge.net', None), 253 | 'numpy': ('http://docs.scipy.org/doc/numpy', None), 254 | 'sklearn': ('http://scikit-learn.org/stable', None), 255 | 'pandas': ('http://pandas.pydata.org/pandas-docs/stable', None), 256 | 'scipy': ('http://docs.scipy.org/doc/scipy/reference/', None), 257 | } --------------------------------------------------------------------------------
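A note on the ReadTheDocs hack in docs/conf.py above: `from sphinx import apidoc` only exists on older Sphinx releases, which later moved the module to `sphinx.ext.apidoc`. A version-tolerant variant is sketched below; the import fallback and the dropped program-name argument are assumptions based on Sphinx's later reorganization of apidoc, so keep the original hack if the pinned Sphinx version still accepts it.

import os

try:
    from sphinx.ext import apidoc   # newer Sphinx ships apidoc here
    new_style = True
except ImportError:
    from sphinx import apidoc       # older Sphinx
    new_style = False

__location__ = os.path.dirname(os.path.abspath(__file__))
output_dir = os.path.join(__location__, "api")
module_dir = os.path.join(__location__, "..", "bradata")
cmd_line = "sphinx-apidoc -f -o {} {}".format(output_dir, module_dir)

args = cmd_line.split(" ")
if new_style:
    args = args[1:]                 # the newer main() does not expect the program name as args[0]
apidoc.main(args)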