├── pysus ├── utilities │ ├── __init__.py │ └── brasil.py ├── metadata │ └── SINAN │ │ ├── ANIM.tar.gz │ │ ├── BOTU.tar.gz │ │ ├── CHAG.tar.gz │ │ ├── CHIK.tar.gz │ │ ├── COLE.tar.gz │ │ ├── COQU.tar.gz │ │ ├── DENG.tar.gz │ │ ├── DIFT.tar.gz │ │ ├── ESQU.tar.gz │ │ ├── FAMA.tar.gz │ │ ├── FMAC.tar.gz │ │ ├── FTIF.tar.gz │ │ ├── HANS.tar.gz │ │ ├── HANT.tar.gz │ │ ├── HEPA.tar.gz │ │ ├── IEXO.tar.gz │ │ ├── LEIV.tar.gz │ │ ├── LEPT.tar.gz │ │ ├── LTAN.tar.gz │ │ ├── MALA.tar.gz │ │ ├── MENI.tar.gz │ │ ├── PEST.tar.gz │ │ ├── RAIV.tar.gz │ │ ├── SIFC.tar.gz │ │ ├── SIFG.tar.gz │ │ ├── TETA.tar.gz │ │ ├── TETN.tar.gz │ │ └── TUBE.tar.gz ├── online_data │ ├── __init__.py │ ├── territory.py │ ├── Infogripe.py │ ├── CIHA.py │ ├── PNI.py │ ├── SINASC.py │ ├── SINAN.py │ ├── SIH.py │ ├── ESUS.py │ ├── SIA.py │ ├── Infodengue.py │ ├── CNES.py │ └── vaccine.py ├── tests │ ├── test_data │ │ ├── __init__.py │ │ ├── EPR-2016-06-01-2016.dbf │ │ ├── test_Infogripe.py │ │ ├── test_vaccine.py │ │ └── test_Infodengue.py │ ├── __init__.py │ ├── test_esus.py │ ├── test_utilities.py │ └── test_ibge.py ├── preprocessing │ ├── __init__.py │ ├── ESUS.py │ └── sinan.py ├── ftp │ ├── databases │ │ ├── __init__.py │ │ ├── sim.py │ │ ├── sinasc.py │ │ ├── ibge_datasus.py │ │ ├── pni.py │ │ ├── sih.py │ │ ├── ciha.py │ │ ├── sia.py │ │ ├── cnes.py │ │ └── sinan.py │ └── utils.py ├── __init__.py └── data │ ├── local.py │ └── __init__.py ├── docs ├── requirements.txt └── source │ ├── data │ └── IT_SIHSUS_1603.pdf │ ├── tutorials │ └── tutorials.rst │ ├── index.rst │ ├── locale │ ├── pt │ │ └── LC_MESSAGES │ │ │ ├── Chikungunya.po │ │ │ ├── Analyzing SIA.po │ │ │ ├── SIM.po │ │ │ ├── PNI.po │ │ │ ├── data-sources.po │ │ │ ├── Infogripe.po │ │ │ ├── Dengue.po │ │ │ ├── Zika.po │ │ │ ├── ESUS.po │ │ │ ├── index.po │ │ │ ├── Infodengue.po │ │ │ ├── IBGE_data.po │ │ │ ├── tutorials.po │ │ │ └── SINAN.po │ └── pt_BR │ │ └── LC_MESSAGES │ │ ├── Chikungunya.po │ │ ├── Analyzing SIA.po │ │ ├── SIM.po │ │ ├── PNI.po │ │ ├── data-sources.po │ │ ├── Infogripe.po │ │ ├── Dengue.po │ │ ├── Zika.po │ │ ├── ESUS.po │ │ ├── index.po │ │ ├── Infodengue.po │ │ ├── IBGE_data.po │ │ ├── tutorials.po │ │ └── SINAN.po │ ├── kepler_config.json │ ├── databases │ ├── Utilities.ipynb │ └── data-sources.rst │ └── rio.html ├── MANIFEST.in ├── docker ├── scripts │ ├── poetry-install.sh │ └── entrypoint.sh ├── docker-compose.yaml └── Dockerfile ├── conda └── dev.yaml ├── .idea ├── misc.xml └── PySUS.iml ├── readthedocs.yaml ├── setup.cfg ├── .github ├── FUNDING.yml └── workflows │ ├── python-package.yml │ └── release.yaml ├── .pre-commit-config.yaml ├── condarecipe └── pysus │ └── meta.yaml ├── Makefile ├── pyproject.toml ├── .releaserc.json └── .gitignore /pysus/utilities/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | nbsphinx 2 | sphinx 3 | sphinx-rtd-theme 4 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include pysus *.c *.h 2 | include pysus/utilities/* 3 | include requirements.txt 4 | -------------------------------------------------------------------------------- /pysus/metadata/SINAN/ANIM.tar.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/ANIM.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/BOTU.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/BOTU.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/CHAG.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/CHAG.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/CHIK.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/CHIK.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/COLE.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/COLE.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/COQU.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/COQU.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/DENG.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/DENG.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/DIFT.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/DIFT.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/ESQU.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/ESQU.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/FAMA.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/FAMA.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/FMAC.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/FMAC.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/FTIF.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/FTIF.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/HANS.tar.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/HANS.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/HANT.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/HANT.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/HEPA.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/HEPA.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/IEXO.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/IEXO.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/LEIV.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/LEIV.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/LEPT.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/LEPT.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/LTAN.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/LTAN.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/MALA.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/MALA.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/MENI.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/MENI.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/PEST.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/PEST.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/RAIV.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/RAIV.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/SIFC.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/SIFC.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/SIFG.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/SIFG.tar.gz 
-------------------------------------------------------------------------------- /pysus/metadata/SINAN/TETA.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/TETA.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/TETN.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/TETN.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/TUBE.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/TUBE.tar.gz -------------------------------------------------------------------------------- /pysus/online_data/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 21/09/18 3 | by fccoelho 4 | license: GPL V3 or Later 5 | """ 6 | -------------------------------------------------------------------------------- /pysus/tests/test_data/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 12/12/18 3 | by fccoelho 4 | license: GPL V3 or Later 5 | """ 6 | -------------------------------------------------------------------------------- /docs/source/data/IT_SIHSUS_1603.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/docs/source/data/IT_SIHSUS_1603.pdf -------------------------------------------------------------------------------- /pysus/tests/test_data/EPR-2016-06-01-2016.dbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/tests/test_data/EPR-2016-06-01-2016.dbf -------------------------------------------------------------------------------- /docker/scripts/poetry-install.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -ex 4 | 5 | poetry config virtualenvs.create false 6 | poetry install --without geo 7 | -------------------------------------------------------------------------------- /pysus/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 19/07/16 3 | by fccoelho 4 | license: GPL V3 or Later 5 | """ 6 | 7 | __docformat__ = "restructuredtext en" 8 | -------------------------------------------------------------------------------- /pysus/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | Created on 19/07/16 4 | by fccoelho 5 | license: GPL V3 or Later 6 | """ 7 | 8 | __docformat__ = "restructuredtext en" 9 | -------------------------------------------------------------------------------- /conda/dev.yaml: -------------------------------------------------------------------------------- 1 | name: pysus 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - docker-compose 7 | - python>=3.10,<3.14 8 | - jupyter 9 | - make 10 | - poetry 11 | - pip 12 | -------------------------------------------------------------------------------- /docker/scripts/entrypoint.sh: 
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | jupyter_lab_path=$(which jupyter)
4 | 
5 | if [ -z "$jupyter_lab_path" ]; then
6 |     echo "Jupyter not found"
7 |     exit 1
8 | fi
9 | 
10 | $jupyter_lab_path lab --browser='firefox' --allow-root --NotebookApp.token='' --NotebookApp.password=''
11 | 
--------------------------------------------------------------------------------
/pysus/ftp/databases/__init__.py:
--------------------------------------------------------------------------------
1 | from .ciha import *  # noqa
2 | from .cnes import *  # noqa
3 | from .ibge_datasus import *  # noqa
4 | from .pni import *  # noqa
5 | from .sia import *  # noqa
6 | from .sih import *  # noqa
7 | from .sim import *  # noqa
8 | from .sinan import *  # noqa
9 | from .sinasc import *  # noqa
10 | 
--------------------------------------------------------------------------------
/pysus/tests/test_esus.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | 
3 | import pytest
4 | from pysus.online_data.ESUS import download
5 | 
6 | 
7 | class MyTestCase(unittest.TestCase):
8 |     @pytest.mark.skip(reason="This test takes too long")
9 |     @pytest.mark.timeout(5)
10 |     def test_download(self):
11 |         df = download(uf="se")
12 |         self.assertGreater(len(df), 0)
13 | 
14 | 
15 | if __name__ == "__main__":
16 |     unittest.main()
17 | 
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/docker/docker-compose.yaml:
--------------------------------------------------------------------------------
1 | version: '3.3'
2 | services:
3 |   jupyter:
4 |     build:
5 |       context: ".."
6 |       dockerfile: docker/Dockerfile
7 |     hostname: pysus-jupyter
8 |     container_name: pysus-jupyter
9 |     privileged: true
10 |     environment:
11 |       - DISPLAY=:0
12 |       - CI=${CI:-0}
13 |     volumes:
14 |       - /tmp/.X11-unix:/tmp/.X11-unix
15 |     entrypoint: ["/entrypoint.sh"]
16 |     command: ["/usr/bin/firefox"]
17 | 
--------------------------------------------------------------------------------
/pysus/__init__.py:
--------------------------------------------------------------------------------
1 | # type: ignore[attr-defined]
2 | """PySUS Python package"""
3 | 
4 | from importlib import metadata as importlib_metadata
5 | 
6 | from pysus.ftp.databases import *  # noqa
7 | 
8 | 
9 | def get_version() -> str:
10 |     try:
11 |         return importlib_metadata.version(__name__)
12 |     except importlib_metadata.PackageNotFoundError:  # pragma: no cover
13 |         return "1.0.0"  # changed by semantic-release
14 | 
15 | 
16 | version: str = get_version()
17 | __version__: str = version
18 | 
--------------------------------------------------------------------------------
/pysus/tests/test_data/test_Infogripe.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | 
3 | import pytest
4 | from pysus.online_data.Infogripe import DATASETS, download
5 | 
6 | 
7 | class InfoGripeTestCase(unittest.TestCase):
8 |     @pytest.mark.skip(reason="This test takes too long")
9 |     @pytest.mark.timeout(5)
10 |     def test_download(self):
11 |         for ds in DATASETS.keys():
12 |             df = download(ds)
13 |             self.assertGreater(len(df), 0)
14 | 
15 | 
16 | if __name__ == "__main__":
17 |     unittest.main()
18 | 
--------------------------------------------------------------------------------
/pysus/tests/test_data/test_vaccine.py:
--------------------------------------------------------------------------------
1 | import os
2 | import unittest
3 | 
4 | import pandas as pd
5 | import pytest
6 | from pysus.online_data.vaccine import download_covid
7 | 
8 | 
9 | class VaccineTestCase(unittest.TestCase):
10 |     @pytest.mark.timeout(15)
11 |     @unittest.skipIf(os.getenv("CI"), "Forbidden on CI")
12 |     def test_Download(self):
13 |         df = download_covid("BA", only_header=True)
14 |         self.assertIsInstance(df, pd.DataFrame)
15 |         self.assertEqual(df.shape, (10000, 42))
16 | 
17 | 
18 | if __name__ == "__main__":
19 |     unittest.main()
20 | 
--------------------------------------------------------------------------------
/pysus/tests/test_utilities.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | 
3 | import pytest
4 | from pysus.utilities.brasil import get_city_name_by_geocode
5 | 
6 | 
7 | class TestGetMunNameByGeocode(unittest.TestCase):
8 |     @pytest.mark.timeout(5)
9 |     def test_get_mun_name_by_geocode(self):
10 |         rio = get_city_name_by_geocode(3304557)
11 |         self.assertEqual(rio, "Rio de Janeiro")
12 | 
13 |         vale = get_city_name_by_geocode(1101757)
14 |         self.assertEqual(vale, "Vale do Anari")
15 | 
16 |         santa_helena = get_city_name_by_geocode(5219308)
17 |         self.assertEqual(santa_helena, "Santa Helena de Goiás")
18 | 
19 | 
20 | if __name__ == "__main__":
21 |     unittest.main()
22 | 
--------------------------------------------------------------------------------
/docs/source/tutorials/tutorials.rst:
--------------------------------------------------------------------------------
1 | =========
2 | Tutorials
3 | =========
4 | 
5 | PySUS includes some Jupyter notebooks in its distribution package to serve as tutorials.
6 | 
7 | 
8 | Preprocessing DATASUS data
9 | --------------------------
10 | #. :doc:`Preprocessing SINAN`
11 | #. :doc:`Preprocessing SIM`
12 | #. :doc:`Preprocessing SIM with municipality`
13 | 
14 | 
15 | Infodengue
16 | ----------
17 | #. :doc:`Infodengue`
18 | 
19 | 
20 | Infogripe
21 | ----------
22 | #. :doc:`Infogripe`
23 | 
24 | 
25 | IBGE Data
26 | ---------
27 | #. :doc:`IBGE_data`
28 | 
29 | 
30 | Dengue, Zika, Chikungunya
31 | -------------------------
32 | #. :doc:`Dengue`
33 | #. :doc:`Zika`
34 | #. :doc:`Chikungunya`
35 | 
--------------------------------------------------------------------------------
/readthedocs.yaml:
--------------------------------------------------------------------------------
1 | # .readthedocs.yaml
2 | # Read the Docs configuration file
3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
4 | 
5 | # Required
6 | version: 2
7 | 
8 | # Set the version of Python and other tools you might need
9 | build:
10 |   os: ubuntu-20.04
11 |   tools:
12 |     python: "3.11"
13 | 
14 | # Build documentation in the docs/ directory with Sphinx
15 | sphinx:
16 |   configuration: docs/source/conf.py
17 | 
18 | # If using Sphinx, optionally build your docs in additional formats such as PDF
19 | # formats:
20 | #    - pdf
21 | 
22 | # Optionally declare the Python requirements required to build your docs
23 | python:
24 |   install:
25 |     - requirements: docs/requirements.txt
26 | 
--------------------------------------------------------------------------------
/.idea/PySUS.iml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | license_file = LICENSE
3 | 
4 | [options]
5 | packages = find:
6 | include =
7 |     pysus
8 | 
9 | [build_sphinx]
10 | project = 'PySUS'
11 | version = 0.6
12 | release = 0.6.3
13 | source-dir = ./docs/source
14 | 
15 | [flake8]
16 | exclude = tests,build,dist,docs,.git,__pycache__,.tox,.eggs,*.egg,.asv
17 | max-line-length = 79
18 | ignore = D202,D203,W503,E203
19 | 
20 | [isort]
21 | known_third_party = dbfread,elasticsearch,geobr,geocoder,numpy,pandas,pyarrow,pyreaddbc,requests,tqdm,urllib3
22 | ensure_newline_before_comments=true
23 | line_length = 79
24 | multi_line_output = 3
25 | include_trailing_comma = true
26 | skip = docs/
27 | 
28 | [aliases]
29 | test = pytest
30 | 
31 | [tool:pytest]
32 | addopts = --ignore=setup.py
33 | 
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | .. PySUS documentation master file, created by
2 |    sphinx-quickstart on Thu Aug 25 10:37:19 2016.
3 |    You can adapt this file completely to your liking, but it should at least
4 |    contain the root `toctree` directive.
5 | 
6 | Welcome to PySUS documentation!
7 | =================================
8 | 
9 | PySUS is a collection of helper code to download and analyze data from `DATASUS <https://datasus.saude.gov.br>`_ (Brazilian Universal Health System). Contributions are welcome!
10 | 
11 | Contents:
12 | 
13 | ..
toctree:: 14 | :maxdepth: 2 15 | 16 | Data Sources 17 | Tutorials 18 | 19 | 20 | Indices and tables 21 | ================== 22 | 23 | * :ref:`genindex` 24 | * :ref:`modindex` 25 | * :ref:`search` 26 | -------------------------------------------------------------------------------- /pysus/online_data/territory.py: -------------------------------------------------------------------------------- 1 | from typing import List, Union 2 | 3 | from pysus.ftp import CACHEPATH, Directory, File 4 | 5 | 6 | def list_tables() -> List[File]: 7 | d = Directory("/territorio/tabelas") 8 | tabelas = [f for f in d.content if "territor" in f.name] 9 | return tabelas 10 | 11 | 12 | def list_maps() -> List[File]: 13 | d = Directory("/territorio/mapas") 14 | mapas = [f for f in d.content if "mapas" in f.name] 15 | return mapas 16 | 17 | 18 | def download(fname: Union[str, list], data_path: str = CACHEPATH): 19 | files = ( 20 | Directory("/territorio/tabelas").content 21 | + Directory("/territorio/mapas").content 22 | ) 23 | for file in files: 24 | if fname in [str(file), file.name]: 25 | # handles suffixed and no suffixed `fname`s 26 | return file.download() 27 | -------------------------------------------------------------------------------- /pysus/online_data/Infogripe.py: -------------------------------------------------------------------------------- 1 | """ 2 | Downloads data made available by the Infogripe service 3 | """ 4 | 5 | import pandas as pd 6 | 7 | BASEURL = r"https://gitlab.fiocruz.br/marcelo.gomes/infogripe/-/raw/master/Dados/InfoGripe/" # noqa 8 | DATASETS = { 9 | "Alerta de situação": r"tabela_de_alerta.csv", 10 | "Casos por idade, sexo e virus": r"dados_semanais_faixa_etaria_sexo_virus.csv.gz", # noqa 11 | "Casos Totais e estimativas": r"serie_temporal_com_estimativas_recentes.csv.gz", # noqa 12 | "Valores esperados por localidades": "valores_esperados_por_localidade.csv", # noqa 13 | } 14 | 15 | 16 | def list_datasets(): 17 | return list(DATASETS.keys()) 18 | 19 | 20 | def download(dataset_name): 21 | url = BASEURL + DATASETS[dataset_name] + "?inline=false" 22 | df = pd.read_csv(url, delimiter=";", decimal=",") 23 | return df 24 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [fccoelho] # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 13 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 14 | -------------------------------------------------------------------------------- /docs/source/locale/pt/LC_MESSAGES/Chikungunya.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 
2 | # Copyright (C) 2016, Flavio Codeco Coelho
3 | # This file is distributed under the same license as the PySUS package.
4 | # FIRST AUTHOR , 2023.
5 | #
6 | #, fuzzy
7 | msgid ""
8 | msgstr ""
9 | "Project-Id-Version: PySUS 0.1.13\n"
10 | "Report-Msgid-Bugs-To: \n"
11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n"
12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
13 | "Last-Translator: FULL NAME \n"
14 | "Language: pt\n"
15 | "Language-Team: pt \n"
16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n"
17 | "MIME-Version: 1.0\n"
18 | "Content-Type: text/plain; charset=utf-8\n"
19 | "Content-Transfer-Encoding: 8bit\n"
20 | "Generated-By: Babel 2.12.1\n"
21 | 
22 | #: ../../source/Chikungunya.ipynb:9
23 | msgid "Analyzing Chikungunya data"
24 | msgstr "Analisando dados de Chikungunya"
25 | 
--------------------------------------------------------------------------------
/docs/source/locale/pt_BR/LC_MESSAGES/Chikungunya.po:
--------------------------------------------------------------------------------
1 | # SOME DESCRIPTIVE TITLE.
2 | # Copyright (C) 2016, Flavio Codeco Coelho
3 | # This file is distributed under the same license as the PySUS package.
4 | # FIRST AUTHOR , 2023.
5 | #
6 | #, fuzzy
7 | msgid ""
8 | msgstr ""
9 | "Project-Id-Version: PySUS 0.1.13\n"
10 | "Report-Msgid-Bugs-To: \n"
11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n"
12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
13 | "Last-Translator: FULL NAME \n"
14 | "Language: pt\n"
15 | "Language-Team: pt \n"
16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n"
17 | "MIME-Version: 1.0\n"
18 | "Content-Type: text/plain; charset=utf-8\n"
19 | "Content-Transfer-Encoding: 8bit\n"
20 | "Generated-By: Babel 2.12.1\n"
21 | 
22 | #: ../../source/Chikungunya.ipynb:9
23 | msgid "Analyzing Chikungunya data"
24 | msgstr "Analisando dados de Chikungunya"
25 | 
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | default_stages: [commit, push]
2 | 
3 | repos:
4 |   - repo: https://github.com/pre-commit/pre-commit-hooks
5 |     rev: v4.1.0
6 |     hooks:
7 |       - id: end-of-file-fixer
8 | 
9 |   - repo: local
10 |     hooks:
11 |       - entry: black
12 |         id: black
13 |         name: black
14 |         exclude: |
15 |           (?x)(
16 |             docs
17 |           )
18 |         files: ""
19 |         language: system
20 |         pass_filenames: true
21 |         types:
22 |           - python
23 |           - file
24 | 
25 |       - entry: flake8
26 |         exclude: ^$
27 |         files: ""
28 |         id: flake8
29 |         language: python
30 |         name: flake8
31 |         pass_filenames: true
32 |         types:
33 |           - python
34 | 
35 |       - entry: isort
36 |         exclude: "^.*/js/.*$"
37 |         files: ""
38 |         id: isort
39 |         language: python
40 |         name: isort
41 |         pass_filenames: true
42 |         types:
43 |           - python
44 | 
--------------------------------------------------------------------------------
/pysus/ftp/utils.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | from typing import Union
3 | 
4 | from pysus.ftp import to_list
5 | from pysus.utilities.brasil import MONTHS, UFs  # noqa
6 | 
7 | 
8 | def zfill_year(year: Union[str, int]) -> int:
9 |     """
10 |     Formats a len(2) year into len(4) with the correct century prefix
11 |     E.g: 20 -> 2020; 99 -> 1999
12 |     """
13 |     year = str(year)[-2:].zfill(2)
14 |     current_year = str(datetime.datetime.now().year)[-2:]
15 |     prefix = "19" if str(year) > current_year else "20"
16 |     return int(prefix + str(year))
17 | 
18 | 
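# Worked example of the century inference above (illustrative; the pivot is
# the current two-digit year, so with 2025 as "now" the cutoff is "25"):
#   zfill_year(20) -> 2020  ("20" > "25" is False, so the "20" prefix is used)
#   zfill_year(99) -> 1999  ("99" > "25" is True, so the "19" prefix is used)
# The string comparison is safe here because both operands are zero-filled
# to exactly two digits.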
19 | def parse_UFs(UF: Union[list[str], str]) -> list:
20 |     """
21 |     Formats state abbreviations into the correct format and returns a list.
22 |     Also checks if there is an incorrect UF in the list.
23 |     E.g: ['SC', 'mt', 'ba'] -> ['SC', 'MT', 'BA']
24 |     """
25 |     ufs = [uf.upper() for uf in to_list(UF)]
26 |     if not all(uf in list(UFs) for uf in ufs):
27 |         raise ValueError(f"Unknown UF(s): {set(ufs).difference(list(UFs))}")
28 |     return ufs
29 | 
--------------------------------------------------------------------------------
/.github/workflows/python-package.yml:
--------------------------------------------------------------------------------
1 | name: main
2 | 
3 | on: [push, pull_request]
4 | 
5 | jobs:
6 |   tests:
7 |     runs-on: ubuntu-latest
8 |     timeout-minutes: 15
9 | 
10 |     defaults:
11 |       run:
12 |         shell: bash -l {0}
13 | 
14 |     strategy:
15 |       matrix:
16 |         python_version: ["3.10", "3.11", "3.12", "3.13"]
17 | 
18 |     concurrency:
19 |       group: ci-tests-${{ matrix.python_version }}-${{ github.ref }}
20 |       cancel-in-progress: true
21 | 
22 |     steps:
23 |       - uses: actions/checkout@v4
24 | 
25 |       - uses: conda-incubator/setup-miniconda@v3
26 |         with:
27 |           miniforge-version: latest
28 |           environment-file: conda/dev.yaml
29 |           channels: conda-forge,nodefaults
30 |           activate-environment: pysus
31 |           auto-update-conda: true
32 |           conda-solver: libmamba
33 | 
34 |       - name: Run jupyterlab with PySUS
35 |         run: |
36 |           make run-jupyter-pysus
37 |           # make test-jupyter-pysus ## takes too long
38 | 
39 |       - name: Linting & Tests
40 |         run: |
41 |           export CI=1
42 |           poetry install
43 |           pre-commit run --all-files
44 |           make test-pysus
45 | 
--------------------------------------------------------------------------------
/docs/source/locale/pt/LC_MESSAGES/Analyzing SIA.po:
--------------------------------------------------------------------------------
1 | # SOME DESCRIPTIVE TITLE.
2 | # Copyright (C) 2016, Flavio Codeco Coelho
3 | # This file is distributed under the same license as the PySUS package.
4 | # FIRST AUTHOR , 2023.
5 | #
6 | #, fuzzy
7 | msgid ""
8 | msgstr ""
9 | "Project-Id-Version: PySUS 0.1.13\n"
10 | "Report-Msgid-Bugs-To: \n"
11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n"
12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
13 | "Last-Translator: FULL NAME \n"
14 | "Language: pt\n"
15 | "Language-Team: pt \n"
16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n"
17 | "MIME-Version: 1.0\n"
18 | "Content-Type: text/plain; charset=utf-8\n"
19 | "Content-Transfer-Encoding: 8bit\n"
20 | "Generated-By: Babel 2.12.1\n"
21 | 
22 | #: ../../source/Analyzing SIA.ipynb:21
23 | msgid "Dataset types"
24 | msgstr "Tipos de Datasets"
25 | 
26 | #: ../../source/Analyzing SIA.ipynb:23
27 | msgid ""
28 | "The SIA Information system contains multiple types of datasets we can "
29 | "download with PySUS. These are:"
30 | msgstr ""
31 | "No banco de dados SIA é possível encontrar diferentes grupos de dados que "
32 | "podem ser extraídos com o PySUS. São eles:"
33 | 
--------------------------------------------------------------------------------
/docs/source/locale/pt_BR/LC_MESSAGES/Analyzing SIA.po:
--------------------------------------------------------------------------------
1 | # SOME DESCRIPTIVE TITLE.
2 | # Copyright (C) 2016, Flavio Codeco Coelho
3 | # This file is distributed under the same license as the PySUS package.
4 | # FIRST AUTHOR , 2023.
5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/Analyzing SIA.ipynb:21 23 | msgid "Dataset types" 24 | msgstr "Tipos de Datasets" 25 | 26 | #: ../../source/Analyzing SIA.ipynb:23 27 | msgid "" 28 | "The SIA Information system contains multiple types of datasets we can " 29 | "download with PySUS. These are:" 30 | msgstr "" 31 | "No banco de dados SIA é possível encontrar diferentes grupos de dados que " 32 | "podem ser extraídos com o PySUS. São eles:" 33 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: release 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: [ main ] 7 | pull_request: 8 | branches: [ main ] 9 | 10 | jobs: 11 | build: 12 | runs-on: ubuntu-latest 13 | 14 | defaults: 15 | run: 16 | shell: bash -l {0} 17 | 18 | steps: 19 | - uses: actions/checkout@v4 20 | 21 | - uses: conda-incubator/setup-miniconda@v3 22 | with: 23 | miniforge-version: latest 24 | environment-file: conda/dev.yaml 25 | channels: conda-forge,nodefaults 26 | activate-environment: pysus 27 | auto-update-conda: true 28 | conda-solver: libmamba 29 | 30 | - uses: actions/setup-node@v3 31 | with: 32 | node-version: 20.11.0 33 | 34 | - name: Test release 35 | if: ${{ github.event_name != 'workflow_dispatch' }} 36 | env: 37 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 38 | run: make release-dry 39 | 40 | - name: Release 41 | if: ${{ github.event_name == 'workflow_dispatch' }} 42 | env: 43 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 44 | PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} 45 | run: | 46 | poetry config pypi-token.pypi ${PYPI_TOKEN} 47 | make release 48 | -------------------------------------------------------------------------------- /condarecipe/pysus/meta.yaml: -------------------------------------------------------------------------------- 1 | {% set name = "PySUS" %} 2 | {% set version = "0.5.14" %} 3 | 4 | package: 5 | name: "{{ name|lower }}" 6 | version: "{{ version }}" 7 | 8 | source: 9 | url: "https://pypi.io/packages/source/{{ name[0] }}/{{ name }}/{{ name }}-{{ version }}.tar.gz" 10 | sha256: b5215bf4bf2afb4f9d552deab717df3ab26aeed8dc9750434dde285f227f045d 11 | 12 | build: 13 | number: 0 14 | script: "{{ PYTHON }} -m pip install . 
-vv" 15 | 16 | requirements: 17 | host: 18 | - cffi >=1.0.0 19 | - dbfread 20 | - fastparquet 21 | - geocoder 22 | - pandas 23 | - pip 24 | - python 25 | - requests 26 | run: 27 | - cffi >=1.0.0 28 | - dbfread 29 | - fastparquet 30 | - geocoder 31 | - pandas 32 | - pyarrow 33 | - python 34 | - requests 35 | - elasticsearch 36 | 37 | test: 38 | imports: 39 | - pysus 40 | - pysus.demography 41 | - pysus.online_data 42 | - pysus.preprocessing 43 | - pysus.tests 44 | - pysus.tests.test_data 45 | 46 | about: 47 | home: "https://github.com/fccoelho/PySUS" 48 | license: gpl-v3 49 | license_family: GPL3 50 | license_file: 51 | summary: "Tools for dealing with Brazil's Public health data" 52 | doc_url: 53 | dev_url: 54 | 55 | extra: 56 | recipe-maintainers: 57 | - fccoelho 58 | -------------------------------------------------------------------------------- /docs/source/locale/pt/LC_MESSAGES/SIM.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/SIM.ipynb:9 23 | msgid "Downloading data from SIM" 24 | msgstr "Extraindo dados do Sistema de Informação sobre Mortalidade (SIM)" 25 | 26 | #: ../../source/SIM.ipynb:11 27 | msgid "" 28 | "In this notebook we will use PySUS to download and treat mortality data " 29 | "from SIM." 30 | msgstr "" 31 | "Neste notebook, usaremos o PySUS para baixar e tratar os dados de " 32 | "mortalidade do SIM." 33 | 34 | #: ../../source/SIM.ipynb:822 35 | msgid "Humanizing some of the encoded variables." 36 | msgstr "Humanizando algumas das variáveis codificadas." 37 | -------------------------------------------------------------------------------- /docs/source/locale/pt_BR/LC_MESSAGES/SIM.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/SIM.ipynb:9 23 | msgid "Downloading data from SIM" 24 | msgstr "Extraindo dados do Sistema de Informação sobre Mortalidade (SIM)" 25 | 26 | #: ../../source/SIM.ipynb:11 27 | msgid "" 28 | "In this notebook we will use PySUS to download and treat mortality data " 29 | "from SIM." 
30 | msgstr "" 31 | "Neste notebook, usaremos o PySUS para baixar e tratar os dados de " 32 | "mortalidade do SIM." 33 | 34 | #: ../../source/SIM.ipynb:822 35 | msgid "Humanizing some of the encoded variables." 36 | msgstr "Humanizando algumas das variáveis codificadas." 37 | -------------------------------------------------------------------------------- /docs/source/locale/pt/LC_MESSAGES/PNI.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/PNI.ipynb:9 23 | msgid "Downloading data from the National Immunization program (PNI)" 24 | msgstr "Extraindo dados do Programa Nacional de Imunização (PNI)" 25 | 26 | #: ../../source/PNI.ipynb:30 27 | msgid "" 28 | "We can start by checking the data available for a given state, for " 29 | "example, Rio de Janeiro:" 30 | msgstr "" 31 | "Podemos começar verificando os dados disponíveis para um determinado " 32 | "estado, por exemplo, o Rio de Janeiro:" 33 | 34 | #: ../../source/PNI.ipynb:151 35 | msgid "Then we can fetch data from a particular year:" 36 | msgstr "Em seguida, podemos extrair os dados de um ano específico:" 37 | -------------------------------------------------------------------------------- /docs/source/locale/pt_BR/LC_MESSAGES/PNI.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 
5 | #
6 | #, fuzzy
7 | msgid ""
8 | msgstr ""
9 | "Project-Id-Version: PySUS 0.1.13\n"
10 | "Report-Msgid-Bugs-To: \n"
11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n"
12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
13 | "Last-Translator: FULL NAME \n"
14 | "Language: pt\n"
15 | "Language-Team: pt \n"
16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n"
17 | "MIME-Version: 1.0\n"
18 | "Content-Type: text/plain; charset=utf-8\n"
19 | "Content-Transfer-Encoding: 8bit\n"
20 | "Generated-By: Babel 2.12.1\n"
21 | 
22 | #: ../../source/PNI.ipynb:9
23 | msgid "Downloading data from the National Immunization program (PNI)"
24 | msgstr "Extraindo dados do Programa Nacional de Imunização (PNI)"
25 | 
26 | #: ../../source/PNI.ipynb:30
27 | msgid ""
28 | "We can start by checking the data available for a given state, for "
29 | "example, Rio de Janeiro:"
30 | msgstr ""
31 | "Podemos começar verificando os dados disponíveis para um determinado "
32 | "estado, por exemplo, o Rio de Janeiro:"
33 | 
34 | #: ../../source/PNI.ipynb:151
35 | msgid "Then we can fetch data from a particular year:"
36 | msgstr "Em seguida, podemos extrair os dados de um ano específico:"
37 | 
--------------------------------------------------------------------------------
/docs/source/locale/pt/LC_MESSAGES/data-sources.po:
--------------------------------------------------------------------------------
1 | # SOME DESCRIPTIVE TITLE.
2 | # Copyright (C) 2016, Flavio Codeco Coelho
3 | # This file is distributed under the same license as the PySUS package.
4 | # FIRST AUTHOR , 2023.
5 | #
6 | #, fuzzy
7 | msgid ""
8 | msgstr ""
9 | "Project-Id-Version: PySUS 0.1.13\n"
10 | "Report-Msgid-Bugs-To: \n"
11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n"
12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
13 | "Last-Translator: FULL NAME \n"
14 | "Language: pt\n"
15 | "Language-Team: pt \n"
16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n"
17 | "MIME-Version: 1.0\n"
18 | "Content-Type: text/plain; charset=utf-8\n"
19 | "Content-Transfer-Encoding: 8bit\n"
20 | "Generated-By: Babel 2.12.1\n"
21 | 
22 | #: ../../source/data-sources.rst:3
23 | msgid "Data Sources"
24 | msgstr "Bancos de Dados"
25 | 
26 | #: ../../source/data-sources.rst:5
27 | msgid ""
28 | "PySUS allows you to download data on demand from DATASUS databases. "
29 | "Currently, the following databases can be downloaded:"
30 | msgstr ""
31 | "O PySUS permite a extração de dados das bases de dados do DATASUS. "
32 | "Atualmente, as seguintes bases de dados podem ser extraídas:"
33 | 
34 | #: ../../source/data-sources.rst:8
35 | msgid "SINAN"
36 | msgstr "SINAN"
37 | 
38 | #: ../../source/data-sources.rst:9
39 | msgid "SINASC"
40 | msgstr "SINASC"
41 | 
42 | #: ../../source/data-sources.rst:10
43 | msgid "SIM"
44 | msgstr "SIM"
45 | 
46 | #: ../../source/data-sources.rst:11
47 | msgid "SIH"
48 | msgstr "SIH"
49 | 
50 | #: ../../source/data-sources.rst:12
51 | msgid "SIA"
52 | msgstr "SIA"
53 | 
--------------------------------------------------------------------------------
/docs/source/locale/pt_BR/LC_MESSAGES/data-sources.po:
--------------------------------------------------------------------------------
1 | # SOME DESCRIPTIVE TITLE.
2 | # Copyright (C) 2016, Flavio Codeco Coelho
3 | # This file is distributed under the same license as the PySUS package.
4 | # FIRST AUTHOR , 2023.
5 | #
6 | #, fuzzy
7 | msgid ""
8 | msgstr ""
9 | "Project-Id-Version: PySUS 0.1.13\n"
10 | "Report-Msgid-Bugs-To: \n"
11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n"
12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
13 | "Last-Translator: FULL NAME \n"
14 | "Language: pt\n"
15 | "Language-Team: pt \n"
16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n"
17 | "MIME-Version: 1.0\n"
18 | "Content-Type: text/plain; charset=utf-8\n"
19 | "Content-Transfer-Encoding: 8bit\n"
20 | "Generated-By: Babel 2.12.1\n"
21 | 
22 | #: ../../source/data-sources.rst:3
23 | msgid "Data Sources"
24 | msgstr "Bancos de Dados"
25 | 
26 | #: ../../source/data-sources.rst:5
27 | msgid ""
28 | "PySUS allows you to download data on demand from DATASUS databases. "
29 | "Currently, the following databases can be downloaded:"
30 | msgstr ""
31 | "O PySUS permite a extração de dados das bases de dados do DATASUS. "
32 | "Atualmente, as seguintes bases de dados podem ser extraídas:"
33 | 
34 | #: ../../source/data-sources.rst:8
35 | msgid "SINAN"
36 | msgstr "SINAN"
37 | 
38 | #: ../../source/data-sources.rst:9
39 | msgid "SINASC"
40 | msgstr "SINASC"
41 | 
42 | #: ../../source/data-sources.rst:10
43 | msgid "SIM"
44 | msgstr "SIM"
45 | 
46 | #: ../../source/data-sources.rst:11
47 | msgid "SIH"
48 | msgstr "SIH"
49 | 
50 | #: ../../source/data-sources.rst:12
51 | msgid "SIA"
52 | msgstr "SIA"
53 | 
--------------------------------------------------------------------------------
/docs/source/locale/pt/LC_MESSAGES/Infogripe.po:
--------------------------------------------------------------------------------
1 | # SOME DESCRIPTIVE TITLE.
2 | # Copyright (C) 2016, Flavio Codeco Coelho
3 | # This file is distributed under the same license as the PySUS package.
4 | # FIRST AUTHOR , 2023.
5 | #
6 | #, fuzzy
7 | msgid ""
8 | msgstr ""
9 | "Project-Id-Version: PySUS 0.1.13\n"
10 | "Report-Msgid-Bugs-To: \n"
11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n"
12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
13 | "Last-Translator: FULL NAME \n"
14 | "Language: pt\n"
15 | "Language-Team: pt \n"
16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n"
17 | "MIME-Version: 1.0\n"
18 | "Content-Type: text/plain; charset=utf-8\n"
19 | "Content-Transfer-Encoding: 8bit\n"
20 | "Generated-By: Babel 2.12.1\n"
21 | 
22 | #: ../../source/Infogripe.ipynb:9
23 | msgid "Working with Infogripe datasets"
24 | msgstr "Trabalhando com a base de dados Infogripe"
25 | 
26 | #: ../../source/Infogripe.ipynb:11
27 | msgid ""
28 | "`Infogripe `__ is an online platform that "
29 | "tracks epidemiolgical data about influenza-like diseases in Brazil."
30 | msgstr ""
31 | "O `Infogripe `__ é uma plataforma online "
32 | "que acompanha dados epidemiológicos sobre doenças semelhantes à gripe no Brasil."
33 | 
34 | #: ../../source/Infogripe.ipynb:33
35 | msgid ""
36 | "Infogripe makes available different datasets. To findout which ones are "
37 | "available before downloading we can ask PySUS to list them:"
38 | msgstr ""
39 | "O Infogripe disponibiliza diferentes conjuntos de dados. Para descobrir "
40 | "quais estão disponíveis antes de baixá-los, podemos solicitar ao PySUS que os liste:"
41 | 
--------------------------------------------------------------------------------
/docs/source/locale/pt_BR/LC_MESSAGES/Infogripe.po:
--------------------------------------------------------------------------------
1 | # SOME DESCRIPTIVE TITLE.
2 | # Copyright (C) 2016, Flavio Codeco Coelho
3 | # This file is distributed under the same license as the PySUS package.
4 | # FIRST AUTHOR , 2023.
5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/Infogripe.ipynb:9 23 | msgid "Working with Infogripe datasets" 24 | msgstr "Trabalhando com a base de dados Infogripe" 25 | 26 | #: ../../source/Infogripe.ipynb:11 27 | msgid "" 28 | "`Infogripe `__ is an online platform that " 29 | "tracks epidemiolgical data about influenza-like diseases in Brazil." 30 | msgstr "" 31 | "O `Infogripe `__ é uma plataforma online " 32 | "que acompanha dados epidemiológicos sobre doenças semelhantes à gripe no Brasil." 33 | 34 | #: ../../source/Infogripe.ipynb:33 35 | msgid "" 36 | "Infogripe makes available different datasets. To findout which ones are " 37 | "available before downloading we can ask PySUS to list them:" 38 | msgstr "" 39 | "O Infogripe disponibiliza diferentes conjuntos de dados. Para descobrir " 40 | "quais estão disponíveis antes de baixá-los, podemos solicitar ao PySUS que os liste:" 41 | -------------------------------------------------------------------------------- /pysus/utilities/brasil.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | from typing import Union 4 | 5 | with open( 6 | f"{Path(__file__).parent}/municipios.json", "r", encoding="utf-8-sig" 7 | ) as muns: 8 | MUNICIPALITIES = json.loads(muns.read()) 9 | 10 | MUN_BY_GEOCODE = {mun["geocodigo"]: mun["municipio"] for mun in MUNICIPALITIES} 11 | 12 | 13 | UFs = { 14 | "BR": "Brasil", 15 | "AC": "Acre", 16 | "AL": "Alagoas", 17 | "AP": "Amapá", 18 | "AM": "Amazonas", 19 | "BA": "Bahia", 20 | "CE": "Ceará", 21 | "ES": "Espírito Santo", 22 | "GO": "Goiás", 23 | "MA": "Maranhão", 24 | "MT": "Mato Grosso", 25 | "MS": "Mato Grosso do Sul", 26 | "MG": "Minas Gerais", 27 | "PA": "Pará", 28 | "PB": "Paraíba", 29 | "PR": "Paraná", 30 | "PE": "Pernambuco", 31 | "PI": "Piauí", 32 | "RJ": "Rio de Janeiro", 33 | "RN": "Rio Grande do Norte", 34 | "RS": "Rio Grande do Sul", 35 | "RO": "Rondônia", 36 | "RR": "Roraima", 37 | "SC": "Santa Catarina", 38 | "SP": "São Paulo", 39 | "SE": "Sergipe", 40 | "TO": "Tocantins", 41 | "DF": "Distrito Federal", 42 | } 43 | 44 | MONTHS = { 45 | 1: "Janeiro", 46 | 2: "Fevereiro", 47 | 3: "Março", 48 | 4: "Abril", 49 | 5: "Maio", 50 | 6: "Junho", 51 | 7: "Julho", 52 | 8: "Agosto", 53 | 9: "Setembro", 54 | 10: "Outubro", 55 | 11: "Novembro", 56 | 12: "Dezembro", 57 | } 58 | 59 | 60 | def get_city_name_by_geocode(geocode: Union[str, int]): 61 | """ 62 | Returns the Municipality name from its geocode (IBGE) 63 | :param geocode: 7 digits city code, according to IBGE format 64 | :return: City name 65 | """ 66 | 67 | return MUN_BY_GEOCODE[int(geocode)] 68 | -------------------------------------------------------------------------------- /pysus/online_data/CIHA.py: -------------------------------------------------------------------------------- 1 | """ 2 | Download data from CIHA and CIH (Old) 3 | Hospital and Ambulatorial information system 4 | http://ciha.datasus.gov.br/CIHA/index.php?area=03 5 | 6 | by fccoelho 7 | license: GPL V3 or Later 8 | """ 9 | from 
typing import Union
10 | 
11 | from loguru import logger
12 | from pysus.ftp import CACHEPATH
13 | from pysus.ftp.databases.ciha import CIHA
14 | from pysus.ftp.utils import parse_UFs
15 | 
16 | ciha = CIHA().load()
17 | 
18 | 
19 | def get_available_years(
20 |     states: Union[list, str] = None,
21 | ) -> list:
22 |     """
23 |     Fetch available years for the `states`.
24 |     :param states: UF code. E.g: "SP" or ["SP", "RJ"]
25 |     :return: list of years in integers
26 |     """
27 |     ufs = parse_UFs(states)
28 | 
29 |     years = dict()
30 |     for uf in ufs:
31 |         files = ciha.get_files(uf=uf)
32 |         years[uf] = set(sorted([ciha.describe(f)["year"] for f in files]))
33 | 
34 |     if len(set([len(v) for v in years.values()])) > 1:
35 |         logger.warning(f"Distinct years were found for UFs: {years}")
36 | 
37 |     return sorted(list(set.intersection(*map(set, years.values()))))
38 | 
39 | 
40 | def download(
41 |     states: Union[str, list],
42 |     years: Union[str, list, int],
43 |     months: Union[str, list, int],
44 |     data_dir: str = CACHEPATH,
45 | ) -> list:
46 |     """
47 |     Download CIHA records for state, year and month and returns the Parquet
48 |     files as a list of ParquetData
49 |     :param months: 1 to 12, can be a list
50 |     :param states: 2 letter state code
51 |     :param years: 4 digit integer
52 |     """
53 | 
54 |     files = ciha.get_files(uf=states, year=years, month=months)
55 |     return ciha.download(files, local_dir=data_dir)
56 | 
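# Usage sketch for this module (illustrative only: it assumes FTP access to
# the DATASUS server and uses just the functions defined above; the chosen
# UF/year/month values are examples, not guaranteed to exist):
#
#     from pysus.online_data.CIHA import download, get_available_years
#
#     get_available_years("SP")        # years available for São Paulo
#     files = download("SP", 2011, 1)  # January 2011 records for São Paulo
#
# All three selectors also accept lists, e.g.
# download(["SP", "RJ"], [2011, 2012], [1, 2]).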
--------------------------------------------------------------------------------
/pysus/online_data/PNI.py:
--------------------------------------------------------------------------------
1 | """
2 | Download data from the national immunization program
3 | """
4 | from typing import Literal, Union
5 | 
6 | from loguru import logger
7 | from pysus.ftp import CACHEPATH
8 | from pysus.ftp.databases.pni import PNI
9 | from pysus.ftp.utils import parse_UFs
10 | 
11 | pni = PNI().load()
12 | 
13 | 
14 | def get_available_years(group, states):
15 |     """
16 |     Fetch available years for `group` and `states`.
17 |     :param group: PNI group, options are "CPNI" or "DPNI"
18 |     :param states: UF code, can be a list. E.g: "SP" or ["SP", "RJ"]
19 |     :return: list of available years
20 |     """
21 |     ufs = parse_UFs(states)
22 | 
23 |     years = dict()
24 |     for uf in ufs:
25 |         files = pni.get_files(group, uf=uf)
26 |         years[uf] = set(sorted([pni.describe(f)["year"] for f in files]))
27 | 
28 |     if len(set([len(v) for v in years.values()])) > 1:
29 |         logger.warning(f"Distinct years were found for UFs: {years}")
30 | 
31 |     return sorted(list(set.intersection(*map(set, years.values()))))
32 | 
33 | 
34 | def download(
35 |     group: Union[list, Literal["CPNI", "DPNI"]],
36 |     states: Union[str, list],
37 |     years: Union[str, list, int],
38 |     data_dir: str = CACHEPATH,
39 | ) -> list:
40 |     """
41 |     Download immunization records for the given states and years.
42 |     :param group: PNI group, options are "CPNI" or "DPNI"
43 |     :param states: UF two-letter code, can be a list. E.g: "SP" or ["SP", "RJ"]
44 |     :param years: year in 4 digits, can be a list. E.g: 2015 or [2015, 2016]
45 |     :param data_dir: directory where data will be downloaded
46 |     :return: list of downloaded ParquetData
47 |     """
48 |     files = pni.get_files(group, uf=states, year=years)
49 |     return pni.download(files, local_dir=data_dir)
50 | 
--------------------------------------------------------------------------------
/docker/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM condaforge/mambaforge
2 | 
3 | LABEL maintainer="es.loch@gmail.com"
4 | 
5 | USER root
6 | 
7 | ENV DEBIAN_FRONTEND=noninteractive
8 | 
9 | ENV HOME "/home/pysus"
10 | ENV PATH "$PATH:/home/pysus/.local/bin"
11 | ENV ENV_NAME pysus
12 | ENV PATH "/opt/conda/envs/$ENV_NAME/bin:$PATH"
13 | ENV PATH "/opt/poetry/bin:$PATH"
14 | 
15 | RUN apt-get -qq update --yes \
16 |   && apt-get -qq install --yes --no-install-recommends \
17 |   build-essential \
18 |   firefox \
19 |   ca-certificates \
20 |   sudo \
21 |   curl \
22 |   && rm -rf /var/lib/apt/lists/*
23 | 
24 | RUN useradd -ms /bin/bash pysus \
25 |   && echo "pysus ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/pysus \
26 |   && chmod 0440 /etc/sudoers.d/ \
27 |   && echo 'source /opt/conda/bin/activate "$ENV_NAME" && exec "$@"' > /activate.sh \
28 |   && echo 'source activate "$ENV_NAME"' > /home/pysus/.bashrc \
29 |   && chmod +x /activate.sh \
30 |   && chmod -R a+rwx /opt/conda /tmp \
31 |   && sudo chown -R pysus:pysus /usr/src
32 | 
33 | USER pysus
34 | 
35 | RUN mkdir -p /home/pysus/Notebooks/
36 | 
37 | COPY --chown=pysus:pysus conda/dev.yaml /tmp/dev.yaml
38 | COPY --chown=pysus:pysus docker/scripts/entrypoint.sh /entrypoint.sh
39 | COPY --chown=pysus:pysus docker/scripts/poetry-install.sh /tmp/poetry-install.sh
40 | COPY --chown=pysus:pysus pyproject.toml poetry.lock LICENSE README.md /usr/src/
41 | COPY --chown=pysus:pysus pysus /usr/src/pysus
42 | COPY --chown=pysus:pysus docs/source/**/*.ipynb /home/pysus/Notebooks/
43 | COPY --chown=pysus:pysus docs/source/data /home/pysus/Notebooks/
44 | 
45 | RUN mamba env create -n $ENV_NAME --file /tmp/dev.yaml \
46 |   && mamba clean -afy
47 | 
48 | RUN cd /usr/src/ && bash /tmp/poetry-install.sh
49 | 
50 | WORKDIR /home/pysus/Notebooks
51 | 
52 | ENTRYPOINT ["bash", "/activate.sh", "jupyter", "notebook", "--port=8888", "--ip=0.0.0.0"]
53 | 
--------------------------------------------------------------------------------
/pysus/online_data/SINASC.py:
--------------------------------------------------------------------------------
1 | """
2 | Download SINASC data from DATASUS FTP server
3 | Created on 01/11/17
4 | by fccoelho
5 | license: GPL V3 or Later
6 | """
7 | from typing import Union
8 | 
9 | from loguru import logger
10 | from pysus.ftp import CACHEPATH
11 | from pysus.ftp.databases.sinasc import SINASC
12 | from pysus.ftp.utils import parse_UFs
13 | 
14 | sinasc = SINASC().load()
15 | 
16 | 
17 | def get_available_years(group: str, states: Union[str, list[str]]) -> list:
18 |     """
19 |     Get SINASC years for states
20 |     :param group:
21 |         "DN": "Declarações de Nascidos Vivos",
22 |         "DNR": "Dados dos Nascidos Vivos por UF de residência",
23 |     :param states: 2 letter UF code, can be a list.
E.g: "SP" or ["SP", "RJ"] 24 | :return: list of available years 25 | """ 26 | ufs = parse_UFs(states) 27 | 28 | years = dict() 29 | for uf in ufs: 30 | files = sinasc.get_files(group, uf=uf) 31 | years[uf] = set(sorted([sinasc.describe(f)["year"] for f in files])) 32 | 33 | if len(set([len(v) for v in years.values()])) > 1: 34 | logger.warning(f"Distinct years were found for UFs: {years}") 35 | 36 | return sorted(list(set.intersection(*map(set, years.values())))) 37 | 38 | 39 | def download( 40 | groups: Union[str, list], 41 | states: Union[str, list], 42 | years: Union[str, list, int], 43 | data_dir: str = CACHEPATH, 44 | ) -> list: 45 | """ 46 | Downloads data directly from Datasus ftp server 47 | :param groups: either DN, DNR or both 48 | :param states: two-letter state identifier: MG == Minas Gerais, 49 | can be a list 50 | :param years: years to download 51 | :return: list of downloaded files 52 | """ 53 | files = sinasc.get_files(groups, uf=states, year=years) 54 | return sinasc.download(files, local_dir=data_dir) 55 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | SHELL := /usr/bin/env bash 2 | PYTHON := python 3 | PYTHONPATH := ${PWD} 4 | ENVCREATE:= 5 | 6 | 7 | .PHONY: clean clean-test clean-pyc clean-build help 8 | .DEFAULT_GOAL := help 9 | 10 | define PRINT_HELP_PYSCRIPT 11 | import re, sys 12 | 13 | for line in sys.stdin: 14 | match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) 15 | if match: 16 | target, help = match.groups() 17 | print("%-20s %s" % (target, help)) 18 | endef 19 | export PRINT_HELP_PYSCRIPT 20 | 21 | 22 | help: 23 | @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) 24 | 25 | DOCKER = docker compose -p pysus -f docker/docker-compose.yaml 26 | SERVICE := 27 | SEMANTIC_RELEASE = npx --yes \ 28 | -p semantic-release \ 29 | -p conventional-changelog-conventionalcommits \ 30 | -p "@semantic-release/commit-analyzer" \ 31 | -p "@semantic-release/release-notes-generator" \ 32 | -p "@semantic-release/changelog" \ 33 | -p "@semantic-release/exec" \ 34 | -p "@semantic-release/github" \ 35 | -p "@semantic-release/git" \ 36 | -p "semantic-release-replace-plugin" \ 37 | semantic-release 38 | 39 | #* Docker basic 40 | .PHONY: run-jupyter-pysus 41 | run-jupyter-pysus: ## build and deploy all containers 42 | $(DOCKER) up -d --build 43 | 44 | .PHONY: down-jupyter-pysus 45 | down-jupyter-pysus: ## stop and remove containers for all services 46 | $(DOCKER) down -v --remove-orphans 47 | 48 | #* Tests 49 | .PHONY: test-jupyter-pysus 50 | test-jupyter-pysus: ## run pytest for notebooks inside jupyter container 51 | $(DOCKER) exec -T jupyter pytest -vv --nbmake 52 | 53 | .PHONY: test-pysus 54 | test-pysus: ## run tests quickly with the default Python 55 | poetry run pytest -vv pysus/tests/ --retries 3 --retry-delay 15 56 | 57 | # RELEASE 58 | # ======= 59 | 60 | .PHONY: release 61 | release: 62 | $(SEMANTIC_RELEASE) --ci 63 | 64 | 65 | .PHONY: release-dry 66 | release-dry: 67 | $(SEMANTIC_RELEASE) --dry-run 68 | -------------------------------------------------------------------------------- /pysus/online_data/SINAN.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Union 3 | 4 | import pandas as pd 5 | from pysus.ftp import CACHEPATH 6 | from pysus.ftp.databases.sinan import SINAN 7 | 8 | sinan = SINAN().load() 9 | 10 | 11 | def list_diseases() -> dict: 12 | 
"""List available diseases on SINAN""" 13 | return sinan.diseases 14 | 15 | 16 | def get_available_years(disease_code: str) -> list: 17 | """ 18 | Fetch available years for data related to specific disease 19 | :param disease_code: 20 | Disease code. See `SINAN.list_diseases` for valid codes 21 | :return: 22 | A list of DBC files from a specific disease found in the FTP Server. 23 | """ 24 | files = sinan.get_files(dis_code=disease_code) 25 | return sorted(list(set(sinan.describe(f)["year"] for f in files))) 26 | 27 | 28 | def download( 29 | diseases: Union[str, list], 30 | years: Union[str, list, int], 31 | data_path: str = CACHEPATH, 32 | ) -> list: 33 | """ 34 | Downloads SINAN data directly from Datasus ftp server. 35 | :param disease: Disease code according to `agravos`. 36 | :param years: 4 digit integer, can be a list of years. 37 | :param data_path: The directory where the file will be downloaded to. 38 | :return: list of downloaded files. 39 | """ 40 | files = sinan.get_files(dis_code=diseases, year=years) 41 | return sinan.download(files, local_dir=data_path) 42 | 43 | 44 | def metadata_df(disease_code: str) -> pd.DataFrame: 45 | metadata_file = ( 46 | Path(__file__).parent.parent 47 | / "metadata" 48 | / "SINAN" 49 | / f"{disease_code}.tar.gz" 50 | ) 51 | if metadata_file.exists(): 52 | df = pd.read_csv( 53 | metadata_file, 54 | compression="gzip", 55 | header=0, 56 | sep=",", 57 | quotechar='"', 58 | ) 59 | 60 | return df.iloc[:, 1:] 61 | else: 62 | print(f"No metadata available for {disease_code}") 63 | return 64 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "pysus" 3 | version = "1.0.0" # changed by semantic-release 4 | description = "Tools for dealing with Brazil's Public health data" 5 | authors = ["Flavio Codeco Coelho "] 6 | license = "GPL" 7 | 8 | packages = [{include='pysus'}] 9 | 10 | [tool.poetry.dependencies] 11 | python = ">=3.10,<3.14" 12 | python-dateutil = "2.8.2" 13 | dbfread = "2.0.7" 14 | fastparquet = ">=2023.10.1,<=2024.11.0" 15 | numpy = ">1,<3" 16 | pyarrow = ">=11.0.0" 17 | pycparser = "2.21" 18 | pyreaddbc = ">=1.1.0" 19 | tqdm = "4.64.0" 20 | wget = "^3.2" 21 | loguru = "^0.6.0" 22 | Unidecode = "^1.3.6" 23 | dateparser = "^1.1.8" 24 | pandas = "^2.2.2" 25 | urwid = "^2.1.2" 26 | elasticsearch = { version = "7.16.2", extras=["preprocessing"] } 27 | # FTP 28 | bigtree = "^0.12.2" 29 | aioftp = "^0.21.4" 30 | humanize = "^4.8.0" 31 | typing-extensions = "^4.9.0" 32 | 33 | [tool.poetry.group.dev.dependencies] 34 | pytest = ">=6.1.0" 35 | black = "^22.6.0" 36 | flake8 = "^5.0.4" 37 | isort = "^5.10.1" 38 | pre-commit = "^2.20.0" 39 | pytest-timeout = "^2.1.0" 40 | nbsphinx = "^0.9.3" 41 | pytest-retry = "1.7.0" 42 | 43 | [tool.poetry.group.docs.dependencies] 44 | sphinx = "^5.1.1" 45 | nbmake = "^1.4.1" 46 | matplotlib = "^3.7.1" 47 | jupyterlab = "^4.0.5" 48 | ipykernel = "^6.25.1" 49 | seaborn = "^0.12.2" 50 | tomli = "^2.0.1" 51 | sphinx-rtd-theme = "^1.3.0" 52 | nbsphinx = "^0.9.3" 53 | 54 | [tool.poetry.group.geo.dependencies] 55 | geocoder = { version = "^1.38.1", extras=["preprocessing"] } 56 | jsonschema = "^4.19.0" 57 | descartes = "^1.1.0" 58 | folium = "^0.14.0" 59 | 60 | [build-system] 61 | requires = ["poetry-core>=1.0.0"] 62 | build-backend = "poetry.core.masonry.api" 63 | 64 | [tool.isort] 65 | profile = "black" 66 | src_paths = ["isort", "test"] 67 | 68 | [tool.black] 69 | # 
https://github.com/psf/black 70 | target-version = ["py39"] 71 | line-length = 79 72 | color = true 73 | 74 | [tool.pytest.ini_options] 75 | addopts = [ 76 | "--import-mode=importlib", 77 | "-ra -q" 78 | ] 79 | testpaths = [ 80 | "tests" 81 | ] 82 | 83 | exclude = ["*.git", "docs/"] 84 | 85 | [tool.poetry.extras] 86 | preprocessing = ["geobr", "geocoder"] 87 | -------------------------------------------------------------------------------- /pysus/online_data/SIH.py: -------------------------------------------------------------------------------- 1 | """ 2 | Downloads SIH data from Datasus FTP server 3 | Created on 21/09/18 4 | by fccoelho 5 | license: GPL V3 or Later 6 | """ 7 | from typing import Union 8 | 9 | from loguru import logger 10 | from pysus.ftp import CACHEPATH 11 | from pysus.ftp.databases.sih import SIH 12 | from pysus.ftp.utils import parse_UFs 13 | 14 | sih = SIH().load() 15 | 16 | 17 | def get_available_years( 18 | group: str, 19 | states: Union[str, list] = None, 20 | ) -> list: 21 | """ 22 | Get SIH years for group and/or state and returns a list of years 23 | :param group: 24 | RD: AIH Reduzida 25 | RJ: AIH Rejeitada 26 | ER: AIH Rejeitada com erro 27 | SP: Serviços Profissionais 28 | CH: Cadastro Hospitalar 29 | CM: # TODO 30 | :param states: 2 letter uf code, can be a list. E.g: "SP" or ["SP", "RJ"] 31 | :return: list of available years 32 | """ 33 | ufs = parse_UFs(states) 34 | 35 | years = dict() 36 | for uf in ufs: 37 | files = sih.get_files(group, uf=uf) 38 | years[uf] = set(sorted([sih.describe(f)["year"] for f in files])) 39 | 40 | if len(set([len(v) for v in years.values()])) > 1: 41 | logger.warning(f"Distinct years were found for UFs: {years}") 42 | 43 | return sorted(list(set.intersection(*map(set, years.values())))) 44 | 45 | 46 | def download( 47 | states: Union[str, list], 48 | years: Union[str, list, int], 49 | months: Union[str, list, int], 50 | groups: Union[str, list], 51 | data_dir: str = CACHEPATH, 52 | ) -> list: 53 | """ 54 | Download SIH records for state, year and month 55 | :param states: 2 letter state code, can be a list 56 | :param years: 4 digit integer, can be a list 57 | :param months: 1 to 12, can be a list 58 | :param groups: the groups of datasets to be downloaded. 59 | See `sih.groups` 60 | :param data_dir: Directory where parquets will be downloaded. 61 | :return: list with the downloaded files as ParquetData objects 62 | """ 63 | files = sih.get_files(group=groups, uf=states, month=months, year=years) 64 | return sih.download(files, local_dir=data_dir) 65 | -------------------------------------------------------------------------------- /pysus/preprocessing/ESUS.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from pysus.online_data.ESUS import download 4 | 5 | 6 | def cases_by_age_and_sex(UF, start="2020-03-01", end="2020-08-31"): 7 | """ 8 | Fetches ESUS covid line list and aggregates by age and sex returning these 9 | counts between start and end dates. 
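    Rows with negative or inconclusive test results, undetermined sex, or
    missing notification/symptom/test dates are filtered out before
    aggregation (see the comments below).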
10 |     :param UF: State code
11 |     :param start: Start date
12 |     :param end: End date
13 |     :return: DataFrame of case counts by age group and sex
14 |     """
15 |     df = download(uf=UF)
16 | 
17 |     # Convert the date columns to datetime type
18 |     for cname in df:
19 |         if cname.startswith("data"):
20 |             df[cname] = pd.to_datetime(df[cname], errors="coerce")
21 | 
22 |     # Drop null values in the important date columns
23 |     old_size = len(df)
24 |     df.dropna(
25 |         subset=["dataNotificacao", "dataInicioSintomas", "dataTeste"],
26 |         inplace=True,
27 |     )
28 |     print(
29 |         f"Removed {old_size - len(df)} rows with missing dates of symptoms, "
30 |         "notification or testing"
31 |     )
32 | 
33 |     # Disregard negative or inconclusive test results
34 |     df = df.loc[
35 |         ~df.resultadoTeste.isin(["Negativo", "Inconclusivo ou Indeterminado"])
36 |     ]
37 | 
38 |     # Remove records with undetermined sex
39 |     df = df.loc[df.sexo.isin(["Masculino", "Feminino"])]
40 | 
41 |     # Set the date of first symptoms as the index date
42 | 
43 |     df["datesint"] = df["dataInicioSintomas"]
44 |     df.set_index("datesint", inplace=True)
45 |     df.sort_index(inplace=True, ascending=True)
46 | 
47 |     # Limit the start and end dates, considering only the
48 |     # first wave
49 | 
50 |     df = df.loc[start:end]
51 | 
52 |     ini = np.arange(0, 81, 5)
53 |     fin = np.arange(5, 86, 5)
54 |     fin[-1] = 120
55 |     faixa_etaria = {
56 |         f"[{i},{f})": (i, f) for i, f in zip(ini, fin) # noqa: E231
57 |     }
58 | 
59 |     labels = list(faixa_etaria.keys())
60 |     df["faixa_etaria"] = [
61 |         labels[i - 1] for i in np.digitize(df.idade, bins=ini)
62 |     ]
63 | 
64 |     agreg = (
65 |         df[["sexo", "faixa_etaria"]].groupby(["faixa_etaria", "sexo"]).size()
66 |     )
67 |     agreg = agreg.reset_index()
68 |     agreg.columns = ["faixa_etaria", "sexo", "n"]
69 |     return agreg
70 | 
--------------------------------------------------------------------------------
/.releaserc.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "branches": ["main"],
 3 |   "tagFormat": "${version}",
 4 |   "plugins": [
 5 |     [
 6 |       "@semantic-release/commit-analyzer", {
 7 |         "preset": "conventionalcommits"
 8 |     }],
 9 |     [
10 |       "semantic-release-replace-plugin",
11 |       {
12 |         "replacements": [
13 |           {
14 |             "files": ["pysus/__init__.py"],
15 |             "from": "return \".*\" # changed by semantic-release",
16 |             "to": "return \"${nextRelease.version}\" # changed by semantic-release",
17 |             "results": [
18 |               {
19 |                 "file": "pysus/__init__.py",
20 |                 "hasChanged": true,
21 |                 "numMatches": 1,
22 |                 "numReplacements": 1
23 |               }
24 |             ],
25 |             "countMatches": true
26 |           },
27 |           {
28 |             "files": ["pyproject.toml"],
29 |             "from": "version = \".*\" # changed by semantic-release",
30 |             "to": "version = \"${nextRelease.version}\" # changed by semantic-release",
31 |             "results": [
32 |               {
33 |                 "file": "pyproject.toml",
34 |                 "hasChanged": true,
35 |                 "numMatches": 1,
36 |                 "numReplacements": 1
37 |               }
38 |             ],
39 |             "countMatches": true
40 |           }
41 |         ]
42 |       }
43 |     ],
44 |     [
45 |       "@semantic-release/release-notes-generator", {
46 |         "preset": "conventionalcommits"
47 |     }],
48 |     [
49 |       "@semantic-release/changelog",
50 |       {
51 |         "changelogTitle": "Release Notes\n---",
52 |         "changelogFile": "CHANGELOG.md"
53 |       }
54 |     ],
55 |     [
56 |       "@semantic-release/exec",
57 |       {
58 |         "prepareCmd": "poetry build",
59 |         "publishCmd": "poetry publish"
60 |       }
61 |     ],
62 |     [
63 |       "@semantic-release/github",
64 |       {
65 |         "assets": ["dist/*.whl", "dist/*.tar.gz"]
66 |       }
67 |     ],
68 |     [
69 |       "@semantic-release/git",
70 |       {
71 |         "assets": [
72 |           "pyproject.toml",
73 | 
"CHANGELOG.md", 74 | "pysus/__init__.py" 75 | ], 76 | "message": "chore(release): ${nextRelease.version}" 77 | } 78 | ] 79 | ] 80 | } 81 | -------------------------------------------------------------------------------- /pysus/ftp/databases/sim.py: -------------------------------------------------------------------------------- 1 | __all__ = ["SIM"] 2 | 3 | from typing import List, Optional, Union 4 | 5 | from pysus.ftp import Database, Directory, File 6 | from pysus.ftp.utils import UFs, parse_UFs, to_list, zfill_year 7 | 8 | 9 | class SIM(Database): 10 | name = "SIM" 11 | paths = ( 12 | Directory("/dissemin/publicos/SIM/CID10/DORES"), 13 | Directory("/dissemin/publicos/SIM/CID9/DORES"), 14 | ) 15 | metadata = { 16 | "long_name": "Sistema de Informação sobre Mortalidade", 17 | "source": "http://sim.saude.gov.br", 18 | "description": "", 19 | } 20 | groups = {"CID10": "DO", "CID9": "DOR"} 21 | 22 | def describe(self, file: File) -> dict: 23 | group, _uf, year = self.format(file) 24 | _groups = {v: k for k, v in self.groups.items()} 25 | 26 | try: 27 | uf = UFs[_uf] 28 | except KeyError: 29 | uf = _uf 30 | 31 | description = { 32 | "name": str(file.basename), 33 | "uf": uf, 34 | "year": year, 35 | "group": _groups[group], 36 | "size": file.info["size"], 37 | "last_update": file.info["modify"], 38 | } 39 | 40 | return description 41 | 42 | def format(self, file: File) -> tuple: 43 | if "CID9" in str(file.path): 44 | group, _uf, year = file.name[:-4], file.name[-4:-2], file.name[-2:] 45 | else: 46 | group, _uf, year = file.name[:-6], file.name[-6:-4], file.name[-4:] 47 | return group, _uf, zfill_year(year) 48 | 49 | def get_files( 50 | self, 51 | group: Union[list[str], str], 52 | uf: Optional[Union[list[str], str]] = None, 53 | year: Optional[Union[list, str, int]] = None, 54 | ) -> List[File]: 55 | files = self.files 56 | 57 | groups = [self.groups[g.upper()] for g in to_list(group)] 58 | 59 | files = list(filter(lambda f: self.format(f)[0] in groups, files)) 60 | 61 | if uf: 62 | ufs = parse_UFs(uf) 63 | files = list(filter(lambda f: self.format(f)[1] in ufs, files)) 64 | 65 | if year or str(year) in ["0", "00"]: 66 | years = [zfill_year(y) for y in to_list(year)] 67 | files = list(filter(lambda f: self.format(f)[2] in years, files)) 68 | 69 | return files 70 | -------------------------------------------------------------------------------- /pysus/tests/test_ibge.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import pandas as pd 4 | import pytest 5 | from pysus.online_data import IBGE 6 | 7 | 8 | class SIDRA(unittest.TestCase): 9 | @pytest.mark.timeout(120) 10 | def test_get_aggregates(self): 11 | df = IBGE.list_agregados() 12 | self.assertIsInstance(df, pd.DataFrame) 13 | self.assertGreater(df.size, 0) 14 | 15 | @pytest.mark.skip(reason="This test takes too long") 16 | def test_localidades_por_agregado(self): 17 | df = IBGE.localidades_por_agregado(475, nivel="N3") 18 | self.assertIsInstance(df, pd.DataFrame) 19 | self.assertGreater(df.size, 0) 20 | 21 | @pytest.mark.timeout(120) 22 | @pytest.mark.skip(reason="Failing?") 23 | def test_lista_periodos(self): 24 | df = IBGE.lista_periodos(475) 25 | self.assertIsInstance(df, pd.DataFrame) 26 | self.assertGreater(df.size, 0) 27 | 28 | @pytest.mark.timeout(120) 29 | def test_get_sidra_table(self): 30 | df = IBGE.get_sidra_table( 31 | 200, 32 | territorial_level=6, 33 | geocode=4220000, 34 | period="last", 35 | classification=2, 36 | categories="all", 37 | ) 38 | 
self.assertIsInstance(df, pd.DataFrame) 39 | self.assertGreater(df.size, 0) 40 | 41 | @pytest.mark.skip(reason="This test takes too long") 42 | def test_metadata(self): 43 | md = IBGE.metadados(475) 44 | self.assertIsInstance(md, dict) 45 | self.assertGreater(len(md), 0) 46 | 47 | @pytest.mark.timeout(120) 48 | def test_FetchData(self): 49 | ds = IBGE.FetchData( 50 | 475, 51 | periodos=1996, 52 | variavel=93, 53 | localidades="N3[all]", 54 | classificacao="58[all]|2[4,5]|1[all]", 55 | view="flat", 56 | ) 57 | self.assertIsInstance(ds, IBGE.FetchData) 58 | self.assertGreater(len(ds.JSON), 0) 59 | 60 | @pytest.mark.timeout(120) 61 | def test_get_population(self): 62 | l1 = IBGE.get_population(year=2021, source="POP") 63 | self.assertEqual(type(l1), pd.DataFrame) 64 | self.assertEqual(len(l1), 5570) 65 | l2 = IBGE.get_population(year=2012, source="projpop") 66 | self.assertEqual(type(l2), pd.DataFrame) 67 | self.assertEqual(len(l2), 4914) 68 | 69 | 70 | if __name__ == "__main__": 71 | unittest.main() 72 | -------------------------------------------------------------------------------- /docs/source/locale/pt/LC_MESSAGES/Dengue.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-05-02 14:39-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/Dengue.ipynb:9 23 | msgid "Análise exploratória de casos de Dengue notificado pelo SUS" 24 | msgstr "" 25 | 26 | #: ../../source/Dengue.ipynb:11 27 | msgid "" 28 | "Neste tutorial vamos explorar como começar a entender a descrição de " 29 | "casos do sis tema de notificação de agravos do SUS, o SINAN. Estes dados " 30 | "são bastante ricos e a `documentação " 31 | "`__ sobre o " 32 | "significado de cada uma das variáveis pode ser encontrada `aqui " 33 | "`__." 34 | msgstr "" 35 | 36 | #: ../../source/Dengue.ipynb:63 37 | msgid "" 38 | "Primeiro vamos começar pelo carregamento dos dados a partir do servidor " 39 | "do DataSUS. Como o dado está no formato Parquet, nós vamos utilizar a " 40 | "biblioteca pandas para visualizar os dados em um Dataframe:" 41 | msgstr "" 42 | 43 | #: ../../source/Dengue.ipynb:459 44 | msgid "" 45 | "Estes dados correspondem a todos os casos de dengue notificado ao SUS " 46 | "durante um período. Neste caso de 2015 a 2016. Para podermos tratar " 47 | "adequadamente estes dados para fins de visualização ou análise precisamos" 48 | " corrigir os tipos das colunas. Por exemplo vamos converter as datas." 49 | msgstr "" 50 | 51 | #: ../../source/Dengue.ipynb:481 52 | msgid "" 53 | "Para poder organizar os dados temporalmente, é útil indexar a tabela por " 54 | "alguma variável temporal" 55 | msgstr "" 56 | 57 | #: ../../source/Dengue.ipynb:502 58 | msgid "" 59 | "Agora podemos plotar o número de casos por semana de dengue na cidade do " 60 | "Rio de Janeiro." 
61 | msgstr "" 62 | 63 | #: ../../source/Dengue.ipynb:531 64 | msgid "" 65 | "Suponhamos agora que desejamos visualizar em um mapa os casos que " 66 | "ocorreram, por exemplo entre janeiro e março de 2015" 67 | msgstr "" 68 | -------------------------------------------------------------------------------- /docs/source/locale/pt_BR/LC_MESSAGES/Dengue.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-05-02 14:39-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/Dengue.ipynb:9 23 | msgid "Análise exploratória de casos de Dengue notificado pelo SUS" 24 | msgstr "" 25 | 26 | #: ../../source/Dengue.ipynb:11 27 | msgid "" 28 | "Neste tutorial vamos explorar como começar a entender a descrição de " 29 | "casos do sis tema de notificação de agravos do SUS, o SINAN. Estes dados " 30 | "são bastante ricos e a `documentação " 31 | "`__ sobre o " 32 | "significado de cada uma das variáveis pode ser encontrada `aqui " 33 | "`__." 34 | msgstr "" 35 | 36 | #: ../../source/Dengue.ipynb:63 37 | msgid "" 38 | "Primeiro vamos começar pelo carregamento dos dados a partir do servidor " 39 | "do DataSUS. Como o dado está no formato Parquet, nós vamos utilizar a " 40 | "biblioteca pandas para visualizar os dados em um Dataframe:" 41 | msgstr "" 42 | 43 | #: ../../source/Dengue.ipynb:459 44 | msgid "" 45 | "Estes dados correspondem a todos os casos de dengue notificado ao SUS " 46 | "durante um período. Neste caso de 2015 a 2016. Para podermos tratar " 47 | "adequadamente estes dados para fins de visualização ou análise precisamos" 48 | " corrigir os tipos das colunas. Por exemplo vamos converter as datas." 49 | msgstr "" 50 | 51 | #: ../../source/Dengue.ipynb:481 52 | msgid "" 53 | "Para poder organizar os dados temporalmente, é útil indexar a tabela por " 54 | "alguma variável temporal" 55 | msgstr "" 56 | 57 | #: ../../source/Dengue.ipynb:502 58 | msgid "" 59 | "Agora podemos plotar o número de casos por semana de dengue na cidade do " 60 | "Rio de Janeiro." 
61 | msgstr "" 62 | 63 | #: ../../source/Dengue.ipynb:531 64 | msgid "" 65 | "Suponhamos agora que desejamos visualizar em um mapa os casos que " 66 | "ocorreram, por exemplo entre janeiro e março de 2015" 67 | msgstr "" 68 | -------------------------------------------------------------------------------- /pysus/ftp/databases/sinasc.py: -------------------------------------------------------------------------------- 1 | __all__ = ["SINASC"] 2 | 3 | from typing import List, Optional, Union 4 | 5 | from pysus.ftp import Database, Directory, File 6 | from pysus.ftp.utils import UFs, parse_UFs, to_list, zfill_year 7 | 8 | 9 | class SINASC(Database): 10 | name = "SINASC" 11 | paths = ( 12 | Directory("/dissemin/publicos/SINASC/NOV/DNRES"), 13 | Directory("/dissemin/publicos/SINASC/ANT/DNRES"), 14 | ) 15 | metadata = { 16 | "long_name": "Sistema de Informações sobre Nascidos Vivos", 17 | "source": "http://sinasc.saude.gov.br/", 18 | "description": "", 19 | } 20 | groups = { 21 | "DN": "Declarações de Nascidos Vivos", 22 | "DNR": "Dados dos Nascidos Vivos por UF de residência", 23 | } 24 | 25 | def describe(self, file: File) -> dict: 26 | if file.extension.upper() == ".DBC": 27 | group, _uf, year = self.format(file) 28 | 29 | try: 30 | uf = UFs[_uf] 31 | except KeyError: 32 | uf = _uf 33 | 34 | description = { 35 | "name": file.basename, 36 | "group": self.groups[group], 37 | "uf": uf, 38 | "year": year, 39 | "size": file.info["size"], 40 | "last_update": file.info["modify"], 41 | } 42 | 43 | return description 44 | return {} 45 | 46 | def format(self, file: File) -> tuple: 47 | if file.name == "DNEX2021": 48 | pass 49 | 50 | year = zfill_year(file.name[-2:]) 51 | charname = "".join([c for c in file.name if not c.isnumeric()]) 52 | group, _uf = charname[:-2], charname[-2:] 53 | return group, _uf, zfill_year(year) 54 | 55 | def get_files( 56 | self, 57 | group: Union[List[str], str], 58 | uf: Optional[Union[List[str], str]] = None, 59 | year: Optional[Union[List, str, int]] = None, 60 | ) -> List[File]: 61 | files = self.files 62 | 63 | groups = to_list(group) 64 | 65 | files = list(filter(lambda f: self.format(f)[0] in groups, files)) 66 | 67 | if uf: 68 | if "EX" in to_list(uf): 69 | # DNEX2021 70 | if len(to_list(uf)) == 1: 71 | return [] 72 | 73 | to_list(uf).remove("EX") 74 | 75 | ufs = parse_UFs(uf) 76 | files = list(filter(lambda f: self.format(f)[1] in ufs, files)) 77 | 78 | if year or str(year) in ["0", "00"]: 79 | years = [zfill_year(str(y)[-2:]) for y in to_list(year)] 80 | files = list(filter(lambda f: self.format(f)[2] in years, files)) 81 | 82 | return files 83 | -------------------------------------------------------------------------------- /docs/source/locale/pt/LC_MESSAGES/Zika.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 
5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-05-02 14:39-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/Zika.ipynb:9 23 | msgid "Análise exploratória de casos de Zika notificado pelo SUS" 24 | msgstr "" 25 | 26 | #: ../../source/Zika.ipynb:11 27 | msgid "" 28 | "Neste tutorial vamos explorar como começar a entender a descrição de " 29 | "casos do sistema de notificação de agravos do SUS, o SINAN. Estes dados " 30 | "são bastante ricos e a `documentação " 31 | "`__ sobre o " 32 | "significado de cada uma das variáveis pode ser encontrada `aqui " 33 | "`__." 34 | msgstr "" 35 | 36 | #: ../../source/Zika.ipynb:60 37 | msgid "" 38 | "Primeiro vamos começar pelo carregamento dos dados a partir do site do " 39 | "InfoDengue. Como o dado está no formato parquet, nós vamos utilizar a " 40 | "biblioteca pandas para carregar os dados em um Dataframe." 41 | msgstr "" 42 | 43 | #: ../../source/Zika.ipynb:87 44 | msgid "Vejamos os nomes da variáveis" 45 | msgstr "" 46 | 47 | #: ../../source/Zika.ipynb:434 48 | msgid "" 49 | "Estes dados correspondem a todos os casos de Zika notificados ao SUS " 50 | "durante um período. Neste caso de 2015 a 2016. Para podermos tratar " 51 | "adequadamente estes dados para fins de visualização ou análise precisamos" 52 | " corrigir os tipos das colunas. Por exemplo vamos converter as datas." 53 | msgstr "" 54 | 55 | #: ../../source/Zika.ipynb:456 56 | msgid "" 57 | "Para poder organizar os dados temporalmente, é útil indexar a tabela por " 58 | "alguma variável temporal. Vamos usar a data de notifiacão de cada caso " 59 | "como índice" 60 | msgstr "" 61 | 62 | #: ../../source/Zika.ipynb:477 63 | msgid "" 64 | "Agora podemos plotar o número de casos por semana de dengue na cidade do " 65 | "Rio de Janeiro." 66 | msgstr "" 67 | 68 | #: ../../source/Zika.ipynb:532 69 | msgid "" 70 | "Suponhamos agora que desejamos visualizar em um mapa os casos que " 71 | "ocorreram, por exemplo entre janeiro e agosto de 2016." 72 | msgstr "" 73 | 74 | #: ../../source/Zika.ipynb:544 75 | msgid "Usando Kepler.gl para visualização" 76 | msgstr "" 77 | -------------------------------------------------------------------------------- /docs/source/locale/pt_BR/LC_MESSAGES/Zika.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 
5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-05-02 14:39-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/Zika.ipynb:9 23 | msgid "Análise exploratória de casos de Zika notificado pelo SUS" 24 | msgstr "" 25 | 26 | #: ../../source/Zika.ipynb:11 27 | msgid "" 28 | "Neste tutorial vamos explorar como começar a entender a descrição de " 29 | "casos do sistema de notificação de agravos do SUS, o SINAN. Estes dados " 30 | "são bastante ricos e a `documentação " 31 | "`__ sobre o " 32 | "significado de cada uma das variáveis pode ser encontrada `aqui " 33 | "`__." 34 | msgstr "" 35 | 36 | #: ../../source/Zika.ipynb:60 37 | msgid "" 38 | "Primeiro vamos começar pelo carregamento dos dados a partir do site do " 39 | "InfoDengue. Como o dado está no formato parquet, nós vamos utilizar a " 40 | "biblioteca pandas para carregar os dados em um Dataframe." 41 | msgstr "" 42 | 43 | #: ../../source/Zika.ipynb:87 44 | msgid "Vejamos os nomes da variáveis" 45 | msgstr "" 46 | 47 | #: ../../source/Zika.ipynb:434 48 | msgid "" 49 | "Estes dados correspondem a todos os casos de Zika notificados ao SUS " 50 | "durante um período. Neste caso de 2015 a 2016. Para podermos tratar " 51 | "adequadamente estes dados para fins de visualização ou análise precisamos" 52 | " corrigir os tipos das colunas. Por exemplo vamos converter as datas." 53 | msgstr "" 54 | 55 | #: ../../source/Zika.ipynb:456 56 | msgid "" 57 | "Para poder organizar os dados temporalmente, é útil indexar a tabela por " 58 | "alguma variável temporal. Vamos usar a data de notifiacão de cada caso " 59 | "como índice" 60 | msgstr "" 61 | 62 | #: ../../source/Zika.ipynb:477 63 | msgid "" 64 | "Agora podemos plotar o número de casos por semana de dengue na cidade do " 65 | "Rio de Janeiro." 66 | msgstr "" 67 | 68 | #: ../../source/Zika.ipynb:532 69 | msgid "" 70 | "Suponhamos agora que desejamos visualizar em um mapa os casos que " 71 | "ocorreram, por exemplo entre janeiro e agosto de 2016." 72 | msgstr "" 73 | 74 | #: ../../source/Zika.ipynb:544 75 | msgid "Usando Kepler.gl para visualização" 76 | msgstr "" 77 | -------------------------------------------------------------------------------- /docs/source/locale/pt/LC_MESSAGES/ESUS.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 
5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/ESUS.ipynb:21 23 | msgid "Downloading data from ESUS" 24 | msgstr "" 25 | 26 | #: ../../source/ESUS.ipynb:23 27 | msgid "" 28 | "This function alows for the download of COVID-19 data from ESUS. For Some" 29 | " States, the size of the resulting table can easily exceed the memory " 30 | "size of most personal computers, in such cases, the ESUS download " 31 | "function will stream the data to disk without filling up the memory and " 32 | "return an iterator of chunks of 1000 rows of data. The user must then " 33 | "iterate over the chunks to analyze the data." 34 | msgstr "" 35 | "Esta função permite o download de dados de COVID-19 do ESUS. Para alguns " 36 | "estados, o tamanho da tabela resultante pode facilmente exceder o tamanho " 37 | "da memória da maioria da máquina, nestes casos, a função de download do ESUS " 38 | "irá extrair os dados para o disco sem preencher a memória, e retornar um " 39 | "iterador de blocos de 1000 linhas de dados. O usuário deve, então, " 40 | "iterar sobre os blocos para analisar os dados." 41 | 42 | #: ../../source/ESUS.ipynb:737 43 | msgid "" 44 | "Now we will create a datetime index for our dataframe, but we must be " 45 | "carefull with missing dates here. For now, to enable a quick " 46 | "visualization, we will simply coerce missing dates to ``NaT``." 47 | msgstr "" 48 | "Agora vamos criar um índice de data e hora para o nosso dataframe, mas " 49 | "devemos ter cuidado com as datas ausentes aqui. Por enquanto, para " 50 | "permitir uma visualização rápida, vamos simplesmente forçar as datas ausentes a ``NaT``." 51 | 52 | #: ../../source/ESUS.ipynb:760 53 | msgid "Now we can count the cases per day and plot." 54 | msgstr "Agora podemos contar os casos diários e plotar." 55 | 56 | #: ../../source/ESUS.ipynb:1244 57 | msgid "Deduplicating the data" 58 | msgstr "Removendo a duplicidade" 59 | 60 | #: ../../source/ESUS.ipynb:1246 61 | msgid "" 62 | "ESUS records are know to have a number of duplicated records. Let's see " 63 | "here how to detect possible duplicates in the dataframe we have just " 64 | "downloaded. For that we will need the ```recordlinkage`` " 65 | "`__ package." 66 | msgstr "" 67 | "Os registros do ESUS são conhecidos por terem vários registros duplicados. " 68 | "Vamos ver aqui como detectar possíveis duplicatas no dataframe que acabamos " 69 | "de baixar. Para isso, precisaremos do pacote ```recordlinkage`` " 70 | "https://recordlinkage.readthedocs.io/en/latest/index.html`__." 71 | -------------------------------------------------------------------------------- /docs/source/locale/pt_BR/LC_MESSAGES/ESUS.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 
5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/ESUS.ipynb:21 23 | msgid "Downloading data from ESUS" 24 | msgstr "" 25 | 26 | #: ../../source/ESUS.ipynb:23 27 | msgid "" 28 | "This function alows for the download of COVID-19 data from ESUS. For Some" 29 | " States, the size of the resulting table can easily exceed the memory " 30 | "size of most personal computers, in such cases, the ESUS download " 31 | "function will stream the data to disk without filling up the memory and " 32 | "return an iterator of chunks of 1000 rows of data. The user must then " 33 | "iterate over the chunks to analyze the data." 34 | msgstr "" 35 | "Esta função permite o download de dados de COVID-19 do ESUS. Para alguns " 36 | "estados, o tamanho da tabela resultante pode facilmente exceder o tamanho " 37 | "da memória da maioria da máquina, nestes casos, a função de download do ESUS " 38 | "irá extrair os dados para o disco sem preencher a memória, e retornar um " 39 | "iterador de blocos de 1000 linhas de dados. O usuário deve, então, " 40 | "iterar sobre os blocos para analisar os dados." 41 | 42 | #: ../../source/ESUS.ipynb:737 43 | msgid "" 44 | "Now we will create a datetime index for our dataframe, but we must be " 45 | "carefull with missing dates here. For now, to enable a quick " 46 | "visualization, we will simply coerce missing dates to ``NaT``." 47 | msgstr "" 48 | "Agora vamos criar um índice de data e hora para o nosso dataframe, mas " 49 | "devemos ter cuidado com as datas ausentes aqui. Por enquanto, para " 50 | "permitir uma visualização rápida, vamos simplesmente forçar as datas ausentes a ``NaT``." 51 | 52 | #: ../../source/ESUS.ipynb:760 53 | msgid "Now we can count the cases per day and plot." 54 | msgstr "Agora podemos contar os casos diários e plotar." 55 | 56 | #: ../../source/ESUS.ipynb:1244 57 | msgid "Deduplicating the data" 58 | msgstr "Removendo a duplicidade" 59 | 60 | #: ../../source/ESUS.ipynb:1246 61 | msgid "" 62 | "ESUS records are know to have a number of duplicated records. Let's see " 63 | "here how to detect possible duplicates in the dataframe we have just " 64 | "downloaded. For that we will need the ```recordlinkage`` " 65 | "`__ package." 66 | msgstr "" 67 | "Os registros do ESUS são conhecidos por terem vários registros duplicados. " 68 | "Vamos ver aqui como detectar possíveis duplicatas no dataframe que acabamos " 69 | "de baixar. Para isso, precisaremos do pacote ```recordlinkage`` " 70 | "https://recordlinkage.readthedocs.io/en/latest/index.html`__." 
71 | -------------------------------------------------------------------------------- /pysus/ftp/databases/ibge_datasus.py: -------------------------------------------------------------------------------- 1 | __all__ = ["IBGEDATASUS"] 2 | 3 | from typing import List, Literal, Optional, Union 4 | 5 | from pysus.ftp import Database, Directory, File 6 | from pysus.ftp.utils import zfill_year 7 | 8 | 9 | class IBGEDATASUS(Database): 10 | name = "IBGE-DataSUS" 11 | paths = ( 12 | Directory("/dissemin/publicos/IBGE/POP"), 13 | Directory("/dissemin/publicos/IBGE/censo"), 14 | Directory("/dissemin/publicos/IBGE/POPTCU"), 15 | Directory("/dissemin/publicos/IBGE/projpop"), 16 | # Directory("/dissemin/publicos/IBGE/Auxiliar") # this has a different file name pattern # noqa 17 | ) 18 | metadata = { 19 | "long_name": "Populaçao Residente, Censos, Contagens " 20 | "Populacionais e Projeçoes Intercensitarias", 21 | "source": "ftp://ftp.datasus.gov.br/dissemin/publicos/IBGE", 22 | "description": ( 23 | "São aqui apresentados informações sobre a população residente, " 24 | "estratificadas por município, faixas etárias e sexo, obtidas a " 25 | "partir dos Censos Demográficos, Contagens Populacionais " 26 | "e Projeções Intercensitárias." 27 | ), 28 | } 29 | 30 | def describe(self, file: File) -> dict: 31 | if file.extension.upper() in [".ZIP"]: 32 | year = file.name.split(".")[0][-2:] 33 | description = { 34 | "name": str(file.basename), 35 | "year": zfill_year(year), 36 | "size": file.info["size"], 37 | "last_update": file.info["modify"], 38 | } 39 | return description 40 | elif file.extension.upper() == ".DBF": 41 | year = file.name[-2:] 42 | description = { 43 | "name": str(file.basename), 44 | "year": zfill_year(year), 45 | "size": file.info["size"], 46 | "last_update": file.info["modify"], 47 | } 48 | return description 49 | return {} 50 | 51 | def format(self, file: File) -> tuple: 52 | return (file.name[-2:],) 53 | 54 | def get_files( 55 | self, 56 | source: Literal["POP", "censo", "POPTCU", "projpop"] = "POPTCU", 57 | year: Optional[Union[str, int, list]] = None, 58 | *args, 59 | **kwargs, 60 | ) -> List[File]: 61 | sources = ["POP", "censo", "POPTCU", "projpop"] 62 | source_dir = None 63 | 64 | for dir in self.paths: 65 | if source in sources and source in dir.path: 66 | source_dir = dir 67 | 68 | if not source_dir: 69 | raise ValueError(f"Unkown source {source}. Options: {sources}") 70 | 71 | files = source_dir.content 72 | 73 | if year: 74 | if isinstance(year, (str, int)): 75 | files = [ 76 | f 77 | for f in files 78 | if self.describe(f)["year"] == zfill_year(year) 79 | ] 80 | elif isinstance(year, list): 81 | files = [ 82 | f 83 | for f in files 84 | if str(self.describe(f)["year"]) 85 | in [str(zfill_year(y)) for y in year] 86 | ] 87 | 88 | return files 89 | -------------------------------------------------------------------------------- /docs/source/locale/pt/LC_MESSAGES/index.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 
5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/index.rst:14 23 | msgid "Data Sources" 24 | msgstr "Bases de dados" 25 | 26 | #: ../../source/index.rst:14 27 | msgid "Tutorials" 28 | msgstr "Tutoriais" 29 | 30 | #: ../../source/index.rst:14 31 | msgid "Loading SINAN data" 32 | msgstr "Extração de dados (SINAN)" 33 | 34 | #: ../../source/index.rst:14 35 | msgid "Loading SIM data" 36 | msgstr "Extração de dados (SIM)" 37 | 38 | #: ../../source/index.rst:14 39 | msgid "Loading SIA data" 40 | msgstr "Extração de dados (SIA)" 41 | 42 | #: ../../source/index.rst:14 43 | msgid "LOADING PNI data" 44 | msgstr "Extração de dados (PNI)" 45 | 46 | #: ../../source/index.rst:14 47 | msgid "Analyzing Dengue data" 48 | msgstr "Análise de dados (Dengue)" 49 | 50 | #: ../../source/index.rst:14 51 | msgid "Analyzing Chikungunya data" 52 | msgstr "Análise de dados (Chikungunya)" 53 | 54 | #: ../../source/index.rst:14 55 | msgid "Analyzing Zika data" 56 | msgstr "Análise de dados (Zika)" 57 | 58 | #: ../../source/index.rst:14 59 | msgid "Downloading COVID data from ESUS" 60 | msgstr "Extração de dados de COVID (ESUS)" 61 | 62 | #: ../../source/index.rst:14 63 | msgid "Downloading Infogripe data" 64 | msgstr "Extração de dados (Infogripe)" 65 | 66 | #: ../../source/index.rst:14 67 | msgid "Downloading Infodengue data" 68 | msgstr "Extração de dados (Infodengue)" 69 | 70 | #: ../../source/index.rst:14 71 | msgid "Getting Official Statistics" 72 | msgstr "Estatísticas Oficiais" 73 | 74 | #: ../../source/index.rst:7 75 | msgid "Welcome to PySUS's documentation!" 76 | msgstr "Bem-vindo(a) à documentação do PySUS" 77 | 78 | #: ../../source/index.rst:9 79 | msgid "" 80 | "PySUS is a library which is a collection of helper codes for people which" 81 | " need to analyze data from SUS (Brazilian Universal Health System). " 82 | "Contributions are welcome!" 83 | msgstr "" 84 | "PySUS é uma biblioteca que consiste em uma coleção de códigos auxiliares para " 85 | "pessoas que precisam analisar dados do SUS (Sistema Único de Saúde). " 86 | "Contribuições são bem-vindas!" 87 | 88 | #: ../../source/index.rst:12 89 | msgid "Contents:" 90 | msgstr "Conteúdo:" 91 | 92 | #: ../../source/index.rst:35 93 | msgid "Indices and tables" 94 | msgstr "Tabelas e índices" 95 | 96 | #: ../../source/index.rst:37 97 | msgid ":ref:`genindex`" 98 | msgstr ":ref:`genindex`" 99 | 100 | #: ../../source/index.rst:38 101 | msgid ":ref:`modindex`" 102 | msgstr ":ref:`modindex`" 103 | 104 | #: ../../source/index.rst:39 105 | msgid ":ref:`search`" 106 | msgstr ":ref:`search`" 107 | -------------------------------------------------------------------------------- /docs/source/locale/pt_BR/LC_MESSAGES/index.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 
5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/index.rst:14 23 | msgid "Data Sources" 24 | msgstr "Bases de dados" 25 | 26 | #: ../../source/index.rst:14 27 | msgid "Tutorials" 28 | msgstr "Tutoriais" 29 | 30 | #: ../../source/index.rst:14 31 | msgid "Loading SINAN data" 32 | msgstr "Extração de dados (SINAN)" 33 | 34 | #: ../../source/index.rst:14 35 | msgid "Loading SIM data" 36 | msgstr "Extração de dados (SIM)" 37 | 38 | #: ../../source/index.rst:14 39 | msgid "Loading SIA data" 40 | msgstr "Extração de dados (SIA)" 41 | 42 | #: ../../source/index.rst:14 43 | msgid "LOADING PNI data" 44 | msgstr "Extração de dados (PNI)" 45 | 46 | #: ../../source/index.rst:14 47 | msgid "Analyzing Dengue data" 48 | msgstr "Análise de dados (Dengue)" 49 | 50 | #: ../../source/index.rst:14 51 | msgid "Analyzing Chikungunya data" 52 | msgstr "Análise de dados (Chikungunya)" 53 | 54 | #: ../../source/index.rst:14 55 | msgid "Analyzing Zika data" 56 | msgstr "Análise de dados (Zika)" 57 | 58 | #: ../../source/index.rst:14 59 | msgid "Downloading COVID data from ESUS" 60 | msgstr "Extração de dados de COVID (ESUS)" 61 | 62 | #: ../../source/index.rst:14 63 | msgid "Downloading Infogripe data" 64 | msgstr "Extração de dados (Infogripe)" 65 | 66 | #: ../../source/index.rst:14 67 | msgid "Downloading Infodengue data" 68 | msgstr "Extração de dados (Infodengue)" 69 | 70 | #: ../../source/index.rst:14 71 | msgid "Getting Official Statistics" 72 | msgstr "Estatísticas Oficiais" 73 | 74 | #: ../../source/index.rst:7 75 | msgid "Welcome to PySUS's documentation!" 76 | msgstr "Bem-vindo(a) à documentação do PySUS" 77 | 78 | #: ../../source/index.rst:9 79 | msgid "" 80 | "PySUS is a library which is a collection of helper codes for people which" 81 | " need to analyze data from SUS (Brazilian Universal Health System). " 82 | "Contributions are welcome!" 83 | msgstr "" 84 | "PySUS é uma biblioteca que consiste em uma coleção de códigos auxiliares para " 85 | "pessoas que precisam analisar dados do SUS (Sistema Único de Saúde). " 86 | "Contribuições são bem-vindas!" 
87 | 88 | #: ../../source/index.rst:12 89 | msgid "Contents:" 90 | msgstr "Conteúdo:" 91 | 92 | #: ../../source/index.rst:35 93 | msgid "Indices and tables" 94 | msgstr "Tabelas e índices" 95 | 96 | #: ../../source/index.rst:37 97 | msgid ":ref:`genindex`" 98 | msgstr ":ref:`genindex`" 99 | 100 | #: ../../source/index.rst:38 101 | msgid ":ref:`modindex`" 102 | msgstr ":ref:`modindex`" 103 | 104 | #: ../../source/index.rst:39 105 | msgid ":ref:`search`" 106 | msgstr ":ref:`search`" 107 | -------------------------------------------------------------------------------- /pysus/data/local.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path, PurePosixPath, PureWindowsPath 3 | from typing import Dict, List, Union 4 | 5 | import pandas as pd 6 | from loguru import logger 7 | from pysus.data import dbc_to_dbf, dbf_to_parquet, parse_dftypes 8 | 9 | 10 | class ParquetSet: 11 | """ 12 | A local parquet directory or file 13 | """ 14 | 15 | __path__: Union[PurePosixPath, PureWindowsPath] 16 | info: Dict 17 | 18 | def __init__(self, path: str, _pbar=None) -> None: 19 | info = {} 20 | path = Path(path) 21 | 22 | if path.suffix.lower() not in [".parquet", ".dbc", ".dbf"]: 23 | raise NotImplementedError(f"Unknown file type: {path.suffix}") 24 | 25 | if path.suffix.lower() == ".dbc": 26 | path = Path(dbc_to_dbf(path, _pbar=_pbar)) 27 | 28 | if path.suffix.lower() == ".dbf": 29 | path = Path(dbf_to_parquet(path, _pbar=_pbar)) 30 | 31 | if path.is_dir(): 32 | info["size"] = sum( 33 | f.stat().st_size for f in path.glob("**/*") if f.is_file() 34 | ) 35 | else: 36 | info["size"] = os.path.getsize(str(path)) 37 | 38 | self.__path__ = path 39 | self.info = info 40 | 41 | def __str__(self): 42 | return str(self.__path__) 43 | 44 | def __repr__(self): 45 | return str(self.__path__) 46 | 47 | def __hash__(self): 48 | return hash(str(self.__path__)) 49 | 50 | @property 51 | def path(self) -> str: 52 | return str(self.__path__) 53 | 54 | def to_dataframe(self) -> pd.DataFrame: 55 | """ 56 | Read ParquetSet file(s) into a Pandas DataFrame, concatenating the 57 | parquets into a single dataframe 58 | """ 59 | parquets = list(map(str, self.__path__.glob("*.parquet"))) 60 | chunks_list = [ 61 | pd.read_parquet(str(f), engine="fastparquet") for f in parquets 62 | ] 63 | _df = pd.concat(chunks_list, ignore_index=True) 64 | return parse_dftypes(_df) 65 | 66 | 67 | def parse_data_content( 68 | path: Union[List[str], str], _pbar=None 69 | ) -> Union[ParquetSet, List[ParquetSet]]: 70 | if isinstance(path, str): 71 | path = [path] 72 | else: 73 | path = list(path) 74 | 75 | content = [] 76 | for _path in path: 77 | data_path = Path(_path) 78 | 79 | if not data_path.exists(): 80 | continue 81 | 82 | if data_path.suffix.lower() in [".dbc", ".dbf", ".parquet"]: 83 | content.append(ParquetSet(str(data_path), _pbar=_pbar)) 84 | elif data_path.suffix.lower() == ".zip": 85 | content.append(str(data_path)) 86 | else: 87 | continue 88 | 89 | if not content: 90 | logger.warning("path must be absolute") 91 | 92 | if len(content) == 1: 93 | return content[0] 94 | return content 95 | 96 | 97 | class Data: 98 | """ 99 | A class parser. Receives an (or a list of) absolute path(s) and returns 100 | the corresponding ParquetSet instances. 
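    For example, Data("/tmp/DENGBR15.dbc") (an illustrative path) converts
    the file to Parquet and returns a single ParquetSet, while a list of
    paths returns a list of ParquetSet objects.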
101 | """ 102 | 103 | def __new__( 104 | cls, path: Union[List[str], str], _pbar=None 105 | ) -> Union[ParquetSet, List[ParquetSet]]: 106 | return parse_data_content(path, _pbar=_pbar) 107 | -------------------------------------------------------------------------------- /pysus/online_data/ESUS.py: -------------------------------------------------------------------------------- 1 | import os 2 | from datetime import date 3 | 4 | import pandas as pd 5 | from elasticsearch import Elasticsearch, helpers 6 | from loguru import logger 7 | from pysus.ftp import CACHEPATH 8 | 9 | 10 | def download(uf, cache=True, checkmemory=True): 11 | """ 12 | Download ESUS data by UF 13 | :param uf: rj, mg, etc 14 | :param cache: if results should be cached on disk 15 | :return: DataFrame if data fits in memory, 16 | other an iterator of chunks of size 1000. 17 | """ 18 | uf = uf.lower() 19 | user = "user-public-notificacoes" 20 | pwd = "Za4qNXdyQNSa9YaA" 21 | today = date.today() 22 | dt = today.strftime("_%d_%m_%Y") 23 | base = f"desc-esus-notifica-estado-{uf}" # desc-notificacoes-esusve- 24 | url = f"https://{user}:{pwd}@elasticsearch-saps.saude.gov.br" # noqa: E231 25 | out = f"ESUS_{uf}_{dt}.parquet" 26 | 27 | cachefile = os.path.join(CACHEPATH, out) 28 | tempfile = os.path.join(CACHEPATH, f"ESUS_temp_{uf.upper()}.csv.gz") 29 | if os.path.exists(cachefile): 30 | logger.info(f"Local parquet file found at {cachefile}") 31 | df = pd.read_parquet(cachefile) 32 | elif os.path.exists(tempfile): 33 | logger.info(f"Local csv file found at {tempfile}") 34 | df = pd.read_csv(tempfile, chunksize=1000) 35 | else: 36 | fname = fetch(base, uf, url) 37 | size = os.stat(fname).st_size 38 | if size > 50e6 and checkmemory: 39 | print(f"Downloaded data is to large: {size / 1e6} MB compressed.") 40 | print( 41 | "Only loading the first 1000 rows. 
If your computer has enough" 42 | + " memory, set 'checkmemory' to False" 43 | ) 44 | print(f"The full data is in {fname}") 45 | df = pd.read_csv(fname, chunksize=1000) 46 | else: 47 | df = pd.read_csv(fname, low_memory=False) 48 | print(f"{df.shape[0]} records downloaded.") 49 | os.unlink(fname) 50 | if cache: 51 | df.to_parquet(cachefile) 52 | logger.info(f"Data stored as parquet at {cachefile}") 53 | 54 | return df 55 | 56 | 57 | def fetch(base, uf, url): 58 | UF = uf.upper() 59 | print(f"Reading ESUS data for {UF}") 60 | es = Elasticsearch([url], send_get_body_as="POST") 61 | body = {"query": {"match_all": {}}} 62 | results = helpers.scan(es, query=body, index=base) 63 | # df = pd.DataFrame.from_dict( 64 | # [document['_source'] for document in results] 65 | # ) 66 | 67 | chunker = chunky_fetch(results, 3000) 68 | h = 1 69 | tempfile = os.path.join(CACHEPATH, f"ESUS_temp_{UF}.csv.gz") 70 | for ch in chunker: 71 | df = pd.DataFrame.from_dict(ch) 72 | df.sintomas = df["sintomas"].str.replace( 73 | ";", 74 | "", 75 | ) # remove os ; 76 | if h: 77 | df.to_csv(tempfile) 78 | h = 0 79 | else: 80 | df.to_csv(tempfile, mode="a", header=False) 81 | # df = pd.read_csv('temp.csv.gz') 82 | 83 | return tempfile 84 | 85 | 86 | def chunky_fetch(results, chunk_size=3000): 87 | """Fetches data in chunks to preserve memory""" 88 | data = [] 89 | i = 0 90 | for d in results: 91 | data.append(d["_source"]) 92 | i += 1 93 | if i == chunk_size: 94 | yield data 95 | data = [] 96 | i = 0 97 | else: 98 | yield data 99 | -------------------------------------------------------------------------------- /pysus/online_data/SIA.py: -------------------------------------------------------------------------------- 1 | """ 2 | Downloads SIA data from Datasus FTP server 3 | Created on 21/09/18 4 | by fccoelho 5 | Modified on 22/11/22 6 | by bcbernardo 7 | license: GPL V3 or Later 8 | """ 9 | from pprint import pprint 10 | from typing import Dict, Tuple, Union 11 | 12 | from loguru import logger 13 | from pysus.ftp import CACHEPATH 14 | from pysus.ftp.databases.sia import SIA 15 | from pysus.ftp.utils import parse_UFs 16 | 17 | sia = SIA().load() 18 | 19 | 20 | group_dict: Dict[str, Tuple[str, int, int]] = { 21 | "PA": ("Produção Ambulatorial", 7, 1994), 22 | "BI": ("Boletim de Produção Ambulatorial individualizado", 1, 2008), 23 | "AD": ("APAC de Laudos Diversos", 1, 2008), 24 | "AM": ("APAC de Medicamentos", 1, 2008), 25 | "AN": ("APAC de Nefrologia", 1, 2008), 26 | "AQ": ("APAC de Quimioterapia", 1, 2008), 27 | "AR": ("APAC de Radioterapia", 1, 2008), 28 | "AB": ("APAC de Cirurgia Bariátrica", 1, 2008), 29 | "ACF": ("APAC de Confecção de Fístula", 1, 2008), 30 | "ATD": ("APAC de Tratamento Dialítico", 1, 2008), 31 | "AMP": ("APAC de Acompanhamento Multiprofissional", 1, 2008), 32 | "SAD": ("RAAS de Atenção Domiciliar", 1, 2008), 33 | "PS": ("RAAS Psicossocial", 1, 2008), 34 | } 35 | 36 | 37 | def get_available_years( 38 | group: str, 39 | states: Union[str, list] = None, 40 | ): 41 | """ 42 | Get SIA years for group and/or state and returns a list of years 43 | :param group: 44 | PA: Produção Ambulatorial (7, 1994) 45 | BI: Boletim de Produção Ambulatorial individualizado (1, 2008) 46 | AD: APAC de Laudos Diversos (1, 2008) 47 | AM: APAC de Medicamentos (1, 2008) 48 | AN: APAC de Nefrologia (1, 2008) 49 | AQ: APAC de Quimioterapia (1, 2008) 50 | AR: APAC de Radioterapia (1, 2008) 51 | AB: APAC de Cirurgia Bariátrica (1, 2008) 52 | ACF: APAC de Confecção de Fístula (1, 2008) 53 | ATD: APAC de Tratamento Dialítico (1, 
2008) 54 | AMP: APAC de Acompanhamento Multiprofissional (1, 2008) 55 | SAD: RAAS de Atenção Domiciliar (1, 2008) 56 | PS: RAAS Psicossocial (1, 2008) 57 | :param states: 2 letter state code, can be a list of UFs 58 | """ 59 | ufs = parse_UFs(states) 60 | 61 | years = dict() 62 | for uf in ufs: 63 | files = sia.get_files(group, uf=uf) 64 | years[uf] = set(sorted([sia.describe(f)["year"] for f in files])) 65 | 66 | if len(set([len(v) for v in years.values()])) > 1: 67 | logger.warning(f"Distinct years were found for UFs: {years}") 68 | 69 | return sorted(list(set.intersection(*map(set, years.values())))) 70 | 71 | 72 | def show_datatypes(): 73 | pprint(group_dict) 74 | 75 | 76 | def download( 77 | states: Union[str, list], 78 | years: Union[str, list, int], 79 | months: Union[str, list, int], 80 | groups: Union[str, list], 81 | data_dir: str = CACHEPATH, 82 | ) -> list: 83 | """ 84 | Download SIASUS records for state year and month and returns dataframe 85 | :param states: 2 letter state code, can be a list 86 | :param years: 4 digit integer, can be a list 87 | :param months: 1 to 12, can be a list 88 | :param data_dir: whether to cache files locally. default is True 89 | :param group: SIA groups. For all groups, refer to `sia.groups` 90 | :return: list of downloaded ParquetData 91 | """ 92 | files = sia.get_files(group=groups, uf=states, year=years, month=months) 93 | return sia.download(files, local_dir=data_dir) 94 | -------------------------------------------------------------------------------- /pysus/tests/test_data/test_Infodengue.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import pandas as pd 4 | import pytest 5 | from pysus.online_data.Infodengue import download, normalize, search_string 6 | 7 | 8 | class InfoDengueTestCase(unittest.TestCase): 9 | @pytest.mark.timeout(5) 10 | def test_search_string(self): 11 | get_from_dict = search_string("Curitiba") 12 | cites_mathes = { 13 | "Acajutiba": 2900306, 14 | "Aratiba": 4300901, 15 | "Bacurituba": 2101350, 16 | "Buriti": 2102200, 17 | "Buriti Bravo": 2102309, 18 | "Buritirama": 2904753, 19 | "Buritirana": 2102358, 20 | "Buritis": 3109303, 21 | "Buritizal": 3508207, 22 | "Caatiba": 2904803, 23 | "Caraíbas": 2906899, 24 | "Carnaíba": 2603900, 25 | "Caturité": 2504355, 26 | "Craíbas": 2702355, 27 | "Criciúma": 4204608, 28 | "Cristais": 3120201, 29 | "Cristal": 4306056, 30 | "Cristina": 3120508, 31 | "Cromínia": 5206503, 32 | "Cruzília": 3120805, 33 | "Cuiabá": 5103403, 34 | "Cuitegi": 2505204, 35 | "Curimatá": 2203206, 36 | "Curitiba": 4106902, 37 | "Curitibanos": 4204806, 38 | "Curiúva": 4107009, 39 | "Custódia": 2605103, 40 | "Cutias": 1600212, 41 | "Duartina": 3514502, 42 | "Guaraíta": 5209291, 43 | "Guariba": 3518602, 44 | "Guaribas": 2204550, 45 | "Ibatiba": 3202454, 46 | "Ibicuitinga": 2305332, 47 | "Irituia": 1503507, 48 | "Itagibá": 2915205, 49 | "Itaituba": 1503606, 50 | "Itaiçaba": 2306207, 51 | "Itatiba": 3523404, 52 | "Itaíba": 2607505, 53 | "Itiúba": 2917003, 54 | "Jequitibá": 3135704, 55 | "Juquitiba": 3526209, 56 | "Marituba": 1504422, 57 | "Mauriti": 2308104, 58 | "Mucurici": 3203601, 59 | "Muribeca": 2804300, 60 | "Muritiba": 2922300, 61 | "Peritiba": 4212601, 62 | "Piritiba": 2924801, 63 | "Taquarituba": 3553807, 64 | "Tumiritinga": 3169505, 65 | "Turiúba": 3555208, 66 | "Umburatiba": 3170305, 67 | "Urucurituba": 1304401, 68 | } 69 | pattern_city_names = search_string(substr="r de jAiro") 70 | 71 | self.assertIsInstance(get_from_dict, dict) 72 | 
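# An exact city name should return the full dict of close matches built above;
# a fuzzy, partially matching substring should still surface the intended city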
94 | 
-------------------------------------------------------------------------------- /pysus/tests/test_data/test_Infodengue.py: --------------------------------------------------------------------------------
1 | import unittest
2 | 
3 | import pandas as pd
4 | import pytest
5 | from pysus.online_data.Infodengue import download, normalize, search_string
6 | 
7 | 
8 | class InfoDengueTestCase(unittest.TestCase):
9 |     @pytest.mark.timeout(5)
10 |     def test_search_string(self):
11 |         get_from_dict = search_string("Curitiba")
12 |         cities_matches = {
13 |             "Acajutiba": 2900306,
14 |             "Aratiba": 4300901,
15 |             "Bacurituba": 2101350,
16 |             "Buriti": 2102200,
17 |             "Buriti Bravo": 2102309,
18 |             "Buritirama": 2904753,
19 |             "Buritirana": 2102358,
20 |             "Buritis": 3109303,
21 |             "Buritizal": 3508207,
22 |             "Caatiba": 2904803,
23 |             "Caraíbas": 2906899,
24 |             "Carnaíba": 2603900,
25 |             "Caturité": 2504355,
26 |             "Craíbas": 2702355,
27 |             "Criciúma": 4204608,
28 |             "Cristais": 3120201,
29 |             "Cristal": 4306056,
30 |             "Cristina": 3120508,
31 |             "Cromínia": 5206503,
32 |             "Cruzília": 3120805,
33 |             "Cuiabá": 5103403,
34 |             "Cuitegi": 2505204,
35 |             "Curimatá": 2203206,
36 |             "Curitiba": 4106902,
37 |             "Curitibanos": 4204806,
38 |             "Curiúva": 4107009,
39 |             "Custódia": 2605103,
40 |             "Cutias": 1600212,
41 |             "Duartina": 3514502,
42 |             "Guaraíta": 5209291,
43 |             "Guariba": 3518602,
44 |             "Guaribas": 2204550,
45 |             "Ibatiba": 3202454,
46 |             "Ibicuitinga": 2305332,
47 |             "Irituia": 1503507,
48 |             "Itagibá": 2915205,
49 |             "Itaituba": 1503606,
50 |             "Itaiçaba": 2306207,
51 |             "Itatiba": 3523404,
52 |             "Itaíba": 2607505,
53 |             "Itiúba": 2917003,
54 |             "Jequitibá": 3135704,
55 |             "Juquitiba": 3526209,
56 |             "Marituba": 1504422,
57 |             "Mauriti": 2308104,
58 |             "Mucurici": 3203601,
59 |             "Muribeca": 2804300,
60 |             "Muritiba": 2922300,
61 |             "Peritiba": 4212601,
62 |             "Piritiba": 2924801,
63 |             "Taquarituba": 3553807,
64 |             "Tumiritinga": 3169505,
65 |             "Turiúba": 3555208,
66 |             "Umburatiba": 3170305,
67 |             "Urucurituba": 1304401,
68 |         }
69 |         pattern_city_names = search_string(substr="r de jAiro")
70 | 
71 |         self.assertIsInstance(get_from_dict, dict)
72 |         self.assertEqual(cities_matches, get_from_dict)
73 |         self.assertIn("Rio de Janeiro", pattern_city_names.keys())
74 |         self.assertIn(4204806, get_from_dict.values())
75 | 
76 |     @pytest.mark.timeout(5)
77 |     def test_normalize(self):
78 |         normalized_str = normalize("Rio das Ostras")
79 | 
80 |         substr_list = normalized_str.split(".")
81 | 
82 |         self.assertIsInstance(substr_list, list)
83 | 
84 |         self.assertEqual(normalized_str, "rio das ostras")
85 | 
86 |     @pytest.mark.timeout(5)
87 |     def test_download(self):
88 |         df = download(
89 |             "dengue",
90 |             202129,
91 |             202152,
92 |             "Rio de Janeiro",
93 |         )
94 |         df_size = (29, 24)
95 | 
96 |         self.assertIsInstance(df, pd.DataFrame)
97 |         self.assertGreater(len(df), 0)
98 |         self.assertEqual(df_size, df.shape)
99 | 
100 | 
101 | if __name__ == "__main__":
102 |     unittest.main()
103 | 
-------------------------------------------------------------------------------- /pysus/online_data/Infodengue.py: --------------------------------------------------------------------------------
1 | import json
2 | import string
3 | from difflib import get_close_matches
4 | from pathlib import Path
5 | from typing import Dict
6 | 
7 | import pandas as pd
8 | import unidecode
9 | 
10 | 
11 | 
12 | APP_DIR = Path(__file__).resolve(strict=True).parent.parent
13 | CID10 = {"dengue": "A90", "chikungunya": "A92.0", "zika": "A928"}
14 | 
15 | with open(APP_DIR / "dataset/geocode_by_cities.json", "r") as f:
16 |     geocode_by_cities = json.load(f)
17 | 
18 | 
19 | def normalize(s):
20 |     for p in string.punctuation:
21 |         s = s.replace(p, "")
22 | 
23 |     return unidecode.unidecode(s.lower().strip())
24 | 
25 | 
26 | def search_string(substr: str) -> Dict[str, int]:
27 |     """
28 |     Fetch the geocodes of the city names matching the substring.
29 | 
30 |     Parameters
31 |     ----------
32 |     substr: Part of a city name
33 |     Returns
34 |     -------
35 |     dict: Dictionary mapping the names of all matching Brazilian
36 |         municipalities to their IBGE geocodes
37 |     """
38 |     normalized_list = [normalize(f) for f in list(geocode_by_cities.keys())]
39 | 
40 |     matching_cities = [
41 |         get_close_matches(i, normalized_list, n=55)
42 |         for i in normalize(substr).split(".")
43 |     ]
44 | 
45 |     return {
46 |         key: geocode_by_cities[key]
47 |         for key in geocode_by_cities
48 |         if normalize(key) in list(*matching_cities)
49 |     }
50 | 
51 | 
52 | def download(
53 |     disease: str,
54 |     eyw_start: int,
55 |     eyw_end: int,
56 |     city_name: str,
57 |     format="csv",
58 | ) -> pd.DataFrame:
59 |     """
60 |     Download InfoDengue API data for a municipality and disease over a
61 |     range of epidemiological weeks.
62 | 
63 |     Parameters
64 |     ----------
65 |     disease: Name of one of the diseases available in the InfoDengue
66 |         system: dengue|chikungunya|zika
67 |     eyw_start: Epidemiological week start
68 |     eyw_end: Epidemiological week end
69 |     city_name: Name of a Brazilian municipality
70 |     format="csv": Default response format for the endpoint
71 |     Returns
72 |     -------
73 |     pd.DataFrame: Pandas DataFrame with the API response
74 |     """
75 | 
76 |     geocode = geocode_by_cities.get(city_name)
77 | 
78 |     if disease not in CID10.keys():
79 |         raise Exception(
80 |             f"The diseases available are: {[k for k in CID10.keys()]}"
81 |         )
82 |     elif len(str(eyw_start)) != 6 or len(str(eyw_end)) != 6:
83 |         raise Exception(
84 |             "The epidemiological week must contain 6 digits, "
85 |             "in the format YYYYWW, from 2010 onwards. 
Example: 202248" 86 | ) 87 | elif geocode is None: 88 | list_of_cities = search_string(city_name) 89 | print(f"You must choose one of these city names: {list_of_cities}") 90 | else: 91 | s_yw = str(eyw_start) 92 | e_yw = str(eyw_end) 93 | ew_start, ey_start = s_yw[-2:], s_yw[:4] 94 | ew_end, ey_end = e_yw[-2:], e_yw[:4] 95 | url = "https://info.dengue.mat.br/api/alertcity" 96 | params = ( 97 | "&disease=" 98 | + f"{disease}" 99 | + "&geocode=" 100 | + f"{geocode}" 101 | + "&format=" 102 | + f"{format}" 103 | + "&ew_start=" 104 | + f"{ew_start}" 105 | + "&ew_end=" 106 | + f"{ew_end}" 107 | + "&ey_start=" 108 | + f"{ey_start}" 109 | + "&ey_end=" 110 | + f"{ey_end}" 111 | ) 112 | 113 | url_resp = "?".join([url, params]) 114 | return pd.read_csv(url_resp, index_col="SE").T 115 | -------------------------------------------------------------------------------- /docs/source/locale/pt/LC_MESSAGES/Infodengue.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/Infodengue.ipynb:9 23 | msgid "Working with Infodengue datasets" 24 | msgstr "Trabalhando com conjuntos de dados do Infodengue" 25 | 26 | #: ../../source/Infodengue.ipynb:11 27 | msgid "" 28 | "`InfoDengue `__ is an alert system designed " 29 | "to track arboviruses using a unique hybrid data approach that integrates " 30 | "social web data with climatic and epidemiological data. In this tutorial," 31 | " we will walk through the process of using InfoDengue's API with Python " 32 | "to fetch up-to-date arbovirus data." 33 | msgstr "" 34 | "O `InfoDengue `__ é um sistema de alerta projetado " 35 | "para rastrear arbovírus usando uma abordagem de dados híbridos exclusiva que " 36 | "integra dados de mídias sociais com dados climáticos e epidemiológicos. Neste " 37 | "tutorial, iremos percorrer o processo de uso da API do InfoDengue com Python para " 38 | "buscar dados atualizados sobre arbovírus." 39 | 40 | #: ../../source/Infodengue.ipynb:32 41 | msgid "" 42 | "Infodengue is a national-wide system, use the ``search_string`` method to" 43 | " check how the city is found in the API:" 44 | msgstr "" 45 | "Infodengue é um sistema nacional, use o método `search_string` para verificar " 46 | "como o município é encontrado na API:" 47 | 48 | #: ../../source/Infodengue.ipynb:135 49 | msgid "" 50 | "The download method extracts data for a specified range of " 51 | "Epidemiological Weeks (SE in pt) in the format ``YYYYWW``. The output is " 52 | "a Pandas DataFrame containing all the EWs within this range." 53 | msgstr "" 54 | "O método de download extrai dados para um intervalo específico de semanas " 55 | "epidemiológicas no formato `YYYYWW`. A saída é um DataFrame do Pandas " 56 | "contendo todas as semanas epidemiológicas dentro deste intervalo." 
57 | 58 | #: ../../source/Infodengue.ipynb:614 59 | msgid "You can save the dataframe in a CSV file" 60 | msgstr "Você pode salvar o dataframe em um arquivo CSV" 61 | 62 | #: ../../source/Infodengue.ipynb:635 63 | msgid "" 64 | "In order to fetch data with different parameters, it is possible to " 65 | "iterate over a list, for instance:" 66 | msgstr "" 67 | "Para buscar dados com diferentes parâmetros, é possível iterar sobre " 68 | "uma lista, por exemplo:" 69 | 70 | #: ../../source/Infodengue.ipynb:663 71 | msgid "Expected files:" 72 | msgstr "Arquivos esperados:" 73 | 74 | #: ../../source/Infodengue.ipynb:665 75 | msgid "dengue_rio_de_janeiro_se01_04.csv" 76 | msgstr "dengue_rio_de_janeiro_se01_04.csv" 77 | 78 | #: ../../source/Infodengue.ipynb:666 79 | msgid "dengue_rio_do_antônio_se01_04.csv" 80 | msgstr "dengue_rio_do_antônio_se01_04.csv" 81 | 82 | #: ../../source/Infodengue.ipynb:667 83 | msgid "dengue_rio_do_pires_se01_04.csv" 84 | msgstr "dengue_rio_do_pires_se01_04.csv" 85 | 86 | #: ../../source/Infodengue.ipynb:668 87 | msgid "zika_rio_de_janeiro_se01_04.csv" 88 | msgstr "zika_rio_de_janeiro_se01_04.csv" 89 | 90 | #: ../../source/Infodengue.ipynb:669 91 | msgid "zika_rio_do_antônio_se01_04.csv" 92 | msgstr "zika_rio_do_antônio_se01_04.csv" 93 | 94 | #: ../../source/Infodengue.ipynb:670 95 | msgid "zika_rio_do_pires_se01_04.csv" 96 | msgstr "zika_rio_do_pires_se01_04.csv" 97 | -------------------------------------------------------------------------------- /docs/source/locale/pt_BR/LC_MESSAGES/Infodengue.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/Infodengue.ipynb:9 23 | msgid "Working with Infodengue datasets" 24 | msgstr "Trabalhando com conjuntos de dados do Infodengue" 25 | 26 | #: ../../source/Infodengue.ipynb:11 27 | msgid "" 28 | "`InfoDengue `__ is an alert system designed " 29 | "to track arboviruses using a unique hybrid data approach that integrates " 30 | "social web data with climatic and epidemiological data. In this tutorial," 31 | " we will walk through the process of using InfoDengue's API with Python " 32 | "to fetch up-to-date arbovirus data." 33 | msgstr "" 34 | "O `InfoDengue `__ é um sistema de alerta projetado " 35 | "para rastrear arbovírus usando uma abordagem de dados híbridos exclusiva que " 36 | "integra dados de mídias sociais com dados climáticos e epidemiológicos. Neste " 37 | "tutorial, iremos percorrer o processo de uso da API do InfoDengue com Python para " 38 | "buscar dados atualizados sobre arbovírus." 
39 | 40 | #: ../../source/Infodengue.ipynb:32 41 | msgid "" 42 | "Infodengue is a national-wide system, use the ``search_string`` method to" 43 | " check how the city is found in the API:" 44 | msgstr "" 45 | "Infodengue é um sistema nacional, use o método `search_string` para verificar " 46 | "como o município é encontrado na API:" 47 | 48 | #: ../../source/Infodengue.ipynb:135 49 | msgid "" 50 | "The download method extracts data for a specified range of " 51 | "Epidemiological Weeks (SE in pt) in the format ``YYYYWW``. The output is " 52 | "a Pandas DataFrame containing all the EWs within this range." 53 | msgstr "" 54 | "O método de download extrai dados para um intervalo específico de semanas " 55 | "epidemiológicas no formato `YYYYWW`. A saída é um DataFrame do Pandas " 56 | "contendo todas as semanas epidemiológicas dentro deste intervalo." 57 | 58 | #: ../../source/Infodengue.ipynb:614 59 | msgid "You can save the dataframe in a CSV file" 60 | msgstr "Você pode salvar o dataframe em um arquivo CSV" 61 | 62 | #: ../../source/Infodengue.ipynb:635 63 | msgid "" 64 | "In order to fetch data with different parameters, it is possible to " 65 | "iterate over a list, for instance:" 66 | msgstr "" 67 | "Para buscar dados com diferentes parâmetros, é possível iterar sobre " 68 | "uma lista, por exemplo:" 69 | 70 | #: ../../source/Infodengue.ipynb:663 71 | msgid "Expected files:" 72 | msgstr "Arquivos esperados:" 73 | 74 | #: ../../source/Infodengue.ipynb:665 75 | msgid "dengue_rio_de_janeiro_se01_04.csv" 76 | msgstr "dengue_rio_de_janeiro_se01_04.csv" 77 | 78 | #: ../../source/Infodengue.ipynb:666 79 | msgid "dengue_rio_do_antônio_se01_04.csv" 80 | msgstr "dengue_rio_do_antônio_se01_04.csv" 81 | 82 | #: ../../source/Infodengue.ipynb:667 83 | msgid "dengue_rio_do_pires_se01_04.csv" 84 | msgstr "dengue_rio_do_pires_se01_04.csv" 85 | 86 | #: ../../source/Infodengue.ipynb:668 87 | msgid "zika_rio_de_janeiro_se01_04.csv" 88 | msgstr "zika_rio_de_janeiro_se01_04.csv" 89 | 90 | #: ../../source/Infodengue.ipynb:669 91 | msgid "zika_rio_do_antônio_se01_04.csv" 92 | msgstr "zika_rio_do_antônio_se01_04.csv" 93 | 94 | #: ../../source/Infodengue.ipynb:670 95 | msgid "zika_rio_do_pires_se01_04.csv" 96 | msgstr "zika_rio_do_pires_se01_04.csv" 97 | -------------------------------------------------------------------------------- /docs/source/kepler_config.json: -------------------------------------------------------------------------------- 1 | {"version": "v1", "config": {"visState": {"filters": [], "layers": [{"id": "ydiyslk", "type": "point", "config": {"dataId": "Casos de Zika em 2016", "label": "Casos de Zika", "color": [183, 136, 94], "columns": {"lat": "latitude", "lng": "longitude", "altitude": null}, "isVisible": true, "visConfig": {"radius": 10, "fixedRadius": false, "opacity": 0.8, "outline": false, "thickness": 2, "strokeColor": null, "colorRange": {"name": "Global Warming", "type": "sequential", "category": "Uber", "colors": ["#5A1846", "#900C3F", "#C70039", "#E3611C", "#F1920E", "#FFC300"]}, "strokeColorRange": {"name": "Global Warming", "type": "sequential", "category": "Uber", "colors": ["#5A1846", "#900C3F", "#C70039", "#E3611C", "#F1920E", "#FFC300"]}, "radiusRange": [1, 40], "filled": true}, "textLabel": [{"field": null, "color": [255, 255, 255], "size": 18, "offset": [0, 0], "anchor": "start", "alignment": "center"}]}, "visualChannels": {"colorField": null, "colorScale": "quantile", "strokeColorField": null, "strokeColorScale": "quantile", "sizeField": null, "sizeScale": "linear"}}, 
{"id": "icdqzjm", "type": "point", "config": {"dataId": "Casos de Dengue em 2016", "label": "Casos de Dengue", "color": [32, 103, 172], "columns": {"lat": "latitude", "lng": "longitude", "altitude": null}, "isVisible": true, "visConfig": {"radius": 10, "fixedRadius": false, "opacity": 0.8, "outline": false, "thickness": 2, "strokeColor": null, "colorRange": {"name": "Global Warming", "type": "sequential", "category": "Uber", "colors": ["#5A1846", "#900C3F", "#C70039", "#E3611C", "#F1920E", "#FFC300"]}, "strokeColorRange": {"name": "Global Warming", "type": "sequential", "category": "Uber", "colors": ["#5A1846", "#900C3F", "#C70039", "#E3611C", "#F1920E", "#FFC300"]}, "radiusRange": [0, 50], "filled": true}, "textLabel": [{"field": null, "color": [255, 255, 255], "size": 18, "offset": [0, 0], "anchor": "start", "alignment": "center"}]}, "visualChannels": {"colorField": null, "colorScale": "quantile", "strokeColorField": null, "strokeColorScale": "quantile", "sizeField": null, "sizeScale": "linear"}}, {"id": "u28x356", "type": "point", "config": {"dataId": "Casos de Chikungunya em 2016", "label": "Casos de Chikungunia", "color": [125, 194, 64], "columns": {"lat": "latitude", "lng": "longitude", "altitude": null}, "isVisible": true, "visConfig": {"radius": 10, "fixedRadius": false, "opacity": 0.8, "outline": false, "thickness": 2, "strokeColor": null, "colorRange": {"name": "Global Warming", "type": "sequential", "category": "Uber", "colors": ["#5A1846", "#900C3F", "#C70039", "#E3611C", "#F1920E", "#FFC300"]}, "strokeColorRange": {"name": "Global Warming", "type": "sequential", "category": "Uber", "colors": ["#5A1846", "#900C3F", "#C70039", "#E3611C", "#F1920E", "#FFC300"]}, "radiusRange": [0, 50], "filled": true}, "textLabel": [{"field": null, "color": [255, 255, 255], "size": 18, "offset": [0, 0], "anchor": "start", "alignment": "center"}]}, "visualChannels": {"colorField": null, "colorScale": "quantile", "strokeColorField": null, "strokeColorScale": "quantile", "sizeField": null, "sizeScale": "linear"}}], "interactionConfig": {"tooltip": {"fieldsToShow": {"Casos de Zika em 2016": ["ID_AGRAVO", "SEM_NOT", "NU_ANO", "ID_MUNICIP", "DT_SIN_PRI"], "Casos de Dengue em 2016": ["ID_AGRAVO", "SEM_NOT", "NU_ANO", "ID_MUNICIP", "DT_SIN_PRI"], "Casos de Chikungunya em 2016": ["ID_AGRAVO", "SEM_NOT", "NU_ANO", "ID_MUNICIP", "DT_SIN_PRI"]}, "enabled": true}, "brush": {"size": 0.5, "enabled": false}}, "layerBlending": "normal", "splitMaps": [], "animationConfig": {"currentTime": null, "speed": 1}}, "mapState": {"bearing": 24, "dragRotate": true, "latitude": -22.802329589865103, "longitude": -43.383896341999, "pitch": 50, "zoom": 9.379836309981588, "isSplit": false}, "mapStyle": {"styleType": "dark", "topLayerGroups": {}, "visibleLayerGroups": {"label": true, "road": true, "border": false, "building": true, "water": true, "land": true, "3d building": false}, "threeDBuildingColor": [9.665468314072013, 17.18305478057247, 31.1442867897876], "mapStyles": {}}}} 2 | -------------------------------------------------------------------------------- /pysus/online_data/CNES.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | from loguru import logger 4 | from pysus.ftp import CACHEPATH 5 | from pysus.ftp.databases.cnes import CNES 6 | from pysus.ftp.utils import parse_UFs 7 | 8 | cnes = CNES().load() 9 | 10 | 11 | group_dict = { 12 | "LT": ["Leitos - A partir de Out/2005", 10, 2005], 13 | "ST": ["Estabelecimentos - A partir de Ago/2005", 8, 2005], 
14 | "DC": ["Dados Complementares - A partir de Ago/2005", 8, 2005], 15 | "EQ": ["Equipamentos - A partir de Ago/2005", 8, 2005], 16 | "SR": ["Serviço Especializado - A partir de Ago/2005", 8, 2005], 17 | "HB": ["Habilitação - A partir de Mar/2007", 3, 2007], 18 | "PF": ["Profissional - A partir de Ago/2005", 8, 2005], 19 | "EP": ["Equipes - A partir de Abr/2007", 5, 2007], 20 | "IN": ["Incentivos - A partir de Nov/2007", 11, 2007], 21 | "RC": ["Regra Contratual - A partir de Mar/2007", 3, 2007], 22 | "EE": ["Estabelecimento de Ensino - A partir de Mar/2007", 3, 2007], 23 | "EF": ["Estabelecimento Filantrópico - A partir de Mar/2007", 3, 2007], 24 | "GM": ["Gestão e Metas - A partir de Jun/2007", 6, 2007], 25 | } 26 | 27 | 28 | def get_available_years( 29 | group: str, 30 | states: Union[str, list] = None, 31 | ): 32 | """ 33 | Get CNES years for group and/or state and returns a 34 | list of years 35 | :param group: 36 | LT – Leitos - A partir de Out/2005 37 | ST – Estabelecimentos - A partir de Ago/2005 38 | DC - Dados Complementares - A partir de Ago/2005 39 | EQ – Equipamentos - A partir de Ago/2005 40 | SR - Serviço Especializado - A partir de Ago/2005 41 | HB – Habilitação - A partir de Mar/2007 42 | PF – Profissional - A partir de Ago/2005 43 | EP – Equipes - A partir de Abr/2007 44 | IN – Incentivos - A partir de Nov/2007 45 | RC - Regra Contratual - A partir de Mar/2007 46 | EE - Estabelecimento de Ensino - A partir de Mar/2007 47 | EF - Estabelecimento Filantrópico - A partir de Mar/2007 48 | GM - Gestão e Metas - A partir de Jun/2007 49 | :param states: 2 letter state code, can be a list of UFs 50 | """ 51 | cnes.load(group) 52 | 53 | ufs = parse_UFs(states) 54 | 55 | years = dict() 56 | for uf in ufs: 57 | files = cnes.get_files(group, uf=uf) 58 | years[uf] = sorted([cnes.describe(f)["year"] for f in files]) 59 | 60 | if len(set([len(v) for v in years.values()])) > 1: 61 | logger.warning(f"Distinct years were found for UFs: {years}") 62 | 63 | return sorted(list(set.intersection(*map(set, years.values())))) 64 | 65 | 66 | def download( 67 | group: str, 68 | states: Union[str, list], 69 | years: Union[str, list, int], 70 | months: Union[str, list, int], 71 | data_dir: str = CACHEPATH, 72 | ) -> list: 73 | """ 74 | Download CNES records for group, state, year and month and returns a 75 | list of local parquet files 76 | :param group: 77 | LT – Leitos - A partir de Out/2005 78 | ST – Estabelecimentos - A partir de Ago/2005 79 | DC - Dados Complementares - A partir de Ago/2005 80 | EQ – Equipamentos - A partir de Ago/2005 81 | SR - Serviço Especializado - A partir de Ago/2005 82 | HB – Habilitação - A partir de Mar/2007 83 | PF – Profissional - A partir de Ago/2005 84 | EP – Equipes - A partir de Abr/2007 85 | IN – Incentivos - A partir de Nov/2007 86 | RC - Regra Contratual - A partir de Mar/2007 87 | EE - Estabelecimento de Ensino - A partir de Mar/2007 88 | EF - Estabelecimento Filantrópico - A partir de Mar/2007 89 | GM - Gestão e Metas - A partir de Jun/2007 90 | :param months: 1 to 12, can be a list of years 91 | :param states: 2 letter state code, can be a list of UFs 92 | :param years: 4 digit integer, can be a list of years 93 | """ 94 | files = cnes.get_files(group, states, years, months) 95 | return cnes.download(files, local_dir=data_dir) 96 | -------------------------------------------------------------------------------- /pysus/ftp/databases/pni.py: -------------------------------------------------------------------------------- 1 | __all__ = ["PNI"] 2 | 3 | from 
96 | 
-------------------------------------------------------------------------------- /pysus/ftp/databases/pni.py: --------------------------------------------------------------------------------
1 | __all__ = ["PNI"]
2 | 
3 | from typing import List, Literal, Optional, Union
4 | 
5 | from pysus.ftp import Database, Directory, File
6 | from pysus.ftp.utils import UFs, parse_UFs, to_list, zfill_year
7 | 
8 | 
9 | class PNI(Database):
10 |     name = "PNI"
11 |     paths = (Directory("/dissemin/publicos/PNI/DADOS"),)
12 |     metadata = {
13 |         "long_name": (
14 |             "Sistema de Informações do Programa Nacional de Imunizações"
15 |         ),
16 |         "source": (  # TODO: these URLs point to SIH pages; verify the PNI links
17 |             "https://datasus.saude.gov.br/acesso-a-informacao/morbidade-hospitalar-do-sus-sih-sus/",  # noqa
18 |             "https://datasus.saude.gov.br/acesso-a-informacao/producao-hospitalar-sih-sus/",  # noqa
19 |         ),
20 |         "description": (
21 |             "O SI-PNI é um sistema desenvolvido para possibilitar aos "
22 |             "gestores envolvidos no Programa Nacional de Imunização, a "
23 |             "avaliação dinâmica do risco quanto à ocorrência de surtos ou "
24 |             "epidemias, a partir do registro dos imunobiológicos aplicados e "
25 |             "do quantitativo populacional vacinado, agregados por faixa "
26 |             "etária, período de tempo e área geográfica. Possibilita também "
27 |             "o controle do estoque de imunobiológicos necessário aos "
28 |             "administradores que têm a incumbência de programar sua aquisição "
29 |             "e distribuição. Controla as indicações de aplicação de "
30 |             "vacinas de imunobiológicos especiais e seus eventos adversos, "
31 |             "dentro dos Centros de Referências em imunobiológicos especiais."
32 |         ),
33 |     }
34 |     groups = {
35 |         "CPNI": "Cobertura Vacinal",  # TODO: may be incorrect
36 |         "DPNI": "Doses Aplicadas",  # TODO: may be incorrect
37 |     }
38 | 
39 |     def describe(self, file: File) -> dict:
40 |         if file.extension.upper() in [".DBC", ".DBF"]:
41 |             group, _uf, year = self.format(file)
42 | 
43 |             try:
44 |                 uf = UFs[_uf]
45 |             except KeyError:
46 |                 uf = _uf
47 | 
48 |             description = {
49 |                 "name": file.basename,
50 |                 "group": self.groups[group],
51 |                 "uf": uf,
52 |                 "year": zfill_year(year),
53 |                 "size": file.info["size"],
54 |                 "last_update": file.info["modify"],
55 |             }
56 | 
57 |             return description
58 |         return {}
59 | 
60 |     def format(self, file: File) -> tuple:
61 | 
62 |         if len(file.name) != 8:
63 |             raise ValueError(f"Can't format {file.name}")
64 | 
65 |         n = file.name
66 |         group, _uf, year = n[:4], n[4:6], n[-2:]
67 |         return group, _uf, zfill_year(year)
68 | 
69 |     def get_files(
70 |         self,
71 |         group: Union[list, Literal["CPNI", "DPNI"]],
72 |         uf: Optional[Union[List[str], str]] = None,
73 |         year: Optional[Union[list, str, int]] = None,
74 |     ) -> List[File]:
75 |         files = list(
76 |             filter(
77 |                 lambda f: f.extension.upper() in [".DBC", ".DBF"], self.files
78 |             )
79 |         )
80 | 
81 |         groups = [gr.upper() for gr in to_list(group)]
82 | 
83 |         if not all(gr in list(self.groups) for gr in groups):
84 |             raise ValueError(
85 |                 "Unknown PNI Group(s): "
86 |                 f"{set(groups).difference(list(self.groups))}"
87 |             )
88 | 
89 |         files = list(filter(lambda f: self.format(f)[0] in groups, files))
90 | 
91 |         if uf:
92 |             ufs = parse_UFs(uf)
93 |             files = list(filter(lambda f: self.format(f)[1] in ufs, files))
94 | 
95 |         if year or str(year) in ["0", "00"]:
96 |             years = [zfill_year(str(m)[-2:]) for m in to_list(year)]
97 |             files = list(filter(lambda f: self.format(f)[2] in years, files))
98 | 
99 |         return files
100 | 
-------------------------------------------------------------------------------- /docs/source/locale/pt/LC_MESSAGES/IBGE_data.po: --------------------------------------------------------------------------------
1 | # SOME DESCRIPTIVE TITLE.
2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/IBGE_data.ipynb:9 23 | msgid "Obtaining data from IBGE" 24 | msgstr "Extraindo dados do IBGE" 25 | 26 | #: ../../source/IBGE_data.ipynb:35 27 | msgid "Listing Subject Areas" 28 | msgstr "Listando Grupos de dados" 29 | 30 | #: ../../source/IBGE_data.ipynb:37 31 | msgid "" 32 | "IBGE makes available data from a number of surveys on different subjects." 33 | " We can find out what is available, before downloading data." 34 | msgstr "" 35 | "O IBGE disponibiliza dados de várias pesquisas sobre diferentes assuntos. " 36 | "Podemos descobrir o que está disponível antes de baixar os dados." 37 | 38 | #: ../../source/IBGE_data.ipynb:293 39 | msgid "" 40 | "Let's look at the datasets (called \"agregados\") available within the " 41 | "category of \"Censo Demografico\"." 42 | msgstr "" 43 | "Vamos dar uma olhada nos conjuntos de dados (chamados \"agregados\") " 44 | "disponíveis dentro da categoria de \"Censo Demográfico\"." 45 | 46 | #: ../../source/IBGE_data.ipynb:545 47 | msgid "Again for population projections" 48 | msgstr "Novamente para projeções populacionais" 49 | 50 | #: ../../source/IBGE_data.ipynb:670 51 | msgid "Downloading data" 52 | msgstr "Extraindo dados" 53 | 54 | #: ../../source/IBGE_data.ipynb:672 55 | msgid "" 56 | "Before downloading the data, it may be useful to look at the metadata of " 57 | "the dataset we are interested in." 58 | msgstr "" 59 | "Antes de baixar os dados, pode ser útil olhar para os metadados do " 60 | "conjunto de dados que estamos interessados." 61 | 62 | #: ../../source/IBGE_data.ipynb:1325 63 | msgid "" 64 | "To actually download the data after chosing the dataset, we can use the " 65 | "``FetchData`` class, which will fetch the data and make it available both" 66 | " in JSON format and Dataframe as exemplified below." 67 | msgstr "" 68 | "Para baixar os dados depois de escolher o conjunto de dados, podemos usar a classe " 69 | "`FetchData`, que irá buscar os dados e disponibilizá-los tanto em formato JSON quanto em " 70 | "Dataframe, conforme exemplificado abaixo." 71 | 72 | #: ../../source/IBGE_data.ipynb:1990 73 | msgid "Using the SIDRA endpoint" 74 | msgstr "Usando o endpoint SIDRA" 75 | 76 | #: ../../source/IBGE_data.ipynb:1992 77 | msgid "" 78 | "IBGE also has a simpler API at https://api.sidra.ibge.gov.br that PySUS " 79 | "also gives access through a simple function. Below we have table 200, " 80 | "which is a sample from the resident population. classification ``2`` is " 81 | "sex, of which I am fetching all categories: ``total``, ``Homens``, and " 82 | "``Mulheres``. Terrotorial level 6 is municipality." 83 | msgstr "" 84 | "O IBGE também possui uma API mais simples em https://api.sidra.ibge.gov.br " 85 | "que o PySUS também acessa através de uma função simples. Abaixo temos a tabela " 86 | "200, que é uma amostra da população residente. 
A classificação 2 é o sexo, do " 87 | "qual estou buscando todas as categorias: total, Homens e Mulheres. " 88 | "O nível territorial 6 é o município." 89 | 90 | #: ../../source/IBGE_data.ipynb:2572 91 | msgid "" 92 | "Suppose we just wanted a single municipality, ``Balneário Rincão (SC)``: " 93 | "Unfortunately for this one there is no data available, thus the ``...`` " 94 | "in the column ``Valor``. Try another one." 95 | msgstr "" 96 | "Suponha que quiséssemos apenas um município, `Balneário Rincão (SC)`: " 97 | "Infelizmente, para este não há dados disponíveis, portanto, há um `...` " 98 | "na coluna `Valor`. Tente outro município." 99 | -------------------------------------------------------------------------------- /docs/source/locale/pt_BR/LC_MESSAGES/IBGE_data.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/IBGE_data.ipynb:9 23 | msgid "Obtaining data from IBGE" 24 | msgstr "Extraindo dados do IBGE" 25 | 26 | #: ../../source/IBGE_data.ipynb:35 27 | msgid "Listing Subject Areas" 28 | msgstr "Listando Grupos de dados" 29 | 30 | #: ../../source/IBGE_data.ipynb:37 31 | msgid "" 32 | "IBGE makes available data from a number of surveys on different subjects." 33 | " We can find out what is available, before downloading data." 34 | msgstr "" 35 | "O IBGE disponibiliza dados de várias pesquisas sobre diferentes assuntos. " 36 | "Podemos descobrir o que está disponível antes de baixar os dados." 37 | 38 | #: ../../source/IBGE_data.ipynb:293 39 | msgid "" 40 | "Let's look at the datasets (called \"agregados\") available within the " 41 | "category of \"Censo Demografico\"." 42 | msgstr "" 43 | "Vamos dar uma olhada nos conjuntos de dados (chamados \"agregados\") " 44 | "disponíveis dentro da categoria de \"Censo Demográfico\"." 45 | 46 | #: ../../source/IBGE_data.ipynb:545 47 | msgid "Again for population projections" 48 | msgstr "Novamente para projeções populacionais" 49 | 50 | #: ../../source/IBGE_data.ipynb:670 51 | msgid "Downloading data" 52 | msgstr "Extraindo dados" 53 | 54 | #: ../../source/IBGE_data.ipynb:672 55 | msgid "" 56 | "Before downloading the data, it may be useful to look at the metadata of " 57 | "the dataset we are interested in." 58 | msgstr "" 59 | "Antes de baixar os dados, pode ser útil olhar para os metadados do " 60 | "conjunto de dados que estamos interessados." 61 | 62 | #: ../../source/IBGE_data.ipynb:1325 63 | msgid "" 64 | "To actually download the data after chosing the dataset, we can use the " 65 | "``FetchData`` class, which will fetch the data and make it available both" 66 | " in JSON format and Dataframe as exemplified below." 
67 | msgstr "" 68 | "Para baixar os dados depois de escolher o conjunto de dados, podemos usar a classe " 69 | "`FetchData`, que irá buscar os dados e disponibilizá-los tanto em formato JSON quanto em " 70 | "Dataframe, conforme exemplificado abaixo." 71 | 72 | #: ../../source/IBGE_data.ipynb:1990 73 | msgid "Using the SIDRA endpoint" 74 | msgstr "Usando o endpoint SIDRA" 75 | 76 | #: ../../source/IBGE_data.ipynb:1992 77 | msgid "" 78 | "IBGE also has a simpler API at https://api.sidra.ibge.gov.br that PySUS " 79 | "also gives access through a simple function. Below we have table 200, " 80 | "which is a sample from the resident population. classification ``2`` is " 81 | "sex, of which I am fetching all categories: ``total``, ``Homens``, and " 82 | "``Mulheres``. Terrotorial level 6 is municipality." 83 | msgstr "" 84 | "O IBGE também possui uma API mais simples em https://api.sidra.ibge.gov.br " 85 | "que o PySUS também acessa através de uma função simples. Abaixo temos a tabela " 86 | "200, que é uma amostra da população residente. A classificação 2 é o sexo, do " 87 | "qual estou buscando todas as categorias: total, Homens e Mulheres. " 88 | "O nível territorial 6 é o município." 89 | 90 | #: ../../source/IBGE_data.ipynb:2572 91 | msgid "" 92 | "Suppose we just wanted a single municipality, ``Balneário Rincão (SC)``: " 93 | "Unfortunately for this one there is no data available, thus the ``...`` " 94 | "in the column ``Valor``. Try another one." 95 | msgstr "" 96 | "Suponha que quiséssemos apenas um município, `Balneário Rincão (SC)`: " 97 | "Infelizmente, para este não há dados disponíveis, portanto, há um `...` " 98 | "na coluna `Valor`. Tente outro município." 99 | -------------------------------------------------------------------------------- /pysus/online_data/vaccine.py: -------------------------------------------------------------------------------- 1 | """ 2 | Download of vacination data. 3 | 4 | This module contains function to download from specific campains: 5 | 6 | - COVID-19 in 2020-2021 Downloaded as described [here](http://opendatasus.saude.gov.br/dataset/b772ee55-07cd-44d8-958f-b12edd004e0b/resource/5916b3a4-81e7-4ad5-adb6-b884ff198dc1/download/manual_api_vacina_covid-19.pdf) # noqa 7 | """ 8 | import json 9 | import os 10 | from json import JSONDecodeError 11 | 12 | import pandas as pd 13 | import requests 14 | from loguru import logger 15 | from pysus.ftp import CACHEPATH 16 | from requests.auth import HTTPBasicAuth 17 | 18 | 19 | def download_covid(uf=None, only_header=False): 20 | """ 21 | Download covid vaccination data for a give UF 22 | :param uf: 'RJ' | 'SP', etc. 23 | :param only_header: Used to see the header of the data before downloading. 24 | :return: dataframe iterator as returned by pandas 25 | `read_csv('Vaccine_temp_.csv.gz', chunksize=5000)` 26 | """ 27 | user = "imunizacao_public" 28 | pwd = "qlto5t&7r_@+#Tlstigi" 29 | url = "https://imunizacao-es.saude.gov.br/_search?scroll=1m" 30 | if uf is None: 31 | query = {"query": {"match_all": {}}, "size": 10000} 32 | UF = "BR" 33 | else: 34 | UF = uf.upper() 35 | query = { 36 | "query": {"match": {"paciente_endereco_uf": UF}}, 37 | "size": 10000, 38 | } 39 | 40 | logger.info(f"Searching for COVID data of {UF}") 41 | tempfile = os.path.join(CACHEPATH, f"Vaccine_temp_{UF}.csv.gz") 42 | if os.path.exists(tempfile): 43 | print( 44 | "loading from cache. Returning an iterator of Dataframes in chunks" 45 | " of 5000." 
46 | ) 47 | return pd.read_csv(tempfile, chunksize=5000) 48 | 49 | auth = HTTPBasicAuth(user, pwd) 50 | data_gen = elasticsearch_fetch(url, auth, query) 51 | 52 | if only_header: 53 | df = pd.DataFrame(next(data_gen)) 54 | logger.warning( 55 | f"Downloading data sample for visualization of {df.shape[0]} rows" 56 | ) 57 | return df 58 | 59 | h = 1 60 | for dt in data_gen: 61 | df = pd.DataFrame(dt) 62 | if h: 63 | df.to_csv(tempfile) 64 | h = 0 65 | else: 66 | df.to_csv(tempfile, mode="a", header=False) 67 | 68 | logger.info(f"{tempfile} stored at {CACHEPATH}.") 69 | df = pd.read_csv(tempfile, chunksize=5000) 70 | 71 | return df 72 | 73 | 74 | def elasticsearch_fetch(uri, auth, json_body={}): 75 | headers = { 76 | "Content-Type": "application/json", 77 | } 78 | 79 | scroll_id = "" 80 | total = 0 81 | while True: 82 | if scroll_id: 83 | uri = "https://imunizacao-es.saude.gov.br/_search/scroll" 84 | json_body["scroll_id"] = scroll_id 85 | json_body["scroll"] = "1m" 86 | if "query" in json_body: 87 | del json_body["query"] 88 | # for the continuation of the download, 89 | # query parameter is not allowed 90 | del json_body["size"] 91 | try: 92 | s = requests.Session() 93 | response = s.post(uri, auth=auth, headers=headers, json=json_body) 94 | text = response.text 95 | try: 96 | resp = json.loads(text) 97 | except JSONDecodeError: 98 | resp = text 99 | except Exception as error: 100 | print("\nelasticsearch_fetch() error:", error) 101 | raise error 102 | try: 103 | if resp["hits"]["hits"] == []: 104 | break 105 | except KeyError as e: 106 | logger.error(e) 107 | print(resp) 108 | total += len(resp["hits"]["hits"]) 109 | print(f"Downloaded {total} records\r", end="") 110 | yield [h["_source"] for h in resp["hits"]["hits"]] 111 | if "_scroll_id" in resp: 112 | scroll_id = resp["_scroll_id"] 113 | 114 | 115 | if __name__ == "__main__": 116 | print(download_covid("ba", only_header=True)) 117 | -------------------------------------------------------------------------------- /docs/source/databases/Utilities.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "c153a255-ad53-4b27-b689-4c119ea8cc52", 6 | "metadata": {}, 7 | "source": [ 8 | "## Utilities module\n", 9 | "\n", 10 | "Some helper functions that are used throughout the package: " 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "c5c639e6-fa54-482a-a91d-20a8bbe05206", 16 | "metadata": {}, 17 | "source": [ 18 | "### brasil" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 6, 24 | "id": "451830fc-04af-4003-8e70-c71d61a57ac5", 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "from pysus.utilities import brasil" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 10, 34 | "id": "76a37da8-7b41-4565-83e2-e23bfbeae5bb", 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "data": { 39 | "text/plain": [ 40 | "{'BR': 'Brasil',\n", 41 | " 'AC': 'Acre',\n", 42 | " 'AL': 'Alagoas',\n", 43 | " 'AP': 'Amapá',\n", 44 | " 'AM': 'Amazonas',\n", 45 | " 'BA': 'Bahia',\n", 46 | " 'CE': 'Ceará',\n", 47 | " 'ES': 'Espírito Santo',\n", 48 | " 'GO': 'Goiás',\n", 49 | " 'MA': 'Maranhão',\n", 50 | " 'MT': 'Mato Grosso',\n", 51 | " 'MS': 'Mato Grosso do Sul',\n", 52 | " 'MG': 'Minas Gerais',\n", 53 | " 'PA': 'Pará',\n", 54 | " 'PB': 'Paraíba',\n", 55 | " 'PR': 'Paraná',\n", 56 | " 'PE': 'Pernambuco',\n", 57 | " 'PI': 'Piauí',\n", 58 | " 'RJ': 'Rio de Janeiro',\n", 59 | " 'RN': 'Rio Grande do 
Norte',\n", 60 | " 'RS': 'Rio Grande do Sul',\n", 61 | " 'RO': 'Rondônia',\n", 62 | " 'RR': 'Roraima',\n", 63 | " 'SC': 'Santa Catarina',\n", 64 | " 'SP': 'São Paulo',\n", 65 | " 'SE': 'Sergipe',\n", 66 | " 'TO': 'Tocantins',\n", 67 | " 'DF': 'Distrito Federal'}" 68 | ] 69 | }, 70 | "execution_count": 10, 71 | "metadata": {}, 72 | "output_type": "execute_result" 73 | } 74 | ], 75 | "source": [ 76 | "brasil.UFs" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 12, 82 | "id": "965a2323-066c-45af-83f7-b20ece735089", 83 | "metadata": {}, 84 | "outputs": [ 85 | { 86 | "data": { 87 | "text/plain": [ 88 | "{1: 'Janeiro',\n", 89 | " 2: 'Fevereiro',\n", 90 | " 3: 'Março',\n", 91 | " 4: 'Abril',\n", 92 | " 5: 'Maio',\n", 93 | " 6: 'Junho',\n", 94 | " 7: 'Julho',\n", 95 | " 8: 'Agosto',\n", 96 | " 9: 'Setembro',\n", 97 | " 10: 'Outubro',\n", 98 | " 11: 'Novembro',\n", 99 | " 12: 'Dezembro'}" 100 | ] 101 | }, 102 | "execution_count": 12, 103 | "metadata": {}, 104 | "output_type": "execute_result" 105 | } 106 | ], 107 | "source": [ 108 | "brasil.MONTHS" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 14, 114 | "id": "573f2f20-f038-4384-b6f2-558bad80f276", 115 | "metadata": {}, 116 | "outputs": [ 117 | { 118 | "data": { 119 | "text/plain": [ 120 | "'Rio de Janeiro'" 121 | ] 122 | }, 123 | "execution_count": 14, 124 | "metadata": {}, 125 | "output_type": "execute_result" 126 | } 127 | ], 128 | "source": [ 129 | "# Get municipality name by IBGE's geocode \n", 130 | "# https://www.ibge.gov.br/explica/codigos-dos-municipios.php\n", 131 | "brasil.get_city_name_by_geocode(3304557)" 132 | ] 133 | } 134 | ], 135 | "metadata": { 136 | "kernelspec": { 137 | "display_name": "Python 3 (ipykernel)", 138 | "language": "python", 139 | "name": "python3" 140 | }, 141 | "language_info": { 142 | "codemirror_mode": { 143 | "name": "ipython", 144 | "version": 3 145 | }, 146 | "file_extension": ".py", 147 | "mimetype": "text/x-python", 148 | "name": "python", 149 | "nbconvert_exporter": "python", 150 | "pygments_lexer": "ipython3", 151 | "version": "3.11.8" 152 | } 153 | }, 154 | "nbformat": 4, 155 | "nbformat_minor": 5 156 | } 157 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | PySUS.egg-info/ 3 | build/ 4 | dist/ 5 | *__pycache__/* 6 | *.o 7 | *.so 8 | *.pyc 9 | *.pyo 10 | .eggs 11 | *.csv 12 | .vscode/ 13 | bin/ 14 | lib/ 15 | pyvenv.cfg 16 | *.cache/ 17 | *.env 18 | *.jupyter/ 19 | *.local/ 20 | *.mozilla/ 21 | *.ipynb_checkpoints* 22 | *.dbc 23 | *.DBC 24 | # *.dbf 25 | # *.DBF 26 | *.pickle 27 | *.parquet 28 | .virtual_documents 29 | 30 | # Byte-compiled / optimized / DLL files 31 | __pycache__/ 32 | *.py[cod] 33 | *$py.class 34 | 35 | # C extensions 36 | *.so 37 | 38 | # Distribution / packaging 39 | .Python 40 | build/ 41 | develop-eggs/ 42 | dist/ 43 | downloads/ 44 | eggs/ 45 | .eggs/ 46 | lib/ 47 | lib64/ 48 | parts/ 49 | sdist/ 50 | var/ 51 | wheels/ 52 | share/python-wheels/ 53 | *.egg-info/ 54 | .installed.cfg 55 | *.egg 56 | MANIFEST 57 | 58 | # PyInstaller 59 | # Usually these files are written by a python script from a template 60 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
61 | *.manifest 62 | *.spec 63 | 64 | # Installer logs 65 | pip-log.txt 66 | pip-delete-this-directory.txt 67 | 68 | # Unit test / coverage reports 69 | htmlcov/ 70 | .tox/ 71 | .nox/ 72 | .coverage 73 | .coverage.* 74 | .cache 75 | nosetests.xml 76 | coverage.xml 77 | *.cover 78 | *.py,cover 79 | .hypothesis/ 80 | .pytest_cache/ 81 | cover/ 82 | 83 | # Translations 84 | # 85 | *.mo 86 | *.pot 87 | 88 | # Django stuff: 89 | *.log 90 | local_settings.py 91 | db.sqlite3 92 | db.sqlite3-journal 93 | 94 | # Flask stuff: 95 | instance/ 96 | .webassets-cache 97 | 98 | # Scrapy stuff: 99 | .scrapy 100 | 101 | # Sphinx documentation 102 | docs/_build/ 103 | 104 | # PyBuilder 105 | .pybuilder/ 106 | target/ 107 | 108 | # Jupyter Notebook 109 | .ipynb_checkpoints 110 | notebooks_tmp/* 111 | 112 | # IPython 113 | profile_default/ 114 | ipython_config.py 115 | 116 | # pyenv 117 | # For a library or package, you might want to ignore these files since the code is 118 | # intended to run in multiple environments; otherwise, check them in: 119 | # .python-version 120 | 121 | # pipenv 122 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 123 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 124 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 125 | # install all needed dependencies. 126 | #Pipfile.lock 127 | 128 | # poetry 129 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 130 | # This is especially recommended for binary packages to ensure reproducibility, and is more 131 | # commonly ignored for libraries. 132 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 133 | #poetry.lock 134 | 135 | # pdm 136 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 137 | #pdm.lock 138 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 139 | # in version control. 140 | # https://pdm.fming.dev/#use-with-ide 141 | .pdm.toml 142 | 143 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 144 | __pypackages__/ 145 | 146 | # Celery stuff 147 | celerybeat-schedule 148 | celerybeat.pid 149 | 150 | # SageMath parsed files 151 | *.sage.py 152 | 153 | # Environments 154 | .env 155 | .venv 156 | env/ 157 | venv/ 158 | ENV/ 159 | env.bak/ 160 | venv.bak/ 161 | 162 | # Spyder project settings 163 | .spyderproject 164 | .spyproject 165 | 166 | # Rope project settings 167 | .ropeproject 168 | 169 | # mkdocs documentation 170 | /site 171 | 172 | # mypy 173 | .mypy_cache/ 174 | .dmypy.json 175 | dmypy.json 176 | 177 | # Pyre type checker 178 | .pyre/ 179 | 180 | # pytype static type analyzer 181 | .pytype/ 182 | 183 | # Cython debug symbols 184 | cython_debug/ 185 | 186 | # PyCharm 187 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 188 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 189 | # and can be added to the global gitignore or merged into this file. For a more nuclear 190 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
191 | .idea/ 192 | -------------------------------------------------------------------------------- /pysus/ftp/databases/sih.py: -------------------------------------------------------------------------------- 1 | __all__ = ["SIH"] 2 | 3 | from typing import List, Optional, Union 4 | 5 | from pysus.ftp import Database, Directory, File 6 | from pysus.ftp.utils import MONTHS, UFs, parse_UFs, to_list, zfill_year 7 | 8 | 9 | class SIH(Database): 10 | name = "SIH" 11 | paths = ( 12 | Directory("/dissemin/publicos/SIHSUS/199201_200712/Dados"), 13 | Directory("/dissemin/publicos/SIHSUS/200801_/Dados"), 14 | ) 15 | metadata = { 16 | "long_name": "Sistema de Informações Hospitalares", 17 | "source": ( 18 | "https://datasus.saude.gov.br/acesso-a-informacao/morbidade-hospitalar-do-sus-sih-sus/", # noqa 19 | "https://datasus.saude.gov.br/acesso-a-informacao/producao-hospitalar-sih-sus/", # noqa 20 | ), 21 | "description": ( 22 | "A finalidade do AIH (Sistema SIHSUS) é a de transcrever todos os " 23 | "atendimentos que provenientes de internações hospitalares que " 24 | "foram financiadas pelo SUS, e após o processamento, gerarem " 25 | "relatórios para os gestores que lhes possibilitem fazer os " 26 | "pagamentos dos estabelecimentos de saúde. Além disso, o nível " 27 | "Federal recebe mensalmente uma base de dados de todas as " 28 | "internações autorizadas (aprovadas ou não para pagamento) para " 29 | "que possam ser repassados às Secretarias de Saúde os valores de " 30 | "Produção de Média e Alta complexidade além dos valores de CNRAC, " 31 | "FAEC e de Hospitais Universitários – em suas variadas formas de " 32 | "contrato de gestão." 33 | ), 34 | } 35 | groups = { 36 | "RD": "AIH Reduzida", 37 | "RJ": "AIH Rejeitada", 38 | "ER": "AIH Rejeitada com erro", 39 | "SP": "Serviços Profissionais", 40 | "CH": "Cadastro Hospitalar", 41 | "CM": "", # TODO 42 | } 43 | 44 | def describe(self, file: File) -> dict: 45 | if file.extension.upper() in [".DBC", ".DBF"]: 46 | group, _uf, year, month = self.format(file) 47 | 48 | try: 49 | uf = UFs[_uf] 50 | except KeyError: 51 | uf = _uf 52 | 53 | description = { 54 | "name": file.basename, 55 | "group": self.groups[group], 56 | "uf": uf, 57 | "month": MONTHS[int(month)], 58 | "year": zfill_year(year), 59 | "size": file.info["size"], 60 | "last_update": file.info["modify"], 61 | } 62 | 63 | return description 64 | return {} 65 | 66 | def format(self, file: File) -> tuple: 67 | group, _uf = file.name[:2].upper(), file.name[2:4].upper() 68 | year, month = file.name[-4:-2], file.name[-2:] 69 | return group, _uf, zfill_year(year), month 70 | 71 | def get_files( 72 | self, 73 | group: Union[List[str], str], 74 | uf: Optional[Union[List[str], str]] = None, 75 | year: Optional[Union[list, str, int]] = None, 76 | month: Optional[Union[list, str, int]] = None, 77 | ) -> List[File]: 78 | files = list( 79 | filter( 80 | lambda f: f.extension.upper() in [".DBC", ".DBF"], self.files 81 | ) 82 | ) 83 | 84 | groups = [gr.upper() for gr in to_list(group)] 85 | 86 | if not all(gr in list(self.groups) for gr in groups): 87 | raise ValueError( 88 | f"Unknown SIH Group(s): " 89 | f"{set(groups).difference(list(self.groups))}" 90 | ) 91 | 92 | files = list(filter(lambda f: self.format(f)[0] in groups, files)) 93 | 94 | if uf: 95 | ufs = parse_UFs(uf) 96 | files = list(filter(lambda f: self.format(f)[1] in ufs, files)) 97 | 98 | if year or str(year) in ["0", "00"]: 99 | years = [zfill_year(str(m)[-2:]) for m in to_list(year)] 100 | files = list(filter(lambda f: self.format(f)[2] in 
years, files)) 101 | 102 | if month: 103 | months = [str(y)[-2:].zfill(2) for y in to_list(month)] 104 | files = list(filter(lambda f: self.format(f)[3] in months, files)) 105 | 106 | return files 107 | -------------------------------------------------------------------------------- /pysus/ftp/databases/ciha.py: -------------------------------------------------------------------------------- 1 | __all__ = ["CIHA"] 2 | 3 | from typing import List, Optional, Union 4 | 5 | from pysus.ftp import Database, Directory, File 6 | from pysus.ftp.utils import MONTHS, UFs, parse_UFs, to_list, zfill_year 7 | 8 | 9 | class CIHA(Database): 10 | name = "CIHA" 11 | paths = (Directory("/dissemin/publicos/CIHA/201101_/Dados"),) 12 | metadata = { 13 | "long_name": "Comunicação de Internação Hospitalar e Ambulatorial", 14 | "source": "http://ciha.datasus.gov.br/CIHA/index.php", 15 | "description": ( 16 | "A CIHA foi criada para ampliar o processo de planejamento, " 17 | "programação, controle, avaliação e regulação da assistência à " 18 | "saúde permitindo um conhecimento mais abrangente e profundo dos " 19 | "perfis nosológico e epidemiológico da população brasileira, da " 20 | "capacidade instalada e do potencial de produção de serviços do " 21 | "conjunto de estabelecimentos de saúde do País. O sistema permite " 22 | "o acompanhamento das ações e serviços de saúde custeados " 23 | "por: planos privados de assistência à saúde; planos públicos; " 24 | "pagamento particular por pessoa física; pagamento particular por " 25 | "pessoa jurídica; programas e projetos federais (PRONON, PRONAS, " 26 | "PROADI); recursos próprios das secretarias municipais e estaduais" 27 | " de saúde; DPVAT; gratuidade e, a partir da publicação da " 28 | "Portaria GM/MS nº 2.905/2022, consórcios públicos. 
As " 29 | "informações registradas na CIHA servem como base para o processo " 30 | "de Certificação de Entidades Beneficentes de Assistência Social " 31 | "em Saúde (CEBAS) e para monitoramento dos programas PRONAS e " 32 | "PRONON" 33 | ), 34 | } 35 | groups = { 36 | "CIHA": "Comunicação de Internação Hospitalar e Ambulatorial", 37 | } 38 | 39 | def describe(self, file: File): 40 | if not isinstance(file, File): 41 | return file 42 | 43 | if file.extension.upper() in [".DBC", ".DBF"]: 44 | group, _uf, year, month = self.format(file) 45 | 46 | try: 47 | uf = UFs[_uf] 48 | except KeyError: 49 | uf = _uf 50 | 51 | description = { 52 | "name": str(file.basename), 53 | "group": self.groups[group], 54 | "uf": uf, 55 | "month": MONTHS[int(month)], 56 | "year": zfill_year(year), 57 | "size": file.info["size"], 58 | "last_update": file.info["modify"], 59 | } 60 | 61 | return description 62 | return file 63 | 64 | def format(self, file: File) -> tuple: 65 | group, _uf = file.name[:4].upper(), file.name[4:6].upper() 66 | year, month = file.name[-4:-2], file.name[-2:] 67 | return group, _uf, zfill_year(year), month 68 | 69 | def get_files( 70 | self, 71 | uf: Optional[Union[List[str], str]] = None, 72 | year: Optional[Union[list, str, int]] = None, 73 | month: Optional[Union[list, str, int]] = None, 74 | group: Union[List[str], str] = "CIHA", 75 | ) -> List[File]: 76 | files = list( 77 | filter( 78 | lambda f: f.extension.upper() in [".DBC", ".DBF"], self.files 79 | ) 80 | ) 81 | 82 | groups = [gr.upper() for gr in to_list(group)] 83 | 84 | if not all(gr in list(self.groups) for gr in groups): 85 | raise ValueError( 86 | "Unknown CIHA Group(s): " 87 | f"{set(groups).difference(list(self.groups))}" 88 | ) 89 | 90 | files = list(filter(lambda f: self.format(f)[0] in groups, files)) 91 | 92 | if uf: 93 | ufs = parse_UFs(uf) 94 | files = list(filter(lambda f: self.format(f)[1] in ufs, files)) 95 | 96 | if year or str(year) in ["0", "00"]: 97 | years = [zfill_year(str(m)[-2:]) for m in to_list(year)] 98 | files = list(filter(lambda f: self.format(f)[2] in years, files)) 99 | 100 | if month: 101 | months = [str(y)[-2:].zfill(2) for y in to_list(month)] 102 | files = list(filter(lambda f: self.format(f)[3] in months, files)) 103 | 104 | return files 105 | -------------------------------------------------------------------------------- /pysus/preprocessing/sinan.py: -------------------------------------------------------------------------------- 1 | import os 2 | from functools import lru_cache 3 | 4 | import geocoder 5 | import numpy as np 6 | import pandas as pd 7 | import requests 8 | from dbfread import DBF 9 | 10 | 11 | def read_sinan_dbf(fname, encoding) -> pd.DataFrame: 12 | """ 13 | Read SINAN dbf file returning a Pandas Dataframe with 14 | :param fname: dbf file name 15 | :param encoding: Encoding of the dbf 16 | :return: pandas dataframe 17 | """ 18 | db = DBF(fname, encoding=encoding) 19 | df = pd.DataFrame(list(db)) 20 | 21 | def convert_week(x): 22 | try: 23 | w = int(x) % 100 24 | except ValueError: 25 | w = np.nan 26 | return w 27 | 28 | for cname in df.columns: 29 | df[cname].replace("", np.nan, inplace=True) 30 | if cname.startswith(("NU", "ID")): 31 | try: 32 | df[cname] = pd.to_numeric(df[cname]) 33 | except ValueError as e: 34 | print(f"Column {cname} could not be converted to numeric: {e}") 35 | # certain IDs can be alphanumerical 36 | pass 37 | elif cname.startswith("SEM"): 38 | df[cname] = df[cname].map(convert_week) 39 | 40 | return df 41 | 42 | 43 | @lru_cache(maxsize=None) 44 | def 
get_geocodes(geoc):
45 |     """
46 |     Return the city name and two-letter state code for a geocode
47 |     :param geoc: IBGE geocode of a municipality
48 |     :return: [city name, state code]
49 |     """
50 |     url = (
51 |         "http://cidades.ibge.gov.br/services/jSonpMuns.php?"
52 |         "busca=330&featureClass=P&style=full&maxRows=5&name_startsWith={}"
53 |     ).format(geoc)
54 |     resp = requests.get(url)
55 |     for d in resp.json()["municipios"]:
56 |         if int(geoc) == int(d["c"]):
57 |             return [d["n"].encode("latin-1").decode("utf-8"), d["s"]]
58 | 
59 |     else:  # no match found after scanning all the results
60 |         raise KeyError("could not find geocode {}".format(geoc))
61 | 
62 | 
63 | def _address_generator(df, default=""):
64 |     for row in df.iterrows():
65 |         line = dict(row[1])
66 |         try:
67 |             line["cidade"] = ",".join(get_geocodes(line["ID_MN_RESI"]))
68 |         except KeyError:
69 |             print("Could not find geocode {}, using default".format(line["ID_MN_RESI"]))
70 |             line["cidade"] = default
71 |         yield line[
72 |             "NU_NOTIFIC"
73 |         ], "{NM_LOGRADO}, {NU_NUMERO}, {NM_BAIRRO}, {cidade}, Brasil".format(
74 |             **line
75 |         )
76 | 
77 | 
78 | def geocode(sinan_df, outfile, default_city):
79 |     """
80 |     Geocode cases based on the addresses included.
81 |     :param default_city: default city to use in case of a bad geocode found in
82 |         the file. It can be "city, state"
83 |     :param sinan_df: Dataframe generated from a SINAN DBF
84 |     :param outfile: CSV file to which the geocoded coordinates are written
85 |     """
86 |     addrs = _address_generator(sinan_df, default_city)
87 |     if os.path.exists(outfile):
88 |         mode = "a"
89 |         coords = pd.read_csv(outfile)
90 |         geocoded = coords.NU_NOTIFIC.tolist()
91 |     else:
92 |         mode = "w"
93 |         geocoded = []
94 |     with open(outfile, mode) as of:
95 |         if mode == "w":
96 |             of.write("NU_NOTIFIC,latitude,longitude\n")
97 |         for nu, ad in addrs:
98 | 
99 |             if nu in geocoded:
100 |                 continue
101 |             location = geocoder.google(ad)
102 |             if location is None:
103 |                 raise NameError("Google could not find {}".format(ad))
104 |             if location.latlng == []:
105 |                 print(
106 |                     (
107 |                         "Search for {} returned {} as coordinates, trying "
108 |                         "reduced address:"
109 |                     ).format(ad, location.latlng)
110 |                 )
111 |                 ad = ",".join(ad.split(",")[2:])
112 |                 print(ad)
113 |                 location = geocoder.google(ad)
114 |             try:
115 |                 of.write(
116 |                     "{},{},{}\n".format(
117 |                         nu, location.latlng[0], location.latlng[1]
118 |                     )
119 |                 )
120 |                 print("Successfully geolocated {}".format(ad))
121 |             except IndexError:
122 |                 print(
123 |                     (
124 |                         "Search for {} returned {} as coordinates, skipping"
125 |                     ).format(ad, location.latlng)
126 |                 )
127 |                 of.write("{},nan,nan\n".format(nu))
128 | 
-------------------------------------------------------------------------------- /docs/source/rio.html: --------------------------------------------------------------------------------
(HTML content stripped in this dump: rio.html is apparently an exported interactive map of Rio de Janeiro; its markup survived only as blank numbered lines, omitted here.)
/docs/source/databases/data-sources.rst:
--------------------------------------------------------------------------------
1 | ============
2 | Data Sources
3 | ============
4 | 
5 | PySUS allows you to download data on demand from DATASUS FTP databases.
6 | Currently, the following databases can be downloaded:
7 | 
8 | 
9 | .. toctree::
10 |    :maxdepth: 2
11 | 
12 |    CNES - Cadastro Nacional de Estabelecimentos de Saúde
13 |    SINAN - Doenças e Agravos de Notificação
14 |    SINASC - Sistema de Informações sobre Nascidos Vivos
15 |    SIM - Sistema de Informação sobre Mortalidade
16 |    SIA - Sistema de Informações Ambulatoriais
17 |    SIH - Sistema de Informações Hospitalares
18 |    territory - Tables and maps about the Brazilian territory
19 | 
20 | 
21 | About SINAN
22 | ----------------------------------------
23 | #. :doc:`SINAN`
24 | 
25 | The Information System for Notifiable Diseases (Sinan) is primarily fed by the notification and investigation of cases of diseases and conditions listed in the national list of notifiable diseases. However, states and municipalities are allowed to include other significant health issues in their region, such as filariasis in the municipality of São Paulo. Its effective use allows for dynamic diagnosis of the occurrence of an event in the population, potentially providing insights into the causal explanations of notifiable diseases, as well as indicating risks to which individuals are exposed. This contributes to the identification of the epidemiological reality of a specific geographic area. Its systematic, decentralized use contributes to the democratization of information, enabling all healthcare professionals to access and make it available to the community. Therefore, it is a relevant tool to assist in health planning, defining intervention priorities, and evaluating the impact of interventions.
26 | 
27 | 
28 | About SINASC
29 | ----------------------------------------------------
30 | #. :doc:`SINASC`
31 | 
32 | The Information System on Live Births (Sistema de Informações sobre Nascidos Vivos or SINASC) was officially implemented starting from 1990 with the aim of collecting data on reported births across the entire national territory and providing birth-related data for all levels of the Healthcare System.
33 | 
34 | The Ministry of Health's Department of Health Surveillance (Secretaria de Vigilância em Saúde or SVS/MS) manages SINASC at the national level. Specifically, the responsibility for changes in layout, as well as arrangements for printing and distributing the Declaration of Live Birth (DN) forms and the System manuals, lies with the General Coordination of Information and Epidemiological Analysis (Coordenação-Geral de Informações e Análises Epidemiológicas or CGIAE) and the Department of Epidemiological Analysis and Surveillance of Non-Communicable Diseases (Departamento de Análise Epidemiológica e Vigilância de Doenças Não Transmissíveis or DAENT). The implementation of SINASC occurred gradually in all federal units and, since 1994, has been showing a higher number of registrations in many municipalities compared to what is published by the Brazilian Institute of Geography and Statistics (Instituto Brasileiro de Geografia e Estatística or IBGE) based on Civil Registry data. The system also enables the construction of useful indicators for healthcare service management planning.
35 | 36 | 37 | About SIM 38 | --------------------------------------------- 39 | #. :doc:`SIM` 40 | 41 | The Mortality Information System (Sistema de Informações sobre Mortalidade or SIM) was established by DATASUS to regularly collect data on mortality in the country. With the creation of SIM, it became possible to comprehensively capture mortality data to support various levels of public health management. Based on this information, it is possible to conduct analyses of the situation, plan, and evaluate actions and programs in the field of public health. 42 | 43 | 44 | About SIH 45 | ----------------------------------------- 46 | #. :doc:`SIH` 47 | 48 | The purpose of the AIH (SIHSUS System) is to document all hospitalization-related services that are FINANCED BY SUS and, after processing, generate reports for managers to facilitate payments to healthcare facilities. Additionally, the federal level receives a monthly database of all authorized hospitalizations (whether approved for payment or not) to enable the transfer of Production values for Medium and High complexity, as well as values for CNRAC, FAEC, and University Hospitals, in their various forms of management contracts, to the Health Departments. 49 | 50 | 51 | About SIA 52 | ------------------------------------------ 53 | #. :doc:`SIA` 54 | 55 | The SIA (Sistema de Informação Ambulatorial) is the system that enables local managers to process information related to outpatient care (non-hospital) recorded in the data collection applications for such services provided by public and private providers, whether contracted or affiliated with SUS. 56 | -------------------------------------------------------------------------------- /pysus/data/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import struct 3 | from datetime import datetime 4 | from pathlib import Path 5 | 6 | import pandas as pd 7 | import pyarrow as pa 8 | import pyarrow.parquet as pq 9 | from dbfread import DBF 10 | from pyreaddbc import dbc2dbf 11 | 12 | 13 | def dbc_to_dbf(dbc: str, _pbar=None) -> str: 14 | """ 15 | Parses DBC files into DBFs 16 | """ 17 | path = Path(dbc) 18 | 19 | if path.suffix.lower() != ".dbc": 20 | raise ValueError(f"Not a DBC file: {path}") 21 | 22 | dbf = path.with_suffix(".dbf") 23 | 24 | if _pbar: 25 | _pbar.reset(total=1) 26 | _pbar.set_description(f"{dbf.name}") 27 | 28 | _parquet = path.with_suffix(".parquet") 29 | if _parquet.exists(): 30 | path.unlink(missing_ok=True) 31 | dbf.unlink(missing_ok=True) 32 | return str(_parquet) 33 | 34 | if dbf.exists(): 35 | path.unlink(missing_ok=True) 36 | return str(dbf) 37 | 38 | dbc2dbf(str(path), str(dbf)) 39 | path.unlink() 40 | 41 | if _pbar: 42 | _pbar.update(1) 43 | 44 | return str(dbf) 45 | 46 | 47 | def stream_dbf(dbf, chunk_size=30000): 48 | """Fetches records in parquet chunks to preserve memory""" 49 | data = [] 50 | i = 0 51 | for records in dbf: 52 | data.append(records) 53 | i += 1 54 | if i == chunk_size: 55 | yield data 56 | data = [] 57 | i = 0 58 | else: 59 | yield data 60 | 61 | 62 | def decode_column(value): 63 | """ 64 | Decodes binary data to str 65 | """ 66 | if isinstance(value, bytes): 67 | return value.decode(encoding="iso-8859-1").replace("\x00", "") 68 | 69 | if isinstance(value, str): 70 | return str(value).replace("\x00", "") 71 | 72 | return value 73 | 74 | 75 | def dbf_to_parquet(dbf: str, _pbar=None) -> str: 76 | """ 77 | Parses DBF file into parquet to preserve memory 78 | """ 79 | path = Path(dbf) 80 | 
81 |     if path.suffix.lower() != ".dbf":
82 |         raise ValueError(f"Not a DBF file: {path}")
83 | 
84 |     parquet = path.with_suffix(".parquet")
85 | 
86 |     approx_final_size = (
87 |         os.path.getsize(path) / 200
88 |     )  # TODO: not best approx size
89 |     if _pbar:
90 |         _pbar.unit = "B"
91 |         _pbar.unit_scale = True
92 |         _pbar.reset(total=approx_final_size)
93 |         _pbar.set_description(f"{parquet.name}")
94 | 
95 |     if parquet.exists():
96 |         if _pbar:
97 |             _pbar.update(approx_final_size - _pbar.n)
98 |         return str(parquet)
99 | 
100 |     parquet.absolute().mkdir()
101 | 
102 |     try:
103 |         chunk_size = 30_000
104 |         for chunk in stream_dbf(
105 |             DBF(path, encoding="iso-8859-1", raw=True), chunk_size
106 |         ):
107 |             if _pbar:
108 |                 _pbar.update(chunk_size)
109 | 
110 |             chunk_df = pd.DataFrame(chunk)
111 |             table = pa.Table.from_pandas(chunk_df.map(decode_column))
112 |             pq.write_to_dataset(table, root_path=str(parquet))
113 |     except struct.error as err:
114 |         if _pbar:
115 |             _pbar.close()
116 |         Path(path).unlink()
117 |         parquet.rmdir()
118 |         raise err
119 | 
120 |     if _pbar:
121 |         _pbar.update(approx_final_size - _pbar.n)
122 | 
123 |     path.unlink()
124 | 
125 |     return str(parquet)
126 | 
127 | 
128 | def parse_dftypes(df: pd.DataFrame) -> pd.DataFrame:
129 |     """
130 |     Parse DataFrame values, cleaning blank spaces if needed
131 |     and converting dtypes into correct types.
132 |     """
133 | 
134 |     def map_column_func(column_names: list[str], func):
135 |         # Maps a function to each value in each column
136 |         columns = [c for c in df.columns if c in column_names]
137 |         df[columns] = df[columns].map(func)
138 | 
139 |     def str_to_int(string: str):
140 |         # If, after removing spaces, every character is a digit,
141 |         # return int(value). @warning it removes in-between
142 |         # spaces as well
143 |         if str(string).replace(" ", "").isnumeric():
144 |             return int(str(string).replace(" ", ""))
145 |         return string
146 | 
147 |     def str_to_date(string: str):
148 |         if isinstance(string, str):
149 |             try:
150 |                 return datetime.strptime(string, "%Y%m%d").date()
151 |             except ValueError:
152 |                 # Ignore errors, bad value
153 |                 return string
154 |         return string
155 | 
156 |     map_column_func(["DT_NOTIFIC", "DT_SIN_PRI"], str_to_date)
157 |     map_column_func(["CODMUNRES", "SEXO"], str_to_int)
158 | 
159 |     df = df.map(
160 |         lambda x: "" if str(x).isspace() else x
161 |     )  # Remove all space values
162 | 
163 |     df = df.convert_dtypes()
164 |     return df
165 | 
--------------------------------------------------------------------------------
/pysus/ftp/databases/sia.py:
--------------------------------------------------------------------------------
1 | __all__ = ["SIA"]
2 | 
3 | from typing import List, Optional, Union
4 | 
5 | from pysus.ftp import Database, Directory, File
6 | from pysus.ftp.utils import MONTHS, UFs, parse_UFs, to_list, zfill_year
7 | 
8 | 
9 | class SIA(Database):
10 |     name = "SIA"
11 |     paths = (
12 |         Directory("/dissemin/publicos/SIASUS/199407_200712/Dados"),
13 |         Directory("/dissemin/publicos/SIASUS/200801_/Dados"),
14 |     )
15 |     metadata = {
16 |         "long_name": "Sistema de Informações Ambulatoriais",
17 |         "source": "http://sia.datasus.gov.br/principal/index.php",
18 |         "description": (
19 |             "O Sistema de Informação Ambulatorial (SIA) foi instituído pela "
20 |             "Portaria GM/MS n.º 896 de 29 de junho de 1990.
Originalmente, o " 21 | "SIA foi concebido a partir do projeto SICAPS (Sistema de " 22 | "Informação e Controle Ambulatorial da Previdência Social), em " 23 | "que os conceitos, os objetivos e as diretrizes criados para o " 24 | "desenvolvimento do SICAPS foram extremamente importantes e " 25 | "amplamente utilizados para o desenvolvimento do SIA, tais" 26 | " como: (i) o acompanhamento das programações físicas e " 27 | "orçamentárias; (ii) o acompanhamento das ações de saúde " 28 | "produzidas; (iii) a agilização do pagamento e controle " 29 | "orçamentário e financeiro; e (iv) a formação de banco de dados " 30 | "para contribuir com a construção do SUS." 31 | ), 32 | } 33 | groups = { 34 | "AB": "APAC de Cirurgia Bariátrica", 35 | "ABO": "APAC de Acompanhamento Pós Cirurgia Bariátrica", 36 | "ACF": "APAC de Confecção de Fístula", 37 | "AD": "APAC de Laudos Diversos", 38 | "AM": "APAC de Medicamentos", 39 | "AMP": "APAC de Acompanhamento Multiprofissional", 40 | "AN": "APAC de Nefrologia", 41 | "AQ": "APAC de Quimioterapia", 42 | "AR": "APAC de Radioterapia", 43 | "ATD": "APAC de Tratamento Dialítico", 44 | "BI": "Boletim de Produção Ambulatorial individualizado", 45 | "IMPBO": "", # TODO 46 | "PA": "Produção Ambulatorial", 47 | "PAM": "", # TODO 48 | "PAR": "", # TODO 49 | "PAS": "", # TODO 50 | "PS": "RAAS Psicossocial", 51 | "SAD": "RAAS de Atenção Domiciliar", 52 | } 53 | 54 | def describe(self, file: File) -> dict: 55 | if file.extension.upper() == ".DBC": 56 | group, _uf, year, month = self.format(file) 57 | 58 | try: 59 | uf = UFs[_uf] 60 | except KeyError: 61 | uf = _uf 62 | 63 | description = { 64 | "name": str(file.basename), 65 | "group": self.groups[group], 66 | "uf": uf, 67 | "month": MONTHS[int(month)], 68 | "year": zfill_year(year), 69 | "size": file.info["size"], 70 | "last_update": file.info["modify"], 71 | } 72 | 73 | return description 74 | return {} 75 | 76 | def format(self, file: File) -> tuple: 77 | if file.extension.upper() in [".DBC", ".DBF"]: 78 | digits = "".join([d for d in file.name if d.isdigit()]) 79 | if "_" in file.name: 80 | name, _ = file.name.split("_") 81 | digits = "".join([d for d in name if d.isdigit()]) 82 | chars, _ = file.name.split(digits) 83 | year, month = digits[:2], digits[2:] 84 | group, uf = chars[:-2].upper(), chars[-2:].upper() 85 | return group, uf, zfill_year(year), month 86 | return () 87 | 88 | def get_files( 89 | self, 90 | group: Union[List[str], str], 91 | uf: Optional[Union[List[str], str]] = None, 92 | year: Optional[Union[list, str, int]] = None, 93 | month: Optional[Union[list, str, int]] = None, 94 | ) -> List[File]: 95 | files = list( 96 | filter( 97 | lambda f: f.extension.upper() in [".DBC", ".DBF"], self.files 98 | ) 99 | ) 100 | 101 | groups = [gr.upper() for gr in to_list(group)] 102 | 103 | if not all(gr in list(self.groups) for gr in groups): 104 | raise ValueError( 105 | "Unknown SIA Group(s): " 106 | f"{set(groups).difference(list(self.groups))}" 107 | ) 108 | 109 | files = list(filter(lambda f: self.format(f)[0] in groups, files)) 110 | 111 | if uf: 112 | ufs = parse_UFs(uf) 113 | files = list(filter(lambda f: self.format(f)[1] in ufs, files)) 114 | 115 | if year or str(year) in ["0", "00"]: 116 | years = [zfill_year(str(m)[-2:]) for m in to_list(year)] 117 | files = list(filter(lambda f: self.format(f)[2] in years, files)) 118 | 119 | if month: 120 | months = [str(y)[-2:].zfill(2) for y in to_list(month)] 121 | files = list(filter(lambda f: self.format(f)[3] in months, files)) 122 | 123 | return files 124 | 
-------------------------------------------------------------------------------- /pysus/ftp/databases/cnes.py: -------------------------------------------------------------------------------- 1 | __all__ = ["CNES"] 2 | 3 | from typing import List, Optional, Union 4 | 5 | from pysus.ftp import Database, Directory, File 6 | from pysus.ftp.utils import MONTHS, UFs, parse_UFs, to_list, zfill_year 7 | 8 | 9 | class CNES(Database): 10 | name = "CNES" 11 | paths = (Directory("/dissemin/publicos/CNES/200508_/Dados"),) 12 | metadata = { 13 | "long_name": "Cadastro Nacional de Estabelecimentos de Saúde", 14 | "source": "https://cnes.datasus.gov.br/", 15 | "description": ( 16 | "O Cadastro Nacional de Estabelecimentos de Saúde (CNES) é o " 17 | "sistema de informação oficial de cadastramento de informações " 18 | "de todos os estabelecimentos de saúde no país, independentemente " 19 | "de sua natureza jurídica ou de integrarem o Sistema Único de " 20 | "Saúde (SUS). Trata-se do cadastro oficial do Ministério da " 21 | "Saúde (MS) no tocante à realidade da capacidade instalada e " 22 | "mão-de-obra assistencial de saúde no Brasil em estabelecimentos " 23 | "de saúde públicos ou privados, com convênio SUS ou não." 24 | ), 25 | } 26 | groups = { 27 | "DC": "Dados Complementares", 28 | "EE": "Estabelecimento de Ensino", 29 | "EF": "Estabelecimento Filantrópico", 30 | "EP": "Equipes", 31 | "EQ": "Equipamentos", 32 | "GM": "Gestão e Metas", 33 | "HB": "Habilitação", 34 | "IN": "Incentivos", 35 | "LT": "Leitos", 36 | "PF": "Profissional", 37 | "RC": "Regra Contratual", 38 | "SR": "Serviço Especializado", 39 | "ST": "Estabelecimentos", 40 | } 41 | __loaded__ = set() 42 | 43 | def load( 44 | self, 45 | groups: Union[str, List[str]] = None, 46 | ): 47 | """ 48 | Loads CNES Groups into content. 
Will convert the files and directories 49 | found within FTP Directories into self.content 50 | """ 51 | if not self.__content__: 52 | self.paths[0].load() 53 | self.__content__ |= self.paths[0].__content__ 54 | 55 | if groups: 56 | groups = to_list(groups) 57 | 58 | if not all( 59 | group in self.groups for group in [gr.upper() for gr in groups] 60 | ): 61 | raise ValueError( 62 | "Unknown CNES group(s): " 63 | f"{set(groups).difference(self.groups)}" 64 | ) 65 | 66 | for group in groups: 67 | group = group.upper() 68 | if group not in self.__loaded__: 69 | directory = self.__content__[group] 70 | directory.load() 71 | self.__content__ |= directory.__content__ 72 | self.__loaded__.add(directory.name) 73 | return self 74 | 75 | def describe(self, file: File) -> dict: 76 | if not isinstance(file, File): 77 | return {} 78 | 79 | if file.name == "GMufAAmm": 80 | # Leftover 81 | return {} 82 | 83 | if file.extension.upper() in [".DBC", ".DBF"]: 84 | group, _uf, year, month = self.format(file) 85 | 86 | try: 87 | uf = UFs[_uf] 88 | except KeyError: 89 | uf = _uf 90 | 91 | description = { 92 | "name": str(file.basename), 93 | "group": self.groups[group], 94 | "uf": uf, 95 | "month": MONTHS[int(month)], 96 | "year": zfill_year(year), 97 | "size": file.info["size"], 98 | "last_update": file.info["modify"], 99 | } 100 | 101 | return description 102 | return {} 103 | 104 | def format(self, file: File) -> tuple: 105 | group, _uf = file.name[:2].upper(), file.name[2:4].upper() 106 | year, month = file.name[-4:-2], file.name[-2:] 107 | return group, _uf, zfill_year(year), month 108 | 109 | def get_files( 110 | self, 111 | group: Union[List[str], str], 112 | uf: Optional[Union[List[str], str]] = None, 113 | year: Optional[Union[list, str, int]] = None, 114 | month: Optional[Union[list, str, int]] = None, 115 | ) -> List[File]: 116 | if not group: 117 | raise ValueError("At least one CNES group is required") 118 | 119 | groups = [gr.upper() for gr in to_list(group)] 120 | 121 | self.load(groups) 122 | 123 | files = list(filter(lambda f: f.name[:2] in groups, self.files)) 124 | 125 | if uf: 126 | ufs = parse_UFs(uf) 127 | files = list(filter(lambda f: f.name[2:4] in ufs, files)) 128 | 129 | if year or str(year) in ["0", "00"]: 130 | years = [str(m)[-2:].zfill(2) for m in to_list(year)] 131 | files = list(filter(lambda f: f.name[-4:-2] in years, files)) 132 | 133 | if month: 134 | months = [str(y)[-2:].zfill(2) for y in to_list(month)] 135 | files = list(filter(lambda f: f.name[-2:] in months, files)) 136 | 137 | return files 138 | -------------------------------------------------------------------------------- /docs/source/locale/pt/LC_MESSAGES/tutorials.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 
5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/tutorials.rst:3 23 | msgid "Tutorials" 24 | msgstr "Tutoriais" 25 | 26 | #: ../../source/tutorials.rst:5 27 | msgid "" 28 | "PySUS includes some Jupyter notebooks in its distribution package to " 29 | "serve as tutorials." 30 | msgstr "" 31 | "O PySUS inclui alguns notebooks Jupyter em seu pacote de distribuição " 32 | "para servir como tutoriais." 33 | 34 | #: ../../source/tutorials.rst:8 35 | msgid "Working with SINASC databases" 36 | msgstr "Trabalhando com a base de dados SINASC" 37 | 38 | #: ../../source/tutorials.rst:9 39 | msgid "" 40 | "SINASC is the national registry of live births. With PySUS, You can " 41 | "download SINASC tables directly and have them as dataframes to integrate " 42 | "in your analysis. `See this notebook. " 43 | "`_" 44 | msgstr "" 45 | "SINASC é o registro nacional de nascidos vivos. Com o PySUS, você pode fazer " 46 | "o download direto das tabelas do SINASC e tê-las como dataframes para integrar " 47 | "em sua análise. Veja este notebook. " 48 | "``_" 49 | 50 | #: ../../source/tutorials.rst:13 51 | msgid "Working with SINAN files" 52 | msgstr "Trabalhando com arquivos SINAN" 53 | 54 | #: ../../source/tutorials.rst:15 55 | msgid "" 56 | "SINAN is the national registry of cases for diseases of required " 57 | "reporting. PySUS offers the possibility of downloading records of " 58 | "individual cases selected for futher laboratory investigation, not the " 59 | "entirety of the reported cases. To see how to download these data look at" 60 | " the example notebook provided." 61 | msgstr "" 62 | "SINAN é o registro nacional de casos de doenças de notificação obrigatória. " 63 | "O PySUS oferece a possibilidade de baixar registros de casos individuais " 64 | "selecionados para investigação laboratorial adicional, não a totalidade " 65 | "dos casos notificados. Para saber como baixar esses dados, consulte o " 66 | "notebook de exemplo fornecido." 67 | 68 | #: ../../source/tutorials.rst:18 69 | msgid "" 70 | "The sinan module in the preprocessing package can load SINAN files from " 71 | "DBF, returning a pandas DataFrame fixing the typing of some columns." 72 | msgstr "" 73 | "O módulo sinan no pacote de pré-processamento pode carregar arquivos SINAN " 74 | "do formato DBF, retornando um pandas DataFrame corrigindo a tipagem de algumas colunas." 75 | 76 | #: ../../source/tutorials.rst:20 77 | msgid "" 78 | "It also offers geocoding capabilities which attributes geographical " 79 | "coordinates to every notified case in a SINAN Dataframe. You can use your" 80 | " Google API KEY to avoid Google's free limits. To do this just create an " 81 | "environment variable called GOOGLE_API_KEY. Warning: This can take a long" 82 | " time! and can stop halfway through, due to connections timing out. But " 83 | "PySUS creates knows how to restart from the last geocoded address. `See " 84 | "this notebook. 
" 85 | "`_" 86 | msgstr "" 87 | "Ele também oferece capacidades de geocodificação que atribuem coordenadas " 88 | "geográficas a cada caso notificado em um DataFrame do SINAN. Você pode usar " 89 | "sua chave API do Google para evitar os limites gratuitos do Google. Para isso, " 90 | "basta criar uma variável de ambiente chamada GOOGLE_API_KEY. Aviso: isso pode " 91 | "levar muito tempo! e pode parar no meio do caminho, devido a conexões expirando. " 92 | "Mas o PySUS sabe como reiniciar a partir do último endereço geocodificado. " 93 | "Veja este notebook. " 94 | "``_" 95 | 96 | #: ../../source/tutorials.rst:26 97 | msgid "Working with SIH DATA" 98 | msgstr "Trabalhando com a base de dados SIH" 99 | 100 | #: ../../source/tutorials.rst:27 101 | msgid "" 102 | "SIH is DATASUS' Hospital information system and it contains detailed " 103 | "information about hospitalizations. SIH Data can also be downloaded " 104 | "directly with PySUS. `See this notebook. " 105 | "`_" 106 | msgstr "" 107 | "SIH é o sistema de informações hospitalares do DATASUS e contém informações " 108 | "detalhadas sobre internações hospitalares. Os dados do SIH também podem ser " 109 | "baixados diretamente com o PySUS. Veja este notebook. " 110 | "``_" 111 | -------------------------------------------------------------------------------- /docs/source/locale/pt_BR/LC_MESSAGES/tutorials.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/tutorials.rst:3 23 | msgid "Tutorials" 24 | msgstr "Tutoriais" 25 | 26 | #: ../../source/tutorials.rst:5 27 | msgid "" 28 | "PySUS includes some Jupyter notebooks in its distribution package to " 29 | "serve as tutorials." 30 | msgstr "" 31 | "O PySUS inclui alguns notebooks Jupyter em seu pacote de distribuição " 32 | "para servir como tutoriais." 33 | 34 | #: ../../source/tutorials.rst:8 35 | msgid "Working with SINASC databases" 36 | msgstr "Trabalhando com a base de dados SINASC" 37 | 38 | #: ../../source/tutorials.rst:9 39 | msgid "" 40 | "SINASC is the national registry of live births. With PySUS, You can " 41 | "download SINASC tables directly and have them as dataframes to integrate " 42 | "in your analysis. `See this notebook. " 43 | "`_" 44 | msgstr "" 45 | "SINASC é o registro nacional de nascidos vivos. Com o PySUS, você pode fazer " 46 | "o download direto das tabelas do SINASC e tê-las como dataframes para integrar " 47 | "em sua análise. Veja este notebook. " 48 | "``_" 49 | 50 | #: ../../source/tutorials.rst:13 51 | msgid "Working with SINAN files" 52 | msgstr "Trabalhando com arquivos SINAN" 53 | 54 | #: ../../source/tutorials.rst:15 55 | msgid "" 56 | "SINAN is the national registry of cases for diseases of required " 57 | "reporting. 
PySUS offers the possibility of downloading records of " 58 | "individual cases selected for futher laboratory investigation, not the " 59 | "entirety of the reported cases. To see how to download these data look at" 60 | " the example notebook provided." 61 | msgstr "" 62 | "SINAN é o registro nacional de casos de doenças de notificação obrigatória. " 63 | "O PySUS oferece a possibilidade de baixar registros de casos individuais " 64 | "selecionados para investigação laboratorial adicional, não a totalidade " 65 | "dos casos notificados. Para saber como baixar esses dados, consulte o " 66 | "notebook de exemplo fornecido." 67 | 68 | #: ../../source/tutorials.rst:18 69 | msgid "" 70 | "The sinan module in the preprocessing package can load SINAN files from " 71 | "DBF, returning a pandas DataFrame fixing the typing of some columns." 72 | msgstr "" 73 | "O módulo sinan no pacote de pré-processamento pode carregar arquivos SINAN " 74 | "do formato DBF, retornando um pandas DataFrame corrigindo a tipagem de algumas colunas." 75 | 76 | #: ../../source/tutorials.rst:20 77 | msgid "" 78 | "It also offers geocoding capabilities which attributes geographical " 79 | "coordinates to every notified case in a SINAN Dataframe. You can use your" 80 | " Google API KEY to avoid Google's free limits. To do this just create an " 81 | "environment variable called GOOGLE_API_KEY. Warning: This can take a long" 82 | " time! and can stop halfway through, due to connections timing out. But " 83 | "PySUS creates knows how to restart from the last geocoded address. `See " 84 | "this notebook. " 85 | "`_" 86 | msgstr "" 87 | "Ele também oferece capacidades de geocodificação que atribuem coordenadas " 88 | "geográficas a cada caso notificado em um DataFrame do SINAN. Você pode usar " 89 | "sua chave API do Google para evitar os limites gratuitos do Google. Para isso, " 90 | "basta criar uma variável de ambiente chamada GOOGLE_API_KEY. Aviso: isso pode " 91 | "levar muito tempo! e pode parar no meio do caminho, devido a conexões expirando. " 92 | "Mas o PySUS sabe como reiniciar a partir do último endereço geocodificado. " 93 | "Veja este notebook. " 94 | "``_" 95 | 96 | #: ../../source/tutorials.rst:26 97 | msgid "Working with SIH DATA" 98 | msgstr "Trabalhando com a base de dados SIH" 99 | 100 | #: ../../source/tutorials.rst:27 101 | msgid "" 102 | "SIH is DATASUS' Hospital information system and it contains detailed " 103 | "information about hospitalizations. SIH Data can also be downloaded " 104 | "directly with PySUS. `See this notebook. " 105 | "`_" 106 | msgstr "" 107 | "SIH é o sistema de informações hospitalares do DATASUS e contém informações " 108 | "detalhadas sobre internações hospitalares. Os dados do SIH também podem ser " 109 | "baixados diretamente com o PySUS. Veja este notebook. " 110 | "``_" 111 | -------------------------------------------------------------------------------- /docs/source/locale/pt/LC_MESSAGES/SINAN.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 
5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/SINAN.ipynb:9 23 | msgid "Downloading data from the SINAN database" 24 | msgstr "Baixando dados do banco de dados do SINAN" 25 | 26 | #: ../../source/SINAN.ipynb:31 27 | msgid "" 28 | "SINAN is a database of reported cases of certain diseases that Brazilian " 29 | "law requires to be reported. Unfortunately the data available for free " 30 | "download, corresponds only to the investigated cases not the totality of " 31 | "the reported cases. Nevertheless it's an interesting dataset." 32 | msgstr "" 33 | "SINAN é um banco de dados de casos notificados de certas doenças que a " 34 | "lei brasileira exige que sejam reportadas. Infelizmente, os dados " 35 | "disponíveis para download gratuito correspondem apenas aos casos " 36 | "investigados, não à totalidade dos casos reportados. No entanto, é um " 37 | "conjunto de dados interessante." 38 | 39 | #: ../../source/SINAN.ipynb:33 40 | msgid "To find out what are these diseases, we can use PySUS:" 41 | msgstr "Para descobrir quais são essas doenças, podemos usar o PySUS:" 42 | 43 | #: ../../source/SINAN.ipynb:181 44 | msgid "" 45 | "These diseases are available in countrywide tables, so if we want to see " 46 | "the cases of ``Chagas`` disease in the state of Minas Gerais, first we " 47 | "can check which years are available:" 48 | msgstr "" 49 | "Essas doenças estão disponíveis em tabelas de todo o país, então, se " 50 | "quisermos ver os casos de doença de `Chagas` no estado de Minas Gerais, " 51 | "primeiro podemos verificar quais anos estão disponíveis:" 52 | 53 | #: ../../source/SINAN.ipynb:287 54 | msgid "" 55 | "We can also check when it was last updated for every disease, and if the " 56 | "table is preliminary or final." 57 | msgstr "" 58 | "Também podemos verificar quando foi a última atualização para cada " 59 | "doença e se a tabela é preliminar ou final." 60 | 61 | #: ../../source/SINAN.ipynb:884 62 | msgid "" 63 | "We can see, that we have data in final form, from 2000 until 2019, and " 64 | "preliminary data for 2020. Now we can download it:" 65 | msgstr "" 66 | "Podemos ver que temos dados em forma final, de 2000 até 2019, e dados " 67 | "preliminares para 2020. Agora podemos baixá-los:" 68 | 69 | #: ../../source/SINAN.ipynb:1371 70 | msgid "Downloading large files" 71 | msgstr "Baixando arquivos grandes" 72 | 73 | #: ../../source/SINAN.ipynb:1373 74 | msgid "" 75 | "Some SINAN files can be quite large and can take a bit longer to download" 76 | " and convert. As the default behavior is to download data in chunks, some" 77 | " folders may contain lots of parquet chunks" 78 | msgstr "" 79 | "Alguns arquivos do SINAN podem ser bastante grandes e podem demorar um " 80 | "pouco mais para serem baixados e convertidos. Como o comportamento " 81 | "padrão é baixar dados em blocos, algumas pastas podem conter muitos " 82 | "pedaços de parquet." 
83 | 84 | #: ../../source/SINAN.ipynb:1394 85 | msgid "" 86 | "The cases of dengue where downloaded to multiple chunks to the directory " 87 | "above" 88 | msgstr "" 89 | "Os casos de dengue foram baixados em vários pedaços para o diretório " 90 | "acima" 91 | 92 | #: ../../source/SINAN.ipynb:1478 ../../source/SINAN.ipynb:1969 93 | msgid "Decoding the age in SINAN tables" 94 | msgstr "Decodificando a idade nas tabelas do SINAN" 95 | 96 | #: ../../source/SINAN.ipynb:1480 ../../source/SINAN.ipynb:1971 97 | msgid "" 98 | "In SINAN the age comes encoded. PySUS can decode the age column " 99 | "``NU_IDADE_N`` into any of these units: years, months, days, or hours." 100 | msgstr "" 101 | "No SINAN, a idade vem codificada. O PySUS pode decodificar a coluna de " 102 | "idade `NU_IDADE_N` em qualquer uma dessas unidades: anos, meses, dias " 103 | "ou horas." 104 | 105 | #: ../../source/SINAN.ipynb:2497 106 | msgid "We can easily convert dates and numerical fields in the dataframe:" 107 | msgstr "Podemos facilmente converter datas e campos numéricos no dataframe:" 108 | 109 | #: ../../source/SINAN.ipynb:2865 110 | msgid "Let's convert the age to years and save it on a different column." 111 | msgstr "Vamos converter a idade para anos e salvá-la em uma coluna diferente." 112 | 113 | #: ../../source/SINAN.ipynb:3046 114 | msgid "Saving the Modified data" 115 | msgstr "Salvando os dados modificados" 116 | 117 | #: ../../source/SINAN.ipynb:3048 118 | msgid "" 119 | "We can save our dataframe in any format we wish to avoid having to redo " 120 | "this analysis next time. If we want to keep only the data from the state " 121 | "of Minas Gerais we need to filter the table using the UF code ``31``." 122 | msgstr "" 123 | "Pode-se salvar nosso conjunto de dados em qualquer formato que desejarmos " 124 | "para evitar ter que refazer esta análise na próxima vez. Se quisermos manter " 125 | "apenas os dados do estado de Minas Gerais, precisamos filtrar a tabela usando " 126 | "o código UF `31`." 127 | -------------------------------------------------------------------------------- /docs/source/locale/pt_BR/LC_MESSAGES/SINAN.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/SINAN.ipynb:9 23 | msgid "Downloading data from the SINAN database" 24 | msgstr "Baixando dados do banco de dados do SINAN" 25 | 26 | #: ../../source/SINAN.ipynb:31 27 | msgid "" 28 | "SINAN is a database of reported cases of certain diseases that Brazilian " 29 | "law requires to be reported. Unfortunately the data available for free " 30 | "download, corresponds only to the investigated cases not the totality of " 31 | "the reported cases. Nevertheless it's an interesting dataset." 
32 | msgstr "" 33 | "SINAN é um banco de dados de casos notificados de certas doenças que a " 34 | "lei brasileira exige que sejam reportadas. Infelizmente, os dados " 35 | "disponíveis para download gratuito correspondem apenas aos casos " 36 | "investigados, não à totalidade dos casos reportados. No entanto, é um " 37 | "conjunto de dados interessante." 38 | 39 | #: ../../source/SINAN.ipynb:33 40 | msgid "To find out what are these diseases, we can use PySUS:" 41 | msgstr "Para descobrir quais são essas doenças, podemos usar o PySUS:" 42 | 43 | #: ../../source/SINAN.ipynb:181 44 | msgid "" 45 | "These diseases are available in countrywide tables, so if we want to see " 46 | "the cases of ``Chagas`` disease in the state of Minas Gerais, first we " 47 | "can check which years are available:" 48 | msgstr "" 49 | "Essas doenças estão disponíveis em tabelas de todo o país, então, se " 50 | "quisermos ver os casos de doença de `Chagas` no estado de Minas Gerais, " 51 | "primeiro podemos verificar quais anos estão disponíveis:" 52 | 53 | #: ../../source/SINAN.ipynb:287 54 | msgid "" 55 | "We can also check when it was last updated for every disease, and if the " 56 | "table is preliminary or final." 57 | msgstr "" 58 | "Também podemos verificar quando foi a última atualização para cada " 59 | "doença e se a tabela é preliminar ou final." 60 | 61 | #: ../../source/SINAN.ipynb:884 62 | msgid "" 63 | "We can see, that we have data in final form, from 2000 until 2019, and " 64 | "preliminary data for 2020. Now we can download it:" 65 | msgstr "" 66 | "Podemos ver que temos dados em forma final, de 2000 até 2019, e dados " 67 | "preliminares para 2020. Agora podemos baixá-los:" 68 | 69 | #: ../../source/SINAN.ipynb:1371 70 | msgid "Downloading large files" 71 | msgstr "Baixando arquivos grandes" 72 | 73 | #: ../../source/SINAN.ipynb:1373 74 | msgid "" 75 | "Some SINAN files can be quite large and can take a bit longer to download" 76 | " and convert. As the default behavior is to download data in chunks, some" 77 | " folders may contain lots of parquet chunks" 78 | msgstr "" 79 | "Alguns arquivos do SINAN podem ser bastante grandes e podem demorar um " 80 | "pouco mais para serem baixados e convertidos. Como o comportamento " 81 | "padrão é baixar dados em blocos, algumas pastas podem conter muitos " 82 | "pedaços de parquet." 83 | 84 | #: ../../source/SINAN.ipynb:1394 85 | msgid "" 86 | "The cases of dengue where downloaded to multiple chunks to the directory " 87 | "above" 88 | msgstr "" 89 | "Os casos de dengue foram baixados em vários pedaços para o diretório " 90 | "acima" 91 | 92 | #: ../../source/SINAN.ipynb:1478 ../../source/SINAN.ipynb:1969 93 | msgid "Decoding the age in SINAN tables" 94 | msgstr "Decodificando a idade nas tabelas do SINAN" 95 | 96 | #: ../../source/SINAN.ipynb:1480 ../../source/SINAN.ipynb:1971 97 | msgid "" 98 | "In SINAN the age comes encoded. PySUS can decode the age column " 99 | "``NU_IDADE_N`` into any of these units: years, months, days, or hours." 100 | msgstr "" 101 | "No SINAN, a idade vem codificada. O PySUS pode decodificar a coluna de " 102 | "idade `NU_IDADE_N` em qualquer uma dessas unidades: anos, meses, dias " 103 | "ou horas." 104 | 105 | #: ../../source/SINAN.ipynb:2497 106 | msgid "We can easily convert dates and numerical fields in the dataframe:" 107 | msgstr "Podemos facilmente converter datas e campos numéricos no dataframe:" 108 | 109 | #: ../../source/SINAN.ipynb:2865 110 | msgid "Let's convert the age to years and save it on a different column." 
111 | msgstr "Vamos converter a idade para anos e salvá-la em uma coluna diferente." 112 | 113 | #: ../../source/SINAN.ipynb:3046 114 | msgid "Saving the Modified data" 115 | msgstr "Salvando os dados modificados" 116 | 117 | #: ../../source/SINAN.ipynb:3048 118 | msgid "" 119 | "We can save our dataframe in any format we wish to avoid having to redo " 120 | "this analysis next time. If we want to keep only the data from the state " 121 | "of Minas Gerais we need to filter the table using the UF code ``31``." 122 | msgstr "" 123 | "Pode-se salvar nosso conjunto de dados em qualquer formato que desejarmos " 124 | "para evitar ter que refazer esta análise na próxima vez. Se quisermos manter " 125 | "apenas os dados do estado de Minas Gerais, precisamos filtrar a tabela usando " 126 | "o código UF `31`." 127 | -------------------------------------------------------------------------------- /pysus/ftp/databases/sinan.py: -------------------------------------------------------------------------------- 1 | __all__ = ["SINAN"] 2 | 3 | from typing import List, Optional, Union 4 | 5 | from pysus.ftp import Database, Directory, File 6 | from pysus.ftp.utils import to_list, zfill_year 7 | 8 | 9 | class SINAN(Database): 10 | name = "SINAN" 11 | paths = ( 12 | Directory("/dissemin/publicos/SINAN/DADOS/FINAIS"), 13 | Directory("/dissemin/publicos/SINAN/DADOS/PRELIM"), 14 | ) 15 | metadata = { 16 | "long_name": "Doenças e Agravos de Notificação", 17 | "source": "https://portalsinan.saude.gov.br/", 18 | "description": ( 19 | "The Notifiable Diseases Information System - Sinan is primarily" 20 | "fed by the notification and investigation of cases of diseases " 21 | "and conditions listed in the national list of compulsorily " 22 | "notifiable diseases (Consolidation Ordinance No. 4, September 28," 23 | " 2017, Annex). However, states and municipalities are allowed to " 24 | "include other important health problems in their region, such as " 25 | "difilobotriasis in the municipality of São Paulo. Its effective " 26 | "use enables the dynamic diagnosis of the occurrence of an event " 27 | "in the population, providing evidence for causal explanations of " 28 | "compulsorily notifiable diseases and indicating risks to which " 29 | "people are exposed. This contributes to identifying the " 30 | "epidemiological reality of a specific geographical area. Its " 31 | "systematic, decentralized use contributes to the democratization " 32 | "of information, allowing all healthcare professionals to access " 33 | "and make it available to the community. Therefore, it is a " 34 | "relevant tool to assist in health planning, define intervention " 35 | "priorities, and evaluate the impact of interventions." 
36 |         ),
37 |     }
38 | 
39 |     diseases = {
40 |         "ACBI": "Acidente de trabalho com material biológico",
41 |         "ACGR": "Acidente de trabalho",
42 |         "ANIM": "Acidente por Animais Peçonhentos",
43 |         "ANTR": "Atendimento Antirrábico",
44 |         "BOTU": "Botulismo",
45 |         "CANC": "Câncer relacionado ao trabalho",
46 |         "CHAG": "Doença de Chagas Aguda",
47 |         "CHIK": "Febre de Chikungunya",
48 |         "COLE": "Cólera",
49 |         "COQU": "Coqueluche",
50 |         "DENG": "Dengue",
51 |         "DERM": "Dermatoses ocupacionais",
52 |         "DIFT": "Difteria",
53 |         "ESQU": "Esquistossomose",
54 |         "EXAN": "Doenças exantemáticas",
55 |         "FMAC": "Febre Maculosa",
56 |         "FTIF": "Febre Tifóide",
57 |         "HANS": "Hanseníase",
58 |         "HANT": "Hantavirose",
59 |         "HEPA": "Hepatites Virais",
60 |         "IEXO": "Intoxicação Exógena",
61 |         "INFL": "Influenza Pandêmica",
62 |         "LEIV": "Leishmaniose Visceral",
63 |         "LEPT": "Leptospirose",
64 |         "LERD": "LER/Dort",
65 |         "LTAN": "Leishmaniose Tegumentar Americana",
66 |         "MALA": "Malária",
67 |         "MENI": "Meningite",
68 |         "MENT": "Transtornos mentais relacionados ao trabalho",
69 |         "NTRA": "Notificação de Tracoma",
70 |         "PAIR": "Perda auditiva por ruído relacionado ao trabalho",
71 |         "PEST": "Peste",
72 |         "PFAN": "Paralisia Flácida Aguda",
73 |         "PNEU": "Pneumoconioses relacionadas ao trabalho",
74 |         "RAIV": "Raiva",
75 |         "SDTA": "Surto Doenças Transmitidas por Alimentos",
76 |         "SIFA": "Sífilis Adquirida",
77 |         "SIFC": "Sífilis Congênita",
78 |         "SIFG": "Sífilis em Gestante",
79 |         "SRC": "Síndrome da Rubéola Congênita",
80 |         "TETA": "Tétano Acidental",
81 |         "TETN": "Tétano Neonatal",
82 |         "TOXC": "Toxoplasmose Congênita",
83 |         "TOXG": "Toxoplasmose Gestacional",
84 |         "TRAC": "Inquérito de Tracoma",
85 |         "TUBE": "Tuberculose",
86 |         "VARC": "Varicela",
87 |         "VIOL": "Violência doméstica, sexual e/ou outras violências",
88 |         "ZIKA": "Zika Vírus",
89 |     }
90 | 
91 |     def describe(self, file: File) -> dict:
92 |         if file.extension.upper() == ".DBC":
93 |             dis_code, year = self.format(file)
94 | 
95 |             description = {
96 |                 "name": str(file.basename),
97 |                 "disease": self.diseases[dis_code],
98 |                 "year": zfill_year(year),
99 |                 "size": file.info["size"],
100 |                 "last_update": file.info["modify"],
101 |             }
102 |             return description
103 |         return {}
104 | 
105 |     def format(self, file: File) -> tuple:
106 |         year = file.name[-2:]
107 | 
108 |         if file.name.startswith("SRC"):
109 |             dis_code = file.name[:3]
110 |         elif file.name == "LEIBR22":
111 |             dis_code = "LEIV"  # MISSPELLED FILE NAME
112 |         elif file.name == "LERBR19":
113 |             dis_code = "LERD"  # ANOTHER ONE
114 |         else:
115 |             dis_code = file.name[:4]
116 | 
117 |         return dis_code, zfill_year(year)
118 | 
119 |     def get_files(
120 |         self,
121 |         dis_code: Optional[Union[str, list]] = None,
122 |         year: Optional[Union[str, int, list]] = None,
123 |     ) -> List[File]:
124 |         files = list(
125 |             filter(
126 |                 lambda f: f.extension.upper() in [".DBC", ".DBF"], self.files
127 |             )
128 |         )
129 | 
130 |         if dis_code:
131 |             codes = [c.upper() for c in to_list(dis_code)]
132 | 
133 |             if codes and not all(code in self.diseases for code in codes):
134 |                 raise ValueError(
135 |                     "Unknown disease(s): "
136 |                     f"{set(codes).difference(set(self.diseases))}"
137 |                 )
138 | 
139 |             files = list(filter(lambda f: self.format(f)[0] in codes, files))
140 | 
141 |         if year or str(year) in ["0", "00"]:
142 |             years = [zfill_year(str(y)[-2:]) for y in to_list(year)]
143 |             files = list(filter(lambda f: self.format(f)[1] in years, files))
144 | 
145 |         return files
146 | 
--------------------------------------------------------------------------------
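Closing with a minimal sketch for the SINAN class above, under the same assumption as the SIA example (that the base Database's load() populates self.files from the FINAIS and PRELIM directories); dis_code and year follow the get_files signature shown:

    from pysus.ftp.databases.sinan import SINAN

    sinan = SINAN().load()  # assumption: load() fills self.files from both paths
    # final and preliminary dengue files for 2019-2020
    files = sinan.get_files(dis_code="DENG", year=[2019, 2020])
    for f in files:
        print(sinan.describe(f))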