├── pysus ├── utilities │ ├── __init__.py │ └── brasil.py ├── metadata │ └── SINAN │ │ ├── ANIM.tar.gz │ │ ├── BOTU.tar.gz │ │ ├── CHAG.tar.gz │ │ ├── CHIK.tar.gz │ │ ├── COLE.tar.gz │ │ ├── COQU.tar.gz │ │ ├── DENG.tar.gz │ │ ├── DIFT.tar.gz │ │ ├── ESQU.tar.gz │ │ ├── FAMA.tar.gz │ │ ├── FMAC.tar.gz │ │ ├── FTIF.tar.gz │ │ ├── HANS.tar.gz │ │ ├── HANT.tar.gz │ │ ├── HEPA.tar.gz │ │ ├── IEXO.tar.gz │ │ ├── LEIV.tar.gz │ │ ├── LEPT.tar.gz │ │ ├── LTAN.tar.gz │ │ ├── MALA.tar.gz │ │ ├── MENI.tar.gz │ │ ├── PEST.tar.gz │ │ ├── RAIV.tar.gz │ │ ├── SIFC.tar.gz │ │ ├── SIFG.tar.gz │ │ ├── TETA.tar.gz │ │ ├── TETN.tar.gz │ │ └── TUBE.tar.gz ├── online_data │ ├── __init__.py │ ├── territory.py │ ├── Infogripe.py │ ├── CIHA.py │ ├── PNI.py │ ├── SINASC.py │ ├── SINAN.py │ ├── SIH.py │ ├── ESUS.py │ ├── SIA.py │ ├── Infodengue.py │ ├── CNES.py │ └── vaccine.py ├── tests │ ├── test_data │ │ ├── __init__.py │ │ ├── EPR-2016-06-01-2016.dbf │ │ ├── test_Infogripe.py │ │ ├── test_vaccine.py │ │ └── test_Infodengue.py │ ├── __init__.py │ ├── test_esus.py │ ├── test_utilities.py │ └── test_ibge.py ├── preprocessing │ ├── __init__.py │ ├── ESUS.py │ └── sinan.py ├── ftp │ ├── databases │ │ ├── __init__.py │ │ ├── sim.py │ │ ├── sinasc.py │ │ ├── ibge_datasus.py │ │ ├── pni.py │ │ ├── sih.py │ │ ├── ciha.py │ │ ├── sia.py │ │ ├── cnes.py │ │ └── sinan.py │ └── utils.py ├── __init__.py └── data │ ├── local.py │ └── __init__.py ├── docs ├── requirements.txt └── source │ ├── data │ └── IT_SIHSUS_1603.pdf │ ├── tutorials │ └── tutorials.rst │ ├── index.rst │ ├── locale │ ├── pt │ │ └── LC_MESSAGES │ │ │ ├── Chikungunya.po │ │ │ ├── Analyzing SIA.po │ │ │ ├── SIM.po │ │ │ ├── PNI.po │ │ │ ├── data-sources.po │ │ │ ├── Infogripe.po │ │ │ ├── Dengue.po │ │ │ ├── Zika.po │ │ │ ├── ESUS.po │ │ │ ├── index.po │ │ │ ├── Infodengue.po │ │ │ ├── IBGE_data.po │ │ │ ├── tutorials.po │ │ │ └── SINAN.po │ └── pt_BR │ │ └── LC_MESSAGES │ │ ├── Chikungunya.po │ │ ├── Analyzing SIA.po │ │ ├── SIM.po │ │ ├── PNI.po │ │ ├── data-sources.po │ │ ├── Infogripe.po │ │ ├── Dengue.po │ │ ├── Zika.po │ │ ├── ESUS.po │ │ ├── index.po │ │ ├── Infodengue.po │ │ ├── IBGE_data.po │ │ ├── tutorials.po │ │ └── SINAN.po │ ├── kepler_config.json │ ├── databases │ ├── Utilities.ipynb │ └── data-sources.rst │ └── rio.html ├── MANIFEST.in ├── docker ├── scripts │ ├── poetry-install.sh │ └── entrypoint.sh ├── docker-compose.yaml └── Dockerfile ├── conda └── dev.yaml ├── .idea ├── misc.xml └── PySUS.iml ├── readthedocs.yaml ├── setup.cfg ├── .github ├── FUNDING.yml └── workflows │ ├── python-package.yml │ └── release.yaml ├── .pre-commit-config.yaml ├── condarecipe └── pysus │ └── meta.yaml ├── Makefile ├── pyproject.toml ├── .releaserc.json └── .gitignore /pysus/utilities/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | nbsphinx 2 | sphinx 3 | sphinx-rtd-theme 4 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include pysus *.c *.h 2 | include pysus/utilities/* 3 | include requirements.txt 4 | -------------------------------------------------------------------------------- /pysus/metadata/SINAN/ANIM.tar.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/ANIM.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/BOTU.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/BOTU.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/CHAG.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/CHAG.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/CHIK.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/CHIK.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/COLE.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/COLE.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/COQU.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/COQU.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/DENG.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/DENG.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/DIFT.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/DIFT.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/ESQU.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/ESQU.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/FAMA.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/FAMA.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/FMAC.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/FMAC.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/FTIF.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/FTIF.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/HANS.tar.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/HANS.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/HANT.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/HANT.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/HEPA.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/HEPA.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/IEXO.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/IEXO.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/LEIV.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/LEIV.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/LEPT.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/LEPT.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/LTAN.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/LTAN.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/MALA.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/MALA.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/MENI.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/MENI.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/PEST.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/PEST.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/RAIV.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/RAIV.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/SIFC.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/SIFC.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/SIFG.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/SIFG.tar.gz 
-------------------------------------------------------------------------------- /pysus/metadata/SINAN/TETA.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/TETA.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/TETN.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/TETN.tar.gz -------------------------------------------------------------------------------- /pysus/metadata/SINAN/TUBE.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/metadata/SINAN/TUBE.tar.gz -------------------------------------------------------------------------------- /pysus/online_data/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 21/09/18 3 | by fccoelho 4 | license: GPL V3 or Later 5 | """ 6 | -------------------------------------------------------------------------------- /pysus/tests/test_data/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 12/12/18 3 | by fccoelho 4 | license: GPL V3 or Later 5 | """ 6 | -------------------------------------------------------------------------------- /docs/source/data/IT_SIHSUS_1603.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/docs/source/data/IT_SIHSUS_1603.pdf -------------------------------------------------------------------------------- /pysus/tests/test_data/EPR-2016-06-01-2016.dbf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlertaDengue/PySUS/HEAD/pysus/tests/test_data/EPR-2016-06-01-2016.dbf -------------------------------------------------------------------------------- /docker/scripts/poetry-install.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -ex 4 | 5 | poetry config virtualenvs.create false 6 | poetry install --without geo 7 | -------------------------------------------------------------------------------- /pysus/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on 19/07/16 3 | by fccoelho 4 | license: GPL V3 or Later 5 | """ 6 | 7 | __docformat__ = "restructuredtext en" 8 | -------------------------------------------------------------------------------- /pysus/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | Created on 19/07/16 4 | by fccoelho 5 | license: GPL V3 or Later 6 | """ 7 | 8 | __docformat__ = "restructuredtext en" 9 | -------------------------------------------------------------------------------- /conda/dev.yaml: -------------------------------------------------------------------------------- 1 | name: pysus 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - docker-compose 7 | - python>=3.10,<3.14 8 | - jupyter 9 | - make 10 | - poetry 11 | - pip 12 | -------------------------------------------------------------------------------- /docker/scripts/entrypoint.sh: 
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | jupyter_lab_path=$(which jupyter)
4 | 
5 | if [ -z "$jupyter_lab_path" ]; then
6 |     echo "Jupyter not found"
7 |     exit 1
8 | fi
9 | 
10 | $jupyter_lab_path lab --browser='firefox' --allow-root --NotebookApp.token='' --NotebookApp.password=''
11 | 
--------------------------------------------------------------------------------
/pysus/ftp/databases/__init__.py:
--------------------------------------------------------------------------------
1 | from .ciha import *  # noqa
2 | from .cnes import *  # noqa
3 | from .ibge_datasus import *  # noqa
4 | from .pni import *  # noqa
5 | from .sia import *  # noqa
6 | from .sih import *  # noqa
7 | from .sim import *  # noqa
8 | from .sinan import *  # noqa
9 | from .sinasc import *  # noqa
10 | 
--------------------------------------------------------------------------------
/pysus/tests/test_esus.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | 
3 | import pytest
4 | from pysus.online_data.ESUS import download
5 | 
6 | 
7 | class MyTestCase(unittest.TestCase):
8 |     @pytest.mark.skip(reason="This test takes too long")
9 |     @pytest.mark.timeout(5)
10 |     def test_download(self):
11 |         df = download(uf="se")
12 |         self.assertGreater(len(df), 0)
13 | 
14 | 
15 | if __name__ == "__main__":
16 |     unittest.main()
17 | 
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/docker/docker-compose.yaml:
--------------------------------------------------------------------------------
1 | version: '3.3'
2 | services:
3 |   jupyter:
4 |     build:
5 |       context: ".."
6 |       dockerfile: docker/Dockerfile
7 |     hostname: pysus-jupyter
8 |     container_name: pysus-jupyter
9 |     privileged: true
10 |     environment:
11 |       - DISPLAY=:0
12 |       - CI=${CI:-0}
13 |     volumes:
14 |       - /tmp/.X11-unix:/tmp/.X11-unix
15 |     entrypoint: ["/entrypoint.sh"]
16 |     command: ["/usr/bin/firefox"]
17 | 
--------------------------------------------------------------------------------
/pysus/__init__.py:
--------------------------------------------------------------------------------
1 | # type: ignore[attr-defined]
2 | """PySUS Python package"""
3 | 
4 | from importlib import metadata as importlib_metadata
5 | 
6 | from pysus.ftp.databases import *  # noqa
7 | 
8 | 
9 | def get_version() -> str:
10 |     try:
11 |         return importlib_metadata.version(__name__)
12 |     except importlib_metadata.PackageNotFoundError:  # pragma: no cover
13 |         return "1.0.0"  # changed by semantic-release
14 | 
15 | 
16 | version: str = get_version()
17 | __version__: str = version
18 | 
--------------------------------------------------------------------------------
/pysus/tests/test_data/test_Infogripe.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | 
3 | import pytest
4 | from pysus.online_data.Infogripe import DATASETS, download
5 | 
6 | 
7 | class InfoGripeTestCase(unittest.TestCase):
8 |     @pytest.mark.skip(reason="This test takes too long")
9 |     @pytest.mark.timeout(5)
10 |     def test_download(self):
11 |         for ds in DATASETS.keys():
12 |             df = download(ds)
13 |             self.assertGreater(len(df), 0)
14 | 
15 | 
16 | if __name__ == "__main__":
17 |     unittest.main()
18 | 
--------------------------------------------------------------------------------
/pysus/tests/test_data/test_vaccine.py:
--------------------------------------------------------------------------------
1 | import os
2 | import unittest
3 | 
4 | import pandas as pd
5 | import pytest
6 | from pysus.online_data.vaccine import download_covid
7 | 
8 | 
9 | class VaccineTestCase(unittest.TestCase):
10 |     @pytest.mark.timeout(15)
11 |     @unittest.skipIf(os.getenv("CI"), "Forbidden on CI")
12 |     def test_Download(self):
13 |         df = download_covid("BA", only_header=True)
14 |         self.assertIsInstance(df, pd.DataFrame)
15 |         self.assertEqual(df.shape, (10000, 42))
16 | 
17 | 
18 | if __name__ == "__main__":
19 |     unittest.main()
20 | 
--------------------------------------------------------------------------------
/pysus/tests/test_utilities.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | 
3 | import pytest
4 | from pysus.utilities.brasil import get_city_name_by_geocode
5 | 
6 | 
7 | class TestGetMunNameByGeocode(unittest.TestCase):
8 |     @pytest.mark.timeout(5)
9 |     def test_get_mun_name_by_geocode(self):
10 |         rio = get_city_name_by_geocode(3304557)
11 |         self.assertEqual(rio, "Rio de Janeiro")
12 | 
13 |         vale = get_city_name_by_geocode(1101757)
14 |         self.assertEqual(vale, "Vale do Anari")
15 | 
16 |         santa_helena = get_city_name_by_geocode(5219308)
17 |         self.assertEqual(santa_helena, "Santa Helena de Goiás")
18 | 
19 | 
20 | if __name__ == "__main__":
21 |     unittest.main()
22 | 
--------------------------------------------------------------------------------
/docs/source/tutorials/tutorials.rst:
--------------------------------------------------------------------------------
1 | =========
2 | Tutorials
3 | =========
4 | 
5 | PySUS includes some Jupyter notebooks in its distribution package to serve as tutorials.
6 | 
7 | 
8 | Preprocessing DATASUS data
9 | --------------------------
10 | #. :doc:`Preprocessing SINAN`
11 | #. :doc:`Preprocessing SIM`
12 | #. :doc:`Preprocessing SIM with municipality`
13 | 
14 | 
15 | Infodengue
16 | ----------
17 | #. :doc:`Infodengue`
18 | 
19 | 
20 | Infogripe
21 | ----------
22 | #. :doc:`Infogripe`
23 | 
24 | 
25 | IBGE Data
26 | ---------
27 | #. :doc:`IBGE_data`
28 | 
29 | 
30 | Dengue, Zika, Chikungunya
31 | -------------------------
32 | #. :doc:`Dengue`
33 | #. :doc:`Zika`
34 | #. :doc:`Chikungunya`
35 | 
--------------------------------------------------------------------------------
/readthedocs.yaml:
--------------------------------------------------------------------------------
1 | # .readthedocs.yaml
2 | # Read the Docs configuration file
3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
4 | 
5 | # Required
6 | version: 2
7 | 
8 | # Set the version of Python and other tools you might need
9 | build:
10 |   os: ubuntu-20.04
11 |   tools:
12 |     python: "3.11"
13 | 
14 | # Build documentation in the docs/ directory with Sphinx
15 | sphinx:
16 |   configuration: docs/source/conf.py
17 | 
18 | # If using Sphinx, optionally build your docs in additional formats such as PDF
19 | # formats:
20 | #    - pdf
21 | 
22 | # Optionally declare the Python requirements required to build your docs
23 | python:
24 |   install:
25 |     - requirements: docs/requirements.txt
26 | 
--------------------------------------------------------------------------------
/.idea/PySUS.iml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | license_file = LICENSE
3 | 
4 | [options]
5 | packages = find:
6 | include =
7 |     pysus
8 | 
9 | [build_sphinx]
10 | project = 'PySUS'
11 | version = 0.6
12 | release = 0.6.3
13 | source-dir = ./docs/source
14 | 
15 | [flake8]
16 | exclude = tests,build,dist,docs,.git,__pycache__,.tox,.eggs,*.egg,.asv
17 | max-line-length = 79
18 | ignore = D202,D203,W503,E203
19 | 
20 | [isort]
21 | known_third_party = dbfread,elasticsearch,geobr,geocoder,numpy,pandas,pyarrow,pyreaddbc,requests,tqdm,urllib3
22 | ensure_newline_before_comments=true
23 | line_length = 79
24 | multi_line_output = 3
25 | include_trailing_comma = true
26 | skip = docs/
27 | 
28 | [aliases]
29 | test = pytest
30 | 
31 | [tool:pytest]
32 | addopts = --ignore=setup.py
33 | 
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | .. PySUS documentation master file, created by
2 |    sphinx-quickstart on Thu Aug 25 10:37:19 2016.
3 |    You can adapt this file completely to your liking, but it should at least
4 |    contain the root `toctree` directive.
5 | 
6 | Welcome to PySUS documentation!
7 | =================================
8 | 
9 | PySUS is a collection of helper code to download and analyze data from `DATASUS <https://datasus.saude.gov.br>`_ (Brazilian Universal Health System). Contributions are welcome!
10 | 
11 | Contents:
12 | 
13 | ..
toctree:: 14 | :maxdepth: 2 15 | 16 | Data Sources 17 | Tutorials 18 | 19 | 20 | Indices and tables 21 | ================== 22 | 23 | * :ref:`genindex` 24 | * :ref:`modindex` 25 | * :ref:`search` 26 | -------------------------------------------------------------------------------- /pysus/online_data/territory.py: -------------------------------------------------------------------------------- 1 | from typing import List, Union 2 | 3 | from pysus.ftp import CACHEPATH, Directory, File 4 | 5 | 6 | def list_tables() -> List[File]: 7 | d = Directory("/territorio/tabelas") 8 | tabelas = [f for f in d.content if "territor" in f.name] 9 | return tabelas 10 | 11 | 12 | def list_maps() -> List[File]: 13 | d = Directory("/territorio/mapas") 14 | mapas = [f for f in d.content if "mapas" in f.name] 15 | return mapas 16 | 17 | 18 | def download(fname: Union[str, list], data_path: str = CACHEPATH): 19 | files = ( 20 | Directory("/territorio/tabelas").content 21 | + Directory("/territorio/mapas").content 22 | ) 23 | for file in files: 24 | if fname in [str(file), file.name]: 25 | # handles suffixed and no suffixed `fname`s 26 | return file.download() 27 | -------------------------------------------------------------------------------- /pysus/online_data/Infogripe.py: -------------------------------------------------------------------------------- 1 | """ 2 | Downloads data made available by the Infogripe service 3 | """ 4 | 5 | import pandas as pd 6 | 7 | BASEURL = r"https://gitlab.fiocruz.br/marcelo.gomes/infogripe/-/raw/master/Dados/InfoGripe/" # noqa 8 | DATASETS = { 9 | "Alerta de situação": r"tabela_de_alerta.csv", 10 | "Casos por idade, sexo e virus": r"dados_semanais_faixa_etaria_sexo_virus.csv.gz", # noqa 11 | "Casos Totais e estimativas": r"serie_temporal_com_estimativas_recentes.csv.gz", # noqa 12 | "Valores esperados por localidades": "valores_esperados_por_localidade.csv", # noqa 13 | } 14 | 15 | 16 | def list_datasets(): 17 | return list(DATASETS.keys()) 18 | 19 | 20 | def download(dataset_name): 21 | url = BASEURL + DATASETS[dataset_name] + "?inline=false" 22 | df = pd.read_csv(url, delimiter=";", decimal=",") 23 | return df 24 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [fccoelho] # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 13 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 14 | -------------------------------------------------------------------------------- /docs/source/locale/pt/LC_MESSAGES/Chikungunya.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 
2 | # Copyright (C) 2016, Flavio Codeco Coelho
3 | # This file is distributed under the same license as the PySUS package.
4 | # FIRST AUTHOR , 2023.
5 | #
6 | #, fuzzy
7 | msgid ""
8 | msgstr ""
9 | "Project-Id-Version: PySUS 0.1.13\n"
10 | "Report-Msgid-Bugs-To: \n"
11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n"
12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
13 | "Last-Translator: FULL NAME \n"
14 | "Language: pt\n"
15 | "Language-Team: pt \n"
16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n"
17 | "MIME-Version: 1.0\n"
18 | "Content-Type: text/plain; charset=utf-8\n"
19 | "Content-Transfer-Encoding: 8bit\n"
20 | "Generated-By: Babel 2.12.1\n"
21 | 
22 | #: ../../source/Chikungunya.ipynb:9
23 | msgid "Analyzing Chikungunya data"
24 | msgstr "Analisando dados de Chikungunya"
25 | 
--------------------------------------------------------------------------------
/docs/source/locale/pt_BR/LC_MESSAGES/Chikungunya.po:
--------------------------------------------------------------------------------
1 | # SOME DESCRIPTIVE TITLE.
2 | # Copyright (C) 2016, Flavio Codeco Coelho
3 | # This file is distributed under the same license as the PySUS package.
4 | # FIRST AUTHOR , 2023.
5 | #
6 | #, fuzzy
7 | msgid ""
8 | msgstr ""
9 | "Project-Id-Version: PySUS 0.1.13\n"
10 | "Report-Msgid-Bugs-To: \n"
11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n"
12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
13 | "Last-Translator: FULL NAME \n"
14 | "Language: pt\n"
15 | "Language-Team: pt \n"
16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n"
17 | "MIME-Version: 1.0\n"
18 | "Content-Type: text/plain; charset=utf-8\n"
19 | "Content-Transfer-Encoding: 8bit\n"
20 | "Generated-By: Babel 2.12.1\n"
21 | 
22 | #: ../../source/Chikungunya.ipynb:9
23 | msgid "Analyzing Chikungunya data"
24 | msgstr "Analisando dados de Chikungunya"
25 | 
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | default_stages: [commit, push]
2 | 
3 | repos:
4 |   - repo: https://github.com/pre-commit/pre-commit-hooks
5 |     rev: v4.1.0
6 |     hooks:
7 |       - id: end-of-file-fixer
8 | 
9 |   - repo: local
10 |     hooks:
11 |       - entry: black
12 |         id: black
13 |         name: black
14 |         exclude: |
15 |           (?x)(
16 |             docs
17 |           )
18 |         files: ""
19 |         language: system
20 |         pass_filenames: true
21 |         types:
22 |           - python
23 |           - file
24 | 
25 |       - entry: flake8
26 |         exclude: ^$
27 |         files: ""
28 |         id: flake8
29 |         language: python
30 |         name: flake8
31 |         pass_filenames: true
32 |         types:
33 |           - python
34 | 
35 |       - entry: isort
36 |         exclude: "^.*/js/.*$"
37 |         files: ""
38 |         id: isort
39 |         language: python
40 |         name: isort
41 |         pass_filenames: true
42 |         types:
43 |           - python
44 | 
--------------------------------------------------------------------------------
/pysus/ftp/utils.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | from typing import Union
3 | 
4 | from pysus.ftp import to_list
5 | from pysus.utilities.brasil import MONTHS, UFs  # noqa
6 | 
7 | 
8 | def zfill_year(year: Union[str, int]) -> int:
9 |     """
10 |     Formats a len(2) year into len(4) with the correct century prefix
11 |     E.g: 20 -> 2020; 99 -> 1999
12 |     """
13 |     year = str(year)[-2:].zfill(2)
14 |     current_year = str(datetime.datetime.now().year)[-2:]
15 |     prefix = "19" if str(year) > current_year else "20"
16 |     return int(prefix + str(year))
17 | 
18 | 
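# Worked example of the century inference above (illustrative; the pivot is
# the current two-digit year, so with 2025 as "now" the cutoff is "25"):
#   zfill_year(20) -> 2020  ("20" > "25" is False, so the "20" prefix is used)
#   zfill_year(99) -> 1999  ("99" > "25" is True, so the "19" prefix is used)
# The string comparison is safe here because both operands are zero-filled
# to exactly two digits.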
19 | def parse_UFs(UF: Union[list[str], str]) -> list:
20 |     """
21 |     Formats state abbreviations into the correct format and returns a list.
22 |     Also checks if there is an incorrect UF in the list.
23 |     E.g: ['SC', 'mt', 'ba'] -> ['SC', 'MT', 'BA']
24 |     """
25 |     ufs = [uf.upper() for uf in to_list(UF)]
26 |     if not all(uf in list(UFs) for uf in ufs):
27 |         raise ValueError(f"Unknown UF(s): {set(ufs).difference(list(UFs))}")
28 |     return ufs
29 | 
--------------------------------------------------------------------------------
/.github/workflows/python-package.yml:
--------------------------------------------------------------------------------
1 | name: main
2 | 
3 | on: [push, pull_request]
4 | 
5 | jobs:
6 |   tests:
7 |     runs-on: ubuntu-latest
8 |     timeout-minutes: 15
9 | 
10 |     defaults:
11 |       run:
12 |         shell: bash -l {0}
13 | 
14 |     strategy:
15 |       matrix:
16 |         python_version: ["3.10", "3.11", "3.12", "3.13"]
17 | 
18 |     concurrency:
19 |       group: ci-tests-${{ matrix.python_version }}-${{ github.ref }}
20 |       cancel-in-progress: true
21 | 
22 |     steps:
23 |       - uses: actions/checkout@v4
24 | 
25 |       - uses: conda-incubator/setup-miniconda@v3
26 |         with:
27 |           miniforge-version: latest
28 |           environment-file: conda/dev.yaml
29 |           channels: conda-forge,nodefaults
30 |           activate-environment: pysus
31 |           auto-update-conda: true
32 |           conda-solver: libmamba
33 | 
34 |       - name: Run jupyterlab with PySUS
35 |         run: |
36 |           make run-jupyter-pysus
37 |           # make test-jupyter-pysus ## takes too long
38 | 
39 |       - name: Linting & Tests
40 |         run: |
41 |           export CI=1
42 |           poetry install
43 |           pre-commit run --all-files
44 |           make test-pysus
45 | 
--------------------------------------------------------------------------------
/docs/source/locale/pt/LC_MESSAGES/Analyzing SIA.po:
--------------------------------------------------------------------------------
1 | # SOME DESCRIPTIVE TITLE.
2 | # Copyright (C) 2016, Flavio Codeco Coelho
3 | # This file is distributed under the same license as the PySUS package.
4 | # FIRST AUTHOR , 2023.
5 | #
6 | #, fuzzy
7 | msgid ""
8 | msgstr ""
9 | "Project-Id-Version: PySUS 0.1.13\n"
10 | "Report-Msgid-Bugs-To: \n"
11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n"
12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
13 | "Last-Translator: FULL NAME \n"
14 | "Language: pt\n"
15 | "Language-Team: pt \n"
16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n"
17 | "MIME-Version: 1.0\n"
18 | "Content-Type: text/plain; charset=utf-8\n"
19 | "Content-Transfer-Encoding: 8bit\n"
20 | "Generated-By: Babel 2.12.1\n"
21 | 
22 | #: ../../source/Analyzing SIA.ipynb:21
23 | msgid "Dataset types"
24 | msgstr "Tipos de Datasets"
25 | 
26 | #: ../../source/Analyzing SIA.ipynb:23
27 | msgid ""
28 | "The SIA Information system contains multiple types of datasets we can "
29 | "download with PySUS. These are:"
30 | msgstr ""
31 | "No banco de dados SIA é possível encontrar diferentes grupos de dados que "
32 | "podem ser extraídos com o PySUS. São eles:"
33 | 
--------------------------------------------------------------------------------
/docs/source/locale/pt_BR/LC_MESSAGES/Analyzing SIA.po:
--------------------------------------------------------------------------------
1 | # SOME DESCRIPTIVE TITLE.
2 | # Copyright (C) 2016, Flavio Codeco Coelho
3 | # This file is distributed under the same license as the PySUS package.
4 | # FIRST AUTHOR , 2023.
5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/Analyzing SIA.ipynb:21 23 | msgid "Dataset types" 24 | msgstr "Tipos de Datasets" 25 | 26 | #: ../../source/Analyzing SIA.ipynb:23 27 | msgid "" 28 | "The SIA Information system contains multiple types of datasets we can " 29 | "download with PySUS. These are:" 30 | msgstr "" 31 | "No banco de dados SIA é possível encontrar diferentes grupos de dados que " 32 | "podem ser extraídos com o PySUS. São eles:" 33 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: release 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: [ main ] 7 | pull_request: 8 | branches: [ main ] 9 | 10 | jobs: 11 | build: 12 | runs-on: ubuntu-latest 13 | 14 | defaults: 15 | run: 16 | shell: bash -l {0} 17 | 18 | steps: 19 | - uses: actions/checkout@v4 20 | 21 | - uses: conda-incubator/setup-miniconda@v3 22 | with: 23 | miniforge-version: latest 24 | environment-file: conda/dev.yaml 25 | channels: conda-forge,nodefaults 26 | activate-environment: pysus 27 | auto-update-conda: true 28 | conda-solver: libmamba 29 | 30 | - uses: actions/setup-node@v3 31 | with: 32 | node-version: 20.11.0 33 | 34 | - name: Test release 35 | if: ${{ github.event_name != 'workflow_dispatch' }} 36 | env: 37 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 38 | run: make release-dry 39 | 40 | - name: Release 41 | if: ${{ github.event_name == 'workflow_dispatch' }} 42 | env: 43 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 44 | PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} 45 | run: | 46 | poetry config pypi-token.pypi ${PYPI_TOKEN} 47 | make release 48 | -------------------------------------------------------------------------------- /condarecipe/pysus/meta.yaml: -------------------------------------------------------------------------------- 1 | {% set name = "PySUS" %} 2 | {% set version = "0.5.14" %} 3 | 4 | package: 5 | name: "{{ name|lower }}" 6 | version: "{{ version }}" 7 | 8 | source: 9 | url: "https://pypi.io/packages/source/{{ name[0] }}/{{ name }}/{{ name }}-{{ version }}.tar.gz" 10 | sha256: b5215bf4bf2afb4f9d552deab717df3ab26aeed8dc9750434dde285f227f045d 11 | 12 | build: 13 | number: 0 14 | script: "{{ PYTHON }} -m pip install . 
-vv" 15 | 16 | requirements: 17 | host: 18 | - cffi >=1.0.0 19 | - dbfread 20 | - fastparquet 21 | - geocoder 22 | - pandas 23 | - pip 24 | - python 25 | - requests 26 | run: 27 | - cffi >=1.0.0 28 | - dbfread 29 | - fastparquet 30 | - geocoder 31 | - pandas 32 | - pyarrow 33 | - python 34 | - requests 35 | - elasticsearch 36 | 37 | test: 38 | imports: 39 | - pysus 40 | - pysus.demography 41 | - pysus.online_data 42 | - pysus.preprocessing 43 | - pysus.tests 44 | - pysus.tests.test_data 45 | 46 | about: 47 | home: "https://github.com/fccoelho/PySUS" 48 | license: gpl-v3 49 | license_family: GPL3 50 | license_file: 51 | summary: "Tools for dealing with Brazil's Public health data" 52 | doc_url: 53 | dev_url: 54 | 55 | extra: 56 | recipe-maintainers: 57 | - fccoelho 58 | -------------------------------------------------------------------------------- /docs/source/locale/pt/LC_MESSAGES/SIM.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/SIM.ipynb:9 23 | msgid "Downloading data from SIM" 24 | msgstr "Extraindo dados do Sistema de Informação sobre Mortalidade (SIM)" 25 | 26 | #: ../../source/SIM.ipynb:11 27 | msgid "" 28 | "In this notebook we will use PySUS to download and treat mortality data " 29 | "from SIM." 30 | msgstr "" 31 | "Neste notebook, usaremos o PySUS para baixar e tratar os dados de " 32 | "mortalidade do SIM." 33 | 34 | #: ../../source/SIM.ipynb:822 35 | msgid "Humanizing some of the encoded variables." 36 | msgstr "Humanizando algumas das variáveis codificadas." 37 | -------------------------------------------------------------------------------- /docs/source/locale/pt_BR/LC_MESSAGES/SIM.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/SIM.ipynb:9 23 | msgid "Downloading data from SIM" 24 | msgstr "Extraindo dados do Sistema de Informação sobre Mortalidade (SIM)" 25 | 26 | #: ../../source/SIM.ipynb:11 27 | msgid "" 28 | "In this notebook we will use PySUS to download and treat mortality data " 29 | "from SIM." 
30 | msgstr "" 31 | "Neste notebook, usaremos o PySUS para baixar e tratar os dados de " 32 | "mortalidade do SIM." 33 | 34 | #: ../../source/SIM.ipynb:822 35 | msgid "Humanizing some of the encoded variables." 36 | msgstr "Humanizando algumas das variáveis codificadas." 37 | -------------------------------------------------------------------------------- /docs/source/locale/pt/LC_MESSAGES/PNI.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/PNI.ipynb:9 23 | msgid "Downloading data from the National Immunization program (PNI)" 24 | msgstr "Extraindo dados do Programa Nacional de Imunização (PNI)" 25 | 26 | #: ../../source/PNI.ipynb:30 27 | msgid "" 28 | "We can start by checking the data available for a given state, for " 29 | "example, Rio de Janeiro:" 30 | msgstr "" 31 | "Podemos começar verificando os dados disponíveis para um determinado " 32 | "estado, por exemplo, o Rio de Janeiro:" 33 | 34 | #: ../../source/PNI.ipynb:151 35 | msgid "Then we can fetch data from a particular year:" 36 | msgstr "Em seguida, podemos extrair os dados de um ano específico:" 37 | -------------------------------------------------------------------------------- /docs/source/locale/pt_BR/LC_MESSAGES/PNI.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 
5 | #
6 | #, fuzzy
7 | msgid ""
8 | msgstr ""
9 | "Project-Id-Version: PySUS 0.1.13\n"
10 | "Report-Msgid-Bugs-To: \n"
11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n"
12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
13 | "Last-Translator: FULL NAME \n"
14 | "Language: pt\n"
15 | "Language-Team: pt \n"
16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n"
17 | "MIME-Version: 1.0\n"
18 | "Content-Type: text/plain; charset=utf-8\n"
19 | "Content-Transfer-Encoding: 8bit\n"
20 | "Generated-By: Babel 2.12.1\n"
21 | 
22 | #: ../../source/PNI.ipynb:9
23 | msgid "Downloading data from the National Immunization program (PNI)"
24 | msgstr "Extraindo dados do Programa Nacional de Imunização (PNI)"
25 | 
26 | #: ../../source/PNI.ipynb:30
27 | msgid ""
28 | "We can start by checking the data available for a given state, for "
29 | "example, Rio de Janeiro:"
30 | msgstr ""
31 | "Podemos começar verificando os dados disponíveis para um determinado "
32 | "estado, por exemplo, o Rio de Janeiro:"
33 | 
34 | #: ../../source/PNI.ipynb:151
35 | msgid "Then we can fetch data from a particular year:"
36 | msgstr "Em seguida, podemos extrair os dados de um ano específico:"
37 | 
--------------------------------------------------------------------------------
/docs/source/locale/pt/LC_MESSAGES/data-sources.po:
--------------------------------------------------------------------------------
1 | # SOME DESCRIPTIVE TITLE.
2 | # Copyright (C) 2016, Flavio Codeco Coelho
3 | # This file is distributed under the same license as the PySUS package.
4 | # FIRST AUTHOR , 2023.
5 | #
6 | #, fuzzy
7 | msgid ""
8 | msgstr ""
9 | "Project-Id-Version: PySUS 0.1.13\n"
10 | "Report-Msgid-Bugs-To: \n"
11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n"
12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
13 | "Last-Translator: FULL NAME \n"
14 | "Language: pt\n"
15 | "Language-Team: pt \n"
16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n"
17 | "MIME-Version: 1.0\n"
18 | "Content-Type: text/plain; charset=utf-8\n"
19 | "Content-Transfer-Encoding: 8bit\n"
20 | "Generated-By: Babel 2.12.1\n"
21 | 
22 | #: ../../source/data-sources.rst:3
23 | msgid "Data Sources"
24 | msgstr "Bancos de Dados"
25 | 
26 | #: ../../source/data-sources.rst:5
27 | msgid ""
28 | "PySUS allows you to download data on demand from DATASUS databases. "
29 | "Currently, the following databases can be downloaded:"
30 | msgstr ""
31 | "O PySUS permite a extração de dados das bases de dados do DATASUS. "
32 | "Atualmente, as seguintes bases de dados podem ser extraídas:"
33 | 
34 | #: ../../source/data-sources.rst:8
35 | msgid "SINAN"
36 | msgstr "SINAN"
37 | 
38 | #: ../../source/data-sources.rst:9
39 | msgid "SINASC"
40 | msgstr "SINASC"
41 | 
42 | #: ../../source/data-sources.rst:10
43 | msgid "SIM"
44 | msgstr "SIM"
45 | 
46 | #: ../../source/data-sources.rst:11
47 | msgid "SIH"
48 | msgstr "SIH"
49 | 
50 | #: ../../source/data-sources.rst:12
51 | msgid "SIA"
52 | msgstr "SIA"
53 | 
--------------------------------------------------------------------------------
/docs/source/locale/pt_BR/LC_MESSAGES/data-sources.po:
--------------------------------------------------------------------------------
1 | # SOME DESCRIPTIVE TITLE.
2 | # Copyright (C) 2016, Flavio Codeco Coelho
3 | # This file is distributed under the same license as the PySUS package.
4 | # FIRST AUTHOR , 2023.
5 | #
6 | #, fuzzy
7 | msgid ""
8 | msgstr ""
9 | "Project-Id-Version: PySUS 0.1.13\n"
10 | "Report-Msgid-Bugs-To: \n"
11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n"
12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
13 | "Last-Translator: FULL NAME \n"
14 | "Language: pt\n"
15 | "Language-Team: pt \n"
16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n"
17 | "MIME-Version: 1.0\n"
18 | "Content-Type: text/plain; charset=utf-8\n"
19 | "Content-Transfer-Encoding: 8bit\n"
20 | "Generated-By: Babel 2.12.1\n"
21 | 
22 | #: ../../source/data-sources.rst:3
23 | msgid "Data Sources"
24 | msgstr "Bancos de Dados"
25 | 
26 | #: ../../source/data-sources.rst:5
27 | msgid ""
28 | "PySUS allows you to download data on demand from DATASUS databases. "
29 | "Currently, the following databases can be downloaded:"
30 | msgstr ""
31 | "O PySUS permite a extração de dados das bases de dados do DATASUS. "
32 | "Atualmente, as seguintes bases de dados podem ser extraídas:"
33 | 
34 | #: ../../source/data-sources.rst:8
35 | msgid "SINAN"
36 | msgstr "SINAN"
37 | 
38 | #: ../../source/data-sources.rst:9
39 | msgid "SINASC"
40 | msgstr "SINASC"
41 | 
42 | #: ../../source/data-sources.rst:10
43 | msgid "SIM"
44 | msgstr "SIM"
45 | 
46 | #: ../../source/data-sources.rst:11
47 | msgid "SIH"
48 | msgstr "SIH"
49 | 
50 | #: ../../source/data-sources.rst:12
51 | msgid "SIA"
52 | msgstr "SIA"
53 | 
--------------------------------------------------------------------------------
/docs/source/locale/pt/LC_MESSAGES/Infogripe.po:
--------------------------------------------------------------------------------
1 | # SOME DESCRIPTIVE TITLE.
2 | # Copyright (C) 2016, Flavio Codeco Coelho
3 | # This file is distributed under the same license as the PySUS package.
4 | # FIRST AUTHOR , 2023.
5 | #
6 | #, fuzzy
7 | msgid ""
8 | msgstr ""
9 | "Project-Id-Version: PySUS 0.1.13\n"
10 | "Report-Msgid-Bugs-To: \n"
11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n"
12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
13 | "Last-Translator: FULL NAME \n"
14 | "Language: pt\n"
15 | "Language-Team: pt \n"
16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n"
17 | "MIME-Version: 1.0\n"
18 | "Content-Type: text/plain; charset=utf-8\n"
19 | "Content-Transfer-Encoding: 8bit\n"
20 | "Generated-By: Babel 2.12.1\n"
21 | 
22 | #: ../../source/Infogripe.ipynb:9
23 | msgid "Working with Infogripe datasets"
24 | msgstr "Trabalhando com a base de dados Infogripe"
25 | 
26 | #: ../../source/Infogripe.ipynb:11
27 | msgid ""
28 | "`Infogripe `__ is an online platform that "
29 | "tracks epidemiolgical data about influenza-like diseases in Brazil."
30 | msgstr ""
31 | "O `Infogripe `__ é uma plataforma online "
32 | "que acompanha dados epidemiológicos sobre doenças semelhantes à gripe no Brasil."
33 | 
34 | #: ../../source/Infogripe.ipynb:33
35 | msgid ""
36 | "Infogripe makes available different datasets. To findout which ones are "
37 | "available before downloading we can ask PySUS to list them:"
38 | msgstr ""
39 | "O Infogripe disponibiliza diferentes conjuntos de dados. Para descobrir "
40 | "quais estão disponíveis antes de baixá-los, podemos solicitar ao PySUS que os liste:"
41 | 
--------------------------------------------------------------------------------
/docs/source/locale/pt_BR/LC_MESSAGES/Infogripe.po:
--------------------------------------------------------------------------------
1 | # SOME DESCRIPTIVE TITLE.
2 | # Copyright (C) 2016, Flavio Codeco Coelho
3 | # This file is distributed under the same license as the PySUS package.
4 | # FIRST AUTHOR , 2023.
5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/Infogripe.ipynb:9 23 | msgid "Working with Infogripe datasets" 24 | msgstr "Trabalhando com a base de dados Infogripe" 25 | 26 | #: ../../source/Infogripe.ipynb:11 27 | msgid "" 28 | "`Infogripe `__ is an online platform that " 29 | "tracks epidemiolgical data about influenza-like diseases in Brazil." 30 | msgstr "" 31 | "O `Infogripe `__ é uma plataforma online " 32 | "que acompanha dados epidemiológicos sobre doenças semelhantes à gripe no Brasil." 33 | 34 | #: ../../source/Infogripe.ipynb:33 35 | msgid "" 36 | "Infogripe makes available different datasets. To findout which ones are " 37 | "available before downloading we can ask PySUS to list them:" 38 | msgstr "" 39 | "O Infogripe disponibiliza diferentes conjuntos de dados. Para descobrir " 40 | "quais estão disponíveis antes de baixá-los, podemos solicitar ao PySUS que os liste:" 41 | -------------------------------------------------------------------------------- /pysus/utilities/brasil.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | from typing import Union 4 | 5 | with open( 6 | f"{Path(__file__).parent}/municipios.json", "r", encoding="utf-8-sig" 7 | ) as muns: 8 | MUNICIPALITIES = json.loads(muns.read()) 9 | 10 | MUN_BY_GEOCODE = {mun["geocodigo"]: mun["municipio"] for mun in MUNICIPALITIES} 11 | 12 | 13 | UFs = { 14 | "BR": "Brasil", 15 | "AC": "Acre", 16 | "AL": "Alagoas", 17 | "AP": "Amapá", 18 | "AM": "Amazonas", 19 | "BA": "Bahia", 20 | "CE": "Ceará", 21 | "ES": "Espírito Santo", 22 | "GO": "Goiás", 23 | "MA": "Maranhão", 24 | "MT": "Mato Grosso", 25 | "MS": "Mato Grosso do Sul", 26 | "MG": "Minas Gerais", 27 | "PA": "Pará", 28 | "PB": "Paraíba", 29 | "PR": "Paraná", 30 | "PE": "Pernambuco", 31 | "PI": "Piauí", 32 | "RJ": "Rio de Janeiro", 33 | "RN": "Rio Grande do Norte", 34 | "RS": "Rio Grande do Sul", 35 | "RO": "Rondônia", 36 | "RR": "Roraima", 37 | "SC": "Santa Catarina", 38 | "SP": "São Paulo", 39 | "SE": "Sergipe", 40 | "TO": "Tocantins", 41 | "DF": "Distrito Federal", 42 | } 43 | 44 | MONTHS = { 45 | 1: "Janeiro", 46 | 2: "Fevereiro", 47 | 3: "Março", 48 | 4: "Abril", 49 | 5: "Maio", 50 | 6: "Junho", 51 | 7: "Julho", 52 | 8: "Agosto", 53 | 9: "Setembro", 54 | 10: "Outubro", 55 | 11: "Novembro", 56 | 12: "Dezembro", 57 | } 58 | 59 | 60 | def get_city_name_by_geocode(geocode: Union[str, int]): 61 | """ 62 | Returns the Municipality name from its geocode (IBGE) 63 | :param geocode: 7 digits city code, according to IBGE format 64 | :return: City name 65 | """ 66 | 67 | return MUN_BY_GEOCODE[int(geocode)] 68 | -------------------------------------------------------------------------------- /pysus/online_data/CIHA.py: -------------------------------------------------------------------------------- 1 | """ 2 | Download data from CIHA and CIH (Old) 3 | Hospital and Ambulatorial information system 4 | http://ciha.datasus.gov.br/CIHA/index.php?area=03 5 | 6 | by fccoelho 7 | license: GPL V3 or Later 8 | """ 9 | from 
typing import Union
10 | 
11 | from loguru import logger
12 | from pysus.ftp import CACHEPATH
13 | from pysus.ftp.databases.ciha import CIHA
14 | from pysus.ftp.utils import parse_UFs
15 | 
16 | ciha = CIHA().load()
17 | 
18 | 
19 | def get_available_years(
20 |     states: Union[list, str] = None,
21 | ) -> list:
22 |     """
23 |     Fetch available years for the `states`.
24 |     :param states: UF code. E.g: "SP" or ["SP", "RJ"]
25 |     :return: list of years in integers
26 |     """
27 |     ufs = parse_UFs(states)
28 | 
29 |     years = dict()
30 |     for uf in ufs:
31 |         files = ciha.get_files(uf=uf)
32 |         years[uf] = set(sorted([ciha.describe(f)["year"] for f in files]))
33 | 
34 |     if len(set([len(v) for v in years.values()])) > 1:
35 |         logger.warning(f"Distinct years were found for UFs: {years}")
36 | 
37 |     return sorted(list(set.intersection(*map(set, years.values()))))
38 | 
39 | 
40 | def download(
41 |     states: Union[str, list],
42 |     years: Union[str, list, int],
43 |     months: Union[str, list, int],
44 |     data_dir: str = CACHEPATH,
45 | ) -> list:
46 |     """
47 |     Download CIHA records for state, year and month and returns the Parquet
48 |     files as a list of ParquetData
49 |     :param months: 1 to 12, can be a list
50 |     :param states: 2 letter state code
51 |     :param years: 4 digit integer
52 |     """
53 | 
54 |     files = ciha.get_files(uf=states, year=years, month=months)
55 |     return ciha.download(files, local_dir=data_dir)
56 | 
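# Usage sketch for this module (illustrative only: it assumes FTP access to
# the DATASUS server and uses just the functions defined above; the chosen
# UF/year/month values are examples, not guaranteed to exist):
#
#     from pysus.online_data.CIHA import download, get_available_years
#
#     get_available_years("SP")        # years available for São Paulo
#     files = download("SP", 2011, 1)  # January 2011 records for São Paulo
#
# All three selectors also accept lists, e.g.
# download(["SP", "RJ"], [2011, 2012], [1, 2]).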
--------------------------------------------------------------------------------
/pysus/online_data/PNI.py:
--------------------------------------------------------------------------------
1 | """
2 | Download data from the national immunization program
3 | """
4 | from typing import Literal, Union
5 | 
6 | from loguru import logger
7 | from pysus.ftp import CACHEPATH
8 | from pysus.ftp.databases.pni import PNI
9 | from pysus.ftp.utils import parse_UFs
10 | 
11 | pni = PNI().load()
12 | 
13 | 
14 | def get_available_years(group, states):
15 |     """
16 |     Fetch available years for `group` and `states`.
17 |     :param group: PNI group, options are "CPNI" or "DPNI"
18 |     :param states: UF code, can be a list. E.g: "SP" or ["SP", "RJ"]
19 |     :return: list of available years
20 |     """
21 |     ufs = parse_UFs(states)
22 | 
23 |     years = dict()
24 |     for uf in ufs:
25 |         files = pni.get_files(group, uf=uf)
26 |         years[uf] = set(sorted([pni.describe(f)["year"] for f in files]))
27 | 
28 |     if len(set([len(v) for v in years.values()])) > 1:
29 |         logger.warning(f"Distinct years were found for UFs: {years}")
30 | 
31 |     return sorted(list(set.intersection(*map(set, years.values()))))
32 | 
33 | 
34 | def download(
35 |     group: Union[list, Literal["CPNI", "DPNI"]],
36 |     states: Union[str, list],
37 |     years: Union[str, list, int],
38 |     data_dir: str = CACHEPATH,
39 | ) -> list:
40 |     """
41 |     Download immunization records for the given states and years.
42 |     :param group: PNI group, options are "CPNI" or "DPNI"
43 |     :param states: UF two-letter code, can be a list. E.g: "SP" or ["SP", "RJ"]
44 |     :param years: year in 4 digits, can be a list. E.g: 2015 or [2015, 2016]
45 |     :param data_dir: directory where data will be downloaded
46 |     :return: list of downloaded ParquetData
47 |     """
48 |     files = pni.get_files(group, uf=states, year=years)
49 |     return pni.download(files, local_dir=data_dir)
50 | 
--------------------------------------------------------------------------------
/docker/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM condaforge/mambaforge
2 | 
3 | LABEL maintainer="es.loch@gmail.com"
4 | 
5 | USER root
6 | 
7 | ENV DEBIAN_FRONTEND=noninteractive
8 | 
9 | ENV HOME "/home/pysus"
10 | ENV PATH "$PATH:/home/pysus/.local/bin"
11 | ENV ENV_NAME pysus
12 | ENV PATH "/opt/conda/envs/$ENV_NAME/bin:$PATH"
13 | ENV PATH "/opt/poetry/bin:$PATH"
14 | 
15 | RUN apt-get -qq update --yes \
16 |   && apt-get -qq install --yes --no-install-recommends \
17 |   build-essential \
18 |   firefox \
19 |   ca-certificates \
20 |   sudo \
21 |   curl \
22 |   && rm -rf /var/lib/apt/lists/*
23 | 
24 | RUN useradd -ms /bin/bash pysus \
25 |   && echo "pysus ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/pysus \
26 |   && chmod 0440 /etc/sudoers.d/ \
27 |   && echo 'source /opt/conda/bin/activate "$ENV_NAME" && exec "$@"' > /activate.sh \
28 |   && echo 'source activate "$ENV_NAME"' > /home/pysus/.bashrc \
29 |   && chmod +x /activate.sh \
30 |   && chmod -R a+rwx /opt/conda /tmp \
31 |   && sudo chown -R pysus:pysus /usr/src
32 | 
33 | USER pysus
34 | 
35 | RUN mkdir -p /home/pysus/Notebooks/
36 | 
37 | COPY --chown=pysus:pysus conda/dev.yaml /tmp/dev.yaml
38 | COPY --chown=pysus:pysus docker/scripts/entrypoint.sh /entrypoint.sh
39 | COPY --chown=pysus:pysus docker/scripts/poetry-install.sh /tmp/poetry-install.sh
40 | COPY --chown=pysus:pysus pyproject.toml poetry.lock LICENSE README.md /usr/src/
41 | COPY --chown=pysus:pysus pysus /usr/src/pysus
42 | COPY --chown=pysus:pysus docs/source/**/*.ipynb /home/pysus/Notebooks/
43 | COPY --chown=pysus:pysus docs/source/data /home/pysus/Notebooks/
44 | 
45 | RUN mamba env create -n $ENV_NAME --file /tmp/dev.yaml \
46 |   && mamba clean -afy
47 | 
48 | RUN cd /usr/src/ && bash /tmp/poetry-install.sh
49 | 
50 | WORKDIR /home/pysus/Notebooks
51 | 
52 | ENTRYPOINT ["bash", "/activate.sh", "jupyter", "notebook", "--port=8888", "--ip=0.0.0.0"]
53 | 
--------------------------------------------------------------------------------
/pysus/online_data/SINASC.py:
--------------------------------------------------------------------------------
1 | """
2 | Download SINASC data from DATASUS FTP server
3 | Created on 01/11/17
4 | by fccoelho
5 | license: GPL V3 or Later
6 | """
7 | from typing import Union
8 | 
9 | from loguru import logger
10 | from pysus.ftp import CACHEPATH
11 | from pysus.ftp.databases.sinasc import SINASC
12 | from pysus.ftp.utils import parse_UFs
13 | 
14 | sinasc = SINASC().load()
15 | 
16 | 
17 | def get_available_years(group: str, states: Union[str, list[str]]) -> list:
18 |     """
19 |     Get SINASC years for states
20 |     :param group:
21 |         "DN": "Declarações de Nascidos Vivos",
22 |         "DNR": "Dados dos Nascidos Vivos por UF de residência",
23 |     :param states: 2 letter UF code, can be a list.
E.g: "SP" or ["SP", "RJ"] 24 | :return: list of available years 25 | """ 26 | ufs = parse_UFs(states) 27 | 28 | years = dict() 29 | for uf in ufs: 30 | files = sinasc.get_files(group, uf=uf) 31 | years[uf] = set(sorted([sinasc.describe(f)["year"] for f in files])) 32 | 33 | if len(set([len(v) for v in years.values()])) > 1: 34 | logger.warning(f"Distinct years were found for UFs: {years}") 35 | 36 | return sorted(list(set.intersection(*map(set, years.values())))) 37 | 38 | 39 | def download( 40 | groups: Union[str, list], 41 | states: Union[str, list], 42 | years: Union[str, list, int], 43 | data_dir: str = CACHEPATH, 44 | ) -> list: 45 | """ 46 | Downloads data directly from Datasus ftp server 47 | :param groups: either DN, DNR or both 48 | :param states: two-letter state identifier: MG == Minas Gerais, 49 | can be a list 50 | :param years: years to download 51 | :return: list of downloaded files 52 | """ 53 | files = sinasc.get_files(groups, uf=states, year=years) 54 | return sinasc.download(files, local_dir=data_dir) 55 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | SHELL := /usr/bin/env bash 2 | PYTHON := python 3 | PYTHONPATH := ${PWD} 4 | ENVCREATE:= 5 | 6 | 7 | .PHONY: clean clean-test clean-pyc clean-build help 8 | .DEFAULT_GOAL := help 9 | 10 | define PRINT_HELP_PYSCRIPT 11 | import re, sys 12 | 13 | for line in sys.stdin: 14 | match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) 15 | if match: 16 | target, help = match.groups() 17 | print("%-20s %s" % (target, help)) 18 | endef 19 | export PRINT_HELP_PYSCRIPT 20 | 21 | 22 | help: 23 | @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) 24 | 25 | DOCKER = docker compose -p pysus -f docker/docker-compose.yaml 26 | SERVICE := 27 | SEMANTIC_RELEASE = npx --yes \ 28 | -p semantic-release \ 29 | -p conventional-changelog-conventionalcommits \ 30 | -p "@semantic-release/commit-analyzer" \ 31 | -p "@semantic-release/release-notes-generator" \ 32 | -p "@semantic-release/changelog" \ 33 | -p "@semantic-release/exec" \ 34 | -p "@semantic-release/github" \ 35 | -p "@semantic-release/git" \ 36 | -p "semantic-release-replace-plugin" \ 37 | semantic-release 38 | 39 | #* Docker basic 40 | .PHONY: run-jupyter-pysus 41 | run-jupyter-pysus: ## build and deploy all containers 42 | $(DOCKER) up -d --build 43 | 44 | .PHONY: down-jupyter-pysus 45 | down-jupyter-pysus: ## stop and remove containers for all services 46 | $(DOCKER) down -v --remove-orphans 47 | 48 | #* Tests 49 | .PHONY: test-jupyter-pysus 50 | test-jupyter-pysus: ## run pytest for notebooks inside jupyter container 51 | $(DOCKER) exec -T jupyter pytest -vv --nbmake 52 | 53 | .PHONY: test-pysus 54 | test-pysus: ## run tests quickly with the default Python 55 | poetry run pytest -vv pysus/tests/ --retries 3 --retry-delay 15 56 | 57 | # RELEASE 58 | # ======= 59 | 60 | .PHONY: release 61 | release: 62 | $(SEMANTIC_RELEASE) --ci 63 | 64 | 65 | .PHONY: release-dry 66 | release-dry: 67 | $(SEMANTIC_RELEASE) --dry-run 68 | -------------------------------------------------------------------------------- /pysus/online_data/SINAN.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Union 3 | 4 | import pandas as pd 5 | from pysus.ftp import CACHEPATH 6 | from pysus.ftp.databases.sinan import SINAN 7 | 8 | sinan = SINAN().load() 9 | 10 | 11 | def list_diseases() -> dict: 12 | 
"""List available diseases on SINAN""" 13 | return sinan.diseases 14 | 15 | 16 | def get_available_years(disease_code: str) -> list: 17 | """ 18 | Fetch available years for data related to specific disease 19 | :param disease_code: 20 | Disease code. See `SINAN.list_diseases` for valid codes 21 | :return: 22 | A list of DBC files from a specific disease found in the FTP Server. 23 | """ 24 | files = sinan.get_files(dis_code=disease_code) 25 | return sorted(list(set(sinan.describe(f)["year"] for f in files))) 26 | 27 | 28 | def download( 29 | diseases: Union[str, list], 30 | years: Union[str, list, int], 31 | data_path: str = CACHEPATH, 32 | ) -> list: 33 | """ 34 | Downloads SINAN data directly from Datasus ftp server. 35 | :param disease: Disease code according to `agravos`. 36 | :param years: 4 digit integer, can be a list of years. 37 | :param data_path: The directory where the file will be downloaded to. 38 | :return: list of downloaded files. 39 | """ 40 | files = sinan.get_files(dis_code=diseases, year=years) 41 | return sinan.download(files, local_dir=data_path) 42 | 43 | 44 | def metadata_df(disease_code: str) -> pd.DataFrame: 45 | metadata_file = ( 46 | Path(__file__).parent.parent 47 | / "metadata" 48 | / "SINAN" 49 | / f"{disease_code}.tar.gz" 50 | ) 51 | if metadata_file.exists(): 52 | df = pd.read_csv( 53 | metadata_file, 54 | compression="gzip", 55 | header=0, 56 | sep=",", 57 | quotechar='"', 58 | ) 59 | 60 | return df.iloc[:, 1:] 61 | else: 62 | print(f"No metadata available for {disease_code}") 63 | return 64 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "pysus" 3 | version = "1.0.0" # changed by semantic-release 4 | description = "Tools for dealing with Brazil's Public health data" 5 | authors = ["Flavio Codeco Coelho "] 6 | license = "GPL" 7 | 8 | packages = [{include='pysus'}] 9 | 10 | [tool.poetry.dependencies] 11 | python = ">=3.10,<3.14" 12 | python-dateutil = "2.8.2" 13 | dbfread = "2.0.7" 14 | fastparquet = ">=2023.10.1,<=2024.11.0" 15 | numpy = ">1,<3" 16 | pyarrow = ">=11.0.0" 17 | pycparser = "2.21" 18 | pyreaddbc = ">=1.1.0" 19 | tqdm = "4.64.0" 20 | wget = "^3.2" 21 | loguru = "^0.6.0" 22 | Unidecode = "^1.3.6" 23 | dateparser = "^1.1.8" 24 | pandas = "^2.2.2" 25 | urwid = "^2.1.2" 26 | elasticsearch = { version = "7.16.2", extras=["preprocessing"] } 27 | # FTP 28 | bigtree = "^0.12.2" 29 | aioftp = "^0.21.4" 30 | humanize = "^4.8.0" 31 | typing-extensions = "^4.9.0" 32 | 33 | [tool.poetry.group.dev.dependencies] 34 | pytest = ">=6.1.0" 35 | black = "^22.6.0" 36 | flake8 = "^5.0.4" 37 | isort = "^5.10.1" 38 | pre-commit = "^2.20.0" 39 | pytest-timeout = "^2.1.0" 40 | nbsphinx = "^0.9.3" 41 | pytest-retry = "1.7.0" 42 | 43 | [tool.poetry.group.docs.dependencies] 44 | sphinx = "^5.1.1" 45 | nbmake = "^1.4.1" 46 | matplotlib = "^3.7.1" 47 | jupyterlab = "^4.0.5" 48 | ipykernel = "^6.25.1" 49 | seaborn = "^0.12.2" 50 | tomli = "^2.0.1" 51 | sphinx-rtd-theme = "^1.3.0" 52 | nbsphinx = "^0.9.3" 53 | 54 | [tool.poetry.group.geo.dependencies] 55 | geocoder = { version = "^1.38.1", extras=["preprocessing"] } 56 | jsonschema = "^4.19.0" 57 | descartes = "^1.1.0" 58 | folium = "^0.14.0" 59 | 60 | [build-system] 61 | requires = ["poetry-core>=1.0.0"] 62 | build-backend = "poetry.core.masonry.api" 63 | 64 | [tool.isort] 65 | profile = "black" 66 | src_paths = ["isort", "test"] 67 | 68 | [tool.black] 69 | # 
https://github.com/psf/black 70 | target-version = ["py39"] 71 | line-length = 79 72 | color = true 73 | 74 | [tool.pytest.ini_options] 75 | addopts = [ 76 | "--import-mode=importlib", 77 | "-ra -q" 78 | ] 79 | testpaths = [ 80 | "tests" 81 | ] 82 | 83 | exclude = ["*.git", "docs/"] 84 | 85 | [tool.poetry.extras] 86 | preprocessing = ["geobr", "geocoder"] 87 | -------------------------------------------------------------------------------- /pysus/online_data/SIH.py: -------------------------------------------------------------------------------- 1 | """ 2 | Downloads SIH data from Datasus FTP server 3 | Created on 21/09/18 4 | by fccoelho 5 | license: GPL V3 or Later 6 | """ 7 | from typing import Union 8 | 9 | from loguru import logger 10 | from pysus.ftp import CACHEPATH 11 | from pysus.ftp.databases.sih import SIH 12 | from pysus.ftp.utils import parse_UFs 13 | 14 | sih = SIH().load() 15 | 16 | 17 | def get_available_years( 18 | group: str, 19 | states: Union[str, list] = None, 20 | ) -> list: 21 | """ 22 | Get SIH years for group and/or state and returns a list of years 23 | :param group: 24 | RD: AIH Reduzida 25 | RJ: AIH Rejeitada 26 | ER: AIH Rejeitada com erro 27 | SP: Serviços Profissionais 28 | CH: Cadastro Hospitalar 29 | CM: # TODO 30 | :param states: 2 letter uf code, can be a list. E.g: "SP" or ["SP", "RJ"] 31 | :return: list of available years 32 | """ 33 | ufs = parse_UFs(states) 34 | 35 | years = dict() 36 | for uf in ufs: 37 | files = sih.get_files(group, uf=uf) 38 | years[uf] = set(sorted([sih.describe(f)["year"] for f in files])) 39 | 40 | if len(set([len(v) for v in years.values()])) > 1: 41 | logger.warning(f"Distinct years were found for UFs: {years}") 42 | 43 | return sorted(list(set.intersection(*map(set, years.values())))) 44 | 45 | 46 | def download( 47 | states: Union[str, list], 48 | years: Union[str, list, int], 49 | months: Union[str, list, int], 50 | groups: Union[str, list], 51 | data_dir: str = CACHEPATH, 52 | ) -> list: 53 | """ 54 | Download SIH records for state, year and month 55 | :param states: 2 letter state code, can be a list 56 | :param years: 4 digit integer, can be a list 57 | :param months: 1 to 12, can be a list 58 | :param groups: the groups of datasets to be downloaded. 59 | See `sih.groups` 60 | :param data_dir: Directory where parquets will be downloaded. 61 | :return: list with the downloaded files as ParquetData objects 62 | """ 63 | files = sih.get_files(group=groups, uf=states, month=months, year=years) 64 | return sih.download(files, local_dir=data_dir) 65 | -------------------------------------------------------------------------------- /pysus/preprocessing/ESUS.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from pysus.online_data.ESUS import download 4 | 5 | 6 | def cases_by_age_and_sex(UF, start="2020-03-01", end="2020-08-31"): 7 | """ 8 | Fetches ESUS covid line list and aggregates by age and sex returning these 9 | counts between start and end dates. 
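    Rows with negative or inconclusive test results, undetermined sex, or
    missing notification/symptom/test dates are filtered out before
    aggregation (see the comments below).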
10 |     :param UF: State code
11 |     :param start: Start date
12 |     :param end: End date
13 |     :return: DataFrame of case counts by age group and sex
14 |     """
15 |     df = download(uf=UF)
16 | 
17 |     # Convert the date columns to datetime type
18 |     for cname in df:
19 |         if cname.startswith("data"):
20 |             df[cname] = pd.to_datetime(df[cname], errors="coerce")
21 | 
22 |     # Drop null values in the important date columns
23 |     old_size = len(df)
24 |     df.dropna(
25 |         subset=["dataNotificacao", "dataInicioSintomas", "dataTeste"],
26 |         inplace=True,
27 |     )
28 |     print(
29 |         f"Removed {old_size - len(df)} rows with missing dates of symptoms, "
30 |         "notification or testing"
31 |     )
32 | 
33 |     # Disregard negative or inconclusive test results
34 |     df = df.loc[
35 |         ~df.resultadoTeste.isin(["Negativo", "Inconclusivo ou Indeterminado"])
36 |     ]
37 | 
38 |     # Remove records with undetermined sex
39 |     df = df.loc[df.sexo.isin(["Masculino", "Feminino"])]
40 | 
41 |     # Set the date of first symptoms as the index date
42 | 
43 |     df["datesint"] = df["dataInicioSintomas"]
44 |     df.set_index("datesint", inplace=True)
45 |     df.sort_index(inplace=True, ascending=True)
46 | 
47 |     # Limit the start and end dates, considering only the
48 |     # first wave
49 | 
50 |     df = df.loc[start:end]
51 | 
52 |     ini = np.arange(0, 81, 5)
53 |     fin = np.arange(5, 86, 5)
54 |     fin[-1] = 120
55 |     faixa_etaria = {
56 |         f"[{i},{f})": (i, f) for i, f in zip(ini, fin) # noqa: E231
57 |     }
58 | 
59 |     labels = list(faixa_etaria.keys())
60 |     df["faixa_etaria"] = [
61 |         labels[i - 1] for i in np.digitize(df.idade, bins=ini)
62 |     ]
63 | 
64 |     agreg = (
65 |         df[["sexo", "faixa_etaria"]].groupby(["faixa_etaria", "sexo"]).size()
66 |     )
67 |     agreg = agreg.reset_index()
68 |     agreg.columns = ["faixa_etaria", "sexo", "n"]
69 |     return agreg
70 | 
--------------------------------------------------------------------------------
/.releaserc.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "branches": ["main"],
 3 |   "tagFormat": "${version}",
 4 |   "plugins": [
 5 |     [
 6 |       "@semantic-release/commit-analyzer", {
 7 |         "preset": "conventionalcommits"
 8 |     }],
 9 |     [
10 |       "semantic-release-replace-plugin",
11 |       {
12 |         "replacements": [
13 |           {
14 |             "files": ["pysus/__init__.py"],
15 |             "from": "return \".*\" # changed by semantic-release",
16 |             "to": "return \"${nextRelease.version}\" # changed by semantic-release",
17 |             "results": [
18 |               {
19 |                 "file": "pysus/__init__.py",
20 |                 "hasChanged": true,
21 |                 "numMatches": 1,
22 |                 "numReplacements": 1
23 |               }
24 |             ],
25 |             "countMatches": true
26 |           },
27 |           {
28 |             "files": ["pyproject.toml"],
29 |             "from": "version = \".*\" # changed by semantic-release",
30 |             "to": "version = \"${nextRelease.version}\" # changed by semantic-release",
31 |             "results": [
32 |               {
33 |                 "file": "pyproject.toml",
34 |                 "hasChanged": true,
35 |                 "numMatches": 1,
36 |                 "numReplacements": 1
37 |               }
38 |             ],
39 |             "countMatches": true
40 |           }
41 |         ]
42 |       }
43 |     ],
44 |     [
45 |       "@semantic-release/release-notes-generator", {
46 |         "preset": "conventionalcommits"
47 |     }],
48 |     [
49 |       "@semantic-release/changelog",
50 |       {
51 |         "changelogTitle": "Release Notes\n---",
52 |         "changelogFile": "CHANGELOG.md"
53 |       }
54 |     ],
55 |     [
56 |       "@semantic-release/exec",
57 |       {
58 |         "prepareCmd": "poetry build",
59 |         "publishCmd": "poetry publish"
60 |       }
61 |     ],
62 |     [
63 |       "@semantic-release/github",
64 |       {
65 |         "assets": ["dist/*.whl", "dist/*.tar.gz"]
66 |       }
67 |     ],
68 |     [
69 |       "@semantic-release/git",
70 |       {
71 |         "assets": [
72 |           "pyproject.toml",
73 | 
"CHANGELOG.md", 74 | "pysus/__init__.py" 75 | ], 76 | "message": "chore(release): ${nextRelease.version}" 77 | } 78 | ] 79 | ] 80 | } 81 | -------------------------------------------------------------------------------- /pysus/ftp/databases/sim.py: -------------------------------------------------------------------------------- 1 | __all__ = ["SIM"] 2 | 3 | from typing import List, Optional, Union 4 | 5 | from pysus.ftp import Database, Directory, File 6 | from pysus.ftp.utils import UFs, parse_UFs, to_list, zfill_year 7 | 8 | 9 | class SIM(Database): 10 | name = "SIM" 11 | paths = ( 12 | Directory("/dissemin/publicos/SIM/CID10/DORES"), 13 | Directory("/dissemin/publicos/SIM/CID9/DORES"), 14 | ) 15 | metadata = { 16 | "long_name": "Sistema de Informação sobre Mortalidade", 17 | "source": "http://sim.saude.gov.br", 18 | "description": "", 19 | } 20 | groups = {"CID10": "DO", "CID9": "DOR"} 21 | 22 | def describe(self, file: File) -> dict: 23 | group, _uf, year = self.format(file) 24 | _groups = {v: k for k, v in self.groups.items()} 25 | 26 | try: 27 | uf = UFs[_uf] 28 | except KeyError: 29 | uf = _uf 30 | 31 | description = { 32 | "name": str(file.basename), 33 | "uf": uf, 34 | "year": year, 35 | "group": _groups[group], 36 | "size": file.info["size"], 37 | "last_update": file.info["modify"], 38 | } 39 | 40 | return description 41 | 42 | def format(self, file: File) -> tuple: 43 | if "CID9" in str(file.path): 44 | group, _uf, year = file.name[:-4], file.name[-4:-2], file.name[-2:] 45 | else: 46 | group, _uf, year = file.name[:-6], file.name[-6:-4], file.name[-4:] 47 | return group, _uf, zfill_year(year) 48 | 49 | def get_files( 50 | self, 51 | group: Union[list[str], str], 52 | uf: Optional[Union[list[str], str]] = None, 53 | year: Optional[Union[list, str, int]] = None, 54 | ) -> List[File]: 55 | files = self.files 56 | 57 | groups = [self.groups[g.upper()] for g in to_list(group)] 58 | 59 | files = list(filter(lambda f: self.format(f)[0] in groups, files)) 60 | 61 | if uf: 62 | ufs = parse_UFs(uf) 63 | files = list(filter(lambda f: self.format(f)[1] in ufs, files)) 64 | 65 | if year or str(year) in ["0", "00"]: 66 | years = [zfill_year(y) for y in to_list(year)] 67 | files = list(filter(lambda f: self.format(f)[2] in years, files)) 68 | 69 | return files 70 | -------------------------------------------------------------------------------- /pysus/tests/test_ibge.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import pandas as pd 4 | import pytest 5 | from pysus.online_data import IBGE 6 | 7 | 8 | class SIDRA(unittest.TestCase): 9 | @pytest.mark.timeout(120) 10 | def test_get_aggregates(self): 11 | df = IBGE.list_agregados() 12 | self.assertIsInstance(df, pd.DataFrame) 13 | self.assertGreater(df.size, 0) 14 | 15 | @pytest.mark.skip(reason="This test takes too long") 16 | def test_localidades_por_agregado(self): 17 | df = IBGE.localidades_por_agregado(475, nivel="N3") 18 | self.assertIsInstance(df, pd.DataFrame) 19 | self.assertGreater(df.size, 0) 20 | 21 | @pytest.mark.timeout(120) 22 | @pytest.mark.skip(reason="Failing?") 23 | def test_lista_periodos(self): 24 | df = IBGE.lista_periodos(475) 25 | self.assertIsInstance(df, pd.DataFrame) 26 | self.assertGreater(df.size, 0) 27 | 28 | @pytest.mark.timeout(120) 29 | def test_get_sidra_table(self): 30 | df = IBGE.get_sidra_table( 31 | 200, 32 | territorial_level=6, 33 | geocode=4220000, 34 | period="last", 35 | classification=2, 36 | categories="all", 37 | ) 38 | 
self.assertIsInstance(df, pd.DataFrame) 39 | self.assertGreater(df.size, 0) 40 | 41 | @pytest.mark.skip(reason="This test takes too long") 42 | def test_metadata(self): 43 | md = IBGE.metadados(475) 44 | self.assertIsInstance(md, dict) 45 | self.assertGreater(len(md), 0) 46 | 47 | @pytest.mark.timeout(120) 48 | def test_FetchData(self): 49 | ds = IBGE.FetchData( 50 | 475, 51 | periodos=1996, 52 | variavel=93, 53 | localidades="N3[all]", 54 | classificacao="58[all]|2[4,5]|1[all]", 55 | view="flat", 56 | ) 57 | self.assertIsInstance(ds, IBGE.FetchData) 58 | self.assertGreater(len(ds.JSON), 0) 59 | 60 | @pytest.mark.timeout(120) 61 | def test_get_population(self): 62 | l1 = IBGE.get_population(year=2021, source="POP") 63 | self.assertEqual(type(l1), pd.DataFrame) 64 | self.assertEqual(len(l1), 5570) 65 | l2 = IBGE.get_population(year=2012, source="projpop") 66 | self.assertEqual(type(l2), pd.DataFrame) 67 | self.assertEqual(len(l2), 4914) 68 | 69 | 70 | if __name__ == "__main__": 71 | unittest.main() 72 | -------------------------------------------------------------------------------- /docs/source/locale/pt/LC_MESSAGES/Dengue.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-05-02 14:39-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/Dengue.ipynb:9 23 | msgid "Análise exploratória de casos de Dengue notificado pelo SUS" 24 | msgstr "" 25 | 26 | #: ../../source/Dengue.ipynb:11 27 | msgid "" 28 | "Neste tutorial vamos explorar como começar a entender a descrição de " 29 | "casos do sis tema de notificação de agravos do SUS, o SINAN. Estes dados " 30 | "são bastante ricos e a `documentação " 31 | "`__ sobre o " 32 | "significado de cada uma das variáveis pode ser encontrada `aqui " 33 | "`__." 34 | msgstr "" 35 | 36 | #: ../../source/Dengue.ipynb:63 37 | msgid "" 38 | "Primeiro vamos começar pelo carregamento dos dados a partir do servidor " 39 | "do DataSUS. Como o dado está no formato Parquet, nós vamos utilizar a " 40 | "biblioteca pandas para visualizar os dados em um Dataframe:" 41 | msgstr "" 42 | 43 | #: ../../source/Dengue.ipynb:459 44 | msgid "" 45 | "Estes dados correspondem a todos os casos de dengue notificado ao SUS " 46 | "durante um período. Neste caso de 2015 a 2016. Para podermos tratar " 47 | "adequadamente estes dados para fins de visualização ou análise precisamos" 48 | " corrigir os tipos das colunas. Por exemplo vamos converter as datas." 49 | msgstr "" 50 | 51 | #: ../../source/Dengue.ipynb:481 52 | msgid "" 53 | "Para poder organizar os dados temporalmente, é útil indexar a tabela por " 54 | "alguma variável temporal" 55 | msgstr "" 56 | 57 | #: ../../source/Dengue.ipynb:502 58 | msgid "" 59 | "Agora podemos plotar o número de casos por semana de dengue na cidade do " 60 | "Rio de Janeiro." 
61 | msgstr "" 62 | 63 | #: ../../source/Dengue.ipynb:531 64 | msgid "" 65 | "Suponhamos agora que desejamos visualizar em um mapa os casos que " 66 | "ocorreram, por exemplo entre janeiro e março de 2015" 67 | msgstr "" 68 | -------------------------------------------------------------------------------- /docs/source/locale/pt_BR/LC_MESSAGES/Dengue.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-05-02 14:39-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/Dengue.ipynb:9 23 | msgid "Análise exploratória de casos de Dengue notificado pelo SUS" 24 | msgstr "" 25 | 26 | #: ../../source/Dengue.ipynb:11 27 | msgid "" 28 | "Neste tutorial vamos explorar como começar a entender a descrição de " 29 | "casos do sis tema de notificação de agravos do SUS, o SINAN. Estes dados " 30 | "são bastante ricos e a `documentação " 31 | "`__ sobre o " 32 | "significado de cada uma das variáveis pode ser encontrada `aqui " 33 | "`__." 34 | msgstr "" 35 | 36 | #: ../../source/Dengue.ipynb:63 37 | msgid "" 38 | "Primeiro vamos começar pelo carregamento dos dados a partir do servidor " 39 | "do DataSUS. Como o dado está no formato Parquet, nós vamos utilizar a " 40 | "biblioteca pandas para visualizar os dados em um Dataframe:" 41 | msgstr "" 42 | 43 | #: ../../source/Dengue.ipynb:459 44 | msgid "" 45 | "Estes dados correspondem a todos os casos de dengue notificado ao SUS " 46 | "durante um período. Neste caso de 2015 a 2016. Para podermos tratar " 47 | "adequadamente estes dados para fins de visualização ou análise precisamos" 48 | " corrigir os tipos das colunas. Por exemplo vamos converter as datas." 49 | msgstr "" 50 | 51 | #: ../../source/Dengue.ipynb:481 52 | msgid "" 53 | "Para poder organizar os dados temporalmente, é útil indexar a tabela por " 54 | "alguma variável temporal" 55 | msgstr "" 56 | 57 | #: ../../source/Dengue.ipynb:502 58 | msgid "" 59 | "Agora podemos plotar o número de casos por semana de dengue na cidade do " 60 | "Rio de Janeiro." 
61 | msgstr "" 62 | 63 | #: ../../source/Dengue.ipynb:531 64 | msgid "" 65 | "Suponhamos agora que desejamos visualizar em um mapa os casos que " 66 | "ocorreram, por exemplo entre janeiro e março de 2015" 67 | msgstr "" 68 | -------------------------------------------------------------------------------- /pysus/ftp/databases/sinasc.py: -------------------------------------------------------------------------------- 1 | __all__ = ["SINASC"] 2 | 3 | from typing import List, Optional, Union 4 | 5 | from pysus.ftp import Database, Directory, File 6 | from pysus.ftp.utils import UFs, parse_UFs, to_list, zfill_year 7 | 8 | 9 | class SINASC(Database): 10 | name = "SINASC" 11 | paths = ( 12 | Directory("/dissemin/publicos/SINASC/NOV/DNRES"), 13 | Directory("/dissemin/publicos/SINASC/ANT/DNRES"), 14 | ) 15 | metadata = { 16 | "long_name": "Sistema de Informações sobre Nascidos Vivos", 17 | "source": "http://sinasc.saude.gov.br/", 18 | "description": "", 19 | } 20 | groups = { 21 | "DN": "Declarações de Nascidos Vivos", 22 | "DNR": "Dados dos Nascidos Vivos por UF de residência", 23 | } 24 | 25 | def describe(self, file: File) -> dict: 26 | if file.extension.upper() == ".DBC": 27 | group, _uf, year = self.format(file) 28 | 29 | try: 30 | uf = UFs[_uf] 31 | except KeyError: 32 | uf = _uf 33 | 34 | description = { 35 | "name": file.basename, 36 | "group": self.groups[group], 37 | "uf": uf, 38 | "year": year, 39 | "size": file.info["size"], 40 | "last_update": file.info["modify"], 41 | } 42 | 43 | return description 44 | return {} 45 | 46 | def format(self, file: File) -> tuple: 47 | if file.name == "DNEX2021": 48 | pass 49 | 50 | year = zfill_year(file.name[-2:]) 51 | charname = "".join([c for c in file.name if not c.isnumeric()]) 52 | group, _uf = charname[:-2], charname[-2:] 53 | return group, _uf, zfill_year(year) 54 | 55 | def get_files( 56 | self, 57 | group: Union[List[str], str], 58 | uf: Optional[Union[List[str], str]] = None, 59 | year: Optional[Union[List, str, int]] = None, 60 | ) -> List[File]: 61 | files = self.files 62 | 63 | groups = to_list(group) 64 | 65 | files = list(filter(lambda f: self.format(f)[0] in groups, files)) 66 | 67 | if uf: 68 | if "EX" in to_list(uf): 69 | # DNEX2021 70 | if len(to_list(uf)) == 1: 71 | return [] 72 | 73 | to_list(uf).remove("EX") 74 | 75 | ufs = parse_UFs(uf) 76 | files = list(filter(lambda f: self.format(f)[1] in ufs, files)) 77 | 78 | if year or str(year) in ["0", "00"]: 79 | years = [zfill_year(str(y)[-2:]) for y in to_list(year)] 80 | files = list(filter(lambda f: self.format(f)[2] in years, files)) 81 | 82 | return files 83 | -------------------------------------------------------------------------------- /docs/source/locale/pt/LC_MESSAGES/Zika.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 
5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-05-02 14:39-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/Zika.ipynb:9 23 | msgid "Análise exploratória de casos de Zika notificado pelo SUS" 24 | msgstr "" 25 | 26 | #: ../../source/Zika.ipynb:11 27 | msgid "" 28 | "Neste tutorial vamos explorar como começar a entender a descrição de " 29 | "casos do sistema de notificação de agravos do SUS, o SINAN. Estes dados " 30 | "são bastante ricos e a `documentação " 31 | "`__ sobre o " 32 | "significado de cada uma das variáveis pode ser encontrada `aqui " 33 | "`__." 34 | msgstr "" 35 | 36 | #: ../../source/Zika.ipynb:60 37 | msgid "" 38 | "Primeiro vamos começar pelo carregamento dos dados a partir do site do " 39 | "InfoDengue. Como o dado está no formato parquet, nós vamos utilizar a " 40 | "biblioteca pandas para carregar os dados em um Dataframe." 41 | msgstr "" 42 | 43 | #: ../../source/Zika.ipynb:87 44 | msgid "Vejamos os nomes da variáveis" 45 | msgstr "" 46 | 47 | #: ../../source/Zika.ipynb:434 48 | msgid "" 49 | "Estes dados correspondem a todos os casos de Zika notificados ao SUS " 50 | "durante um período. Neste caso de 2015 a 2016. Para podermos tratar " 51 | "adequadamente estes dados para fins de visualização ou análise precisamos" 52 | " corrigir os tipos das colunas. Por exemplo vamos converter as datas." 53 | msgstr "" 54 | 55 | #: ../../source/Zika.ipynb:456 56 | msgid "" 57 | "Para poder organizar os dados temporalmente, é útil indexar a tabela por " 58 | "alguma variável temporal. Vamos usar a data de notifiacão de cada caso " 59 | "como índice" 60 | msgstr "" 61 | 62 | #: ../../source/Zika.ipynb:477 63 | msgid "" 64 | "Agora podemos plotar o número de casos por semana de dengue na cidade do " 65 | "Rio de Janeiro." 66 | msgstr "" 67 | 68 | #: ../../source/Zika.ipynb:532 69 | msgid "" 70 | "Suponhamos agora que desejamos visualizar em um mapa os casos que " 71 | "ocorreram, por exemplo entre janeiro e agosto de 2016." 72 | msgstr "" 73 | 74 | #: ../../source/Zika.ipynb:544 75 | msgid "Usando Kepler.gl para visualização" 76 | msgstr "" 77 | -------------------------------------------------------------------------------- /docs/source/locale/pt_BR/LC_MESSAGES/Zika.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 
5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-05-02 14:39-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/Zika.ipynb:9 23 | msgid "Análise exploratória de casos de Zika notificado pelo SUS" 24 | msgstr "" 25 | 26 | #: ../../source/Zika.ipynb:11 27 | msgid "" 28 | "Neste tutorial vamos explorar como começar a entender a descrição de " 29 | "casos do sistema de notificação de agravos do SUS, o SINAN. Estes dados " 30 | "são bastante ricos e a `documentação " 31 | "`__ sobre o " 32 | "significado de cada uma das variáveis pode ser encontrada `aqui " 33 | "`__." 34 | msgstr "" 35 | 36 | #: ../../source/Zika.ipynb:60 37 | msgid "" 38 | "Primeiro vamos começar pelo carregamento dos dados a partir do site do " 39 | "InfoDengue. Como o dado está no formato parquet, nós vamos utilizar a " 40 | "biblioteca pandas para carregar os dados em um Dataframe." 41 | msgstr "" 42 | 43 | #: ../../source/Zika.ipynb:87 44 | msgid "Vejamos os nomes da variáveis" 45 | msgstr "" 46 | 47 | #: ../../source/Zika.ipynb:434 48 | msgid "" 49 | "Estes dados correspondem a todos os casos de Zika notificados ao SUS " 50 | "durante um período. Neste caso de 2015 a 2016. Para podermos tratar " 51 | "adequadamente estes dados para fins de visualização ou análise precisamos" 52 | " corrigir os tipos das colunas. Por exemplo vamos converter as datas." 53 | msgstr "" 54 | 55 | #: ../../source/Zika.ipynb:456 56 | msgid "" 57 | "Para poder organizar os dados temporalmente, é útil indexar a tabela por " 58 | "alguma variável temporal. Vamos usar a data de notifiacão de cada caso " 59 | "como índice" 60 | msgstr "" 61 | 62 | #: ../../source/Zika.ipynb:477 63 | msgid "" 64 | "Agora podemos plotar o número de casos por semana de dengue na cidade do " 65 | "Rio de Janeiro." 66 | msgstr "" 67 | 68 | #: ../../source/Zika.ipynb:532 69 | msgid "" 70 | "Suponhamos agora que desejamos visualizar em um mapa os casos que " 71 | "ocorreram, por exemplo entre janeiro e agosto de 2016." 72 | msgstr "" 73 | 74 | #: ../../source/Zika.ipynb:544 75 | msgid "Usando Kepler.gl para visualização" 76 | msgstr "" 77 | -------------------------------------------------------------------------------- /docs/source/locale/pt/LC_MESSAGES/ESUS.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 
5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/ESUS.ipynb:21 23 | msgid "Downloading data from ESUS" 24 | msgstr "" 25 | 26 | #: ../../source/ESUS.ipynb:23 27 | msgid "" 28 | "This function alows for the download of COVID-19 data from ESUS. For Some" 29 | " States, the size of the resulting table can easily exceed the memory " 30 | "size of most personal computers, in such cases, the ESUS download " 31 | "function will stream the data to disk without filling up the memory and " 32 | "return an iterator of chunks of 1000 rows of data. The user must then " 33 | "iterate over the chunks to analyze the data." 34 | msgstr "" 35 | "Esta função permite o download de dados de COVID-19 do ESUS. Para alguns " 36 | "estados, o tamanho da tabela resultante pode facilmente exceder o tamanho " 37 | "da memória da maioria da máquina, nestes casos, a função de download do ESUS " 38 | "irá extrair os dados para o disco sem preencher a memória, e retornar um " 39 | "iterador de blocos de 1000 linhas de dados. O usuário deve, então, " 40 | "iterar sobre os blocos para analisar os dados." 41 | 42 | #: ../../source/ESUS.ipynb:737 43 | msgid "" 44 | "Now we will create a datetime index for our dataframe, but we must be " 45 | "carefull with missing dates here. For now, to enable a quick " 46 | "visualization, we will simply coerce missing dates to ``NaT``." 47 | msgstr "" 48 | "Agora vamos criar um índice de data e hora para o nosso dataframe, mas " 49 | "devemos ter cuidado com as datas ausentes aqui. Por enquanto, para " 50 | "permitir uma visualização rápida, vamos simplesmente forçar as datas ausentes a ``NaT``." 51 | 52 | #: ../../source/ESUS.ipynb:760 53 | msgid "Now we can count the cases per day and plot." 54 | msgstr "Agora podemos contar os casos diários e plotar." 55 | 56 | #: ../../source/ESUS.ipynb:1244 57 | msgid "Deduplicating the data" 58 | msgstr "Removendo a duplicidade" 59 | 60 | #: ../../source/ESUS.ipynb:1246 61 | msgid "" 62 | "ESUS records are know to have a number of duplicated records. Let's see " 63 | "here how to detect possible duplicates in the dataframe we have just " 64 | "downloaded. For that we will need the ```recordlinkage`` " 65 | "`__ package." 66 | msgstr "" 67 | "Os registros do ESUS são conhecidos por terem vários registros duplicados. " 68 | "Vamos ver aqui como detectar possíveis duplicatas no dataframe que acabamos " 69 | "de baixar. Para isso, precisaremos do pacote ```recordlinkage`` " 70 | "https://recordlinkage.readthedocs.io/en/latest/index.html`__." 71 | -------------------------------------------------------------------------------- /docs/source/locale/pt_BR/LC_MESSAGES/ESUS.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 
5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/ESUS.ipynb:21 23 | msgid "Downloading data from ESUS" 24 | msgstr "" 25 | 26 | #: ../../source/ESUS.ipynb:23 27 | msgid "" 28 | "This function alows for the download of COVID-19 data from ESUS. For Some" 29 | " States, the size of the resulting table can easily exceed the memory " 30 | "size of most personal computers, in such cases, the ESUS download " 31 | "function will stream the data to disk without filling up the memory and " 32 | "return an iterator of chunks of 1000 rows of data. The user must then " 33 | "iterate over the chunks to analyze the data." 34 | msgstr "" 35 | "Esta função permite o download de dados de COVID-19 do ESUS. Para alguns " 36 | "estados, o tamanho da tabela resultante pode facilmente exceder o tamanho " 37 | "da memória da maioria da máquina, nestes casos, a função de download do ESUS " 38 | "irá extrair os dados para o disco sem preencher a memória, e retornar um " 39 | "iterador de blocos de 1000 linhas de dados. O usuário deve, então, " 40 | "iterar sobre os blocos para analisar os dados." 41 | 42 | #: ../../source/ESUS.ipynb:737 43 | msgid "" 44 | "Now we will create a datetime index for our dataframe, but we must be " 45 | "carefull with missing dates here. For now, to enable a quick " 46 | "visualization, we will simply coerce missing dates to ``NaT``." 47 | msgstr "" 48 | "Agora vamos criar um índice de data e hora para o nosso dataframe, mas " 49 | "devemos ter cuidado com as datas ausentes aqui. Por enquanto, para " 50 | "permitir uma visualização rápida, vamos simplesmente forçar as datas ausentes a ``NaT``." 51 | 52 | #: ../../source/ESUS.ipynb:760 53 | msgid "Now we can count the cases per day and plot." 54 | msgstr "Agora podemos contar os casos diários e plotar." 55 | 56 | #: ../../source/ESUS.ipynb:1244 57 | msgid "Deduplicating the data" 58 | msgstr "Removendo a duplicidade" 59 | 60 | #: ../../source/ESUS.ipynb:1246 61 | msgid "" 62 | "ESUS records are know to have a number of duplicated records. Let's see " 63 | "here how to detect possible duplicates in the dataframe we have just " 64 | "downloaded. For that we will need the ```recordlinkage`` " 65 | "`__ package." 66 | msgstr "" 67 | "Os registros do ESUS são conhecidos por terem vários registros duplicados. " 68 | "Vamos ver aqui como detectar possíveis duplicatas no dataframe que acabamos " 69 | "de baixar. Para isso, precisaremos do pacote ```recordlinkage`` " 70 | "https://recordlinkage.readthedocs.io/en/latest/index.html`__." 
71 | -------------------------------------------------------------------------------- /pysus/ftp/databases/ibge_datasus.py: -------------------------------------------------------------------------------- 1 | __all__ = ["IBGEDATASUS"] 2 | 3 | from typing import List, Literal, Optional, Union 4 | 5 | from pysus.ftp import Database, Directory, File 6 | from pysus.ftp.utils import zfill_year 7 | 8 | 9 | class IBGEDATASUS(Database): 10 | name = "IBGE-DataSUS" 11 | paths = ( 12 | Directory("/dissemin/publicos/IBGE/POP"), 13 | Directory("/dissemin/publicos/IBGE/censo"), 14 | Directory("/dissemin/publicos/IBGE/POPTCU"), 15 | Directory("/dissemin/publicos/IBGE/projpop"), 16 | # Directory("/dissemin/publicos/IBGE/Auxiliar") # this has a different file name pattern # noqa 17 | ) 18 | metadata = { 19 | "long_name": "Populaçao Residente, Censos, Contagens " 20 | "Populacionais e Projeçoes Intercensitarias", 21 | "source": "ftp://ftp.datasus.gov.br/dissemin/publicos/IBGE", 22 | "description": ( 23 | "São aqui apresentados informações sobre a população residente, " 24 | "estratificadas por município, faixas etárias e sexo, obtidas a " 25 | "partir dos Censos Demográficos, Contagens Populacionais " 26 | "e Projeções Intercensitárias." 27 | ), 28 | } 29 | 30 | def describe(self, file: File) -> dict: 31 | if file.extension.upper() in [".ZIP"]: 32 | year = file.name.split(".")[0][-2:] 33 | description = { 34 | "name": str(file.basename), 35 | "year": zfill_year(year), 36 | "size": file.info["size"], 37 | "last_update": file.info["modify"], 38 | } 39 | return description 40 | elif file.extension.upper() == ".DBF": 41 | year = file.name[-2:] 42 | description = { 43 | "name": str(file.basename), 44 | "year": zfill_year(year), 45 | "size": file.info["size"], 46 | "last_update": file.info["modify"], 47 | } 48 | return description 49 | return {} 50 | 51 | def format(self, file: File) -> tuple: 52 | return (file.name[-2:],) 53 | 54 | def get_files( 55 | self, 56 | source: Literal["POP", "censo", "POPTCU", "projpop"] = "POPTCU", 57 | year: Optional[Union[str, int, list]] = None, 58 | *args, 59 | **kwargs, 60 | ) -> List[File]: 61 | sources = ["POP", "censo", "POPTCU", "projpop"] 62 | source_dir = None 63 | 64 | for dir in self.paths: 65 | if source in sources and source in dir.path: 66 | source_dir = dir 67 | 68 | if not source_dir: 69 | raise ValueError(f"Unkown source {source}. Options: {sources}") 70 | 71 | files = source_dir.content 72 | 73 | if year: 74 | if isinstance(year, (str, int)): 75 | files = [ 76 | f 77 | for f in files 78 | if self.describe(f)["year"] == zfill_year(year) 79 | ] 80 | elif isinstance(year, list): 81 | files = [ 82 | f 83 | for f in files 84 | if str(self.describe(f)["year"]) 85 | in [str(zfill_year(y)) for y in year] 86 | ] 87 | 88 | return files 89 | -------------------------------------------------------------------------------- /docs/source/locale/pt/LC_MESSAGES/index.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 
5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/index.rst:14 23 | msgid "Data Sources" 24 | msgstr "Bases de dados" 25 | 26 | #: ../../source/index.rst:14 27 | msgid "Tutorials" 28 | msgstr "Tutoriais" 29 | 30 | #: ../../source/index.rst:14 31 | msgid "Loading SINAN data" 32 | msgstr "Extração de dados (SINAN)" 33 | 34 | #: ../../source/index.rst:14 35 | msgid "Loading SIM data" 36 | msgstr "Extração de dados (SIM)" 37 | 38 | #: ../../source/index.rst:14 39 | msgid "Loading SIA data" 40 | msgstr "Extração de dados (SIA)" 41 | 42 | #: ../../source/index.rst:14 43 | msgid "LOADING PNI data" 44 | msgstr "Extração de dados (PNI)" 45 | 46 | #: ../../source/index.rst:14 47 | msgid "Analyzing Dengue data" 48 | msgstr "Análise de dados (Dengue)" 49 | 50 | #: ../../source/index.rst:14 51 | msgid "Analyzing Chikungunya data" 52 | msgstr "Análise de dados (Chikungunya)" 53 | 54 | #: ../../source/index.rst:14 55 | msgid "Analyzing Zika data" 56 | msgstr "Análise de dados (Zika)" 57 | 58 | #: ../../source/index.rst:14 59 | msgid "Downloading COVID data from ESUS" 60 | msgstr "Extração de dados de COVID (ESUS)" 61 | 62 | #: ../../source/index.rst:14 63 | msgid "Downloading Infogripe data" 64 | msgstr "Extração de dados (Infogripe)" 65 | 66 | #: ../../source/index.rst:14 67 | msgid "Downloading Infodengue data" 68 | msgstr "Extração de dados (Infodengue)" 69 | 70 | #: ../../source/index.rst:14 71 | msgid "Getting Official Statistics" 72 | msgstr "Estatísticas Oficiais" 73 | 74 | #: ../../source/index.rst:7 75 | msgid "Welcome to PySUS's documentation!" 76 | msgstr "Bem-vindo(a) à documentação do PySUS" 77 | 78 | #: ../../source/index.rst:9 79 | msgid "" 80 | "PySUS is a library which is a collection of helper codes for people which" 81 | " need to analyze data from SUS (Brazilian Universal Health System). " 82 | "Contributions are welcome!" 83 | msgstr "" 84 | "PySUS é uma biblioteca que consiste em uma coleção de códigos auxiliares para " 85 | "pessoas que precisam analisar dados do SUS (Sistema Único de Saúde). " 86 | "Contribuições são bem-vindas!" 87 | 88 | #: ../../source/index.rst:12 89 | msgid "Contents:" 90 | msgstr "Conteúdo:" 91 | 92 | #: ../../source/index.rst:35 93 | msgid "Indices and tables" 94 | msgstr "Tabelas e índices" 95 | 96 | #: ../../source/index.rst:37 97 | msgid ":ref:`genindex`" 98 | msgstr ":ref:`genindex`" 99 | 100 | #: ../../source/index.rst:38 101 | msgid ":ref:`modindex`" 102 | msgstr ":ref:`modindex`" 103 | 104 | #: ../../source/index.rst:39 105 | msgid ":ref:`search`" 106 | msgstr ":ref:`search`" 107 | -------------------------------------------------------------------------------- /docs/source/locale/pt_BR/LC_MESSAGES/index.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 
5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/index.rst:14 23 | msgid "Data Sources" 24 | msgstr "Bases de dados" 25 | 26 | #: ../../source/index.rst:14 27 | msgid "Tutorials" 28 | msgstr "Tutoriais" 29 | 30 | #: ../../source/index.rst:14 31 | msgid "Loading SINAN data" 32 | msgstr "Extração de dados (SINAN)" 33 | 34 | #: ../../source/index.rst:14 35 | msgid "Loading SIM data" 36 | msgstr "Extração de dados (SIM)" 37 | 38 | #: ../../source/index.rst:14 39 | msgid "Loading SIA data" 40 | msgstr "Extração de dados (SIA)" 41 | 42 | #: ../../source/index.rst:14 43 | msgid "LOADING PNI data" 44 | msgstr "Extração de dados (PNI)" 45 | 46 | #: ../../source/index.rst:14 47 | msgid "Analyzing Dengue data" 48 | msgstr "Análise de dados (Dengue)" 49 | 50 | #: ../../source/index.rst:14 51 | msgid "Analyzing Chikungunya data" 52 | msgstr "Análise de dados (Chikungunya)" 53 | 54 | #: ../../source/index.rst:14 55 | msgid "Analyzing Zika data" 56 | msgstr "Análise de dados (Zika)" 57 | 58 | #: ../../source/index.rst:14 59 | msgid "Downloading COVID data from ESUS" 60 | msgstr "Extração de dados de COVID (ESUS)" 61 | 62 | #: ../../source/index.rst:14 63 | msgid "Downloading Infogripe data" 64 | msgstr "Extração de dados (Infogripe)" 65 | 66 | #: ../../source/index.rst:14 67 | msgid "Downloading Infodengue data" 68 | msgstr "Extração de dados (Infodengue)" 69 | 70 | #: ../../source/index.rst:14 71 | msgid "Getting Official Statistics" 72 | msgstr "Estatísticas Oficiais" 73 | 74 | #: ../../source/index.rst:7 75 | msgid "Welcome to PySUS's documentation!" 76 | msgstr "Bem-vindo(a) à documentação do PySUS" 77 | 78 | #: ../../source/index.rst:9 79 | msgid "" 80 | "PySUS is a library which is a collection of helper codes for people which" 81 | " need to analyze data from SUS (Brazilian Universal Health System). " 82 | "Contributions are welcome!" 83 | msgstr "" 84 | "PySUS é uma biblioteca que consiste em uma coleção de códigos auxiliares para " 85 | "pessoas que precisam analisar dados do SUS (Sistema Único de Saúde). " 86 | "Contribuições são bem-vindas!" 
87 | 88 | #: ../../source/index.rst:12 89 | msgid "Contents:" 90 | msgstr "Conteúdo:" 91 | 92 | #: ../../source/index.rst:35 93 | msgid "Indices and tables" 94 | msgstr "Tabelas e índices" 95 | 96 | #: ../../source/index.rst:37 97 | msgid ":ref:`genindex`" 98 | msgstr ":ref:`genindex`" 99 | 100 | #: ../../source/index.rst:38 101 | msgid ":ref:`modindex`" 102 | msgstr ":ref:`modindex`" 103 | 104 | #: ../../source/index.rst:39 105 | msgid ":ref:`search`" 106 | msgstr ":ref:`search`" 107 | -------------------------------------------------------------------------------- /pysus/data/local.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path, PurePosixPath, PureWindowsPath 3 | from typing import Dict, List, Union 4 | 5 | import pandas as pd 6 | from loguru import logger 7 | from pysus.data import dbc_to_dbf, dbf_to_parquet, parse_dftypes 8 | 9 | 10 | class ParquetSet: 11 | """ 12 | A local parquet directory or file 13 | """ 14 | 15 | __path__: Union[PurePosixPath, PureWindowsPath] 16 | info: Dict 17 | 18 | def __init__(self, path: str, _pbar=None) -> None: 19 | info = {} 20 | path = Path(path) 21 | 22 | if path.suffix.lower() not in [".parquet", ".dbc", ".dbf"]: 23 | raise NotImplementedError(f"Unknown file type: {path.suffix}") 24 | 25 | if path.suffix.lower() == ".dbc": 26 | path = Path(dbc_to_dbf(path, _pbar=_pbar)) 27 | 28 | if path.suffix.lower() == ".dbf": 29 | path = Path(dbf_to_parquet(path, _pbar=_pbar)) 30 | 31 | if path.is_dir(): 32 | info["size"] = sum( 33 | f.stat().st_size for f in path.glob("**/*") if f.is_file() 34 | ) 35 | else: 36 | info["size"] = os.path.getsize(str(path)) 37 | 38 | self.__path__ = path 39 | self.info = info 40 | 41 | def __str__(self): 42 | return str(self.__path__) 43 | 44 | def __repr__(self): 45 | return str(self.__path__) 46 | 47 | def __hash__(self): 48 | return hash(str(self.__path__)) 49 | 50 | @property 51 | def path(self) -> str: 52 | return str(self.__path__) 53 | 54 | def to_dataframe(self) -> pd.DataFrame: 55 | """ 56 | Read ParquetSet file(s) into a Pandas DataFrame, concatenating the 57 | parquets into a single dataframe 58 | """ 59 | parquets = list(map(str, self.__path__.glob("*.parquet"))) 60 | chunks_list = [ 61 | pd.read_parquet(str(f), engine="fastparquet") for f in parquets 62 | ] 63 | _df = pd.concat(chunks_list, ignore_index=True) 64 | return parse_dftypes(_df) 65 | 66 | 67 | def parse_data_content( 68 | path: Union[List[str], str], _pbar=None 69 | ) -> Union[ParquetSet, List[ParquetSet]]: 70 | if isinstance(path, str): 71 | path = [path] 72 | else: 73 | path = list(path) 74 | 75 | content = [] 76 | for _path in path: 77 | data_path = Path(_path) 78 | 79 | if not data_path.exists(): 80 | continue 81 | 82 | if data_path.suffix.lower() in [".dbc", ".dbf", ".parquet"]: 83 | content.append(ParquetSet(str(data_path), _pbar=_pbar)) 84 | elif data_path.suffix.lower() == ".zip": 85 | content.append(str(data_path)) 86 | else: 87 | continue 88 | 89 | if not content: 90 | logger.warning("path must be absolute") 91 | 92 | if len(content) == 1: 93 | return content[0] 94 | return content 95 | 96 | 97 | class Data: 98 | """ 99 | A class parser. Receives an (or a list of) absolute path(s) and returns 100 | the corresponding ParquetSet instances. 
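    For example, Data("/tmp/DENGBR15.dbc") (an illustrative path) converts
    the file to Parquet and returns a single ParquetSet, while a list of
    paths returns a list of ParquetSet objects.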
101 | """ 102 | 103 | def __new__( 104 | cls, path: Union[List[str], str], _pbar=None 105 | ) -> Union[ParquetSet, List[ParquetSet]]: 106 | return parse_data_content(path, _pbar=_pbar) 107 | -------------------------------------------------------------------------------- /pysus/online_data/ESUS.py: -------------------------------------------------------------------------------- 1 | import os 2 | from datetime import date 3 | 4 | import pandas as pd 5 | from elasticsearch import Elasticsearch, helpers 6 | from loguru import logger 7 | from pysus.ftp import CACHEPATH 8 | 9 | 10 | def download(uf, cache=True, checkmemory=True): 11 | """ 12 | Download ESUS data by UF 13 | :param uf: rj, mg, etc 14 | :param cache: if results should be cached on disk 15 | :return: DataFrame if data fits in memory, 16 | other an iterator of chunks of size 1000. 17 | """ 18 | uf = uf.lower() 19 | user = "user-public-notificacoes" 20 | pwd = "Za4qNXdyQNSa9YaA" 21 | today = date.today() 22 | dt = today.strftime("_%d_%m_%Y") 23 | base = f"desc-esus-notifica-estado-{uf}" # desc-notificacoes-esusve- 24 | url = f"https://{user}:{pwd}@elasticsearch-saps.saude.gov.br" # noqa: E231 25 | out = f"ESUS_{uf}_{dt}.parquet" 26 | 27 | cachefile = os.path.join(CACHEPATH, out) 28 | tempfile = os.path.join(CACHEPATH, f"ESUS_temp_{uf.upper()}.csv.gz") 29 | if os.path.exists(cachefile): 30 | logger.info(f"Local parquet file found at {cachefile}") 31 | df = pd.read_parquet(cachefile) 32 | elif os.path.exists(tempfile): 33 | logger.info(f"Local csv file found at {tempfile}") 34 | df = pd.read_csv(tempfile, chunksize=1000) 35 | else: 36 | fname = fetch(base, uf, url) 37 | size = os.stat(fname).st_size 38 | if size > 50e6 and checkmemory: 39 | print(f"Downloaded data is to large: {size / 1e6} MB compressed.") 40 | print( 41 | "Only loading the first 1000 rows. 
If your computer has enough" 42 | + " memory, set 'checkmemory' to False" 43 | ) 44 | print(f"The full data is in {fname}") 45 | df = pd.read_csv(fname, chunksize=1000) 46 | else: 47 | df = pd.read_csv(fname, low_memory=False) 48 | print(f"{df.shape[0]} records downloaded.") 49 | os.unlink(fname) 50 | if cache: 51 | df.to_parquet(cachefile) 52 | logger.info(f"Data stored as parquet at {cachefile}") 53 | 54 | return df 55 | 56 | 57 | def fetch(base, uf, url): 58 | UF = uf.upper() 59 | print(f"Reading ESUS data for {UF}") 60 | es = Elasticsearch([url], send_get_body_as="POST") 61 | body = {"query": {"match_all": {}}} 62 | results = helpers.scan(es, query=body, index=base) 63 | # df = pd.DataFrame.from_dict( 64 | # [document['_source'] for document in results] 65 | # ) 66 | 67 | chunker = chunky_fetch(results, 3000) 68 | h = 1 69 | tempfile = os.path.join(CACHEPATH, f"ESUS_temp_{UF}.csv.gz") 70 | for ch in chunker: 71 | df = pd.DataFrame.from_dict(ch) 72 | df.sintomas = df["sintomas"].str.replace( 73 | ";", 74 | "", 75 | ) # remove os ; 76 | if h: 77 | df.to_csv(tempfile) 78 | h = 0 79 | else: 80 | df.to_csv(tempfile, mode="a", header=False) 81 | # df = pd.read_csv('temp.csv.gz') 82 | 83 | return tempfile 84 | 85 | 86 | def chunky_fetch(results, chunk_size=3000): 87 | """Fetches data in chunks to preserve memory""" 88 | data = [] 89 | i = 0 90 | for d in results: 91 | data.append(d["_source"]) 92 | i += 1 93 | if i == chunk_size: 94 | yield data 95 | data = [] 96 | i = 0 97 | else: 98 | yield data 99 | -------------------------------------------------------------------------------- /pysus/online_data/SIA.py: -------------------------------------------------------------------------------- 1 | """ 2 | Downloads SIA data from Datasus FTP server 3 | Created on 21/09/18 4 | by fccoelho 5 | Modified on 22/11/22 6 | by bcbernardo 7 | license: GPL V3 or Later 8 | """ 9 | from pprint import pprint 10 | from typing import Dict, Tuple, Union 11 | 12 | from loguru import logger 13 | from pysus.ftp import CACHEPATH 14 | from pysus.ftp.databases.sia import SIA 15 | from pysus.ftp.utils import parse_UFs 16 | 17 | sia = SIA().load() 18 | 19 | 20 | group_dict: Dict[str, Tuple[str, int, int]] = { 21 | "PA": ("Produção Ambulatorial", 7, 1994), 22 | "BI": ("Boletim de Produção Ambulatorial individualizado", 1, 2008), 23 | "AD": ("APAC de Laudos Diversos", 1, 2008), 24 | "AM": ("APAC de Medicamentos", 1, 2008), 25 | "AN": ("APAC de Nefrologia", 1, 2008), 26 | "AQ": ("APAC de Quimioterapia", 1, 2008), 27 | "AR": ("APAC de Radioterapia", 1, 2008), 28 | "AB": ("APAC de Cirurgia Bariátrica", 1, 2008), 29 | "ACF": ("APAC de Confecção de Fístula", 1, 2008), 30 | "ATD": ("APAC de Tratamento Dialítico", 1, 2008), 31 | "AMP": ("APAC de Acompanhamento Multiprofissional", 1, 2008), 32 | "SAD": ("RAAS de Atenção Domiciliar", 1, 2008), 33 | "PS": ("RAAS Psicossocial", 1, 2008), 34 | } 35 | 36 | 37 | def get_available_years( 38 | group: str, 39 | states: Union[str, list] = None, 40 | ): 41 | """ 42 | Get SIA years for group and/or state and returns a list of years 43 | :param group: 44 | PA: Produção Ambulatorial (7, 1994) 45 | BI: Boletim de Produção Ambulatorial individualizado (1, 2008) 46 | AD: APAC de Laudos Diversos (1, 2008) 47 | AM: APAC de Medicamentos (1, 2008) 48 | AN: APAC de Nefrologia (1, 2008) 49 | AQ: APAC de Quimioterapia (1, 2008) 50 | AR: APAC de Radioterapia (1, 2008) 51 | AB: APAC de Cirurgia Bariátrica (1, 2008) 52 | ACF: APAC de Confecção de Fístula (1, 2008) 53 | ATD: APAC de Tratamento Dialítico (1, 
2008) 54 | AMP: APAC de Acompanhamento Multiprofissional (1, 2008) 55 | SAD: RAAS de Atenção Domiciliar (1, 2008) 56 | PS: RAAS Psicossocial (1, 2008) 57 | :param states: 2 letter state code, can be a list of UFs 58 | """ 59 | ufs = parse_UFs(states) 60 | 61 | years = dict() 62 | for uf in ufs: 63 | files = sia.get_files(group, uf=uf) 64 | years[uf] = set(sorted([sia.describe(f)["year"] for f in files])) 65 | 66 | if len(set([len(v) for v in years.values()])) > 1: 67 | logger.warning(f"Distinct years were found for UFs: {years}") 68 | 69 | return sorted(list(set.intersection(*map(set, years.values())))) 70 | 71 | 72 | def show_datatypes(): 73 | pprint(group_dict) 74 | 75 | 76 | def download( 77 | states: Union[str, list], 78 | years: Union[str, list, int], 79 | months: Union[str, list, int], 80 | groups: Union[str, list], 81 | data_dir: str = CACHEPATH, 82 | ) -> list: 83 | """ 84 | Download SIASUS records for state year and month and returns dataframe 85 | :param states: 2 letter state code, can be a list 86 | :param years: 4 digit integer, can be a list 87 | :param months: 1 to 12, can be a list 88 | :param data_dir: whether to cache files locally. default is True 89 | :param group: SIA groups. For all groups, refer to `sia.groups` 90 | :return: list of downloaded ParquetData 91 | """ 92 | files = sia.get_files(group=groups, uf=states, year=years, month=months) 93 | return sia.download(files, local_dir=data_dir) 94 | -------------------------------------------------------------------------------- /pysus/tests/test_data/test_Infodengue.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import pandas as pd 4 | import pytest 5 | from pysus.online_data.Infodengue import download, normalize, search_string 6 | 7 | 8 | class InfoDengueTestCase(unittest.TestCase): 9 | @pytest.mark.timeout(5) 10 | def test_search_string(self): 11 | get_from_dict = search_string("Curitiba") 12 | cites_mathes = { 13 | "Acajutiba": 2900306, 14 | "Aratiba": 4300901, 15 | "Bacurituba": 2101350, 16 | "Buriti": 2102200, 17 | "Buriti Bravo": 2102309, 18 | "Buritirama": 2904753, 19 | "Buritirana": 2102358, 20 | "Buritis": 3109303, 21 | "Buritizal": 3508207, 22 | "Caatiba": 2904803, 23 | "Caraíbas": 2906899, 24 | "Carnaíba": 2603900, 25 | "Caturité": 2504355, 26 | "Craíbas": 2702355, 27 | "Criciúma": 4204608, 28 | "Cristais": 3120201, 29 | "Cristal": 4306056, 30 | "Cristina": 3120508, 31 | "Cromínia": 5206503, 32 | "Cruzília": 3120805, 33 | "Cuiabá": 5103403, 34 | "Cuitegi": 2505204, 35 | "Curimatá": 2203206, 36 | "Curitiba": 4106902, 37 | "Curitibanos": 4204806, 38 | "Curiúva": 4107009, 39 | "Custódia": 2605103, 40 | "Cutias": 1600212, 41 | "Duartina": 3514502, 42 | "Guaraíta": 5209291, 43 | "Guariba": 3518602, 44 | "Guaribas": 2204550, 45 | "Ibatiba": 3202454, 46 | "Ibicuitinga": 2305332, 47 | "Irituia": 1503507, 48 | "Itagibá": 2915205, 49 | "Itaituba": 1503606, 50 | "Itaiçaba": 2306207, 51 | "Itatiba": 3523404, 52 | "Itaíba": 2607505, 53 | "Itiúba": 2917003, 54 | "Jequitibá": 3135704, 55 | "Juquitiba": 3526209, 56 | "Marituba": 1504422, 57 | "Mauriti": 2308104, 58 | "Mucurici": 3203601, 59 | "Muribeca": 2804300, 60 | "Muritiba": 2922300, 61 | "Peritiba": 4212601, 62 | "Piritiba": 2924801, 63 | "Taquarituba": 3553807, 64 | "Tumiritinga": 3169505, 65 | "Turiúba": 3555208, 66 | "Umburatiba": 3170305, 67 | "Urucurituba": 1304401, 68 | } 69 | pattern_city_names = search_string(substr="r de jAiro") 70 | 71 | self.assertIsInstance(get_from_dict, dict) 72 | 
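# An exact city name should return the full dict of close matches built above;
# a fuzzy, partially matching substring should still surface the intended city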
94 | 
-------------------------------------------------------------------------------- /pysus/tests/test_data/test_Infodengue.py: --------------------------------------------------------------------------------
1 | import unittest
2 | 
3 | import pandas as pd
4 | import pytest
5 | from pysus.online_data.Infodengue import download, normalize, search_string
6 | 
7 | 
8 | class InfoDengueTestCase(unittest.TestCase):
9 |     @pytest.mark.timeout(5)
10 |     def test_search_string(self):
11 |         get_from_dict = search_string("Curitiba")
12 |         cities_matches = {
13 |             "Acajutiba": 2900306,
14 |             "Aratiba": 4300901,
15 |             "Bacurituba": 2101350,
16 |             "Buriti": 2102200,
17 |             "Buriti Bravo": 2102309,
18 |             "Buritirama": 2904753,
19 |             "Buritirana": 2102358,
20 |             "Buritis": 3109303,
21 |             "Buritizal": 3508207,
22 |             "Caatiba": 2904803,
23 |             "Caraíbas": 2906899,
24 |             "Carnaíba": 2603900,
25 |             "Caturité": 2504355,
26 |             "Craíbas": 2702355,
27 |             "Criciúma": 4204608,
28 |             "Cristais": 3120201,
29 |             "Cristal": 4306056,
30 |             "Cristina": 3120508,
31 |             "Cromínia": 5206503,
32 |             "Cruzília": 3120805,
33 |             "Cuiabá": 5103403,
34 |             "Cuitegi": 2505204,
35 |             "Curimatá": 2203206,
36 |             "Curitiba": 4106902,
37 |             "Curitibanos": 4204806,
38 |             "Curiúva": 4107009,
39 |             "Custódia": 2605103,
40 |             "Cutias": 1600212,
41 |             "Duartina": 3514502,
42 |             "Guaraíta": 5209291,
43 |             "Guariba": 3518602,
44 |             "Guaribas": 2204550,
45 |             "Ibatiba": 3202454,
46 |             "Ibicuitinga": 2305332,
47 |             "Irituia": 1503507,
48 |             "Itagibá": 2915205,
49 |             "Itaituba": 1503606,
50 |             "Itaiçaba": 2306207,
51 |             "Itatiba": 3523404,
52 |             "Itaíba": 2607505,
53 |             "Itiúba": 2917003,
54 |             "Jequitibá": 3135704,
55 |             "Juquitiba": 3526209,
56 |             "Marituba": 1504422,
57 |             "Mauriti": 2308104,
58 |             "Mucurici": 3203601,
59 |             "Muribeca": 2804300,
60 |             "Muritiba": 2922300,
61 |             "Peritiba": 4212601,
62 |             "Piritiba": 2924801,
63 |             "Taquarituba": 3553807,
64 |             "Tumiritinga": 3169505,
65 |             "Turiúba": 3555208,
66 |             "Umburatiba": 3170305,
67 |             "Urucurituba": 1304401,
68 |         }
69 |         pattern_city_names = search_string(substr="r de jAiro")
70 | 
71 |         self.assertIsInstance(get_from_dict, dict)
72 |         self.assertEqual(cities_matches, get_from_dict)
73 |         self.assertIn("Rio de Janeiro", pattern_city_names.keys())
74 |         self.assertIn(4204806, get_from_dict.values())
75 | 
76 |     @pytest.mark.timeout(5)
77 |     def test_normalize(self):
78 |         normalized_str = normalize("Rio das Ostras")
79 | 
80 |         substr_list = normalized_str.split(".")
81 | 
82 |         self.assertIsInstance(substr_list, list)
83 | 
84 |         self.assertEqual(normalized_str, "rio das ostras")
85 | 
86 |     @pytest.mark.timeout(5)
87 |     def test_download(self):
88 |         df = download(
89 |             "dengue",
90 |             202129,
91 |             202152,
92 |             "Rio de Janeiro",
93 |         )
94 |         df_size = (29, 24)
95 | 
96 |         self.assertIsInstance(df, pd.DataFrame)
97 |         self.assertGreater(len(df), 0)
98 |         self.assertEqual(df_size, df.shape)
99 | 
100 | 
101 | if __name__ == "__main__":
102 |     unittest.main()
103 | 
-------------------------------------------------------------------------------- /pysus/online_data/Infodengue.py: --------------------------------------------------------------------------------
1 | import json
2 | import string
3 | from difflib import get_close_matches
4 | from pathlib import Path
5 | from typing import Dict
6 | 
7 | import pandas as pd
8 | import unidecode
9 | 
10 | 
11 | 
12 | APP_DIR = Path(__file__).resolve(strict=True).parent.parent
13 | CID10 = {"dengue": "A90", "chikungunya": "A92.0", "zika": "A928"}
14 | 
15 | with open(APP_DIR / "dataset/geocode_by_cities.json", "r") as f:
16 |     geocode_by_cities = json.load(f)
17 | 
18 | 
19 | def normalize(s):
20 |     for p in string.punctuation:
21 |         s = s.replace(p, "")
22 | 
23 |     return unidecode.unidecode(s.lower().strip())
24 | 
25 | 
26 | def search_string(substr: str) -> Dict[str, int]:
27 |     """
28 |     Fetch the geocodes of the city names matching the substring.
29 | 
30 |     Parameters
31 |     ----------
32 |     substr: Part of a city name
33 |     Returns
34 |     -------
35 |     dict: Dictionary mapping the names of all matching Brazilian
36 |         municipalities to their IBGE geocodes
37 |     """
38 |     normalized_list = [normalize(f) for f in list(geocode_by_cities.keys())]
39 | 
40 |     matching_cities = [
41 |         get_close_matches(i, normalized_list, n=55)
42 |         for i in normalize(substr).split(".")
43 |     ]
44 | 
45 |     return {
46 |         key: geocode_by_cities[key]
47 |         for key in geocode_by_cities
48 |         if normalize(key) in list(*matching_cities)
49 |     }
50 | 
51 | 
52 | def download(
53 |     disease: str,
54 |     eyw_start: int,
55 |     eyw_end: int,
56 |     city_name: str,
57 |     format="csv",
58 | ) -> pd.DataFrame:
59 |     """
60 |     Download InfoDengue API data for a municipality and disease over a
61 |     range of epidemiological weeks.
62 | 
63 |     Parameters
64 |     ----------
65 |     disease: Name of one of the diseases available in the InfoDengue
66 |         system: dengue|chikungunya|zika
67 |     eyw_start: Epidemiological week start
68 |     eyw_end: Epidemiological week end
69 |     city_name: Name of a Brazilian municipality
70 |     format="csv": Default response format for the endpoint
71 |     Returns
72 |     -------
73 |     pd.DataFrame: Pandas DataFrame with the API response
74 |     """
75 | 
76 |     geocode = geocode_by_cities.get(city_name)
77 | 
78 |     if disease not in CID10.keys():
79 |         raise Exception(
80 |             f"The diseases available are: {[k for k in CID10.keys()]}"
81 |         )
82 |     elif len(str(eyw_start)) != 6 or len(str(eyw_end)) != 6:
83 |         raise Exception(
84 |             "The epidemiological week must contain 6 digits, "
85 |             "in the format YYYYWW, from 2010 onwards. 
Example: 202248" 86 | ) 87 | elif geocode is None: 88 | list_of_cities = search_string(city_name) 89 | print(f"You must choose one of these city names: {list_of_cities}") 90 | else: 91 | s_yw = str(eyw_start) 92 | e_yw = str(eyw_end) 93 | ew_start, ey_start = s_yw[-2:], s_yw[:4] 94 | ew_end, ey_end = e_yw[-2:], e_yw[:4] 95 | url = "https://info.dengue.mat.br/api/alertcity" 96 | params = ( 97 | "&disease=" 98 | + f"{disease}" 99 | + "&geocode=" 100 | + f"{geocode}" 101 | + "&format=" 102 | + f"{format}" 103 | + "&ew_start=" 104 | + f"{ew_start}" 105 | + "&ew_end=" 106 | + f"{ew_end}" 107 | + "&ey_start=" 108 | + f"{ey_start}" 109 | + "&ey_end=" 110 | + f"{ey_end}" 111 | ) 112 | 113 | url_resp = "?".join([url, params]) 114 | return pd.read_csv(url_resp, index_col="SE").T 115 | -------------------------------------------------------------------------------- /docs/source/locale/pt/LC_MESSAGES/Infodengue.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/Infodengue.ipynb:9 23 | msgid "Working with Infodengue datasets" 24 | msgstr "Trabalhando com conjuntos de dados do Infodengue" 25 | 26 | #: ../../source/Infodengue.ipynb:11 27 | msgid "" 28 | "`InfoDengue `__ is an alert system designed " 29 | "to track arboviruses using a unique hybrid data approach that integrates " 30 | "social web data with climatic and epidemiological data. In this tutorial," 31 | " we will walk through the process of using InfoDengue's API with Python " 32 | "to fetch up-to-date arbovirus data." 33 | msgstr "" 34 | "O `InfoDengue `__ é um sistema de alerta projetado " 35 | "para rastrear arbovírus usando uma abordagem de dados híbridos exclusiva que " 36 | "integra dados de mídias sociais com dados climáticos e epidemiológicos. Neste " 37 | "tutorial, iremos percorrer o processo de uso da API do InfoDengue com Python para " 38 | "buscar dados atualizados sobre arbovírus." 39 | 40 | #: ../../source/Infodengue.ipynb:32 41 | msgid "" 42 | "Infodengue is a national-wide system, use the ``search_string`` method to" 43 | " check how the city is found in the API:" 44 | msgstr "" 45 | "Infodengue é um sistema nacional, use o método `search_string` para verificar " 46 | "como o município é encontrado na API:" 47 | 48 | #: ../../source/Infodengue.ipynb:135 49 | msgid "" 50 | "The download method extracts data for a specified range of " 51 | "Epidemiological Weeks (SE in pt) in the format ``YYYYWW``. The output is " 52 | "a Pandas DataFrame containing all the EWs within this range." 53 | msgstr "" 54 | "O método de download extrai dados para um intervalo específico de semanas " 55 | "epidemiológicas no formato `YYYYWW`. A saída é um DataFrame do Pandas " 56 | "contendo todas as semanas epidemiológicas dentro deste intervalo." 
57 | 58 | #: ../../source/Infodengue.ipynb:614 59 | msgid "You can save the dataframe in a CSV file" 60 | msgstr "Você pode salvar o dataframe em um arquivo CSV" 61 | 62 | #: ../../source/Infodengue.ipynb:635 63 | msgid "" 64 | "In order to fetch data with different parameters, it is possible to " 65 | "iterate over a list, for instance:" 66 | msgstr "" 67 | "Para buscar dados com diferentes parâmetros, é possível iterar sobre " 68 | "uma lista, por exemplo:" 69 | 70 | #: ../../source/Infodengue.ipynb:663 71 | msgid "Expected files:" 72 | msgstr "Arquivos esperados:" 73 | 74 | #: ../../source/Infodengue.ipynb:665 75 | msgid "dengue_rio_de_janeiro_se01_04.csv" 76 | msgstr "dengue_rio_de_janeiro_se01_04.csv" 77 | 78 | #: ../../source/Infodengue.ipynb:666 79 | msgid "dengue_rio_do_antônio_se01_04.csv" 80 | msgstr "dengue_rio_do_antônio_se01_04.csv" 81 | 82 | #: ../../source/Infodengue.ipynb:667 83 | msgid "dengue_rio_do_pires_se01_04.csv" 84 | msgstr "dengue_rio_do_pires_se01_04.csv" 85 | 86 | #: ../../source/Infodengue.ipynb:668 87 | msgid "zika_rio_de_janeiro_se01_04.csv" 88 | msgstr "zika_rio_de_janeiro_se01_04.csv" 89 | 90 | #: ../../source/Infodengue.ipynb:669 91 | msgid "zika_rio_do_antônio_se01_04.csv" 92 | msgstr "zika_rio_do_antônio_se01_04.csv" 93 | 94 | #: ../../source/Infodengue.ipynb:670 95 | msgid "zika_rio_do_pires_se01_04.csv" 96 | msgstr "zika_rio_do_pires_se01_04.csv" 97 | -------------------------------------------------------------------------------- /docs/source/locale/pt_BR/LC_MESSAGES/Infodengue.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/Infodengue.ipynb:9 23 | msgid "Working with Infodengue datasets" 24 | msgstr "Trabalhando com conjuntos de dados do Infodengue" 25 | 26 | #: ../../source/Infodengue.ipynb:11 27 | msgid "" 28 | "`InfoDengue `__ is an alert system designed " 29 | "to track arboviruses using a unique hybrid data approach that integrates " 30 | "social web data with climatic and epidemiological data. In this tutorial," 31 | " we will walk through the process of using InfoDengue's API with Python " 32 | "to fetch up-to-date arbovirus data." 33 | msgstr "" 34 | "O `InfoDengue `__ é um sistema de alerta projetado " 35 | "para rastrear arbovírus usando uma abordagem de dados híbridos exclusiva que " 36 | "integra dados de mídias sociais com dados climáticos e epidemiológicos. Neste " 37 | "tutorial, iremos percorrer o processo de uso da API do InfoDengue com Python para " 38 | "buscar dados atualizados sobre arbovírus." 
39 | 40 | #: ../../source/Infodengue.ipynb:32 41 | msgid "" 42 | "Infodengue is a national-wide system, use the ``search_string`` method to" 43 | " check how the city is found in the API:" 44 | msgstr "" 45 | "Infodengue é um sistema nacional, use o método `search_string` para verificar " 46 | "como o município é encontrado na API:" 47 | 48 | #: ../../source/Infodengue.ipynb:135 49 | msgid "" 50 | "The download method extracts data for a specified range of " 51 | "Epidemiological Weeks (SE in pt) in the format ``YYYYWW``. The output is " 52 | "a Pandas DataFrame containing all the EWs within this range." 53 | msgstr "" 54 | "O método de download extrai dados para um intervalo específico de semanas " 55 | "epidemiológicas no formato `YYYYWW`. A saída é um DataFrame do Pandas " 56 | "contendo todas as semanas epidemiológicas dentro deste intervalo." 57 | 58 | #: ../../source/Infodengue.ipynb:614 59 | msgid "You can save the dataframe in a CSV file" 60 | msgstr "Você pode salvar o dataframe em um arquivo CSV" 61 | 62 | #: ../../source/Infodengue.ipynb:635 63 | msgid "" 64 | "In order to fetch data with different parameters, it is possible to " 65 | "iterate over a list, for instance:" 66 | msgstr "" 67 | "Para buscar dados com diferentes parâmetros, é possível iterar sobre " 68 | "uma lista, por exemplo:" 69 | 70 | #: ../../source/Infodengue.ipynb:663 71 | msgid "Expected files:" 72 | msgstr "Arquivos esperados:" 73 | 74 | #: ../../source/Infodengue.ipynb:665 75 | msgid "dengue_rio_de_janeiro_se01_04.csv" 76 | msgstr "dengue_rio_de_janeiro_se01_04.csv" 77 | 78 | #: ../../source/Infodengue.ipynb:666 79 | msgid "dengue_rio_do_antônio_se01_04.csv" 80 | msgstr "dengue_rio_do_antônio_se01_04.csv" 81 | 82 | #: ../../source/Infodengue.ipynb:667 83 | msgid "dengue_rio_do_pires_se01_04.csv" 84 | msgstr "dengue_rio_do_pires_se01_04.csv" 85 | 86 | #: ../../source/Infodengue.ipynb:668 87 | msgid "zika_rio_de_janeiro_se01_04.csv" 88 | msgstr "zika_rio_de_janeiro_se01_04.csv" 89 | 90 | #: ../../source/Infodengue.ipynb:669 91 | msgid "zika_rio_do_antônio_se01_04.csv" 92 | msgstr "zika_rio_do_antônio_se01_04.csv" 93 | 94 | #: ../../source/Infodengue.ipynb:670 95 | msgid "zika_rio_do_pires_se01_04.csv" 96 | msgstr "zika_rio_do_pires_se01_04.csv" 97 | -------------------------------------------------------------------------------- /docs/source/kepler_config.json: -------------------------------------------------------------------------------- 1 | {"version": "v1", "config": {"visState": {"filters": [], "layers": [{"id": "ydiyslk", "type": "point", "config": {"dataId": "Casos de Zika em 2016", "label": "Casos de Zika", "color": [183, 136, 94], "columns": {"lat": "latitude", "lng": "longitude", "altitude": null}, "isVisible": true, "visConfig": {"radius": 10, "fixedRadius": false, "opacity": 0.8, "outline": false, "thickness": 2, "strokeColor": null, "colorRange": {"name": "Global Warming", "type": "sequential", "category": "Uber", "colors": ["#5A1846", "#900C3F", "#C70039", "#E3611C", "#F1920E", "#FFC300"]}, "strokeColorRange": {"name": "Global Warming", "type": "sequential", "category": "Uber", "colors": ["#5A1846", "#900C3F", "#C70039", "#E3611C", "#F1920E", "#FFC300"]}, "radiusRange": [1, 40], "filled": true}, "textLabel": [{"field": null, "color": [255, 255, 255], "size": 18, "offset": [0, 0], "anchor": "start", "alignment": "center"}]}, "visualChannels": {"colorField": null, "colorScale": "quantile", "strokeColorField": null, "strokeColorScale": "quantile", "sizeField": null, "sizeScale": "linear"}}, 
{"id": "icdqzjm", "type": "point", "config": {"dataId": "Casos de Dengue em 2016", "label": "Casos de Dengue", "color": [32, 103, 172], "columns": {"lat": "latitude", "lng": "longitude", "altitude": null}, "isVisible": true, "visConfig": {"radius": 10, "fixedRadius": false, "opacity": 0.8, "outline": false, "thickness": 2, "strokeColor": null, "colorRange": {"name": "Global Warming", "type": "sequential", "category": "Uber", "colors": ["#5A1846", "#900C3F", "#C70039", "#E3611C", "#F1920E", "#FFC300"]}, "strokeColorRange": {"name": "Global Warming", "type": "sequential", "category": "Uber", "colors": ["#5A1846", "#900C3F", "#C70039", "#E3611C", "#F1920E", "#FFC300"]}, "radiusRange": [0, 50], "filled": true}, "textLabel": [{"field": null, "color": [255, 255, 255], "size": 18, "offset": [0, 0], "anchor": "start", "alignment": "center"}]}, "visualChannels": {"colorField": null, "colorScale": "quantile", "strokeColorField": null, "strokeColorScale": "quantile", "sizeField": null, "sizeScale": "linear"}}, {"id": "u28x356", "type": "point", "config": {"dataId": "Casos de Chikungunya em 2016", "label": "Casos de Chikungunia", "color": [125, 194, 64], "columns": {"lat": "latitude", "lng": "longitude", "altitude": null}, "isVisible": true, "visConfig": {"radius": 10, "fixedRadius": false, "opacity": 0.8, "outline": false, "thickness": 2, "strokeColor": null, "colorRange": {"name": "Global Warming", "type": "sequential", "category": "Uber", "colors": ["#5A1846", "#900C3F", "#C70039", "#E3611C", "#F1920E", "#FFC300"]}, "strokeColorRange": {"name": "Global Warming", "type": "sequential", "category": "Uber", "colors": ["#5A1846", "#900C3F", "#C70039", "#E3611C", "#F1920E", "#FFC300"]}, "radiusRange": [0, 50], "filled": true}, "textLabel": [{"field": null, "color": [255, 255, 255], "size": 18, "offset": [0, 0], "anchor": "start", "alignment": "center"}]}, "visualChannels": {"colorField": null, "colorScale": "quantile", "strokeColorField": null, "strokeColorScale": "quantile", "sizeField": null, "sizeScale": "linear"}}], "interactionConfig": {"tooltip": {"fieldsToShow": {"Casos de Zika em 2016": ["ID_AGRAVO", "SEM_NOT", "NU_ANO", "ID_MUNICIP", "DT_SIN_PRI"], "Casos de Dengue em 2016": ["ID_AGRAVO", "SEM_NOT", "NU_ANO", "ID_MUNICIP", "DT_SIN_PRI"], "Casos de Chikungunya em 2016": ["ID_AGRAVO", "SEM_NOT", "NU_ANO", "ID_MUNICIP", "DT_SIN_PRI"]}, "enabled": true}, "brush": {"size": 0.5, "enabled": false}}, "layerBlending": "normal", "splitMaps": [], "animationConfig": {"currentTime": null, "speed": 1}}, "mapState": {"bearing": 24, "dragRotate": true, "latitude": -22.802329589865103, "longitude": -43.383896341999, "pitch": 50, "zoom": 9.379836309981588, "isSplit": false}, "mapStyle": {"styleType": "dark", "topLayerGroups": {}, "visibleLayerGroups": {"label": true, "road": true, "border": false, "building": true, "water": true, "land": true, "3d building": false}, "threeDBuildingColor": [9.665468314072013, 17.18305478057247, 31.1442867897876], "mapStyles": {}}}} 2 | -------------------------------------------------------------------------------- /pysus/online_data/CNES.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | from loguru import logger 4 | from pysus.ftp import CACHEPATH 5 | from pysus.ftp.databases.cnes import CNES 6 | from pysus.ftp.utils import parse_UFs 7 | 8 | cnes = CNES().load() 9 | 10 | 11 | group_dict = { 12 | "LT": ["Leitos - A partir de Out/2005", 10, 2005], 13 | "ST": ["Estabelecimentos - A partir de Ago/2005", 8, 2005], 
14 | "DC": ["Dados Complementares - A partir de Ago/2005", 8, 2005], 15 | "EQ": ["Equipamentos - A partir de Ago/2005", 8, 2005], 16 | "SR": ["Serviço Especializado - A partir de Ago/2005", 8, 2005], 17 | "HB": ["Habilitação - A partir de Mar/2007", 3, 2007], 18 | "PF": ["Profissional - A partir de Ago/2005", 8, 2005], 19 | "EP": ["Equipes - A partir de Abr/2007", 5, 2007], 20 | "IN": ["Incentivos - A partir de Nov/2007", 11, 2007], 21 | "RC": ["Regra Contratual - A partir de Mar/2007", 3, 2007], 22 | "EE": ["Estabelecimento de Ensino - A partir de Mar/2007", 3, 2007], 23 | "EF": ["Estabelecimento Filantrópico - A partir de Mar/2007", 3, 2007], 24 | "GM": ["Gestão e Metas - A partir de Jun/2007", 6, 2007], 25 | } 26 | 27 | 28 | def get_available_years( 29 | group: str, 30 | states: Union[str, list] = None, 31 | ): 32 | """ 33 | Get CNES years for group and/or state and returns a 34 | list of years 35 | :param group: 36 | LT – Leitos - A partir de Out/2005 37 | ST – Estabelecimentos - A partir de Ago/2005 38 | DC - Dados Complementares - A partir de Ago/2005 39 | EQ – Equipamentos - A partir de Ago/2005 40 | SR - Serviço Especializado - A partir de Ago/2005 41 | HB – Habilitação - A partir de Mar/2007 42 | PF – Profissional - A partir de Ago/2005 43 | EP – Equipes - A partir de Abr/2007 44 | IN – Incentivos - A partir de Nov/2007 45 | RC - Regra Contratual - A partir de Mar/2007 46 | EE - Estabelecimento de Ensino - A partir de Mar/2007 47 | EF - Estabelecimento Filantrópico - A partir de Mar/2007 48 | GM - Gestão e Metas - A partir de Jun/2007 49 | :param states: 2 letter state code, can be a list of UFs 50 | """ 51 | cnes.load(group) 52 | 53 | ufs = parse_UFs(states) 54 | 55 | years = dict() 56 | for uf in ufs: 57 | files = cnes.get_files(group, uf=uf) 58 | years[uf] = sorted([cnes.describe(f)["year"] for f in files]) 59 | 60 | if len(set([len(v) for v in years.values()])) > 1: 61 | logger.warning(f"Distinct years were found for UFs: {years}") 62 | 63 | return sorted(list(set.intersection(*map(set, years.values())))) 64 | 65 | 66 | def download( 67 | group: str, 68 | states: Union[str, list], 69 | years: Union[str, list, int], 70 | months: Union[str, list, int], 71 | data_dir: str = CACHEPATH, 72 | ) -> list: 73 | """ 74 | Download CNES records for group, state, year and month and returns a 75 | list of local parquet files 76 | :param group: 77 | LT – Leitos - A partir de Out/2005 78 | ST – Estabelecimentos - A partir de Ago/2005 79 | DC - Dados Complementares - A partir de Ago/2005 80 | EQ – Equipamentos - A partir de Ago/2005 81 | SR - Serviço Especializado - A partir de Ago/2005 82 | HB – Habilitação - A partir de Mar/2007 83 | PF – Profissional - A partir de Ago/2005 84 | EP – Equipes - A partir de Abr/2007 85 | IN – Incentivos - A partir de Nov/2007 86 | RC - Regra Contratual - A partir de Mar/2007 87 | EE - Estabelecimento de Ensino - A partir de Mar/2007 88 | EF - Estabelecimento Filantrópico - A partir de Mar/2007 89 | GM - Gestão e Metas - A partir de Jun/2007 90 | :param months: 1 to 12, can be a list of years 91 | :param states: 2 letter state code, can be a list of UFs 92 | :param years: 4 digit integer, can be a list of years 93 | """ 94 | files = cnes.get_files(group, states, years, months) 95 | return cnes.download(files, local_dir=data_dir) 96 | -------------------------------------------------------------------------------- /pysus/ftp/databases/pni.py: -------------------------------------------------------------------------------- 1 | __all__ = ["PNI"] 2 | 3 | from 
96 | 
-------------------------------------------------------------------------------- /pysus/ftp/databases/pni.py: --------------------------------------------------------------------------------
1 | __all__ = ["PNI"]
2 | 
3 | from typing import List, Literal, Optional, Union
4 | 
5 | from pysus.ftp import Database, Directory, File
6 | from pysus.ftp.utils import UFs, parse_UFs, to_list, zfill_year
7 | 
8 | 
9 | class PNI(Database):
10 |     name = "PNI"
11 |     paths = (Directory("/dissemin/publicos/PNI/DADOS"),)
12 |     metadata = {
13 |         "long_name": (
14 |             "Sistema de Informações do Programa Nacional de Imunizações"
15 |         ),
16 |         "source": (  # TODO: these URLs point to SIH pages; verify the PNI links
17 |             "https://datasus.saude.gov.br/acesso-a-informacao/morbidade-hospitalar-do-sus-sih-sus/",  # noqa
18 |             "https://datasus.saude.gov.br/acesso-a-informacao/producao-hospitalar-sih-sus/",  # noqa
19 |         ),
20 |         "description": (
21 |             "O SI-PNI é um sistema desenvolvido para possibilitar aos "
22 |             "gestores envolvidos no Programa Nacional de Imunização, a "
23 |             "avaliação dinâmica do risco quanto à ocorrência de surtos ou "
24 |             "epidemias, a partir do registro dos imunobiológicos aplicados e "
25 |             "do quantitativo populacional vacinado, agregados por faixa "
26 |             "etária, período de tempo e área geográfica. Possibilita também "
27 |             "o controle do estoque de imunobiológicos necessário aos "
28 |             "administradores que têm a incumbência de programar sua aquisição "
29 |             "e distribuição. Controla as indicações de aplicação de "
30 |             "vacinas de imunobiológicos especiais e seus eventos adversos, "
31 |             "dentro dos Centros de Referências em imunobiológicos especiais."
32 |         ),
33 |     }
34 |     groups = {
35 |         "CPNI": "Cobertura Vacinal",  # TODO: may be incorrect
36 |         "DPNI": "Doses Aplicadas",  # TODO: may be incorrect
37 |     }
38 | 
39 |     def describe(self, file: File) -> dict:
40 |         if file.extension.upper() in [".DBC", ".DBF"]:
41 |             group, _uf, year = self.format(file)
42 | 
43 |             try:
44 |                 uf = UFs[_uf]
45 |             except KeyError:
46 |                 uf = _uf
47 | 
48 |             description = {
49 |                 "name": file.basename,
50 |                 "group": self.groups[group],
51 |                 "uf": uf,
52 |                 "year": zfill_year(year),
53 |                 "size": file.info["size"],
54 |                 "last_update": file.info["modify"],
55 |             }
56 | 
57 |             return description
58 |         return {}
59 | 
60 |     def format(self, file: File) -> tuple:
61 | 
62 |         if len(file.name) != 8:
63 |             raise ValueError(f"Can't format {file.name}")
64 | 
65 |         n = file.name
66 |         group, _uf, year = n[:4], n[4:6], n[-2:]
67 |         return group, _uf, zfill_year(year)
68 | 
69 |     def get_files(
70 |         self,
71 |         group: Union[list, Literal["CPNI", "DPNI"]],
72 |         uf: Optional[Union[List[str], str]] = None,
73 |         year: Optional[Union[list, str, int]] = None,
74 |     ) -> List[File]:
75 |         files = list(
76 |             filter(
77 |                 lambda f: f.extension.upper() in [".DBC", ".DBF"], self.files
78 |             )
79 |         )
80 | 
81 |         groups = [gr.upper() for gr in to_list(group)]
82 | 
83 |         if not all(gr in list(self.groups) for gr in groups):
84 |             raise ValueError(
85 |                 "Unknown PNI Group(s): "
86 |                 f"{set(groups).difference(list(self.groups))}"
87 |             )
88 | 
89 |         files = list(filter(lambda f: self.format(f)[0] in groups, files))
90 | 
91 |         if uf:
92 |             ufs = parse_UFs(uf)
93 |             files = list(filter(lambda f: self.format(f)[1] in ufs, files))
94 | 
95 |         if year or str(year) in ["0", "00"]:
96 |             years = [zfill_year(str(m)[-2:]) for m in to_list(year)]
97 |             files = list(filter(lambda f: self.format(f)[2] in years, files))
98 | 
99 |         return files
100 | 
-------------------------------------------------------------------------------- /docs/source/locale/pt/LC_MESSAGES/IBGE_data.po: --------------------------------------------------------------------------------
1 | # SOME DESCRIPTIVE TITLE.
2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/IBGE_data.ipynb:9 23 | msgid "Obtaining data from IBGE" 24 | msgstr "Extraindo dados do IBGE" 25 | 26 | #: ../../source/IBGE_data.ipynb:35 27 | msgid "Listing Subject Areas" 28 | msgstr "Listando Grupos de dados" 29 | 30 | #: ../../source/IBGE_data.ipynb:37 31 | msgid "" 32 | "IBGE makes available data from a number of surveys on different subjects." 33 | " We can find out what is available, before downloading data." 34 | msgstr "" 35 | "O IBGE disponibiliza dados de várias pesquisas sobre diferentes assuntos. " 36 | "Podemos descobrir o que está disponível antes de baixar os dados." 37 | 38 | #: ../../source/IBGE_data.ipynb:293 39 | msgid "" 40 | "Let's look at the datasets (called \"agregados\") available within the " 41 | "category of \"Censo Demografico\"." 42 | msgstr "" 43 | "Vamos dar uma olhada nos conjuntos de dados (chamados \"agregados\") " 44 | "disponíveis dentro da categoria de \"Censo Demográfico\"." 45 | 46 | #: ../../source/IBGE_data.ipynb:545 47 | msgid "Again for population projections" 48 | msgstr "Novamente para projeções populacionais" 49 | 50 | #: ../../source/IBGE_data.ipynb:670 51 | msgid "Downloading data" 52 | msgstr "Extraindo dados" 53 | 54 | #: ../../source/IBGE_data.ipynb:672 55 | msgid "" 56 | "Before downloading the data, it may be useful to look at the metadata of " 57 | "the dataset we are interested in." 58 | msgstr "" 59 | "Antes de baixar os dados, pode ser útil olhar para os metadados do " 60 | "conjunto de dados que estamos interessados." 61 | 62 | #: ../../source/IBGE_data.ipynb:1325 63 | msgid "" 64 | "To actually download the data after chosing the dataset, we can use the " 65 | "``FetchData`` class, which will fetch the data and make it available both" 66 | " in JSON format and Dataframe as exemplified below." 67 | msgstr "" 68 | "Para baixar os dados depois de escolher o conjunto de dados, podemos usar a classe " 69 | "`FetchData`, que irá buscar os dados e disponibilizá-los tanto em formato JSON quanto em " 70 | "Dataframe, conforme exemplificado abaixo." 71 | 72 | #: ../../source/IBGE_data.ipynb:1990 73 | msgid "Using the SIDRA endpoint" 74 | msgstr "Usando o endpoint SIDRA" 75 | 76 | #: ../../source/IBGE_data.ipynb:1992 77 | msgid "" 78 | "IBGE also has a simpler API at https://api.sidra.ibge.gov.br that PySUS " 79 | "also gives access through a simple function. Below we have table 200, " 80 | "which is a sample from the resident population. classification ``2`` is " 81 | "sex, of which I am fetching all categories: ``total``, ``Homens``, and " 82 | "``Mulheres``. Terrotorial level 6 is municipality." 83 | msgstr "" 84 | "O IBGE também possui uma API mais simples em https://api.sidra.ibge.gov.br " 85 | "que o PySUS também acessa através de uma função simples. Abaixo temos a tabela " 86 | "200, que é uma amostra da população residente. 
A classificação 2 é o sexo, do " 87 | "qual estou buscando todas as categorias: total, Homens e Mulheres. " 88 | "O nível territorial 6 é o município." 89 | 90 | #: ../../source/IBGE_data.ipynb:2572 91 | msgid "" 92 | "Suppose we just wanted a single municipality, ``Balneário Rincão (SC)``: " 93 | "Unfortunately for this one there is no data available, thus the ``...`` " 94 | "in the column ``Valor``. Try another one." 95 | msgstr "" 96 | "Suponha que quiséssemos apenas um município, `Balneário Rincão (SC)`: " 97 | "Infelizmente, para este não há dados disponíveis, portanto, há um `...` " 98 | "na coluna `Valor`. Tente outro município." 99 | -------------------------------------------------------------------------------- /docs/source/locale/pt_BR/LC_MESSAGES/IBGE_data.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/IBGE_data.ipynb:9 23 | msgid "Obtaining data from IBGE" 24 | msgstr "Extraindo dados do IBGE" 25 | 26 | #: ../../source/IBGE_data.ipynb:35 27 | msgid "Listing Subject Areas" 28 | msgstr "Listando Grupos de dados" 29 | 30 | #: ../../source/IBGE_data.ipynb:37 31 | msgid "" 32 | "IBGE makes available data from a number of surveys on different subjects." 33 | " We can find out what is available, before downloading data." 34 | msgstr "" 35 | "O IBGE disponibiliza dados de várias pesquisas sobre diferentes assuntos. " 36 | "Podemos descobrir o que está disponível antes de baixar os dados." 37 | 38 | #: ../../source/IBGE_data.ipynb:293 39 | msgid "" 40 | "Let's look at the datasets (called \"agregados\") available within the " 41 | "category of \"Censo Demografico\"." 42 | msgstr "" 43 | "Vamos dar uma olhada nos conjuntos de dados (chamados \"agregados\") " 44 | "disponíveis dentro da categoria de \"Censo Demográfico\"." 45 | 46 | #: ../../source/IBGE_data.ipynb:545 47 | msgid "Again for population projections" 48 | msgstr "Novamente para projeções populacionais" 49 | 50 | #: ../../source/IBGE_data.ipynb:670 51 | msgid "Downloading data" 52 | msgstr "Extraindo dados" 53 | 54 | #: ../../source/IBGE_data.ipynb:672 55 | msgid "" 56 | "Before downloading the data, it may be useful to look at the metadata of " 57 | "the dataset we are interested in." 58 | msgstr "" 59 | "Antes de baixar os dados, pode ser útil olhar para os metadados do " 60 | "conjunto de dados que estamos interessados." 61 | 62 | #: ../../source/IBGE_data.ipynb:1325 63 | msgid "" 64 | "To actually download the data after chosing the dataset, we can use the " 65 | "``FetchData`` class, which will fetch the data and make it available both" 66 | " in JSON format and Dataframe as exemplified below." 
67 | msgstr "" 68 | "Para baixar os dados depois de escolher o conjunto de dados, podemos usar a classe " 69 | "`FetchData`, que irá buscar os dados e disponibilizá-los tanto em formato JSON quanto em " 70 | "Dataframe, conforme exemplificado abaixo." 71 | 72 | #: ../../source/IBGE_data.ipynb:1990 73 | msgid "Using the SIDRA endpoint" 74 | msgstr "Usando o endpoint SIDRA" 75 | 76 | #: ../../source/IBGE_data.ipynb:1992 77 | msgid "" 78 | "IBGE also has a simpler API at https://api.sidra.ibge.gov.br that PySUS " 79 | "also gives access through a simple function. Below we have table 200, " 80 | "which is a sample from the resident population. classification ``2`` is " 81 | "sex, of which I am fetching all categories: ``total``, ``Homens``, and " 82 | "``Mulheres``. Terrotorial level 6 is municipality." 83 | msgstr "" 84 | "O IBGE também possui uma API mais simples em https://api.sidra.ibge.gov.br " 85 | "que o PySUS também acessa através de uma função simples. Abaixo temos a tabela " 86 | "200, que é uma amostra da população residente. A classificação 2 é o sexo, do " 87 | "qual estou buscando todas as categorias: total, Homens e Mulheres. " 88 | "O nível territorial 6 é o município." 89 | 90 | #: ../../source/IBGE_data.ipynb:2572 91 | msgid "" 92 | "Suppose we just wanted a single municipality, ``Balneário Rincão (SC)``: " 93 | "Unfortunately for this one there is no data available, thus the ``...`` " 94 | "in the column ``Valor``. Try another one." 95 | msgstr "" 96 | "Suponha que quiséssemos apenas um município, `Balneário Rincão (SC)`: " 97 | "Infelizmente, para este não há dados disponíveis, portanto, há um `...` " 98 | "na coluna `Valor`. Tente outro município." 99 | -------------------------------------------------------------------------------- /pysus/online_data/vaccine.py: -------------------------------------------------------------------------------- 1 | """ 2 | Download of vacination data. 3 | 4 | This module contains function to download from specific campains: 5 | 6 | - COVID-19 in 2020-2021 Downloaded as described [here](http://opendatasus.saude.gov.br/dataset/b772ee55-07cd-44d8-958f-b12edd004e0b/resource/5916b3a4-81e7-4ad5-adb6-b884ff198dc1/download/manual_api_vacina_covid-19.pdf) # noqa 7 | """ 8 | import json 9 | import os 10 | from json import JSONDecodeError 11 | 12 | import pandas as pd 13 | import requests 14 | from loguru import logger 15 | from pysus.ftp import CACHEPATH 16 | from requests.auth import HTTPBasicAuth 17 | 18 | 19 | def download_covid(uf=None, only_header=False): 20 | """ 21 | Download covid vaccination data for a give UF 22 | :param uf: 'RJ' | 'SP', etc. 23 | :param only_header: Used to see the header of the data before downloading. 24 | :return: dataframe iterator as returned by pandas 25 | `read_csv('Vaccine_temp_.csv.gz', chunksize=5000)` 26 | """ 27 | user = "imunizacao_public" 28 | pwd = "qlto5t&7r_@+#Tlstigi" 29 | url = "https://imunizacao-es.saude.gov.br/_search?scroll=1m" 30 | if uf is None: 31 | query = {"query": {"match_all": {}}, "size": 10000} 32 | UF = "BR" 33 | else: 34 | UF = uf.upper() 35 | query = { 36 | "query": {"match": {"paciente_endereco_uf": UF}}, 37 | "size": 10000, 38 | } 39 | 40 | logger.info(f"Searching for COVID data of {UF}") 41 | tempfile = os.path.join(CACHEPATH, f"Vaccine_temp_{UF}.csv.gz") 42 | if os.path.exists(tempfile): 43 | print( 44 | "loading from cache. Returning an iterator of Dataframes in chunks" 45 | " of 5000." 
46 | ) 47 | return pd.read_csv(tempfile, chunksize=5000) 48 | 49 | auth = HTTPBasicAuth(user, pwd) 50 | data_gen = elasticsearch_fetch(url, auth, query) 51 | 52 | if only_header: 53 | df = pd.DataFrame(next(data_gen)) 54 | logger.warning( 55 | f"Downloading data sample for visualization of {df.shape[0]} rows" 56 | ) 57 | return df 58 | 59 | h = 1 60 | for dt in data_gen: 61 | df = pd.DataFrame(dt) 62 | if h: 63 | df.to_csv(tempfile) 64 | h = 0 65 | else: 66 | df.to_csv(tempfile, mode="a", header=False) 67 | 68 | logger.info(f"{tempfile} stored at {CACHEPATH}.") 69 | df = pd.read_csv(tempfile, chunksize=5000) 70 | 71 | return df 72 | 73 | 74 | def elasticsearch_fetch(uri, auth, json_body={}): 75 | headers = { 76 | "Content-Type": "application/json", 77 | } 78 | 79 | scroll_id = "" 80 | total = 0 81 | while True: 82 | if scroll_id: 83 | uri = "https://imunizacao-es.saude.gov.br/_search/scroll" 84 | json_body["scroll_id"] = scroll_id 85 | json_body["scroll"] = "1m" 86 | if "query" in json_body: 87 | del json_body["query"] 88 | # for the continuation of the download, 89 | # query parameter is not allowed 90 | del json_body["size"] 91 | try: 92 | s = requests.Session() 93 | response = s.post(uri, auth=auth, headers=headers, json=json_body) 94 | text = response.text 95 | try: 96 | resp = json.loads(text) 97 | except JSONDecodeError: 98 | resp = text 99 | except Exception as error: 100 | print("\nelasticsearch_fetch() error:", error) 101 | raise error 102 | try: 103 | if resp["hits"]["hits"] == []: 104 | break 105 | except KeyError as e: 106 | logger.error(e) 107 | print(resp) 108 | total += len(resp["hits"]["hits"]) 109 | print(f"Downloaded {total} records\r", end="") 110 | yield [h["_source"] for h in resp["hits"]["hits"]] 111 | if "_scroll_id" in resp: 112 | scroll_id = resp["_scroll_id"] 113 | 114 | 115 | if __name__ == "__main__": 116 | print(download_covid("ba", only_header=True)) 117 | -------------------------------------------------------------------------------- /docs/source/databases/Utilities.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "c153a255-ad53-4b27-b689-4c119ea8cc52", 6 | "metadata": {}, 7 | "source": [ 8 | "## Utilities module\n", 9 | "\n", 10 | "Some helper functions that are used throughout the package: " 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "c5c639e6-fa54-482a-a91d-20a8bbe05206", 16 | "metadata": {}, 17 | "source": [ 18 | "### brasil" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 6, 24 | "id": "451830fc-04af-4003-8e70-c71d61a57ac5", 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "from pysus.utilities import brasil" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 10, 34 | "id": "76a37da8-7b41-4565-83e2-e23bfbeae5bb", 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "data": { 39 | "text/plain": [ 40 | "{'BR': 'Brasil',\n", 41 | " 'AC': 'Acre',\n", 42 | " 'AL': 'Alagoas',\n", 43 | " 'AP': 'Amapá',\n", 44 | " 'AM': 'Amazonas',\n", 45 | " 'BA': 'Bahia',\n", 46 | " 'CE': 'Ceará',\n", 47 | " 'ES': 'Espírito Santo',\n", 48 | " 'GO': 'Goiás',\n", 49 | " 'MA': 'Maranhão',\n", 50 | " 'MT': 'Mato Grosso',\n", 51 | " 'MS': 'Mato Grosso do Sul',\n", 52 | " 'MG': 'Minas Gerais',\n", 53 | " 'PA': 'Pará',\n", 54 | " 'PB': 'Paraíba',\n", 55 | " 'PR': 'Paraná',\n", 56 | " 'PE': 'Pernambuco',\n", 57 | " 'PI': 'Piauí',\n", 58 | " 'RJ': 'Rio de Janeiro',\n", 59 | " 'RN': 'Rio Grande do 
Norte',\n", 60 | " 'RS': 'Rio Grande do Sul',\n", 61 | " 'RO': 'Rondônia',\n", 62 | " 'RR': 'Roraima',\n", 63 | " 'SC': 'Santa Catarina',\n", 64 | " 'SP': 'São Paulo',\n", 65 | " 'SE': 'Sergipe',\n", 66 | " 'TO': 'Tocantins',\n", 67 | " 'DF': 'Distrito Federal'}" 68 | ] 69 | }, 70 | "execution_count": 10, 71 | "metadata": {}, 72 | "output_type": "execute_result" 73 | } 74 | ], 75 | "source": [ 76 | "brasil.UFs" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 12, 82 | "id": "965a2323-066c-45af-83f7-b20ece735089", 83 | "metadata": {}, 84 | "outputs": [ 85 | { 86 | "data": { 87 | "text/plain": [ 88 | "{1: 'Janeiro',\n", 89 | " 2: 'Fevereiro',\n", 90 | " 3: 'Março',\n", 91 | " 4: 'Abril',\n", 92 | " 5: 'Maio',\n", 93 | " 6: 'Junho',\n", 94 | " 7: 'Julho',\n", 95 | " 8: 'Agosto',\n", 96 | " 9: 'Setembro',\n", 97 | " 10: 'Outubro',\n", 98 | " 11: 'Novembro',\n", 99 | " 12: 'Dezembro'}" 100 | ] 101 | }, 102 | "execution_count": 12, 103 | "metadata": {}, 104 | "output_type": "execute_result" 105 | } 106 | ], 107 | "source": [ 108 | "brasil.MONTHS" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 14, 114 | "id": "573f2f20-f038-4384-b6f2-558bad80f276", 115 | "metadata": {}, 116 | "outputs": [ 117 | { 118 | "data": { 119 | "text/plain": [ 120 | "'Rio de Janeiro'" 121 | ] 122 | }, 123 | "execution_count": 14, 124 | "metadata": {}, 125 | "output_type": "execute_result" 126 | } 127 | ], 128 | "source": [ 129 | "# Get municipality name by IBGE's geocode \n", 130 | "# https://www.ibge.gov.br/explica/codigos-dos-municipios.php\n", 131 | "brasil.get_city_name_by_geocode(3304557)" 132 | ] 133 | } 134 | ], 135 | "metadata": { 136 | "kernelspec": { 137 | "display_name": "Python 3 (ipykernel)", 138 | "language": "python", 139 | "name": "python3" 140 | }, 141 | "language_info": { 142 | "codemirror_mode": { 143 | "name": "ipython", 144 | "version": 3 145 | }, 146 | "file_extension": ".py", 147 | "mimetype": "text/x-python", 148 | "name": "python", 149 | "nbconvert_exporter": "python", 150 | "pygments_lexer": "ipython3", 151 | "version": "3.11.8" 152 | } 153 | }, 154 | "nbformat": 4, 155 | "nbformat_minor": 5 156 | } 157 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | PySUS.egg-info/ 3 | build/ 4 | dist/ 5 | *__pycache__/* 6 | *.o 7 | *.so 8 | *.pyc 9 | *.pyo 10 | .eggs 11 | *.csv 12 | .vscode/ 13 | bin/ 14 | lib/ 15 | pyvenv.cfg 16 | *.cache/ 17 | *.env 18 | *.jupyter/ 19 | *.local/ 20 | *.mozilla/ 21 | *.ipynb_checkpoints* 22 | *.dbc 23 | *.DBC 24 | # *.dbf 25 | # *.DBF 26 | *.pickle 27 | *.parquet 28 | .virtual_documents 29 | 30 | # Byte-compiled / optimized / DLL files 31 | __pycache__/ 32 | *.py[cod] 33 | *$py.class 34 | 35 | # C extensions 36 | *.so 37 | 38 | # Distribution / packaging 39 | .Python 40 | build/ 41 | develop-eggs/ 42 | dist/ 43 | downloads/ 44 | eggs/ 45 | .eggs/ 46 | lib/ 47 | lib64/ 48 | parts/ 49 | sdist/ 50 | var/ 51 | wheels/ 52 | share/python-wheels/ 53 | *.egg-info/ 54 | .installed.cfg 55 | *.egg 56 | MANIFEST 57 | 58 | # PyInstaller 59 | # Usually these files are written by a python script from a template 60 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
61 | *.manifest 62 | *.spec 63 | 64 | # Installer logs 65 | pip-log.txt 66 | pip-delete-this-directory.txt 67 | 68 | # Unit test / coverage reports 69 | htmlcov/ 70 | .tox/ 71 | .nox/ 72 | .coverage 73 | .coverage.* 74 | .cache 75 | nosetests.xml 76 | coverage.xml 77 | *.cover 78 | *.py,cover 79 | .hypothesis/ 80 | .pytest_cache/ 81 | cover/ 82 | 83 | # Translations 84 | # 85 | *.mo 86 | *.pot 87 | 88 | # Django stuff: 89 | *.log 90 | local_settings.py 91 | db.sqlite3 92 | db.sqlite3-journal 93 | 94 | # Flask stuff: 95 | instance/ 96 | .webassets-cache 97 | 98 | # Scrapy stuff: 99 | .scrapy 100 | 101 | # Sphinx documentation 102 | docs/_build/ 103 | 104 | # PyBuilder 105 | .pybuilder/ 106 | target/ 107 | 108 | # Jupyter Notebook 109 | .ipynb_checkpoints 110 | notebooks_tmp/* 111 | 112 | # IPython 113 | profile_default/ 114 | ipython_config.py 115 | 116 | # pyenv 117 | # For a library or package, you might want to ignore these files since the code is 118 | # intended to run in multiple environments; otherwise, check them in: 119 | # .python-version 120 | 121 | # pipenv 122 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 123 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 124 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 125 | # install all needed dependencies. 126 | #Pipfile.lock 127 | 128 | # poetry 129 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 130 | # This is especially recommended for binary packages to ensure reproducibility, and is more 131 | # commonly ignored for libraries. 132 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 133 | #poetry.lock 134 | 135 | # pdm 136 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 137 | #pdm.lock 138 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 139 | # in version control. 140 | # https://pdm.fming.dev/#use-with-ide 141 | .pdm.toml 142 | 143 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 144 | __pypackages__/ 145 | 146 | # Celery stuff 147 | celerybeat-schedule 148 | celerybeat.pid 149 | 150 | # SageMath parsed files 151 | *.sage.py 152 | 153 | # Environments 154 | .env 155 | .venv 156 | env/ 157 | venv/ 158 | ENV/ 159 | env.bak/ 160 | venv.bak/ 161 | 162 | # Spyder project settings 163 | .spyderproject 164 | .spyproject 165 | 166 | # Rope project settings 167 | .ropeproject 168 | 169 | # mkdocs documentation 170 | /site 171 | 172 | # mypy 173 | .mypy_cache/ 174 | .dmypy.json 175 | dmypy.json 176 | 177 | # Pyre type checker 178 | .pyre/ 179 | 180 | # pytype static type analyzer 181 | .pytype/ 182 | 183 | # Cython debug symbols 184 | cython_debug/ 185 | 186 | # PyCharm 187 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 188 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 189 | # and can be added to the global gitignore or merged into this file. For a more nuclear 190 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
191 | .idea/ 192 | -------------------------------------------------------------------------------- /pysus/ftp/databases/sih.py: -------------------------------------------------------------------------------- 1 | __all__ = ["SIH"] 2 | 3 | from typing import List, Optional, Union 4 | 5 | from pysus.ftp import Database, Directory, File 6 | from pysus.ftp.utils import MONTHS, UFs, parse_UFs, to_list, zfill_year 7 | 8 | 9 | class SIH(Database): 10 | name = "SIH" 11 | paths = ( 12 | Directory("/dissemin/publicos/SIHSUS/199201_200712/Dados"), 13 | Directory("/dissemin/publicos/SIHSUS/200801_/Dados"), 14 | ) 15 | metadata = { 16 | "long_name": "Sistema de Informações Hospitalares", 17 | "source": ( 18 | "https://datasus.saude.gov.br/acesso-a-informacao/morbidade-hospitalar-do-sus-sih-sus/", # noqa 19 | "https://datasus.saude.gov.br/acesso-a-informacao/producao-hospitalar-sih-sus/", # noqa 20 | ), 21 | "description": ( 22 | "A finalidade do AIH (Sistema SIHSUS) é a de transcrever todos os " 23 | "atendimentos que provenientes de internações hospitalares que " 24 | "foram financiadas pelo SUS, e após o processamento, gerarem " 25 | "relatórios para os gestores que lhes possibilitem fazer os " 26 | "pagamentos dos estabelecimentos de saúde. Além disso, o nível " 27 | "Federal recebe mensalmente uma base de dados de todas as " 28 | "internações autorizadas (aprovadas ou não para pagamento) para " 29 | "que possam ser repassados às Secretarias de Saúde os valores de " 30 | "Produção de Média e Alta complexidade além dos valores de CNRAC, " 31 | "FAEC e de Hospitais Universitários – em suas variadas formas de " 32 | "contrato de gestão." 33 | ), 34 | } 35 | groups = { 36 | "RD": "AIH Reduzida", 37 | "RJ": "AIH Rejeitada", 38 | "ER": "AIH Rejeitada com erro", 39 | "SP": "Serviços Profissionais", 40 | "CH": "Cadastro Hospitalar", 41 | "CM": "", # TODO 42 | } 43 | 44 | def describe(self, file: File) -> dict: 45 | if file.extension.upper() in [".DBC", ".DBF"]: 46 | group, _uf, year, month = self.format(file) 47 | 48 | try: 49 | uf = UFs[_uf] 50 | except KeyError: 51 | uf = _uf 52 | 53 | description = { 54 | "name": file.basename, 55 | "group": self.groups[group], 56 | "uf": uf, 57 | "month": MONTHS[int(month)], 58 | "year": zfill_year(year), 59 | "size": file.info["size"], 60 | "last_update": file.info["modify"], 61 | } 62 | 63 | return description 64 | return {} 65 | 66 | def format(self, file: File) -> tuple: 67 | group, _uf = file.name[:2].upper(), file.name[2:4].upper() 68 | year, month = file.name[-4:-2], file.name[-2:] 69 | return group, _uf, zfill_year(year), month 70 | 71 | def get_files( 72 | self, 73 | group: Union[List[str], str], 74 | uf: Optional[Union[List[str], str]] = None, 75 | year: Optional[Union[list, str, int]] = None, 76 | month: Optional[Union[list, str, int]] = None, 77 | ) -> List[File]: 78 | files = list( 79 | filter( 80 | lambda f: f.extension.upper() in [".DBC", ".DBF"], self.files 81 | ) 82 | ) 83 | 84 | groups = [gr.upper() for gr in to_list(group)] 85 | 86 | if not all(gr in list(self.groups) for gr in groups): 87 | raise ValueError( 88 | f"Unknown SIH Group(s): " 89 | f"{set(groups).difference(list(self.groups))}" 90 | ) 91 | 92 | files = list(filter(lambda f: self.format(f)[0] in groups, files)) 93 | 94 | if uf: 95 | ufs = parse_UFs(uf) 96 | files = list(filter(lambda f: self.format(f)[1] in ufs, files)) 97 | 98 | if year or str(year) in ["0", "00"]: 99 | years = [zfill_year(str(m)[-2:]) for m in to_list(year)] 100 | files = list(filter(lambda f: self.format(f)[2] in 
years, files)) 101 | 102 | if month: 103 | months = [str(y)[-2:].zfill(2) for y in to_list(month)] 104 | files = list(filter(lambda f: self.format(f)[3] in months, files)) 105 | 106 | return files 107 | -------------------------------------------------------------------------------- /pysus/ftp/databases/ciha.py: -------------------------------------------------------------------------------- 1 | __all__ = ["CIHA"] 2 | 3 | from typing import List, Optional, Union 4 | 5 | from pysus.ftp import Database, Directory, File 6 | from pysus.ftp.utils import MONTHS, UFs, parse_UFs, to_list, zfill_year 7 | 8 | 9 | class CIHA(Database): 10 | name = "CIHA" 11 | paths = (Directory("/dissemin/publicos/CIHA/201101_/Dados"),) 12 | metadata = { 13 | "long_name": "Comunicação de Internação Hospitalar e Ambulatorial", 14 | "source": "http://ciha.datasus.gov.br/CIHA/index.php", 15 | "description": ( 16 | "A CIHA foi criada para ampliar o processo de planejamento, " 17 | "programação, controle, avaliação e regulação da assistência à " 18 | "saúde permitindo um conhecimento mais abrangente e profundo dos " 19 | "perfis nosológico e epidemiológico da população brasileira, da " 20 | "capacidade instalada e do potencial de produção de serviços do " 21 | "conjunto de estabelecimentos de saúde do País. O sistema permite " 22 | "o acompanhamento das ações e serviços de saúde custeados " 23 | "por: planos privados de assistência à saúde; planos públicos; " 24 | "pagamento particular por pessoa física; pagamento particular por " 25 | "pessoa jurídica; programas e projetos federais (PRONON, PRONAS, " 26 | "PROADI); recursos próprios das secretarias municipais e estaduais" 27 | " de saúde; DPVAT; gratuidade e, a partir da publicação da " 28 | "Portaria GM/MS nº 2.905/2022, consórcios públicos. 
As " 29 | "informações registradas na CIHA servem como base para o processo " 30 | "de Certificação de Entidades Beneficentes de Assistência Social " 31 | "em Saúde (CEBAS) e para monitoramento dos programas PRONAS e " 32 | "PRONON" 33 | ), 34 | } 35 | groups = { 36 | "CIHA": "Comunicação de Internação Hospitalar e Ambulatorial", 37 | } 38 | 39 | def describe(self, file: File): 40 | if not isinstance(file, File): 41 | return file 42 | 43 | if file.extension.upper() in [".DBC", ".DBF"]: 44 | group, _uf, year, month = self.format(file) 45 | 46 | try: 47 | uf = UFs[_uf] 48 | except KeyError: 49 | uf = _uf 50 | 51 | description = { 52 | "name": str(file.basename), 53 | "group": self.groups[group], 54 | "uf": uf, 55 | "month": MONTHS[int(month)], 56 | "year": zfill_year(year), 57 | "size": file.info["size"], 58 | "last_update": file.info["modify"], 59 | } 60 | 61 | return description 62 | return file 63 | 64 | def format(self, file: File) -> tuple: 65 | group, _uf = file.name[:4].upper(), file.name[4:6].upper() 66 | year, month = file.name[-4:-2], file.name[-2:] 67 | return group, _uf, zfill_year(year), month 68 | 69 | def get_files( 70 | self, 71 | uf: Optional[Union[List[str], str]] = None, 72 | year: Optional[Union[list, str, int]] = None, 73 | month: Optional[Union[list, str, int]] = None, 74 | group: Union[List[str], str] = "CIHA", 75 | ) -> List[File]: 76 | files = list( 77 | filter( 78 | lambda f: f.extension.upper() in [".DBC", ".DBF"], self.files 79 | ) 80 | ) 81 | 82 | groups = [gr.upper() for gr in to_list(group)] 83 | 84 | if not all(gr in list(self.groups) for gr in groups): 85 | raise ValueError( 86 | "Unknown CIHA Group(s): " 87 | f"{set(groups).difference(list(self.groups))}" 88 | ) 89 | 90 | files = list(filter(lambda f: self.format(f)[0] in groups, files)) 91 | 92 | if uf: 93 | ufs = parse_UFs(uf) 94 | files = list(filter(lambda f: self.format(f)[1] in ufs, files)) 95 | 96 | if year or str(year) in ["0", "00"]: 97 | years = [zfill_year(str(m)[-2:]) for m in to_list(year)] 98 | files = list(filter(lambda f: self.format(f)[2] in years, files)) 99 | 100 | if month: 101 | months = [str(y)[-2:].zfill(2) for y in to_list(month)] 102 | files = list(filter(lambda f: self.format(f)[3] in months, files)) 103 | 104 | return files 105 | -------------------------------------------------------------------------------- /pysus/preprocessing/sinan.py: -------------------------------------------------------------------------------- 1 | import os 2 | from functools import lru_cache 3 | 4 | import geocoder 5 | import numpy as np 6 | import pandas as pd 7 | import requests 8 | from dbfread import DBF 9 | 10 | 11 | def read_sinan_dbf(fname, encoding) -> pd.DataFrame: 12 | """ 13 | Read SINAN dbf file returning a Pandas Dataframe with 14 | :param fname: dbf file name 15 | :param encoding: Encoding of the dbf 16 | :return: pandas dataframe 17 | """ 18 | db = DBF(fname, encoding=encoding) 19 | df = pd.DataFrame(list(db)) 20 | 21 | def convert_week(x): 22 | try: 23 | w = int(x) % 100 24 | except ValueError: 25 | w = np.nan 26 | return w 27 | 28 | for cname in df.columns: 29 | df[cname].replace("", np.nan, inplace=True) 30 | if cname.startswith(("NU", "ID")): 31 | try: 32 | df[cname] = pd.to_numeric(df[cname]) 33 | except ValueError as e: 34 | print(f"Column {cname} could not be converted to numeric: {e}") 35 | # certain IDs can be alphanumerical 36 | pass 37 | elif cname.startswith("SEM"): 38 | df[cname] = df[cname].map(convert_week) 39 | 40 | return df 41 | 42 | 43 | @lru_cache(maxsize=None) 44 | def 
get_geocodes(geoc):
45 |     """
46 |     Return the city name and two-letter state code for a geocode
47 |     :param geoc: IBGE geocode of a municipality
48 |     :return: [city name, state code]
49 |     """
50 |     url = (
51 |         "http://cidades.ibge.gov.br/services/jSonpMuns.php?"
52 |         "busca=330&featureClass=P&style=full&maxRows=5&name_startsWith={}"
53 |     ).format(geoc)
54 |     resp = requests.get(url)
55 |     for d in resp.json()["municipios"]:
56 |         if int(geoc) == int(d["c"]):
57 |             return [d["n"].encode("latin-1").decode("utf-8"), d["s"]]
58 | 
59 |     else:  # no match found after scanning all the results
60 |         raise KeyError("could not find geocode {}".format(geoc))
61 | 
62 | 
63 | def _address_generator(df, default=""):
64 |     for row in df.iterrows():
65 |         line = dict(row[1])
66 |         try:
67 |             line["cidade"] = ",".join(get_geocodes(line["ID_MN_RESI"]))
68 |         except KeyError:
69 |             print("Could not find geocode {}, using default".format(line["ID_MN_RESI"]))
70 |             line["cidade"] = default
71 |         yield line[
72 |             "NU_NOTIFIC"
73 |         ], "{NM_LOGRADO}, {NU_NUMERO}, {NM_BAIRRO}, {cidade}, Brasil".format(
74 |             **line
75 |         )
76 | 
77 | 
78 | def geocode(sinan_df, outfile, default_city):
79 |     """
80 |     Geocode cases based on the addresses included.
81 |     :param default_city: default city to use in case of a bad geocode found in
82 |         the file. It can be "city, state"
83 |     :param sinan_df: Dataframe generated from a SINAN DBF
84 |     :param outfile: CSV file to which the geocoded coordinates are written
85 |     """
86 |     addrs = _address_generator(sinan_df, default_city)
87 |     if os.path.exists(outfile):
88 |         mode = "a"
89 |         coords = pd.read_csv(outfile)
90 |         geocoded = coords.NU_NOTIFIC.tolist()
91 |     else:
92 |         mode = "w"
93 |         geocoded = []
94 |     with open(outfile, mode) as of:
95 |         if mode == "w":
96 |             of.write("NU_NOTIFIC,latitude,longitude\n")
97 |         for nu, ad in addrs:
98 | 
99 |             if nu in geocoded:
100 |                 continue
101 |             location = geocoder.google(ad)
102 |             if location is None:
103 |                 raise NameError("Google could not find {}".format(ad))
104 |             if location.latlng == []:
105 |                 print(
106 |                     (
107 |                         "Search for {} returned {} as coordinates, trying "
108 |                         "reduced address:"
109 |                     ).format(ad, location.latlng)
110 |                 )
111 |                 ad = ",".join(ad.split(",")[2:])
112 |                 print(ad)
113 |                 location = geocoder.google(ad)
114 |             try:
115 |                 of.write(
116 |                     "{},{},{}\n".format(
117 |                         nu, location.latlng[0], location.latlng[1]
118 |                     )
119 |                 )
120 |                 print("Successfully geolocated {}".format(ad))
121 |             except IndexError:
122 |                 print(
123 |                     (
124 |                         "Search for {} returned {} as coordinates, skipping"
125 |                     ).format(ad, location.latlng)
126 |                 )
127 |                 of.write("{},nan,nan\n".format(nu))
128 | 
-------------------------------------------------------------------------------- /docs/source/rio.html: --------------------------------------------------------------------------------
(HTML content stripped in this dump: rio.html is apparently an exported interactive map of Rio de Janeiro; its markup survived only as blank numbered lines, omitted here.)
/docs/source/databases/data-sources.rst:
--------------------------------------------------------------------------------
1 | ============
2 | Data Sources
3 | ============
4 | 
5 | PySUS allows you to download data on demand from DATASUS FTP databases.
6 | Currently, the following databases can be downloaded:
7 | 
8 | 
9 | .. toctree::
10 |    :maxdepth: 2
11 | 
12 |    CNES - Cadastro Nacional de Estabelecimentos de Saúde
13 |    SINAN - Doenças e Agravos de Notificação
14 |    SINASC - Sistema de Informações sobre Nascidos Vivos
15 |    SIM - Sistema de Informação sobre Mortalidade
16 |    SIA - Sistema de Informações Ambulatoriais
17 |    SIH - Sistema de Informações Hospitalares
18 |    territory - Tables and maps about the Brazilian territory
19 | 
20 | 
21 | About SINAN
22 | ----------------------------------------
23 | #. :doc:`SINAN`
24 | 
25 | The Information System for Notifiable Diseases (Sinan) is primarily fed by the notification and investigation of cases of diseases and conditions listed in the national list of notifiable diseases. However, states and municipalities are allowed to include other significant health issues in their region, such as filariasis in the municipality of São Paulo. Its effective use allows for dynamic diagnosis of the occurrence of an event in the population, potentially providing insights into the causal explanations of notifiable diseases, as well as indicating risks to which individuals are exposed. This contributes to the identification of the epidemiological reality of a specific geographic area. Its systematic, decentralized use contributes to the democratization of information, enabling all healthcare professionals to access and make it available to the community. Therefore, it is a relevant tool to assist in health planning, defining intervention priorities, and evaluating the impact of interventions.
26 | 
27 | 
28 | About SINASC
29 | ----------------------------------------------------
30 | #. :doc:`SINASC`
31 | 
32 | The Information System on Live Births (Sistema de Informações sobre Nascidos Vivos or SINASC) was officially implemented starting from 1990 with the aim of collecting data on reported births across the entire national territory and providing birth-related data for all levels of the Healthcare System.
33 | 
34 | The Ministry of Health's Department of Health Surveillance (Secretaria de Vigilância em Saúde or SVS/MS) manages SINASC at the national level. Specifically, the responsibility for changes in layout, as well as arrangements for printing and distributing the Declaration of Live Birth (DN) forms and the System manuals, lies with the General Coordination of Information and Epidemiological Analysis (Coordenação-Geral de Informações e Análises Epidemiológicas or CGIAE) and the Department of Epidemiological Analysis and Surveillance of Non-Communicable Diseases (Departamento de Análise Epidemiológica e Vigilância de Doenças Não Transmissíveis or DAENT). The implementation of SINASC occurred gradually in all federal units and, since 1994, has been showing a higher number of registrations in many municipalities compared to what is published by the Brazilian Institute of Geography and Statistics (Instituto Brasileiro de Geografia e Estatística or IBGE) based on Civil Registry data. The system also enables the construction of useful indicators for healthcare service management planning.
35 | 36 | 37 | About SIM 38 | --------------------------------------------- 39 | #. :doc:`SIM` 40 | 41 | The Mortality Information System (Sistema de Informações sobre Mortalidade or SIM) was established by DATASUS to regularly collect data on mortality in the country. With the creation of SIM, it became possible to comprehensively capture mortality data to support various levels of public health management. Based on this information, it is possible to conduct analyses of the situation, plan, and evaluate actions and programs in the field of public health. 42 | 43 | 44 | About SIH 45 | ----------------------------------------- 46 | #. :doc:`SIH` 47 | 48 | The purpose of the AIH (SIHSUS System) is to document all hospitalization-related services that are FINANCED BY SUS and, after processing, generate reports for managers to facilitate payments to healthcare facilities. Additionally, the federal level receives a monthly database of all authorized hospitalizations (whether approved for payment or not) to enable the transfer of Production values for Medium and High complexity, as well as values for CNRAC, FAEC, and University Hospitals, in their various forms of management contracts, to the Health Departments. 49 | 50 | 51 | About SIA 52 | ------------------------------------------ 53 | #. :doc:`SIA` 54 | 55 | The SIA (Sistema de Informação Ambulatorial) is the system that enables local managers to process information related to outpatient care (non-hospital) recorded in the data collection applications for such services provided by public and private providers, whether contracted or affiliated with SUS. 56 | -------------------------------------------------------------------------------- /pysus/data/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import struct 3 | from datetime import datetime 4 | from pathlib import Path 5 | 6 | import pandas as pd 7 | import pyarrow as pa 8 | import pyarrow.parquet as pq 9 | from dbfread import DBF 10 | from pyreaddbc import dbc2dbf 11 | 12 | 13 | def dbc_to_dbf(dbc: str, _pbar=None) -> str: 14 | """ 15 | Parses DBC files into DBFs 16 | """ 17 | path = Path(dbc) 18 | 19 | if path.suffix.lower() != ".dbc": 20 | raise ValueError(f"Not a DBC file: {path}") 21 | 22 | dbf = path.with_suffix(".dbf") 23 | 24 | if _pbar: 25 | _pbar.reset(total=1) 26 | _pbar.set_description(f"{dbf.name}") 27 | 28 | _parquet = path.with_suffix(".parquet") 29 | if _parquet.exists(): 30 | path.unlink(missing_ok=True) 31 | dbf.unlink(missing_ok=True) 32 | return str(_parquet) 33 | 34 | if dbf.exists(): 35 | path.unlink(missing_ok=True) 36 | return str(dbf) 37 | 38 | dbc2dbf(str(path), str(dbf)) 39 | path.unlink() 40 | 41 | if _pbar: 42 | _pbar.update(1) 43 | 44 | return str(dbf) 45 | 46 | 47 | def stream_dbf(dbf, chunk_size=30000): 48 | """Fetches records in parquet chunks to preserve memory""" 49 | data = [] 50 | i = 0 51 | for records in dbf: 52 | data.append(records) 53 | i += 1 54 | if i == chunk_size: 55 | yield data 56 | data = [] 57 | i = 0 58 | else: 59 | yield data 60 | 61 | 62 | def decode_column(value): 63 | """ 64 | Decodes binary data to str 65 | """ 66 | if isinstance(value, bytes): 67 | return value.decode(encoding="iso-8859-1").replace("\x00", "") 68 | 69 | if isinstance(value, str): 70 | return str(value).replace("\x00", "") 71 | 72 | return value 73 | 74 | 75 | def dbf_to_parquet(dbf: str, _pbar=None) -> str: 76 | """ 77 | Parses DBF file into parquet to preserve memory 78 | """ 79 | path = Path(dbf) 80 | 
81 |     if path.suffix.lower() != ".dbf":
82 |         raise ValueError(f"Not a DBF file: {path}")
83 | 
84 |     parquet = path.with_suffix(".parquet")
85 | 
86 |     approx_final_size = (
87 |         os.path.getsize(path) / 200
88 |     )  # TODO: not best approx size
89 |     if _pbar:
90 |         _pbar.unit = "B"
91 |         _pbar.unit_scale = True
92 |         _pbar.reset(total=approx_final_size)
93 |         _pbar.set_description(f"{parquet.name}")
94 | 
95 |     if parquet.exists():
96 |         if _pbar:
97 |             _pbar.update(approx_final_size - _pbar.n)
98 |         return str(parquet)
99 | 
100 |     parquet.absolute().mkdir()
101 | 
102 |     try:
103 |         chunk_size = 30_000
104 |         for chunk in stream_dbf(
105 |             DBF(path, encoding="iso-8859-1", raw=True), chunk_size
106 |         ):
107 |             if _pbar:
108 |                 _pbar.update(chunk_size)
109 | 
110 |             chunk_df = pd.DataFrame(chunk)
111 |             table = pa.Table.from_pandas(chunk_df.map(decode_column))
112 |             pq.write_to_dataset(table, root_path=str(parquet))
113 |     except struct.error as err:
114 |         if _pbar:
115 |             _pbar.close()
116 |         Path(path).unlink()
117 |         parquet.rmdir()
118 |         raise err
119 | 
120 |     if _pbar:
121 |         _pbar.update(approx_final_size - _pbar.n)
122 | 
123 |     path.unlink()
124 | 
125 |     return str(parquet)
126 | 
127 | 
128 | def parse_dftypes(df: pd.DataFrame) -> pd.DataFrame:
129 |     """
130 |     Parse DataFrame values, cleaning blank spaces if needed
131 |     and converting dtypes into correct types.
132 |     """
133 | 
134 |     def map_column_func(column_names: list[str], func):
135 |         # Maps a function to each value in each column
136 |         columns = [c for c in df.columns if c in column_names]
137 |         df[columns] = df[columns].map(func)
138 | 
139 |     def str_to_int(string: str):
140 |         # If, after removing spaces, every character is a digit,
141 |         # return int(value). @warning it removes in-between
142 |         # spaces as well
143 |         if str(string).replace(" ", "").isnumeric():
144 |             return int(str(string).replace(" ", ""))
145 |         return string
146 | 
147 |     def str_to_date(string: str):
148 |         if isinstance(string, str):
149 |             try:
150 |                 return datetime.strptime(string, "%Y%m%d").date()
151 |             except ValueError:
152 |                 # Ignore errors, bad value
153 |                 return string
154 |         return string
155 | 
156 |     map_column_func(["DT_NOTIFIC", "DT_SIN_PRI"], str_to_date)
157 |     map_column_func(["CODMUNRES", "SEXO"], str_to_int)
158 | 
159 |     df = df.map(
160 |         lambda x: "" if str(x).isspace() else x
161 |     )  # Remove all space values
162 | 
163 |     df = df.convert_dtypes()
164 |     return df
165 | 
--------------------------------------------------------------------------------
/pysus/ftp/databases/sia.py:
--------------------------------------------------------------------------------
1 | __all__ = ["SIA"]
2 | 
3 | from typing import List, Optional, Union
4 | 
5 | from pysus.ftp import Database, Directory, File
6 | from pysus.ftp.utils import MONTHS, UFs, parse_UFs, to_list, zfill_year
7 | 
8 | 
9 | class SIA(Database):
10 |     name = "SIA"
11 |     paths = (
12 |         Directory("/dissemin/publicos/SIASUS/199407_200712/Dados"),
13 |         Directory("/dissemin/publicos/SIASUS/200801_/Dados"),
14 |     )
15 |     metadata = {
16 |         "long_name": "Sistema de Informações Ambulatoriais",
17 |         "source": "http://sia.datasus.gov.br/principal/index.php",
18 |         "description": (
19 |             "O Sistema de Informação Ambulatorial (SIA) foi instituído pela "
20 |             "Portaria GM/MS n.º 896 de 29 de junho de 1990.
Originalmente, o " 21 | "SIA foi concebido a partir do projeto SICAPS (Sistema de " 22 | "Informação e Controle Ambulatorial da Previdência Social), em " 23 | "que os conceitos, os objetivos e as diretrizes criados para o " 24 | "desenvolvimento do SICAPS foram extremamente importantes e " 25 | "amplamente utilizados para o desenvolvimento do SIA, tais" 26 | " como: (i) o acompanhamento das programações físicas e " 27 | "orçamentárias; (ii) o acompanhamento das ações de saúde " 28 | "produzidas; (iii) a agilização do pagamento e controle " 29 | "orçamentário e financeiro; e (iv) a formação de banco de dados " 30 | "para contribuir com a construção do SUS." 31 | ), 32 | } 33 | groups = { 34 | "AB": "APAC de Cirurgia Bariátrica", 35 | "ABO": "APAC de Acompanhamento Pós Cirurgia Bariátrica", 36 | "ACF": "APAC de Confecção de Fístula", 37 | "AD": "APAC de Laudos Diversos", 38 | "AM": "APAC de Medicamentos", 39 | "AMP": "APAC de Acompanhamento Multiprofissional", 40 | "AN": "APAC de Nefrologia", 41 | "AQ": "APAC de Quimioterapia", 42 | "AR": "APAC de Radioterapia", 43 | "ATD": "APAC de Tratamento Dialítico", 44 | "BI": "Boletim de Produção Ambulatorial individualizado", 45 | "IMPBO": "", # TODO 46 | "PA": "Produção Ambulatorial", 47 | "PAM": "", # TODO 48 | "PAR": "", # TODO 49 | "PAS": "", # TODO 50 | "PS": "RAAS Psicossocial", 51 | "SAD": "RAAS de Atenção Domiciliar", 52 | } 53 | 54 | def describe(self, file: File) -> dict: 55 | if file.extension.upper() == ".DBC": 56 | group, _uf, year, month = self.format(file) 57 | 58 | try: 59 | uf = UFs[_uf] 60 | except KeyError: 61 | uf = _uf 62 | 63 | description = { 64 | "name": str(file.basename), 65 | "group": self.groups[group], 66 | "uf": uf, 67 | "month": MONTHS[int(month)], 68 | "year": zfill_year(year), 69 | "size": file.info["size"], 70 | "last_update": file.info["modify"], 71 | } 72 | 73 | return description 74 | return {} 75 | 76 | def format(self, file: File) -> tuple: 77 | if file.extension.upper() in [".DBC", ".DBF"]: 78 | digits = "".join([d for d in file.name if d.isdigit()]) 79 | if "_" in file.name: 80 | name, _ = file.name.split("_") 81 | digits = "".join([d for d in name if d.isdigit()]) 82 | chars, _ = file.name.split(digits) 83 | year, month = digits[:2], digits[2:] 84 | group, uf = chars[:-2].upper(), chars[-2:].upper() 85 | return group, uf, zfill_year(year), month 86 | return () 87 | 88 | def get_files( 89 | self, 90 | group: Union[List[str], str], 91 | uf: Optional[Union[List[str], str]] = None, 92 | year: Optional[Union[list, str, int]] = None, 93 | month: Optional[Union[list, str, int]] = None, 94 | ) -> List[File]: 95 | files = list( 96 | filter( 97 | lambda f: f.extension.upper() in [".DBC", ".DBF"], self.files 98 | ) 99 | ) 100 | 101 | groups = [gr.upper() for gr in to_list(group)] 102 | 103 | if not all(gr in list(self.groups) for gr in groups): 104 | raise ValueError( 105 | "Unknown SIA Group(s): " 106 | f"{set(groups).difference(list(self.groups))}" 107 | ) 108 | 109 | files = list(filter(lambda f: self.format(f)[0] in groups, files)) 110 | 111 | if uf: 112 | ufs = parse_UFs(uf) 113 | files = list(filter(lambda f: self.format(f)[1] in ufs, files)) 114 | 115 | if year or str(year) in ["0", "00"]: 116 | years = [zfill_year(str(m)[-2:]) for m in to_list(year)] 117 | files = list(filter(lambda f: self.format(f)[2] in years, files)) 118 | 119 | if month: 120 | months = [str(y)[-2:].zfill(2) for y in to_list(month)] 121 | files = list(filter(lambda f: self.format(f)[3] in months, files)) 122 | 123 | return files 124 | 
-------------------------------------------------------------------------------- /pysus/ftp/databases/cnes.py: -------------------------------------------------------------------------------- 1 | __all__ = ["CNES"] 2 | 3 | from typing import List, Optional, Union 4 | 5 | from pysus.ftp import Database, Directory, File 6 | from pysus.ftp.utils import MONTHS, UFs, parse_UFs, to_list, zfill_year 7 | 8 | 9 | class CNES(Database): 10 | name = "CNES" 11 | paths = (Directory("/dissemin/publicos/CNES/200508_/Dados"),) 12 | metadata = { 13 | "long_name": "Cadastro Nacional de Estabelecimentos de Saúde", 14 | "source": "https://cnes.datasus.gov.br/", 15 | "description": ( 16 | "O Cadastro Nacional de Estabelecimentos de Saúde (CNES) é o " 17 | "sistema de informação oficial de cadastramento de informações " 18 | "de todos os estabelecimentos de saúde no país, independentemente " 19 | "de sua natureza jurídica ou de integrarem o Sistema Único de " 20 | "Saúde (SUS). Trata-se do cadastro oficial do Ministério da " 21 | "Saúde (MS) no tocante à realidade da capacidade instalada e " 22 | "mão-de-obra assistencial de saúde no Brasil em estabelecimentos " 23 | "de saúde públicos ou privados, com convênio SUS ou não." 24 | ), 25 | } 26 | groups = { 27 | "DC": "Dados Complementares", 28 | "EE": "Estabelecimento de Ensino", 29 | "EF": "Estabelecimento Filantrópico", 30 | "EP": "Equipes", 31 | "EQ": "Equipamentos", 32 | "GM": "Gestão e Metas", 33 | "HB": "Habilitação", 34 | "IN": "Incentivos", 35 | "LT": "Leitos", 36 | "PF": "Profissional", 37 | "RC": "Regra Contratual", 38 | "SR": "Serviço Especializado", 39 | "ST": "Estabelecimentos", 40 | } 41 | __loaded__ = set() 42 | 43 | def load( 44 | self, 45 | groups: Union[str, List[str]] = None, 46 | ): 47 | """ 48 | Loads CNES Groups into content. 
Will convert the files and directories 49 | found within FTP Directories into self.content 50 | """ 51 | if not self.__content__: 52 | self.paths[0].load() 53 | self.__content__ |= self.paths[0].__content__ 54 | 55 | if groups: 56 | groups = to_list(groups) 57 | 58 | if not all( 59 | group in self.groups for group in [gr.upper() for gr in groups] 60 | ): 61 | raise ValueError( 62 | "Unknown CNES group(s): " 63 | f"{set(groups).difference(self.groups)}" 64 | ) 65 | 66 | for group in groups: 67 | group = group.upper() 68 | if group not in self.__loaded__: 69 | directory = self.__content__[group] 70 | directory.load() 71 | self.__content__ |= directory.__content__ 72 | self.__loaded__.add(directory.name) 73 | return self 74 | 75 | def describe(self, file: File) -> dict: 76 | if not isinstance(file, File): 77 | return {} 78 | 79 | if file.name == "GMufAAmm": 80 | # Leftover 81 | return {} 82 | 83 | if file.extension.upper() in [".DBC", ".DBF"]: 84 | group, _uf, year, month = self.format(file) 85 | 86 | try: 87 | uf = UFs[_uf] 88 | except KeyError: 89 | uf = _uf 90 | 91 | description = { 92 | "name": str(file.basename), 93 | "group": self.groups[group], 94 | "uf": uf, 95 | "month": MONTHS[int(month)], 96 | "year": zfill_year(year), 97 | "size": file.info["size"], 98 | "last_update": file.info["modify"], 99 | } 100 | 101 | return description 102 | return {} 103 | 104 | def format(self, file: File) -> tuple: 105 | group, _uf = file.name[:2].upper(), file.name[2:4].upper() 106 | year, month = file.name[-4:-2], file.name[-2:] 107 | return group, _uf, zfill_year(year), month 108 | 109 | def get_files( 110 | self, 111 | group: Union[List[str], str], 112 | uf: Optional[Union[List[str], str]] = None, 113 | year: Optional[Union[list, str, int]] = None, 114 | month: Optional[Union[list, str, int]] = None, 115 | ) -> List[File]: 116 | if not group: 117 | raise ValueError("At least one CNES group is required") 118 | 119 | groups = [gr.upper() for gr in to_list(group)] 120 | 121 | self.load(groups) 122 | 123 | files = list(filter(lambda f: f.name[:2] in groups, self.files)) 124 | 125 | if uf: 126 | ufs = parse_UFs(uf) 127 | files = list(filter(lambda f: f.name[2:4] in ufs, files)) 128 | 129 | if year or str(year) in ["0", "00"]: 130 | years = [str(m)[-2:].zfill(2) for m in to_list(year)] 131 | files = list(filter(lambda f: f.name[-4:-2] in years, files)) 132 | 133 | if month: 134 | months = [str(y)[-2:].zfill(2) for y in to_list(month)] 135 | files = list(filter(lambda f: f.name[-2:] in months, files)) 136 | 137 | return files 138 | -------------------------------------------------------------------------------- /docs/source/locale/pt/LC_MESSAGES/tutorials.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 
5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/tutorials.rst:3 23 | msgid "Tutorials" 24 | msgstr "Tutoriais" 25 | 26 | #: ../../source/tutorials.rst:5 27 | msgid "" 28 | "PySUS includes some Jupyter notebooks in its distribution package to " 29 | "serve as tutorials." 30 | msgstr "" 31 | "O PySUS inclui alguns notebooks Jupyter em seu pacote de distribuição " 32 | "para servir como tutoriais." 33 | 34 | #: ../../source/tutorials.rst:8 35 | msgid "Working with SINASC databases" 36 | msgstr "Trabalhando com a base de dados SINASC" 37 | 38 | #: ../../source/tutorials.rst:9 39 | msgid "" 40 | "SINASC is the national registry of live births. With PySUS, You can " 41 | "download SINASC tables directly and have them as dataframes to integrate " 42 | "in your analysis. `See this notebook. " 43 | "`_" 44 | msgstr "" 45 | "SINASC é o registro nacional de nascidos vivos. Com o PySUS, você pode fazer " 46 | "o download direto das tabelas do SINASC e tê-las como dataframes para integrar " 47 | "em sua análise. Veja este notebook. " 48 | "``_" 49 | 50 | #: ../../source/tutorials.rst:13 51 | msgid "Working with SINAN files" 52 | msgstr "Trabalhando com arquivos SINAN" 53 | 54 | #: ../../source/tutorials.rst:15 55 | msgid "" 56 | "SINAN is the national registry of cases for diseases of required " 57 | "reporting. PySUS offers the possibility of downloading records of " 58 | "individual cases selected for futher laboratory investigation, not the " 59 | "entirety of the reported cases. To see how to download these data look at" 60 | " the example notebook provided." 61 | msgstr "" 62 | "SINAN é o registro nacional de casos de doenças de notificação obrigatória. " 63 | "O PySUS oferece a possibilidade de baixar registros de casos individuais " 64 | "selecionados para investigação laboratorial adicional, não a totalidade " 65 | "dos casos notificados. Para saber como baixar esses dados, consulte o " 66 | "notebook de exemplo fornecido." 67 | 68 | #: ../../source/tutorials.rst:18 69 | msgid "" 70 | "The sinan module in the preprocessing package can load SINAN files from " 71 | "DBF, returning a pandas DataFrame fixing the typing of some columns." 72 | msgstr "" 73 | "O módulo sinan no pacote de pré-processamento pode carregar arquivos SINAN " 74 | "do formato DBF, retornando um pandas DataFrame corrigindo a tipagem de algumas colunas." 75 | 76 | #: ../../source/tutorials.rst:20 77 | msgid "" 78 | "It also offers geocoding capabilities which attributes geographical " 79 | "coordinates to every notified case in a SINAN Dataframe. You can use your" 80 | " Google API KEY to avoid Google's free limits. To do this just create an " 81 | "environment variable called GOOGLE_API_KEY. Warning: This can take a long" 82 | " time! and can stop halfway through, due to connections timing out. But " 83 | "PySUS creates knows how to restart from the last geocoded address. `See " 84 | "this notebook. 
" 85 | "`_" 86 | msgstr "" 87 | "Ele também oferece capacidades de geocodificação que atribuem coordenadas " 88 | "geográficas a cada caso notificado em um DataFrame do SINAN. Você pode usar " 89 | "sua chave API do Google para evitar os limites gratuitos do Google. Para isso, " 90 | "basta criar uma variável de ambiente chamada GOOGLE_API_KEY. Aviso: isso pode " 91 | "levar muito tempo! e pode parar no meio do caminho, devido a conexões expirando. " 92 | "Mas o PySUS sabe como reiniciar a partir do último endereço geocodificado. " 93 | "Veja este notebook. " 94 | "``_" 95 | 96 | #: ../../source/tutorials.rst:26 97 | msgid "Working with SIH DATA" 98 | msgstr "Trabalhando com a base de dados SIH" 99 | 100 | #: ../../source/tutorials.rst:27 101 | msgid "" 102 | "SIH is DATASUS' Hospital information system and it contains detailed " 103 | "information about hospitalizations. SIH Data can also be downloaded " 104 | "directly with PySUS. `See this notebook. " 105 | "`_" 106 | msgstr "" 107 | "SIH é o sistema de informações hospitalares do DATASUS e contém informações " 108 | "detalhadas sobre internações hospitalares. Os dados do SIH também podem ser " 109 | "baixados diretamente com o PySUS. Veja este notebook. " 110 | "``_" 111 | -------------------------------------------------------------------------------- /docs/source/locale/pt_BR/LC_MESSAGES/tutorials.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/tutorials.rst:3 23 | msgid "Tutorials" 24 | msgstr "Tutoriais" 25 | 26 | #: ../../source/tutorials.rst:5 27 | msgid "" 28 | "PySUS includes some Jupyter notebooks in its distribution package to " 29 | "serve as tutorials." 30 | msgstr "" 31 | "O PySUS inclui alguns notebooks Jupyter em seu pacote de distribuição " 32 | "para servir como tutoriais." 33 | 34 | #: ../../source/tutorials.rst:8 35 | msgid "Working with SINASC databases" 36 | msgstr "Trabalhando com a base de dados SINASC" 37 | 38 | #: ../../source/tutorials.rst:9 39 | msgid "" 40 | "SINASC is the national registry of live births. With PySUS, You can " 41 | "download SINASC tables directly and have them as dataframes to integrate " 42 | "in your analysis. `See this notebook. " 43 | "`_" 44 | msgstr "" 45 | "SINASC é o registro nacional de nascidos vivos. Com o PySUS, você pode fazer " 46 | "o download direto das tabelas do SINASC e tê-las como dataframes para integrar " 47 | "em sua análise. Veja este notebook. " 48 | "``_" 49 | 50 | #: ../../source/tutorials.rst:13 51 | msgid "Working with SINAN files" 52 | msgstr "Trabalhando com arquivos SINAN" 53 | 54 | #: ../../source/tutorials.rst:15 55 | msgid "" 56 | "SINAN is the national registry of cases for diseases of required " 57 | "reporting. 
PySUS offers the possibility of downloading records of " 58 | "individual cases selected for futher laboratory investigation, not the " 59 | "entirety of the reported cases. To see how to download these data look at" 60 | " the example notebook provided." 61 | msgstr "" 62 | "SINAN é o registro nacional de casos de doenças de notificação obrigatória. " 63 | "O PySUS oferece a possibilidade de baixar registros de casos individuais " 64 | "selecionados para investigação laboratorial adicional, não a totalidade " 65 | "dos casos notificados. Para saber como baixar esses dados, consulte o " 66 | "notebook de exemplo fornecido." 67 | 68 | #: ../../source/tutorials.rst:18 69 | msgid "" 70 | "The sinan module in the preprocessing package can load SINAN files from " 71 | "DBF, returning a pandas DataFrame fixing the typing of some columns." 72 | msgstr "" 73 | "O módulo sinan no pacote de pré-processamento pode carregar arquivos SINAN " 74 | "do formato DBF, retornando um pandas DataFrame corrigindo a tipagem de algumas colunas." 75 | 76 | #: ../../source/tutorials.rst:20 77 | msgid "" 78 | "It also offers geocoding capabilities which attributes geographical " 79 | "coordinates to every notified case in a SINAN Dataframe. You can use your" 80 | " Google API KEY to avoid Google's free limits. To do this just create an " 81 | "environment variable called GOOGLE_API_KEY. Warning: This can take a long" 82 | " time! and can stop halfway through, due to connections timing out. But " 83 | "PySUS creates knows how to restart from the last geocoded address. `See " 84 | "this notebook. " 85 | "`_" 86 | msgstr "" 87 | "Ele também oferece capacidades de geocodificação que atribuem coordenadas " 88 | "geográficas a cada caso notificado em um DataFrame do SINAN. Você pode usar " 89 | "sua chave API do Google para evitar os limites gratuitos do Google. Para isso, " 90 | "basta criar uma variável de ambiente chamada GOOGLE_API_KEY. Aviso: isso pode " 91 | "levar muito tempo! e pode parar no meio do caminho, devido a conexões expirando. " 92 | "Mas o PySUS sabe como reiniciar a partir do último endereço geocodificado. " 93 | "Veja este notebook. " 94 | "``_" 95 | 96 | #: ../../source/tutorials.rst:26 97 | msgid "Working with SIH DATA" 98 | msgstr "Trabalhando com a base de dados SIH" 99 | 100 | #: ../../source/tutorials.rst:27 101 | msgid "" 102 | "SIH is DATASUS' Hospital information system and it contains detailed " 103 | "information about hospitalizations. SIH Data can also be downloaded " 104 | "directly with PySUS. `See this notebook. " 105 | "`_" 106 | msgstr "" 107 | "SIH é o sistema de informações hospitalares do DATASUS e contém informações " 108 | "detalhadas sobre internações hospitalares. Os dados do SIH também podem ser " 109 | "baixados diretamente com o PySUS. Veja este notebook. " 110 | "``_" 111 | -------------------------------------------------------------------------------- /docs/source/locale/pt/LC_MESSAGES/SINAN.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 
5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/SINAN.ipynb:9 23 | msgid "Downloading data from the SINAN database" 24 | msgstr "Baixando dados do banco de dados do SINAN" 25 | 26 | #: ../../source/SINAN.ipynb:31 27 | msgid "" 28 | "SINAN is a database of reported cases of certain diseases that Brazilian " 29 | "law requires to be reported. Unfortunately the data available for free " 30 | "download, corresponds only to the investigated cases not the totality of " 31 | "the reported cases. Nevertheless it's an interesting dataset." 32 | msgstr "" 33 | "SINAN é um banco de dados de casos notificados de certas doenças que a " 34 | "lei brasileira exige que sejam reportadas. Infelizmente, os dados " 35 | "disponíveis para download gratuito correspondem apenas aos casos " 36 | "investigados, não à totalidade dos casos reportados. No entanto, é um " 37 | "conjunto de dados interessante." 38 | 39 | #: ../../source/SINAN.ipynb:33 40 | msgid "To find out what are these diseases, we can use PySUS:" 41 | msgstr "Para descobrir quais são essas doenças, podemos usar o PySUS:" 42 | 43 | #: ../../source/SINAN.ipynb:181 44 | msgid "" 45 | "These diseases are available in countrywide tables, so if we want to see " 46 | "the cases of ``Chagas`` disease in the state of Minas Gerais, first we " 47 | "can check which years are available:" 48 | msgstr "" 49 | "Essas doenças estão disponíveis em tabelas de todo o país, então, se " 50 | "quisermos ver os casos de doença de `Chagas` no estado de Minas Gerais, " 51 | "primeiro podemos verificar quais anos estão disponíveis:" 52 | 53 | #: ../../source/SINAN.ipynb:287 54 | msgid "" 55 | "We can also check when it was last updated for every disease, and if the " 56 | "table is preliminary or final." 57 | msgstr "" 58 | "Também podemos verificar quando foi a última atualização para cada " 59 | "doença e se a tabela é preliminar ou final." 60 | 61 | #: ../../source/SINAN.ipynb:884 62 | msgid "" 63 | "We can see, that we have data in final form, from 2000 until 2019, and " 64 | "preliminary data for 2020. Now we can download it:" 65 | msgstr "" 66 | "Podemos ver que temos dados em forma final, de 2000 até 2019, e dados " 67 | "preliminares para 2020. Agora podemos baixá-los:" 68 | 69 | #: ../../source/SINAN.ipynb:1371 70 | msgid "Downloading large files" 71 | msgstr "Baixando arquivos grandes" 72 | 73 | #: ../../source/SINAN.ipynb:1373 74 | msgid "" 75 | "Some SINAN files can be quite large and can take a bit longer to download" 76 | " and convert. As the default behavior is to download data in chunks, some" 77 | " folders may contain lots of parquet chunks" 78 | msgstr "" 79 | "Alguns arquivos do SINAN podem ser bastante grandes e podem demorar um " 80 | "pouco mais para serem baixados e convertidos. Como o comportamento " 81 | "padrão é baixar dados em blocos, algumas pastas podem conter muitos " 82 | "pedaços de parquet." 
83 | 84 | #: ../../source/SINAN.ipynb:1394 85 | msgid "" 86 | "The cases of dengue where downloaded to multiple chunks to the directory " 87 | "above" 88 | msgstr "" 89 | "Os casos de dengue foram baixados em vários pedaços para o diretório " 90 | "acima" 91 | 92 | #: ../../source/SINAN.ipynb:1478 ../../source/SINAN.ipynb:1969 93 | msgid "Decoding the age in SINAN tables" 94 | msgstr "Decodificando a idade nas tabelas do SINAN" 95 | 96 | #: ../../source/SINAN.ipynb:1480 ../../source/SINAN.ipynb:1971 97 | msgid "" 98 | "In SINAN the age comes encoded. PySUS can decode the age column " 99 | "``NU_IDADE_N`` into any of these units: years, months, days, or hours." 100 | msgstr "" 101 | "No SINAN, a idade vem codificada. O PySUS pode decodificar a coluna de " 102 | "idade `NU_IDADE_N` em qualquer uma dessas unidades: anos, meses, dias " 103 | "ou horas." 104 | 105 | #: ../../source/SINAN.ipynb:2497 106 | msgid "We can easily convert dates and numerical fields in the dataframe:" 107 | msgstr "Podemos facilmente converter datas e campos numéricos no dataframe:" 108 | 109 | #: ../../source/SINAN.ipynb:2865 110 | msgid "Let's convert the age to years and save it on a different column." 111 | msgstr "Vamos converter a idade para anos e salvá-la em uma coluna diferente." 112 | 113 | #: ../../source/SINAN.ipynb:3046 114 | msgid "Saving the Modified data" 115 | msgstr "Salvando os dados modificados" 116 | 117 | #: ../../source/SINAN.ipynb:3048 118 | msgid "" 119 | "We can save our dataframe in any format we wish to avoid having to redo " 120 | "this analysis next time. If we want to keep only the data from the state " 121 | "of Minas Gerais we need to filter the table using the UF code ``31``." 122 | msgstr "" 123 | "Pode-se salvar nosso conjunto de dados em qualquer formato que desejarmos " 124 | "para evitar ter que refazer esta análise na próxima vez. Se quisermos manter " 125 | "apenas os dados do estado de Minas Gerais, precisamos filtrar a tabela usando " 126 | "o código UF `31`." 127 | -------------------------------------------------------------------------------- /docs/source/locale/pt_BR/LC_MESSAGES/SINAN.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2016, Flavio Codeco Coelho 3 | # This file is distributed under the same license as the PySUS package. 4 | # FIRST AUTHOR , 2023. 5 | # 6 | #, fuzzy 7 | msgid "" 8 | msgstr "" 9 | "Project-Id-Version: PySUS 0.1.13\n" 10 | "Report-Msgid-Bugs-To: \n" 11 | "POT-Creation-Date: 2023-04-28 16:27-0300\n" 12 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 13 | "Last-Translator: FULL NAME \n" 14 | "Language: pt\n" 15 | "Language-Team: pt \n" 16 | "Plural-Forms: nplurals=2; plural=(n != 1);\n" 17 | "MIME-Version: 1.0\n" 18 | "Content-Type: text/plain; charset=utf-8\n" 19 | "Content-Transfer-Encoding: 8bit\n" 20 | "Generated-By: Babel 2.12.1\n" 21 | 22 | #: ../../source/SINAN.ipynb:9 23 | msgid "Downloading data from the SINAN database" 24 | msgstr "Baixando dados do banco de dados do SINAN" 25 | 26 | #: ../../source/SINAN.ipynb:31 27 | msgid "" 28 | "SINAN is a database of reported cases of certain diseases that Brazilian " 29 | "law requires to be reported. Unfortunately the data available for free " 30 | "download, corresponds only to the investigated cases not the totality of " 31 | "the reported cases. Nevertheless it's an interesting dataset." 
32 | msgstr "" 33 | "SINAN é um banco de dados de casos notificados de certas doenças que a " 34 | "lei brasileira exige que sejam reportadas. Infelizmente, os dados " 35 | "disponíveis para download gratuito correspondem apenas aos casos " 36 | "investigados, não à totalidade dos casos reportados. No entanto, é um " 37 | "conjunto de dados interessante." 38 | 39 | #: ../../source/SINAN.ipynb:33 40 | msgid "To find out what are these diseases, we can use PySUS:" 41 | msgstr "Para descobrir quais são essas doenças, podemos usar o PySUS:" 42 | 43 | #: ../../source/SINAN.ipynb:181 44 | msgid "" 45 | "These diseases are available in countrywide tables, so if we want to see " 46 | "the cases of ``Chagas`` disease in the state of Minas Gerais, first we " 47 | "can check which years are available:" 48 | msgstr "" 49 | "Essas doenças estão disponíveis em tabelas de todo o país, então, se " 50 | "quisermos ver os casos de doença de `Chagas` no estado de Minas Gerais, " 51 | "primeiro podemos verificar quais anos estão disponíveis:" 52 | 53 | #: ../../source/SINAN.ipynb:287 54 | msgid "" 55 | "We can also check when it was last updated for every disease, and if the " 56 | "table is preliminary or final." 57 | msgstr "" 58 | "Também podemos verificar quando foi a última atualização para cada " 59 | "doença e se a tabela é preliminar ou final." 60 | 61 | #: ../../source/SINAN.ipynb:884 62 | msgid "" 63 | "We can see, that we have data in final form, from 2000 until 2019, and " 64 | "preliminary data for 2020. Now we can download it:" 65 | msgstr "" 66 | "Podemos ver que temos dados em forma final, de 2000 até 2019, e dados " 67 | "preliminares para 2020. Agora podemos baixá-los:" 68 | 69 | #: ../../source/SINAN.ipynb:1371 70 | msgid "Downloading large files" 71 | msgstr "Baixando arquivos grandes" 72 | 73 | #: ../../source/SINAN.ipynb:1373 74 | msgid "" 75 | "Some SINAN files can be quite large and can take a bit longer to download" 76 | " and convert. As the default behavior is to download data in chunks, some" 77 | " folders may contain lots of parquet chunks" 78 | msgstr "" 79 | "Alguns arquivos do SINAN podem ser bastante grandes e podem demorar um " 80 | "pouco mais para serem baixados e convertidos. Como o comportamento " 81 | "padrão é baixar dados em blocos, algumas pastas podem conter muitos " 82 | "pedaços de parquet." 83 | 84 | #: ../../source/SINAN.ipynb:1394 85 | msgid "" 86 | "The cases of dengue where downloaded to multiple chunks to the directory " 87 | "above" 88 | msgstr "" 89 | "Os casos de dengue foram baixados em vários pedaços para o diretório " 90 | "acima" 91 | 92 | #: ../../source/SINAN.ipynb:1478 ../../source/SINAN.ipynb:1969 93 | msgid "Decoding the age in SINAN tables" 94 | msgstr "Decodificando a idade nas tabelas do SINAN" 95 | 96 | #: ../../source/SINAN.ipynb:1480 ../../source/SINAN.ipynb:1971 97 | msgid "" 98 | "In SINAN the age comes encoded. PySUS can decode the age column " 99 | "``NU_IDADE_N`` into any of these units: years, months, days, or hours." 100 | msgstr "" 101 | "No SINAN, a idade vem codificada. O PySUS pode decodificar a coluna de " 102 | "idade `NU_IDADE_N` em qualquer uma dessas unidades: anos, meses, dias " 103 | "ou horas." 104 | 105 | #: ../../source/SINAN.ipynb:2497 106 | msgid "We can easily convert dates and numerical fields in the dataframe:" 107 | msgstr "Podemos facilmente converter datas e campos numéricos no dataframe:" 108 | 109 | #: ../../source/SINAN.ipynb:2865 110 | msgid "Let's convert the age to years and save it on a different column." 
111 | msgstr "Vamos converter a idade para anos e salvá-la em uma coluna diferente." 112 | 113 | #: ../../source/SINAN.ipynb:3046 114 | msgid "Saving the Modified data" 115 | msgstr "Salvando os dados modificados" 116 | 117 | #: ../../source/SINAN.ipynb:3048 118 | msgid "" 119 | "We can save our dataframe in any format we wish to avoid having to redo " 120 | "this analysis next time. If we want to keep only the data from the state " 121 | "of Minas Gerais we need to filter the table using the UF code ``31``." 122 | msgstr "" 123 | "Pode-se salvar nosso conjunto de dados em qualquer formato que desejarmos " 124 | "para evitar ter que refazer esta análise na próxima vez. Se quisermos manter " 125 | "apenas os dados do estado de Minas Gerais, precisamos filtrar a tabela usando " 126 | "o código UF `31`." 127 | -------------------------------------------------------------------------------- /pysus/ftp/databases/sinan.py: -------------------------------------------------------------------------------- 1 | __all__ = ["SINAN"] 2 | 3 | from typing import List, Optional, Union 4 | 5 | from pysus.ftp import Database, Directory, File 6 | from pysus.ftp.utils import to_list, zfill_year 7 | 8 | 9 | class SINAN(Database): 10 | name = "SINAN" 11 | paths = ( 12 | Directory("/dissemin/publicos/SINAN/DADOS/FINAIS"), 13 | Directory("/dissemin/publicos/SINAN/DADOS/PRELIM"), 14 | ) 15 | metadata = { 16 | "long_name": "Doenças e Agravos de Notificação", 17 | "source": "https://portalsinan.saude.gov.br/", 18 | "description": ( 19 | "The Notifiable Diseases Information System - Sinan is primarily" 20 | "fed by the notification and investigation of cases of diseases " 21 | "and conditions listed in the national list of compulsorily " 22 | "notifiable diseases (Consolidation Ordinance No. 4, September 28," 23 | " 2017, Annex). However, states and municipalities are allowed to " 24 | "include other important health problems in their region, such as " 25 | "difilobotriasis in the municipality of São Paulo. Its effective " 26 | "use enables the dynamic diagnosis of the occurrence of an event " 27 | "in the population, providing evidence for causal explanations of " 28 | "compulsorily notifiable diseases and indicating risks to which " 29 | "people are exposed. This contributes to identifying the " 30 | "epidemiological reality of a specific geographical area. Its " 31 | "systematic, decentralized use contributes to the democratization " 32 | "of information, allowing all healthcare professionals to access " 33 | "and make it available to the community. Therefore, it is a " 34 | "relevant tool to assist in health planning, define intervention " 35 | "priorities, and evaluate the impact of interventions." 
36 |         ),
37 |     }
38 | 
39 |     diseases = {
40 |         "ACBI": "Acidente de trabalho com material biológico",
41 |         "ACGR": "Acidente de trabalho",
42 |         "ANIM": "Acidente por Animais Peçonhentos",
43 |         "ANTR": "Atendimento Antirrábico",
44 |         "BOTU": "Botulismo",
45 |         "CANC": "Câncer relacionado ao trabalho",
46 |         "CHAG": "Doença de Chagas Aguda",
47 |         "CHIK": "Febre de Chikungunya",
48 |         "COLE": "Cólera",
49 |         "COQU": "Coqueluche",
50 |         "DENG": "Dengue",
51 |         "DERM": "Dermatoses ocupacionais",
52 |         "DIFT": "Difteria",
53 |         "ESQU": "Esquistossomose",
54 |         "EXAN": "Doenças exantemáticas",
55 |         "FMAC": "Febre Maculosa",
56 |         "FTIF": "Febre Tifóide",
57 |         "HANS": "Hanseníase",
58 |         "HANT": "Hantavirose",
59 |         "HEPA": "Hepatites Virais",
60 |         "IEXO": "Intoxicação Exógena",
61 |         "INFL": "Influenza Pandêmica",
62 |         "LEIV": "Leishmaniose Visceral",
63 |         "LEPT": "Leptospirose",
64 |         "LERD": "LER/Dort",
65 |         "LTAN": "Leishmaniose Tegumentar Americana",
66 |         "MALA": "Malária",
67 |         "MENI": "Meningite",
68 |         "MENT": "Transtornos mentais relacionados ao trabalho",
69 |         "NTRA": "Notificação de Tracoma",
70 |         "PAIR": "Perda auditiva por ruído relacionado ao trabalho",
71 |         "PEST": "Peste",
72 |         "PFAN": "Paralisia Flácida Aguda",
73 |         "PNEU": "Pneumoconioses relacionadas ao trabalho",
74 |         "RAIV": "Raiva",
75 |         "SDTA": "Surto Doenças Transmitidas por Alimentos",
76 |         "SIFA": "Sífilis Adquirida",
77 |         "SIFC": "Sífilis Congênita",
78 |         "SIFG": "Sífilis em Gestante",
79 |         "SRC": "Síndrome da Rubéola Congênita",
80 |         "TETA": "Tétano Acidental",
81 |         "TETN": "Tétano Neonatal",
82 |         "TOXC": "Toxoplasmose Congênita",
83 |         "TOXG": "Toxoplasmose Gestacional",
84 |         "TRAC": "Inquérito de Tracoma",
85 |         "TUBE": "Tuberculose",
86 |         "VARC": "Varicela",
87 |         "VIOL": "Violência doméstica, sexual e/ou outras violências",
88 |         "ZIKA": "Zika Vírus",
89 |     }
90 | 
91 |     def describe(self, file: File) -> dict:
92 |         if file.extension.upper() == ".DBC":
93 |             dis_code, year = self.format(file)
94 | 
95 |             description = {
96 |                 "name": str(file.basename),
97 |                 "disease": self.diseases[dis_code],
98 |                 "year": zfill_year(year),
99 |                 "size": file.info["size"],
100 |                 "last_update": file.info["modify"],
101 |             }
102 |             return description
103 |         return {}
104 | 
105 |     def format(self, file: File) -> tuple:
106 |         year = file.name[-2:]
107 | 
108 |         if file.name.startswith("SRC"):
109 |             dis_code = file.name[:3]
110 |         elif file.name == "LEIBR22":
111 |             dis_code = "LEIV"  # MISSPELLED FILE NAME
112 |         elif file.name == "LERBR19":
113 |             dis_code = "LERD"  # ANOTHER ONE
114 |         else:
115 |             dis_code = file.name[:4]
116 | 
117 |         return dis_code, zfill_year(year)
118 | 
119 |     def get_files(
120 |         self,
121 |         dis_code: Optional[Union[str, list]] = None,
122 |         year: Optional[Union[str, int, list]] = None,
123 |     ) -> List[File]:
124 |         files = list(
125 |             filter(
126 |                 lambda f: f.extension.upper() in [".DBC", ".DBF"], self.files
127 |             )
128 |         )
129 | 
130 |         if dis_code:
131 |             codes = [c.upper() for c in to_list(dis_code)]
132 | 
133 |             if codes and not all(code in self.diseases for code in codes):
134 |                 raise ValueError(
135 |                     "Unknown disease(s): "
136 |                     f"{set(codes).difference(set(self.diseases))}"
137 |                 )
138 | 
139 |             files = list(filter(lambda f: self.format(f)[0] in codes, files))
140 | 
141 |         if year or str(year) in ["0", "00"]:
142 |             years = [zfill_year(str(y)[-2:]) for y in to_list(year)]
143 |             files = list(filter(lambda f: self.format(f)[1] in years, files))
144 | 
145 |         return files
146 | 
--------------------------------------------------------------------------------
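Closing with a minimal sketch for the SINAN class above, under the same assumption as the SIA example (that the base Database's load() populates self.files from the FINAIS and PRELIM directories); dis_code and year follow the get_files signature shown:

    from pysus.ftp.databases.sinan import SINAN

    sinan = SINAN().load()  # assumption: load() fills self.files from both paths
    # final and preliminary dengue files for 2019-2020
    files = sinan.get_files(dis_code="DENG", year=[2019, 2020])
    for f in files:
        print(sinan.describe(f))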