--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | clean:
2 | @rm -rf .coverage
3 | @rm -rf .eggs
4 | @rm -rf .mypy_cache
5 | @rm -rf .pytest_cache
6 | @rm -rf .ropeproject
7 | @rm -rf .tox
8 | @rm -rf calculadora_do_cidadao.egg-info
9 | @rm -rf dist
10 | @rm -rf docs/_build/
11 | @rm -rf docs/json/
12 | @rm -rf htmlcov
13 | @find . -iname "*.pyc" | xargs rm -rf
14 | @find . -iname "__pycache__" | xargs rm -rf
15 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Calculadora do Cidadão
2 |
3 | [Tests](https://github.com/cuducos/calculadora-do-cidadao/actions)
4 | [Maintainability](https://codeclimate.com/github/cuducos/calculadora-do-cidadao/maintainability)
5 | [Test Coverage](https://codeclimate.com/github/cuducos/calculadora-do-cidadao/test_coverage)
6 | [PyPI version](https://pypi.org/project/calculadora-do-cidadao/)
7 | [Python versions](https://pypi.org/project/calculadora-do-cidadao/)
8 | [Documentation](https://calculadora-do-cidadao.readthedocs.io/)
9 |
10 | Python package for adjusting monetary values for inflation. Check the [documentation](https://calculadora-do-cidadao.readthedocs.io/) and the [contributing mini-guide](CONTRIBUTING.md) for more details!
11 |
12 |
13 |
14 | > :warning: **I am looking for people to help maintain this project.** I have not had the time to take care of it as I should, and I feel I lack the economics and financial-market knowledge to improve it.
15 |
16 |
17 |
18 | ## Usage example
19 |
20 | ```python
21 | In [1]: from datetime import date
22 | ...: from decimal import Decimal
23 | ...: from calculadora_do_cidadao import Ipca
24 |
25 | In [2]: ipca = Ipca()
26 |
27 | In [3]: ipca.adjust(date(2018, 7, 6))
28 | Out[3]: Decimal('1.051202206630561280035407253')
29 |
30 | In [4]: ipca.adjust("2014-07-08", 7)
31 | Out[4]: Decimal('9.407523138792336916983267321')
32 |
33 | In [5]: ipca.adjust("12/07/1998", 3, "01/07/2006")
34 | Out[5]: Decimal('5.279855889296777979447848574')
35 | ```
36 |
37 | [Demo on asciinema](https://asciinema.org/a/295920)
38 |
--------------------------------------------------------------------------------
/calculadora_do_cidadao/__init__.py:
--------------------------------------------------------------------------------
1 | from calculadora_do_cidadao.adapters.cpi import AllUrbanCityAverage # noqa
2 | from calculadora_do_cidadao.adapters.dieese import ( # noqa
3 | CestaBasica,
4 | CestaBasicaAracaju,
5 | CestaBasicaBelem,
6 | CestaBasicaBeloHorizonte,
7 | CestaBasicaBoaVista,
8 | CestaBasicaBrasilia,
9 | CestaBasicaCampoGrande,
10 | CestaBasicaCentroOeste,
11 | CestaBasicaCuiaba,
12 | CestaBasicaCuritiba,
13 | CestaBasicaFlorianopolis,
14 | CestaBasicaFortaleza,
15 | CestaBasicaGoiania,
16 | CestaBasicaJoaoPessoa,
17 | CestaBasicaMacae,
18 | CestaBasicaMacapa,
19 | CestaBasicaMaceio,
20 | CestaBasicaManaus,
21 | CestaBasicaNatal,
22 | CestaBasicaNordeste,
23 | CestaBasicaNorte,
24 | CestaBasicaPalmas,
25 | CestaBasicaPortoAlegre,
26 | CestaBasicaPortoVelho,
27 | CestaBasicaRecife,
28 | CestaBasicaRioBranco,
29 | CestaBasicaRioDeJaneiro,
30 | CestaBasicaSalvador,
31 | CestaBasicaSaoLuis,
32 | CestaBasicaSaoPaulo,
33 | CestaBasicaSudeste,
34 | CestaBasicaSul,
35 | CestaBasicaTeresina,
36 | CestaBasicaVitoria,
37 | )
38 | from calculadora_do_cidadao.adapters.ibge import Inpc, Ipca, Ipca15, IpcaE # noqa
39 | from calculadora_do_cidadao.adapters.igpm import Igpm # noqa
40 |
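Every class imported above shares the same `Adapter` API, so the README example works with any of them. A minimal sketch (note that instantiating an adapter downloads and parses the source data, so this call hits the network):

```python
from calculadora_do_cidadao import CestaBasicaSaoPaulo

cesta = CestaBasicaSaoPaulo()  # downloads DIEESE's data on instantiation
print(cesta.adjust("2010-01-01", 100))  # 100 adjusted to the most recent date
```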
--------------------------------------------------------------------------------
/calculadora_do_cidadao/__main__.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from inspect import isclass
3 | from typing import Iterable, Type
4 |
5 | from typer import Typer, echo
6 |
7 | import calculadora_do_cidadao
8 | from calculadora_do_cidadao.adapters import Adapter
9 | from calculadora_do_cidadao.rows.plugins.plugin_csv import export_to_csv
10 | from calculadora_do_cidadao.rows.plugins.dicts import import_from_dicts
11 |
12 |
13 | DEFAULT_EXPORT_FILE = Path("calculadora-do-cidadao.csv")
14 | cli = Typer()
15 |
16 |
17 | def get_adapters() -> Iterable[Type[Adapter]]:
18 | """Generator with all adapters available in this module."""
19 | for _obj in dir(calculadora_do_cidadao):
20 | obj = getattr(calculadora_do_cidadao, _obj)
21 |
22 | # discard non-adapters
23 | if isclass(obj) and issubclass(obj, Adapter):
24 | yield obj
25 |
26 |
27 | def data() -> Iterable[dict]:
28 | """Generator to get all export data from adapters in this module."""
29 | adapters = tuple(get_adapters())
30 | total = len(adapters)
31 | for count, adapter in enumerate(adapters, 1):
32 | name = adapter.__name__.upper()
33 | echo(f"[{count} of {total}] Exporting {name} data…")
34 | yield from adapter().export(include_name=True)
35 |
36 |
37 | @cli.command()
38 | def export(path: Path = DEFAULT_EXPORT_FILE) -> None:
39 | """Export all data to CSV."""
40 | table = import_from_dicts(data())
41 | export_to_csv(table, path)
42 |
43 |
44 | if __name__ == "__main__":
45 | cli()
46 |
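Since Typer commands are plain callables, `export` can also be used programmatically instead of via `python -m calculadora_do_cidadao`. A minimal sketch (it downloads every series, so it takes a while):

```python
from pathlib import Path

from calculadora_do_cidadao.__main__ import export

# writes a CSV with `date`, `value` and `serie` columns for all adapters
export(Path("all-indexes.csv"))
```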
--------------------------------------------------------------------------------
/calculadora_do_cidadao/adapters/__init__.py:
--------------------------------------------------------------------------------
1 | from abc import ABCMeta, abstractmethod
2 | from collections import namedtuple
3 | from datetime import date
4 | from decimal import Decimal
5 | from itertools import chain
6 | from json import load
7 | from pathlib import Path
8 | from typing import Any, Iterable, List, NamedTuple, Optional, Union
9 |
10 | from calculadora_do_cidadao.download import Download
11 | from calculadora_do_cidadao.fields import DateField
12 | from calculadora_do_cidadao.rows.fields import DecimalField
13 | from calculadora_do_cidadao.rows.plugins.dicts import import_from_dicts
14 | from calculadora_do_cidadao.rows.plugins.plugin_csv import (
15 | export_to_csv,
16 | import_from_csv,
17 | )
18 | from calculadora_do_cidadao.rows.plugins.plugin_html import import_from_html
19 | from calculadora_do_cidadao.rows.plugins.xls import import_from_xls
20 | from calculadora_do_cidadao.typing import (
21 | Date,
22 | IndexDictionary,
23 | IndexesGenerator,
24 | MaybeIndexesGenerator,
25 | )
26 |
27 |
28 | def import_from_json(path: Path, json_path: List[str]) -> Iterable[NamedTuple]:
29 | """Imports data form a JSON file `path` creating an iterable of named
30 | tuples similar to the Rows's import functions.
31 |
32 | `json_path` is a sequence of keys or array indexes to get to the array with
33 | the desired data.
34 | """
35 |
36 | with path.open() as handler:
37 | data = load(handler)
38 |
39 | for key_or_index in json_path:
40 | data = data[key_or_index]
41 |
42 | if not data:
43 | return
44 |
45 | keys = tuple(str(key) for key in data[0].keys())
46 | Row = namedtuple("Row", keys) # type: ignore
47 | yield from (Row(**row) for row in data) # type: ignore
48 |
49 |
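A minimal sketch of what `import_from_json` does, using an inline dictionary instead of a file: the `json_path` keys walk down to the array, and the first object's keys name the resulting tuples (the `outer`/`registros` structure here is hypothetical):

```python
from collections import namedtuple

data = {"outer": {"registros": [{"periodo": "01/2020", "fator": "1.0038"}]}}
for key_or_index in ("outer", "registros"):
    data = data[key_or_index]

Row = namedtuple("Row", tuple(str(key) for key in data[0].keys()))
print([Row(**row) for row in data])  # [Row(periodo='01/2020', fator='1.0038')]
```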
50 | class AdapterNoImportMethod(Exception):
51 | """To be used when the adapter has no `rows` import method set."""
52 |
53 | pass
54 |
55 |
56 | class AdapterDateNotAvailableError(Exception):
57 | """To be used when using a date outside of the range available."""
58 |
59 | pass
60 |
61 |
62 | class Adapter(metaclass=ABCMeta):
63 | """This is the base adapter, all adapters should inherit from it. Its
64 | children require at least a `url` and `file_type` class variables."""
65 |
66 | def __init__(self, exported_csv: Optional[Path] = None) -> None:
67 | """The initialization of the Adapter consists of four steps.
68 |
69 | First, it tries to infer the `rows` import method to use from the
70 | `file_type` class variable (which can be `html`, `xls` or `json`).
71 |
72 | Then it detects whether data will come from a download or from an
73 | exported CSV file.
74 |
75 | If it comes from a download, it uses the `Download` class to store
76 | index data.
77 |
78 | Finally, if the source data is disaggregated, it calls the `aggregate`
79 | method.
80 | """
81 | functions = {
82 | "html": import_from_html,
83 | "json": import_from_json,
84 | "xls": import_from_xls,
85 | }
86 | try:
87 | self.read_from = functions[self.file_type]
88 | except KeyError:
89 | msg = (
90 | f"Invalid file type {self.file_type}. "
91 | f"Valid file types are: {', '.join(functions)}."
92 | )
93 | raise AdapterNoImportMethod(msg)
94 |
95 | self.data: IndexDictionary = {}
96 | if exported_csv:
97 | self.data = {key: value for key, value in self.from_csv(exported_csv)}
98 | else:
99 | self.data = {key: value for key, value in self.download()}
100 | if self.should_aggregate:
101 | self.aggregate()
102 |
103 | if self.data:
104 | self.most_recent_date = max(self.data.keys())
105 |
106 | @property
107 | def import_kwargs(self) -> Iterable[dict]:
108 | """Wrapper to get IMPORT_KWARGS if set, avoiding error if not set."""
109 | value = getattr(self, "IMPORT_KWARGS", {})
110 | return (value,) if isinstance(value, dict) else value
111 |
112 | @property
113 | def cookies(self) -> dict:
114 | """Wrapper to get COOKIES if set, avoiding error if not set."""
115 | return getattr(self, "COOKIES", {})
116 |
117 | @property
118 | def post_data(self) -> Optional[dict]:
119 | """Wrapper to get POST_DATA if set, avoiding error if not set."""
120 | return getattr(self, "POST_DATA", None)
121 |
122 | @property
123 | def headers(self) -> Optional[dict]:
124 | """Wrapper to get HEADERS if set, avoiding error if not set."""
125 | return getattr(self, "HEADERS", None)
126 |
127 | @property
128 | def should_unzip(self) -> bool:
129 | """Wrapper to get SHOULD_UNZIP if set, avoiding error if not set."""
130 | return getattr(self, "SHOULD_UNZIP", False)
131 |
132 | @property
133 | def should_aggregate(self) -> bool:
134 | """Wrapper to get SHOULD_AGGREGATE if set, avoiding error if not set."""
135 | return getattr(self, "SHOULD_AGGREGATE", False)
136 |
137 | @property
138 | @abstractmethod
139 | def url(self) -> str:
140 | """The URL where to get data from."""
141 | pass # pragma: no cover
142 |
143 | @property
144 | @abstractmethod
145 | def file_type(self) -> str:
146 | """File type of the response from the `url`, usually html or xls."""
147 | pass # pragma: no cover
148 |
149 | @abstractmethod
150 | def serialize(self, row: NamedTuple) -> MaybeIndexesGenerator:
151 | """This method should be a generator that receives a row from `rows`
152 | (which is a `NamedTuple`) and yields `None` if the row does not hold
153 | any valid index data, or yields `calculadora_do_cidadao.typing.Index`
154 | type if the row has valid data. A row can yield more than one
155 | `calculadora_do_cidadao.typing.Index`.
156 | """
157 | pass # pragma: no cover
158 |
159 | def invalid_date_error_message(self, wanted: date) -> str:
160 | """Helper to generate an error message usually used together with
161 | `AdapterDateNotAvailableError`."""
162 | first, last = min(self.data.keys()), max(self.data.keys())
163 | if first < wanted < last:
164 | msg = (
165 | f"This adapter has data from {first.month:0>2d}/{first.year} "
166 | f"to {last.month:0>2d}/{last.year}, but not for "
167 | f"{wanted.month:0>2d}/{wanted.year}. Available dates are:"
168 | )
169 | available = (f" - {d.month:0>2d}/{d.year}" for d in self.data)
170 | return "\n".join(chain((msg,), available))
171 |
172 | return (
173 | f"This adapter has data from {first.month:0>2d}/{first.year} "
174 | f"to {last.month:0>2d}/{last.year}. "
175 | f"{wanted.month:0>2d}/{wanted.year} is out of range."
176 | )
177 |
178 | def round_date(self, obj: Date, validate: bool = False) -> date:
179 | """Method to round `Date` objects to hold `day = 1`, as indexes usually
180 | refers to monthly periods, not daily periods. It also validates if the
181 | intended date is valid and in the adapter data range."""
182 | parsed = DateField.deserialize(obj)
183 | output = parsed.replace(day=1)
184 | if validate and output not in self.data.keys():
185 | msg = self.invalid_date_error_message(output)
186 | raise AdapterDateNotAvailableError(msg)
187 | return output
188 |
189 | def aggregate(self):
190 | """Being disaggregated here means the index for each month is a
191 | percentage relative to the previous month. However the `adjust` method
192 | gets way simpler if the indexes as stored as the percentage of the
193 | month before the first month of the series. For example, if a given
194 | index starts at January 1994, and all values should be a percentage
195 | referring to December 1993."""
196 | accumulated = 1
197 | for key in sorted(self.data.keys()):
198 | self.data[key] = accumulated * (1 + self.data[key])
199 | accumulated = self.data[key]
200 |
201 | def adjust(
202 | self,
203 | original_date: Date,
204 | value: Union[Decimal, float, int, None] = 0,
205 | target_date: Optional[Date] = None,
206 | ) -> Decimal:
207 | """Main method of an adapter API, the one that actually makes the
208 | monetary correction using adapter's data. It requires a `datetime.date`
209 | used as the reference for the operation.
210 |
211 | If no `value` is given, it assumes the value is
212 | `decimal.Decimal('1')`.
213 |
214 | If no `target_date` is given, it uses the most recent date available
215 | in the adapter's data."""
216 | original = self.round_date(original_date, validate=True)
217 | target = self.most_recent_date
218 | if target_date:
219 | target = self.round_date(target_date, validate=True)
220 |
221 | value = Decimal(value or "1")
222 | percent = self.data[target] / self.data[original]
223 | return value * percent
224 |
225 | def download(self) -> IndexesGenerator:
226 | """Wrapper to use the `Download` class and pipe the result to `rows`
227 | imported method, yielding a series of rows parsed by `rows`."""
228 | post_processing = getattr(self, "post_processing", None)
229 | download = Download(
230 | url=self.url,
231 | should_unzip=self.should_unzip,
232 | headers=self.headers,
233 | cookies=self.cookies,
234 | post_data=self.post_data,
235 | post_processing=post_processing,
236 | )
237 |
238 | with download() as paths:
239 | for path in paths():
240 | for kwargs in self.import_kwargs:
241 | for data in self.read_from(path, **kwargs): # type: ignore
242 | yield from (row for row in self.serialize(data) if row)
243 |
244 | def export_index(self, key, include_name: bool = False) -> dict:
245 | """Export a given index as a dictionary to be used with
246 | `rows.import_from_dicts`."""
247 | data = {"date": key, "value": self.data[key]}
248 |
249 | if include_name:
250 | data["serie"] = self.__class__.__name__.lower()
251 |
252 | return data
253 |
254 | def export(self, include_name: bool = False) -> Iterable[dict]:
255 | """Wraps adapter's data in a sequence of dictionaries to be used with
256 | `rows.import_from_dicts`."""
257 | keys = sorted(self.data)
258 | yield from (self.export_index(key, include_name) for key in keys)
259 |
260 | def to_csv(self, path: Path) -> Path:
261 | """Export the adapter's data to a CSV file."""
262 | table = import_from_dicts(self.export())
263 | export_to_csv(table, path)
264 | return path
265 |
266 | def from_csv(self, path: Path) -> IndexesGenerator:
267 | """Load adapter's data from a CSV file. If the CSV file has two columns
268 | it is assumed it was generated with the `to_csv` method. If it has 3
269 | columns, it is assumed it is a export of all adapters' data generated
270 | by the CLI."""
271 | fields = {"date": DateField, "value": DecimalField}
272 | table = import_from_csv(path, force_types=fields)
273 | if len(table.fields) == 2: # generated via adapter's to_csv method
274 | yield from table
275 | else: # 3 columns table, export of all adapters generated by CLI
276 | yield from (
277 | (row.date, row.value)
278 | for row in table
279 | if row.serie == self.__class__.__name__.lower()
280 | )
281 |
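To make the `aggregate` and `adjust` arithmetic concrete, here is the same math run on a tiny hand-made series (the dates and percentages are made up): month-over-month percentages become cumulative factors, and an adjustment is just the ratio between two factors:

```python
from datetime import date
from decimal import Decimal

# disaggregated data: each value is the variation over the previous month
data = {
    date(1994, 1, 1): Decimal("0.10"),  # +10%
    date(1994, 2, 1): Decimal("0.05"),  # +5%
}

# same loop as Adapter.aggregate: accumulate (1 + percentage)
accumulated = 1
for key in sorted(data.keys()):
    data[key] = accumulated * (1 + data[key])
    accumulated = data[key]

# same math as Adapter.adjust: value times the ratio between the factors
original, target = date(1994, 1, 1), date(1994, 2, 1)
percent = data[target] / data[original]
print(Decimal("100") * percent)  # 105.00
```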
--------------------------------------------------------------------------------
/calculadora_do_cidadao/adapters/cpi.py:
--------------------------------------------------------------------------------
1 | from typing import NamedTuple
2 | from urllib.parse import urlencode
3 |
4 | from calculadora_do_cidadao.adapters import Adapter
5 | from calculadora_do_cidadao.rows.fields import DecimalField
6 | from calculadora_do_cidadao.typing import MaybeIndexesGenerator
7 |
8 |
9 | URL = "https://fred.stlouisfed.org/graph/fredgraph.xls"
10 | URL_PARAMS = {"id": "CPIAUCSL"}
11 |
12 |
13 | class AllUrbanCityAverage(Adapter):
14 | """Adapter for FED's Consumer Price Index for All Urban Consumers: All
15 | Items."""
16 |
17 | file_type = "xls"
18 | url = f"{URL}?{urlencode(URL_PARAMS)}"
19 |
20 | IMPORT_KWARGS = {"start_row": 10, "force_types": {"cpiaucsl": DecimalField}}
21 |
22 | def serialize(self, row: NamedTuple) -> MaybeIndexesGenerator:
23 | """Serialize method to unpack Rows's row into a tuple."""
24 | reference, value = row
25 | yield reference, value
26 |
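For reference, a sketch of what the `urlencode` composition above yields as the final download URL:

```python
from urllib.parse import urlencode

URL = "https://fred.stlouisfed.org/graph/fredgraph.xls"
print(f"{URL}?{urlencode({'id': 'CPIAUCSL'})}")
# https://fred.stlouisfed.org/graph/fredgraph.xls?id=CPIAUCSL
```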
--------------------------------------------------------------------------------
/calculadora_do_cidadao/adapters/dieese.py:
--------------------------------------------------------------------------------
1 | from datetime import date, datetime
2 | from decimal import Decimal
3 | from itertools import chain
4 | from statistics import mean
5 | from typing import NamedTuple, Optional
6 |
7 | from calculadora_do_cidadao.adapters import Adapter
8 | from calculadora_do_cidadao.rows.fields import TextField
9 | from calculadora_do_cidadao.typing import MaybeIndexesGenerator
10 |
11 |
12 | ALL_CITIES = (
13 | "aracaju",
14 | "belem",
15 | "belo_horizonte",
16 | "boa_vista",
17 | "brasilia",
18 | "campo_grande",
19 | "cuiaba",
20 | "curitiba",
21 | "florianopolis",
22 | "fortaleza",
23 | "goiania",
24 | "joao_pessoa",
25 | "macae",
26 | "macapa",
27 | "maceio",
28 | "manaus",
29 | "natal",
30 | "palmas",
31 | "porto_alegre",
32 | "porto_velho",
33 | "recife",
34 | "rio_branco",
35 | "rio_de_janeiro",
36 | "salvador",
37 | "sao_luis",
38 | "sao_paulo",
39 | "teresina",
40 | "vitoria",
41 | )
42 |
43 |
44 | class CestaBasica(Adapter):
45 | """Adapter for DIEESE's basic shopping basket (cesta básica) price index.
46 | If no `cities` variable is created, it averages the value of all available
47 | cities in any given date (this is used in subclasses)."""
48 |
49 | file_type = "html"
50 | url = "https://www.dieese.org.br/cesta/produto"
51 |
52 | POST_DATA = {
53 | "farinha": "false",
54 | "produtos": "1",
55 | "tipoDado": "5",
56 | "cidades": "0",
57 | "dataInicial": "071994", # before that we need currency convertion
58 | "dataFinal": date.today().strftime("%m%Y"),
59 | }
60 | IMPORT_KWARGS = {
61 | "force_types": {k: TextField for k in chain(("field_0",), ALL_CITIES)}
62 | }
63 |
64 | @staticmethod
65 | def post_processing(body: bytes) -> bytes:
66 | """Fixes broken HTML syntax in DIEESE's source file."""
67 | body = body.strip()
68 | if body.startswith(b"<?xml"):
69 | # drop the XML declaration, keeping everything after the closing `?>`
70 | _, _, body = body.partition(b"?>")
71 | return body.strip()
72 |
73 | def _mean(self, row: NamedTuple) -> Optional[Decimal]:
74 | cities = getattr(self, "cities", ALL_CITIES)
75 | raw = (getattr(row, city, None) for city in cities)
76 | strings = (value.strip() for value in raw if isinstance(value, str))
77 | cleaned = (value for value in strings if value and value != "-")
78 | values = tuple(Decimal(value.replace(",", ".")) for value in cleaned)
79 |
80 | if not values:
81 | return None
82 |
83 | if len(values) == 1:
84 | return values[0]
85 |
86 | return mean(values)
87 |
88 | def serialize(self, row: NamedTuple) -> MaybeIndexesGenerator:
89 | """Serialize method to unpack rows's row into a tuple. Calculates the
90 | mean for adapters including different cities if needed."""
91 | value = self._mean(row)
92 | if value is None:
93 | yield None
94 | return
95 |
96 | # the index has the price, let's calculate the percentage
97 | self.first_value = getattr(self, "first_value", value)
98 | adjusted_value = value / self.first_value
99 |
100 | reference = datetime.strptime(row[0][:7], "%m-%Y").date()
101 | yield reference, adjusted_value
102 |
103 |
104 | # regional adapters
105 |
106 |
107 | class CestaBasicaCentroOeste(CestaBasica):
108 | """Adapter for DIEESE's basic shopping basket (cesta básica) price index
109 | including Brasília, Cuiabá, Campo Grande and Goiânia."""
110 |
111 | cities = (
112 | "brasilia",
113 | "cuiaba",
114 | "campo_grande",
115 | "goiania",
116 | )
117 |
118 |
119 | class CestaBasicaNordeste(CestaBasica):
120 | """Adapter for DIEESE's basic shopping basket (cesta básica) price index
121 | including Aracajú, Fortaleza, João Pessoa, Maceió, Natal, Recife, Salvador,
122 | São Luís and Teresina."""
123 |
124 | cities = (
125 | "aracaju",
126 | "fortaleza",
127 | "joao_pessoa",
128 | "maceio",
129 | "natal",
130 | "recife",
131 | "salvador",
132 | "sao_luis",
133 | "teresina",
134 | )
135 |
136 |
137 | class CestaBasicaNorte(CestaBasica):
138 | """Adapter for DIEESE's basic shopping basket (cesta básica) price index
139 | including Belém, Boa Vista, Macapá, Manaus, Palmas, Porto Velho and Rio
140 | Branco."""
141 |
142 | cities = (
143 | "belem",
144 | "boa_vista",
145 | "macapa",
146 | "manaus",
147 | "palmas",
148 | "porto_velho",
149 | "rio_branco",
150 | )
151 |
152 |
153 | class CestaBasicaSudeste(CestaBasica):
154 | """Adapter for DIEESE's basic shopping basket (cesta básica) price index
155 | including Belo Horizonte, Rio de Janeiro, São Paulo and Vitória."""
156 |
157 | cities = (
158 | "belo_horizonte",
159 | "rio_de_janeiro",
160 | "sao_paulo",
161 | "vitoria",
162 | )
163 |
164 |
165 | class CestaBasicaSul(CestaBasica):
166 | """Adapter for DIEESE's basic shopping basket (cesta básica) price index
167 | including Curitiba, Florianópolis and Porto Alegre."""
168 |
169 | cities = (
170 | "curitiba",
171 | "florianopolis",
172 | "porto_alegre",
173 | )
174 |
175 |
176 | # city adapters
177 |
178 |
179 | class CestaBasicaAracaju(CestaBasica):
180 | """Adapter for DIEESE's basic shopping basket (cesta básica) price index
181 | for Aracajú."""
182 |
183 | cities = ("aracaju",)
184 |
185 |
186 | class CestaBasicaBelem(CestaBasica):
187 | """Adapter for DIEESE's basic shopping basket (cesta básica) price index
188 | for Belém."""
189 |
190 | cities = ("belem",)
191 |
192 |
193 | class CestaBasicaBeloHorizonte(CestaBasica):
194 | """Adapter for DIEESE's basic shopping basket (cesta básica) price index
195 | for Belo Horizonte."""
196 |
197 | cities = ("belo_horizonte",)
198 |
199 |
200 | class CestaBasicaBoaVista(CestaBasica):
201 | """Adapter for DIEESE's basic shopping basket (cesta básica) price index
202 | for Boa Vista."""
203 |
204 | cities = ("boa_vista",)
205 |
206 |
207 | class CestaBasicaBrasilia(CestaBasica):
208 | """Adapter for DIEESE's basic shopping basket (cesta básica) price index
209 | for Brasília."""
210 |
211 | cities = ("brasilia",)
212 |
213 |
214 | class CestaBasicaCampoGrande(CestaBasica):
215 | """Adapter for DIEESE's basic shopping basket (cesta básica) price index
216 | for Campo Grande."""
217 |
218 | cities = ("campo_grande",)
219 |
220 |
221 | class CestaBasicaCuiaba(CestaBasica):
222 | """Adapter for DIEESE's basic shopping basket (cesta básica) price index
223 | for Cuiabá."""
224 |
225 | cities = ("cuiaba",)
226 |
227 |
228 | class CestaBasicaCuritiba(CestaBasica):
229 | """Adapter for DIEESE's basic shopping basket (cesta básica) price index
230 | for Curitiba."""
231 |
232 | cities = ("curitiba",)
233 |
234 |
235 | class CestaBasicaFlorianopolis(CestaBasica):
236 | """Adapter for DIEESE's basic shopping basket (cesta básica) price index
237 | for Florianópolis."""
238 |
239 | cities = ("florianopolis",)
240 |
241 |
242 | class CestaBasicaFortaleza(CestaBasica):
243 | """Adapter for DIEESE's basic shopping basket (cesta básica) price index
244 | for Fortaleza."""
245 |
246 | cities = ("fortaleza",)
247 |
248 |
249 | class CestaBasicaGoiania(CestaBasica):
250 | """Adapter for DIEESE's basic shopping basket (cesta básica) price index
251 | for Goiânia."""
252 |
253 | cities = ("goiania",)
254 |
255 |
256 | class CestaBasicaJoaoPessoa(CestaBasica):
257 | """Adapter for DIEESE's basic shopping basket (cesta básica) price index
258 | for João Pessoa."""
259 |
260 | cities = ("joao_pessoa",)
261 |
262 |
263 | class CestaBasicaMacae(CestaBasica):
264 | """Adapter for DIEESE's basic shopping basket (cesta básica) price index
265 | for Macaé."""
266 |
267 | cities = ("macae",)
268 |
269 |
270 | class CestaBasicaMacapa(CestaBasica):
271 | """Adapter for DIEESE's basic shopping basket (cesta básica) price index
272 | for Macapá."""
273 |
274 | cities = ("macapa",)
275 |
276 |
277 | class CestaBasicaMaceio(CestaBasica):
278 | """Adapter for DIEESE's basic shopping basket (cesta básica) price index
279 | for Maceió."""
280 |
281 | cities = ("maceio",)
282 |
283 |
284 | class CestaBasicaManaus(CestaBasica):
285 | """Adapter for DIEESE's basic shopping basket (cesta básica) price index
286 | for Manaus."""
287 |
288 | cities = ("manaus",)
289 |
290 |
291 | class CestaBasicaNatal(CestaBasica):
292 | """Adapter for DIEESE's basic shopping basket (cesta básica) price index
293 | for Natal."""
294 |
295 | cities = ("natal",)
296 |
297 |
298 | class CestaBasicaPalmas(CestaBasica):
299 | """Adapter for DIEESE's basic shopping basket (cesta básica) price index
300 | for Palmas."""
301 |
302 | cities = ("palmas",)
303 |
304 |
305 | class CestaBasicaPortoAlegre(CestaBasica):
306 | """Adapter for DIEESE's basic shopping basket (cesta básica) price index
307 | for Porto Alegre."""
308 |
309 | cities = ("porto_alegre",)
310 |
311 |
312 | class CestaBasicaPortoVelho(CestaBasica):
313 | """Adapter for DIEESE's basic shopping basket (cesta básica) price index
314 | for Porto Velho."""
315 |
316 | cities = ("porto_velho",)
317 |
318 |
319 | class CestaBasicaRecife(CestaBasica):
320 | """Adapter for DIEESE's basic shopping basket (cesta básica) price index
321 | for Recife."""
322 |
323 | cities = ("recife",)
324 |
325 |
326 | class CestaBasicaRioBranco(CestaBasica):
327 | """Adapter for DIEESE's basic shopping basket (cesta básica) price index
328 | for Rio Branco."""
329 |
330 | cities = ("rio_branco",)
331 |
332 |
333 | class CestaBasicaRioDeJaneiro(CestaBasica):
334 | """Adapter for DIEESE's basic shopping basket (cesta básica) price index
335 | for Rio de Janeiro."""
336 |
337 | cities = ("rio_de_janeiro",)
338 |
339 |
340 | class CestaBasicaSalvador(CestaBasica):
341 | """Adapter for DIEESE's basic shopping basket (cesta básica) price index
342 | for Salvador."""
343 |
344 | cities = ("salvador",)
345 |
346 |
347 | class CestaBasicaSaoLuis(CestaBasica):
348 | """Adapter for DIEESE's basic shopping basket (cesta básica) price index
349 | for São Luís."""
350 |
351 | cities = ("sao_luis",)
352 |
353 |
354 | class CestaBasicaSaoPaulo(CestaBasica):
355 | """Adapter for DIEESE's basic shopping basket (cesta básica) price index
356 | for São Paulo."""
357 |
358 | cities = ("sao_paulo",)
359 |
360 |
361 | class CestaBasicaTeresina(CestaBasica):
362 | """Adapter for DIEESE's basic shopping basket (cesta básica) price index
363 | for Teresina."""
364 |
365 | cities = ("teresina",)
366 |
367 |
368 | class CestaBasicaVitoria(CestaBasica):
369 | """Adapter for DIEESE's basic shopping basket (cesta básica) price index
370 | for Vitória."""
371 |
372 | cities = ("vitoria",)
373 |
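The `_mean` pipeline above normalizes DIEESE's Brazilian-formatted price strings before averaging. A standalone sketch of the same cleaning steps, with made-up cell values:

```python
from decimal import Decimal
from statistics import mean

raw = ("512,31", " 498,70 ", "-", "", None)  # hypothetical cells from one row

strings = (value.strip() for value in raw if isinstance(value, str))
cleaned = (value for value in strings if value and value != "-")
values = tuple(Decimal(value.replace(",", ".")) for value in cleaned)

print(values)        # (Decimal('512.31'), Decimal('498.70'))
print(mean(values))  # 505.505
```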
--------------------------------------------------------------------------------
/calculadora_do_cidadao/adapters/ibge.py:
--------------------------------------------------------------------------------
1 | from datetime import date
2 | from decimal import Decimal
3 | from typing import NamedTuple
4 |
5 | from calculadora_do_cidadao.adapters import Adapter
6 | from calculadora_do_cidadao.fields import DateField
7 | from calculadora_do_cidadao.months import MONTHS
8 | from calculadora_do_cidadao.rows.fields import PercentField
9 | from calculadora_do_cidadao.typing import MaybeIndexesGenerator
10 |
11 |
12 | class IbgeAdapter(Adapter):
13 | """This base class is incomplete and should not be used directly. It missed
14 | the `url` class variable to be set in its children. In spite of that, it
15 | implements the serialize and settings that work with most price adjustment
16 | indexes done by IBGE."""
17 |
18 | file_type = "xls"
19 |
20 | IMPORT_KWARGS = {"end_column": 2}
21 | SHOULD_UNZIP = True
22 |
23 | def serialize(self, row: NamedTuple) -> MaybeIndexesGenerator:
24 | """Serialize used for different IBGE price adjustment indexes."""
25 | self.last_year = getattr(self, "last_year", None)
26 | year, month, value = row
27 | if value is None or month is None:
28 | return
29 |
30 | if year is None:
31 | year = ""
32 | year = year.strip() or self.last_year
33 |
34 | try:
35 | month = MONTHS[month.capitalize()]
36 | except KeyError:
37 | return
38 |
39 | value = PercentField.deserialize(f"{value}%")
40 | reference = DateField.deserialize(f"{month}/{year}")
41 |
42 | self.last_year = year
43 | yield reference, value
44 |
45 |
46 | class Inpc(IbgeAdapter):
47 | """Adapter for IBGE's INPC series."""
48 |
49 | url = "http://ftp.ibge.gov.br/Precos_Indices_de_Precos_ao_Consumidor/INPC/Serie_Historica/inpc_SerieHist.zip"
50 |
51 |
52 | class Ipca(IbgeAdapter):
53 | """Adapter for IBGE's IPCA series."""
54 |
55 | url = "http://ftp.ibge.gov.br/Precos_Indices_de_Precos_ao_Consumidor/IPCA/Serie_Historica/ipca_SerieHist.zip"
56 |
57 |
58 | class Ipca15(IbgeAdapter):
59 | """Adapter for IBGE's IPCA-15 series."""
60 |
61 | url = "http://ftp.ibge.gov.br/Precos_Indices_de_Precos_ao_Consumidor/IPCA_15/Series_Historicas/ipca-15_SerieHist.zip"
62 |
63 |
64 | class IpcaE(IbgeAdapter):
65 | """Adapter for IBGE's IPCA-E series."""
66 |
67 | url = "http://ftp.ibge.gov.br/Precos_Indices_de_Precos_ao_Consumidor/IPCA_E/Series_Historicas/ipca-e_SerieHist.zip"
68 |
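In IBGE's spreadsheets the year shows up only on the row for January, so `serialize` carries `last_year` forward for the following months. A sketch of that carry-over with hypothetical rows:

```python
rows = [
    ("1994", "Janeiro", "41.31"),  # year present: remembered as `last_year`
    ("", "Fevereiro", "40.27"),    # year blank: inherits "1994"
    (None, "Março", "42.75"),      # year None: also inherits "1994"
]

last_year = None
for year, month, value in rows:
    if year is None:
        year = ""
    year = year.strip() or last_year  # same fallback as IbgeAdapter.serialize
    last_year = year
    print(year, month, value)  # every row comes out with year "1994"
```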
--------------------------------------------------------------------------------
/calculadora_do_cidadao/adapters/igpm.py:
--------------------------------------------------------------------------------
1 | from urllib.parse import urlencode
2 | from typing import NamedTuple
3 |
4 | from calculadora_do_cidadao.adapters import Adapter
5 | from calculadora_do_cidadao.fields import DateField, PercentField
6 | from calculadora_do_cidadao.typing import MaybeIndexesGenerator
7 |
8 |
9 | URL = "https://www3.bcb.gov.br/sgspub/consultarvalores/consultarValoresSeries.do"
10 | URL_PARAMS = {
11 | "method": "consultarValores",
12 | "optSelecionaSerie": 189,
13 | "dataInicio": "30/06/1989",
14 | "selTipoArqDownload": 1,
15 | "hdOidSeriesSelecionadas": 189,
16 | "hdPaginar": "false",
17 | "bilServico": ["[SGSFW2301]"],
18 | }
19 |
20 |
21 | class Igpm(Adapter):
22 | """Adapter for FGV's IGPM series."""
23 |
24 | file_type = "html"
25 | url = f"{URL}?{urlencode(URL_PARAMS)}"
26 |
27 | COOKIES = {"dtcookie": "EB62E3A5ABDDF04A5F354D7F23CC2681|c2dzfDF8X2RlZmF1bHR8MQ"}
28 | IMPORT_KWARGS = {"encoding": "iso-8859-1", "index": 4}
29 | SHOULD_AGGREGATE = True
30 |
31 | def serialize(self, row: NamedTuple) -> MaybeIndexesGenerator:
32 | """Serialize method to discard the rows that are not valid data."""
33 | reference, value = row
34 | try:
35 | value = PercentField.deserialize(f"{value}%")
36 | reference = DateField.deserialize(reference)
37 | except ValueError:
38 | return
39 | yield reference, value
40 |
--------------------------------------------------------------------------------
/calculadora_do_cidadao/adapters/selic.py:
--------------------------------------------------------------------------------
1 | from datetime import date
2 | from decimal import Decimal
3 | from typing import NamedTuple
4 | from urllib.parse import urlencode
5 |
6 | from calculadora_do_cidadao.adapters import Adapter
7 | from calculadora_do_cidadao.fields import DateField
8 | from calculadora_do_cidadao.typing import MaybeIndexesGenerator
9 |
10 |
11 | URL = "https://www3.bcb.gov.br/novoselic/rest/fatoresAcumulados/pub/search"
12 | URL_PARAMS = {
13 | "parametrosOrdenacao": '[{"nome":"periodo","decrescente":false}]',
14 | "page": 1,
15 | "pageSize": 48,
16 | }
17 |
18 |
19 | class Selic(Adapter):
20 | """Adapter for Brazilian Central Bank SELIC series."""
21 |
22 | url = f"{URL}?{urlencode(URL_PARAMS)}"
23 | file_type = "json"
24 |
25 | HEADERS = {
26 | "Accept": "application/json, text/plain, */*",
27 | "Accept-Encoding": "gzip, deflate, br",
28 | }
29 | POST_DATA = (
30 | {
31 | "campoPeriodo": "mensal",
32 | "dataInicial": "",
33 | "dataFinal": "",
34 | "ano": year,
35 | "exibirMeses": True,
36 | }
37 | for year in range(date.today().year, 1996, -1)
38 | )
39 | IMPORT_KWARGS = {"json_path": ["registros"]}
40 | SHOULD_AGGREGATE = True
41 |
42 | def serialize(self, row: NamedTuple) -> MaybeIndexesGenerator:
43 | reference = DateField.deserialize(row.periodo.replace(" ", "")) # type: ignore
44 | value = Decimal(row.fator) # type: ignore
45 | yield reference, value
46 |
47 | def aggregate(self):
48 | accumulated = 1
49 | for key in sorted(self.data.keys()):
50 | self.data[key] = accumulated * self.data[key]
51 | accumulated = self.data[key]
52 |
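Unlike the base `Adapter.aggregate`, the override above multiplies the factors directly, since the Central Bank API already returns accumulation factors (e.g. `1.0038`) rather than percentages. On a made-up pair of months:

```python
from decimal import Decimal

factors = (Decimal("1.0038"), Decimal("1.0041"))  # hypothetical `fator` values

accumulated = 1
for factor in factors:
    accumulated *= factor  # no (1 + value) here, values are already factors

print(accumulated)  # 1.00791558
```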
--------------------------------------------------------------------------------
/calculadora_do_cidadao/download.py:
--------------------------------------------------------------------------------
1 | from contextlib import contextmanager
2 | from dataclasses import dataclass
3 | from pathlib import Path
4 | from tempfile import NamedTemporaryFile
5 | from typing import Callable, Iterable, Iterator, Optional, Union
6 | from urllib.parse import ParseResult, urlparse
7 | from zipfile import ZipFile
8 |
9 | from requests import Session
10 | from requests.utils import cookiejar_from_dict
11 |
12 |
13 | class DownloadMethodNotImplementedError(Exception):
14 | """To be used when the `Download` class does not have a method implemented
15 | to download a file using the protocol specified in the `url` argument."""
16 |
17 | pass
18 |
19 |
20 | @dataclass
21 | class Download:
22 | """Abstraction for the download of data from the source.
23 |
24 | It can be told on initialization that the resulting file is a Zip
25 | archive that should be unarchived.
26 |
27 | Cookies and headers are only relevant if the URL uses HTTP (and both
28 | are optional).
29 |
30 | The `post_data` dictionary is used to send an HTTP POST request (instead of
31 | the default GET). If this field is a sequence of dictionaries, it will
32 | result in one request per dictionary.
33 |
34 | The `post_processing` is a bytes-to-bytes function that can edit the
35 | contents before saving them locally, allowing adapters to fix malformed
36 | documents."""
37 |
38 | url: str
39 | should_unzip: bool = False
40 | headers: Optional[dict] = None
41 | cookies: Optional[dict] = None
42 | post_data: Optional[Union[dict, Iterable[dict]]] = None
43 | post_processing: Optional[Callable[[bytes], bytes]] = None
44 |
45 | def __post_init__(self) -> None:
46 | """The initialization of this class defines the proper method to be
47 | called for download based on the protocol of the URL."""
48 | self.parsed_url: ParseResult = urlparse(self.url)
49 | self.file_name: str = Path(self.parsed_url.path).name
50 | self.https = self.http # maps HTTPS requests to HTTP method
51 |
52 | try:
53 | self.download = getattr(self, self.parsed_url.scheme)
54 | except AttributeError:
55 | error = f"No method implemented for {self.parsed_url.scheme}."
56 | raise DownloadMethodNotImplementedError(error)
57 |
58 | @staticmethod
59 | def unzip(path: Path, target: Path) -> Path:
60 | """Unzips the first file of an archive and returns its path."""
61 | with ZipFile(path) as archive:
62 | first_file, *_ = archive.namelist()
63 | target.write_bytes(archive.read(first_file))
64 |
65 | return target
66 |
67 | def http(self) -> Iterable[bytes]:
68 | """Download the source file(s) using HTTP."""
69 | session = Session()
70 |
71 | if self.cookies:
72 | session.cookies = cookiejar_from_dict(self.cookies)
73 |
74 | if isinstance(self.post_data, dict):
75 | self.post_data = (self.post_data,)
76 |
77 | def request_generator(method, kwargs=None):
78 | if kwargs is None:
79 | kwargs = ({},)
80 |
81 | for kw in kwargs:
82 | kw["url"] = self.url
83 | if self.headers:
84 | kw["headers"] = self.headers
85 |
86 | yield method(**kw)
87 |
88 | if self.post_data:
89 | send_as_json = False
90 | if self.headers:
91 | send_as_json = any("json" in v.lower() for v in self.headers.values())
92 |
93 | data_key = "json" if send_as_json else "data"
94 | params = ({data_key: data} for data in self.post_data)
95 | responses = request_generator(session.post, params)
96 | else:
97 | responses = request_generator(session.get)
98 |
99 | yield from (response.content for response in responses)
100 |
101 | @contextmanager
102 | def __call__(self) -> Iterator[Callable[[], Iterable[Path]]]:
103 | """Downloads the source file to a temporary directory and yields a
104 | generator of `pathlib.Path` with the path for the proper data file
105 | (which can be the downloaded file or the file unarchived from the
106 | downloaded one)."""
107 |
108 | def generator() -> Iterable[Path]:
109 | for contents in self.download():
110 | with NamedTemporaryFile() as tmp:
111 | path = Path(tmp.name)
112 | path.write_bytes(contents)
113 |
114 | with NamedTemporaryFile() as _unzipped:
115 | unzipped = Path(_unzipped.name)
116 | if self.should_unzip:
117 | path = self.unzip(path, unzipped)
118 |
119 | if self.post_processing:
120 | path.write_bytes(self.post_processing(path.read_bytes()))
121 |
122 | yield path
123 |
124 | yield generator
125 |
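`Download` is a callable context manager: calling the instance yields a generator of temporary file paths, which is exactly how `Adapter.download` consumes it. A minimal sketch with a placeholder URL:

```python
from calculadora_do_cidadao.download import Download

download = Download(url="https://example.com/data.xls")  # hypothetical source

with download() as paths:
    for path in paths():  # each path is a temporary file with the contents
        print(path, path.stat().st_size)
```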
--------------------------------------------------------------------------------
/calculadora_do_cidadao/fields.py:
--------------------------------------------------------------------------------
1 | from datetime import date, datetime
2 | from decimal import Decimal
3 | from typing import Union
4 |
5 | from calculadora_do_cidadao.rows import fields
6 | from calculadora_do_cidadao.typing import Date
7 |
8 |
9 | class PercentField(fields.PercentField):
10 | """Field for reading percentage in Brazilian Portuguese format."""
11 |
12 | @classmethod
13 | def deserialize(cls, value: str) -> Decimal: # type: ignore
14 | """Deserialize decimals using a comma as a decimal separator."""
15 | value = value or ""
16 | return super().deserialize(value.replace(",", "."))
17 |
18 |
19 | class DateField(fields.DateField):
20 | """DateField which supports different date formats, including Brazilian"""
21 |
22 | INPUT_FORMATS = (
23 | "%Y-%m-%d",
24 | "%d/%m/%Y",
25 | "%Y-%m",
26 | "%m/%Y",
27 | "%b/%Y",
28 | "%b-%Y",
29 | "%b %Y",
30 | "%Y",
31 | )
32 |
33 | @classmethod
34 | def deserialize(cls, value: Date, *args, **kwargs) -> date:
35 | if isinstance(value, datetime):
36 | return value.date()
37 |
38 | if isinstance(value, date):
39 | return value
40 |
41 | if isinstance(value, (int, float)):
42 | return datetime.fromtimestamp(value).date()
43 |
44 | value = fields.Field.deserialize(value)
45 | as_str: str = fields.as_string(value)
46 | as_str = as_str[:10] # in ISO format datetime, discard chars after date
47 | for date_format in cls.INPUT_FORMATS:
48 | try:
49 | dt_object = datetime.strptime(as_str, date_format)
50 | except ValueError:
51 | continue
52 | return dt_object.date()
53 |
54 | raise ValueError(f"Cannot parse value as date: {value}")
55 |
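These two fields are what let `adjust` accept the loose inputs shown in the README. A few concrete conversions:

```python
from calculadora_do_cidadao.fields import DateField, PercentField

print(DateField.deserialize("2014-07-08"))  # 2014-07-08
print(DateField.deserialize("07/1994"))     # 1994-07-01
print(PercentField.deserialize("0,62%"))    # 0.0062
```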
--------------------------------------------------------------------------------
/calculadora_do_cidadao/months.py:
--------------------------------------------------------------------------------
1 | from calendar import month_abbr
2 | from itertools import chain, cycle
3 |
4 |
5 | SHORT = (
6 | "Jan",
7 | "Fev",
8 | "Mar",
9 | "Abr",
10 | "Mai",
11 | "Jun",
12 | "Jul",
13 | "Ago",
14 | "Set",
15 | "Out",
16 | "Nov",
17 | "Dez",
18 | )
19 | COMPLETE = (
20 | "Janeiro",
21 | "Fevereiro",
22 | "Março",
23 | "Abril",
24 | "Maio",
25 | "Junho",
26 | "Julho",
27 | "Agosto",
28 | "Setembro",
29 | "Outubro",
30 | "Novembro",
31 | "Dezembro",
32 | )
33 |
34 | MONTHS = dict(zip(chain(SHORT, COMPLETE), cycle(month_abbr[1:13])))
35 |
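`MONTHS` maps both the short and the full Portuguese month names to the English abbreviations that `strptime`'s `%b` understands (see `DateField.INPUT_FORMATS`). For instance:

```python
from calculadora_do_cidadao.months import MONTHS

print(MONTHS["Fev"])       # Feb
print(MONTHS["Março"])     # Mar
print(MONTHS["Dezembro"])  # Dec
```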
--------------------------------------------------------------------------------
/calculadora_do_cidadao/rows/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | This module contains only files from a third-party package that cannot be
3 | installed via `pip`:
4 | https://github.com/cuducos/calculadora-do-cidadao/issues/51
5 |
6 | We kept the copied files as close as possible to what they were in their
7 | source at commit fcf226fdc779687df81ee586a31a8acf3f38f715, just cleaning up
8 | and adapting what is needed in order to work with this package. As soon
9 | as Rows has a new release, we can remove this module and install it from `pip` by:
10 |
11 | - reverting the commit that created this file
12 | - updating Rows' version in pyproject.toml
13 | """
14 |
--------------------------------------------------------------------------------
/calculadora_do_cidadao/rows/fields.py:
--------------------------------------------------------------------------------
1 | # Copyright 2014-2019 Álvaro Justen
2 |
3 | # This program is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Lesser General Public License as published by
5 | # the Free Software Foundation, either version 3 of the License, or
6 | # (at your option) any later version.
7 |
8 | # This program is distributed in the hope that it will be useful,
9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | # GNU Lesser General Public License for more details.
12 |
13 | # You should have received a copy of the GNU Lesser General Public License
14 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
15 |
16 | import binascii
17 | import datetime
18 | import json
19 | import locale
20 | import re
21 | from base64 import b64decode, b64encode
22 | from collections import OrderedDict, defaultdict
23 | from decimal import Decimal, InvalidOperation
24 | from itertools import zip_longest
25 | from unicodedata import normalize
26 |
27 |
28 | # Order matters here
29 | __all__ = [
30 | "BoolField",
31 | "IntegerField",
32 | "FloatField",
33 | "DatetimeField",
34 | "DateField",
35 | "DecimalField",
36 | "PercentField",
37 | "JSONField",
38 | "EmailField",
39 | "TextField",
40 | "BinaryField",
41 | "Field",
42 | ]
43 | NULL = ("-", "null", "none", "nil", "n/a", "na")
44 | NULL_BYTES = (b"-", b"null", b"none", b"nil", b"n/a", b"na")
45 | REGEXP_ONLY_NUMBERS = re.compile(r"[^0-9\-]")
46 | SHOULD_NOT_USE_LOCALE = True # This variable is changed by rows.locale_manager
47 | SLUG_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_"
48 |
49 |
50 | def value_error(value, cls):
51 | value = repr(value)
52 | if len(value) > 50:
53 | value = value[:50] + "..."
54 | raise ValueError("Value '{}' can't be {}".format(value, cls.__name__))
55 |
56 |
57 | class Field(object):
58 | """Base Field class - all fields should inherit from this
59 |
60 | As the fallback for all other field types are the BinaryField, this Field
61 | actually implements what is expected in the BinaryField
62 | """
63 |
64 | TYPE = (type(None),)
65 |
66 | @classmethod
67 | def serialize(cls, value, *args, **kwargs):
68 | """Serialize a value to be exported
69 |
70 | `cls.serialize` should always return an unicode value, except for
71 | BinaryField
72 | """
73 |
74 | if value is None:
75 | value = ""
76 | return value
77 |
78 | @classmethod
79 | def deserialize(cls, value, *args, **kwargs):
80 | """Deserialize a value just after importing it
81 |
82 | `cls.deserialize` should always return a value of type `cls.TYPE` or
83 | `None`.
84 | """
85 |
86 | if isinstance(value, cls.TYPE):
87 | return value
88 | elif is_null(value):
89 | return None
90 | else:
91 | return value
92 |
93 |
94 | class BinaryField(Field):
95 | """Field class to represent byte arrays
96 |
97 | Is not locale-aware (does not need to be)
98 | """
99 |
100 | TYPE = (bytes,)
101 |
102 | @classmethod
103 | def serialize(cls, value, *args, **kwargs):
104 | if value is not None:
105 | if not isinstance(value, bytes):
106 | value_error(value, cls)
107 | else:
108 | try:
109 | return b64encode(value).decode("ascii")
110 | except (TypeError, binascii.Error):
111 | return value
112 | else:
113 | return ""
114 |
115 | @classmethod
116 | def deserialize(cls, value, *args, **kwargs):
117 | if value is not None:
118 | if isinstance(value, bytes):
119 | return value
120 | elif isinstance(value, str):
121 | try:
122 | return b64decode(value)
123 | except (TypeError, ValueError, binascii.Error):
124 | raise ValueError("Can't decode base64")
125 | else:
126 | value_error(value, cls)
127 | else:
128 | return b""
129 |
130 |
131 | class BoolField(Field):
132 | """Base class to representing boolean
133 |
134 | Is not locale-aware (if you need to, please customize by changing its
135 | attributes like `TRUE_VALUES` and `FALSE_VALUES`)
136 | """
137 |
138 | TYPE = (bool,)
139 | SERIALIZED_VALUES = {True: "true", False: "false", None: ""}
140 | TRUE_VALUES = ("true", "yes")
141 | FALSE_VALUES = ("false", "no")
142 |
143 | @classmethod
144 | def serialize(cls, value, *args, **kwargs):
145 | # TODO: should we serialize `None` as well or give it to the plugin?
146 | return cls.SERIALIZED_VALUES[value]
147 |
148 | @classmethod
149 | def deserialize(cls, value, *args, **kwargs):
150 | value = super(BoolField, cls).deserialize(value)
151 | if value is None or isinstance(value, cls.TYPE):
152 | return value
153 |
154 | value = as_string(value).lower()
155 | if value in cls.TRUE_VALUES:
156 | return True
157 | elif value in cls.FALSE_VALUES:
158 | return False
159 | else:
160 | raise ValueError("Value is not boolean")
161 |
162 |
163 | class IntegerField(Field):
164 | """Field class to represent integer
165 |
166 | Is locale-aware
167 | """
168 |
169 | TYPE = (int,)
170 |
171 | @classmethod
172 | def serialize(cls, value, *args, **kwargs):
173 | if value is None:
174 | return ""
175 |
176 | if SHOULD_NOT_USE_LOCALE:
177 | return str(value)
178 | else:
179 | grouping = kwargs.get("grouping", None)
180 | return locale.format("%d", value, grouping=grouping)
181 |
182 | @classmethod
183 | def deserialize(cls, value, *args, **kwargs):
184 | value = super(IntegerField, cls).deserialize(value)
185 | if value is None or isinstance(value, cls.TYPE):
186 | return value
187 | elif isinstance(value, float):
188 | new_value = int(value)
189 | if new_value != value:
190 | raise ValueError("It's float, not integer")
191 | else:
192 | value = new_value
193 |
194 | value = as_string(value)
195 | if value != "0" and value.startswith("0"):
196 | raise ValueError("It's string, not integer")
197 | return int(value) if SHOULD_NOT_USE_LOCALE else locale.atoi(value)
198 |
199 |
200 | class FloatField(Field):
201 | """Field class to represent float
202 |
203 | Is locale-aware
204 | """
205 |
206 | TYPE = (float,)
207 |
208 | @classmethod
209 | def serialize(cls, value, *args, **kwargs):
210 | if value is None:
211 | return ""
212 |
213 | if SHOULD_NOT_USE_LOCALE:
214 | return str(value)
215 | else:
216 | grouping = kwargs.get("grouping", None)
217 | return locale.format("%f", value, grouping=grouping)
218 |
219 | @classmethod
220 | def deserialize(cls, value, *args, **kwargs):
221 | value = super(FloatField, cls).deserialize(value)
222 | if value is None or isinstance(value, cls.TYPE):
223 | return value
224 |
225 | value = as_string(value)
226 | if SHOULD_NOT_USE_LOCALE:
227 | return float(value)
228 | else:
229 | return locale.atof(value)
230 |
231 |
232 | class DecimalField(Field):
233 | """Field class to represent decimal data (as Python's decimal.Decimal)
234 |
235 | Is locale-aware
236 | """
237 |
238 | TYPE = (Decimal,)
239 |
240 | @classmethod
241 | def serialize(cls, value, *args, **kwargs):
242 | if value is None:
243 | return ""
244 |
245 | value_as_string = str(value)
246 | if SHOULD_NOT_USE_LOCALE:
247 | return value_as_string
248 | else:
249 | grouping = kwargs.get("grouping", None)
250 | has_decimal_places = value_as_string.find(".") != -1
251 | if not has_decimal_places:
252 | string_format = "%d"
253 | else:
254 | decimal_places = len(value_as_string.split(".")[1])
255 | string_format = "%.{}f".format(decimal_places)
256 | return locale.format(string_format, value, grouping=grouping)
257 |
258 | @classmethod
259 | def deserialize(cls, value, *args, **kwargs):
260 | value = super(DecimalField, cls).deserialize(value)
261 | if value is None or isinstance(value, cls.TYPE):
262 | return value
263 | elif type(value) in (int, float):
264 | return Decimal(str(value))
265 |
266 | if SHOULD_NOT_USE_LOCALE:
267 | try:
268 | return Decimal(value)
269 | except InvalidOperation:
270 | value_error(value, cls)
271 | else:
272 | locale_vars = locale.localeconv()
273 | decimal_separator = locale_vars["decimal_point"]
274 | interesting_vars = (
275 | "decimal_point",
276 | "mon_decimal_point",
277 | "mon_thousands_sep",
278 | "negative_sign",
279 | "positive_sign",
280 | "thousands_sep",
281 | )
282 | chars = (
283 | locale_vars[x].replace(".", r"\.").replace("-", r"\-")
284 | for x in interesting_vars
285 | )
286 | interesting_chars = "".join(set(chars))
287 | regexp = re.compile(r"[^0-9{} ]".format(interesting_chars))
288 | value = as_string(value)
289 | if regexp.findall(value):
290 | value_error(value, cls)
291 |
292 | parts = [
293 | REGEXP_ONLY_NUMBERS.subn("", number)[0]
294 | for number in value.split(decimal_separator)
295 | ]
296 | if len(parts) > 2:
297 | raise ValueError("Can't deserialize with this locale.")
298 | try:
299 | value = Decimal(parts[0])
300 | if len(parts) == 2:
301 | decimal_places = len(parts[1])
302 | value = value + (Decimal(parts[1]) / (10 ** decimal_places))
303 | except InvalidOperation:
304 | value_error(value, cls)
305 | return value
306 |
307 |
308 | class PercentField(DecimalField):
309 | """Field class to represent percent values
310 |
311 | Is locale-aware (inherit this behaviour from `rows.DecimalField`)
312 | """
313 |
314 | @classmethod
315 | def serialize(cls, value, *args, **kwargs):
316 | if value is None:
317 | return ""
318 | elif value == Decimal("0"):
319 | return "0.00%"
320 |
321 | value = Decimal(str(value * 100)[:-2])
322 | value = super(PercentField, cls).serialize(value, *args, **kwargs)
323 | return "{}%".format(value)
324 |
325 | @classmethod
326 | def deserialize(cls, value, *args, **kwargs):
327 | if isinstance(value, cls.TYPE):
328 | return value
329 | elif is_null(value):
330 | return None
331 |
332 | value = as_string(value)
333 | if "%" not in value:
334 | value_error(value, cls)
335 | value = value.replace("%", "")
336 | return super(PercentField, cls).deserialize(value) / 100
337 |
338 |
339 | class DateField(Field):
340 | """Field class to represent date
341 |
342 | Is not locale-aware (does not need to be)
343 | """
344 |
345 | TYPE = (datetime.date,)
346 | INPUT_FORMAT = "%Y-%m-%d"
347 | OUTPUT_FORMAT = "%Y-%m-%d"
348 |
349 | @classmethod
350 | def serialize(cls, value, *args, **kwargs):
351 | if value is None:
352 | return ""
353 |
354 | return str(value.strftime(cls.OUTPUT_FORMAT))
355 |
356 | @classmethod
357 | def deserialize(cls, value, *args, **kwargs):
358 | value = super(DateField, cls).deserialize(value)
359 | if value is None or isinstance(value, cls.TYPE):
360 | return value
361 |
362 | value = as_string(value)
363 |
364 | dt_object = datetime.datetime.strptime(value, cls.INPUT_FORMAT)
365 | return datetime.date(dt_object.year, dt_object.month, dt_object.day)
366 |
367 |
368 | class DatetimeField(Field):
369 | """Field class to represent date-time
370 |
371 | Is not locale-aware (does not need to be)
372 | """
373 |
374 | TYPE = (datetime.datetime,)
375 | DATETIME_REGEXP = re.compile(
376 | "^([0-9]{4})-([0-9]{2})-([0-9]{2})[ T]" "([0-9]{2}):([0-9]{2}):([0-9]{2})$"
377 | )
378 |
379 | @classmethod
380 | def serialize(cls, value, *args, **kwargs):
381 | if value is None:
382 | return ""
383 |
384 | return str(value.isoformat())
385 |
386 | @classmethod
387 | def deserialize(cls, value, *args, **kwargs):
388 | value = super(DatetimeField, cls).deserialize(value)
389 | if value is None or isinstance(value, cls.TYPE):
390 | return value
391 |
392 | value = as_string(value)
393 | # TODO: may use iso8601
394 | groups = cls.DATETIME_REGEXP.findall(value)
395 | if not groups:
396 | value_error(value, cls)
397 | else:
398 | return datetime.datetime(*[int(x) for x in groups[0]])
399 |
400 |
401 | class TextField(Field):
402 | """Field class to represent unicode strings
403 |
404 | Is not locale-aware (does not need to be)
405 | """
406 |
407 | TYPE = (str,)
408 |
409 | @classmethod
410 | def deserialize(cls, value, *args, **kwargs):
411 | if value is None or isinstance(value, cls.TYPE):
412 | return value
413 | else:
414 | return as_string(value)
415 |
416 |
417 | class EmailField(TextField):
418 | """Field class to represent e-mail addresses
419 |
420 | Is not locale-aware (does not need to be)
421 | """
422 |
423 | EMAIL_REGEXP = re.compile(
424 | r"^[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]+$", flags=re.IGNORECASE
425 | )
426 |
427 | @classmethod
428 | def serialize(cls, value, *args, **kwargs):
429 | if value is None:
430 | return ""
431 |
432 | return str(value)
433 |
434 | @classmethod
435 | def deserialize(cls, value, *args, **kwargs):
436 | value = super(EmailField, cls).deserialize(value)
437 | if value is None or not value.strip():
438 | return None
439 |
440 | result = cls.EMAIL_REGEXP.findall(value)
441 | if not result:
442 | value_error(value, cls)
443 | else:
444 | return result[0]
445 |
446 |
447 | class JSONField(Field):
448 | """Field class to represent JSON-encoded strings
449 |
450 | Is not locale-aware (does not need to be)
451 | """
452 |
453 | TYPE = (list, dict)
454 |
455 | @classmethod
456 | def serialize(cls, value, *args, **kwargs):
457 | return json.dumps(value)
458 |
459 | @classmethod
460 | def deserialize(cls, value, *args, **kwargs):
461 | value = super(JSONField, cls).deserialize(value)
462 | if value is None or isinstance(value, cls.TYPE):
463 | return value
464 | else:
465 | return json.loads(value)
466 |
467 |
468 | def as_string(value):
469 | if isinstance(value, bytes):
470 | raise ValueError("Binary is not supported")
471 | elif isinstance(value, str):
472 | return value
473 | else:
474 | return str(value)
475 |
476 |
477 | def is_null(value):
478 | if value is None:
479 | return True
480 | elif type(value) is bytes:
481 | value = value.strip().lower()
482 | return not value or value in NULL_BYTES
483 | else:
484 | value_str = as_string(value).strip().lower()
485 | return not value_str or value_str in NULL
486 |
487 |
488 | def unique_values(values):
489 | result = []
490 | for value in values:
491 | if not is_null(value) and value not in result:
492 | result.append(value)
493 | return result
494 |
495 |
496 | def get_items(*indexes):
497 | """Return a callable that fetches the given indexes of an object
498 | Always return a tuple even when len(indexes) == 1.
499 |
500 | Similar to `operator.itemgetter`, but will insert `None` when the object
501 | does not have the desired index (instead of raising IndexError).
502 | """
503 | return lambda obj: tuple(
504 | obj[index] if len(obj) > index else None for index in indexes
505 | )
506 |
507 |
508 | def slug(text, separator="_", permitted_chars=SLUG_CHARS, replace_with_separator=" -_"):
509 | """Generate a slug for the `text`.
510 |
511 | >>> slug(' ÁLVARO justen% ')
512 | 'alvaro_justen'
513 | >>> slug(' ÁLVARO justen% ', separator='-')
514 | 'alvaro-justen'
515 | """
516 |
517 | text = str(text or "")
518 |
519 | # Strip non-ASCII characters
520 | # Example: u' ÁLVARO justen% ' -> ' ALVARO justen% '
521 | text = normalize("NFKD", text.strip()).encode("ascii", "ignore").decode("ascii")
522 |
523 | # Replace spaces and other chars with separator
524 | # Example: u' ALVARO justen% ' -> u'_ALVARO__justen%_'
525 | for char in replace_with_separator:
526 | text = text.replace(char, separator)
527 |
528 | # Remove non-permitted characters and put everything to lowercase
529 | # Example: u'_ALVARO__justen%_' -> u'_alvaro__justen_'
530 | text = "".join(char for char in text if char in permitted_chars).lower()
531 |
532 | # Remove double occurrences of separator
533 | # Example: u'_alvaro__justen_' -> u'_alvaro_justen_'
534 | double_separator = separator + separator
535 | while double_separator in text:
536 | text = text.replace(double_separator, separator)
537 |
538 | # Strip separators
539 | # Example: u'_alvaro_justen_' -> u'alvaro_justen'
540 | return text.strip(separator)
541 |
542 |
543 | def make_unique_name(name, existing_names, name_format="{name}_{index}", start=2):
544 | """Return a unique name based on `name_format` and `name`."""
545 | index = start
546 | new_name = name
547 | while new_name in existing_names:
548 | new_name = name_format.format(name=name, index=index)
549 | index += 1
550 |
551 | return new_name
552 |
553 |
554 | def make_header(field_names, permit_not=False):
555 | """Return unique and slugged field names."""
556 | slug_chars = SLUG_CHARS if not permit_not else SLUG_CHARS + "^"
557 |
558 | header = [
559 | slug(field_name, permitted_chars=slug_chars) for field_name in field_names
560 | ]
561 | result = []
562 | for index, field_name in enumerate(header):
563 | if not field_name:
564 | field_name = "field_{}".format(index)
565 | elif field_name[0].isdigit():
566 | field_name = "field_{}".format(field_name)
567 |
568 | if field_name in result:
569 | field_name = make_unique_name(
570 | name=field_name, existing_names=result, start=2
571 | )
572 | result.append(field_name)
573 |
574 | return result
575 |
576 |
577 | DEFAULT_TYPES = (
578 | BoolField,
579 | IntegerField,
580 | FloatField,
581 | DecimalField,
582 | PercentField,
583 |     DatetimeField,
584 |     DateField,
585 |     JSONField,
586 |     TextField,
587 |     BinaryField,
588 | )
590 |
591 |
592 | class TypeDetector(object):
593 | """Detect data types based on a list of Field classes"""
594 |
595 | def __init__(
596 | self,
597 | field_names=None,
598 | field_types=DEFAULT_TYPES,
599 | fallback_type=TextField,
600 | skip_indexes=None,
601 | ):
602 | self.field_names = field_names or []
603 | self.field_types = list(field_types)
604 | self.fallback_type = fallback_type
605 | self._possible_types = defaultdict(lambda: list(self.field_types))
606 | self._samples = []
607 | self._skip = skip_indexes or tuple()
608 |
609 | def check_type(self, index, value):
610 | for type_ in self._possible_types[index][:]:
611 | try:
612 | type_.deserialize(value)
613 | except (ValueError, TypeError):
614 | self._possible_types[index].remove(type_)
615 |
616 | def process_row(self, row):
617 | for index, value in enumerate(row):
618 | if index in self._skip:
619 | continue
620 | self.check_type(index, value)
621 |
622 | def feed(self, data):
623 | for row in data:
624 | self.process_row(row)
625 |
626 | def priority(self, *field_types):
627 | """Decide the priority between each possible type"""
628 |
629 | return field_types[0] if field_types else self.fallback_type
630 |
631 | @property
632 | def fields(self):
633 | possible, skip = self._possible_types, self._skip
634 |
635 | if possible:
636 | # Create a header with placeholder values for each detected column
637 | # and then join this placeholders with original header - the
638 |             # original header may have fewer columns than the detected ones, so
639 | # we end with a full header having a name for every possible
640 | # column.
641 | placeholders = make_header(range(max(possible.keys()) + 1))
642 | header = [a or b for a, b in zip_longest(self.field_names, placeholders)]
643 | else:
644 | header = self.field_names
645 |
646 | return OrderedDict(
647 | [
648 | (
649 | field_name,
650 | self.priority(*(possible[index] if index in possible else [])),
651 | )
652 | for index, field_name in enumerate(header)
653 | if index not in skip
654 | ]
655 | )
656 |
657 |
658 | def detect_types(
659 | field_names,
660 | field_values,
661 | field_types=DEFAULT_TYPES,
662 | skip_indexes=None,
663 | type_detector=TypeDetector,
664 | fallback_type=TextField,
665 | *args,
666 | **kwargs
667 | ):
668 | """Detect column types (or "where the magic happens")"""
669 |
670 | # TODO: look strategy of csv.Sniffer.has_header
671 | # TODO: may receive 'type hints'
672 | detector = type_detector(
673 | field_names,
674 | field_types=field_types,
675 | fallback_type=fallback_type,
676 | skip_indexes=skip_indexes,
677 | )
678 | detector.feed(field_values)
679 | return detector.fields
680 |
681 |
682 | def identify_type(value):
683 | """Identify the field type for a specific value"""
684 |
685 | return detect_types(["name"], [[value]])["name"]
686 |
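687 | # A minimal usage sketch of the detection API above (values are
688 | # illustrative; the detected classes follow the DEFAULT_TYPES order):
689 | #
690 | #     >>> detected = detect_types(
691 | #     ...     ["when", "value"],
692 | #     ...     [["2020-01-01", "3.14"], ["2020-02-01", "2.72"]],
693 | #     ... )
694 | #     >>> detected["when"] is DateField and detected["value"] is FloatField
695 | #     True
696 | #     >>> identify_type("42") is IntegerField
697 | #     True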
--------------------------------------------------------------------------------
/calculadora_do_cidadao/rows/plugins/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2014-2019 Álvaro Justen
2 |
3 | # This program is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Lesser General Public License as published by
5 | # the Free Software Foundation, either version 3 of the License, or
6 | # (at your option) any later version.
7 |
8 | # This program is distributed in the hope that it will be useful,
9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | # GNU Lesser General Public License for more details.
12 |
13 | # You should have received a copy of the GNU Lesser General Public License
14 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
15 |
16 | from . import dicts as dicts # NOQA
17 | from . import plugin_csv as csv # NOQA
18 | from . import plugin_html as html # NOQA
19 |
20 | try:
21 | from . import xls as xls
22 | except ImportError:
23 | xls = None
24 |
--------------------------------------------------------------------------------
/calculadora_do_cidadao/rows/plugins/dicts.py:
--------------------------------------------------------------------------------
1 | # Copyright 2014-2019 Álvaro Justen
2 |
3 | # This program is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Lesser General Public License as published by
5 | # the Free Software Foundation, either version 3 of the License, or
6 | # (at your option) any later version.
7 |
8 | # This program is distributed in the hope that it will be useful,
9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | # GNU Lesser General Public License for more details.
12 |
13 | # You should have received a copy of the GNU Lesser General Public License
14 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
15 |
16 | from itertools import chain
17 |
18 | from calculadora_do_cidadao.rows.plugins.utils import create_table
19 |
20 |
21 | def import_from_dicts(data, samples=None, *args, **kwargs):
22 | """Import data from a iterable of dicts
23 |
24 |     The algorithm will use the first `samples` dicts to determine the field
25 |     names (if `samples` is `None`, all dicts will be used).
26 | """
27 |
28 | data = iter(data)
29 |
30 | cached_rows, headers = [], []
31 | for index, row in enumerate(data, start=1):
32 | cached_rows.append(row)
33 |
34 | for key in row.keys():
35 | if key not in headers:
36 | headers.append(key)
37 |
38 | if samples and index == samples:
39 | break
40 |
41 | data_rows = (
42 | [row.get(header, None) for header in headers]
43 | for row in chain(cached_rows, data)
44 | )
45 |
46 | kwargs["samples"] = samples
47 | meta = {"imported_from": "dicts"}
48 | return create_table(chain([headers], data_rows), meta=meta, *args, **kwargs)
49 |
50 |
51 | def export_to_dicts(table, *args, **kwargs):
52 | """Export a `rows.Table` to a list of dicts"""
53 | field_names = table.field_names
54 | return [{key: getattr(row, key) for key in field_names} for row in table]
55 |
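56 | # A minimal round-trip sketch (values are illustrative):
57 | #
58 | #     >>> table = import_from_dicts([{"name": "Alice", "age": "42"}])
59 | #     >>> export_to_dicts(table)
60 | #     [{'name': 'Alice', 'age': 42}]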
--------------------------------------------------------------------------------
/calculadora_do_cidadao/rows/plugins/plugin_csv.py:
--------------------------------------------------------------------------------
1 | # Copyright 2014-2019 Álvaro Justen
2 |
3 | # This program is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Lesser General Public License as published by
5 | # the Free Software Foundation, either version 3 of the License, or
6 | # (at your option) any later version.
7 |
8 | # This program is distributed in the hope that it will be useful,
9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | # GNU Lesser General Public License for more details.
12 |
13 | # You should have received a copy of the GNU Lesser General Public License
14 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
15 |
16 | from io import BytesIO
17 |
18 | import unicodecsv
19 |
20 | from calculadora_do_cidadao.rows.plugins.utils import (
21 | create_table,
22 | get_filename_and_fobj,
23 | ipartition,
24 | serialize,
25 | )
26 |
27 | sniffer = unicodecsv.Sniffer()
28 | # Some CSV files have more than 128kB of data in a cell, so we force this value
29 | # to be greater (16MB).
30 | # TODO: check if it impacts in memory usage.
31 | # TODO: may add option to change it by passing a parameter to import/export.
32 | unicodecsv.field_size_limit(16777216)
33 |
34 |
35 | def fix_dialect(dialect):
36 | if not dialect.doublequote and dialect.escapechar is None:
37 | dialect.doublequote = True
38 |
39 | if dialect.quoting == unicodecsv.QUOTE_MINIMAL and dialect.quotechar == "'":
40 | # Python csv's Sniffer seems to detect a wrong quotechar when
41 | # quoting is minimal
42 | dialect.quotechar = '"'
43 |
44 |
45 | def discover_dialect(sample, encoding, delimiters=(",", ";", "\t", "|")):
46 | """Discover a CSV dialect based on a sample size.
47 |
48 |     `sample` must be `bytes` and an `encoding` must be provided (Python 3).
49 | """
50 | # `csv.Sniffer.sniff` on Python 3 requires a `str` object. If we take a
51 | # sample from the `bytes` object and it happens to end in the middle of
52 | # a character which has more than one byte, we're going to have an
54 |     # `UnicodeDecodeError`. This `while` avoids the problem by removing the
54 | # last byte until this error stops.
55 | finished = False
56 | while not finished:
57 | try:
58 | decoded = sample.decode(encoding)
59 |
60 | except UnicodeDecodeError as exception:
61 | _, _, _, pos, error = exception.args
62 | if error == "unexpected end of data" and pos == len(sample):
63 | sample = sample[:-1]
64 | else:
65 | raise
66 | else:
67 | finished = True
68 |
69 | try:
70 | dialect = sniffer.sniff(decoded, delimiters=delimiters)
71 |
72 | except unicodecsv.Error: # Couldn't detect: fall back to 'excel'
73 | dialect = unicodecsv.excel
74 |
75 | fix_dialect(dialect)
76 | return dialect
77 |
78 |
79 | def read_sample(fobj, sample):
80 | """Read `sample` bytes from `fobj` and return the cursor to where it was."""
81 | cursor = fobj.tell()
82 | data = fobj.read(sample)
83 | fobj.seek(cursor)
84 | return data
85 |
86 |
87 | def import_from_csv(
88 | filename_or_fobj,
89 | encoding="utf-8",
90 | dialect=None,
91 | sample_size=262144,
92 | *args,
93 | **kwargs
94 | ):
95 | """Import data from a CSV file (automatically detects dialect).
96 |
97 | If a file-like object is provided it MUST be in binary mode, like in
98 | `open(filename, mode='rb')`.
99 | """
100 | filename, fobj = get_filename_and_fobj(filename_or_fobj, mode="rb")
101 |
102 | if dialect is None:
103 | dialect = discover_dialect(
104 | sample=read_sample(fobj, sample_size), encoding=encoding
105 | )
106 |
107 | reader = unicodecsv.reader(fobj, encoding=encoding, dialect=dialect)
108 |
109 | meta = {"imported_from": "csv", "filename": filename, "encoding": encoding}
110 | return create_table(reader, meta=meta, *args, **kwargs)
111 |
112 |
113 | def export_to_csv(
114 | table,
115 | filename_or_fobj=None,
116 | encoding="utf-8",
117 | dialect=unicodecsv.excel,
118 | batch_size=100,
119 | callback=None,
120 | *args,
121 | **kwargs
122 | ):
123 | """Export a `rows.Table` to a CSV file.
124 |
125 |
126 | If a file-like object is provided it MUST be in binary mode, like in
127 | `open(filename, mode='wb')`.
128 |     If no filename/fobj is provided, the function returns the CSV contents
129 |     as `bytes`.
130 | """
131 | # TODO: will work only if table.fields is OrderedDict
132 | # TODO: should use fobj? What about creating a method like json.dumps?
133 |
134 | if filename_or_fobj is not None:
135 | _, fobj = get_filename_and_fobj(filename_or_fobj, mode="wb")
136 | else:
137 | fobj = BytesIO()
138 |
139 | # TODO: may use `io.BufferedWriter` instead of `ipartition` so user can
140 | # choose the real size (in Bytes) when to flush to the file system, instead
141 | # number of rows
142 | writer = unicodecsv.writer(fobj, encoding=encoding, dialect=dialect)
143 |
144 | if callback is None:
145 | for batch in ipartition(serialize(table, *args, **kwargs), batch_size):
146 | writer.writerows(batch)
147 |
148 | else:
149 | serialized = serialize(table, *args, **kwargs)
150 | writer.writerow(next(serialized)) # First, write the header
151 | total = 0
152 | for batch in ipartition(serialized, batch_size):
153 | writer.writerows(batch)
154 | total += len(batch)
155 | callback(total)
156 |
157 | if filename_or_fobj is not None:
158 | fobj.flush()
159 | return fobj
160 | else:
161 | fobj.seek(0)
162 | result = fobj.read()
163 | fobj.close()
164 | return result
165 |
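166 | # A minimal round-trip sketch (`table` stands for any rows.Table; when no
167 | # filename/fobj is given, `export_to_csv` returns the contents as bytes):
168 | #
169 | #     >>> data = export_to_csv(table)
170 | #     >>> import_from_csv(BytesIO(data)).field_names == table.field_names
171 | #     True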
--------------------------------------------------------------------------------
/calculadora_do_cidadao/rows/plugins/plugin_html.py:
--------------------------------------------------------------------------------
1 | # Copyright 2014-2019 Álvaro Justen
2 |
3 | # This program is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Lesser General Public License as published by
5 | # the Free Software Foundation, either version 3 of the License, or
6 | # (at your option) any later version.
7 |
8 | # This program is distributed in the hope that it will be useful,
9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | # GNU Lesser General Public License for more details.
12 |
13 | # You should have received a copy of the GNU Lesser General Public License
14 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
15 |
16 | try:
17 | from lxml.etree import strip_tags
18 | from lxml.etree import tostring as to_string
19 | from lxml.html import document_fromstring
20 | except ImportError:
21 | has_lxml = False
22 | else:
23 | has_lxml = True
24 |
25 | from calculadora_do_cidadao.rows.plugins.utils import (
26 | create_table,
27 | export_data,
28 | get_filename_and_fobj,
29 | serialize,
30 | )
31 |
32 | try:
33 | from HTMLParser import HTMLParser # Python 2
34 |
35 | unescape = HTMLParser().unescape
36 | except ImportError:
37 | import html # Python 3
38 |
39 | unescape = html.unescape
40 |
41 |
42 | try:
43 | from html import escape # Python 3
44 | except ImportError:
45 | from cgi import escape # Python 2
46 |
47 |
48 | def _get_content(element):
49 | return (element.text if element.text is not None else "") + "".join(
50 | to_string(child, encoding=str) for child in element.getchildren()
51 | )
52 |
53 |
54 | def _get_row(row, column_tag, preserve_html, properties):
55 | if not preserve_html:
56 | data = list(map(_extract_node_text, row.xpath(column_tag)))
57 | else:
58 | data = list(map(_get_content, row.xpath(column_tag)))
59 |
60 | if properties:
61 | data.append(dict(row.attrib))
62 |
63 | return data
64 |
65 |
66 | def import_from_html(
67 | filename_or_fobj,
68 | encoding="utf-8",
69 | index=0,
70 | ignore_colspan=True,
71 | preserve_html=False,
72 | properties=False,
73 | table_tag="table",
74 | row_tag="tr",
75 | column_tag="td|th",
76 | *args,
77 | **kwargs
78 | ):
79 | """Return rows.Table from HTML file."""
80 | filename, fobj = get_filename_and_fobj(filename_or_fobj, mode="rb")
81 | html = fobj.read().decode(encoding)
82 | html_tree = document_fromstring(html)
83 | tables = html_tree.xpath("//{}".format(table_tag))
84 | table = tables[index]
85 |
86 | strip_tags(table, "thead")
87 | strip_tags(table, "tbody")
88 | row_elements = table.xpath(row_tag)
89 |
90 | table_rows = [
91 | _get_row(
92 | row,
93 | column_tag=column_tag,
94 | preserve_html=preserve_html,
95 | properties=properties,
96 | )
97 | for row in row_elements
98 | ]
99 |
100 | if properties:
101 | table_rows[0][-1] = "properties"
102 |
103 | if preserve_html and kwargs.get("fields", None) is None:
104 | # The field names will be the first table row, so we need to strip HTML
105 | # from it even if `preserve_html` is `True` (it's `True` only for rows,
106 | # not for the header).
107 | table_rows[0] = list(map(_extract_node_text, row_elements[0]))
108 |
109 | if ignore_colspan:
110 | max_columns = max(map(len, table_rows))
111 | table_rows = [row for row in table_rows if len(row) == max_columns]
112 |
113 | meta = {"imported_from": "html", "filename": filename, "encoding": encoding}
114 | return create_table(table_rows, meta=meta, *args, **kwargs)
115 |
116 |
117 | def export_to_html(table, filename_or_fobj=None, encoding="utf-8", *args, **kwargs):
118 | """Export and return rows.Table data to HTML file."""
119 | serialized_table = serialize(table, *args, **kwargs)
120 | fields = next(serialized_table)
121 |     result = ["<table>\n\n", "  <thead>\n", "    <tr>\n"]
122 |     header = ["      <th> {} </th>\n".format(field) for field in fields]
123 |     result.extend(header)
124 |     result.extend(["    </tr>\n", "  </thead>\n", "\n", "  <tbody>\n", "\n"])
125 |     for index, row in enumerate(serialized_table, start=1):
126 |         css_class = "odd" if index % 2 == 1 else "even"
127 |         result.append('    <tr class="{}">\n'.format(css_class))
128 |         for value in row:
129 |             result.extend(["      <td> ", escape(value), " </td>\n"])
130 |         result.append("    </tr>\n\n")
131 |     result.append("  </tbody>\n\n</table>\n")
132 | html = "".join(result).encode(encoding)
133 |
134 | return export_data(filename_or_fobj, html, mode="wb")
135 |
136 |
137 | def _extract_node_text(node):
138 | """Extract text from a given lxml node."""
139 | texts = map(str.strip, map(str, map(unescape, node.xpath(".//text()"))))
140 | return " ".join(text for text in texts if text)
141 |
142 |
143 | def count_tables(filename_or_fobj, encoding="utf-8", table_tag="table"):
144 | """Read a file passed by arg and return your table HTML tag count."""
145 | filename, fobj = get_filename_and_fobj(filename_or_fobj)
146 | html = fobj.read().decode(encoding)
147 | html_tree = document_fromstring(html)
148 | tables = html_tree.xpath("//{}".format(table_tag))
149 | return len(tables)
150 |
151 |
152 | def tag_to_dict(html):
153 | """Extract tag's attributes into a `dict`."""
154 | element = document_fromstring(html).xpath("//html/body/child::*")[0]
155 | attributes = dict(element.attrib)
156 | attributes["text"] = element.text_content()
157 | return attributes
158 |
159 |
160 | def extract_text(html):
161 | """Extract text from a given HTML."""
162 | return _extract_node_text(document_fromstring(html))
163 |
164 |
165 | def extract_links(html):
166 | """Extract the href values from a given HTML (returns a list of strings)."""
167 | return document_fromstring(html).xpath(".//@href")
168 |
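169 | # Minimal sketches of the helpers above (HTML snippets are illustrative):
170 | #
171 | #     >>> extract_text("<p>Hello <b>world</b></p>")
172 | #     'Hello world'
173 | #     >>> extract_links('<p><a href="https://example.com">a link</a></p>')
174 | #     ['https://example.com']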
--------------------------------------------------------------------------------
/calculadora_do_cidadao/rows/plugins/utils.py:
--------------------------------------------------------------------------------
1 | # Copyright 2014-2019 Álvaro Justen
2 |
3 | # This program is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Lesser General Public License as published by
5 | # the Free Software Foundation, either version 3 of the License, or
6 | # (at your option) any later version.
7 |
8 | # This program is distributed in the hope that it will be useful,
9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | # GNU Lesser General Public License for more details.
12 |
13 | # You should have received a copy of the GNU Lesser General Public License
14 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
15 |
16 | from collections import OrderedDict
17 | from collections.abc import Iterator
18 | from itertools import chain, islice
19 |
20 | # 'slug' and 'make_unique_name' are required here to maintain backwards compatibility
21 | from calculadora_do_cidadao.rows.fields import (
22 | TextField,
23 | detect_types,
24 | get_items, # NOQA
25 | make_header,
26 | make_unique_name,
27 | slug,
28 | )
29 | from calculadora_do_cidadao.rows.table import FlexibleTable, Table
30 |
31 |
32 | def ipartition(iterable, partition_size):
33 | if not isinstance(iterable, Iterator):
34 | iterator = iter(iterable)
35 | else:
36 | iterator = iterable
37 |
38 | finished = False
39 | while not finished:
40 | data = []
41 | for _ in range(partition_size):
42 | try:
43 | data.append(next(iterator))
44 | except StopIteration:
45 | finished = True
46 | break
47 | if data:
48 | yield data
49 |
50 |
51 | def get_filename_and_fobj(filename_or_fobj, mode="r", dont_open=False):
52 | if getattr(filename_or_fobj, "read", None) is not None:
53 | fobj = filename_or_fobj
54 | filename = getattr(fobj, "name", None)
55 | else:
56 | fobj = open(filename_or_fobj, mode=mode) if not dont_open else None
57 | filename = filename_or_fobj
58 |
59 | return filename, fobj
60 |
61 |
62 | def create_table(
63 | data,
64 | meta=None,
65 | fields=None,
66 | skip_header=True,
67 | import_fields=None,
68 | samples=None,
69 | force_types=None,
70 | max_rows=None,
71 | *args,
72 | **kwargs
73 | ):
74 | """Create a rows.Table object based on data rows and some configurations
75 |
76 | - `skip_header` is only used if `fields` is set
77 | - `samples` is only used if `fields` is `None`. If samples=None, all data
78 | is filled in memory - use with caution.
79 | - `force_types` is only used if `fields` is `None`
80 |     - `import_fields` can be used whether `fields` is set or not; the
81 |       resulting fields will follow its order
82 | - `fields` must always be in the same order as the data
83 | """
84 |
85 | table_rows = iter(data)
86 | force_types = force_types or {}
87 | if import_fields is not None:
88 | import_fields = make_header(import_fields)
89 |
90 | # TODO: test max_rows
91 | if fields is None: # autodetect field types
92 | # TODO: may add `type_hints` parameter so autodetection can be easier
93 | # (plugins may specify some possible field types).
94 | header = make_header(next(table_rows))
95 |
96 | if samples is not None:
97 | sample_rows = list(islice(table_rows, 0, samples))
98 | table_rows = chain(sample_rows, table_rows)
99 | else:
100 | if max_rows is not None and max_rows > 0:
101 | sample_rows = table_rows = list(islice(table_rows, max_rows))
102 | else:
103 | sample_rows = table_rows = list(table_rows)
104 |
105 | # Detect field types using only the desired columns
106 | detected_fields = detect_types(
107 | header,
108 | sample_rows,
109 | skip_indexes=[
110 | index
111 | for index, field in enumerate(header)
112 | if field in force_types or field not in (import_fields or header)
113 | ],
114 | *args,
115 | **kwargs
116 | )
117 |         # Check if any field was added during the detection process
118 | new_fields = [
119 | field_name
120 | for field_name in detected_fields.keys()
121 | if field_name not in header
122 | ]
123 | # Finally create the `fields` with both header and new field names,
124 |         # based on detected fields and `force_types`
125 | fields = OrderedDict(
126 | [
127 | (field_name, detected_fields.get(field_name, TextField))
128 | for field_name in header + new_fields
129 | ]
130 | )
131 | fields.update(force_types)
132 |
133 | # Update `header` and `import_fields` based on new `fields`
134 | header = list(fields.keys())
135 | if import_fields is None:
136 | import_fields = header
137 |
138 | else: # using provided field types
139 | if not isinstance(fields, OrderedDict):
140 | raise ValueError("`fields` must be an `OrderedDict`")
141 |
142 | if skip_header:
143 |         # If we're skipping the header, this row is probably not trustworthy
144 |         # (it can be data or garbage).
145 | next(table_rows)
146 |
147 | header = make_header(list(fields.keys()))
148 | if import_fields is None:
149 | import_fields = header
150 |
151 | fields = OrderedDict(
152 | [(field_name, fields[key]) for field_name, key in zip(header, fields)]
153 | )
154 |
155 | diff = set(import_fields) - set(header)
156 | if diff:
157 | field_names = ", ".join('"{}"'.format(field) for field in diff)
158 | raise ValueError("Invalid field names: {}".format(field_names))
159 | fields = OrderedDict(
160 | [(field_name, fields[field_name]) for field_name in import_fields]
161 | )
162 |
163 | get_row = get_items(*map(header.index, import_fields))
164 | table = Table(fields=fields, meta=meta)
165 | if max_rows is not None and max_rows > 0:
166 | table_rows = islice(table_rows, max_rows)
167 | table.extend(dict(zip(import_fields, get_row(row))) for row in table_rows)
168 |
169 | return table
170 |
171 |
172 | def prepare_to_export(table, export_fields=None, *args, **kwargs):
173 | # TODO: optimize for more used cases (export_fields=None)
174 | table_type = type(table)
175 | if table_type not in (FlexibleTable, Table):
176 | raise ValueError("Table type not recognized")
177 |
178 | if export_fields is None:
179 |         # use the already-slugged field names
180 | export_fields = table.field_names
181 | else:
182 | # we need to slug all the field names
183 | export_fields = make_header(export_fields)
184 |
185 | table_field_names = table.field_names
186 | diff = set(export_fields) - set(table_field_names)
187 | if diff:
188 | field_names = ", ".join('"{}"'.format(field) for field in diff)
189 | raise ValueError("Invalid field names: {}".format(field_names))
190 |
191 | yield export_fields
192 |
193 | if table_type is Table:
194 | field_indexes = list(map(table_field_names.index, export_fields))
195 | for row in table._rows:
196 | yield [row[field_index] for field_index in field_indexes]
197 | elif table_type is FlexibleTable:
198 | for row in table._rows:
199 | yield [row[field_name] for field_name in export_fields]
200 |
201 |
202 | def serialize(table, *args, **kwargs):
203 | prepared_table = prepare_to_export(table, *args, **kwargs)
204 |
205 | field_names = next(prepared_table)
206 | yield field_names
207 |
208 | field_types = [table.fields[field_name] for field_name in field_names]
209 | for row in prepared_table:
210 | yield [
211 | field_type.serialize(value, *args, **kwargs)
212 | for value, field_type in zip(row, field_types)
213 | ]
214 |
215 |
216 | def export_data(filename_or_fobj, data, mode="w"):
217 | """Return the object ready to be exported or only data if filename_or_fobj is not passed."""
218 | if filename_or_fobj is not None:
219 | _, fobj = get_filename_and_fobj(filename_or_fobj, mode=mode)
220 | fobj.write(data)
221 | fobj.flush()
222 | return fobj
223 | else:
224 | return data
225 |
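226 | # A minimal sketch of `ipartition`, the batching helper used by the plugins
227 | # (values are illustrative):
228 | #
229 | #     >>> list(ipartition(range(5), 2))
230 | #     [[0, 1], [2, 3], [4]]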
--------------------------------------------------------------------------------
/calculadora_do_cidadao/rows/plugins/xls.py:
--------------------------------------------------------------------------------
1 | # Copyright 2014-2019 Álvaro Justen
2 |
3 | # This program is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Lesser General Public License as published by
5 | # the Free Software Foundation, either version 3 of the License, or
6 | # (at your option) any later version.
7 |
8 | # This program is distributed in the hope that it will be useful,
9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | # GNU Lesser General Public License for more details.
12 |
13 | # You should have received a copy of the GNU Lesser General Public License
14 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
15 |
16 | import datetime
17 | import os
18 | from io import BytesIO
19 |
20 | import xlrd
21 | import xlwt
22 |
23 | import calculadora_do_cidadao.rows.fields as fields
24 | from calculadora_do_cidadao.rows.plugins.utils import (
25 | create_table,
26 | get_filename_and_fobj,
27 | prepare_to_export,
28 | )
29 |
30 | CELL_TYPES = {
31 | xlrd.XL_CELL_BLANK: fields.TextField,
32 | xlrd.XL_CELL_DATE: fields.DatetimeField,
33 | xlrd.XL_CELL_ERROR: None,
34 | xlrd.XL_CELL_TEXT: fields.TextField,
35 | xlrd.XL_CELL_BOOLEAN: fields.BoolField,
36 | xlrd.XL_CELL_EMPTY: None,
37 | xlrd.XL_CELL_NUMBER: fields.FloatField,
38 | }
39 |
40 |
41 | # TODO: add more formatting styles for other types such as currency
42 | # TODO: styles may be influenced by locale
43 | FORMATTING_STYLES = {
44 | fields.DateField: xlwt.easyxf(num_format_str="yyyy-mm-dd"),
45 | fields.DatetimeField: xlwt.easyxf(num_format_str="yyyy-mm-dd hh:mm:ss"),
46 | fields.PercentField: xlwt.easyxf(num_format_str="0.00%"),
47 | }
48 |
49 |
50 | def _python_to_xls(field_types):
51 | def convert_value(field_type, value):
52 | data = {}
53 | if field_type in FORMATTING_STYLES:
54 | data["style"] = FORMATTING_STYLES[field_type]
55 |
56 | if field_type in (
57 | fields.BinaryField,
58 | fields.BoolField,
59 | fields.DateField,
60 | fields.DatetimeField,
61 | fields.DecimalField,
62 | fields.FloatField,
63 | fields.IntegerField,
64 | fields.PercentField,
65 | fields.TextField,
66 | ):
67 | return value, data
68 |
69 | else: # don't know this field
70 | return field_type.serialize(value), data
71 |
72 | def convert_row(row):
73 | return [
74 | convert_value(field_type, value)
75 | for field_type, value in zip(field_types, row)
76 | ]
77 |
78 | return convert_row
79 |
80 |
81 | def cell_value(sheet, row, col):
82 | """Return the cell value of the table passed by argument, based in row and column."""
83 | cell = sheet.cell(row, col)
84 | field_type = CELL_TYPES[cell.ctype]
85 |
86 | # TODO: this approach will not work if using locale
87 | value = cell.value
88 |
89 | if field_type is None:
90 | return None
91 |
92 | elif field_type is fields.TextField:
93 | if cell.ctype != xlrd.XL_CELL_BLANK:
94 | return value
95 | else:
96 | return ""
97 |
98 | elif field_type is fields.DatetimeField:
99 | if value == 0.0:
100 | return None
101 |
102 | try:
103 | time_tuple = xlrd.xldate_as_tuple(value, sheet.book.datemode)
104 | except xlrd.xldate.XLDateTooLarge:
105 | return None
106 | value = field_type.serialize(datetime.datetime(*time_tuple))
107 | return value.split("T00:00:00")[0]
108 |
109 | elif field_type is fields.BoolField:
110 | if value == 0:
111 | return False
112 | elif value == 1:
113 | return True
114 |
115 | elif cell.xf_index is None:
116 | return value # TODO: test
117 |
118 | else:
119 | book = sheet.book
120 | xf = book.xf_list[cell.xf_index]
121 | fmt = book.format_map[xf.format_key]
122 |
123 | if fmt.format_str.endswith("%"):
124 | # TODO: we may optimize this approach: we're converting to string
125 | # and the library is detecting the type when we could just say to
126 | # the library this value is PercentField
127 |
128 | if value is not None:
129 | try:
130 | decimal_places = len(fmt.format_str[:-1].split(".")[-1])
131 | except IndexError:
132 | decimal_places = 2
133 | return "{}%".format(str(round(value * 100, decimal_places)))
134 | else:
135 | return None
136 |
137 | elif type(value) == float and int(value) == value:
138 | return int(value)
139 |
140 | else:
141 | return value
142 |
143 |
144 | def get_table_start(sheet):
145 | empty_cell_type = xlrd.empty_cell.ctype
146 | start_column, start_row = 0, 0
147 | for col in range(sheet.ncols):
148 | if any(cell for cell in sheet.col(col) if cell.ctype != empty_cell_type):
149 | start_column = col
150 | break
151 | for row in range(sheet.nrows):
152 | if any(cell for cell in sheet.row(row) if cell.ctype != empty_cell_type):
153 | start_row = row
154 | break
155 | return start_row, start_column
156 |
157 |
158 | def import_from_xls(
159 | filename_or_fobj,
160 | sheet_name=None,
161 | sheet_index=0,
162 | start_row=None,
163 | start_column=None,
164 | end_row=None,
165 | end_column=None,
166 | *args,
167 | **kwargs
168 | ):
169 | """Return a rows.Table created from imported XLS file."""
170 |
171 | filename, _ = get_filename_and_fobj(filename_or_fobj, mode="rb")
172 | book = xlrd.open_workbook(
173 | filename, formatting_info=True, logfile=open(os.devnull, mode="w")
174 | )
175 |
176 | if sheet_name is not None:
177 | sheet = book.sheet_by_name(sheet_name)
178 | else:
179 | sheet = book.sheet_by_index(sheet_index)
180 | # TODO: may re-use Excel data types
181 |
182 | # Get header and rows
183 |     # The xlrd library reads rows and columns starting from 0 and ending on
184 |     # sheet.nrows/ncols - 1. rows also uses 0-based indexes, so no
185 |     # transformation is needed.
188 | min_row, min_column = get_table_start(sheet)
189 | max_row, max_column = sheet.nrows - 1, sheet.ncols - 1
190 | # TODO: consider adding a parameter `ignore_padding=True` and when it's
191 | # True, consider `start_row` starting from `min_row` and `start_column`
192 | # starting from `min_col`.
193 | start_row = max(start_row if start_row is not None else min_row, min_row)
194 | end_row = min(end_row if end_row is not None else max_row, max_row)
195 | start_column = max(
196 | start_column if start_column is not None else min_column, min_column
197 | )
198 | end_column = min(end_column if end_column is not None else max_column, max_column)
199 |
200 | table_rows = [
201 | [
202 | cell_value(sheet, row_index, column_index)
203 | for column_index in range(start_column, end_column + 1)
204 | ]
205 | for row_index in range(start_row, end_row + 1)
206 | ]
207 |
208 | meta = {"imported_from": "xls", "filename": filename, "sheet_name": sheet.name}
209 | return create_table(table_rows, meta=meta, *args, **kwargs)
210 |
211 |
212 | def export_to_xls(table, filename_or_fobj=None, sheet_name="Sheet1", *args, **kwargs):
213 | """Export the rows.Table to XLS file and return the saved file."""
214 | work_book = xlwt.Workbook()
215 | sheet = work_book.add_sheet(sheet_name)
216 |
217 | prepared_table = prepare_to_export(table, *args, **kwargs)
218 |
219 | field_names = next(prepared_table)
220 | for column_index, field_name in enumerate(field_names):
221 | sheet.write(0, column_index, field_name)
222 |
223 | _convert_row = _python_to_xls([table.fields.get(field) for field in field_names])
224 | for row_index, row in enumerate(prepared_table, start=1):
225 | for column_index, (value, data) in enumerate(_convert_row(row)):
226 | sheet.write(row_index, column_index, value, **data)
227 |
228 | if filename_or_fobj is not None:
229 | _, fobj = get_filename_and_fobj(filename_or_fobj, mode="wb")
230 | work_book.save(fobj)
231 | fobj.flush()
232 | return fobj
233 | else:
234 | fobj = BytesIO()
235 | work_book.save(fobj)
236 | fobj.seek(0)
237 | result = fobj.read()
238 | fobj.close()
239 | return result
240 |
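241 | # A minimal round-trip sketch (`table` stands for any rows.Table and the
242 | # file name is illustrative):
243 | #
244 | #     >>> _ = export_to_xls(table, "example.xls")
245 | #     >>> len(import_from_xls("example.xls")) == len(table)
246 | #     True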
--------------------------------------------------------------------------------
/calculadora_do_cidadao/rows/table.py:
--------------------------------------------------------------------------------
1 | # Copyright 2014-2019 Álvaro Justen
2 |
3 | # This program is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Lesser General Public License as published by
5 | # the Free Software Foundation, either version 3 of the License, or
6 | # (at your option) any later version.
7 |
8 | # This program is distributed in the hope that it will be useful,
9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | # GNU Lesser General Public License for more details.
12 |
13 | # You should have received a copy of the GNU Lesser General Public License
14 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
15 |
16 | import os
17 | from collections import OrderedDict, namedtuple
18 | from collections.abc import MutableSequence, Sized
19 | from operator import itemgetter
20 |
21 |
22 | class Table(MutableSequence):
23 | def __init__(self, fields, meta=None):
24 | from calculadora_do_cidadao.rows.plugins import utils
25 |
26 | # TODO: should we really use OrderedDict here?
27 | # TODO: should use slug on each field name automatically or inside each
28 | # plugin?
29 | self.fields = OrderedDict(
30 | [
31 | (utils.slug(field_name), field_type)
32 | for field_name, field_type in OrderedDict(fields).items()
33 | ]
34 | )
35 |
36 | # TODO: should be able to customize row return type (namedtuple, dict
37 | # etc.)
38 | self.Row = namedtuple("Row", self.field_names)
39 | self._rows = []
40 | self.meta = dict(meta) if meta is not None else {}
41 |
42 | def _repr_html_(self):
43 |         from calculadora_do_cidadao.rows import plugins
44 |
45 |         return plugins.html.export_to_html(self).decode("utf-8")
46 |
47 | def head(self, n=10):
48 | table = Table(fields=self.fields, meta=self.meta)
49 | table._rows = self._rows[:n]
50 | return table
51 |
52 | def tail(self, n=10):
53 | table = Table(fields=self.fields, meta=self.meta)
54 | table._rows = self._rows[-n:]
55 | return table
56 |
57 | @property
58 | def field_names(self):
59 | return list(self.fields.keys())
60 |
61 | @property
62 | def field_types(self):
63 | return list(self.fields.values())
64 |
65 | @property
66 | def name(self):
67 | """Define table name based on its metadata (filename used on import)
68 |
69 | If `filename` is not available, return `table1`.
70 | """
71 |
72 |         from calculadora_do_cidadao.rows.plugins import utils
73 |
74 | # TODO: may try read meta['name'] also (some plugins may set it)
75 | name = os.path.basename(self.meta.get("filename", "table1"))
76 | return utils.slug(os.path.splitext(name)[0])
77 |
78 | def __repr__(self):
79 | length = len(self._rows) if isinstance(self._rows, Sized) else "?"
80 |
81 | imported = ""
82 | if "imported_from" in self.meta:
83 | imported = " (from {})".format(self.meta["imported_from"])
84 |
85 | return "".format(
86 | imported, len(self.fields), length
87 | )
88 |
89 | def _make_row(self, row):
90 | # TODO: should be able to customize row type (namedtuple, dict etc.)
91 | return [
92 | field_type.deserialize(row.get(field_name, None))
93 | for field_name, field_type in self.fields.items()
94 | ]
95 |
96 | def append(self, row):
97 | """Add a row to the table. Should be a dict"""
98 |
99 | self._rows.append(self._make_row(row))
100 |
101 | def __len__(self):
102 | return len(self._rows)
103 |
104 | def __getitem__(self, key):
105 | key_type = type(key)
106 | if key_type == int:
107 | return self.Row(*self._rows[key])
108 | elif key_type == slice:
109 | return [self.Row(*row) for row in self._rows[key]]
110 | elif key_type is str:
111 | try:
112 | field_index = self.field_names.index(key)
113 | except ValueError:
114 | raise KeyError(key)
115 |
116 | # TODO: should change the line below to return a generator exp?
117 | return [row[field_index] for row in self._rows]
118 | else:
119 | raise ValueError("Unsupported key type: {}".format(type(key).__name__))
120 |
121 | def __setitem__(self, key, value):
122 | key_type = type(key)
123 | if key_type == int:
124 | self._rows[key] = self._make_row(value)
125 | elif key_type is str:
126 |             from calculadora_do_cidadao.rows import fields
127 |             from calculadora_do_cidadao.rows.plugins import utils
128 |
129 | values = list(value) # I'm not lazy, sorry
130 | if len(values) != len(self):
131 | raise ValueError(
132 | "Values length ({}) should be the same as "
133 | "Table length ({})".format(len(values), len(self))
134 | )
135 |
136 | field_name = utils.slug(key)
137 | is_new_field = field_name not in self.field_names
138 | field_type = fields.detect_types(
139 | [field_name], [[value] for value in values]
140 | )[field_name]
141 | self.fields[field_name] = field_type
142 | self.Row = namedtuple("Row", self.field_names)
143 |
144 | if is_new_field:
145 | for row, value in zip(self._rows, values):
146 | row.append(field_type.deserialize(value))
147 | else:
148 | field_index = self.field_names.index(field_name)
149 | for row, value in zip(self._rows, values):
150 | row[field_index] = field_type.deserialize(value)
151 | else:
152 | raise ValueError("Unsupported key type: {}".format(type(key).__name__))
153 |
154 | def __delitem__(self, key):
155 | key_type = type(key)
156 | if key_type == int:
157 | del self._rows[key]
158 | elif key_type is str:
159 | try:
160 | field_index = self.field_names.index(key)
161 | except ValueError:
162 | raise KeyError(key)
163 |
164 | del self.fields[key]
165 | self.Row = namedtuple("Row", self.field_names)
166 | for row in self._rows:
167 | row.pop(field_index)
168 | else:
169 | raise ValueError("Unsupported key type: {}".format(type(key).__name__))
170 |
171 | def insert(self, index, row):
172 | self._rows.insert(index, self._make_row(row))
173 |
174 | def __radd__(self, other):
175 | if other == 0:
176 | return self
177 | raise ValueError()
178 |
179 | def __iadd__(self, other):
180 | return self + other
181 |
182 | def __add__(self, other):
183 | if other == 0:
184 | return self
185 |
186 | if not isinstance(self, type(other)) or self.fields != other.fields:
187 | raise ValueError("Tables have incompatible fields")
188 | else:
189 | table = Table(fields=self.fields)
190 | table._rows = self._rows + other._rows
191 | return table
192 |
193 | def order_by(self, key):
194 | # TODO: implement locale
195 | # TODO: implement for more than one key
196 | reverse = False
197 | if key.startswith("-"):
198 | key = key[1:]
199 | reverse = True
200 |
201 | field_names = self.field_names
202 | if key not in field_names:
203 | raise ValueError('Field "{}" does not exist'.format(key))
204 |
205 | key_index = field_names.index(key)
206 | self._rows.sort(key=itemgetter(key_index), reverse=reverse)
207 |
208 |
209 | class FlexibleTable(Table):
210 | def __init__(self, fields=None, meta=None):
211 | if fields is None:
212 | fields = {}
213 | super(FlexibleTable, self).__init__(fields, meta)
214 |
215 | def __getitem__(self, key):
216 | if isinstance(key, int):
217 | return self.Row(**self._rows[key])
218 | elif isinstance(key, slice):
219 | return [self.Row(**row) for row in self._rows[key]]
220 | else:
221 | raise ValueError("Unsupported key type: {}".format(type(key).__name__))
222 |
223 | def _add_field(self, field_name, field_type):
224 | self.fields[field_name] = field_type
225 | self.Row = namedtuple("Row", self.field_names)
226 |
227 | def _make_row(self, row):
228 |         from calculadora_do_cidadao.rows import fields
229 |
230 | for field_name in row.keys():
231 | if field_name not in self.field_names:
232 | self._add_field(field_name, fields.identify_type(row[field_name]))
233 |
234 | return {
235 | field_name: field_type.deserialize(row.get(field_name, None))
236 | for field_name, field_type in self.fields.items()
237 | }
238 |
239 | def insert(self, index, row):
240 | self._rows.insert(index, self._make_row(row))
241 |
242 | def __setitem__(self, key, value):
243 | self._rows[key] = self._make_row(value)
244 |
245 | def append(self, row):
246 | """Add a row to the table. Should be a dict"""
247 |
248 | self._rows.append(self._make_row(row))
249 |
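250 | # A minimal usage sketch (field names and values are illustrative):
251 | #
252 | #     >>> from collections import OrderedDict
253 | #     >>> from calculadora_do_cidadao.rows import fields
254 | #     >>> table = Table(
255 | #     ...     fields=OrderedDict(
256 | #     ...         [("name", fields.TextField), ("age", fields.IntegerField)]
257 | #     ...     )
258 | #     ... )
259 | #     >>> table.append({"name": "Alice", "age": "42"})
260 | #     >>> table[0].age  # values are deserialized by the field types
261 | #     42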
--------------------------------------------------------------------------------
/calculadora_do_cidadao/typing.py:
--------------------------------------------------------------------------------
1 | from datetime import date, datetime
2 | from decimal import Decimal
3 | from typing import Dict, Iterable, Optional, Tuple, Union
4 |
5 |
6 | Index = Tuple[date, Decimal]
7 | MaybeIndex = Optional[Index]
8 |
9 | IndexDictionary = Dict[date, Decimal]
10 |
11 | IndexesGenerator = Iterable[Index]
12 | MaybeIndexesGenerator = Iterable[MaybeIndex]
13 |
14 | Date = Union[date, datetime, int, float, str] # parsed by fields.DateField
15 |
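16 | # A sketch of how these aliases read in practice (hypothetical function):
17 | #
18 | # def most_recent(indexes: IndexesGenerator) -> MaybeIndex:
19 | #     return max(indexes, default=None, key=lambda index: index[0])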
--------------------------------------------------------------------------------
/docs/api.rst:
--------------------------------------------------------------------------------
1 | API
2 | ===
3 |
4 | Base Adapter
5 | ------------
6 |
7 | .. autofunction:: calculadora_do_cidadao.adapters.import_from_json
8 |
9 | .. autoclass:: calculadora_do_cidadao.adapters.Adapter
10 | :members:
11 |
12 | .. autoexception:: calculadora_do_cidadao.adapters.AdapterNoImportMethod
13 | :members:
14 |
15 | .. autoexception:: calculadora_do_cidadao.adapters.AdapterDateNotAvailableError
16 | :members:
17 |
18 | Adapters
19 | --------
20 |
21 | DIEESE Cesta Básica
22 | ~~~~~~~~~~~~~~~~~~~
23 |
24 | .. dieese:
25 | .. autoclass:: calculadora_do_cidadao.CestaBasica
26 | :members:
27 |
28 | .. autoclass:: calculadora_do_cidadao.CestaBasicaCentroOeste
29 | :members:
30 |
31 | .. autoclass:: calculadora_do_cidadao.CestaBasicaNordeste
32 | :members:
33 |
34 | .. autoclass:: calculadora_do_cidadao.CestaBasicaNorte
35 | :members:
36 |
37 | .. autoclass:: calculadora_do_cidadao.CestaBasicaSudeste
38 | :members:
39 |
40 | .. autoclass:: calculadora_do_cidadao.CestaBasicaSul
41 | :members:
42 |
43 | .. autoclass:: calculadora_do_cidadao.CestaBasicaAracaju
44 | :members:
45 |
46 | .. autoclass:: calculadora_do_cidadao.CestaBasicaBelem
47 | :members:
48 |
49 | .. autoclass:: calculadora_do_cidadao.CestaBasicaBeloHorizonte
50 | :members:
51 |
52 | .. autoclass:: calculadora_do_cidadao.CestaBasicaBoaVista
53 | :members:
54 |
55 | .. autoclass:: calculadora_do_cidadao.CestaBasicaBrasilia
56 | :members:
57 |
58 | .. autoclass:: calculadora_do_cidadao.CestaBasicaCampoGrande
59 | :members:
60 |
61 | .. autoclass:: calculadora_do_cidadao.CestaBasicaCuiaba
62 | :members:
63 |
64 | .. autoclass:: calculadora_do_cidadao.CestaBasicaCuritiba
65 | :members:
66 |
67 | .. autoclass:: calculadora_do_cidadao.CestaBasicaFlorianopolis
68 | :members:
69 |
70 | .. autoclass:: calculadora_do_cidadao.CestaBasicaFortaleza
71 | :members:
72 |
73 | .. autoclass:: calculadora_do_cidadao.CestaBasicaGoiania
74 | :members:
75 |
76 | .. autoclass:: calculadora_do_cidadao.CestaBasicaJoaoPessoa
77 | :members:
78 |
79 | .. autoclass:: calculadora_do_cidadao.CestaBasicaMacae
80 | :members:
81 |
82 | .. autoclass:: calculadora_do_cidadao.CestaBasicaMacapa
83 | :members:
84 |
85 | .. autoclass:: calculadora_do_cidadao.CestaBasicaMaceio
86 | :members:
87 |
88 | .. autoclass:: calculadora_do_cidadao.CestaBasicaManaus
89 | :members:
90 |
91 | .. autoclass:: calculadora_do_cidadao.CestaBasicaNatal
92 | :members:
93 |
94 | .. autoclass:: calculadora_do_cidadao.CestaBasicaPalmas
95 | :members:
96 |
97 | .. autoclass:: calculadora_do_cidadao.CestaBasicaPortoAlegre
98 | :members:
99 |
100 | .. autoclass:: calculadora_do_cidadao.CestaBasicaPortoVelho
101 | :members:
102 |
103 | .. autoclass:: calculadora_do_cidadao.CestaBasicaRecife
104 | :members:
105 |
106 | .. autoclass:: calculadora_do_cidadao.CestaBasicaRioBranco
107 | :members:
108 |
109 | .. autoclass:: calculadora_do_cidadao.CestaBasicaRioDeJaneiro
110 | :members:
111 |
112 | .. autoclass:: calculadora_do_cidadao.CestaBasicaSalvador
113 | :members:
114 |
115 | .. autoclass:: calculadora_do_cidadao.CestaBasicaSaoLuis
116 | :members:
117 |
118 | .. autoclass:: calculadora_do_cidadao.CestaBasicaSaoPaulo
119 | :members:
120 |
121 | .. autoclass:: calculadora_do_cidadao.CestaBasicaTeresina
122 | :members:
123 |
124 | .. autoclass:: calculadora_do_cidadao.CestaBasicaVitoria
125 | :members:
126 |
127 | FED's Consumer Price Index for All Urban Consumers: All Items
128 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
129 |
130 | .. AllUrbanCityAverage:
131 | .. autoclass:: calculadora_do_cidadao.AllUrbanCityAverage
132 | :members:
133 |
134 | IGP-M
135 | ~~~~~
136 |
137 | .. igpm:
138 | .. autoclass:: calculadora_do_cidadao.Igpm
139 | :members:
140 |
141 | IPCA & INPC Family
142 | ~~~~~~~~~~~~~~~~~~
143 |
144 | .. autoclass:: calculadora_do_cidadao.adapters.ibge.IbgeAdapter
145 | :members:
146 |
147 | .. autoclass:: calculadora_do_cidadao.Inpc
148 | :members:
149 |
150 | .. autoclass:: calculadora_do_cidadao.Ipca
151 | :members:
152 |
153 | .. autoclass:: calculadora_do_cidadao.Ipca15
154 | :members:
155 |
156 | .. autoclass:: calculadora_do_cidadao.IpcaE
157 | :members:
158 |
159 | Download
160 | --------
161 |
162 | .. autoclass:: calculadora_do_cidadao.download.Download
163 | :members:
164 |
165 | .. autoexception:: calculadora_do_cidadao.download.DownloadMethodNotImplementedError
166 | :members:
167 |
168 | Fields
169 | ------
170 |
171 | .. autoclass:: calculadora_do_cidadao.fields.DateField
172 | :members:
173 |
174 | .. autoclass:: calculadora_do_cidadao.fields.PercentField
175 | :members:
176 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from pathlib import Path
3 | from pkg_resources import get_distribution
4 | from typing import List
5 |
6 |
7 | # Configuration file for the Sphinx documentation builder.
8 | #
9 | # This file only contains a selection of the most common options. For a full
10 | # list see the documentation:
11 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
12 |
13 | # -- Path setup --------------------------------------------------------------
14 |
15 | # If extensions (or modules to document with autodoc) are in another directory,
16 | # add these directories to sys.path here. If the directory is relative to the
17 | # documentation root, use os.path.abspath to make it absolute, like shown here.
18 |
19 | sys.path.insert(0, str(Path().absolute().parent.parent))
20 |
21 |
22 | # -- Project information -----------------------------------------------------
23 |
24 | project = "Calculadora do Cidadão"
25 | copyright = "2021, Eduardo Cuducos"
26 | author = "Eduardo Cuducos"
27 |
28 | # The full version, including alpha/beta/rc tags
29 | release = get_distribution("calculadora-do-cidadao").version
30 |
31 |
32 | # -- General configuration ---------------------------------------------------
33 |
34 | # Add any Sphinx extension module names here, as strings. They can be
35 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
36 | # ones.
37 | extensions = [
38 | "readthedocs_ext.readthedocs",
39 | "sphinx.ext.autodoc",
40 | "sphinx.ext.coverage",
41 | "sphinx_rtd_theme",
42 | ]
43 |
44 | # The language for content autogenerated by Sphinx. Refer to documentation
45 | # for a list of supported languages.
46 | #
47 | # This is also used if you do content translation via gettext catalogs.
48 | # Usually you set "language" from the command line for these cases.
49 | language = "pt-br"
50 |
51 | # List of patterns, relative to source directory, that match files and
52 | # directories to ignore when looking for source files.
53 | # This pattern also affects html_static_path and html_extra_path.
54 | exclude_patterns: List[str] = []
55 |
56 |
57 | # -- Options for HTML output -------------------------------------------------
58 |
59 | # The theme to use for HTML and HTML Help pages. See the documentation for
60 | # a list of builtin themes.
61 | #
62 | html_theme = "sphinx_rtd_theme"
63 | master_doc = "index"
64 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | Calculadora do Cidadão
2 | ======================
3 |
4 | The `Calculadora do Cidadão `_ package requires Python 3.7 or 3.8. Since it is `available on PyPI <https://pypi.org/project/calculadora-do-cidadao/>`_, it can be installed with `pip`:
5 |
6 | ::
7 |
8 | $ pip install calculadora-do-cidadao
9 |
10 | .. toctree::
11 | :maxdepth: 2
12 |    :caption: Contents:
13 |
14 | usage
15 | new_adapters
16 | api
17 |
18 | References
19 | ==========
20 |
21 | * :ref:`genindex`
22 | * :ref:`modindex`
23 | * :ref:`search`
24 |
--------------------------------------------------------------------------------
/docs/new_adapters.rst:
--------------------------------------------------------------------------------
1 | Developing new adapters
2 | =======================
3 |
4 | All adapters inherit from :class:`calculadora_do_cidadao.adapters.Adapter`.
5 |
6 | Required method
7 | ---------------
8 |
9 | Every adapter needs a `serialize` method. This method always receives a table row (a `NamedTuple` instantiated by `rows `_) and is a **generator** that yields either of the following (a complete sketch is shown in the example at the end of this page):
10 |
11 | * `None` (when the row is invalid)
12 | * a tuple containing a `datetime.date` and a `decimal.Decimal`
13 |
14 | Required variables
15 | ------------------
16 |
17 | =========== ============================================================
18 | Variable    Description
19 | =========== ============================================================
20 | `url`       URL of the source from which the data is downloaded.
21 | `file_type` `"html"` or `"xls"`, indicating the format of the source data.
22 | =========== ============================================================
23 |
24 | Optional methods
25 | ----------------
26 |
27 | `post_processing`
28 | ~~~~~~~~~~~~~~~~~
29 |
30 | A static method (``staticmethod``) or function that receives `bytes` as its only argument and also returns `bytes`. It is used, for example, when the document to be downloaded is corrupted at the source. This function runs before the file is saved, giving a chance to fix it if needed.
31 |
32 | Optional variables
33 | ------------------
34 |
35 | `HEADERS`
36 | ~~~~~~~~~
37 |
38 | If the URL uses the HTTP protocol, this variable can be a dictionary to be sent as headers in every HTTP request.
39 |
40 | `COOKIES`
41 | ~~~~~~~~~
42 |
43 | If the URL uses the HTTP protocol, this variable can be a dictionary to be set as session cookies in the HTTP request.
44 |
45 | `SHOULD_UNZIP`
46 | ~~~~~~~~~~~~~~
47 |
48 | A boolean saying whether the file downloaded from the URL needs to be unpacked (only `.zip` is supported for now).
49 |
50 | `SHOULD_AGGREGATE`
51 | ~~~~~~~~~~~~~~~~~~
52 |
53 | A boolean saying whether the data is disaggregated (e.g., 0.42%) or already represents the value accumulated since the beginning of the series (e.g., 1.0042).
54 |
55 | `IMPORT_KWARGS`
56 | ~~~~~~~~~~~~~~~
57 |
58 | Named arguments to be passed to the data-reading function (`rows.import_from_html`, for example).
59 |
60 | This variable can be a dictionary; in that case, the reading function is called only once, unpacking the dictionary as named arguments.
61 |
62 | This variable can also be a sequence of dictionaries; in that case, the reading function is called several times, once for each dictionary in the sequence.
63 |
64 | `POST_DATA`
65 | ~~~~~~~~~~~
66 |
67 | Dictionary with values to be sent via HTTP POST to the URL specified in this adapter. The HTTP request is a GET when this variable is not set.
68 |
69 | This variable can also be a sequence of dictionaries; in that case, several requests are made, one with each data set in the sequence.
70 |
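71 | Example
72 | -------
73 |
74 | For reference, a minimal sketch of a hypothetical adapter tying these pieces together. The URL, the column layout, and the type annotations are illustrative assumptions, not a real data source::
75 |
76 |     from datetime import date
77 |     from decimal import Decimal
78 |     from typing import NamedTuple
79 |
80 |     from calculadora_do_cidadao.adapters import Adapter
81 |     from calculadora_do_cidadao.typing import MaybeIndexesGenerator
82 |
83 |
84 |     class MyIndex(Adapter):
85 |         """Hypothetical adapter for an HTML page holding a two-column
86 |         table: a reference date and the accumulated index value."""
87 |
88 |         url = "https://example.com/my-index.html"  # illustrative
89 |         file_type = "html"
90 |
91 |         def serialize(self, row: NamedTuple) -> MaybeIndexesGenerator:
92 |             reference, value = row
93 |             if value is None:  # invalid row
94 |                 yield None
95 |                 return
96 |             yield date(reference.year, reference.month, 1), Decimal(str(value))
97 |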
--------------------------------------------------------------------------------
/docs/usage.rst:
--------------------------------------------------------------------------------
1 | Usage
2 | =====
3 |
4 | All adapters can be initialized without any arguments. In that case, an adapter downloads its data the moment the class is instantiated. In other words, creating an instance takes a while, and **it is recommended that your application does this at startup, not on every use**.
5 |
6 | Alternatively, if you have already saved the data locally (see :ref:`Exportando os dados`), any adapter can be initialized with a `pathlib.Path` pointing to where it should read the data from.
7 |
8 | ::
9 |
10 |     from pathlib import Path
11 |
12 |     from calculadora_do_cidadao import Ipca
13 |
14 |
15 |     backup = Path("backup.csv")
16 |
17 |     ipca = Ipca()  # downloads the data at this point
18 |     ipca.to_csv(backup)
19 |
20 |     ipca = Ipca(backup)  # no download; loads from the backup
21 |
22 | Available adapters
23 | ------------------
24 |
25 | ============================================================================================================================================ ==================================================
26 | Índice Módulo
27 | ============================================================================================================================================ ==================================================
28 | `DIEESE Cesta Básica: média de todas as cidades disponíveis `_ :class:`calculadora_do_cidadao.CestaBasica`
29 | `DIEESE Cesta Básica: média das capitais da Região Centro-Oeste `_ :class:`calculadora_do_cidadao.CestaBasicaCentroOeste`
30 | `DIEESE Cesta Básica: média das capitais da Região Nordeste `_ :class:`calculadora_do_cidadao.CestaBasicaNordeste`
31 | `DIEESE Cesta Básica: média das capitais da Região Norte `_ :class:`calculadora_do_cidadao.CestaBasicaNorte`
32 | `DIEESE Cesta Básica: média das capitais da Região Sudeste `_ :class:`calculadora_do_cidadao.CestaBasicaSudeste`
33 | `DIEESE Cesta Básica: média das capitais da Região Sul `_ :class:`calculadora_do_cidadao.CestaBasicaSul`
34 | `DIEESE Cesta Básica: Aracaju `_ :class:`calculadora_do_cidadao.CestaBasicaAracaju`
35 | `DIEESE Cesta Básica: Belém `_ :class:`calculadora_do_cidadao.CestaBasicaBelem`
36 | `DIEESE Cesta Básica: Belo Horizonte `_ :class:`calculadora_do_cidadao.CestaBasicaBeloHorizonte`
37 | `DIEESE Cesta Básica: Boa Vista `_ :class:`calculadora_do_cidadao.CestaBasicaBoaVista`
38 | `DIEESE Cesta Básica: Brasília `_ :class:`calculadora_do_cidadao.CestaBasicaBrasilia`
39 | `DIEESE Cesta Básica: Campo Grande `_ :class:`calculadora_do_cidadao.CestaBasicaCampoGrande`
40 | `DIEESE Cesta Básica: Cuiaba `_ :class:`calculadora_do_cidadao.CestaBasicaCuiaba`
41 | `DIEESE Cesta Básica: Curitiba `_ :class:`calculadora_do_cidadao.CestaBasicaCuritiba`
42 | `DIEESE Cesta Básica: Florianópolis `_ :class:`calculadora_do_cidadao.CestaBasicaFlorianopolis`
43 | `DIEESE Cesta Básica: Fortaleza `_ :class:`calculadora_do_cidadao.CestaBasicaFortaleza`
44 | `DIEESE Cesta Básica: Goiânia `_ :class:`calculadora_do_cidadao.CestaBasicaGoiania`
45 | `DIEESE Cesta Básica: João Pessoa `_ :class:`calculadora_do_cidadao.CestaBasicaJoaoPessoa`
46 | `DIEESE Cesta Básica: Macaé `_ :class:`calculadora_do_cidadao.CestaBasicaMacae`
47 | `DIEESE Cesta Básica: Macapá `_ :class:`calculadora_do_cidadao.CestaBasicaMacapa`
48 | `DIEESE Cesta Básica: Maceió `_ :class:`calculadora_do_cidadao.CestaBasicaMaceio`
49 | `DIEESE Cesta Básica: Manaus `_ :class:`calculadora_do_cidadao.CestaBasicaManaus`
50 | `DIEESE Cesta Básica: Natal `_ :class:`calculadora_do_cidadao.CestaBasicaNatal`
51 | `DIEESE Cesta Básica: Palmas `_ :class:`calculadora_do_cidadao.CestaBasicaPalmas`
52 | `DIEESE Cesta Básica: Porto Alegre `_ :class:`calculadora_do_cidadao.CestaBasicaPortoAlegre`
53 | `DIEESE Cesta Básica: Porto Velho `_ :class:`calculadora_do_cidadao.CestaBasicaPortoVelho`
54 | `DIEESE Cesta Básica: Recife `_ :class:`calculadora_do_cidadao.CestaBasicaRecife`
55 | `DIEESE Cesta Básica: Rio Branco `_ :class:`calculadora_do_cidadao.CestaBasicaRioBranco`
56 | `DIEESE Cesta Básica: Rio de Janeiro `_ :class:`calculadora_do_cidadao.CestaBasicaRioDeJaneiro`
57 | `DIEESE Cesta Básica: Salvador `_ :class:`calculadora_do_cidadao.CestaBasicaSalvador`
58 | `DIEESE Cesta Básica: São Luís `_ :class:`calculadora_do_cidadao.CestaBasicaSaoLuis`
59 | `DIEESE Cesta Básica: São Paulo `_ :class:`calculadora_do_cidadao.CestaBasicaSaoPaulo`
60 | `DIEESE Cesta Básica: Teresina `_ :class:`calculadora_do_cidadao.CestaBasicaTeresina`
61 | `DIEESE Cesta Básica: Vitória `_ :class:`calculadora_do_cidadao.CestaBasicaVitoria`
62 | `FED's Consumer Price Index for All Urban Consumers: All Items `_ :class:`calculadora_do_cidadao.AllUrbanCityAverage`
63 | `IGP-M `_ :class:`calculadora_do_cidadao.Igpm`
64 | `INPC `_ :class:`calculadora_do_cidadao.Inpc`
65 | `IPCA `_ :class:`calculadora_do_cidadao.Ipca`
66 | `IPCA-15 `_ :class:`calculadora_do_cidadao.Ipca15`
67 | `IPCA-E `_ :class:`calculadora_do_cidadao.IpcaE`
68 | ============================================================================================================================================ ==================================================
69 |
70 | Using an adapter
71 | ----------------
72 |
73 | All adapters have an `adjust` method (:meth:`calculadora_do_cidadao.adapters.Adapter.adjust`) that takes three arguments:
74 |
75 | ================ =========== ============================================================= =============================================== =======================
76 | Argument         Required    Type                                                          Description                                     Default value
77 | ================ =========== ============================================================= =============================================== =======================
78 | `original_date`  ✅          `datetime.date`, `datetime.datetime`, `str`, `int` or `float` Original date of the value to be adjusted.
79 | `value`          ❌          `decimal.Decimal`, `float` or `int`                           Value to be adjusted.                           `decimal.Decimal('1')`
80 | `target_date`    ❌          `datetime.date`, `datetime.datetime`, `str`, `int` or `float` Date to which the value should be adjusted.    `datetime.date.today()`
81 | ================ =========== ============================================================= =============================================== =======================
82 |
83 |
84 | Example
85 | ~~~~~~~
86 |
87 | ::
88 |
89 | In [1]: from datetime import date
90 | ...: from decimal import Decimal
91 | ...: from calculadora_do_cidadao import Ipca
92 |
93 | In [2]: ipca = Ipca()
94 |
95 | In [3]: ipca.adjust(date(2018, 7, 6))
96 | Out[3]: Decimal('1.051202206630561280035407253')
97 |
98 | In [4]: ipca.adjust(date(2014, 7, 8), 7)
99 | Out[4]: Decimal('9.407523138792336916983267321')
100 |
101 | In [5]: ipca.adjust(date(1998, 7, 12), 3, date(2006, 7, 1))
102 | Out[5]: Decimal('5.279855889296777979447848574')
103 |
104 | .. _Formatos dos campos de data:
105 |
106 | Date field formats
107 | ~~~~~~~~~~~~~~~~~~
108 |
109 | Adapters accept several date formats, as shown in the following examples:
110 |
111 | ========================================= =================== ===========================
112 | Input                                     Type                Output
113 | ========================================= =================== ===========================
114 | `datetime.date(2018, 7, 6)` `datetime.date` `datetime.date(2018, 7, 6)`
115 | `datetime.datetime(2018, 7, 6, 21, 0, 0)` `datetime.datetime` `datetime.date(2018, 7, 6)`
116 | `"2018-07-06T21:00:00"` `str` `datetime.date(2018, 7, 6)`
117 | `"2018-07-06 21:00:00"` `str` `datetime.date(2018, 7, 6)`
118 | `"2018-07-06"` `str` `datetime.date(2018, 7, 6)`
119 | `"06/07/2018"` `str` `datetime.date(2018, 7, 6)`
120 | `"2018-07"` `str` `datetime.date(2018, 7, 1)`
121 | `"Jul/2018"` `str` `datetime.date(2018, 7, 1)`
122 | `"Jul-2018"` `str` `datetime.date(2018, 7, 1)`
123 | `"Jul 2018"` `str` `datetime.date(2018, 7, 1)`
124 | `"07/2018"`` `str` `datetime.date(2018, 7, 1)`
125 | `"2018"` `str` `datetime.date(2018, 1, 1)`
126 | `1530925200` `int` (timestamp) `datetime.date(2018, 7, 6)`
127 | `1530925200.0` `float` (timestamp) `datetime.date(2018, 7, 6)`
128 | ========================================= =================== ===========================
129 |
130 | .. _Exportando os dados:
131 |
132 | Exporting the data
133 | ------------------
134 |
135 | All adapters have a `to_csv` method (:meth:`calculadora_do_cidadao.adapters.Adapter.to_csv`) to export their data in CSV format. The only argument this method takes is a `pathlib.Path`, the path of the file the data will be exported to.
136 |
137 | To export the data from all indexes (adapters) at once, just call the package from the command line (a `calculadora-do-cidadao.csv` file is created with the data):
138 |
139 | ::
140 |
141 |     $ python -m calculadora_do_cidadao
142 |
143 | Importing the data
144 | ------------------
145 |
146 | All adapters have a `from_csv` method (:meth:`calculadora_do_cidadao.adapters.Adapter.from_csv`) to import data from a CSV file. The only argument this method takes is a `pathlib.Path`, the path of the file where the data is. The file must have two columns: `date`, in the `YYYY-MM-DD` format, and `value`, using a dot as the decimal separator.
147 |
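148 | For example, given a `backup.csv` file in this format::
149 |
150 |     date,value
151 |     1998-07-12,3.0
152 |     2014-07-08,7.1
153 |
154 | any adapter can be initialized from it, skipping the download entirely (the file name here is just an illustration)::
155 |
156 |     from pathlib import Path
157 |
158 |     from calculadora_do_cidadao import Ipca
159 |
160 |     ipca = Ipca(Path("backup.csv"))  # loads the local data, no download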
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "calculadora-do-cidadao"
3 | version = "1.0.0"
4 | description = "Tool for Brazilian Reais monetary adjustment/correction"
5 | authors = ["Eduardo Cuducos "]
6 | license = "GPLv3"
7 | readme = "README.md"
8 | homepage = "https://calculadora-do-cidadao.readthedocs.io/"
9 | repository = "https://github.com/cuducos/calculadora-do-cidadao/"
10 | classifiers = [
11 | "Development Status :: 4 - Beta",
12 | "Intended Audience :: Developers",
13 | "Intended Audience :: End Users/Desktop",
14 | "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
15 | "Programming Language :: Python :: 3.7",
16 | "Programming Language :: Python :: 3.8",
17 | "Programming Language :: Python :: 3.9",
18 | "Topic :: Utilities",
19 | ]
20 | keywords = [
21 | "Brazil",
22 | "Brazilian Reais",
23 | "monetary adjusment",
24 | "monetary correction",
25 | "Inflation",
26 | ]
27 | packages = [
28 | { include = "calculadora_do_cidadao" },
29 | ]
30 |
31 | [tool.poetry.dependencies]
32 | python = "^3.7"
33 | pip = { version = ">=20.0.0", optional = true }
34 | lxml = "^4.7.1"
35 | readthedocs-sphinx-ext = { version = ">=2.1.3", optional = true }
36 | requests = ">=2.22.0"
37 | sphinx = { version = ">=3.4.3", optional = true }
38 | sphinx-rtd-theme = { version = ">=0.5.1", optional = true }
39 | typer = ">=0.0.8"
40 | unicodecsv = "^0.14.1"
41 | xlrd = "^2.0.1"
42 | xlwt = "^1.3.0"
43 |
44 | [tool.poetry.dev-dependencies]
45 | black = "21.9b0"
46 | coverage = {extras = ["toml"], version = "^5.5"}
47 | freezegun = "^1.1.0"
48 | pytest = "^6.2.5"
49 | pytest-black = "^0.3.12"
50 | pytest-cov = "^2.12.1"
51 | pytest-mock = "^3.6.1"
52 | pytest-mypy = "^0.8.1"
53 | tox = "^3.24.4"
54 | types-freezegun = "^1.1.6"
55 | types-requests = "^2.27.7"
56 | types-setuptools = "^57.4.7"
57 |
58 | [tool.pytest.ini_options]
59 | addopts = "--black --mypy --mypy-ignore-missing-imports --ignore calculadora_do_cidadao/rows/ --cov=calculadora_do_cidadao --cov-report term --disable-warnings"
60 |
61 | [tool.coverage.report]
62 | exclude_lines = [
63 | "pragma: no cover",
64 | "if __name__ == .__main__.:",
65 | ]
66 | omit = ["calculadora_do_cidadao/rows/*"]
67 |
68 | [tool.mypy]
69 | exclude = ["^calculadora_do_cidadao/rows/"]
70 |
71 | [tool.poetry.extras]
72 | docs = ["pip", "readthedocs-sphinx-ext", "sphinx", "sphinx-rtd-theme"]
73 |
74 | [tool.tox]
75 | legacy_tox_ini = """
76 | [tox]
77 | isolated_build = True
78 | envlist =
79 | py{3.7,3.8,3.9}
80 |
81 | [gh-actions]
82 | python =
83 | 3.7: py3.7
84 | 3.8: py3.8
85 | 3.9: py3.9
86 |
87 | [testenv]
88 | deps = poetry
89 | commands =
90 | poetry export --dev --without-hashes --output /tmp/calculadora-do-cidadao.requirements.txt
91 | pip install -r /tmp/calculadora-do-cidadao.requirements.txt
92 | pytest --black --mypy --mypy-ignore-missing-imports --ignore calculadora_do_cidadao/rows/ --cov=calculadora_do_cidadao --cov-report term --disable-warnings
93 | """
94 |
95 | [build-system]
96 | requires = ["poetry-core>=1.0.0"]
97 | build-backend = "poetry.core.masonry.api"
98 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 |
4 | def fixture_path(adapter):
5 | name = adapter if isinstance(adapter, str) else adapter.__name__.lower()
6 | if name.startswith("cestabasica"): # DIEESE adapters use the same fixture
7 | name = "cestabasica"
8 | directory = Path(__file__).parent / "fixtures"
9 | fixture, *_ = directory.glob(f"{name}.*")
10 | return fixture
11 |
12 |
13 | def fixture_generator(adapter):
14 |     return lambda: (fixture_path(adapter),)  # stands in for the callable yielded by Download's context manager
15 |
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from tempfile import NamedTemporaryFile, TemporaryDirectory
3 | from zipfile import ZipFile
4 |
5 | from pytest import fixture
6 |
7 | from tests import fixture_path
8 |
9 |
10 | def pytest_configure(config):
11 | plugin = config.pluginmanager.getplugin("mypy")
12 | plugin.mypy_argv.append("--exclude")
13 | plugin.mypy_argv.append('"^calculadora_do_cidadao/rows/"')
14 |
15 |
16 | @fixture
17 | def broken_table():
18 |     """This fixture provides a copy of the broken table file because post-
19 |     processing it (to fix the broken table) overwrites the original file."""
20 | with NamedTemporaryFile() as _tmp:
21 | tmp = Path(_tmp.name)
22 | tmp.write_bytes(fixture_path("broken-table").read_bytes())
23 | yield tmp
24 |
25 |
26 | @fixture(scope="session")
27 | def zip_file():
28 |     """Returns a path to a temporary zip file. Its content is a single file
29 |     whose content is the bytes `42`."""
30 | with TemporaryDirectory() as _tmp:
31 | tmp = Path(_tmp)
32 | fixture = tmp / "fixture"
33 | fixture.write_bytes(b"42")
34 |
35 | path = Path(tmp) / "fixture.zip"
36 | with ZipFile(path, "w") as archive:
37 | archive.write(fixture, arcname=fixture.name)
38 |
39 | yield path
40 |
--------------------------------------------------------------------------------
/tests/fixtures/allurbancityaverage.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cuducos/calculadora-do-cidadao/dbd15353c1b45159fbf9c80b10e58736e69a1b01/tests/fixtures/allurbancityaverage.xls
--------------------------------------------------------------------------------
/tests/fixtures/broken-table.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | Date |
4 | Value |
5 |
6 |
7 | 2020-01-01 |
8 | 3.1415 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/tests/fixtures/calculadora-do-cidadao.csv:
--------------------------------------------------------------------------------
1 | date,value,serie
2 | 1998-07-12,3.0,goodadapter
3 | 2014-07-08,7.1,dummyadapter
4 |
--------------------------------------------------------------------------------
/tests/fixtures/goodadapter.csv:
--------------------------------------------------------------------------------
1 | date,value
2 | 1998-07-12,3.0
3 | 2014-07-08,7.1
4 |
--------------------------------------------------------------------------------
/tests/fixtures/igpm.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cuducos/calculadora-do-cidadao/dbd15353c1b45159fbf9c80b10e58736e69a1b01/tests/fixtures/igpm.html
--------------------------------------------------------------------------------
/tests/fixtures/inpc.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cuducos/calculadora-do-cidadao/dbd15353c1b45159fbf9c80b10e58736e69a1b01/tests/fixtures/inpc.xls
--------------------------------------------------------------------------------
/tests/fixtures/ipca.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cuducos/calculadora-do-cidadao/dbd15353c1b45159fbf9c80b10e58736e69a1b01/tests/fixtures/ipca.xls
--------------------------------------------------------------------------------
/tests/fixtures/ipca15.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cuducos/calculadora-do-cidadao/dbd15353c1b45159fbf9c80b10e58736e69a1b01/tests/fixtures/ipca15.xls
--------------------------------------------------------------------------------
/tests/fixtures/ipcae.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cuducos/calculadora-do-cidadao/dbd15353c1b45159fbf9c80b10e58736e69a1b01/tests/fixtures/ipcae.xls
--------------------------------------------------------------------------------
/tests/fixtures/selic.json:
--------------------------------------------------------------------------------
1 | {"totalItems":12,"registros":[{"periodo":"Jan / 2018","fator":1.00584205,"fatorFormatado":"1,00584205"},{"periodo":"Feb / 2018","fator":1.00465602,"fatorFormatado":"1,00465602"},{"periodo":"Mar / 2018","fator":1.00532345,"fatorFormatado":"1,00532345"},{"periodo":"Apr / 2018","fator":1.00518295,"fatorFormatado":"1,00518295"},{"periodo":"May / 2018","fator":1.00518295,"fatorFormatado":"1,00518295"},{"periodo":"Jun / 2018","fator":1.00518295,"fatorFormatado":"1,00518295"},{"periodo":"Jul / 2018","fator":1.00543042,"fatorFormatado":"1,00543042"},{"periodo":"Aug / 2018","fator":1.00567796,"fatorFormatado":"1,00567796"},{"periodo":"Sep / 2018","fator":1.00468818,"fatorFormatado":"1,00468818"},{"periodo":"Oct / 2018","fator":1.00543042,"fatorFormatado":"1,00543042"},{"periodo":"Nov / 2018","fator":1.00493553,"fatorFormatado":"1,00493553"},{"periodo":"Dec / 2018","fator":1.00493553,"fatorFormatado":"1,00493553"}],"observacoes":null,"dataAtual":"03/11/2022 às 00:11:03"}
--------------------------------------------------------------------------------
/tests/test_adapters.py:
--------------------------------------------------------------------------------
1 | from datetime import date, datetime
2 | from decimal import Decimal
3 |
4 | from pytest import approx, mark, raises
5 | from freezegun import freeze_time
6 |
7 | from calculadora_do_cidadao import (
8 | AllUrbanCityAverage,
9 | CestaBasica,
10 | CestaBasicaAracaju,
11 | CestaBasicaBelem,
12 | CestaBasicaBeloHorizonte,
13 | CestaBasicaBoaVista,
14 | CestaBasicaBrasilia,
15 | CestaBasicaCampoGrande,
16 | CestaBasicaCentroOeste,
17 | CestaBasicaCuiaba,
18 | CestaBasicaCuritiba,
19 | CestaBasicaFlorianopolis,
20 | CestaBasicaFortaleza,
21 | CestaBasicaGoiania,
22 | CestaBasicaJoaoPessoa,
23 | CestaBasicaMacae,
24 | CestaBasicaMacapa,
25 | CestaBasicaMaceio,
26 | CestaBasicaManaus,
27 | CestaBasicaNatal,
28 | CestaBasicaNordeste,
29 | CestaBasicaNorte,
30 | CestaBasicaPalmas,
31 | CestaBasicaPortoAlegre,
32 | CestaBasicaPortoVelho,
33 | CestaBasicaRecife,
34 | CestaBasicaRioBranco,
35 | CestaBasicaRioDeJaneiro,
36 | CestaBasicaSalvador,
37 | CestaBasicaSaoLuis,
38 | CestaBasicaSaoPaulo,
39 | CestaBasicaSudeste,
40 | CestaBasicaSul,
41 | CestaBasicaTeresina,
42 | CestaBasicaVitoria,
43 | Igpm,
44 | Inpc,
45 | Ipca,
46 | Ipca15,
47 | IpcaE,
48 | )
49 | from calculadora_do_cidadao.adapters import AdapterDateNotAvailableError
50 | from tests import fixture_generator
51 |
52 |
53 | def get_error_msg_for_future(start_date, end_date):
54 | try:
55 | future_date = end_date.replace(month=end_date.month + 1)
56 | except ValueError:
57 | future_date = end_date.replace(year=end_date.year + 1, month=1)
58 |
59 | data = {
60 | "start": start_date.strftime("%m/%Y"),
61 | "end": end_date.strftime("%m/%Y"),
62 | "future": future_date.strftime("%m/%Y"),
63 | }
64 | msg = r"This adapter has data from {start} to {end}\. {future} is out of range\."
65 | return future_date, msg.format(**data)
66 |
67 |
68 | @mark.parametrize(
69 | "adapter,original,value,target,expected",
70 | (
71 | (
72 | AllUrbanCityAverage,
73 | date(2000, 1, 1),
74 | None,
75 | None,
76 | "1.526881275841701122268163024",
77 | ),
78 | (
79 | AllUrbanCityAverage,
80 | date(2019, 1, 1),
81 | 42,
82 | None,
83 | "42.96874616599320069813553488",
84 | ),
85 | (
86 | AllUrbanCityAverage,
87 | date(2019, 1, 1),
88 | 3,
89 | date(2006, 7, 1),
90 | "2.409042517403917316056721534",
91 | ),
92 | (CestaBasica, date(2018, 7, 6), None, None, "1.460165126797113622157204568"),
93 | (CestaBasica, date(2014, 7, 8), 7, None, "12.73085672787343640166763121"),
94 | (
95 | CestaBasica,
96 | date(1998, 7, 12),
97 | 3,
98 | date(2014, 7, 8),
99 | "9.800127168524684637393180756",
100 | ),
101 | (
102 | CestaBasicaCentroOeste,
103 | date(2018, 7, 6),
104 | None,
105 | None,
106 | "1.523656211927287441557698990",
107 | ),
108 | (
109 | CestaBasicaCentroOeste,
110 | date(2014, 7, 8),
111 | 7,
112 | None,
113 | "13.85141773482304019009756209",
114 | ),
115 | (
116 | CestaBasicaCentroOeste,
117 | date(1998, 7, 12),
118 | 3,
119 | date(2014, 7, 8),
120 | "9.898117261265336122575903207",
121 | ),
122 | (
123 | CestaBasicaNordeste,
124 | date(2018, 7, 6),
125 | None,
126 | None,
127 | "1.384965255233770727275985515",
128 | ),
129 | (
130 | CestaBasicaNordeste,
131 | date(2014, 7, 8),
132 | 7,
133 | None,
134 | "12.24645904436860068259385665",
135 | ),
136 | (
137 | CestaBasicaNordeste,
138 | date(1998, 7, 12),
139 | 3,
140 | date(2014, 7, 8),
141 | "9.168529280286097451944568616",
142 | ),
143 | (
144 | CestaBasicaNorte,
145 | date(2018, 7, 6),
146 | None,
147 | None,
148 | "1.398587144692019880493661697",
149 | ),
150 | (CestaBasicaNorte, date(2014, 7, 8), 7, None, "11.03941941374641856364724033"),
151 | (
152 | CestaBasicaNorte,
153 | date(1998, 7, 12),
154 | 3,
155 | date(2014, 7, 8),
156 | "10.78471986417657045840407470",
157 | ),
158 | (
159 | CestaBasicaSudeste,
160 | date(2018, 7, 6),
161 | None,
162 | None,
163 | "1.486390590661870326945034438",
164 | ),
165 | (
166 | CestaBasicaSudeste,
167 | date(2014, 7, 8),
168 | 7,
169 | None,
170 | "12.89231003270708146345173804",
171 | ),
172 | (
173 | CestaBasicaSudeste,
174 | date(1998, 7, 12),
175 | 3,
176 | date(2014, 7, 8),
177 | "10.32243712214399748750294433",
178 | ),
179 | (CestaBasicaSul, date(2018, 7, 6), None, None, "1.426849010558871143112571581"),
180 | (CestaBasicaSul, date(2014, 7, 8), 7, None, "12.57287547904373745361640002"),
181 | (
182 | CestaBasicaSul,
183 | date(1998, 7, 12),
184 | 3,
185 | date(2014, 7, 8),
186 | "10.14474766867800329127811300",
187 | ),
188 | (
189 | CestaBasicaAracaju,
190 | date(2018, 7, 6),
191 | None,
192 | None,
193 | "1.313926179361535562063266549",
194 | ),
195 | (
196 | CestaBasicaBelem,
197 | date(2018, 7, 6),
198 | None,
199 | None,
200 | "1.387084267951593697211376035",
201 | ),
202 | (
203 | CestaBasicaBeloHorizonte,
204 | date(2018, 7, 6),
205 | None,
206 | None,
207 | "1.564991191367540189385597886",
208 | ),
209 | (
210 | CestaBasicaBoaVista,
211 | date(2015, 12, 1),
212 | 1,
213 | date(2017, 8, 1),
214 | "1.037235504259411926353393790",
215 | ),
216 | (
217 | CestaBasicaBrasilia,
218 | date(2018, 7, 6),
219 | None,
220 | None,
221 | "1.516165394271660603576369319",
222 | ),
223 | (
224 | CestaBasicaCampoGrande,
225 | date(2018, 7, 6),
226 | None,
227 | None,
228 | "1.555573544887881486278636768",
229 | ),
230 | (
231 | CestaBasicaCuiaba,
232 | date(2018, 7, 6),
233 | None,
234 | None,
235 | "0.9927400046340395952938753443",
236 | ),
237 | (
238 | CestaBasicaCuritiba,
239 | date(2018, 7, 6),
240 | None,
241 | None,
242 | "1.380864765409383624655013799",
243 | ),
244 | (
245 | CestaBasicaFlorianopolis,
246 | date(2018, 7, 6),
247 | None,
248 | None,
249 | "1.482336792929901028246682881",
250 | ),
251 | (
252 | CestaBasicaFortaleza,
253 | date(2018, 7, 6),
254 | None,
255 | None,
256 | "1.410536307546274323682961557",
257 | ),
258 | (
259 | CestaBasicaGoiania,
260 | date(2018, 7, 6),
261 | None,
262 | None,
263 | "1.538839456302199901741361428",
264 | ),
265 | (
266 | CestaBasicaJoaoPessoa,
267 | date(2018, 7, 6),
268 | None,
269 | None,
270 | "1.368831917038744058764222958",
271 | ),
272 | (
273 | CestaBasicaMacae,
274 | date(2018, 7, 6),
275 | None,
276 | None,
277 | "1.573712797206672947957656708",
278 | ),
279 | (
280 | CestaBasicaMacapa,
281 | date(2015, 12, 1),
282 | 1,
283 | date(2017, 8, 1),
284 | "1.052198847177926675834625625",
285 | ),
286 | (
287 | CestaBasicaMaceio,
288 | date(2015, 12, 1),
289 | 1,
290 | date(2017, 8, 1),
291 | "1.155067192701269880409320676",
292 | ),
293 | (
294 | CestaBasicaManaus,
295 | date(2018, 7, 6),
296 | None,
297 | None,
298 | "1.014077765577047611003181575",
299 | ),
300 | (
301 | CestaBasicaNatal,
302 | date(2018, 7, 6),
303 | None,
304 | None,
305 | "1.345070216072004456301855815",
306 | ),
307 | (
308 | CestaBasicaPalmas,
309 | date(2015, 12, 1),
310 | 1,
311 | date(2017, 7, 1),
312 | "1.067597248713947170683775504",
313 | ),
314 | (
315 | CestaBasicaPortoAlegre,
316 | date(2018, 7, 6),
317 | None,
318 | None,
319 | "1.415245276079260723644889890",
320 | ),
321 | (
322 | CestaBasicaPortoVelho,
323 | date(2015, 12, 1),
324 | None,
325 | date(2017, 8, 1),
326 | "1.084525282758223731545169367",
327 | ),
328 | (
329 | CestaBasicaRecife,
330 | date(2018, 7, 6),
331 | None,
332 | None,
333 | "1.351034740811098638574676913",
334 | ),
335 | (
336 | CestaBasicaRioBranco,
337 | date(2015, 12, 1),
338 | 1,
339 | date(2017, 7, 1),
340 | "1.067854386416259325958322614",
341 | ),
342 | (
343 | CestaBasicaRioDeJaneiro,
344 | date(2018, 7, 6),
345 | None,
346 | None,
347 | "1.472160989831472658749911114",
348 | ),
349 | (
350 | CestaBasicaSalvador,
351 | date(2018, 7, 6),
352 | None,
353 | None,
354 | "1.489583981095703003544555687",
355 | ),
356 | (
357 | CestaBasicaSaoLuis,
358 | date(2018, 7, 6),
359 | None,
360 | None,
361 | "1.258383580360590489203077196",
362 | ),
363 | (
364 | CestaBasicaSaoPaulo,
365 | date(2018, 7, 6),
366 | None,
367 | None,
368 | "1.443601115632572813314434639",
369 | ),
370 | (
371 | CestaBasicaTeresina,
372 | date(2015, 12, 1),
373 | 1,
374 | date(2017, 7, 1),
375 | "1.113623795803137460345178847",
376 | ),
377 | (
378 | CestaBasicaVitoria,
379 | date(2018, 7, 6),
380 | None,
381 | None,
382 | "1.476957901729695150456413159",
383 | ),
384 | (Igpm, date(2018, 7, 6), None, None, "1.089562719284143684871778501"),
385 | (Igpm, date(2014, 7, 8), 7, None, "9.695966517693585432732393804"),
386 | (Igpm, date(1998, 7, 12), 3, date(2006, 7, 1), "6.880958439252658773596604453"),
387 | (Inpc, date(2014, 3, 6), None, None, "1.361007124894175467688242800"),
388 | (Inpc, date(2011, 5, 8), 9, None, "14.373499236614377437778943450"),
389 | (Inpc, date(2009, 1, 12), 5, date(2013, 8, 1), "6.410734265150376567640231785"),
390 | (Ipca, date(2018, 7, 6), None, None, "1.051202206630561280035407253"),
391 | (Ipca, date(2014, 7, 8), 7, None, "9.407523138792336916983267321"),
392 | (Ipca, date(1998, 7, 12), 3, date(2006, 7, 1), "5.279855889296777979447848574"),
393 | (Ipca15, date(2017, 2, 13), None, None, "1.101569276203612423894969769"),
394 | (Ipca15, date(2012, 5, 8), 3, None, "4.577960384607494629737626417"),
395 | (Ipca15, date(1999, 11, 10), 5, date(2002, 9, 5), "6.068815714507691510850986"),
396 | (IpcaE, date(2017, 2, 13), None, None, "1.101569276203612423894969769"),
397 | (IpcaE, date(2012, 5, 8), 3, None, "4.577960384607494629737626417"),
398 | (IpcaE, date(1999, 11, 10), 5, date(2002, 9, 5), "6.0688157145076915108509866"),
399 | ),
400 | )
401 | def test_adapter_indexes(adapter, original, value, target, expected, mocker):
402 | download = mocker.patch("calculadora_do_cidadao.adapters.Download")
403 | download.return_value.return_value.__enter__.return_value = fixture_generator(
404 | adapter
405 | )
406 | instance = adapter()
407 | assert instance.adjust(original, value, target) == approx(Decimal(expected))
408 |
409 |
410 | @mark.parametrize(
411 | "adapter,length,start_date,end_date",
412 | (
413 | (AllUrbanCityAverage, 876, date(1947, 1, 1), date(2019, 12, 1)),
414 | (Igpm, 367, date(1989, 6, 1), date(2019, 12, 1)),
415 | (Inpc, 312, date(1994, 1, 1), date(2019, 12, 1)),
416 | (Ipca, 312, date(1994, 1, 1), date(2019, 12, 1)),
417 | (Ipca15, 312, date(1994, 1, 1), date(2019, 12, 1)),
418 | (IpcaE, 312, date(1994, 1, 1), date(2019, 12, 1)),
419 | ),
420 | )
421 | def test_adapter_out_of_range(adapter, length, start_date, end_date, mocker):
422 | download = mocker.patch("calculadora_do_cidadao.adapters.Download")
423 | download.return_value.return_value.__enter__.return_value = fixture_generator(
424 | adapter
425 | )
426 | instance = adapter()
427 | assert len(instance.data) == length
428 | future_date, msg = get_error_msg_for_future(start_date, end_date)
429 | with raises(AdapterDateNotAvailableError, match=msg):
430 | instance.adjust(future_date)
431 |
432 |
433 | def test_adapter_missing_date_within_range(mocker):
434 | download = mocker.patch("calculadora_do_cidadao.adapters.Download")
435 | download.return_value.return_value.__enter__.return_value = fixture_generator(
436 | "cestabasica"
437 | )
438 | bsb = CestaBasicaBrasilia()
439 | msg = (
440 | "This adapter has data from 07/1994 to 12/2020, but not for 11/2019. "
441 | "Available dates are:\n -.+"
442 | )
443 | with raises(AdapterDateNotAvailableError, match=msg):
444 | bsb.adjust(date(2019, 11, 1))
445 |
446 |
447 | @freeze_time("2018-07-06 21:00:00", tz_offset=-3)
448 | @mark.parametrize(
449 | "adapter,original,value,target",
450 | (
451 | (Ipca, date(2018, 7, 6), None, None),
452 | (Ipca, datetime(2018, 7, 6, 21, 00, 00), None, None),
453 | (Ipca, "2018-07-06T21:00:00", None, None),
454 | (Ipca, "2018-07-06 21:00:00", None, None),
455 | (Ipca, "2018-07-06", None, None),
456 | (Ipca, "06/07/2018", None, None),
457 | (Ipca, "2018-07", None, None),
458 | (Ipca, "Jul/2018", None, None),
459 | (Ipca, "Jul-2018", None, None),
460 | (Ipca, "Jul 2018", None, None),
461 | (Ipca, "07/2018", None, None),
462 | (Ipca, 1530925200, None, None),
463 | (Ipca, 1530925200.0, None, None),
464 | ),
465 | )
466 | def test_string_date_inputs(adapter, original, value, target, mocker):
467 | expected = approx(Decimal("1.051202206630561280035407253"))
468 | download = mocker.patch("calculadora_do_cidadao.adapters.Download")
469 | download.return_value.return_value.__enter__.return_value = fixture_generator(
470 | adapter
471 | )
472 | instance = adapter()
473 | assert instance.adjust(original, value, target) == expected
474 |
475 |
476 | def test_diesse_post_processing():
477 | body = b''
478 | assert CestaBasica.post_processing(body) == b""
479 |
--------------------------------------------------------------------------------
/tests/test_base.py:
--------------------------------------------------------------------------------
1 | from datetime import date
2 | from decimal import Decimal
3 | from pathlib import Path
4 | from tempfile import NamedTemporaryFile, TemporaryDirectory
5 |
6 | import pytest
7 |
8 | from calculadora_do_cidadao.adapters import (
9 | Adapter,
10 | AdapterNoImportMethod,
11 | import_from_json,
12 | )
13 | from tests import fixture_path
14 |
15 |
16 | class DummyAdapter(Adapter):
17 | url = "https://here.comes/a/fancy.url"
18 | file_type = "dummy"
19 |
20 | def serialize(self, row):
21 | yield row
22 |
23 |
24 | class GoodAdapter(Adapter):
25 | url = "https://here.comes/a/fancy.url"
26 | file_type = "html"
27 |
28 | def serialize(self, row):
29 | yield row
30 |
31 |
32 | class HeadersAdapter(GoodAdapter):
33 | HEADERS = {"test": 42}
34 |
35 |
36 | class PostAdapter(GoodAdapter):
37 | POST_DATA = {"test": 42}
38 |
39 |
40 | class ProcessingAdapter(GoodAdapter):
41 | @staticmethod
42 | def post_processing(body: bytes) -> bytes:
43 | return b"" + body
44 |
45 |
46 | def test_file_types():
47 | msg = r"Invalid file type dummy\. Valid file types are: html, json, xls\."
48 | with pytest.raises(AdapterNoImportMethod, match=msg):
49 | DummyAdapter()
50 |
51 |
52 | def test_export_index(mocker):
53 | download = mocker.patch.object(GoodAdapter, "download")
54 | download.return_value = (
55 | (date(2014, 7, 8), Decimal("7.1")),
56 | (date(1998, 7, 12), Decimal("3.0")),
57 | )
58 | adapter = GoodAdapter()
59 |
60 | assert adapter.export_index(date(2014, 7, 8)) == {
61 | "date": date(2014, 7, 8),
62 | "value": Decimal("7.1"),
63 | }
64 | assert adapter.export_index(date(2014, 7, 8), include_name=True) == {
65 | "date": date(2014, 7, 8),
66 | "value": Decimal("7.1"),
67 | "serie": "goodadapter",
68 | }
69 |
70 |
71 | def test_export(mocker):
72 | download = mocker.patch.object(GoodAdapter, "download")
73 | download.return_value = (
74 | (date(2014, 7, 8), Decimal("7.1")),
75 | (date(1998, 7, 12), Decimal("3.0")),
76 | )
77 | adapter = GoodAdapter()
78 | assert tuple(adapter.export()) == (
79 | {"date": date(1998, 7, 12), "value": Decimal("3.0")},
80 | {"date": date(2014, 7, 8), "value": Decimal("7.1")},
81 | )
82 | assert tuple(adapter.export(include_name=True)) == (
83 | {"date": date(1998, 7, 12), "value": Decimal("3.0"), "serie": "goodadapter"},
84 | {"date": date(2014, 7, 8), "value": Decimal("7.1"), "serie": "goodadapter"},
85 | )
86 |
87 |
88 | def test_to_csv(mocker):
89 | download = mocker.patch.object(GoodAdapter, "download")
90 | download.return_value = (
91 | (date(2014, 7, 8), Decimal("7.1")),
92 | (date(1998, 7, 12), Decimal("3.0")),
93 | )
94 | adapter = GoodAdapter()
95 |
96 | with TemporaryDirectory() as _tmp:
97 | tmp = Path(_tmp) / "file"
98 | adapter.to_csv(tmp)
99 | assert tmp.read_text() == fixture_path(GoodAdapter).read_text()
100 |
101 |
102 | def test_from_csv():
103 | exported = fixture_path(GoodAdapter)
104 | all_data = fixture_path("calculadora-do-cidadao")
105 |
106 | adapter1 = GoodAdapter(exported)
107 | assert adapter1.data == {
108 | date(2014, 7, 8): Decimal("7.1"),
109 | date(1998, 7, 12): Decimal("3.0"),
110 | }
111 |
112 | adapter2 = GoodAdapter(all_data)
113 | assert adapter2.data == {date(1998, 7, 12): Decimal("3.0")}
114 |
115 |
116 | @pytest.mark.parametrize(
117 | "contents,json_path,expected",
118 | (
119 | ('[{"answer":42},{"answer":21}]', [], (42, 21)),
120 | ("[]", [], tuple()),
121 | (
122 | '{"data":{"rows": [{"answer":42},{"answer":21}]}}',
123 | ["data", "rows"],
124 | (42, 21),
125 | ),
126 | ),
127 | )
128 | def test_import_from_json(contents, json_path, expected):
129 | with NamedTemporaryFile() as tmp:
130 | json = Path(tmp.name)
131 | json.write_text(contents)
132 | data = import_from_json(json, json_path=json_path)
133 | for row, value in zip(data, expected):
134 | assert row.answer == value
135 |
136 |
137 | def test_download_does_not_receive_post_data(mocker):
138 | download = mocker.patch("calculadora_do_cidadao.adapters.Download")
139 | import_from_html = mocker.patch("calculadora_do_cidadao.adapters.import_from_html")
140 | import_from_html.return_value = tuple()
141 | GoodAdapter()
142 | download.assert_called_once_with(
143 | url="https://here.comes/a/fancy.url",
144 | should_unzip=False,
145 | headers=None,
146 | cookies={},
147 | post_data=None,
148 | post_processing=None,
149 | )
150 |
151 |
152 | def test_download_receives_post_data(mocker):
153 | download = mocker.patch("calculadora_do_cidadao.adapters.Download")
154 | import_from_html = mocker.patch("calculadora_do_cidadao.adapters.import_from_html")
155 | import_from_html.return_value = tuple()
156 | PostAdapter()
157 | download.assert_called_once_with(
158 | url="https://here.comes/a/fancy.url",
159 | should_unzip=False,
160 | headers=None,
161 | cookies={},
162 | post_data={"test": 42},
163 | post_processing=None,
164 | )
165 |
166 |
167 | def test_download_receives_headers(mocker):
168 | download = mocker.patch("calculadora_do_cidadao.adapters.Download")
169 | import_from_html = mocker.patch("calculadora_do_cidadao.adapters.import_from_html")
170 | import_from_html.return_value = tuple()
171 | HeadersAdapter()
172 | download.assert_called_once_with(
173 | url="https://here.comes/a/fancy.url",
174 | should_unzip=False,
175 | headers={"test": 42},
176 | cookies={},
177 | post_data=None,
178 | post_processing=None,
179 | )
180 |
181 |
182 | def test_post_processing(mocker):
183 | download = mocker.patch("calculadora_do_cidadao.adapters.Download")
184 | import_from_html = mocker.patch("calculadora_do_cidadao.adapters.import_from_html")
185 | import_from_html.return_value = tuple()
186 | adapter = ProcessingAdapter()
187 | download.assert_called_once_with(
188 | url="https://here.comes/a/fancy.url",
189 | should_unzip=False,
190 | headers=None,
191 | cookies={},
192 | post_data=None,
193 | post_processing=adapter.post_processing,
194 | )
195 |
--------------------------------------------------------------------------------
/tests/test_download.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from tempfile import NamedTemporaryFile
3 |
4 | from pytest import raises
5 |
6 | from calculadora_do_cidadao.download import Download, DownloadMethodNotImplementedError
7 |
8 |
9 | def test_unzip(zip_file):
10 | with NamedTemporaryFile() as tmp:
11 | assert Download.unzip(zip_file, Path(tmp.name)).read_bytes() == b"42"
12 |
13 |
14 | def test_http_get(mocker):
15 | session = mocker.patch("calculadora_do_cidadao.download.Session")
16 | session.return_value.get.return_value.content = b"42"
17 | jar = mocker.patch("calculadora_do_cidadao.download.cookiejar_from_dict")
18 | jar.return_value = "my-cookie-jar"
19 |
20 | download = Download("https://here.comes/a/fancy.url", cookies={"test": 42})
21 | for contents in download.http():
22 | assert contents == b"42"
23 |
24 | jar.assert_called_once_with({"test": 42})
25 | assert session.return_value.cookies == "my-cookie-jar"
26 |
27 | session.assert_called_once_with()
28 | session.return_value.get.assert_called_once_with(
29 | url="https://here.comes/a/fancy.url"
30 | )
31 |
32 |
33 | def test_http_get_zip_file(mocker, zip_file):
34 | session = mocker.patch("calculadora_do_cidadao.download.Session")
35 | session.return_value.get.return_value.content = zip_file.read_bytes()
36 |
37 | download = Download("https://here.comes/a/fancy.url", should_unzip=True)
38 | with download() as paths:
39 | for path in paths():
40 | assert path.read_bytes() == b"42"
41 |
42 | session.assert_called_once_with()
43 | session.return_value.get.assert_called_once_with(
44 | url="https://here.comes/a/fancy.url"
45 | )
46 |
47 |
48 | def test_http_post(mocker):
49 | session = mocker.patch("calculadora_do_cidadao.download.Session")
50 | session.return_value.post.return_value.content = b"42"
51 |
52 | download = Download("https://here.comes/a/fancy.url", post_data={"test": 42})
53 | for contents in download.http():
54 | assert contents == b"42"
55 |
56 | session.assert_called_once_with()
57 | session.return_value.post.assert_called_once_with(
58 | url="https://here.comes/a/fancy.url", data={"test": 42}
59 | )
60 |
61 |
62 | def test_multiple_http_post(mocker):
63 | session = mocker.patch("calculadora_do_cidadao.download.Session")
64 | session.return_value.post.return_value.content = b"42"
65 |
66 | download = Download(
67 | "https://here.comes/a/fancy.url", post_data=({"answer": 42}, {"test": 42})
68 | )
69 | for contents in download.http():
70 | assert contents == b"42"
71 |
72 | session.assert_called_once_with()
73 | session.return_value.post.assert_any_call(
74 | url="https://here.comes/a/fancy.url", data={"answer": 42}
75 | )
76 | session.return_value.post.assert_any_call(
77 | url="https://here.comes/a/fancy.url", data={"test": 42}
78 | )
79 |
80 |
81 | def test_http_post_as_json(mocker):
82 | session = mocker.patch("calculadora_do_cidadao.download.Session")
83 | session.return_value.post.return_value.content = b"42"
84 |
85 | download = Download(
86 | "https://here.comes/a/fancy.url",
87 | post_data={"test": 42},
88 | headers={"Accept": "application/json"},
89 | )
90 | for contents in download.http():
91 | assert contents == b"42"
92 |
93 | session.assert_called_once_with()
94 | session.return_value.post.assert_called_once_with(
95 | url="https://here.comes/a/fancy.url",
96 | json={"test": 42},
97 | headers={"Accept": "application/json"},
98 | )
99 |
100 |
101 | def test_download(mocker):
102 | mocker.patch.object(Download, "http", return_value=(b for b in (b"42",)))
103 | download = Download("http://here.comes/a/fancy/url.zip")
104 | with download() as paths:
105 | for path in paths():
106 | assert path.read_bytes() == b"42"
107 | download.http.assert_called_once_with()
108 |
109 |
110 | def test_download_not_implemented():
111 | expected = r"No method implemented for tcp\." # this is a regex
112 | with raises(DownloadMethodNotImplementedError, match=expected):
113 | Download("tcp://here.comes/a/fancy/url.zip")
114 |
115 |
116 | def test_post_processing(mocker, broken_table):
117 | mocker.patch.object(
118 | Download, "http", return_value=(b for b in (broken_table.read_bytes(),))
119 | )
120 | download = Download(
121 | "http://here.comes/a/fancy/url.zip", post_processing=lambda b: b"" + b
122 | )
123 | with download() as paths:
124 | for path in paths():
125 | assert path.read_text().startswith("")
126 |             assert path.read_text().endswith("\n")
127 | download.http.assert_called_once()
128 |
--------------------------------------------------------------------------------
/tests/test_fields.py:
--------------------------------------------------------------------------------
1 | from datetime import date, datetime
2 | from decimal import Decimal
3 |
4 | import pytest
5 | from freezegun import freeze_time
6 |
7 | from calculadora_do_cidadao.fields import DateField, PercentField
8 |
9 |
10 | @pytest.mark.parametrize("value", ("12.37%", "12,37%"))
11 | def test_percent_field(value):
12 | assert PercentField.deserialize(value) == Decimal("0.1237")
13 |
14 |
15 | @freeze_time("2018-07-06 21:00:00", tz_offset=-3)
16 | @pytest.mark.parametrize(
17 | "value",
18 | (
19 | date(2018, 7, 6),
20 | datetime(2018, 7, 6, 21, 0, 0),
21 | "2018-07-06T21:00:00",
22 | "2018-07-06 21:00:00",
23 | "2018-07-06",
24 | "06/07/2018",
25 | 1530925200,
26 | 1530925200.0,
27 | ),
28 | )
29 | def test_date_field_with_complete_dates(value):
30 | assert DateField.deserialize(value) == date(2018, 7, 6)
31 |
32 |
33 | @pytest.mark.parametrize(
34 | "value", ("2018-07", "Jul/2018", "Jul-2018", "Jul 2018", "07/2018")
35 | )
36 | def test_date_field_with_incomplete_dates(value):
37 | assert DateField.deserialize(value) == date(2018, 7, 1)
38 |
39 |
40 | def test_date_field_with_only_year():
41 | assert DateField.deserialize("2018") == date(2018, 1, 1)
42 |
43 |
44 | def test_date_field_error():
45 | with pytest.raises(ValueError):
46 | DateField.deserialize("hello, world")
47 |
--------------------------------------------------------------------------------
/tests/test_main.py:
--------------------------------------------------------------------------------
1 | from datetime import date
2 | from decimal import Decimal
3 | from pathlib import Path
4 | from tempfile import TemporaryDirectory
5 |
6 | from calculadora_do_cidadao import (
7 | AllUrbanCityAverage,
8 | CestaBasica,
9 | CestaBasicaAracaju,
10 | CestaBasicaBelem,
11 | CestaBasicaBeloHorizonte,
12 | CestaBasicaBoaVista,
13 | CestaBasicaBrasilia,
14 | CestaBasicaCampoGrande,
15 | CestaBasicaCentroOeste,
16 | CestaBasicaCuiaba,
17 | CestaBasicaCuritiba,
18 | CestaBasicaFlorianopolis,
19 | CestaBasicaFortaleza,
20 | CestaBasicaGoiania,
21 | CestaBasicaJoaoPessoa,
22 | CestaBasicaMacae,
23 | CestaBasicaMacapa,
24 | CestaBasicaMaceio,
25 | CestaBasicaManaus,
26 | CestaBasicaNatal,
27 | CestaBasicaNordeste,
28 | CestaBasicaNorte,
29 | CestaBasicaPalmas,
30 | CestaBasicaPortoAlegre,
31 | CestaBasicaPortoVelho,
32 | CestaBasicaRecife,
33 | CestaBasicaRioBranco,
34 | CestaBasicaRioDeJaneiro,
35 | CestaBasicaSalvador,
36 | CestaBasicaSaoLuis,
37 | CestaBasicaSaoPaulo,
38 | CestaBasicaSudeste,
39 | CestaBasicaSul,
40 | CestaBasicaTeresina,
41 | CestaBasicaVitoria,
42 | Igpm,
43 | Inpc,
44 | Ipca,
45 | Ipca15,
46 | IpcaE,
47 | )
48 | from calculadora_do_cidadao.__main__ import data, export, get_adapters
49 |
50 |
51 | ADAPTERS = (
52 | AllUrbanCityAverage,
53 | Igpm,
54 | Inpc,
55 | Ipca,
56 | Ipca15,
57 | CestaBasica,
58 | CestaBasicaAracaju,
59 | CestaBasicaBelem,
60 | CestaBasicaBeloHorizonte,
61 | CestaBasicaBoaVista,
62 | CestaBasicaBrasilia,
63 | CestaBasicaCampoGrande,
64 | CestaBasicaCentroOeste,
65 | CestaBasicaCuiaba,
66 | CestaBasicaCuritiba,
67 | CestaBasicaFlorianopolis,
68 | CestaBasicaFortaleza,
69 | CestaBasicaGoiania,
70 | CestaBasicaJoaoPessoa,
71 | CestaBasicaMacae,
72 | CestaBasicaMacapa,
73 | CestaBasicaMaceio,
74 | CestaBasicaManaus,
75 | CestaBasicaNatal,
76 | CestaBasicaNordeste,
77 | CestaBasicaNorte,
78 | CestaBasicaPalmas,
79 | CestaBasicaPortoAlegre,
80 | CestaBasicaPortoVelho,
81 | CestaBasicaRecife,
82 | CestaBasicaRioBranco,
83 | CestaBasicaRioDeJaneiro,
84 | CestaBasicaSalvador,
85 | CestaBasicaSaoLuis,
86 | CestaBasicaSaoPaulo,
87 | CestaBasicaSudeste,
88 | CestaBasicaSul,
89 | CestaBasicaTeresina,
90 | CestaBasicaVitoria,
91 | IpcaE,
92 | )
93 |
94 |
95 | def test_get_adapters():
96 | assert set(get_adapters()) == set(ADAPTERS)
97 |
98 |
99 | def test_data(mocker):
100 | for count, Adapter in enumerate(ADAPTERS, 1):
101 | download = mocker.patch.object(Adapter, "download")
102 | download.return_value = (
103 | (date(2019, 12, 1), Decimal(count)),
104 | (date(2020, 1, 1), Decimal(count * 1.5)),
105 | )
106 |
107 | result = tuple(data())
108 | assert len(result) == 2 * len(ADAPTERS)
109 | for dictionary in result:
110 | assert len(dictionary) == 3
111 |
112 |
113 | def test_export(mocker):
114 | for count, Adapter in enumerate(ADAPTERS, 1):
115 | download = mocker.patch.object(Adapter, "download")
116 | download.return_value = (
117 | (date(2019, 12, 1), Decimal(count)),
118 | (date(2020, 1, 1), Decimal(count * 1.5)),
119 | )
120 |
121 | with TemporaryDirectory() as _tmp:
122 | path = Path(_tmp) / "calculadora-do-cidadao.csv"
123 | export(path)
124 | content = path.read_text()
125 |
126 | for Adapter in ADAPTERS:
127 | assert Adapter.__name__.lower() in content
128 |
129 | assert len(content.split()) == len(ADAPTERS * 2) + 1 # plus 1 for header
130 |
--------------------------------------------------------------------------------