├── scraper
    ├── __init__.py
    ├── tests
    │   ├── __init__.py
    │   └── test_utils.py
    ├── pipelines.py
    ├── extensions.py
    ├── spiders
    │   ├── __init__.py
    │   └── utils.py
    ├── items.py
    ├── settings.py
    ├── monitors.py
    └── validators.py
├── web
    ├── home
    │   ├── __init__.py
    │   ├── tests
    │   │   ├── __init__.py
    │   │   └── test_templates.py
    │   ├── migrations
    │   │   └── __init__.py
    │   ├── static
    │   │   └── home
    │   │   │   ├── style.css
    │   │   │   ├── favicon.ico
    │   │   │   ├── imagem-apresentacao-dadosdefeira.png
    │   │   │   ├── bulma.js
    │   │   │   └── hero.css
    │   ├── apps.py
    │   ├── urls.py
    │   ├── views.py
    │   ├── templates
    │   │   ├── admin
    │   │   │   └── base_site.html
    │   │   └── snippets
    │   │   │   └── google-analytics.html
    │   └── context_processors.py
    ├── api
    │   ├── tests
    │   │   ├── __init__.py
    │   │   ├── test_health_check.py
    │   │   ├── constants.py
    │   │   ├── conftest.py
    │   │   └── test_serializers.py
    │   ├── filters.py
    │   ├── routes.py
    │   ├── constants.py
    │   ├── serializers.py
    │   └── views.py
    ├── datasets
    │   ├── __init__.py
    │   ├── tests
    │   │   ├── __init__.py
    │   │   ├── conftest.py
    │   │   ├── fixtures
    │   │   │   ├── empty-response.json
    │   │   │   └── response-22042021.json
    │   │   ├── test_signals.py
    │   │   ├── management
    │   │   │   └── commands
    │   │   │   │   ├── test_search_vector.py
    │   │   │   │   └── test_citycouncil.py
    │   │   ├── test_parsers.py
    │   │   └── test_services.py
    │   ├── migrations
    │   │   ├── __init__.py
    │   │   ├── 0029_file_local_path.py
    │   │   ├── 0023_auto_20201124_0458.py
    │   │   ├── 0027_auto_20210501_0839.py
    │   │   ├── 0013_file_search_vector.py
    │   │   ├── 0018_file_external_code.py
    │   │   ├── 0015_drop_gazette_file_trigger.py
    │   │   ├── 0026_auto_20210410_0548.py
    │   │   ├── 0006_gazette_search_vector.py
    │   │   ├── 0004_auto_20200321_0817.py
    │   │   ├── 0001_initial.py
    │   │   ├── 0016_auto_20200522_0647.py
    │   │   ├── 0003_citycouncilattendancelist.py
    │   │   ├── 0028_auto_20210703_0457.py
    │   │   ├── 0005_auto_20200327_1348.py
    │   │   ├── 0010_auto_20200515_0959.py
    │   │   ├── 0007_citycouncilexpense.py
    │   │   ├── 0030_alter_historicalcitycouncilattendancelist_options_and_more.py
    │   │   ├── 0014_citycouncilbid.py
    │   │   ├── 0021_historicalcitycouncilattendancelist.py
    │   │   ├── 0009_auto_20200514_1350.py
    │   │   ├── 0002_auto_20200316_1905.py
    │   │   ├── 0024_auto_20210326_1704.py
    │   │   ├── 0019_auto_20200704_1132.py
    │   │   ├── 0012_auto_20200520_1050.py
    │   │   ├── 0025_auto_20210327_1144.py
    │   │   ├── 0017_citycouncilrevenue.py
    │   │   └── 0008_cityhallbid_cityhallbidevent.py
    │   ├── apps.py
    │   ├── management
    │   │   └── commands
    │   │   │   ├── _file.py
    │   │   │   ├── searchvector.py
    │   │   │   ├── _tcmba.py
    │   │   │   ├── citycouncil_sync.py
    │   │   │   ├── _cityhall.py
    │   │   │   ├── _citycouncil.py
    │   │   │   ├── crawl_tcmba.py
    │   │   │   ├── import.py
    │   │   │   ├── _gazette.py
    │   │   │   ├── load_tcmba_documents.py
    │   │   │   └── crawl.py
    │   ├── signals.py
    │   ├── baker_recipes.py
    │   ├── parsers.py
    │   ├── services.py
    │   └── adapters.py
    ├── __init__.py
    ├── asgi.py
    ├── celery.py
    ├── wsgi.py
    └── urls.py
├── runtime.txt
├── DOKKU_SCALE
├── CHECKS
├── pytest.ini
├── scrapy.cfg
├── .github
    ├── dependabot.yml
    └── workflows
    │   └── cicd.yml
├── Procfile
├── bin
    └── release.sh
├── .dockerignore
├── dev_requirements.txt
├── dependabot.yml
├── .gitignore
├── setup.cfg
├── Dockerfile
├── .pre-commit-config.yaml
├── manage.py
├── .env.example
├── requirements.txt
├── Makefile
├── LICENSE
├── docker-compose.yml
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
└── README.md


/scraper/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/web/home/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/runtime.txt:
--------------------------------------------------------------------------------
1 | python-3.8.6
2 | 


--------------------------------------------------------------------------------
/scraper/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/web/api/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/web/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/web/home/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/web/datasets/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/web/home/migrations/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/DOKKU_SCALE:
--------------------------------------------------------------------------------
1 | web=1
2 | worker=1
3 | 


--------------------------------------------------------------------------------
/web/datasets/migrations/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/CHECKS:
--------------------------------------------------------------------------------
1 | /api/?format=json "status":"available"
2 | 


--------------------------------------------------------------------------------
/web/home/static/home/style.css:
--------------------------------------------------------------------------------
1 | .footer-link {
2 |     color: rgb(41, 92, 173);
3 | }
4 | 


--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | DJANGO_SETTINGS_MODULE=web.settings
3 | DJANGO_CONFIGURATION=Test
4 | 


--------------------------------------------------------------------------------
/scrapy.cfg:
--------------------------------------------------------------------------------
1 | [settings]
2 | default = scraper.settings
3 | 
4 | [deploy]
5 | project = scraper
6 | 


--------------------------------------------------------------------------------
/web/home/apps.py:
--------------------------------------------------------------------------------
1 | from django.apps import AppConfig
2 | 
3 | 
4 | class HomeConfig(AppConfig):
5 |     name = "web.home"
6 | 


--------------------------------------------------------------------------------
/web/__init__.py:
--------------------------------------------------------------------------------
1 | """Inicializa Django web app."""
2 | from .celery import app as celery_app
3 | 
4 | __all__ = ("celery_app",)
5 | 


--------------------------------------------------------------------------------
/web/home/static/home/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DadosAbertosDeFeira/maria-quiteria/HEAD/web/home/static/home/favicon.ico


--------------------------------------------------------------------------------
/web/home/urls.py:
--------------------------------------------------------------------------------
1 | from django.urls import path
2 | 
3 | from . import views
4 | 
5 | urlpatterns = [path("", views.index, name="index")]
6 | 


--------------------------------------------------------------------------------
/web/home/views.py:
--------------------------------------------------------------------------------
1 | from django.shortcuts import render
2 | 
3 | 
4 | def index(request):
5 |     return render(request, "home/index.html", {})
6 | 


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 | - package-ecosystem: pip
4 |   directory: "/"
5 |   schedule:
6 |     interval: monthly
7 |   open-pull-requests-limit: 10
8 | 


--------------------------------------------------------------------------------
/web/home/templates/admin/base_site.html:
--------------------------------------------------------------------------------
1 | {% extends 'admin/base_site.html' %}
2 | {% block extrahead %}
3 |     {% include 'snippets/google-analytics.html' %}
4 | {% endblock %}
5 | 


--------------------------------------------------------------------------------
/web/datasets/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | 
3 | 
4 | @pytest.fixture
5 | def mock_backup_file(mocker):
6 |     return mocker.patch("web.datasets.tasks.backup_file.apply_async")
7 | 


--------------------------------------------------------------------------------
/web/home/context_processors.py:
--------------------------------------------------------------------------------
1 | from django.conf import settings
2 | 
3 | 
4 | def google_analytics_key(request):
5 |     return {"GOOGLE_ANALYTICS_KEY": settings.GOOGLE_ANALYTICS_KEY}
6 | 


--------------------------------------------------------------------------------
/web/home/static/home/imagem-apresentacao-dadosdefeira.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DadosAbertosDeFeira/maria-quiteria/HEAD/web/home/static/home/imagem-apresentacao-dadosdefeira.png


--------------------------------------------------------------------------------
/Procfile:
--------------------------------------------------------------------------------
1 | release: bin/release.sh
2 | web: gunicorn web.wsgi:application --preload --log-file -
3 | worker: celery -A web worker -l INFO --without-heartbeat --without-gossip --without-mingle
4 | 


--------------------------------------------------------------------------------
/web/asgi.py:
--------------------------------------------------------------------------------
1 | import os
2 | 
3 | from django.core.asgi import get_asgi_application
4 | 
5 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "web.settings")
6 | 
7 | application = get_asgi_application()
8 | 


--------------------------------------------------------------------------------
/bin/release.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -eo pipefail
 4 | 
 5 | PYTHON=$(which python3)
 6 | 
 7 | echo "Running migrations"
 8 | ${PYTHON} manage.py migrate --no-input
 9 | 
10 | echo "Done!"
11 | 


--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
 1 | venv/
 2 | __pycache__
 3 | .idea
 4 | .scrapy/
 5 | .vscode
 6 | .env
 7 | .pytest_cache
 8 | *.log
 9 | 
10 | # data
11 | *.json
12 | *.csv
13 | *.xls
14 | *.zip
15 | **/data/
16 | *.sqlite*
17 | 


--------------------------------------------------------------------------------
/dev_requirements.txt:
--------------------------------------------------------------------------------
1 | -r requirements.txt
2 | django-debug-toolbar==4.2.0
3 | model-bakery==1.15.0
4 | pre-commit==3.3.3
5 | pytest==7.4.0
6 | pytest-django==4.5.2
7 | pytest-dotenv==0.5.2
8 | pytest-mock==3.11.1
9 | 


--------------------------------------------------------------------------------
/scraper/pipelines.py:
--------------------------------------------------------------------------------
1 | from scraper.spiders.utils import get_git_commit
2 | 
3 | 
4 | class DefaultValuesPipeline(object):
5 |     def process_item(self, item, spider):
6 |         item.setdefault("git_commit", get_git_commit())
7 |         return item
8 | 


--------------------------------------------------------------------------------
/dependabot.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | updates:
 3 |   - package-ecosystem: "pip"
 4 |     directory: "/"
 5 |     schedule:
 6 |       interval: "monthly"
 7 |     ignore:
 8 |       - dependency-name: "*"
 9 |         update-types: ["version-update:semver-patch"]
10 | 


--------------------------------------------------------------------------------
/web/datasets/apps.py:
--------------------------------------------------------------------------------
 1 | from django.apps import AppConfig
 2 | 
 3 | 
 4 | class DatasetsConfig(AppConfig):
 5 |     name = "web.datasets"
 6 |     verbose_name = "Bases de dados"
 7 | 
 8 |     def ready(self):
 9 |         import web.datasets.signals  # noqa
10 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | venv/
 2 | .venv/
 3 | __pycache__
 4 | .idea
 5 | .scrapy/
 6 | .vscode
 7 | .env
 8 | .pytest_cache
 9 | *.log
10 | 
11 | # data
12 | *.json
13 | !**/fixtures/*.json
14 | *.csv
15 | *.xls
16 | *.zip
17 | **/data/
18 | *.sqlite*
19 | 
20 | # django
21 | /static/
22 | 
23 | # scrapy
24 | files/
25 | 


--------------------------------------------------------------------------------
/web/datasets/management/commands/_file.py:
--------------------------------------------------------------------------------
 1 | from web.datasets.models import File
 2 | 
 3 | 
 4 | def save_file(url, content_type, object_id, checksum=None):
 5 |     File.objects.get_or_create(
 6 |         url=url,
 7 |         content_type=content_type,
 8 |         object_id=object_id,
 9 |         checksum=checksum,
10 |     )
11 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | [flake8]
 2 | max-line-length = 88
 3 | exclude = .git,*migrations*
 4 | extend-ignore = E203
 5 | 
 6 | [isort]
 7 | multi_line_output = 3
 8 | include_trailing_comma = True
 9 | force_grid_wrap = 0
10 | use_parentheses = True
11 | line_length = 88
12 | 
13 | [tool:pytest]
14 | DJANGO_SETTINGS_MODULE = web.settings
15 | DJANGO_CONFIGURATION = Test
16 | 


--------------------------------------------------------------------------------
/web/datasets/tests/fixtures/empty-response.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "inclusoesContrato": [],
 3 |     "alteracoesContrato": [],
 4 |     "exclusoesContrato": [],
 5 |     "inclusoesLicitacao": [],
 6 |     "alteracoesLicitacao": [],
 7 |     "exclusoesLicitacao": [],
 8 |     "inclusoesReceita": [],
 9 |     "alteracoesReceita": [],
10 |     "exclusoesReceita": [],
11 |     "inclusoesDespesa": [],
12 |     "alteracoesDespesa": [],
13 |     "exclusoesDespesa": []
14 | }
15 | 


--------------------------------------------------------------------------------
/web/celery.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import configurations
 4 | from celery import Celery
 5 | from django.apps import apps
 6 | 
 7 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "web.settings")
 8 | os.environ.setdefault("DJANGO_CONFIGURATION", "Dev")
 9 | 
10 | configurations.setup()
11 | 
12 | app = Celery("web")
13 | app.config_from_object("django.conf:settings", namespace="CELERY")
14 | app.autodiscover_tasks(lambda: [n.name for n in apps.get_app_configs()])
15 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM python:3.8-slim
 2 | 
 3 | # Unbuffered stdout/stderr so Python output reaches the container logs at once.
 4 | ENV PYTHONUNBUFFERED=1
 5 | 
 6 | WORKDIR /code
 7 | 
 8 | # Copy only the requirement files first so the dependency layer stays cached
 9 | # until one of them changes.
10 | COPY requirements.txt dev_requirements.txt ./
11 | 
12 | # gcc is only needed while building wheels: install, build and purge it in a
13 | # single layer so it never ends up in the image. apt-get is used throughout
14 | # because the `apt` CLI is not meant for scripts, and pip's download cache is
15 | # disabled to keep the layer small.
16 | RUN apt-get update && \
17 |     apt-get install -y netcat-openbsd gcc && \
18 |     pip install --no-cache-dir -r dev_requirements.txt && \
19 |     apt-get purge -y gcc && \
20 |     apt-get autoremove -y && \
21 |     apt-get clean && \
22 |     rm -rf /var/lib/apt/lists/*
23 | 
24 | COPY . .
25 | 
26 | RUN python manage.py collectstatic --no-input
27 | 


--------------------------------------------------------------------------------
/web/home/static/home/bulma.js:
--------------------------------------------------------------------------------
 1 | // The following code is based off a toggle menu by @Bradcomp
 2 | // source: https://gist.github.com/Bradcomp/a9ef2ef322a8e8017443b626208999c1
 3 | (function() {
 4 |     var burger = document.querySelector('.burger');
 5 |     var menu = document.querySelector('#'+burger.dataset.target);
 6 |     burger.addEventListener('click', function() {
 7 |         burger.classList.toggle('is-active');
 8 |         menu.classList.toggle('is-active');
 9 |     });
10 | })();
11 | 


--------------------------------------------------------------------------------
/web/wsgi.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from configurations.wsgi import get_wsgi_application
 4 | from django.conf import settings
 5 | 
 6 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "web.settings")
 7 | os.environ.setdefault("DJANGO_CONFIGURATION", "Dev")
 8 | 
 9 | application = get_wsgi_application()
10 | 
11 | if settings.ENABLE_NEW_RELIC:
12 |     import newrelic.agent
13 | 
14 |     newrelic.agent.initialize(settings.NEW_RELIC_CONFIG_FILE)
15 |     application = newrelic.agent.WSGIApplicationWrapper(application)
16 | 


--------------------------------------------------------------------------------
/web/datasets/migrations/0029_file_local_path.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.2.7 on 2021-09-23 08:38
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 |     dependencies = [
 8 |         ("datasets", "0028_auto_20210703_0457"),
 9 |     ]
10 | 
11 |     operations = [
12 |         migrations.AddField(
13 |             model_name="file",
14 |             name="local_path",
15 |             field=models.CharField(blank=True, max_length=350, null=True),
16 |         ),
17 |     ]
18 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 | - repo: https://github.com/pre-commit/pre-commit-hooks
 3 |   rev: v2.5.0
 4 |   hooks:
 5 |   - id: check-added-large-files
 6 |   - id: debug-statements
 7 |   - id: end-of-file-fixer
 8 |   - id: requirements-txt-fixer
 9 |   - id: trailing-whitespace
10 | - repo: https://github.com/pycqa/flake8
11 |   rev: 6.1.0
12 |   hooks:
13 |   - id: flake8
14 | - repo: https://github.com/pycqa/isort
15 |   rev: 5.12.0
16 |   hooks:
17 |   - id: isort
18 | - repo: https://github.com/ambv/black
19 |   rev: 23.7.0
20 |   hooks:
21 |   - id: black
22 | 


--------------------------------------------------------------------------------
/manage.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | """Django's command-line utility for administrative tasks."""
 3 | import os
 4 | import sys
 5 | 
 6 | from dotenv import find_dotenv, load_dotenv
 7 | 
 8 | 
 9 | def main():
10 |     os.environ.setdefault("DJANGO_SETTINGS_MODULE", "web.settings")
11 |     os.environ.setdefault("DJANGO_CONFIGURATION", "Dev")
12 | 
13 |     load_dotenv(find_dotenv())
14 | 
15 |     from configurations.management import execute_from_command_line
16 | 
17 |     execute_from_command_line(sys.argv)
18 | 
19 | 
20 | if __name__ == "__main__":
21 |     main()
22 | 


--------------------------------------------------------------------------------
/scraper/extensions.py:
--------------------------------------------------------------------------------
 1 | import sentry_sdk
 2 | from scrapy.exceptions import NotConfigured
 3 | 
 4 | 
 5 | class SentryLogging(object):
 6 |     """
 7 |     Envia exceções e erros para o Sentry.
 8 | 
 9 |     Copiado de: https://stackoverflow.com/a/54964660/1344295
10 |     """
11 | 
12 |     @classmethod
13 |     def from_crawler(cls, crawler):
14 |         sentry_dsn = crawler.settings.get("SENTRY_DSN", None)
15 |         if sentry_dsn is None:
16 |             raise NotConfigured
17 |         ext = cls()
18 |         sentry_sdk.init(sentry_dsn)
19 |         return ext
20 | 


--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
 1 | SENTRY_DSN=
 2 | SPIDERMON_TELEGRAM_FAKE=True
 3 | SPIDERMON_SENTRY_FAKE=True
 4 | DJANGO_SETTINGS_MODULE=web.settings
 5 | DJANGO_CONFIGURATION=Dev
 6 | DJANGO_SECRET_KEY=dont-tell-anybody
 7 | ACCESS_TOKEN_LIFETIME_IN_MINUTES=60
 8 | REFRESH_TOKEN_LIFETIME_IN_MINUTES=60
 9 | AWS_ACCESS_KEY_ID=
10 | AWS_SECRET_ACCESS_KEY=
11 | AWS_S3_BUCKET=
12 | AWS_S3_BUCKET_FOLDER=
13 | AWS_S3_REGION=
14 | # A variável abaixo aponta para o arquivo de configuração do New Relic; se necessário, inclua também o caminho (path) do arquivo.
15 | NEW_RELIC_CONFIG_FILE=newrelic.ini
16 | NEW_RELIC_LICENSE_KEY=
17 | NEW_RELIC_APP_NAME=
18 | 


--------------------------------------------------------------------------------
/web/datasets/migrations/0023_auto_20201124_0458.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.1.2 on 2020-11-24 07:58
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 |     dependencies = [
 8 |         ("datasets", "0022_historical_citycouncil"),
 9 |     ]
10 | 
11 |     operations = [
12 |         migrations.AlterField(
13 |             model_name="file",
14 |             name="s3_url",
15 |             field=models.URLField(
16 |                 blank=True, max_length=400, null=True, verbose_name="URL externa"
17 |             ),
18 |         ),
19 |     ]
20 | 


--------------------------------------------------------------------------------
/web/datasets/migrations/0027_auto_20210501_0839.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.1.8 on 2021-05-01 11:39
 2 | 
 3 | from django.db import migrations
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 |     dependencies = [
 8 |         ("datasets", "0026_auto_20210410_0548"),
 9 |     ]
10 | 
11 |     operations = [
12 |         migrations.AlterModelOptions(
13 |             name="syncinformation",
14 |             options={
15 |                 "ordering": ["-created_at"],
16 |                 "verbose_name": "Sincronização",
17 |                 "verbose_name_plural": "Sincronizações",
18 |             },
19 |         ),
20 |     ]
21 | 


--------------------------------------------------------------------------------
/scraper/spiders/__init__.py:
--------------------------------------------------------------------------------
 1 | import scrapy
 2 | from dateutil.parser import parse
 3 | 
 4 | 
 5 | class BaseSpider(scrapy.Spider):
 6 |     start_from_date = None
 7 | 
 8 |     @property
 9 |     def start_date(self):
10 |         picked_date = None
11 |         if self.start_from_date:
12 |             if isinstance(self.start_from_date, str):
13 |                 picked_date = parse(self.start_from_date, dayfirst=True)
14 |                 picked_date = picked_date.date()
15 |             else:
16 |                 picked_date = self.start_from_date
17 |         elif hasattr(self, "initial_date"):
18 |             picked_date = self.initial_date
19 | 
20 |         return picked_date
21 | 


--------------------------------------------------------------------------------
/web/datasets/migrations/0013_file_search_vector.py:
--------------------------------------------------------------------------------
 1 | from django.db import migrations
 2 | 
 3 | 
 4 | class Migration(migrations.Migration):
 5 |     dependencies = [
 6 |         ("datasets", "0012_auto_20200520_1050"),
 7 |     ]
 8 | 
 9 |     operations = [
10 |         migrations.RunSQL(
11 |             sql="""
12 |             CREATE TRIGGER search_vector_file_update BEFORE INSERT OR UPDATE
13 |             ON datasets_file FOR EACH ROW EXECUTE PROCEDURE
14 |             tsvector_update_trigger(search_vector, 'pg_catalog.portuguese', content);
15 |             """,
16 |             reverse_sql="DROP TRIGGER IF EXISTS search_vector_file_update ON datasets_file;",
17 |         ),
18 |     ]
19 | 


--------------------------------------------------------------------------------
/web/datasets/migrations/0018_file_external_code.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.0.6 on 2020-06-14 05:36
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 |     dependencies = [
 8 |         ("datasets", "0017_citycouncilrevenue"),
 9 |     ]
10 | 
11 |     operations = [
12 |         migrations.AddField(
13 |             model_name="file",
14 |             name="external_code",
15 |             field=models.CharField(
16 |                 blank=True,
17 |                 db_index=True,
18 |                 max_length=10,
19 |                 null=True,
20 |                 verbose_name="Código externo",
21 |             ),
22 |         ),
23 |     ]
24 | 


--------------------------------------------------------------------------------
/web/datasets/migrations/0015_drop_gazette_file_trigger.py:
--------------------------------------------------------------------------------
 1 | from django.db import migrations
 2 | 
 3 | 
 4 | class Migration(migrations.Migration):
 5 |     dependencies = [
 6 |         ("datasets", "0014_citycouncilbid"),
 7 |     ]
 8 | 
 9 |     operations = [
10 |         migrations.RunSQL(
11 |             sql="DROP TRIGGER IF EXISTS search_vector_update ON datasets_gazette;",
12 |             reverse_sql="""
13 |                 CREATE TRIGGER search_vector_update BEFORE INSERT OR UPDATE
14 |                 ON datasets_gazette FOR EACH ROW EXECUTE PROCEDURE
15 |                 tsvector_update_trigger(search_vector, 'pg_catalog.portuguese', file_content);
16 |             """,
17 |         ),
18 |     ]
19 | 


--------------------------------------------------------------------------------
/web/datasets/signals.py:
--------------------------------------------------------------------------------
 1 | from django.db.models.signals import post_save
 2 | from django.dispatch import receiver
 3 | 
 4 | from .models import File
 5 | 
 6 | 
 7 | @receiver(post_save, sender=File)
 8 | def backup_and_extract_content(sender, instance, **kwargs):
 9 |     """Faz backup e extrai conteúdo de um arquivo após sua criação."""
10 |     from .tasks import backup_file, content_from_file
11 | 
12 |     if instance.s3_url is None:
13 |         backup_file.apply_async(
14 |             (instance.pk,),
15 |             link=content_from_file.si(
16 |                 instance.pk,
17 |             ),
18 |         )
19 |     elif instance.content is None:
20 |         content_from_file.delay(instance.pk)
21 | 


--------------------------------------------------------------------------------
/web/home/templates/snippets/google-analytics.html:
--------------------------------------------------------------------------------
 1 | <!-- Google Analytics -->
 2 | <script>
 3 |     (function (i, s, o, g, r, a, m) {
 4 |         i['GoogleAnalyticsObject'] = r;
 5 |         i[r] = i[r] || function () {
 6 |             (i[r].q = i[r].q || []).push(arguments)
 7 |         }, i[r].l = 1 * new Date();
 8 |         a = s.createElement(o),
 9 |             m = s.getElementsByTagName(o)[0];
10 |         a.async = 1;
11 |         a.src = g;
12 |         m.parentNode.insertBefore(a, m)
13 |     })(window, document, 'script', 'https://www.google-analytics.com/analytics.js', 'ga');
14 | 
15 |     ga('create', '{{ GOOGLE_ANALYTICS_KEY }}', 'auto');
16 |     ga('send', 'pageview');
17 | </script>
18 | <!-- End Google Analytics -->
19 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | boto3==1.28.17
 2 | celery==5.3.1
 3 | dj-database-url==2.0.0
 4 | django==4.1.10
 5 | django-configurations==2.4.1
 6 | django-extensions==3.2.3
 7 | django-filter==23.2
 8 | django-public-admin==0.0.5
 9 | django-simple-history==3.3.0
10 | djangorestframework==3.14.0
11 | djangorestframework-simplejwt==5.3.0
12 | drf-yasg==1.21.7
13 | gunicorn==21.2.0
14 | https://github.com/DadosAbertosDeFeira/tcm-ba/releases/download/0.2.0/documentos_tcmba-0.2.0-py3-none-any.whl
15 | jinja2==3.1.2
16 | newrelic==8.8.1
17 | notifiers==1.2.1
18 | psycopg2-binary==2.9.7
19 | PyJWT==2.8.0
20 | python-dateutil==2.8.2
21 | python-dotenv==1.0.0
22 | schematics==2.1.1
23 | scrapy==2.10.1
24 | sentry-sdk==1.30.0
25 | spidermon==1.19.0
26 | tika==2.6.0
27 | whitenoise==6.5.0
28 | xlrd==2.0.1
29 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | bash:
 2 | 	docker-compose run --rm web bash
 3 | 
 4 | build:
 5 | 	docker-compose build
 6 | 
 7 | collectstatic:
 8 | 	docker-compose run --rm web python manage.py collectstatic
 9 | 
10 | crawl:
11 | 	docker-compose run --rm web python manage.py crawl
12 | 
13 | createsuperuser:
14 | 	docker-compose run --rm web python manage.py createsuperuser
15 | 
16 | makemigrations:
17 | 	docker-compose run --rm web python manage.py makemigrations
18 | 
19 | migrate:
20 | 	docker-compose run --rm web python manage.py migrate
21 | 
22 | run:
23 | 	docker-compose up -d
24 | 
25 | stop:
26 | 	docker-compose stop
27 | 
28 | runspider:
29 | 	docker-compose run --rm web scrapy crawl $(SPIDER) -a start_from_date=$(START_DATE)
30 | 
31 | shell:
32 | 	docker-compose run --rm web python manage.py shell_plus
33 | 
34 | tests:
35 | 	docker-compose run --rm web pytest --dc Test
36 | 


--------------------------------------------------------------------------------
/web/home/tests/test_templates.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | 
 4 | class TestHome:
 5 |     def test_append_google_analytics_key(self, settings, client):
 6 |         settings.GOOGLE_ANALYTICS_KEY = "UA-000000000-1"
 7 |         response = client.get("/")
 8 |         assert "UA-000000000-1" in str(response.content)
 9 | 
10 | 
11 | @pytest.mark.django_db
12 | class TestAdmin:
13 |     def test_append_google_analytics_key(self, settings, admin_client):
14 |         settings.GOOGLE_ANALYTICS_KEY = "UA-000000000-1"
15 |         response = admin_client.get("/admin/")
16 |         assert "UA-000000000-1" in str(response.content)
17 | 
18 | 
19 | @pytest.mark.django_db
20 | class TestPanel:
21 |     def test_append_google_analytics_key(self, settings, client):
22 |         settings.GOOGLE_ANALYTICS_KEY = "UA-000000000-1"
23 |         response = client.get("/painel/")
24 |         assert "UA-000000000-1" in str(response.content)
25 | 


--------------------------------------------------------------------------------
/web/datasets/tests/test_signals.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | from model_bakery import baker
 3 | 
 4 | 
 5 | @pytest.mark.django_db
 6 | def test_backup_and_extract_content_when_file_is_saved(mock_backup_file):
 7 |     expected_link_task = "web.datasets.tasks.content_from_file"
 8 |     baker.make("datasets.File", s3_url=None, content=None)
 9 | 
10 |     assert mock_backup_file.called is True
11 |     assert mock_backup_file.call_count == 1
12 |     assert expected_link_task in str(mock_backup_file.call_args_list[0][1]["link"])
13 | 
14 | 
15 | @pytest.mark.django_db
16 | def test_extract_content_when_file_with_backup_is_saved(mocker, mock_backup_file):
17 |     mock_content_from_file = mocker.patch("web.datasets.tasks.content_from_file.delay")
18 |     baker.make("datasets.File", s3_url="https://www.pdf.com/test.pdf", content=None)
19 | 
20 |     assert mock_backup_file.called is False
21 |     assert mock_content_from_file.called is True
22 | 


--------------------------------------------------------------------------------
/web/datasets/management/commands/searchvector.py:
--------------------------------------------------------------------------------
 1 | from django.contrib.postgres.search import SearchVector
 2 | from django.core.management.base import BaseCommand
 3 | 
 4 | from web.datasets.models import File
 5 | 
 6 | 
 7 | class Command(BaseCommand):
 8 |     help = """Remonta os indices de busca em caso de problemas
 9 |             com a geração de índice via trigger."""
10 | 
11 |     def echo(self, text, style=None):
12 |         self.stdout.write(style(text) if style else text)
13 | 
14 |     def handle(self, *args, **options):
15 |         file_count = File.objects.count()
16 |         self.echo(
17 |             f"Criando um vetor de busca para os arquivos. "
18 |             f"Total de itens: {file_count:,}",
19 |             self.style.SUCCESS,
20 |         )
21 |         self.echo("Aguarde...", self.style.SUCCESS)
22 | 
23 |         search_vector = SearchVector("content", config="portuguese")
24 | 
25 |         File.objects.update(search_vector=search_vector)
26 | 
27 |         self.echo("Pronto!", self.style.SUCCESS)
28 | 


--------------------------------------------------------------------------------
/web/api/tests/test_health_check.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | from django.urls import reverse
 3 | 
 4 | 
 5 | class TestHealthCheck:
 6 |     def test_return_success_when_accessing_health_check(self, api_client, url):
 7 |         response = api_client.get(url, format="json")
 8 |         assert response.status_code == 200
 9 |         assert list(response.json().keys()) == ["status", "time"]
10 |         assert response.json().get("status") == "available"
11 | 
12 |     def test_return_forbidden_when_trying_to_anonymously_access_a_restricted_route(
13 |         self, api_client
14 |     ):
15 |         url = reverse("gazettes-list")
16 |         response = api_client.get(url)
17 |         assert response.status_code == 403
18 | 
19 |     @pytest.mark.django_db
20 |     def test_return_success_when_accessing_a_restricted_route_with_credentials(
21 |         self, api_client_authenticated
22 |     ):
23 |         url = reverse("gazettes-list")
24 |         response = api_client_authenticated.get(url)
25 |         assert response.status_code == 200
26 | 


--------------------------------------------------------------------------------
/web/api/filters.py:
--------------------------------------------------------------------------------
 1 | from django_filters import rest_framework as filters
 2 | 
 3 | from web.datasets.models import CityHallBid, Gazette
 4 | 
 5 | 
 6 | class GazetteFilter(filters.FilterSet):  # query-string filters for Gazette
 7 |     start_date = filters.DateFilter(field_name="date", lookup_expr="gte")  # date >= value
 8 |     end_date = filters.DateFilter(field_name="date", lookup_expr="lte")  # date <= value
 9 | 
10 |     class Meta:
11 |         model = Gazette
12 |         fields = [
13 |             "power",
14 |             "start_date",
15 |             "end_date",
16 |             "events__title",  # "events__*" entries filter through the "events" relation
17 |             "events__secretariat",
18 |             "events__summary",
19 |             "year_and_edition",
20 |         ]
21 | 
22 | 
23 | class CityHallBidFilter(filters.FilterSet):  # query-string filters for CityHallBid
24 |     start_date = filters.DateFilter(field_name="session_at", lookup_expr="gte")
25 |     end_date = filters.DateFilter(field_name="session_at", lookup_expr="lte")  # NOTE(review): if session_at stores a time, confirm the end day itself is matched
26 |     description = filters.CharFilter(field_name="description", lookup_expr="icontains")  # case-insensitive substring
27 | 
28 |     class Meta:
29 |         model = CityHallBid
30 |         fields = ["public_agency", "description", "modality", "start_date", "end_date"]
31 | 


--------------------------------------------------------------------------------
/web/datasets/migrations/0026_auto_20210410_0548.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.1.8 on 2021-04-10 08:48
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):  # File: new original_filename, url re-declared, new unique_together
 7 |     dependencies = [
 8 |         ("contenttypes", "0002_remove_content_type_name"),
 9 |         ("datasets", "0025_auto_20210327_1144"),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.AddField(  # optional, indexed file name (up to 200 chars)
14 |             model_name="file",
15 |             name="original_filename",
16 |             field=models.CharField(
17 |                 blank=True,
18 |                 db_index=True,
19 |                 max_length=200,
20 |                 null=True,
21 |                 verbose_name="Nome do arquivo",
22 |             ),
23 |         ),
24 |         migrations.AlterField(
25 |             model_name="file",
26 |             name="url",
27 |             field=models.URLField(db_index=True, verbose_name="URL do arquivo"),
28 |         ),
29 |         migrations.AlterUniqueTogether(  # uniqueness now also covers original_filename
30 |             name="file",
31 |             unique_together={("url", "content_type", "object_id", "original_filename")},
32 |         ),
33 |     ]
34 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Dados Abertos de Feira
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/web/datasets/tests/management/commands/test_search_vector.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | 
 3 | import pytest
 4 | from model_bakery import baker
 5 | 
 6 | from web.datasets.management.commands.searchvector import Command
 7 | 
 8 | 
 9 | @pytest.mark.django_db
10 | class TestCommandHandler:
11 |     @pytest.mark.parametrize(
12 |         "text,answer",
13 |         [
14 |             ("O Prefeito Municipal de Feira...", "'feir':5 'municipal':3 'prefeit':2"),
15 |             (
16 |                 "Mussum Ipsum, cacilds vidis litro abertis.",
17 |                 "'abert':6 'cacilds':3 'ipsum':2 'litr':5 'mussum':1 'vid':4",
18 |             ),
19 |         ],
20 |     )
21 |     def test_handler(self, text, answer, capsys):
22 |         gazette = baker.make("datasets.File", content=text)
23 |         assert not gazette.search_vector
24 | 
25 |         command = Command()
26 |         command.handle()
27 | 
28 |         gazette.refresh_from_db()
29 | 
30 |         captured = capsys.readouterr()
31 |         assert re.search(r"Criando um vetor .* Total de itens: 1", captured.out)
32 |         assert "Pronto!" in captured.out
33 | 
34 |         assert gazette.search_vector == answer
35 | 


--------------------------------------------------------------------------------
/web/api/routes.py:
--------------------------------------------------------------------------------
 1 | from django.urls import include, path
 2 | from rest_framework import routers
 3 | 
 4 | from web.api.views import (
 5 |     CityCouncilAgendaView,
 6 |     CityCouncilAttendanceListView,
 7 |     CityCouncilMinuteView,
 8 |     CityHallBidView,
 9 |     FrontendEndpoint,
10 |     GazetteView,
11 |     HealthCheckView,
12 | )
13 | 
14 | router = routers.DefaultRouter()
15 | router.register("", HealthCheckView, basename="root")  # health check registered at the router's empty prefix
16 | router.register("datasets/gazettes", GazetteView, basename="gazettes")  # yields the "gazettes-*" route names
17 | 
18 | 
19 | urlpatterns = [
20 |     path("", include(router.urls)),  # router routes first, then the explicit paths below
21 |     path(
22 |         "datasets/city-council/agenda/",
23 |         CityCouncilAgendaView.as_view(),
24 |         name="city-council-agenda",
25 |     ),
26 |     path(
27 |         "datasets/city-council/attendance-list/",
28 |         CityCouncilAttendanceListView.as_view(),
29 |         name="city-council-attendance-list",
30 |     ),
31 |     path(
32 |         "datasets/city-council/minute/",
33 |         CityCouncilMinuteView.as_view(),
34 |         name="city-council-minute",
35 |     ),
36 |     path("datasets/city-hall/bids/", CityHallBidView.as_view(), name="city-hall-bids"),
37 |     path("datasets/endpoints", FrontendEndpoint.as_view(), name="frontend-endpoints"),  # NOTE(review): only path without a trailing slash - confirm intended
38 | ]
39 | 


--------------------------------------------------------------------------------
/web/datasets/management/commands/_tcmba.py:
--------------------------------------------------------------------------------
 1 | from django.contrib.admin.options import get_content_type_for_model
 2 | 
 3 | from web.datasets.models import File, TCMBADocument
 4 | from web.datasets.parsers import from_str_to_date
 5 | 
 6 | 
 7 | def save_document(item):
 8 |     public_view_url = "https://e.tcm.ba.gov.br/epp/ConsultaPublica/listView.seam"
 9 |     document, created = TCMBADocument.objects.get_or_create(
10 |         year=item["year"],
11 |         month=item["month"],
12 |         period=item["period"].lower(),
13 |         category=item["category"],
14 |         unit=item["unit"],
15 |         inserted_at=from_str_to_date(item["inserted_at"]),
16 |         inserted_by=item["inserted_by"],
17 |         original_filename=item["original_filename"],
18 |         crawled_from=public_view_url,
19 |         defaults={
20 |             "crawled_at": item["crawled_at"],
21 |         },
22 |     )
23 |     content_type = get_content_type_for_model(document)
24 |     if created:
25 |         _, file_created = File.objects.get_or_create(
26 |             url=public_view_url,
27 |             content_type=content_type,
28 |             object_id=document.pk,
29 |             local_path=f"{item['filepath']}{item['filename']}",
30 |             original_filename=item["original_filename"],
31 |         )
32 | 


--------------------------------------------------------------------------------
/web/datasets/management/commands/citycouncil_sync.py:
--------------------------------------------------------------------------------
 1 | from datetime import date, timedelta
 2 | 
 3 | from celery import chain
 4 | from dateutil.parser import parse
 5 | from django.core.management.base import BaseCommand
 6 | 
 7 | from web.datasets.tasks import (
 8 |     distribute_city_council_objects_to_sync,
 9 |     get_city_council_updates,
10 | )
11 | 
12 | 
13 | class Command(BaseCommand):
14 |     help = "Dispara sincronização com o webservice da Câmara de Vereadores."
15 | 
16 |     def add_arguments(self, parser):
17 |         parser.add_argument("--date", help="Data no formato aaaa-mm-dd")
18 | 
19 |     def handle(self, *args, **options):
20 |         if options.get("date"):
21 |             # converte para datetime para verificar se o formato está correto
22 |             target_date = parse(options.get("date"), yearfirst=True).date()
23 |         else:
24 |             # ontem
25 |             target_date = date.today() - timedelta(days=1)
26 | 
27 |         chain(
28 |             get_city_council_updates.s(target_date.strftime("%Y-%m-%d")),
29 |             distribute_city_council_objects_to_sync.s(),
30 |         )()
31 | 
32 |         self.stdout.write(
33 |             f"Syncronização com a Câmara iniciada (data alvo: {target_date})."
34 |         )
35 | 


--------------------------------------------------------------------------------
/web/datasets/migrations/0006_gazette_search_vector.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.0.5 on 2020-04-03 22:53
 2 | 
 3 | import django.contrib.postgres.indexes
 4 | import django.contrib.postgres.search
 5 | from django.db import migrations
 6 | 
 7 | 
 8 | class Migration(migrations.Migration):  # Gazette full-text search: column, GIN index and update trigger
 9 |     dependencies = [
10 |         ("datasets", "0005_auto_20200327_1348"),
11 |     ]
12 | 
13 |     operations = [
14 |         migrations.AddField(  # nullable, non-editable SearchVectorField
15 |             model_name="gazette",
16 |             name="search_vector",
17 |             field=django.contrib.postgres.search.SearchVectorField(
18 |                 editable=False, null=True
19 |             ),
20 |         ),
21 |         migrations.AddIndex(  # GIN index over search_vector
22 |             model_name="gazette",
23 |             index=django.contrib.postgres.indexes.GinIndex(
24 |                 fields=["search_vector"], name="datasets_ga_search__1d3d09_gin"
25 |             ),
26 |         ),
27 |         migrations.RunSQL(  # trigger fills search_vector from file_content before each insert/update
28 |             sql="""
29 |             CREATE TRIGGER search_vector_update BEFORE INSERT OR UPDATE
30 |             ON datasets_gazette FOR EACH ROW EXECUTE PROCEDURE
31 |             tsvector_update_trigger(search_vector, 'pg_catalog.portuguese', file_content);
32 |             """,
33 |             reverse_sql="DROP TRIGGER IF EXISTS search_vector_update ON datasets_gazette;",  # rollback drops the trigger
34 |         ),
35 |     ]
36 | 


--------------------------------------------------------------------------------
/web/api/constants.py:
--------------------------------------------------------------------------------
 1 | GAZETTES_API = "api/datasets/gazettes"  # path prefixes, written without a leading slash
 2 | CITY_HALL_API = "api/datasets/city-hall"
 3 | CITY_COUNCIL_API = "api/datasets/city-council"
 4 | 
 5 | AVAILABLE_ENDPOINTS_BY_PUBLIC_AGENCY = {  # agency key -> display name + list of endpoints
 6 |     "city-council": {
 7 |         "public_agency": "Câmara Municipal",
 8 |         "endpoints": [  # each entry pairs a display name with an endpoint path
 9 |             {
10 |                 "friendly_name": "Agenda dos vereadores",
11 |                 "endpoint": f"{CITY_COUNCIL_API}/agenda/",
12 |             },
13 |             {
14 |                 "friendly_name": "Atas das sessões",
15 |                 "endpoint": f"{CITY_COUNCIL_API}/minute/",
16 |             },
17 |             {
18 |                 "friendly_name": "Diário Oficial - Legislativo",
19 |                 "endpoint": f"{GAZETTES_API}/?power=legislative",  # gazettes endpoint narrowed by query string
20 |             },
21 |             {
22 |                 "friendly_name": "Lista de presença dos vereadores",
23 |                 "endpoint": f"{CITY_COUNCIL_API}/attendance-list/",
24 |             },
25 |         ],
26 |     },
27 |     "city-hall": {
28 |         "public_agency": "Prefeitura",
29 |         "endpoints": [
30 |             {
31 |                 "friendly_name": "Diário Oficial - Executivo",
32 |                 "endpoint": f"{GAZETTES_API}/?power=executive",
33 |             },
34 |             {
35 |                 "friendly_name": "Licitações",
36 |                 "endpoint": f"{CITY_HALL_API}/bids/",
37 |             },
38 |         ],
39 |     },
40 | }
41 | 


--------------------------------------------------------------------------------
/web/datasets/management/commands/_cityhall.py:
--------------------------------------------------------------------------------
 1 | from django.contrib.admin.options import get_content_type_for_model
 2 | 
 3 | from web.datasets.models import CityHallBid, CityHallBidEvent
 4 | 
 5 | from ._file import save_file
 6 | 
 7 | 
 8 | def save_bid(item):
 9 |     bid, created = CityHallBid.objects.update_or_create(
10 |         session_at=item["session_at"],
11 |         public_agency=item["public_agency"],
12 |         codes=item["codes"],
13 |         defaults={
14 |             "crawled_from": item["crawled_from"],
15 |             "crawled_at": item["crawled_at"],
16 |             "description": item["description"],
17 |             "modality": item["modality"],
18 |         },
19 |     )
20 | 
21 |     if created and item.get("files"):
22 |         content_type = get_content_type_for_model(bid)
23 |         for file_ in item["files"]:
24 |             save_file(file_, content_type, bid.pk)
25 | 
26 |     content_type = get_content_type_for_model(CityHallBidEvent)
27 |     for event in item["history"]:
28 |         event_obj, created = CityHallBidEvent.objects.get_or_create(
29 |             crawled_from=item["crawled_from"],
30 |             bid=bid,
31 |             published_at=event["published_at"],
32 |             summary=event["event"],
33 |             defaults={"crawled_at": item["crawled_at"]},
34 |         )
35 |         if created and event.get("url"):
36 |             save_file(event.get("url"), content_type, event_obj.pk)
37 |     return bid
38 | 


--------------------------------------------------------------------------------
/web/datasets/migrations/0004_auto_20200321_0817.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.0 on 2020-03-21 11:17
 2 | 
 3 | from django.db import migrations
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):  # only sets verbose_name / verbose_name_plural on four models
 7 |     dependencies = [
 8 |         ("datasets", "0003_citycouncilattendancelist"),
 9 |     ]
10 | 
11 |     operations = [
12 |         migrations.AlterModelOptions(
13 |             name="citycouncilagenda",
14 |             options={
15 |                 "verbose_name": "Câmara de Vereadores - Agenda",
16 |                 "verbose_name_plural": "Câmara de Vereadores - Agendas",
17 |             },
18 |         ),
19 |         migrations.AlterModelOptions(
20 |             name="citycouncilattendancelist",
21 |             options={
22 |                 "verbose_name": "Câmara de Vereadores - Lista de Presença",
23 |                 "verbose_name_plural": "Câmara de Vereadores - Listas de Presença",
24 |             },
25 |         ),
26 |         migrations.AlterModelOptions(
27 |             name="gazette",
28 |             options={
29 |                 "verbose_name": "Diário Oficial",
30 |                 "verbose_name_plural": "Diários Oficiais",
31 |             },
32 |         ),
33 |         migrations.AlterModelOptions(
34 |             name="gazetteevent",
35 |             options={
36 |                 "verbose_name": "Diário Oficial - Evento",
37 |                 "verbose_name_plural": "Diário Oficial - Eventos",
38 |             },
39 |         ),
40 |     ]
41 | 


--------------------------------------------------------------------------------
/web/api/tests/constants.py:
--------------------------------------------------------------------------------
 1 | GAZZETES_API = "api/datasets/gazettes"  # NOTE(review): spelled GAZETTES_API in web/api/constants.py - confirm before renaming
 2 | CITY_HALL_API = "api/datasets/city-hall"
 3 | CITY_COUNCIL_API = "api/datasets/city-council"
 4 | 
 5 | AVAILABLE_ENDPOINTS_BY_PUBLIC_AGENCY = {  # agency key -> display name + list of endpoints
 6 |     "city-council": {
 7 |         "public_agency": "Câmara Municipal",
 8 |         "endpoints": [  # each entry pairs a display name with an endpoint path
 9 |             {
10 |                 "friendly_name": "Agenda dos vereadores",
11 |                 "endpoint": f"{CITY_COUNCIL_API}/agenda/",
12 |             },
13 |             {
14 |                 "friendly_name": "Atas das sessões",
15 |                 "endpoint": f"{CITY_COUNCIL_API}/minute/",
16 |             },
17 |             {
18 |                 "friendly_name": "Diário Oficial - Legislativo",
19 |                 "endpoint": f"{GAZZETES_API}/?power=legislative",
20 |             },
21 |             {
22 |                 "friendly_name": "Lista de presença dos vereadores",
23 |                 "endpoint": f"{CITY_COUNCIL_API}/attendance-list/",
24 |             },
25 |         ],
26 |     },
27 |     "city-hall": {
28 |         "public_agency": "Prefeitura",
29 |         "endpoints": [
30 |             {
31 |                 "friendly_name": "Diário Oficial - Executivo",
32 |                 "endpoint": f"{GAZZETES_API}/?power=executive",
33 |             },
34 |             {
35 |                 "friendly_name": "Licitações",
36 |                 "endpoint": f"{CITY_HALL_API}/bids/",
37 |             },
38 |         ],
39 |     },
40 | }
41 | 


--------------------------------------------------------------------------------
/web/api/tests/conftest.py:
--------------------------------------------------------------------------------
 1 | from datetime import date
 2 | 
 3 | import pytest
 4 | from django.contrib.auth.models import User
 5 | from model_bakery import baker
 6 | from rest_framework.test import APIClient
 7 | 
 8 | 
 9 | @pytest.fixture
10 | def api_client():  # new DRF APIClient; no credentials are set here
11 |     return APIClient()
12 | 
13 | 
14 | @pytest.fixture
15 | def user():  # in-memory User instance; this fixture never saves it
16 |     return User(username="marvin", password="paranoidandroid")
17 | 
18 | 
19 | @pytest.fixture
20 | def api_client_authenticated(api_client, user):
21 |     api_client.force_authenticate(user)  # requests are treated as made by `user`
22 |     return api_client  # the same object the api_client fixture produced
23 | 
24 | 
25 | @pytest.fixture
26 | def url():  # path of the API root
27 |     return "/api/"
28 | 
29 | 
30 | @pytest.fixture
31 | def one_gazette():  # single Gazette built from the "datasets.Gazette" recipe
32 |     return baker.make_recipe("datasets.Gazette", date=date(2021, 4, 21))
33 | 
34 | 
35 | @pytest.fixture
36 | def last_of_two_gazettes():  # makes two gazettes, returns the one with the later date
37 |     baker.make_recipe("datasets.Gazette", date=date(2021, 3, 5))
38 |     return baker.make_recipe("datasets.Gazette", date=date(2021, 4, 21))
39 | 
40 | 
41 | @pytest.fixture
42 | def last_of_three_gazettes():  # makes three gazettes, returns the one with the latest date
43 |     baker.make_recipe("datasets.Gazette", date=date(2021, 1, 1), power="executivo")
44 |     baker.make_recipe(  # the middle gazette is set up through its event's gazette__* fields
45 |         "datasets.GazetteEvent",
46 |         summary="Life? Don't talk to me about life.",
47 |         gazette__date=date(2021, 3, 5),
48 |         gazette__power="legislativo",
49 |     )
50 |     return baker.make_recipe(
51 |         "datasets.Gazette", date=date(2021, 4, 21), power="executivo"
52 |     )
53 | 


--------------------------------------------------------------------------------
/web/urls.py:
--------------------------------------------------------------------------------
 1 | from django.conf import settings
 2 | from django.contrib import admin
 3 | from django.urls import include, path, re_path
 4 | from drf_yasg import openapi
 5 | from drf_yasg.views import get_schema_view
 6 | from rest_framework import permissions
 7 | from rest_framework_simplejwt.views import (
 8 |     TokenObtainPairView,
 9 |     TokenRefreshView,
10 |     TokenVerifyView,
11 | )
12 | 
13 | from web.datasets.admin import public_admin
14 | 
15 | schema_view = get_schema_view(  # API schema view, open to everyone (AllowAny)
16 |     openapi.Info(
17 |         title="Maria Quitéria API",
18 |         default_version="v1",
19 |         contact=openapi.Contact(email="dadosabertosdefeira+api@gmail.com"),
20 |         license=openapi.License(name="MIT"),
21 |     ),
22 |     public=True,
23 |     permission_classes=(permissions.AllowAny,),
24 | )
25 | 
26 | urlpatterns = [
27 |     path("admin/", admin.site.urls),
28 |     path("", include("web.home.urls")),
29 |     path("painel/", public_admin.urls),  # second admin site, imported from web.datasets.admin
30 |     path("api/", include("web.api.routes")),
31 |     path("api/token/", TokenObtainPairView.as_view(), name="token_obtain_pair"),  # JWT views from rest_framework_simplejwt
32 |     path("api/token/refresh/", TokenRefreshView.as_view(), name="token_refresh"),
33 |     path("api/token/verify/", TokenVerifyView.as_view(), name="token_verify"),
34 |     re_path(
35 |         r"^api/docs/$",
36 |         schema_view.with_ui("swagger"),  # Swagger UI rendering of schema_view
37 |         name="schema-swagger-ui",
38 |     ),
39 | ]
40 | 
41 | 
42 | if settings.DEBUG:  # debug toolbar is imported and routed only when DEBUG is on
43 |     import debug_toolbar
44 | 
45 |     urlpatterns = [path("__debug__/", include(debug_toolbar.urls))] + urlpatterns  # placed ahead of all other routes
46 | 


--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | version: "3.4"
 2 | 
 3 | services:
 4 |     db:
 5 |         image: library/postgres:11-alpine
 6 |         environment:
 7 |             POSTGRES_DB: mariaquiteria
 8 |             POSTGRES_USER: postgres
 9 |             POSTGRES_PASSWORD: postgres
10 |         volumes:
11 |             - ./db:/var/lib/postgresql  # NOTE(review): the image's default data dir is presumably /var/lib/postgresql/data - confirm data really lands in ./db
12 | 
13 |     rabbitmq:
14 |         image: rabbitmq
15 |         ports:
16 |           - "5672:5672"
17 |           - "15672:15672"
18 |         healthcheck:
19 |           test: [ "CMD", "nc", "-z", "localhost", "5672" ]  # NOTE(review): relies on nc being present in the image - confirm
20 |           interval: 5s
21 |           timeout: 15s
22 |           retries: 1
23 | 
24 |     tika:
25 |         image: apache/tika
26 |         ports:
27 |             - "9998:9998"
28 | 
29 |     web:
30 |         build: .
31 |         command: ["python", "manage.py", "runserver", "0.0.0.0:8000"]
32 |         volumes:
33 |             - .:/code  # project directory mounted into the container
34 |         ports:
35 |             - "8000:8000"
36 |         environment:
37 |             DATABASE_HOST: db  # the db service above
38 |         env_file: .env
39 |         depends_on:
40 |             - db
41 |             - worker
42 | 
43 |     worker:
44 |         build: .
45 |         command: ["celery", "-A", "web", "worker", "-l", "INFO", "--without-heartbeat", "--without-gossip", "--without-mingle"]
46 |         environment:
47 |             DATABASE_HOST: db
48 |             TIKA_CLIENT_ONLY: 1
49 |             TIKA_SERVER_ENDPOINT: http://tika:9998  # the tika service above, on its published port
50 |         env_file: .env
51 |         restart: on-failure
52 |         depends_on:
53 |             - tika
54 |             - db
55 |             - rabbitmq
56 | 


--------------------------------------------------------------------------------
/web/datasets/baker_recipes.py:
--------------------------------------------------------------------------------
 1 | from datetime import date
 2 | 
 3 | from model_bakery.recipe import Recipe, foreign_key
 4 | 
 5 | from web.datasets.models import (
 6 |     CityCouncilAgenda,
 7 |     CityCouncilAttendanceList,
 8 |     CityCouncilBid,
 9 |     CityCouncilContract,
10 |     CityCouncilExpense,
11 |     CityCouncilMinute,
12 |     CityCouncilRevenue,
13 |     CityHallBid,
14 |     File,
15 |     Gazette,
16 |     GazetteEvent,
17 |     SyncInformation,
18 | )
19 | 
20 | CityCouncilAgenda = Recipe(  # from here on each imported model name is rebound to a Recipe for that model
21 |     CityCouncilAgenda,
22 |     date=date(2020, 3, 18),
23 |     details="PROJETOS DE LEI ORDINÁRIA EM 2ª DISCUSSÃO 017/20",
24 |     event_type="sessao_ordinaria",  # NOTE(review): not among the event_type choices in migration 0001 - confirm it was added later
25 |     title="ORDEM DO DIA - 18 DE MARÇO DE 2020",
26 | )
27 | 
28 | 
29 | CityCouncilAttendanceList = Recipe(
30 |     CityCouncilAttendanceList,
31 |     date=date(2020, 2, 3),
32 |     description="Abertura da 1ª etapa do 4º período da 18ª legislatura",
33 |     council_member="Competente da Silva",
34 |     status="presente",
35 | )
36 | 
37 | 
38 | CityCouncilBid = Recipe(CityCouncilBid)  # recipes without keyword arguments fix no field values
39 | 
40 | 
41 | CityCouncilContract = Recipe(CityCouncilContract)
42 | 
43 | 
44 | CityCouncilExpense = Recipe(CityCouncilExpense)
45 | 
46 | 
47 | CityCouncilMinute = Recipe(CityCouncilMinute)
48 | 
49 | 
50 | CityCouncilRevenue = Recipe(CityCouncilRevenue)
51 | 
52 | 
53 | CityHallBid = Recipe(CityHallBid)
54 | 
55 | 
56 | Gazette = Recipe(
57 |     Gazette,
58 | )
59 | 
60 | 
61 | GazetteEvent = Recipe(GazetteEvent, gazette=foreign_key(Gazette))  # Gazette here is the Recipe bound just above
62 | 
63 | 
64 | File = Recipe(
65 |     File,
66 | )
67 | 
68 | 
69 | SyncInformation = Recipe(
70 |     SyncInformation,
71 | )
72 | 


--------------------------------------------------------------------------------
/web/datasets/migrations/0001_initial.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.0 on 2020-02-02 02:00
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):  # first migration of the app: creates CityCouncilAgenda
 7 |     initial = True
 8 | 
 9 |     dependencies = []
10 | 
11 |     operations = [
12 |         migrations.CreateModel(
13 |             name="CityCouncilAgenda",
14 |             fields=[
15 |                 (
16 |                     "id",
17 |                     models.AutoField(
18 |                         auto_created=True,
19 |                         primary_key=True,
20 |                         serialize=False,
21 |                         verbose_name="ID",
22 |                     ),
23 |                 ),
24 |                 ("crawled_at", models.DateTimeField(auto_now_add=True)),  # set when the row is created
25 |                 ("updated_at", models.DateTimeField(auto_now=True)),  # refreshed on every save
26 |                 ("crawled_from", models.URLField(blank=True, null=True)),
27 |                 ("notes", models.TextField(blank=True, null=True)),
28 |                 ("date", models.DateField()),
29 |                 ("details", models.TextField(blank=True, null=True)),
30 |                 (
31 |                     "event_type",
32 |                     models.CharField(
33 |                         choices=[  # (stored value, display label) pairs
34 |                             ("ordem_do_dia", "Ordem do Dia"),
35 |                             ("sessao_solene", "Sessão Solene"),
36 |                             ("sessao_especial", "Sessão Especial"),
37 |                             ("audiencia_publica", "Audiência Pública"),
38 |                         ],
39 |                         max_length=20,
40 |                     ),
41 |                 ),
42 |                 ("title", models.CharField(blank=True, max_length=100, null=True)),
43 |             ],
44 |         ),
45 |     ]
46 | 


--------------------------------------------------------------------------------
/web/datasets/migrations/0016_auto_20200522_0647.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.0.5 on 2020-05-22 09:47
 2 | 
 3 | import django.db.models.deletion
 4 | from django.db import migrations, models
 5 | 
 6 | 
 7 | class Migration(migrations.Migration):  # drops per-model file_url/file_content and Gazette's search_vector
 8 |     dependencies = [
 9 |         ("datasets", "0015_drop_gazette_file_trigger"),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.RemoveIndex(  # the index goes before the search_vector field removed further down
14 |             model_name="gazette",
15 |             name="datasets_ga_search__1d3d09_gin",
16 |         ),
17 |         migrations.RemoveField(
18 |             model_name="citycouncilminute",
19 |             name="file_content",
20 |         ),
21 |         migrations.RemoveField(
22 |             model_name="citycouncilminute",
23 |             name="file_url",
24 |         ),
25 |         migrations.RemoveField(
26 |             model_name="cityhallbid",
27 |             name="file_content",
28 |         ),
29 |         migrations.RemoveField(
30 |             model_name="cityhallbid",
31 |             name="file_url",
32 |         ),
33 |         migrations.RemoveField(
34 |             model_name="cityhallbidevent",
35 |             name="file_content",
36 |         ),
37 |         migrations.RemoveField(
38 |             model_name="cityhallbidevent",
39 |             name="file_url",
40 |         ),
41 |         migrations.RemoveField(
42 |             model_name="gazette",
43 |             name="file_content",
44 |         ),
45 |         migrations.RemoveField(
46 |             model_name="gazette",
47 |             name="file_url",
48 |         ),
49 |         migrations.RemoveField(
50 |             model_name="gazette",
51 |             name="search_vector",
52 |         ),
53 |         migrations.AlterField(  # GazetteEvent.gazette declared with related_name="events"
54 |             model_name="gazetteevent",
55 |             name="gazette",
56 |             field=models.ForeignKey(
57 |                 on_delete=django.db.models.deletion.CASCADE,
58 |                 related_name="events",
59 |                 to="datasets.Gazette",
60 |             ),
61 |         ),
62 |     ]
63 | 


--------------------------------------------------------------------------------
/web/datasets/migrations/0003_citycouncilattendancelist.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.0 on 2020-03-21 09:51
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
class Migration(migrations.Migration):
    """Create the ``CityCouncilAttendanceList`` model.

    Each row stores a date, a council member name, an optional description
    and an attendance status restricted to four choices, together with the
    crawl metadata columns (``crawled_at``, ``crawled_from``, ``notes``) and
    automatic creation/update timestamps.
    """

    dependencies = [
        ("datasets", "0002_auto_20200316_1905"),
    ]

    operations = [
        migrations.CreateModel(
            name="CityCouncilAttendanceList",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                # Timestamps maintained automatically on insert/update.
                ("created_at", models.DateTimeField(auto_now_add=True)),
                ("updated_at", models.DateTimeField(auto_now=True)),
                # Crawl metadata must be supplied by the caller (no defaults).
                ("crawled_at", models.DateTimeField()),
                ("crawled_from", models.URLField()),
                ("notes", models.TextField(blank=True, null=True)),
                ("date", models.DateField()),
                (
                    "description",
                    models.CharField(blank=True, max_length=200, null=True),
                ),
                ("council_member", models.CharField(max_length=200)),
                (
                    "status",
                    models.CharField(
                        choices=[
                            ("presente", "Presente"),
                            ("falta_justificada", "Falta Justificada"),
                            ("licenca_justificada", "Licença Justificada"),
                            ("ausente", "Ausente"),
                        ],
                        max_length=20,
                    ),
                ),
            ],
            options={
                "abstract": False,
            },
        ),
    ]
53 | 


--------------------------------------------------------------------------------
/web/home/static/home/hero.css:
--------------------------------------------------------------------------------
/* Page-wide background colour and system font stack. */
html, body {
  background: #EFF3F4;
  font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Oxygen-Sans, Ubuntu, Cantarell, "Helvetica Neue", sans-serif;
}

/* Cap the width of the hero text column. */
.hero-body .container {
  max-width: 700px;
}

/* Near-white hero title; !important overrides any competing colour rule. */
.hero-body .title {
  color: hsl(192, 17%, 99%) !important;
}

/* Near-white hero subtitle with extra spacing above and looser leading. */
.hero-body .subtitle {
  color: hsl(192, 17%, 99%) !important;
  padding-top: 2rem;
  line-height: 1.5;
}

/* Presentation image stretched to fill the element in both directions. */
.background {
  background-image: url('./imagem-apresentacao-dadosdefeira.png');
  background-size:100% 100%;
  background-repeat: no-repeat;
  background-position: center;
  background-origin: border-box;
  position: relative;
  /* NOTE(review): very large z-index; confirm what it needs to sit above. */
  z-index: 99999999;
}

/* Vertical breathing room around the features section. */
.features {
  padding: 5rem 0;
}

/* Call-to-action box: square corners, no side borders. */
.box.cta {
  border-radius: 0;
  border-left: none;
  border-right: none;
}

/* Large yellow icon placed directly inside a card image area. */
.card-image>.fa {
  font-size: 8rem;
  padding-top: 2rem;
  padding-bottom: 2rem;
  color: #ffcc00;
}

/* Card body text. */
.card-content .content {
  font-size: 14px;
  margin: 1rem 1rem;
}

/* Card body headings. */
.card-content .content h4 {
  font-size: 16px;
  font-weight: 700;
}

/* Soft drop shadow and spacing below each card. */
.card {
  box-shadow: 0px 2px 4px rgba(0, 0, 0, 0.18);
  margin-bottom: 2rem;
}

/* Centred intro section. */
.intro {
  padding: 5rem 0;
  text-align: center;
}

/* Sandbox section spacing. */
.sandbox {
  padding: 5rem 0;
}

/* Stack notification tile content vertically and centre it. */
.tile.notification {
  display: flex;
  justify-content: center;
  flex-direction: column;
}

/* White panel with a subtle shadow that animates in and reacts to hover. */
.is-shady {
  /* NOTE(review): no @keyframes flyintoright in this file; confirm it is defined elsewhere. */
  animation: flyintoright .4s backwards;
  background: #fff;
  box-shadow: rgba(0, 0, 0, .1) 0 1px 0;
  border-radius: 4px;
  display: inline-block;
  margin: 10px;
  position: relative;
  transition: all .2s ease-in-out;
}

/* Stronger shadow while hovered; eased by the transition above. */
.is-shady:hover {
  box-shadow: 0 10px 16px rgba(0, 0, 0, .13), 0 6px 6px rgba(0, 0, 0, .19);
}

/* Utility class to remove an element from layout. */
.hide {
  display: none;
}


--------------------------------------------------------------------------------
/web/api/serializers.py:
--------------------------------------------------------------------------------
 1 | from rest_framework import serializers
 2 | 
 3 | from web.datasets.models import (
 4 |     CityCouncilAgenda,
 5 |     CityCouncilAttendanceList,
 6 |     CityCouncilMinute,
 7 |     CityHallBid,
 8 |     CityHallBidEvent,
 9 |     File,
10 |     Gazette,
11 |     GazetteEvent,
12 | )
13 | 
14 | 
class CityCouncilAgendaSerializer(serializers.ModelSerializer):
    """Serialize every model field of ``CityCouncilAgenda``."""

    class Meta:
        model = CityCouncilAgenda
        fields = "__all__"
19 | 
20 | 
class CityCouncilAttendanceListSerializer(serializers.ModelSerializer):
    """Serialize every model field of ``CityCouncilAttendanceList``."""

    class Meta:
        model = CityCouncilAttendanceList
        fields = "__all__"
25 | 
26 | 
class FileSerializer(serializers.ModelSerializer):
    """Serialize a ``File`` exposing only its ``url``."""

    class Meta:
        model = File
        fields = ["url"]
31 | 
32 | 
class GazetteEventSerializer(serializers.ModelSerializer):
    """Serialize the title, secretariat, summary and publication date of a ``GazetteEvent``."""

    class Meta:
        model = GazetteEvent
        fields = ["title", "secretariat", "summary", "published_on"]
37 | 
38 | 
class GazetteSerializer(serializers.ModelSerializer):
    """Serialize a ``Gazette`` together with its nested events and files."""

    # Nested representations; ``files`` may be omitted from input.
    events = GazetteEventSerializer(many=True)
    files = FileSerializer(many=True, required=False)

    class Meta:
        model = Gazette
        fields = [
            "crawled_from",
            "date",
            "power",
            "year_and_edition",
            "events",
            "files",
        ]
53 | 
54 | 
class CityCouncilMinuteSerializer(serializers.ModelSerializer):
    """Serialize every model field of ``CityCouncilMinute`` plus nested files."""

    # NOTE(review): unlike CityHallBidSerializer this nested field is not
    # read_only; confirm whether write support is intended here.
    files = FileSerializer(many=True)

    class Meta:
        model = CityCouncilMinute
        fields = "__all__"
61 | 
62 | 
class CityHallBidEventSerializer(serializers.ModelSerializer):
    """Serialize every model field of ``CityHallBidEvent``."""

    class Meta:
        model = CityHallBidEvent
        fields = "__all__"
67 | 
68 | 
class CityHallBidSerializer(serializers.ModelSerializer):
    """Serialize every model field of ``CityHallBid`` plus read-only nested events and files."""

    # Both nested collections are output-only.
    events = CityHallBidEventSerializer(many=True, read_only=True)
    files = FileSerializer(many=True, read_only=True)

    class Meta:
        model = CityHallBid
        fields = "__all__"
76 | 


--------------------------------------------------------------------------------
/web/datasets/migrations/0028_auto_20210703_0457.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.2.4 on 2021-07-03 07:57
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
class Migration(migrations.Migration):
    """Redefine ``event_type`` on the city council agenda and minute models.

    Both fields become optional (``blank=True, null=True``), indexed,
    30 characters long and limited to the same seven choices. Only the
    ``verbose_name`` differs between the two models.
    """

    dependencies = [
        ("datasets", "0027_auto_20210501_0839"),
    ]

    operations = [
        migrations.AlterField(
            model_name="citycouncilagenda",
            name="event_type",
            field=models.CharField(
                blank=True,
                choices=[
                    ("sessao_ordinaria", "Sessão Ordinária"),
                    ("ordem_do_dia", "Ordem do Dia"),
                    ("sessao_solene", "Sessão Solene"),
                    ("sessao_especial", "Sessão Especial"),
                    ("audiencia_publica", "Audiência Pública"),
                    ("sessao_extraordinaria", "Sessão Extraordinária"),
                    ("termo_de_encerramento", "Termo de Encerramento"),
                ],
                db_index=True,
                max_length=30,
                null=True,
                verbose_name="Tipo do evento",
            ),
        ),
        migrations.AlterField(
            model_name="citycouncilminute",
            name="event_type",
            field=models.CharField(
                blank=True,
                choices=[
                    ("sessao_ordinaria", "Sessão Ordinária"),
                    ("ordem_do_dia", "Ordem do Dia"),
                    ("sessao_solene", "Sessão Solene"),
                    ("sessao_especial", "Sessão Especial"),
                    ("audiencia_publica", "Audiência Pública"),
                    ("sessao_extraordinaria", "Sessão Extraordinária"),
                    ("termo_de_encerramento", "Termo de Encerramento"),
                ],
                db_index=True,
                max_length=30,
                null=True,
                verbose_name="Tipo de evento",
            ),
        ),
    ]
53 | 


--------------------------------------------------------------------------------
/.github/workflows/cicd.yml:
--------------------------------------------------------------------------------
# Continuous integration and deployment workflow.
name: CI

# Triggered by every push.
on: [ push ]

jobs:
  # Lint, check for missing migrations and run the test suite.
  build:
    runs-on: ubuntu-20.04
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          python-version: 3.8
      - name: Install Dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r dev_requirements.txt
      - name: Lint
        run: |
          pre-commit run --all-files
      # Fails the build when model changes have no corresponding migration.
      - name: Check migrations
        env:
          DJANGO_SETTINGS_MODULE: "web.settings"
          DATABASE_URL: "postgres://postgres:postgres@localhost:5432/mariaquiteria"
        run: python manage.py makemigrations --check
      # NOTE(review): collectstatic runs without --noinput; confirm it can
      # never prompt for confirmation in this non-interactive environment.
      - name: Run Tests
        env:
          DJANGO_SETTINGS_MODULE: "web.settings"
          DJANGO_CONFIGURATION: "Test"
          DATABASE_URL: "postgres://postgres:postgres@localhost:5432/mariaquiteria"
        run: |
          python manage.py collectstatic
          pytest
    # Service containers available to the steps above on localhost.
    services:
      postgres:
        image: library/postgres:11-alpine
        env:
          POSTGRES_PASSWORD: postgres
          POSTGRES_DB: mariaquiteria
        ports:
          - 5432:5432
        # Health check options so the database is ready before it is used.
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
      rabbitmq:
        image: rabbitmq
        env:
          RABBITMQ_DEFAULT_USER: guest
          RABBITMQ_DEFAULT_PASS: guest
        ports:
          - 5672:5672
  # Push the repository to Dokku; only for the main branch, after build.
  deploy:
    runs-on: ubuntu-20.04
    if: github.ref == 'refs/heads/main'

    steps:
      - uses: actions/checkout@v2
        with:
          # Full history (not a shallow clone) is fetched for the git push.
          fetch-depth: 0
      - name: Push to dokku
        uses: dokku/github-action@v1.0.2
        with:
          branch: main
          ssh_private_key: ${{ secrets.SSH_PRIVATE_KEY }}
          git_remote_url: ${{ secrets.DOKKU_REMOTE_URL }}
          ssh_host_key: ${{ secrets.SSH_HOST_KEY }}
    needs: [build]
70 | 


--------------------------------------------------------------------------------
/scraper/items.py:
--------------------------------------------------------------------------------
 1 | import scrapy
 2 | 
 3 | 
class BaseItem(scrapy.Item):
    """Fields shared by every item class in this module."""

    crawled_at = scrapy.Field()
    crawled_from = scrapy.Field()
    git_commit = scrapy.Field()
    errors = scrapy.Field()
 9 | 
10 | 
class LegacyGazetteItem(BaseItem):
    """Item for a legacy gazette: title, dates, details and attached files."""

    title = scrapy.Field()
    published_on = scrapy.Field()
    date = scrapy.Field()
    details = scrapy.Field()
    files = scrapy.Field()
17 | 
18 | 
class GazetteItem(BaseItem):
    """Item for a gazette edition: date, power, edition, events and files."""

    date = scrapy.Field()
    power = scrapy.Field()
    year_and_edition = scrapy.Field()
    events = scrapy.Field()
    files = scrapy.Field()
25 | 
26 | 
class CityCouncilAgendaItem(BaseItem):
    """Item for a city council agenda entry."""

    date = scrapy.Field()
    details = scrapy.Field()
    title = scrapy.Field()
    event_type = scrapy.Field()
32 | 
33 | 
class CityCouncilAttendanceListItem(BaseItem):
    """Item for one council member's attendance status on a given date."""

    date = scrapy.Field()
    description = scrapy.Field()
    council_member = scrapy.Field()
    status = scrapy.Field()
39 | 
40 | 
class CityCouncilMinuteItem(BaseItem):
    """Item for a city council minute and its attached files."""

    date = scrapy.Field()
    title = scrapy.Field()
    event_type = scrapy.Field()
    files = scrapy.Field()
46 | 
47 | 
class CityHallContractItem(BaseItem):
    """Item for a city hall contract: identification, period, contractor and value."""

    contract_id = scrapy.Field()
    starts_at = scrapy.Field()
    summary = scrapy.Field()
    contractor_document = scrapy.Field()  # CNPJ or CPF
    contractor_name = scrapy.Field()
    value = scrapy.Field()
    ends_at = scrapy.Field()
    files = scrapy.Field()
57 | 
58 | 
class CityHallBidItem(BaseItem):
    """Item for a city hall bid, including its history and attached files."""

    public_agency = scrapy.Field()
    month = scrapy.Field()
    year = scrapy.Field()
    description = scrapy.Field()
    history = scrapy.Field()
    codes = scrapy.Field()
    modality = scrapy.Field()
    session_at = scrapy.Field()
    files = scrapy.Field()
69 | 
70 | 
class CityHallPaymentsItem(BaseItem):
    """Item for a city hall payment record and its budget classification fields."""

    published_at = scrapy.Field()
    phase = scrapy.Field()
    company_or_person = scrapy.Field()
    value = scrapy.Field()
    number = scrapy.Field()
    document = scrapy.Field()
    date = scrapy.Field()
    process_number = scrapy.Field()
    summary = scrapy.Field()
    group = scrapy.Field()
    action = scrapy.Field()
    function = scrapy.Field()
    subfunction = scrapy.Field()
    type_of_process = scrapy.Field()
    resource = scrapy.Field()
87 | 


--------------------------------------------------------------------------------
/web/datasets/management/commands/_citycouncil.py:
--------------------------------------------------------------------------------
 1 | from django.contrib.admin.options import get_content_type_for_model
 2 | 
 3 | from web.datasets.management.commands._file import save_file
 4 | from web.datasets.models import (
 5 |     CityCouncilAgenda,
 6 |     CityCouncilAttendanceList,
 7 |     CityCouncilExpense,
 8 |     CityCouncilMinute,
 9 | )
10 | 
11 | 
def save_agenda(item):
    """Create or update the ``CityCouncilAgenda`` matching a scraped item.

    The record is looked up by ``date``, ``title``, ``event_type`` and
    ``crawled_from``; ``crawled_at`` and ``details`` are (re)written on it.

    Args:
        item: mapping with the keys listed above.

    Returns:
        The created or updated ``CityCouncilAgenda`` instance.
    """
    lookup = {
        "date": item["date"],
        "title": item["title"],
        "event_type": item["event_type"],
        "crawled_from": item["crawled_from"],
    }
    refreshed = {"crawled_at": item["crawled_at"], "details": item["details"]}
    agenda, _created = CityCouncilAgenda.objects.update_or_create(
        defaults=refreshed, **lookup
    )
    return agenda
21 | 
22 | 
def save_attendance_list(item):
    """Create or update a ``CityCouncilAttendanceList`` row from a scraped item.

    Rows are identified by ``date`` and ``council_member``; the crawl
    metadata and the ``status`` (``None`` when the item has no such key)
    are (re)written on the matching row.

    Args:
        item: mapping with ``date``, ``council_member``, ``crawled_at`` and
            ``crawled_from``; ``status`` is optional.

    Returns:
        The created or updated ``CityCouncilAttendanceList`` instance.
    """
    refreshed = {
        "crawled_at": item["crawled_at"],
        "crawled_from": item["crawled_from"],
        "status": item.get("status"),
    }
    attendance, _created = CityCouncilAttendanceList.objects.update_or_create(
        defaults=refreshed,
        date=item["date"],
        council_member=item["council_member"],
    )
    return attendance
34 | 
35 | 
def save_expense(item):
    """Fetch or create the ``CityCouncilExpense`` described by a scraped item.

    Every expense attribute takes part in the lookup, so an existing row is
    reused only when all of them match. The crawl metadata is applied only
    when a new row is created.

    Args:
        item: mapping containing all lookup keys plus ``crawled_at`` and
            ``crawled_from``.

    Returns:
        The fetched or newly created ``CityCouncilExpense`` instance.
    """
    lookup_fields = (
        "published_at",
        "phase",
        "company_or_person",
        "value",
        "number",
        "document",
        "date",
        "process_number",
        "summary",
        "legal_status",
        "function",
        "subfunction",
        "type_of_process",
        "resource",
        "subgroup",
        "group",
    )
    lookup = {field: item[field] for field in lookup_fields}
    crawl_metadata = {
        "crawled_at": item["crawled_at"],
        "crawled_from": item["crawled_from"],
    }
    expense, _created = CityCouncilExpense.objects.get_or_create(
        defaults=crawl_metadata, **lookup
    )
    return expense
60 | 
61 | 
def save_minute(item):
    """Fetch or create a ``CityCouncilMinute`` and attach its files once.

    The minute is identified by ``date`` and ``crawled_from``. Files listed
    in the item are saved only when the minute was just created.

    Args:
        item: mapping with ``date``, ``crawled_from``, ``title``,
            ``event_type`` and ``crawled_at``; ``files`` is optional.

    Returns:
        The fetched or newly created ``CityCouncilMinute`` instance.
    """
    initial_values = {
        "title": item["title"],
        "event_type": item["event_type"],
        "crawled_at": item["crawled_at"],
    }
    minute, created = CityCouncilMinute.objects.get_or_create(
        date=item["date"],
        crawled_from=item["crawled_from"],
        defaults=initial_values,
    )
    # Existing minutes keep whatever files they already have.
    if not created:
        return minute
    if not item.get("files"):
        return minute

    content_type = get_content_type_for_model(minute)
    for scraped_file in item["files"]:
        save_file(scraped_file, content_type, minute.pk)
    return minute
77 | 


--------------------------------------------------------------------------------
/web/datasets/migrations/0005_auto_20200327_1348.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.0 on 2020-03-27 16:48
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
class Migration(migrations.Migration):
    """Create ``CityCouncilMinute`` and extend the agenda event types.

    The new model stores a date, an optional title, an event type and
    optional file URL/content, plus crawl metadata and automatic
    timestamps. ``citycouncilagenda.event_type`` is redefined with the same
    five choices used by the new model.
    """

    dependencies = [
        ("datasets", "0004_auto_20200321_0817"),
    ]

    operations = [
        migrations.CreateModel(
            name="CityCouncilMinute",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                # Timestamps maintained automatically on insert/update.
                ("created_at", models.DateTimeField(auto_now_add=True)),
                ("updated_at", models.DateTimeField(auto_now=True)),
                ("crawled_at", models.DateTimeField()),
                ("crawled_from", models.URLField()),
                ("notes", models.TextField(blank=True, null=True)),
                ("date", models.DateField()),
                ("title", models.CharField(blank=True, max_length=300, null=True)),
                (
                    "event_type",
                    models.CharField(
                        choices=[
                            ("sessao_ordinaria", "Sessão Ordinária"),
                            ("ordem_do_dia", "Ordem do Dia"),
                            ("sessao_solene", "Sessão Solene"),
                            ("sessao_especial", "Sessão Especial"),
                            ("audiencia_publica", "Audiência Pública"),
                        ],
                        max_length=20,
                    ),
                ),
                ("file_url", models.URLField(blank=True, null=True)),
                ("file_content", models.TextField(blank=True, null=True)),
            ],
            options={
                "verbose_name": "Câmara de Vereadores - Atas",
                "verbose_name_plural": "Câmara de Vereadores - Atas",
            },
        ),
        # Same choice list as the minute model created above.
        migrations.AlterField(
            model_name="citycouncilagenda",
            name="event_type",
            field=models.CharField(
                choices=[
                    ("sessao_ordinaria", "Sessão Ordinária"),
                    ("ordem_do_dia", "Ordem do Dia"),
                    ("sessao_solene", "Sessão Solene"),
                    ("sessao_especial", "Sessão Especial"),
                    ("audiencia_publica", "Audiência Pública"),
                ],
                max_length=20,
            ),
        ),
    ]
67 | 


--------------------------------------------------------------------------------
/scraper/settings.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from pathlib import Path
 3 | 
 4 | from tcmba.items import DocumentItem as TCMBADocumentItem
 5 | 
 6 | from .items import (
 7 |     CityCouncilAgendaItem,
 8 |     CityCouncilAttendanceListItem,
 9 |     CityCouncilMinuteItem,
10 |     CityHallBidItem,
11 |     CityHallContractItem,
12 |     CityHallPaymentsItem,
13 |     GazetteItem,
14 |     LegacyGazetteItem,
15 | )
16 | 
# general
BOT_NAME = "maria-quiteria"
SPIDER_MODULES = ["scraper.spiders"]
NEWSPIDER_MODULE = "scraper.spiders"
ROBOTSTXT_OBEY = True
COOKIES_ENABLED = False
EXTENSIONS = {
    "scraper.extensions.SentryLogging": -1,
    "spidermon.contrib.scrapy.extensions.Spidermon": 500,
}
# Empty string when the environment does not provide a DSN.
SENTRY_DSN = os.getenv("SENTRY_DSN", "")

# pipelines
# Validation (200) is ordered before default-value filling (300).
ITEM_PIPELINES = {
    "spidermon.contrib.scrapy.pipelines.ItemValidationPipeline": 200,
    "scraper.pipelines.DefaultValuesPipeline": 300,
}

# http cache
HTTPCACHE_ENABLED = True
HTTPCACHE_EXPIRATION_SECS = 86400  # 24 hours

# testing
SPIDERMON_ENABLED = True
SPIDERMON_VALIDATION_ADD_ERRORS_TO_ITEMS = True
# Matches the ``errors`` field declared on the item classes.
SPIDERMON_VALIDATION_ERRORS_FIELD = "errors"
# Item class -> dotted path of the validator used for it.
SPIDERMON_VALIDATION_MODELS = {
    LegacyGazetteItem: "scraper.validators.LegacyGazetteItem",
    GazetteItem: "scraper.validators.GazetteItem",
    CityCouncilAgendaItem: "scraper.validators.CityCouncilAgendaItem",
    CityCouncilMinuteItem: "scraper.validators.CityCouncilMinuteItem",
    CityHallContractItem: "scraper.validators.CityHallContractItem",
    CityHallBidItem: "scraper.validators.CityHallBidItem",
    CityHallPaymentsItem: "scraper.validators.CityHallPaymentsItem",
    CityCouncilAttendanceListItem: "scraper.validators.CityCouncilAttendanceListItem",
    TCMBADocumentItem: "scraper.validators.TCMBADocumentItem",
}

# monitoring
SPIDERMON_SPIDER_CLOSE_MONITORS = ("scraper.monitors.SpiderCloseMonitorSuite",)

# bot
SPIDERMON_TELEGRAM_SENDER_TOKEN = os.getenv("TELEGRAM_SENDER_TOKEN", "fake")
# NOTE(review): evaluates to [None] when TELEGRAM_CHANNEL is unset.
SPIDERMON_TELEGRAM_RECIPIENTS = [os.getenv("TELEGRAM_CHANNEL", None)]
# NOTE(review): os.getenv returns a string when the variable is set, so the
# value is only a real bool (False) when it is unset; confirm the consumer
# parses strings such as "False" as intended.
SPIDERMON_TELEGRAM_FAKE = os.getenv("SPIDERMON_TELEGRAM_FAKE", False)
SPIDERMON_DISCORD_WEBHOOK_URL = os.getenv("SPIDERMON_DISCORD_WEBHOOK_URL", "fake")

# sentry
SPIDERMON_SENTRY_DSN = SENTRY_DSN
SPIDERMON_SENTRY_PROJECT_NAME = "MariaQuiteria - Scraper"
SPIDERMON_SENTRY_ENVIRONMENT_TYPE = os.getenv(
    "SPIDERMON_SENTRY_ENVIRONMENT_TYPE", "Prod"
)
# NOTE(review): same string-vs-bool caveat as SPIDERMON_TELEGRAM_FAKE.
SPIDERMON_SENTRY_FAKE = os.getenv("SPIDERMON_SENTRY_FAKE", False)
71 | 
# throttling
AUTOTHROTTLE_ENABLED = True

# Environment variables are always strings, so testing the raw value for
# truthiness would turn debugging on for "0" or "false". Any non-empty value
# other than an explicit "off" spelling still enables it, as before.
AUTOTHROTTLE_DEBUG = os.getenv("ENABLE_AUTOTHROTTLE_DEBUG", "").strip().lower() not in {
    "",
    "0",
    "false",
    "no",
    "off",
}
77 | 
# Download directory: "files" under the current working directory.
# The directory (and any missing parents) is created when this module is
# imported; an already existing directory is not an error.
FILES_STORE = Path.cwd() / "files"
FILES_STORE.mkdir(parents=True, exist_ok=True)
80 | 


--------------------------------------------------------------------------------
/scraper/monitors.py:
--------------------------------------------------------------------------------
 1 | from spidermon import MonitorSuite
 2 | from spidermon.contrib.actions.discord import SendDiscordMessage
 3 | from spidermon.contrib.actions.telegram import SendTelegramMessage
 4 | 
 5 | 
 6 | def find_exceptions(stats):
 7 |     exceptions = []
 8 |     for key, value in stats.items():
 9 |         if key.startswith("spider_exceptions"):
10 |             exceptions.append(f"`{key}` ({value})")
11 |         elif key.startswith("downloader/response_status_count/4"):
12 |             exceptions.append(f"Página não encontrada ({value})")
13 |     return exceptions
14 | 
15 | 
class CustomSendTelegramMessage(SendTelegramMessage):
    """Telegram action whose message summarises the finished spider run."""

    def get_message(self):
        """Build the notification text from the crawl stats.

        The summary lists the spider name and finish reason, the elapsed
        time, the number of scraped items, the number of entries in
        ``self.result.failures`` and the problems reported by
        ``find_exceptions``.

        Returns:
            The multi-line message string.
        """
        crawl_stats = self.data.stats
        scraped_total = crawl_stats.get("item_scraped_count", 0)

        problems = find_exceptions(crawl_stats)
        # Joining an empty list already yields an empty string.
        problems_text = "\n".join(problems)

        failure_total = len(self.result.failures)
        problem_total = len(problems)
        # Skull when anything failed or problems outnumber scraped items.
        if failure_total > 0 or scraped_total < problem_total:
            emoji = "💀"
        else:
            emoji = "🎉"

        headline = (
            f"{emoji} Spider `{self.data.spider.name}` {crawl_stats['finish_reason']}"
        )
        duration = (
            f"- Duração em segundos: {round(crawl_stats['elapsed_time_seconds'], 1)}"
        )
        lines = [
            headline,
            duration,
            f"- Itens raspados: {scraped_total}",
            f"- Erros: {failure_total}",
            f"- Exceções: {problem_total}\n{problems_text}",
        ]
        return "\n".join(lines)
46 | 
47 | 
class CustomSendDiscordMessage(SendDiscordMessage):
    """Discord action whose message summarises the finished spider run."""

    def get_message(self):
        """Build the notification text from the crawl stats.

        The summary lists the spider name and finish reason, the elapsed
        time, the number of scraped items, the number of entries in
        ``self.result.failures`` and the problems reported by
        ``find_exceptions``.

        Returns:
            The multi-line message string.
        """
        crawl_stats = self.data.stats
        scraped_total = crawl_stats.get("item_scraped_count", 0)

        problems = find_exceptions(crawl_stats)
        # Joining an empty list already yields an empty string.
        problems_text = "\n".join(problems)

        failure_total = len(self.result.failures)
        problem_total = len(problems)
        # Skull when anything failed or problems outnumber scraped items.
        if failure_total > 0 or scraped_total < problem_total:
            emoji = "💀"
        else:
            emoji = "🎉"

        headline = (
            f"{emoji} Spider `{self.data.spider.name}` {crawl_stats['finish_reason']}"
        )
        duration = (
            f"- Duração em segundos: {round(crawl_stats['elapsed_time_seconds'], 1)}"
        )
        lines = [
            headline,
            duration,
            f"- Itens raspados: {scraped_total}",
            f"- Erros: {failure_total}",
            f"- Exceções: {problem_total}\n{problems_text}",
        ]
        return "\n".join(lines)
78 | 
79 | 
class SpiderCloseMonitorSuite(MonitorSuite):
    """Monitor suite that sends the Telegram and Discord summaries when monitors finish."""

    monitors_finished_actions = [CustomSendTelegramMessage, CustomSendDiscordMessage]
82 | 


--------------------------------------------------------------------------------
/web/datasets/parsers.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import unicodedata
  3 | from datetime import datetime
  4 | 
  5 | from dateutil.parser import ParserError, parse
  6 | 
  7 | logger = logging.getLogger(__name__)
  8 | 
  9 | 
 10 | def get_phase(value):
 11 |     mapping = {
 12 |         "emp": "empenho",
 13 |         "liq": "liquidacao",
 14 |         "pag": "pagamento",
 15 |     }
 16 |     return mapping.get(value.lower().strip(), None)
 17 | 
 18 | 
 19 | def currency_to_float(value):
 20 |     """Converte de R$ 69.848,70 (str) para 69848.70 (float)."""
 21 |     try:
 22 |         # format 37500.36 or '37500.36
 23 |         return float(value.replace("'", ""))
 24 |     except ValueError:
 25 |         # format R$ 37.500,36 or 37.500,36
 26 |         cleaned_value = value.replace("R$", "").replace(".", "").replace(",", ".")
 27 |         try:
 28 |             return float(cleaned_value)
 29 |         except ValueError:
 30 |             pass
 31 |     return
 32 | 
 33 | 
 34 | def to_boolean(value):
 35 |     return value.lower() in ["y", "s", 1]
 36 | 
 37 | 
 38 | def from_str_to_datetime(date_str, supported_formats=None):
 39 |     if date_str is None:
 40 |         return
 41 |     try:
 42 |         converted_date = parse(date_str, dayfirst=True)
 43 |     except ParserError:
 44 |         pass
 45 |     else:
 46 |         reference_date = datetime(1833, 9, 18)
 47 |         if converted_date >= reference_date:
 48 |             return converted_date
 49 |     return
 50 | 
 51 | 
 52 | def from_str_to_date(date_str, supported_formats=["%d/%m/%Y", "%d/%m/%y", "%Y-%m-%d"]):
 53 |     if date_str is None:
 54 |         return
 55 |     datetime_obj = from_str_to_datetime(date_str, supported_formats)
 56 |     if datetime_obj:
 57 |         return datetime_obj.date()
 58 | 
 59 | 
 60 | def lower(value):
 61 |     if value:
 62 |         return value.lower()
 63 | 
 64 | 
 65 | def lower_without_spaces(value):
 66 |     if value:
 67 |         return strip_accents(value.lower()).replace(" ", "_")
 68 | 
 69 | 
 70 | def city_council_bid_modality_mapping(code):
 71 |     mapping = {
 72 |         "1": "pregao_eletronico",
 73 |         "2": "convite",
 74 |         "3": "concorrencia",
 75 |         "4": "tomada_de_precos",
 76 |         "5": "concurso",
 77 |         "6": "leilao",
 78 |         "7": "pregao_presencial",
 79 |         "8": "dispensada",
 80 |         "9": "inexigibilidade",
 81 |     }
 82 |     found = mapping.get(code)
 83 |     if found:
 84 |         return found
 85 |     else:
 86 |         logger.warning(f"Código da modalidade não encontrado: {code}")
 87 | 
 88 | 
 89 | def city_council_revenue_type_mapping(code):
 90 |     mapping = {
 91 |         "ORC": "orcamentaria",
 92 |         "NORC": "nao_orcamentaria",
 93 |         "TRANSF": "transferencia",
 94 |     }
 95 |     found = mapping.get(code)
 96 |     if found:
 97 |         return found
 98 |     else:
 99 |         logger.warning(f"Código da tipo de receita não encontrado: {code}")
100 | 
101 | 
def strip_accents(string):
    """Remove accents from ``string``.

    The text is decomposed with NFD normalisation and every character whose
    Unicode category is ``Mn`` is dropped.

    Returns:
        The text without those marks, or ``None`` when ``string`` is ``None``.
    """
    if string is None:
        return None
    decomposed = unicodedata.normalize("NFD", string)
    kept = [char for char in decomposed if unicodedata.category(char) != "Mn"]
    return "".join(kept)
110 | 


--------------------------------------------------------------------------------
/web/datasets/management/commands/crawl_tcmba.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | from datetime import date
 4 | 
 5 | from dateutil.relativedelta import relativedelta
 6 | from django.core.management.base import BaseCommand
 7 | from scrapy import signals
 8 | from scrapy.crawler import CrawlerProcess
 9 | from scrapy.signalmanager import dispatcher
10 | from scrapy.utils.project import get_project_settings
11 | from tcmba.items import DocumentItem
12 | from tcmba.spiders.consulta_publica import ConsultaPublicaSpider
13 | 
14 | from web.datasets.management.commands._tcmba import save_document
15 | 
16 | 
class Command(BaseCommand):
    """Scrape the documents of one unit from TCM-BA.

    Units:
        "Camara Municipal de FEIRA DE SANTANA"
        "Agência Reguladora de Feira de Santana - ARFES"
        "Fundação Hospitalar de Feira de Santana"
        "Superintendência Municipal de Proteção e Defesa do Consumidor"
        "Consórcio Público Interfederativo De Saúde Da Região de Feira de Santana"
        "Fundação Cultural Municipal Egberto Tavares Costa"
        "Superintendência Municipal de Trânsito - SMT"
        "Instituto de Previdência de Feira de Santana - IPFS"
    """

    help = "Executa raspador de documentos públicos do TCM-BA e salva no banco."

    def add_arguments(self, parser):
        """Register the command-line options accepted by the command."""
        default_unit = "Prefeitura Municipal de FEIRA DE SANTANA"
        parser.add_argument("--period")
        parser.add_argument("--period-type", default="mensal")
        parser.add_argument("--unit", default=default_unit)
        parser.add_argument("--scrapy-args")

    def echo(self, text, style=None):
        """Write ``text`` to stdout, passing it through ``style`` when given."""
        if style:
            text = style(text)
        self.stdout.write(text)

    def warn(self, text):
        """Write ``text`` with the warning style."""
        return self.echo(text, self.style.WARNING)

    def success(self, text):
        """Write ``text`` with the success style."""
        return self.echo(text, self.style.SUCCESS)

    def save(self, signal, sender, item, response, spider):
        """Signal handler: persist the item when it is a ``DocumentItem``."""
        if not isinstance(item, DocumentItem):
            return
        save_document(item)

    def handle(self, *args, **options):
        """Configure Scrapy, run ``ConsultaPublicaSpider`` and report progress."""
        period = options.get("period")
        if period:
            target_date = period
        else:
            # No period given: use the month two months before today.
            two_months_ago = date.today() + relativedelta(months=-2)
            target_date = two_months_ago.strftime("%m/%Y")

        # Every item signalled by the crawler is handed to ``self.save``.
        dispatcher.connect(self.save, signal=signals.item_passed)
        os.environ["SCRAPY_SETTINGS_MODULE"] = "scraper.settings"
        settings = get_project_settings()
        settings["COOKIES_ENABLED"] = True

        # Optional JSON object with extra Scrapy settings.
        raw_overrides = options.get("scrapy_args")
        if raw_overrides:
            settings.update(json.loads(raw_overrides))

        process = CrawlerProcess(settings=settings)

        spider_kwargs = dict(
            unidade=options.get("unit"),
            competencia=target_date,
            cidade="feira de santana",
            periodicidade=options.get("period_type"),
        )
        self.warn(str(spider_kwargs))
        process.crawl(ConsultaPublicaSpider, **spider_kwargs)
        self.warn("Iniciando a coleta dos documentos do TCM-BA...")
        process.start()
        self.success("Pronto!")
83 | 


--------------------------------------------------------------------------------
/web/datasets/migrations/0010_auto_20200515_0959.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 2.2.12 on 2020-05-15 12:59
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
class Migration(migrations.Migration):
    """Create ``CityCouncilContract`` and extend ``citycouncilexpense``.

    Adds the ``CityCouncilContract`` model and two optional character
    fields, ``external_file_code`` and ``external_file_line``, to the
    existing ``citycouncilexpense`` model.
    """

    # Must be applied after the previous migration of the ``datasets`` app.
    dependencies = [
        ("datasets", "0009_auto_20200514_1350"),
    ]

    operations = [
        # New model holding city council contracts.
        migrations.CreateModel(
            name="CityCouncilContract",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                # Bookkeeping columns: creation/update timestamps and crawl origin.
                ("created_at", models.DateTimeField(auto_now_add=True)),
                ("updated_at", models.DateTimeField(auto_now=True)),
                ("crawled_at", models.DateTimeField()),
                ("crawled_from", models.URLField()),
                ("notes", models.TextField(blank=True, null=True)),
                (
                    "external_code",
                    models.PositiveIntegerField(verbose_name="Código externo"),
                ),
                (
                    "description",
                    models.TextField(blank=True, null=True, verbose_name="Descrição"),
                ),
                (
                    "details",
                    models.TextField(
                        blank=True, null=True, verbose_name="Objeto do contrato"
                    ),
                ),
                (
                    "company_or_person_document",
                    models.CharField(
                        blank=True, max_length=50, null=True, verbose_name="CNPJ ou CPF"
                    ),
                ),
                (
                    "company_or_person",
                    models.TextField(
                        blank=True, null=True, verbose_name="Empresa ou pessoa"
                    ),
                ),
                # Up to 10 digits in total, 2 of them after the decimal point.
                (
                    "value",
                    models.DecimalField(
                        decimal_places=2, max_digits=10, verbose_name="Valor"
                    ),
                ),
                ("start_date", models.DateField(verbose_name="Data de início")),
                ("end_date", models.DateField(verbose_name="Data final")),
                ("excluded", models.BooleanField(default=False)),
            ],
            options={
                "verbose_name": "Câmara de Vereadores - Contrato",
                "verbose_name_plural": "Câmara de Vereadores - Contratos",
                # ``latest()`` / ``earliest()`` order by ``start_date`` by default.
                "get_latest_by": "start_date",
            },
        ),
        # Optional references added to the existing expense model.
        migrations.AddField(
            model_name="citycouncilexpense",
            name="external_file_code",
            field=models.CharField(blank=True, max_length=50, null=True),
        ),
        migrations.AddField(
            model_name="citycouncilexpense",
            name="external_file_line",
            field=models.CharField(blank=True, max_length=50, null=True),
        ),
    ]
82 | 


--------------------------------------------------------------------------------
/web/datasets/migrations/0007_citycouncilexpense.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.0.5 on 2020-04-10 18:47
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
class Migration(migrations.Migration):
    """Create the ``CityCouncilExpense`` model."""

    # Must be applied after the previous migration of the ``datasets`` app.
    dependencies = [
        ("datasets", "0006_gazette_search_vector"),
    ]

    operations = [
        migrations.CreateModel(
            name="CityCouncilExpense",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                # Bookkeeping columns: creation/update timestamps and crawl origin.
                ("created_at", models.DateTimeField(auto_now_add=True)),
                ("updated_at", models.DateTimeField(auto_now=True)),
                ("crawled_at", models.DateTimeField()),
                ("crawled_from", models.URLField()),
                ("notes", models.TextField(blank=True, null=True)),
                ("published_at", models.DateField()),
                # Restricted to the three choices listed below.
                (
                    "phase",
                    models.CharField(
                        choices=[
                            ("empenho", "Empenho"),
                            ("liquidacao", "Liquidação"),
                            ("pagamento", "Pagamento"),
                        ],
                        max_length=20,
                    ),
                ),
                ("company_or_person", models.TextField(blank=True, null=True)),
                # Up to 10 digits in total, 2 of them after the decimal point.
                (
                    "value",
                    models.DecimalField(
                        decimal_places=2, max_digits=10, verbose_name="Valor"
                    ),
                ),
                ("number", models.CharField(blank=True, max_length=50, null=True)),
                ("document", models.CharField(blank=True, max_length=50, null=True)),
                ("date", models.DateField()),
                (
                    "process_number",
                    models.CharField(blank=True, max_length=50, null=True),
                ),
                ("summary", models.TextField(blank=True, null=True)),
                (
                    "legal_status",
                    models.CharField(blank=True, max_length=200, null=True),
                ),
                ("function", models.CharField(blank=True, max_length=50, null=True)),
                ("subfunction", models.CharField(blank=True, max_length=50, null=True)),
                (
                    "type_of_process",
                    models.CharField(blank=True, max_length=50, null=True),
                ),
                ("resource", models.CharField(blank=True, max_length=200, null=True)),
                ("subgroup", models.CharField(blank=True, max_length=100, null=True)),
                ("group", models.CharField(blank=True, max_length=100, null=True)),
            ],
            options={
                "verbose_name": "Câmara de Vereadores - Despesa",
                "verbose_name_plural": "Câmara de Vereadores - Despesas",
            },
        ),
    ]
76 | 


--------------------------------------------------------------------------------
/web/datasets/tests/test_parsers.py:
--------------------------------------------------------------------------------
  1 | from datetime import date, datetime
  2 | 
  3 | import pytest
  4 | 
  5 | from web.datasets.parsers import (
  6 |     city_council_bid_modality_mapping,
  7 |     currency_to_float,
  8 |     from_str_to_date,
  9 |     from_str_to_datetime,
 10 |     lower_without_spaces,
 11 | )
 12 | 
 13 | 
 14 | @pytest.mark.parametrize(
 15 |     "original_value,expected_value",
 16 |     [
 17 |         ("R$ 69.848,70", 69848.70),
 18 |         ("69.848,70", 69848.70),
 19 |         ("R$ -69.848,70", -69848.70),
 20 |         ("1,70", 1.70),
 21 |         ("00,00", 0),
 22 |         ("Random", None),
 23 |         ("37500.36", 37500.36),
 24 |         ("37500", 37500.00),
 25 |         ("'37500.36", 37500.36),
 26 |         ("R$ 37.500,36", 37500.36),
 27 |     ],
 28 | )
 29 | def test_currency_to_float(original_value, expected_value):
 30 |     assert currency_to_float(original_value) == expected_value
 31 | 
 32 | 
 33 | @pytest.mark.parametrize(
 34 |     "datetime_str,expected_obj",
 35 |     [
 36 |         ("26/02/2020 19:28", datetime(2020, 2, 26, 19, 28)),
 37 |         ("26/2/2014 09:00", datetime(2014, 2, 26, 9, 0)),
 38 |         ("26/02/2020 19:28:00", datetime(2020, 2, 26, 19, 28, 0)),
 39 |         ("26/02/2020", datetime(2020, 2, 26)),
 40 |         ("26.02.20", datetime(2020, 2, 26)),
 41 |         ("05/02/23", datetime(2023, 2, 5)),
 42 |         (None, None),
 43 |         ("", None),
 44 |     ],
 45 | )
 46 | def test_possible_datetime(datetime_str, expected_obj):
 47 |     assert from_str_to_datetime(datetime_str) == expected_obj
 48 | 
 49 | 
 50 | @pytest.mark.parametrize(
 51 |     "date_str,expected_obj",
 52 |     [
 53 |         ("26/02/2020 19:28", date(2020, 2, 26)),
 54 |         ("26/2/2014 09:00", date(2014, 2, 26)),
 55 |         ("26/02/2020 19:28:00", date(2020, 2, 26)),
 56 |         ("26/02/2020", date(2020, 2, 26)),
 57 |         ("26/02/20", date(2020, 2, 26)),
 58 |         ("26.02.20", date(2020, 2, 26)),
 59 |         (None, None),
 60 |         ("", None),
 61 |     ],
 62 | )
 63 | def test_possible_date(date_str, expected_obj):
 64 |     assert from_str_to_date(date_str) == expected_obj
 65 | 
 66 | 
 67 | @pytest.mark.parametrize(
 68 |     "datetime_str,expected_obj",
 69 |     [
 70 |         ("18/05/2020", datetime(2020, 5, 18)),
 71 |         ("18/09/1833", datetime(1833, 9, 18)),
 72 |         ("17/09/1833", None),
 73 |         ("01/01/0001", None),
 74 |     ],
 75 | )
 76 | def test_dates_older_than_city_creation(datetime_str, expected_obj):
 77 |     assert from_str_to_datetime(datetime_str) == expected_obj
 78 | 
 79 | 
 80 | @pytest.mark.parametrize(
 81 |     "value,expected_modality",
 82 |     [
 83 |         ("1", "pregao_eletronico"),
 84 |         ("2", "convite"),
 85 |         ("3", "concorrencia"),
 86 |         ("4", "tomada_de_precos"),
 87 |         ("5", "concurso"),
 88 |         ("6", "leilao"),
 89 |         ("7", "pregao_presencial"),
 90 |         ("8", "dispensada"),
 91 |         ("9", "inexigibilidade"),
 92 |     ],
 93 | )
 94 | def test_modality_mapping_from_city_council_db(value, expected_modality):
 95 |     assert city_council_bid_modality_mapping(value) == expected_modality
 96 | 
 97 | 
 98 | @pytest.mark.parametrize(
 99 |     "value,expected",
100 |     [
101 |         ("Tomada de Preço", "tomada_de_preco"),
102 |         ("concorrencia", "concorrencia"),
103 |         ("", None),
104 |         (None, None),
105 |     ],
106 | )
107 | def test_lower_without_spaces(value, expected):
108 |     assert lower_without_spaces(value) == expected
109 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
 1 | # Código de Conduta para Colaboradores
 2 | 
 3 | ## Nossa promessa
 4 | 
 5 | Com o interesse de fomentar uma comunidade aberta e acolhedora,
 6 | nós, como colaboradores e administradores deste projeto, comprometemo-nos
 7 | a fazer da participação neste projeto uma experiência livre de assédio
 8 | para todos, independentemente da aparência pessoal, deficiência,
 9 | etnia, gênero, idade, identidade ou expressão de gênero, identidade
10 | ou orientação sexual, nacionalidade, nível de experiência, porte físico,
11 | raça ou religião.
12 | 
13 | ## Nossos padrões
14 | 
15 | Exemplos de comportamentos que contribuem para criar um ambiente positivo incluem:
16 | 
17 | * Usar linguagem acolhedora e inclusiva
18 | * Respeitar pontos de vista e experiências diferentes
19 | * Aceitar crítica construtiva com graça
20 | * Focar no que é melhor para a comunidade
21 | * Mostrar empatia com outros membros da comunidade
22 | 
23 | Exemplos de comportamentos inaceitáveis por parte dos participantes incluem:
24 | 
25 | * Uso de linguagem ou imagens sexuais e atenção ou avanços sexuais indesejados
26 | * Comentários insultuosos e/ou depreciativos e ataques pessoais ou políticos (*Trolling*)
27 | * Assédio público ou privado
28 | * Publicar informação pessoal de outros sem permissão explícita, como, por exemplo, um endereço eletrônico ou residencial
29 | * Qualquer outra forma de conduta que pode ser razoavelmente considerada inapropriada num ambiente profissional
30 | 
31 | ## Nossas responsabilidades
32 | 
33 | Os administradores do projeto são responsáveis por esclarecer os padrões de
34 | comportamento e deverão tomar ação corretiva apropriada e justa em resposta
35 | a qualquer instância de comportamento inaceitável.
36 | 
37 | Os administradores do projeto têm o direito e a responsabilidade de
38 | remover, editar ou rejeitar comentários, commits, código, edições
39 | na wiki, erros ou outras formas de contribuição que não estejam de
40 | acordo com este Código de Conduta, bem como banir temporariamente ou
41 | permanentemente qualquer colaborador por qualquer outro comportamento
42 | que se considere impróprio, perigoso, ofensivo ou problemático.
43 | 
44 | ## Escopo
45 | 
46 | Este Código de Conduta aplica-se dentro dos espaços do projeto ou
47 | qualquer espaço público onde alguém represente o mesmo ou a sua
48 | comunidade. Exemplos de representação do projeto ou comunidade incluem
49 | usar um endereço de email oficial do projeto, postar por uma conta de
50 | mídia social oficial, ou agir como um representante designado num evento
51 | online ou offline. A representação de um projeto pode ser ainda definida e
52 | esclarecida pelos administradores do projeto.
53 | 
54 | ## Aplicação
55 | 
56 | Comportamento abusivo, de assédio ou de outros tipos pode ser
57 | comunicado contatando a equipe do projeto no dadosabertosdefeira@gmail.com.
58 | Todas as queixas serão revistas e investigadas e
59 | resultarão numa resposta necessária e apropriada à situação.
60 | A equipe é obrigada a manter a confidencialidade em relação
61 | ao elemento que reportou o incidente. Demais detalhes de
62 | políticas de aplicação podem ser postados separadamente.
63 | 
64 | Administradores do projeto que não sigam ou não mantenham o Código
65 | de Conduta em boa fé podem enfrentar repercussões temporárias ou permanentes
66 | determinadas por outros membros da liderança do projeto.
67 | 
68 | ## Atribuição
69 | 
70 | Este Código de Conduta é adaptado do [Contributor Covenant](https://www.contributor-covenant.org),
71 | versão 1.4, disponível em https://www.contributor-covenant.org/pt-br/version/1/4/code-of-conduct.html
72 | 


--------------------------------------------------------------------------------
/web/datasets/management/commands/import.py:
--------------------------------------------------------------------------------
  1 | import csv
  2 | import os
  3 | from datetime import datetime
  4 | 
  5 | from django.conf import settings
  6 | from django.core.management.base import BaseCommand
  7 | 
  8 | from web.datasets.adapters import (
  9 |     to_citycouncil_bid,
 10 |     to_citycouncil_bid_file,
 11 |     to_citycouncil_contract,
 12 |     to_citycouncil_contract_file,
 13 |     to_citycouncil_expense,
 14 |     to_citycouncil_revenue,
 15 | )
 16 | from web.datasets.models import (
 17 |     CityCouncilBid,
 18 |     CityCouncilContract,
 19 |     CityCouncilExpense,
 20 |     CityCouncilRevenue,
 21 |     File,
 22 | )
 23 | 
 24 | mapping = {
 25 |     "citycouncil_expenses": {
 26 |         "model": CityCouncilExpense,
 27 |         "adapter": to_citycouncil_expense,
 28 |     },
 29 |     "citycouncil_contracts": {
 30 |         "model": CityCouncilContract,
 31 |         "adapter": to_citycouncil_contract,
 32 |     },
 33 |     "citycouncil_bids": {"model": CityCouncilBid, "adapter": to_citycouncil_bid},
 34 |     "citycouncil_revenues": {
 35 |         "model": CityCouncilRevenue,
 36 |         "adapter": to_citycouncil_revenue,
 37 |     },
 38 |     "citycouncil_contract_files": {
 39 |         "model": File,
 40 |         "adapter": to_citycouncil_contract_file,
 41 |     },
 42 |     "citycouncil_bid_files": {"model": File, "adapter": to_citycouncil_bid_file},
 43 | }
 44 | 
 45 | 
 46 | class Command(BaseCommand):
 47 |     help = "Importa dados de um arquivo CSV."
 48 | 
 49 |     def add_arguments(self, parser):
 50 |         parser.add_argument("source")
 51 |         parser.add_argument("file")
 52 |         parser.add_argument("--drop-all", action="store_true")
 53 | 
 54 |     def echo(self, text, style=None):
 55 |         self.stdout.write(style(text) if style else text)
 56 | 
 57 |     def warn(self, text):
 58 |         return self.echo(text, self.style.WARNING)
 59 | 
 60 |     def success(self, text):
 61 |         return self.echo(text, self.style.SUCCESS)
 62 | 
 63 |     def handle(self, *args, **options):
 64 |         self.echo(options.get("source"))
 65 |         self.echo(options.get("file"))
 66 | 
 67 |         source_map = mapping.get(options.get("source"))
 68 |         adapter = source_map["adapter"]
 69 |         model = source_map["model"]
 70 | 
 71 |         if options.get("drop_all"):
 72 |             if os.getenv("DJANGO_CONFIGURATION") == "Prod" and not options.get(
 73 |                 "source"
 74 |             ).endswith("_files"):
 75 |                 self.warn(
 76 |                     "VOCÊ ESTÁ EM AMBIENTE DE PRODUÇÃO E TODOS OS DADOS SERÃO APAGADOS."
 77 |                 )
 78 |             confirmation = input("Tem certeza? s/n ")
 79 |             if confirmation.lower() in ["s", "y"]:
 80 |                 model.objects.all().delete()
 81 | 
 82 |         saved = 0
 83 |         errors = 0
 84 |         with open(options.get("file"), newline="") as csv_file:
 85 |             reader = csv.DictReader(csv_file)
 86 | 
 87 |             for row in reader:
 88 |                 item = adapter(row)
 89 |                 if not options.get("source").endswith("_files"):
 90 |                     item["crawled_at"] = datetime.now()
 91 |                     item["crawled_from"] = settings.CITY_COUNCIL_WEBSERVICE
 92 |                 try:
 93 |                     model.objects.create(**item)
 94 |                     saved += 1
 95 |                 except Exception as e:
 96 |                     errors += 1
 97 |                     self.warn(f"{e}\n{str(row)}")
 98 | 
 99 |         self.success(f"Concluído!\nSalvos: {saved} Erros: {errors}")
100 | 


--------------------------------------------------------------------------------
/web/datasets/migrations/0030_alter_historicalcitycouncilattendancelist_options_and_more.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 4.1.5 on 2023-01-03 07:52
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
class Migration(migrations.Migration):
    """Update options and ``history_date`` on the ``historical*`` models.

    For each of the five historical city council models this sets the
    ``get_latest_by``, ``ordering`` and verbose-name options and redefines
    ``history_date`` as an indexed ``DateTimeField``.
    """

    # NOTE(review): the historical* models presumably come from a
    # history-tracking package (e.g. django-simple-history) — confirm.
    dependencies = [
        ("datasets", "0029_file_local_path"),
    ]

    operations = [
        # Model options: newest history entries first.
        migrations.AlterModelOptions(
            name="historicalcitycouncilattendancelist",
            options={
                "get_latest_by": ("history_date", "history_id"),
                "ordering": ("-history_date", "-history_id"),
                "verbose_name": "historical Câmara de Vereadores - Lista de Presença",
                "verbose_name_plural": "historical Câmara de Vereadores - Listas de Presença",
            },
        ),
        migrations.AlterModelOptions(
            name="historicalcitycouncilbid",
            options={
                "get_latest_by": ("history_date", "history_id"),
                "ordering": ("-history_date", "-history_id"),
                "verbose_name": "historical Câmara de Vereadores - Licitação",
                "verbose_name_plural": "historical Câmara de Vereadores - Licitações",
            },
        ),
        migrations.AlterModelOptions(
            name="historicalcitycouncilcontract",
            options={
                "get_latest_by": ("history_date", "history_id"),
                "ordering": ("-history_date", "-history_id"),
                "verbose_name": "historical Câmara de Vereadores - Contrato",
                "verbose_name_plural": "historical Câmara de Vereadores - Contratos",
            },
        ),
        migrations.AlterModelOptions(
            name="historicalcitycouncilexpense",
            options={
                "get_latest_by": ("history_date", "history_id"),
                "ordering": ("-history_date", "-history_id"),
                "verbose_name": "historical Câmara de Vereadores - Despesa",
                "verbose_name_plural": "historical Câmara de Vereadores - Despesas",
            },
        ),
        migrations.AlterModelOptions(
            name="historicalcitycouncilrevenue",
            options={
                "get_latest_by": ("history_date", "history_id"),
                "ordering": ("-history_date", "-history_id"),
                "verbose_name": "historical Câmara de Vereadores - Receita",
                "verbose_name_plural": "historical Câmara de Vereadores - Receitas",
            },
        ),
        # ``history_date`` becomes an indexed column on every historical model.
        migrations.AlterField(
            model_name="historicalcitycouncilattendancelist",
            name="history_date",
            field=models.DateTimeField(db_index=True),
        ),
        migrations.AlterField(
            model_name="historicalcitycouncilbid",
            name="history_date",
            field=models.DateTimeField(db_index=True),
        ),
        migrations.AlterField(
            model_name="historicalcitycouncilcontract",
            name="history_date",
            field=models.DateTimeField(db_index=True),
        ),
        migrations.AlterField(
            model_name="historicalcitycouncilexpense",
            name="history_date",
            field=models.DateTimeField(db_index=True),
        ),
        migrations.AlterField(
            model_name="historicalcitycouncilrevenue",
            name="history_date",
            field=models.DateTimeField(db_index=True),
        ),
    ]
83 | 


--------------------------------------------------------------------------------
/web/datasets/management/commands/_gazette.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | from datetime import date
  3 | 
  4 | from django.contrib.admin.options import get_content_type_for_model
  5 | 
  6 | from web.datasets.models import Gazette, GazetteEvent
  7 | 
  8 | from ._file import save_file
  9 | 
 10 | 
 11 | def save_gazette(item):
 12 |     """Salva diários oficiais do executivo a partir de 2015."""
 13 |     gazette, created = Gazette.objects.update_or_create(
 14 |         date=item["date"],
 15 |         power=item["power"],
 16 |         year_and_edition=item["year_and_edition"],
 17 |         defaults={
 18 |             "crawled_at": item["crawled_at"],
 19 |             "crawled_from": item["crawled_from"],
 20 |         },
 21 |     )
 22 | 
 23 |     if created and item.get("files"):
 24 |         content_type = get_content_type_for_model(gazette)
 25 |         for file_ in item["files"]:
 26 |             save_file(file_, content_type, gazette.pk)
 27 | 
 28 |     for event in item["events"]:
 29 |         GazetteEvent.objects.get_or_create(
 30 |             gazette=gazette,
 31 |             title=event["title"],
 32 |             secretariat=event["secretariat"],
 33 |             crawled_from=item["crawled_from"],
 34 |             summary=event["summary"],
 35 |             defaults={"crawled_at": item["crawled_at"]},
 36 |         )
 37 |     return gazette
 38 | 
 39 | 
 40 | def save_legacy_gazette(item):
 41 |     """Salva diários oficiais do executivo de antes de 2015.
 42 | 
 43 |     Os diários oficiais eram publicados em um site diferente do atual e
 44 |     também em jornais. Além disso, tinham um formato diferente, sendo um
 45 |     arquivo para cada evento (decreto, leis etc).
 46 |     Alguns não possuem data (especialmente os do ano de 2010). Por isso a
 47 |     tentativa de extrair a data do título.
 48 |     """
 49 | 
 50 |     notes = ""
 51 |     if item["date"] is None:
 52 |         extracted_date = _extract_date(item["title"])
 53 |         if extracted_date:
 54 |             item["date"] = extracted_date
 55 |             notes = "Data extraída do título."
 56 | 
 57 |     gazette, created = Gazette.objects.get_or_create(
 58 |         date=item["date"],
 59 |         power="executivo",
 60 |         crawled_from=item["crawled_from"],
 61 |         is_legacy=True,
 62 |         defaults={"crawled_at": item["crawled_at"], "notes": notes},
 63 |     )
 64 | 
 65 |     if created and item.get("files"):
 66 |         content_type = get_content_type_for_model(gazette)
 67 |         for file_ in item["files"]:
 68 |             save_file(file_, content_type, gazette.pk)
 69 | 
 70 |     GazetteEvent.objects.create(
 71 |         gazette=gazette,
 72 |         title=item["title"],
 73 |         crawled_from=item["crawled_from"],
 74 |         summary=item["details"],
 75 |         published_on=item["published_on"],
 76 |         crawled_at=item["crawled_at"],
 77 |     )
 78 |     return gazette
 79 | 
 80 | 
 81 | def _extract_date(str_date):
 82 |     if str_date is None:
 83 |         return
 84 |     pattern = r"(\d+) DE (\w+) DE (\d{4})"
 85 |     result = re.search(pattern, str_date, re.IGNORECASE)
 86 |     if result:
 87 |         months = {
 88 |             "janeiro": 1,
 89 |             "fevereiro": 2,
 90 |             "março": 3,
 91 |             "marco": 3,
 92 |             "abril": 4,
 93 |             "maio": 5,
 94 |             "junho": 6,
 95 |             "julho": 7,
 96 |             "agosto": 8,
 97 |             "setembro": 9,
 98 |             "outubro": 10,
 99 |             "novembro": 11,
100 |             "dezembro": 12,
101 |         }
102 |         day = int(result.group(1))
103 |         month = result.group(2).lower()
104 |         year = int(result.group(3))
105 |         return date(year, months[month], day)
106 |     return result
107 | 


--------------------------------------------------------------------------------
/web/datasets/migrations/0014_citycouncilbid.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.0.5 on 2020-05-22 09:14
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
class Migration(migrations.Migration):
    """Create the ``CityCouncilBid`` model in the ``datasets`` app."""

    # Applied after migration 0013_file_search_vector of the same app.
    dependencies = [
        ("datasets", "0013_file_search_vector"),
    ]

    operations = [
        migrations.CreateModel(
            name="CityCouncilBid",
            fields=[
                # Auto-incrementing primary key.
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                # Set once, when the row is first saved.
                (
                    "created_at",
                    models.DateTimeField(auto_now_add=True, verbose_name="Criado em"),
                ),
                # Refreshed on every save.
                (
                    "updated_at",
                    models.DateTimeField(auto_now=True, verbose_name="Atualizado em"),
                ),
                # Crawl metadata: when ("Coletado em") and from where ("Fonte").
                ("crawled_at", models.DateTimeField(verbose_name="Coletado em")),
                ("crawled_from", models.URLField(verbose_name="Fonte")),
                # Optional free-text notes.
                (
                    "notes",
                    models.TextField(blank=True, null=True, verbose_name="Anotações"),
                ),
                # "Código externo" = external code (up to 10 characters).
                (
                    "external_code",
                    models.CharField(max_length=10, verbose_name="Código externo"),
                ),
                # Optional bid modality, restricted to the choices below.
                (
                    "modality",
                    models.CharField(
                        blank=True,
                        choices=[
                            ("tomada_de_precos", "Tomada de Preço"),
                            ("pregao_presencial", "Pregão Presencial"),
                            ("pregao_eletronico", "Pregão Eletrônico"),
                            ("leilao", "Leilão"),
                            ("inexigibilidade", "Inexigibilidade"),
                            ("dispensada", "Dispensada"),
                            ("convite", "Convite"),
                            ("concurso", "Concurso"),
                            ("concorrencia", "Concorrência"),
                            ("chamada_publica", "Chamada Pública"),
                        ],
                        max_length=60,
                        null=True,
                        verbose_name="Modalidade",
                    ),
                ),
                # "Código da licitação" = bid code.
                (
                    "code",
                    models.CharField(max_length=15, verbose_name="Código da licitação"),
                ),
                # "Código do tipo da licitação" = bid type code.
                (
                    "code_type",
                    models.CharField(
                        max_length=15, verbose_name="Código do tipo da licitação"
                    ),
                ),
                # "Descrição (objeto)" = description (object of the bid).
                ("description", models.TextField(verbose_name="Descrição (objeto)")),
                # Nullable session date/time; also used by ``get_latest_by``.
                (
                    "session_at",
                    models.DateTimeField(
                        null=True, verbose_name="Sessão Data / Horário"
                    ),
                ),
                # "Excluído?" = excluded?; defaults to False.
                (
                    "excluded",
                    models.BooleanField(default=False, verbose_name="Excluído?"),
                ),
            ],
            options={
                # "Câmara de Vereadores - Licitação" = City Council - Bid.
                "verbose_name": "Câmara de Vereadores - Licitação",
                "verbose_name_plural": "Câmara de Vereadores - Licitações",
                "get_latest_by": "session_at",
            },
        ),
    ]
92 | 


--------------------------------------------------------------------------------
/web/datasets/management/commands/load_tcmba_documents.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from datetime import datetime, timezone
 3 | 
 4 | from django.conf import settings
 5 | from django.contrib.admin.options import get_content_type_for_model
 6 | from django.core.management.base import BaseCommand
 7 | 
 8 | from web.datasets.models import File, TCMBADocument
 9 | from web.datasets.parsers import from_str_to_date
10 | from web.datasets.services import get_s3_client
11 | 
# Module-level S3 client, built once at import time from the Django settings.
client = get_s3_client(settings)
13 | 
14 | 
def build_path(s3_filepath, unit, category, filename):
    """Build the path of a document that sits next to the JSON index file.

    The last segment of ``s3_filepath`` (the JSON file itself) is dropped and
    replaced by ``unit/category/filename``.
    """
    # Everything except the trailing JSON file name.
    base_segments = s3_filepath.split("/")[:-1]
    return "/".join([*base_segments, unit, category, filename])
20 | 
21 | 
class Command(BaseCommand):
    """Import TCM-BA document records described by a JSON file stored on S3.

    The JSON file is downloaded through the module-level S3 ``client``. For
    every item a ``TCMBADocument`` is fetched or created and, when the
    document is new, a related ``File`` pointing at the S3 copy is created.
    """

    help = "Importa documentos do TCM-BA em um bucket S3."

    def add_arguments(self, parser):
        """Register the positional ``s3_path`` and the ``--drop-all`` flag."""
        parser.add_argument("s3_path")
        parser.add_argument("--drop-all", action="store_true")

    def echo(self, text, style=None):
        """Write ``text`` to stdout, optionally wrapped by a style callable."""
        self.stdout.write(style(text) if style else text)

    def warn(self, text):
        """Write ``text`` using the WARNING style."""
        return self.echo(text, self.style.WARNING)

    def success(self, text):
        """Write ``text`` using the SUCCESS style."""
        return self.echo(text, self.style.SUCCESS)

    def handle(self, *args, **options):
        """Download the JSON index and create the missing documents/files."""
        s3_path = options.get("s3_path")
        self.echo(f"Caminho no S3: {s3_path}")

        file_items = client.download_file(s3_path)
        # Use a context manager so the downloaded file handle is always closed.
        with open(file_items) as json_file:
            json_items = json.load(json_file)

        public_view_url = "https://e.tcm.ba.gov.br/epp/ConsultaPublica/listView.seam"

        if options.get("drop_all"):
            confirmation = input("Apagar todos os arquivos do TCM-BA? s/n ")
            if confirmation.lower() in ["s", "y"]:
                TCMBADocument.objects.all().delete()

        # Counts items whose document already existed in the database.
        failed = 0
        for item in json_items:
            path = build_path(
                s3_path, item["unit"], item["category"], item["filename"]
            )
            s3_url = f"https://dadosabertosdefeira.s3.eu-central-1.amazonaws.com/{path}"
            s3_file_path = f"s3://dadosabertosdefeira/{path}"

            document, created = TCMBADocument.objects.get_or_create(
                year=item["year"],
                month=item["month"],
                period=item["period"].lower(),
                category=item["category"],
                unit=item["unit"],
                inserted_at=from_str_to_date(item["inserted_at"]),
                inserted_by=item["inserted_by"],
                original_filename=item["original_filename"],
                crawled_from=public_view_url,
                defaults={
                    # The stored timestamp is forced to UTC.
                    "crawled_at": datetime.fromisoformat(item["crawled_at"]).replace(
                        tzinfo=timezone.utc
                    ),
                },
            )
            if not created:
                self.warn(f"Documento já existe: {document.pk} - {item}")
                failed += 1
                continue

            # Only new documents get a File, so the content type is looked up
            # only when it is actually needed.
            content_type = get_content_type_for_model(document)
            _, file_created = File.objects.get_or_create(
                url=public_view_url,
                content_type=content_type,
                object_id=document.pk,
                s3_url=s3_url,
                s3_file_path=s3_file_path,
                original_filename=item["original_filename"],
            )
            if not file_created:
                self.warn(f"Arquivo já existe: {document.pk} - {item}")
        self.warn(f"Warnings: {failed}")
91 | 


--------------------------------------------------------------------------------
/scraper/validators.py:
--------------------------------------------------------------------------------
  1 | from schematics.models import Model
  2 | from schematics.types import (
  3 |     BaseType,
  4 |     DateTimeType,
  5 |     DateType,
  6 |     DictType,
  7 |     IntType,
  8 |     ListType,
  9 |     StringType,
 10 |     URLType,
 11 | )
 12 | 
 13 | 
class BaseModel(Model):
    """Fields shared by the item validators that inherit from this class."""

    # Required timestamp of the crawl.
    crawled_at = DateTimeType(required=True)
    # Required URL the item was crawled from.
    crawled_from = URLType(required=True)
    # Optional git commit identifier.
    git_commit = StringType(required=False)
 18 | 
 19 | 
class LegacyGazetteItem(BaseModel):
    """Validation schema for legacy gazette items."""

    title = StringType(required=True)
    published_on = StringType(required=False)
    # important info but not available in years like 2010
    date = DateType(required=False)
    details = StringType(required=True)
    # List of strings, one per file.
    files = ListType(StringType)
 27 | 
 28 | 
class GazetteItem(BaseModel):
    """Validation schema for gazette items."""

    date = DateType()
    power = StringType(required=True)
    year_and_edition = StringType(required=True)
    # Required list of dicts whose values are strings.
    events = ListType(DictType(StringType), required=True)
    files = ListType(StringType)
 35 | 
 36 | 
class CityCouncilAgendaItem(BaseModel):
    """Validation schema for city council agenda items."""

    date = DateType()
    details = StringType()
    title = StringType(required=True)
    event_type = StringType(required=True)
 42 | 
 43 | 
class CityCouncilAttendanceListItem(BaseModel):
    """Validation schema for city council attendance list items."""

    date = DateType()
    description = StringType()
    council_member = StringType(required=True)
    status = StringType(required=True)
 49 | 
 50 | 
class CityCouncilMinuteItem(BaseModel):
    """Validation schema for city council minute items."""

    date = DateType()
    title = StringType(required=True)
    event_type = StringType(required=True)
    files = ListType(StringType)
 56 | 
 57 | 
class CityHallContractItem(BaseModel):
    """Validation schema for city hall contract items."""

    contract_id = StringType(required=True)
    # Dates accept day/month/year with a 4-digit or a 2-digit year.
    starts_at = DateType(formats=("%d/%m/%Y", "%d/%m/%y"))
    summary = StringType()
    contractor_document = StringType()
    contractor_name = StringType()
    # Kept as a string, not as a number.
    value = StringType()
    ends_at = DateType(formats=("%d/%m/%Y", "%d/%m/%y"))
    files = ListType(StringType)
 67 | 
 68 | 
class CityHallBidHistoryType(BaseType):
    """Type used for the entries of ``CityHallBidItem.history``."""

    # NOTE(review): this subclasses BaseType rather than Model; confirm that
    # schematics actually validates the attributes below as fields.
    event = StringType()
    published_at = DateTimeType()
    url = URLType()
 73 | 
 74 | 
class CityHallBidItem(BaseModel):
    """Validation schema for city hall bid items."""

    public_agency = StringType()
    month = IntType(min_value=1, max_value=12)
    year = IntType(min_value=1873)  # when Feira became a city :)
    description = StringType()
    # List of dicts whose values use CityHallBidHistoryType.
    history = ListType(DictType(CityHallBidHistoryType))
    codes = StringType()
    modality = StringType()
    session_at = DateTimeType()
    files = ListType(StringType)
 85 | 
 86 | 
class CityHallPaymentsItem(BaseModel):
    """Validation schema for city hall payment items."""

    # Dates accept day/month/year with a 4-digit or a 2-digit year.
    published_at = DateType(formats=("%d/%m/%Y", "%d/%m/%y"))
    phase = StringType()
    company_or_person = StringType(required=True)
    # Kept as a string, not as a number.
    value = StringType(required=True)
    number = StringType()
    document = StringType(required=True)
    date = DateType(formats=("%d/%m/%Y", "%d/%m/%y"))
    process_number = StringType()
    summary = StringType()
    group = StringType()
    action = StringType()
    function = StringType()
    subfunction = StringType()
    type_of_process = StringType()
    resource = StringType()
103 | 
104 | 
class TCMBADocumentItem(Model):
    """Validation schema for TCM-BA document items.

    Inherits from ``Model`` directly, so it declares its own ``crawled_at``
    and has no ``crawled_from`` or ``git_commit`` fields.
    """

    crawled_at = DateTimeType(required=True)
    category = StringType()
    filename = StringType(required=True)
    original_filename = StringType(required=True)
    filepath = StringType(required=True)
    inserted_by = StringType()
    # Accepts day/month/year with a 4-digit or a 2-digit year.
    inserted_at = DateType(formats=("%d/%m/%Y", "%d/%m/%y"))
    unit = StringType(required=True)
    # Month, year and period are kept as strings.
    month = StringType()
    year = StringType()
    period = StringType()
117 | 


--------------------------------------------------------------------------------
/web/datasets/services.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from pathlib import Path
  3 | 
  4 | import boto3
  5 | import requests
  6 | 
  7 | 
  8 | class S3Client:
  9 |     __slots__ = ("client", "bucket", "bucket_folder", "bucket_region")
 10 | 
 11 |     def __init__(self, client, bucket, bucket_folder, bucket_region):
 12 |         self.client = client
 13 |         self.bucket = bucket
 14 |         self.bucket_folder = bucket_folder
 15 |         self.bucket_region = bucket_region
 16 | 
 17 |     def _upload_to_s3(self, temp_file_path, bucket_file_path):
 18 |         with open(temp_file_path, "rb") as body_file:
 19 |             self.client.put_object(
 20 |                 Bucket=self.bucket,
 21 |                 Key=bucket_file_path,
 22 |                 Body=body_file,
 23 |             )
 24 | 
 25 |     def upload_file(self, location_or_url, relative_file_path, prefix=""):
 26 |         location = Path(location_or_url)
 27 |         if not location.exists():
 28 |             # se não é um arquivo local, assumimos que é uma url
 29 |             file_name, temp_file_path = self.create_temp_file(
 30 |                 location_or_url, relative_file_path, prefix
 31 |             )
 32 |         else:
 33 |             file_name, temp_file_path = location.name, str(location)
 34 | 
 35 |         bucket_file_path = f"{self.bucket_folder}/files/{relative_file_path}"
 36 |         bucket_file_path = f"{bucket_file_path}{file_name}"
 37 |         url = (
 38 |             f"https://{self.bucket}.s3.{self.bucket_region}.amazonaws.com/"
 39 |             f"{bucket_file_path}"
 40 |         )
 41 |         self._upload_to_s3(temp_file_path, bucket_file_path)
 42 |         self.delete_temp_file(temp_file_path)
 43 | 
 44 |         return url, bucket_file_path
 45 | 
 46 |     @staticmethod
 47 |     def create_temp_file(url, relative_file_path="", prefix=""):
 48 |         temporary_directory = f"{Path.cwd()}/data/tmp/{relative_file_path}"
 49 |         Path(temporary_directory).mkdir(parents=True, exist_ok=True)
 50 | 
 51 |         response = requests.get(url)
 52 |         start_index = url.rfind("/") + 1
 53 |         temp_file_name = f"{url[start_index:]}"
 54 |         if prefix:
 55 |             temp_file_name = f"{prefix}-{temp_file_name}"
 56 |         temp_file_path = f"{temporary_directory}{temp_file_name}"
 57 |         with open(temp_file_path, "wb") as tmp_file:
 58 |             tmp_file.write(response.content)
 59 |         return temp_file_name, temp_file_path
 60 | 
 61 |     def download_file(self, s3_file_path):
 62 |         temporary_directory = f"{Path.cwd()}/data/tmp/"
 63 |         Path(temporary_directory).mkdir(parents=True, exist_ok=True)
 64 | 
 65 |         start_index = s3_file_path.rfind("/") + 1
 66 |         file_name = s3_file_path[start_index:]
 67 | 
 68 |         local_path = f"{temporary_directory}{file_name}"
 69 |         with open(local_path, "wb") as file_:
 70 |             self.client.download_fileobj(self.bucket, s3_file_path, file_)
 71 | 
 72 |         return local_path
 73 | 
 74 |     @staticmethod
 75 |     def delete_temp_file(temp_file_path):
 76 |         Path(temp_file_path).unlink()
 77 | 
 78 | 
 79 | class FakeS3Client(S3Client):
 80 |     def _upload_to_s3(self, temp_file_path, bucket_file_path):
 81 |         pass
 82 | 
 83 |     def download_file(self, s3_file_path):
 84 |         return f"{Path.cwd()}/data/tmp/{s3_file_path}"
 85 | 
 86 | 
 87 | def get_s3_client(settings):
 88 |     if os.getenv("DJANGO_CONFIGURATION") != "Prod":
 89 |         from unittest.mock import Mock
 90 | 
 91 |         client = Mock()
 92 |         return FakeS3Client(client, "teste", "maria-quiteria-local", "brasil")
 93 | 
 94 |     client = boto3.client(
 95 |         service_name="s3",
 96 |         region_name=settings.AWS_S3_REGION,
 97 |         aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
 98 |         aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
 99 |     )
100 |     return S3Client(
101 |         client,
102 |         settings.AWS_S3_BUCKET,
103 |         settings.AWS_S3_BUCKET_FOLDER,
104 |         settings.AWS_S3_REGION,
105 |     )
106 | 


--------------------------------------------------------------------------------
/web/datasets/management/commands/crawl.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import os
  3 | 
  4 | from django.core.management.base import BaseCommand
  5 | from scrapy import signals
  6 | from scrapy.crawler import CrawlerProcess
  7 | from scrapy.signalmanager import dispatcher
  8 | from scrapy.utils.project import get_project_settings
  9 | 
 10 | from scraper.items import (
 11 |     CityCouncilAttendanceListItem,
 12 |     CityCouncilMinuteItem,
 13 |     CityHallBidItem,
 14 |     GazetteItem,
 15 |     LegacyGazetteItem,
 16 | )
 17 | from scraper.spiders.citycouncil import AttendanceListSpider, MinuteSpider
 18 | from scraper.spiders.cityhall import BidsSpider
 19 | from scraper.spiders.gazette import (
 20 |     ExecutiveAndLegislativeGazetteSpider,
 21 |     LegacyGazetteSpider,
 22 | )
 23 | from web.datasets.models import (
 24 |     CityCouncilAttendanceList,
 25 |     CityCouncilMinute,
 26 |     CityHallBid,
 27 |     File,
 28 |     Gazette,
 29 |     GazetteEvent,
 30 | )
 31 | 
 32 | from ._citycouncil import save_attendance_list, save_minute
 33 | from ._cityhall import save_bid
 34 | from ._gazette import save_gazette, save_legacy_gazette
 35 | 
 36 | 
 37 | class Command(BaseCommand):
 38 |     help = "Executa todos os coletores e salva os itens recentes no banco."
 39 | 
 40 |     def add_arguments(self, parser):
 41 |         drop_all_help = "Limpa o banco antes de iniciar a coleta."
 42 |         parser.add_argument("--drop-all", action="store_true", help=drop_all_help)
 43 |         parser.add_argument("--scrapy-args")
 44 | 
 45 |     def echo(self, text, style=None):
 46 |         self.stdout.write(style(text) if style else text)
 47 | 
 48 |     def warn(self, text):
 49 |         return self.echo(text, self.style.WARNING)
 50 | 
 51 |     def success(self, text):
 52 |         return self.echo(text, self.style.SUCCESS)
 53 | 
 54 |     def save(self, signal, sender, item, response, spider):
 55 |         if isinstance(item, CityCouncilAttendanceListItem):
 56 |             save_attendance_list(item)
 57 |         if isinstance(item, CityCouncilMinuteItem):
 58 |             save_minute(item)
 59 |         if isinstance(item, CityHallBidItem):
 60 |             save_bid(item)
 61 |         if isinstance(item, LegacyGazetteItem):
 62 |             save_legacy_gazette(item)
 63 |         if isinstance(item, GazetteItem):
 64 |             save_gazette(item)
 65 | 
 66 |     def handle(self, *args, **options):
 67 |         if options.get("drop_all"):
 68 |             self.warn("Apagando registros...")
 69 |             CityCouncilAttendanceList.objects.all().delete()
 70 |             CityCouncilMinute.objects.all().delete()
 71 |             CityHallBid.objects.all().delete()
 72 |             Gazette.objects.all().delete()
 73 |             GazetteEvent.objects.all().delete()
 74 |             File.objects.all().delete()
 75 | 
 76 |         dispatcher.connect(self.save, signal=signals.item_passed)
 77 |         os.environ["SCRAPY_SETTINGS_MODULE"] = "scraper.settings"
 78 |         settings = get_project_settings()
 79 | 
 80 |         if options.get("scrapy_args"):
 81 |             scrapy_args = json.loads(options.get("scrapy_args"))
 82 |             settings.update(scrapy_args)
 83 | 
 84 |         process = CrawlerProcess(settings=settings)
 85 |         process.crawl(
 86 |             AttendanceListSpider,
 87 |             start_from_date=CityCouncilAttendanceList.last_collected_item_date(),
 88 |         )
 89 |         process.crawl(
 90 |             MinuteSpider, start_from_date=CityCouncilMinute.last_collected_item_date()
 91 |         )
 92 |         process.crawl(
 93 |             BidsSpider, start_from_date=CityHallBid.last_collected_item_date()
 94 |         )
 95 | 
 96 |         last_collected_gazette = Gazette.last_collected_item_date()
 97 |         if last_collected_gazette is None:
 98 |             process.crawl(LegacyGazetteSpider)
 99 |         process.crawl(
100 |             ExecutiveAndLegislativeGazetteSpider,
101 |             start_from_date=last_collected_gazette,
102 |         )
103 | 
104 |         self.warn("Iniciando a coleta...")
105 |         process.start()
106 |         self.success("Pronto!")
107 | 


--------------------------------------------------------------------------------
/web/datasets/migrations/0021_historicalcitycouncilattendancelist.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.0.8 on 2020-08-30 17:12
 2 | 
 3 | import django.db.models.deletion
 4 | import simple_history.models
 5 | from django.conf import settings
 6 | from django.db import migrations, models
 7 | 
 8 | 
class Migration(migrations.Migration):
    """Create the ``HistoricalCityCouncilAttendanceList`` model."""

    # Depends on the (swappable) user model and on migration
    # 0020_auto_20200718_2347 of the same app.
    dependencies = [
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
        ("datasets", "0020_auto_20200718_2347"),
    ]

    operations = [
        migrations.CreateModel(
            name="HistoricalCityCouncilAttendanceList",
            fields=[
                # Plain indexed integer here; the primary key of this model
                # is ``history_id`` below.
                (
                    "id",
                    models.IntegerField(
                        auto_created=True, blank=True, db_index=True, verbose_name="ID"
                    ),
                ),
                # Non-editable timestamps.
                (
                    "created_at",
                    models.DateTimeField(
                        blank=True, editable=False, verbose_name="Criado em"
                    ),
                ),
                (
                    "updated_at",
                    models.DateTimeField(
                        blank=True, editable=False, verbose_name="Atualizado em"
                    ),
                ),
                # Crawl metadata: when ("Coletado em") and from where ("Fonte").
                ("crawled_at", models.DateTimeField(verbose_name="Coletado em")),
                ("crawled_from", models.URLField(verbose_name="Fonte")),
                # Optional free-text notes.
                (
                    "notes",
                    models.TextField(blank=True, null=True, verbose_name="Anotações"),
                ),
                ("date", models.DateField(verbose_name="Data")),
                (
                    "description",
                    models.CharField(
                        blank=True, max_length=200, null=True, verbose_name="Descrição"
                    ),
                ),
                # "Vereador" = council member; indexed.
                (
                    "council_member",
                    models.CharField(
                        db_index=True, max_length=200, verbose_name="Vereador"
                    ),
                ),
                # "Situação" = status; indexed and restricted to the choices below.
                (
                    "status",
                    models.CharField(
                        choices=[
                            ("presente", "Presente"),
                            ("falta_justificada", "Falta Justificada"),
                            ("licenca_justificada", "Licença Justificada"),
                            ("ausente", "Ausente"),
                        ],
                        db_index=True,
                        max_length=20,
                        verbose_name="Situação",
                    ),
                ),
                # History bookkeeping columns.
                ("history_id", models.AutoField(primary_key=True, serialize=False)),
                ("history_date", models.DateTimeField()),
                ("history_change_reason", models.CharField(max_length=100, null=True)),
                # One-character code: "+" created, "~" changed, "-" deleted.
                (
                    "history_type",
                    models.CharField(
                        choices=[("+", "Created"), ("~", "Changed"), ("-", "Deleted")],
                        max_length=1,
                    ),
                ),
                # Nullable link to the user model; set to NULL when the user
                # is deleted, and no reverse relation is created ("+").
                (
                    "history_user",
                    models.ForeignKey(
                        null=True,
                        on_delete=django.db.models.deletion.SET_NULL,
                        related_name="+",
                        to=settings.AUTH_USER_MODEL,
                    ),
                ),
            ],
            options={
                "verbose_name": "historical Câmara de Vereadores - Lista de Presença",
                # Newest history rows first.
                "ordering": ("-history_date", "-history_id"),
                "get_latest_by": "history_date",
            },
            bases=(simple_history.models.HistoricalChanges, models.Model),
        ),
    ]
98 | 


--------------------------------------------------------------------------------
/scraper/spiders/utils.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import os
  3 | import re
  4 | import unicodedata
  5 | from datetime import datetime, timezone
  6 | from urllib.parse import parse_qs, urlparse
  7 | 
  8 | from web.datasets.parsers import from_str_to_date
  9 | 
# Case-insensitive pattern for the network-location part of a URL:
#   - an optional "user:password@" prefix;
#   - either a dotted host name (labels of letters, digits and inner hyphens)
#     or the literal "localhost";
#   - an optional ":port" of 1 to 5 digits.
DOMAIN_FORMAT = re.compile(
    r"(?:^(\w{1,255}):(.{1,255})@|^)"
    r"(?:(?:(?=\S{0,253}(?:$|:))"
    r"((?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\.)+"
    r"(?:[a-z0-9]{1,63})))"
    r"|localhost)"
    r"(:\d{1,5})?",
    re.IGNORECASE,
)


# Module-level logger named after this module.
logger = logging.getLogger(__name__)
 22 | 
 23 | 
 24 | def replace_query_param(url, field, value):
 25 |     return re.sub(r"{}=\d+".format(field), r"{}={}".format(field, str(value)), url)
 26 | 
 27 | 
 28 | def identify_contract_id(text):
 29 |     CONTRACT_NUMBER_PATTERN = re.compile(r"\d+[-|\/]\d{4}?[-|\/]\d+C|\d+-\d{4}|\d+")
 30 |     result = re.findall(CONTRACT_NUMBER_PATTERN, text)
 31 |     if result:
 32 |         return result[0]
 33 | 
 34 | 
 35 | def extract_param(url, param):
 36 |     parsed = urlparse(url)
 37 |     try:
 38 |         value = parse_qs(parsed.query)[param]
 39 |         return value[0]
 40 |     except KeyError:
 41 |         return
 42 | 
 43 | 
 44 | def months_and_years(start_date, end_date):
 45 |     pairs = []
 46 |     if start_date.year == end_date.year:
 47 |         if start_date.month == end_date.month:
 48 |             return [(start_date.month, start_date.year)]
 49 |     for year in range(start_date.year, end_date.year + 1):
 50 |         for month in range(1, 13):
 51 |             if start_date.year == end_date.year:
 52 |                 if start_date.month < month <= end_date.month:
 53 |                     pairs.append((month, year))
 54 |             elif year == start_date.year:
 55 |                 if month > start_date.month:
 56 |                     pairs.append((month, year))
 57 |             elif year == end_date.year:
 58 |                 if month <= end_date.month:
 59 |                     pairs.append((month, year))
 60 |             elif year not in (start_date.year, end_date.year):
 61 |                 pairs.append((month, year))
 62 |     return pairs
 63 | 
 64 | 
 65 | def extract_date(str_with_date):
 66 |     DATE_PATTERN = re.compile(r"\d+\/\d+\/\d+")
 67 |     result = re.search(DATE_PATTERN, str_with_date)
 68 |     if result:
 69 |         return from_str_to_date(result.group(0))
 70 |     return
 71 | 
 72 | 
 73 | def is_url(url):
 74 |     if not url:
 75 |         return False
 76 | 
 77 |     url = url.strip()
 78 | 
 79 |     if len(url) > 2048:
 80 |         logger.warning(
 81 |             f"URL ultrapassa limite de 2048 caracteres (tamanho = {len(url)})"
 82 |         )
 83 |         return False
 84 | 
 85 |     result = urlparse(url)
 86 |     scheme = result.scheme
 87 |     domain = result.netloc
 88 | 
 89 |     if not scheme:
 90 |         logger.warning("Nenhum URL scheme especificado")
 91 |         return is_url(f"http://{url}")
 92 | 
 93 |     if not domain:
 94 |         logger.warning("Nenhum domínio especificado")
 95 |         return False
 96 | 
 97 |     if not re.fullmatch(DOMAIN_FORMAT, domain):
 98 |         logger.warning(f"Domínio inválido ({domain})")
 99 |         return False
100 | 
101 |     return True
102 | 
103 | 
def strip_accents(string):
    """Return ``string`` without combining accent marks (``None`` passes through)."""
    if string is None:
        return None
    # NFD splits accented characters into base character + combining mark.
    decomposed = unicodedata.normalize("NFD", string)
    kept = [char for char in decomposed if unicodedata.category(char) != "Mn"]
    return "".join(kept)
112 | 
113 | 
def datetime_utcnow_aware() -> datetime:
    """Return the current UTC date and time as a timezone-aware datetime."""
    # datetime.utcnow() is deprecated since Python 3.12; this yields the same
    # aware value directly.
    return datetime.now(timezone.utc)
117 | 
118 | 
def get_git_commit() -> str:
    """Return the current commit hash read from the ``GIT_REV`` variable.

    An empty string is returned when the variable is unset or holds the
    literal text ``"None"``.
    """
    revision = os.environ.get("GIT_REV")
    if revision is None or revision == "None":
        return ""
    return revision
125 | 
126 | 
def get_status(status):
    """Return the label for a status.

    Checked on 01/01/2022.
    """
    if not status:
        return ""
    # Trim, drop accents, lowercase and turn spaces into underscores.
    unaccented = strip_accents(status.strip())
    return "_".join(unaccented.lower().split(" "))
134 | 


--------------------------------------------------------------------------------
/web/datasets/tests/fixtures/response-22042021.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "inclusoesContrato": [],
 3 |     "alteracoesContrato": [],
 4 |     "exclusoesContrato": [],
 5 |     "inclusoesLicitacao": [
 6 |         {
 7 |             "codLic": "229",
 8 |             "codTipoLic": "7",
 9 |             "numLic": "001\/2021",
10 |             "numTipoLic": "001\/2021",
11 |             "objetoLic": "Contratacao de empresa especializada na prestacao de servicos tecnicos na area de Solucoes Integradas em Tecnologia, para fornecimento de licenca de uso de Software de Gerenciamento de Processo Legislativo (sistema de protocolo legislativo WEB e DESK, sistema de tramitacao legislativa WEB e DESK, sistema de bancos de leis WEB, sistema inibidor de multiplicidade de materias, sistema de transparencia legislativa, sistema de painel eletronico, sistema em plataforma mobile de votacao em plenario e sistema em plataforma mobile de consulta de projetos e seus tramites), incluindo instalacao, configuracao, treinamento e parametrizacao, atendendo as caracteristicas da Camara Municipal de Feira de Santana ? Bahia, conforme especificacoes, quantitativos e condicoes estabelecidas no Edital e seus Anexos",
12 |             "dtLic": "2021-05-05 09:00:00",
13 |             "arquivos": [
14 |                 {
15 |                     "codArqLic": "1588",
16 |                     "codLic": "229",
17 |                     "dsArqLic": "Aviso PP 001-2021 - Publicacao.doc",
18 |                     "caminhoArqLic": "https:\/\/www.transparencia.feiradesantana.ba.leg.br\/adm\/upload\/licitacao\/Aviso PP 001-2021 - Publicacao.doc"
19 |                 },
20 |                 {
21 |                     "codArqLic": "1590",
22 |                     "codLic": "229",
23 |                     "dsArqLic": "Edital Lic 001-2021 - Sist Gerenc Legislativo - PP 001-2021.doc",
24 |                     "caminhoArqLic": "https:\/\/www.transparencia.feiradesantana.ba.leg.br\/adm\/upload\/licitacao\/Edital Lic 001-2021 - Sist Gerenc Legislativo - PP 001-2021.doc"
25 |                 }
26 |             ]
27 |         }
28 |     ],
29 |     "alteracoesLicitacao": [
30 |         {
31 |             "codLic": "229",
32 |             "codTipoLic": "7",
33 |             "numLic": "001\/2021",
34 |             "numTipoLic": "001\/2021",
35 |             "objetoLic": "Contratacao de empresa especializada na prestacao de servicos tecnicos na area de Solucoes Integradas em Tecnologia, para fornecimento de licenca de uso de Software de Gerenciamento de Processo Legislativo (sistema de protocolo legislativo WEB e DESK, sistema de tramitacao legislativa WEB e DESK, sistema de bancos de leis WEB, sistema inibidor de multiplicidade de materias, sistema de transparencia legislativa, sistema de painel eletronico, sistema em plataforma mobile de votacao em plenario e sistema em plataforma mobile de consulta de projetos e seus tramites), incluindo instalacao, configuracao, treinamento e parametrizacao, atendendo as caracteristicas da Camara Municipal de Feira de Santana ? Bahia, conforme especificacoes, quantitativos e condicoes estabelecidas no Edital e seus Anexos",
36 |             "dtLic": "2021-05-05 09:00:00",
37 |             "arquivos": [
38 |                 {
39 |                     "codArqLic": "1588",
40 |                     "codLic": "229",
41 |                     "dsArqLic": "Aviso PP 001-2021 - Publicacao.doc",
42 |                     "caminhoArqLic": "https:\/\/www.transparencia.feiradesantana.ba.leg.br\/adm\/upload\/licitacao\/Aviso PP 001-2021 - Publicacao.doc"
43 |                 },
44 |                 {
45 |                     "codArqLic": "1590",
46 |                     "codLic": "229",
47 |                     "dsArqLic": "Edital Lic 001-2021 - Sist Gerenc Legislativo - PP 001-2021.doc",
48 |                     "caminhoArqLic": "https:\/\/www.transparencia.feiradesantana.ba.leg.br\/adm\/upload\/licitacao\/Edital Lic 001-2021 - Sist Gerenc Legislativo - PP 001-2021.doc"
49 |                 }
50 |             ]
51 |         }
52 |     ],
53 |     "exclusoesLicitacao": [],
54 |     "inclusoesReceita": [],
55 |     "alteracoesReceita": [],
56 |     "exclusoesReceita": [],
57 |     "inclusoesDespesa": [],
58 |     "alteracoesDespesa": [],
59 |     "exclusoesDespesa": []
60 | }
61 | 


--------------------------------------------------------------------------------
/web/datasets/tests/test_services.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from pathlib import Path
  3 | 
  4 | from django.conf import settings
  5 | 
  6 | from web.datasets.services import get_s3_client
  7 | 
  8 | client = get_s3_client(settings)  # one client shared by every test in this module
  9 | 
 10 | 
 11 | class TestS3Client:
 12 |     def test_upload_file(self):
 13 |         relative_path = "TestModel/2020/10/23/"
 14 |         s3_url, bucket_file_path = client.upload_file(
 15 |             "https://www.google.com/robots.txt", relative_path
 16 |         )
 17 | 
 18 |         expected_file_path = f"maria-quiteria-local/files/{relative_path}robots.txt"
 19 |         expected_s3_url = f"https://teste.s3.brasil.amazonaws.com/{bucket_file_path}"
 20 |         real_path = f"{os.getcwd()}/data/tmp/{expected_file_path}"
 21 | 
 22 |         assert s3_url == expected_s3_url
 23 |         assert bucket_file_path == expected_file_path
 24 |         assert Path(real_path).exists() is False
 25 | 
 26 |     def test_create_temp_file(self):
 27 |         url = (
 28 |             "http://www.feiradesantana.ba.gov.br/licitacoes/"
 29 |             "respostas/4924SUSPENS%C3%83O.pdf"
 30 |         )
 31 |         temp_file_name, temp_file_path = client.create_temp_file(url)
 32 | 
 33 |         assert temp_file_name == "4924SUSPENS%C3%83O.pdf"
 34 |         assert Path(temp_file_path).is_file() is True
 35 | 
 36 |         client.delete_temp_file(temp_file_path)
 37 |         assert Path(temp_file_path).is_file() is False
 38 | 
 39 |     def test_create_temp_file_with_prefix(self):
 40 |         url = (
 41 |             "http://www.feiradesantana.ba.gov.br/licitacoes/"
 42 |             "respostas/4924SUSPENS%C3%83O.pdf"
 43 |         )
 44 |         prefix = "eu-sou-um-checksum"
 45 |         expected_file_name = f"{prefix}-4924SUSPENS%C3%83O.pdf"
 46 |         temp_file_name, temp_file_path = client.create_temp_file(url, prefix=prefix)
 47 | 
 48 |         assert temp_file_name == expected_file_name
 49 |         assert Path(temp_file_path).is_file() is True
 50 | 
 51 |         client.delete_temp_file(temp_file_path)
 52 |         assert Path(temp_file_path).is_file() is False
 53 | 
 54 |     def test_create_temp_file_with_relative_file_path(self):
 55 |         url = (
 56 |             "http://www.feiradesantana.ba.gov.br/licitacoes/"
 57 |             "respostas/4924SUSPENS%C3%83O.pdf"
 58 |         )
 59 |         relative_file_path = "extra/"
 60 |         temp_file_name, temp_file_path = client.create_temp_file(
 61 |             url, relative_file_path=relative_file_path
 62 |         )
 63 | 
 64 |         assert temp_file_name == "4924SUSPENS%C3%83O.pdf"
 65 |         assert Path(temp_file_path).is_file() is True
 66 | 
 67 |         client.delete_temp_file(temp_file_path)
 68 | 
 69 |         assert Path(temp_file_path).is_file() is False
 70 | 
 71 |     def test_download_file(self):
 72 |         relative_path = "TestModel/2020/10/23/"
 73 |         s3_url, relative_file_path = client.upload_file(
 74 |             "https://www.google.com/robots.txt", relative_path
 75 |         )
 76 | 
 77 |         expected_file_path = f"maria-quiteria-local/files/{relative_path}robots.txt"
 78 |         expected_s3_url = f"https://teste.s3.brasil.amazonaws.com/{expected_file_path}"
 79 |         real_path = f"{os.getcwd()}/data/tmp/{expected_file_path}"
 80 | 
 81 |         assert s3_url == expected_s3_url
 82 |         assert relative_file_path == expected_file_path
 83 |         assert Path(real_path).exists() is False
 84 | 
 85 |         absolute_file_path = client.download_file(relative_file_path)
 86 | 
 87 |         assert absolute_file_path == real_path
 88 | 
 89 |     def test_upload_file_from_local_path(self):
 90 |         local_path = Path("conteudo.txt")
 91 |         local_path.write_text("Testando")
 92 |         relative_path = "TestModel/2021/06/23/"
 93 |         s3_url, bucket_file_path = client.upload_file(str(local_path), relative_path)
 94 | 
 95 |         expected_file_path = f"maria-quiteria-local/files/{relative_path}conteudo.txt"
 96 |         expected_s3_url = f"https://teste.s3.brasil.amazonaws.com/{bucket_file_path}"
 97 | 
 98 |         assert s3_url == expected_s3_url
 99 |         assert bucket_file_path == expected_file_path
100 |         assert Path(local_path).exists() is False
101 | 


--------------------------------------------------------------------------------
/web/api/views.py:
--------------------------------------------------------------------------------
  1 | from datetime import datetime
  2 | 
  3 | from django_filters.rest_framework import DjangoFilterBackend
  4 | from rest_framework.filters import SearchFilter
  5 | from rest_framework.generics import ListAPIView
  6 | from rest_framework.permissions import AllowAny
  7 | from rest_framework.renderers import JSONRenderer
  8 | from rest_framework.response import Response
  9 | from rest_framework.views import APIView
 10 | from rest_framework.viewsets import ReadOnlyModelViewSet, ViewSet
 11 | 
 12 | from web.api.constants import AVAILABLE_ENDPOINTS_BY_PUBLIC_AGENCY
 13 | from web.api.filters import CityHallBidFilter, GazetteFilter
 14 | from web.api.serializers import (
 15 |     CityCouncilAgendaSerializer,
 16 |     CityCouncilAttendanceListSerializer,
 17 |     CityCouncilMinuteSerializer,
 18 |     CityHallBidSerializer,
 19 |     GazetteSerializer,
 20 | )
 21 | from web.datasets.models import (
 22 |     CityCouncilAgenda,
 23 |     CityCouncilAttendanceList,
 24 |     CityCouncilMinute,
 25 |     CityHallBid,
 26 |     Gazette,
 27 | )
 28 | 
 29 | 
 30 | class HealthCheckView(ViewSet):
 31 |     permission_classes = [AllowAny]
 32 | 
 33 |     def list(self, request):
 34 |         return Response({"status": "available", "time": datetime.now()})
 35 | 
 36 | 
 37 | class CityCouncilAgendaView(ListAPIView):
 38 |     queryset = CityCouncilAgenda.objects.all()
 39 |     serializer_class = CityCouncilAgendaSerializer
 40 | 
 41 |     def get_queryset(self):
 42 |         query = self.request.query_params.get("query", None)
 43 |         start_date = self.request.query_params.get("start_date", None)
 44 |         end_date = self.request.query_params.get("end_date", None)
 45 |         kwargs = {}
 46 | 
 47 |         if query:
 48 |             kwargs["details__icontains"] = query
 49 |         if start_date:
 50 |             kwargs["date__gte"] = start_date
 51 |         if end_date:
 52 |             kwargs["date__lte"] = end_date
 53 | 
 54 |         return self.queryset.filter(**kwargs)
 55 | 
 56 | 
 57 | class CityCouncilAttendanceListView(ListAPIView):
 58 |     queryset = CityCouncilAttendanceList.objects.all()
 59 |     serializer_class = CityCouncilAttendanceListSerializer
 60 | 
 61 |     def get_queryset(self):
 62 |         query = self.request.query_params.get("query", None)
 63 |         status = self.request.query_params.get("status", None)
 64 |         start_date = self.request.query_params.get("start_date", None)
 65 |         end_date = self.request.query_params.get("end_date", None)
 66 | 
 67 |         kwargs = {}
 68 | 
 69 |         if query:
 70 |             kwargs["council_member__icontains"] = query
 71 |         if status:
 72 |             kwargs["status"] = status
 73 |         if start_date:
 74 |             kwargs["date__gte"] = start_date
 75 |         if end_date:
 76 |             kwargs["date__lte"] = end_date
 77 | 
 78 |         return self.queryset.filter(**kwargs)
 79 | 
 80 | 
 81 | class CityCouncilMinuteView(ListAPIView):
 82 |     queryset = CityCouncilMinute.objects.all()
 83 |     serializer_class = CityCouncilMinuteSerializer
 84 | 
 85 |     def get_queryset(self):
 86 |         query = self.request.query_params.get("query", None)
 87 |         start_date = self.request.query_params.get("start_date", None)
 88 |         end_date = self.request.query_params.get("end_date", None)
 89 |         kwargs = {}
 90 | 
 91 |         if query:
 92 |             kwargs["title__icontains"] = query
 93 |         if start_date:
 94 |             kwargs["date__gte"] = start_date
 95 |         if end_date:
 96 |             kwargs["date__lte"] = end_date
 97 | 
 98 |         return self.queryset.filter(**kwargs)
 99 | 
100 | 
101 | class GazetteView(ReadOnlyModelViewSet):
102 |     queryset = Gazette.objects.all()
103 |     serializer_class = GazetteSerializer
104 |     filterset_class = GazetteFilter
105 |     filter_backends = [SearchFilter, DjangoFilterBackend]
106 | 
107 | 
108 | class CityHallBidView(ListAPIView):
109 |     queryset = CityHallBid.objects.prefetch_related("events").prefetch_related("files")
110 |     serializer_class = CityHallBidSerializer
111 |     filterset_class = CityHallBidFilter
112 |     filter_backends = [SearchFilter, DjangoFilterBackend]
113 | 
114 | 
115 | class FrontendEndpoint(APIView):
116 |     renderer_classes = [JSONRenderer]
117 | 
118 |     def get(self, request, format=None):
119 |         return Response(AVAILABLE_ENDPOINTS_BY_PUBLIC_AGENCY)
120 | 


--------------------------------------------------------------------------------
/web/datasets/migrations/0009_auto_20200514_1350.py:
--------------------------------------------------------------------------------
  1 | # Generated by Django 2.2.12 on 2020-05-14 16:50
  2 | 
  3 | import django.db.models.expressions
  4 | from django.db import migrations, models
  5 | 
  6 | 
  7 | class Migration(migrations.Migration):  # Meta options for six models + expense schema
  8 |     dependencies = [
  9 |         ("datasets", "0008_cityhallbid_cityhallbidevent"),
 10 |     ]
 11 | 
 12 |     operations = [
 13 |         migrations.AlterModelOptions(
 14 |             name="citycouncilagenda",
 15 |             options={
 16 |                 "get_latest_by": "date",
 17 |                 "verbose_name": "Câmara de Vereadores - Agenda",
 18 |                 "verbose_name_plural": "Câmara de Vereadores - Agendas",
 19 |             },
 20 |         ),
 21 |         migrations.AlterModelOptions(
 22 |             name="citycouncilattendancelist",
 23 |             options={
 24 |                 "get_latest_by": "date",
 25 |                 "verbose_name": "Câmara de Vereadores - Lista de Presença",
 26 |                 "verbose_name_plural": "Câmara de Vereadores - Listas de Presença",
 27 |             },
 28 |         ),
 29 |         migrations.AlterModelOptions(
 30 |             name="citycouncilexpense",
 31 |             options={
 32 |                 "get_latest_by": "date",
 33 |                 "verbose_name": "Câmara de Vereadores - Despesa",
 34 |                 "verbose_name_plural": "Câmara de Vereadores - Despesas",
 35 |             },
 36 |         ),
 37 |         migrations.AlterModelOptions(
 38 |             name="citycouncilminute",
 39 |             options={
 40 |                 "get_latest_by": "date",
 41 |                 "verbose_name": "Câmara de Vereadores - Atas",
 42 |                 "verbose_name_plural": "Câmara de Vereadores - Atas",
 43 |             },
 44 |         ),
 45 |         migrations.AlterModelOptions(
 46 |             name="cityhallbid",
 47 |             options={
 48 |                 "get_latest_by": "session_at",
 49 |                 "verbose_name": "Prefeitura - Licitação",
 50 |                 "verbose_name_plural": "Prefeitura - Licitações",
 51 |             },
 52 |         ),
 53 |         migrations.AlterModelOptions(
 54 |             name="gazette",
 55 |             options={
 56 |                 "get_latest_by": "date",
 57 |                 "ordering": [  # newest date first, NULL dates last
 58 |                     django.db.models.expressions.OrderBy(
 59 |                         django.db.models.expressions.F("date"),
 60 |                         descending=True,
 61 |                         nulls_last=True,
 62 |                     )
 63 |                 ],
 64 |                 "verbose_name": "Diário Oficial",
 65 |                 "verbose_name_plural": "Diários Oficiais",
 66 |             },
 67 |         ),
 68 |         migrations.RemoveField(  # type_of_process is dropped from expenses
 69 |             model_name="citycouncilexpense",
 70 |             name="type_of_process",
 71 |         ),
 72 |         migrations.AddField(
 73 |             model_name="citycouncilexpense",
 74 |             name="budget_unit",
 75 |             field=models.PositiveIntegerField(default=101),  # existing rows get 101
 76 |         ),
 77 |         migrations.AddField(
 78 |             model_name="citycouncilexpense",
 79 |             name="excluded",
 80 |             field=models.BooleanField(default=False),  # existing rows get False
 81 |         ),
 82 |         migrations.AddField(
 83 |             model_name="citycouncilexpense",
 84 |             name="modality",
 85 |             field=models.CharField(
 86 |                 blank=True,
 87 |                 choices=[
 88 |                     ("convenio", "Convênio"),
 89 |                     ("tomada_de_precos", "Tomada de Preço"),
 90 |                     ("pregao", "Pregão"),
 91 |                     ("inexigibilidade", "Inexigibilidade"),
 92 |                     ("convite", "Convite"),
 93 |                     ("concorrencia", "Concorrência"),
 94 |                     ("dispensa", "Dispensa"),
 95 |                     ("isento", "Isento"),
 96 |                 ],
 97 |                 max_length=50,
 98 |                 null=True,
 99 |             ),
100 |         ),
101 |         migrations.AddField(
102 |             model_name="citycouncilexpense",
103 |             name="phase_code",
104 |             field=models.CharField(blank=True, max_length=20, null=True),
105 |         ),
106 |         migrations.AlterField(  # published_at redefined as an optional date
107 |             model_name="citycouncilexpense",
108 |             name="published_at",
109 |             field=models.DateField(blank=True, null=True),
110 |         ),
111 |     ]
112 | 


--------------------------------------------------------------------------------
/web/datasets/migrations/0002_auto_20200316_1905.py:
--------------------------------------------------------------------------------
  1 | # Generated by Django 3.0 on 2020-03-16 22:05
  2 | 
  3 | import django.db.models.deletion
  4 | import django.utils.timezone
  5 | from django.db import migrations, models
  6 | 
  7 | 
  8 | class Migration(migrations.Migration):  # adds Gazette + GazetteEvent; adjusts agenda
  9 |     dependencies = [
 10 |         ("datasets", "0001_initial"),
 11 |     ]
 12 | 
 13 |     operations = [
 14 |         migrations.CreateModel(
 15 |             name="Gazette",
 16 |             fields=[
 17 |                 (
 18 |                     "id",
 19 |                     models.AutoField(
 20 |                         auto_created=True,
 21 |                         primary_key=True,
 22 |                         serialize=False,
 23 |                         verbose_name="ID",
 24 |                     ),
 25 |                 ),
 26 |                 ("created_at", models.DateTimeField(auto_now_add=True)),
 27 |                 ("updated_at", models.DateTimeField(auto_now=True)),
 28 |                 ("crawled_at", models.DateTimeField()),
 29 |                 ("crawled_from", models.URLField()),
 30 |                 ("notes", models.TextField(blank=True, null=True)),
 31 |                 ("date", models.DateField(null=True)),
 32 |                 (
 33 |                     "power",
 34 |                     models.CharField(
 35 |                         choices=[
 36 |                             ("executivo", "Poder Executivo"),
 37 |                             ("legislativo", "Poder Legislativo"),
 38 |                         ],
 39 |                         max_length=25,
 40 |                     ),
 41 |                 ),
 42 |                 ("year_and_edition", models.CharField(max_length=100)),
 43 |                 ("is_legacy", models.BooleanField(default=False)),
 44 |                 ("file_url", models.URLField(blank=True, null=True)),
 45 |                 ("file_content", models.TextField(blank=True, null=True)),
 46 |             ],
 47 |             options={
 48 |                 "abstract": False,
 49 |             },
 50 |         ),
 51 |         migrations.AddField(
 52 |             model_name="citycouncilagenda",
 53 |             name="created_at",
 54 |             field=models.DateTimeField(
 55 |                 auto_now_add=True, default=django.utils.timezone.now
 56 |             ),
 57 |             preserve_default=False,  # default only backfills existing rows
 58 |         ),
 59 |         migrations.AlterField(
 60 |             model_name="citycouncilagenda",
 61 |             name="crawled_at",
 62 |             field=models.DateTimeField(),
 63 |         ),
 64 |         migrations.AlterField(  # NOTE(review): one-off default is a datetime, not a URL
 65 |             model_name="citycouncilagenda",
 66 |             name="crawled_from",
 67 |             field=models.URLField(default=django.utils.timezone.now),
 68 |             preserve_default=False,
 69 |         ),
 70 |         migrations.CreateModel(
 71 |             name="GazetteEvent",
 72 |             fields=[
 73 |                 (
 74 |                     "id",
 75 |                     models.AutoField(
 76 |                         auto_created=True,
 77 |                         primary_key=True,
 78 |                         serialize=False,
 79 |                         verbose_name="ID",
 80 |                     ),
 81 |                 ),
 82 |                 ("created_at", models.DateTimeField(auto_now_add=True)),
 83 |                 ("updated_at", models.DateTimeField(auto_now=True)),
 84 |                 ("crawled_at", models.DateTimeField()),
 85 |                 ("crawled_from", models.URLField()),
 86 |                 ("notes", models.TextField(blank=True, null=True)),
 87 |                 ("title", models.CharField(blank=True, max_length=300, null=True)),
 88 |                 (
 89 |                     "secretariat",
 90 |                     models.CharField(blank=True, max_length=100, null=True),
 91 |                 ),
 92 |                 ("summary", models.TextField(blank=True, null=True)),
 93 |                 (
 94 |                     "published_on",
 95 |                     models.CharField(blank=True, max_length=100, null=True),
 96 |                 ),
 97 |                 (
 98 |                     "gazette",
 99 |                     models.ForeignKey(
100 |                         on_delete=django.db.models.deletion.CASCADE,
101 |                         to="datasets.Gazette",
102 |                     ),
103 |                 ),
104 |             ],
105 |             options={
106 |                 "abstract": False,
107 |             },
108 |         ),
109 |     ]
110 | 


--------------------------------------------------------------------------------
/web/datasets/migrations/0024_auto_20210326_1704.py:
--------------------------------------------------------------------------------
  1 | # Generated by Django 3.1.7 on 2021-03-26 20:04
  2 | 
  3 | from django.db import migrations, models
  4 | 
  5 | 
  6 | class Migration(migrations.Migration):  # adds SyncInformation; redefines value/code
  7 |     dependencies = [
  8 |         ("datasets", "0023_auto_20201124_0458"),
  9 |     ]
 10 | 
 11 |     operations = [
 12 |         migrations.CreateModel(
 13 |             name="SyncInformation",
 14 |             fields=[
 15 |                 (
 16 |                     "id",
 17 |                     models.AutoField(
 18 |                         auto_created=True,
 19 |                         primary_key=True,
 20 |                         serialize=False,
 21 |                         verbose_name="ID",
 22 |                     ),
 23 |                 ),
 24 |                 (
 25 |                     "created_at",
 26 |                     models.DateTimeField(auto_now_add=True, verbose_name="Criado em"),
 27 |                 ),
 28 |                 (
 29 |                     "updated_at",
 30 |                     models.DateTimeField(auto_now=True, verbose_name="Atualizado em"),
 31 |                 ),
 32 |                 ("date", models.DateField(verbose_name="Data alvo")),
 33 |                 (
 34 |                     "source",
 35 |                     models.CharField(
 36 |                         choices=[
 37 |                             ("camara", "Câmara Municipal"),
 38 |                             ("prefeitura", "Prefeitura"),
 39 |                         ],
 40 |                         db_index=True,
 41 |                         max_length=20,
 42 |                         verbose_name="Fonte",
 43 |                     ),
 44 |                 ),
 45 |                 (
 46 |                     "succeed",
 47 |                     models.BooleanField(  # nullable: True, False or unset
 48 |                         null=True, verbose_name="Concluída com sucesso?"
 49 |                     ),
 50 |                 ),
 51 |                 ("response", models.JSONField(null=True, verbose_name="Resposta")),
 52 |             ],
 53 |         ),
 54 |         migrations.AlterField(
 55 |             model_name="citycouncilcontract",
 56 |             name="external_code",
 57 |             field=models.PositiveIntegerField(
 58 |                 db_index=True, unique=True, verbose_name="Código externo"
 59 |             ),
 60 |         ),
 61 |         migrations.AlterField(
 62 |             model_name="citycouncilcontract",
 63 |             name="value",
 64 |             field=models.DecimalField(
 65 |                 decimal_places=2, max_digits=20, verbose_name="Valor"
 66 |             ),
 67 |         ),
 68 |         migrations.AlterField(
 69 |             model_name="citycouncilexpense",
 70 |             name="value",
 71 |             field=models.DecimalField(
 72 |                 decimal_places=2, max_digits=20, verbose_name="Valor"
 73 |             ),
 74 |         ),
 75 |         migrations.AlterField(
 76 |             model_name="citycouncilrevenue",
 77 |             name="external_code",
 78 |             field=models.PositiveIntegerField(
 79 |                 db_index=True, unique=True, verbose_name="Código externo"
 80 |             ),
 81 |         ),
 82 |         migrations.AlterField(
 83 |             model_name="citycouncilrevenue",
 84 |             name="value",
 85 |             field=models.DecimalField(
 86 |                 decimal_places=2, max_digits=20, verbose_name="Valor"
 87 |             ),
 88 |         ),
 89 |         migrations.AlterField(
 90 |             model_name="historicalcitycouncilcontract",
 91 |             name="value",
 92 |             field=models.DecimalField(
 93 |                 decimal_places=2, max_digits=20, verbose_name="Valor"
 94 |             ),
 95 |         ),
 96 |         migrations.AlterField(
 97 |             model_name="historicalcitycouncilexpense",
 98 |             name="value",
 99 |             field=models.DecimalField(
100 |                 decimal_places=2, max_digits=20, verbose_name="Valor"
101 |             ),
102 |         ),
103 |         migrations.AlterField(  # indexed but, unlike citycouncilrevenue, not unique
104 |             model_name="historicalcitycouncilrevenue",
105 |             name="external_code",
106 |             field=models.PositiveIntegerField(
107 |                 db_index=True, verbose_name="Código externo"
108 |             ),
109 |         ),
110 |         migrations.AlterField(
111 |             model_name="historicalcitycouncilrevenue",
112 |             name="value",
113 |             field=models.DecimalField(
114 |                 decimal_places=2, max_digits=20, verbose_name="Valor"
115 |             ),
116 |         ),
117 |     ]
118 | 


--------------------------------------------------------------------------------
/web/datasets/migrations/0019_auto_20200704_1132.py:
--------------------------------------------------------------------------------
  1 | # Generated by Django 3.0.5 on 2020-07-04 14:32
  2 | 
  3 | import django.db.models.expressions
  4 | from django.db import migrations
  5 | 
  6 | 
  7 | class Migration(migrations.Migration):  # Meta-only: default ordering for nine models
  8 |     dependencies = [
  9 |         ("datasets", "0018_file_external_code"),
 10 |     ]
 11 | 
 12 |     operations = [
 13 |         migrations.AlterModelOptions(
 14 |             name="citycouncilagenda",
 15 |             options={
 16 |                 "get_latest_by": "date",
 17 |                 "ordering": ["-date"],
 18 |                 "verbose_name": "Câmara de Vereadores - Agenda",
 19 |                 "verbose_name_plural": "Câmara de Vereadores - Agendas",
 20 |             },
 21 |         ),
 22 |         migrations.AlterModelOptions(
 23 |             name="citycouncilattendancelist",
 24 |             options={
 25 |                 "get_latest_by": "date",
 26 |                 "ordering": ["-date"],
 27 |                 "verbose_name": "Câmara de Vereadores - Lista de Presença",
 28 |                 "verbose_name_plural": "Câmara de Vereadores - Listas de Presença",
 29 |             },
 30 |         ),
 31 |         migrations.AlterModelOptions(
 32 |             name="citycouncilbid",
 33 |             options={
 34 |                 "get_latest_by": "session_at",
 35 |                 "ordering": [  # newest first, NULLs last
 36 |                     django.db.models.expressions.OrderBy(
 37 |                         django.db.models.expressions.F("session_at"),
 38 |                         descending=True,
 39 |                         nulls_last=True,
 40 |                     )
 41 |                 ],
 42 |                 "verbose_name": "Câmara de Vereadores - Licitação",
 43 |                 "verbose_name_plural": "Câmara de Vereadores - Licitações",
 44 |             },
 45 |         ),
 46 |         migrations.AlterModelOptions(
 47 |             name="citycouncilcontract",
 48 |             options={
 49 |                 "get_latest_by": "start_date",
 50 |                 "ordering": ["-start_date"],
 51 |                 "verbose_name": "Câmara de Vereadores - Contrato",
 52 |                 "verbose_name_plural": "Câmara de Vereadores - Contratos",
 53 |             },
 54 |         ),
 55 |         migrations.AlterModelOptions(
 56 |             name="citycouncilexpense",
 57 |             options={
 58 |                 "get_latest_by": "date",
 59 |                 "ordering": ["-date"],
 60 |                 "verbose_name": "Câmara de Vereadores - Despesa",
 61 |                 "verbose_name_plural": "Câmara de Vereadores - Despesas",
 62 |             },
 63 |         ),
 64 |         migrations.AlterModelOptions(
 65 |             name="citycouncilminute",
 66 |             options={
 67 |                 "get_latest_by": "date",
 68 |                 "ordering": ["-date"],
 69 |                 "verbose_name": "Câmara de Vereadores - Atas",
 70 |                 "verbose_name_plural": "Câmara de Vereadores - Atas",
 71 |             },
 72 |         ),
 73 |         migrations.AlterModelOptions(
 74 |             name="citycouncilrevenue",
 75 |             options={
 76 |                 "get_latest_by": "published_at",
 77 |                 "ordering": [  # newest first, NULLs last
 78 |                     django.db.models.expressions.OrderBy(
 79 |                         django.db.models.expressions.F("published_at"),
 80 |                         descending=True,
 81 |                         nulls_last=True,
 82 |                     )
 83 |                 ],
 84 |                 "verbose_name": "Câmara de Vereadores - Receita",
 85 |                 "verbose_name_plural": "Câmara de Vereadores - Receitas",
 86 |             },
 87 |         ),
 88 |         migrations.AlterModelOptions(
 89 |             name="cityhallbid",
 90 |             options={
 91 |                 "get_latest_by": "session_at",
 92 |                 "ordering": [  # newest first, NULLs last
 93 |                     django.db.models.expressions.OrderBy(
 94 |                         django.db.models.expressions.F("session_at"),
 95 |                         descending=True,
 96 |                         nulls_last=True,
 97 |                     )
 98 |                 ],
 99 |                 "verbose_name": "Prefeitura - Licitação",
100 |                 "verbose_name_plural": "Prefeitura - Licitações",
101 |             },
102 |         ),
103 |         migrations.AlterModelOptions(
104 |             name="file",
105 |             options={
106 |                 "ordering": ["-created_at"],
107 |                 "verbose_name": "Arquivo",
108 |                 "verbose_name_plural": "Arquivos",
109 |             },
110 |         ),
111 |     ]
112 | 


--------------------------------------------------------------------------------
/web/datasets/migrations/0012_auto_20200520_1050.py:
--------------------------------------------------------------------------------
  1 | # Generated by Django 3.0.5 on 2020-05-20 13:50
  2 | 
  3 | import django.contrib.postgres.indexes
  4 | import django.contrib.postgres.search
  5 | import django.db.models.deletion
  6 | from django.db import migrations, models
  7 | 
  8 | 
  9 | class Migration(migrations.Migration):
 10 |     dependencies = [
 11 |         ("contenttypes", "0002_remove_content_type_name"),
 12 |         ("datasets", "0011_auto_20200515_1115"),
 13 |     ]
 14 | 
 15 |     operations = [
 16 |         migrations.AlterField(
 17 |             model_name="citycouncilagenda",
 18 |             name="event_type",
 19 |             field=models.CharField(
 20 |                 blank=True,
 21 |                 choices=[
 22 |                     ("sessao_ordinaria", "Sessão Ordinária"),
 23 |                     ("ordem_do_dia", "Ordem do Dia"),
 24 |                     ("sessao_solene", "Sessão Solene"),
 25 |                     ("sessao_especial", "Sessão Especial"),
 26 |                     ("audiencia_publica", "Audiência Pública"),
 27 |                 ],
 28 |                 max_length=20,
 29 |                 null=True,
 30 |                 verbose_name="Tipo do evento",
 31 |             ),
 32 |         ),
 33 |         migrations.AlterField(
 34 |             model_name="citycouncilminute",
 35 |             name="event_type",
 36 |             field=models.CharField(
 37 |                 blank=True,
 38 |                 choices=[
 39 |                     ("sessao_ordinaria", "Sessão Ordinária"),
 40 |                     ("ordem_do_dia", "Ordem do Dia"),
 41 |                     ("sessao_solene", "Sessão Solene"),
 42 |                     ("sessao_especial", "Sessão Especial"),
 43 |                     ("audiencia_publica", "Audiência Pública"),
 44 |                 ],
 45 |                 max_length=20,
 46 |                 null=True,
 47 |                 verbose_name="Tipo de evento",
 48 |             ),
 49 |         ),
 50 |         migrations.CreateModel(
 51 |             name="File",
 52 |             fields=[
 53 |                 (
 54 |                     "id",
 55 |                     models.AutoField(
 56 |                         auto_created=True,
 57 |                         primary_key=True,
 58 |                         serialize=False,
 59 |                         verbose_name="ID",
 60 |                     ),
 61 |                 ),
 62 |                 (
 63 |                     "created_at",
 64 |                     models.DateTimeField(auto_now_add=True, verbose_name="Criado em"),
 65 |                 ),
 66 |                 (
 67 |                     "updated_at",
 68 |                     models.DateTimeField(auto_now=True, verbose_name="Atualizado em"),
 69 |                 ),
 70 |                 ("url", models.URLField(verbose_name="Arquivo")),
 71 |                 (
 72 |                     "content",
 73 |                     models.TextField(blank=True, null=True, verbose_name="Conteúdo"),
 74 |                 ),
 75 |                 ("object_id", models.PositiveIntegerField()),
 76 |                 ("checksum", models.CharField(blank=True, max_length=128, null=True)),
 77 |                 (
 78 |                     "s3_url",
 79 |                     models.URLField(blank=True, null=True, verbose_name="URL externa"),
 80 |                 ),
 81 |                 (
 82 |                     "s3_file_path",
 83 |                     models.CharField(blank=True, max_length=300, null=True),
 84 |                 ),
 85 |                 (
 86 |                     "search_vector",
 87 |                     django.contrib.postgres.search.SearchVectorField(
 88 |                         editable=False, null=True
 89 |                     ),
 90 |                 ),
 91 |                 (
 92 |                     "content_type",
 93 |                     models.ForeignKey(
 94 |                         on_delete=django.db.models.deletion.CASCADE,
 95 |                         to="contenttypes.ContentType",
 96 |                     ),
 97 |                 ),
 98 |             ],
 99 |             options={
100 |                 "verbose_name": "Arquivo",
101 |                 "verbose_name_plural": "Arquivos",
102 |             },
103 |         ),
104 |         migrations.AddIndex(
105 |             model_name="file",
106 |             index=django.contrib.postgres.indexes.GinIndex(
107 |                 fields=["search_vector"], name="datasets_fi_search__52321c_gin"
108 |             ),
109 |         ),
110 |         migrations.AlterUniqueTogether(
111 |             name="file",
112 |             unique_together={("url", "content_type", "object_id")},
113 |         ),
114 |     ]
115 | 


--------------------------------------------------------------------------------
/web/datasets/migrations/0025_auto_20210327_1144.py:
--------------------------------------------------------------------------------
  1 | # Generated by Django 3.1.7 on 2021-03-27 14:44
  2 | 
  3 | import django.db.models.expressions
  4 | from django.db import migrations, models
  5 | 
  6 | 
  7 | class Migration(migrations.Migration):  # Adds the TCMBADocument model and alters two File fields (s3_file_path, s3_url).
  8 |     dependencies = [  # applied after datasets migration 0024
  9 |         ("datasets", "0024_auto_20210326_1704"),
 10 |     ]
 11 | 
 12 |     operations = [
 13 |         migrations.CreateModel(  # new model: TCMBADocument
 14 |             name="TCMBADocument",
 15 |             fields=[
 16 |                 (
 17 |                     "id",
 18 |                     models.AutoField(
 19 |                         auto_created=True,
 20 |                         primary_key=True,
 21 |                         serialize=False,
 22 |                         verbose_name="ID",
 23 |                     ),
 24 |                 ),
 25 |                 (
 26 |                     "created_at",
 27 |                     models.DateTimeField(auto_now_add=True, verbose_name="Criado em"),
 28 |                 ),
 29 |                 (
 30 |                     "updated_at",
 31 |                     models.DateTimeField(auto_now=True, verbose_name="Atualizado em"),
 32 |                 ),
 33 |                 ("crawled_at", models.DateTimeField(verbose_name="Coletado em")),
 34 |                 ("crawled_from", models.URLField(verbose_name="Fonte")),
 35 |                 (
 36 |                     "notes",
 37 |                     models.TextField(blank=True, null=True, verbose_name="Anotações"),
 38 |                 ),
 39 |                 (
 40 |                     "year",
 41 |                     models.PositiveIntegerField(db_index=True, verbose_name="Ano"),
 42 |                 ),
 43 |                 (
 44 |                     "month",
 45 |                     models.PositiveIntegerField(
 46 |                         db_index=True, null=True, verbose_name="Mês"
 47 |                     ),
 48 |                 ),
 49 |                 (
 50 |                     "period",
 51 |                     models.CharField(
 52 |                         choices=[("mensal", "Mensal"), ("anual", "Anual")],
 53 |                         db_index=True,
 54 |                         max_length=10,
 55 |                         verbose_name="Periodicidade",
 56 |                     ),
 57 |                 ),
 58 |                 (
 59 |                     "category",
 60 |                     models.CharField(
 61 |                         db_index=True, max_length=200, verbose_name="Categoria"
 62 |                     ),
 63 |                 ),
 64 |                 (
 65 |                     "unit",
 66 |                     models.CharField(
 67 |                         db_index=True, max_length=100, verbose_name="Unidade"
 68 |                     ),
 69 |                 ),
 70 |                 (
 71 |                     "inserted_at",
 72 |                     models.DateField(null=True, verbose_name="Inserido em"),
 73 |                 ),
 74 |                 (
 75 |                     "inserted_by",
 76 |                     models.CharField(
 77 |                         blank=True,
 78 |                         max_length=50,
 79 |                         null=True,
 80 |                         verbose_name="Inserido por",
 81 |                     ),
 82 |                 ),
 83 |                 (
 84 |                     "original_filename",
 85 |                     models.CharField(max_length=200, verbose_name="Nome do arquivo"),
 86 |                 ),
 87 |             ],
 88 |             options={
 89 |                 "verbose_name": "TCM-BA - Documento",
 90 |                 "verbose_name_plural": "TCM-BA - Documentos",
 91 |                 "ordering": [  # default ordering: year descending, then month descending
 92 |                     django.db.models.expressions.OrderBy(
 93 |                         django.db.models.expressions.F("year"), descending=True
 94 |                     ),
 95 |                     django.db.models.expressions.OrderBy(
 96 |                         django.db.models.expressions.F("month"), descending=True
 97 |                     ),
 98 |                 ],
 99 |                 "get_latest_by": "inserted_at",
100 |             },
101 |         ),
102 |         migrations.AlterField(  # File.s3_file_path: max_length=400 and a verbose_name
103 |             model_name="file",
104 |             name="s3_file_path",
105 |             field=models.CharField(
106 |                 blank=True, max_length=400, null=True, verbose_name="Caminho interno"
107 |             ),
108 |         ),
109 |         migrations.AlterField(  # File.s3_url: explicit max_length=600
110 |             model_name="file",
111 |             name="s3_url",
112 |             field=models.URLField(
113 |                 blank=True, max_length=600, null=True, verbose_name="URL externa"
114 |             ),
115 |         ),
116 |     ]
117 | 


--------------------------------------------------------------------------------
/web/datasets/migrations/0017_citycouncilrevenue.py:
--------------------------------------------------------------------------------
  1 | # Generated by Django 3.0.6 on 2020-06-04 12:40
  2 | 
  3 | from django.db import migrations, models
  4 | 
  5 | 
  6 | class Migration(migrations.Migration):  # Creates the CityCouncilRevenue model.
  7 |     dependencies = [  # applied after datasets migration 0016
  8 |         ("datasets", "0016_auto_20200522_0647"),
  9 |     ]
 10 | 
 11 |     operations = [
 12 |         migrations.CreateModel(
 13 |             name="CityCouncilRevenue",
 14 |             fields=[
 15 |                 (
 16 |                     "id",
 17 |                     models.AutoField(
 18 |                         auto_created=True,
 19 |                         primary_key=True,
 20 |                         serialize=False,
 21 |                         verbose_name="ID",
 22 |                     ),
 23 |                 ),
 24 |                 (
 25 |                     "created_at",
 26 |                     models.DateTimeField(auto_now_add=True, verbose_name="Criado em"),
 27 |                 ),
 28 |                 (
 29 |                     "updated_at",
 30 |                     models.DateTimeField(auto_now=True, verbose_name="Atualizado em"),
 31 |                 ),
 32 |                 ("crawled_at", models.DateTimeField(verbose_name="Coletado em")),
 33 |                 ("crawled_from", models.URLField(verbose_name="Fonte")),
 34 |                 (
 35 |                     "notes",
 36 |                     models.TextField(blank=True, null=True, verbose_name="Anotações"),
 37 |                 ),
 38 |                 (
 39 |                     "external_code",
 40 |                     models.CharField(
 41 |                         db_index=True, max_length=10, verbose_name="Código externo"
 42 |                     ),
 43 |                 ),
 44 |                 (
 45 |                     "budget_unit",
 46 |                     models.PositiveIntegerField(
 47 |                         default=101, verbose_name="Unidade gestora"
 48 |                     ),
 49 |                 ),
 50 |                 (
 51 |                     "published_at",
 52 |                     models.DateField(
 53 |                         db_index=True, null=True, verbose_name="Publicado em"
 54 |                     ),
 55 |                 ),
 56 |                 (
 57 |                     "registered_at",
 58 |                     models.DateField(
 59 |                         db_index=True, null=True, verbose_name="Registrado em"
 60 |                     ),
 61 |                 ),
 62 |                 (
 63 |                     "revenue_type",
 64 |                     models.CharField(
 65 |                         choices=[
 66 |                             ("orcamentaria", "Orçamentária"),
 67 |                             ("nao_orcamentaria", "Não-orçamentária"),
 68 |                             ("transferencia", "Transferência"),
 69 |                         ],
 70 |                         db_index=True,
 71 |                         max_length=20,
 72 |                         verbose_name="Tipo da receita",
 73 |                     ),
 74 |                 ),
 75 |                 (
 76 |                     "modality",
 77 |                     models.CharField(
 78 |                         blank=True, max_length=60, null=True, verbose_name="Modalidade"
 79 |                     ),
 80 |                 ),
 81 |                 ("description", models.TextField(verbose_name="Descrição")),
 82 |                 (
 83 |                     "value",
 84 |                     models.DecimalField(  # 10 digits in total, 2 of them after the decimal point
 85 |                         decimal_places=2, max_digits=10, verbose_name="Valor"
 86 |                     ),
 87 |                 ),
 88 |                 (
 89 |                     "resource",
 90 |                     models.CharField(
 91 |                         blank=True,
 92 |                         default="prefeitura",
 93 |                         max_length=200,
 94 |                         null=True,
 95 |                         verbose_name="Fonte",
 96 |                     ),
 97 |                 ),
 98 |                 (
 99 |                     "legal_status",
100 |                     models.CharField(
101 |                         blank=True,
102 |                         db_index=True,
103 |                         max_length=200,
104 |                         null=True,
105 |                         verbose_name="Natureza",
106 |                     ),
107 |                 ),
108 |                 (
109 |                     "destination",
110 |                     models.CharField(
111 |                         blank=True, max_length=200, null=True, verbose_name="Destinação"
112 |                     ),
113 |                 ),
114 |                 (
115 |                     "excluded",
116 |                     models.BooleanField(default=False, verbose_name="Excluído?"),
117 |                 ),
118 |             ],
119 |             options={
120 |                 "verbose_name": "Câmara de Vereadores - Receita",
121 |                 "verbose_name_plural": "Câmara de Vereadores - Receitas",
122 |                 "get_latest_by": "published_at",  # default field for latest()/earliest()
123 |             },
124 |         ),
125 |     ]
126 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
  1 | # Guia de contribuição
  2 | 
  3 | Ficamos muito felizes que você está lendo este guia de contribuição, sempre precisamos
  4 | de pessoas voluntárias que acreditem na ideia e queiram contribuir com o projeto.
  5 | 
  6 | Se você ainda não fez isso, junte-se a nós no [nosso grupo aberto do Telegram](https://t.me/dadosabertosdefeira)
  7 | ou no nosso [Discord](https://discord.gg/bPZ9TSjdUn) e participe das discussões.
  8 | Não hesite em nos procurar para tirar todas as suas dúvidas e conhecer mais sobre o projeto.
  9 | 
 10 | [![Convite Discord](https://invidget.switchblade.xyz/bPZ9TSjdUn?language=pt)](https://discord.gg/bPZ9TSjdUn)
 11 | 
 12 | ## Antes de começar
 13 | 
 14 | Aqui estão alguns recursos importantes que você deve estar ciente antes de começar:
 15 | 
 16 | - [Manual de dados abertos para desenvolvedores](https://www.w3c.br/pub/Materiais/PublicacoesW3C/manual_dados_abertos_desenvolvedores_web.pdf)
 17 | te explicará um pouco sobre o que são e os principais conceitos por trás dos dados abertos.
 18 | 
 19 | - Nossos [projetos](https://github.com/DadosAbertosDeFeira/maria-quiteria/projects),
 20 | são um conjunto de funcionalidades e melhorias que queremos desenvolver nesse repositório.
 21 | Caso não tenha nada que seja a sua praia, você pode dar uma olhada nos
 22 | [projetos gerais](https://github.com/orgs/DadosAbertosDeFeira/projects) do projeto.
 23 | 
 24 | - No [nosso Trello](https://trello.com/b/E8v20MFs/dados-abertos-de-feira) você pode
 25 | acompanhar o que a comunidade em geral vem trabalhando. Lá você encontrará coisas desde
 26 | fotografia até pedidos de acesso à informação.
 27 | 
 28 | Os detalhes de como instalar e executar este projeto podem ser encontrados no
 29 | [`README.md`](https://github.com/DadosAbertosDeFeira/maria-quiteria/blob/main/README.md).
 30 | 
 31 | ## Reportando bugs
 32 | 
 33 | Você encontrou um bug?
 34 | 
 35 | * Bugs e sugestões de melhoria são rastreados através de [_issues_](https://guides.github.com/features/issues/)
 36 | e [_pull requests_](https://guides.github.com/activities/hello-world/#pr) no GitHub.
 37 | Verifique se nenhuma _issue_ ou _pull request_ foi criada por outra pessoa com o mesmo bug.
 38 | * Se não, [crie uma _issue_](https://github.com/DadosAbertosDeFeira/maria-quiteria/issues/new)
 39 | explicando o problema e adicionando novas informações detalhadas que ajudem
 40 | a reproduzir o problema.
 41 | 
 42 | ## Sugerindo melhorias
 43 | 
 44 | Você é mais que bem-vinda(o) a sugerir melhorias à MQ. Pedimos apenas que tente incluir o
 45 | máximo de detalhes possível e que verifique se nenhuma _issue_ ou _pull request_ já foi
 46 | criada por outra pessoa com a mesma sugestão.
 47 | 
 48 | Caso seja algo novo, você tem duas alternativas:
 49 | 
 50 | - Criar uma nova _issue_
 51 | - Compartilhar a sua sugestão com outros participantes e mantenedores do projeto em nosso [Discord](https://discord.gg/bPZ9TSjdUn)
 52 | 
 53 | Em ambos, tente usar uma linguagem clara, e com o máximo de detalhes. Qual a motivação,
 54 | qual problema resolveria e possíveis desafios, por exemplo, são importantes para entender
 55 | o que você precisa. Esse é um projeto de código aberto, mantido por voluntários.
 56 | Frequentemente precisamos escolher bem o que vamos fazer com os recursos que temos. :)
 57 | 
 58 | ## Criando _pull requests_
 59 | 
 60 | Você decidiu contribuir para o projeto! Yay!
 61 | 
 62 | Faça um _fork_ do projeto e crie uma nova _branch_.
 63 | Mais detalhes [aqui](https://help.github.com/pt/enterprise/2.17/user/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request-from-a-fork).
 64 | 
 65 | Aqui algumas dicas:
 66 | 
 67 | * Caso decida trabalhar em alguma _issue_, comente na _issue_ escolhida. Dessa forma,
 68 | outras pessoas saberão que tem alguém trabalhando nela. Caso tenha ficado perdido ou com
 69 | dúvidas, peça ajuda.
 70 | 
 71 | * Caso tenha visto algo pontual, como um _typo_ ou algo que pode ser corrigido e testado
 72 | rapidamente e não envolva mudanças estruturais, você é bem-vinda(o) a abrir um novo PR também.
 73 | 
 74 | * Antes de qualquer coisa, tente rodar o projeto localmente.
 75 | 
 76 | * Instale o `pre-commit` localmente. Dessa forma, o código que você _commitar_ já estará
 77 | formatado, com os _imports_ ordenados e mais arrumado.
 78 | 
 79 | * Rode os testes localmente. Além de ser uma boa prática, previne idas e vindas nas
 80 | revisões.
 81 | 
 82 | * Adicione novos testes para novas funcionalidades ou bugs.
 83 | 
 84 | * Use o tempo presente nas mensagens do _commit_. Exemplo: _"Adiciona funcionalidade"_
 85 | e não _"Adicionada a funcionalidade"_.
 86 | 
 87 | * Atualize o [README.md](https://github.com/DadosAbertosDeFeira/maria-quiteria/blob/main/README.md)
 88 | com os detalhes da mudança caso esta inclua uma nova base de dados ou um novo comando na CLI.
 89 | 
 90 | * Embora o código esteja escrito em inglês, por convenção, as mensagens de _commit_,
 91 | comentários, _pull requests_, _issues_, e demais comunicações do projeto deverão ser
 92 | escritas em português.
 93 | 
 94 | * Marque a opção "Permitir edição pelos mantenedores". Assim poderemos fazer modificações de emergência
 95 | mantendo o _pull request_ aberto por você.
 96 | 
 97 | Sentiu falta de algo nesse guia? Conta pra gente!
 98 | 
 99 | Bem-vinda(o) ao Dados Abertos de Feira!
100 | 


--------------------------------------------------------------------------------
/web/datasets/migrations/0008_cityhallbid_cityhallbidevent.py:
--------------------------------------------------------------------------------
  1 | # Generated by Django 3.0.5 on 2020-04-19 15:51
  2 | 
  3 | import django.db.models.deletion
  4 | from django.db import migrations, models
  5 | 
  6 | 
  7 | class Migration(migrations.Migration):  # Creates the CityHallBid and CityHallBidEvent models.
  8 |     dependencies = [  # applied after datasets migration 0007
  9 |         ("datasets", "0007_citycouncilexpense"),
 10 |     ]
 11 | 
 12 |     operations = [
 13 |         migrations.CreateModel(
 14 |             name="CityHallBid",
 15 |             fields=[
 16 |                 (
 17 |                     "id",
 18 |                     models.AutoField(
 19 |                         auto_created=True,
 20 |                         primary_key=True,
 21 |                         serialize=False,
 22 |                         verbose_name="ID",
 23 |                     ),
 24 |                 ),
 25 |                 ("created_at", models.DateTimeField(auto_now_add=True)),
 26 |                 ("updated_at", models.DateTimeField(auto_now=True)),
 27 |                 ("crawled_at", models.DateTimeField()),
 28 |                 ("crawled_from", models.URLField()),
 29 |                 ("notes", models.TextField(blank=True, null=True)),
 30 |                 (
 31 |                     "session_at",
 32 |                     models.DateTimeField(
 33 |                         null=True, verbose_name="Sessão Data / Horário"
 34 |                     ),
 35 |                 ),
 36 |                 (
 37 |                     "public_agency",
 38 |                     models.CharField(max_length=200, verbose_name="Órgão"),
 39 |                 ),
 40 |                 (
 41 |                     "description",
 42 |                     models.TextField(blank=True, null=True, verbose_name="Descrição"),
 43 |                 ),
 44 |                 (
 45 |                     "modality",
 46 |                     models.CharField(
 47 |                         blank=True,
 48 |                         choices=[
 49 |                             ("tomada_de_precos", "Tomada de Preço"),
 50 |                             ("pregao_presencial", "Pregão Presencial"),
 51 |                             ("pregao_eletronico", "Pregão Eletrônico"),
 52 |                             ("leilao", "Leilão"),
 53 |                             ("inexigibilidade", "Inexigibilidade"),
 54 |                             ("dispensada", "Dispensada"),
 55 |                             ("convite", "Convite"),
 56 |                             ("concurso", "Concurso"),
 57 |                             ("concorrencia", "Concorrência"),
 58 |                             ("chamada_publica", "Chamada Pública"),
 59 |                         ],
 60 |                         max_length=60,
 61 |                         null=True,
 62 |                         verbose_name="Modalidade",
 63 |                     ),
 64 |                 ),
 65 |                 ("codes", models.CharField(max_length=300, verbose_name="Códigos")),
 66 |                 (
 67 |                     "file_url",
 68 |                     models.URLField(blank=True, null=True, verbose_name="Arquivo"),
 69 |                 ),
 70 |                 (
 71 |                     "file_content",
 72 |                     models.TextField(blank=True, null=True, verbose_name="Conteúdo"),
 73 |                 ),
 74 |             ],
 75 |             options={
 76 |                 "verbose_name": "Prefeitura - Licitação",
 77 |                 "verbose_name_plural": "Prefeitura - Licitações",
 78 |             },
 79 |         ),
 80 |         migrations.CreateModel(
 81 |             name="CityHallBidEvent",
 82 |             fields=[
 83 |                 (
 84 |                     "id",
 85 |                     models.AutoField(
 86 |                         auto_created=True,
 87 |                         primary_key=True,
 88 |                         serialize=False,
 89 |                         verbose_name="ID",
 90 |                     ),
 91 |                 ),
 92 |                 ("created_at", models.DateTimeField(auto_now_add=True)),
 93 |                 ("updated_at", models.DateTimeField(auto_now=True)),
 94 |                 ("crawled_at", models.DateTimeField()),
 95 |                 ("crawled_from", models.URLField()),
 96 |                 ("notes", models.TextField(blank=True, null=True)),
 97 |                 (
 98 |                     "published_at",
 99 |                     models.DateTimeField(null=True, verbose_name="Publicado em"),
100 |                 ),
101 |                 (
102 |                     "summary",
103 |                     models.TextField(blank=True, null=True, verbose_name="Descrição"),
104 |                 ),
105 |                 (
106 |                     "file_url",
107 |                     models.URLField(blank=True, null=True, verbose_name="Arquivo"),
108 |                 ),
109 |                 (
110 |                     "file_content",
111 |                     models.TextField(blank=True, null=True, verbose_name="Conteúdo"),
112 |                 ),
113 |                 (
114 |                     "bid",
115 |                     models.ForeignKey(  # deleting a bid deletes its events; reverse accessor is `events`
116 |                         on_delete=django.db.models.deletion.CASCADE,
117 |                         related_name="events",
118 |                         to="datasets.CityHallBid",
119 |                     ),
120 |                 ),
121 |             ],
122 |             options={
123 |                 "verbose_name": "Prefeitura - Licitação - Histórico",
124 |                 "verbose_name_plural": "Prefeitura - Licitações - Históricos",
125 |             },
126 |         ),
127 |     ]
128 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Maria Quitéria
  2 | 
  3 | [![CI](https://github.com/DadosAbertosDeFeira/maria-quiteria/actions/workflows/cicd.yml/badge.svg)](https://github.com/DadosAbertosDeFeira/maria-quiteria/actions/workflows/cicd.yml)
  4 | 
  5 | Tem a missão de libertar dados do município de [Feira de Santana](https://pt.wikipedia.org/wiki/Feira_de_Santana).
  6 | Responsável pela raspagem e o armazenamento.
  7 | 
  8 | Não sabe quem foi [Maria Quitéria](https://pt.wikipedia.org/wiki/Maria_Quit%C3%A9ria)?
  9 | 
 10 | ## Dados
 11 | 
 12 | Você pode visualizar e fazer buscas nos nossos dados [aqui](https://mq.dadosabertosdefeira.com.br/painel/).
 13 | 
 14 | | Base de dados | Fonte | Descrição        | Coleta          | Banco de dados | Download |
 15 | | ------------- | ------------- | ------------- |:-------------:|:-----:|:-----:|
 16 | | Agenda (`citycouncil.py`) | Câmara Municipal | Agenda (ordem do dia, homenagens, sessões ordinárias etc) da Câmara Municipal. | :heavy_check_mark: | :heavy_check_mark: | [Kaggle](https://www.kaggle.com/dadosabertosdefeira/agenda-da-cmara-de-vereadores) |
 17 | | Atas das sessões (`citycouncil.py`) | Câmara Municipal | Atas das sessões da Câmara Municipal. | :heavy_check_mark: | :heavy_check_mark: | 🔜 |
 18 | | Lista de Presença (`citycouncil.py`) | Câmara Municipal | Assiduidade dos vereadores da Câmara Municipal. | :heavy_check_mark: | :heavy_check_mark: | [Kaggle](https://www.kaggle.com/dadosabertosdefeira/assiduidade-dos-vereadores) |
 19 | | Despesas (`citycouncil.py`) | Câmara Municipal | Gastos realizados pela Câmara Municipal. | :heavy_check_mark: | :heavy_check_mark: | 🔜 |
 20 | | Contratos (`cityhall.py`) | Prefeitura | Contratos realizados pela prefeitura entre 2016 e 2017. | 🔜 | 🔜 | 🔜 |
 21 | | Diário Oficial (`gazette.py`) | Prefeitura/Câmara de Vereadores | Diário oficial do executivo e legislativo. | :heavy_check_mark: | :heavy_check_mark: | [Kaggle](https://www.kaggle.com/dadosabertosdefeira/dirios-oficiais-do-executivo-e-do-legislativo)  |
 22 | | Licitações (`cityhall.py`) | Prefeitura | Licitações realizadas pela prefeitura desde 2015. | :heavy_check_mark: | :heavy_check_mark: | [Kaggle](https://www.kaggle.com/dadosabertosdefeira/licitaes-da-prefeitura-de-feira-de-santana) |
 23 | | Pagamentos (`cityhall.py`) | Prefeitura | Pagamentos realizados pela prefeitura desde 2010. | 🔜 | 🔜 | 🔜 |
 24 | 
 25 | ## Contribuindo para o projeto
 26 | 
 27 | Contribuições são muito bem-vindas. Veja como contribuir no nosso [Guia de Contribuição](CONTRIBUTING.md).
 28 | 
 29 | Toda a comunicação e demais interações do Dados Abertos de Feira estão sujeitas
 30 | ao nosso [Código de Conduta](CODE_OF_CONDUCT.md).
 31 | 
 32 | ### Configurando seu ambiente
 33 | 
 34 | Você precisará do [Docker](https://docs.docker.com/install/)
 35 | e do [Docker-Compose](https://docs.docker.com/compose/install/) para rodar o projeto.
 36 | 
 37 | #### Carregue as variáveis de ambiente
 38 | 
 39 | Um exemplo das configurações pode ser encontrado no arquivo `.env.example`,
 40 | que deve ser copiado para um arquivo `.env` na raiz do projeto.
 41 | 
 42 | Caso queira utilizar um banco de dados diferente basta configurar a variável
 43 | de ambiente `DATABASE_URL` em seu `.env`.
 44 | 
 45 | #### Instale as dependências e prepare os serviços
 46 | 
 47 | ```bash
 48 | make build
 49 | ```
 50 | 
 51 | O passo anterior vai criar um banco de dados postgres.
 52 | Agora, basta aplicar as `migrations` e executar o `collectstatic`:
 53 | 
 54 | ```
 55 | make migrate
 56 | make collectstatic
 57 | ```
 58 | 
 59 | ### Executando os testes
 60 | 
 61 | ```
 62 | make tests
 63 | ```
 64 | 
 65 | ### Acessando o site
 66 | 
 67 | Rode o servidor com:
 68 | ```
 69 | make run
 70 | ```
 71 | 
 72 | Com as configurações padrão o painel de controle estará acessível pela URL:
 73 | [`localhost:8000`](http://localhost:8000). Veja as bases de dados disponíveis
 74 | no nosso painel público [`localhost:8000/painel`](http://localhost:8000/painel).
 75 | 
 76 | Para navegar no admin, primeiro crie um super administrador:
 77 | ```
 78 | make createsuperuser
 79 | ```
 80 | 
 81 | ### Coletando os dados
 82 | 
 83 | Boa parte dos dados que temos vem da raspagem de dados feita por _spiders_.
 84 | O comando abaixo vai executar todos os _spiders_ e salvar os itens raspados
 85 | no banco de dados:
 86 | 
 87 | ```
 88 | make crawl
 89 | ```
 90 | 
 91 | Durante a coleta e adição ao banco, vamos também tentar extrair o conteúdo
 92 | dos arquivos encontrados.
 93 | 
 94 | ### Rodando os spiders individualmente
 95 | 
 96 | No diretório `scraper` você poderá encontrar os _spiders_ responsáveis pela
 97 | coleta dos dados. Para entender melhor como eles funcionam, dê uma olhada
 98 | na documentação do [scrapy](https://docs.scrapy.org/).
 99 | 
100 | Para rodar um _spider_, execute:
101 | 
102 | ```
103 | SPIDER=citycouncil_agenda make runspider
104 | # ou
105 | SPIDER=citycouncil_agenda START_DATE=03/01/2020 make runspider
106 | ```
107 | 
108 | Para salvar os dados de um _spider_ em um arquivo:
109 | 
110 | ```
111 | docker-compose run --rm web scrapy crawl citycouncil_agenda -o citycouncil_agenda.json
112 | ```
113 | 
114 | Você pode substituir `json` por outros formatos como `csv`.
115 | 
116 | Caso queira passar alguma configuração extra para o Scrapy através
117 | do comando `crawl` você pode adicionar após o parâmetro `--scrapy-args`:
118 | 
119 | ```
120 | docker-compose run --rm web python manage.py crawl --scrapy-args '{"LOG_FILE": "test.log"}'
121 | ```
122 | 
123 | ### API
124 | 
125 | Sobre o acesso à API, veja as instruções em nossa [Wiki](https://github.com/DadosAbertosDeFeira/maria-quiteria/wiki/API).
126 | 
127 | 
128 | ### Infraestrutura
129 | 
130 | Essa aplicação está sendo hospedada no PaaS [Dokku](https://dokku.com/docs/) e todo o código IaC está [nesse repositório](https://github.com/DadosAbertosDeFeira/iac).
131 | 


--------------------------------------------------------------------------------
/web/datasets/adapters.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | 
  3 | from django.contrib.admin.options import get_content_type_for_model
  4 | 
  5 | from web.datasets.models import CityCouncilBid, CityCouncilContract
  6 | from web.datasets.parsers import (
  7 |     city_council_bid_modality_mapping,
  8 |     city_council_revenue_type_mapping,
  9 |     currency_to_float,
 10 |     from_str_to_date,
 11 |     from_str_to_datetime,
 12 |     get_phase,
 13 |     lower,
 14 |     lower_without_spaces,
 15 |     to_boolean,
 16 | )
 17 | 
 18 | logger = logging.getLogger(__name__)
 19 | 
 20 | CITYCOUNCIL_BID_FIELDS_MAPPING = {  # upper-case source key -> target field name for city council bids
 21 |     "CODLIC": "external_code",
 22 |     "CODTIPOLIC": "modality",
 23 |     "NUMLIC": "code",
 24 |     "NUMTIPOLIC": "code_type",
 25 |     "OBJETOLIC": "description",
 26 |     "DTLIC": "session_at",
 27 |     "EXCLUIDO": "excluded",
 28 |     "ARQUIVOS": None,  # no target field name; presumably handled separately by the consumer - TODO confirm
 29 | }
 30 | 
 31 | 
 32 | CITYCOUNCIL_BID_FUNCTIONS = {  # parser callable per target field; keys are values of CITYCOUNCIL_BID_FIELDS_MAPPING
 33 |     "excluded": to_boolean,
 34 |     "session_at": from_str_to_datetime,
 35 |     "modality": city_council_bid_modality_mapping,
 36 | }
 37 | 
 38 | 
 39 | CITYCOUNCIL_CONTRACT_FIELDS_MAPPING = {  # upper-case source key -> target field name for city council contracts
 40 |     "CODCON": "external_code",
 41 |     "DSCON": "description",
 42 |     "OBJETOCON": "details",
 43 |     "CPFCNPJCON": "company_or_person_document",
 44 |     "NMCON": "company_or_person",
 45 |     "VALORCON": "value",
 46 |     "DTCON": "start_date",
 47 |     "DTCONFIM": "end_date",
 48 |     "EXCLUIDO": "excluded",
 49 |     "ARQUIVOS": None,  # no target field name; presumably handled separately by the consumer - TODO confirm
 50 | }
 51 | 
 52 | 
# Converters used by ``to_citycouncil_contract``, keyed by target field name.
# ``map_to_fields`` calls the converter with the stripped source value; fields
# that are not listed here keep the stripped value as is.
CITYCOUNCIL_CONTRACT_FUNCTIONS = {
    "value": currency_to_float,
    "excluded": to_boolean,
    "start_date": from_str_to_date,
    "end_date": from_str_to_date,
}
 59 | 
 60 | 
# Translation table used by ``to_citycouncil_revenue``: source key
# (upper-cased by ``map_to_fields`` before lookup) -> target field name.
CITYCOUNCIL_REVENUE_FIELDS_MAPPING = {
    "CODLINHA": "external_code",
    "CODUNIDGESTORA": "budget_unit",
    "DTPUBLICACAO": "published_at",
    "DTREGISTRO": "registered_at",
    "TIPOREC": "revenue_type",
    "MODALIDADE": "modality",
    "DSRECEITA": "description",
    "VALOR": "value",
    "FONTE": "resource",
    "DSNATUREZA": "legal_status",  # TODO: TCM-BA nature ("natureza")
    "DESTINACAO": "destination",
    "EXCLUIDO": "excluded",
}
 75 | 
 76 | 
# Converters used by ``to_citycouncil_revenue``, keyed by target field name.
# ``map_to_fields`` calls the converter with the stripped source value; fields
# that are not listed here keep the stripped value as is.
CITYCOUNCIL_REVENUE_FUNCTIONS = {
    "excluded": to_boolean,
    "published_at": from_str_to_date,
    "registered_at": from_str_to_date,
    "value": currency_to_float,
    "modality": lower,
    "revenue_type": city_council_revenue_type_mapping,
    "resource": lower,
    "legal_status": lower,
    "destination": lower,
}
 88 | 
 89 | 
# Translation table used by ``to_citycouncil_expense``: source key
# (upper-cased by ``map_to_fields`` before lookup) -> target field name.
CITYCOUNCIL_EXPENSE_FIELDS_MAPPING = {
    "CODARQUIVO": "external_file_code",
    "CODLINHA": "external_file_line",
    "CODUNIDORCAM": "budget_unit",
    "DTPUBLICACAO": "published_at",
    "DTREGISTRO": "date",
    "CODETAPA": "phase",
    "NUMPROCADM": "number",
    "NUMPROCLIC": "process_number",
    "DSDESPESA": "summary",
    "NMCREDOR": "company_or_person",
    "NUCPFCNPJ": "document",
    "VALOR": "value",
    "DSFUNCAO": "function",
    "DSSUBFUNCAO": "subfunction",
    "DSNATUREZA": "legal_status",  # TODO: TCM-BA nature ("natureza")
    "DSFONTEREC": "resource",
    "NUMETAPA": "phase_code",
    "MODALIDADE": "modality",
    "EXCLUIDO": "excluded",
}
111 | 
112 | 
# Converters used by ``to_citycouncil_expense``, keyed by target field name.
# ``map_to_fields`` calls the converter with the stripped source value; fields
# that are not listed here keep the stripped value as is.
CITYCOUNCIL_EXPENSE_FUNCTIONS = {
    "value": currency_to_float,
    "excluded": to_boolean,
    "published_at": from_str_to_date,
    "date": from_str_to_date,
    "phase": get_phase,
    "modality": lower_without_spaces,
}
121 | 
122 | 
def map_to_fields(item, fields_mapping, functions):
    """Translate a raw source record into a dict keyed by target field names.

    Args:
        item: Mapping of source keys to raw values. Keys are upper-cased
            before being looked up, so matching is case-insensitive.
        fields_mapping: Mapping of upper-case source key to target field
            name. A falsy target (e.g. ``None``) drops the key from the
            result.
        functions: Mapping of target field name to a one-argument converter.
            Fields without a converter keep their (stripped) value.

    Returns:
        A new dict with one entry per mapped key of ``item``.

    Raises:
        KeyError: If ``item`` has a key that is missing from
            ``fields_mapping``.
    """
    new_item = {}
    for key, value in item.items():
        field = fields_mapping[key.upper()]
        if not field:
            continue
        # Only strings can be stripped. Other values (e.g. ``None`` for a
        # missing value) used to raise AttributeError here; they are now
        # handed to the converter untouched.
        if isinstance(value, str):
            value = value.strip()
        converter = functions.get(field)
        new_item[field] = value if converter is None else converter(value)
    return new_item
131 | 
132 | 
def to_citycouncil_expense(item):
    """Map a raw city council expense record to its target field names.

    Delegates to ``map_to_fields`` with the expense mapping and converters.
    """
    expense = map_to_fields(
        item=item,
        fields_mapping=CITYCOUNCIL_EXPENSE_FIELDS_MAPPING,
        functions=CITYCOUNCIL_EXPENSE_FUNCTIONS,
    )
    return expense
137 | 
138 | 
def to_citycouncil_contract(item):
    """Map a raw city council contract record to its target field names.

    Delegates to ``map_to_fields`` with the contract mapping and converters.
    """
    contract = map_to_fields(
        item=item,
        fields_mapping=CITYCOUNCIL_CONTRACT_FIELDS_MAPPING,
        functions=CITYCOUNCIL_CONTRACT_FUNCTIONS,
    )
    return contract
143 | 
144 | 
def to_citycouncil_bid(item):
    """Map a raw city council bid record to its target field names.

    Delegates to ``map_to_fields`` with the bid mapping and converters.
    """
    bid = map_to_fields(
        item=item,
        fields_mapping=CITYCOUNCIL_BID_FIELDS_MAPPING,
        functions=CITYCOUNCIL_BID_FUNCTIONS,
    )
    return bid
149 | 
150 | 
def to_citycouncil_revenue(item):
    """Map a raw city council revenue record to its target field names.

    Delegates to ``map_to_fields`` with the revenue mapping and converters.
    """
    revenue = map_to_fields(
        item=item,
        fields_mapping=CITYCOUNCIL_REVENUE_FIELDS_MAPPING,
        functions=CITYCOUNCIL_REVENUE_FUNCTIONS,
    )
    return revenue
155 | 
156 | 
def to_citycouncil_contract_file(item):
    """Build the file fields for an attachment of a city council contract.

    The contract is looked up by ``external_code`` using ``item["CODCON"]``.

    Returns:
        A dict with ``url``, ``content_type``, ``object_id`` and
        ``external_code``, or ``None`` (after logging an error) when no
        matching contract exists.
    """
    contract_code = item["CODCON"]
    try:
        contract = CityCouncilContract.objects.get(external_code=contract_code)
    except CityCouncilContract.DoesNotExist:
        logger.error(f"Contrato não encontrado: {item}")
        return None

    # Resolve the content type first, then read the remaining keys from the item.
    owner_type = get_content_type_for_model(contract)
    return dict(
        url=item["CAMINHO"],
        content_type=owner_type,
        object_id=contract.pk,
        external_code=item["CODARQCON"],
    )
171 | 
172 | 
def to_citycouncil_bid_file(item):
    """Build the file fields for an attachment of a city council bid.

    The bid is looked up by ``external_code`` using ``item["CODLIC"]``.

    Returns:
        A dict with ``url``, ``content_type``, ``object_id`` and
        ``external_code``, or ``None`` (after logging an error) when no
        matching bid exists.
    """
    bid_code = item["CODLIC"]
    try:
        bid = CityCouncilBid.objects.get(external_code=bid_code)
    except CityCouncilBid.DoesNotExist:
        logger.error(f"Licitação não encontrada: {item}")
        return None

    # Resolve the content type first, then read the remaining keys from the item.
    owner_type = get_content_type_for_model(bid)
    return dict(
        url=item["CAMINHOARQLIC"],
        content_type=owner_type,
        object_id=bid.pk,
        external_code=item["CODARQLIC"],
    )
187 | 


--------------------------------------------------------------------------------
/web/datasets/tests/management/commands/test_citycouncil.py:
--------------------------------------------------------------------------------
  1 | from datetime import date, datetime
  2 | 
  3 | import pytest
  4 | from django.utils.timezone import make_aware
  5 | 
  6 | from web.datasets.management.commands._citycouncil import (
  7 |     save_agenda,
  8 |     save_attendance_list,
  9 |     save_minute,
 10 | )
 11 | 
 12 | 
 13 | @pytest.mark.django_db
 14 | class TestSaveAgenda:
 15 |     def test_save_gazette(self):
 16 |         item = {
 17 |             "crawled_at": make_aware(datetime(2020, 3, 21, 7, 15, 17, 908831)),
 18 |             "crawled_from": "https://www.feiradesantana.ba.leg.br/agenda",
 19 |             "date": date(2019, 8, 29),
 20 |             "details": "- Especial , dia 29 (quinta-feira), às 09 horas,"
 21 |             " para apresentar a sociedade\r\n"
 22 |             "civil e aos órgãos competentes e afins, os resultados dos "
 23 |             "trabalhos\r\n"
 24 |             "desenvolvidos pela Fundação Municipal de Tecnologia da "
 25 |             "informação,\r\n"
 26 |             "Telecomunicações e Cultura Egberto Tavares Costa- FUNTITEC, "
 27 |             "atendendo ao\r\n"
 28 |             "Requerimento nº 142/2019.",
 29 |             "event_type": "sessao_especial",
 30 |             "title": "SESSÃO ESPECIAL 29 DE AGOSTO",
 31 |         }
 32 | 
 33 |         agenda = save_agenda(item)
 34 |         assert agenda.date == item["date"]
 35 |         assert agenda.details == item["details"]
 36 |         assert agenda.event_type == item["event_type"]
 37 |         assert agenda.title == item["title"]
 38 |         assert agenda.crawled_at == item["crawled_at"]
 39 |         assert agenda.crawled_from == item["crawled_from"]
 40 | 
 41 |     def test_handle_with_changed_agenda(self):
 42 |         item = {
 43 |             "crawled_at": make_aware(datetime(2020, 3, 21, 7, 15, 17, 908831)),
 44 |             "crawled_from": "https://www.feiradesantana.ba.leg.br/agenda",
 45 |             "date": date(2019, 8, 29),
 46 |             "details": "- Especial , dia 29 (quinta-feira), às 09 horas,"
 47 |             " para apresentar a sociedade\r\n"
 48 |             "civil e aos órgãos competentes e afins, os resultados dos "
 49 |             "trabalhos\r\n"
 50 |             "desenvolvidos pela Fundação Municipal de Tecnologia da "
 51 |             "informação,\r\n"
 52 |             "Telecomunicações e Cultura Egberto Tavares Costa- FUNTITEC, "
 53 |             "atendendo ao\r\n"
 54 |             "Requerimento nº 142/2019.",
 55 |             "event_type": "sessao_especial",
 56 |             "title": "SESSÃO ESPECIAL 29 DE AGOSTO",
 57 |         }
 58 | 
 59 |         agenda = save_agenda(item)
 60 |         item["details"] = "Festa na cidade bla bla bla"
 61 |         item["crawled_at"] = make_aware(datetime(2020, 3, 22, 7, 15, 17, 908831))
 62 | 
 63 |         updated_agenda = save_agenda(item)
 64 | 
 65 |         assert agenda.pk == updated_agenda.pk
 66 |         assert agenda.details != updated_agenda.details
 67 |         assert agenda.crawled_at != updated_agenda.crawled_at
 68 | 
 69 | 
 70 | @pytest.mark.django_db
 71 | class TestSaveAttendanceList:
 72 |     def test_save_attendance_list(self):
 73 |         item = {
 74 |             "date": date(2020, 2, 3),
 75 |             "council_member": "Roberto Luis da Silva Tourinho",
 76 |             "status": "presente",
 77 |             "crawled_at": make_aware(datetime(2020, 3, 21, 7, 15, 17, 276019)),
 78 |             "crawled_from": "https://www.feiradesantana.ba.leg.br/lista/7/03-02-2020",
 79 |         }
 80 | 
 81 |         attendance = save_attendance_list(item)
 82 |         assert attendance.date == item["date"]
 83 |         assert attendance.council_member == item["council_member"]
 84 |         assert attendance.status == item["status"]
 85 |         assert attendance.crawled_at == item["crawled_at"]
 86 |         assert attendance.crawled_from == item["crawled_from"]
 87 | 
 88 |     def test_handle_with_changed_attendance_list(self):
 89 |         item = {
 90 |             "date": date(2020, 2, 3),
 91 |             "description": "Abertura da 1ª etapa do 4º período da 18ª legislatura",
 92 |             "council_member": "Roberto Luis da Silva Tourinho",
 93 |             "status": "ausente",
 94 |             "crawled_at": make_aware(datetime(2020, 3, 21, 7, 15, 17, 276019)),
 95 |             "crawled_from": "https://www.feiradesantana.ba.leg.br/lista/7/03-02-2020",
 96 |         }
 97 | 
 98 |         attendance = save_attendance_list(item)
 99 |         item["status"] = "falta_justificada"
100 |         item["crawled_at"] = make_aware(datetime(2020, 3, 22, 7, 15, 17, 908831))
101 | 
102 |         updated_attendance = save_attendance_list(item)
103 | 
104 |         assert attendance.pk == updated_attendance.pk
105 |         assert attendance.council_member == updated_attendance.council_member
106 |         assert attendance.description == updated_attendance.description
107 |         assert attendance.crawled_from == updated_attendance.crawled_from
108 |         assert attendance.status != updated_attendance.status
109 |         assert attendance.crawled_at != updated_attendance.crawled_at
110 | 
111 | 
@pytest.mark.django_db
class TestSaveMinute:
    """Tests for ``save_minute``."""

    def test_save_minute(self, mock_backup_file):
        """The saved minute keeps the date, title, event type and source URL."""
        attached_file = {
            "url": "https://www.feiradesantana.ba.leg.br/5eaabb5e91088.pd",
            "checksum": "checksum",
            "content": None,
        }
        item = {
            "crawled_at": make_aware(datetime(2020, 4, 30, 18, 18, 56, 173788)),
            "crawled_from": "https://www.feiradesantana.ba.leg.br/atas?"
            "mes=9&ano=2018&Acessar=OK",
            "date": date(2018, 9, 11),
            "event_type": None,
            "files": [attached_file],
            "title": "Ata da 4ª Reunião para Instalação da Comissão Especial",
        }

        minute = save_minute(item)

        for field in ("date", "title", "event_type", "crawled_from"):
            assert getattr(minute, field) == item[field]
136 | 


--------------------------------------------------------------------------------
/scraper/tests/test_utils.py:
--------------------------------------------------------------------------------
  1 | from datetime import date, datetime
  2 | 
  3 | import pytest
  4 | 
  5 | from ..spiders.utils import (
  6 |     extract_date,
  7 |     extract_param,
  8 |     get_git_commit,
  9 |     identify_contract_id,
 10 |     is_url,
 11 |     months_and_years,
 12 |     replace_query_param,
 13 |     strip_accents,
 14 | )
 15 | 
 16 | 
@pytest.mark.parametrize(
    "old_url,field,value,new_url",
    [
        (
            "http://www.diariooficial.feiradesantana.ba.gov.br/"
            "abrir.asp?edi=590&p=1",
            "p",
            999,
            "http://www.diariooficial.feiradesantana.ba.gov.br/"
            "abrir.asp?edi=590&p=999",
        ),
        (
            "http://www.diariooficial.feiradesantana.ba.gov.br/"
            "detalhes.asp?acao=&p=1116&menu=&idsec=1&tipo=&publicacao"
            "=1&st=&rad=&txtlei=''&dtlei=''&dtlei1=''"
            "&edicao=&hom=&ini=&fim=&meshom=#links>",
            "publicacao",
            "88",
            "http://www.diariooficial.feiradesantana.ba.gov.br/"
            "detalhes.asp?acao=&p=1116&menu=&idsec=1&tipo="
            "&publicacao=88&st=&rad=&txtlei=''&dtlei=''&dtlei1=''"
            "&edicao=&hom=&ini=&fim=&meshom=#links>",
        ),
        (
            "detalhes.asp?acao=&p=991&menu=&idsec=1&tipo=&publicacao=1&st=&rad="
            "&txtlei=''&dtlei=''&dtlei1=''&edicao=&hom=&ini=&fim=&meshom=#links",
            "p",
            "",
            "detalhes.asp?acao=&p=&menu=&idsec=1&tipo=&publicacao=1&st=&rad="
            "&txtlei=''&dtlei=''&dtlei1=''&edicao=&hom=&ini=&fim=&meshom=#links",
        ),
    ],
)
def test_replace_query_parameter_from_a_url(old_url, field, value, new_url):
    """Check that ``replace_query_param`` sets ``field`` to ``value``.

    The cases cover an absolute URL with an integer value, a URL with many
    empty parameters and a fragment, and a relative URL whose parameter is
    replaced by an empty string.
    """
    assert replace_query_param(old_url, field, value) == new_url
 52 | 
 53 | 
# NOTE(review): the "��" sequences below are kept verbatim. Presumably they
# reproduce mis-encoded text found in the scraped pages; confirm before
# "fixing" them.
@pytest.mark.parametrize(
    "text, expected_contract_id",
    [
        (" CONTRATO N�� 295-2017-10C ", "295-2017-10C"),
        ("CONTRATO N° 11-2017-10C", "11-2017-10C"),
        ("4/2016/09C", "4/2016/09C"),
        ("860/2015/05C", "860/2015/05C"),
        ("3-2017-1926C", "3-2017-1926C"),
        ("CONTRATO N�� 23820161111 ", "23820161111"),
        ("CONTRATO N° 05820171111 ", "05820171111"),
        ("CONTRATO N° 010521004-2017", "010521004-2017"),
    ],
)
def test_identify_contract_ids(text, expected_contract_id):
    """Check that ``identify_contract_id`` extracts the contract id from text.

    The cases include ids given alone and ids preceded by a "CONTRATO N..."
    label, with and without surrounding spaces.
    """
    assert identify_contract_id(text) == expected_contract_id
 69 | 
 70 | 
@pytest.mark.parametrize(
    "url, param, value",
    [
        (
            "http://www.feiradesantana.ba.gov.br/seadm/servicos.asp?"
            "id=2&s=a&link=seadm/licitacoes_pm.asp&cat=PMFS&dt=01-2019#links",
            "dt",
            "01-2019",
        ),
        ("http://www.ba.gov.br/servicos.asp?dt=01-2019#links", "dt", "01-2019"),
        ("http://www.ba.gov.br/servicos.asp?dt=01-2019#links", "invalid", None),
    ],
)
def test_extract_param(url, param, value):
    """Check that ``extract_param`` returns the value of a query parameter.

    The last case expects ``None`` for a parameter that is not in the URL.
    """
    assert extract_param(url, param) == value
 86 | 
 87 | 
@pytest.mark.parametrize(
    "start_date,end_date,expected_month_and_year",
    [
        (datetime(2020, 1, 10), datetime(2020, 3, 1), [(2, 2020), (3, 2020)]),
        (
            datetime(2019, 10, 1),
            datetime(2020, 3, 1),
            [(11, 2019), (12, 2019), (1, 2020), (2, 2020), (3, 2020)],
        ),
        (datetime(2020, 2, 10), datetime(2020, 3, 1), [(3, 2020)]),
        (datetime(2020, 6, 1), datetime(2020, 3, 1), []),
        (
            datetime(2008, 10, 11),
            datetime(2012, 3, 29),
            [(11, 2008), (12, 2008)]
            + [(m, y) for y in range(2009, 2012) for m in range(1, 13)]
            + [(1, 2012), (2, 2012), (3, 2012)],
        ),
        (datetime(2020, 4, 14), datetime(2020, 4, 23), [(4, 2020)]),
    ],
)
def test_months_and_years(start_date, end_date, expected_month_and_year):
    """Check the ``(month, year)`` tuples returned by ``months_and_years``.

    In these cases the start month is left out when the range spans several
    months, a range inside a single month yields that month, and a start
    date after the end date yields an empty list.
    """
    assert months_and_years(start_date, end_date) == expected_month_and_year
111 | 
112 | 
@pytest.mark.parametrize(
    "str_with_date,expected_obj",
    [
        ("26/02/2020", date(2020, 2, 26)),
        ("26/02/2020 19:28", date(2020, 2, 26)),
        ("26/02/20", date(2020, 2, 26)),
        ("26.02.20", None),
        ("Random", None),
    ],
)
def test_extract_date(str_with_date, expected_obj):
    """Check that ``extract_date`` finds a slash-separated date in a string.

    Four- and two-digit years and a trailing time are accepted; a
    dot-separated date and text without a date are expected to give ``None``.
    """
    assert extract_date(str_with_date) == expected_obj
125 | 
126 | 
@pytest.mark.parametrize(
    "original_value,expected_value",
    [
        ("tomada", "tomada"),
        ("pregão presencial", "pregao presencial"),
        ("pregão eletrônico", "pregao eletronico"),
        ("concorrência", "concorrencia"),
        ("çãôéà", "caoea"),
        (None, None),
    ],
)
def test_strip_accents(original_value, expected_value):
    """Check that ``strip_accents`` replaces accented letters by plain ones.

    ``None`` is expected to be returned unchanged.
    """
    assert strip_accents(original_value) == expected_value
140 | 
141 | 
@pytest.mark.parametrize(
    "original_value,expected_value",
    [
        ("google.com", True),
        ("www.google", True),
        ("feiraeh.top", True),
        ("http://feiradesantana.com.br", True),
        ("https://feiradesantana.com.br", True),
        ("http://www.feiradesantana.com.br", True),
        ("https://www.feiradesantana.com.br", True),
        ("https://monitor.dadosabertosdefeira.com.br", True),
        ("http://www.feiradesantana.ba.gov.br/Word - Port20130001.pdf", True),
        ("tel:42384248", False),
        ("bobagem", False),
        ("#", False),
        (None, False),
    ],
)
def test_is_url(original_value, expected_value):
    """Check that ``is_url`` returns exactly ``True`` or ``False``.

    Bare domains, http(s) URLs and a URL with spaces in its path are expected
    to be accepted; a ``tel:`` link, a plain word, ``#`` and ``None`` are
    expected to be rejected. The ``is`` comparison also pins the return type.
    """
    assert is_url(original_value) is expected_value
163 | 
164 | 
def test_get_git_commit(monkeypatch):
    """``get_git_commit`` returns the 40-character value set in ``GIT_REV``."""
    sha = "43fb0339d3758204cef63d3bc3ffadfda9b8dd3b"
    monkeypatch.setenv("GIT_REV", sha)

    result = get_git_commit()

    assert len(result) == 40
    assert result == sha
173 | 
174 | 
def test_get_git_commit_when_git_rev_is_none(monkeypatch):
    """``get_git_commit`` is expected to return "" when ``GIT_REV`` has no value."""
    # NOTE(review): environment values must be strings. Recent pytest versions
    # appear to convert this ``None`` to the string "None" (with a warning)
    # instead of unsetting the variable. If the intent is "variable not set",
    # ``monkeypatch.delenv("GIT_REV", raising=False)`` is probably the right
    # call; confirm against ``get_git_commit`` before changing it.
    monkeypatch.setenv("GIT_REV", None)

    assert get_git_commit() == ""
179 | 


--------------------------------------------------------------------------------
/web/api/tests/test_serializers.py:
--------------------------------------------------------------------------------
  1 | from datetime import date, datetime
  2 | 
  3 | import pytest
  4 | from dateutil.parser import parse
  5 | from model_bakery import baker
  6 | 
  7 | from web.api.serializers import (
  8 |     CityCouncilAgendaSerializer,
  9 |     CityCouncilAttendanceListSerializer,
 10 |     CityCouncilMinuteSerializer,
 11 |     CityHallBidEventSerializer,
 12 |     CityHallBidSerializer,
 13 |     FileSerializer,
 14 | )
 15 | 
# Apply the ``django_db`` marker to every test in this module.
pytestmark = pytest.mark.django_db
 17 | 
 18 | 
 19 | class TestCityCouncilAgendaSerializer:
 20 |     def test_city_council_agenda_serializer(self):
 21 |         data = {
 22 |             "date": "2020-03-18",
 23 |             "details": "PROJETOS DE LEI ORDINÁRIA EM 2ª DISCUSSÃO 017/20",
 24 |             "event_type": "sessao_ordinaria",
 25 |             "title": "ORDEM DO DIA - 18 DE MARÇO DE 2020",
 26 |             "crawled_at": "2020-01-01T04:16:13-04:00",
 27 |             "crawled_from": "http://www.pudim.com.br/",
 28 |         }
 29 |         serializer = CityCouncilAgendaSerializer(data=data)
 30 | 
 31 |         assert serializer.is_valid()
 32 |         assert (
 33 |             serializer.validated_data["date"]
 34 |             == parse(data["date"], dayfirst=True).date()
 35 |         )
 36 |         assert serializer.validated_data["details"] == data["details"]
 37 |         assert serializer.validated_data["event_type"] == data["event_type"]
 38 |         assert serializer.validated_data["title"] == data["title"]
 39 |         assert serializer.validated_data["crawled_at"] == datetime.fromisoformat(
 40 |             data["crawled_at"]
 41 |         )
 42 |         assert serializer.validated_data["crawled_from"] == data["crawled_from"]
 43 | 
 44 | 
 45 | class TestCityCouncilAttendanceList:
 46 |     def test_city_council_attendance_list(self):
 47 |         data = {
 48 |             "date": date(2020, 12, 14),
 49 |             "description": None,
 50 |             "council_member": "Zé Curuca",
 51 |             "status": "ausente",
 52 |             "crawled_at": "2020-01-01T04:16:13-03:00",
 53 |             "crawled_from": (
 54 |                 "https://www.feiradesantana.ba.leg.br/"
 55 |                 "lista-presenca-vereadores/107/14-12-2020"
 56 |             ),
 57 |             "notes": "-",
 58 |         }
 59 | 
 60 |         serializer = CityCouncilAttendanceListSerializer(data=data)
 61 |         assert serializer.is_valid()
 62 |         assert serializer.validated_data["date"] == data["date"]
 63 |         assert serializer.validated_data["description"] == data["description"]
 64 |         assert serializer.validated_data["council_member"] == data["council_member"]
 65 |         assert serializer.validated_data["status"] == data["status"]
 66 |         assert serializer.validated_data["crawled_at"] == datetime.fromisoformat(
 67 |             data["crawled_at"]
 68 |         )
 69 |         assert serializer.validated_data["crawled_from"] == data["crawled_from"]
 70 |         assert serializer.validated_data["notes"] == data["notes"]
 71 | 
 72 | 
 73 | class TestCityCouncilMinuteSerializer:
 74 |     def test_city_council_minute_serializer(self):
 75 |         data = {
 76 |             "date": "2020-03-18",
 77 |             "event_type": "sessao_ordinaria",
 78 |             "title": "ORDEM DO DIA - 18 DE MARÇO DE 2020",
 79 |             "crawled_at": "2020-01-01T04:16:13-04:00",
 80 |             "crawled_from": "http://www.pudim.com.br/",
 81 |             "files": [
 82 |                 {
 83 |                     "url": "https://www.feiradesantana.ba.leg.br/5eaabb5e91088.pd",
 84 |                     "checksum": "checksum",
 85 |                     "content": None,
 86 |                 },
 87 |             ],
 88 |         }
 89 |         serializer = CityCouncilMinuteSerializer(data=data)
 90 | 
 91 |         assert serializer.is_valid()
 92 |         assert (
 93 |             serializer.validated_data["date"]
 94 |             == parse(data["date"], dayfirst=True).date()
 95 |         )
 96 |         assert serializer.validated_data["event_type"] == data["event_type"]
 97 |         assert serializer.validated_data["title"] == data["title"]
 98 | 
 99 | 
class TestCityHallBidEventSerializer:
    """Validation test for ``CityHallBidEventSerializer``."""

    def test_city_hall_bid_event_serializer(self):
        """An event payload pointing to an existing bid is valid."""
        bid = baker.make_recipe("datasets.CityHallBid")
        published_at = "2020-07-21T11:49:00-03:00"
        summary = "Julgamento do recurso administrativo"

        data = {
            "published_at": published_at,
            "summary": summary,
            "bid": bid.pk,
            "crawled_at": datetime.now(),
            "crawled_from": "https://www.example.com",
        }

        serializer = CityHallBidEventSerializer(data=data)
        assert serializer.is_valid()

        validated = serializer.validated_data
        assert validated["published_at"] == datetime.fromisoformat(published_at)
        assert validated["summary"] == summary
        # The primary key sent in the payload comes back as the bid instance.
        assert validated["bid"] == bid
120 | 
121 | 
class TestFileSerializer:
    """Validation test for ``FileSerializer``."""

    def test_file_serializer(self):
        """A payload with only a URL is valid and the URL is kept."""
        file_url = "https://www.example.com/file.pdf"

        serializer = FileSerializer(data={"url": file_url})

        assert serializer.is_valid()
        assert serializer.validated_data["url"] == file_url
129 | 
130 | 
class TestCityHallBidSerializer:
    """Validation test for ``CityHallBidSerializer``."""

    def test_city_hall_bid_serializer(self):
        """A bid payload with one event and one file is valid."""
        event = {
            "id": 243,
            "created_at": "2021-01-01T20:00:32.209476-03:00",
            "updated_at": "2021-01-01T20:00:32.209508-03:00",
            "crawled_at": "2021-01-01T20:00:32.185236-03:00",
            "crawled_from": "http://www.dadosdafeira.br/teste",
            "notes": "",
            "published_at": "2020-07-21T11:49:00-03:00",
            "summary": "Julgamento do recurso administrativo",
            "bid": 315,
        }
        data = {
            "session_at": "2021-01-06T08:30:00-03:00",
            "public_agency": "PMFS",
            "description": "Contratação de empresa de engenharia",
            "modality": "convite",
            "codes": "LICITAÇÃO Nº 150-2020 TOMADA DE PREÇO Nº 038-2020",
            "crawled_at": "2020-01-01T04:16:13-04:00",
            "crawled_from": "http://www.pudim.com.br/",
            "events": [event],
            "files": [{"url": "http://www.dadosdafeira.br/licitacoes/testes.pdf"}],
        }

        serializer = CityHallBidSerializer(data=data)
        assert serializer.is_valid()

        validated = serializer.validated_data
        # Timestamps are sent as ISO strings, so compare the parsed values.
        session_at = datetime.fromisoformat(data["session_at"])
        assert validated["session_at"] == session_at
        for field in ("public_agency", "description", "modality", "codes"):
            assert validated[field] == data[field]
        crawled_at = datetime.fromisoformat(data["crawled_at"])
        assert validated["crawled_at"] == crawled_at
        assert validated["crawled_from"] == data["crawled_from"]
171 | 


--------------------------------------------------------------------------------