├── apps ├── __init__.py └── civic_pulse │ ├── __init__.py │ ├── api │ ├── __init__.py │ ├── viewsets.py │ └── serializers.py │ ├── tests │ ├── __init__.py │ ├── test_models.py │ ├── test_views.py │ └── test_api.py │ ├── utils │ ├── __init__.py │ └── load_models.py │ ├── migrations │ ├── __init__.py │ ├── 0010_entry_notes.py │ ├── 0006_entry_hsts_enabled.py │ ├── 0007_increase_length_add_notes.py │ ├── 0008_agency_aliases.py │ ├── 0002_increase_name_length.py │ ├── 0004_bunch_of_boolean_flags.py │ ├── 0009_auto_20200115_0210.py │ ├── 0003_extra_fields_on_agency.py │ ├── 0005_add_agency_fields.py │ └── 0001_initial.py │ ├── static │ ├── favicon.ico │ ├── images │ │ ├── BlueLogo.png │ │ ├── cp_icon.png │ │ ├── cp_icon.pxm │ │ ├── WhiteLogo.png │ │ ├── map_boston.png │ │ ├── civicSquare.png │ │ └── boston_screenshot.png │ └── styles.css │ ├── apps.py │ ├── admin.py │ ├── templates │ ├── check_box.html │ ├── base.html │ ├── home.html │ ├── agency-list.html │ └── agency-detail.html │ ├── management │ └── commands │ │ └── create_scraper_user.py │ ├── views.py │ └── models.py ├── scrapers ├── __init__.py ├── scrapers │ ├── __init__.py │ ├── base_scraper.py │ ├── security_scraper.py │ ├── base_api_client.py │ ├── accessibility_scraper.py │ └── social_scraper.py ├── agency_api_service.py ├── scrape_handler.py ├── lighthouse.py ├── settings.py ├── urls.json ├── agency_dataaccessor.py ├── process_agency_info.py └── README.rst ├── config ├── settings │ ├── __init__.py │ └── settings.py ├── .DS_Store ├── wsgi.py └── urls.py ├── dev_requirements.txt ├── .DS_Store ├── README_images └── scorecard.png ├── scheduler ├── requirements.txt ├── src │ ├── server.py │ ├── biz │ │ ├── job_config.json │ │ ├── agency_api_service.py │ │ ├── scrape_data.py │ │ └── scheduler.py │ ├── controllers │ │ ├── registrator.py │ │ └── health.py │ └── app.py ├── Dockerfile └── scheduler-readme.md ├── .pre-commit-config.yaml ├── requirements.txt ├── deploy ├── container_start.sh ├── nginx │ └── nginx.conf 
├── docker-compose.yaml └── Dockerrun.aws.json ├── .ebextensions └── django.config ├── .editorconfig ├── .github └── workflows │ ├── deploy.yml │ └── lint_and_test.yml ├── Dockerfile ├── manage.py ├── LICENSE ├── CivicPulse.drawio ├── .gitignore └── README.md /apps/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scrapers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /apps/civic_pulse/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /config/settings/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /apps/civic_pulse/api/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scrapers/scrapers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /apps/civic_pulse/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /apps/civic_pulse/tests/test_models.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /apps/civic_pulse/tests/test_views.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /apps/civic_pulse/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /apps/civic_pulse/migrations/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dev_requirements.txt: -------------------------------------------------------------------------------- 1 | black 2 | flake8 3 | coloredlogs==10.0 4 | -------------------------------------------------------------------------------- /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeforboston/GovLens/HEAD/.DS_Store -------------------------------------------------------------------------------- /config/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeforboston/GovLens/HEAD/config/.DS_Store -------------------------------------------------------------------------------- /README_images/scorecard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeforboston/GovLens/HEAD/README_images/scorecard.png -------------------------------------------------------------------------------- /scheduler/requirements.txt: -------------------------------------------------------------------------------- 1 | APScheduler==3.6.1 2 | requests==2.21.0 3 | aiohttp==2.3.9 4 | structlog==16.1.0 5 | boto3==1.4.4 -------------------------------------------------------------------------------- /apps/civic_pulse/static/favicon.ico: -------------------------------------------------------------------------------- 
from django.apps import AppConfig


class CivicPulseConfig(AppConfig):
    """Django application configuration for the civic_pulse app."""

    # App label used by Django's app registry.
    # NOTE(review): the package lives under apps/civic_pulse — confirm this
    # matches the entry in INSTALLED_APPS (settings not visible here).
    name = "civic_pulse"
-------------------------------------------------------------------------------- /apps/civic_pulse/static/images/map_boston.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeforboston/GovLens/HEAD/apps/civic_pulse/static/images/map_boston.png -------------------------------------------------------------------------------- /apps/civic_pulse/static/images/civicSquare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeforboston/GovLens/HEAD/apps/civic_pulse/static/images/civicSquare.png -------------------------------------------------------------------------------- /apps/civic_pulse/static/images/boston_screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeforboston/GovLens/HEAD/apps/civic_pulse/static/images/boston_screenshot.png -------------------------------------------------------------------------------- /apps/civic_pulse/admin.py: -------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | 3 | from .models import Agency 4 | 5 | # Register your models here. 
6 | admin.site.register(Agency) 7 | -------------------------------------------------------------------------------- /apps/civic_pulse/templates/check_box.html: -------------------------------------------------------------------------------- 1 | {% if has_feature %} 2 | 3 | {% else %} 4 | 5 | {% endif %} -------------------------------------------------------------------------------- /scheduler/src/biz/job_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "job_trigger_settings": { 3 | "day_of_job": "sun", 4 | "hour": "11", 5 | "minute": "25", 6 | "second": "32" 7 | }, 8 | "interval_between_runs_seconds": 20, 9 | "agency_batch_size": 2 10 | } -------------------------------------------------------------------------------- /scheduler/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.7-alpine 2 | COPY . /app 3 | WORKDIR /app 4 | RUN date 5 | RUN apk add tzdata 6 | RUN apk add build-base 7 | RUN cp /usr/share/zoneinfo/America/New_York /etc/localtime 8 | RUN echo "America/New_York" > /etc/timezone 9 | RUN date 10 | RUN pip install -r requirements.txt 11 | CMD [ "python", "-u", "src/server.py" ] -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/ambv/black 3 | rev: stable 4 | hooks: 5 | - id: black 6 | language_version: python3 7 | - repo: https://gitlab.com/pycqa/flake8 8 | rev: 3.7-maintenance 9 | hooks: 10 | - id: flake8 11 | args: 12 | - '--ignore' 13 | - 'E501,W503,E203' 14 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4==4.7.1 2 | certifi==2019.3.9 3 | chardet==3.0.4 4 | Django==2.2.10 5 | djangorestframework==3.9.3 6 | 
from .health import HealthController


class Registrator:
    """Collects controllers and wires each one's routes into the app."""

    def __init__(self, app, base_url):
        # Every controller must expose register_routes(app, base_url);
        # new controllers get appended to this list as they are added.
        self.app = app
        self.base_url = base_url
        self.controllers = [HealthController()]

    def register_controllers(self):
        """Ask each known controller to attach its routes to the app."""
        for ctrl in self.controllers:
            ctrl.register_routes(self.app, self.base_url)
-------------------------------------------------------------------------------- /.ebextensions/django.config: -------------------------------------------------------------------------------- 1 | container_commands: 2 | 01_migrate: 3 | command: "python manage.py migrate" 4 | leader_only: true 5 | option_settings: 6 | aws:elasticbeanstalk:application:environment: 7 | DJANGO_SETTINGS_MODULE: config.settings.settings 8 | aws:elasticbeanstalk:container:python: 9 | WSGIPath: config/wsgi.py 10 | 11 | 12 | packages: 13 | yum: 14 | git: [] 15 | postgresql93-devel: [] 16 | libjpeg-turbo-devel: [] -------------------------------------------------------------------------------- /scheduler/src/controllers/health.py: -------------------------------------------------------------------------------- 1 | from aiohttp import web 2 | 3 | 4 | class HealthController: 5 | def __init__(self): 6 | self.resource = "/health" 7 | 8 | def register_routes(self, app: web.Application, base_url: str): 9 | app.router.add_get(base_url + self.resource, self.__health_) 10 | 11 | async def __health_(self, request: web.Request): 12 | print("received health request") 13 | return web.Response(text="OK") 14 | -------------------------------------------------------------------------------- /apps/civic_pulse/migrations/0010_entry_notes.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.1.11 on 2020-01-22 01:07 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ("civic_pulse", "0009_auto_20200115_0210"), 10 | ] 11 | 12 | operations = [ 13 | migrations.AddField( 14 | model_name="entry", name="notes", field=models.TextField(blank=True), 15 | ), 16 | ] 17 | -------------------------------------------------------------------------------- /config/wsgi.py: -------------------------------------------------------------------------------- 1 | """ 2 | WSGI config for 
from rest_framework import viewsets
from apps.civic_pulse.api.serializers import AgencySerializer, EntrySerializer
from apps.civic_pulse.models import Agency, Entry


class AgencyViewSet(viewsets.ModelViewSet):
    """Full CRUD REST endpoints (list/retrieve/create/update/delete) for Agency."""

    queryset = Agency.objects.all()
    serializer_class = AgencySerializer


class EntryViewSet(viewsets.ModelViewSet):
    """Full CRUD REST endpoints for Entry records."""

    queryset = Entry.objects.all()
    serializer_class = EntrySerializer
-------------------------------------------------------------------------------- 1 | # Generated by Django 2.1 on 2019-07-16 23:23 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ("civic_pulse", "0005_add_agency_fields"), 10 | ] 11 | 12 | operations = [ 13 | migrations.AddField( 14 | model_name="entry", 15 | name="hsts_enabled", 16 | field=models.BooleanField(default=False), 17 | ), 18 | ] 19 | -------------------------------------------------------------------------------- /apps/civic_pulse/migrations/0007_increase_length_add_notes.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.1 on 2019-12-05 05:37 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ("civic_pulse", "0006_entry_hsts_enabled"), 10 | ] 11 | 12 | operations = [ 13 | migrations.AddField( 14 | model_name="agency", 15 | name="notes", 16 | field=models.CharField(blank=True, max_length=500), 17 | ) 18 | ] 19 | -------------------------------------------------------------------------------- /apps/civic_pulse/migrations/0008_agency_aliases.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.1.11 on 2020-01-08 02:36 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ("civic_pulse", "0007_increase_length_add_notes"), 10 | ] 11 | 12 | operations = [ 13 | migrations.AddField( 14 | model_name="agency", 15 | name="aliases", 16 | field=models.TextField(blank=True, max_length=500), 17 | ), 18 | ] 19 | -------------------------------------------------------------------------------- /apps/civic_pulse/migrations/0002_increase_name_length.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 
"""Shared base class for the concrete scrapers.

Holds the fetched page content plus its URL and provides helpers for
building the uniform ``{"met_criteria": ..., "info": ...}`` result dict.
"""


class BaseScraper:
    """Blueprint scraper: stores raw page content and the page's URL."""

    def __init__(self, raw_page_content, url):
        # Raw payload fetched for `url`; subclasses parse it.
        self.raw_page_content = raw_page_content
        self.url = url

    def get_random_value(self, url):
        """Return a default "criteria met, no info" result.

        The `url` argument is accepted for interface compatibility with the
        concrete scrapers but is not used here.
        """
        return self.get_criteria_object(None, True)

    def get_criteria_object(self, criteria, is_met):
        """Wrap one scrape outcome in the standard result dict."""
        return dict(met_criteria=is_met, info=criteria)
Run a one-line script 31 | run: echo Hello, world! 32 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.7-alpine 2 | 3 | ENV PYTHONUNBUFFERED 1 4 | 5 | ARG PROJECT=muckrock 6 | ARG PROJECT_DIR=/usr/src/app 7 | 8 | RUN mkdir -p $PROJECT_DIR 9 | WORKDIR $PROJECT_DIR 10 | 11 | COPY requirements.txt . 12 | RUN apk update && apk add postgresql-dev gcc python3-dev musl-dev zlib-dev jpeg-dev \ 13 | zlib-dev \ 14 | freetype-dev \ 15 | lcms2-dev \ 16 | openjpeg-dev \ 17 | tiff-dev \ 18 | tk-dev \ 19 | tcl-dev \ 20 | harfbuzz-dev \ 21 | fribidi-dev 22 | RUN pip3 install --upgrade setuptools 23 | RUN pip3 install -r requirements.txt 24 | 25 | # Server 26 | EXPOSE 8000 27 | STOPSIGNAL SIGINT 28 | -------------------------------------------------------------------------------- /scrapers/agency_api_service.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import requests 4 | 5 | from . 
import logging

import requests

from . import settings

logger = logging.getLogger(__name__)


class AgencyApiService:
    """Thin client for the GovLens agencies REST API."""

    def __init__(self):
        # Endpoint and auth token come from the scrapers' settings module.
        self.base_url = settings.GOVLENS_API_ENDPOINT

    def get_all_agencies(self):
        """Fetch every agency record from the API.

        Returns the decoded JSON payload (a list of agency dicts), or
        None if the request failed (the failure is logged).
        """
        try:
            return self._get(self.base_url)
        except Exception:
            # Bug fix: the original called logger.error(ex, "message"),
            # passing the exception object as the logging format string.
            # logger.exception logs the message plus the full traceback.
            logger.exception("Error while retrieving all the agency information")

    def _get(self, url):
        """GET `url` with the API token; raise for non-2xx; return JSON."""
        response = requests.get(
            url,
            headers={
                "Content-type": "application/json",
                "Authorization": "Token {}".format(settings.GOVLENS_API_TOKEN),
            },
        )
        response.raise_for_status()
        return response.json()
import logging
from .process_agency_info import AgencyInfo
from .agency_api_service import AgencyApiService

from . import settings

settings.setup_logging()

logger = logging.getLogger(__name__)


# Entry point invoked by the AWS Lambda runtime (and by the __main__ block).
def scrape_data(event, context=None):
    """Scrape every agency listed in event["agencies"].

    event: dict whose "agencies" key holds a list of agency dicts.
    context: Lambda context object; unused.
    """
    # Bug fix: the original read event["agencies"] first, which raised
    # KeyError before the missing-key guard could run. Using .get() lets a
    # missing, None, or empty list all take the warning path below.
    agencies = event.get("agencies")
    if not agencies:
        logger.warning("No Agency information was passed to scrape")
        return

    for agency in agencies:
        AgencyInfo(agency).process_agency_info()


if __name__ == "__main__":

    agency_api_service = AgencyApiService()
    agencies = agency_api_service.get_all_agencies()
    event = {"agencies": agencies}
    scrape_data(event)
    logger.info("Finished scraping")
import requests


class AgencyApiService:
    """Client the scheduler uses to list all agencies from the GovLens API."""

    def __init__(self):
        self.base_url = "http://govlens.us-east-2.elasticbeanstalk.com/api/agencies/"

    def get_all_agencies(self):
        """Return the list of all agencies, or [] on any failure."""
        try:
            return self._get(self.base_url)
        except Exception as ex:
            print(f"Error while retrieving all the agency information: {str(ex)}")
            return []

    def _get(self, url):
        """GET `url` and return the decoded JSON list; [] on failure.

        Bug fix: this method previously accepted no `url` parameter, so the
        self._get(self.base_url) call above raised TypeError on every
        invocation and the service always returned [].
        """
        try:
            response = requests.get(url, headers={"Content-type": "application/json"})
            response_json = response.json()
            print(f"received {len(response_json)} agencies from {url}")
            return response_json
        except Exception as ex:
            print(f"Error while retrieving agency list from {url}: {str(ex)}")
            return []
"""Idempotent management command to create the scraper user with a DRF token
"""
from django.core.management.base import BaseCommand
from django.contrib.auth.models import User
from rest_framework.authtoken.models import Token

# Fixed username for the service account the scrapers authenticate as.
SCRAPER_USERNAME = "scraper"


class Command(BaseCommand):
    help = "Get or create a scraper user with a Django REST Framework token"

    def add_arguments(self, parser):
        # No command-line arguments; the username is fixed above.
        pass

    def handle(self, *args, **options):
        # get_or_create makes the command idempotent: re-running it never
        # creates a duplicate user or a second token.
        user, created = User.objects.get_or_create(username=SCRAPER_USERNAME)
        user.save()

        if created:
            self.stdout.write(f"Created new user with username {SCRAPER_USERNAME}")
        else:
            self.stdout.write(f"User {SCRAPER_USERNAME} already exists.")

        # Interpolating the token renders its key string, so the operator can
        # copy it straight into the scrapers' configuration.
        token, created = Token.objects.get_or_create(user=user)
        self.stdout.write(f"The token for the user {SCRAPER_USERNAME} is {token}")
#!/usr/bin/env python
"""Django management entry point (runserver, migrate, test, ...)."""
import os
import sys

if __name__ == "__main__":
    # Default settings module; still overridable via the environment.
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings.settings")
    try:
        from django.core.management import execute_from_command_line
    except ImportError:
        # Distinguish "Django missing entirely" from "Django present but
        # core.management failed to import".
        try:
            import django
        except ImportError:
            raise ImportError(
                "Couldn't import Django. Are you sure it's installed and "
                "available on your PYTHONPATH environment variable? Did you "
                "forget to activate a virtual environment?"
            )
        # Bug fix: this informative raise previously sat inside the try
        # block above, so it was itself caught by `except ImportError` and
        # always replaced by the generic "Couldn't import Django" message.
        raise ImportError(
            "Django installed with version {} but failed to import core.management".format(
                django.VERSION
            )
        )
    execute_from_command_line(sys.argv)
from django.views import generic
from .models import Agency


class AgencyListView(generic.ListView):
    """Paginated list of agencies, ordered by creation date."""

    template_name = "agency-list.html"
    context_object_name = "agencies"
    paginate_by = 25

    def get_queryset(self):
        # Stable ordering so pagination pages don't shift between requests.
        return Agency.objects.order_by("created_date")


class AgencyView(generic.DetailView):
    """Detail page for one agency, exposing its most recent scrape entry."""

    model = Agency
    template_name = "agency-detail.html"

    def get_context_data(self, **kwargs):
        # Python-3 style super(); the explicit-argument form was redundant.
        context = super().get_context_data(**kwargs)
        agency = context["object"]
        # Latest Entry for this agency (None if it has never been scraped).
        context["last_entry"] = agency.entry_set.last()
        return context


class HomeView(generic.ListView):
    """Home page listing all agencies, ordered by creation date."""

    template_name = "home.html"
    model = Agency

    def get_queryset(self):
        return Agency.objects.order_by("created_date")
class ScraperService:
    """Invokes the "scrapers" AWS Lambda asynchronously for a batch of agencies."""

    def __init__(self):
        # TODO(review): credentials should be loaded from environment
        # variables / the AWS credential chain (see scheduler-readme) rather
        # than filled in here.
        self.boto3client = boto3.client(
            "lambda", aws_access_key_id="", aws_secret_access_key="", region_name=""
        )

    async def scrape_data(self, agencies):
        """Fire-and-forget invocation of the scraper lambda.

        agencies -- list of agency dicts, each with at least a "name" key.
        Returns None when there is nothing to scrape (or on success) and an
        empty list if invoking the lambda raised.
        """
        # Falsy check also tolerates None, which `len(agencies)` did not.
        if not agencies:
            print("No Agency information was passed to scrape")
            return
        try:
            json_request = json.dumps({"agencies": agencies})
            names = [agency["name"] for agency in agencies]
            print(f"Scraping for Agencies: {json.dumps(names)}")
            # InvocationType="Event" means async invoke; we do not wait for a result.
            self.boto3client.invoke(
                FunctionName="scrapers",
                InvocationType="Event",
                Payload=json_request,
            )
            print("Completed invoking the lambda")
        except Exception as ex:
            print(f"Error while invoking the lambda: {str(ex)}")
            return []
# Generated by Django 2.1.11 on 2020-01-15 02:10
# Widens Agency's contact/metadata columns from length-limited CharFields
# to unbounded TextFields.

from django.db import migrations, models


class Migration(migrations.Migration):
    # Auto-generated; do not edit operations by hand.

    dependencies = [
        ("civic_pulse", "0008_agency_aliases"),
    ]

    operations = [
        migrations.AlterField(
            model_name="agency", name="address", field=models.TextField(blank=True),
        ),
        migrations.AlterField(
            model_name="agency", name="description", field=models.TextField(blank=True),
        ),
        migrations.AlterField(
            model_name="agency", name="facebook", field=models.TextField(blank=True),
        ),
        migrations.AlterField(
            model_name="agency", name="name", field=models.TextField(),
        ),
        migrations.AlterField(
            model_name="agency", name="notes", field=models.TextField(blank=True),
        ),
        migrations.AlterField(
            model_name="agency",
            name="phone_number",
            field=models.TextField(blank=True),
        ),
        migrations.AlterField(
            model_name="agency", name="twitter", field=models.TextField(blank=True),
        ),
        migrations.AlterField(
            model_name="agency", name="website", field=models.TextField(blank=True),
        ),
    ]
from .scrapers.base_api_client import ApiClient
from . import settings


PAGE_INSIGHTS_ENDPOINT = "https://www.googleapis.com/pagespeedonline/v5/runPagespeed"
MOBILE_FRIENDLY_ENDPOINT = "https://searchconsole.googleapis.com/v1/urlTestingTools/mobileFriendlyTest:run"  # from what i have tested, very hard to automate

"""
Lighthouse has 5 categories of information that can be pulled from a url
- performance
- accessibility
- best_practices
- pwa proressive web app : relatively fast, mobile friendly, secure origin some best practices
- seo search engine optimization """


class PageInsightsClient(ApiClient):
    """Client for the Google PageSpeed Insights (Lighthouse) API."""

    def __init__(self, api_uri=PAGE_INSIGHTS_ENDPOINT, api_key=settings.GOOGLE_API_KEY):
        ApiClient.__init__(self, api_uri, api_key)

    def get_page_insights(self, url, category):
        """Run Lighthouse for `url`, restricted to one category (see list above)."""
        data = {"url": url, "key": self.api_key, "category": category}
        return self.get("", data=data)


class GoogleMobileFriendlyClient(ApiClient):
    """Client for Google's mobile-friendly test API."""

    def __init__(
        self, api_uri=MOBILE_FRIENDLY_ENDPOINT, api_key=settings.GOOGLE_API_KEY
    ):
        self.urls = []
        self.results = []
        ApiClient.__init__(self, api_uri, api_key)

    def get_mobile_friendly(self, url, index):
        """POST a mobile-friendly test for `url`.

        BUG FIX: `index` was previously forwarded positionally into
        ApiClient.post(path, data={}, params={}), where it collided with the
        `data=` keyword and raised TypeError on every call. It is kept in the
        signature for caller compatibility but is no longer forwarded.
        """
        data = {"url": url}
        params = {"key": self.api_key}
        return self.post("", data=data, params=params)
"""civic_pulse_app URL Configuration

The `urlpatterns` list routes URLs to views. For more information please see:
https://docs.djangoproject.com/en/1.11/topics/http/urls/
"""
from django.conf.urls import url, include
from django.contrib import admin
from rest_framework import routers
from apps.civic_pulse.api.viewsets import AgencyViewSet, EntryViewSet
from apps.civic_pulse.views import AgencyView, HomeView
from django.contrib.staticfiles.urls import staticfiles_urlpatterns

router = routers.DefaultRouter()
router.register(r"entries", EntryViewSet)
router.register(r"agencies", AgencyViewSet)

urlpatterns = [
    url(r"^admin/", admin.site.urls),
    url(r"^api/", include(router.urls)),
    # url(r'^$', AgencyListView.as_view(), name='index'),
    url(r"^$", HomeView.as_view(), name="index"),
    # BUG FIX: the capture group lost its name ("(?P[0-9]+)" is an invalid
    # regex). DetailView resolves its object via the named "pk" kwarg.
    url(r"^agency/(?P<pk>[0-9]+)/$", AgencyView.as_view(), name="agency-detail"),
]

urlpatterns += staticfiles_urlpatterns()
# -*- coding: utf-8 -*-
# Generated by Django 1.11.1 on 2019-04-07 00:51
# Adds contact/social fields to Agency and switches its primary key to an
# externally supplied IntegerField.
from __future__ import unicode_literals

from django.db import migrations, models
import django.utils.timezone


class Migration(migrations.Migration):
    # Auto-generated; do not edit operations by hand.

    dependencies = [
        ("civic_pulse", "0002_increase_name_length"),
    ]

    operations = [
        migrations.AddField(
            model_name="agency",
            name="created_date",
            field=models.DateTimeField(default=django.utils.timezone.now),
        ),
        migrations.AddField(
            model_name="agency",
            name="facebook",
            field=models.CharField(blank=True, max_length=100),
        ),
        migrations.AddField(
            model_name="agency",
            name="phone_number",
            field=models.CharField(blank=True, max_length=15),
        ),
        migrations.AddField(
            model_name="agency",
            name="twitter",
            field=models.CharField(blank=True, max_length=100),
        ),
        migrations.AddField(
            model_name="agency",
            name="website",
            field=models.CharField(blank=True, max_length=100),
        ),
        migrations.AlterField(
            model_name="agency",
            name="id",
            field=models.IntegerField(primary_key=True, serialize=False),
        ),
        migrations.AlterField(
            model_name="agency", name="name", field=models.CharField(max_length=250),
        ),
    ]
# Generated by Django 2.2 on 2019-04-23 23:30
# Adds descriptive, geolocation, logo, and scrape-bookkeeping fields to Agency.

import apps.civic_pulse.models
from django.db import migrations, models


class Migration(migrations.Migration):
    # Auto-generated; do not edit operations by hand.

    dependencies = [
        ("civic_pulse", "0004_bunch_of_boolean_flags"),
    ]

    operations = [
        migrations.AddField(
            model_name="agency",
            name="address",
            field=models.CharField(blank=True, max_length=250),
        ),
        migrations.AddField(
            model_name="agency",
            name="description",
            field=models.CharField(blank=True, max_length=1000),
        ),
        migrations.AddField(
            model_name="agency",
            name="last_successful_scrape",
            field=models.DateTimeField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name="agency",
            name="latitude",
            field=models.DecimalField(decimal_places=3, default=0, max_digits=8),
        ),
        migrations.AddField(
            model_name="agency",
            name="logo",
            # Upload path is computed per-agency by models.logo_path.
            field=models.ImageField(
                blank=True, upload_to=apps.civic_pulse.models.logo_path
            ),
        ),
        migrations.AddField(
            model_name="agency",
            name="longitude",
            field=models.DecimalField(decimal_places=3, default=0, max_digits=8),
        ),
        migrations.AddField(
            model_name="agency",
            name="scrape_counter",
            field=models.IntegerField(default=0),
        ),
    ]
import os
import sys


logger = logging.getLogger(__name__)


def setup_logging():
    """configure logging

    call this as soon as possible
    """
    try:
        import coloredlogs

        coloredlogs.install(level=logging.INFO)
    except Exception:
        logger.warning("Could not import coloredlogs")
        # fall back to basicConfig
        logging.basicConfig(level=logging.INFO,)


# setup the logger
setup_logging()

# attempt to load variables from a file called '.env'
try:
    from dotenv import load_dotenv

    load_dotenv()
except ImportError:
    logger.warning(
        "dotenv could not be imported. Variables will only be loaded from the environment."
    )

# Credentials for posting scrape results to the GovLens API (both required).
GOVLENS_API_TOKEN = os.environ.get("GOVLENS_API_TOKEN")
GOVLENS_API_ENDPOINT = os.environ.get("GOVLENS_API_ENDPOINT")
# Optional: enables Lighthouse/PageSpeed collection when present.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")

# Fail fast: importing this module exits the process when a required
# variable is missing, so misconfiguration is caught at startup.
if not GOVLENS_API_ENDPOINT:
    logger.warning(
        "The environmental variable GOVLENS_API_ENDPOINT was not provided. Exiting..."
    )
    sys.exit(1)

if not GOVLENS_API_TOKEN:
    logger.warning(
        "The environmental variable GOVLENS_API_TOKEN was not provided. Exiting..."
    )
    sys.exit(1)

if not GOOGLE_API_KEY:
    # Missing Google key is non-fatal; lighthouse-based checks are skipped.
    logger.warning(
        "The environmental variable GOOGLE_API_KEY was not provided; no lighthouse data will be collected."
    )
62 | ) 63 | -------------------------------------------------------------------------------- /CivicPulse.drawio: -------------------------------------------------------------------------------- 1 | 5Vldc+I2FP01PCZjWxjDI4F0NzPLNBPSbvuo2oqtIiyvJPPRX1/Jlo0tGRY2cbK0L2COZEn3nKt7r8QAzNa7TwxmyYJGiAw8J9oNwHzgeQGYyE8F7CsgKIGY4aiE3AOwxP8gDToazXGEeKujoJQInLXBkKYpCkULg4zRbbvbCyXtWTMYIwtYhpDY6FcciaREx75zwD8jHCfVzK6jW9aw6qwBnsCIbhsQuB+AGaNUlE/r3QwRxV3FS/neL0da64UxlIpzXpik87+/+r67WLnxHw+rX/8M6MMNKEfZQJJrgxd5uHqi4eo2pOuBNxp4YAcH4E7bIPYVMXyL1wSm8tcdF5AJLR0Adefm0vRqN4gJtGtAeqmfEF0jwfayS9Xql29ot3EnmsXtQQQv0FjSFKBSBmrh43roAzfyQdNzAVWuRZXNSQIz9RhBAbmgTJGzTbBAywyGqmEr94jEErGWU8/dTqpO6nQ+fwaBgc3fqIO+UV/sjSz2pjFKw/3c9ixpoWiTxAWjKzSjhDKJpLTwuhdMiAFBguNU/gwlmUjid4ovLDfzVDescRSRY6owmqcRUkY4fQrjGZ7t2MoMO5Tx+lJmaCmzDCUhiPFTe/4NtviwMlMzMQQ2E6Biq0kF6I2LauDWJh+Rwh2lMaNYFKaXSE5MhGAT4RlMWxyOvuUq4N+90FTc8CJmTmUH18t28qt40ynattpg1ZpStobk8HI1ixZK9mDoW464kJI5eSbDj3RjSU+xw+QDwVwMvJlq5DiN5fcMb8LHnHAkn5/ul8/ya/r4cFstW5JXrrxtzV/MRGRH2+gPpCFPFQVCU0HVpK6yXXPxPQN/MnOynCeIF/LynBQmQfWhgqYjN16xmwQt9cS1oBsMTeh8jX8iCpQPf35efFEhQRmxhamoDOZwg4oRFU8JVtlWBfqCK8hp4QYRo1k5Ck6Ll1T1h9hVMPCEMgRFbWGqcuKrvFiCdsCSYDOuvVkilqsKVaABc//w65nKLDu/Kajoyr9Upo4XUpTIiczTKP3RnDy8NBOBdiJyO2rNSUdKBk5veeidas3Lc3bQpsob+h9bVwYn6kozaS2wPBCyrmR2JRXo5XKNDbkCW653LTZd+xTVzFP91pvmNu8go7Pe7O9M6dnFdz8b/bQYZ1MoCWpR6J93qhz7ffHnHzm8qJKnqpnM7d4VGq4mBLyZkO1T6GRkCemP3zMunLERwpxtCmaUGiiNpup6TxFNIOc4PDPXoah14Wcz1KSgw5crjCECBd60rwm7eNEzPFJcVK+VAI4RmScGtZzmLET6rea1njGQlZHHxkACshgJayDJHtw3umWqAz+xYL97nmPrqg0yDDzWvz7Ud/eXD+WKDz5Wa/YKt7PjR3X/2To0XU14uDgQACMp1vH7oyoEr6NCePhd3V08/vZlef8/kcX3vdt2hAaj4NauV+rbw/fR5oxzybUH5qERt4D7o4HZqL3rQP3GgRlMuufpNXB69h9H/zlPCIwU7ZsCnusJgRlje0rRYyNF+99J0WPjAt57nefIn4d/M8vuh7+Ewf2/ -------------------------------------------------------------------------------- /apps/civic_pulse/migrations/0001_initial.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by 
# -*- coding: utf-8 -*-
# Generated by Django 1.11.1 on 2019-04-06 19:07
# Initial schema: Agency and Entry (one Entry records one scrape of an Agency).
from __future__ import unicode_literals

from django.db import migrations, models
import django.db.models.deletion
import django.utils.timezone


class Migration(migrations.Migration):
    # Auto-generated; do not edit operations by hand.

    initial = True

    dependencies = []

    operations = [
        migrations.CreateModel(
            name="Agency",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("name", models.CharField(max_length=100)),
            ],
        ),
        migrations.CreateModel(
            name="Entry",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "created_date",
                    models.DateTimeField(default=django.utils.timezone.now),
                ),
                (
                    # Deleting an Agency cascades to its Entry history.
                    "agency",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        to="civic_pulse.Agency",
                    ),
                ),
            ],
        ),
    ]
# Installer logs 47 | pip-log.txt 48 | pip-delete-this-directory.txt 49 | 50 | # Unit test / coverage reports 51 | htmlcov/ 52 | .tox/ 53 | .coverage 54 | .coverage.* 55 | .cache 56 | .pytest_cache/ 57 | nosetests.xml 58 | coverage.xml 59 | *.cover 60 | .hypothesis/ 61 | 62 | # Jupyter Notebook 63 | .ipynb_checkpoints 64 | 65 | # pyenv 66 | .python-version 67 | 68 | # celery 69 | celerybeat-schedule.* 70 | 71 | # SageMath parsed files 72 | *.sage.py 73 | 74 | # Environments 75 | .env 76 | .venv 77 | env/ 78 | venv/ 79 | ENV/ 80 | env.bak/ 81 | venv.bak/ 82 | 83 | # mkdocs documentation 84 | /site 85 | 86 | # mypy 87 | .mypy_cache/ 88 | 89 | # Sublime Text # 90 | *.tmlanguage.cache 91 | *.tmPreferences.cache 92 | *.stTheme.cache 93 | *.sublime-workspace 94 | *.sublime-project 95 | 96 | # sftp configuration file 97 | sftp-config.json 98 | 99 | .vscode 100 | /db.sqlite3 101 | # Elastic Beanstalk Files 102 | .elasticbeanstalk/* 103 | !.elasticbeanstalk/*.cfg.yml 104 | !.elasticbeanstalk/*.global.yml 105 | 106 | # rebase/merge diff files 107 | *.orig 108 | -------------------------------------------------------------------------------- /scrapers/urls.json: -------------------------------------------------------------------------------- 1 | { 2 | "urls":[ 3 | "http://www.mass.gov/eohhs/gov/departments/dph/", 4 | "https://www.massport.com/", 5 | "https://www.massbbo.org/Who_We_Are_OBC_ACAP", 6 | "https://www.publiccounsel.net/", 7 | "http://www.mass.gov/ig/", 8 | "http://massgaming.com/", 9 | "http://www.mass.gov/ocabr/government/oca-agencies/doi-lp/", 10 | "http://www.mass.gov/eea/agencies/massdep/", 11 | "http://www.mass.gov/courts/court-info/courthouses/bri-chi/cambridge-district-court-generic.html", 12 | "http://www.mass.gov/eopss/law-enforce-and-cj/law-enforce/mptc/", 13 | "http://www.mass.gov/eohhs/gov/departments/dmh/", 14 | "http://www.mass.gov/eopss/law-enforce-and-cj/prisons/doc-facilities/northeastern-correctional-center.html", 15 | 
"http://vineyardtransit.com/Pages/index", 16 | "http://lrta.com/", 17 | "http://www.mass.gov/eohhs/gov/departments/dph/programs/community-health/oral-health/", 18 | "http://www.bc.edu/", 19 | "http://www.mass.gov/ocabr/government/oca-agencies/dpl-lp/opsi/massachusetts-department-of-public-safety.html", 20 | "http://www.mass.gov/eopss/law-enforce-and-cj/prisons/doc-facilities/south-middlesex-correctional-center.html", 21 | "http://www.mass.gov/eopss/law-enforce-and-cj/prisons/doc-facilities/mci-framingham.html", 22 | "http://www.mapc.org/", 23 | "http://www.mbta.com/", 24 | "http://www.mass.gov/da/cape/", 25 | "http://www.masslottery.com/", 26 | "https://www.uml.edu/", 27 | "http://www.mass.gov/lwd/labor-relations/", 28 | "http://www.mass.gov/eea/agencies/dfg/dfw/", 29 | "http://www.mass.gov/eea/agencies/agr/", 30 | "http://www.mass.gov/dppc/", 31 | "http://www.umassd.edu/law/", 32 | "http://www.mass.gov/courts/court-info/trial-court/dc/", 33 | "http://www.mass.gov/ethics/" 34 | ] 35 | } -------------------------------------------------------------------------------- /apps/civic_pulse/models.py: -------------------------------------------------------------------------------- 1 | from django.db import models 2 | from django.utils import timezone 3 | 4 | 5 | def logo_path(instance, filename): 6 | # file will be uploaded to MEDIA_ROOT/agency_logos// 7 | return "agency_logos/{0}/{1}".format(instance.id, filename) 8 | 9 | 10 | # Create your models here. 
class Agency(models.Model):
    """A government agency whose web presence GovLens tracks and scores."""

    # Primary key is supplied externally (loaded from the agencies CSV),
    # not auto-generated.
    id = models.IntegerField(primary_key=True)
    created_date = models.DateTimeField(default=timezone.now)
    name = models.TextField()
    website = models.TextField(blank=True)
    twitter = models.TextField(blank=True)
    facebook = models.TextField(blank=True)
    phone_number = models.TextField(blank=True)
    address = models.TextField(blank=True)
    notes = models.TextField(blank=True)
    description = models.TextField(blank=True)
    # NOTE(review): max_length on a TextField is not enforced at the database
    # level; it only affects form widgets.
    aliases = models.TextField(max_length=500, blank=True)
    # Scrape bookkeeping: when the last successful scrape ran and how many
    # scrapes have been attempted.
    last_successful_scrape = models.DateTimeField(blank=True, null=True)
    scrape_counter = models.IntegerField(default=0)
    # Uploaded under MEDIA_ROOT/agency_logos/<id>/<filename> (see logo_path).
    logo = models.ImageField(upload_to=logo_path, blank=True)

    # Geolocation
    latitude = models.DecimalField(max_digits=8, decimal_places=3, default=0)
    longitude = models.DecimalField(max_digits=8, decimal_places=3, default=0)

    def __str__(self):
        return self.name


class Entry(models.Model):
    """One scrape snapshot of an agency's site: boolean scorecard criteria."""

    created_date = models.DateTimeField(default=timezone.now)
    # Deleting an Agency removes its scrape history.
    agency = models.ForeignKey(Agency, on_delete=models.CASCADE)

    # Security/Privacy
    https_enabled = models.BooleanField(default=False)
    hsts_enabled = models.BooleanField(default=False)
    has_privacy_policy = models.BooleanField(default=False)

    # A11y
    mobile_friendly = models.BooleanField(default=False)
    good_performance = models.BooleanField(default=False)

    # Outreach/Communication
    has_social_media = models.BooleanField(default=False)
    has_contact_info = models.BooleanField(default=False)

    # notes
    notes = models.TextField(blank=True)

    def __str__(self):
        # e.g. "Some Agency_01_15" -- agency name plus month_day of the scrape.
        return self.agency.__str__() + "_" + self.created_date.strftime("%m_%d")
import logging

from ..lighthouse import PageInsightsClient

from .base_scraper import BaseScraper


class SecurityScraper(BaseScraper):
    """Extracts security/privacy signals (HTTPS, HSTS, privacy policy) for a page."""

    def __init__(self, raw_page_content, url):
        self.page = raw_page_content
        self.url = url
        self.apiClient = PageInsightsClient()

    def get_security_privacy_info(self):
        """Return the security_and_privacy section of an agency profile."""
        return {
            "https": self.get_http_access(),
            "hsts": self.get_hsts(),
            "privacy_policies": self.get_privacy_policies(),
        }

    def _lighthouse_audit_criteria(self, audit_name, caller):
        """Fetch one Lighthouse 'pwa' audit score and wrap it as a criteria object.

        Shared by get_http_access/get_hsts (their bodies were duplicates).
        Returns None on failure, matching the previous per-method behavior.
        """
        try:
            lighthouse_results = self.apiClient.get_page_insights(
                self.url, "pwa"
            ).content["lighthouseResult"]
            score = lighthouse_results["audits"][audit_name]["score"]
            # `score == 1` replaces the `True if score == 1 else False` anti-idiom.
            return self.get_criteria_object(score, score == 1)
        except Exception as ex:
            print(f"Error in {caller} for {self.url}, exception: {str(ex)}")
            logging.error(f"Error in {caller} for {self.url}, exception: {str(ex)}")

    def get_http_access(self):
        # "is-on-https": the page is served over HTTPS.
        return self._lighthouse_audit_criteria("is-on-https", "get_http_access")

    def get_hsts(self):
        # NOTE(review): "redirects-http" measures HTTP->HTTPS redirection, a
        # proxy for (not the same as) a Strict-Transport-Security header.
        return self._lighthouse_audit_criteria("redirects-http", "get_hsts")

    def get_privacy_policies(self):
        """Criteria met when the literal text 'privacy policy' appears on the page."""
        try:
            is_criteria_met = "privacy policy" in self.page.text.lower()
            return self.get_criteria_object(None, is_criteria_met)
        except Exception as ex:
            print(f"Error in get_privacy_policies for {self.url}, exception: {str(ex)}")
            logging.error(
                f"Error in get_privacy_policies for {self.url}, exception: {str(ex)}"
            )
def fill_entry_objects(filepath=os.path.join("scrapers/sample_data.json")):
    """Create or refresh Entry rows from a scraper-output JSON file."""

    with open(filepath) as file:
        records = json.load(file)

    for record in records:
        if not record:
            # A falsy record aborts the whole load (existing behavior).
            return

        profile = record["profile"]
        security = profile["security_and_privacy"]
        accessibility = profile["website_accessibility"]
        outreach = profile["outreach_and_communication"]
        update_args = {
            "https_enabled": security["https"]["met_criteria"],
            "has_privacy_policy": security["privacy_policies"]["met_criteria"],
            "mobile_friendly": accessibility["mobile_friendly"]["met_criteria"],
            "good_performance": accessibility["performance"]["met_criteria"],
            "has_social_media": outreach["social_media_access"]["met_criteria"],
            "has_contact_info": outreach["contact_access"]["met_criteria"],
        }
        Entry.objects.update_or_create(agency_id=record["id"], defaults=update_args)
class Application:
    """aiohttp service wrapper: wires config, routes, logging, and starts the
    scrape scheduler for the GovLens scheduler service."""

    def __init__(self, config):
        # All controller routes are registered under this base path.
        self.base_url = "/api/GovLens/Scheduler/v1"
        # key=value log line format for the stdlib logging handler.
        self.log_format = (
            'Level="%(levelname)s", Date="%(asctime)s", ProcessId=%(process)d, '
            'Module="%(module)s", Logger="%(name)s", Method="%(funcName)s", Line=%(lineno)d, '
            "Message=%(message)s "
        )
        self.path = os.path.dirname(os.path.abspath(__file__))
        self.config = config
        self.server = web.Application(client_max_size=1024 * 1024 * 10)  # max 10 MB
        # Shared outbound HTTP session, closed by the module-level __on_shutdown__.
        self.server["http_client"] = ClientSession()
        self.server["server_base_url"] = self.base_url
        self.server["unauthenticated_urls"] = self.base_url + "/health"
        self.registrator = Registrator(self.server, self.base_url)
        self.__initialize_environ_vars__()
        self.__register_routes__()
        self.logger = self.__config_logger__()
        # NOTE(review): __on_shutdown__ calls ClientSession.close() without
        # awaiting it, although close() is a coroutine -- confirm and add await.
        self.server.on_shutdown.append(__on_shutdown__)

    def __initialize_environ_vars__(self):
        # Export every config entry as an environment variable for downstream code.
        for key, item in self.config.items():
            os.environ[key] = item

    def __register_routes__(self):
        # Delegates route discovery/registration to the Registrator controller.
        self.registrator.register_controllers()

    def __config_logger__(self):
        # Route stdlib logging to stdout in key=value form, then layer
        # structlog's JSON rendering on top of it.
        logging.basicConfig(
            level=logging.INFO, format=self.log_format, stream=sys.stdout,
        )

        structlog.configure(
            processors=[
                structlog.stdlib.filter_by_level,
                structlog.stdlib.PositionalArgumentsFormatter(),
                structlog.processors.StackInfoRenderer(),
                structlog.processors.format_exc_info,
                structlog.processors.JSONRenderer(),
            ],
            logger_factory=structlog.stdlib.LoggerFactory(),
            wrapper_class=structlog.stdlib.BoundLogger,
        )
        return structlog.get_logger("Startup")

    def run(self):
        # Kick off the scheduler first, then block serving HTTP on port 10004.
        self.logger.info("Starting service on 10004")
        self.start_scheduler()
        web.run_app(self.server, host="0.0.0.0", port=10004, access_log=None)

    def start_scheduler(self):
        # Load job_config settings and begin the periodic scrape cycle.
        scheduler_instance = Scheduler()
        scheduler_instance.read_settings()
        scheduler_instance.scrape_websites()
class ApiClient(object):
    """Minimal JSON/REST client built on `requests.request` with nonce-based
    authentication headers."""

    def __init__(self, api_uri, api_key):
        self.api_uri = api_uri
        self.api_key = api_key

    def set_headers(self, nonce=None):
        """Build auth headers; the nonce defaults to the current epoch second."""
        if not nonce:
            from time import time

            nonce = int(time())

        return {
            "X-Authentication-Key": self.api_key,
            "X-Authentication-Nonce": str(nonce),
        }

    @staticmethod
    def merge_dicts(*dict_args):
        """Merge dicts left-to-right (later keys win) into a new dict."""
        result = {}
        for dictionary in dict_args:
            result.update(dictionary)

        return result

    def request(self, method, path, data=None, headers=None, params=None):
        """Issue an HTTP request against api_uri + path.

        GET sends `data` merged into the query string; other methods send it
        as a JSON body. BUG FIX: the defaults were shared mutable dicts and
        the GET branch mutated `params` in place via params.update(data),
        leaking query parameters across unrelated calls.
        """
        data = {} if data is None else data
        headers = {} if headers is None else headers
        params = {} if params is None else params
        url = "{0}{1}".format(self.api_uri, path)
        headers = self.merge_dicts(self.set_headers(), headers)

        if method == "GET":
            # Merge into a fresh dict; never mutate the caller's params.
            return request(
                method, url, headers=headers, params=self.merge_dicts(params, data)
            )
        return request(
            method, url, headers=headers, params=params, data=json.dumps(data)
        )

    def post(self, path, data=None, params=None):
        return Response(
            self.request(
                "POST", path, data, {"Content-Type": "application/json"}, params
            )
        )

    def get(self, path, data=None):
        return Response(self.request("GET", path, data, params={}))

    def put(self, path, data=None):
        return Response(
            self.request("PUT", path, data, {"Content-Type": "application/json"})
        )

    def delete(self, path, data=None):
        return Response(self.request("DELETE", path, data))


class Response(object):
    """Wraps a requests response; `.content` is parsed JSON, or raw text when
    the body is not valid JSON."""

    def __init__(self, response):
        self.response = response

        try:
            self.content = self.response.json()
        except ValueError as err:
            self.content = self.response.text
            # BUG FIX: previously printed str(ValueError) -- the exception
            # *class* -- instead of the actual parsing error.
            print(f"Error while getting the response: {err}")

    def ok(self):
        import requests

        return self.response.status_code == requests.codes.ok

    def errors(self):
        """Return a dict of errors; empty when the response was successful."""
        if self.ok():
            return {}

        errors = self.content

        if not isinstance(errors, dict):
            errors = {"error": errors}  # convert to dict for consistency
        elif "errors" in errors:
            errors = errors["errors"]

        return errors

    def __getitem__(self, key):
        return self.content[key]
-------------------------------------------------------------------------------- 1 | How the scheduler works: 2 | 3 | The scheduler reads the list of agencies from the Django API (GovLens API) and then calls the Lambda function, which scrapes the websites. 4 | job_config.json contains all the configuration for the scheduler. 5 | We need to provide the following information in the configuration: 6 | a) Schedule start time: At what time the schedule should start every week. 7 | b) "agency_batch_size" -- The number of agencies we should scrape at a time. It depends on how the Lambda performs. If the Lambda can scrape hundreds of websites at a time, 8 | the number can be 100. 9 | c) "interval_between_runs_seconds" -- The number of seconds to wait between scheduled runs. For example, we send the request to scrape 100 websites and then wait 10 | for 10 minutes before scraping the next 100 websites. 11 | 12 | Currently the website gets the data from "http://govlens.us-east-2.elasticbeanstalk.com/api/agencies/" 13 | 14 | scrape_data sends the request to the Lambda function. The credentials for the Lambda function should be stored as environment variables and used here. 15 | 16 | self.boto3client = boto3.client('lambda', aws_access_key_id="",aws_secret_access_key="", region_name='') (make sure we have the proper credentials for accessing the AWS Lambda) 17 | 18 | Instructions for running in an EC2 instance. (All of the below needs to be automated; a person with some AWS experience will find these steps elementary.) 19 | 1) After you make changes to the scheduler, push the image to Amazon's registry. 20 | Get the AWS keys for aws_access_key_id and aws_secret_access_key and store them in a file named "credentials" in ~/.aws 21 | 2) Log in to ECR (Elastic Container Registry): aws ecr get-login --region us-east-1 22 | 3) Build the Docker image; importantly, the image needs to be tagged. The image should be tagged with the repository name in ECR.
23 | That's how Amazon determines the repo for the image you push. 24 | This is the link for ECR: https://console.aws.amazon.com/ecr/repositories?region=us-east-1 25 | For each image, you will be able to see the URI. The URI looks like this: dummy.dkr.ecr.us-east-1.amazonaws.com/scheduler (replace dummy with the actual ID) 26 | You need to tag the image with the URI. For example, it looks something like this: docker build . -t dummy.dkr.ecr.us-east-1.amazonaws.com/scheduler:v5 27 | (Remember to give a version number which doesn't already exist in ECR; otherwise it gives an error when you try to push the image to ECR.) 28 | After the image is built, you can push the image using the command: docker push dummy.dkr.ecr.us-east-1.amazonaws.com/scheduler:v5 29 | 4) If you refresh the ECR page, you should see the latest version there. 30 | 5) Log in to the EC2 instance: ssh -i test-pem.pem ec2-user@(actual ip).compute-1.amazonaws.com 31 | 6) docker pull the latest image (which we just pushed) and run the image.
The scheduler will start scraping the websites from (time specified in job_config.json file) 32 | 33 | 34 | -------------------------------------------------------------------------------- /scrapers/agency_dataaccessor.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | 4 | import requests 5 | 6 | 7 | class AgencyDataAccessor: 8 | def __init__(self, token, agency_info): 9 | if os.environ.get("govlens_api", None) is None: 10 | self.base_url = ( 11 | "http://govlens.us-east-2.elasticbeanstalk.com/api/agencies/" 12 | ) 13 | else: 14 | self.base_url = os.environ["govlens_api"] 15 | self.agency_info = agency_info 16 | if token is None: 17 | self.token = "Django Api token" 18 | 19 | def enrich_agency_info_with_scrape_info(self, scrape_info): 20 | try: 21 | outreach_and_communication = scrape_info["profile"][ 22 | "outreach_and_communication" 23 | ] 24 | contact_info = outreach_and_communication["contact_access"]["info"] 25 | self.agency_info["address"] = contact_info.get("address", None) 26 | self.agency_info["phone_number"] = contact_info.get("phone_number", None) 27 | 28 | # todo: get the twitter and facebook links 29 | social_media_info = outreach_and_communication["social_media_access"][ 30 | "info" 31 | ] 32 | if len(social_media_info) > 0: 33 | self.agency_info["facebook"] = self.get_social_media_links( 34 | social_media_info, "facebook" 35 | ) 36 | self.agency_info["twitter"] = self.get_social_media_links( 37 | social_media_info, "twitter" 38 | ) 39 | else: 40 | print( 41 | f"social media information not available for the agency {scrape_info['Website']}" 42 | ) 43 | 44 | response = self.update_agency_info(self.agency_info) 45 | return response 46 | except Exception as ex: 47 | print( 48 | f"An error occurred while enriching the agency information with scrape information: {str(ex)}" 49 | ) 50 | 51 | def update_agency_info(self, agency_info): 52 | try: 53 | self.agency_info["scrape_counter"] = 
self.agency_info["scrape_counter"] + 1 54 | self.agency_info["last_successful_scrape"] = datetime.datetime.now() 55 | agency_url = f"{self.base_url}{self.agency_info['id']}/" 56 | response = requests.put( 57 | agency_url, 58 | data=self.agency_info, 59 | headers={"accept": "application/json", "Authorization": self.token}, 60 | ) 61 | return response 62 | except Exception as ex: 63 | print(f"An error occurred while posting the agency information: {str(ex)}") 64 | 65 | def get_social_media_links(self, social_media_links, social_media_type): 66 | return next( 67 | ( 68 | social_media_link 69 | for social_media_link in social_media_links 70 | if social_media_type.lower() in social_media_link.lower() 71 | ), 72 | None, 73 | ) 74 | -------------------------------------------------------------------------------- /scrapers/process_agency_info.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import logging 3 | from .scrapers.social_scraper import SocialScraper 4 | from .scrapers.security_scraper import SecurityScraper 5 | from .scrapers.accessibility_scraper import AccessibilityScraper 6 | from .agency_dataaccessor import AgencyDataAccessor 7 | from . 
import settings 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | class AgencyInfo: 13 | def __init__(self, agency): 14 | self.agency_firms = [] 15 | self.agency = agency 16 | self.website = agency["website"] 17 | self.buckets = [ 18 | "security_and_privacy", 19 | "outreach_and_communication", 20 | "website_accessibility", 21 | ] 22 | self.agency_dataaccessor = AgencyDataAccessor(None, self.agency) 23 | 24 | def process_agency_info(self): 25 | try: 26 | # HTTP Get on agency url 27 | agency_url = self.agency.get("website", None) 28 | if agency_url is None or agency_url == "": 29 | logger.error( 30 | f"Website url is not available for {self.agency['id']}, name: {self.agency['name']}" 31 | ) 32 | self.agency_dataaccessor.update_agency_info(self.agency) 33 | return 34 | logger.info(f"Scraping the website {agency_url}") 35 | page = requests.get(agency_url, timeout=30) 36 | # Initialize scrapers 37 | socialScraper = SocialScraper(page, agency_url) 38 | securityScraper = SecurityScraper(page, agency_url) 39 | accessibilityScraper = AccessibilityScraper(page, agency_url) 40 | 41 | social_media_info, contact_info = socialScraper.scrape_info() 42 | profile_info = {} 43 | 44 | # Figure out the google_api_key and then fix the below buckets 45 | for bucket in self.buckets: 46 | if bucket == "security_and_privacy": 47 | if settings.GOOGLE_API_KEY: 48 | profile_info[ 49 | bucket 50 | ] = securityScraper.get_security_privacy_info() 51 | elif bucket == "outreach_and_communication": 52 | profile_info[ 53 | bucket 54 | ] = socialScraper.get_outreach_communication_info( 55 | social_media_info, contact_info 56 | ) 57 | elif bucket == "website_accessibility": 58 | if settings.GOOGLE_API_KEY: 59 | profile_info[ 60 | bucket 61 | ] = accessibilityScraper.get_website_accessibility_info() 62 | 63 | agency_details = { 64 | "id": self.agency["id"], 65 | "name": self.agency["name"], 66 | "Website": self.website, 67 | "profile": profile_info, 68 | } 69 | 70 | 
self.agency_dataaccessor.enrich_agency_info_with_scrape_info(agency_details) 71 | return agency_details 72 | except Exception as ex: 73 | logger.error( 74 | ex, "An error occurred while processing the agency information" 75 | ) 76 | -------------------------------------------------------------------------------- /apps/civic_pulse/templates/home.html: -------------------------------------------------------------------------------- 1 | {% extends 'base.html' %} 2 | {% load static %} 3 | {% block css %} 4 | 5 | {% endblock css %} 6 | {% block content %} 7 |
8 |

9 | Search your local government websites to see how they stack up! 10 |

11 |
12 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 80 28 | % 29 | of municipal 30 | sites are not 31 | secure 32 | 33 |
34 |
35 |
36 | 37 |
38 | 39 |
40 |
41 |
42 |
43 |
44 |
45 |
What we do...
46 |
47 |

When our physical infrastructure — from bridges to sidewalks to playgrounds — isn't in good shape, we can often see it, and the consequences are dire. Digital infrastructure can be just as important to maintain, and GovLens provides an easy way to see how your community is doing.

48 |
< 49 |
50 |
51 |
Why does HTTPS matter?
52 |
53 |

HTTPS means that your traffic is encrypted so only you and the website you are visiting know what you are communicating. This isn’t fool-proof security, but it is a basic step to ensure that it’s harder for people to eavesdrop on what you are reading, intercept messages you send to government agencies, and secretly modify what you see.

54 |
55 |
56 |
57 |
Take action!
58 |
59 |

60 | We want to help agency staff, the press, and the general public better understand the state of government web security while also providing tips on how things can be improved — even on tight budgets. Look up your city or an agency in the search box above, see how it scores, and learn more about potential ways to keep improving. 61 |

62 |
63 |
64 |
65 |
66 | {% endblock content %} 67 | -------------------------------------------------------------------------------- /scrapers/scrapers/accessibility_scraper.py: -------------------------------------------------------------------------------- 1 | from .base_scraper import BaseScraper 2 | from ..lighthouse import PageInsightsClient 3 | 4 | 5 | class AccessibilityScraper(BaseScraper): 6 | def __init__(self, raw_page_content, url): 7 | self.page = raw_page_content 8 | self.url = url 9 | self.apiClient = PageInsightsClient() 10 | 11 | def get_website_accessibility_info(self): 12 | return { 13 | "mobile_friendly": self.get_mobile_friendliness(), 14 | "page_speed": self.get_page_speed(), 15 | "performance": self.get_site_performance(), 16 | "multi_lingual": self.get_multi_lingual(), 17 | } 18 | 19 | def get_multi_lingual(self): 20 | is_criteria_met = ( 21 | True 22 | if ( 23 | ( 24 | "translate" 25 | or "select language" 26 | or "select-language" in self.page.text.lower() 27 | ) 28 | or ("espanol" or "Español") in self.page.a 29 | ) 30 | else False 31 | ) 32 | return self.get_criteria_object(None, is_criteria_met) 33 | 34 | def get_site_performance(self): 35 | try: 36 | lighthouse_results = self.apiClient.get_page_insights( 37 | self.url, "performance" 38 | ).content["lighthouseResult"] 39 | performanceResults = lighthouse_results["categories"]["performance"][ 40 | "score" 41 | ] 42 | is_criteria_met = ( 43 | True if performanceResults * 100 >= 80 else False 44 | ) # the score in the Json file is a percentage 45 | return self.get_criteria_object(performanceResults, is_criteria_met) 46 | except Exception: 47 | print("Error in get_site_performance for", self.url) 48 | 49 | def get_mobile_friendliness(self): 50 | try: 51 | lighthouse_results = self.apiClient.get_page_insights( 52 | self.url, "pwa" 53 | ).content["lighthouseResult"] 54 | # If the width of your app's content doesn't match the width of the viewport, your app might not be optimized for mobile screens. 
55 | score = lighthouse_results["audits"]["content-width"]["score"] 56 | title = lighthouse_results["audits"]["content-width"]["title"] 57 | is_criteria_met = ( 58 | True 59 | if title == "Content is sized correctly for the viewport" 60 | else False 61 | ) 62 | return self.get_criteria_object(score, is_criteria_met) 63 | except Exception: 64 | print("Error in get_mobile_friendliness for", self.url) 65 | 66 | def get_page_speed(self): 67 | try: 68 | lighthouse_results = self.apiClient.get_page_insights( 69 | self.url, "performance" 70 | ).content["lighthouseResult"] 71 | speed_index = lighthouse_results["audits"]["speed-index"]["score"] 72 | is_criteria_met = ( 73 | True if speed_index * 100 >= 80 else False 74 | ) # the score in the Json file is a percentage 75 | return self.get_criteria_object(speed_index, is_criteria_met) 76 | except Exception: 77 | print("Error in get_page_speed for", self.url) 78 | -------------------------------------------------------------------------------- /scrapers/README.rst: -------------------------------------------------------------------------------- 1 | ``scrapers`` 2 | ------------ 3 | 4 | Description 5 | =========== 6 | Code related to scripts, "scrapers", which scrape the agency information and post the information to the Django API. 7 | 8 | Directory Structure 9 | =================== 10 | 11 | :: 12 | 13 | ├── agency_api_service.py - connects to GovLens API for agency info 14 | ├── agency_dataaccessor.py - read/write to/from database containing scraped info 15 | ├── lighthouse.py - connects to Google Lighthouse API 16 | ├── process_agency_info.py - connects to an agency site & runs scrapers 17 | ├── README.rst - this file! 18 | ├── scrape_handler.py - **Start here!** Starts API services and maps to agency processors. 19 | ├── urls.json - list of URLS pointing to government sites 20 | ├── data/ 21 | │   └── agencies.csv - spreadsheet containing scraped information (match of Google Sheets?) 
22 | └── scrapers/ 23 |    ├── __init__.py 24 | ├── accessibility_scraper.py - scrapes for multi-language, performance, mobile-bility 25 | ├── base_api_client.py 26 | ├── base_scraper.py - base class for scrapers to inherit 27 | ├── security_scraper.py - scrapes for HTTPS & privacy policy 28 |    └── social_scraper.py - scrapes for phone number, email, address, social media 29 | 30 | Quick Start 31 | =========== 32 | 33 | Configuration 34 | ~~~~~~~~~~~~~ 35 | 36 | There are a few required environmental variables. The easiest way to set them in development is to create a file called `.env` in the root directory of this repository (don't commit this file). The file (named `.env`) should contain the following text:: 37 | 38 | GOVLENS_API_TOKEN=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX 39 | GOVLENS_API_ENDPOINT=http://127.0.0.1:8000/api/agencies/ 40 | GOOGLE_API_TOKEN=XXXXXXXXXXXXXXXXXXXXXXXX 41 | 42 | To get the ``GOOGLE_API_TOKEN``, you need to visit the following page: https://developers.google.com/speed/docs/insights/v5/get-started 43 | 44 | To get the ``GOVLENS_API_TOKEN``, run ``python3 manage.py create_scraper_user``. Copy the token from the command output and paste it into the ``.env`` file. 45 | 46 | Execution 47 | ~~~~~~~~~ 48 | 49 | Once you have created the `.env` file as mentioned above, run the scraper:: 50 | 51 | # run the following from the root directory of the repository 52 | python3 -m scrapers.scrape_handler 53 | 54 | Design 55 | ====== 56 | 57 | The scraper is intended to be used both locally and on AWS Lambda. 58 | 59 | The ``scrapers`` directory in the root of this repository is the top-level Python package for this project. This means that any absolute imports should begin with ``scrapers.MODULE_NAME_HERE``. 60 | 61 | ``scrapers/scrape_handler.py`` is the main Python module invoked. On AWS Lambda, the method ``scrape_handler.scrape_data()`` is imported and called directly. 
62 | 63 | AWS Lambda 64 | ~~~~~~~~~~ 65 | Pushing it to AWS lambda: 66 | 67 | 1. zip the ``scraper/`` folder. 68 | 2. go to AWS lamba and upload the zipped folder: https://console.aws.amazon.com/lambda/home?region=us-east-1#/functions 69 | 3. test the lambda by using this json (??) 70 | 4. confirm that there are no errors by looking at cloudwatch logs: https://console.aws.amazon.com/cloudwatch/home?region=us-east-1#logStream:group=/aws/lambda/scrapers;streamFilter=typeLogStreamPrefix 71 | -------------------------------------------------------------------------------- /apps/civic_pulse/templates/agency-list.html: -------------------------------------------------------------------------------- 1 | {% extends 'base.html' %} 2 | {% load static %} 3 | {% block css %} 4 | 5 | {% endblock css %} 6 | {% block content %} 7 | 8 | 35 |
36 | {% if agencies %} 37 | 38 |
39 | 40 | 41 | {% for agency in agencies %} 42 |
43 | 44 |
45 |
{{ agency.name }}
46 |

47 |
48 | 51 |
52 | 53 | {% endfor %} 54 | 55 |
56 | 57 | {% else %} 58 |

No agencies available.

59 | {% endif %} 60 | 61 | {% if is_paginated %} 62 |
    63 | {% if page_obj.has_previous %} 64 |
  • «
  • 65 | {% else %} 66 |
  • «
  • 67 | {% endif %} 68 | {% for i in paginator.page_range %} 69 | {% if page_obj.number == i %} 70 |
  • {{ i }} (current)
  • 71 | {% else %} 72 |
  • {{ i }}
  • 73 | {% endif %} 74 | {% endfor %} 75 | {% if page_obj.has_next %} 76 |
  • »
  • 77 | {% else %} 78 |
  • »
  • 79 | {% endif %} 80 |
81 | {% endif %} 82 |
83 | 84 | {% endblock content %} -------------------------------------------------------------------------------- /config/settings/settings.py: -------------------------------------------------------------------------------- 1 | """ 2 | Django settings for civic_pulse project. 3 | 4 | Generated by 'django-admin startproject' using Django 1.11.1. 5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/1.11/topics/settings/ 8 | 9 | For the full list of settings and their values, see 10 | https://docs.djangoproject.com/en/1.11/ref/settings/ 11 | """ 12 | 13 | import os 14 | 15 | # the project directory is obtained by finding the directory path two levels 16 | # up the directory of this file. dirname is to get the directory, and then 17 | # join and normpath functions generate and convert the path to the correct 18 | # project directory 19 | BASE_DIR = os.path.normpath( 20 | os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..") 21 | ) 22 | 23 | 24 | # Quick-start development settings - unsuitable for production 25 | # See https://docs.djangoproject.com/en/1.11/howto/deployment/checklist/ 26 | 27 | # SECURITY WARNING: keep the secret key used in production secret! 28 | SECRET_KEY = "=wc@ng&5%_1)sbj&&wuq8$oy5a#1k%^3qqkam@x%9*6k57t30)" 29 | 30 | # SECURITY WARNING: don't run with debug turned on in production! 31 | DEBUG = True 32 | 33 | ALLOWED_HOSTS = [ 34 | "app", 35 | "localhost", 36 | "0.0.0.0", 37 | "127.0.0.1", 38 | "civicpulse-env.4bvxnwhus8.us-east-1.elasticbeanstalk.com", 39 | ] 40 | 41 | # Database 42 | # https://docs.djangoproject.com/en/1.11/ref/settings/#databases 43 | 44 | """ 45 | Elastic beanstalk automatically adds database environment variables. 46 | By checking for environment variable 'RDS_DB_NAME' we can determine whether this is 47 | development or production environment. 
48 | """ 49 | if "RDS_DB_NAME" in os.environ: 50 | DATABASES = { 51 | "default": { 52 | "ENGINE": "django.db.backends.postgresql_psycopg2", 53 | "NAME": os.environ["RDS_DB_NAME"], 54 | "USER": os.environ["RDS_USERNAME"], 55 | "PASSWORD": os.environ["RDS_PASSWORD"], 56 | "HOST": os.environ["RDS_HOSTNAME"], 57 | "PORT": os.environ["RDS_PORT"], 58 | } 59 | } 60 | else: 61 | DATABASES = { 62 | "default": { 63 | "ENGINE": "django.db.backends.sqlite3", 64 | "NAME": os.path.join(BASE_DIR, "db.sqlite3"), 65 | } 66 | } 67 | 68 | 69 | # Application definition 70 | 71 | INSTALLED_APPS = [ 72 | "apps.civic_pulse", 73 | "rest_framework", 74 | "rest_framework.authtoken", 75 | "django.contrib.admin", 76 | "django.contrib.auth", 77 | "django.contrib.contenttypes", 78 | "django.contrib.sessions", 79 | "django.contrib.messages", 80 | "django.contrib.staticfiles", 81 | ] 82 | 83 | MIDDLEWARE = [ 84 | "django.middleware.security.SecurityMiddleware", 85 | "django.contrib.sessions.middleware.SessionMiddleware", 86 | "django.middleware.common.CommonMiddleware", 87 | "django.middleware.csrf.CsrfViewMiddleware", 88 | "django.contrib.auth.middleware.AuthenticationMiddleware", 89 | "django.contrib.messages.middleware.MessageMiddleware", 90 | "django.middleware.clickjacking.XFrameOptionsMiddleware", 91 | ] 92 | 93 | ROOT_URLCONF = "config.urls" 94 | 95 | TEMPLATES = [ 96 | { 97 | "BACKEND": "django.template.backends.django.DjangoTemplates", 98 | "DIRS": [os.path.join(BASE_DIR, "../../templates")], 99 | "APP_DIRS": True, 100 | "OPTIONS": { 101 | "context_processors": [ 102 | "django.template.context_processors.debug", 103 | "django.template.context_processors.request", 104 | "django.contrib.auth.context_processors.auth", 105 | "django.contrib.messages.context_processors.messages", 106 | ], 107 | }, 108 | }, 109 | ] 110 | 111 | WSGI_APPLICATION = "config.wsgi.application" 112 | 113 | 114 | # Password validation 115 | # 
https://docs.djangoproject.com/en/1.11/ref/settings/#auth-password-validators 116 | 117 | AUTH_PASSWORD_VALIDATORS = [ 118 | { 119 | "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator" 120 | }, 121 | {"NAME": "django.contrib.auth.password_validation.MinimumLengthValidator"}, 122 | {"NAME": "django.contrib.auth.password_validation.CommonPasswordValidator"}, 123 | {"NAME": "django.contrib.auth.password_validation.NumericPasswordValidator"}, 124 | ] 125 | 126 | 127 | REST_FRAMEWORK = { 128 | "DEFAULT_AUTHENTICATION_CLASSES": ( 129 | "rest_framework.authentication.TokenAuthentication", 130 | ), 131 | "DEFAULT_PERMISSION_CLASSES": ( 132 | "rest_framework.permissions.IsAuthenticatedOrReadOnly", 133 | ), 134 | } 135 | 136 | # Internationalization 137 | # https://docs.djangoproject.com/en/1.11/topics/i18n/ 138 | 139 | LANGUAGE_CODE = "en-us" 140 | 141 | TIME_ZONE = "UTC" 142 | 143 | USE_I18N = True 144 | 145 | USE_L10N = True 146 | 147 | USE_TZ = True 148 | 149 | 150 | # Static files (CSS, JavaScript, Images) 151 | # https://docs.djangoproject.com/en/1.11/howto/static-files/ 152 | 153 | STATIC_URL = "/static/" 154 | 155 | STATIC_ROOT = os.path.join(BASE_DIR, "apps/static") 156 | 157 | GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "") 158 | -------------------------------------------------------------------------------- /scheduler/src/biz/scheduler.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timedelta 2 | import os 3 | import math 4 | import queue 5 | import json 6 | import asyncio 7 | from apscheduler.schedulers.blocking import BlockingScheduler 8 | from apscheduler.schedulers.background import BackgroundScheduler 9 | from .agency_api_service import AgencyApiService 10 | from .scrape_data import ScraperService 11 | 12 | 13 | class Scheduler: 14 | def __init__(self): 15 | self.queue_size = 0 16 | self.job_execution_counter = 0 17 | self.scraper_service = 
ScraperService() 18 | 19 | def read_settings(self): 20 | data = {} 21 | with open( 22 | os.path.dirname(os.path.abspath(__file__)) + "/job_config.json", "r" 23 | ) as f: 24 | data = json.load(f) 25 | self.agency_list_size = data["agency_batch_size"] 26 | self.job_trigger_settings = data["job_trigger_settings"] 27 | self.interval_between_runs_seconds = data["interval_between_runs_seconds"] 28 | # there is an option to pass all these variables at run time using environment variables 29 | if os.environ.get("day", None) is not None: 30 | self.read_settings_from_environment_variables() 31 | 32 | def read_settings_from_environment_variables(self): 33 | if os.environ.get("agency_batch_size", None) is not None: 34 | self.agency_list_size = int(os.environ.get("agency_batch_size")) 35 | else: 36 | self.agency_list_size = 4 37 | if os.environ.get("interval_between_runs_seconds", None) is not None: 38 | self.interval_between_runs_seconds = int( 39 | os.environ.get("interval_between_runs_seconds") 40 | ) 41 | else: 42 | self.interval_between_runs_seconds = 20 43 | if os.environ.get("day", None) is not None: 44 | print(os.environ.get("day")) 45 | print(os.environ.get("hour")) 46 | print(os.environ.get("minute")) 47 | print(os.environ.get("second")) 48 | self.job_trigger_settings["day_of_job"] = os.environ.get("day") 49 | else: 50 | raise Exception("day of job is not specified in the environment variable") 51 | if os.environ.get("hour", None) is not None: 52 | self.job_trigger_settings["hour"] = os.environ.get("hour") 53 | else: 54 | raise Exception("hour is not specified in the environment variable") 55 | if os.environ.get("minute", None) is not None: 56 | self.job_trigger_settings["minute"] = os.environ.get("minute") 57 | else: 58 | raise Exception("minute is not specified in the environment variable") 59 | if os.environ.get("second", None) is not None: 60 | self.job_trigger_settings["second"] = os.environ.get("second") 61 | else: 62 | raise Exception("second is not specified 
in the environment variable") 63 | 64 | def scheduled_method(self): 65 | print( 66 | f"Started scraping the agency info at {str(datetime.now().strftime('%Y-%m-%d %H:%M:%S'))}" 67 | ) 68 | agency_api_service = AgencyApiService() 69 | agency_list = agency_api_service._get() 70 | self.queue_size = math.ceil(len(agency_list) / self.agency_list_size) 71 | self.job_queue = queue.Queue(maxsize=self.queue_size) 72 | self.split_data_into_chunks(agency_list) 73 | self.scrape_scheduled_method() 74 | 75 | def scrape_scheduled_method(self): 76 | self.job_execution_counter = self.job_execution_counter + 1 77 | print( 78 | f"Executing the {self.job_execution_counter} job. {self.queue_size - self.job_execution_counter} to be executed at {str(datetime.now().strftime('%Y-%m-%d %H:%M:%S'))}" 79 | ) 80 | if self.job_queue.empty() is False: 81 | agencies = self.job_queue.get() 82 | loop = asyncio.new_event_loop() 83 | asyncio.set_event_loop(loop) 84 | loop.run_until_complete(self.scraper_service.scrape_data(agencies)) 85 | scheduler = BlockingScheduler() 86 | scheduler.add_job( 87 | self.scrape_scheduled_method, 88 | next_run_time=datetime.now() 89 | + timedelta(seconds=self.interval_between_runs_seconds), 90 | ) 91 | scheduler.start() 92 | else: 93 | print( 94 | f"done with scraping at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}" 95 | ) 96 | 97 | def reset_schedule_parameters(self): 98 | self.queue_size = 0 99 | self.job_queue = None 100 | print( 101 | f"Done Scraping the data for the agencies at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}" 102 | ) 103 | 104 | def scrape_websites(self): 105 | scheduler = BackgroundScheduler() 106 | scheduler.add_job( 107 | self.scheduled_method, 108 | "cron", 109 | day_of_week=self.job_trigger_settings["day_of_job"], 110 | hour=self.job_trigger_settings["hour"], 111 | minute=self.job_trigger_settings["minute"], 112 | second=self.job_trigger_settings["second"], 113 | ) 114 | try: 115 | scheduler.start() 116 | except (KeyboardInterrupt, 
SystemExit): 117 | pass 118 | 119 | def split_data_into_chunks(self, agencies): 120 | for i in range(0, len(agencies), self.agency_list_size): 121 | self.job_queue.put(agencies[i : i + self.agency_list_size]) 122 | -------------------------------------------------------------------------------- /scrapers/scrapers/social_scraper.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from bs4 import BeautifulSoup 3 | from .base_scraper import BaseScraper 4 | import re 5 | import logging 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | class SocialScraper(BaseScraper): 11 | 12 | phone_regex = re.compile(r"((?:\d{3}|\(\d{3}\))?(?:\s|-|\.)?\d{3}(?:\s|-|\.)\d{4})") 13 | email_regex = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,3}") 14 | address_regex = re.compile( 15 | r"\s\d{1,5} [a-zA-Z0-9\s\.,]+[A-Z]{2}[\s\-\s]{0,3}[0-9]{5,6}?[,]*" 16 | ) 17 | 18 | def __init__(self, raw_page_content, url): 19 | self.raw_content = raw_page_content 20 | self.url = url 21 | 22 | def scrape_info(self): 23 | soup = BeautifulSoup(self.raw_content.content, "html.parser") 24 | social_media_criteria = [ 25 | "twitter.com", 26 | "facebook.com", 27 | "instagram.com", 28 | "youtube.com", 29 | "linkedin.com", 30 | ] 31 | a_tags = soup.findAll("a", href=True) 32 | social_media_links = [] 33 | contact_us_link = "" 34 | try: 35 | for tag in a_tags: 36 | try: 37 | href_link = tag.get("href", None) 38 | if href_link is not None: 39 | if "contact" in tag.text.lower(): 40 | contact_us_link = tag 41 | elif any(link in tag["href"] for link in social_media_criteria): 42 | social_media_links.append(tag["href"]) 43 | except Exception as ex: 44 | logging.error( 45 | ex, 46 | "An error occurred while trying to extract the social media information", 47 | ) 48 | if contact_us_link: 49 | if "http" in contact_us_link["href"]: 50 | logger.info( 51 | f"making an extra call to get the contact info: {contact_us_link['href']}" 52 | ) 53 | 
contact_us_page = requests.get(contact_us_link["href"]) 54 | else: 55 | logger.info( 56 | f"making an extra call to get the contact info: {self.url+contact_us_link['href']}" 57 | ) 58 | contact_us_page = requests.get(self.url + contact_us_link["href"]) 59 | contact_us_soup = BeautifulSoup(contact_us_page.content, "html.parser") 60 | contact_info = self.get_contact_info(contact_us_soup) 61 | else: 62 | logger.info("not making an extra call to get the contact info") 63 | contact_info = self.get_contact_info(soup) 64 | except Exception as ex: 65 | logging.error( 66 | ex, f"An error occurred while processing the social media information" 67 | ) 68 | 69 | return social_media_links, contact_info 70 | 71 | def get_contact_info(self, soup): 72 | try: 73 | contact_us_all_elements = soup.findAll() 74 | contact_us_str = "" 75 | emails = [] 76 | phone_numbers = [] 77 | address = [] 78 | for element in contact_us_all_elements: 79 | if "contact" in element.text.lower(): 80 | contact_us_str = element.text.replace("\n", " ") 81 | contact_us_str = re.sub("<[^<]+?>", "", contact_us_str) 82 | emails = ( 83 | SocialScraper.email_regex.findall(contact_us_str) 84 | if not emails 85 | else emails 86 | ) 87 | phone_numbers = ( 88 | SocialScraper.phone_regex.findall(contact_us_str) 89 | if not phone_numbers 90 | else phone_numbers 91 | ) 92 | address = ( 93 | SocialScraper.address_regex.findall(contact_us_str) 94 | if not address 95 | else address 96 | ) 97 | 98 | all_contact_info = {} 99 | if contact_us_str: 100 | all_contact_info = { 101 | "email": list(set(emails)), 102 | "phone_number": list(set(phone_numbers)), 103 | "address": list(set(address))[0] if address else [], 104 | } 105 | else: 106 | logger.warning("Contact Information not available") 107 | all_contact_info = {"email": [], "phone_number": [], "address": []} 108 | return all_contact_info 109 | except Exception as ex: 110 | logging.error( 111 | ex, 112 | "An error occurred while extracting the contact information for the firm 
{self.url}", 113 | ) 114 | return None 115 | 116 | def get_outreach_communication_info(self, social_media_info, contact_info): 117 | agency_info = { 118 | "social_media_access": self.get_socialmedia_access(social_media_info), 119 | "contact_access": self.get_contact_access(contact_info), 120 | } 121 | return agency_info 122 | 123 | def get_contact_access(self, contact_info): 124 | is_contact_info_available = False 125 | if ( 126 | contact_info 127 | and contact_info["phone_number"] 128 | or contact_info["email"] 129 | or contact_info["address"] 130 | ): 131 | is_contact_info_available = True 132 | return self.get_criteria_object(contact_info, is_contact_info_available) 133 | 134 | def get_socialmedia_access(self, social_media_info): 135 | is_criteria_met = ( 136 | True if social_media_info and len(social_media_info) > 0 else False 137 | ) 138 | return self.get_criteria_object(social_media_info, is_criteria_met) 139 | -------------------------------------------------------------------------------- /apps/civic_pulse/static/styles.css: -------------------------------------------------------------------------------- 1 | .primary-color { 2 | color: #2E2757; 3 | } 4 | 5 | .primary-background { 6 | background-color: #2E2757; 7 | } 8 | 9 | .secondary-color { 10 | color: #0F8EC5; 11 | } 12 | 13 | .secondary-background { 14 | background-color: #0F8EC5; 15 | } 16 | 17 | body { 18 | font: 400 15px Lato, sans-serif; 19 | line-height: 1.8; 20 | color: #818181; 21 | } 22 | 23 | h2 { 24 | font-size: 24px; 25 | text-transform: uppercase; 26 | color: #303030; 27 | font-weight: 600; 28 | margin-bottom: 30px; 29 | } 30 | h4 { 31 | font-size: 19px; 32 | line-height: 1.375em; 33 | color: #303030; 34 | font-weight: 400; 35 | margin-bottom: 30px; 36 | } 37 | 38 | .tagline { 39 | padding-top: 3vh; 40 | font-weight: 600; 41 | color: white; 42 | text-align: center; 43 | } 44 | 45 | .jumbotron { 46 | background-color: #ff0000; 47 | background-image: url('images/map_boston.png'); 48 | 
background-position: center center; 49 | background-size: cover; 50 | background-repeat: no-repeat; 51 | color: black; 52 | padding: 100px 25px; 53 | font-family: Montserrat, sans-serif; 54 | font-weight: bold !important; 55 | text-shadow: 1px 1px 5px white; 56 | } 57 | .container-fluid { 58 | padding: 60px 50px; 59 | } 60 | .bg-grey { 61 | background-color: #f6f6f6; 62 | } 63 | .logo-small { 64 | color: #f4511e; 65 | font-size: 50px; 66 | } 67 | .logo { 68 | color: #f4511e; 69 | font-size: 200px; 70 | } 71 | .thumbnail { 72 | padding: 0 0 15px 0; 73 | border: none; 74 | border-radius: 0; 75 | } 76 | .thumbnail img { 77 | width: 100%; 78 | height: 100%; 79 | margin-bottom: 10px; 80 | } 81 | .carousel-control.right, 82 | .carousel-control.left { 83 | background-image: none; 84 | color: #f4511e; 85 | } 86 | .carousel-indicators li { 87 | border-color: #f4511e; 88 | } 89 | .carousel-indicators li.active { 90 | background-color: #f4511e; 91 | } 92 | .item h4 { 93 | font-size: 19px; 94 | line-height: 1.375em; 95 | font-weight: 400; 96 | font-style: italic; 97 | margin: 70px 0; 98 | } 99 | .item span { 100 | font-style: normal; 101 | } 102 | .panel { 103 | border: 1px solid #f4511e; 104 | border-radius: 0 !important; 105 | transition: box-shadow 0.5s; 106 | } 107 | .panel:hover { 108 | box-shadow: 5px 0px 40px rgba(0, 0, 0, 0.2); 109 | } 110 | .panel-footer .btn:hover { 111 | border: 1px solid #f4511e; 112 | background-color: #fff !important; 113 | color: #f4511e; 114 | } 115 | .panel-heading { 116 | color: #fff !important; 117 | background-color: #e60000 !important; 118 | padding: 25px; 119 | border-bottom: 1px solid transparent; 120 | border-top-left-radius: 0px; 121 | border-top-right-radius: 0px; 122 | border-bottom-left-radius: 0px; 123 | border-bottom-right-radius: 0px; 124 | } 125 | .panel-footer { 126 | background-color: white !important; 127 | } 128 | .panel-footer h3 { 129 | font-size: 32px; 130 | } 131 | .panel-footer h4 { 132 | color: #aaa; 133 | font-size: 
14px; 134 | } 135 | .panel-footer .btn { 136 | margin: 15px 0; 137 | background-color: #f4511e; 138 | color: #fff; 139 | } 140 | 141 | /* total height minus navbar */ 142 | .homebody { 143 | height: calc(100vh - 120px); 144 | background: url('images/civicSquare.png') no-repeat fixed; 145 | background-position: center center; 146 | -webkit-background-size: cover; 147 | -moz-background-size: cover; 148 | background-size: cover; 149 | -o-background-size: cover; 150 | 151 | } 152 | 153 | .navbar { 154 | height: 120px; 155 | margin-bottom: 0; 156 | /*background-color: #e60000; */ 157 | z-index: 9999; 158 | border: 0; 159 | font-size: 12px !important; 160 | line-height: 1.42857143 !important; 161 | letter-spacing: 4px; 162 | border-radius: 0; 163 | font-family: Montserrat, sans-serif; 164 | } 165 | .navbar li a, 166 | .navbar .navbar-brand { 167 | color: #fff !important; 168 | } 169 | .navbar-nav li a:hover, 170 | .navbar-nav li.active a { 171 | background-color: #ff0000 !important; 172 | } 173 | .navbar-default .navbar-toggle { 174 | border-color: transparent; 175 | color: #fff !important; 176 | } 177 | footer .glyphicon { 178 | font-size: 20px; 179 | margin-bottom: 20px; 180 | color: #f4511e; 181 | } 182 | .slideanim { 183 | visibility: hidden; 184 | } 185 | .slide { 186 | animation-name: slide; 187 | -webkit-animation-name: slide; 188 | animation-duration: 1s; 189 | -webkit-animation-duration: 1s; 190 | visibility: visible; 191 | } 192 | .score { 193 | display: block; 194 | font-size: 1.3em; 195 | font-style: oblique; 196 | } 197 | .text-green { 198 | color: #006f6f; 199 | } 200 | .text-red { 201 | color: #b90000; 202 | } 203 | @keyframes slide { 204 | 0% { 205 | opacity: 0; 206 | transform: translateY(70%); 207 | } 208 | 100% { 209 | opacity: 1; 210 | transform: translateY(0%); 211 | } 212 | } 213 | @-webkit-keyframes slide { 214 | 0% { 215 | opacity: 0; 216 | -webkit-transform: translateY(70%); 217 | } 218 | 100% { 219 | opacity: 1; 220 | -webkit-transform: 
translateY(0%); 221 | } 222 | } 223 | @media screen and (max-width: 768px) { 224 | .col-sm-4 { 225 | text-align: center; 226 | margin: 25px 0; 227 | } 228 | .btn-lg { 229 | width: 100%; 230 | margin-bottom: 35px; 231 | } 232 | } 233 | @media screen and (max-width: 480px) { 234 | .logo { 235 | font-size: 150px; 236 | } 237 | } 238 | 239 | .agency-list { 240 | display: inline-block; 241 | } 242 | 243 | .agency-list li { 244 | display: inline-block; 245 | width: 200px; 246 | border: solid 1px gainsboro; 247 | height: 100px; 248 | vertical-align: top; 249 | margin: 0.4em; 250 | border-radius: 6px; 251 | padding: 0.5em; 252 | transition: all 0.2s ease; 253 | cursor: pointer; 254 | user-select: none; 255 | box-shadow: 1px 1px 2px #0000000f; 256 | line-height: 1.2; 257 | } 258 | 259 | .agency-list a { 260 | color: #3a3535; 261 | text-decoration: none; 262 | } 263 | 264 | html, 265 | body { 266 | font-family: sans-serif; 267 | } 268 | 269 | .agency-list li:hover { 270 | transform: scale(1.1); 271 | background: #f9f9f9; 272 | } 273 | 274 | .fadein { 275 | animation: fadein 2s; 276 | opacity: 1; 277 | } 278 | 279 | @keyframes fadein { 280 | 0% { 281 | opacity: 0; 282 | } 283 | 100% { 284 | opacity: 1; 285 | } 286 | } 287 | 288 | .svg { 289 | height: 35%; 290 | margin: 0 auto; 291 | } 292 | 293 | .donut { 294 | stroke: #c4c4c4; 295 | stroke-width: 6; 296 | animation: donut2 3s; 297 | stroke-dasharray: 80, 20; 298 | filter:url(#shadow); 299 | } 300 | 301 | @keyframes donut2 { 302 | 0% { 303 | stroke-dasharray: 0, 100; 304 | } 305 | 100% { 306 | stroke-dasharray: 80, 20; 307 | } 308 | } 309 | 310 | .our-card { 311 | background-color: rgba(255,255,255,0.8); 312 | } 313 | 314 | .card-header { 315 | color: #2E2757; 316 | font-size: 1.4em; 317 | font-style: bold; 318 | } 319 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GovLens 2 | 3 | 
![](https://github.com/codeforboston/GovLens/workflows/Lint%20and%20Test/badge.svg) 4 | 5 | ## About the project 6 | 7 | GovLens is a government transparency project developed by MuckRock and Code for Boston engineers. Our mission is to create a more open, accessible, and secure democracy through examining the technical elements of government agency websites. We use algorithms to score thousands of federal and state agencies based on their transparency, security, privacy, and accessibility. We then publish our findings and help communicate to government agencies possible improvements to their infrastructures that would better the agency as a whole. 8 | 9 | ![A screenshot of what a GovLens Scorecard looks like](README_images/scorecard.png ) 10 | 11 | ## Why? 12 | 13 | We get reminders all the time of how well our physical civic infrastructure is doing: Did my car hit a pothole? Are the swing sets covered in rust? It can be harder to see how well our digital civic infrastructure is holding up, however, particularly when it comes to the parts of the web that can be invisible to many people: How accessible is a site to people who rely on screen readers or who have reduced vision? Which third-party trackers have access to visitor data, and how is that data being guarded? Are government websites following basic best practices in utilizing secure connections? 14 | 15 | While we have a [National Bridge Inventory](https://www.fhwa.dot.gov/bridge/nbi.cfm) that monitors dangerous bridges and other federal agencies that monitor other core infrastructure issues, we do not have similar insights into how strong or weak much of our digital infrastructure is. 
16 | 17 | GovLens helps to provide at least the start of an answer to that, by making those oftentimes overlooked aspects of digital infrastructure more visible via public report cards for each agency in our database as well as collated data for each jurisdiction and state, letting us see which areas of the country are leading the way and which might need a little more prodding. 18 | 19 | This is partially inspired by the work of Pulse.CIO.Gov, an official federal government website that monitored the adoption of HTTPS compliance among federal websites, as well as [SecureThe.News](https://securethe.news), which did the same thing for news websites. Both of these projects brought wider visibility to the issue and provided natural and effective peer pressure for website operators to improve. Our hope is we can do the same for local government, while also compiling a rich research data set for future analysis. 20 | 21 | ## Who is this site for? 22 | This site has three core planned audiences: 23 | 24 | * __The general public__, so that they’re better educated about the state of government digital infrastructure and why it matters. 25 | * __Government decision makers__, so that they can understand why they need to invest in better adhering to web standards as well as see where their sites stand compared to their peers. 26 | * __Local and national media outlets__, so as best to reach and influence the above categories. 27 | 28 | 29 | ## Getting started basics 30 | 31 | - [ ] Make sure [you've registered for the Code for Boston Slack](https://communityinviter.com/apps/cfb-public/code-for-boston-slack-invite). 32 | - [ ] Join the #MuckRock channel on Slack. 33 | - [ ] Ask a current member to be added to our Github organization ([They'll need to click here](https://github.com/codeforboston/GovLens/settings/collaboration)). 
After they've sent you an invite, you'll need to either check your email or notifications in Github (the alarm icon on the top right of your Github page) to accept the invite. 34 | - [ ] If you're interested in working on the backend of the site, [try following the instructions](#installation-instructions) 35 | 36 | ## Project goals 37 | 38 | The goal is to create an automatically updated database that tracks, over time, how well government agencies websites at the state, local, and federal levels follow best practices when it comes to HTTPS security, mobile friendliness, reader accessibility, and other key areas. 39 | 40 | Over time, we hope to show whether both individual agencies are improving or worsening, as well as help highlight national shifts along the metrics we monitor. Individual pages show the most recent snapshot ranking, but our API will make historical data available. 41 | 42 | ## Current status 43 | 44 | The project is currently in testing stages, as we work to both develop usable, accurate data and build a pipeline for regularly populating it. The site currently can run locally, but several of the data categories are filled with randomized testing data and any report cards generated are for **demonstration purposes only**. These scores do not represent actual scores for agencies. 45 | 46 | ## Installation instructions 47 | 48 | Install python3 if you haven't installed it yet. 49 | ```bash 50 | python3 --version 51 | ``` 52 | If you do not see a version you will need to visit [Python](https://www.python.org/downloads/) or google how to install it for your operating system. You want python3 as well as pip3. 
53 | 54 | 55 | Create a developer account on Github if you don't have one: [Github](https://github.com/) 56 | 57 | Fork the repository on Github, see: [Fork a Repo](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) 58 | 59 | Clone your forked repository from the command line (this will create a GovLens directory): 60 | ```bash 61 | git clone https://github.com/--your-github-name--/GovLens.git 62 | ``` 63 | 64 | Navigate to the base directory of the repository and prepare to install dependencies. 65 | 66 | To start, it is recommended to create a 67 | [virtual environment](https://virtualenv.pypa.io/en/stable/userguide/). If you have not 68 | used `virtualenv` before, install it with: `pip3 install virtualenv`. 69 | 70 | ```bash 71 | # Create a virtual environment to manage dependencies 72 | virtualenv venv 73 | source venv/bin/activate 74 | ``` 75 | 76 | Now install the dependencies with pip: 77 | 78 | ```bash 79 | # Install requirements.txt 80 | pip3 install -r requirements.txt 81 | ``` 82 | 83 | After the dependencies have installed, we want to prepare the database. 84 | 85 | ```bash 86 | # Perform data migrations 87 | python3 manage.py migrate 88 | ``` 89 | 90 | Then, we need to import a CSV file containing existing agency information. Start by 91 | running a Django shell: 92 | 93 | ```bash 94 | python3 manage.py shell 95 | 96 | # From within the shell 97 | >>> from apps.civic_pulse.utils.load_models import * 98 | >>> fill_agency_objects() 99 | >>> exit() 100 | ``` 101 | 102 | The following steps are needed in order to connect the api with the scrapers. If you do not wish to do that, then this may be skipped. We need to create a dummy user for the scraper to be able to access the api. The api is part of the Django project. 103 | Note: The scrapers live in an independent environment not necessarily on the same server as the Django website. The scrapers read and write data to the website using api endpoints. 
103 | 104 | - create an admin user to be able to log in to the admin portal of the website: /admin 105 | 106 | ```bash 107 | python3 manage.py createsuperuser --username admin --email admin@admin.com 108 | 109 | # enter the password when prompted. It can be any password that you wish to use. 110 | # It is used for login to the admin website. 111 | ``` 112 | - Start up the webserver 113 | ```bash 114 | python3 manage.py runserver 115 | ``` 116 | Navigate in your browser to `http://127.0.0.1:8000/admin`. Log in with the new admin user you just created. Click on Agencys and you should see a list of 117 | agencies created with the ``fill_agency_objects`` command. 118 | 119 | To set up the scraper, read [the scraper README](scrapers/README.rst). 120 | 121 | ## Code formatting 122 | GovLens enforces code style using [Black](https://github.com/psf/black) and pep8 rules using [Flake8](http://flake8.pycqa.org/en/latest/). 123 | To set up automatic code formatting for black standards, perform the following steps: 124 | - `pip install -U black pre-commit` 125 | - `pre-commit install` 126 | 127 | To manually run Flake8 from project root: 128 | - `pip install -U flake8` 129 | - `flake8 . 
--ignore E501,W503,E203` 131 | -------------------------------------------------------------------------------- /apps/civic_pulse/tests/test_api.py: -------------------------------------------------------------------------------- 1 | import json 2 | from django.test import TestCase 3 | from rest_framework.authtoken.models import Token 4 | from rest_framework.test import APIClient 5 | from django.contrib.auth.models import User 6 | from apps.civic_pulse.models import Agency, Entry 7 | 8 | 9 | class AgencyAPITest(TestCase): 10 | def setUp(self): 11 | Agency.objects.create(name="Test Agency 1") 12 | Agency.objects.create(name="Test Agency 2") 13 | self.client = APIClient() 14 | 15 | def test_GET(self): 16 | response = self.client.get("/api/agencies/") 17 | self.assertEqual(200, response.status_code) 18 | 19 | agencies_json = json.loads(response.content.decode("utf-8")) 20 | expected_results = [ 21 | { 22 | "id": 1, 23 | "name": "Test Agency 1", 24 | "website": "", 25 | "twitter": "", 26 | "facebook": "", 27 | "phone_number": "", 28 | "address": "", 29 | "description": "", 30 | "notes": "", 31 | "last_successful_scrape": None, 32 | "scrape_counter": 0, 33 | }, 34 | { 35 | "id": 2, 36 | "name": "Test Agency 2", 37 | "website": "", 38 | "twitter": "", 39 | "facebook": "", 40 | "phone_number": "", 41 | "address": "", 42 | "description": "", 43 | "notes": "", 44 | "last_successful_scrape": None, 45 | "scrape_counter": 0, 46 | }, 47 | ] 48 | self.assertEqual(agencies_json, expected_results) 49 | 50 | def test_GET_Individual(self): 51 | response = self.client.get("/api/agencies/1/") 52 | self.assertEqual(200, response.status_code) 53 | 54 | agency_json = json.loads(response.content.decode("utf-8")) 55 | expected_results = { 56 | "id": 1, 57 | "name": "Test Agency 1", 58 | "website": "", 59 | "twitter": "", 60 | "facebook": "", 61 | "phone_number": "", 62 | "address": "", 63 | "description": "", 64 | "notes": "", 65 | "last_successful_scrape": None, 66 | "scrape_counter": 0, 
67 | } 68 | self.assertEqual(agency_json, expected_results) 69 | 70 | def test_POST_Unauthorized(self): 71 | data = {"name": "Test POST Agency"} 72 | response = self.client.post("/api/agencies/", data=data, format="json") 73 | self.assertEqual(401, response.status_code) 74 | 75 | json_response = json.loads(response.content.decode("utf-8")) 76 | self.assertEqual( 77 | "Authentication credentials were not provided.", json_response["detail"] 78 | ) 79 | 80 | def test_POST_Authorized(self): 81 | user = User.objects.create_user( 82 | username="test", email="test@test.test", password="test" 83 | ) 84 | token = Token.objects.create(user=user) 85 | 86 | data = {"id": 5, "name": "Test POST Agency"} 87 | 88 | self.client.credentials(HTTP_AUTHORIZATION="Token " + token.key) 89 | response = self.client.post("/api/agencies/", data=data, format="json") 90 | self.assertEqual(201, response.status_code) 91 | 92 | json_response = json.loads(response.content.decode("utf-8")) 93 | expected_results = { 94 | "id": 5, 95 | "name": "Test POST Agency", 96 | "website": "", 97 | "twitter": "", 98 | "facebook": "", 99 | "phone_number": "", 100 | "address": "", 101 | "description": "", 102 | "notes": "", 103 | "last_successful_scrape": None, 104 | "scrape_counter": 0, 105 | } 106 | 107 | self.assertEqual(json_response, expected_results) 108 | 109 | 110 | class EntryAPITest(TestCase): 111 | def setUp(self): 112 | self.agency = Agency.objects.create(name="Test Agency 1", id=1) 113 | Entry.objects.create(agency_id=self.agency.id,) 114 | Entry.objects.create( 115 | agency_id=self.agency.id, https_enabled=True, 116 | ) 117 | 118 | self.client = APIClient() 119 | 120 | def test_GET(self): 121 | response = self.client.get("/api/entries/") 122 | self.assertEqual(200, response.status_code) 123 | 124 | entries_json = json.loads(response.content.decode("utf-8")) 125 | expected_results = [ 126 | { 127 | "id": 1, 128 | "agency": 1, 129 | "https_enabled": False, 130 | "has_privacy_policy": False, 131 | 
"mobile_friendly": False, 132 | "good_performance": False, 133 | "has_social_media": False, 134 | "has_contact_info": False, 135 | "notes": "", 136 | }, 137 | { 138 | "id": 2, 139 | "agency": 1, 140 | "https_enabled": True, 141 | "has_privacy_policy": False, 142 | "mobile_friendly": False, 143 | "good_performance": False, 144 | "has_social_media": False, 145 | "has_contact_info": False, 146 | "notes": "", 147 | }, 148 | ] 149 | 150 | self.assertEqual(entries_json, expected_results) 151 | 152 | def test_GET_Individual(self): 153 | response = self.client.get("/api/entries/1/") 154 | self.assertEqual(200, response.status_code) 155 | 156 | entry_json = json.loads(response.content.decode("utf-8")) 157 | expected_results = { 158 | "id": 1, 159 | "agency": 1, 160 | "https_enabled": False, 161 | "has_privacy_policy": False, 162 | "mobile_friendly": False, 163 | "good_performance": False, 164 | "has_social_media": False, 165 | "has_contact_info": False, 166 | "notes": "", 167 | } 168 | 169 | self.assertEqual(entry_json, expected_results) 170 | 171 | def test_POST_Unauthorized(self): 172 | data = { 173 | "agency": 1, 174 | "https_enabled": True, 175 | "has_privacy_policy": False, 176 | "mobile_friendly": False, 177 | "good_performance": False, 178 | "has_social_media": True, 179 | "has_contact_info": False, 180 | "notes": "", 181 | } 182 | response = self.client.post("/api/entries/", data=data, format="json") 183 | self.assertEqual(401, response.status_code) 184 | 185 | json_response = json.loads(response.content.decode("utf-8")) 186 | self.assertEqual( 187 | "Authentication credentials were not provided.", json_response["detail"] 188 | ) 189 | 190 | def test_POST_Authorized(self): 191 | user = User.objects.create_user( 192 | username="test", email="test@test.test", password="test" 193 | ) 194 | token = Token.objects.create(user=user) 195 | 196 | data = { 197 | "agency": 1, 198 | "https_enabled": True, 199 | "has_privacy_policy": False, 200 | "mobile_friendly": False, 201 | 
"good_performance": False, 202 | "has_social_media": True, 203 | "has_contact_info": False, 204 | "notes": "", 205 | } 206 | 207 | self.client.credentials(HTTP_AUTHORIZATION="Token " + token.key) 208 | response = self.client.post("/api/entries/", data=data, format="json") 209 | self.assertEqual(201, response.status_code) 210 | 211 | json_response = json.loads(response.content.decode("utf-8")) 212 | expected_results = { 213 | "id": 3, 214 | "agency": 1, 215 | "https_enabled": True, 216 | "has_privacy_policy": False, 217 | "mobile_friendly": False, 218 | "good_performance": False, 219 | "has_social_media": True, 220 | "has_contact_info": False, 221 | "notes": "", 222 | } 223 | self.assertEqual(json_response, expected_results) 224 | -------------------------------------------------------------------------------- /apps/civic_pulse/templates/agency-detail.html: -------------------------------------------------------------------------------- 1 | {% extends 'base.html' %} 2 | {% load static %} 3 | 4 | {% block content %} 5 |
6 |

{{ agency }}

7 |
8 | 9 | 10 |
11 |
12 |
13 |

About {{ agency }}

14 |

Insert agency description

15 |
Visit website 16 |
17 |
18 |
19 | 20 |
21 |
22 |
23 |
24 | 25 |
26 |
27 |

7/10 {{ agency }} ranks higher than 75% of agencies on our security and privacy criteria. See why.


28 |
29 |
30 |
31 |
32 | 33 |
34 |
35 |

5/10 {{ agency }} ranks lower than 53% of agencies on our accessibility criteria. See why.


36 |
37 |
38 |
39 |
40 | 41 |
42 |
43 |

7/10 {{ agency }} ranks higher than 79% of agencies on our communication criteria. See why.


44 |
45 |
46 |
47 |
48 | 49 | 50 | 51 | 52 | 53 |
54 |
55 |

We test each agency once a week. We last checked {{ agency.website }} {{ last_entry.created_date | timesince }} ago.

56 | Read more about our methodology. 57 |
58 |
59 |
60 |
61 |
62 |

Security & Privacy

63 |
64 |
65 |
66 |
67 | {% include 'check_box.html' with has_feature=last_entry.https_enabled %} 68 |
69 |
70 |

Uses HTTPS


71 |
72 |
73 |
74 |
75 | {% include 'check_box.html' with has_feature=True %} 76 |
77 |
78 |

No third party trackers


79 |
80 |
81 |
82 |
83 | {% include 'check_box.html' with has_feature=last_entry.has_privacy_policy %} 84 |
85 |
86 |

Has a privacy policy


87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |

Website Accessibility

96 |
97 |
98 |
99 |
100 | {% include 'check_box.html' with has_feature=last_entry.mobile_friendly %} 101 |
102 |
103 |

Is mobile friendly


104 |
105 |
106 |
107 |
108 | {% include 'check_box.html' with has_feature=True %} 109 |
110 |
111 |

Passes Google's accessibility test


112 |
113 |
114 |
115 |
116 | {% include 'check_box.html' with has_feature=last_entry.good_performance %} 117 |
118 |
119 |

Passes Google's speed test


120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |

Communication

129 |
130 |
131 |
132 |
133 | {% include 'check_box.html' with has_feature=last_entry.has_social_media %} 134 |
135 |
136 |

Uses social media


137 |
138 |
139 |
140 |
141 | {% include 'check_box.html' with has_feature=last_entry.has_contact_info %} 142 |
143 |
144 |

Multiple ways to contact


145 |
146 |
147 |
148 |
149 | {% include 'check_box.html' with has_feature=True %} 150 |
151 |
152 |

Posts meetings and minutes


153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 | 161 | 162 |
163 |

CONTACT

164 |
165 |
166 |

Cambridge, MA

167 |

867-5309

168 |

GitHub

169 |
170 |
171 |
172 |
173 | 174 |
175 |
176 | 177 |
178 |
179 |
180 |
181 |
182 | 183 |
184 |
185 |
186 |
187 |
188 | 189 | 190 | 191 |
192 |
193 | 194 | 230 | 231 | {% endblock content %} 232 | --------------------------------------------------------------------------------