├── .coveragerc ├── .github ├── CONTRIBUTING.md └── workflows │ └── continuous-deployment.yaml ├── .gitignore ├── LICENSE ├── MANIFEST.in ├── Makefile ├── Pipfile ├── Pipfile.lock ├── README.md ├── calaccess_raw ├── __init__.py ├── admin │ ├── __init__.py │ ├── base.py │ ├── campaign.py │ ├── common.py │ ├── inactive.py │ └── lobbying.py ├── annotations │ ├── __init__.py │ ├── choices.py │ ├── documents.py │ ├── forms.csv │ ├── forms.py │ └── sections.csv ├── apps.py ├── fields.py ├── management │ ├── __init__.py │ └── commands │ │ ├── __init__.py │ │ ├── cleancalaccessrawfile.py │ │ ├── downloadcalaccessrawdata.py │ │ ├── extractcalaccessrawfiles.py │ │ ├── loadcalaccessrawfile.py │ │ └── updatecalaccessrawdata.py ├── managers.py ├── migrations │ ├── 0001_squashed_0014_auto_20160801_2039.py │ ├── 0002_auto_20160802_2101.py │ ├── 0003_auto_20160804_1443.py │ ├── 0004_auto_20160804_1758.py │ ├── 0005_auto_20160826_1449.py │ ├── 0006_auto_20160826_2039.py │ ├── 0007_auto_20160831_0132.py │ ├── 0008_auto_20161025_1527.py │ ├── 0009_auto_20161026_1641.py │ ├── 0010_auto_20161026_1643.py │ ├── 0011_auto_20161028_0129.py │ ├── 0012_auto_20161123_2217.py │ ├── 0013_auto_20161123_2219.py │ ├── 0014_auto_20170421_1821.py │ ├── 0015_auto_20170729_0218.py │ ├── 0016_auto_20200818_0542.py │ ├── 0017_auto_20210422_0047.py │ ├── 0018_auto_20210426_2015.py │ ├── 0019_alter_rawdataversion_download_zip_archive.py │ ├── 0020_auto_20210614_1556.py │ ├── 0021_auto_20210614_1604.py │ ├── 0022_auto_20210614_1627.py │ ├── 0023_alter_rawdataversion_clean_zip_archive.py │ ├── 0024_auto_20210822_2158.py │ ├── 0025_auto_20220725_1711.py │ ├── 0026_auto_20220909_2321.py │ ├── 0027_alter_cvre530cd_other_desc.py │ ├── 0028_remove_cvrf470cd_cand_adr1.py │ ├── 0029_alter_cvrf470cd_date_1000.py │ └── __init__.py ├── models │ ├── __init__.py │ ├── base.py │ ├── campaign.py │ ├── common.py │ ├── inactive.py │ └── lobbying.py └── tests │ ├── __init__.py │ ├── test_admins.py │ ├── test_annotations.py 
│ ├── test_commands.py │ ├── test_docs.py │ ├── test_fields.py │ ├── test_models.py │ └── test_utilities.py ├── docs ├── Makefile ├── _static │ ├── application-layers.png │ ├── ccdc-logo.png │ ├── cir-logo.png │ ├── css │ │ └── custom.css │ ├── latimes-logo.gif │ ├── los-angeles-times-logo.png │ ├── opennews-logo.png │ └── stanford-logo.png ├── calaccess_raw_files_report.csv ├── changelog.rst ├── conf.py ├── faq.rst ├── filingforms.rst ├── howtocontribute.rst ├── howtouseit.rst ├── index.rst ├── make.bat ├── managementcommands.rst ├── models.rst ├── officialdocumentation.rst ├── releasechecklist.rst └── tracking.rst ├── example ├── .documentcloud_metadata │ ├── 2711614-CalAccessTablesWeb.json │ ├── 2711614.json │ ├── 2711616.json │ ├── 2712033.json │ └── 2712034.json ├── __init__.py ├── manage.py ├── network-analysis │ └── contributors.csv ├── settings.py ├── settings_local.py.template ├── settings_test_multi_dbs.py.template ├── settings_test_postgresql.py.template ├── test-data │ ├── dbwebexport.zip │ └── raw.zip ├── toolbox │ ├── __init__.py │ ├── management │ │ ├── __init__.py │ │ └── commands │ │ │ ├── __init__.py │ │ │ ├── analyzecoderush.py │ │ │ ├── analyzefielddocs.py │ │ │ ├── analyzeuniquekeys.py │ │ │ ├── createchoicefieldissues.py │ │ │ ├── createfielddocissues.py │ │ │ ├── createuniquekeyissues.py │ │ │ ├── createverboseandhelptextissues.py │ │ │ ├── reprocesscalaccessrawdata.py │ │ │ ├── samplecalaccessrawdata.py │ │ │ └── verifycalaccesschoicefields.py │ └── templates │ │ └── toolbox │ │ ├── createchoicefieldissue.md │ │ ├── createverboseandhelptextissues.md │ │ ├── filingforms.rst │ │ └── models.rst ├── urls.py └── wsgi.py ├── manage.py ├── setup.cfg └── setup.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | include = 3 | calaccess_raw/*.py 4 | 5 | omit = 6 | calaccess_raw/tests/*.py 7 | calaccess_raw/migrations/*.py 8 | 9 | [report] 10 | exclude_lines = 11 | pragma: no cover 12 | 
add_introspection_rules 13 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to contribute 2 | 3 | We welcome issues and pull requests from all parties. If you're looking for a place to plug in, 4 | work through our [getting started guide for new contributors](http://django-calaccess.californiacivicdata.org/en/latest/howtocontribute.html) 5 | and then check out [our open issues list](https://github.com/california-civic-data-coalition/django-calaccess-raw-data/issues) to see if anything interests you. If this repository isn't your cup of tea, check out our other projects [on GitHub](https://github.com/california-civic-data-coalition/). 6 | 7 | ## How do the tests work? 8 | 9 | Our code is tested using [Django's built-in unittesting](https://docs.djangoproject.com/en/1.9/topics/testing/) system via the [TravisCI](https://travis-ci.org/california-civic-data-coalition/django-calaccess-raw-data) 10 | continuous integration service. 11 | 12 | In addition, prior to the Django unittests, code is evaluated using Python's 13 | [pep8](https://pypi.python.org/pypi/pep8) and [pyflakes](https://pypi.python.org/pypi/pyflakes) style 14 | guide enforcement tools. 15 | 16 | When a commit or pull request is made to our repository, those tests are rerun with the latest code. 17 | We try not to be too uptight, but we generally expect the tests to pass before we will merge a request. 18 | 19 | ## How can I contribute documentation? 20 | 21 | The [documentation](http://django-calaccess.californiacivicdata.org/) for this 22 | project is published online by ReadTheDocs using the files found in [a dedicated repository](https://github.com/california-civic-data-coalition/django-calaccess-technical-documentation). 
23 | 24 | Those files are compiled using Python's [Sphinx](http://www.sphinx-doc.org/en/stable/) documentation framework, which is written in [reStructuredText](http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html) format. 25 | 26 | To make edits to the documentation, change the ``.rst`` files found in that directory, commit 27 | your changes and submit them as a pull request. 28 | 29 | ## Who is in charge of the repository? 30 | 31 | Maintaining the code and handling outside pull requests is managed by the leadership 32 | team at the California Civic Data Coalition. 33 | 34 | ## What is the California Civic Data Coalition? 35 | 36 | The California Civic Data Coalition is a loosely coupled team of journalists from the Los Angeles Times Data Desk, the Washington Post, The Center for Investigative Reporting and Stanford's Computational Journalism Lab. 37 | 38 | The coalition was formed in 2014 by Ben Welsh and Agustin Armendariz to lead the development of open-source software that makes California's public data easier to access and analyze. The effort has drawn hundreds of contributions from developers and journalists at dozens of competing news outlets. 39 | 40 | Its primary focus is refining CAL-ACCESS, the jumbled, dirty and difficult government database that tracks campaign finance and lobbying activity in California politics. 41 | 42 | In 2015 the coalition was named a winner of the Knight News Challenge and awarded $250,000 in philanthropic funding from the Knight Foundation, the Democracy Fund, the William and Flora Hewlett Foundation and the Rita Allen Foundation. 
43 | 44 | Read more at [californiacivicdata.org](http://www.californiacivicdata.org) 45 | -------------------------------------------------------------------------------- /.github/workflows/continuous-deployment.yaml: -------------------------------------------------------------------------------- 1 | name: Testing and distribution 2 | on: 3 | push: 4 | pull_request: 5 | workflow_dispatch: 6 | 7 | jobs: 8 | lint-python: 9 | name: Lint Python code 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout 13 | uses: actions/checkout@v4 14 | 15 | - name: Install pipenv 16 | run: pipx install pipenv 17 | 18 | - uses: actions/setup-python@v4 19 | with: 20 | python-version: '3.9' 21 | cache: 'pipenv' 22 | 23 | - id: pipenv-install 24 | name: Install Python dependencies 25 | run: pipenv install --dev --python `which python` 26 | 27 | - id: lint 28 | name: Lint 29 | run: pipenv run flake8 ./calaccess_raw 30 | 31 | test-python: 32 | name: "Test Python code" 33 | runs-on: ubuntu-latest 34 | strategy: 35 | matrix: 36 | settings: ['postgresql', 'multi_dbs'] 37 | python: ['3.8', '3.9', '3.10'] 38 | services: 39 | postgres: 40 | image: postgres 41 | env: 42 | POSTGRES_PASSWORD: postgres 43 | POSTGRES_DB: calaccess_raw 44 | POSTGRES_USER: postgres 45 | options: >- 46 | --health-cmd pg_isready 47 | --health-interval 10s 48 | --health-timeout 5s 49 | --health-retries 5 50 | ports: 51 | - 5432:5432 52 | steps: 53 | - name: Checkout the repo 54 | uses: actions/checkout@v2 55 | 56 | - name: Install pipenv 57 | run: pipx install pipenv 58 | 59 | - uses: actions/setup-python@v2 60 | with: 61 | python-version: ${{ matrix.python }} 62 | cache: 'pipenv' 63 | 64 | - id: pipenv-install 65 | name: Install Python dependencies 66 | run: pipenv install --dev --python `which python` --skip-lock 67 | 68 | - name: Test 69 | run: | 70 | cp example/settings_test_${{ matrix.settings }}.py.template example/settings_local.py 71 | pipenv run coverage run example/manage.py test calaccess_raw 72 | 
pipenv run coverage report -m 73 | env: 74 | IA_STORAGE_ACCESS_KEY: ${{ secrets.IA_STORAGE_ACCESS_KEY }} 75 | IA_STORAGE_SECRET_KEY: ${{ secrets.IA_STORAGE_SECRET_KEY }} 76 | 77 | test-build: 78 | name: Build Python package 79 | runs-on: ubuntu-latest 80 | needs: [test-python] 81 | steps: 82 | - name: Checkout 83 | uses: actions/checkout@v3 84 | 85 | - name: Install pipenv 86 | run: pipx install pipenv 87 | 88 | - uses: actions/setup-python@v4 89 | with: 90 | python-version: '3.9' 91 | cache: 'pipenv' 92 | 93 | - id: pipenv-install 94 | name: Install Python dependencies 95 | run: pipenv install --dev --python `which python` 96 | 97 | - id: build 98 | name: Build release 99 | run: | 100 | pipenv run python setup.py sdist 101 | pipenv run python setup.py bdist_wheel 102 | shell: bash 103 | 104 | - id: check 105 | name: Check release 106 | run: pipenv run twine check dist/* 107 | shell: bash 108 | 109 | - id: save 110 | name: Save artifact 111 | uses: actions/upload-artifact@v2 112 | with: 113 | name: release 114 | path: ./dist 115 | if-no-files-found: error 116 | 117 | tag-release: 118 | name: Tagged PyPI release 119 | runs-on: ubuntu-latest 120 | needs: [test-build] 121 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') 122 | steps: 123 | - uses: actions/setup-python@v3 124 | with: 125 | python-version: '3.9' 126 | 127 | - id: fetch 128 | name: Fetch artifact 129 | uses: actions/download-artifact@v3 130 | with: 131 | name: release 132 | path: ./dist 133 | 134 | - id: publish 135 | name: Publish release 136 | uses: pypa/gh-action-pypi-publish@release/v1 137 | with: 138 | user: __token__ 139 | password: ${{ secrets.PYPI_API_TOKEN }} 140 | verbose: true 141 | verify_metadata: false 142 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | debug.log 3 | *.csv# 4 | *.egg/ 5 | *.egg-info* 6 | .eggs/ 7 | inspect.py 8 
| speed.txt 9 | *.pyc 10 | *.swp 11 | .DS_Store 12 | build 13 | django_calaccess_parser.egg-info 14 | reference 15 | dist 16 | data 17 | *.swo 18 | docs/_build 19 | .coverage 20 | .tox 21 | example/data/ 22 | example/test-data/download/ 23 | example/test-data/csv/ 24 | example/test-data/tsv/ 25 | example/test-data/log/ 26 | example/test-data/clean.zip 27 | example/test-data/.media/ 28 | example/.media/* 29 | example/.static/ 30 | example/routers.py 31 | example/settings_local.py 32 | *dbwebexport_sample/ 33 | jupyter_notebooks 34 | example/.documentcloud_metadata/2753585.json 35 | django.log 36 | .python-version 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 California Civic Data Coalition 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include README.md 3 | include .github/CONTRIBUTING.md 4 | recursive-include calaccess_raw/templates * 5 | include calaccess_raw/annotations/*.csv 6 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: bootstrap docs load rs sh ship test 2 | 3 | bootstrap: 4 | pipenv run python example/manage.py makemigrations calaccess_raw 5 | pipenv run python example/manage.py migrate 6 | pipenv run python example/manage.py update 7 | pipenv run python example/manage.py collectstatic --noinput 8 | pipenv run python example/manage.py runserver 9 | 10 | docs: 11 | pipenv run python example/manage.py createcalaccessrawmodeldocs 12 | cd docs && make livehtml 13 | 14 | load: 15 | pipenv run python example/manage.py updatecalaccessrawdata --skip-download --skip-clean --keep-files 16 | 17 | testload: 18 | dropdb calaccess_raw 19 | createdb calaccess_raw 20 | pipenv run python example/manage.py migrate 21 | pipenv run python example/manage.py test calaccess_raw; 22 | 23 | rs: 24 | pipenv run python example/manage.py runserver 25 | 26 | sh: 27 | pipenv run python example/manage.py shell 28 | 29 | ship: 30 | rm -rf build/ 31 | pipenv run python setup.py sdist bdist_wheel 32 | pipenv run twine upload dist/* --skip-existing 33 | 34 | test: 35 | pipenv run flake8 calaccess_raw 36 | pipenv run coverage run example/manage.py test calaccess_raw 37 | pipenv run coverage report -m 38 | 39 | testdocs: 40 | pipenv run python example/manage.py test calaccess_raw.tests.test_docs.DocumentationTestCase 41 | 42 | testutils: 43 | pipenv run coverage run example/manage.py test calaccess_raw.tests.test_utilities.UtilityTestCase 44 | pipenv run coverage report -m 45 | 46 | 
testannotations: 47 | pipenv run coverage run example/manage.py test calaccess_raw.tests.test_annotations.AnnotationTestCase 48 | pipenv run coverage report -m 49 | 50 | testfields: 51 | pipenv run coverage run example/manage.py test calaccess_raw.tests.test_fields.FieldTestCase 52 | pipenv run coverage report -m 53 | 54 | testcommands: 55 | pipenv run coverage run example/manage.py test calaccess_raw.tests.test_commands.CommandTestCase 56 | pipenv run coverage report -m 57 | 58 | testadmins: 59 | pipenv run coverage run example/manage.py test calaccess_raw.tests.test_admins.AdminTestCase 60 | pipenv run coverage report -m 61 | 62 | testmodels: 63 | pipenv run coverage run example/manage.py test calaccess_raw.tests.test_models.ModelTestCase 64 | pipenv run coverage report -m 65 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.python.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | Django = "==4.1.*" 8 | django-postgres-copy = ">=2.6.*" 9 | "psycopg2-binary" = "*" 10 | pytz = "*" 11 | requests = "*" 12 | csvkit = "*" 13 | 14 | [dev-packages] 15 | Sphinx = "*" 16 | sphinx_rtd_theme = "*" 17 | sphinx-autobuild = "*" 18 | subsample = "*" 19 | pydocstyle = "*" 20 | "flake8" = "*" 21 | "flake8-docstrings" = "*" 22 | coverage = "*" 23 | requests-mock = "*" 24 | twine = "*" 25 | setuptools-scm = "*" 26 | django-internetarchive-storage = "*" 27 | black = "*" 28 | 29 | [requires] 30 | python_version = "3.9" 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | A Django app to download, extract and load campaign finance and lobbying activity data from the California Secretary of State's [CAL-ACCESS](http://www.sos.ca.gov/prd/cal-access/) database 2 | 3 | - 
def get_data_directory():
    """
    Return the directory where downloaded CAL-ACCESS data is stored.

    The ``CALACCESS_DATA_DIR`` setting wins when it is set to a truthy value.
    Otherwise a ``data`` folder inside the ``BASE_DIR`` setting is used.

    Raises a ValueError when neither setting is configured.
    """
    # Look each setting up once; empty or missing values fall through.
    data_dir = getattr(settings, "CALACCESS_DATA_DIR", None)
    if data_dir:
        return data_dir

    base_dir = getattr(settings, "BASE_DIR", None)
    if base_dir:
        return os.path.join(base_dir, "data")

    # Adjacent literals are joined at compile time, so the message no longer
    # depends on how the continuation line happens to be indented.
    raise ValueError(
        "CAL-ACCESS download directory not configured. Set either "
        "CALACCESS_DATA_DIR or BASE_DIR in settings.py"
    )


def get_model_list():
    """
    Return a list of the data table models registered by this application.

    Only models whose immediate base class mentions ``CalAccessBaseModel`` in
    its string representation are included.
    """
    # Imported inside the function rather than at module level, as in the
    # original implementation.
    from django.apps import apps

    app_models = apps.get_app_config("calaccess_raw").models.values()
    return [m for m in app_models if "CalAccessBaseModel" in str(m.__base__)]
57 | LexpCdAdmin, 58 | LccmCdAdmin, 59 | LothCdAdmin, 60 | LempCdAdmin, 61 | LpayCdAdmin, 62 | LobbyingChgLogCdAdmin, 63 | ) 64 | from calaccess_raw.admin.common import ( 65 | FilernameCdAdmin, 66 | FilerFilingsCdAdmin, 67 | FilingsCdAdmin, 68 | SmryCdAdmin, 69 | CvrE530CdAdmin, 70 | SpltCdAdmin, 71 | TextMemoCdAdmin, 72 | AcronymsCdAdmin, 73 | AddressCdAdmin, 74 | FilersCdAdmin, 75 | FilerAcronymsCdAdmin, 76 | FilerAddressCdAdmin, 77 | FilerEthicsClassCdAdmin, 78 | FilerInterestsCdAdmin, 79 | FilerLinksCdAdmin, 80 | FilerStatusTypesCdAdmin, 81 | FilerToFilerTypeCdAdmin, 82 | FilerTypesCdAdmin, 83 | FilerXrefCdAdmin, 84 | FilingPeriodCdAdmin, 85 | GroupTypesCdAdmin, 86 | HeaderCdAdmin, 87 | HdrCdAdmin, 88 | ImageLinksCdAdmin, 89 | LegislativeSessionsCdAdmin, 90 | LookupCodesCdAdmin, 91 | NamesCdAdmin, 92 | ReceivedFilingsCdAdmin, 93 | ReportsCdAdmin, 94 | ) 95 | 96 | __all__ = ( 97 | "BaseAdmin", 98 | "CvrSoCdAdmin", 99 | "Cvr2SoCdAdmin", 100 | "CvrCampaignDisclosureCdAdmin", 101 | "Cvr2CampaignDisclosureCdAdmin", 102 | "RcptCdAdmin", 103 | "Cvr3VerificationInfoCdAdmin", 104 | "LoanCdAdmin", 105 | "S401CdAdmin", 106 | "ExpnCdAdmin", 107 | "F495P2CdAdmin", 108 | "DebtCdAdmin", 109 | "S496CdAdmin", 110 | "SpltCdAdmin", 111 | "S497CdAdmin", 112 | "F501502CdAdmin", 113 | "S498CdAdmin", 114 | "CvrF470CdAdmin", 115 | "CvrRegistrationCdAdmin", 116 | "Cvr2RegistrationCdAdmin", 117 | "CvrLobbyDisclosureCdAdmin", 118 | "Cvr2LobbyDisclosureCdAdmin", 119 | "LobbyAmendmentsCdAdmin", 120 | "F690P2CdAdmin", 121 | "LattCdAdmin", 122 | "LexpCdAdmin", 123 | "LccmCdAdmin", 124 | "LothCdAdmin", 125 | "LempCdAdmin", 126 | "LpayCdAdmin", 127 | "FilerFilingsCdAdmin", 128 | "FilingsCdAdmin", 129 | "SmryCdAdmin", 130 | "CvrE530CdAdmin", 131 | "TextMemoCdAdmin", 132 | "AcronymsCdAdmin", 133 | "AddressCdAdmin", 134 | "BallotMeasuresCdAdmin", 135 | "EfsFilingLogCdAdmin", 136 | "FilernameCdAdmin", 137 | "FilersCdAdmin", 138 | "FilerAcronymsCdAdmin", 139 | "FilerAddressCdAdmin", 140 | 
class BaseAdmin(admin.ModelAdmin):
    """
    Shared parent for the admin panels of the CAL-ACCESS data models.

    Every model field is reported as read-only, and the list filters and
    search fields are derived from the fields declared on the model.
    """

    save_on_top = True

    def _field_names(self, keep=None):
        """
        Collect the names of the model's fields, optionally narrowed by `keep`.
        """
        names = []
        for field in self.model._meta.fields:
            if keep is None or keep(field):
                names.append(field.name)
        return names

    def get_readonly_fields(self, *args, **kwargs):
        """
        Report every field on the model as read only.
        """
        return self._field_names()

    def get_list_filter(self, request):
        """
        Offer a filter for each field that has `choices` configured.
        """
        return self._field_names(keep=lambda field: field.choices)

    def get_search_fields(self, request):
        """
        Search across each field that is not a relation to another model.
        """
        return self._field_names(keep=lambda field: not field.is_relation)
@admin.register(models.Cvr3VerificationInfoCd)
class Cvr3VerificationInfoCdAdmin(BaseAdmin):
    """
    Admin panel for Cvr3VerificationInfoCd records, browsable by signature date.
    """

    date_hierarchy = "sig_date"
    list_display = (
        "filing_id",
        "sig_date",
        "sig_naml",
        "form_type",
    )


@admin.register(models.DebtCd)
class DebtCdAdmin(BaseAdmin):
    """
    Admin panel for DebtCd records.
    """

    list_display = ("filing_id", "expn_code", "payee_naml", "amt_incur", "amt_paid")


@admin.register(models.ExpnCd)
class ExpnCdAdmin(BaseAdmin):
    """
    Admin panel for ExpnCd records, browsable by expenditure date.
    """

    date_hierarchy = "expn_date"
    list_display = (
        "filing_id",
        "expn_date",
        "cand_naml",
        "payee_naml",
        "form_type",
        "amount",
    )


@admin.register(models.LoanCd)
class LoanCdAdmin(BaseAdmin):
    """
    Admin panel for LoanCd records, browsable by the first loan date.
    """

    date_hierarchy = "loan_date1"
    list_display = (
        "filing_id",
        "form_type",
        "loan_date1",
        "loan_type",
        "lndr_naml",
        "loan_amt1",
        "loan_amt2",
        "loan_amt3",
        "loan_amt4",
    )


@admin.register(models.RcptCd)
class RcptCdAdmin(BaseAdmin):
    """
    Admin panel for RcptCd records, browsable by receipt date.
    """

    date_hierarchy = "rcpt_date"
    list_display = (
        "filing_id",
        "form_type",
        "rcpt_date",
        "ctrib_naml",
        "ctrib_emp",
        "ctrib_occ",
        "amount",
    )


@admin.register(models.S401Cd)
class S401CdAdmin(BaseAdmin):
    """
    Admin panel for S401Cd records.
    """

    list_display = ("filing_id", "form_type", "cand_naml", "payee_naml", "amount")


@admin.register(models.F495P2Cd)
class F495P2CdAdmin(BaseAdmin):
    """
    Admin panel for F495P2Cd records, browsable by election date.
    """

    date_hierarchy = "elect_date"
    list_display = (
        "filing_id",
        "form_type",
        "elect_date",
        "contribamt",
    )


@admin.register(models.S496Cd)
class S496CdAdmin(BaseAdmin):
    """
    Admin panel for S496Cd records, browsable by expenditure date.
    """

    date_hierarchy = "exp_date"
    list_display = (
        "filing_id",
        "exp_date",
        "expn_dscr",
        "amount",
    )


@admin.register(models.S497Cd)
class S497CdAdmin(BaseAdmin):
    """
    Admin panel for S497Cd records, browsable by contribution date.
    """

    date_hierarchy = "ctrib_date"
    list_display = ("filing_id", "ctrib_date", "cand_naml", "enty_naml", "amount")


@admin.register(models.S498Cd)
class S498CdAdmin(BaseAdmin):
    """
    Admin panel for S498Cd records, browsable by the date received.
    """

    date_hierarchy = "date_rcvd"
    list_display = ("filing_id", "date_rcvd", "payor_naml", "cand_naml", "amt_rcvd")


@admin.register(models.F501502Cd)
class F501502CdAdmin(BaseAdmin):
    """
    Admin panel for F501502Cd records, browsable by report date.
    """

    date_hierarchy = "rpt_date"
    list_display = ("filing_id", "rpt_date", "cand_naml", "office_cd", "elec_type")
5 | """ 6 | from django.contrib import admin 7 | from calaccess_raw import models 8 | from .base import BaseAdmin 9 | 10 | 11 | @admin.register(models.CvrRegistrationCd) 12 | class CvrRegistrationCdAdmin(BaseAdmin): 13 | """ 14 | Custom admin for the CvrRegistrationCd model. 15 | """ 16 | 17 | list_display = ( 18 | "filing_id", 19 | "rpt_date", 20 | "form_type", 21 | "filer_naml", 22 | "firm_name", 23 | ) 24 | date_hierarchy = "rpt_date" 25 | 26 | 27 | @admin.register(models.Cvr2RegistrationCd) 28 | class Cvr2RegistrationCdAdmin(BaseAdmin): 29 | """ 30 | Custom admin for the Cvr2RegistrationCd model. 31 | """ 32 | 33 | list_display = ( 34 | "filing_id", 35 | "form_type", 36 | "enty_naml", 37 | ) 38 | 39 | 40 | @admin.register(models.LobbyAmendmentsCd) 41 | class LobbyAmendmentsCdAdmin(BaseAdmin): 42 | """ 43 | Custom admin for the LobbyAmendmentsCd model. 44 | """ 45 | 46 | list_display = ( 47 | "filing_id", 48 | "exec_date", 49 | "form_type", 50 | ) 51 | date_hierarchy = "exec_date" 52 | 53 | 54 | @admin.register(models.LobbyingChgLogCd) 55 | class LobbyingChgLogCdAdmin(BaseAdmin): 56 | """ 57 | Custom admin for the LobbyingChgLogCd model. 58 | """ 59 | 60 | list_display = ( 61 | "filer_id", 62 | "entity_type", 63 | "log_dt", 64 | "filer_full_name", 65 | ) 66 | date_hierarchy = "log_dt" 67 | 68 | 69 | @admin.register(models.LempCd) 70 | class LempCdAdmin(BaseAdmin): 71 | """ 72 | Custom admin for the LempCd model. 73 | """ 74 | 75 | list_display = ( 76 | "filing_id", 77 | "eff_date", 78 | "cli_naml", 79 | "form_type", 80 | ) 81 | 82 | 83 | @admin.register(models.F690P2Cd) 84 | class F690P2CdAdmin(BaseAdmin): 85 | """ 86 | Custom admin for the F690P2Cd model. 87 | """ 88 | 89 | list_display = ( 90 | "filing_id", 91 | "exec_date", 92 | "form_type", 93 | ) 94 | date_hierarchy = "exec_date" 95 | 96 | 97 | @admin.register(models.CvrLobbyDisclosureCd) 98 | class CvrLobbyDisclosureCdAdmin(BaseAdmin): 99 | """ 100 | Custom admin for the CvrLobbyDisclosureCd model. 
@admin.register(models.LattCd)
class LattCdAdmin(BaseAdmin):
    """
    Admin panel configuration for LattCd records.
    """

    # Date drill-down uses the same column that is shown in the list.
    date_hierarchy = "pmt_date"
    list_display = (
        "filing_id",
        "form_type",
        "pmt_date",
        "recip_naml",
        "amount",
    )
def load_forms():
    """
    Load all the FilingForm objects from the source CSV.

    Reads forms.csv and sections.csv from this package's directory, builds one
    FilingForm per row of the former and attaches each row of the latter to the
    form named in its form_id column.

    Returns:
        A list of FilingForm objects in the order they appear in forms.csv.

    Raises:
        ValueError: If a row of sections.csv names a form_id that is not
            present in forms.csv.
    """
    this_dir = os.path.dirname(__file__)

    # Read in forms. The csv module asks for newline="" so that quoted fields
    # containing line breaks are parsed correctly.
    form_path = os.path.join(this_dir, "forms.csv")
    with open(form_path, "r", newline="") as form_obj:
        form_list = [FilingForm(**row) for row in csv.DictReader(form_obj)]

    # Index the forms once instead of scanning the list for every section.
    # setdefault keeps the first form if an id is repeated.
    forms_by_id = {}
    for form in form_list:
        forms_by_id.setdefault(form.id, form)

    # Read in sections
    section_path = os.path.join(this_dir, "sections.csv")
    with open(section_path, "r", newline="") as section_obj:
        for section in csv.DictReader(section_obj):
            try:
                form = forms_by_id[section["form_id"]]
            except KeyError:
                # Name the offending row rather than surfacing a bare lookup error.
                raise ValueError(
                    "sections.csv refers to form_id {!r}, which is not in forms.csv".format(
                        section["form_id"]
                    )
                ) from None
            form.add_section(**section)

    # Pass it out
    return form_list
54 | """ 55 | return tuple(sorted(codes_dict.items(), key=lambda x: x[0])) 56 | 57 | 58 | __all__ = ( 59 | "DocumentCloud", 60 | "DocumentCloudPage", 61 | "FilingForm", 62 | "FilingFormSection", 63 | "FORMS", 64 | "get_form", 65 | "choices", 66 | "sort_choices", 67 | ) 68 | -------------------------------------------------------------------------------- /calaccess_raw/annotations/choices.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Includes dicts of cannonical look-up / choice field values (e.g., 'codes' and 'types'). 5 | 6 | The keys of each look up dict are the valid values for the corresponding database 7 | columns, and the values are the full descriptions. This allows for mapping of invalid 8 | values observed in db columns to their valid equivalents in the _CHOICES attrs of our 9 | models, like so: 10 | 11 | OFFICE_CD_CHOICES = ( 12 | ('gov', look_ups.OFFICE_CODES['GOV']), 13 | ('GUV', look_ups.OFFICE_CODES['GOV']), 14 | # ... 15 | ), 16 | 17 | DOCS is a dict of look-up set names to sets of DocumentCloud objects referencing 18 | where the look-ups are defined. 
This allows for easy referencing to these document 19 | pages in our models, like so: 20 | 21 | documentcloud_pages=look_ups.DOCS['office_codes'], 22 | 23 | """ 24 | from .documents import DocumentCloud 25 | 26 | DOCS = { 27 | "entity_codes": [ 28 | DocumentCloud(id="2712033", start_page=8, end_page=9), 29 | DocumentCloud(id="2712034", start_page=9, end_page=11), 30 | ], 31 | "expense_codes": [ 32 | DocumentCloud(id="2712033", start_page=11), 33 | DocumentCloud(id="2712034", start_page=13, end_page=14), 34 | ], 35 | "office_codes": [ 36 | DocumentCloud(id="2712033", start_page=10), 37 | DocumentCloud(id="2712034", start_page=12), 38 | DocumentCloud(id="2712032", start_page=2), 39 | ], 40 | } 41 | 42 | CAMPAIGN_ENTITY_CODES = { 43 | # for CVR records 44 | "CAO": "Candidate/officeholder", 45 | "BMC": "Ballot measure committee", 46 | "CTL": "Controlled committee", 47 | "MDI": "Major Donor/Ind Expenditure", 48 | "RCP": "Recipient committee", 49 | "SMO": "Slate-mailer organization", 50 | # for CVR2 records (includes CAO, CTL, RCP) 51 | "ATH": "Authorizing individual", 52 | "ATR": "Assistant treasurer", 53 | "BNM": "Ballot measure's name/title", 54 | "COM": "Committee", 55 | "OFF": "Officer", 56 | "POF": "Principal officer", 57 | "PRO": "Proponent", 58 | "SPO": "Sponsor", 59 | # for CVR3 records (includes CAO, OFF, PRO, SPO) 60 | "TRE": "Treasurer", 61 | # for schedule records (includes COM and RCP) 62 | "IND": "Individual", 63 | "OTH": "Other", 64 | "PTY": "Political Party", 65 | "SCC": "Small Contributor Committee", 66 | } 67 | 68 | LOBBYING_ENTITY_CODES = { 69 | # for CVR records 70 | "FRM": "Lobbying Firm", 71 | "IND": "Person (spending > $5000)", 72 | "LBY": "Lobbyist (an individual)", 73 | "LCO": "Lobbying Coalition", 74 | "LEM": "Lobbying Employer", 75 | # for CVR2 records (includes FRM) 76 | "AGY": "State Agency", 77 | "EMP": "Employer", 78 | "FRM": "Lobbying Firm", 79 | "MBR": "Member of Associaton", 80 | "OFF": "Officer", 81 | "OWN": "Owner", 82 | "PTN": 
"Partner", 83 | "SCL": "Subcontracted Client", 84 | # for schedule records (includes IND) 85 | "COM": "Committee", 86 | "OTH": "Other", 87 | "PTY": "Political Party", 88 | "RCP": "Recipient Committee", 89 | "SCC": "Small Contributor Committee", 90 | } 91 | 92 | EXPENSE_CODES = { 93 | "CMP": "campaign paraphernalia/miscellaneous", 94 | "CNS": "campaign consultants", 95 | "CTB": "contribution (if nonmonetary, explain)*", 96 | "CVC": "civic donations", 97 | "FIL": "candidate filing/ballot feeds", 98 | "FND": "fundraising events", 99 | "IKD": "In-kind contribution (nonmonetary)", 100 | "IND": "independent expenditure supporting/opposing others (explain)*", 101 | "LEG": "legal defense", 102 | "LIT": "campaign literature and mailings", 103 | "LON": "loan", 104 | "MBR": "member communications", 105 | "MON": "monetary contribution", 106 | "MTG": "meetings and appearances", 107 | "OFC": "office expenses", 108 | "PET": "petition circulating", 109 | "PHO": "phone banks", 110 | "POL": "polling and survey research", 111 | "POS": "postage, delivery and messenger services", 112 | "PRO": "professional services (legal, accounting)", 113 | "PRT": "print ads", 114 | "RAD": "radio airtime and production costs", 115 | "RFD": "returned contributions", 116 | "SAL": "campaign workers salaries", 117 | "TEL": "T.V. 
# Jurisdiction codes mapped to their full descriptions. As with the other
# look-ups in this module, the keys are the values expected in the database
# column and the values are the human-readable labels.
JURIS_CODES = {
    "ASM": "Assembly District",
    "BOE": "Board of Equalization District",
    "CIT": "City",
    "CTY": "County",
    "LOC": "Local",
    "OTH": "Other",
    "SEN": "Senate District",
    "STW": "Statewide",
}
@deconstructible
class DocumentCloud(object):
    """
    A document hosted on DocumentCloud.

    Cited in our Python code and then republished in our HTML documentation.

    Metadata is requested from the DocumentCloud API the first time it is
    needed and cached as a JSON file in a .documentcloud_metadata directory
    under settings.BASE_DIR.
    """

    # Seconds to wait on the DocumentCloud API before giving up.
    request_timeout = 30

    def __init__(self, id, start_page=None, end_page=None):
        """
        Create a new object by submitting a unique ID from DocumentCloud.org.

        start_page and end_page optionally narrow the citation to a range of
        pages. Each may be an int, the text of an int, or blank/None.
        """
        self.id = id
        self.start_page = self._clean_page_num(start_page)
        self.end_page = self._clean_page_num(end_page)
        self.metadata_cache_dir = os.path.join(
            settings.BASE_DIR, ".documentcloud_metadata"
        )
        self.metadata_filename = os.path.join(
            self.metadata_cache_dir, "{}.json".format(self.id)
        )

    @staticmethod
    def _clean_page_num(value):
        """
        Normalizes a page number that may arrive as text.

        Form sections pass along values read from a CSV, which are always
        strings. Decimal strings become ints and blank strings become None so
        that the arithmetic in num_pages and pages works. Any other value is
        returned untouched.
        """
        if isinstance(value, str):
            value = value.strip()
            if not value:
                return None
            if value.isdecimal():
                return int(value)
        return value

    def _request_metadata(self):
        """
        Returns contents of GET request to /api/documents/[id].json method.

        Raises requests.HTTPError on an error status so that an error page is
        never mistaken for (and cached as) document metadata.
        """
        url = f"https://api.www.documentcloud.org/api/documents/{self.id}"
        r = requests.get(url, timeout=self.request_timeout)
        r.raise_for_status()
        return r.content.decode("utf-8")

    def _cache_metadata(self):
        """
        Requests metadata and stores in .json file in .documentcloud_metadata dir.

        Creates .documentcloud_metadata dir if it doesn't already exist.
        """
        # Fetch before opening the file: if the request fails, no empty cache
        # file is left behind to break every later read.
        content = self._request_metadata()
        os.makedirs(self.metadata_cache_dir, exist_ok=True)
        with open(self.metadata_filename, "w") as f:
            f.write(content)

    @property
    def metadata(self):
        """
        Returns a dictionary with the document's metadata retrieved from DocumentCloud.

        The cached JSON file is read on every access; it is downloaded first
        if it does not exist yet.
        """
        if not os.path.exists(self.metadata_filename):
            self._cache_metadata()
        with open(self.metadata_filename, "r") as f:
            self._metadata = json.loads(f.read())
        return self._metadata

    @property
    def title(self):
        """
        Returns the title of the document.
        """
        self._title = self.metadata["title"]
        return self._title

    @property
    def slug(self):
        """
        Returns the slug of the document.
        """
        self._slug = self.metadata["slug"]
        return self._slug

    @property
    def canonical_url(self):
        """
        Returns the URL where the document can be found on DocumentCloud.

        When a start page is set, the URL points at that page.
        """
        if self.start_page:
            canonical_url = self.metadata["canonical_url"] + "#document/p{}".format(
                self.start_page
            )
        else:
            canonical_url = self.metadata["canonical_url"]
        return canonical_url

    @property
    def thumbnail_url(self):
        """
        Returns a URL to the thumbnail image of the document's first cited page.
        """
        page = self.start_page or 1
        url = f"https://assets.documentcloud.org/documents/{self.id}/pages/{self.slug}-p{page}-thumbnail.gif"
        self._thumbnail_url = url
        return self._thumbnail_url

    @property
    def pdf_url(self):
        """
        Returns a URL to the full PDF of the document.
        """
        self._pdf_url = (
            f"https://assets.documentcloud.org/documents/{self.id}/{self.slug}.pdf"
        )
        return self._pdf_url

    @property
    def text_url(self):
        """
        Returns a URL to the full text of the document.
        """
        self._text_url = (
            f"https://assets.documentcloud.org/documents/{self.id}/{self.slug}.txt"
        )
        return self._text_url

    @property
    def num_pages(self):
        """
        Returns the number of pages in this document.

        With both a start and end page this is the size of that range, with
        only an end page it is the end page, with only a start page it is 1,
        and with neither it is the page count from the metadata.
        """
        if self.start_page and self.end_page:
            self._num_pages = self.end_page - self.start_page + 1
        elif self.end_page:
            self._num_pages = self.end_page
        elif self.start_page:
            self._num_pages = 1
            # ignored case: User wants to specify a start page and
            # expects to include all subsequent pages in doc
        else:
            self._num_pages = self.metadata["page_count"]
        return self._num_pages

    @property
    def pages(self):
        """
        Returns a list of the pages in this document as DocumentCloudPage objects.
        """
        canonical_url = "https://www.documentcloud.org/documents/{id}/pages/{page}.html"
        start = self.start_page or 1
        # Look the slug up once; each access re-reads the metadata file.
        slug = self.slug
        return [
            DocumentCloudPage(
                x,
                canonical_url.format(id=self.id, page=x),
                f"https://assets.documentcloud.org/documents/{self.id}/pages/{slug}-p{x}-thumbnail.gif",
            )
            for x in range(start, start + self.num_pages)
        ]

    @property
    def formatted_page_nums(self):
        """
        Returns the page range as a pretty string.

        The result is "start-end" when an end page is set, otherwise just the
        start page.
        """
        if self.end_page:
            formatted_str = "{0}-{1}".format(self.start_page, self.end_page)
        else:
            formatted_str = str(self.start_page)
        return formatted_str
@deconstructible
class FilingFormSection(object):
    """
    One named part of a FilingForm, such as a cover page, summary sheet, schedule or part.
    """

    def __init__(self, form, id, title, **kwargs):
        """
        Build a section from its parent FilingForm, an ID and a title.

        Recognised keyword arguments are db_value (falls back to the parent
        form's value when absent), start_page and end_page.
        """
        self.form, self.id, self.title = form, id, title

        inherited_db_value = form.db_value
        self.db_value = kwargs.get("db_value", inherited_db_value)

        first_page = kwargs.get("start_page")
        last_page = kwargs.get("end_page")
        self.start_page = first_page
        self.end_page = last_page

        # The section cites the parent form's document, narrowed to its own pages.
        self.documentcloud = DocumentCloud(
            form.documentcloud_id,
            first_page,
            last_page,
        )

    @property
    def full_title(self):
        """
        Returns the section title prefixed with the parent form's short name and title.
        """
        parent = self.form
        self._full_title = f"{parent.type_and_num} ({parent.title}): {self.title}"
        return self._full_title

    def __str__(self):
        """
        Returns the section's id as text.
        """
        return str(self.id)
Page, Part 2",CVR2,2, 31 | F460,SMRY,Summary Page,SMRY,7,8 32 | F460,A,"Schedule A, Monetary Contributions Received",A,9,11 33 | F460,A-1,"Schedule A-1, Contributions Transferred to Special Election Commitee",A-1,, 34 | F460,B1,"Schedule B, Part 1, Loans Received",B1,12,13 35 | F460,B2,"Schedule B, Part 2, Loan Guarantors",B2,14,15 36 | F460,B3,"Schedule B, Part 3, Outstanding Balance",B3,, 37 | F460,C,"Schedule C, Non-Monetary Contributions Received",C,16,17 38 | F460,D,"Schedule D, Summary of Expenditures Supporting / Opposing Other Candidates, Measures and Committees",D,18,20 39 | F460,E,"Schedule E, Payments Made",E,21,24 40 | F460,F,"Schedule F, Accrued Expenses (Unpaid Bills)",F,25,27 41 | F460,G,"Schedule G, Payments Made by an Agent or Independent Contractor (on Behalf of This Committee)",G,28,29 42 | F460,H,"Schedule H, Loans Made to Others",H,29,30 43 | F460,H1,"Schedule H, Part 1, Loans Made",H1,, 44 | F460,H2,"Schedule H, Part 2, Repayments Rcvd",H2,, 45 | F460,H3,"Schedule H, Part 3, Outstanding Loans",H3,, 46 | F460,I,"Schedule I, Miscellanous increases to cash",I,31,32 47 | F461,P1,"Part 1, Name and Address of Filer",F461P1,3, 48 | F461,P2,"Part 2, Nature and Interests of Filer",F461P2,3, 49 | F461,P3,"Part 3, Summary",F461P3,3, 50 | F461,P4,"Part 4, Verification",F461P4,3, 51 | F461,P5,"Part 5, Contributions (Including Loans, Forgiveness of Loans, and LoanGuarantees) and Expenditures Made",F461P5,5,6 52 | F465,P1,"Part 1, Committee/Filer Information",F465P1,2, 53 | F465,P2,"Part 2, Name of Candidate or Measure Supported or Opposed",F465P2,2, 54 | F465,P3,"Part 3, Independent Expenditures Made",F465P3,2, 55 | F465,P4,"Part 4, Summary",F465P4,4, 56 | F465,P5,"Part 5, Filing Officers",F465P5,4, 57 | F465,P6,"Part 6, Verification",F465P6,4, 58 | F496,P1,"Part 1, List Only One Candidate or Ballot Measure",F496P1,3, 59 | F496,P2,"Part 2, Independent Expenditures Made",F496P2,3, 60 | F496,P3,"Part 3, Contributions > $100 Received",F496P3,3, 61 | 
F497,P1,"Part 1, Contributions Received",F497P1,2, 62 | F497,P2,"Part 2, Contributions Made",F497P2,4, 63 | F498,A,"Part A, Late Payments Attributed To",F498-A,, 64 | F498,R,"Part R, Late Payments Received From",F498-R,2, 65 | F601,P1,"Part 1, Individual Lobbyists",F601P1,2, 66 | F601,P2A,"Part 2, Section A, Lobbyist Employers",F601P2A,2,4 67 | F601,P2B,"Part 2, Section B, Subcontracted Clients",F601P2B,4, 68 | F615,P1,"Part 1, Activity Expenses Paid, Incurred, Arranged or Provided by the Lobbyist",F615P1,2,4 69 | F615,P2,"Part 2, Campaign Contributions Made or Delivered",F615P2,5, 70 | F625,P1,"Part 1, Partners, Owners, Officers, and Employees",F625P1,2, 71 | F625,P2,"Part 2, Payments Received in Connection with Lobbying Activity",F625P2,4, 72 | F625,P3A,"Part 3, Payments Made In Connection With Lobbying Activities, Section A, Activity Expenses",F625P3A,4, 73 | F625,P3B,"Part 3, Payments Made In Connection With Lobbying Activities, Section B, Payments Made",F625P3B,8, 74 | F625,P3C,"Part 3, Payments Made In Connection With Lobbying Activities, Section C, Summary of Payments",F625P3C,2, 75 | F625,P4B,"Part 4, Campaign Contributions Made",F625P4B,2, 76 | F635,P1,"Part 1, Legislative or State Agency Administrative Actions Actively Lobbied During the Period",F635P1,2, 77 | F635,P2,"Part 2, Partners, Owners, and Employees whose ""Lobbyist Reports"" (Form 615) are Attached to this Report",F635P2,4, 78 | F635,P3A,"Part 3, Payments Made in Connection with Lobbying Activities, Section A, Payments To In-house Employee Lobbyists",F635P3A,4, 79 | F635,P3B,"Part 3, Payments Made in Connection with Lobbying Activities, Section B, Payments To Lobbying Firms",F635P3B,4, 80 | F635,P3C,"Part 3, Payments Made in Connection with Lobbying Activities, Section C, Activity Expenses",F635P3C,6, 81 | F635,P3D,"Part 3, Payments Made in Connection with Lobbying Activities, Section D, Other Payments to Influence Legislative or Administrative Action",F635P3D,6, 82 | F635,P3E,"Part 3, Payments 
class CalAccessRawConfig(AppConfig):
    """
    App registration settings for the `calaccess_raw` Django app.
    """

    default_auto_field = "django.db.models.BigAutoField"
    name = "calaccess_raw"
    verbose_name = "CAL-ACCESS raw data"
class DocumentCloudMixin(fields.Field):
    """
    Adds a documentcloud_pages keyword argument to the Field.

    Allows it to link to documents that explain the contents of the data.
    """

    def __init__(self, documentcloud_pages=None, *args, **kwargs):
        """
        Overrides the standard __init__ to add our documentcloud_pages option.

        documentcloud_pages is the list of document references to attach to
        the field. Omitting it, or passing None, gives the field an empty list.
        """
        # Build the empty list per instance. A literal [] default would be one
        # list object shared by every field created without the argument.
        if documentcloud_pages is None:
            documentcloud_pages = []
        self.documentcloud_pages = documentcloud_pages
        super().__init__(*args, **kwargs)

    def deconstruct(self):
        """
        Overrides the standard deconstruct method to add our documentcloud_pages option.
        """
        name, path, args, kwargs = super().deconstruct()
        # Only include kwarg if it's not the default
        if self.documentcloud_pages != []:
            kwargs["documentcloud_pages"] = self.documentcloud_pages
        return name, path, args, kwargs
64 | """ 65 | 66 | copy_type = "text" 67 | copy_template = """ 68 | CASE 69 | WHEN "%(name)s" IS NULL 70 | THEN '' 71 | ELSE regexp_replace(TRIM("%(name)s"), '\r|\n', '', 'g') 72 | END""" 73 | 74 | def description(self): 75 | """ 76 | Returns a custom description for documentation that includes the max length. 77 | """ 78 | return super(CharField, self).description % dict(max_length=self.max_length) 79 | 80 | 81 | class DateField(fields.DateField, CalAccessFieldMixin): 82 | """ 83 | A custom date field. 84 | """ 85 | 86 | copy_type = "text" 87 | copy_template = """ 88 | CASE 89 | WHEN "%(name)s" IS NOT NULL AND TRIM("%(name)s") != '' 90 | THEN to_date(substring("%(name)s" from 1 for 10), 'MM/DD/YYYY') 91 | ELSE null 92 | END""" 93 | 94 | 95 | class DateTimeField(fields.DateTimeField, CalAccessFieldMixin): 96 | """ 97 | A custom datetime field. 98 | """ 99 | 100 | copy_type = "text" 101 | copy_template = """ 102 | CASE 103 | WHEN "%(name)s" IS NOT NULL AND TRIM("%(name)s") != '' 104 | THEN to_timestamp("%(name)s", 'MM/DD/YYYY HH12:MI:SS AM') 105 | ELSE null 106 | END""" 107 | 108 | 109 | class DecimalField(fields.DecimalField, CalAccessFieldMixin): 110 | """ 111 | A custom decimal field. 112 | """ 113 | 114 | copy_type = "text" 115 | copy_template = """ 116 | CASE 117 | WHEN "%(name)s" = '' 118 | THEN 0.0 119 | WHEN "%(name)s" IS NULL 120 | THEN 0.0 121 | WHEN "%(name)s" IS NOT NULL 122 | THEN "%(name)s"::numeric 123 | END""" 124 | 125 | 126 | class FloatField(fields.FloatField, CalAccessFieldMixin): 127 | """ 128 | A custom float field. 129 | """ 130 | 131 | copy_type = "text" 132 | copy_template = """ 133 | CASE 134 | WHEN TRIM("%(name)s") = '' 135 | THEN 0.0 136 | WHEN TRIM("%(name)s") IS NULL 137 | THEN 0.0 138 | WHEN TRIM("%(name)s") IS NOT NULL 139 | THEN "%(name)s"::double precision 140 | END""" 141 | 142 | 143 | class IntegerField(fields.IntegerField, CalAccessFieldMixin, DocumentCloudMixin): 144 | """ 145 | A custom integer field. 
class ForeignKeyField(ForeignKey, CalAccessFieldMixin, DocumentCloudMixin):
    """
    A custom foreign key field.

    Its copy settings and description are taken from the first field that the
    key points at.
    """

    @property
    def copy_type(self):
        """
        Returns the copy_type of the related foreign key field.
        """
        return self.foreign_related_fields[0].copy_type

    @property
    def copy_template(self):
        """
        Returns the copy_template of the related foreign key field.
        """
        return self.foreign_related_fields[0].copy_template

    def description(self):
        """
        Returns a description for documentation from the related foreign key field.
        """
        related_description = self.foreign_related_fields[0].description
        # This module's CharField defines description as a method, while fields
        # that do not override it inherit whatever the base class provides.
        # Call it when it is callable so the caller gets the description itself
        # rather than a bound method.
        if callable(related_description):
            return related_description()
        return related_description
class CalAccessCommand(BaseCommand):
    """
    Base management command that provides common functionality for the other commands in this app.

    It resolves the working directories under the data directory, records the
    start time and offers small helpers for colorized output.
    """

    url = "https://campaignfinance.cdn.sos.ca.gov/dbwebexport.zip"

    def handle(self, *args, **options):
        """
        The generic handler function.

        Any command subclassing this object should implement its own
        handle method, as is standard in Django, and run this method
        via a super call to inherit its functionality.
        """
        self.set_global_options(options)

    def set_global_options(self, options):
        """
        Set options to all commands.

        Stores ``verbosity`` and ``no_color`` from ``options``, makes sure the
        data, ``tsv`` and ``csv`` directories exist, derives the download
        directory and ZIP path, and starts the clock used by ``duration``.
        """
        # Set global options
        self.verbosity = options.get("verbosity")
        self.no_color = options.get("no_color")

        # set up data directories
        self.data_dir = get_data_directory()
        self.tsv_dir = os.path.join(self.data_dir, "tsv")
        self.csv_dir = os.path.join(self.data_dir, "csv")

        # exist_ok avoids the check-then-create race of "exists() or makedirs()"
        for directory in (self.data_dir, self.tsv_dir, self.csv_dir):
            os.makedirs(directory, exist_ok=True)

        # set path where zip will be downloaded; the file keeps the name of
        # the last URL segment
        self.download_dir = os.path.join(self.data_dir, "download")
        self.zip_path = os.path.join(self.download_dir, self.url.split("/")[-1])

        # Start the clock
        self.start_datetime = datetime.now()

    #
    # Logging methods
    #

    def _write_styled(self, string, **style):
        """
        Logs ``string`` at debug level and writes it to stdout.

        The ``style`` keywords are passed to ``colorize`` unless color output
        has been disabled for this command.
        """
        logger.debug(string)
        if not self.no_color:
            string = colorize(string, **style)
        self.stdout.write(string)

    def header(self, string):
        """
        Writes out a string to stdout formatted to look like a header.
        """
        self._write_styled(string, fg="cyan", opts=("bold",))

    def log(self, string):
        """
        Writes out a string to stdout formatted to look like a standard line.
        """
        self._write_styled("%s" % string, fg="white")

    def success(self, string):
        """
        Writes out a string to stdout formatted green to communicate success.
        """
        self._write_styled(string, fg="green")

    def failure(self, string):
        """
        Writes out a string to stdout formatted red to communicate failure.
        """
        self._write_styled(string, fg="red")

    def duration(self):
        """
        Calculates how long the command has been running and writes it to stdout.
        """
        message = "Duration: {}".format(datetime.now() - self.start_datetime)
        self.stdout.write(message)
        logger.debug(message)

    def __str__(self):
        """
        Returns the last dotted component of the module defining the command.
        """
        return self.__class__.__module__.rpartition(".")[2]
class Command(CalAccessCommand):
    """
    Clean a source CAL-ACCESS TSV file and reformat it as a CSV.

    Rows whose field count does not match the header are kept out of the CSV
    and written to a per-file error log instead.
    """

    help = "Clean a source CAL-ACCESS TSV file and reformat it as a CSV"

    def add_arguments(self, parser):
        """
        Adds custom arguments specific to this command.
        """
        super(Command, self).add_arguments(parser)
        parser.add_argument(
            "file_name",
            help="Name of the TSV file to be cleaned and discarded for a CSV",
        )
        parser.add_argument(
            "--keep-file",
            action="store_true",
            dest="keep_file",
            default=False,
            help="Keep original TSV file",
        )

    def handle(self, *args, **options):
        """
        Make it happen.
        """
        super(Command, self).handle(*args, **options)

        # Set all the config options
        self.set_options(options)

        # If the file has data ...
        if self.row_count:
            # Walk through the raw TSV file and create a clean CSV file
            if self.verbosity > 1:
                self.log(" Cleaning %s" % self.file_name)
            self.clean()

        # Unless keeping files, remove the raw TSV file
        if not options["keep_file"]:
            os.remove(self.tsv_path)

    def set_options(self, options):
        """
        Set options for use in other methods.

        Derives the log, TSV and CSV paths from ``options["file_name"]``,
        reads the header row and counts the lines of the TSV file.
        """
        # Set options
        self.file_name = options["file_name"]

        # Set log variables
        self.log_dir = os.path.join(self.data_dir, "log/")
        self.log_name = self.file_name.lower().replace("tsv", "errors.csv")
        self.error_log_path = os.path.join(self.log_dir, self.log_name)
        self.log_rows = []

        # Make sure the log directory exists (race-free)
        os.makedirs(self.log_dir, exist_ok=True)

        # Input and output paths
        self.tsv_path = os.path.join(self.tsv_dir, self.file_name)
        self.csv_name = self.file_name.lower().replace("tsv", "csv")
        self.csv_path = os.path.join(self.csv_dir, self.csv_name)

        # Pull and clean the headers
        self.headers = self.get_headers()
        self.headers_count = len(self.headers)

        # Get the row count. The file is read as bytes, like _convert_tsv
        # does, so undecodable bytes cannot raise here and lines are split
        # the same way in both places.
        with open(self.tsv_path, "rb") as tsv_file:
            self.row_count = sum(1 for _ in tsv_file)

    def get_headers(self):
        """
        Returns the headers from the TSV file.

        An empty file yields an empty list.
        """
        # errors="replace" keeps a stray undecodable byte in the first read
        # buffer from aborting the header lookup.
        with open(self.tsv_path, "r", errors="replace") as tsv_file:
            tsv_reader = csvkit.reader(tsv_file, delimiter=str("\t"))
            return next(tsv_reader, [])

    def _convert_tsv(self):
        """
        Given it a raw list of rows from a TSV, yields cleaned rows for a CSV.

        Each line is decoded as ASCII with replacement, stripped of null
        bytes, substitute characters and line breaks, then split on tabs.
        Lines whose field count differs from the header count are appended
        to ``self.log_rows`` instead of being yielded.
        """
        with open(self.tsv_path, "rb") as tsv_file:
            # Pop the headers out of the TSV file. The default keeps an empty
            # file from raising StopIteration inside this generator, which
            # Python turns into a RuntimeError.
            next(tsv_file, None)

            # Loop through all the rows
            for raw_line in tsv_file:
                # Decode the line for testing
                tsv_line = raw_line.decode("ascii", "replace")

                # If the line is empty skip it
                if not tsv_line.strip():
                    continue

                # Nuke any null bytes and the ASCII "substitute character",
                # chr(26) in Python
                tsv_line = tsv_line.replace("\x00", " ").replace("\x1a", "")

                # Remove any extra newline chars
                tsv_line = tsv_line.replace("\r", "").replace("\n", "")

                # Split on tabs so we can later spit it back out as a CSV row
                csv_line = tsv_line.split("\t")
                csv_field_count = len(csv_line)

                # If it matches the header count, yield it
                if csv_field_count == self.headers_count:
                    yield csv_line
                else:
                    # Otherwise log it
                    self.log_rows.append(
                        [self.headers_count, csv_field_count, ",".join(csv_line)]
                    )

    def clean(self):
        """
        Cleans the provided source TSV file and writes it out in CSV format.

        Rejected rows collected while converting are written to the error log
        and, at verbosity above 2, summarized on the terminal.
        """
        # newline="" lets the csv layer control line endings itself
        with open(self.csv_path, "w", newline="") as csv_file:
            csv_writer = csvkit.writer(csv_file)
            # Write the headers
            csv_writer.writerow(self.headers)
            # Stream the rows straight into the writer
            csv_writer.writerows(self._convert_tsv())

        # Log errors if there are any
        if self.log_rows:
            # Log to the terminal
            if self.verbosity > 2:
                msg = " {} errors logged (not including empty lines)"
                self.failure(msg.format(len(self.log_rows)))

            # Log to the file
            with open(self.error_log_path, "w", newline="") as log_file:
                log_writer = csvkit.writer(log_file, quoting=csv.QUOTE_ALL)
                log_writer.writerow(["headers", "fields", "value"])
                log_writer.writerows(self.log_rows)
class Command(CalAccessCommand):
    """
    Download the latest CAL-ACCESS database ZIP.

    The download directory is emptied first, then the archive at ``self.url``
    is streamed to ``self.zip_path``.
    """

    help = "Download the latest CAL-ACCESS database ZIP"

    # (connect, read) timeouts in seconds; without a timeout a stalled server
    # would block the command forever.
    request_timeout = (30, 300)
    # Bytes requested per streamed chunk.
    chunk_size = 64 * 1024

    def add_arguments(self, parser):
        """
        Adds custom arguments specific to this command.
        """
        super(Command, self).add_arguments(parser)

    def handle(self, *args, **options):
        """
        Make it happen.

        Raises whatever ``requests`` raises for a failed or timed-out
        request, including ``HTTPError`` for an error status.
        """
        super(Command, self).handle(*args, **options)

        # flush previous download
        if os.path.exists(self.download_dir):
            shutil.rmtree(self.download_dir)
        os.mkdir(self.download_dir)

        # Stream the download
        self.header("Downloading ZIP file")
        headers = {
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 12_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"
        }
        # NOTE(security): verify=False turns off TLS certificate checks for
        # this request. It is left as found -- confirm it is still required
        # before relying on the integrity of the downloaded archive.
        with requests.get(
            self.url,
            stream=True,
            headers=headers,
            verify=False,
            timeout=self.request_timeout,
        ) as r:
            r.raise_for_status()
            # The directory was just recreated, so write a fresh file
            with open(self.zip_path, "wb") as fp:
                for chunk in r.iter_content(chunk_size=self.chunk_size):
                    fp.write(chunk)
class Command(CalAccessCommand):
    """
    Extract the CAL-ACCESS raw data files from downloaded ZIP.

    The ``tsv`` directory is emptied, every ``.TSV`` member of the archive is
    moved into it under an upper-cased file name, and the download directory
    is removed unless ``--keep-files`` is given.
    """

    help = "Extract the CAL-ACCESS raw data files from downloaded ZIP"

    def add_arguments(self, parser):
        """
        Adds custom arguments specific to this command.
        """
        super(Command, self).add_arguments(parser)
        parser.add_argument(
            "--keep-files",
            action="store_true",
            dest="keep_files",
            default=False,
            help="Keep downloaded zipped files",
        )

    def handle(self, *args, **options):
        """
        Make it happen.
        """
        super(Command, self).handle(*args, **options)
        self.header("Extracting raw data files")

        # flush tsv dir
        if os.path.exists(self.tsv_dir):
            shutil.rmtree(self.tsv_dir)
        os.mkdir(self.tsv_dir)

        # Do it
        self.extract_tsv_files()

        if not options["keep_files"]:
            shutil.rmtree(self.download_dir)

    def extract_tsv_files(self):
        """
        Extract all files with .TSV extension from downloaded zip.

        The extension is matched without regard to case, in line with the
        upper-casing applied to the destination file name.
        """
        if self.verbosity:
            self.log(" Extracting .TSV files")
        pattern = re.compile(r"^.+\.TSV$", re.IGNORECASE)
        with zipfile.ZipFile(self.zip_path) as zf:
            tsv_files = [f for f in zf.namelist() if pattern.match(f)]
            for member in tsv_files:
                # extract into the download dir, keeping the archive's layout
                extracted_path = zf.extract(member, self.download_dir)
                # move it, flattened and upper-cased, into the tsv dir
                file_name = os.path.basename(extracted_path).upper()
                shutil.move(
                    extracted_path,
                    os.path.join(self.tsv_dir, file_name),
                )
class Command(CalAccessCommand):
    """
    Load clean CAL-ACCESS CSV file into a database model.

    The target table is truncated and refilled from the CSV. Only PostgreSQL
    engines are accepted.
    """

    help = "Load clean CAL-ACCESS CSV file into a database model"

    def add_arguments(self, parser):
        """
        Adds custom arguments specific to this command.
        """
        super(Command, self).add_arguments(parser)
        # positional (required) arg
        parser.add_argument(
            "model_name", help="Name of the model into which data will be loaded"
        )
        # keyword (optional) args
        parser.add_argument(
            "--c",
            "--csv",
            dest="csv",
            default=None,
            help="Path to comma-delimited file to be loaded. Defaults to one associated with model.",
        )
        parser.add_argument(
            "--keep-file",
            action="store_true",
            dest="keep_file",
            default=False,
            help="Keep clean CSV file after loading",
        )

    def handle(self, *args, **options):
        """
        Make it happen.

        Returns early, leaving the file in place, when the CSV has no rows
        below its header.
        """
        super(Command, self).handle(*args, **options)

        # set / compute any attributes that multiple class methods need
        self.keep_file = options["keep_file"]

        # get model based on strings of app_name and model_name
        self.model = apps.get_model("calaccess_raw", options["model_name"])

        # load from provided csv or csv mapped to model
        self.csv = options["csv"] or self.model.objects.get_csv_path()

        # load into database suggested for model by router
        self.database = router.db_for_write(model=self.model)

        # Get the row count from the source CSV (lines minus the header)
        with open(self.csv, "r") as infile:
            self.csv_row_count = max(sum(1 for line in infile) - 1, 0)

        # Quit if the CSV is empty.
        if not self.csv_row_count:
            if self.verbosity > 2:
                self.failure("{} is empty.".format(self.csv))
            return

        # Get the headers from the source CSV
        with open(self.csv, "r") as infile:
            csv_reader = reader(infile)
            self.csv_headers = next(csv_reader)

        # Load table
        if self.verbosity > 2:
            self.log(" Loading {}".format(options["model_name"]))
        self.load()

        # if not keeping files, remove the csv file
        if not self.keep_file:
            os.remove(self.csv)

    def load(self):
        """
        Loads the source CSV for the provided model based on settings and database connections.

        Raises:
            TypeError: when the database alias is missing from ``DATABASES``.
            CommandError: when the configured engine is not PostgreSQL.
        """
        from django.db import DEFAULT_DB_ALIAS

        # Fall back to the default alias so ``engine`` is always resolved,
        # even if no alias was suggested for the model.
        self.database = self.database or DEFAULT_DB_ALIAS

        # make sure the database is set up in django's settings
        try:
            engine = settings.DATABASES[self.database]["ENGINE"]
        except KeyError as err:
            raise TypeError(
                "{} not configured in DATABASES settings.".format(self.database)
            ) from err

        # check the kind of database before touching the connection
        if engine not in (
            "django.db.backends.postgresql_psycopg2",
            "django.db.backends.postgresql",
            "django.contrib.gis.db.backends.postgis",
        ):
            self.failure("Sorry your database engine is unsupported")
            raise CommandError("Only PostgresSQL backends supported.")

        # set up database connection
        self.connection = connections[self.database]
        self.cursor = self.connection.cursor()

        self.load_postgresql()

    def load_postgresql(self):
        """
        Load the file into a PostgreSQL database using COPY.
        """
        # Drop all the records from the target model's real table
        sql = 'TRUNCATE TABLE "{}" RESTART IDENTITY CASCADE'.format(
            self.model._meta.db_table
        )
        self.cursor.execute(sql)

        # Create a mapping between our django models and the CSV headers;
        # fields without an explicit db_column are left out
        model_mapping = dict(
            (f.name, f.db_column) for f in self.model._meta.fields if f.db_column
        )

        # Load the data
        self.model.objects.from_csv(self.csv, model_mapping, using=self.database)
class Command(CalAccessCommand):
    """
    Download, unzip, clean and load the latest CAL-ACCESS database ZIP.

    Each stage is delegated to the dedicated management command; the elapsed
    time is reported after every stage unless verbosity is 0.
    """

    help = "Download, unzip, clean and load the latest CAL-ACCESS database ZIP"

    def add_arguments(self, parser):
        """
        Adds custom arguments specific to this command.
        """
        super(Command, self).add_arguments(parser)
        parser.add_argument(
            "--keep-files",
            action="store_true",
            dest="keep_files",
            default=False,
            help="Keep zip, unzipped, TSV and CSV files",
        )

    def handle(self, *args, **options):
        """
        Make it happen.
        """
        super(Command, self).handle(*args, **options)

        # set / compute any attributes that multiple class methods need
        self.keep_files = options["keep_files"]

        # Download
        call_command(
            "downloadcalaccessrawdata",
            verbosity=self.verbosity,
        )
        if self.verbosity:
            self.duration()

        # Extract; verbosity is forwarded like for every other stage
        call_command(
            "extractcalaccessrawfiles",
            verbosity=self.verbosity,
            keep_files=self.keep_files,
        )
        if self.verbosity:
            self.duration()

        # Clean
        self.clean()
        if self.verbosity:
            self.duration()

        # Load
        self.load()
        if self.verbosity:
            self.duration()
            self.success("Done!")

    def clean(self):
        """
        Clean up the raw data files from the state so they are ready to get loaded into the database.
        """
        if self.verbosity:
            self.header("Cleaning data files")

        # Only names that end in .TSV (any case) are raw data files
        tsv_list = [f for f in os.listdir(self.tsv_dir) if f.upper().endswith(".TSV")]

        # Loop through all the files in the source directory
        for name in tsv_list:
            call_command(
                "cleancalaccessrawfile",
                name,
                verbosity=self.verbosity,
                keep_file=self.keep_files,
            )

    def load(self):
        """
        Loads the cleaned up csv files into the database.

        Models without a cleaned CSV on disk are skipped.
        """
        if self.verbosity:
            self.header("Loading data files")

        model_list = [
            x for x in get_model_list() if os.path.exists(x.objects.get_csv_path())
        ]

        for model in model_list:
            call_command(
                "loadcalaccessrawfile",
                model.__name__,
                verbosity=self.verbosity,
                keep_file=self.keep_files,
            )


class CalAccessManager(CopyManager):
    """
    Utilities for accessing the raw data associated with a model.

    File names are derived from the model's database table name.
    """

    def get_csv_name(self):
        """
        Returns the name of the model's CSV data file.
        """
        return "{}.csv".format(self.model._meta.db_table.lower())

    def get_csv_path(self):
        """
        Returns the path to the model's CSV data file.
        """
        return os.path.join(
            calaccess_raw.get_data_directory(), "csv", self.get_csv_name()
        )

    def get_tsv_name(self):
        """
        Returns the name of the model's raw TSV data file.
        """
        return "{}.TSV".format(self.model._meta.db_table)

    def get_tsv_path(self):
        """
        Returns the path to the model's raw TSV data file.
        """
        return os.path.join(
            calaccess_raw.get_data_directory(), "tsv", self.get_tsv_name()
        )
37 | """ 38 | return os.path.join( 39 | calaccess_raw.get_data_directory(), "tsv", self.get_tsv_name() 40 | ) 41 | -------------------------------------------------------------------------------- /calaccess_raw/migrations/0002_auto_20160802_2101.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.9.7 on 2016-08-02 21:01 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ("calaccess_raw", "0001_squashed_0014_auto_20160801_2039"), 12 | ] 13 | 14 | operations = [ 15 | migrations.AlterModelOptions( 16 | name="rawdataversion", 17 | options={ 18 | "get_latest_by": "release_datetime", 19 | "ordering": ("-release_datetime",), 20 | "verbose_name": "CAL-ACCESS raw data version", 21 | }, 22 | ), 23 | ] 24 | -------------------------------------------------------------------------------- /calaccess_raw/migrations/0003_auto_20160804_1443.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.9.7 on 2016-08-04 14:43 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ("calaccess_raw", "0002_auto_20160802_2101"), 12 | ] 13 | 14 | operations = [ 15 | migrations.RenameField("RawDataVersion", "size", "expected_size"), 16 | ] 17 | -------------------------------------------------------------------------------- /calaccess_raw/migrations/0004_auto_20160804_1758.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.9.7 on 2016-08-04 17:58 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = 
[ 11 | ("calaccess_raw", "0003_auto_20160804_1443"), 12 | ] 13 | 14 | operations = [ 15 | migrations.AddField( 16 | model_name="rawdataversion", 17 | name="clean_zip_size", 18 | field=models.BigIntegerField( 19 | help_text="The size of the zip containing all cleaned raw data files and error logs.", 20 | null=True, 21 | verbose_name="clean zip size", 22 | ), 23 | ), 24 | migrations.AddField( 25 | model_name="rawdataversion", 26 | name="download_zip_size", 27 | field=models.BigIntegerField( 28 | help_text="The actual size of the downloaded CAL-ACCESS zip after the downloaded completed.", 29 | null=True, 30 | verbose_name="downloaded zip size", 31 | ), 32 | ), 33 | migrations.AlterField( 34 | model_name="rawdataversion", 35 | name="expected_size", 36 | field=models.BigIntegerField( 37 | help_text="The expected size of the downloaded CAL-ACCESS zip, as specified in the content-length field in HTTP response header.", 38 | verbose_name="expected downloaded zip size", 39 | ), 40 | ), 41 | ] 42 | -------------------------------------------------------------------------------- /calaccess_raw/migrations/0005_auto_20160826_1449.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.10 on 2016-08-26 14:49 3 | from __future__ import unicode_literals 4 | 5 | import calaccess_raw.annotations 6 | import calaccess_raw.fields 7 | from django.db import migrations 8 | 9 | 10 | class Migration(migrations.Migration): 11 | 12 | dependencies = [ 13 | ("calaccess_raw", "0004_auto_20160804_1758"), 14 | ] 15 | 16 | operations = [ 17 | migrations.AlterField( 18 | model_name="receivedfilingscd", 19 | name="filing_directory", 20 | field=calaccess_raw.fields.CharField( 21 | db_column="FILING_DIRECTORY", 22 | help_text="This field is undocumented", 23 | max_length=60, 24 | ), 25 | ), 26 | migrations.AlterField( 27 | model_name="receivedfilingscd", 28 | name="filing_file_name", 29 | 
field=calaccess_raw.fields.CharField( 30 | db_column="FILING_FILE_NAME", 31 | help_text="The field is undocumented", 32 | max_length=60, 33 | ), 34 | ), 35 | migrations.AlterField( 36 | model_name="receivedfilingscd", 37 | name="form_id", 38 | field=calaccess_raw.fields.CharField( 39 | blank=True, 40 | choices=[ 41 | ( 42 | b"F400", 43 | b"Form 400: Statement of Organization (Slate Mailer Organization)", 44 | ), 45 | ( 46 | b"F401", 47 | b"Form 401: Slate Mailer Organization Campaign Statement", 48 | ), 49 | ( 50 | b"F402", 51 | b"Form 402: Statement of Termination (Slate Mailer Organization)", 52 | ), 53 | ( 54 | b"F410", 55 | b"Form 410: Statement of Organization Recipient Committee", 56 | ), 57 | (b"F425", b"Form 425: Semi-Annual Statement of no Activity"), 58 | ( 59 | b"F450", 60 | b"Form 450: Recipient Committee Campaign Disclosure Statement - Short Form", 61 | ), 62 | (b"F460", b"Form 460: Recipient Committee Campaign Statement"), 63 | ( 64 | b"F461", 65 | b"Form 461: Independent Expenditure Committee & Major Donor Committee Campaign Statement", 66 | ), 67 | (b"F465", b"Form 465: Supplemental Independent Expenditure Report"), 68 | (b"F496", b"Form 496: Late Independent Expenditure Report"), 69 | (b"F497", b"Form 497: Late Contribution Report"), 70 | (b"F498", b"Form 498: Slate Mailer Late Payment Report"), 71 | (b"F601", b"Form 601: Lobbying Firm Registration Statement"), 72 | (b"F602", b"Form 602: Lobbying Firm Activity Authorization"), 73 | ( 74 | b"F603", 75 | b"Form 603: Lobbyist Employer or Lobbying Coalition Registration Statement", 76 | ), 77 | (b"F604", b"Form 604: Lobbyist Certification Statement"), 78 | (b"F606", b"Form 606: Notice of Termination"), 79 | (b"F607", b"Form 607: Notice of Withdrawal"), 80 | (b"F615", b"Form 615: Lobbyist Report"), 81 | (b"F625", b"Form 625: Report of Lobbying Firm"), 82 | ( 83 | b"F635", 84 | b"Form 635: Report of Lobbyist Employer or Report of Lobbying Coalition", 85 | ), 86 | (b"F645", b"Form 645: Report of Person 
Spending $5,000 or More"), 87 | ], 88 | db_column="FORM_ID", 89 | documentcloud_pages=[ 90 | calaccess_raw.annotations.DocumentCloud( 91 | end_page=8, id="2711624-Overview", start_page=4 92 | ) 93 | ], 94 | help_text="Form identification code", 95 | max_length=7, 96 | verbose_name="form identification code", 97 | ), 98 | ), 99 | migrations.AlterField( 100 | model_name="receivedfilingscd", 101 | name="receive_comment", 102 | field=calaccess_raw.fields.CharField( 103 | db_column="RECEIVE_COMMENT", help_text="A comment", max_length=120 104 | ), 105 | ), 106 | ] 107 | -------------------------------------------------------------------------------- /calaccess_raw/migrations/0006_auto_20160826_2039.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.10 on 2016-08-26 20:39 3 | from __future__ import unicode_literals 4 | 5 | import calaccess_raw.fields 6 | from django.db import migrations 7 | 8 | 9 | class Migration(migrations.Migration): 10 | 11 | dependencies = [ 12 | ("calaccess_raw", "0005_auto_20160826_1449"), 13 | ] 14 | 15 | operations = [ 16 | migrations.AlterField( 17 | model_name="rcptcd", 18 | name="amount", 19 | field=calaccess_raw.fields.DecimalField( 20 | db_column="AMOUNT", 21 | decimal_places=2, 22 | help_text="Amount Received (Monetary, In-kind, Promise)", 23 | max_digits=14, 24 | ), 25 | ), 26 | ] 27 | -------------------------------------------------------------------------------- /calaccess_raw/migrations/0007_auto_20160831_0132.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.10 on 2016-08-31 01:32 3 | from __future__ import unicode_literals 4 | 5 | import calaccess_raw.fields 6 | from django.db import migrations 7 | 8 | 9 | class Migration(migrations.Migration): 10 | 11 | dependencies = [ 12 | ("calaccess_raw", "0006_auto_20160826_2039"), 13 | ] 14 | 15 | operations = [ 16 
| migrations.AlterField( 17 | model_name="filernamecd", 18 | name="naml", 19 | field=calaccess_raw.fields.CharField( 20 | db_column="NAML", 21 | help_text="Last name, sometimes full name in the case of PACs, firms and employers. Major donors can be split between first and last name fields, but usually are contained in the last name field only. Individual names of lobbyists, politicans and officers tend to use both the first and last name.", 22 | max_length=200, 23 | ), 24 | ), 25 | ] 26 | -------------------------------------------------------------------------------- /calaccess_raw/migrations/0010_auto_20161026_1643.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.10 on 2016-10-26 16:43 3 | from __future__ import unicode_literals 4 | 5 | import calaccess_raw.fields 6 | from django.db import migrations 7 | 8 | 9 | class Migration(migrations.Migration): 10 | 11 | dependencies = [ 12 | ("calaccess_raw", "0009_auto_20161026_1641"), 13 | ] 14 | 15 | operations = [ 16 | migrations.AlterField( 17 | model_name="cvr2campaigndisclosurecd", 18 | name="cmte_id", 19 | field=calaccess_raw.fields.CharField( 20 | blank=True, 21 | db_column="CMTE_ID", 22 | help_text="Committee identification number, when the entity is a committee", 23 | max_length=9, 24 | ), 25 | ), 26 | ] 27 | -------------------------------------------------------------------------------- /calaccess_raw/migrations/0011_auto_20161028_0129.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.10 on 2016-10-28 01:29 3 | from __future__ import unicode_literals 4 | 5 | import calaccess_raw.fields 6 | from django.db import migrations 7 | 8 | 9 | class Migration(migrations.Migration): 10 | 11 | dependencies = [ 12 | ("calaccess_raw", "0010_auto_20161026_1643"), 13 | ] 14 | 15 | operations = [ 16 | migrations.AlterField( 17 | 
model_name="expncd", 18 | name="bakref_tid", 19 | field=calaccess_raw.fields.CharField( 20 | blank=True, 21 | db_column="BAKREF_TID", 22 | help_text="Back Reference to a Tran_ID of a 'parent' record", 23 | max_length=20, 24 | verbose_name="back reference transaction id", 25 | ), 26 | ), 27 | migrations.AlterField( 28 | model_name="expncd", 29 | name="bal_juris", 30 | field=calaccess_raw.fields.CharField( 31 | blank=True, 32 | db_column="BAL_JURIS", 33 | help_text="Ballot measure's jurisdiction", 34 | max_length=40, 35 | verbose_name="ballot measure jurisdiction", 36 | ), 37 | ), 38 | migrations.AlterField( 39 | model_name="expncd", 40 | name="bal_name", 41 | field=calaccess_raw.fields.CharField( 42 | blank=True, 43 | db_column="BAL_NAME", 44 | help_text="Ballot Measure Name", 45 | max_length=200, 46 | verbose_name="ballot measure name", 47 | ), 48 | ), 49 | migrations.AlterField( 50 | model_name="expncd", 51 | name="bal_num", 52 | field=calaccess_raw.fields.CharField( 53 | blank=True, 54 | db_column="BAL_NUM", 55 | help_text="Ballot Number or Letter", 56 | max_length=7, 57 | verbose_name="ballot measure number", 58 | ), 59 | ), 60 | migrations.AlterField( 61 | model_name="expncd", 62 | name="expn_dscr", 63 | field=calaccess_raw.fields.CharField( 64 | blank=True, 65 | db_column="EXPN_DSCR", 66 | help_text="Purpose of expense and/or description/explanation", 67 | max_length=400, 68 | verbose_name="expense description", 69 | ), 70 | ), 71 | ] 72 | -------------------------------------------------------------------------------- /calaccess_raw/migrations/0012_auto_20161123_2217.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.9.1 on 2016-11-23 22:17 3 | from __future__ import unicode_literals 4 | 5 | import calaccess_raw.fields 6 | from django.db import migrations 7 | 8 | 9 | class Migration(migrations.Migration): 10 | 11 | dependencies = [ 12 | ("calaccess_raw", 
"0011_auto_20161028_0129"), 13 | ] 14 | 15 | operations = [ 16 | migrations.AlterField( 17 | model_name="expncd", 18 | name="memo_code", 19 | field=calaccess_raw.fields.CharField( 20 | blank=True, 21 | db_column="MEMO_CODE", 22 | help_text="Memo Amount? (Date/Amount are informational only). For Form 460 filings, this indicates the record is a sub-item and its amount is included in another item reported on the filing.", 23 | max_length=1, 24 | ), 25 | ), 26 | migrations.AlterField( 27 | model_name="loancd", 28 | name="loan_date1", 29 | field=calaccess_raw.fields.DateField( 30 | db_column="LOAN_DATE1", 31 | help_text="Date the loan was made or received. The content of this column varies based on the schedule/part that the record applies to. See the CAL document for a description of the value.", 32 | null=True, 33 | ), 34 | ), 35 | ] 36 | -------------------------------------------------------------------------------- /calaccess_raw/migrations/0014_auto_20170421_1821.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2017-04-21 18:21 3 | from __future__ import unicode_literals 4 | 5 | import calaccess_raw.annotations 6 | import calaccess_raw.fields 7 | from django.db import migrations 8 | 9 | 10 | class Migration(migrations.Migration): 11 | 12 | dependencies = [ 13 | ("calaccess_raw", "0013_auto_20161123_2219"), 14 | ] 15 | 16 | operations = [ 17 | migrations.AlterModelOptions( 18 | name="rawdatafile", 19 | options={ 20 | "ordering": ("-version_id", "file_name"), 21 | "verbose_name": "TRACKING: CAL-ACCESS raw data file", 22 | }, 23 | ), 24 | migrations.AlterModelOptions( 25 | name="rawdataversion", 26 | options={ 27 | "get_latest_by": "release_datetime", 28 | "ordering": ("-release_datetime",), 29 | "verbose_name": "TRACKING: CAL-ACCESS raw data version", 30 | }, 31 | ), 32 | migrations.AlterField( 33 | model_name="rcptcd", 34 | name="tran_type", 35 | 
field=calaccess_raw.fields.CharField( 36 | blank=True, 37 | choices=[ 38 | ("F", "Forgiven Loan"), 39 | ("I", "Intermediary"), 40 | ("R", "Returned"), 41 | ("T", "Third Party Repayment"), 42 | ("X", "Transfer"), 43 | ("0", "Unknown"), 44 | ("M", "Unknown"), 45 | ("N", "Unknown"), 46 | ], 47 | db_column="TRAN_TYPE", 48 | documentcloud_pages=[ 49 | calaccess_raw.annotations.DocumentCloud( 50 | id="2711616-MapCalFormat2Fields", start_page=72 51 | ), 52 | calaccess_raw.annotations.DocumentCloud( 53 | id="2712033-Cal-Format-1-05-02", start_page=29 54 | ), 55 | calaccess_raw.annotations.DocumentCloud( 56 | id="2712034-Cal-Format-201", start_page=38 57 | ), 58 | ], 59 | help_text="Transaction Type", 60 | max_length=1, 61 | verbose_name="transaction type", 62 | ), 63 | ), 64 | ] 65 | -------------------------------------------------------------------------------- /calaccess_raw/migrations/0016_auto_20200818_0542.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.1.15 on 2020-08-18 05:42 2 | 3 | import calaccess_raw.annotations.documents 4 | import calaccess_raw.annotations.forms 5 | import calaccess_raw.fields 6 | from django.db import migrations 7 | 8 | 9 | class Migration(migrations.Migration): 10 | 11 | dependencies = [ 12 | ("calaccess_raw", "0015_auto_20170729_0218"), 13 | ] 14 | 15 | operations = [ 16 | migrations.AlterField( 17 | model_name="cvr3verificationinfocd", 18 | name="sig_naml", 19 | field=calaccess_raw.fields.CharField( 20 | blank=True, 21 | db_column="SIG_NAML", 22 | help_text="last name of the signer", 23 | max_length=500, 24 | verbose_name="last name", 25 | ), 26 | ), 27 | ] 28 | -------------------------------------------------------------------------------- /calaccess_raw/migrations/0019_alter_rawdataversion_download_zip_archive.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.4 on 2021-06-13 22:58 2 | 3 | from django.db 
import migrations 4 | import ia_storage.fields 5 | import ia_storage.storage 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ("calaccess_raw", "0018_auto_20210426_2015"), 12 | ] 13 | 14 | operations = [ 15 | migrations.AlterField( 16 | model_name="rawdataversion", 17 | name="download_zip_archive", 18 | field=ia_storage.fields.InternetArchiveFileField( 19 | blank=True, 20 | help_text="An archive of the original zipped file downloaded from CAL-ACCESS.", 21 | max_length=255, 22 | storage=ia_storage.storage.InternetArchiveStorage, 23 | upload_to="", 24 | verbose_name="download files zip file", 25 | ), 26 | ), 27 | ] 28 | -------------------------------------------------------------------------------- /calaccess_raw/migrations/0020_auto_20210614_1556.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.4 on 2021-06-14 15:56 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ("calaccess_raw", "0019_alter_rawdataversion_download_zip_archive"), 10 | ] 11 | 12 | operations = [ 13 | migrations.RemoveField( 14 | model_name="rawdatafile", 15 | name="download_file_archive", 16 | ), 17 | migrations.AddField( 18 | model_name="rawdatafile", 19 | name="InternetArchiveFileField", 20 | field=models.FileField( 21 | blank=True, 22 | help_text="An archive of the original raw data file downloaded from CAL-ACCESS.", 23 | max_length=255, 24 | upload_to="", 25 | verbose_name="archive of download file", 26 | ), 27 | ), 28 | ] 29 | -------------------------------------------------------------------------------- /calaccess_raw/migrations/0021_auto_20210614_1604.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.4 on 2021-06-14 16:04 2 | 3 | from django.db import migrations 4 | import ia_storage.fields 5 | import ia_storage.storage 6 | 7 | 8 | class 
Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ("calaccess_raw", "0020_auto_20210614_1556"), 12 | ] 13 | 14 | operations = [ 15 | migrations.RemoveField( 16 | model_name="rawdatafile", 17 | name="InternetArchiveFileField", 18 | ), 19 | migrations.AddField( 20 | model_name="rawdatafile", 21 | name="download_file_archive", 22 | field=ia_storage.fields.InternetArchiveFileField( 23 | blank=True, 24 | help_text="An archive of the original raw data file downloaded from CAL-ACCESS.", 25 | max_length=255, 26 | storage=ia_storage.storage.InternetArchiveStorage, 27 | upload_to="", 28 | verbose_name="archive of download file", 29 | ), 30 | ), 31 | ] 32 | -------------------------------------------------------------------------------- /calaccess_raw/migrations/0022_auto_20210614_1627.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.4 on 2021-06-14 16:27 2 | 3 | from django.db import migrations 4 | import ia_storage.fields 5 | import ia_storage.storage 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ("calaccess_raw", "0021_auto_20210614_1604"), 12 | ] 13 | 14 | operations = [ 15 | migrations.AlterField( 16 | model_name="rawdatafile", 17 | name="clean_file_archive", 18 | field=ia_storage.fields.InternetArchiveFileField( 19 | blank=True, 20 | help_text="An archive of the raw data file after being cleaned.", 21 | max_length=255, 22 | storage=ia_storage.storage.InternetArchiveStorage, 23 | upload_to="", 24 | verbose_name="archive of clean file", 25 | ), 26 | ), 27 | migrations.AlterField( 28 | model_name="rawdatafile", 29 | name="error_log_archive", 30 | field=ia_storage.fields.InternetArchiveFileField( 31 | blank=True, 32 | help_text="An archive of the error log containing lines from the original download file that could not be parsed and are excluded from the cleaned file.", 33 | max_length=255, 34 | storage=ia_storage.storage.InternetArchiveStorage, 35 | 
upload_to="", 36 | verbose_name="archive of error log", 37 | ), 38 | ), 39 | ] 40 | -------------------------------------------------------------------------------- /calaccess_raw/migrations/0023_alter_rawdataversion_clean_zip_archive.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.4 on 2021-06-17 08:12 2 | 3 | from django.db import migrations 4 | import ia_storage.fields 5 | import ia_storage.storage 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ("calaccess_raw", "0022_auto_20210614_1627"), 12 | ] 13 | 14 | operations = [ 15 | migrations.AlterField( 16 | model_name="rawdataversion", 17 | name="clean_zip_archive", 18 | field=ia_storage.fields.InternetArchiveFileField( 19 | blank=True, 20 | help_text="An archive zip of cleaned (and error log) files", 21 | max_length=255, 22 | storage=ia_storage.storage.InternetArchiveStorage, 23 | upload_to="", 24 | verbose_name="cleaned files zip archive", 25 | ), 26 | ), 27 | ] 28 | -------------------------------------------------------------------------------- /calaccess_raw/migrations/0024_auto_20210822_2158.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.6 on 2021-08-22 21:58 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ("calaccess_raw", "0023_alter_rawdataversion_clean_zip_archive"), 10 | ] 11 | 12 | operations = [ 13 | migrations.RemoveField( 14 | model_name="rawdatafile", 15 | name="download_columns_count", 16 | ), 17 | migrations.RemoveField( 18 | model_name="rawdatafile", 19 | name="download_records_count", 20 | ), 21 | migrations.RemoveField( 22 | model_name="rawdatafile", 23 | name="error_count", 24 | ), 25 | migrations.RemoveField( 26 | model_name="rawdatafile", 27 | name="error_log_archive", 28 | ), 29 | ] 30 | 
-------------------------------------------------------------------------------- /calaccess_raw/migrations/0025_auto_20220725_1711.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.13 on 2022-07-25 17:11 2 | 3 | import calaccess_raw.fields 4 | from django.db import migrations 5 | 6 | 7 | class Migration(migrations.Migration): 8 | 9 | dependencies = [ 10 | ("calaccess_raw", "0024_auto_20210822_2158"), 11 | ] 12 | 13 | operations = [ 14 | migrations.AlterField( 15 | model_name="namescd", 16 | name="fullname", 17 | field=calaccess_raw.fields.CharField( 18 | db_column="FULLNAME", help_text="Full name", max_length=1000 19 | ), 20 | ), 21 | migrations.AlterField( 22 | model_name="namescd", 23 | name="naml", 24 | field=calaccess_raw.fields.CharField( 25 | db_column="NAML", help_text="Last name", max_length=1000 26 | ), 27 | ), 28 | migrations.AlterField( 29 | model_name="namescd", 30 | name="naml_search", 31 | field=calaccess_raw.fields.CharField( 32 | db_column="NAML_SEARCH", help_text="Last name", max_length=1000 33 | ), 34 | ), 35 | ] 36 | -------------------------------------------------------------------------------- /calaccess_raw/migrations/0026_auto_20220909_2321.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.2.13 on 2022-09-09 23:21 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ("calaccess_raw", "0025_auto_20220725_1711"), 10 | ] 11 | 12 | operations = [ 13 | migrations.DeleteModel( 14 | name="RawDataFile", 15 | ), 16 | migrations.DeleteModel( 17 | name="RawDataVersion", 18 | ), 19 | ] 20 | -------------------------------------------------------------------------------- /calaccess_raw/migrations/0027_alter_cvre530cd_other_desc.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 4.0.8 on 2022-10-31 14:44 
2 | 3 | import calaccess_raw.fields 4 | from django.db import migrations 5 | 6 | 7 | class Migration(migrations.Migration): 8 | 9 | dependencies = [ 10 | ('calaccess_raw', '0026_auto_20220909_2321'), 11 | ] 12 | 13 | operations = [ 14 | migrations.AlterField( 15 | model_name='cvre530cd', 16 | name='other_desc', 17 | field=calaccess_raw.fields.CharField(db_column='OTHER_DESC', help_text='This field is undocumented', max_length=500), 18 | ), 19 | ] 20 | -------------------------------------------------------------------------------- /calaccess_raw/migrations/0028_remove_cvrf470cd_cand_adr1.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 4.1.13 on 2024-07-12 12:55 2 | 3 | from django.db import migrations 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ("calaccess_raw", "0027_alter_cvre530cd_other_desc"), 10 | ] 11 | 12 | operations = [ 13 | migrations.RemoveField( 14 | model_name="cvrf470cd", 15 | name="cand_adr1", 16 | ), 17 | migrations.RemoveField( 18 | model_name="cvrf470cd", 19 | name="cand_adr2", 20 | ), 21 | ] 22 | -------------------------------------------------------------------------------- /calaccess_raw/migrations/0029_alter_cvrf470cd_date_1000.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 4.1.13 on 2024-07-12 14:35 2 | 3 | import calaccess_raw.fields 4 | from django.db import migrations 5 | 6 | 7 | class Migration(migrations.Migration): 8 | 9 | dependencies = [ 10 | ("calaccess_raw", "0028_remove_cvrf470cd_cand_adr1"), 11 | ] 12 | 13 | operations = [ 14 | migrations.AlterField( 15 | model_name="cvrf470cd", 16 | name="date_1000", 17 | field=calaccess_raw.fields.DateField( 18 | blank=True, 19 | db_column="DATE_1000", 20 | help_text="Date contributions totaling $1,000 or more. 
(For the 470-S)", 21 | null=True, 22 | ), 23 | ), 24 | ] 25 | -------------------------------------------------------------------------------- /calaccess_raw/migrations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/palewire/django-calaccess-raw-data/8de2240642d3befda2277722364c1c33eddf5e82/calaccess_raw/migrations/__init__.py -------------------------------------------------------------------------------- /calaccess_raw/models/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Import all of the models from submodules and thread them together. 5 | """ 6 | from calaccess_raw.models.base import CalAccessBaseModel 7 | from calaccess_raw.models.campaign import ( 8 | CvrSoCd, 9 | Cvr2SoCd, 10 | CvrCampaignDisclosureCd, 11 | Cvr2CampaignDisclosureCd, 12 | Cvr3VerificationInfoCd, 13 | DebtCd, 14 | ExpnCd, 15 | LoanCd, 16 | RcptCd, 17 | S401Cd, 18 | F495P2Cd, 19 | S496Cd, 20 | S497Cd, 21 | S498Cd, 22 | F501502Cd, 23 | ) 24 | from calaccess_raw.models.inactive import ( 25 | BallotMeasuresCd, 26 | CvrF470Cd, 27 | FilerTypePeriodsCd, 28 | LobbyistContributions1Cd, 29 | LobbyistContributions2Cd, 30 | LobbyistContributions3Cd, 31 | LobbyistEmpLobbyist1Cd, 32 | LobbyistEmpLobbyist2Cd, 33 | LobbyistEmployer1Cd, 34 | LobbyistEmployer2Cd, 35 | LobbyistEmployer3Cd, 36 | LobbyistEmployerFirms1Cd, 37 | LobbyistEmployerFirms2Cd, 38 | LobbyistEmployerHistoryCd, 39 | LobbyistFirm1Cd, 40 | LobbyistFirm2Cd, 41 | LobbyistFirm3Cd, 42 | LobbyistFirmEmployer1Cd, 43 | LobbyistFirmEmployer2Cd, 44 | LobbyistFirmHistoryCd, 45 | LobbyistFirmLobbyist1Cd, 46 | LobbyistFirmLobbyist2Cd, 47 | EfsFilingLogCd, 48 | ) 49 | from calaccess_raw.models.lobbying import ( 50 | CvrRegistrationCd, 51 | Cvr2RegistrationCd, 52 | LobbyAmendmentsCd, 53 | LobbyingChgLogCd, 54 | LempCd, 55 | CvrLobbyDisclosureCd, 56 | 
Cvr2LobbyDisclosureCd, 57 | F690P2Cd, 58 | LattCd, 59 | LexpCd, 60 | LccmCd, 61 | LpayCd, 62 | LothCd, 63 | ) 64 | from calaccess_raw.models.common import ( 65 | CvrE530Cd, 66 | FilernameCd, 67 | FilerFilingsCd, 68 | FilingsCd, 69 | HdrCd, 70 | HeaderCd, 71 | SmryCd, 72 | SpltCd, 73 | TextMemoCd, 74 | AcronymsCd, 75 | AddressCd, 76 | FilersCd, 77 | FilerAcronymsCd, 78 | FilerAddressCd, 79 | FilerEthicsClassCd, 80 | FilerInterestsCd, 81 | FilerLinksCd, 82 | FilerStatusTypesCd, 83 | FilerToFilerTypeCd, 84 | FilerTypesCd, 85 | FilerXrefCd, 86 | FilingPeriodCd, 87 | GroupTypesCd, 88 | ImageLinksCd, 89 | LegislativeSessionsCd, 90 | LookupCodesCd, 91 | NamesCd, 92 | ReceivedFilingsCd, 93 | ReportsCd, 94 | ) 95 | 96 | __all__ = ( 97 | "CalAccessBaseModel", 98 | "CvrSoCd", 99 | "Cvr2SoCd", 100 | "CvrCampaignDisclosureCd", 101 | "Cvr2CampaignDisclosureCd", 102 | "CvrF470Cd", 103 | "RcptCd", 104 | "Cvr3VerificationInfoCd", 105 | "LoanCd", 106 | "S401Cd", 107 | "ExpnCd", 108 | "F495P2Cd", 109 | "DebtCd", 110 | "S496Cd", 111 | "SpltCd", 112 | "S497Cd", 113 | "F501502Cd", 114 | "S498Cd", 115 | "CvrRegistrationCd", 116 | "Cvr2RegistrationCd", 117 | "CvrLobbyDisclosureCd", 118 | "Cvr2LobbyDisclosureCd", 119 | "LobbyAmendmentsCd", 120 | "F690P2Cd", 121 | "LattCd", 122 | "LexpCd", 123 | "LccmCd", 124 | "LothCd", 125 | "LempCd", 126 | "LpayCd", 127 | "FilerFilingsCd", 128 | "FilingsCd", 129 | "SmryCd", 130 | "CvrE530Cd", 131 | "TextMemoCd", 132 | "AcronymsCd", 133 | "AddressCd", 134 | "BallotMeasuresCd", 135 | "EfsFilingLogCd", 136 | "FilernameCd", 137 | "FilersCd", 138 | "FilerAcronymsCd", 139 | "FilerAddressCd", 140 | "FilerEthicsClassCd", 141 | "FilerInterestsCd", 142 | "FilerLinksCd", 143 | "FilerStatusTypesCd", 144 | "FilerToFilerTypeCd", 145 | "FilerTypesCd", 146 | "FilerXrefCd", 147 | "FilingPeriodCd", 148 | "FilerTypePeriodsCd", 149 | "GroupTypesCd", 150 | "HeaderCd", 151 | "HdrCd", 152 | "ImageLinksCd", 153 | "LegislativeSessionsCd", 154 | "LobbyingChgLogCd", 155 | 
class CalAccessMetaClass(ModelBase):
    """
    A custom metaclass for our base model.

    Automatically configures Meta attributes common to all models: both
    verbose names are derived from the model's submodule and database table,
    and the app label is forced to ``calaccess_raw``.
    """

    def __new__(cls, name, bases, attrs):
        """
        Build the model class, then overwrite its verbose names and app label.
        """
        klass = super().__new__(cls, name, bases, attrs)

        # str(klass) looks like "<class 'pkg.models.campaign.RcptCd'>", so the
        # second-to-last dotted component is the submodule name ("campaign").
        klass_group = str(klass).split(".")[-2].upper()
        klass_name = f"{klass_group}: {klass._meta.db_table}"

        # Insert the verbose name into the model's configuration
        klass._meta.verbose_name = klass_name
        klass._meta.verbose_name_plural = klass_name

        # Set the app_label too
        klass._meta.app_label = "calaccess_raw"

        return klass


class CalAccessBaseModel(models.Model):
    """
    An abstract model with some tricks we'll reuse.
    """

    # NOTE(review): ``__metaclass__`` is Python 2 syntax. Python 3 treats it as
    # an ordinary class attribute, so CalAccessMetaClass is NOT applied here.
    # Switching to ``metaclass=CalAccessMetaClass`` would rewrite every model's
    # verbose_name, so it is deliberately left unchanged -- confirm before
    # either enabling or deleting it.
    __metaclass__ = CalAccessMetaClass

    # The UNIQUE_KEY is one or more fields that, taken together, are unique
    # within the database. https://en.wikipedia.org/wiki/Unique_key
    #
    # Because the CAL-ACCESS database is released without unique keys specified
    # we determine these on our own and list them with each model.
    #
    # If a single field is believed to be unique, it can be set as a simple
    # string variable, like:
    #
    #     UNIQUE_KEY = 'field_name'
    #
    # If multiple fields must be combined to guarantee uniqueness, they
    # should be listed as a tuple like:
    #
    #     UNIQUE_KEY = ('field_one', 'field_two')
    #
    # If the unique key does not exist or cannot be determined it will be
    # set to False:
    #
    #     UNIQUE_KEY = False
    UNIQUE_KEY = None

    # DocumentCloud objects pointing at pages hosted on DocumentCloud.org that
    # document this model. Each accepts a unique DocumentCloud id along with
    # start and/or end page numbers. Once assembled they can be embedded in
    # our user-facing documentation as images.
    DOCUMENTCLOUD_PAGES = []
    FILING_FORMS = []

    # Default manager
    objects = managers.CalAccessManager()

    def doc(self):
        """
        Return the model's docstring as a readable string ready to print.

        Returns an empty string when there is no docstring, or when the
        docstring starts with the class name (treated as a placeholder rather
        than hand-written documentation).
        """
        raw_doc = self.__doc__ or ""
        if raw_doc.startswith(self.klass_name):
            return ""
        return textwrap.dedent(raw_doc).strip()

    @property
    def db_table(self):
        """
        Return the model's database table name as a string.
        """
        return self._meta.db_table

    @property
    def klass(self):
        """
        Return the model class itself.
        """
        return self.__class__

    @property
    def klass_name(self):
        """
        Return the name of the model class.
        """
        return self.__class__.__name__

    @property
    def klass_group(self):
        """
        Return the name of the model's group, as determined by its submodule.
        """
        return str(self.__class__).split(".")[-2]

    def get_field_list(self):
        """
        Return all the fields on the model as a list.
        """
        return self._meta.fields

    def get_csv_name(self):
        """
        Return the name of the clean CSV file that contains the model's data.
        """
        return self.__class__.objects.get_csv_name()

    def get_csv_path(self):
        """
        Return the path to the clean CSV file that contains the model's data.
        """
        return self.__class__.objects.get_csv_path()

    def get_tsv_name(self):
        """
        Return the name of the raw TSV file that contains the model's data.
        """
        return self.__class__.objects.get_tsv_name()

    def get_tsv_path(self):
        """
        Return the path to the raw TSV file that contains the model's data.
        """
        return self.__class__.objects.get_tsv_path()

    def get_unique_key_list(self):
        """
        Return UNIQUE_KEY setting as a list regardless of its data type.

        ``None`` and ``False`` both yield an empty list, a single field name
        is wrapped in a list, and a list/tuple is copied into a new list so
        callers always receive a real (and safely mutable) ``list``.
        """
        unique_key = self.__class__.UNIQUE_KEY
        if unique_key is None or unique_key is False:
            return []
        if isinstance(unique_key, (list, tuple)):
            return list(unique_key)
        return [unique_key]

    def get_documentcloud_pages(self):
        """
        Return a list of tuples for each page of each document in the DOCUMENTCLOUD_PAGES attr.

        Each tuple pairs an entry of DOCUMENTCLOUD_PAGES with one item of its
        ``pages`` attribute.
        """
        return [(dc, page) for dc in self.DOCUMENTCLOUD_PAGES for page in dc.pages]

    def get_filing_forms_w_sections(self):
        """
        Return a list of tuples, each containing a FilingForm object and list of FilingFormSection objects.

        Entries of FILING_FORMS that are FilingForm instances are listed with
        whatever sections were collected for them (possibly none); any other
        entry is treated as a section and grouped under its ``form``
        attribute. The result is sorted by the form's ``id``.
        """
        # Imported here rather than at module level, as in the original
        # (presumably to avoid a circular import -- confirm).
        from calaccess_raw.annotations import FilingForm

        forms_dict = {}
        for item in self.FILING_FORMS:
            if isinstance(item, FilingForm):
                forms_dict.setdefault(item, [])
            else:
                forms_dict.setdefault(item.form, []).append(item)
        return sorted(forms_dict.items(), key=lambda pair: pair[0].id)

    class Meta:
        """
        Meta model options.
        """

        abstract = True
        app_label = "calaccess_raw"
class AdminTestCase(TestCase):
    """
    Tests the administration panel configuration.
    """

    def test_models(self):
        """
        Verify that every model has a matching admin class.
        """
        # Admin classes are named "<Model>Admin"; strip the suffix to compare.
        model_names = {model.__name__ for model in get_model_list()}
        admin_names = {name.replace("Admin", "") for name in admin.__all__}
        self.assertEqual(model_names - admin_names, set())

    def test_methods(self):
        """
        Exercise the custom admin methods to make sure they run.
        """
        model_admin = admin.CvrSoCdAdmin(models.CvrSoCd(), None)
        model_admin.get_readonly_fields()
        model_admin.get_list_filter({})
        model_admin.get_search_fields({})
27 | """ 28 | ff = annotations.get_form("F400") 29 | ff.get_models() 30 | ff.__str__() 31 | s = ff.get_section("P1") 32 | s.__str__() 33 | 34 | def test_documentcloud(self): 35 | """ 36 | Test attributes of the DocumentCloud object. 37 | """ 38 | dc = annotations.DocumentCloud("2753585", 1, 1) 39 | if os.path.exists(dc.metadata_filename): 40 | os.remove(dc.metadata_filename) 41 | dc.metadata 42 | dc.metadata 43 | dc.title 44 | dc.canonical_url 45 | dc.thumbnail_url 46 | dc.pdf_url 47 | dc.text_url 48 | dc.num_pages 49 | dc.pages 50 | dc.formatted_page_nums 51 | dc2 = annotations.DocumentCloud("2753585") 52 | dc2.canonical_url 53 | dc2.num_pages 54 | dc2.formatted_page_nums 55 | dc3 = annotations.DocumentCloud("2753585", None, 1) 56 | dc3.num_pages 57 | dc3.formatted_page_nums 58 | dc4 = annotations.DocumentCloud("2753585", 1, None) 59 | dc4.num_pages 60 | dc4.formatted_page_nums 61 | -------------------------------------------------------------------------------- /calaccess_raw/tests/test_commands.py: -------------------------------------------------------------------------------- 1 | """Test commands.""" 2 | # Files 3 | import io 4 | import os 5 | 6 | # Testing 7 | import requests_mock 8 | from django.test import TransactionTestCase 9 | from django.test.utils import override_settings 10 | 11 | # Django etc. 
@override_settings(
    CALACCESS_DATA_DIR=os.path.join(settings.BASE_DIR, "test-data"),
    MEDIA_ROOT=os.path.join(settings.BASE_DIR, "test-data", ".media"),
)
class CommandTestCase(TransactionTestCase):
    """
    Tests the management commands that interact with the database.
    """

    # NOTE(review): Django replaced ``multi_db`` with the ``databases``
    # attribute (deprecated in 2.2, later removed). Left as-is to avoid
    # changing which databases the test touches -- confirm against the
    # Django versions this project supports.
    multi_db = True
    test_archiving = False

    @classmethod
    def setUpClass(cls):
        """
        Load data into the database before running other tests.

        Runs the download, extract, clean and load commands in order. The
        download is served from the local ``dbwebexport.zip`` fixture through
        a mocked HTTP endpoint.
        """
        super().setUpClass()

        test_zip_path = os.path.join(
            settings.BASE_DIR,
            settings.CALACCESS_DATA_DIR,
            "dbwebexport.zip",
        )
        # Read the fixture with a context manager so the handle is closed
        # immediately instead of being left for the garbage collector.
        with open(test_zip_path, mode="rb") as zip_file:
            zip_content = zip_file.read()

        url = "https://campaignfinance.cdn.sos.ca.gov/dbwebexport.zip"
        headers = {
            "Content-Length": str(os.stat(test_zip_path).st_size),
            "Accept-Ranges": "bytes",
            "Last-Modified": "Mon, 11 Jul 2017 11:20:31 GMT",
            "Connection": "keep-alive",
            "Date": "Mon, 10 Jul 2017 21:25:40 GMT",
            "Content-Type": "application/zip",
            "ETag": "2320c8-30619331-c54f7dc0",
            "Server": "Apache/2.2.3 (Red Hat)",
        }

        # Download the data from the mocked endpoint
        with requests_mock.Mocker() as m:
            m.register_uri("HEAD", url, headers=headers)
            m.register_uri("GET", url, headers=headers, content=zip_content)
            DownloadCommand().handle(verbosity=3)

        # Extract the data
        ecmd = ExtractCommand()
        ecmd.handle(verbosity=3, keep_files=True)

        # Clean the data
        tsv_list = [f for f in os.listdir(ecmd.tsv_dir) if ".TSV" in f.upper()]
        for name in tsv_list:
            CleanCommand().handle(file_name=name, verbosity=3, keep_file=True)

        # Load every model that ended up with a clean CSV file
        model_list = [
            x for x in get_model_list() if os.path.exists(x.objects.get_csv_path())
        ]
        for model in model_list:
            LoadCommand().handle(
                model_name=model.__name__,
                csv=model.objects.get_csv_path(),
                verbosity=3,
                keep_file=True,
            )

    def test_csv_gettrs(self):
        """
        Verify that the CSV name and path getters agree between manager and instance for all models.
        """
        for m in get_model_list():
            self.assertEqual(m.objects.get_csv_name(), m().get_csv_name())
            self.assertEqual(m.objects.get_csv_path(), m().get_csv_path())
13 | from copy import deepcopy 14 | 15 | # Logging 16 | import logging 17 | 18 | logger = logging.getLogger(__name__) 19 | 20 | 21 | class ModelTestCase(TestCase): 22 | """ 23 | Tests related to the database models. 24 | """ 25 | 26 | multi_db = True 27 | 28 | def test_model_methods(self): 29 | """ 30 | Test the extra methods we've added to models. 31 | """ 32 | # CAL-ACCESS models 33 | m = models.RcptCd() 34 | m.__str__() 35 | m.doc() 36 | m.__doc__ = "RcptCd" 37 | m.doc() 38 | m.db_table 39 | m.klass 40 | m.get_tsv_name() 41 | m.get_tsv_path() 42 | m.get_unique_key_list() 43 | m.get_documentcloud_pages() 44 | m.get_filing_forms_w_sections() 45 | unique_key = deepcopy(models.RcptCd.UNIQUE_KEY) 46 | models.RcptCd.UNIQUE_KEY = None 47 | models.RcptCd().get_unique_key_list() 48 | models.RcptCd.UNIQUE_KEY = unique_key 49 | 50 | def test_admins(self): 51 | """ 52 | Test the extra methods we've added to the admins. 53 | """ 54 | BaseAdmin(models.RcptCd, AdminSite()).get_readonly_fields() 55 | -------------------------------------------------------------------------------- /calaccess_raw/tests/test_utilities.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for the utilities stored in the app's __init__.py file. 3 | """ 4 | # Testing 5 | from django.test import TestCase 6 | from django.test.utils import override_settings 7 | 8 | # Stuff to test 9 | import calaccess_raw 10 | 11 | # Logging 12 | import logging 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | 17 | class UtilityTestCase(TestCase): 18 | """ 19 | Tests related to our hodgepodge of utilities. 20 | """ 21 | 22 | multi_db = True 23 | 24 | @override_settings(CALACCESS_DATA_DIR=None) 25 | @override_settings(BASE_DIR=None) 26 | def test_dir_errors(self): 27 | """ 28 | Test error expected when download directory is missing. 
29 | """ 30 | with self.assertRaises(ValueError): 31 | calaccess_raw.get_data_directory() 32 | 33 | @override_settings(CALACCESS_DATA_DIR="/foo/bar/") 34 | def test_dir_configured(self): 35 | """ 36 | Tests for directory functions __init__.py file. 37 | """ 38 | calaccess_raw.get_data_directory() 39 | 40 | @override_settings(CALACCESS_DATA_DIR=None) 41 | @override_settings(BASE_DIR="/foo/bar/") 42 | def test_dir_basedir(self): 43 | """ 44 | Tests for directory functions __init__.py file with different settings. 45 | """ 46 | calaccess_raw.get_data_directory() 47 | 48 | def test_model_methods(self): 49 | """ 50 | Test the methods that hook up with our models. 51 | """ 52 | calaccess_raw.get_model_list() 53 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | 49 | clean: 50 | rm -rf $(BUILDDIR)/* 51 | 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | livehtml: 58 | sphinx-autobuild -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 59 | 60 | dirhtml: 61 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 62 | @echo 63 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 
64 | 65 | singlehtml: 66 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 67 | @echo 68 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 69 | 70 | pickle: 71 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 72 | @echo 73 | @echo "Build finished; now you can process the pickle files." 74 | 75 | json: 76 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 77 | @echo 78 | @echo "Build finished; now you can process the JSON files." 79 | 80 | htmlhelp: 81 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 82 | @echo 83 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 84 | ".hhp project file in $(BUILDDIR)/htmlhelp." 85 | 86 | qthelp: 87 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 88 | @echo 89 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 90 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 91 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/django-calaccess-parser.qhcp" 92 | @echo "To view the help file:" 93 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/django-calaccess-parser.qhc" 94 | 95 | devhelp: 96 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 97 | @echo 98 | @echo "Build finished." 99 | @echo "To view the help file:" 100 | @echo "# mkdir -p $$HOME/.local/share/devhelp/django-calaccess-parser" 101 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/django-calaccess-parser" 102 | @echo "# devhelp" 103 | 104 | epub: 105 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 106 | @echo 107 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 108 | 109 | latex: 110 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 111 | @echo 112 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 113 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 114 | "(use \`make latexpdf' here to do that automatically)." 
115 | 116 | latexpdf: 117 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 118 | @echo "Running LaTeX files through pdflatex..." 119 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 120 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 121 | 122 | latexpdfja: 123 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 124 | @echo "Running LaTeX files through platex and dvipdfmx..." 125 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 126 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 127 | 128 | text: 129 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 130 | @echo 131 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 132 | 133 | man: 134 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 135 | @echo 136 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 137 | 138 | texinfo: 139 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 140 | @echo 141 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 142 | @echo "Run \`make' in that directory to run these through makeinfo" \ 143 | "(use \`make info' here to do that automatically)." 144 | 145 | info: 146 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 147 | @echo "Running Texinfo files through makeinfo..." 148 | $(MAKE) -C $(BUILDDIR)/texinfo info 149 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 150 | 151 | gettext: 152 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 153 | @echo 154 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 155 | 156 | changes: 157 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 158 | @echo 159 | @echo "The overview file is in $(BUILDDIR)/changes." 160 | 161 | linkcheck: 162 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 163 | @echo 164 | @echo "Link check complete; look for any errors in the above output " \ 165 | "or in $(BUILDDIR)/linkcheck/output.txt."
166 | 167 | doctest: 168 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 169 | @echo "Testing of doctests in the sources finished, look at the " \ 170 | "results in $(BUILDDIR)/doctest/output.txt." 171 | 172 | xml: 173 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 174 | @echo 175 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 176 | 177 | pseudoxml: 178 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 179 | @echo 180 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 181 | -------------------------------------------------------------------------------- /docs/_static/application-layers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/palewire/django-calaccess-raw-data/8de2240642d3befda2277722364c1c33eddf5e82/docs/_static/application-layers.png -------------------------------------------------------------------------------- /docs/_static/ccdc-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/palewire/django-calaccess-raw-data/8de2240642d3befda2277722364c1c33eddf5e82/docs/_static/ccdc-logo.png -------------------------------------------------------------------------------- /docs/_static/cir-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/palewire/django-calaccess-raw-data/8de2240642d3befda2277722364c1c33eddf5e82/docs/_static/cir-logo.png -------------------------------------------------------------------------------- /docs/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | @import url("theme.css"); 2 | 3 | .doc_pages_container { 4 | clear:both; 5 | display:inline-block; 6 | } 7 | 8 | .doc_page_frame { 9 | float: left; 10 | } 11 | 12 | .doc_page_frame p { 13 | text-align: center; 14 | font-size: 0.85em; 
15 | } 16 | 17 | .doc_page { 18 | border: 2px solid #aaa; 19 | -webkit-box-shadow: 4px 4px 4px 0px rgba(0,0,0,0.5); 20 | -moz-box-shadow: 4px 4px 4px 0px rgba(0,0,0,0.5); 21 | box-shadow: 4px 4px 4px 0px rgba(0,0,0,0.5); 22 | margin-right: 1em; 23 | padding-bottom: 2px; 24 | } 25 | 26 | .doc_page:hover { 27 | border: 2px solid #3399ff; 28 | border-radius: 3px; 29 | -webkit-box-shadow: 2px 2px 4px 4px rgba(51,153,255,0.5); 30 | -moz-box-shadow: 2px 2px 4px 4px rgba(51,153,255,0.5); 31 | box-shadow: 2px 2px 4px 4px rgba(51,153,255,0.5); 32 | padding-bottom: 2px; 33 | } 34 | -------------------------------------------------------------------------------- /docs/_static/latimes-logo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/palewire/django-calaccess-raw-data/8de2240642d3befda2277722364c1c33eddf5e82/docs/_static/latimes-logo.gif -------------------------------------------------------------------------------- /docs/_static/los-angeles-times-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/palewire/django-calaccess-raw-data/8de2240642d3befda2277722364c1c33eddf5e82/docs/_static/los-angeles-times-logo.png -------------------------------------------------------------------------------- /docs/_static/opennews-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/palewire/django-calaccess-raw-data/8de2240642d3befda2277722364c1c33eddf5e82/docs/_static/opennews-logo.png -------------------------------------------------------------------------------- /docs/_static/stanford-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/palewire/django-calaccess-raw-data/8de2240642d3befda2277722364c1c33eddf5e82/docs/_static/stanford-logo.png 
-------------------------------------------------------------------------------- /docs/calaccess_raw_files_report.csv: -------------------------------------------------------------------------------- 1 | file_name,download_columns_count,clean_columns_count,load_columns_count,download_records_count,clean_records_count,load_records_count,pct_cleaned,pct_loaded 2 | ACRONYMS_CD,4,4,5,885,884,884,99.89%,99.89% 3 | ADDRESS_CD,7,7,8,327042,327041,327041,100.00%,100.00% 4 | BALLOT_MEASURES_CD,6,6,7,110,109,109,99.09%,99.09% 5 | CVR2_CAMPAIGN_DISCLOSURE_CD,38,38,39,97864,97864,97863,100.00%,100.00% 6 | CVR2_LOBBY_DISCLOSURE_CD,13,13,14,96586,96585,96585,100.00%,100.00% 7 | CVR2_REGISTRATION_CD,12,12,13,92175,92174,92174,100.00%,100.00% 8 | CVR2_SO_CD,33,33,34,37296,37295,37295,100.00%,100.00% 9 | CVR3_VERIFICATION_INFO_CD,13,13,14,223367,223367,223366,100.00%,100.00% 10 | CVR_CAMPAIGN_DISCLOSURE_CD,86,86,87,342839,342839,342838,100.00%,100.00% 11 | CVR_E530_CD,32,32,33,191,190,190,99.48%,99.48% 12 | CVR_F470_CD,0,0,29,0,-1,0,0.00%,0.00% 13 | CVR_LOBBY_DISCLOSURE_CD,52,52,53,283404,283403,283403,100.00%,100.00% 14 | CVR_REGISTRATION_CD,71,71,72,43587,43586,43586,100.00%,100.00% 15 | CVR_SO_CD,54,54,55,14900,14899,14899,99.99%,99.99% 16 | DEBT_CD,33,33,34,409801,410180,409800,100.09%,100.00% 17 | EFS_FILING_LOG_CD,6,6,7,146814,146813,146813,100.00%,100.00% 18 | EXPN_CD,53,53,54,4835889,4837987,4835888,100.04%,100.00% 19 | F495P2_CD,8,8,9,965,964,964,99.90%,99.90% 20 | F501_502_CD,54,54,55,15444,15443,15443,99.99%,99.99% 21 | F690P2_CD,11,11,12,18489,18488,18488,99.99%,99.99% 22 | FILERNAME_CD,17,17,18,689541,689540,689540,100.00%,100.00% 23 | FILERS_CD,1,1,2,346584,346583,346583,100.00%,100.00% 24 | FILER_ACRONYMS_CD,2,2,3,1787,1786,1786,99.94%,99.94% 25 | FILER_ADDRESS_CD,5,5,6,402193,402192,402192,100.00%,100.00% 26 | FILER_ETHICS_CLASS_CD,3,3,4,15083,15082,15082,99.99%,99.99% 27 | FILER_FILINGS_CD,16,16,17,1852811,1852810,1852810,100.00%,100.00% 28 | 
FILER_INTERESTS_CD,4,4,5,35976,35975,35975,100.00%,100.00% 29 | FILER_LINKS_CD,9,9,10,262257,262256,262256,100.00%,100.00% 30 | FILER_STATUS_TYPES_CD,2,2,3,9,8,8,88.89%,88.89% 31 | FILER_TO_FILER_TYPE_CD,16,16,17,481473,481472,481472,100.00%,100.00% 32 | FILER_TYPES_CD,5,5,6,19,18,18,94.74%,94.74% 33 | FILER_TYPE_PERIODS,0,0,4,0,0,0,0.00%,0.00% 34 | FILER_TYPE_PERIODS_CD,0,0,0,0,0,0,0.00%,0.00% 35 | FILER_XREF_CD,4,4,5,346550,346549,346549,100.00%,100.00% 36 | FILINGS_CD,2,2,3,1721705,1721704,1721704,100.00%,100.00% 37 | FILING_PERIOD_CD,7,7,8,1449,1448,1448,99.93%,99.93% 38 | GROUP_TYPES_CD,3,3,4,21,20,20,95.24%,95.24% 39 | HDR_CD,9,9,10,676751,676750,676750,100.00%,100.00% 40 | HEADER_CD,12,12,13,89,88,88,98.88%,98.88% 41 | IMAGE_LINKS_CD,5,5,6,14702,14701,14701,99.99%,99.99% 42 | LATT_CD,20,20,21,73000,72999,72999,100.00%,100.00% 43 | LCCM_CD,24,24,25,87526,87525,87525,100.00%,100.00% 44 | LEGISLATIVE_SESSIONS_CD,3,3,4,31,30,30,96.77%,96.77% 45 | LEMP_CD,24,24,25,268228,268227,268227,100.00%,100.00% 46 | LEXP_CD,25,25,26,180165,180164,180164,100.00%,100.00% 47 | LOAN_CD,49,49,50,68090,68089,68089,100.00%,100.00% 48 | LOBBYING_CHG_LOG_CD,24,24,25,58244,58243,58243,100.00%,100.00% 49 | LOBBYIST_CONTRIBUTIONS1_CD,7,7,8,6506,6505,6505,99.98%,99.98% 50 | LOBBYIST_CONTRIBUTIONS2_CD,7,7,8,6506,6505,6505,99.98%,99.98% 51 | LOBBYIST_CONTRIBUTIONS3_CD,7,7,8,6506,6505,6505,99.98%,99.98% 52 | LOBBYIST_EMPLOYER1_CD,20,20,21,1731,1730,1730,99.94%,99.94% 53 | LOBBYIST_EMPLOYER2_CD,20,20,21,1731,1730,1730,99.94%,99.94% 54 | LOBBYIST_EMPLOYER3_CD,20,20,21,1731,1730,1730,99.94%,99.94% 55 | LOBBYIST_EMPLOYER_FIRMS1_CD,5,5,6,525,524,524,99.81%,99.81% 56 | LOBBYIST_EMPLOYER_FIRMS2_CD,5,5,6,525,524,524,99.81%,99.81% 57 | LOBBYIST_EMPLOYER_HISTORY,0,0,21,0,0,0,0.00%,0.00% 58 | LOBBYIST_EMPLOYER_HISTORY_CD,0,0,0,0,0,0,0.00%,0.00% 59 | LOBBYIST_EMP_LOBBYIST1_CD,6,6,7,1501,1500,1500,99.93%,99.93% 60 | LOBBYIST_EMP_LOBBYIST2_CD,6,6,7,1501,1500,1500,99.93%,99.93% 61 | 
LOBBYIST_FIRM1_CD,18,18,19,257,256,256,99.61%,99.61% 62 | LOBBYIST_FIRM2_CD,18,18,19,257,256,256,99.61%,99.61% 63 | LOBBYIST_FIRM3_CD,18,18,19,257,256,256,99.61%,99.61% 64 | LOBBYIST_FIRM_EMPLOYER1_CD,11,11,12,171,170,170,99.42%,99.42% 65 | LOBBYIST_FIRM_EMPLOYER2_CD,11,11,12,171,170,170,99.42%,99.42% 66 | LOBBYIST_FIRM_HISTORY,0,0,19,0,0,0,0.00%,0.00% 67 | LOBBYIST_FIRM_HISTORY_CD,0,0,0,0,0,0,0.00%,0.00% 68 | LOBBYIST_FIRM_LOBBYIST1_CD,6,6,7,578,577,577,99.83%,99.83% 69 | LOBBYIST_FIRM_LOBBYIST2_CD,6,6,7,578,577,577,99.83%,99.83% 70 | LOBBY_AMENDMENTS_CD,42,42,43,10923,10922,10922,99.99%,99.99% 71 | LOOKUP_CODES_CD,3,3,4,1131,1130,1130,99.91%,99.91% 72 | LOTH_CD,20,20,21,13589,13588,13588,99.99%,99.99% 73 | LPAY_CD,26,26,27,441091,441090,441090,100.00%,100.00% 74 | NAMES_CD,10,10,11,434689,434688,434688,100.00%,100.00% 75 | RCPT_CD,63,63,64,9300988,9310354,9300987,100.10%,100.00% 76 | RECEIVED_FILINGS_CD,7,7,8,388292,388291,388291,100.00%,100.00% 77 | REPORTS_CD,8,8,9,39,38,38,97.44%,97.44% 78 | S401_CD,37,37,38,41617,41616,41616,100.00%,100.00% 79 | S496_CD,12,12,13,34869,34868,34868,100.00%,100.00% 80 | S497_CD,41,41,42,575083,575082,575082,100.00%,100.00% 81 | S498_CD,37,37,38,9264,9263,9263,99.99%,99.99% 82 | SMRY_CD,9,9,10,8027464,8027463,8027463,100.00%,100.00% 83 | SPLT_CD,8,8,9,3174339,3179896,3174338,100.18%,100.00% 84 | TEXT_MEMO_CD,7,7,8,1525485,1525502,1525484,100.00%,100.00% 85 | -------------------------------------------------------------------------------- /docs/changelog.rst: -------------------------------------------------------------------------------- 1 | These docs have moved... 2 | ======================== 3 | 4 | .. meta:: 5 | :http-equiv=refresh: 4;URL='http://django-calaccess.californiacivicdata.org/en/latest/apps/calaccess_raw/changelog.html' 6 | 7 | 8 | ...to `django-calaccess.californiacivicdata.org/en/latest/apps/calaccess_raw/changelog.html <http://django-calaccess.californiacivicdata.org/en/latest/apps/calaccess_raw/changelog.html>`_. You will be re-directed in a few seconds. 
-------------------------------------------------------------------------------- /docs/faq.rst: -------------------------------------------------------------------------------- 1 | These docs have moved... 2 | ======================== 3 | 4 | .. meta:: 5 | :http-equiv=refresh: 4;URL='http://django-calaccess.californiacivicdata.org/en/latest/faq.html' 6 | 7 | 8 | ...to `django-calaccess.californiacivicdata.org/en/latest/faq.html <http://django-calaccess.californiacivicdata.org/en/latest/faq.html>`_. You will be re-directed in a few seconds. -------------------------------------------------------------------------------- /docs/filingforms.rst: -------------------------------------------------------------------------------- 1 | These docs have moved... 2 | ======================== 3 | 4 | .. meta:: 5 | :http-equiv=refresh: 4;URL='http://django-calaccess.californiacivicdata.org/en/latest/calaccess/filingforms.html' 6 | 7 | 8 | ...to `django-calaccess.californiacivicdata.org/en/latest/calaccess/filingforms.html <http://django-calaccess.californiacivicdata.org/en/latest/calaccess/filingforms.html>`_. You will be re-directed in a few seconds. -------------------------------------------------------------------------------- /docs/howtocontribute.rst: -------------------------------------------------------------------------------- 1 | These docs have moved... 2 | ======================== 3 | 4 | .. meta:: 5 | :http-equiv=refresh: 4;URL='http://django-calaccess.californiacivicdata.org/en/latest/howtocontribute.html' 6 | 7 | 8 | ...to `django-calaccess.californiacivicdata.org/en/latest/howtocontribute.html <http://django-calaccess.californiacivicdata.org/en/latest/howtocontribute.html>`_. You will be re-directed in a few seconds. -------------------------------------------------------------------------------- /docs/howtouseit.rst: -------------------------------------------------------------------------------- 1 | These docs have moved... 2 | ======================== 3 | 4 | .. meta:: 5 | :http-equiv=refresh: 4;URL='http://django-calaccess.californiacivicdata.org/en/latest/howtouseit.html' 6 | 7 | 8 | ...to `django-calaccess.californiacivicdata.org/en/latest/howtouseit.html <http://django-calaccess.californiacivicdata.org/en/latest/howtouseit.html>`_. 
You will be re-directed in a few seconds. -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | These docs have moved... 2 | ======================== 3 | 4 | .. meta:: 5 | :http-equiv=refresh: 4;URL='http://django-calaccess.californiacivicdata.org/en/latest/' 6 | 7 | 8 | ...to `django-calaccess.californiacivicdata.org/en/latest/ <http://django-calaccess.californiacivicdata.org/en/latest/>`_. You will be re-directed in a few seconds. -------------------------------------------------------------------------------- /docs/managementcommands.rst: -------------------------------------------------------------------------------- 1 | These docs have moved... 2 | ======================== 3 | 4 | .. meta:: 5 | :http-equiv=refresh: 4;URL='http://django-calaccess.californiacivicdata.org/en/latest/apps/calaccess_raw/managementcommands.html' 6 | 7 | 8 | ...to `django-calaccess.californiacivicdata.org/en/latest/apps/calaccess_raw/managementcommands.html <http://django-calaccess.californiacivicdata.org/en/latest/apps/calaccess_raw/managementcommands.html>`_. You will be re-directed in a few seconds. -------------------------------------------------------------------------------- /docs/models.rst: -------------------------------------------------------------------------------- 1 | These docs have moved... 2 | ======================== 3 | 4 | .. meta:: 5 | :http-equiv=refresh: 4;URL='http://django-calaccess.californiacivicdata.org/en/latest/calaccess/dbtables.html' 6 | 7 | 8 | ...to `django-calaccess.californiacivicdata.org/en/latest/calaccess/dbtables.html <http://django-calaccess.californiacivicdata.org/en/latest/calaccess/dbtables.html>`_. You will be re-directed in a few seconds. 9 | -------------------------------------------------------------------------------- /docs/officialdocumentation.rst: -------------------------------------------------------------------------------- 1 | These docs have moved... 2 | ======================== 3 | 4 | .. 
meta:: 5 | :http-equiv=refresh: 4;URL='http://django-calaccess.californiacivicdata.org/en/latest/calaccess/officialdocumentation.html' 6 | 7 | 8 | ...to `django-calaccess.californiacivicdata.org/en/latest/calaccess/officialdocumentation.html <http://django-calaccess.californiacivicdata.org/en/latest/calaccess/officialdocumentation.html>`_. You will be re-directed in a few seconds. -------------------------------------------------------------------------------- /docs/releasechecklist.rst: -------------------------------------------------------------------------------- 1 | These docs have moved... 2 | ======================== 3 | 4 | .. meta:: 5 | :http-equiv=refresh: 4;URL='http://django-calaccess.californiacivicdata.org/en/latest/' 6 | 7 | 8 | ...to `django-calaccess.californiacivicdata.org/en/latest/ <http://django-calaccess.californiacivicdata.org/en/latest/>`_. You will be re-directed in a few seconds. -------------------------------------------------------------------------------- /docs/tracking.rst: -------------------------------------------------------------------------------- 1 | These docs have moved... 2 | ======================== 3 | 4 | .. meta:: 5 | :http-equiv=refresh: 4;URL='http://django-calaccess.californiacivicdata.org/en/latest/apps/calaccess_raw/tracking.html' 6 | 7 | 8 | ...to `django-calaccess.californiacivicdata.org/en/latest/apps/calaccess_raw/tracking.html <http://django-calaccess.californiacivicdata.org/en/latest/apps/calaccess_raw/tracking.html>`_. You will be re-directed in a few seconds. 
-------------------------------------------------------------------------------- /example/.documentcloud_metadata/2711614-CalAccessTablesWeb.json: -------------------------------------------------------------------------------- 1 | {"detail":"Not found."} -------------------------------------------------------------------------------- /example/.documentcloud_metadata/2711614.json: -------------------------------------------------------------------------------- 1 | {"id":2711614,"access":"public","asset_url":"https://assets.documentcloud.org/","canonical_url":"https://www.documentcloud.org/documents/2711614-CalAccessTablesWeb","created_at":"2016-02-12T00:01:42.618073Z","data":{},"description":"","edit_access":false,"file_hash":"f5b305ca7e1bd2b33bf05a0b57d47cfe29b8be03","language":"eng","organization":1793,"original_extension":"pdf","page_count":136,"page_spec":"792.0x612.0:0-135","projects":[25827],"publish_at":null,"published_url":"","related_article":"","slug":"CalAccessTablesWeb","source":"","status":"success","title":"CAL-ACCESS Tables, Columns and Indexes","updated_at":"2020-09-22T16:35:56.747302Z","user":14332} -------------------------------------------------------------------------------- /example/.documentcloud_metadata/2711616.json: -------------------------------------------------------------------------------- 1 | {"id":2711616,"access":"public","asset_url":"https://assets.documentcloud.org/","canonical_url":"https://www.documentcloud.org/documents/2711616-MapCalFormat2Fields","created_at":"2016-02-12T00:02:49.015601Z","data":{},"description":"","edit_access":false,"file_hash":"71f25c32cd810f4a9050e6ffa2eb462e64371850","language":"eng","organization":1793,"original_extension":"pdf","page_count":90,"page_spec":"612.0x792.0:0-1;792.0x612.0:2-89","projects":[25827,24851],"publish_at":null,"published_url":"","related_article":"","slug":"MapCalFormat2Fields","source":"","status":"success","title":"Map from .CAL Format to Database Table and 
Fields","updated_at":"2020-09-22T16:35:56.748307Z","user":14332} -------------------------------------------------------------------------------- /example/.documentcloud_metadata/2712033.json: -------------------------------------------------------------------------------- 1 | {"id":2712033,"access":"public","asset_url":"https://assets.documentcloud.org/","canonical_url":"https://www.documentcloud.org/documents/2712033-Cal-Format-1-05-02","created_at":"2016-02-12T17:49:05.804008Z","data":{},"description":"","edit_access":false,"file_hash":"bf64f1a3a57eec58e7e2adf6de98887d554c6af7","language":"eng","organization":1793,"original_extension":"pdf","page_count":75,"page_spec":"612.0x792.0:0-74","projects":[24851],"publish_at":null,"published_url":"","related_article":"","slug":"Cal-Format-1-05-02","source":"","status":"success","title":".CAL Format Layout (Version 1.05.02)","updated_at":"2020-09-22T16:35:56.746400Z","user":14332} -------------------------------------------------------------------------------- /example/.documentcloud_metadata/2712034.json: -------------------------------------------------------------------------------- 1 | {"id":2712034,"access":"public","asset_url":"https://assets.documentcloud.org/","canonical_url":"https://www.documentcloud.org/documents/2712034-Cal-Format-201","created_at":"2016-02-12T17:49:10.492988Z","data":{},"description":"","edit_access":false,"file_hash":"38290f0dabeee45f704bc19f236f86dd248d867f","language":"eng","organization":1793,"original_extension":"pdf","page_count":92,"page_spec":"612.0x792.0:0-91","projects":[24851],"publish_at":null,"published_url":"","related_article":"","slug":"Cal-Format-201","source":"","status":"success","title":".CAL Format Layout (Version 2.01)","updated_at":"2020-09-22T16:35:56.746755Z","user":14332} -------------------------------------------------------------------------------- /example/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/palewire/django-calaccess-raw-data/8de2240642d3befda2277722364c1c33eddf5e82/example/__init__.py -------------------------------------------------------------------------------- /example/manage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import sys 4 | 5 | if __name__ == "__main__": 6 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "settings") 7 | sys.path.append(os.path.dirname(os.path.dirname(__file__))) 8 | from django.core.management import execute_from_command_line 9 | execute_from_command_line(sys.argv) 10 | -------------------------------------------------------------------------------- /example/settings.py: -------------------------------------------------------------------------------- 1 | import os 2 | BASE_DIR = os.path.dirname(__file__) 3 | REPO_DIR = os.path.join(BASE_DIR, os.pardir) 4 | SECRET_KEY = 'w11nbg_3n4+e@qk^b55qgo5qygesn^3=&s1kwtlbpkai$(1jv3' 5 | DEBUG = False 6 | ALLOWED_HOSTS = [ 7 | 'localhost', 8 | '127.0.0.1', 9 | ] 10 | STATIC_ROOT = os.path.join(BASE_DIR, ".static") 11 | MEDIA_ROOT = os.path.join(BASE_DIR, ".media") 12 | 13 | INSTALLED_APPS = ( 14 | 'django.contrib.admin', 15 | 'django.contrib.auth', 16 | 'django.contrib.contenttypes', 17 | 'django.contrib.sessions', 18 | 'django.contrib.messages', 19 | 'django.contrib.staticfiles', 20 | 'toolbox', 21 | 'calaccess_raw', 22 | ) 23 | 24 | MIDDLEWARE = ( 25 | 'django.contrib.sessions.middleware.SessionMiddleware', 26 | 'django.middleware.common.CommonMiddleware', 27 | 'django.middleware.csrf.CsrfViewMiddleware', 28 | 'django.contrib.auth.middleware.AuthenticationMiddleware', 29 | 'django.contrib.messages.middleware.MessageMiddleware', 30 | 'django.middleware.clickjacking.XFrameOptionsMiddleware', 31 | ) 32 | 33 | ROOT_URLCONF = 'urls' 34 | WSGI_APPLICATION = 'wsgi.application' 35 | 36 | TEMPLATES = [ 37 | { 38 | 'BACKEND': 
'django.template.backends.django.DjangoTemplates', 39 | 'DIRS': [], 40 | 'APP_DIRS': True, 41 | 'OPTIONS': { 42 | 'context_processors': [ 43 | 'django.template.context_processors.debug', 44 | 'django.template.context_processors.request', 45 | 'django.contrib.auth.context_processors.auth', 46 | 'django.contrib.messages.context_processors.messages', 47 | ], 48 | }, 49 | }, 50 | ] 51 | 52 | DATABASES = { 53 | 'default': { 54 | 'ENGINE': 'django.db.backends.postgresql_psycopg2', 55 | 'NAME': 'calaccess_raw', 56 | 'USER': 'postgres', 57 | 'PASSWORD': '', 58 | 'HOST': 'localhost', 59 | 'PORT': '5432' 60 | }, 61 | } 62 | CALACCESS_STORE_ARCHIVE = False 63 | 64 | IA_STORAGE_ACCESS_KEY = os.getenv('IA_STORAGE_ACCESS_KEY') 65 | IA_STORAGE_SECRET_KEY = os.getenv('IA_STORAGE_SECRET_KEY') 66 | IA_STORAGE_COLLECTION = 'test_collection' 67 | IA_STORAGE_CONTRIBUTOR = 'palewire' 68 | IA_STORAGE_CREATOR = "palewire" 69 | IA_STORAGE_PUBLISHER = 'california-civic-data-coalition/django-calaccess-raw-data' 70 | IA_STORAGE_MEDIATYPE = "data" 71 | IA_STORAGE_SUBJECT = ['test'] 72 | 73 | LANGUAGE_CODE = 'en-us' 74 | TIME_ZONE = 'UTC' 75 | USE_I18N = False 76 | USE_L10N = False 77 | USE_TZ = True 78 | STATIC_URL = '/static/' 79 | 80 | LOGGING = { 81 | 'version': 1, 82 | 'disable_existing_loggers': False, 83 | 'filters': { 84 | 'require_debug_false': { 85 | '()': 'django.utils.log.RequireDebugFalse' 86 | }, 87 | }, 88 | 'handlers': { 89 | 'null': { 90 | 'level': 'DEBUG', 91 | 'class': 'logging.NullHandler', 92 | }, 93 | 'console': { 94 | 'level': 'DEBUG', 95 | 'class': 'logging.StreamHandler', 96 | 'formatter': 'verbose' 97 | }, 98 | 'logfile': { 99 | 'level': 'DEBUG', 100 | 'class': 'logging.handlers.RotatingFileHandler', 101 | 'filename': os.path.join(BASE_DIR, 'django.log'), 102 | 'maxBytes': 1024*1024*5, # 5MB 103 | 'backupCount': 0, 104 | 'formatter': 'verbose', 105 | }, 106 | }, 107 | 'formatters': { 108 | 'verbose': { 109 | 'format': '%(levelname)s|%(asctime)s|%(module)s|%(message)s', 
110 | 'datefmt': "%d/%b/%Y %H:%M:%S" 111 | }, 112 | 'simple': { 113 | 'format': '%(message)s' 114 | }, 115 | }, 116 | 'loggers': { 117 | 'calaccess_raw': { 118 | 'handlers': ['console'], 119 | 'level': 'DEBUG', 120 | 'propagate': True, 121 | }, 122 | 'postgres_copy': { 123 | 'handlers': ['console'], 124 | 'level': 'DEBUG', 125 | 'propagate': True, 126 | }, 127 | 'ia_storage': { 128 | 'handlers': ['console'], 129 | 'level': 'DEBUG', 130 | 'propagate': True, 131 | }, 132 | } 133 | } 134 | 135 | try: 136 | from settings_local import * 137 | except ImportError: 138 | pass 139 | -------------------------------------------------------------------------------- /example/settings_local.py.template: -------------------------------------------------------------------------------- 1 | DATABASES = { 2 | 'default': { 3 | 'ENGINE': 'django.db.backends.postgresql_psycopg2', 4 | 'NAME': 'calaccess_raw', 5 | 'USER': 'postgres', 6 | 'PASSWORD': '', 7 | 'HOST': 'localhost', 8 | 'PORT': '5432' 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /example/settings_test_multi_dbs.py.template: -------------------------------------------------------------------------------- 1 | DATABASES = { 2 | 'default': { 3 | 'ENGINE': 'django.db.backends.postgresql_psycopg2', 4 | 'NAME': 'calaccess_raw', 5 | 'USER': 'postgres', 6 | 'PASSWORD': 'postgres', 7 | 'HOST': 'localhost', 8 | 'PORT': '5432' 9 | }, 10 | 'alt': { 11 | 'ENGINE': 'django.db.backends.postgresql', 12 | 'NAME': 'calaccess_raw_alt', 13 | 'USER': 'postgres', 14 | 'PASSWORD': 'postgres', 15 | 'HOST': 'localhost', 16 | 'PORT': '5432' 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /example/settings_test_postgresql.py.template: -------------------------------------------------------------------------------- 1 | DATABASES = { 2 | 'default': { 3 | 'ENGINE': 'django.db.backends.postgresql_psycopg2', 4 | 'NAME': 'calaccess_raw', 5 | 'USER': 
'postgres', 6 | 'PASSWORD': 'postgres', 7 | 'HOST': 'localhost', 8 | 'PORT': '5432' 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /example/test-data/dbwebexport.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/palewire/django-calaccess-raw-data/8de2240642d3befda2277722364c1c33eddf5e82/example/test-data/dbwebexport.zip -------------------------------------------------------------------------------- /example/test-data/raw.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/palewire/django-calaccess-raw-data/8de2240642d3befda2277722364c1c33eddf5e82/example/test-data/raw.zip -------------------------------------------------------------------------------- /example/toolbox/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/palewire/django-calaccess-raw-data/8de2240642d3befda2277722364c1c33eddf5e82/example/toolbox/__init__.py -------------------------------------------------------------------------------- /example/toolbox/management/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/palewire/django-calaccess-raw-data/8de2240642d3befda2277722364c1c33eddf5e82/example/toolbox/management/__init__.py -------------------------------------------------------------------------------- /example/toolbox/management/commands/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/palewire/django-calaccess-raw-data/8de2240642d3befda2277722364c1c33eddf5e82/example/toolbox/management/commands/__init__.py -------------------------------------------------------------------------------- /example/toolbox/management/commands/analyzecoderush.py: 
class Command(CalAccessCommand):
    """
    Count the commits and distinct authors that landed during a code sprint.
    """
    help = 'Analyze GitHub commits during one of our code sprints'

    def set_options(self, *args, **kwargs):
        """
        Hook up with the GitHub API.

        Reads the API token from the GITHUB_TOKEN environment variable and
        records the repositories plus the start/end of the sprint window.
        """
        self.gh = Github(os.getenv('GITHUB_TOKEN'))
        self.org = self.gh.get_organization("california-civic-data-coalition")
        self.repo_list = [
            self.org.get_repo("django-calaccess-raw-data"),
        ]
        self.start = datetime(2016, 3, 9, 0, 0, 0)
        self.end = datetime(2016, 3, 15, 0, 0, 0)

    def handle(self, *args, **kwargs):
        """
        Make it happen.

        For each configured repository, log its name, the number of distinct
        commit authors and the number of commits inside the sprint window.
        """
        super(Command, self).handle(*args, **kwargs)
        self.set_options()
        self.header("Analyzing Code Rush commits")

        for repo in self.repo_list:
            self.log(repo.name)
            commit_count = 0
            author_list = []
            # Bound the query on both sides. Previously only `since` was
            # passed, so every commit made after the sprint ended was
            # counted as well and `self.end` was never used.
            commits = repo.get_commits(since=self.start, until=self.end)
            for commit in commits:
                commit_count += 1
                if commit.author not in author_list:
                    author_list.append(commit.author)
                # Short pause between commits to go easy on the API.
                time.sleep(0.1)
            self.log("- Authors: %s" % len(author_list))
            self.log("- Commits: %s" % commit_count)
class Command(CalAccessCommand):
    """
    Report which models returned by get_model_list() have no UNIQUE_KEY.
    """
    help = 'Analyze how many model lack a UNIQUE_KEY definition'

    def handle(self, *args, **kwargs):
        """
        Make it happen.

        Logs every model whose UNIQUE_KEY is None and then prints a summary.
        Returns False when no model is missing a key, otherwise None.
        """
        # Loop through all the models and find any without a UNIQUE_KEY
        missing_list = []
        model_count = 0
        for m in get_model_list():
            model_count += 1
            if m.UNIQUE_KEY is None:
                instance = m()
                self.log("Missing: %s.%s" % (
                    instance.klass_group,
                    instance.klass_name,
                ))
                missing_list.append(m)

        # If everything is done, declare victory
        missing_count = len(missing_list)
        if not missing_count:
            # Report the number of models checked. The old message
            # interpolated missing_count, which is always 0 on this branch.
            self.success("All %s models have a UNIQUE_KEY!" % model_count)
            return False

        # If not, report how many models still need one
        self.failure(
            "%s/%s (%d%%) of models lack a UNIQUE_KEY" % (
                intcomma(missing_count),
                model_count,
                calculate.percentage(missing_count, model_count)
            )
        )
[ 47 | '_cd', 48 | '_code', 49 | 'type', 50 | 'status', 51 | '_lvl', 52 | 'reportname', 53 | 'form_id', 54 | ] 55 | excluded_fields = [ 56 | 'LookupCodesCd.code_type', 57 | 'S497Cd.sup_off_cd', 58 | ] 59 | 60 | model_list = sorted( 61 | get_model_list(), 62 | key=lambda x: (x().klass_group, x().klass_name) 63 | ) 64 | 65 | for m in model_list: 66 | for f in m._meta.fields: 67 | if ( 68 | any(x in f.name for x in choice_field_strs) and 69 | f.name != 'memo_code' and 70 | f.__class__ is not models.ForeignKey and 71 | '{}.{}'.format(m().klass_name, f.name) not in excluded_fields 72 | ): 73 | # make an issue for every choice field missing docs 74 | # includes those that are also missing choices 75 | if not f.documentcloud_pages: 76 | self.create_issue(f) 77 | 78 | def create_issue(self, field): 79 | """ 80 | Create a GitHub issue for the provided field. 81 | """ 82 | context = dict( 83 | field=field, 84 | model_name=field.model.__name__, 85 | field_class=field.__class__.__name__, 86 | db_table=field.model._meta.fields[1].model._meta.db_table, 87 | has_choices=bool(field.choices), 88 | has_docs=bool(field.documentcloud_pages), 89 | file_name=field.model.__module__.split('.')[-1] + '.py', 90 | ) 91 | title = " to {model}.{field} (in {file_name})".format( 92 | model=field.model.__name__, 93 | field=field.name, 94 | file_name=context['file_name'] 95 | ) 96 | 97 | if not field.choices and not field.documentcloud_pages: 98 | title = 'Add choices and documentcloud_pages' + title 99 | elif not field.documentcloud_pages: 100 | title = 'Add documentcloud_pages' + title 101 | 102 | body = render_to_string( 103 | 'toolbox/createchoicefieldissue.md', 104 | context, 105 | ) 106 | 107 | self.log("-- Creating issue for %s.%s" % ( 108 | field.model.__name__, 109 | field.name 110 | ) 111 | ) 112 | if self.dry_run: 113 | print '==========================' 114 | print title 115 | print '--------------------------' 116 | print body 117 | print '==========================' 118 | else: 119 | 
self.repo.create_issue( 120 | title, 121 | body=body, 122 | labels=self.labels, 123 | ) 124 | time.sleep(2.5) 125 | -------------------------------------------------------------------------------- /example/toolbox/management/commands/createfielddocissues.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import calculate 4 | from github import Github 5 | from django.conf import settings 6 | from calaccess_raw import get_model_list 7 | from calaccess_raw.management.commands import CalAccessCommand 8 | from django.contrib.humanize.templatetags.humanize import intcomma 9 | 10 | 11 | class Command(CalAccessCommand): 12 | help = 'Create GitHub issues for model fields without documentation' 13 | 14 | def set_options(self, *args, **kwargs): 15 | """ 16 | Hook up with the GitHub API and prepare to create issues. 17 | """ 18 | self.gh = Github(os.getenv('GITHUB_TOKEN')) 19 | self.org = self.gh.get_organization("california-civic-data-coalition") 20 | self.repo = self.org.get_repo("django-calaccess-raw-data") 21 | self.labels = [ 22 | self.repo.get_label("small"), 23 | self.repo.get_label("documentation"), 24 | self.repo.get_label("enhancement"), 25 | ] 26 | self.milestone = self.repo.get_milestone(3) 27 | 28 | def handle(self, *args, **kwargs): 29 | """ 30 | Make it happen. 31 | """ 32 | self.set_options() 33 | self.header( 34 | "Creating GitHub issues for model fields without documentation" 35 | ) 36 | 37 | # Loop through all the models and find any fields without docs 38 | field_count = 0 39 | missing_list = [] 40 | for m in get_model_list(): 41 | field_list = m().get_field_list() 42 | field_count += len(field_list) 43 | for f in field_list: 44 | if not self.has_docs(f): 45 | missing_list.append((m, f)) 46 | 47 | # If everything is done, declare victory 48 | if not missing_list: 49 | self.success("All %s fields documented!" 
% field_count) 50 | return False 51 | 52 | # If not, loop through the missing and create issues 53 | missing_count = len(missing_list) 54 | self.log( 55 | "- %s/%s (%d%%) of fields lack documentation" % ( 56 | intcomma(missing_count), 57 | intcomma(field_count), 58 | calculate.percentage(missing_count, field_count) 59 | ) 60 | ) 61 | for model, field in missing_list[611:]: 62 | # For now we are excluding the 'other' model module to 63 | # avoid overkill 64 | if model().klass_group != 'other': 65 | self.create_issue(model, field) 66 | 67 | def has_docs(self, field): 68 | """ 69 | Test if a Django field has some kind of documentation already. 70 | 71 | Returns True or False 72 | """ 73 | if field.name == 'id': 74 | return True 75 | if field.help_text: 76 | return True 77 | if field.__dict__['_verbose_name']: 78 | return True 79 | return False 80 | 81 | def create_issue(self, model, field): 82 | """ 83 | Create a GitHub issue for the provided model and field. 84 | """ 85 | title = TITLE_TEMPLATE % (field.name, model().klass_name) 86 | 87 | body = BODY_TEMPLATE % ( 88 | field.name, 89 | model().klass_name, 90 | model().klass_group, 91 | model().klass_group, 92 | ) 93 | self.log("-- Creating issue for %s.%s" % ( 94 | model().klass_name, 95 | field.name 96 | ) 97 | ) 98 | self.repo.create_issue( 99 | title, 100 | body=body, 101 | labels=self.labels, 102 | milestone=self.milestone 103 | ) 104 | time.sleep(2.5) 105 | 106 | TITLE_TEMPLATE = """ 107 | Add documentation for the ``%s`` field on the ``%s`` database model 108 | """.replace("\n", "") 109 | 110 | BODY_TEMPLATE = """ 111 | ## Your mission 112 | 113 | Add documentation for the ``%s`` field on the ``%s`` database model. 114 | 115 | ## Here's how 116 | 117 | **Step 1**: Claim this ticket by leaving a comment below. Tell everyone you're ON IT! 118 | 119 | **Step 2**: Open up the file that contains this model. It should be in calaccess_raw.models.%s.py. 
120 | 121 | **Step 3**: Hit the little pencil button in the upper-right corner of the code box to begin editing the file. 122 | 123 | ![Edit](https://dl.dropboxusercontent.com/u/3640647/ScreenCloud/1440367320.67.png) 124 | 125 | **Step 4**: Find this model and field in the file. (Clicking into the box and searching with CTRL-F can help you here.) Once you find it, we expect the field to lack the ``help_text`` field typically used in Django to explain what a field contains. 126 | 127 | ```python 128 | effect_dt = fields.DateField( 129 | null=True, 130 | db_column="EFFECT_DT" 131 | ) 132 | ``` 133 | 134 | **Step 5**: In a separate tab, open up the official state documentation and find the page that defines all the fields in this model. 135 | 136 | ![The docs](https://dl.dropboxusercontent.com/u/3640647/ScreenCloud/1440367001.08.png) 137 | 138 | **Step 6**: Find the row in that table's definition table that spells out what this field contains. If it lacks documentation. Note that in the ticket and close it now. 139 | 140 | ![The definition](https://dl.dropboxusercontent.com/u/3640647/ScreenCloud/1440367068.59.png) 141 | 142 | **Step 7**: Return to the GitHub tab. 143 | 144 | **Step 8**: Add the state's label explaining what's in the field, to our field definition by inserting it a ``help_text`` argument. That should look something like this: 145 | 146 | ```python 147 | effect_dt = fields.DateField( 148 | null=True, 149 | db_column="EFFECT_DT", 150 | # Add a help_text argument like the one here, but put your string in instead. 151 | help_text="The other values in record were effective as of this date" 152 | ) 153 | ``` 154 | 155 | **Step 9**: Scroll down below the code box and describe the change you've made in the commit message. Press the button below. 156 | 157 | ![Commit](https://dl.dropboxusercontent.com/u/3640647/ScreenCloud/1440367511.66.png) 158 | 159 | **Step 10**: Review your changes and create a pull request submitting them to the core team for inclusion. 
class Command(CalAccessCommand):
    """
    Open a GitHub issue for every model that has no UNIQUE_KEY.
    """
    help = "Creating GitHub issues for model models without a UNIQUE_KEY"

    def set_options(self, *args, **kwargs):
        """
        Hook up with the GitHub API and prepare to create issues.

        Reads the API token from the GITHUB_TOKEN environment variable and
        looks up the repository, labels and milestone used for new issues.
        """
        self.gh = Github(os.getenv('GITHUB_TOKEN'))
        self.org = self.gh.get_organization("california-civic-data-coalition")
        self.repo = self.org.get_repo("django-calaccess-raw-data")
        self.labels = [
            self.repo.get_label("small"),
            self.repo.get_label("documentation"),
            self.repo.get_label("enhancement"),
        ]
        self.milestone = self.repo.get_milestone(4)

    def handle(self, *args, **kwargs):
        """
        Make it happen.

        Logs each model with a falsy UNIQUE_KEY, prints a summary and then
        creates one issue per such model. Returns False when nothing is
        missing, otherwise None.
        """
        self.set_options()
        self.header(
            "Creating GitHub issues for model models without a UNIQUE_KEY"
        )

        # Loop through all the models and find any without a UNIQUE_KEY
        missing_list = []
        model_count = 0
        for m in get_model_list():
            model_count += 1
            if not m.UNIQUE_KEY:
                instance = m()
                self.log("Missing: %s.%s" % (
                    instance.klass_group,
                    instance.klass_name,
                ))
                missing_list.append(m)

        # If everything is done, declare victory
        missing_count = len(missing_list)
        if not missing_count:
            # Report the number of models checked. The old message
            # interpolated missing_count, which is always 0 on this branch.
            self.success("All %s models have a UNIQUE_KEY!" % model_count)
            return False

        # If not, loop through the missing and create issues
        self.log(
            "- %s/%s (%d%%) of models lack a UNIQUE_KEY" % (
                intcomma(missing_count),
                intcomma(model_count),
                calculate.percentage(missing_count, model_count)
            )
        )
        # Cover every model in the list. The previous `missing_list[1:]`
        # slice silently skipped the first model, so it never got an issue.
        for model in missing_list:
            self.create_issue(model)

    def create_issue(self, model):
        """
        Create a GitHub issue for the provided model.

        The title and body come from the module-level TITLE_TEMPLATE and
        BODY_TEMPLATE strings; a 2.5 second pause follows each creation.
        """
        instance = model()
        title = TITLE_TEMPLATE % instance.klass_name

        body = BODY_TEMPLATE % (
            instance.klass_name,
            instance.klass_group,
            instance.klass_group,
        )
        self.log("-- Creating issue for %s" % instance.klass_name)
        self.repo.create_issue(
            title,
            body=body,
            labels=self.labels,
            milestone=self.milestone
        )
        time.sleep(2.5)
101 | 102 | ![The docs](https://dl.dropboxusercontent.com/u/3640647/ScreenCloud/1440367001.08.png) 103 | 104 | **Step 3**: Find the "PK_" row in the index list included with the table's definition that spells out the list of "column codes" that make up the unique key for each row. If it lacks that documentation. Note that in the ticket and close it now. 105 | 106 | ![The definition](https://dl.dropboxusercontent.com/u/3640647/ScreenCloud/1442811831.94.png) 107 | 108 | **Step 4**: Return to this tab. 109 | 110 | **Step 5**: Open up the file that contains this model. It should be in calaccess_raw.models.%s.py. 111 | 112 | **Step 6**: Hit the little pencil button in the upper-right corner of the code box to begin editing the file. 113 | 114 | ![Edit](https://dl.dropboxusercontent.com/u/3640647/ScreenCloud/1440367320.67.png) 115 | 116 | **Step 7**: Find this model in the file. (Clicking into the box and searching with CTRL-F can help you here.) Once you find it, we expect the model to lack a ``UNIQUE_KEY`` attribute indented just under the documentation. 117 | 118 | ```python 119 | class WhateverYourModel(CalAccessBaseModel): 120 | ''' 121 | There will be some documentation here but don't touch it. 122 | ''' 123 | field1 = models.CharField(blah='blah') 124 | ``` 125 | **Step 8**: Add this model's ``UNIQUE_KEY`` list of column codes, to our field definition by inserting it under the documentation. That should look something like this: 126 | 127 | ```python 128 | class WhateverYourModel(CalAccessBaseModel): 129 | ''' 130 | There will be some documentation here but don't touch it. 131 | ''' 132 | UNIQUE_KEY = ['FIELD1', 'FIELD2', 'FIELD3'] 133 | field1 = models.CharField(blah='blah') 134 | ``` 135 | 136 | **Step 9**: Scroll down below the code box and describe the change you've made in the commit message. Press the button below. 137 | 138 | **Step 10**: Review your changes and create a pull request submitting them to the core team for inclusion. 139 | 140 | That's it! 
class Command(CalAccessCommand):
    """
    Open one GitHub issue per model listing fields that lack docs.
    """
    help = 'Create GitHub issues for fields missing verbose and/or help text'

    def add_arguments(self, parser):
        """
        Adds custom arguments specific to this command.
        """
        super(Command, self).add_arguments(parser)
        parser.add_argument(
            "--dry-run",
            action="store_true",
            dest="dry_run",
            default=False,
            help="Print text of issues without sending to Github"
        )

    def handle(self, *args, **options):
        """
        Connect to Github using the token stored in the environment, loop
        over model fields, and create one issue per model that has fields
        missing a verbose name and/or help text.

        With --dry-run the issue title and body are printed instead.
        """
        super(Command, self).handle(*args, **options)
        self.dry_run = options["dry_run"]
        # set up connect to Github account
        self.gh = Github(os.getenv('GITHUB_TOKEN'))
        self.org = self.gh.get_organization("california-civic-data-coalition")
        self.repo = self.org.get_repo("django-calaccess-raw-data")
        self.labels = [
            self.repo.get_label("small"),
            self.repo.get_label("documentation"),
            self.repo.get_label("enhancement"),
        ]
        # The old header was copy-pasted from the choice-field command.
        self.header(
            "Creating GitHub issues for fields missing verbose and/or help text"
        )

        model_list = sorted(
            get_model_list(),
            key=lambda x: (x().klass_group, x().klass_name)
        )

        models_to_fix = []

        for m in model_list:
            fields_to_fix = []

            for f in m._meta.fields:
                if f.name == 'id':
                    continue
                # An empty or unset verbose name is falsy either way, so one
                # truthiness test covers both of the original branches.
                no_verbose = not f.__dict__['_verbose_name']
                no_help = len(f.help_text) == 0
                if no_verbose or no_help:
                    fields_to_fix.append(
                        (f, {'no_verbose': no_verbose, 'no_help': no_help})
                    )

            if fields_to_fix:
                models_to_fix.append((m, tuple(fields_to_fix)))

        for model, fields in models_to_fix:

            context = dict(
                model_name=model.__name__,
                model_docs=model().DOCUMENTCLOUD_PAGES,
                file_name=model.__module__.split('.')[-1] + '.py',
                fields=fields,
            )

            # Built from adjacent literals rather than a backslash
            # continuation inside the string, which would pull any leading
            # whitespace of the continuation line into the title.
            title = (
                "Add verbose and/or help text fields on {model_name} "
                "(in {file_name})"
            ).format(**context)

            body = render_to_string(
                'toolbox/createverboseandhelptextissues.md',
                context,
            )

            self.log("-- Creating issue for {model_name}".format(**context))
            if self.dry_run:
                # print() calls work on Python 2 and 3; the old print
                # statements were a SyntaxError on Python 3.
                print('==========================')
                print(title)
                print('--------------------------')
                print(body)
                print('==========================')
            else:
                self.repo.create_issue(
                    title,
                    body=body,
                    labels=self.labels,
                )
                # Pause between issue creations to stay gentle on the API.
                time.sleep(2.5)
django.conf import settings 10 | from calaccess_raw.management.commands import CalAccessCommand 11 | from calaccess_raw.models import RawDataVersion 12 | from zipfile import ZIP_DEFLATED, ZIP_STORED, ZipFile 13 | 14 | 15 | class Command(CalAccessCommand): 16 | help = 'Create smaller sampled TSV files for unit tests' 17 | 18 | def add_arguments(self, parser): 19 | """ 20 | Adds custom arguments specific to this command. 21 | """ 22 | super(Command, self).add_arguments(parser) 23 | parser.add_argument( 24 | "--sample-rows", 25 | action="store", 26 | dest="samplerows", 27 | default=1000, 28 | help="Number of rows to grab from each table" 29 | ) 30 | 31 | def handle(self, *args, **options): 32 | super(Command, self).handle(*args, **options) 33 | 34 | # Set options 35 | self.test_data_dir = os.path.join(settings.BASE_DIR, 'test-data') 36 | self.sample_dir = os.path.join(self.test_data_dir, "tsv/") 37 | self.sample_rows = int(options['samplerows']) 38 | self.tsv_list = os.listdir(self.tsv_dir) 39 | 40 | self.header("Sampling %i rows from %s source files" % ( 41 | self.sample_rows, 42 | len(self.tsv_list), 43 | )) 44 | 45 | # Make sure sample dir exists and is empty 46 | os.path.exists(self.test_data_dir) or os.makedirs(self.test_data_dir) 47 | os.path.exists(self.sample_dir) and shutil.rmtree(self.sample_dir) 48 | os.makedirs(self.sample_dir) 49 | 50 | # Loop through all the files in the source directory 51 | for name in progress.bar(self.tsv_list): 52 | 53 | # Find the input 54 | file = os.path.join(self.tsv_dir, name) 55 | out_file = os.path.join(self.sample_dir, name) 56 | 57 | if self.verbosity > 2: 58 | self.log(" Sampling %s" % file) 59 | 60 | # Open the file 61 | fi = FileInput(file, True) 62 | 63 | # Generate our sample 64 | sample = two_pass_sample(fi, sample_size=self.sample_rows) 65 | 66 | # Open our output file 67 | with open(out_file, 'wb') as out: 68 | 69 | # Write it out 70 | for line in chain(fi.header, sample): 71 | out.write(line) 72 | 73 | 
self.header("Compressing zip file...") 74 | self.save_zip() 75 | 76 | def save_zip(self): 77 | """ 78 | Save a zip file containing all the sampled .TSV files 79 | """ 80 | # enable zipfile compression 81 | compression = ZIP_DEFLATED 82 | zip_path = os.path.join(self.test_data_dir, 'dbwebexport.zip') 83 | 84 | try: 85 | zf = ZipFile(zip_path, 'w', compression, allowZip64=True) 86 | except RuntimeError: 87 | logger.error('Zip file cannot be compressed (check zlib module).') 88 | compression = ZIP_STORED 89 | zf = ZipFile(zip_path, 'w', compression, allowZip64=True) 90 | 91 | # loop over and save files in csv dir 92 | for name in os.listdir(self.sample_dir): 93 | if self.verbosity > 2: 94 | self.log(" Adding %s to zip" % name) 95 | f = os.path.join(self.sample_dir, name) 96 | zf.write(f, 'CalAccess/DATA/' + name) 97 | 98 | zf.close() 99 | -------------------------------------------------------------------------------- /example/toolbox/management/commands/verifycalaccesschoicefields.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import agate 4 | from django.db.models import Count 5 | from calaccess_raw.management.commands import CalAccessCommand 6 | from calaccess_raw import get_model_list 7 | 8 | 9 | class Command(CalAccessCommand): 10 | help = "Find db values in choice fields not defined in the fields choices attr." 11 | 12 | def add_arguments(self, parser): 13 | """ 14 | Adds custom arguments specific to this command. 
15 | """ 16 | super(Command, self).add_arguments(parser) 17 | parser.add_argument( 18 | "-a", 19 | "--app-name", 20 | dest="app_name", 21 | default="calaccess_raw", 22 | help="Name of Django app with models into which data will " 23 | "be imported (if other not calaccess_raw)" 24 | ) 25 | 26 | def handle(self, *args, **options): 27 | super(Command, self).handle(*args, **options) 28 | 29 | model_list = sorted( 30 | get_model_list(), 31 | key=lambda x: (x().klass_group, x().klass_name) 32 | ) 33 | 34 | results = [] 35 | 36 | for m in model_list: 37 | if self.verbosity > 1: 38 | self.log( 39 | " Verifying {0}.{1} fields".format( 40 | m().klass_group, 41 | m.__name__, 42 | ) 43 | ) 44 | for f in m._meta.fields: 45 | if f.choices: 46 | for value, count in m.objects.order_by().values_list( 47 | f.name, 48 | ).annotate(Count(f.name)): 49 | if ( 50 | value not in [x[0] for x in f.choices] and 51 | value != '' and 52 | value is not None 53 | ): 54 | if self.verbosity > 2: 55 | self.failure( 56 | " Undefined value for {0}: {1} ({2} occurrences)".format( 57 | f.name, 58 | value, 59 | count 60 | ) 61 | ) 62 | results.append(( 63 | m().klass_group, 64 | m.__name__, 65 | f.name, 66 | value, 67 | count, 68 | )) 69 | 70 | if len(results) > 0: 71 | self.failure("{} undefined choice field values".format(len(results))) 72 | 73 | table = agate.Table( 74 | results, 75 | ['group', 'model', 'field', 'undefined_value', 'occurrences'] 76 | ) 77 | table.print_table(max_rows=None, max_column_width=50) 78 | else: 79 | self.success("No undefined choice field values") 80 | -------------------------------------------------------------------------------- /example/toolbox/templates/toolbox/createverboseandhelptextissues.md: -------------------------------------------------------------------------------- 1 | This issue is part of our on-going efforts to thoroughly document the raw data from CAL-ACCESS. 
2 | 3 | As such, our goal is to ensure that every field in the database has a `verbose_name` that is more intelligible than its header in the raw file. In many cases, this is just a matter of filling in the missing letters of words in the field names. For example, a field named `acct_opendt` would have `verbose_name='account opened datetime'`. 4 | 5 | Another of our goals in this documentation effort is for every field to have an accurate and descriptive `help_text`. This would include additional explanations about this field, usually discovered in the official documentation. Here are the locations in the official docs where we've previously found descriptions about this table (there may still be others to find): 6 | {% for doc in model_docs %} 7 | * [{{ doc.title }}, p. {{ doc.start_page }}{% if doc.end_page %}-{{ doc.end_page }}{% endif %}]({{ doc.canonical_url }}) 8 | {% endfor %} 9 | 10 | Note that in some cases, a field's `verbose_name` and its `help_text` might be exactly the same. Regardless, both should be assigned. 11 | 12 | The following fields on {{ model_name }} (in [{{ file_name }}](https://github.com/california-civic-data-coalition/django-calaccess-raw-data/blob/master/calaccess_raw/models/{{ file_name }})) are missing the `verbose_name` attribute, the `help_text` attribute, or both: 13 | {% for field, issues in fields %} 14 | - [ ] {{ field.name }} is missing{% if issues.no_verbose and issues.no_help %} both `verbose_name` and `help_text`{% elif issues.no_verbose %} `verbose_name`{% elif issues.no_help %} `help_text`{% endif %} 15 | {% endfor %} 16 | 17 | And thanks for helping out! 
-------------------------------------------------------------------------------- /example/toolbox/templates/toolbox/filingforms.rst: -------------------------------------------------------------------------------- 1 | Filing Forms 2 | ============ 3 | 4 | The data in CAL-ACCESS originates from forms filed by public office candidates, campaign committees, lobbyists and lobbyist employers. These filing forms are outlined below. 5 | 6 | Many of the documents and descriptions included here were gathered from the `Forms `_ section of the California Fair Political Practices Commission (FPPC) website, where you can find even more detailed filing instructions and requirements. 7 | 8 | Similar documentation and background info can also be found in the `Campaign Finance `_ and `Lobbying Activities `_ sections of the California Secretary of State's website. 9 | 10 | .. warning:: 11 | 12 | The sections and page numbers noted below match those of the current sample forms provided by the FPPC, but not necessarily any of the actual forms submitted by filers. This is a result of filings that include multiple pages for a given schedule as well as changes to the layout of forms over the years. 13 | 14 | You can view a PDF of any form submitted by a filer by pointing your browser at http://cal-access.ss.ca.gov/PDFGen/pdfgen.prg?filingid=2028957&amendid=0. Note that in the query string parameters, *2028957* and *0* should be exchanged for the filing ID and amendment ID of the desired filing. 15 | 16 | 17 | {% for group, form_list in group_list %} 18 | {{ group|title }} Forms 19 | -------------------------- 20 | 21 | {% for form in form_list %} 22 | {{ form.type_and_num|safe }} 23 | ~~~~~~~~~~~~~ 24 | 25 | {{ form.title|safe }} 26 | 27 | {{ form.description|safe }} 28 | {% if form.sections|length > 0 %} 29 | Sections 30 | ^^^^^^^^ 31 | {% for section in form.sections %} 32 | * {{ section.title|safe }} {% if section.documentcloud.start_page %}(`p. 
{{ section.documentcloud.start_page }}{% if section.documentcloud.end_page %}-{{ section.documentcloud.end_page }}{% endif%} <{{ section.documentcloud.canonical_url }}>`_){% endif %} 33 | 34 | {% endfor %} 35 | {% endif %} 36 | 37 | {% if not form.documentcloud_id %} 38 | *No PDF available.* 39 | {% else %} 40 | Example Form 41 | ^^^^^^^^^^^^ 42 | 43 | 44 | .. raw:: html 45 | 46 |
47 | 48 | 57 | 62 | 63 | {% endif %} 64 | 65 | {% if form.get_models|length > 0 %} 66 | Database Tables 67 | ^^^^^^^^^^^^^^^ 68 | Data collected via {{ form.type_and_num|safe }} filings are written to the following tables: 69 | {% for model in form.get_models %} 70 | * `{{ model.klass_name }} `_ 71 | {% endfor %} 72 | {% endif %} 73 | 74 | {% endfor %} 75 | {% endfor %} -------------------------------------------------------------------------------- /example/toolbox/templates/toolbox/models.rst: -------------------------------------------------------------------------------- 1 | Database tables 2 | =============== 3 | 4 | The {{ model_count }} tab-delimited database exports published by California's Secretary of State and loaded by this Django application. 5 | 6 | .. warning:: 7 | 8 | Most definitions below are drawn from the spotty and incomplete `official documentation `_ verbatim. As we continue our research, we plan to improve the descriptions. 9 | 10 | For the time being, to be absolutely certain about what each table and field contains, you should compare the electronic data back to the original paper records published by the state. 11 | 12 | The categories for these tables are based on what's found in the `ReadMe `_ file for the .ZIP database export file and the `mapping `_ of .CAL format to database fields. However, in cases where this official documentation was incomplete or inconsistent, we've either listed the table under whichever category is most obviously relevant or listed it under "Other". 
13 | 14 | {% for group, model_list in group_list %} 15 | {{ group|capfirst }} tables 16 | --------------------------- 17 | 18 | {% for object in model_list %} 19 | {{ object.klass_name }} 20 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 21 | 22 | {{ object.doc.strip|safe }} 23 | 24 | **Sample:** `{{ object.get_tsv_name }} `_ 25 | 26 | {% if object.DOCUMENTCLOUD_PAGES|length > 0 %} 27 | Source Docs 28 | ^^^^^^^^^^^ 29 | {% for doc, pages in object.docs.items %} 30 | *{{ doc }}* 31 | 32 | 33 | .. raw:: html 34 | 35 |
{% for page in pages %}

p. {{ page.num }}

{% endfor %}
36 | 37 | {% endfor %} 38 | {% endif %} 39 | {% if object.FILING_FORMS|length > 0 %} 40 | Filing Forms 41 | ^^^^^^^^^^^^ 42 | {{ object.klass_name }} contains data collected from the following filing forms, form parts and schedules: 43 | 44 | {% for form, sections in object.get_filing_forms_w_sections %} 45 | {% if sections|length > 1 %} 46 | * `{{ form.type_and_num|safe }} `_ ({{ form.title|safe }}) 47 | {% for section in sections %} 48 | * {{ section.title|safe }} 49 | {% endfor %} 50 | {% elif sections|length == 1 %} 51 | * `{{ form.type_and_num|safe }} `_ ({{ form.title|safe }}): {{ sections.0.title|safe }} 52 | {% else %} 53 | * `{{ form.type_and_num|safe }} `_ ({{ form.title|safe }}) 54 | {% endif %} 55 | {% endfor %} 56 | {% endif %} 57 | 58 | Fields 59 | ^^^^^^ 60 | 61 | .. raw:: html 62 | 63 |
64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | {% for field in object.get_field_list %} 75 | {% if field.name != "id" %} 76 | 77 | 78 | 79 | 80 | 81 | 82 | {% endif %} 83 | {% endfor %} 84 | 85 |
NameTypeUnique keyDefinition
{{ field.name }}{{ field.description }}{% if field.is_unique_key %}Yes{% else %}No{% endif %}{{ field.definition|capfirst }}
86 |
87 | {% if object.choice_fields|length > 0 %} 88 | Look-up Codes 89 | ^^^^^^^^^^^^^ 90 | {% for field in object.choice_fields %} 91 | *{{ field.name }}* 92 | {% if field.documentcloud_pages|length > 0%} 93 | {% for doc, pages in field.docs.items %} 94 | *{{ doc }}* 95 | 96 | .. raw:: html 97 | 98 |
{% for page in pages %}

p. {{ page.num }}

{% endfor %}
99 | 100 | {% endfor %} 101 | {% endif %} 102 | 103 | .. raw:: html 104 | 105 |
106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | {% for choice in field.choices %} 115 | 116 | 117 | 118 | 119 | {% endfor %} 120 | 121 |
CodeDefinition
{{ choice.0 }}{{ choice.1 }}
122 |
123 | {% endfor %} 124 | 125 | {% endif %} 126 | {% endfor %} 127 | {% endfor %} 128 | 129 | Empty files 130 | ----------- 131 | 132 | The following tables are described in the official documentation, but the .TSV files of the same name are empty. 133 | 134 | .. raw:: html 135 | 136 |
137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | {% for object in empty_files %} 146 | 147 | 148 | 149 | 150 | {% endfor %} 151 | 152 |
GroupFile Name
{{ object.klass_group }}{{ object.klass_name }}
153 |
-------------------------------------------------------------------------------- /example/urls.py: -------------------------------------------------------------------------------- 1 | from django.conf import settings 2 | from django.contrib import admin 3 | from django.urls import re_path 4 | from django.views.static import serve 5 | admin.autodiscover() 6 | 7 | 8 | urlpatterns = ( 9 | re_path(r'^admin/', admin.site.urls), 10 | re_path(r'^static/(?P<path>.*)$', serve, { 11 | 'document_root': settings.STATIC_ROOT, 12 | 'show_indexes': True, 13 | }), 14 | ) 15 | -------------------------------------------------------------------------------- /example/wsgi.py: -------------------------------------------------------------------------------- 1 | """ 2 | WSGI config for example project. 3 | 4 | It exposes the WSGI callable as a module-level variable named ``application``. 5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/4.0/howto/deployment/wsgi/ 8 | """ 9 | 10 | import os 11 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "example.settings") 12 | 13 | from django.core.wsgi import get_wsgi_application 14 | application = get_wsgi_application() 15 | -------------------------------------------------------------------------------- /manage.py: -------------------------------------------------------------------------------- 1 | example/manage.py -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal = 1 3 | 4 | [flake8] 5 | max-line-length = 999 6 | exclude = calaccess_raw/migrations/* 7 | ignore = D105,D200,D204,D401,W503 8 | 9 | [metadata] 10 | license-file = LICENSE 11 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup 3 | 4 | 
def read(file_name):
    """
    Read the provided file.

    The name is resolved relative to the directory that holds this script,
    so the result does not depend on the current working directory.
    Returns the file's full text.
    """
    this_dir = os.path.dirname(os.path.abspath(__file__))
    file_path = os.path.join(this_dir, file_name)
    # Decode explicitly as UTF-8 so the result does not vary with the
    # platform's default locale encoding.
    with open(file_path, encoding="utf-8") as f:
        return f.read()


def version_scheme(version):
    """
    Version scheme hack for setuptools_scm.

    Appears to be necessary due to the bug documented here: https://github.com/pypa/setuptools_scm/issues/342
    If that issue is resolved, this method can be removed.

    Returns the bare tag for an exact tagged build. Otherwise returns the
    guessed next version with the current Unix timestamp appended.
    """
    # Imported lazily: setuptools_scm is only guaranteed to be available once
    # setup_requires has been processed.
    import time

    from setuptools_scm.version import guess_next_version

    if version.exact:
        return version.format_with("{tag}")

    next_version = version.format_next_version(guess_next_version)
    return next_version + str(int(time.time()))


def local_version(version):
    """
    Local version scheme hack for setuptools_scm.

    Appears to be necessary due to the bug documented here: https://github.com/pypa/setuptools_scm/issues/342
    If that issue is resolved, this method can be removed.

    Always returns an empty string, so no local version segment is added.
    """
    return ""


setup(
    name='django-calaccess-raw-data',
    author='Ben Welsh',
    author_email='b@palewi.re',
    url='http://django-calaccess.californiacivicdata.org/',
    description=("A Django app to download, extract and load campaign "
                 "finance and lobbying activity data from the California "
                 "Secretary of State's CAL-ACCESS database"),
    long_description=read("README.md"),
    long_description_content_type="text/markdown",
    license='MIT',
    # Lists rather than tuples: setuptools documents these keywords as lists.
    packages=[
        'calaccess_raw',
        'calaccess_raw.admin',
        'calaccess_raw.management',
        'calaccess_raw.migrations',
        'calaccess_raw.management.commands',
        'calaccess_raw.models',
        'calaccess_raw.annotations',
    ],
    include_package_data=True,
    zip_safe=False,
    install_requires=[
        'django==4.0.*',
        'django-postgres-copy',
        'requests',
        'csvkit',
        'django-internetarchive-storage',
    ],
    classifiers=[
        'Development Status :: 5 - Production/Stable',
        'Operating System :: OS Independent',
        'Intended Audience :: Developers',
        'Programming Language :: Python',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: 3.9',
        'Programming Language :: Python :: 3.10',
        'Framework :: Django',
        'Framework :: Django :: 4',
        'License :: OSI Approved :: MIT License',
    ],
    project_urls={
        'Project': 'https://www.californiacivicdata.org/',
        'Documentation': 'https://django-calaccess.californiacivicdata.org/',
        'Funding': 'https://www.californiacivicdata.org/about/',
        'Source': 'https://github.com/palewire/django-calaccess-raw-data',
        'Testing': 'https://github.com/palewire/django-calaccess-raw-data/actions/workflows/tests.yaml',
        'Tracker': 'https://github.com/palewire/django-calaccess-raw-data/issues',
    },
    setup_requires=["setuptools_scm"],
    use_scm_version={"version_scheme": version_scheme, "local_scheme": local_version},
)