├── federal_spending ├── __init__.py ├── fbo │ ├── __init__.py │ ├── management │ │ ├── __init__.py │ │ └── commands │ │ │ ├── __init__.py │ │ │ └── load_historical_fbo.py │ ├── views.py │ ├── tests.py │ └── models.py ├── usaspending │ ├── scripts │ │ ├── __init__.py │ │ └── usaspending │ │ │ ├── __init__.py │ │ │ ├── config.py │ │ │ ├── grants_loader.py │ │ │ ├── contracts_loader.py │ │ │ ├── helpers.py │ │ │ ├── faads.py │ │ │ ├── converter.py │ │ │ └── fpds.py │ ├── utils │ │ ├── __init__.py │ │ ├── log.py │ │ └── ucsv.py │ ├── management │ │ ├── __init__.py │ │ ├── base │ │ │ ├── __init__.py │ │ │ ├── usaspending_importer.py │ │ │ └── importer.py │ │ └── commands │ │ │ ├── __init__.py │ │ │ ├── convert_usaspending_contracts.py │ │ │ ├── loadgrants.py │ │ │ ├── loadcontracts.py │ │ │ ├── convert_usaspending_grants.py │ │ │ ├── download_files.py │ │ │ ├── run_data_tests.py │ │ │ ├── create_partition.py │ │ │ ├── fresh_import.py │ │ │ ├── create_indexes.py │ │ │ ├── daily_update.py │ │ │ └── import_updates.py │ ├── views.py │ ├── downloads │ │ ├── delta_downloads.txt │ │ ├── fy_2014.txt │ │ └── all_downloads.txt │ ├── admin.py │ ├── tests.py │ ├── logs │ │ ├── convert_usaspending_grants.log │ │ └── convert_usaspending_contracts.log │ ├── __init__.py │ └── models.py ├── tmp │ ├── convert_usaspending_grants │ └── convert_usaspending_contracts ├── local_settings.example.py ├── urls.py ├── wsgi.py ├── settings.py └── metadata │ ├── piid.yml │ └── departments.yml ├── .gitignore ├── requirements.txt ├── tsconfig.sql ├── manage.py ├── CONTRIBUTING.md ├── federal_spending.stop ├── README.md └── LICENSE /federal_spending/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /federal_spending/fbo/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /federal_spending/fbo/management/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /federal_spending/usaspending/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /federal_spending/usaspending/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /federal_spending/fbo/management/commands/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /federal_spending/tmp/convert_usaspending_grants: -------------------------------------------------------------------------------- 1 | #piid file -------------------------------------------------------------------------------- /federal_spending/usaspending/management/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /federal_spending/fbo/views.py: -------------------------------------------------------------------------------- 1 | # Create your views here. 
2 | -------------------------------------------------------------------------------- /federal_spending/tmp/convert_usaspending_contracts: -------------------------------------------------------------------------------- 1 | #piid file -------------------------------------------------------------------------------- /federal_spending/usaspending/management/base/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /federal_spending/usaspending/scripts/usaspending/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /federal_spending/usaspending/management/commands/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /federal_spending/usaspending/views.py: -------------------------------------------------------------------------------- 1 | # Create your views here. 
2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.zip 3 | *.csv 4 | *.log 5 | local_settings.py 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Django==1.5.5 2 | argparse==1.2.1 3 | django-localflavor==1.0 4 | wsgiref==0.1.2 5 | python-dateutil 6 | psycopg2 7 | -------------------------------------------------------------------------------- /federal_spending/local_settings.example.py: -------------------------------------------------------------------------------- 1 | 2 | LOGGING_EMAIL = {'recipients': [ 'bla@bla.com',], 3 | 'host': 'your.smtp.server', 4 | 'port': 25, 5 | 'username': 'someone', 6 | 'password': 'somepword' 7 | } 8 | -------------------------------------------------------------------------------- /tsconfig.sql: -------------------------------------------------------------------------------- 1 | create text search dictionary federal_spending ( template = simple, stopwords = federal_spending ); 2 | create text search configuration federal_spending ( copy = simple ); 3 | alter text search configuration federal_spending alter mapping for asciiword with federal_spending; 4 | commit; 5 | -------------------------------------------------------------------------------- /manage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import sys 4 | 5 | if __name__ == "__main__": 6 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "federal_spending.settings") 7 | 8 | from django.core.management import execute_from_command_line 9 | 10 | execute_from_command_line(sys.argv) 11 | -------------------------------------------------------------------------------- 
/federal_spending/usaspending/downloads/delta_downloads.txt: -------------------------------------------------------------------------------- 1 | http://www.usaspending.gov/datafeeds/2013_All_Contracts_Delta_20131015.csv.zip 2 | http://www.usaspending.gov/datafeeds/2014_All_Contracts_Delta_20131015.csv.zip 3 | http://www.usaspending.gov/datafeeds/2013_All_Grants_Delta_20131015.csv.zip 4 | http://www.usaspending.gov/datafeeds/2014_All_Grants_Delta_20131015.csv.zip -------------------------------------------------------------------------------- /federal_spending/usaspending/admin.py: -------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | from federal_spending.usaspending.models import Contract 3 | #from dcdata.grants.models import Grant 4 | 5 | class ContractAdmin(admin.ModelAdmin): 6 | pass 7 | 8 | admin.site.register(Contract, ContractAdmin) 9 | 10 | 11 | #class GrantAdmin(admin.ModelAdmin): 12 | # pass 13 | 14 | #admin.site.register(Grant, GrantAdmin) -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## Public domain 2 | 3 | The project is in the public domain within the United States, and copyright and related rights in the work worldwide are waived through the [CC0 1.0 Universal public domain dedication][CC0]. 4 | 5 | All contributions to this project will be released under the CC0 dedication. By submitting a pull request, you are agreeing to comply with this waiver of copyright interest. 6 | 7 | [CC0]: http://creativecommons.org/publicdomain/zero/1.0/ 8 | -------------------------------------------------------------------------------- /federal_spending/fbo/tests.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file demonstrates writing tests using the unittest module. 
class Command(BaseUSASpendingConverter):
    """Converter command for USASpending contract downloads.

    The class attributes select the model, the output base name and the
    field-definition module (presumably consumed by
    BaseUSASpendingConverter -- verify against the base class).
    """

    module = fpds
    modelclass = Contract
    outfile_basename = 'contracts'

    def __init__(self):
        super(Command, self).__init__()

    def file_is_right_type(self, file_):
        """Tell whether ``file_`` contains the text 'Contracts'."""
        looks_like_contracts = 'Contracts' in file_
        return looks_like_contracts
class Command(BaseCommand):
    """Load one converted grants file into the database.

    Usage: ``manage.py loadgrants <grant_path>``
    """

    args = '<grant_path>'
    help = 'Bulk-load a converted grants file via Loader.insert_faads().'

    @transaction.commit_on_success
    def handle(self, grant_path=None, **options):
        """Run the grants loader on ``grant_path``.

        Args:
            grant_path: path of the converted grants file to load.
            **options: management-command options (unused here).

        Raises:
            CommandError: when no path was given on the command line.
        """
        # Imported locally: the module only imports BaseCommand at the top.
        from django.core.management.base import CommandError

        # Without this guard a missing argument surfaced as a bare TypeError
        # traceback instead of a usage message.
        if not grant_path:
            raise CommandError('Usage: manage.py loadgrants <grant_path>')

        Loader().insert_faads(grant_path)
class Command(BaseCommand):
    """Unpack every bulk FBO zip archive found under fbo/bulk_csvs."""

    def handle(self, *args, **kwargs):
        """Extract all ``*.zip`` files in the bulk CSV directory in place.

        Planned pipeline (only the first step is implemented):
          1. unzip bulk csv
          2. read/process each record
          3. save to database (postgres COPY?)
        """
        zip_dir = os.path.join(settings.PROJECT_ROOT, 'fbo', 'bulk_csvs')

        for name in os.listdir(zip_dir):
            if not name.endswith('.zip'):
                continue
            # The context manager closes the archive even if extraction
            # fails; the handle was previously left open.
            with ZipFile(os.path.join(zip_dir, name), 'r') as archive:
                archive.extractall(zip_dir)

        # TODO: walk the extracted ``*.csv`` files and process each record.
13 | -------------------------------------------------------------------------------- /federal_spending/usaspending/scripts/usaspending/config.py: -------------------------------------------------------------------------------- 1 | INDEX_COLS_BY_TABLE = { 2 | 'usaspending_contract': [ 3 | 'using gin (to_tsvector(\'federal_spending\'::regconfig, agency_name::text))', 4 | 'statecode, congressionaldistrict', 5 | 'using gin (to_tsvector(\'federal_spending\'::regconfig, contracting_agency_name::text))', 6 | '(fiscal_year DESC, obligatedamount DESC)', 7 | 'dunsnumber', 8 | 'obligatedamount', 9 | 'piid', 10 | 'using gin (to_tsvector(\'federal_spending\'::regconfig, requesting_agency_name::text))', 11 | 'signeddate', 12 | 'using gin (to_tsvector(\'federal_spending\'::regconfig, city::text))', 13 | 'using gin (to_tsvector(\'federal_spending\'::regconfig, vendorname::text))', 14 | 'fiscal_year', 15 | 'unique_transaction_id', 16 | 'id' 17 | ], 18 | 'usaspending_grant': [ 19 | 'using gin (to_tsvector(\'federal_spending\'::regconfig, agency_name::text)) ', 20 | 'using gin (to_tsvector(\'federal_spending\'::regconfig, recipient_name::text))', 21 | 'total_funding_amount', 22 | 'unique_transaction_id', 23 | 'fiscal_year', 24 | 'id' 25 | ], 26 | } 27 | -------------------------------------------------------------------------------- /federal_spending/usaspending/scripts/usaspending/grants_loader.py: -------------------------------------------------------------------------------- 1 | from faads import FIELDS, CALCULATED_FIELDS 2 | import os.path 3 | from django.db import connection 4 | from django.db import transaction 5 | import re 6 | 7 | class Loader(): 8 | def fields(self): 9 | return [ x[0] for x in FIELDS ] + [ x[0] for x in CALCULATED_FIELDS ] 10 | 11 | def sql_str(self, infile): 12 | table = 'usaspending_grant' 13 | return self.sql_template_postgres(infile, table, self.fields()) 14 | 15 | def print_sql(self, infile): 16 | print self.sql_str(infile) 17 | 18 | def 
class Loader(object):
    """Bulk-load a converted contracts file into Postgres using COPY."""

    def fields(self):
        """Return the column names: FIELDS first, then CALCULATED_FIELDS."""
        return [x[0] for x in FIELDS] + [x[0] for x in CALCULATED_FIELDS]

    def sql_str(self, infile):
        """Return the COPY statement that loads ``infile``."""
        table = 'usaspending_contract'
        return self.sql_template_postgres(infile, table, self.fields())

    def print_sql(self, infile):
        """Print the COPY statement for ``infile`` without executing it."""
        print(self.sql_str(infile))

    def sql_template_postgres(self, file_, table, fields):
        """Build the COPY statement for one fiscal-year table.

        The first four-digit run in ``file_`` is appended to ``table``
        (``<table>_<year>``).

        Raises:
            IndexError: when ``file_`` contains no four-digit run. The type
                matches what the previous ``findall(...)[0]`` raised, but
                now carries an explanatory message.
        """
        year_match = re.search(r'\d{4}', file_)
        if year_match is None:
            raise IndexError(
                'no four-digit fiscal year found in %r' % (file_,))
        table = table + '_' + year_match.group(0)

        # The path is embedded in a SQL string literal, so double any single
        # quote to keep the statement well formed.
        path_literal = os.path.abspath(file_).replace("'", "''")

        return """
            copy {1} \
            ({2}) \
            FROM '{0}' \
            DELIMITER '|' \
            CSV QUOTE '"' \
            NULL 'NULL' \
            """.format(path_literal, table, ', '.join(fields))

    @transaction.commit_on_success
    def insert_fpds(self, infile):
        """Execute the COPY statement for ``infile`` on the default connection."""
        sql = self.sql_str(infile)
        cursor = connection.cursor()
        cursor.execute(sql)
class Command(BaseCommand):
    """Download the archives listed in a URL file and unpack them."""

    def handle(self, download_file, **options):
        """Fetch every URL listed in ``download_file`` into settings.CSV_PATH.

        A URL is skipped when its archive, or the matching extracted file
        under ``datafeeds/``, already exists. Afterwards every ``*.zip`` in
        CSV_PATH is extracted there and then deleted.

        Args:
            download_file: text file with one download URL per line.
            **options: management-command options (unused here).
        """
        csv_path = settings.CSV_PATH

        # Read the whole list first so the file handle is closed before the
        # (potentially long) downloads start.
        with open(download_file) as url_list:
            urls = [line.strip() for line in url_list]

        for url in urls:
            if not url:
                # Blank lines are not URLs.
                continue
            print("Reading line " + url)
            archive_name = url.split('/')[-1]
            outfile_name = os.path.join(csv_path, archive_name)
            extracted_name = os.path.join(
                csv_path, 'datafeeds', archive_name.replace('.zip', ''))
            print(extracted_name)
            if os.path.exists(outfile_name) or os.path.exists(extracted_name):
                print("File exists, skipping")
            else:
                print("saving to " + outfile_name)
                urllib.urlretrieve(url, outfile_name)

        for name in os.listdir(csv_path):
            if not name.endswith('.zip'):
                continue
            print("unzipping " + name)
            archive_path = os.path.join(csv_path, name)
            # Close the archive before deleting it.
            with zipfile.ZipFile(archive_path) as archive:
                archive.extractall(csv_path)

            # get rid of zipfile after unzipping
            os.remove(archive_path)
class Command(BaseCommand):
    """Run sanity checks that compare local data with USASpending.gov."""

    option_list = BaseCommand.option_list + (
        make_option('-t', '--test',
            action='store',
            dest='test',
            help='Test to run on data'
        ),
    )

    # XML namespace prefix used by every element in the API response.
    SCHEMA_NS = '{http://www.usaspending.gov/schemas/}'

    def handle(self, *args, **kwargs):
        """Run the data tests.

        The ``--test`` option is accepted, but only one check exists so far
        and it is always run regardless of the value given.
        """
        self.transaction_number()

    def _require(self, parent, tag):
        """Return the ``tag`` child of ``parent`` or raise CommandError."""
        node = parent.find(self.SCHEMA_NS + tag)
        # Compare with None explicitly: an Element without children is falsy.
        if node is None:
            raise CommandError(
                'USASpending response has no <%s> element' % tag)
        return node

    def transaction_number(self):
        """Print the totals and fiscal-year values reported by the API.

        Raises:
            CommandError: when an expected element is missing.
            requests.exceptions.RequestException: on HTTP or network errors.
        """
        # compare the total number of transactions to USASpending via API
        fpds_base = 'http://www.usaspending.gov/faads/faads.php'
        print("making request")
        resp = requests.get(fpds_base, params={'detail': 's'}, timeout=60)
        resp.raise_for_status()

        # Parse the raw bytes so the XML parser handles the encoding itself.
        record = et.fromstring(resp.content)
        for tag in ('data', 'record'):
            record = self._require(record, tag)

        for section in ('totals', 'fiscal_years'):
            for child in self._require(record, section):
                print(child.text)
-------------------------------------------------------------------------------- /federal_spending/wsgi.py: -------------------------------------------------------------------------------- 1 | """ 2 | WSGI config for federal_spending project. 3 | 4 | This module contains the WSGI application used by Django's development server 5 | and any production WSGI deployments. It should expose a module-level variable 6 | named ``application``. Django's ``runserver`` and ``runfcgi`` commands discover 7 | this application via the ``WSGI_APPLICATION`` setting. 8 | 9 | Usually you will have the standard Django WSGI application here, but it also 10 | might make sense to replace the whole Django WSGI application with a custom one 11 | that later delegates to the Django one. For example, you could introduce WSGI 12 | middleware here, or combine a Django application with an application of another 13 | framework. 14 | 15 | """ 16 | import os 17 | 18 | # We defer to a DJANGO_SETTINGS_MODULE already in the environment. This breaks 19 | # if running multiple sites in the same mod_wsgi process. To fix this, use 20 | # mod_wsgi daemon mode with each site in its own daemon process, or use 21 | # os.environ["DJANGO_SETTINGS_MODULE"] = "federal_spending.settings" 22 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "federal_spending.settings") 23 | 24 | # This application object is used by any WSGI server configured to use this 25 | # file. This includes Django's development server, if the WSGI_APPLICATION 26 | # setting points here. 27 | from django.core.wsgi import get_wsgi_application 28 | application = get_wsgi_application() 29 | 30 | # Apply WSGI middleware here. 
class EncodingFormatter(logging.Formatter):
    """Formatter that encodes unicode output to a byte string."""

    def __init__(self, fmt, datefmt=None, encoding=None):
        """Store ``encoding`` (falls back to utf-8 when None)."""
        logging.Formatter.__init__(self, fmt, datefmt)
        self.encoding = encoding

    def format(self, record):
        """Format ``record`` and encode the result if it is unicode."""
        result = logging.Formatter.format(self, record)
        if isinstance(result, unicode):
            # 'replace' keeps a message with unencodable characters from
            # raising inside the logging call.
            result = result.encode(self.encoding or 'utf-8', 'replace')
        return result


def set_up_logger(importer_name, log_path, email_subject, email_recipients=None):
    """Create the logger for an importer.

    Everything (DEBUG and up) goes to ``<log_path>/<importer_name>.log``.
    When ``settings.LOGGING_EMAIL`` is set, records at WARN and above are
    also mailed.

    Args:
        importer_name: logger name and base name of the log file.
        log_path: directory that receives the log file.
        email_subject: subject line for emailed records.
        email_recipients: addresses to mail; defaults to
            ``settings.LOGGING_EMAIL['recipients']``.

    Returns:
        The configured ``logging.Logger``.
    """
    # Resolved at call time: reading the setting in the signature made the
    # module fail to import when LOGGING_EMAIL was missing or empty.
    email_settings = getattr(settings, 'LOGGING_EMAIL', None)

    # create logger
    log = logging.getLogger(importer_name)
    log.setLevel(logging.DEBUG)
    formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")

    # create file handler and set level to debug
    fh = logging.FileHandler(os.path.join(log_path, importer_name + '.log'))
    fh.setLevel(logging.DEBUG)
    fh.setFormatter(formatter)
    log.addHandler(fh)

    # create email handler and set level to warn
    if email_settings:
        if email_recipients is None:
            email_recipients = email_settings['recipients']
        eh = logging.handlers.SMTPHandler(
            (email_settings['host'], email_settings['port']),  # host
            email_settings['username'],  # from address
            email_recipients,
            email_subject,
            (email_settings['username'], email_settings['password'])  # credentials tuple
        )
        eh.setLevel(logging.WARN)
        # Mail bodies carry only the message, encoded as latin-1.
        eh.setFormatter(EncodingFormatter('%(message)s', encoding='iso8859-1'))
        log.addHandler(eh)

    return log
-------------------------------------------------------------------------------- /federal_spending/usaspending/logs/convert_usaspending_contracts.log: -------------------------------------------------------------------------------- 1 | 2014-01-13 11:53:20,151 - convert_usaspending_contracts - INFO - Starting Command 2 | 2014-01-13 11:53:20,151 - convert_usaspending_contracts - INFO - Found file 2013_All_Contracts_Delta_20131219.csv 3 | 2014-01-13 11:53:20,151 - convert_usaspending_contracts - INFO - Starting... 4 | 2014-01-13 11:53:29,868 - convert_usaspending_contracts - DEBUG - value '0819- 02272 EL DORADO' for field 'zipcode' is too long. 5 | 2014-01-13 11:53:45,964 - convert_usaspending_contracts - DEBUG - value '0819- 02272 EL DORADO' for field 'zipcode' is too long. 6 | 2014-01-13 11:53:50,123 - convert_usaspending_contracts - DEBUG - value '0819- 02272 EL DORADO' for field 'zipcode' is too long. 7 | 2014-01-13 11:53:53,667 - convert_usaspending_contracts - DEBUG - value '0819- 02272 EL DORADO' for field 'zipcode' is too long. 8 | 2014-01-13 11:53:56,105 - convert_usaspending_contracts - INFO - Done. 9 | 2014-01-13 11:53:56,105 - convert_usaspending_contracts - INFO - Found file 2014_All_Contracts_Delta_20131219.csv 10 | 2014-01-13 11:53:56,106 - convert_usaspending_contracts - INFO - Starting... 11 | 2014-01-13 11:54:35,896 - convert_usaspending_contracts - DEBUG - value 'P.O BOX 5175, AMMAN 11183' for field 'zipcode' is too long. 12 | 2014-01-13 11:54:53,202 - convert_usaspending_contracts - INFO - Done. 13 | 2014-01-13 11:54:53,202 - convert_usaspending_contracts - INFO - Found file 2014_All_Grants_Delta_20131219.csv 14 | 2014-01-13 11:54:53,202 - convert_usaspending_contracts - INFO - Doesn't match file pattern for this importer. Skipping. 
15 | 2014-01-13 11:54:53,202 - convert_usaspending_contracts - INFO - Found file 2013_All_Grants_Delta_20131219.csv 16 | 2014-01-13 11:54:53,202 - convert_usaspending_contracts - INFO - Doesn't match file pattern for this importer. Skipping. 17 | 2014-01-13 11:54:53,203 - convert_usaspending_contracts - INFO - Finished. 18 | -------------------------------------------------------------------------------- /federal_spending/usaspending/utils/ucsv.py: -------------------------------------------------------------------------------- 1 | """ 2 | This code was taken from the Python documentation. 3 | http://docs.python.org/library/csv.html 4 | """ 5 | 6 | import csv, codecs, cStringIO 7 | 8 | class UTF8Recoder: 9 | """ 10 | Iterator that reads an encoded stream and reencodes the input to UTF-8 11 | """ 12 | def __init__(self, f, encoding): 13 | self.reader = codecs.getreader(encoding)(f) 14 | 15 | def __iter__(self): 16 | return self 17 | 18 | def next(self): 19 | return self.reader.next().encode("utf-8") 20 | 21 | class UnicodeReader: 22 | """ 23 | A CSV reader which will iterate over lines in the CSV file "f", 24 | which is encoded in the given encoding. 25 | """ 26 | 27 | def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds): 28 | f = UTF8Recoder(f, encoding) 29 | self.reader = csv.reader(f, dialect=dialect, **kwds) 30 | 31 | def next(self): 32 | row = self.reader.next() 33 | return [unicode(s, "utf-8") for s in row] 34 | 35 | def __iter__(self): 36 | return self 37 | 38 | class UnicodeDictReader: 39 | """ 40 | A CSV reader which will iterate over lines in the CSV file "f", 41 | which is encoded in the given encoding. 
class UnicodeWriter:
    """
    CSV writer that takes rows of arbitrary cells and writes them to the
    stream "f" in the requested encoding.
    """

    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
        self.stream = f
        self.encoder = codecs.getincrementalencoder(encoding)()
        # The csv writer never touches the target stream directly: it writes
        # into this in-memory queue, which writerow() drains after each row.
        self.queue = cStringIO.StringIO()
        self.writer = csv.writer(self.queue, dialect=dialect, **kwds)

    def writerow(self, row):
        """Format one row as CSV and write it to the target stream."""
        utf8_cells = []
        for cell in row:
            # Every cell is coerced to unicode first, then handed to the csv
            # module as a UTF-8 byte string.
            utf8_cells.append(unicode(cell).encode("utf-8"))
        self.writer.writerow(utf8_cells)

        # Pull the UTF-8 CSV text back out of the queue, re-encode it into
        # the target encoding and pass it on to the real stream.
        formatted = self.queue.getvalue().decode("utf-8")
        self.stream.write(self.encoder.encode(formatted))

        # Reset the queue so the next row starts from an empty buffer.
        self.queue.truncate(0)
def correctionLateIndicator(value):
    """Reduce a correction/late indicator to 'C', 'L' or ''.

    Returns '' for an empty/None value, for the literal 'current entry' and
    for anything that does not start with 'C' or 'L'. Empty input used to
    raise IndexError (TypeError for None) on ``value[0]``.
    """
    if not value or value == 'current entry':
        return ''

    marker = value[0]
    if marker in ('C', 'L'):
        return marker

    return ''


def nullable(value):
    """Return None for '' or 'null', otherwise the value unchanged."""
    if value == '' or value == 'null':
        return None

    return value


def nullable_float(value):
    """Return None for '' or 'N/A', otherwise ``float(value)``.

    Raises ValueError when the value is not a valid float literal.
    """
    if value == '' or value == 'N/A':
        return None

    return float(value)


def nullable_int(value):
    """Return None for '' or out-of-range values, otherwise ``int(value)``.

    Raises ValueError when the value is not a valid integer literal.
    """
    if value == '':
        return None

    parsed_value = int(value)

    # these are Postgres' limits
    if parsed_value < -2147483648 or parsed_value > 2147483647:
        return None

    return parsed_value


def splitInt(value):
    """Return the value truncated to an int, or None for a falsy value.

    The value goes through float() first so that inputs with a fractional
    part (e.g. '100.00') are accepted.
    """
    if value:
        value = float(value)  # solve currency weirdness
        value = int(value)
        return value
    else:
        return None


def splitIntCode(value):
    """Return the code part of a 'code: description' value as an int.

    Returns None when there is no code: empty or None input, or a value of
    'not applicable' (for which splitCode() returns None; passing that to
    int() used to raise TypeError).
    """
    code = splitCode(value)

    if not code:
        return None

    return int(code)


def splitCode(value):
    """Return the part of the value before the first ':'.

    Returns None for 'not applicable' (case-insensitive) and '' for None.
    """
    if value and value.lower() == 'not applicable':
        return None
    elif value is not None:
        return value.split(u':')[0]
    else:
        return ''


def transformFlag(value):
    """Map a yes/true or no/false style value to 't' or 'f'.

    Only the first character is inspected, case-insensitively. Returns None
    for empty input or any other leading character.
    """
    if value and value[0]:
        initial = value[0].lower()
        if initial in ('y', 't'):
            return 't'
        elif initial in ('n', 'f'):
            return 'f'

    return None


def first_char(value):
    """Return the first character of the value ('' for an empty value)."""
    return value[:1]


def recovery_act(value):
    """Map a recovery-act flag to 't', 'f' or None.

    Unlike transformFlag() the whole value must match (case-insensitively):
    'recovery act', 'y' or 't' give 't'; 'n' or 'f' give 'f'.
    """
    if value:
        lowered = value.lower()
        if lowered in ("recovery act", 'y', 't'):
            return 't'
        elif lowered in ('n', 'f'):
            return 'f'

    return None


def state_abbr(value):
    """Look the stripped, lower-cased value up in STATES_NORMALIZED.

    Returns '' when the value is not a key of that mapping.
    """
    return STATES_NORMALIZED.get(value.strip().lower(), '')


def agency_name_lookup(value):
    """Return the agency name for the value; currently always ''.

    The lookup table is empty, so every value falls through to the default.
    """
    agencies = {}
    # NOTE: I don't know where this agencies lookup file is. This should not be hardcoded
    # but come from a file. I don't see any candidates for it in the repo. :(
    return agencies.get(value, '')


def datestamp():
    """Return today's local date formatted as YYYYMMDD."""
    return datetime.datetime.today().strftime('%Y%m%d')
class Command(BaseCommand):
    """Create fiscal-year partition tables for the spending relations."""

    option_list = BaseCommand.option_list + (
        make_option('-t', '--table',
            action='store',
            dest='table',
            help='Table to create partition for'
        ),
        make_option('-y', '--fiscal-year',
            action='store',
            dest='fiscal_year',
            help='Fiscal year to create partition for'
        ),
    )

    @transaction.commit_on_success
    def handle(self, *args, **kwargs):
        """
        Takes a relation name and a fiscal year and creates a partition for it.
        Current relation names for spending data are:
            usaspending_contract
            usaspending_grant

        A fiscal year of 'all' expands to every year in settings.FISCAL_YEARS;
        without a table, 'all' covers both relations above.

        Raises CommandError when neither a table nor 'all' is given, or when
        a table is given without a fiscal year.
        """
        fiscal_year = kwargs.get('fiscal_year')
        table = kwargs.get('table')

        if table:
            if not fiscal_year:
                # Previously this went on to create "<table>_None".
                raise CommandError("You must specify a fiscal year or 'all'")
            tables = [table]
        elif fiscal_year == 'all':
            tables = ['usaspending_contract', 'usaspending_grant']
        else:
            raise CommandError("You must specify a table or specify 'all'")

        # 'all' together with an explicit table used to be passed straight
        # into the SQL as "fiscal_year = all"; expand it the same way as the
        # table-less case instead.
        if fiscal_year == 'all':
            fiscal_years = settings.FISCAL_YEARS
        else:
            fiscal_years = [fiscal_year]

        for fy in fiscal_years:
            for base_table in tables:
                self.create_partition(fy, base_table)

    def create_partition(self, fiscal_year, base_table):
        """Create the table "<base_table>_<fiscal_year>".

        The new table inherits from base_table and carries a check
        constraint pinning fiscal_year to the given year. The statement is
        printed before it is executed on the 'default' connection. No
        indexes are created here.
        """
        partition_name = '{}_{}'.format(base_table, fiscal_year)

        create_stmt = """
            create table {} (
                check ( fiscal_year = {} )
            ) inherits ({})
        """.format(partition_name, fiscal_year, base_table)
        print(create_stmt)

        c = connections['default'].cursor()
        c.execute(create_stmt)
        print("table created")
def _maj_agency_cat(value):
    """Return the code part of the value, cut to the model column length.

    splitCode() returns None for 'not applicable' and '' for None; both are
    passed through untouched, because slicing None raised TypeError.
    """
    code = splitCode(value)
    if not code:
        return code
    return code[:Grant._meta.get_field('maj_agency_cat').max_length]


# (source column name, transform) pairs; a transform of None means the raw
# value is used unchanged.
FIELDS = [('unique_transaction_id', None),
          ('transaction_status', None),
          ('fyq', None),
          ('cfda_program_num', None),
          ('sai_number', None),
          ('account_title', None),
          ('recipient_name', None),
          ('recipient_city_code', nullable),
          ('recipient_city_name', None),
          ('recipient_county_name', None),
          ('recipient_county_code', nullable),
          ('recipient_zip', None),
          ('recipient_country_code', splitCode),
          ('recipient_type', splitCode),
          ('action_type', splitCode),
          ('agency_code', splitCode),
          ('federal_award_id', None),
          ('federal_award_mod', None),
          ('fed_funding_amount', splitInt),
          ('non_fed_funding_amount', splitInt),
          ('total_funding_amount', splitInt),
          ('obligation_action_date', nullable),
          ('starting_date', nullable),
          ('ending_date', nullable),
          ('assistance_type', splitCode),
          ('record_type', splitCode),
          ('correction_late_ind', correctionLateIndicator),
          ('fyq_correction', None),
          ('principal_place_code', None),
          ('principal_place_state', None),
          ('principal_place_cc', None),
          ('principal_place_zip', None),
          ('principal_place_cd', None),
          ('cfda_program_title', None),
          ('agency_name', None),
          ('project_description', None),
          ('duns_no', None),
          ('duns_conf_code', None),
          ('progsrc_agen_code', None),
          ('progsrc_acnt_code', None),
          ('progsrc_subacnt_code', None),
          ('receip_addr1', None),
          ('receip_addr2', None),
          ('receip_addr3', None),
          ('face_loan_guran', splitInt),
          ('orig_sub_guran', splitInt),
          ('fiscal_year', splitInt),
          ('principal_place_state_code', splitCode),
          ('recip_cat_type', splitCode),
          ('asst_cat_type', splitCode),
          ('recipient_cd', splitCode),
          ('maj_agency_cat', _maj_agency_cat),
          ('rec_flag', recovery_act),
          ('uri', None),
          ('recipient_state_code', splitCode)]

# (output field name, source column or None, transform) triples; with no
# source column the transform is called without arguments.
CALCULATED_FIELDS = [
    ('imported_on', None, datestamp)
]
# Notice types that describe a solicitation.
SOLICITATION_CHOICES = (
    ('PRESOL', 'Presolicitation'),
    ('COMBINE', 'Combined Synopsis/Solicitation'),
    ('SRCSGT', 'Sources Sought'),
    ('SSALE', 'Sale of Surplus Property'),
    ('SNOTE', 'Special Notice'),
    ('FSTD', 'Foreign Government Standard'),
)

# Every notice type: the solicitation types plus the non-solicitation ones.
# Built from SOLICITATION_CHOICES so the two tuples cannot drift apart.
NOTICE_CHOICES = SOLICITATION_CHOICES + (
    ('AWARD', 'Award'),
    ('ITB', 'Intent to Bundle'),
    ('JA', 'Justification and Approval'),
    ('FO', 'Fair Opportunity Limited Sources Justification'),
)

JUSTIFICATION_CHOICES = (
    (1, 'Urgency'),
    (2, 'Only One Source (Except Brand Name)'),
    (3, 'Follow-on Delivery Order Following Competitive Initial Order'),
    (4, 'Minimum Guarantee'),
    (5, 'Other Statutory Authority'),
)


class Notice(models.Model):
    """A single notice record; only notice_type is required."""

    notice_id = models.CharField(max_length=255, null=True, blank=True)
    sol_number = models.CharField(max_length=128, null=True, blank=True)
    notice_type = models.CharField(max_length=10, choices=NOTICE_CHOICES, null=False, blank=False)
    date = models.DateField(null=True, blank=True)
    # max_length has no effect on an IntegerField, so it is not passed.
    naics = models.IntegerField(null=True, blank=True)
    description = models.TextField(null=True, blank=True)
    class_code = models.CharField(max_length=20, null=True, blank=True)
    subject = models.TextField(null=True, blank=True)
    zip_code = models.CharField(max_length=128, null=True, blank=True)
    setaside = models.CharField(max_length=128, null=True, blank=True)
    contact_name = models.TextField(null=True, blank=True)
    contact_description = models.TextField(null=True, blank=True)
    contact_email = models.TextField(null=True, blank=True)
    notice_title = models.TextField(null=True, blank=True)
    link = models.URLField(null=True, blank=True)
    email = models.EmailField(null=True, blank=True)
    office_address = models.CharField(null=True, blank=True, max_length=255)
    archive_date = models.DateField(null=True, blank=True)
    agency_name = models.TextField(null=True, blank=True)

    response_date = models.DateField(null=True, blank=True)
    pop_address = models.TextField(null=True, blank=True)
    pop_zip = models.CharField(max_length=128, null=True, blank=True)
    pop_country = models.CharField(max_length=125, null=True, blank=True)
    recovery_act = models.NullBooleanField(default=False, null=True, blank=True)
    solicitation_type = models.CharField(choices=SOLICITATION_CHOICES, max_length=20, null=True, blank=True)

    award_number = models.CharField(max_length=255, null=True, blank=True)
    award_amount = models.DecimalField(decimal_places=2, max_digits=20, null=True, blank=True)
    award_amount_text = models.TextField(null=True, blank=True)
    award_date = models.DateField(null=True, blank=True)
    line_number = models.CharField(max_length=255, null=True, blank=True)
    awardee = models.TextField(null=True, blank=True)

    statutory_authority = models.CharField(max_length=255, null=True, blank=True)
    modification_number = models.CharField(max_length=255, null=True, blank=True)

    # Fair Opportunity/Limited Sources Justification Authority
    foja = models.IntegerField(choices=JUSTIFICATION_CHOICES, null=True, blank=True)
    # Delivery/Task Order Number
    order_number = models.CharField(max_length=255, null=True, blank=True)
class Command(BaseCommand):
    """Rebuild the USASpending tables from scratch, or only the update years."""

    # Both index lists are split by position into a leading "drop" part and
    # a trailing "add" part.
    contracts_idx_drop = contracts_idx[:12]
    contracts_idx_add = contracts_idx[12:]
    grants_idx_drop = grants_idx[:5]
    grants_idx_add = grants_idx[5:]

    def handle(self, import_file='all_downloads.txt', update=False, **options):
        """Drop, recreate, download, convert, load and index the spending data.

        With update=True only the partitions for settings.UPDATE_YEARS are
        dropped and rebuilt; otherwise both base tables are dropped. Nothing
        is touched unless the operator answers 'y' at the prompt.
        """
        prompt = (
            "This will delete USASpending tables and indexes for years {0}. Proceed? y\\n".format(settings.UPDATE_YEARS)
            if update else
            "This will delete all USASpending related tables, indexes, etc. Are you sure you want to proceed? y\\n "
        )
        if raw_input(prompt) != 'y':
            return

        print("deleting out files")
        out_dir = settings.CSV_PATH + 'out/'
        for stale_name in os.listdir(out_dir):
            os.remove(out_dir + stale_name)

        print("deleting old tables and indexes")
        cursor = connection.cursor()
        if not update:
            sql = "Drop table if exists usaspending_contract cascade; commit; Drop table if exists usaspending_grant cascade; commit;"
        else:
            statements = [
                "Drop table if exists usaspending_contract_{0} cascade; commit; Drop table if exists usaspending_grant_{1} cascade;commit;".format(year, year)
                for year in settings.UPDATE_YEARS
            ]
            # deleting overall indexes
            statements.extend(self.contracts_idx_drop)
            statements.extend(self.grants_idx_drop)
            sql = ''.join(statements)

        print(sql)
        cursor.execute(sql)

        print("Regenerating tables")
        management.call_command('syncdb')

        print("Creating partition tables")
        if not update:
            management.call_command('create_partition', fiscal_year='all')
        else:
            for year in settings.UPDATE_YEARS:
                for relation in ('usaspending_contract', 'usaspending_grant'):
                    management.call_command('create_partition', fiscal_year=year, table=relation)

        print("Downloading links in {0}".format(import_file))
        management.call_command('download_files', settings.PROJECT_ROOT + '/usaspending/downloads/' + import_file)

        print("sleeping for a minute to allow files to close out")
        time.sleep(60)

        print("processing downloaded files into proper format")
        for converter in ('convert_usaspending_contracts', 'convert_usaspending_grants'):
            management.call_command(converter, '--traceback')

        print("Putting processed Contract CSVs in database")
        print(out_dir)
        for csv_name in os.listdir(out_dir):
            print(csv_name)
            if 'contracts' in csv_name:
                management.call_command('loadcontracts', out_dir + csv_name)

        print("Putting processed Grant CSVs in database")
        for csv_name in os.listdir(out_dir):
            print(csv_name)
            if 'grants' in csv_name:
                management.call_command('loadgrants', out_dir + csv_name)

        print("Creating partition indexes")
        management.call_command('create_indexes')
http://www.usaspending.gov/datafeeds/2011_All_Loans_Full_20131015.csv.zip 19 | http://www.usaspending.gov/datafeeds/2010_All_Loans_Full_20131015.csv.zip 20 | http://www.usaspending.gov/datafeeds/2009_All_Loans_Full_20131015.csv.zip 21 | http://www.usaspending.gov/datafeeds/2008_All_Loans_Full_20131015.csv.zip 22 | http://www.usaspending.gov/datafeeds/2007_All_Loans_Full_20131015.csv.zip 23 | http://www.usaspending.gov/datafeeds/2013_All_DirectPayments_Full_20131015.csv.zip 24 | http://www.usaspending.gov/datafeeds/2012_All_DirectPayments_Full_20131015.csv.zip 25 | http://www.usaspending.gov/datafeeds/2011_All_DirectPayments_Full_20131015.csv.zip 26 | http://www.usaspending.gov/datafeeds/2010_All_DirectPayments_Full_20131015.csv.zip 27 | http://www.usaspending.gov/datafeeds/2009_All_DirectPayments_Full_20131015.csv.zip 28 | http://www.usaspending.gov/datafeeds/2008_All_DirectPayments_Full_20131015.csv.zip 29 | http://www.usaspending.gov/datafeeds/2007_All_DirectPayments_Full_20131015.csv.zip 30 | http://www.usaspending.gov/datafeeds/2006_All_DirectPayments_Full_20131015.csv.zip 31 | http://www.usaspending.gov/datafeeds/2005_All_DirectPayments_Full_20131015.csv.zip 32 | http://www.usaspending.gov/datafeeds/2004_All_DirectPayments_Full_20131015.csv.zip 33 | http://www.usaspending.gov/datafeeds/2003_All_DirectPayments_Full_20131015.csv.zip 34 | http://www.usaspending.gov/datafeeds/2002_All_DirectPayments_Full_20130715.csv.zip 35 | http://www.usaspending.gov/datafeeds/2001_All_DirectPayments_Full_20130715.csv.zip 36 | http://www.usaspending.gov/datafeeds/2000_All_DirectPayments_Full_20130715.csv.zip 37 | http://www.usaspending.gov/datafeeds/2013_All_Insurance_Full_20131015.csv.zip 38 | http://www.usaspending.gov/datafeeds/2012_All_Insurance_Full_20131015.csv.zip 39 | http://www.usaspending.gov/datafeeds/2011_All_Insurance_Full_20131015.csv.zip 40 | http://www.usaspending.gov/datafeeds/2010_All_Insurance_Full_20131015.csv.zip 41 | 
http://www.usaspending.gov/datafeeds/2009_All_Insurance_Full_20131015.csv.zip 42 | http://www.usaspending.gov/datafeeds/2008_All_Insurance_Full_20131015.csv.zip 43 | http://www.usaspending.gov/datafeeds/2007_All_Insurance_Full_20131015.csv.zip 44 | http://www.usaspending.gov/datafeeds/2006_All_Insurance_Full_20131015.csv.zip 45 | http://www.usaspending.gov/datafeeds/2005_All_Insurance_Full_20131015.csv.zip 46 | http://www.usaspending.gov/datafeeds/2004_All_Insurance_Full_20131015.csv.zip 47 | http://www.usaspending.gov/datafeeds/2003_All_Insurance_Full_20131015.csv.zip 48 | http://www.usaspending.gov/datafeeds/2002_All_Insurance_Full_20130715.csv.zip 49 | http://www.usaspending.gov/datafeeds/2001_All_Insurance_Full_20130715.csv.zip 50 | http://www.usaspending.gov/datafeeds/2000_All_Insurance_Full_20130715.csv.zip 51 | http://www.usaspending.gov/datafeeds/2014_All_Contracts_Full_20131015.csv.zip 52 | http://www.usaspending.gov/datafeeds/2013_All_Contracts_Full_20131015.csv.zip 53 | http://www.usaspending.gov/datafeeds/2012_All_Contracts_Full_20131015.csv.zip 54 | http://www.usaspending.gov/datafeeds/2011_All_Contracts_Full_20131015.csv.zip 55 | http://www.usaspending.gov/datafeeds/2010_All_Contracts_Full_20131015.csv.zip 56 | http://www.usaspending.gov/datafeeds/2009_All_Contracts_Full_20131015.csv.zip 57 | http://www.usaspending.gov/datafeeds/2008_All_Contracts_Full_20131015.csv.zip 58 | http://www.usaspending.gov/datafeeds/2007_All_Contracts_Full_20131015.csv.zip 59 | http://www.usaspending.gov/datafeeds/2006_All_Contracts_Full_20131015.csv.zip 60 | http://www.usaspending.gov/datafeeds/2005_All_Contracts_Full_20131015.csv.zip 61 | http://www.usaspending.gov/datafeeds/2004_All_Contracts_Full_20131015.csv.zip 62 | http://www.usaspending.gov/datafeeds/2003_All_Contracts_Full_20131015.csv.zip 63 | http://www.usaspending.gov/datafeeds/2002_All_Contracts_Full_20130715.csv.zip 64 | http://www.usaspending.gov/datafeeds/2001_All_Contracts_Full_20130715.csv.zip 65 | 
class BaseUSASpendingConverter(BaseImporter):
    """
    This performs the conversion from raw USASpending files to a format
    acceptable to COPY into PostgreSQL.

    Note: to use the specified data dirs (see below), the directory with the
    most current set of files should be symlinked to "latest".

    Child classes are expected to provide ``module`` (exposing FIELDS and
    CALCULATED_FIELDS), ``modelclass``, ``outfile_basename`` and
    ``file_is_right_type()``.
    """

    IN_DIR = settings.CSV_PATH + 'datafeeds'
    DONE_DIR = settings.CSV_PATH + 'done'
    REJECTED_DIR = settings.CSV_PATH + 'rejected'
    OUT_DIR = settings.CSV_PATH + 'out'
    FILE_PATTERN = '*_All_*.csv'  # bash-style, ala '*.sql'

    email_subject = 'Unhappy USASpending App'

    def __init__(self):
        super(BaseUSASpendingConverter, self).__init__()
        if not self.FILE_PATTERN:
            raise NotImplementedError("Child classes must specify a FILE_PATTERN")

    def do_for_file(self, file_path):
        """Convert one input file, skipping files meant for another importer."""
        # Since all the files for both contracts and grants importers start out
        # in the same directory, we make the file pattern permissive and do an extra
        # check in each separate importer

        if not self.file_is_right_type(file_path):
            self.log.info("Doesn't match file pattern for this importer. Skipping.")
            return

        self.log.info("Starting...")

        self.parse_file(file_path, self.outfile_path(file_path), self.module.FIELDS,
                        self.get_string_fields(), self.module.CALCULATED_FIELDS)

        #self.archive_file(file_path, True)

        self.log.info("Done.")

    def outfile_path(self, infile):
        """Return OUT_DIR/<outfile_basename>_<year>.csv for the given input file."""
        outfile = '{0}_{1}.csv'.format(self.outfile_basename, self.get_year_from_file_path(infile))
        return os.path.join(self.OUT_DIR, outfile)

    def file_is_right_type(self, file_):
        raise NotImplementedError("file_is_right_type() must be defined in the child class")

    def parse_file(self, input_, output, fields, string_lengths, calculated_fields=None):
        """Convert the CSV at path input_ and append '|'-delimited rows to output.

        fields is a sequence of (column name, transform or None) entries;
        calculated_fields holds (name, source column or None, transform)
        triples. A transform that raises is logged and yields None for that
        cell. A column missing from the file is fatal and re-raised as
        KeyError.
        """
        def null_transform(value):
            return value

        # Both files are closed (and the output flushed) even when a row
        # blows up half way through.
        with open(input_, 'r') as infile, open(output, 'a') as outfile:
            reader = UnicodeDictReader(infile)
            writer = UnicodeWriter(outfile, delimiter='|')

            for line_num, line in enumerate(reader):
                insert_fields = []

                for field in fields:
                    fieldname = field[0]
                    transform = field[1] or null_transform

                    try:
                        value = transform(line[fieldname])
                    except KeyError:
                        self.log.fatal("Key {} was found in our model but not in the file".format(fieldname))
                        raise
                    except Exception as e:
                        value = None
                        self.log.error(u'|'.join([fieldname, line[fieldname] or u'', e.message, str(line_num)]))

                    insert_fields.append(self.filter_non_values(fieldname, value, string_lengths))

                if calculated_fields:
                    for fieldname, built_on_field, transform in calculated_fields:
                        try:
                            if built_on_field:
                                value = transform(line[built_on_field])
                            else:
                                value = transform()
                        except Exception as e:
                            value = None
                            self.log.error(u'|'.join([fieldname, line.get(built_on_field) or u'', e.message]))

                        insert_fields.append(self.filter_non_values(fieldname, value, string_lengths))

                writer.writerow(insert_fields)

    def filter_non_values(self, field, value, string_lengths):
        """Normalise one cell for output.

        Fields listed in string_lengths become '' when empty, '(none)' or
        'NULL', and are otherwise stripped and truncated to the listed
        length. For every other field None becomes the literal "NULL".
        """
        # indicates that field should be treated as a CharField
        if field in string_lengths:
            if not value or value in ('(none)', 'NULL'):
                return ''

            if not isinstance(value, basestring):
                self.log.warn(u"value '{}' for field '{}' is not a unicode object or string.".format(value, field))
                value = str(value)

            value = value.strip()

            if len(value) > string_lengths[field]:
                self.log.debug(u"value '{}' for field '{}' is too long.".format(value, field))

            value = value[:string_lengths[field]]

            return value

        else:

            if value is None:
                return "NULL"

            return value

    def get_string_fields(self):
        """Return {field name: max_length} for the model's CharFields."""
        return dict([(f.name, f.max_length) for f in self.modelclass._meta.fields if isinstance(f, CharField)])

    def get_year_from_file_path(self, file_path):
        """Return the four-digit year that prefixes the file name.

        Only the base name is inspected, so digits in a directory name
        cannot be mistaken for the year. Raises ValueError when the name
        does not look like '<year>_....csv'.
        """
        match = re.search(r'(?P<year>\d{4})_.*\.csv', os.path.basename(file_path))
        if match is None:
            raise ValueError("Could not find a fiscal year in file name {0}".format(file_path))
        return match.group('year')
class USASpendingDenormalizer(BaseImporter):
    """Convert a directory of raw USASpending CSVs into '|'-delimited output.

    Files whose name matches re_contracts are treated as contracts; every
    other file is treated as grants.
    """

    re_contracts = re.compile('.*[cC]ontracts.*.')

    def __init__(self, logger=None):
        self.log = logger or self.set_up_logger()

    def set_up_logger(self):
        """Create, store and return the default console logger.

        Returning the logger matters: __init__ assigns this method's result
        to self.log, which used to overwrite the logger with None.
        """
        # create logger
        log = logging.getLogger("command")
        log.setLevel(logging.DEBUG)

        # getLogger() hands back the same object on every call, so only
        # attach a console handler when the logger has none yet.
        if not log.handlers:
            # create console handler and set level to debug
            ch = logging.StreamHandler()
            ch.setLevel(logging.DEBUG)
            # create formatter
            formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
            ch.setFormatter(formatter)
            log.addHandler(ch)

        self.log = log
        return log

    def parse_file(self, input, output, fields, string_lengths, calculated_fields=None):
        """Convert the open CSV file input and write rows to the open file output.

        fields holds (column name, transform or None) entries and
        calculated_fields holds (name, source column or None, transform)
        triples. A failing transform is reported on stderr and yields None.
        """
        reader = csv.DictReader(input)
        writer = csv.writer(output, delimiter='|')

        def null_transform(value):
            return value

        for line in reader:
            insert_fields = []

            for field in fields:
                fieldname = field[0]
                transform = field[1] or null_transform

                try:
                    value = transform(line[fieldname])
                except Exception as e:
                    value = None
                    # .get(): the column itself may be what is missing.
                    print >> sys.stderr, '|'.join([fieldname, line.get(fieldname) or '', e.message])

                insert_fields.append(self.filter_non_values(fieldname, value, string_lengths))

            if calculated_fields:
                for fieldname, built_on_field, transform in calculated_fields:
                    try:
                        if built_on_field:
                            value = transform(line[built_on_field])
                        else:
                            value = transform()
                    except Exception as e:
                        value = None
                        print >> sys.stderr, '|'.join([fieldname, line.get(built_on_field) or '', e.message])

                    insert_fields.append(self.filter_non_values(fieldname, value, string_lengths))

            writer.writerow(insert_fields)

    def filter_non_values(self, field, value, string_lengths):
        """Normalise one cell for output.

        Fields listed in string_lengths become '' when empty, '(none)' or
        'NULL', and are otherwise stripped and truncated to the listed
        length, counted in characters rather than bytes. For every other
        field None becomes the literal "NULL".
        """
        # indicates that field should be treated as a CharField
        if field in string_lengths:
            if not value or value in ('(none)', 'NULL'):
                return ''

            if not isinstance(value, str):
                print >> sys.stderr, "Warning: value '%s' for field '%s' is not a string." % (value, field)
                value = str(value)

            # need value as unicode in order to compute proper length
            value = value.decode('utf8')

            value = value.strip()

            if len(value) > string_lengths[field]:
                print >> sys.stderr, "Warning: value '%s' for field '%s' is too long." % (value, field)

            value = value[:string_lengths[field]]

            # but need value back as string in order to write to file
            value = value.encode('utf8')

            return value

        else:

            if value is None:
                return "NULL"

            return value

    def parse_directory(self, in_path, out_path):
        """Convert every regular file in in_path.

        Output goes to 'grants.out' and 'contracts.out' inside out_path,
        which defaults to '<in_path>/out' and is created when missing.
        """
        if not out_path:
            out_path = os.path.join(os.path.abspath(in_path), 'out')
            self.log.info("Out path wasn't set. Setting it to {0}".format(out_path))

        if not os.path.exists(out_path):
            os.mkdir(out_path)
            self.log.info("Out path didn't exist. Creating {0}".format(out_path))

        out_grants = open(os.path.join(out_path, 'grants.out'), 'w')
        out_contracts = open(os.path.join(out_path, 'contracts.out'), 'w')

        # try/finally and "with" make sure no handle stays open when a file
        # fails to convert.
        try:
            self.log.info("Looking for input files...")
            for file_name in os.listdir(in_path):
                file_path = os.path.join(in_path, file_name)
                self.log.info(" Found {0}".format(file_path))

                if os.path.isfile(file_path):
                    with open(file_path, 'rb') as infile:
                        self.log.info(" Converting {0}...".format(file_path))

                        if self.re_contracts.match(file_name):
                            self.parse_file(infile, out_contracts, fpds.FIELDS, CONTRACT_STRINGS, fpds.CALCULATED_FIELDS)
                        else:
                            self.parse_file(infile, out_grants, faads.FIELDS, GRANT_STRINGS, faads.CALCULATED_FIELDS)
        finally:
            out_grants.close()
            out_contracts.close()

        self.log.info("Done with input files.")
usaspending_contract_dunsnumber;", 17 | "drop index if exists usaspending_contract_defaultsort;", 18 | "drop index if exists usaspending_contract_fiscal_year;", 19 | "drop index if exists usaspending_contract_unique_transaction_id;", 20 | "create index usaspending_contract_dunsnumber on usaspending_contract (dunsnumber);", 21 | "create index usaspending_contract_signeddate on usaspending_contract (signeddate);", 22 | "create index usaspending_contract_congressionaldistrict on usaspending_contract (statecode, congressionaldistrict);", 23 | "create index usaspending_contract_agency_name_ft on usaspending_contract using gin(to_tsvector('federal_spending', agency_name));", 24 | "create index usaspending_contract_contracting_agency_name_ft on usaspending_contract using gin(to_tsvector('federal_spending', contracting_agency_name));", 25 | "create index usaspending_contract_requesting_agency_name_ft on usaspending_contract using gin(to_tsvector('federal_spending', requesting_agency_name));", 26 | "create index usaspending_contract_vendor_city_ft on usaspending_contract using gin(to_tsvector('federal_spending', city));", 27 | "create index usaspending_contract_vendor_name_ft on usaspending_contract using gin(to_tsvector('federal_spending', vendorname));", 28 | "create index usaspending_contract_defaultsort on usaspending_contract (fiscal_year desc, obligatedamount desc);", 29 | "create index usaspending_contract_piid on usaspending_contract (piid);", 30 | "create index usaspending_contract_fiscal_year on usaspending_contract (fiscal_year);", 31 | "create index usaspending_contract_unique_transaction_id on usaspending_contract (unique_transaction_id);", 32 | "commit;" 33 | ] 34 | 35 | grants_idx = [ 36 | "drop index if exists usaspending_grant_agency_name_ft;", 37 | "drop index if exists usaspending_grant_recipient_name_ft;", 38 | "drop index if exists usaspending_grant_total_funding_amount;", 39 | "drop index if exists usaspending_grant_unique_transaction_id;", 40 | "drop 
class Command(BaseCommand):
    """Management command that drops and recreates the usaspending indexes."""

    @transaction.commit_on_success
    def handle(self, *args, **kwargs):
        """
        Rebuild the indexes on the contract and grant tables.

        For every year in settings.FISCAL_YEARS the indexes listed in
        INDEX_COLS_BY_TABLE are recreated on the partitions
        usaspending_contract_<year> and usaspending_grant_<year>.  After
        that the statements in contracts_idx and grants_idx are executed,
        in order, to rebuild the indexes on the two parent tables.
        """
        grants_base = 'usaspending_grant'
        contracts_base = 'usaspending_contract'
        c = connections['default'].cursor()

        for fy in settings.FISCAL_YEARS:
            for base in (contracts_base, grants_base):
                self.create_partition_indexes(c, base, "{0}_{1}".format(base, fy))

        self._execute_all(c, "creating overall contract indexes", contracts_idx)
        self._execute_all(c, "creating overall grant indexes", grants_idx)

    def _execute_all(self, c, banner, statements):
        """Print banner, then echo and execute each SQL statement in order."""
        # Single-argument print(...) behaves the same as the print statement.
        print(banner)
        for statement in statements:
            print("executing '{0}'".format(statement))
            c.execute(statement)

    def create_partition_indexes(self, c, base_table, partition_name):
        """
        Drop and recreate every index of one partition table.

        c              -- open database cursor.
        base_table     -- key into INDEX_COLS_BY_TABLE selecting the index
                          definitions to apply.
        partition_name -- table the indexes are created on; index names are
                          '<partition_name>_<position in the list>'.
        """
        for i, colname in enumerate(INDEX_COLS_BY_TABLE[base_table]):
            index_name = '{0}_{1}'.format(partition_name, i)

            # An entry that already carries its own 'using ...' clause or a
            # parenthesised column/expression list is used verbatim; a bare
            # column name is wrapped in parentheses.
            if 'using' in colname or '(' in colname:
                target = colname
            else:
                target = '({0})'.format(colname)

            # Every statement carries its own 'commit;', as in the
            # module-level statement lists.
            c.execute('drop index if exists {0}; commit;'.format(index_name))
            c.execute('create index {0} on {1} {2}; commit;'.format(
                index_name,
                partition_name,
                target
            ))
All choices can be found here: 36 | # http://www.i18nguy.com/unicode/language-identifiers.html 37 | LANGUAGE_CODE = 'en-us' 38 | 39 | SITE_ID = 1 40 | 41 | # If you set this to False, Django will make some optimizations so as not 42 | # to load the internationalization machinery. 43 | USE_I18N = True 44 | 45 | # If you set this to False, Django will not format dates, numbers and 46 | # calendars according to the current locale. 47 | USE_L10N = True 48 | 49 | # If you set this to False, Django will not use timezone-aware datetimes. 50 | USE_TZ = True 51 | 52 | # Absolute filesystem path to the directory that will hold user-uploaded files. 53 | # Example: "/var/www/example.com/media/" 54 | MEDIA_ROOT = '' 55 | 56 | # URL that handles the media served from MEDIA_ROOT. Make sure to use a 57 | # trailing slash. 58 | # Examples: "http://example.com/media/", "http://media.example.com/" 59 | MEDIA_URL = '' 60 | 61 | # Absolute path to the directory static files should be collected to. 62 | # Don't put anything in this directory yourself; store your static files 63 | # in apps' "static/" subdirectories and in STATICFILES_DIRS. 64 | # Example: "/var/www/example.com/static/" 65 | STATIC_ROOT = '' 66 | 67 | # URL prefix for static files. 68 | # Example: "http://example.com/static/", "http://static.example.com/" 69 | STATIC_URL = '/static/' 70 | 71 | # Additional locations of static files 72 | STATICFILES_DIRS = ( 73 | # Put strings here, like "/home/html/static" or "C:/www/django/static". 74 | # Always use forward slashes, even on Windows. 75 | # Don't forget to use absolute paths, not relative paths. 76 | ) 77 | 78 | # List of finder classes that know how to find static files in 79 | # various locations. 
80 | STATICFILES_FINDERS = ( 81 | 'django.contrib.staticfiles.finders.FileSystemFinder', 82 | 'django.contrib.staticfiles.finders.AppDirectoriesFinder', 83 | # 'django.contrib.staticfiles.finders.DefaultStorageFinder', 84 | ) 85 | 86 | # Make this unique, and don't share it with anybody. 87 | SECRET_KEY = 'd-%xtxi759=renuz$l@@pav@+-_fqm+=j7wcmnk_z@bc&j8pzk' 88 | 89 | # List of callables that know how to import templates from various sources. 90 | TEMPLATE_LOADERS = ( 91 | 'django.template.loaders.filesystem.Loader', 92 | 'django.template.loaders.app_directories.Loader', 93 | # 'django.template.loaders.eggs.Loader', 94 | ) 95 | 96 | MIDDLEWARE_CLASSES = ( 97 | 'django.middleware.common.CommonMiddleware', 98 | 'django.contrib.sessions.middleware.SessionMiddleware', 99 | 'django.middleware.csrf.CsrfViewMiddleware', 100 | 'django.contrib.auth.middleware.AuthenticationMiddleware', 101 | 'django.contrib.messages.middleware.MessageMiddleware', 102 | # Uncomment the next line for simple clickjacking protection: 103 | # 'django.middleware.clickjacking.XFrameOptionsMiddleware', 104 | ) 105 | 106 | ROOT_URLCONF = 'federal_spending.urls' 107 | # Python dotted path to the WSGI application used by Django's runserver. 108 | WSGI_APPLICATION = 'federal_spending.wsgi.application' 109 | 110 | TEMPLATE_DIRS = ( 111 | # Put strings here, like "/home/html/django_templates" or "C:/www/django/templates". 112 | # Always use forward slashes, even on Windows. 113 | # Don't forget to use absolute paths, not relative paths. 
114 | ) 115 | 116 | INSTALLED_APPS = ( 117 | 'django.contrib.auth', 118 | 'django.contrib.contenttypes', 119 | 'django.contrib.sessions', 120 | 'django.contrib.sites', 121 | 'django.contrib.messages', 122 | 'django.contrib.staticfiles', 123 | 'federal_spending.usaspending', 124 | 'federal_spending.fbo', 125 | # Uncomment the next line to enable the admin: 126 | # 'django.contrib.admin', 127 | # Uncomment the next line to enable admin documentation: 128 | # 'django.contrib.admindocs', 129 | ) 130 | 131 | SESSION_SERIALIZER = 'django.contrib.sessions.serializers.JSONSerializer' 132 | 133 | # A sample logging configuration. The only tangible logging 134 | # performed by this configuration is to send an email to 135 | # the site admins on every HTTP 500 error when DEBUG=False. 136 | # See http://docs.djangoproject.com/en/dev/topics/logging for 137 | # more details on how to customize your logging configuration. 138 | LOGGING = { 139 | 'version': 1, 140 | 'disable_existing_loggers': False, 141 | 'filters': { 142 | 'require_debug_false': { 143 | '()': 'django.utils.log.RequireDebugFalse' 144 | } 145 | }, 146 | 'handlers': { 147 | 'mail_admins': { 148 | 'level': 'ERROR', 149 | 'filters': ['require_debug_false'], 150 | 'class': 'django.utils.log.AdminEmailHandler' 151 | } 152 | }, 153 | 'loggers': { 154 | 'django.request': { 155 | 'handlers': ['mail_admins'], 156 | 'level': 'ERROR', 157 | 'propagate': True, 158 | }, 159 | } 160 | } 161 | 162 | TMP_DIRECTORY = PROJECT_ROOT + '/tmp' 163 | CSV_PATH = PROJECT_ROOT + '/usaspending/downloads/csvs/' 164 | LOGGING_DIRECTORY = PROJECT_ROOT + '/usaspending/logs' 165 | FISCAL_YEARS = [2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014] 166 | UPDATE_YEARS = [2012, 2013, 2014] 167 | 168 | from local_settings import * -------------------------------------------------------------------------------- /federal_spending/usaspending/scripts/usaspending/fpds.py: 
-------------------------------------------------------------------------------- 1 | from helpers import nullable_float, splitCode, transformFlag, nullable, \ 2 | nullable_int, agency_name_lookup, recovery_act, datestamp, \ 3 | first_char 4 | 5 | from federal_spending.usaspending.models import Contract 6 | 7 | FIELDS = [ 8 | ('unique_transaction_id', None), 9 | ('transaction_status', None), 10 | ('obligatedamount', nullable_float), 11 | ('baseandexercisedoptionsvalue', nullable_float), 12 | ('baseandalloptionsvalue', nullable_float), 13 | ('maj_agency_cat', lambda x: splitCode(x)[:Contract._meta.get_field('maj_agency_cat').max_length]), 14 | ('mod_agency', splitCode), 15 | ('maj_fund_agency_cat', lambda x: splitCode(x)[:Contract._meta.get_field('maj_fund_agency_cat').max_length]), 16 | ('contractingofficeagencyid', splitCode), 17 | ('contractingofficeid', splitCode), 18 | ('fundingrequestingagencyid', splitCode), 19 | ('fundingrequestingofficeid', splitCode), 20 | ('fundedbyforeignentity', splitCode), 21 | ('signeddate', nullable), 22 | ('effectivedate', nullable), 23 | ('currentcompletiondate', nullable), 24 | ('ultimatecompletiondate', nullable), 25 | ('lastdatetoorder', None), 26 | ('contractactiontype', lambda x: x.split()[0].strip()), 27 | ('reasonformodification', splitCode), 28 | ('typeofcontractpricing', splitCode), 29 | ('priceevaluationpercentdifference', None), 30 | ('subcontractplan', splitCode), 31 | ('lettercontract', splitCode), 32 | ('multiyearcontract', transformFlag), 33 | ('performancebasedservicecontract', splitCode), 34 | ('majorprogramcode', None), 35 | ('contingencyhumanitarianpeacekeepingoperation', splitCode), 36 | ('contractfinancing', splitCode), 37 | ('costorpricingdata', splitCode), 38 | ('costaccountingstandardsclause', splitCode), 39 | ('descriptionofcontractrequirement', None), 40 | ('purchasecardaspaymentmethod', transformFlag), 41 | ('numberofactions', nullable_int), 42 | ('nationalinterestactioncode', splitCode), 43 | 
('progsourceagency', splitCode), 44 | ('progsourceaccount', splitCode), 45 | ('progsourcesubacct', splitCode), 46 | ('account_title', None), 47 | ('rec_flag', recovery_act), 48 | ('typeofidc', None), 49 | ('multipleorsingleawardidc', splitCode), 50 | ('programacronym', None), 51 | ('vendorname', None), 52 | ('vendoralternatename', None), 53 | ('vendorlegalorganizationname', None), 54 | ('vendordoingasbusinessname', None), 55 | ('divisionname', None), 56 | ('divisionnumberorofficecode', None), 57 | ('vendorenabled', None), 58 | ('vendorlocationdisableflag', transformFlag), 59 | ('ccrexception', None), 60 | ('streetaddress', None), 61 | ('streetaddress2', None), 62 | ('streetaddress3', None), 63 | ('city', None), 64 | ('state', splitCode), 65 | ('zipcode', None), 66 | ('vendorcountrycode', None), 67 | ('vendor_state_code', None), 68 | ('vendor_cd', splitCode), 69 | ('congressionaldistrict', None), 70 | ('vendorsitecode', None), 71 | ('vendoralternatesitecode', None), 72 | ('dunsnumber', None), 73 | ('parentdunsnumber', None), 74 | ('phoneno', None), 75 | ('faxno', None), 76 | ('registrationdate', nullable), 77 | ('renewaldate', nullable), 78 | ('mod_parent', None), 79 | ('locationcode', None), 80 | ('statecode', splitCode), 81 | ('pop_state_code', splitCode), 82 | ('placeofperformancecountrycode', splitCode), 83 | ('placeofperformancezipcode', None), 84 | ('pop_cd', splitCode), 85 | ('placeofperformancecongressionaldistrict', None), 86 | ('psc_cat', splitCode), 87 | ('productorservicecode', splitCode), 88 | ('systemequipmentcode', splitCode), 89 | ('claimantprogramcode', splitCode), 90 | ('principalnaicscode', splitCode), 91 | ('informationtechnologycommercialitemcategory', splitCode), 92 | ('gfe_gfp', transformFlag), 93 | ('useofepadesignatedproducts', splitCode), 94 | ('recoveredmaterialclauses', splitCode), 95 | ('seatransportation', first_char), 96 | ('contractbundling', splitCode), 97 | ('consolidatedcontract', transformFlag), 98 | ('countryoforigin', 
splitCode), 99 | ('placeofmanufacture', splitCode), 100 | ('manufacturingorganizationtype', splitCode), 101 | ('agencyid', splitCode), 102 | ('piid', None), 103 | ('modnumber', None), 104 | ('transactionnumber', None), 105 | ('fiscal_year', nullable_int), 106 | ('idvagencyid', None), 107 | ('idvpiid', None), 108 | ('idvmodificationnumber', None), 109 | ('solicitationid', None), 110 | ('extentcompeted', splitCode), 111 | ('reasonnotcompeted', splitCode), 112 | ('numberofoffersreceived', nullable_int), 113 | ('commercialitemacquisitionprocedures', splitCode), 114 | ('commercialitemtestprogram', transformFlag), 115 | ('smallbusinesscompetitivenessdemonstrationprogram', transformFlag), 116 | ('a76action', transformFlag), 117 | ('competitiveprocedures', splitCode), 118 | ('solicitationprocedures', splitCode), 119 | ('typeofsetaside', splitCode), 120 | ('localareasetaside', None), 121 | ('evaluatedpreference', splitCode), 122 | ('fedbizopps', None), 123 | ('research', splitCode), 124 | ('statutoryexceptiontofairopportunity', splitCode), 125 | ('organizationaltype', None), 126 | ('numberofemployees', nullable_int), 127 | ('annualrevenue', nullable_float), 128 | ('firm8aflag', transformFlag), 129 | ('hubzoneflag', transformFlag), 130 | ('sdbflag', transformFlag), 131 | ('shelteredworkshopflag', transformFlag), 132 | ('hbcuflag', transformFlag), 133 | ('educationalinstitutionflag', transformFlag), 134 | ('womenownedflag', transformFlag), 135 | ('veteranownedflag', transformFlag), 136 | ('srdvobflag', transformFlag), 137 | ('localgovernmentflag', transformFlag), 138 | ('minorityinstitutionflag', transformFlag), 139 | ('aiobflag', splitCode), 140 | ('stategovernmentflag', transformFlag), 141 | ('federalgovernmentflag', transformFlag), 142 | ('minorityownedbusinessflag', transformFlag), 143 | ('apaobflag', transformFlag), 144 | ('tribalgovernmentflag', transformFlag), 145 | ('baobflag', transformFlag), 146 | ('naobflag', transformFlag), 147 | ('saaobflag', transformFlag), 148 
class BaseImporter(BaseCommand):
    """
    Base management command that runs an operation on each file in IN_DIR.

    Subclasses set the directory / pattern attributes below and implement
    do_for_file() (plus, optionally, dry_run_for_file() and do_first()).
    """

    IN_DIR = None        # scanned for input files, e.g. '/home/datacommons/data/auto/nimsp/raw/IN'
    DONE_DIR = None      # archive_file() target, e.g. '/home/datacommons/data/auto/nimsp/raw/DONE'
    REJECTED_DIR = None  # reject_file() target, e.g. '/home/datacommons/data/auto/nimsp/raw/REJECTED'
    OUT_DIR = None       # not read by this base class, e.g. '/home/datacommons/data/auto/nimsp/denormalized/IN'
    FILE_PATTERN = None  # bash-style, ala '*.sql'

    # A file must have been left untouched for this many seconds before it
    # is considered completely written.
    # NOTE(review): the checking method's name says "over a minute" but the
    # threshold has always been 20 seconds -- confirm which one is intended.
    MIN_FILE_AGE_SECONDS = 20

    email_subject = 'Unhappy Loading App'
    email_recipients = settings.LOGGING_EMAIL['recipients']

    # initializing this here so that tests which don't call handle() don't fail
    dry_run = None

    option_list = BaseCommand.option_list + (
        make_option('--dry-run', '-d',
            action='store_true',
            dest='dry_run',
            default=False,
            help='Do a test run of the command, only printing what would be done.'
        ),
    )

    def __init__(self):
        super(BaseImporter, self).__init__()
        self.class_name = self.__class__.__name__
        self.module_name = self.__module__.split('.')[-1]
        self.log_path = settings.LOGGING_DIRECTORY
        self.log = set_up_logger(self.module_name, self.log_path, self.email_subject, email_recipients=self.email_recipients)
        # The pid file is named after the command's module.
        self.pid_file_path = os.path.join(settings.TMP_DIRECTORY, self.module_name)

    def handle(self, *args, **options):
        """
        Will run the do_for_file operation from a subclass on every
        eligible file found in the IN_DIR, or will log what it would
        do if the dry_run option is specified.
        """
        self.die_if_already_running()
        self.set_pid_file()

        try:
            self.log.info('Starting {0}'.format(self.class_name))

            self.dry_run = options['dry_run']

            if not self.dry_run:
                self.do_first()

            file_func = self.dry_run_for_file if self.dry_run else self.do_for_file

            self.main_loop(file_func)
        finally:
            # Never leave a stale pid file behind, even when the run fails.
            self.destroy_pid_file()

        self.log.info('Finished.')

    def main_loop(self, file_func):
        """
        Call file_func(file_path) for each eligible file.

        The first file that raises is logged and rejected, and processing
        stops there.
        """
        for file_path in self.find_eligible_files():
            if not os.path.exists(file_path):
                continue

            try:
                file_func(file_path)
            except Exception:
                # KeyboardInterrupt / SystemExit are deliberately not caught
                # so that an operator can still abort the run.
                self.log.exception("Unexpected error:")
                self.reject_file(file_path)
                break

    # define this in the derived classes
    def do_for_file(self, file_path):
        """
        The meat of the operation happens here.

        Takes the full path of the input file as its only argument.
        """
        pass

    # define this in the derived classes
    def dry_run_for_file(self, file, file_path=None):
        """
        Dry-run counterpart of do_for_file.

        main_loop() calls this with a single argument (the full path of
        the file), which arrives in `file`.  `file_path` is optional so
        that the one-argument call no longer raises TypeError, while the
        older two-argument form keeps working.
        """
        pass

    # define this (only if necessary) in the derived classes
    def do_first(self):
        pass

    def find_eligible_files(self):
        """
        Goes through the IN_DIR and finds files matching the FILE_PATTERN to act on.

        Yields the full path of every matching file that has not been
        modified recently; files that don't match the pattern are rejected.
        """
        files = os.listdir(self.IN_DIR)

        if not files:
            self.log.info('No files found.')
            return

        for file_name in files:
            file_path = os.path.join(self.IN_DIR, file_name)
            self.log.info('Found file {0}'.format(file_name))

            if not fnmatch.fnmatch(file_name, self.FILE_PATTERN):
                self.log.warning('{0} doesn\'t match the file pattern ({1}). Rejecting.'.format(file_name, self.FILE_PATTERN))
                self.reject_file(file_name)
            elif self.file_has_not_been_written_to_for_over_a_minute(file_path):
                yield file_path
            else:
                self.log.info('File last modified time is too recent. Skipping.')

    def reject_file(self, path):
        """Move the file named by path from IN_DIR to REJECTED_DIR (no-op on a dry run)."""
        if not self.dry_run:
            # Only the basename is used, so a bare name and a full path both work.
            name = os.path.basename(path)
            os.rename(os.path.join(self.IN_DIR, name), os.path.join(self.REJECTED_DIR, name))

    def archive_file(self, path, timestamp=False):
        """
        Move the file named by path from IN_DIR to DONE_DIR (no-op on a dry run).

        With timestamp=True the archived name is prefixed with the current
        time as 'YYYYMMDD_HHMM_'.  Raises CommandError when DONE_DIR or the
        source file does not exist.
        """
        if self.dry_run:
            return

        name = os.path.basename(path)
        new_name = name

        if timestamp:
            new_name = '_'.join([datetime.datetime.now().strftime('%Y%m%d_%H%M'), name])

        # make sure all paths exist
        if not os.path.exists(self.DONE_DIR):
            raise CommandError("Tried to archive file, but DONE directory doesn't exist: {0}".format(os.path.abspath(self.DONE_DIR)))

        old_path = os.path.join(self.IN_DIR, name)
        if not os.path.exists(old_path):
            raise CommandError("The old file path doesn't exist: {0}".format(old_path))

        # save this as a courtesy for tests, since they need to move the archived (timestamped) file back
        self.archived_file_path = os.path.join(self.DONE_DIR, new_name)

        os.rename(old_path, self.archived_file_path)

    def die_if_already_running(self):
        """
        Make sure this script is not already running in another process.

        NOTE: the check is currently disabled, so this never raises.
        """
        #if os.path.exists(self.pid_file_path):
        #    raise CommandError("This script is already running in a separate process. (Check {0})".format(self.pid_file_path))
        pass

    def set_pid_file(self):
        """Write the current process id to the pid file."""
        with open(self.pid_file_path, 'w') as fh:
            fh.write(str(os.getpid()))

    def destroy_pid_file(self):
        """Delete the pid file."""
        os.remove(self.pid_file_path)

    def file_has_not_been_written_to_for_over_a_minute(self, file_path):
        """
        Make sure the file has downloaded completely.

        Returns True when the file's modification time lies more than
        MIN_FILE_AGE_SECONDS (20) seconds in the past.
        """
        now_epoch = time.time()
        last_modified_epoch = os.path.getmtime(file_path)
        return now_epoch - last_modified_epoch > self.MIN_FILE_AGE_SECONDS
If you wish to use a framework other than Django or database other than Postgresql, you can use the intermediary CSVs to import. 21 | 22 | 23 | Usage 24 | ---------- 25 | 26 | 27 | To get started, install the dependencies while in an [activated python virtual environment](http://docs.python-guide.org/en/latest/dev/virtualenvs/) and using [pip](http://www.pip-installer.org/en/latest/installing.html) 28 | 29 | pip install -r requirements.txt 30 | 31 | For the USASpending data, you will need to specify which fiscal years you want to pull in by editing a list in the settings file called "FISCAL_YEARS". The default is 2000-2014. 32 | 33 | Postgresql settings 34 | ------------------- 35 | 36 | After filling in your database settings in the Django settings file, run 37 | `manage.py syncdb` 38 | to create the tables. 39 | 40 | If you are using postgresql and will be using the indexes with this project, you will need to create some text search elements within postgresql. To do that, move the file in the root directory of the project (federal_spending.stop) to your postgresql text search directory. On version 9.1, that is 41 | /usr/share/postgresql/9.1/tsearchdata/ . 42 | 43 | Once you move the stopwords file, you can create the text search indexes like this: 44 | 45 | manage.py dbshell < tsconfig.sql 46 | 47 | tsconfig.sql is also located in the project's root directory. This is a one time step that only needs to be repeated if you blow away your whole database, not just the tables. 48 | 49 | 50 | 51 | Importing Contracts and Grants 52 | ------------------------------ 53 | 54 | The download, cleaning and import processes are broken out into their own Django management commands. 
To do a fresh import from scratch (and run all the commands at the same time with some sensible defaults) you can run: 55 | 56 | manage.py fresh_import 57 | 58 | This will automatically download the files in federal_spending/usaspending/downloads/all_downloads.txt, process them, and store them in the database. The default contents of that file are 14 years of fiscal data so just go in and remove some links if you don't need all of that. 59 | 60 | Alternatively, if you want to run the commands individually, either to debug or just see how it works, you can. Here are the steps, each with its own manage command: 61 | 62 | * `manage.py download_files FILENAME` -- download and unzip all the links in FILENAME 63 | * `manage.py convert_usaspending_contracts` -- normalizes all the data and dumps into a better structured CSV 64 | * `manage.py convert_usaspending_grants` -- ditto, but for grants 65 | * `manage.py syncdb` -- create tables 66 | * `manage.py create_partition --fiscal-year all` -- create postgresql partitions 67 | * `manage.py loadcontracts FILENAME` -- copy the csv FILENAME (will appear in out folder) into the contracts table 68 | * `manage.py loadgrants FILENAME` -- copy the csv FILENAME (will appear in out folder) into the grants table 69 | * `manage.py create_indexes` -- create the postgresql indexes 70 | 71 | 72 | 73 | manage.py download_files FILENAME 74 | --------------------------------- 75 | To tell this command which files to download, pass it a single argument, the path to a file with the urls to be downloaded. It's expecting urls of the form 76 | 77 | http://www.usaspending.gov/datafeeds/2013_All_Contracts_Full_20131015.csv.zip 78 | 79 | These are available at http://www.usaspending.gov/data under the Archives tab. There is an example file in the federal_spending/usaspending/downloads folder. 
Here's an example of how to use the command with the example file included in the project: 80 | 81 | manage.py download_files downloads.20131105.txt 82 | 83 | 84 | manage.py convert_usaspending_contracts 85 | --------------------------------------- 86 | 87 | To convert these raw csvs into more normalized data, you need to run the convert_usaspending_contracts command. 88 | 89 | manage.py convert_usaspending_contracts 90 | 91 | That will take any csvs out of the datafeeds folder, process them and put the result in the out folder. The source file will then have a timestamp prepended to the name and it will be moved to the done folder. If there is a problem with any file or year, you'll need to address the problem, and move the source files __back__ to the datafeeds folder and then remove the timestamp. 92 | 93 | You can stop here if you are not using Postgresql for your database. 94 | 95 | 96 | POSTGRESQL Setup 97 | ----------------- 98 | 99 | manage.py create_partition --fiscal-year all 100 | -------------------------------------------- 101 | Use this command to generate partitions in the contract and grant tables for each fiscal year. This helps with indexing and performance. Either pass in a desired fiscal year, or just pass in `all` to do all the years in the FISCAL_YEARS setting. 102 | 103 | manage.py loadcontracts FILENAME 104 | ---------------------------------- 105 | Finally, use the Postgresql copy command to dump the csv into the tables. Note that this is not smart. It won't check for duplicate transactions. So you only want to use this when starting with empty tables. The FILENAME should be one of the files that appear in the out folder (federal_spending/usaspending/downloads/csvs/out/contracts_2013.csv, for example). 106 | 107 | 108 | manage.py loadgrants FILENAME 109 | ------------------------------- 110 | Same deal as the contracts except you use it for grants files. 
111 | 112 | 113 | 114 | manage.py create_indexes 115 | ------------------------------- 116 | 117 | `./manage.py create_indexes` 118 | 119 | That's it! 120 | 121 | ## Public domain 122 | 123 | This project is [dedicated to the public domain](LICENSE). As spelled out in [CONTRIBUTING](CONTRIBUTING.md): 124 | 125 | > The project is in the public domain within the United States, and copyright and related rights in the work worldwide are waived through the [CC0 1.0 Universal public domain dedication](http://creativecommons.org/publicdomain/zero/1.0/). 126 | 127 | > All contributions to this project will be released under the CC0 dedication. By submitting a pull request, you are agreeing to comply with this waiver of copyright interest. 128 | 129 | TO DO: 130 | run tests to ensure import --> write tests, check against [usaspending api](http://www.usaspending.gov/data?carryfilters=on) 131 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Creative Commons Legal Code 2 | 3 | CC0 1.0 Universal 4 | 5 | CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE 6 | LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN 7 | ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS 8 | INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES 9 | REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS 10 | PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM 11 | THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED 12 | HEREUNDER. 13 | 14 | Statement of Purpose 15 | 16 | The laws of most jurisdictions throughout the world automatically confer 17 | exclusive Copyright and Related Rights (defined below) upon the creator 18 | and subsequent owner(s) (each and all, an "owner") of an original work of 19 | authorship and/or a database (each, a "Work"). 
20 | 21 | Certain owners wish to permanently relinquish those rights to a Work for 22 | the purpose of contributing to a commons of creative, cultural and 23 | scientific works ("Commons") that the public can reliably and without fear 24 | of later claims of infringement build upon, modify, incorporate in other 25 | works, reuse and redistribute as freely as possible in any form whatsoever 26 | and for any purposes, including without limitation commercial purposes. 27 | These owners may contribute to the Commons to promote the ideal of a free 28 | culture and the further production of creative, cultural and scientific 29 | works, or to gain reputation or greater distribution for their Work in 30 | part through the use and efforts of others. 31 | 32 | For these and/or other purposes and motivations, and without any 33 | expectation of additional consideration or compensation, the person 34 | associating CC0 with a Work (the "Affirmer"), to the extent that he or she 35 | is an owner of Copyright and Related Rights in the Work, voluntarily 36 | elects to apply CC0 to the Work and publicly distribute the Work under its 37 | terms, with knowledge of his or her Copyright and Related Rights in the 38 | Work and the meaning and intended legal effect of CC0 on those rights. 39 | 40 | 1. Copyright and Related Rights. A Work made available under CC0 may be 41 | protected by copyright and related or neighboring rights ("Copyright and 42 | Related Rights"). Copyright and Related Rights include, but are not 43 | limited to, the following: 44 | 45 | i. the right to reproduce, adapt, distribute, perform, display, 46 | communicate, and translate a Work; 47 | ii. moral rights retained by the original author(s) and/or performer(s); 48 | iii. publicity and privacy rights pertaining to a person's image or 49 | likeness depicted in a Work; 50 | iv. rights protecting against unfair competition in regards to a Work, 51 | subject to the limitations in paragraph 4(a), below; 52 | v. 
rights protecting the extraction, dissemination, use and reuse of data 53 | in a Work; 54 | vi. database rights (such as those arising under Directive 96/9/EC of the 55 | European Parliament and of the Council of 11 March 1996 on the legal 56 | protection of databases, and under any national implementation 57 | thereof, including any amended or successor version of such 58 | directive); and 59 | vii. other similar, equivalent or corresponding rights throughout the 60 | world based on applicable law or treaty, and any national 61 | implementations thereof. 62 | 63 | 2. Waiver. To the greatest extent permitted by, but not in contravention 64 | of, applicable law, Affirmer hereby overtly, fully, permanently, 65 | irrevocably and unconditionally waives, abandons, and surrenders all of 66 | Affirmer's Copyright and Related Rights and associated claims and causes 67 | of action, whether now known or unknown (including existing as well as 68 | future claims and causes of action), in the Work (i) in all territories 69 | worldwide, (ii) for the maximum duration provided by applicable law or 70 | treaty (including future time extensions), (iii) in any current or future 71 | medium and for any number of copies, and (iv) for any purpose whatsoever, 72 | including without limitation commercial, advertising or promotional 73 | purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each 74 | member of the public at large and to the detriment of Affirmer's heirs and 75 | successors, fully intending that such Waiver shall not be subject to 76 | revocation, rescission, cancellation, termination, or any other legal or 77 | equitable action to disrupt the quiet enjoyment of the Work by the public 78 | as contemplated by Affirmer's express Statement of Purpose. 79 | 80 | 3. Public License Fallback. 
Should any part of the Waiver for any reason 81 | be judged legally invalid or ineffective under applicable law, then the 82 | Waiver shall be preserved to the maximum extent permitted taking into 83 | account Affirmer's express Statement of Purpose. In addition, to the 84 | extent the Waiver is so judged Affirmer hereby grants to each affected 85 | person a royalty-free, non transferable, non sublicensable, non exclusive, 86 | irrevocable and unconditional license to exercise Affirmer's Copyright and 87 | Related Rights in the Work (i) in all territories worldwide, (ii) for the 88 | maximum duration provided by applicable law or treaty (including future 89 | time extensions), (iii) in any current or future medium and for any number 90 | of copies, and (iv) for any purpose whatsoever, including without 91 | limitation commercial, advertising or promotional purposes (the 92 | "License"). The License shall be deemed effective as of the date CC0 was 93 | applied by Affirmer to the Work. Should any part of the License for any 94 | reason be judged legally invalid or ineffective under applicable law, such 95 | partial invalidity or ineffectiveness shall not invalidate the remainder 96 | of the License, and in such case Affirmer hereby affirms that he or she 97 | will not (i) exercise any of his or her remaining Copyright and Related 98 | Rights in the Work or (ii) assert any associated claims and causes of 99 | action with respect to the Work, in either case contrary to Affirmer's 100 | express Statement of Purpose. 101 | 102 | 4. Limitations and Disclaimers. 103 | 104 | a. No trademark or patent rights held by Affirmer are waived, abandoned, 105 | surrendered, licensed or otherwise affected by this document. 106 | b. 
Affirmer offers the Work as-is and makes no representations or 107 | warranties of any kind concerning the Work, express, implied, 108 | statutory or otherwise, including without limitation warranties of 109 | title, merchantability, fitness for a particular purpose, non 110 | infringement, or the absence of latent or other defects, accuracy, or 111 | the present or absence of errors, whether or not discoverable, all to 112 | the greatest extent permissible under applicable law. 113 | c. Affirmer disclaims responsibility for clearing rights of other persons 114 | that may apply to the Work or any use thereof, including without 115 | limitation any person's Copyright and Related Rights in the Work. 116 | Further, Affirmer disclaims responsibility for obtaining any necessary 117 | consents, permissions or other rights required for any use of the 118 | Work. 119 | d. Affirmer understands and acknowledges that Creative Commons is not a 120 | party to this document and has no duty or obligation with respect to 121 | this CC0 or use of the Work. 
122 | -------------------------------------------------------------------------------- /federal_spending/usaspending/__init__.py: -------------------------------------------------------------------------------- 1 | CONTRACT_ACTION_TYPES = ( 2 | ('A', 'GWAC'), 3 | ('B', 'IDC'), 4 | ('C', 'FSS'), 5 | ('D', 'BOA'), 6 | ('E', 'BPA'), 7 | ) 8 | 9 | PRICING_TYPES = ( 10 | ('A', 'Fixed price redetermination'), 11 | ('B', 'Fixed price level of effort'), 12 | ('J', 'Firm fixed price'), 13 | ('K', 'Fixed price with EPA'), 14 | ('L', 'Fixed price incentive'), 15 | ('M', 'Fixed price award fee'), 16 | ('R', 'Cost plus award fee'), 17 | ('S', 'Cost no fee'), 18 | ('T', 'Cost sharing'), 19 | ('U', 'Cost plus fixed fee'), 20 | ('V', 'Cost plus incentive fee'), 21 | ('Y', 'Time and materials'), 22 | ('Z', 'Labor hours'), 23 | ('1', 'Order dependent'), 24 | ('2', 'Combination'), 25 | ('3', 'Other'), 26 | ) 27 | 28 | NIA_CODES = ( 29 | ('NONE', 'None'), 30 | ('H05K', 'Hurricane Katrina 2005'), 31 | ('H05O', 'Hurricane Ophelia 2005'), 32 | ('H05R', 'Hurricane Rita 2005'), 33 | ('H05W', 'Hurricane Wilma 2005'), 34 | ('W081', 'California Wildfires 2008'), 35 | ('F081', 'Midwest Storms and Flooding 2008'), 36 | ('H08G', 'Hurricane Gustav 2008'), 37 | ('H08H', 'Hurricane Hanna 2008'), 38 | ('H08I', 'Hurricane Ike 2008'), 39 | ('H06C', 'Hurricane Chris 2006'), 40 | ('H06E', 'Hurricane Ernesto 2006'), 41 | ('I09P', 'Inauguration 2009'), 42 | ('T10S', 'American Samoa Earthquake, Tsunami, and Flooding 2010'), 43 | ('Q10H', 'Haiti Earthquake 2010'), 44 | ('Q10G', 'Gulf Oil Spill 2010'), 45 | ) 46 | 47 | MOD_REASONS = ( 48 | ('A', 'Additional work'), 49 | ('B', 'Supplemental agreement for work within scope'), 50 | ('C', 'Funding only action'), 51 | ('D', 'Change order'), 52 | ('E', 'Terminate for default'), 53 | ('F', 'Terminate for convenience'), 54 | ('G', 'Exercise an option'), 55 | ('H', 'Definitize letter contract'), 56 | ('J', 'Novation agreement'), 57 | ('K', 'Close out'), 58 | ('L', 
'Definitize change order'), 59 | ('M', 'Other administrative action'), 60 | ('N', 'Legal contract cancellation'), 61 | ('P', 'Rerepresentation of non-novated merger/acquisition'), 62 | ('R', 'Rerepresentation'), 63 | ('S', 'Change PIID'), 64 | ('T', 'Transfer action'), 65 | ('V', 'Vendor DUNS change'), 66 | ('W', 'Vendor address change'), 67 | ('X', 'Terminate for cause'), 68 | ) 69 | 70 | COST_OBTAINED = ( 71 | ('N', 'No'), 72 | ('W', 'Waived'), 73 | ('Y', 'Yes'), 74 | ) 75 | 76 | YNX = ( 77 | ('Y', 'Yes'), 78 | ('N', 'No'), 79 | ('X', 'N/A'), 80 | ) 81 | 82 | FINANCING = ( 83 | ('A', 'FAR 52.232-16 progress payments'), 84 | ('C', 'Percentage of completion progress payments'), 85 | ('D', 'Unusual progress payments or advance payments'), 86 | ('E', 'Commercial financing'), 87 | ('F', 'Performance-based financing'), 88 | ('Z', 'Not applicable'), 89 | ) 90 | 91 | BUNDLING = ( 92 | ('A', 'Mission critical'), 93 | ('B', 'OMB circular A-76'), 94 | ('C', 'Other'), 95 | ('D', 'Not a bundled requirement'), 96 | ) 97 | 98 | RM_CLAUSES = ( 99 | ('A', 'FAR 52.223-4 included'), 100 | ('B', 'FAR 52.223-4 and FAR 52.223-9 included'), 101 | ('C', 'No clauses included'), 102 | ) 103 | 104 | ORG_TYPES = ( 105 | ('A', 'U.S. owned business'), 106 | ('B', 'Other U.S. entity'), 107 | ('C', 'Foreign-owned business incorporated in the U.S.'), 108 | ('D', 'Foreign-owned business not incorporated in the U.S.'), 109 | ('O', 'Other foreign entity'), 110 | ) 111 | 112 | COMMERCIAL = ( 113 | ('A', 'Commercially available'), 114 | ('B', 'Other commercial item'), 115 | ('C', 'Non-developmental item'), 116 | ('D', 'Non-commercial item'), 117 | ('E', 'Commercial service'), 118 | ('F', 'Non-commercial service'), 119 | ('Z', 'Not an IT product or service'), 120 | ) 121 | 122 | PRODUCT_ORIGIN = ( 123 | ('A', 'In U.S. 
by foreign concern'), 124 | ('B', 'Outside the U.S.'), 125 | ('C', 'Not a manufactured product'), 126 | ('D', 'Manufactured in the U.S.'), 127 | ('E', 'Manufactured outside the U.S.; used outside the U.S.'), 128 | ('F', 'Manufactured outside the U.S.; for resale'), 129 | ('G', 'Manufactured outside the U.S.; trade agreements'), 130 | ('H', 'Manufactured outside the U.S.; commercial IT'), 131 | ('I', 'Manufactured outside the U.S.; public interest'), 132 | ('J', 'Manufactured outside the U.S.; not available domestically'), 133 | ('K', 'Manufactured outside the U.S.; unreasonable cost'), 134 | ('L', 'Manufactured outside the U.S.; DOD qualifying country'), 135 | ) 136 | 137 | CCR_EXCEPTIONS = ( 138 | ('1', 'Government-wide commercial purchase card'), 139 | ('2', 'Classified contracts'), 140 | ('3', 'Contracting officers deployed in the course of military operations'), 141 | ('4', 'Contracting officers conducting emergency operations'), 142 | ('5', 'Contracts to support unusual or compelling needs'), 143 | ('6', 'Awards to foreign vendors for work performed outside the United States'), 144 | ('7', 'Micro-purchases that do not use the EFT'), 145 | ) 146 | 147 | COMPETITIVENESS = ( 148 | ('A', 'Full and open competition'), 149 | ('B', 'Not available for competition'), 150 | ('C', 'Not competed'), 151 | ('D', 'Full and open competition after exclusion of sources'), 152 | ('E', 'Follow on to competed action'), 153 | ('F', 'Competed under SAP'), 154 | ('G', 'Not competed under SAP'), 155 | ('CDO', 'Competitive delivery order'), 156 | ('NDO', 'Non-competitive delivery order'), 157 | ) 158 | 159 | SET_ASIDES = ( 160 | ('NONE', 'No set aside used'), 161 | ('SBA', 'Small business set aside (total)'), 162 | ('SBP', 'Small business set aside (partial)'), 163 | ('8A', '8(a) competed'), 164 | ('8AN', '8(a) sole source'), 165 | ('8AC', 'SDB set aside'), 166 | ('HMT', 'HBCU or MI set aside (total)'), 167 | ('HMP', 'HBCU or MI set aside (partial)'), 168 | ('VSB', 'Very small business 
set aside'), 169 | ('ESB', 'Emerging small business set aside'), 170 | ('HZC', 'HUBZone set aside'), 171 | ('HZS', 'HUBZone sole source'), 172 | ('HS2', 'Combination HUBZone and 8(a)'), 173 | ('HS3', '8(a) with HUBZone preference'), 174 | ('BI', 'Buy Indian'), 175 | ('RSB', 'Reserved for small business'), 176 | ('VSA', 'Veteran set aside'), 177 | ('VSS', 'Veteran sole source'), 178 | ('SDVOSBC', 'Service disabled veteran owned small business set aside'), 179 | ('SDVOSBS', 'Service disabled veteran owned small business sole source'), 180 | ) 181 | 182 | NOCOMPETE_REASONS = ( 183 | ('UNQ', 'Unique source'), 184 | ('FOC', 'Follow-on contract'), 185 | ('UR', 'Unsolicited research proposal'), 186 | ('PDR', 'Patent or data rights'), 187 | ('UT', 'Utilities'), 188 | ('STD', 'Standardization'), 189 | ('ONE', 'Only one source'), 190 | ('URG', 'Urgency'), 191 | ('MES', 'Mobilization, essential R&D'), 192 | ('IA', 'International agreement'), 193 | ('OTH', 'Authorized by statute'), 194 | ('RES', 'Authorized resale'), 195 | ('NS', 'National security'), 196 | ('PI', 'Public interest'), 197 | ('MPT', 'Less than or equal to the MPT'), 198 | ('SP2', 'SAP non-competition'), 199 | ('BND', 'Brand name description'), 200 | ) 201 | 202 | SUBCONTRACT_PLANS = ( 203 | ('A', 'No subcontracting possibilities'), 204 | ('B', 'Plan not required'), 205 | ('C', 'Plan required, incentive not included'), 206 | ('D', 'Plan required, incentive included'), 207 | ('E', 'Plan required'), 208 | ) -------------------------------------------------------------------------------- /federal_spending/metadata/piid.yml: -------------------------------------------------------------------------------- 1 | - piid: 2 | prefix: AID 3 | name: Agency for International Development 4 | prefix_required: true 5 | department_id: '1152' 6 | 7 | - piid: 8 | prefix: AFRH 9 | name: Armed Forces Retirement Home 10 | prefix_required: true 11 | department_id: '8400' 12 | 13 | - piid: 14 | prefix: BB 15 | name: Broadcasting Board 
of Governors 16 | prefix_required: true 17 | department_id: '9568' 18 | 19 | - piid: 20 | prefix: CF 21 | name: Commodity Futures Trading Commission 22 | prefix_required: true 23 | department_id: '9507' 24 | 25 | - piid: 26 | prefix: CPSC 27 | name: Consumer Product Safety Commission 28 | prefix_required: true 29 | department_id: '6100' 30 | 31 | - piid: 32 | prefix: CFP 33 | name: Consumer Financial Protection Bureau 34 | prefix_required: true 35 | department_id: '955F' 36 | 37 | - piid: 38 | prefix: CNS 39 | name: Corporation for National and Community Service 40 | prefix_required: true 41 | department_id: '9577' 42 | 43 | - piid: 44 | prefix: CSOSA 45 | name: Court Services and Offender Supervision Agency 46 | prefix_required: true 47 | department_id: '9594' 48 | 49 | - piid: 50 | prefix: PSA 51 | name: Court Services and Offender Supervision Agency/Pretrial Services Agency 52 | prefix_required: true 53 | department_id: '959P' 54 | 55 | - piid: 56 | prefix: AG 57 | name: Department of Agriculture 58 | prefix_required: true 59 | department_id: '1200' 60 | 61 | - piid: 62 | prefix: DOC 63 | name: Department of Commerce 64 | prefix_required: true 65 | department_id: '1300' 66 | 67 | - piid: 68 | prefix: ED 69 | name: Department of Education 70 | prefix_required: true 71 | department_id: '9100' 72 | 73 | - piid: 74 | prefix: DE 75 | name: Department of Energy 76 | prefix_required: true 77 | department_id: '8900' 78 | 79 | - piid: 80 | prefix: HS 81 | name: Department of Homeland Security 82 | prefix_required: true 83 | department_id: '7000' 84 | 85 | - piid: 86 | prefix: IN 87 | name: Department of Interior 88 | prefix_required: true 89 | department_id: '1400' 90 | 91 | - piid: 92 | prefix: DJ 93 | name: Department of Justice 94 | prefix_required: true 95 | department_id: '1500' 96 | 97 | - piid: 98 | prefix: DOL 99 | name: Department of Labor 100 | prefix_required: true 101 | department_id: '1600' 102 | 103 | - piid: 104 | prefix: S 105 | name: Department of State 
106 | prefix_required: true 107 | department_id: '1900' 108 | 109 | - piid: 110 | prefix: T 111 | name: Department of Treasury 112 | prefix_required: true 113 | department_id: '2000' 114 | 115 | - piid: 116 | prefix: EP 117 | name: Environmental Protection Agency 118 | prefix_required: true 119 | department_id: '6800' 120 | 121 | - piid: 122 | prefix: EEC 123 | name: Equal Employment Opportunity Commission 124 | prefix_required: true 125 | department_id: '4500' 126 | 127 | - piid: 128 | prefix: FCC 129 | name: Federal Communications Commission 130 | prefix_required: true 131 | department_id: '2700' 132 | 133 | - piid: 134 | prefix: FHF 135 | name: Federal Housing Finance Agency 136 | prefix_required: true 137 | department_id: '9542' 138 | 139 | - piid: 140 | prefix: FLRA 141 | name: Federal Labor Relations Authority 142 | prefix_required: true 143 | department_id: '5400' 144 | 145 | - piid: 146 | prefix: FE 147 | name: Federal Elections Commission 148 | prefix_required: true 149 | department_id: '9506' 150 | 151 | - piid: 152 | prefix: FMC 153 | name: Federal Maritime Commission 154 | prefix_required: false 155 | department_id: '6500' 156 | 157 | - piid: 158 | prefix: FERC 159 | name: Federal Energy Regulatory Commission 160 | prefix_required: true 161 | department_id: '8961' 162 | 163 | - piid: 164 | prefix: FTC 165 | name: Federal Trade Commission 166 | prefix_required: true 167 | department_id: '2900' 168 | 169 | - piid: 170 | prefix: FMCS 171 | name: Federal Mediation and Conciliation Service 172 | prefix_required: true 173 | department_id: '9300' 174 | 175 | - piid: 176 | prefix: GS 177 | name: General Services Administration 178 | prefix_required: true 179 | department_id: '4700' 180 | 181 | - piid: 182 | prefix: HHS 183 | name: Health and Human Services 184 | prefix_required: true 185 | department_id: '7500' 186 | 187 | - piid: 188 | prefix: DU 189 | name: Department of Housing and Urban Development 190 | prefix_required: true 191 | department_id: '8600' 192 
| 193 | - piid: 194 | prefix: IMLS 195 | name: Institute of Museum and Library Services 196 | parent_department_id: '5900' 197 | prefix_required: true 198 | department_id: '5950' 199 | 200 | - piid: 201 | prefix: IBM 202 | name: 'International Boundary and Water Commission: U.S. - Mexico' 203 | parent_department_id: '1900' 204 | parent_prefix: 'S' 205 | prefix_required: true 206 | department_id: '19BM' 207 | 208 | - piid: 209 | prefix: ITC 210 | name: International Trade Commission 211 | prefix_required: true 212 | department_id: '3400' 213 | 214 | - piid: 215 | prefix: JFKC 216 | name: J. F. Kennedy Center for the Performing Arts 217 | prefix_required: true 218 | department_id: '3352' 219 | 220 | - piid: 221 | prefix: NN 222 | name: National Aeronautics and Space Administration 223 | prefix_required: true 224 | department_id: '8000' 225 | 226 | - piid: 227 | prefix: NAMA 228 | name: National Archives and Records Administration 229 | prefix_required: true 230 | department_id: '8800' 231 | 232 | - piid: 233 | prefix: NEA 234 | name: National Endowment for the Arts 235 | prefix_required: true 236 | department_id: '5920' 237 | 238 | - piid: 239 | prefix: NEH 240 | name: National Endowment for the Humanities 241 | prefix_required: true 242 | department_id: '5940' 243 | 244 | - piid: 245 | prefix: NLR 246 | name: National Labor Relations Board 247 | prefix_required: true 248 | department_id: '6300' 249 | 250 | - piid: 251 | prefix: NSF 252 | name: National Science Foundation 253 | prefix_required: true 254 | department_id: '4900' 255 | 256 | - piid: 257 | prefix: NTSB 258 | name: National Transportation Safety Board 259 | prefix_required: true 260 | department_id: '9508' 261 | 262 | - piid: 263 | prefix: OPM 264 | name: Office of Personnel Management 265 | prefix_required: true 266 | department_id: '2400' 267 | 268 | - piid: 269 | prefix: OSC 270 | name: Office of Special Counsel 271 | prefix_required: true 272 | department_id: '6201' 273 | 274 | - piid: 275 | prefix: OPI 
276 | name: Overseas Private Investment Corporation 277 | prefix_required: true 278 | department_id: '7100' 279 | 280 | - piid: 281 | prefix: PC 282 | name: Peace Corps 283 | prefix_required: true 284 | department_id: '1145' 285 | 286 | - piid: 287 | prefix: PBGC01 288 | name: Pension Benefit Guaranty Corporation 289 | parent_prefix: DOL 290 | parent_department_id: '1600' 291 | prefix_required: true 292 | department_id: '1665' 293 | 294 | - piid: 295 | prefix: RRB 296 | name: Railroad Retirement Board 297 | prefix_required: true 298 | department_id: '6000' 299 | 300 | - piid: 301 | prefix: RA 302 | name: Recovery Accountability and Transparency Board 303 | prefix_required: true 304 | department_id: '9561' 305 | 306 | - piid: 307 | prefix: SEC 308 | name: Securities and Exchange Commission 309 | prefix_required: true 310 | department_id: '5000' 311 | 312 | - piid: 313 | prefix: SBA 314 | name: Small Business Administration 315 | prefix_required: true 316 | department_id: '7300' 317 | 318 | - piid: 319 | prefix: F 320 | name: Smithsonian Institution 321 | prefix_required: true 322 | department_id: '3300' 323 | 324 | - piid: 325 | prefix: SS 326 | name: Social Security Administration 327 | prefix_required: true 328 | department_id: '2800' 329 | 330 | - piid: 331 | prefix: DT 332 | name: Department of Transportation 333 | prefix_required: true 334 | department_id: '6900' 335 | 336 | - piid: 337 | prefix: CIG 338 | name: Council of the Inspectors General on Integrity and Efficiency 339 | prefix_required: true 340 | 341 | - piid: 342 | prefix: NRC 343 | name: Nuclear Regulatory Commission 344 | prefix_required: true 345 | department_id: '3100' 346 | 347 | - piid: 348 | prefix: TDA 349 | name: United States Trade and Development Agency 350 | prefix_required: true 351 | department_id: '1153' 352 | 353 | - piid: 354 | prefix: V 355 | name: Department of Veterans Affairs 356 | prefix_required: true 357 | department_id: '3600' 358 | 359 | - piid: 360 | name: American Battle 
Monuments Commission 361 | prefix_required: false 362 | department_id: '7400' 363 | 364 | - piid: 365 | name: Commission on Civil Rights 366 | prefix_required: false 367 | department_id: '9517' 368 | 369 | - piid: 370 | name: Committee for Purchase from People who are Blind and Severely Disabled 371 | prefix_required: false 372 | department_id: '9518' 373 | 374 | - piid: 375 | name: Defense Nuclear Facilities Safety Board 376 | prefix_required: false 377 | department_id: '9516' 378 | 379 | - piid: 380 | name: Executive Office of the President 381 | prefix_required: false 382 | department_id: '1100' 383 | 384 | - piid: 385 | name: Library of Congress 386 | prefix_required: false 387 | department_id: '0300' 388 | 389 | - piid: 390 | name: Merit Systems Protection Board 391 | prefix_required: false 392 | department_id: '4100' 393 | 394 | - piid: 395 | name: National Commission on Libraries and Information Science 396 | prefix_required: false 397 | department_id: '9527' 398 | 399 | - piid: 400 | name: National Gallery of Art 401 | prefix_required: false 402 | department_id: '3355' 403 | 404 | - piid: 405 | name: National Mediation Board 406 | prefix_required: false 407 | department_id: '9524' 408 | 409 | - piid: 410 | name: United States Holocaust Museum 411 | prefix_required: false 412 | department_id: '9531' -------------------------------------------------------------------------------- /federal_spending/metadata/departments.yml: -------------------------------------------------------------------------------- 1 | - piid: 2 | prefix: AID 3 | name: Agency for International Development 4 | prefix_required: true 5 | department_id: '1152' 6 | 7 | - piid: 8 | prefix: AFRH 9 | name: Armed Forces Retirement Home 10 | prefix_required: true 11 | department_id: '8400' 12 | 13 | - piid: 14 | prefix: BB 15 | name: Broadcasting Board of Governors 16 | prefix_required: true 17 | department_id: '9568' 18 | 19 | - piid: 20 | prefix: CF 21 | name: Commodity Futures Trading Commission 
22 | prefix_required: true 23 | department_id: '9507' 24 | 25 | - piid: 26 | prefix: CPSC 27 | name: Consumer Product Safety Commission 28 | prefix_required: true 29 | department_id: '6100' 30 | 31 | - piid: 32 | prefix: CFP 33 | name: Consumer Financial Protection Bureau 34 | prefix_required: true 35 | department_id: '955F' 36 | 37 | - piid: 38 | prefix: CNS 39 | name: Corporation for National and Community Service 40 | prefix_required: true 41 | department_id: '9577' 42 | 43 | - piid: 44 | prefix: CSOSA 45 | name: Court Services and Offender Supervision Agency 46 | prefix_required: true 47 | department_id: '9594' 48 | 49 | - piid: 50 | prefix: PSA 51 | name: Court Services and Offender Supervision Agency/Pretrial Services Agency 52 | prefix_required: true 53 | department_id: '959P' 54 | 55 | - piid: 56 | prefix: AG 57 | name: Department of Agriculture 58 | prefix_required: true 59 | department_id: '1200' 60 | 61 | - piid: 62 | prefix: DOC 63 | name: Department of Commerce 64 | prefix_required: true 65 | department_id: '1300' 66 | 67 | - piid: 68 | prefix: ED 69 | name: Department of Education 70 | prefix_required: true 71 | department_id: '9100' 72 | 73 | - piid: 74 | prefix: DE 75 | name: Department of Energy 76 | prefix_required: true 77 | department_id: '8900' 78 | 79 | - piid: 80 | prefix: HS 81 | name: Department of Homeland Security 82 | prefix_required: true 83 | department_id: '7000' 84 | 85 | - piid: 86 | prefix: IN 87 | name: Department of Interior 88 | prefix_required: true 89 | department_id: '1400' 90 | 91 | - piid: 92 | prefix: DJ 93 | name: Department of Justice 94 | prefix_required: true 95 | department_id: '1500' 96 | 97 | - piid: 98 | prefix: DOL 99 | name: Department of Labor 100 | prefix_required: true 101 | department_id: '1600' 102 | 103 | - piid: 104 | prefix: S 105 | name: Department of State 106 | prefix_required: true 107 | department_id: '1900' 108 | 109 | - piid: 110 | prefix: T 111 | name: Department of Treasury 112 | prefix_required: 
true 113 | department_id: '2000' 114 | 115 | - piid: 116 | prefix: EP 117 | name: Environmental Protection Agency 118 | prefix_required: true 119 | department_id: '6800' 120 | 121 | - piid: 122 | prefix: EEC 123 | name: Equal Employment Opportunity Commission 124 | prefix_required: true 125 | department_id: '4500' 126 | 127 | - piid: 128 | prefix: FCC 129 | name: Federal Communications Commission 130 | prefix_required: true 131 | department_id: '2700' 132 | 133 | - piid: 134 | prefix: FHF 135 | name: Federal Housing Finance Agency 136 | prefix_required: true 137 | department_id: '9542' 138 | 139 | - piid: 140 | prefix: FLRA 141 | name: Federal Labor Relations Authority 142 | prefix_required: true 143 | department_id: '5400' 144 | 145 | - piid: 146 | prefix: FE 147 | name: Federal Elections Commission 148 | prefix_required: true 149 | department_id: '9506' 150 | 151 | - piid: 152 | prefix: FMC 153 | name: Federal Maritime Commission 154 | prefix_required: false 155 | department_id: '6500' 156 | 157 | - piid: 158 | prefix: FERC 159 | name: Federal Energy Regulatory Commission 160 | prefix_required: true 161 | department_id: '8961' 162 | 163 | - piid: 164 | prefix: FTC 165 | name: Federal Trade Commission 166 | prefix_required: true 167 | department_id: '2900' 168 | 169 | - piid: 170 | prefix: FMCS 171 | name: Federal Mediation and Conciliation Service 172 | prefix_required: true 173 | department_id: '9300' 174 | 175 | - piid: 176 | prefix: GS 177 | name: General Services Administration 178 | prefix_required: true 179 | department_id: '4700' 180 | 181 | - piid: 182 | prefix: HHS 183 | name: Health and Human Services 184 | prefix_required: true 185 | department_id: '7500' 186 | 187 | - piid: 188 | prefix: DU 189 | name: Department of Housing and Urban Development 190 | prefix_required: true 191 | department_id: '8600' 192 | 193 | - piid: 194 | prefix: IMLS 195 | name: Institute of Museum and Library Services 196 | parent_department_id: '5900' 197 | prefix_required: 
true 198 | department_id: '5950' 199 | 200 | - piid: 201 | prefix: IBM 202 | name: 'International Boundary and Water Commission: U.S. - Mexico' 203 | parent_department_id: '1900' 204 | parent_prefix: 'S' 205 | prefix_required: true 206 | department_id: '19BM' 207 | 208 | - piid: 209 | prefix: ITC 210 | name: International Trade Commission 211 | prefix_required: true 212 | department_id: '3400' 213 | 214 | - piid: 215 | prefix: JFKC 216 | name: J. F. Kennedy Center for the Performing Arts 217 | prefix_required: true 218 | department_id: '3352' 219 | 220 | - piid: 221 | prefix: NN 222 | name: National Aeronautics and Space Administration 223 | prefix_required: true 224 | department_id: '8000' 225 | 226 | - piid: 227 | prefix: NAMA 228 | name: National Archives and Records Administration 229 | prefix_required: true 230 | department_id: '8800' 231 | 232 | - piid: 233 | prefix: NEA 234 | name: National Endowment for the Arts 235 | prefix_required: true 236 | department_id: '5920' 237 | 238 | - piid: 239 | prefix: NEH 240 | name: National Endowment for the Humanities 241 | prefix_required: true 242 | department_id: '5940' 243 | 244 | - piid: 245 | prefix: NLR 246 | name: National Labor Relations Board 247 | prefix_required: true 248 | department_id: '6300' 249 | 250 | - piid: 251 | prefix: NSF 252 | name: National Science Foundation 253 | prefix_required: true 254 | department_id: '4900' 255 | 256 | - piid: 257 | prefix: NTSB 258 | name: National Transportation Safety Board 259 | prefix_required: true 260 | department_id: '9508' 261 | 262 | - piid: 263 | prefix: OPM 264 | name: Office of Personnel Management 265 | prefix_required: true 266 | department_id: '2400' 267 | 268 | - piid: 269 | prefix: OSC 270 | name: Office of Special Counsel 271 | prefix_required: true 272 | department_id: '6201' 273 | 274 | - piid: 275 | prefix: OPI 276 | name: Overseas Private Investment Corporation 277 | prefix_required: true 278 | department_id: '7100' 279 | 280 | - piid: 281 | prefix: PC 
282 | name: Peace Corps 283 | prefix_required: true 284 | department_id: '1145' 285 | 286 | - piid: 287 | prefix: PBGC01 288 | name: Pension Benefit Guaranty Corporation 289 | parent_prefix: DOL 290 | parent_department_id: '1600' 291 | prefix_required: true 292 | department_id: '1665' 293 | 294 | - piid: 295 | prefix: RRB 296 | name: Railroad Retirement Board 297 | prefix_required: true 298 | department_id: '6000' 299 | 300 | - piid: 301 | prefix: RA 302 | name: Recovery Accountability and Transparency Board 303 | prefix_required: true 304 | department_id: '9561' 305 | 306 | - piid: 307 | prefix: SEC 308 | name: Securities and Exchange Commission 309 | prefix_required: true 310 | department_id: '5000' 311 | 312 | - piid: 313 | prefix: SBA 314 | name: Small Business Administration 315 | prefix_required: true 316 | department_id: '7300' 317 | 318 | - piid: 319 | prefix: F 320 | name: Smithsonian Institution 321 | prefix_required: true 322 | department_id: '3300' 323 | 324 | - piid: 325 | prefix: SS 326 | name: Social Security Administration 327 | prefix_required: true 328 | department_id: '2800' 329 | 330 | - piid: 331 | prefix: DT 332 | name: Department of Transportation 333 | prefix_required: true 334 | department_id: '6900' 335 | 336 | - piid: 337 | prefix: CIG 338 | name: Council of the Inspectors General on Integrity and Efficiency 339 | prefix_required: true 340 | 341 | - piid: 342 | prefix: NRC 343 | name: Nuclear Regulatory Commission 344 | prefix_required: true 345 | department_id: '3100' 346 | 347 | - piid: 348 | prefix: TDA 349 | name: United States Trade and Development Agency 350 | prefix_required: true 351 | department_id: '1153' 352 | 353 | - piid: 354 | prefix: V 355 | name: Department of Veterans Affairs 356 | prefix_required: true 357 | department_id: '3600' 358 | 359 | - piid: 360 | name: American Battle Monuments Commission 361 | prefix_required: false 362 | department_id: '7400' 363 | 364 | - piid: 365 | name: Commission on Civil Rights 366 | 
prefix_required: false 367 | department_id: '9517' 368 | 369 | - piid: 370 | name: Committee for Purchase from People who are Blind and Severely Disabled 371 | prefix_required: false 372 | department_id: '9518' 373 | 374 | - piid: 375 | name: Defense Nuclear Facilities Safety Board 376 | prefix_required: false 377 | department_id: '9516' 378 | 379 | - piid: 380 | name: Executive Office of the President 381 | prefix_required: false 382 | department_id: '1100' 383 | 384 | - piid: 385 | name: Library of Congress 386 | prefix_required: false 387 | department_id: '0300' 388 | 389 | - piid: 390 | name: Merit Systems Protection Board 391 | prefix_required: false 392 | department_id: '4100' 393 | 394 | - piid: 395 | name: National Commission on Libraries and Information Science 396 | prefix_required: false 397 | department_id: '9527' 398 | 399 | - piid: 400 | name: National Gallery of Art 401 | prefix_required: false 402 | department_id: '3355' 403 | 404 | - piid: 405 | name: National Mediation Board 406 | prefix_required: false 407 | department_id: '9524' 408 | 409 | - piid: 410 | name: United States Holocaust Museum 411 | prefix_required: false 412 | department_id: '9531' -------------------------------------------------------------------------------- /federal_spending/usaspending/management/commands/daily_update.py: -------------------------------------------------------------------------------- 1 | from django.core.management.base import CommandError, BaseCommand 2 | from federal_spending.usaspending.scripts.usaspending.fpds import FIELDS as CONTRACT_FIELDS, CALCULATED_FIELDS as CONTRACT_CALCULATED_FIELDS 3 | from federal_spending.usaspending.scripts.usaspending.faads import FIELDS as GRANT_FIELDS, CALCULATED_FIELDS as GRANT_CALCULATED_FIELDS 4 | from django.db import connections, transaction 5 | from federal_spending.usaspending.models import Contract, Grant 6 | from django.core import management 7 | from django.conf import settings 8 | import datetime 9 | import 
import requests
import cStringIO
import csv
import os
import sys
import time
from itertools import izip
from dateutil.parser import parse
import logging

logging.basicConfig(filename=settings.LOGGING_DIRECTORY + "/daily_update.log", level=logging.DEBUG)


class Command(BaseCommand):
    """Download USASpending delta feeds and apply them to Contract/Grant rows.

    ``handle`` empties the ``out/`` and ``datafeeds/`` directories under
    ``settings.CSV_PATH``, downloads one grants file and one contracts file
    per fiscal year, runs the two ``convert_usaspending_*`` commands and then
    applies every pipe-delimited row found in ``out/``.
    """

    ALL_CONTRACT_FIELDS = [x[0] for x in CONTRACT_FIELDS] + [x[0] for x in CONTRACT_CALCULATED_FIELDS]
    ALL_GRANT_FIELDS = [x[0] for x in GRANT_FIELDS] + [x[0] for x in GRANT_CALCULATED_FIELDS]

    # Positions of the fiscal-year column and of the date columns in a
    # converted row (same indexes the row updaters have always used).
    CONTRACT_FISCAL_YEAR_COLUMN = 97
    GRANT_FISCAL_YEAR_COLUMN = 46
    CONTRACT_DATE_COLUMNS = frozenset([13, 14, 15, 16, 68, 69, 158])
    GRANT_DATE_COLUMNS = frozenset([21, 22, 23, 55])

    # Rows rejected by the updaters. These were referenced but never defined
    # on this command, so the first rejected row raised AttributeError.
    contracts_failed = []
    grants_failed = []

    def notnull(self, val):
        """Return True if ``val`` is non-empty and does not contain 'null' (any case)."""
        return bool(val) and 'null' not in val.strip().lower()

    def handle(self, day=None, type='all', *args, **kwargs):
        """Run the full download / convert / import cycle.

        ``day`` is the ``since`` date sent to the feed as ``YYYY-MM-DD``; it
        defaults to yesterday. ``type`` is accepted but not read. Any
        exception is logged to the daily_update log and swallowed.
        """
        # Start each run with fresh failure lists instead of the shared
        # class-level ones.
        self.contracts_failed = []
        self.grants_failed = []

        try:
            print("deleting files in /datafeeds and /out")

            OUTPATH = settings.CSV_PATH + 'out/'
            for f in os.listdir(OUTPATH):
                os.remove(OUTPATH + f)

            INPATH = settings.CSV_PATH + 'datafeeds/'
            for f in os.listdir(INPATH):
                os.remove(INPATH + f)

            base_url = 'http://www.usaspending.gov/customcode/build_feed.php?data_source=PrimeAward&detail_level=Complete&ou_code=All&is_dept=false&recipient_state=All&pop_state=All&format=CSV&recovery_only=&record_count=10000000000'

            if not day:
                day = datetime.datetime.now() - datetime.timedelta(days=1)
                day = day.strftime("%Y-%m-%d")

            print("Downloading new files")

            for fy in settings.FISCAL_YEARS:
                url = base_url + '&fiscal_year=' + str(fy) + '&since=' + day

                # Grants, direct payments, insurance and loans share one file.
                c = requests.get(url + '&spending_category=Grants')
                # `with` closes (and flushes) the file before the converters
                # read it; the original never closed either download file.
                with open(INPATH + str(fy) + '_All_Grants_Delta_' + day + '.csv', 'w') as outf:
                    outf.write(c.content)
                    for category in ('DirectPayments', 'Insurance', 'Loans'):
                        c = requests.get(url + '&spending_category=' + category)
                        if c.content:
                            # Drop the header line. partition() yields '' when
                            # there is no newline, where index() used to raise.
                            outf.write(c.content.partition('\n')[2])

                c = requests.get(url + '&spending_category=Contracts')
                with open(INPATH + str(fy) + '_All_Contracts_Delta_' + day + '.csv', 'w') as outf:
                    outf.write(c.content)

            print("sleeping for a minute")
            time.sleep(60)

            print("processing downloaded files into proper format")
            management.call_command('convert_usaspending_contracts')
            management.call_command('convert_usaspending_grants')

            print("looping through files")
            for sname in os.listdir(OUTPATH):
                if 'contracts' in sname:
                    self.process_contract_file(sname, OUTPATH)

                if 'grants' in sname:
                    self.process_grant_file(sname, OUTPATH)
        except Exception as e:
            # Still best-effort (nothing is re-raised), but logged at ERROR
            # with the traceback rather than as a bare DEBUG message.
            logging.exception("An exception was thrown: %s", e)

    @transaction.commit_on_success
    def process_contract_file(self, sname, OUTPATH):
        """Apply every row of the converted contracts file ``OUTPATH + sname``."""
        self._apply_rows(sname, OUTPATH, self.update_contract_row)

    @transaction.commit_on_success
    def process_grant_file(self, sname, OUTPATH):
        """Apply every row of the converted grants file ``OUTPATH + sname``."""
        self._apply_rows(sname, OUTPATH, self.update_grant_row)

    def _apply_rows(self, sname, OUTPATH, update_row):
        """Feed each pipe-delimited row of the file to ``update_row``, reporting every 1000 rows."""
        print("processing file {0}".format(sname))
        with open(OUTPATH + sname) as f:
            for line_total, line in enumerate(csv.reader(f, delimiter='|')):
                update_row(line)
                if line_total % 1000 == 0:
                    print("... on line {0}".format(line_total))

    def check_fiscal_year(self, line, num):
        """Return True if ``line[num]`` exists and is a four-character value.

        Prints a diagnostic and returns False otherwise.
        """
        # Strictly greater: line[num] only exists when len(line) > num
        # (the original `>=` let len(line) == num through to an IndexError).
        if len(line) > num:
            fy = line[num]
            if fy and len(fy) == 4:
                return True
            print("it failed! {0}".format(line[0]))
            return False
        # csv.reader yields [] for blank lines, so line[0] may not exist.
        print("length failed {0} it's only {1}".format(line[0] if line else '', len(line)))
        return False

    def _copy_columns(self, obj, field_names, line, date_columns, dates_only):
        """Copy the row's values onto ``obj`` by position, then save it.

        Columns listed in ``date_columns`` are parsed with dateutil (reduced
        to a date when ``dates_only`` is true) or set to None when empty,
        'null'-like or overflowing. A literal 'NULL' in any column becomes None.
        """
        for i, (column_name, value) in enumerate(izip(field_names, line)):
            if i in date_columns:
                if self.notnull(value):
                    try:
                        value = parse(value)
                        if dates_only:
                            value = value.date()
                    except OverflowError:
                        value = None
                else:
                    value = None
            if value == 'NULL':  # convert CSV/Postgresql null values to python null
                value = None
            setattr(obj, column_name, value)
        obj.save()

    def update_contract_row(self, line):
        """Insert, update or delete the Contract described by one converted row.

        Column 0 is the unique transaction id, column 1 the status and
        column 97 the fiscal year. An 'inactive' status deletes the record.
        Rows that are too short or have a bad fiscal year are appended to
        ``contracts_failed`` and skipped.
        """
        fy_col = self.CONTRACT_FISCAL_YEAR_COLUMN
        if len(line) <= fy_col:
            # Blank or truncated row: neither branch below could index it.
            print("length failed {0} it's only {1}".format(line[0] if line else '', len(line)))
            self.contracts_failed.append(line)
            return

        if line[1].strip().lower() == 'inactive':
            # means that this update deletes a record
            try:
                c = Contract.objects.get(unique_transaction_id=line[0], fiscal_year=line[fy_col])
                print("Deleting {0}".format(line[0]))
                c.delete()
            except Contract.DoesNotExist:
                pass
            except Contract.MultipleObjectsReturned:
                for con in Contract.objects.filter(unique_transaction_id=line[0], fiscal_year=line[fy_col]):
                    con.delete()
            return

        if not self.check_fiscal_year(line, fy_col):
            self.contracts_failed.append(line)
            return

        c = None
        try:
            c = Contract.objects.get(unique_transaction_id=line[0], fiscal_year=line[fy_col])
        except Contract.DoesNotExist:
            c = Contract(unique_transaction_id=line[0], fiscal_year=line[fy_col])
        except Contract.MultipleObjectsReturned:
            # keep the newest row (highest id) and delete the extra objects
            cset = Contract.objects.filter(unique_transaction_id=line[0], fiscal_year=line[fy_col]).order_by('-id')
            for i, obj in enumerate(cset):
                if i == 0:
                    c = obj
                else:
                    obj.delete()

        self._copy_columns(c, self.ALL_CONTRACT_FIELDS, line, self.CONTRACT_DATE_COLUMNS, False)

    def update_grant_row(self, line):
        """Insert, update or delete the Grant described by one converted row.

        Same contract as ``update_contract_row`` with the fiscal year in
        column 46. Rejected rows go to ``grants_failed`` (the original
        appended them to the contracts list).
        """
        fy_col = self.GRANT_FISCAL_YEAR_COLUMN
        if len(line) <= fy_col:
            # Blank or truncated row: neither branch below could index it.
            print("length failed {0} it's only {1}".format(line[0] if line else '', len(line)))
            self.grants_failed.append(line)
            return

        if line[1].strip().lower() == 'inactive':
            # means that this update deletes a record
            try:
                c = Grant.objects.get(unique_transaction_id=line[0], fiscal_year=line[fy_col])
                print("Deleting {0}".format(line[0]))
                c.delete()
            except Grant.DoesNotExist:
                pass
            except Grant.MultipleObjectsReturned:
                for c in Grant.objects.filter(unique_transaction_id=line[0], fiscal_year=line[fy_col]):
                    c.delete()
            return

        if not self.check_fiscal_year(line, fy_col):
            self.grants_failed.append(line)
            return

        c = None
        try:
            c = Grant.objects.get(unique_transaction_id=line[0], fiscal_year=line[fy_col])
        except Grant.DoesNotExist:
            c = Grant(unique_transaction_id=line[0], fiscal_year=line[fy_col])
        except Grant.MultipleObjectsReturned as f:
            print(f)
            cset = Grant.objects.filter(unique_transaction_id=line[0], fiscal_year=line[fy_col]).order_by('-id')
            # keep the newest row (highest id) and delete the extra objects
            for i, obj in enumerate(cset):
                print(obj)
                if i == 0:
                    c = obj
                else:
                    obj.delete()

        self._copy_columns(c, self.ALL_GRANT_FIELDS, line, self.GRANT_DATE_COLUMNS, True)

# clear out dailies dir
# for each FY, download the csv
# convert each csv to normal format
# open converted csvs and import each row
from federal_spending.usaspending.models import Contract, Grant
from federal_spending.usaspending.scripts.usaspending.contracts_loader import Loader
from federal_spending.usaspending.scripts.usaspending.fpds import FIELDS as CONTRACT_FIELDS, CALCULATED_FIELDS as CONTRACT_CALCULATED_FIELDS
from federal_spending.usaspending.scripts.usaspending.faads import FIELDS as GRANT_FIELDS, CALCULATED_FIELDS as GRANT_CALCULATED_FIELDS
from django.core.management.base import BaseCommand
from django.core import management
from django.conf import settings
from django.db import connections, connection, transaction
from django.db.models import sql
from itertools import izip
from dateutil.parser import parse
import os
import csv
import datetime
import time
from federal_spending.usaspending.management.commands.create_indexes import contracts_idx, grants_idx
from federal_spending.usaspending.scripts.usaspending.config import INDEX_COLS_BY_TABLE


def notnull(val):
    """Return True if ``val`` is non-empty and does not contain 'null' (any case)."""
    return bool(val) and 'null' not in val.strip().lower()


class Command(BaseCommand):
    """Apply already-converted delta files in ``out/`` to the Contract/Grant tables.

    Indexes are dropped before the import and the statements in the
    ``*_idx_add`` slices are executed afterwards. Rows that fail validation
    are collected and written to dated CSV logs by ``write_log``.
    """

    ALL_CONTRACT_FIELDS = [x[0] for x in CONTRACT_FIELDS] + [x[0] for x in CONTRACT_CALCULATED_FIELDS]
    ALL_GRANT_FIELDS = [x[0] for x in GRANT_FIELDS] + [x[0] for x in GRANT_CALCULATED_FIELDS]

    # Positions of the fiscal-year column and of the date columns in a
    # converted row (same indexes the row updaters have always used).
    CONTRACT_FISCAL_YEAR_COLUMN = 97
    GRANT_FISCAL_YEAR_COLUMN = 46
    CONTRACT_DATE_COLUMNS = frozenset([13, 14, 15, 16, 68, 69, 158])
    GRANT_DATE_COLUMNS = frozenset([21, 22, 23, 55])

    contracts_failed = []
    grants_failed = []

    contracts_idx_drop = contracts_idx[:10]
    contracts_idx_add = contracts_idx[12:22]
    grants_idx_drop = grants_idx[:3]
    grants_idx_add = grants_idx[5:8]

    @transaction.commit_manually
    def handle(self, download_file='delta_downloads.txt', **options):
        """Drop indexes, apply every contracts/grants file in ``out/``, re-add indexes.

        ``download_file`` is accepted for compatibility; the download and
        convert steps that used it were disabled, so this command only
        processes files already present in ``settings.CSV_PATH + 'out/'``.
        """
        OUTPATH = settings.CSV_PATH + 'out/'
        c = connections['default'].cursor()

        # Under commit_manually every path must end in an explicit commit or
        # rollback. The original committed only inside the grants loop, so
        # contract rows and the final partial batch were never committed.
        try:
            print('deleting unecessary indexes')
            for x in self.contracts_idx_drop:
                print(x)
                c.execute(x)

            for x in self.grants_idx_drop:
                print(x)
                c.execute(x)

            for tab in ['usaspending_grant', 'usaspending_contract']:
                for fy in settings.FISCAL_YEARS:
                    for i, colname in enumerate(INDEX_COLS_BY_TABLE[tab]):
                        if 'fiscal_year' not in colname and 'unique_transaction_id' not in colname:
                            del_stmt = 'drop index if exists {0}_{1}_{2}; commit;'.format(tab, fy, i)
                            print(del_stmt)
                            c.execute(del_stmt)

            for sname in os.listdir(OUTPATH):
                if 'contracts' in sname:
                    self._process_file(sname, OUTPATH, self.update_contract_row)
                if 'grants' in sname:
                    self._process_file(sname, OUTPATH, self.update_grant_row)

            print('recreating unecessary indexes')
            for x in self.contracts_idx_add:
                print(x)
                c.execute(x)

            for x in self.grants_idx_add:
                print(x)
                c.execute(x)

            transaction.commit()
        except Exception:
            transaction.rollback()
            raise

        self.write_log()

    def _process_file(self, sname, OUTPATH, update_row):
        """Feed each pipe-delimited row of the file to ``update_row``, committing every 1000 rows."""
        print("processing file {0}".format(sname))
        with open(OUTPATH + sname) as f:
            for line_total, line in enumerate(csv.reader(f, delimiter='|')):
                update_row(line)
                if line_total % 1000 == 0:
                    print("... on line {0}".format(line_total))
                    transaction.commit()

    def check_fiscal_year(self, line, num):
        """Return True if ``line[num]`` exists and is a four-character value.

        Prints a diagnostic and returns False otherwise.
        """
        # Strictly greater: line[num] only exists when len(line) > num
        # (the original `>=` let len(line) == num through to an IndexError).
        if len(line) > num:
            fy = line[num]
            if fy and len(fy) == 4:
                return True
            print("it failed! {0}".format(line[0]))
            return False
        # csv.reader yields [] for blank lines, so line[0] may not exist.
        print("length failed {0} it's only {1}".format(line[0] if line else '', len(line)))
        return False

    def _copy_columns(self, obj, field_names, line, date_columns, dates_only):
        """Copy the row's values onto ``obj`` by position, then save it.

        Columns listed in ``date_columns`` are parsed with dateutil (reduced
        to a date when ``dates_only`` is true) or set to None when empty,
        'null'-like or overflowing. A literal 'NULL' in any column becomes None.
        """
        for i, (column_name, value) in enumerate(izip(field_names, line)):
            if i in date_columns:
                if notnull(value):
                    try:
                        value = parse(value)
                        if dates_only:
                            value = value.date()
                    except OverflowError:
                        value = None
                else:
                    value = None
            if value == 'NULL':  # convert CSV/Postgresql null values to python null
                value = None
            setattr(obj, column_name, value)
        obj.save()

    def update_contract_row(self, line):
        """Insert, update or delete the Contract described by one converted row.

        Column 0 is the unique transaction id, column 1 the status and
        column 97 the fiscal year. An 'inactive' status deletes the record.
        Rows that are too short or have a bad fiscal year are appended to
        ``contracts_failed`` and skipped.
        """
        fy_col = self.CONTRACT_FISCAL_YEAR_COLUMN
        if len(line) <= fy_col:
            # Blank or truncated row: neither branch below could index it.
            print("length failed {0} it's only {1}".format(line[0] if line else '', len(line)))
            self.contracts_failed.append(line)
            return

        if line[1].strip().lower() == 'inactive':
            # means that this update deletes a record
            try:
                c = Contract.objects.get(unique_transaction_id=line[0], fiscal_year=line[fy_col])
                print("Deleting {0}".format(line[0]))
                c.delete()
            except Contract.DoesNotExist:
                pass
            except Contract.MultipleObjectsReturned:
                # Duplicates used to abort the import here; delete them all.
                for con in Contract.objects.filter(unique_transaction_id=line[0], fiscal_year=line[fy_col]):
                    con.delete()
            return

        if not self.check_fiscal_year(line, fy_col):
            self.contracts_failed.append(line)
            return

        c = None
        try:
            c = Contract.objects.get(unique_transaction_id=line[0], fiscal_year=line[fy_col])
        except Contract.DoesNotExist:
            c = Contract(unique_transaction_id=line[0], fiscal_year=line[fy_col])
        except Contract.MultipleObjectsReturned:
            # keep the newest row (highest id) and delete the extra objects
            cset = Contract.objects.filter(unique_transaction_id=line[0], fiscal_year=line[fy_col]).order_by('-id')
            for i, obj in enumerate(cset):
                if i == 0:
                    c = obj
                else:
                    obj.delete()

        self._copy_columns(c, self.ALL_CONTRACT_FIELDS, line, self.CONTRACT_DATE_COLUMNS, False)

    def update_grant_row(self, line):
        """Insert, update or delete the Grant described by one converted row.

        Same contract as ``update_contract_row`` with the fiscal year in
        column 46. Rejected rows go to ``grants_failed`` (the original
        appended them to the contracts list, leaving the grants log empty).
        """
        fy_col = self.GRANT_FISCAL_YEAR_COLUMN
        if len(line) <= fy_col:
            # Blank or truncated row: neither branch below could index it.
            print("length failed {0} it's only {1}".format(line[0] if line else '', len(line)))
            self.grants_failed.append(line)
            return

        if line[1].strip().lower() == 'inactive':
            # means that this update deletes a record
            try:
                c = Grant.objects.get(unique_transaction_id=line[0], fiscal_year=line[fy_col])
                print("Deleting {0}".format(line[0]))
                c.delete()
            except Grant.DoesNotExist:
                pass
            except Grant.MultipleObjectsReturned:
                # Duplicates used to abort the import here; delete them all.
                for g in Grant.objects.filter(unique_transaction_id=line[0], fiscal_year=line[fy_col]):
                    g.delete()
            return

        if not self.check_fiscal_year(line, fy_col):
            self.grants_failed.append(line)
            return

        c = None
        try:
            c = Grant.objects.get(unique_transaction_id=line[0], fiscal_year=line[fy_col])
        except Grant.DoesNotExist:
            c = Grant(unique_transaction_id=line[0], fiscal_year=line[fy_col])
        except Grant.MultipleObjectsReturned as f:
            print(f)
            cset = Grant.objects.filter(unique_transaction_id=line[0], fiscal_year=line[fy_col]).order_by('-id')
            # keep the newest row (highest id) and delete the extra objects
            for i, obj in enumerate(cset):
                print(obj)
                if i == 0:
                    c = obj
                else:
                    obj.delete()

        self._copy_columns(c, self.ALL_GRANT_FIELDS, line, self.GRANT_DATE_COLUMNS, True)

    def write_log(self):
        """Write the rejected contract and grant rows to dated CSV files in ``settings.LOGGING_DIRECTORY``."""
        today = datetime.datetime.now()
        print("Writing Log")
        # `with` closes each file so the rows are flushed to disk.
        with open(settings.LOGGING_DIRECTORY + '/failed_contracts_{0}.csv'.format(today.strftime('%Y%m%d')), 'w+') as f:
            csv.writer(f).writerows(self.contracts_failed)

        with open(settings.LOGGING_DIRECTORY + '/failed_grants_{0}.csv'.format(today.strftime('%Y%m%d')), 'w+') as f:
            csv.writer(f).writerows(self.grants_failed)
from localflavor.us.models import USStateField
from django.db import models
import federal_spending.usaspending
from django.db import connection


class ContractManager(models.Manager):
    """Manager that can point the Contract model at a per-fiscal-year table."""

    def get_table_for(self, fiscal_year):
        """Return the table name ``usaspending_contract_<fiscal_year>``."""
        table = 'usaspending_contract_{0}'.format(fiscal_year)
        return table

    def in_fiscal_year(self, fiscal_year):
        """Set the model's ``db_table`` to the table for ``fiscal_year`` and return this manager.

        NOTE(review): ``_meta`` belongs to the model class, so this presumably
        affects every later query on the model until it is called again.
        """
        self.fiscal_year = fiscal_year
        self.model._meta.db_table = self.get_table_for(fiscal_year)
        return self


class Contract(models.Model):
    """One contract transaction; saved to and deleted from the table for its fiscal year."""

    objects = ContractManager()

    unique_transaction_id = models.CharField(max_length=32)
    transaction_status = models.CharField(max_length=32, blank=True)
    obligatedamount = models.DecimalField(default=0, max_digits=20, decimal_places=2, blank=True, null=True)
    baseandexercisedoptionsvalue = models.DecimalField(default=0, max_digits=20, decimal_places=2, blank=True, null=True)
    baseandalloptionsvalue = models.DecimalField(default=0, max_digits=20, decimal_places=2, blank=True, null=True)
    maj_agency_cat = models.CharField(max_length=2, blank=True)
    mod_agency = models.CharField(max_length=4, blank=True)
    maj_fund_agency_cat = models.CharField(max_length=2, blank=True)
    contractingofficeagencyid = models.CharField(max_length=4, blank=True)
    contractingofficeid = models.CharField(max_length=6, blank=True)
    fundingrequestingagencyid = models.CharField(max_length=4, blank=True)
    fundingrequestingofficeid = models.CharField(max_length=6, blank=True)
    fundedbyforeignentity = models.CharField(max_length=21, blank=True)
    signeddate = models.DateField(blank=True, null=True)
    effectivedate = models.DateField(blank=True, null=True)
    currentcompletiondate = models.DateField(blank=True, null=True)
    ultimatecompletiondate = models.DateField(blank=True, null=True)
    lastdatetoorder = models.CharField(max_length=32, blank=True)
    contractactiontype = models.CharField(max_length=4, blank=True)
    reasonformodification = models.CharField(max_length=1, blank=True)
    typeofcontractpricing = models.CharField(max_length=2, blank=True)
    priceevaluationpercentdifference = models.CharField(max_length=100, blank=True)
    subcontractplan = models.CharField(max_length=1, blank=True)
    lettercontract = models.CharField(max_length=1, blank=True)
    multiyearcontract = models.NullBooleanField()
    performancebasedservicecontract = models.CharField(max_length=1, blank=True)
    majorprogramcode = models.CharField(max_length=100, blank=True)
    contingencyhumanitarianpeacekeepingoperation = models.CharField(max_length=1, blank=True)
    contractfinancing = models.CharField(max_length=1, blank=True)
    costorpricingdata = models.CharField(max_length=1, blank=True)
    costaccountingstandardsclause = models.CharField(max_length=1, blank=True)
    descriptionofcontractrequirement = models.TextField(blank=True, null=True)
    purchasecardaspaymentmethod = models.NullBooleanField()
    numberofactions = models.IntegerField(null=True)
    nationalinterestactioncode = models.CharField(max_length=64, blank=True)
    progsourceagency = models.CharField(max_length=2, blank=True)
    progsourceaccount = models.CharField(max_length=4, blank=True)
    progsourcesubacct = models.CharField(max_length=3, blank=True)
    account_title = models.CharField(max_length=255, blank=True)
    rec_flag = models.NullBooleanField()
    typeofidc = models.CharField(max_length=41, blank=True)
    multipleorsingleawardidc = models.CharField(max_length=1, blank=True)
    programacronym = models.CharField(max_length=32, blank=True)
    vendorname = models.CharField(max_length=400, blank=True)
    vendoralternatename = models.CharField(max_length=400, blank=True)
    vendorlegalorganizationname = models.CharField(max_length=400, blank=True)
    vendordoingasbusinessname = models.CharField(max_length=400, blank=True)
    divisionname = models.CharField(max_length=400, blank=True)
    divisionnumberorofficecode = models.CharField(max_length=10, blank=True)
    vendorenabled = models.CharField(max_length=10, blank=True)
    vendorlocationdisableflag = models.NullBooleanField()
    ccrexception = models.CharField(max_length=255, blank=True)
    streetaddress = models.CharField(max_length=400, blank=True)
    streetaddress2 = models.CharField(max_length=400, blank=True)
    streetaddress3 = models.CharField(max_length=400, blank=True)
    city = models.CharField(max_length=35, blank=True)
    state = models.CharField(max_length=35, blank=True)
    zipcode = models.CharField(max_length=20, blank=True)
    vendorcountrycode = models.CharField(max_length=100, blank=True)
    vendor_state_code = models.CharField(max_length=32, blank=True)
    vendor_cd = models.CharField(max_length=37, blank=True)
    congressionaldistrict = models.CharField(max_length=37, blank=True)
    vendorsitecode = models.CharField(max_length=16, blank=True)
    vendoralternatesitecode = models.CharField(max_length=20, blank=True)
    dunsnumber = models.CharField(max_length=13, blank=True)
    parentdunsnumber = models.CharField(max_length=13, blank=True)
    phoneno = models.CharField(max_length=20, blank=True)
    faxno = models.CharField(max_length=20, blank=True)
    registrationdate = models.DateField(blank=True, null=True)
    renewaldate = models.DateField(blank=True, null=True)
    mod_parent = models.CharField(max_length=100, blank=True)
    locationcode = models.CharField(max_length=5, blank=True)
    statecode = USStateField(blank=True)
    pop_state_code = USStateField(blank=True)
    placeofperformancecountrycode = models.CharField(max_length=3, blank=True)
    placeofperformancezipcode = models.CharField(max_length=10, blank=True)
    pop_cd = models.CharField(max_length=4, blank=True)
    placeofperformancecongressionaldistrict = models.CharField(max_length=6, blank=True)
    psc_cat = models.CharField(max_length=2, blank=True)
    productorservicecode = models.CharField(max_length=4, blank=True)
    systemequipmentcode = models.CharField(max_length=4, blank=True)
    claimantprogramcode = models.CharField(max_length=3, blank=True)
    principalnaicscode = models.CharField(max_length=6, blank=True)
    informationtechnologycommercialitemcategory = models.CharField(max_length=1, blank=True)
    gfe_gfp = models.NullBooleanField()
    useofepadesignatedproducts = models.CharField(max_length=1, blank=True)
    recoveredmaterialclauses = models.CharField(max_length=1, blank=True)
    seatransportation = models.CharField(max_length=1, blank=True)
    contractbundling = models.CharField(max_length=1, blank=True)
    consolidatedcontract = models.NullBooleanField()
    countryoforigin = models.CharField(max_length=3, blank=True)
    placeofmanufacture = models.CharField(max_length=1, blank=True)
    manufacturingorganizationtype = models.CharField(max_length=4, blank=True)
    agencyid = models.CharField(max_length=4, blank=True)
    piid = models.CharField(max_length=50, blank=True)
    modnumber = models.CharField(max_length=25, blank=True)
    transactionnumber = models.CharField(max_length=6, blank=True)
    fiscal_year = models.IntegerField(null=True)
    idvagencyid = models.CharField(max_length=4, blank=True)
    idvpiid = models.CharField(max_length=50, blank=True)
    idvmodificationnumber = models.CharField(max_length=25, blank=True)
    solicitationid = models.CharField(max_length=25, blank=True)
    extentcompeted = models.CharField(max_length=3, blank=True)
    reasonnotcompeted = models.CharField(max_length=3, blank=True)
    numberofoffersreceived = models.IntegerField(null=True)
    commercialitemacquisitionprocedures = models.CharField(max_length=1, blank=True)
    commercialitemtestprogram = models.NullBooleanField()
    smallbusinesscompetitivenessdemonstrationprogram = models.NullBooleanField()
    a76action = models.NullBooleanField()
    competitiveprocedures = models.CharField(max_length=3, blank=True)
    solicitationprocedures = models.CharField(max_length=5, blank=True)
    typeofsetaside = models.CharField(max_length=10, blank=True)
    localareasetaside = models.CharField(max_length=32, blank=True)
    evaluatedpreference = models.CharField(max_length=6, blank=True)
    fedbizopps = models.CharField(max_length=32, blank=True)
    research = models.CharField(max_length=3, blank=True)
    statutoryexceptiontofairopportunity = models.CharField(max_length=4, blank=True)
    organizationaltype = models.CharField(max_length=64, blank=True)
    numberofemployees = models.IntegerField(null=True)
    annualrevenue = models.DecimalField(max_digits=20, decimal_places=2, blank=True, null=True)
    firm8aflag = models.NullBooleanField()
    hubzoneflag = models.NullBooleanField()
    sdbflag = models.NullBooleanField()
    shelteredworkshopflag = models.NullBooleanField()
    hbcuflag = models.NullBooleanField()
    educationalinstitutionflag = models.NullBooleanField()
    womenownedflag = models.NullBooleanField()
    veteranownedflag = models.NullBooleanField()
    srdvobflag = models.NullBooleanField()
    localgovernmentflag = models.NullBooleanField()
    minorityinstitutionflag = models.NullBooleanField()
    aiobflag = models.CharField(max_length=1, blank=True)
    stategovernmentflag = models.NullBooleanField()
    federalgovernmentflag = models.NullBooleanField()
    minorityownedbusinessflag = models.NullBooleanField()
    apaobflag = models.NullBooleanField()
    tribalgovernmentflag = models.NullBooleanField()
    baobflag = models.NullBooleanField()
    naobflag = models.NullBooleanField()
    saaobflag = models.NullBooleanField()
    nonprofitorganizationflag = models.NullBooleanField()
    haobflag = models.NullBooleanField()
    emergingsmallbusinessflag = models.NullBooleanField()
    hospitalflag = models.NullBooleanField()
    contractingofficerbusinesssizedetermination = models.CharField(max_length=1, blank=True)
    receivescontracts = models.CharField(max_length=1, blank=True)
    receivesgrants = models.CharField(max_length=1, blank=True)
    receivescontractsandgrants = models.CharField(max_length=1, blank=True)
    walshhealyact = models.NullBooleanField()
    servicecontractact = models.NullBooleanField()
    davisbaconact = models.NullBooleanField()
    clingercohenact = models.NullBooleanField()
    otherstatutoryauthority = models.TextField(blank=True, null=True)
    interagencycontractingauthority = models.CharField(max_length=1, blank=True)
    isserviceprovider = models.NullBooleanField()

    agency_name = models.CharField(max_length=255, blank=True)
    contracting_agency_name = models.CharField(max_length=255, blank=True)
    requesting_agency_name = models.CharField(max_length=255, blank=True)
    imported_on = models.DateField(auto_now_add=True)

    def save(self, *args, **kwargs):
        """Point the model at this row's fiscal-year table, then save.

        Arguments are forwarded to ``Model.save``; the original accepted
        them but dropped them.
        """
        Contract.objects.in_fiscal_year(self.fiscal_year)
        super(Contract, self).save(*args, **kwargs)

    def delete(self, *args, **kwargs):
        """Point the model at this row's fiscal-year table, then delete.

        Arguments are forwarded to ``Model.delete``; the original accepted
        them but dropped them.
        """
        Contract.objects.in_fiscal_year(self.fiscal_year)
        super(Contract, self).delete(*args, **kwargs)


RECORD_TYPES = (
    ('1', "County aggregate reporting"),
    ('2', "Action-by-action reporting"),
)

ACTION_TYPES = (
    ('A', 'New assistance action'),
    ('B', 'Continuation'),
    ('C', 'Revision'),
    ('D', 'Funding adjustment to completed project'),
)

RECIPIENT_TYPES = (
    ('00', 'State government'),
    ('01', 'County government'),
    ('02', 'City or township government'),
    ('03', '03'),
    ('04', 'Special district government'),
    ('05', 'Independent school district'),
    ('06', 'State controlled institution of higher education'),
    ('07', '07'),
    ('11', 'Indian tribe'),
    ('12', 'Other nonprofit'),
    ('20', 'Private higher education'),
    ('21', 'individual'),
    ('22', 'Profit organization'),
    ('23', 'Small business'),
    ('25', 'Other'),
    ('88', '88'),
    ('90', '90'),
)

RECIPIENT_CATEGORIES = (
    ('f', 'For Profit'),
    ('g', 'Government'),
    ('h', 'Higher Education'),
    ('i', 'Individual'),
    ('n', 'Nonprofit'),
    ('o', 'Other'),
)

ASSISTANCE_TYPES = (
    ('00', '00'),
    ('02', 'Block grant (A)'),
    ('03', 'Formula grant (A)'),
    ('04', 'Project grant (B)'),
    ('05', 'Cooperative agreement (B)'),
    ('06', 'Direct payment for specified use, as a subsidy or other non-reimbursable direct financial aid (C)'),
    ('07', 'Direct loan (D)'),
    ('08', 'Guaranteed/insured loan (F)'),
    ('09', 'Insurance (G)'),
    ('0E', '0E'),
    ('10', 'Direct payment with unrestricted use (D)'),
    ('11', 'Other reimbursable, contingent, intangible or indirect financial assistance'),
    ('25', '25'),
    ('99', '99'),
)

ASSISTANCE_CATEGORIES = (
    ('d', 'Direct Payments'),
    ('g', 'Grants and Cooperative Agreements'),
    ('i', 'Insurance'),
    ('l', 'Loans'),
    ('o', 'Other'),
)

CORRECTIONS = (
    ('0', ''),
    ('2', ''),
    ('5', ''),
    ('6', ''),
    ('B', ''),
    ('C', ''),
    ('F', ''),
    ('L', ''),
    ('_', ''),
)

BFIS = (
    ('000', ''),
    ('0NO', ''),
    ('NON', ''),
    ('REC', ''),
)

AGENCY_CATEGORIES = (
    ('12', ''),
    ('13', ''),
    ('14', ''),
    ('15', ''),
    ('16', ''),
    ('19', ''),
    ('20', ''),
    ('24', ''),
    ('28', ''),
    ('31', ''),
    ('36', ''),
    ('49', ''),
    ('68', ''),
    ('69', ''),
    ('70', ''),
    ('72', ''),
    ('73', ''),
    ('75', ''),
    ('80', ''),
    ('86', ''),
    ('89', ''),
    ('91', ''),
    ('97', ''),
    ('ot', ''),
)


class GrantManager(models.Manager):
    """Manager that can point the Grant model at a per-fiscal-year table."""

    def get_table_for(self, fiscal_year):
        """Return the table name ``usaspending_grant_<fiscal_year>``."""
        table = 'usaspending_grant_{0}'.format(fiscal_year)
        return table

    def in_fiscal_year(self, fiscal_year):
        """Set the model's ``db_table`` to the table for ``fiscal_year`` and return this manager."""
        self.fiscal_year = fiscal_year
        self.model._meta.db_table = self.get_table_for(fiscal_year)
        return self


class Grant(models.Model):

    objects = GrantManager()

    imported_on = models.DateField(auto_now_add=True)
    fiscal_year = models.IntegerField()
    record_type = models.CharField(max_length=1, blank=True, choices=RECORD_TYPES)
    rec_flag = models.NullBooleanField(blank=True)
    cfda_program_num = models.CharField(max_length=8, blank=True)
    cfda_program_title = models.CharField(max_length=255, blank=True)
    sai_number = models.CharField(max_length=20, blank=True)
    account_title = models.CharField(max_length=100, blank=True)
    recipient_name = models.CharField(max_length=100, blank=True)
    recipient_city_name = models.CharField(max_length=21, blank=True)
    recipient_city_code = models.CharField(max_length=5, blank=True)
    recipient_county_name = models.CharField(max_length=21, blank=True)
    recipient_county_code = models.CharField(max_length=3, blank=True)
    recipient_state_code = USStateField(blank=True)
    recipient_zip = models.CharField(max_length=9, blank=True)
    recipient_country_code = models.CharField(max_length=3, blank=True)
    recipient_cd = models.CharField(max_length=4, blank=True)
    recipient_type = models.CharField(max_length=2, blank=True, choices=RECIPIENT_TYPES)
    recip_cat_type = models.CharField(max_length=1, blank=True, choices=RECIPIENT_CATEGORIES)
335 | receip_addr1 = models.CharField(max_length=100, blank=True) 336 | receip_addr2 = models.CharField(max_length=100, blank=True) 337 | receip_addr3 = models.CharField(max_length=100, blank=True) 338 | duns_no = models.CharField(max_length=13, blank=True) 339 | obligation_action_date = models.DateField(blank=True, null=True) 340 | action_type = models.CharField(max_length=1, blank=True, choices=ACTION_TYPES) 341 | agency_name = models.CharField(max_length=72, blank=True) 342 | agency_code = models.CharField(max_length=4, blank=True) 343 | maj_agency_cat = models.CharField(max_length=2, blank=True) 344 | federal_award_id = models.CharField(max_length=16, blank=True) 345 | federal_award_mod = models.CharField(max_length=4, blank=True) 346 | fed_funding_amount = models.BigIntegerField(blank=True, default=0) 347 | non_fed_funding_amount = models.BigIntegerField(blank=True, default=0) 348 | total_funding_amount = models.BigIntegerField(blank=True, default=0) 349 | face_loan_guran = models.BigIntegerField(blank=True, default=0) 350 | orig_sub_guran = models.BigIntegerField(blank=True, default=0) 351 | assistance_type = models.CharField(max_length=2, blank=True, choices=ASSISTANCE_TYPES) 352 | asst_cat_type = models.CharField(max_length=1, blank=True, choices=ASSISTANCE_CATEGORIES) 353 | correction_late_ind = models.CharField(max_length=1, blank=True, choices=CORRECTIONS) 354 | principal_place_code = models.CharField(max_length=7, blank=True) 355 | principal_place_state = models.CharField(max_length=64, blank=True) 356 | principal_place_state_code = USStateField(blank=True) 357 | principal_place_cc = models.CharField(max_length=25, blank=True) 358 | principal_place_zip = models.CharField(max_length=9, blank=True) 359 | principal_place_cd = models.CharField(max_length=4, blank=True) 360 | project_description = models.CharField(max_length=255, blank=True) 361 | progsrc_agen_code = models.CharField(max_length=2, blank=True) 362 | progsrc_acnt_code = 
models.CharField(max_length=4, blank=True) 363 | progsrc_subacnt_code = models.CharField(max_length=3, blank=True) 364 | uri = models.CharField(max_length=70, blank=True) 365 | duns_conf_code = models.CharField(max_length=2, blank=True) 366 | ending_date = models.DateField(blank=True, null=True) 367 | fyq = models.CharField(max_length=10, blank=True) 368 | fyq_correction = models.CharField(max_length=5, blank=True) 369 | starting_date = models.DateField(blank=True, null=True) 370 | transaction_status = models.CharField(max_length=32, blank=True) 371 | unique_transaction_id = models.CharField(max_length=32) 372 | 373 | 374 | def save(self, *args, **kwargs): 375 | Grant.objects.in_fiscal_year(self.fiscal_year) 376 | super(Grant, self).save() 377 | 378 | def delete(self, *args, **kwargs): 379 | Grant.objects.in_fiscal_year(self.fiscal_year) 380 | super(Grant, self).delete() 381 | 382 | class Meta: 383 | ordering = ('fiscal_year','id') 384 | 385 | def __unicode__(self): 386 | return u"%s %s" % (self.fiscal_year, self.project_description) 387 | --------------------------------------------------------------------------------