├── spendb ├── etl │ ├── __init__.py │ ├── manager.py │ ├── tasks.py │ ├── job.py │ ├── upload.py │ └── extract.py ├── lib │ ├── __init__.py │ ├── helpers.py │ └── mailer.py ├── tests │ ├── __init__.py │ ├── etl │ │ ├── __init__.py │ │ ├── test_manager.py │ │ ├── test_queue_tasks.py │ │ ├── test_import_fixtures.py │ │ └── test_load.py │ ├── fixtures │ │ ├── empty.csv │ │ ├── simple.csv │ │ ├── csv_import │ │ │ ├── default │ │ │ │ ├── meta.json │ │ │ │ └── model.json │ │ │ ├── simple │ │ │ │ ├── data.csv │ │ │ │ └── model.json │ │ │ ├── quoting │ │ │ │ ├── data.csv │ │ │ │ └── model.json │ │ │ ├── empty_additional_date │ │ │ │ ├── data.csv │ │ │ │ └── model.json │ │ │ ├── malformed │ │ │ │ └── data.csv │ │ │ ├── import_errors │ │ │ │ └── data.csv │ │ │ ├── successful_import │ │ │ │ └── data.csv │ │ │ ├── erroneous_values │ │ │ │ └── data.csv │ │ │ ├── lbhf │ │ │ │ ├── data.csv │ │ │ │ └── model.json │ │ │ ├── mexico │ │ │ │ └── data.csv │ │ │ └── sample │ │ │ │ ├── model.json │ │ │ │ └── data.csv │ │ ├── data │ │ │ └── simple.csv │ │ ├── demoloader.csv │ │ ├── validation │ │ │ ├── 2011_11_20_name_attribute.json │ │ │ ├── 2011_11_21_normalize.json │ │ │ ├── 2011_11_22_unique_keys.json │ │ │ ├── 2011_12_07_attribute_dicts.json │ │ │ └── default.json │ │ └── meta │ │ │ └── simple.json │ ├── model │ │ ├── __init__.py │ │ └── test_dataset.py │ ├── views │ │ ├── __init__.py │ │ ├── api │ │ │ ├── __init__.py │ │ │ ├── test_meta.py │ │ │ ├── test_slicer.py │ │ │ ├── test_run.py │ │ │ ├── test_session.py │ │ │ └── test_source.py │ │ ├── test_error.py │ │ ├── test_slicer.py │ │ └── test_home.py │ ├── validation │ │ ├── __init__.py │ │ ├── test_dataset.py │ │ └── test_model.py │ ├── base.py │ └── helpers.py ├── views │ ├── api │ │ ├── __init__.py │ │ ├── run.py │ │ ├── meta.py │ │ ├── session.py │ │ ├── source.py │ │ └── dataset.py │ ├── error.py │ ├── __init__.py │ ├── context.py │ └── home.py ├── validation │ ├── __init__.py │ ├── model.py │ ├── account.py │ ├── dataset.py │ 
└── common.py ├── migrate │ ├── alembic.ini │ ├── script.py.mako │ ├── env.py │ └── versions │ │ └── b5ed9296ff9_initial.py ├── wsgi.py ├── static │ └── img │ │ └── favicon.ico ├── translations │ ├── ar │ │ └── LC_MESSAGES │ │ │ └── messages.mo │ ├── de │ │ └── LC_MESSAGES │ │ │ └── messages.mo │ └── is_IS │ │ └── LC_MESSAGES │ │ └── messages.mo ├── model │ ├── __init__.py │ ├── common.py │ ├── manager.py │ ├── facets.py │ ├── run.py │ ├── fact_table.py │ ├── account.py │ ├── provider.py │ └── dataset.py ├── auth │ ├── account.py │ ├── dataset.py │ └── __init__.py ├── __init__.py ├── command │ ├── db.py │ ├── importer.py │ └── __init__.py ├── tasks.py ├── default_settings.py ├── core.py └── templates │ └── layout.html ├── .bowerrc ├── contrib ├── assets │ ├── pattern_bg.psd │ ├── noun_105482_cc.png │ ├── noun_149515_cc.png │ ├── noun_15332_cc.png │ ├── noun_155670_cc.png │ ├── noun_15772_cc.png │ ├── noun_65370_cc.png │ ├── noun_67972_cc.png │ ├── noun_84870_cc.png │ ├── noun_97430_cc.png │ ├── noun_97900_cc.png │ ├── noun_161002_cc.svg │ └── noun_29578_cc.svg ├── os_export │ ├── analyze.py │ ├── archive_sources.py │ ├── model_migrate.py │ └── export.py └── spendb_importer.py ├── Procfile ├── babel.cfg ├── pages ├── index.html ├── contact.html ├── tos.html └── about.html ├── .tx └── config ├── bower.json ├── .gitignore ├── bin └── update_translations ├── production.env.tmpl ├── swarmvars.json ├── .travis.yml ├── requirements.txt ├── Dockerfile ├── docker-compose.yml ├── prod_settings.py ├── CONTRIBUTORS ├── README.md ├── setup.py └── swarm.json /spendb/etl/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /spendb/lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /spendb/tests/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /spendb/tests/etl/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /spendb/views/api/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/empty.csv: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /spendb/tests/model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /spendb/tests/views/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /spendb/validation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /spendb/tests/validation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /spendb/tests/views/api/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.bowerrc: -------------------------------------------------------------------------------- 1 | { 2 | "directory": "spendb/static" 3 | } 4 | -------------------------------------------------------------------------------- 
/spendb/migrate/alembic.ini: -------------------------------------------------------------------------------- 1 | [alembic] 2 | script_location=. 3 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/simple.csv: -------------------------------------------------------------------------------- 1 | a,b,c 2 | 1,2,foo 3 | 3,4,bar -------------------------------------------------------------------------------- /spendb/wsgi.py: -------------------------------------------------------------------------------- 1 | from spendb.core import create_web_app 2 | 3 | app = create_web_app() 4 | -------------------------------------------------------------------------------- /contrib/assets/pattern_bg.psd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openspending/spendb/HEAD/contrib/assets/pattern_bg.psd -------------------------------------------------------------------------------- /spendb/static/img/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openspending/spendb/HEAD/spendb/static/img/favicon.ico -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | web: gunicorn -w 5 spendb.wsgi:app --log-file - 2 | worker: celery -A spendb.tasks worker -c 2 -l debug 3 | -------------------------------------------------------------------------------- /contrib/assets/noun_105482_cc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openspending/spendb/HEAD/contrib/assets/noun_105482_cc.png -------------------------------------------------------------------------------- /contrib/assets/noun_149515_cc.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/openspending/spendb/HEAD/contrib/assets/noun_149515_cc.png -------------------------------------------------------------------------------- /contrib/assets/noun_15332_cc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openspending/spendb/HEAD/contrib/assets/noun_15332_cc.png -------------------------------------------------------------------------------- /contrib/assets/noun_155670_cc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openspending/spendb/HEAD/contrib/assets/noun_155670_cc.png -------------------------------------------------------------------------------- /contrib/assets/noun_15772_cc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openspending/spendb/HEAD/contrib/assets/noun_15772_cc.png -------------------------------------------------------------------------------- /contrib/assets/noun_65370_cc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openspending/spendb/HEAD/contrib/assets/noun_65370_cc.png -------------------------------------------------------------------------------- /contrib/assets/noun_67972_cc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openspending/spendb/HEAD/contrib/assets/noun_67972_cc.png -------------------------------------------------------------------------------- /contrib/assets/noun_84870_cc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openspending/spendb/HEAD/contrib/assets/noun_84870_cc.png -------------------------------------------------------------------------------- /contrib/assets/noun_97430_cc.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/openspending/spendb/HEAD/contrib/assets/noun_97430_cc.png -------------------------------------------------------------------------------- /contrib/assets/noun_97900_cc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openspending/spendb/HEAD/contrib/assets/noun_97900_cc.png -------------------------------------------------------------------------------- /spendb/translations/ar/LC_MESSAGES/messages.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openspending/spendb/HEAD/spendb/translations/ar/LC_MESSAGES/messages.mo -------------------------------------------------------------------------------- /spendb/translations/de/LC_MESSAGES/messages.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openspending/spendb/HEAD/spendb/translations/de/LC_MESSAGES/messages.mo -------------------------------------------------------------------------------- /spendb/translations/is_IS/LC_MESSAGES/messages.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openspending/spendb/HEAD/spendb/translations/is_IS/LC_MESSAGES/messages.mo -------------------------------------------------------------------------------- /babel.cfg: -------------------------------------------------------------------------------- 1 | [python: **.py] 2 | [jinja2: **/templates/**.html] 3 | extensions=jinja2.ext.autoescape,jinja2.ext.with_,webassets.ext.jinja2.AssetsExtension 4 | 5 | -------------------------------------------------------------------------------- /pages/index.html: -------------------------------------------------------------------------------- 1 | title: Mapping the money 2 | hidden: true 3 | 4 | 5 | A simple tool 
for opening up government finances. Anyone 6 | can understand how, where and why government spends our money. 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /.tx/config: -------------------------------------------------------------------------------- 1 | [main] 2 | host = https://www.transifex.com 3 | 4 | [spendb.translations] 5 | file_filter = spendb/translations//LC_MESSAGES/messages.po 6 | source_file = spendb/translations/messages.pot 7 | source_lang = en 8 | 9 | -------------------------------------------------------------------------------- /spendb/model/__init__.py: -------------------------------------------------------------------------------- 1 | from spendb.model.account import Account # NOQA 2 | from spendb.model.dataset import Dataset # noqa 3 | from spendb.model.facets import DatasetLanguage, DatasetTerritory # noqa 4 | from spendb.model.run import Run # NOQA 5 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/csv_import/default/meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset": { 3 | "currency": "EUR", 4 | "description": "Description for Test CSV Import", 5 | "label": "Label for Test CSV Import", 6 | "name": "test-csv" 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/csv_import/simple/data.csv: -------------------------------------------------------------------------------- 1 | id,paid_by,date,amount,paid_to 2 | 1,Test From,2010-01-01,100.00,Test To 3 | 2,Test From,2010-01-01,100.00,Test To 4 | 3,Test From,2010-01-01,100.00,Test To 5 | 4,Test From,2010-01-01,100.00,Test To 6 | 5,Test From,2010-01-01,100.00,Test To 7 | -------------------------------------------------------------------------------- /bower.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": 
"spendb", 3 | "version": "0.19", 4 | "homepage": "https://github.com/spendb/spendb", 5 | "authors": [], 6 | "description": "", 7 | "license": "AGPL v3", 8 | "ignore": [ 9 | "**/.*" 10 | ], 11 | "dependencies": { 12 | "spendb.ui": "master" 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/csv_import/quoting/data.csv: -------------------------------------------------------------------------------- 1 | id,paid_by,date,amount,paid_to 2 | 1,Test From,2010-01-01,3.20E+07,"Test 3 | 4 | ""EAT"" 5 | 6 | To" 7 | 2,Test From,2010-01-01,100.00,Test To 8 | 3,Test From,2010-01-01,100.00,Test To 9 | 4,Test From,2010-01-01,100.00,Test To 10 | 5,Test From,2010-01-01,100.00,Test To 11 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/csv_import/empty_additional_date/data.csv: -------------------------------------------------------------------------------- 1 | id,paid_by,date,amount,paid_to,additional_date 2 | 1,Test From,2010-01-01,100.00,Test To, 3 | 2,Test From,2010-01-01,100.00,Test To,2010-01-01 4 | 3,Test From,2010-01-01,100.00,Test To,2010-01-01 5 | 4,Test From,2010-01-01,100.00,Test To,2010-01-01 6 | 5,Test From,2010-01-01,100.00,Test To,2010-01-01 7 | -------------------------------------------------------------------------------- /spendb/tests/views/test_error.py: -------------------------------------------------------------------------------- 1 | from spendb.tests.base import ControllerTestCase 2 | 3 | 4 | class TestErrors(ControllerTestCase): 5 | 6 | def test_error_404(self): 7 | response = self.client.get('/akhkfhdjkhf/fgfdghfdh') 8 | assert response.status_code == 404, response 9 | 10 | def test_error_403(self): 11 | response = self.client.post('/api/3/datasets') 12 | assert response.status_code == 403, response 13 | -------------------------------------------------------------------------------- /spendb/migrate/script.py.mako: 
-------------------------------------------------------------------------------- 1 | """${message} 2 | 3 | Revision ID: ${up_revision} 4 | Revises: ${down_revision} 5 | Create Date: ${create_date} 6 | 7 | """ 8 | 9 | # revision identifiers, used by Alembic. 10 | revision = ${repr(up_revision)} 11 | down_revision = ${repr(down_revision)} 12 | 13 | from alembic import op 14 | import sqlalchemy as sa 15 | ${imports if imports else ""} 16 | 17 | def upgrade(): 18 | ${upgrades if upgrades else "pass"} 19 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/data/simple.csv: -------------------------------------------------------------------------------- 1 | year,amount,field,to_name,to_label,func_name,func_label 2 | 2010,200,foo,"bcorp","Big Corp",food,Food & Nutrition 3 | 2009,190,bar,"bcorp","Big Corp",food,Food & Nutrition 4 | 2010,500,foo,"acorp","Another Corp",food,Food & Nutrition 5 | 2009,900,qux,"acorp","Another Corp",food,Food & Nutrition 6 | 2010,300,foo,"ccorp","Central Corp",school,Schools & Education 7 | 2009,600,qux,"ccorp","Central Corp",school,Schools & Education 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /pyenv 2 | /*.ini 3 | !/test_continuous.ini 4 | /*.egg-info 5 | *.pyc 6 | *.egg 7 | *.DS_Store 8 | /.noseids 9 | /linesman-enabled 10 | *.DS_Store 11 | /db 12 | .*.swp 13 | doc/_build/* 14 | spendb/static/spendb.ui 15 | spendb/static/spendb.ui/* 16 | contrib/os_export/exports/* 17 | *~ 18 | .tx/config 19 | build/* 20 | .vagrant 21 | .project 22 | .pydevproject 23 | settings.py 24 | .env 25 | tmp/* 26 | coverage/* 27 | .coverage 28 | node_modules/ 29 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/csv_import/malformed/data.csv: -------------------------------------------------------------------------------- 1 
| id,Tipologia,Settore,to_label,from_id,from_label,cofog,to_id,name,amount,description,Ordinato al 1/2/2010,date 2 | 1,Contratti,Varie,CEDAT 85 SRL,1,Camera dei Deputati,01.1.1,cedat_85_srl,"SERVIZIO DI SUPPORTO ALLE ATTIVITA' DI 3 | RESOCONTAZIONE STENOGRAF ICA","66,097.77",,0,2010 4 | 2,Contratti,Varie,TECNOCONFERENCE EUROPE SRL,1,Camera dei Deputati,01.1.1,tecnoconference_europe_srl,NOLEGGIO IMPIANTI TRADUZIONE,"130,000.00",gara in corso,0 -------------------------------------------------------------------------------- /spendb/auth/account.py: -------------------------------------------------------------------------------- 1 | from flask.ext.login import current_user 2 | 3 | 4 | def logged_in(): 5 | return current_user.is_authenticated() and current_user.is_active() 6 | 7 | 8 | def is_admin(): 9 | return logged_in() and current_user.admin 10 | 11 | 12 | def create(): 13 | return True 14 | 15 | 16 | def read(account): 17 | return True 18 | 19 | 20 | def update(account): 21 | return logged_in() 22 | 23 | 24 | def delete(account): 25 | return False 26 | -------------------------------------------------------------------------------- /bin/update_translations: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eu 4 | 5 | LOCALES=(de ar is_IS) 6 | 7 | #pybabel init -i messages.pot -d spendb/translations -l de 8 | 9 | pybabel extract -F babel.cfg -k lazy_gettext -o spendb/translations/messages.pot spendb 10 | pybabel update -i spendb/translations/messages.pot -d spendb/translations 11 | 12 | for LOCALE in ${LOCALES[*]}; do 13 | pybabel compile -f -i spendb/translations/messages.pot -D messages -l $LOCALE -d spendb/translations; 14 | done; 15 | -------------------------------------------------------------------------------- /production.env.tmpl: -------------------------------------------------------------------------------- 1 | # Configuration options injected into docker-compose, see settings.py.tmpl 2 | 
SPENDB_SITE_TITLE=SpenDB 3 | # SPENDB_SECRET=foo 4 | SPENDB_DATABASE_URL=postgresql://spendb:spendb@db/spendb 5 | SPENDB_AMQP_URL=amqp://guest:guest@rabbitmq:5672// 6 | SPENDB_MAIL_SERVER=smtp.mandrillapp.com 7 | SPENDB_MAIL_PORT=587 8 | # SPENDB_MAIL_USERNAME= 9 | # SPENDB_MAIL_PASSWORD= 10 | # SPENDB_MAIL_DEFAULT_SENDER= 11 | # SPENDB_AWS_KEY_ID= 12 | # SPENDB_AWS_SECRET= 13 | # SPENDB_AWS_DATA_BUCKET= 14 | -------------------------------------------------------------------------------- /spendb/auth/dataset.py: -------------------------------------------------------------------------------- 1 | from flask.ext.login import current_user 2 | 3 | from account import logged_in 4 | 5 | 6 | def create(): 7 | return logged_in() 8 | 9 | 10 | def read(dataset): 11 | if not dataset.private: 12 | return True 13 | return update(dataset) 14 | 15 | 16 | def update(dataset): 17 | return logged_in() and (current_user.admin or 18 | current_user in dataset.managers) 19 | 20 | 21 | def delete(dataset): 22 | return update(dataset) 23 | -------------------------------------------------------------------------------- /spendb/lib/helpers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Helper functions """ 3 | from flask import request 4 | from apikit import obj_or_404 5 | 6 | from spendb.auth import require 7 | from spendb.model import Dataset 8 | 9 | 10 | def get_dataset(name): 11 | dataset = obj_or_404(Dataset.by_name(name)) 12 | require.dataset.read(dataset) 13 | return dataset 14 | 15 | 16 | def get_page(param='page'): 17 | try: 18 | return int(request.args.get(param)) 19 | except: 20 | return 1 21 | -------------------------------------------------------------------------------- /spendb/validation/model.py: -------------------------------------------------------------------------------- 1 | from babbage.validation import validate_model as babbage_validate 2 | 3 | from spendb.core import db 4 | 5 | 6 | TYPES = 
{ 7 | 'string': db.Unicode, 8 | 'integer': db.BigInteger, 9 | 'boolean': db.Boolean, 10 | 'number': db.Float, 11 | # FIXME: add proper support for dates 12 | # 'date': db.Date 13 | 'date': db.Unicode 14 | } 15 | 16 | 17 | def validate_model(model): 18 | """ Apply model validation. """ 19 | babbage_validate(model) 20 | return model 21 | -------------------------------------------------------------------------------- /swarmvars.json: -------------------------------------------------------------------------------- 1 | { 2 | "spendb/production": { 3 | "celery_force_root": "true", 4 | "site_title": "SpenDB", 5 | "secret": "foo", 6 | "database_url": "postgresql://spendb:spendb@db/spendb", 7 | "amqp_url": "amqp://guest:guest@rabbitmq:5672//", 8 | "mail_server": "smtp.mandrillapp.com", 9 | "mail_port": "587", 10 | "mail_username": "", 11 | "mail_password": "", 12 | "mail_default_sender": "", 13 | "aws_key_id": "", 14 | "aws_secret": "", 15 | "aws_data_bucket": "" 16 | } 17 | } -------------------------------------------------------------------------------- /spendb/tests/views/api/test_meta.py: -------------------------------------------------------------------------------- 1 | from flask import url_for 2 | 3 | from spendb.tests.base import ControllerTestCase 4 | 5 | 6 | class TestMetaApiController(ControllerTestCase): 7 | 8 | def setUp(self): 9 | super(TestMetaApiController, self).setUp() 10 | 11 | def test_reference_data(self): 12 | url = url_for('meta_api.reference_data') 13 | res = self.client.get(url) 14 | assert 'territories' in res.json, res.json 15 | assert 'currencies' in res.json, res.json 16 | assert 'languages' in res.json, res.json 17 | 18 | -------------------------------------------------------------------------------- /spendb/tests/etl/test_manager.py: -------------------------------------------------------------------------------- 1 | from spendb.tests.base import DatabaseTestCase 2 | 3 | from spendb.core import data_manager 4 | 5 | 6 | class 
TestDataManager(DatabaseTestCase): 7 | 8 | def setUp(self): 9 | data_manager._index = None 10 | super(TestDataManager, self).setUp() 11 | 12 | def tearDown(self): 13 | super(TestDataManager, self).tearDown() 14 | 15 | def test_manager(self): 16 | assert data_manager.collection is not None, data_manager.collection 17 | package = data_manager.package('cra') 18 | assert package.id == 'cra', package 19 | -------------------------------------------------------------------------------- /spendb/__init__.py: -------------------------------------------------------------------------------- 1 | # this is a namespace package 2 | try: 3 | import pkg_resources 4 | pkg_resources.declare_namespace(__name__) 5 | except ImportError: 6 | import pkgutil 7 | __path__ = pkgutil.extend_path(__path__, __name__) 8 | 9 | 10 | import warnings 11 | warnings.filterwarnings('ignore', 'Options will be ignored.') 12 | 13 | # Silence SQLAlchemy warning: 14 | import warnings 15 | warnings.filterwarnings( 16 | 'ignore', 17 | 'Unicode type received non-unicode bind param value.') 18 | warnings.filterwarnings( 19 | 'ignore', 20 | 'Unicode type received non-unicodebind param value.') 21 | 22 | 23 | __version__ = '0.19' 24 | -------------------------------------------------------------------------------- /pages/contact.html: -------------------------------------------------------------------------------- 1 | title: Contact 2 | hidden: false 3 | 4 | SpenDB is an open source community project. For most questions about the project, 5 | please check out [our wiki](https://github.com/spendb/spendb/wiki). 6 | 7 | [File an issue](https://github.com/spendb/spendb/issues) for any problems, questions 8 | or ideas you have while using this service. Please note that this discussion forum 9 | is for the software running this site, not for political discussions on individual 10 | datasets. We are also not the government and hence have the same tools for changing 11 | the budget that you do: voting. 
12 | 13 | For legal requests, please contact the [site administrators](mailto:friedrich@pudo.org). 14 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/csv_import/import_errors/data.csv: -------------------------------------------------------------------------------- 1 | id,Tipologia,Settore,to_label,from_id,from_label,cofog,to_id,name,amount,description,Ordinato al 1/2/2010,date 2 | 1,Contratti,Varie,CEDAT 85 SRL,1,Camera dei Deputati,01.1.1,cedat_85_srl,"SERVIZIO DI SUPPORTO ALLE ATTIVITA' DI 3 | RESOCONTAZIONE STENOGRAF ICA","66,097.77",,0,0 4 | 2,Contratti,Varie,TECNOCONFERENCE EUROPE SRL,1,Camera dei 5 | Deputati,01.1.1,tecnoconference_europe_srl,NOLEGGIO IMPIANTI TRADUZIONE,"130,000.00",gara in corso,0,1 6 | 3,Contratti,Varie,CEDAT 65 SRL |PROGETTO LAVORO SOC. COOP. |,1,Camera dei Deputati,01.1.1,cedat_65_srl_|progetto_lavoro_soc._coop._|,"ASSISTENZE OPERATIVE A SUPPORTO 7 | DELLA GESTIONE DOCUMENTALE E TECNICA","3,135,000.00",da gara,"2,869,548.00",22 -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "2.7" 4 | services: 5 | - rabbitmq 6 | before_install: 7 | - virtualenv ./pyenv --distribute 8 | - source ./pyenv/bin/activate 9 | install: 10 | # To install external filter binaries, we first need to install 11 | # RubyGems and Node/NPM. I'm not sure why, since it seems clear 12 | # that NVM and RVM are pre-installed (see below). 13 | - sudo apt-get install python-software-properties 14 | - pip install -r requirements.txt -e . 
15 | - pip install psycopg2 coveralls 16 | before_script: 17 | - psql -c 'create database spendb;' -U postgres 18 | - cp prod_settings.py settings.py 19 | - nosetests --version 20 | script: 21 | - nosetests --with-coverage --cover-package=spendb 22 | after_success: 23 | - coveralls 24 | -------------------------------------------------------------------------------- /spendb/model/common.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import datetime 3 | import json 4 | 5 | import sqlalchemy as sqla 6 | from sqlalchemy.ext import mutable 7 | 8 | 9 | def json_default(obj): 10 | if isinstance(obj, datetime.datetime): 11 | obj = obj.date() 12 | if isinstance(obj, datetime.date): 13 | obj = obj.isoformat() 14 | return obj 15 | 16 | 17 | class JSONType(sqla.TypeDecorator): 18 | """Enables JSON storage by encoding and decoding on the fly.""" 19 | impl = sqla.Unicode 20 | 21 | def process_bind_param(self, value, dialect): 22 | return json.dumps(value, default=json_default) 23 | 24 | def process_result_value(self, value, dialect): 25 | return json.loads(value) 26 | 27 | mutable.MutableDict.associate_with(JSONType) 28 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/csv_import/successful_import/data.csv: -------------------------------------------------------------------------------- 1 | id,Tipologia,Settore,to_label,from_id,from_label,cofog,to_id,name,amount,description,Ordinato al 1/2/2010,date 2 | 1,Contratti,Varie,CEDAT 85 SRL,1,Camera dei Deputati,01.1.1,cedat_85_srl,"SERVIZIO DI SUPPORTO ALLE ATTIVITA' DI 3 | RESOCONTAZIONE STENOGRAF ICA","66,097.77",,0,2010 4 | 2,Contratti,Varie,TECNOCONFERENCE EUROPE SRL,1,Camera dei Deputati,01.1.1,tecnoconference_europe_srl,NOLEGGIO IMPIANTI TRADUZIONE,"130,000.00",gara in corso,0,2010 5 | 3,Contratti,Varie,TECNOCONFERENCE EUROPE SRL,1,Camera dei Deputati,01.1.1,tecnoconference_europe_srl,NOLEGGIO IMPIANTI 
TRADUZIONE,"130,000.00",gara in corso,0,2010 6 | 4,Contratti,Varie,CEDAT 65 SRL |PROGETTO LAVORO SOC. COOP. |,1,Camera dei Deputati,01.1.1,cedat_65_srl_|progetto_lavoro_soc._coop._|,"ASSISTENZE OPERATIVE A SUPPORTO 7 | DELLA GESTIONE DOCUMENTALE E TECNICA","3,135,000.00",da gara,"2,869,548.00",2010 -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Babel==1.3 2 | Flask==0.10.1 3 | Flask-Babel==0.9 4 | Flask-Cache==0.13.1 5 | Flask-Login==0.2.11 6 | Flask-SQLAlchemy==2.0 7 | Flask-Script==2.0.5 8 | Flask-Testing==0.4.2 9 | Flask-Mail==0.9.1 10 | Flask-Migrate>=1.3.0 11 | Flask-FlatPages==0.6 12 | Flask-Cors==2.0.1 13 | WebHelpers==1.3 14 | celery==3.1.17 15 | colander==1.0 16 | decorator==3.4.0 17 | lxml==3.4.1 18 | json-table-schema<0.2 19 | https://github.com/pudo/messytables/tarball/allow-parser-args 20 | mock==1.0.1 21 | nose==1.3.4 22 | ordereddict==1.1 23 | requests==2.5.1 24 | cssmin==0.2.0 25 | archivekit>=0.4 26 | loadkit>=0.2 27 | apikit>=0.3.1 28 | boto>=2.38.0 29 | git+https://github.com/spendb/fiscalmodel 30 | git+https://github.com/spendb/babbage 31 | billiard 32 | markdown 33 | kombu 34 | 35 | expressions 36 | grako 37 | https://github.com/DataBrewery/cubes/tarball/master 38 | 39 | gunicorn>=19.3.0 40 | psycopg2>=2.6 41 | 42 | . 
class SpendingCubeManager(CubeManager):
    """ Adapter that lets the babbage OLAP API discover and query
    SpenDB datasets as cubes. """

    def __init__(self):
        pass

    def has_cube(self, name):
        """ A cube exists when the dataset exists and has a model. """
        dataset = Dataset.by_name(name)
        return dataset is not None and dataset.model is not None

    def get_cube(self, name):
        """ Return the babbage cube for the named dataset, or None
        when the dataset is missing or not yet modelled. """
        dataset = Dataset.by_name(name)
        if dataset is not None and dataset.model is not None:
            return dataset.cube
        return None

    def list_cubes(self):
        """ Yield the names of all queryable (modelled) datasets. """
        # TODO: authz, failing conservatively for now.
        modelled = (ds for ds in Dataset.all_by_account(None)
                    if ds.model is not None)
        for ds in modelled:
            yield ds.name
""" 13 | 14 | def __init__(self, wrapped): 15 | self.wrapped = wrapped 16 | 17 | def __getattr__(self, attr): 18 | real = getattr(self.wrapped, attr) 19 | return Requirement(real) 20 | 21 | def __call__(self, *args, **kwargs): 22 | fc = self.wrapped(*args, **kwargs) 23 | if fc is not True: 24 | raise Forbidden('Sorry, you\'re not permitted to do this.') 25 | return fc 26 | 27 | @classmethod 28 | def here(cls): 29 | module = inspect.getmodule(cls) 30 | return cls(module) 31 | 32 | require = Requirement.here() 33 | -------------------------------------------------------------------------------- /spendb/command/db.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from flask.ext.migrate import upgrade 4 | from flask.ext.script import Manager 5 | 6 | from spendb.core import db 7 | from spendb.model import Dataset 8 | 9 | log = logging.getLogger(__name__) 10 | 11 | manager = Manager() 12 | manager.__doc__ = 'Database operations' 13 | 14 | 15 | @manager.command 16 | def drop(): 17 | """ Drop database """ 18 | log.warn("Dropping database") 19 | db.metadata.reflect() 20 | db.metadata.drop_all() 21 | 22 | 23 | @manager.command 24 | def drop_dataset(name): 25 | """ Drop a dataset from the database """ 26 | log.warn("Dropping dataset '%s'", name) 27 | dataset = db.session.query(Dataset).filter_by(name=name).first() 28 | if dataset is None: 29 | raise Exception("Dataset does not exist: '%s'" % name) 30 | dataset.drop() 31 | db.session.delete(dataset) 32 | db.session.commit() 33 | 34 | 35 | @manager.command 36 | def migrate(): 37 | """ Initialize or upgrade the database """ 38 | upgrade() 39 | -------------------------------------------------------------------------------- /spendb/tests/views/test_slicer.py: -------------------------------------------------------------------------------- 1 | 2 | from flask import url_for 3 | 4 | from spendb.tests.base import ControllerTestCase 5 | from spendb.tests.helpers import 
def user_get(url, params=None):
    """ Perform an authenticated GET against the OpenSpending instance.

    ``url`` may be absolute or relative to INSTANCE. ``params`` is an
    optional dict of extra query parameters.

    Returns the ``requests`` response object.
    """
    # Copy instead of mutating: the previous mutable default ({}) was
    # shared across calls and written to below.
    params = dict(params) if params is not None else {}
    api_key = os.environ.get('OPENSPENDING_APIKEY')
    headers = {'Authorization': 'ApiKey %s' % api_key}
    if not url.startswith('http'):
        url = urljoin(INSTANCE, url)
    # Unique timestamp per request -- presumably a cache buster; confirm.
    params['__'] = datetime.utcnow().isoformat()
    return requests.get(url, params=params,
                        headers=headers)
/spendb/tests/fixtures/csv_import/erroneous_values/data.csv: -------------------------------------------------------------------------------- 1 | id,Tipologia,Settore,to_label,from_id,from_label,cofog,to_id,name,amount,description,Ordinato al 1/2/2010,date 2 | 1,Contratti,Varie,CEDAT 85 SRL,1,Camera dei Deputati,011.1,cedat_85_srl,"SERVIZIO DI SUPPORTO ALLE ATTIVITA' DI 3 | RESOCONTAZIONE STENOGRAF ICA","66,097.77",,0,2010 4 | 2,Contratti,Varie,TECNOCONFERENCE EUROPE SRL,1,Camera dei Deputati,01.1.1,tecnoconference_europe_srl,NOLEGGIO IMPIANTI TRADUZIONE,"130,000.00",gara in corso,0,2010 5 | 2,Contratti,Varie,TECNOCONFERENCE EUROPE SRL,1,Camera dei Deputati,01.1.1,tecnoconference_europe_srl,NOLEGGIO IMPIANTI TRADUZIONE,"130,000.00",gara in corso,0,2010 6 | 2,Contratti,Varie,TECNOCONFERENCE EUROPE SRL,1,Camera dei Deputati,01.1.1,tecnoconference_europe_srl,NOLEGGIO IMPIANTI TRADUZIONE,"130,000.00",gara in corso,0,2010 7 | 3,Contratti,Varie,CEDAT 65 SRL |PROGETTO LAVORO SOC. COOP. |,1,Camera dei Deputati,01.1.1,cedat_65_srl_|progetto_lavoro_soc._coop._|,"ASSISTENZE OPERATIVE A SUPPORTO 8 | DELLA GESTIONE DOCUMENTALE E TECNICA","3.135.000.00",da gara,"2,869,548.00",2010-2012 -------------------------------------------------------------------------------- /spendb/views/api/run.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from flask import Blueprint, request 4 | from apikit import jsonify, Pager, obj_or_404 5 | from loadkit import logger 6 | 7 | from spendb.core import data_manager 8 | from spendb.model import Run 9 | from spendb.lib.helpers import get_dataset 10 | 11 | 12 | log = logging.getLogger(__name__) 13 | blueprint = Blueprint('runs_api', __name__) 14 | 15 | 16 | @blueprint.route('/datasets//runs') 17 | def index(dataset): 18 | dataset = get_dataset(dataset) 19 | q = Run.all(dataset) 20 | if 'source' in request.args: 21 | q = q.filter(Run.source == request.args.get('source')) 22 | pager = 
class TestCase(FlaskTestCase):
    """ Base test case: builds a SpenDB app against an in-memory
    SQLite database with eager celery, and points the data manager
    at a throw-away temp directory. """

    def create_app(self):
        config = {
            'DEBUG': True,
            'TESTING': True,
            'SITE_TITLE': 'SpenDB',
            'SQLALCHEMY_DATABASE_URI': 'sqlite:///:memory:',
            'PRESERVE_CONTEXT_ON_EXCEPTION': False,
            'CELERY_ALWAYS_EAGER': True
        }
        app = create_web_app(**config)
        # Replace the collection so tests never touch real storage.
        data_manager._coll = open_collection(
            'test', 'file', path=tempfile.mkdtemp())
        return app

    def setUp(self):
        init_db(self.app)

    def tearDown(self):
        clean_db(self.app)
3 | command: gunicorn -w 5 -b 0.0.0.0:8000 --error-logfile /var/log/gunicorn.error.log --log-file /var/log/gunicorn.log spendb.wsgi:app 4 | ports: 5 | - "127.0.0.1:8000:8000" 6 | links: 7 | - rabbitmq 8 | - db 9 | - worker 10 | volumes: 11 | - /srv/spendb/logs:/var/log 12 | env_file: 13 | - production.env 14 | 15 | db: 16 | image: postgres:9.4 17 | environment: 18 | - POSTGRES_USER=spendb 19 | - POSTGRES_PASSWORD=spendb 20 | expose: 21 | - "5432" 22 | volumes: 23 | - /srv/spendb/db:/var/lib/postgresql/data 24 | - /srv/spendb/logs/postgresql:/var/log 25 | 26 | rabbitmq: 27 | image: rabbitmq 28 | expose: 29 | - "5672" 30 | 31 | worker: 32 | build: . 33 | command: celery -A spendb.tasks worker -c 4 -l info --logfile=/var/log/celery.log 34 | links: 35 | - rabbitmq 36 | - db 37 | volumes: 38 | - /srv/spendb/logs:/var/log 39 | env_file: 40 | - production.env 41 | environment: 42 | - C_FORCE_ROOT=true 43 | -------------------------------------------------------------------------------- /prod_settings.py: -------------------------------------------------------------------------------- 1 | import os 2 | env = os.environ.get 3 | 4 | DEBUG = False 5 | CELERY_ALWAYS_EAGER = False 6 | CACHE = not DEBUG 7 | 8 | SITE_TITLE = env('SPENDB_SITE_TITLE', 'SpenDB') 9 | SECRET_KEY = env('SPENDB_SECRET') 10 | 11 | SQLALCHEMY_DATABASE_URI = env('SPENDB_DATABASE_URL') 12 | CELERY_BROKER_URL = env('SPENDB_AMQP_URL', env('SPENDB_CLOUDAMQP_URL')) 13 | 14 | MAIL_SERVER = env('SPENDB_SMTP_HOST', 'smtp.mandrillapp.com') 15 | MAIL_PORT = int(env('SPENDB_SMTP_PORT', 587)) 16 | MAIL_USE_TLS = True 17 | MAIL_USERNAME = env('SPENDB_SMTP_USERNAME', env('SPENDB_MANDRILL_USERNAME')) 18 | MAIL_PASSWORD = env('SPENDB_SMTP_PASSWORD', env('SPENDB_MANDRILL_PASSWORD')) 19 | 20 | MAIL_DEFAULT_SENDER = env('SPENDB_MAIL_SENDER', 'SpenDB ') 21 | 22 | STORAGE_TYPE = env('SPENDB_STORAGE_TYPE', 's3') #Alternative: 'file' 23 | STORAGE_PATH = env('SPENDB_STORAGE_PATH', '/usr/local/lib/spendb') #Only used if 'file' 
class TestQueueTasks(DatabaseTestCase):
    """ Exercise the celery entry points in spendb.tasks end-to-end
    (celery runs eagerly in tests), from URL extraction into the
    dataset's archive package. """

    def setUp(self):
        super(TestQueueTasks, self).setUp()
        # NOTE(review): DataManager in this codebase shows no _index
        # attribute; presumably this clears a cached index -- confirm.
        data_manager._index = None
        self.dsn = 'cra'
        model = meta_fixture(self.dsn)
        self.ds = Dataset(model)
        db.session.add(self.ds)
        db.session.commit()
        # Local fixture path used as the "URL" to extract from.
        self.cra_url = csvimport_fixture_path('../data', 'cra.csv')

    def tearDown(self):
        super(TestQueueTasks, self).tearDown()

    def test_load_from_url(self):
        # Eager mode: this runs extraction synchronously.
        tasks.load_from_url(self.dsn, self.cra_url)
        package = data_manager.package(self.dsn)
        sources = list(package.all(Source))
        assert len(sources) == 1, sources
        src0 = sources[0]
        assert src0.meta['name'] == 'cra.csv', src0.meta.items()
It is therefore subject to the 7 | [terms of service](http://okfn.de/impressum/) that cover all services operated by OKF-DE, including 8 | the Impressum and data protection rules as mandated by German law. 9 | 10 | ### Community project notice 11 | 12 | Please further be aware that this is a community-run effort without dedicated funding and staff. As 13 | such, all hosting and API services provided by this platform are based on a best effort principle: we 14 | will keep it running as long as there is a relevant level of interest, but reserve the right 15 | to cease operating the service at any time. In such an event, we will make sure that comprehensive 16 | data exports will be made available, so that anyone can continue to operate their own instances of 17 | the service. 18 | 19 | Consider this note a call to action: open services are kept alive by the community that operates 20 | them. If you can, and you're interested - then [join that community](contact.html) and help to make 21 | sure this is a sustainable project! 
22 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/csv_import/lbhf/data.csv: -------------------------------------------------------------------------------- 1 | id,paid_by,date,transaction_id,amount,paid_to,spending_area 2 | 1,London Borough of Hammersmith and Fulham,2010-01-01,405869,898.64,ADT FIRE & SECURITY PLC,Childrens Services 3 | 2,London Borough of Hammersmith and Fulham,2010-01-01,405870,517.85,ADT FIRE & SECURITY PLC,Resident Services 4 | 3,London Borough of Hammersmith and Fulham,2010-01-01,405871,1215.97,ADT FIRE & SECURITY PLC,Regeneration and Housing Services 5 | 4,London Borough of Hammersmith and Fulham,2010-01-01,417742,112.50,ALARM LTD,Finance and Corporate Services 6 | 5,London Borough of Hammersmith and Fulham,2010-01-01,417742,562.50,ALARM LTD,Finance and Corporate Services 7 | 6,London Borough of Hammersmith and Fulham,2010-01-01,391746,1665.62,ASCOM TELE NOVA LTD,Childrens Services 8 | 7,London Borough of Hammersmith and Fulham,2010-01-01,396062,1500.00,BIW TECHNOLOGIES LIMITED,Community Services 9 | 8,London Borough of Hammersmith and Fulham,2010-01-01,392463,560.00,CAPITAL CITY COMMUNICATIONS LTD,Resident Services 10 | 9,London Borough of Hammersmith and Fulham,2010-01-01,393998,1296.00,CAPITAL CITY COMMUNICATIONS LTD,Environment Services 11 | 10,London Borough of Hammersmith and Fulham,2010-01-01,395696,171.39,CAR HIRE (DAY OF SWANSEA)LTD,Resident Services 12 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/csv_import/default/model.json: -------------------------------------------------------------------------------- 1 | { 2 | "dimensions": { 3 | "entry_id": { 4 | "attributes": { 5 | "entry_id": { 6 | "column": "id", 7 | "label": "ID" 8 | } 9 | }, 10 | "key_attribute": "entry_id", 11 | "label": "Entry ID" 12 | }, 13 | "from": { 14 | "attributes": { 15 | "label": { 16 | "column": "from_label", 17 | "label": "Label" 18 | }, 19 
| "name": { 20 | "column": "from_id", 21 | "label": "Name" 22 | } 23 | }, 24 | "key_attribute": "name", 25 | "label": "Paid by" 26 | }, 27 | "time": { 28 | "attributes": { 29 | "year": { 30 | "column": "date", 31 | "label": "Year" 32 | } 33 | }, 34 | "key_attribute": "year", 35 | "label": "Time" 36 | } 37 | }, 38 | "measures": { 39 | "amount": { 40 | "column": "amount", 41 | "label": "Amount" 42 | } 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /spendb/migrate/env.py: -------------------------------------------------------------------------------- 1 | from __future__ import with_statement 2 | from alembic import context 3 | from sqlalchemy import engine_from_config, pool 4 | from flask import current_app 5 | 6 | from spendb.core import db 7 | from spendb.model import * # noqa 8 | 9 | config = context.config 10 | config.set_main_option('sqlalchemy.url', 11 | current_app.config['SQLALCHEMY_DATABASE_URI']) 12 | target_metadata = db.metadata 13 | 14 | 15 | def run_migrations_offline(): 16 | url = config.get_main_option("sqlalchemy.url") 17 | context.configure(url=url) 18 | 19 | with context.begin_transaction(): 20 | context.run_migrations() 21 | 22 | 23 | def run_migrations_online(): 24 | engine = engine_from_config( 25 | config.get_section(config.config_ini_section), 26 | prefix='sqlalchemy.', 27 | poolclass=pool.NullPool) 28 | 29 | connection = engine.connect() 30 | context.configure( 31 | connection=connection, 32 | target_metadata=target_metadata 33 | ) 34 | 35 | try: 36 | with context.begin_transaction(): 37 | context.run_migrations() 38 | finally: 39 | connection.close() 40 | 41 | if context.is_offline_mode(): 42 | run_migrations_offline() 43 | else: 44 | run_migrations_online() 45 | -------------------------------------------------------------------------------- /spendb/etl/manager.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from archivekit 
class DataManager(object):
    """ The data manager coordinates read and write access to the
    ETL data storage. """

    def __init__(self):
        self.app = None
        self._coll = None

    @property
    def configured(self):
        """ True once init_app() has bound a Flask app. """
        return self.app is not None

    def init_app(self, app):
        self.app = app

    def package(self, dataset):
        """ Get a package for a given dataset name. """
        assert self.configured, 'Data manager not configured!'
        return self.collection.get(dataset)

    @property
    def collection(self):
        """ Lazily opened storage collection; None when unconfigured. """
        if not self.configured:
            return
        if self._coll is None:
            config = self.app.config
            options = {
                'path': config.get('STORAGE_PATH'),
                'aws_key_id': config.get('AWS_KEY_ID'),
                'aws_secret': config.get('AWS_SECRET'),
                'bucket_name': config.get('AWS_DATA_BUCKET')
            }
            self._coll = open_collection(
                'datasets', config.get('STORAGE_TYPE'), **options)
        return self._coll

', '').replace('

', '') 19 | status = exc.code 20 | title = exc.name 21 | headers = exc.get_headers(request.environ) 22 | data = { 23 | 'status': status, 24 | 'title': title, 25 | 'message': message 26 | } 27 | return jsonify(data, status=status, headers=headers) 28 | 29 | 30 | def handle_invalid(exc): 31 | if isinstance(exc.node.typ, Mapping): 32 | exc.node.name = '' 33 | data = { 34 | 'status': 400, 35 | 'errors': exc.asdict() 36 | } 37 | return jsonify(data, status=400) 38 | 39 | 40 | def handle_validation_error(exc): 41 | return jsonify({ 42 | 'status': 400, 43 | 'message': exc.message, 44 | 'value': exc.instance 45 | }, status=400) 46 | 47 | 48 | def handle_not_modified(exc): 49 | return Response(status=304) 50 | -------------------------------------------------------------------------------- /spendb/lib/mailer.py: -------------------------------------------------------------------------------- 1 | from flask import current_app 2 | from flask.ext.babel import lazy_gettext as _ 3 | from flask.ext.mail import Message 4 | 5 | from spendb.core import mail, url_for 6 | 7 | 8 | RESET_MESSAGE = '''You have requested your password on %(site_title)s to be reset. 
def dicts(d):
    """ Turn a reference mapping into a stream of row dicts.

    Plain values become {'code', 'label'}; 2-tuples additionally
    carry their second element under 'key'.
    """
    for code, value in d.items():
        entry = {'code': code}
        if isinstance(value, tuple):
            entry['label'] = value[0]
            entry['key'] = value[1]
        else:
            entry['label'] = value
        yield entry
@blueprint.route('/pages/.html')
# NOTE(review): the route converter appears stripped in this copy;
# upstream this is presumably '/pages/<path:path>.html' -- confirm.
def page(path):
    """ Serve a flat page as JSON: its metadata, its rendered HTML,
    and an index of every other page's metadata. """
    page = pages.get_or_404(path)
    data = dict(page.meta)
    data['html'] = page.html
    data['path'] = page.path + '.html'
    data['pages'] = {}
    # NB: this loop re-binds ``path``, shadowing the URL argument;
    # only safe because the argument is not used afterwards.
    for p in pages:
        path = p.path + '.html'
        data['pages'][path] = p.meta
    return jsonify(data)
assert len(res.json['dimensions']) == 12, len(res.json['dimensions']) 37 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/validation/2011_11_20_name_attribute.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset": { 3 | "name": "test", 4 | "label": "Test Dataset", 5 | "description": "This is a test dataset", 6 | "currency": "EUR" 7 | }, 8 | "mapping": { 9 | "amount": { 10 | "type": "measure", 11 | "label": "Amount", 12 | "datatype": "number", 13 | "column": "AMOUNT" 14 | }, 15 | "cofinance": { 16 | "type": "measure", 17 | "label": "Co-Financed Amount", 18 | "datatype": "number", 19 | "column": "COFIN" 20 | }, 21 | "time": { 22 | "type": "date", 23 | "label": "Time of transaction", 24 | "datatype": "date", 25 | "column": "YEAR" 26 | }, 27 | "transaction_id": { 28 | "type": "value", 29 | "label": "Transaction ID", 30 | "datatype": "id", 31 | "column": "TX" 32 | }, 33 | "function": { 34 | "type": "compound", 35 | "key": true, 36 | "label": "Function", 37 | "fields": [ 38 | {"name": "label", "datatype": "string", "column": "FUNCTION"}, 39 | {"name": "description", "datatype": "string", "column": "FUNCTION_DESC"} 40 | ] 41 | }, 42 | "supplier": { 43 | "type": "compound", 44 | "label": "Supplier", 45 | "fields": [ 46 | {"name": "name", "datatype": "id", "column": "SUPPLIER_ID"}, 47 | {"name": "label", "datatype": "string", "column": "SUPPLIER"} 48 | ] 49 | } 50 | } 51 | } 52 | 53 | -------------------------------------------------------------------------------- /spendb/tasks.py: -------------------------------------------------------------------------------- 1 | from celery.utils.log import get_task_logger 2 | 3 | from spendb.core import create_app, create_celery 4 | from spendb.model import Dataset 5 | from spendb.etl import tasks 6 | 7 | 8 | log = get_task_logger(__name__) 9 | 10 | flask_app = create_app() 11 | celery = create_celery(flask_app) 12 | 13 | 14 | 
@celery.task(ignore_result=True) 15 | def load_from_url(dataset_name, url): 16 | with flask_app.app_context(): 17 | dataset = Dataset.by_name(dataset_name) 18 | if dataset is None: 19 | log.error("Dataset not found: %s", dataset_name) 20 | return 21 | source = tasks.extract_url(dataset, url) 22 | if source is not None: 23 | load_from_source.delay(dataset_name, source.name) 24 | 25 | 26 | @celery.task(ignore_result=True) 27 | def load_from_source(dataset_name, source_name): 28 | with flask_app.app_context(): 29 | dataset = Dataset.by_name(dataset_name) 30 | if dataset is None: 31 | log.error("Dataset not found: %s", dataset_name) 32 | return 33 | if source_name is None: 34 | log.error("No source specified: %s", dataset_name) 35 | return 36 | source = tasks.transform_source(dataset, source_name) 37 | if source is None: 38 | return 39 | tasks.load(dataset, source_name=source_name) 40 | 41 | 42 | @celery.task(ignore_result=True) 43 | def ping(): 44 | with flask_app.app_context(): 45 | log.info("Pong.") 46 | -------------------------------------------------------------------------------- /pages/about.html: -------------------------------------------------------------------------------- 1 | title: About 2 | hidden: false 3 | 4 | SpenDB is a tool for understanding government financial information. Our goal is to find 5 | new ways for those interested - journalists, policy analysts, parliamentarians or even 6 | the mythical armchair auditor - to access, interpret and share data about the public purse. 7 | 8 | ### Is this just about budget visualization? 9 | 10 | While the most commonly looked-at piece of government financial data is probably budgets, 11 | there's no need to remain limited to that source. 12 | 13 | Information about government purchases of goods and services, funding for research, subsidies 14 | or even payments received for resource concessions are all pieces of information that should 15 | be accessible to the broadest possible public. 
Our tool provides a basic set of analytical utilities - just enough to answer some interesting
questions about policy, procurement outcomes or trends over time. But it also provides a rich
] 42 | }, 43 | "supplier": { 44 | "type": "compound", 45 | "label": "Supplier", 46 | "fields": [ 47 | {"name": "name", "datatype": "id", "column": "SUPPLIER_ID"}, 48 | {"name": "label", "datatype": "string", "column": "SUPPLIER"} 49 | ] 50 | } 51 | } 52 | } 53 | 54 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/validation/2011_11_22_unique_keys.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset": { 3 | "name": "test", 4 | "label": "Test Dataset", 5 | "description": "This is a test dataset", 6 | "currency": "EUR", 7 | "unique_keys": ["function.name"] 8 | }, 9 | "mapping": { 10 | "amount": { 11 | "type": "measure", 12 | "label": "Amount", 13 | "datatype": "float", 14 | "column": "AMOUNT" 15 | }, 16 | "cofinance": { 17 | "type": "measure", 18 | "label": "Co-Financed Amount", 19 | "datatype": "float", 20 | "column": "COFIN" 21 | }, 22 | "time": { 23 | "type": "date", 24 | "label": "Time of transaction", 25 | "datatype": "date", 26 | "column": "YEAR" 27 | }, 28 | "transaction_id": { 29 | "type": "value", 30 | "label": "Transaction ID", 31 | "datatype": "id", 32 | "column": "TX" 33 | }, 34 | "function": { 35 | "type": "compound", 36 | "label": "Function", 37 | "fields": [ 38 | {"name": "name", "datatype": "id", "column": "FUNCTION_ID"}, 39 | {"name": "label", "datatype": "string", "column": "FUNCTION"}, 40 | {"name": "description", "datatype": "string", "column": "FUNCTION_DESC"} 41 | ] 42 | }, 43 | "supplier": { 44 | "type": "compound", 45 | "label": "Supplier", 46 | "fields": [ 47 | {"name": "name", "datatype": "id", "column": "SUPPLIER_ID"}, 48 | {"name": "label", "datatype": "string", "column": "SUPPLIER"} 49 | ] 50 | } 51 | } 52 | } 53 | 54 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/validation/2011_12_07_attribute_dicts.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "dataset": { 3 | "name": "test", 4 | "label": "Test Dataset", 5 | "description": "This is a test dataset", 6 | "currency": "EUR", 7 | "ckan_uri": "urn:/dev/null" 8 | }, 9 | "mapping": { 10 | "amount": { 11 | "type": "measure", 12 | "label": "Amount", 13 | "datatype": "float", 14 | "column": "AMOUNT" 15 | }, 16 | "cofinance": { 17 | "type": "measure", 18 | "label": "Co-Financed Amount", 19 | "datatype": "float", 20 | "column": "COFIN" 21 | }, 22 | "time": { 23 | "type": "date", 24 | "label": "Time of transaction", 25 | "datatype": "date", 26 | "column": "YEAR" 27 | }, 28 | "transaction_id": { 29 | "type": "value", 30 | "label": "Transaction ID", 31 | "datatype": "id", 32 | "column": "TX" 33 | }, 34 | "function": { 35 | "type": "compound", 36 | "key": true, 37 | "label": "Function", 38 | "fields": [ 39 | {"name": "name", "datatype": "id", "column": "FUNCTION_ID"}, 40 | {"name": "label", "datatype": "string", "column": "FUNCTION"}, 41 | {"name": "description", "datatype": "string", "column": "FUNCTION_DESC"} 42 | ] 43 | }, 44 | "supplier": { 45 | "type": "compound", 46 | "label": "Supplier", 47 | "fields": [ 48 | {"name": "name", "datatype": "id", "column": "SUPPLIER_ID"}, 49 | {"name": "label", "datatype": "string", "column": "SUPPLIER"} 50 | ] 51 | } 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /spendb/default_settings.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | SECRET_KEY = 'foo' 4 | DEBUG = True 5 | 6 | SITE_TITLE = 'SpenDB' 7 | 8 | ASSETS_PATH_PROD = 'https://dfmbkaoi4kurm.cloudfront.net/libs/spendb.ui/latest/build' 9 | ASSETS_PATH_DEBUG = '/static/spendb.ui/build' 10 | 11 | SQLALCHEMY_DATABASE_URI = 'postgresql://localhost/spendb' 12 | 13 | BABEL_DEFAULT_LOCALE = 'en' 14 | 15 | MAIL_SERVER = 'localhost' 16 | # MAIL_PORT = 25 17 | # MAIL_USE_TLS = False 18 | 
# MAIL_USE_SSL = False 19 | # MAIL_USERNAME = None 20 | # MAIL_PASSWORD = None 21 | MAIL_DEFAULT_SENDER = 'noreply@mapthemoney.org' 22 | 23 | CACHE = False 24 | CACHE_TYPE = 'simple' 25 | 26 | PREFERRED_URL_SCHEME = 'http' 27 | 28 | ALEMBIC_DIR = os.path.join(os.path.dirname(__file__), 'migrate') 29 | ALEMBIC_DIR = os.path.abspath(ALEMBIC_DIR) 30 | 31 | FLATPAGES_ROOT = os.path.join(os.path.dirname(__file__), '..', 'pages') 32 | FLATPAGES_ROOT = os.path.abspath(FLATPAGES_ROOT) 33 | 34 | # Worker queue configuration. 35 | CELERY_BROKER_URL = 'amqp://guest:guest@localhost:5672//' 36 | 37 | # If you set ``EAGER``, processing will happen inline. 38 | CELERY_ALWAYS_EAGER = False 39 | CELERY_TASK_SERIALIZER = 'json' 40 | CELERY_ACCEPT_CONTENT = ['json'] 41 | 42 | # CELERY_DEFAULT_QUEUE = 'loading' 43 | # CELERY_QUEUES = ( 44 | # Queue('indexing', Exchange('spendb'), routing_key='spendb'), 45 | # Queue('loading', Exchange('spendb'), routing_key='spendb'), 46 | # ) 47 | 48 | # CELERY_ROUTES = { 49 | # 'spendb.tasks.load_from_url': { 50 | # 'queue': 'loading' 51 | # }, 52 | # 'spendb.tasks.index_dataset': { 53 | # 'queue': 'indexing' 54 | # }, 55 | # } 56 | -------------------------------------------------------------------------------- /spendb/validation/account.py: -------------------------------------------------------------------------------- 1 | from colander import SchemaNode, SequenceSchema, Regex, String, Length 2 | from colander import MappingSchema, Email, Boolean 3 | 4 | from spendb.validation.common import Ref 5 | 6 | REGISTER_NAME_RE = r"^[a-zA-Z0-9_\-]{3,255}$" 7 | 8 | 9 | class AccountRef(Ref): 10 | 11 | def decode(self, cstruct): 12 | from spendb.model import Account 13 | if isinstance(cstruct, basestring): 14 | return Account.by_name(cstruct) 15 | if isinstance(cstruct, dict): 16 | return self.decode(cstruct.get('name')) 17 | return None 18 | 19 | 20 | class DatasetAccounts(SequenceSchema): 21 | account = SchemaNode(AccountRef()) 22 | 23 | 24 | class 
AccountRegister(MappingSchema): 25 | name = SchemaNode(String(), validator=Regex(REGISTER_NAME_RE)) 26 | fullname = SchemaNode(String()) 27 | email = SchemaNode(String(), validator=Email()) 28 | public_email = SchemaNode(Boolean(), missing=False) 29 | password1 = SchemaNode(String(), validator=Length(min=4)) 30 | password2 = SchemaNode(String(), validator=Length(min=4)) 31 | terms = SchemaNode(Boolean()) 32 | 33 | 34 | class AccountSettings(MappingSchema): 35 | fullname = SchemaNode(String()) 36 | email = SchemaNode(String(), validator=Email()) 37 | public_email = SchemaNode(Boolean(), missing=False) 38 | twitter = SchemaNode(String(), missing=None, 39 | validator=Length(max=140)) 40 | public_twitter = SchemaNode(Boolean(), missing=False) 41 | password1 = SchemaNode(String(), missing=None, default=None) 42 | password2 = SchemaNode(String(), missing=None, default=None) 43 | -------------------------------------------------------------------------------- /spendb/validation/dataset.py: -------------------------------------------------------------------------------- 1 | from colander import Schema, SchemaNode, String, Boolean, SequenceSchema 2 | from colander import OneOf, Length, drop 3 | from fiscalmodel import CURRENCIES, LANGUAGES 4 | from fiscalmodel import COUNTRIES, CATEGORIES 5 | 6 | from spendb.validation.common import dataset_name, prepare_name 7 | from spendb.validation.account import AccountRef 8 | 9 | 10 | class DatasetLanguages(SequenceSchema): 11 | language = SchemaNode(String(), validator=OneOf(LANGUAGES.keys())) 12 | 13 | 14 | class DatasetTerritories(SequenceSchema): 15 | territory = SchemaNode(String(), validator=OneOf(COUNTRIES.keys())) 16 | 17 | 18 | class DatasetForm(Schema): 19 | label = SchemaNode(String(), preparer=prepare_name, 20 | validator=Length(min=2)) 21 | name = SchemaNode(String(), preparer=prepare_name, 22 | validator=dataset_name) 23 | description = SchemaNode(String(), missing=drop) 24 | private = SchemaNode(Boolean(), missing=drop) 
25 | currency = SchemaNode(String(), missing=drop, 26 | validator=OneOf(CURRENCIES.keys())) 27 | category = SchemaNode(String(), missing=drop, 28 | validator=OneOf(CATEGORIES.keys())) 29 | languages = DatasetLanguages(missing=drop) 30 | territories = DatasetTerritories(missing=drop) 31 | 32 | 33 | class Managers(SequenceSchema): 34 | manager = SchemaNode(AccountRef()) 35 | 36 | 37 | class ManagersForm(Schema): 38 | managers = Managers(missing=[]) 39 | 40 | 41 | def validate_dataset(data): 42 | return DatasetForm().deserialize(data) 43 | 44 | 45 | def validate_managers(data): 46 | return ManagersForm().deserialize(data) 47 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/csv_import/quoting/model.json: -------------------------------------------------------------------------------- 1 | { 2 | "dimensions": { 3 | "entry_id": { 4 | "attributes": { 5 | "entry_id": { 6 | "column": "id", 7 | "label": "ID" 8 | } 9 | }, 10 | "key_attribute": "entry_id", 11 | "label": "Entry ID" 12 | }, 13 | "from": { 14 | "attributes": { 15 | "label": { 16 | "column": "paid_by", 17 | "label": "Label" 18 | }, 19 | "name": { 20 | "column": "paid_by", 21 | "label": "Name" 22 | } 23 | }, 24 | "key_attribute": "name", 25 | "label": "Spender" 26 | }, 27 | "time": { 28 | "attributes": { 29 | "year": { 30 | "column": "date", 31 | "label": "Year" 32 | } 33 | }, 34 | "key_attribute": "year", 35 | "label": "Time" 36 | }, 37 | "to": { 38 | "attributes": { 39 | "label": { 40 | "column": "paid_to", 41 | "label": "Label" 42 | }, 43 | "name": { 44 | "column": "paid_to", 45 | "label": "Name" 46 | } 47 | }, 48 | "key_attribute": "name", 49 | "label": "Recipient" 50 | } 51 | }, 52 | "measures": { 53 | "amount": { 54 | "column": "amount", 55 | "label": "Amount" 56 | } 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /spendb/tests/views/api/test_run.py: 
-------------------------------------------------------------------------------- 1 | from flask import url_for 2 | 3 | from spendb.core import db 4 | from spendb.model import Dataset 5 | from spendb.tests.base import ControllerTestCase 6 | from spendb.tests.helpers import load_fixture, make_account 7 | from spendb.tests.helpers import data_fixture 8 | 9 | 10 | class TestRunApiController(ControllerTestCase): 11 | 12 | def setUp(self): 13 | super(TestRunApiController, self).setUp() 14 | self.cra = load_fixture('cra') 15 | self.user = make_account('test') 16 | self.auth_qs = {'api_key': self.user.api_key} 17 | self.cra.managers.append(self.user) 18 | db.session.commit() 19 | url = url_for('sources_api.upload', dataset=self.cra.name) 20 | fh = data_fixture('cra') 21 | self.source = self.client.post(url, data={ 22 | 'file': (fh, 'cra.csv') 23 | }, query_string=self.auth_qs).json 24 | 25 | def test_runs_index(self): 26 | url = url_for('runs_api.index', dataset=self.cra.name) 27 | res = self.client.get(url) 28 | assert res.json['total'] == 1, res.json 29 | frst = res.json['results'][0] 30 | assert frst['status'] == 'complete', frst 31 | assert 'messages' not in frst, frst 32 | 33 | def test_runs_index_filter(self): 34 | url = url_for('runs_api.index', dataset=self.cra.name, source='foo') 35 | res = self.client.get(url) 36 | assert res.json['total'] == 0, res.json 37 | 38 | def test_runs_view(self): 39 | url = url_for('runs_api.view', dataset=self.cra.name, id=1) 40 | res = self.client.get(url) 41 | assert res.json['status'] == 'complete', res.json 42 | assert len(res.json['messages']), res.json 43 | -------------------------------------------------------------------------------- /spendb/command/importer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | import urllib2 4 | import urlparse 5 | import json 6 | 7 | from colander import Invalid 8 | 9 | from spendb.model import Dataset 10 | from spendb.core 
import db 11 | from spendb.validation.model import validate_model 12 | 13 | log = logging.getLogger(__name__) 14 | 15 | 16 | def _is_local_file(url): 17 | """ Check to see if the provided url is a local file. """ 18 | parsed_result = urlparse.urlparse(url) 19 | return parsed_result.scheme in ['', 'file'] 20 | 21 | 22 | def json_of_url(url): 23 | if _is_local_file(url): 24 | url = url.replace('file://', '') 25 | return json.load(open(url, 'r')) 26 | else: 27 | return json.load(urllib2.urlopen(url)) 28 | 29 | 30 | def get_model(model): 31 | """ Get and validate the model. If the model doesn't validate 32 | we exit the program. """ 33 | model = json_of_url(model) 34 | 35 | # Validate the model 36 | try: 37 | log.info("Validating model") 38 | model = validate_model(model) 39 | except Invalid as i: 40 | log.error("Errors occured during model validation:") 41 | for field, error in i.asdict().items(): 42 | log.error("%s: %s", field, error) 43 | sys.exit(1) 44 | return model 45 | 46 | 47 | def get_or_create_dataset(model): 48 | """ Based on a provided model we get the model (if it doesn't 49 | exist we create it). 
""" 50 | dataset = Dataset.by_name(model['dataset']['name']) 51 | 52 | # If the dataset wasn't found we create it 53 | if dataset is None: 54 | dataset = Dataset(model) 55 | db.session.add(dataset) 56 | db.session.commit() 57 | 58 | log.info("Dataset: %s", dataset.name) 59 | return dataset 60 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/csv_import/simple/model.json: -------------------------------------------------------------------------------- 1 | { 2 | "dimensions": { 3 | "entry_id": { 4 | "attributes": { 5 | "entry_id": { 6 | "column": "id", 7 | "type": "string" 8 | } 9 | }, 10 | "label": "Unique transaction ID" 11 | }, 12 | "from": { 13 | "attributes": { 14 | "label": { 15 | "column": "paid_by", 16 | "type": "string" 17 | }, 18 | "name": { 19 | "column": "paid_by", 20 | "type": "string" 21 | } 22 | }, 23 | "description": "Payer", 24 | "label": "Payer", 25 | "type": "entity" 26 | }, 27 | "time": { 28 | "attributes": { 29 | "year": { 30 | "column": "date", 31 | "type": "integer" 32 | } 33 | }, 34 | "label": "Time" 35 | }, 36 | "to": { 37 | "attributes": { 38 | "label": { 39 | "column": "paid_to", 40 | "datatype": "string" 41 | }, 42 | "name": { 43 | "column": "paid_to", 44 | "datatype": "id" 45 | } 46 | }, 47 | "description": "Payee", 48 | "label": "Payee", 49 | "type": "entity" 50 | } 51 | }, 52 | "measures": { 53 | "amount": { 54 | "column": "amount", 55 | "description": "Amount", 56 | "label": "Amount", 57 | "type": "number" 58 | } 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /CONTRIBUTORS: -------------------------------------------------------------------------------- 1 | Alberto Rodriguez Peon 2 | Alistair Turnbull 3 | Andrew Suffield 4 | Andy Lulham 5 | Anna Powell Smith 6 | Carsten Senger 7 | Colin O'Neill 8 | David Jones 9 | Deon Bredenhann 10 | Friedrich Lindenberg 11 | garethpdx 12 | Gregor Aisch 13 | Helen ST 14 | Jake Madison 15 | John 
Wasack 16 | Jorge C. Leitão 17 | Justin Duke 18 | Kristian Glass 19 | Kristján Oddsson 20 | Martin Keegan 21 | Michael Bauer 22 | Nathan Hilbert 23 | Nick Stenning 24 | Nigel Babu 25 | Randal Moore 26 | Rufus Pollock 27 | Sander van der Waal 28 | Stefan Wehrmeyer 29 | Takashi Nishibayashi 30 | Telmo Brugnara 31 | Tony Hirst 32 | Tryggvi Björgvinsson 33 | Vitor Baptista 34 | 35 | 36 | Noun Project graphics used: 37 | 38 | Bank by Till Teenck from the Noun Project 39 | accounting by Kevin Augustine LO from the Noun Project 40 | Money by Nate Eul from the Noun Project 41 | finance by Vladislav Sergeev from the Noun Project 42 | Planning by Ivan Colic from the Noun Project 43 | Bank by anbileru adaleru from the Noun Project 44 | George Washington by Leonardo Schneider from the Noun Project 45 | bar graph by Anusha Narvekar from the Noun Project 46 | 47 | -------------------------------------------------------------------------------- /spendb/model/facets.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | from sqlalchemy.sql.expression import select, func 4 | 5 | from spendb.core import db 6 | 7 | 8 | class DatasetFacetMixin(object): 9 | 10 | @classmethod 11 | def dataset_counts(cls, datasets_q): 12 | sq = datasets_q.subquery() 13 | q = select([cls.code, func.count(cls.dataset_id)], 14 | group_by=cls.code, 15 | order_by=func.count(cls.dataset_id).desc()) 16 | q = q.where(cls.dataset_id == sq.c.id) 17 | return db.session.bind.execute(q).fetchall() 18 | 19 | 20 | class DatasetLanguage(db.Model, DatasetFacetMixin): 21 | __tablename__ = 'dataset_language' 22 | 23 | id = db.Column(db.Integer, primary_key=True) 24 | code = db.Column(db.Unicode) 25 | created_at = db.Column(db.DateTime, default=datetime.utcnow) 26 | updated_at = db.Column(db.DateTime, onupdate=datetime.utcnow) 27 | 28 | dataset_id = db.Column(db.Integer, db.ForeignKey('dataset.id')) 29 | dataset = db.relationship('Dataset', 
backref=db.backref('_languages', 30 | lazy=False)) 31 | 32 | def __init__(self, code): 33 | self.code = code 34 | 35 | 36 | class DatasetTerritory(db.Model, DatasetFacetMixin): 37 | __tablename__ = 'dataset_territory' 38 | 39 | id = db.Column(db.Integer, primary_key=True) 40 | code = db.Column(db.Unicode) 41 | created_at = db.Column(db.DateTime, default=datetime.utcnow) 42 | updated_at = db.Column(db.DateTime, onupdate=datetime.utcnow) 43 | 44 | dataset_id = db.Column(db.Integer, db.ForeignKey('dataset.id')) 45 | dataset = db.relationship('Dataset', backref=db.backref('_territories', 46 | lazy=False)) 47 | 48 | def __init__(self, code): 49 | self.code = code 50 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/csv_import/empty_additional_date/model.json: -------------------------------------------------------------------------------- 1 | { 2 | "dimensions": { 3 | "additionaldate": { 4 | "attributes": { 5 | "year": { 6 | "column": "additional_date", 7 | "type": "integer" 8 | } 9 | }, 10 | "label": "Additional Date" 11 | }, 12 | "entry_id": { 13 | "attributes": { 14 | "entry_id": { 15 | "column": "id", 16 | "type": "string" 17 | } 18 | }, 19 | "label": "Entry ID" 20 | }, 21 | "from": { 22 | "attributes": { 23 | "label": { 24 | "column": "paid_by", 25 | "type": "string" 26 | }, 27 | "name": { 28 | "column": "paid_by", 29 | "type": "string" 30 | } 31 | }, 32 | "label": "Spender" 33 | }, 34 | "time": { 35 | "attributes": { 36 | "year": { 37 | "column": "date", 38 | "type": "integer" 39 | } 40 | }, 41 | "label": "Time" 42 | }, 43 | "to": { 44 | "attributes": { 45 | "label": { 46 | "column": "paid_to", 47 | "type": "string" 48 | }, 49 | "name": { 50 | "column": "paid_to", 51 | "type": "string" 52 | } 53 | }, 54 | "label": "Recipient" 55 | } 56 | }, 57 | "measures": { 58 | "amount": { 59 | "column": "amount", 60 | "type": "number", 61 | "label": "Amount" 62 | } 63 | } 64 | } 65 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SpenDB 2 | 3 | [![Build Status](https://travis-ci.org/spendb/spendb.png?branch=master)](https://travis-ci.org/spendb/spendb) 4 | [![Coverage Status](https://coveralls.io/repos/spendb/spendb/badge.svg)](https://coveralls.io/r/spendb/spendb) 5 | [![Join the chat at https://gitter.im/pudo/spendb](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/pudo/spendb?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) 6 | 7 | SpenDB is a project to make government finances easier to explore and understand. It started out as "Where does my money go", a platform to visualize the United Kingdom's state finance, but has been renamed and restructured to allow arbitrary financial data to be loaded and displayed. 8 | 9 | * Documentation is located in the [GitHub Wiki](https://github.com/spendb/spendb/wiki). 10 | * [Conceptual overview](https://github.com/spendb/spendb/wiki/Conceptual-overview). 11 | * [Using the web API](https://github.com/spendb/spendb/wiki/Web-API). 12 | * [Developer installation](https://github.com/pudo/spendb/wiki/Developer-installation). 13 | * Please report any [issues and feature ideas](https://github.com/spendb/spendb/issues) or browse the issue tracker for tickets to start contributing. 14 | * Related codebases: 15 | * [fiscalmodel](https://github.com/spendb/fiscalmodel), metadata used for budget dataset classification in spendb. 16 | * [babbage.ui](https://github.com/spendb/babbage.ui), front-end data analysis and visualisation library. 17 | * [babbage](https://github.com/spendb/babbage), data analysis API and analytical domain model. 18 | * [cubes](https://github.com/DataBrewery/cubes), data analysis API (deprecated) 19 | 20 | 21 | ## Licensing 22 | 23 | SpenDB's code is licensed under the GNU Affero Licence except where otherwise indicated. 
A copy of this licence is available in the file ``LICENSE``. 24 | 25 | This application is based on the Open Knowledge Foundation's OpenSpending platform. 26 | -------------------------------------------------------------------------------- /spendb/etl/tasks.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from archivekit import Source 4 | 5 | from spendb.core import db 6 | from spendb.etl.job import job 7 | from spendb.etl.extract import validate_table, load_table 8 | 9 | log = logging.getLogger(__name__) 10 | 11 | 12 | @job(operation='Import from file') 13 | def extract_fileobj(job, dataset, fh, file_name=None, mime_type=None): 14 | """ Upload contents of an opened fh to the data repository. """ 15 | meta = {'source_file': file_name} 16 | if mime_type is not None: 17 | meta['mime_type'] = mime_type 18 | source = job.package.ingest(fh, meta=meta, overwrite=False) 19 | source.save() 20 | job.set_source(source) 21 | return source 22 | 23 | 24 | @job(operation='Import from URL') 25 | def extract_url(job, dataset, url): 26 | """ Upload contents of a URL to the data repository. """ 27 | source = job.package.ingest(url, overwrite=False) 28 | if source is None: 29 | return 30 | source.save() 31 | job.set_source(source) 32 | return source 33 | 34 | 35 | @job(operation='Clean up source data') 36 | def transform_source(job, dataset, source_name): 37 | """ Transform the contents of an uploaded source dataset to a 38 | well-understood file format. """ 39 | source = Source(job.package, source_name) 40 | job.set_source(source) 41 | source = validate_table(source) 42 | if source.meta.get('num_failed') > 0: 43 | return job.failed() 44 | return source 45 | 46 | 47 | @job(operation='Load to database') 48 | def load(job, dataset, source_name): 49 | """ Load the table artifact for this dataset into the fact 50 | table. 
""" 51 | source = Source(job.package, source_name) 52 | job.set_source(source) 53 | dataset.data = {} 54 | dataset.fields = source.meta.get('fields', {}) 55 | if not len(dataset.fields): 56 | raise ValueError('No columns recognized in source data.') 57 | 58 | db.session.commit() 59 | dataset.fact_table.drop() 60 | dataset.fact_table.create() 61 | dataset.fact_table.load_iter(load_table(source)) 62 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/meta/simple.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset": { 3 | "description": "I'm a banana!", 4 | "label": "Test Case Model", 5 | "name": "test" 6 | }, 7 | "model": { 8 | "dimensions": { 9 | "field": { 10 | "attributes": { 11 | "field": { 12 | "column": "field", 13 | "label": "Field" 14 | } 15 | }, 16 | "key_attribute": "field", 17 | "label": "Field 1" 18 | }, 19 | "function": { 20 | "attributes": { 21 | "label": { 22 | "column": "func_label", 23 | "label": "Label" 24 | }, 25 | "name": { 26 | "column": "func_name", 27 | "label": "Name" 28 | } 29 | }, 30 | "key_attribute": "name", 31 | "label_attribute": "label", 32 | "label": "Function code" 33 | }, 34 | "time": { 35 | "attributes": { 36 | "year": { 37 | "column": "year", 38 | "label": "Year" 39 | } 40 | }, 41 | "key_attribute": "year", 42 | "label": "Year" 43 | }, 44 | "to": { 45 | "attributes": { 46 | "label": { 47 | "column": "to_label", 48 | "label": "Label" 49 | }, 50 | "name": { 51 | "column": "to_name", 52 | "label": "Name" 53 | } 54 | }, 55 | "key_attribute": "name", 56 | "label_attribute": "label", 57 | "label": "Einzelplan" 58 | } 59 | }, 60 | "measures": { 61 | "amount": { 62 | "column": "amount", 63 | "label": "Amount" 64 | } 65 | } 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /spendb/model/run.py: -------------------------------------------------------------------------------- 1 | from 
datetime import datetime 2 | 3 | from sqlalchemy.orm import relationship, backref 4 | from sqlalchemy.schema import Column, ForeignKey 5 | from sqlalchemy.types import Integer, Unicode, DateTime 6 | 7 | from spendb.core import db, url_for 8 | from spendb.model.dataset import Dataset 9 | 10 | 11 | class Run(db.Model): 12 | """ A run is a generic grouping object for background operations 13 | that perform logging to the frontend. """ 14 | __tablename__ = 'run' 15 | 16 | # Status values 17 | STATUS_RUNNING = 'running' 18 | STATUS_COMPLETE = 'complete' 19 | STATUS_FAILED = 'failed' 20 | 21 | id = Column(Integer, primary_key=True) 22 | operation = Column(Unicode()) 23 | status = Column(Unicode()) 24 | source = Column(Unicode()) 25 | time_start = Column(DateTime, default=datetime.utcnow) 26 | time_end = Column(DateTime) 27 | 28 | dataset_id = Column(Integer, ForeignKey('dataset.id'), nullable=True) 29 | dataset = relationship(Dataset, 30 | backref=backref('runs', 31 | order_by='Run.time_start.desc()', 32 | lazy='dynamic')) 33 | 34 | def __init__(self, operation, status, dataset): 35 | self.operation = operation 36 | self.status = status 37 | self.dataset = dataset 38 | 39 | def to_dict(self): 40 | return { 41 | 'id': self.id, 42 | 'api_url': url_for('runs_api.view', dataset=self.dataset.name, 43 | id=self.id), 44 | 'operation': self.operation, 45 | 'status': self.status, 46 | 'source': self.source, 47 | 'time_start': self.time_start, 48 | 'time_end': self.time_end 49 | } 50 | 51 | @classmethod 52 | def all(cls, dataset): 53 | q = db.session.query(cls).filter_by(dataset=dataset) 54 | return q.order_by(cls.time_start.asc()) 55 | 56 | @classmethod 57 | def by_id(cls, dataset, id): 58 | return cls.all(dataset).filter_by(id=id).first() 59 | 60 | def __repr__(self): 61 | return "" % (self.source, self.id, self.status) 62 | -------------------------------------------------------------------------------- /spendb/views/__init__.py: 
-------------------------------------------------------------------------------- 1 | from cubes.server import slicer 2 | from colander import Invalid 3 | from jsonschema import ValidationError 4 | from babbage import api as babbage_api 5 | 6 | from spendb.model.manager import SpendingCubeManager 7 | from spendb.views.context import home, get_locale 8 | from spendb.views.error import NotModified, handle_not_modified 9 | from spendb.views.error import handle_error, handle_invalid 10 | from spendb.views.error import handle_validation_error 11 | from spendb.views.api.dataset import blueprint as datasets_api 12 | from spendb.views.api.meta import blueprint as meta_api 13 | from spendb.views.api.session import blueprint as session_api 14 | from spendb.views.api.source import blueprint as source_api 15 | from spendb.views.api.run import blueprint as run_api 16 | from spendb.views.api.account import blueprint as account_api 17 | 18 | 19 | def register_views(app, babel): 20 | babel.locale_selector_func = get_locale 21 | 22 | app.register_blueprint(meta_api, url_prefix='/api/3') 23 | app.register_blueprint(session_api, url_prefix='/api/3') 24 | app.register_blueprint(run_api, url_prefix='/api/3') 25 | app.register_blueprint(source_api, url_prefix='/api/3') 26 | app.register_blueprint(datasets_api, url_prefix='/api/3') 27 | app.register_blueprint(account_api, url_prefix='/api/3') 28 | 29 | # expose ``babbage``: 30 | babbage_api.configure_api(app, SpendingCubeManager()) 31 | app.register_blueprint(babbage_api.blueprint, url_prefix='/api/babbage') 32 | 33 | # expose ``cubes``: 34 | app.register_blueprint(slicer, url_prefix='/api/slicer', config={}) 35 | 36 | app.register_blueprint(home) 37 | 38 | app.error_handler_spec[None][400] = handle_error 39 | app.error_handler_spec[None][401] = handle_error 40 | app.error_handler_spec[None][402] = handle_error 41 | app.error_handler_spec[None][403] = handle_error 42 | app.error_handler_spec[None][404] = handle_error 43 | 
app.error_handler_spec[None][500] = handle_error 44 | 45 | custom = ( 46 | (Invalid, handle_invalid), 47 | (ValidationError, handle_validation_error), 48 | (NotModified, handle_not_modified) 49 | ) 50 | app.error_handler_spec[None][None] = custom 51 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup, find_packages 3 | 4 | 5 | PKG_ROOT = os.path.abspath(os.__file__) 6 | 7 | 8 | def files_in_pkgdir(pkg, dirname): 9 | pkgdir = os.path.join(PKG_ROOT, *pkg.split('.')) 10 | walkdir = os.path.join(pkgdir, dirname) 11 | walkfiles = [] 12 | for dirpath, _, files in os.walk(walkdir): 13 | fpaths = (os.path.relpath(os.path.join(dirpath, f), pkgdir) 14 | for f in files) 15 | walkfiles += fpaths 16 | return walkfiles 17 | 18 | try: 19 | import spendb 20 | release = spendb.__version__ 21 | except: 22 | release = 'dev' 23 | 24 | 25 | def package_filter(pkg): 26 | """ 27 | Filter packages so that we exclude test cases but include regular test 28 | objects available in spendb.tests' modules (all test cases are 29 | in subdirectories). 30 | """ 31 | 32 | # We want to include spendb.tests but not its subpackages 33 | # Hence we only check for things starting with spendb.tests. 
34 | # (note the trailing period to denote subpackages) 35 | return not pkg.startswith('spendb.tests.') 36 | 37 | setup( 38 | name='spendb', 39 | version=release, 40 | description='SpenDB', 41 | author='Friedrich Lindenberg (formerly OKFN)', 42 | author_email='friedrich@pudo.org', 43 | url='http://github.com/spendb/spendb', 44 | install_requires=[], 45 | setup_requires=[], 46 | packages=filter(package_filter, find_packages()), 47 | namespace_packages=['spendb'], 48 | package_data={ 49 | 'spendb': ( 50 | files_in_pkgdir('spendb', 'static') + 51 | files_in_pkgdir('spendb', 'templates') 52 | ) 53 | }, 54 | test_suite='nose.collector', 55 | zip_safe=False, 56 | entry_points={ 57 | 'console_scripts': [ 58 | 'spendb = spendb.command:main' 59 | ], 60 | 'cubes.providers': [ 61 | 'spending = spendb.model.provider:SpendingModelProvider' 62 | ], 63 | 'cubes.stores': [ 64 | 'spending = spendb.model.provider:SpendingStore' 65 | ] 66 | }, 67 | message_extractors={ 68 | 'spendb': [('**.py', 'python', None), 69 | ('templates/**.html', 'jinja2', None), 70 | ('static/**', 'ignore', None)] 71 | }, 72 | ) 73 | -------------------------------------------------------------------------------- /spendb/tests/validation/test_dataset.py: -------------------------------------------------------------------------------- 1 | from colander import Invalid 2 | from nose.tools import raises 3 | 4 | from spendb.validation.dataset import validate_dataset 5 | 6 | from spendb.tests.base import TestCase 7 | from spendb.tests.helpers import validation_fixture 8 | 9 | 10 | class TestDataset(TestCase): 11 | 12 | def setUp(self): 13 | super(TestDataset, self).setUp() 14 | self.model = validation_fixture('default') 15 | 16 | def test_basic_validate(self): 17 | try: 18 | ds = self.model['dataset'] 19 | out = validate_dataset(ds) 20 | assert sorted(out.keys()) == sorted(ds.keys()), [out, ds] 21 | except Invalid, i: 22 | assert False, i.asdict() 23 | 24 | @raises(Invalid) 25 | def test_underscore_validate(self): 
26 | ds = self.model['dataset'].copy() 27 | ds['name'] = 'test__' 28 | validate_dataset(ds) 29 | 30 | @raises(Invalid) 31 | def test_reserved_name_validate(self): 32 | ds = self.model['dataset'].copy() 33 | ds['name'] = 'entRY' 34 | validate_dataset(ds) 35 | 36 | @raises(Invalid) 37 | def test_invalid_currency(self): 38 | ds = self.model['dataset'].copy() 39 | ds['currency'] = 'glass pearls' 40 | validate_dataset(ds) 41 | 42 | @raises(Invalid) 43 | def test_invalid_category(self): 44 | ds = self.model['dataset'].copy() 45 | ds['category'] = 'giraffes' 46 | validate_dataset(ds) 47 | 48 | @raises(Invalid) 49 | def test_invalid_language(self): 50 | ds = self.model['dataset'].copy() 51 | ds['languages'].append('esperanto') 52 | validate_dataset(ds) 53 | 54 | @raises(Invalid) 55 | def test_invalid_country(self): 56 | ds = self.model['dataset'].copy() 57 | ds['territories'].append('SU') 58 | validate_dataset(ds) 59 | 60 | @raises(Invalid) 61 | def test_no_label(self): 62 | ds = self.model['dataset'].copy() 63 | del ds['label'] 64 | validate_dataset(ds) 65 | 66 | @raises(Invalid) 67 | def test_empty_label(self): 68 | ds = self.model['dataset'].copy() 69 | ds['label'] = ' ' 70 | validate_dataset(ds) 71 | 72 | def test_no_description(self): 73 | ds = self.model['dataset'].copy() 74 | del ds['description'] 75 | validate_dataset(ds) 76 | -------------------------------------------------------------------------------- /spendb/validation/common.py: -------------------------------------------------------------------------------- 1 | import re 2 | from colander import Function, All, Length, null, Invalid 3 | 4 | RESERVED_TERMS = ['entry', 'entries', 'dataset', 'datasets', 'dimension', 5 | 'dimensions', 'editor', 'meta', 'id', 'login', 'logout', 6 | 'settings', 'browser', 'explorer', 'member', 'register', 7 | 'after_login', 'after_logout', 'locale', 'reporterror', 8 | 'getinvolved', 'api', '500', 'error', 'url', 'model', 9 | 'distinct', 'views', 'new'] 10 | 11 | 12 | def 
_dataset_name(name): 13 | """ These are names that have a special meaning in URLs and 14 | cannot be used for dataset names. """ 15 | if name is not None and name.lower() in RESERVED_TERMS: 16 | return "'%s' is a reserved word and cannot be used here" % name 17 | if not re.match(r"^\w[\w\_\-]+$", name): 18 | return ("Name must include only " 19 | "letters, numbers, dashes and underscores") 20 | if '__' in name: 21 | return "Double underscores are not allowed in dataset names." 22 | return True 23 | 24 | 25 | dataset_name = All(Length(min=2, max=30), Function(_dataset_name)) 26 | 27 | 28 | def _field_name(name): 29 | """ These are names that have a special meaning in URLs and 30 | cannot be used for dataset names. """ 31 | if not re.match(r"^\w[\w\_]+$", name): 32 | return ("Name must include only letters, numbers and underscores") 33 | if '__' in name: 34 | return "Double underscores are not allowed in field names." 35 | return True 36 | 37 | 38 | field_name = All(Length(min=2, max=60), Function(_field_name)) 39 | 40 | 41 | def prepare_name(name): 42 | """ Convert a given value to a name. """ 43 | if name is None or name is null: 44 | return '' 45 | return unicode(name).strip() 46 | 47 | 48 | def require_one_child(data): 49 | if isinstance(data, dict) and len(data.keys()): 50 | return True 51 | return "Must have at least one dimension and one measure." 52 | 53 | 54 | class Ref(object): 55 | 56 | def deserialize(self, node, cstruct): 57 | if cstruct is null: 58 | return null 59 | value = self.decode(cstruct) 60 | if value is None: 61 | raise Invalid(node, 'Missing') 62 | return value 63 | 64 | def cstruct_children(self, node, cstruct): 65 | return [] 66 | -------------------------------------------------------------------------------- /spendb/command/__init__.py: -------------------------------------------------------------------------------- 1 | ''' Interface to common administrative tasks for SpenDB. 
''' 2 | import logging 3 | from flask.ext.script import Manager 4 | from flask.ext.migrate import MigrateCommand 5 | 6 | from spendb.core import create_web_app 7 | from spendb.tasks import load_from_url 8 | from spendb.command import db 9 | from spendb.command.importer import get_or_create_dataset, get_model 10 | 11 | log = logging.getLogger(__name__.split('.')[0]) 12 | app = create_web_app() 13 | manager = Manager(app, description=__doc__) 14 | 15 | manager.add_command('db', db.manager) 16 | manager.add_command('alembic', MigrateCommand) 17 | 18 | 19 | @manager.command 20 | def grantadmin(username): 21 | """ Grant admin privileges to given user """ 22 | from spendb.model import meta as db 23 | from spendb.model.account import Account 24 | 25 | account = Account.by_name(username) 26 | if account is None: 27 | raise Exception("Account `%s` not found." % username) 28 | 29 | account.admin = True 30 | db.session.add(account) 31 | db.session.commit() 32 | 33 | 34 | @manager.option('-n', '--dry-run', dest='dry_run', action='store_true', 35 | help="Perform a dry run, don't load any data.") 36 | @manager.option('-i', '--index', dest='build_indices', action='store_true', 37 | help="Suppress Solr index build.") 38 | @manager.option('--max-lines', action="store", dest='max_lines', type=int, 39 | default=None, metavar='N', 40 | help="Number of lines to import.") 41 | @manager.option('--raise-on-error', action="store_true", 42 | dest='raise_errors', default=False, 43 | help='Get full traceback on first error.') 44 | @manager.option('--model', action="store", dest='model', 45 | default=None, metavar='url', required=True, 46 | help="URL of JSON format model (metadata and mapping).") 47 | @manager.option('--visualisations', action="store", dest="views", 48 | default=None, metavar='url/file', 49 | help="URL/file of JSON format visualisations.") 50 | @manager.option('data_url', help="Data file URL") 51 | @manager.command 52 | def csvimport(**args): 53 | """ Load a CSV dataset """ 54 
| model = get_model(args['model']) 55 | dataset = get_or_create_dataset(model) 56 | load_from_url(dataset, args['data_url']) 57 | 58 | 59 | def main(): 60 | manager.run() 61 | 62 | if __name__ == "__main__": 63 | main() 64 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/csv_import/mexico/data.csv: -------------------------------------------------------------------------------- 1 | id,RAMO,TPP,GPP,IPP,PP,UR,GF,FUNC,SF,AI,TG,FF,OG,IMPORTE PEF,DATE,TO,FROM 2 | 1,01 Poder Legislativo,1 Programas Federales,2 Desempeno de las Funciones,K Proyectos de Inversion,025 Proyectos de inmuebles (oficinas administrativas),200 H. Camara de Senadores,Gobierno,Legislacion,Legislacion,Llevar a cabo el proceso Legislativo,Gasto de obra publica,Recursos fiscales,6200 Obra publica en bienes propios,580000000,2011-01-01,Society 3 | 2,01 Poder Legislativo,1 Programas Federales,2 Desempeno de las Funciones,K Proyectos de Inversion,027 Mantenimiento de Infraestructura,100 H. Camara de Diputados,Gobierno,Legislacion,Legislacion,Llevar a cabo el proceso Legislativo,Gasto de obra publica,Recursos fiscales,6200 Obra publica en bienes propios,144000000,2011-01-01,Society 4 | 3,01 Poder Legislativo,1 Programas Federales,2 Desempeno de las Funciones,R Especificos,001 Actividades derivadas del trabajo legislativo,100 H. Camara de Diputados,Gobierno,Legislacion,Legislacion,Llevar a cabo el proceso Legislativo,Gasto corriente,Recursos fiscales,1100 Remuneraciones al personal de caracter permanente,898000000,2011-01-01,Society 5 | 4,01 Poder Legislativo,1 Programas Federales,2 Desempeno de las Funciones,R Especificos,001 Actividades derivadas del trabajo legislativo,100 H. 
Camara de Diputados,Gobierno,Legislacion,Legislacion,Llevar a cabo el proceso Legislativo,Gasto corriente,Recursos fiscales,1200 Remuneraciones al personal de caracter transitorio,431000000,2011-01-01,Society 6 | 5,01 Poder Legislativo,1 Programas Federales,2 Desempeno de las Funciones,R Especificos,001 Actividades derivadas del trabajo legislativo,100 H. Camara de Diputados,Gobierno,Legislacion,Legislacion,Llevar a cabo el proceso Legislativo,Gasto corriente,Recursos fiscales,1300 Remuneraciones adicionales y especiales,358000000,2011-01-01,Society 7 | 6,01 Poder Legislativo,1 Programas Federales,2 Desempeno de las Funciones,R Especificos,001 Actividades derivadas del trabajo legislativo,100 H. Camara de Diputados,Gobierno,Legislacion,Legislacion,Llevar a cabo el proceso Legislativo,Gasto corriente,Recursos fiscales,1400 Seguridad social,187000000,2011-01-01,Society 8 | 7,01 Poder Legislativo,1 Programas Federales,2 Desempeno de las Funciones,R Especificos,001 Actividades derivadas del trabajo legislativo,100 H. 
Camara de Diputados,Gobierno,Legislacion,Legislacion,Llevar a cabo el proceso Legislativo,Gasto corriente,Recursos fiscales,1500 Otras prestaciones sociales y economicas,883000000,2011-01-01,Society 9 | -------------------------------------------------------------------------------- /spendb/views/context.py: -------------------------------------------------------------------------------- 1 | from flask import current_app, request, session 2 | from flask.ext.login import current_user 3 | from babel import Locale 4 | from apikit import cache_hash 5 | 6 | from spendb import __version__ 7 | from spendb.core import babel 8 | from spendb.views.error import NotModified 9 | from spendb.views.home import blueprint as home 10 | 11 | 12 | def get_locale(): 13 | if 'locale' in session: 14 | return Locale.parse(session.get('locale')) 15 | else: 16 | requested = request.accept_languages.values() 17 | requested = [l.replace('-', '_') for l in requested] 18 | available = map(unicode, babel.list_translations()) 19 | return Locale.negotiate(available, requested) 20 | 21 | 22 | @home.before_app_request 23 | def before_request(): 24 | current_app.cubes_workspace.flush_lookup_cache() 25 | request._http_etag = None 26 | request._http_private = False 27 | 28 | 29 | @home.after_app_request 30 | def after_request(resp): 31 | resp.headers['Server'] = 'SpenDB/%s' % __version__ 32 | 33 | if resp.is_streamed and request.endpoint != 'static': 34 | # http://wiki.nginx.org/X-accel#X-Accel-Buffering 35 | resp.headers['X-Accel-Buffering'] = 'no' 36 | 37 | # skip cache under these conditions: 38 | if not current_app.config.get('CACHE') \ 39 | or request.method not in ['GET', 'HEAD', 'OPTIONS'] \ 40 | or resp.status_code > 399: 41 | resp.cache_control.no_cache = True 42 | return resp 43 | 44 | if request.endpoint == 'static': 45 | resp.cache_control.max_age = 3600 * 6 46 | resp.cache_control.public = True 47 | 48 | if request._http_etag: 49 | if not request._http_private: 50 | 
resp.cache_control.public = True 51 | else: 52 | resp.cache_control.private = True 53 | resp.cache_control.max_age = 3600 * 6 54 | resp.cache_control.must_revalidate = True 55 | resp.set_etag(request._http_etag) 56 | 57 | return resp 58 | 59 | 60 | def etag_cache_keygen(key_obj, private=False): 61 | request._http_private = private 62 | 63 | args = sorted(set(request.args.items())) 64 | # jquery where is your god now?!? 65 | args = filter(lambda (k, v): k != '_', args) 66 | 67 | request._http_etag = cache_hash(args, current_user, 68 | key_obj, get_locale()) 69 | if request.if_none_match == request._http_etag: 70 | raise NotModified() 71 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/csv_import/lbhf/model.json: -------------------------------------------------------------------------------- 1 | { 2 | "dimensions": { 3 | "entry_id": { 4 | "attributes": { 5 | "entry_id": { 6 | "column": "id", 7 | "label": "string" 8 | } 9 | }, 10 | "key_attribute": "entry_id", 11 | "label": "Entry ID" 12 | }, 13 | "from": { 14 | "attributes": { 15 | "label": { 16 | "column": "paid_by", 17 | "label": "string" 18 | }, 19 | "name": { 20 | "column": "paid_by", 21 | "label": "string" 22 | } 23 | }, 24 | "key_attribute": "name", 25 | "description": "Payer", 26 | "label": "Payer" 27 | }, 28 | "spendingarea": { 29 | "attributes": { 30 | "spendingarea": { 31 | "column": "spending_area", 32 | "label": "string" 33 | } 34 | }, 35 | "key_attribute": "spendingarea", 36 | "description": "Spending Area", 37 | "label": "Spending Area" 38 | }, 39 | "time": { 40 | "attributes": { 41 | "year": { 42 | "column": "date", 43 | "label": "Year" 44 | } 45 | }, 46 | "key_attribute": "year", 47 | "label": "Time" 48 | }, 49 | "to": { 50 | "attributes": { 51 | "label": { 52 | "column": "paid_to", 53 | "label": "Label" 54 | }, 55 | "name": { 56 | "column": "paid_to", 57 | "label": "Name" 58 | } 59 | }, 60 | "key_attribute": "name", 61 | "description": 
"Payee", 62 | "label": "Payee" 63 | }, 64 | "transactionid": { 65 | "attributes": { 66 | "transactionid": { 67 | "column": "transaction_id", 68 | "label": "ID" 69 | } 70 | }, 71 | "key_attribute": "transactionid", 72 | "description": "Reference", 73 | "label": "Reference" 74 | } 75 | }, 76 | "measures": { 77 | "amount": { 78 | "column": "amount", 79 | "label": "Amount" 80 | } 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/csv_import/sample/model.json: -------------------------------------------------------------------------------- 1 | { 2 | "dimensions": { 3 | "entry_id": { 4 | "attributes": { 5 | "entry_id": { 6 | "column": "id", 7 | "label": "ID" 8 | } 9 | }, 10 | "key_attribute": "entry_id", 11 | "label": "Unique transaction ID" 12 | }, 13 | "from": { 14 | "attributes": { 15 | "label": { 16 | "column": "paid_by", 17 | "label": "Label" 18 | }, 19 | "name": { 20 | "column": "paid_by", 21 | "label": "name" 22 | } 23 | }, 24 | "key_attribute": "name", 25 | "description": "Payer", 26 | "label": "Payer" 27 | }, 28 | "spendingarea": { 29 | "attributes": { 30 | "spendingarea": { 31 | "column": "spending_area", 32 | "label": "string" 33 | } 34 | }, 35 | "key_attribute": "spendingarea", 36 | "description": "Spending Area", 37 | "label": "Spending Area" 38 | }, 39 | "time": { 40 | "attributes": { 41 | "year": { 42 | "column": "date", 43 | "label": "integer" 44 | } 45 | }, 46 | "key_attribute": "year", 47 | "label": "Time" 48 | }, 49 | "to": { 50 | "attributes": { 51 | "label": { 52 | "column": "paid_to", 53 | "label": "Label" 54 | }, 55 | "name": { 56 | "column": "paid_to", 57 | "label": "Name" 58 | } 59 | }, 60 | "key_attribute": "name", 61 | "description": "Payee", 62 | "label": "Payee" 63 | }, 64 | "transactionid": { 65 | "attributes": { 66 | "transactionid": { 67 | "column": "transaction_id", 68 | "label": "string" 69 | } 70 | }, 71 | "key_attribute": "transactionid", 72 | "description": 
"Reference", 73 | "label": "Reference" 74 | } 75 | }, 76 | "measures": { 77 | "amount": { 78 | "column": "amount", 79 | "label": "Amount" 80 | } 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /contrib/assets/noun_161002_cc.svg: -------------------------------------------------------------------------------- 1 | Created by Kevin Augustine LOfrom the Noun Project -------------------------------------------------------------------------------- /contrib/os_export/archive_sources.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | import sys 4 | import os 5 | 6 | 7 | def grab_source(url, output): 8 | """ 9 | Grab a source from a url and store it in an output file 10 | 11 | This creates uses requests as a dependency because I'm lazy. 12 | It probably would have taken me less time to just write it with 13 | urllib than writing this docstring 14 | """ 15 | 16 | # We use stream because these files might be huge 17 | response = requests.get(url, stream=True) 18 | 19 | # We don't do anything if there's something wrong with the url 20 | # This is basically what made urllib.urlretrieve a hassle 21 | if not response.ok: 22 | return 23 | 24 | with open(output, 'w') as output_file: 25 | for block in response.iter_content(1024): 26 | output_file.write(block) 27 | 28 | 29 | def archive(directory): 30 | """ 31 | Archive a OpenSpending dataset export directory 32 | """ 33 | 34 | # If we accidentally pass in something that's not a directory 35 | # we don't do anything 36 | if not os.path.isdir(directory): 37 | return 38 | 39 | # Check if the directory contains a dataset.json file 40 | dataset = os.path.join(directory, 'dataset.json') 41 | if not os.path.isfile(dataset): 42 | return 43 | 44 | # Open the dataset.json file and grab the sources listed in it 45 | with open(dataset) as descriptor: 46 | data = json.load(descriptor) 47 | if len(data['sources']): 48 | # 
Create an archive directory because there are some 49 | # sources we want to grab 50 | archive_directory = os.path.join(directory, 'archive') 51 | if not os.path.exists(archive_directory): 52 | os.makedirs(archive_directory) 53 | 54 | # Loop through sources, grab them and store in an output file 55 | # called .csv 56 | for source in data['sources']: 57 | filename = '{0}.csv'.format(source['id']) 58 | archive_file = os.path.join(archive_directory, filename) 59 | grab_source(source['url'], output=archive_file) 60 | 61 | # If the archive directory is empty which will happen if 62 | # grabbing the sources failed for some reason 63 | if not os.listdir(archive_directory): 64 | os.rmdir(archive_directory) 65 | 66 | 67 | if __name__ == "__main__": 68 | # Loop through each of the arguments and archive them 69 | for directory in sys.argv[1:]: 70 | archive(directory) 71 | -------------------------------------------------------------------------------- /spendb/etl/job.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from functools import wraps 3 | from datetime import datetime 4 | 5 | from archivekit import Source 6 | from loadkit.logger import capture 7 | 8 | from spendb.core import data_manager, db 9 | from spendb.model.run import Run 10 | 11 | 12 | class Job(object): 13 | 14 | def __init__(self, dataset, operation): 15 | self.log = logging.getLogger('spendb.etl') 16 | self.dataset = dataset 17 | self.operation = operation 18 | self.run = None 19 | 20 | def start(self): 21 | self.run = Run(self.operation, Run.STATUS_RUNNING, self.dataset) 22 | db.session.add(self.run) 23 | db.session.commit() 24 | 25 | self.package = data_manager.package(self.dataset.name) 26 | modules = [self.log, 'loadkit'] 27 | self.log_handler = capture(self.package, self.run.id, modules) 28 | self.log.info("Starting: %s", self.operation) 29 | 30 | def set_source(self, source): 31 | self.run.source = source.name 32 | db.session.commit() 33 | 34 | 
def end(self, status): 35 | self.run.status = status 36 | self.run.time_end = datetime.utcnow() 37 | self.dataset.touch() 38 | db.session.commit() 39 | self.log_handler.archive() 40 | 41 | @property 42 | def running(self): 43 | return self.run and self.run.status == Run.STATUS_RUNNING 44 | 45 | def complete(self): 46 | if self.running: 47 | self.log.info("Completed: %s", self.operation) 48 | self.end(Run.STATUS_COMPLETE) 49 | 50 | def failed(self): 51 | if self.running: 52 | self.log.warn("Failed: %s", self.operation) 53 | self.end(Run.STATUS_FAILED) 54 | 55 | 56 | def job(operation=None): 57 | """ Wrap an ETL job. This will handle logging, run management 58 | and other tasks. It assumes the first positional argument is 59 | the dataset that this operation is performed on, and will 60 | inject another argument before that, the ``job``. """ 61 | 62 | def decorator(fn): 63 | @wraps(fn) 64 | def wrapper(dataset, *a, **kw): 65 | job = Job(dataset, operation or fn.__name__) 66 | try: 67 | job.start() 68 | result = fn(job, dataset, *a, **kw) 69 | if job.running: 70 | job.complete() 71 | return result 72 | except Exception, e: 73 | job.log.exception(e) 74 | job.failed() 75 | finally: 76 | if job.running: 77 | job.failed() 78 | return wrapper 79 | 80 | return decorator 81 | -------------------------------------------------------------------------------- /spendb/tests/etl/test_import_fixtures.py: -------------------------------------------------------------------------------- 1 | import urllib 2 | import json 3 | 4 | from spendb.core import db, data_manager 5 | from spendb.model import Dataset, Run 6 | from spendb.etl import tasks 7 | 8 | from spendb.tests.base import DatabaseTestCase 9 | from spendb.tests.helpers import csvimport_fixture_file 10 | from spendb.tests.helpers import csvimport_fixture_path 11 | 12 | 13 | def import_fixture(name): 14 | meta_fp = csvimport_fixture_file(name, 'meta.json') 15 | model_fp = csvimport_fixture_file(name, 'model.json') 16 | meta = 
json.load(meta_fp) 17 | if model_fp: 18 | meta['model'] = json.load(model_fp) 19 | dataset = Dataset(meta) 20 | db.session.add(dataset) 21 | data_path = csvimport_fixture_path(name, 'data.csv') 22 | db.session.commit() 23 | return dataset, data_path 24 | 25 | 26 | class TestImportFixtures(DatabaseTestCase): 27 | 28 | def setUp(self): 29 | super(TestImportFixtures, self).setUp() 30 | data_manager._index = None 31 | 32 | def tearDown(self): 33 | super(TestImportFixtures, self).tearDown() 34 | 35 | def count_lines_in_stream(self, f): 36 | from StringIO import StringIO 37 | return len(list(StringIO(f.read()))) 38 | 39 | def _test_import(self, name, lines=None): 40 | dataset, url = import_fixture(name) 41 | data = urllib.urlopen(url) 42 | if lines is None: 43 | lines = self.count_lines_in_stream(data) - 1 # -1 for header row 44 | 45 | source = tasks.extract_url(dataset, url) 46 | tasks.transform_source(dataset, source.name) 47 | tasks.load(dataset, source_name=source.name) 48 | 49 | for run in db.session.query(Run).all(): 50 | assert run.status == Run.STATUS_COMPLETE, run 51 | 52 | # check correct number of entries 53 | dataset = db.session.query(Dataset).first() 54 | q = dataset.fact_table.table.select() 55 | entries = db.engine.execute(q).fetchall() 56 | assert len(entries) == lines, len(entries) 57 | 58 | def test_imports_mexico(self): 59 | self._test_import('mexico') 60 | 61 | def test_imports_lbhf(self): 62 | self._test_import('lbhf') 63 | 64 | def test_imports_sample(self): 65 | self._test_import('sample') 66 | 67 | def test_imports_quoting(self): 68 | self._test_import('quoting', lines=5) 69 | 70 | def test_missing_url(self): 71 | dataset, url = import_fixture('file:///dev/null') 72 | source = tasks.extract_url(dataset, url) 73 | assert source is None, source 74 | 75 | for run in db.session.query(Run).all(): 76 | assert run.status == Run.STATUS_FAILED, run 77 | -------------------------------------------------------------------------------- /swarm.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "name": "spendb", 3 | "components": { 4 | "web": { 5 | "image": "registry.giantswarm.io/spendb/spendb:latest", 6 | "ports": 8000, 7 | "env": { 8 | "SITE_TITLE": "$site_title", 9 | "SECRET": "$secret", 10 | "DATABASE_URL": "$database_url", 11 | "AMQP_URL": "$amqp_url", 12 | "MAIL_SERVER": "$mail_server", 13 | "MAIL_PORT": "$mail_port", 14 | "MAIL_USERNAME": "$mail_username", 15 | "MAIL_PASSWORD": "$mail_password", 16 | "MAIL_DEFAULT_SENDER": "$mail_default_sender", 17 | "AWS_KEY_ID": "$aws_key_id", 18 | "AWS_SECRET": "$aws_secret", 19 | "AWS_DATA_BUCKET": "$aws_data_bucket" 20 | }, 21 | "links": [ 22 | {"component": "rabbitmq", "target_port": "5672"}, 23 | {"component": "db", "target_port": "5432"} 24 | ], 25 | "domains": { 26 | "8000": [ 27 | "spendb.gigantic.io", 28 | "dummy.pudo.org" 29 | ] 30 | }, 31 | "entrypoint": "gunicorn", 32 | "args": [ 33 | "-w", "5", 34 | "-b", "0.0.0.0:8000", 35 | "--error-logfile", "-", 36 | "--log-file", "-", 37 | "spendb.wsgi:app" 38 | ] 39 | }, 40 | "db": { 41 | "image": "postgres:9.4", 42 | "ports": 5432, 43 | "env": { 44 | "POSTGRES_USER": "spendb", 45 | "POSTGRES_PASSWORD": "spendb" 46 | }, 47 | "volumes": [ 48 | { 49 | "path": "/var/lib/postgresql/data", 50 | "size": "4 GB" 51 | } 52 | ] 53 | }, 54 | "rabbitmq": { 55 | "image": "rabbitmq", 56 | "ports": 5672 57 | }, 58 | "worker": { 59 | "image": "registry.giantswarm.io/spendb/spendb:latest", 60 | "env": { 61 | "C_FORCE_ROOT": "$celery_force_root", 62 | "SITE_TITLE": "$site_title", 63 | "SECRET": "$secret", 64 | "DATABASE_URL": "$database_url", 65 | "AMQP_URL": "$amqp_url", 66 | "MAIL_SERVER": "$mail_server", 67 | "MAIL_PORT": "$mail_port", 68 | "MAIL_USERNAME": "$mail_username", 69 | "MAIL_PASSWORD": "$mail_password", 70 | "MAIL_DEFAULT_SENDER": "$mail_default_sender", 71 | "AWS_KEY_ID": "$aws_key_id", 72 | "AWS_SECRET": "$aws_secret", 73 | "AWS_DATA_BUCKET": "$aws_data_bucket" 74 | }, 75 
| "links": [ 76 | {"component": "rabbitmq", "target_port": "5672"}, 77 | {"component": "db", "target_port": "5432"} 78 | ], 79 | "entrypoint": "celery", 80 | "args": [ 81 | "-A", "spendb.tasks", "worker", 82 | "-c", "4", 83 | "-l", "info" 84 | ] 85 | } 86 | } 87 | } -------------------------------------------------------------------------------- /contrib/assets/noun_29578_cc.svg: -------------------------------------------------------------------------------- 1 | Created by Nate Eulfrom the Noun Project -------------------------------------------------------------------------------- /spendb/core.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from flask import Flask 3 | from flask import url_for as _url_for 4 | from flask.ext.sqlalchemy import SQLAlchemy 5 | from flask.ext.login import LoginManager 6 | from flask.ext.babel import Babel 7 | from flask.ext.cache import Cache 8 | from flask.ext.mail import Mail 9 | from flask.ext.migrate import Migrate 10 | from flask.ext.cors import CORS 11 | from flask_flatpages import FlatPages 12 | from celery import Celery 13 | from cubes import Workspace, ext 14 | 15 | from spendb import default_settings 16 | from spendb.etl.manager import DataManager 17 | 18 | logging.basicConfig(level=logging.DEBUG) 19 | 20 | # specific loggers 21 | logging.getLogger('cubes').setLevel(logging.WARNING) 22 | logging.getLogger('markdown').setLevel(logging.WARNING) 23 | logging.getLogger('boto').setLevel(logging.WARNING) 24 | logging.getLogger('spendb.core.cors').setLevel(logging.WARNING) 25 | 26 | 27 | db = SQLAlchemy() 28 | babel = Babel() 29 | login_manager = LoginManager() 30 | cache = Cache() 31 | mail = Mail() 32 | migrate = Migrate() 33 | pages = FlatPages() 34 | data_manager = DataManager() 35 | cors = CORS() 36 | 37 | 38 | def create_app(**config): 39 | app = Flask(__name__) 40 | 41 | app.config.from_object(default_settings) 42 | app.config.from_envvar('SPENDB_SETTINGS', 
silent=True) 43 | app.config.update(config) 44 | 45 | db.init_app(app) 46 | babel.init_app(app) 47 | cache.init_app(app) 48 | mail.init_app(app) 49 | login_manager.init_app(app) 50 | data_manager.init_app(app) 51 | pages.init_app(app) 52 | migrate.init_app(app, db, directory=app.config.get('ALEMBIC_DIR')) 53 | cors.init_app(app, resources=r'/api/*', supports_credentials=True, 54 | methods=['GET', 'HEAD', 'OPTIONS']) 55 | 56 | ws = Workspace() 57 | ext.model_provider("spending", metadata={}) 58 | ext.store("spending") 59 | ws.register_default_store('spending', model_provider='spending') 60 | app.cubes_workspace = ws 61 | return app 62 | 63 | 64 | def create_web_app(**config): 65 | app = create_app(**config) 66 | 67 | from spendb.views import register_views 68 | register_views(app, babel) 69 | return app 70 | 71 | 72 | def create_celery(app): 73 | celery = Celery(app.import_name, broker=app.config['CELERY_BROKER_URL']) 74 | celery.conf.update(app.config) 75 | return celery 76 | 77 | 78 | def url_for(endpoint, **kwargs): 79 | try: 80 | from flask import current_app 81 | if current_app.config.get('PREFERRED_URL_SCHEME'): 82 | kwargs['_scheme'] = current_app.config.get('PREFERRED_URL_SCHEME') 83 | url = _url_for(endpoint, _external=True, **kwargs) 84 | return url 85 | except: 86 | return None 87 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/validation/default.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset": { 3 | "category": "other", 4 | "currency": "EUR", 5 | "description": "This is a test dataset", 6 | "label": "Test Dataset", 7 | "languages": [ 8 | "en" 9 | ], 10 | "name": "test", 11 | "private": false, 12 | "territories": [ 13 | "DE", 14 | "FR", 15 | "ES" 16 | ] 17 | }, 18 | "model": { 19 | "dimensions": { 20 | "function": { 21 | "attributes": { 22 | "description": { 23 | "column": "FUNCTION_DESC", 24 | "label": "string" 25 | }, 26 | "label": { 27 | 
"column": "FUNCTION", 28 | "label": "string" 29 | }, 30 | "name": { 31 | "column": "FUNCTION_ID", 32 | "label": "string" 33 | } 34 | }, 35 | "label": "Function", 36 | "key_attribute": "name" 37 | }, 38 | "supplier": { 39 | "attributes": { 40 | "label": { 41 | "column": "SUPPLIER", 42 | "label": "string" 43 | }, 44 | "name": { 45 | "column": "SUPPLIER_ID", 46 | "label": "string" 47 | } 48 | }, 49 | "label": "Supplier", 50 | "key_attribute": "name" 51 | }, 52 | "time": { 53 | "attributes": { 54 | "year": { 55 | "column": "time_from_year", 56 | "label": "string" 57 | } 58 | }, 59 | "description": "The accounting period in which the spending happened", 60 | "label": "Tax year", 61 | "key_attribute": "year" 62 | }, 63 | "transaction_id": { 64 | "attributes": { 65 | "transaction_id": { 66 | "column": "tx", 67 | "label": "string" 68 | } 69 | }, 70 | "label": "Transaction ID", 71 | "key_attribute": "transaction_id" 72 | } 73 | }, 74 | "measures": { 75 | "amount": { 76 | "column": "AMOUNT", 77 | "label": "Amount" 78 | }, 79 | "cofinance": { 80 | "column": "cofin", 81 | "label": "Co-Financed Amount" 82 | } 83 | } 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /spendb/views/api/session.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from flask import Blueprint, request 4 | from flask.ext.login import current_user, login_user, logout_user 5 | from werkzeug.security import check_password_hash 6 | from flask.ext.babel import gettext as _ 7 | from apikit import jsonify, request_data 8 | 9 | from spendb.core import login_manager 10 | from spendb.auth import dataset 11 | from spendb.model import Account, Dataset 12 | from spendb.views.context import etag_cache_keygen 13 | 14 | log = logging.getLogger(__name__) 15 | blueprint = Blueprint('sessions_api', __name__) 16 | 17 | 18 | @login_manager.request_loader 19 | def load_user_from_request(request): 20 | api_key = 
@blueprint.route('/sessions/login', methods=['POST', 'PUT'])
def login():
    """ Validate the submitted credentials and begin a session.

    Responds with the same error body for an unknown login name and a
    wrong password, so the endpoint does not leak which accounts exist.
    """
    data = request_data()
    account = Account.by_name(data.get('login'))
    # Guard against accounts without a stored password hash (e.g. created
    # through an external signup flow): check_password_hash would raise
    # on None and turn a failed login into a 500.
    if account is not None and account.password:
        if check_password_hash(account.password, data.get('password')):
            login_user(account, remember=True)
            return jsonify({
                'status': 'ok',
                'message': _("Welcome back, %(name)s!", name=account.name)
            })
    return jsonify({
        'status': 'error',
        'errors': {
            'password': _("Incorrect user name or password!")
        }
    }, status=400)
def enable_bucket_cors(bucket):
    """ For direct upload to work, the bucket needs to enable
    cross-origin request scripting. """
    try:
        cors_cfg = bucket.get_cors()
    except S3ResponseError:
        # S3 raises when the bucket has no CORS configuration yet;
        # start from an empty one.
        cors_cfg = CORSConfiguration()
    rules = [r.id for r in cors_cfg]
    changed = False
    # Rule for browser-side uploads (PUT/POST from any origin).
    if 'spendb_put' not in rules:
        cors_cfg.add_rule(['PUT', 'POST'], '*',
                          allowed_header='*',
                          id='spendb_put',
                          max_age_seconds=3000,
                          expose_header='x-amz-server-side-encryption')
        changed = True
    # Rule for reading uploaded files back from the browser.
    if 'spendb_get' not in rules:
        cors_cfg.add_rule('GET', '*', id='spendb_get')
        changed = True

    # Only issue the S3 API call when a rule was actually added.
    if changed:
        bucket.set_cors(cors_cfg)
def generate_s3_upload_policy(source, file_name, mime_type):
    """ Generate a policy and signature for uploading a file directly to
    the specified source on S3.

    Returns a dict carrying the signed browser-upload policy, or a dict
    with ``status: error`` when the storage backend is not S3. """
    obj = source._obj
    if not hasattr(obj, 'key'):
        return {
            'status': 'error',
            'message': 'Backend is not on S3, cannot generate signature.'
        }

    enable_bucket_cors(obj.store.bucket)
    url = obj.key.generate_url(expires_in=0, force_http=True,
                               query_auth=False)
    url = url.split(obj.key.name)[0]

    if 'https' in current_app.config.get('PREFERRED_URL_SCHEME'):
        url = url.replace('http://', 'https://')

    data = {
        'url': url,
        'status': 'ok',
        'key': obj.key.name,
        'source_name': source.name,
        'aws_key_id': obj.store.aws_key_id,
        'acl': 'public-read',
        'file_name': file_name,
        'mime_type': mime_type
    }
    # Drop microseconds via replace() instead of splitting isoformat() on
    # '.': when the timestamp falls on a whole second isoformat() emits no
    # fractional part and the old two-way unpack raised ValueError.
    expire = datetime.utcnow() + timedelta(days=7)
    expire = expire.replace(microsecond=0)
    policy = {
        "expiration": expire.isoformat() + "Z",
        "conditions": [
            {"bucket": obj.store.bucket_name},
            ["starts-with", "$key", data['key']],
            {"acl": data['acl']}
        ]
    }

    # AWS browser-based POST policy: base64-encode the JSON policy
    # document, then sign it with HMAC-SHA1 using the secret key.
    data['policy'] = b64encode(json.dumps(policy))
    data['signature'] = b64encode(hmac.new(obj.store.aws_secret,
                                           data['policy'],
                                           hashlib.sha1).digest())
    return data
def transform_dataset(source):
    """ Convert an OpenSpending-style column ``mapping`` into a spendb
    model dict with ``measures`` and ``dimensions`` sections. """
    mapping = source['data']['mapping']
    model = {'measures': {}, 'dimensions': {}}
    types = set()
    for name, src in mapping.items():
        norm_name = slug(name)
        if src.get('type') == 'measure':
            model['measures'][norm_name] = {
                'label': src['label'],
                # description may be absent or None in old exports
                'description': src.get('description') or '',
                'column': norm_name
            }
            continue

        dim = {
            'label': src['label'],
            'description': src.get('description') or '',
            'label_attribute': 'label',
            'key_attribute': 'label',
            'attributes': {}
        }
        if src.get('type') == 'date':
            # Date columns were exploded into one column per granularity.
            dim['attributes'] = {
                'label': {
                    'label': 'Label',
                    'column': norm_name + '_name'
                },
                'year': {
                    'label': 'Year',
                    'column': norm_name + '_year'
                },
                'month': {
                    'label': 'Month',
                    'column': norm_name + '_month'
                },
                'day': {
                    'label': 'Day',
                    'column': norm_name + '_day'
                },
                'yearmonth': {
                    'label': 'Year/Month',
                    'column': norm_name + '_yearmonth'
                }
            }
        if src.get('type') == 'attribute':
            dim['attributes'] = {
                'label': {
                    'label': 'Label',
                    'column': norm_name
                }
            }
        if src.get('type') == 'compound':
            # The inner loop previously re-bound ``name`` and shadowed the
            # outer dimension name; use a distinct variable to avoid
            # confusing later reads of ``name``.
            for attr_name, spec in src['attributes'].items():
                attr = slug(attr_name)
                dim['attributes'][attr] = {
                    'label': spec['column'],
                    'column': norm_name + '_' + attr
                }
        if 'name' in dim['attributes']:
            dim['key_attribute'] = 'name'
        model['dimensions'][norm_name] = dim
    return model
    def test_load_model_properties(self):
        # The dataset's serialized form must mirror its ORM attributes.
        assert self.ds.name == self.ds.to_dict()['name'], self.ds.name
        assert self.ds.label == self.ds.to_dict()['label'], self.ds.label
    def test_drop(self):
        # The fixture load created the fact table for dataset 'test'.
        tn = self.engine.table_names()
        assert 'test__facts' in tn, tn

        # Dropping the fact table must remove it from the database.
        self.ds.fact_table.drop()
        tn = self.engine.table_names()
        assert 'test__facts' not in tn, tn
    def test_logout(self):
        # Logout always succeeds, even for API-key based "sessions".
        url = url_for('sessions_api.logout')
        res = self.client.post(url, query_string=self.auth_qs)
        assert res.json.get('status') == 'ok', res.json
SECURITY PLC,Regeneration and Housing Services 5 | 4,London Borough of Hammersmith and Fulham,2010-01-01,417742,112.50,ALARM LTD,Finance and Corporate Services 6 | 5,London Borough of Hammersmith and Fulham,2010-01-01,417742,562.50,ALARM LTD,Finance and Corporate Services 7 | 6,London Borough of Hammersmith and Fulham,2010-01-01,391746,1665.62,ASCOM TELE NOVA LTD,Childrens Services 8 | 7,London Borough of Hammersmith and Fulham,2010-01-01,396062,1500.00,BIW TECHNOLOGIES LIMITED,Community Services 9 | 8,London Borough of Hammersmith and Fulham,2010-01-01,392463,560.00,CAPITAL CITY COMMUNICATIONS LTD,Resident Services 10 | 9,London Borough of Hammersmith and Fulham,2010-01-01,393998,1296.00,CAPITAL CITY COMMUNICATIONS LTD,Environment Services 11 | 10,London Borough of Hammersmith and Fulham,2010-01-01,395696,171.39,CAR HIRE (DAY OF SWANSEA)LTD,Resident Services 12 | 11,London Borough of Hammersmith and Fulham,2010-01-01,395696,180.68,CAR HIRE (DAY OF SWANSEA)LTD,Resident Services 13 | 12,London Borough of Hammersmith and Fulham,2010-01-01,395696,182.82,CAR HIRE (DAY OF SWANSEA)LTD,Resident Services 14 | 13,London Borough of Hammersmith and Fulham,2010-01-01,395696,185.60,CAR HIRE (DAY OF SWANSEA)LTD,Resident Services 15 | 14,London Borough of Hammersmith and Fulham,2010-01-01,395696,244.84,CAR HIRE (DAY OF SWANSEA)LTD,Resident Services 16 | 15,London Borough of Hammersmith and Fulham,2010-01-01,395696,265.49,CAR HIRE (DAY OF SWANSEA)LTD,Resident Services 17 | 16,London Borough of Hammersmith and Fulham,2010-01-01,395696,384.65,CAR HIRE (DAY OF SWANSEA)LTD,Resident Services 18 | 17,London Borough of Hammersmith and Fulham,2010-01-01,395696,148.10,CAR HIRE (DAY OF SWANSEA)LTD,Resident Services 19 | 18,London Borough of Hammersmith and Fulham,2010-01-01,395696,168.85,CAR HIRE (DAY OF SWANSEA)LTD,Resident Services 20 | 19,London Borough of Hammersmith and Fulham,2010-01-01,417549,32641.84,CB RICHARD ELLIS LTD CLIENT ACCOUNT,Environment Services 21 | 20,London Borough of 
Hammersmith and Fulham,2010-01-01,417550,8106.18,CB RICHARD ELLIS LTD CLIENT ACCOUNT,Environment Services 22 | 21,London Borough of Hammersmith and Fulham,2010-01-01,395936,527.00,CHESTERFIELD ASSOCIATES,Childrens Services 23 | 22,London Borough of Hammersmith and Fulham,2010-01-01,407426,525.52,CHESTERFIELD ASSOCIATES,Community Services 24 | 23,London Borough of Hammersmith and Fulham,2010-01-01,460450,136.97,CONSULTUS SERVICES AGENCY LTD,Community Services 25 | 24,London Borough of Hammersmith and Fulham,2010-01-01,460450,1431.85,CONSULTUS SERVICES AGENCY LTD,Community Services 26 | 25,London Borough of Hammersmith and Fulham,2010-01-01,409072,522.10,COYLE PERSONNEL PLC,Community Services 27 | 26,London Borough of Hammersmith and Fulham,2010-01-01,405998,7009.96,CRANSTOUN DRUG SERVICES,Community Services 28 | 27,London Borough of Hammersmith and Fulham,2010-01-01,409318,-1156.27,EDF ENERGY 1 LIMITED,Resident Services 29 | 28,London Borough of Hammersmith and Fulham,2010-01-01,409319,-826.32,EDF ENERGY 1 LIMITED,Resident Services 30 | 29,London Borough of Hammersmith and Fulham,2010-01-01,483559,2950.00,e-MENTORING LIMITED,Childrens Services 31 | -------------------------------------------------------------------------------- /spendb/tests/views/test_home.py: -------------------------------------------------------------------------------- 1 | import re 2 | import json 3 | from flask import url_for 4 | 5 | from spendb.core import db 6 | from spendb.model.dataset import Dataset 7 | from spendb.tests.base import ControllerTestCase 8 | from spendb.tests.helpers import make_account, load_fixture 9 | 10 | 11 | class TestHomeController(ControllerTestCase): 12 | 13 | def setUp(self): 14 | super(TestHomeController, self).setUp() 15 | self.dataset = load_fixture('cra') 16 | self.user = make_account('test') 17 | 18 | def test_index(self): 19 | response = self.client.get(url_for('home.index')) 20 | assert 'SpenDB' in response.data 21 | 22 | def test_locale(self): 23 | set_l = 
url_for('home.set_locale') 24 | data = json.dumps({'locale': 'en'}) 25 | self.client.post(set_l, data=data, 26 | headers={'Content-Type': 'application/json'}) 27 | 28 | def test_feeds(self): 29 | # Anonymous user with one public dataset 30 | response = self.client.get(url_for('home.feed_rss')) 31 | assert 'application/xml' in response.content_type 32 | assert 'Recently Created Datasets' in response.data 33 | assert 'Country Regional Analysis v2009' in response.data, response.data 34 | cra = Dataset.by_name('cra') 35 | cra.private = True 36 | db.session.add(cra) 37 | db.session.commit() 38 | 39 | # Anonymous user with one private dataset 40 | response = self.client.get(url_for('home.feed_rss')) 41 | assert 'application/xml' in response.content_type 42 | assert '<title>Recently Created Datasets' in response.data 43 | assert 'Country Regional Analysis v2009' not in response.data 44 | 45 | # Logged in user with one public dataset 46 | cra.private = False 47 | db.session.add(cra) 48 | db.session.commit() 49 | response = self.client.get(url_for('home.feed_rss'), 50 | query_string={'api_key': self.user.api_key}) 51 | assert 'application/xml' in response.content_type 52 | assert '<title>Recently Created Datasets' in response.data 53 | assert 'Country Regional Analysis v2009' in response.data 54 | 55 | # Logged in user with one private dataset 56 | cra.private = True 57 | db.session.add(cra) 58 | db.session.commit() 59 | response = self.client.get(url_for('home.feed_rss'), 60 | query_string={'api_key': self.user.api_key}) 61 | assert 'application/xml' in response.content_type 62 | assert '<title>Recently Created Datasets' in response.data 63 | assert 'Country Regional Analysis v2009' not in response.data 64 | 65 | # Logged in admin user with one private dataset 66 | admin_user = make_account('admin') 67 | admin_user.admin = True 68 | db.session.add(admin_user) 69 | db.session.commit() 70 | response = self.client.get(url_for('home.feed_rss'), 71 | query_string={'api_key': 
def asset_link(path):
    """ Build a cache-busted URL for a static asset.

    Uses the debug asset path when DEBUG is on, and appends a short hash
    of CACHE_KEY (falling back to the package version) so that deploys
    invalidate browser caches. """
    asset_path = current_app.config['ASSETS_PATH_PROD']
    if current_app.config['DEBUG']:
        asset_path = current_app.config['ASSETS_PATH_DEBUG']
    cache_key = os.environ.get('CACHE_KEY', __version__)
    # Only the first 10 hex chars are needed for cache busting.
    cache_key = sha1(cache_key).hexdigest()[:10]
    return '%s%s?_=%s' % (asset_path, path, cache_key)
@blueprint.route('/set-locale', methods=['POST'])
def set_locale():
    """ Store the user's chosen UI locale in the session cookie. """
    # get_json(silent=True) avoids a 500 when the request carries no JSON
    # body at all (request.json would be None and .get() would raise).
    data = request.get_json(silent=True) or {}
    locale = data.get('locale')

    if locale is not None:
        session['locale'] = locale
        session.modified = True
    return jsonify({'locale': locale})
-------------------------------------------------------------------------------- /spendb/templates/layout.html: -------------------------------------------------------------------------------- 1 | <!DOCTYPE html> 2 | <html lang="{{ current_language }}" ng-app="spendb"> 3 | <head> 4 | <meta charset="utf-8"> 5 | <meta name="viewport" content="width=device-width, initial-scale=1.0"> 6 | <title>{{ site_title }} 7 | 8 | 9 | 10 | 13 | 14 | 15 | 16 | {% if debug %} 17 | 18 | {% else %} 19 | 20 | {% endif %} 21 | 22 | 23 |
24 |
25 | 26 |
27 |
28 |
29 |
30 |
31 | 32 | 42 | 43 | 76 | 77 | 78 | 84 | 85 | 86 | {% if debug %} 87 | 88 | 89 | {% else %} 90 | 91 | {% endif %} 92 | 93 | 94 | 95 | -------------------------------------------------------------------------------- /spendb/model/fact_table.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from sqlalchemy import MetaData 4 | from sqlalchemy.schema import Table, Column 5 | from sqlalchemy.types import Unicode 6 | 7 | from spendb.core import db 8 | from spendb.model.common import json_default 9 | from spendb.validation.model import TYPES 10 | 11 | 12 | class FactTable(object): 13 | """ The ``FactTable`` serves as a controller object for 14 | a given ``Model``, handling the creation, filling and migration 15 | of the table schema associated with the dataset. """ 16 | 17 | def __init__(self, dataset): 18 | self.dataset = dataset 19 | self.bind = db.engine 20 | self.table_name = '%s__facts' % dataset.name 21 | self.meta = MetaData() 22 | self.meta.bind = self.bind 23 | self._table = None 24 | 25 | @property 26 | def table(self): 27 | """ Generate an appropriate table representation to mirror the 28 | fields known for this table. """ 29 | if self._table is None: 30 | self._table = Table(self.table_name, self.meta) 31 | id_col = Column('_id', Unicode(42), primary_key=True) 32 | self._table.append_column(id_col) 33 | json_col = Column('_json', Unicode()) 34 | self._table.append_column(json_col) 35 | self._fields_columns(self._table) 36 | return self._table 37 | 38 | @property 39 | def alias(self): 40 | """ An alias used for queries. 
""" 41 | if not hasattr(self, '_alias'): 42 | self._alias = self.table.alias('entry') 43 | return self._alias 44 | 45 | @property 46 | def mapping(self): 47 | if not hasattr(self, '_mapping'): 48 | self._mapping = {} 49 | for attribute in self.dataset.model.attributes: 50 | if attribute.column in self.alias.columns: 51 | col = self.alias.c[attribute.column] 52 | self._mapping[attribute.path] = col 53 | return self._mapping 54 | 55 | @property 56 | def exists(self): 57 | return db.engine.has_table(self.table.name) 58 | 59 | def _fields_columns(self, table): 60 | """ Transform the (auto-detected) fields into a set of column 61 | specifications. """ 62 | for field in self.dataset.fields: 63 | data_type = TYPES.get(field.get('type'), Unicode) 64 | col = Column(field.get('name'), data_type, nullable=True) 65 | table.append_column(col) 66 | 67 | def load_iter(self, iterable, chunk_size=1000): 68 | """ Bulk load all the data in an artifact to a matching database 69 | table. """ 70 | chunk = [] 71 | 72 | conn = self.bind.connect() 73 | tx = conn.begin() 74 | try: 75 | for i, record in enumerate(iterable): 76 | record['_id'] = i 77 | record['_json'] = json.dumps(record, default=json_default) 78 | chunk.append(record) 79 | if len(chunk) >= chunk_size: 80 | stmt = self.table.insert() 81 | conn.execute(stmt, chunk) 82 | chunk = [] 83 | 84 | if len(chunk): 85 | stmt = self.table.insert() 86 | conn.execute(stmt, chunk) 87 | tx.commit() 88 | except: 89 | tx.rollback() 90 | raise 91 | 92 | def create(self): 93 | """ Create the fact table if it does not exist. """ 94 | if not self.exists: 95 | self.table.create(self.bind) 96 | 97 | def drop(self): 98 | """ Drop the fact table if it does exist. 
""" 99 | if self.exists: 100 | self.table.drop() 101 | self._table = None 102 | 103 | def __repr__(self): 104 | return "" % (self.dataset) 105 | -------------------------------------------------------------------------------- /spendb/tests/helpers.py: -------------------------------------------------------------------------------- 1 | import urllib 2 | import os 3 | import uuid 4 | import json 5 | import urlparse 6 | from StringIO import StringIO 7 | from datetime import datetime 8 | from werkzeug.security import generate_password_hash 9 | 10 | from spendb.model.dataset import Dataset 11 | from spendb.core import db 12 | 13 | 14 | def fixture_file(name): 15 | """Return a file-like object pointing to a named fixture.""" 16 | return open(fixture_path(name)) 17 | 18 | 19 | def meta_fixture(name): 20 | meta_fp = fixture_file('meta/' + name + '.json') 21 | meta = json.load(meta_fp) 22 | meta_fp.close() 23 | return meta 24 | 25 | 26 | def validation_fixture(name): 27 | model_fp = fixture_file('validation/' + name + '.json') 28 | model = json.load(model_fp) 29 | model_fp.close() 30 | if 'fact_table' not in model['model']: 31 | model['model']['fact_table'] = 'table' 32 | return model 33 | 34 | 35 | def data_fixture(name): 36 | return fixture_file('data/' + name + '.csv') 37 | 38 | 39 | def fixture_path(name): 40 | """Return the full path to a named fixture. 41 | Use fixture_file rather than this method wherever possible. 
42 | """ 43 | # Get the directory of this file (helpers is placed in the test directory) 44 | test_directory = os.path.dirname(__file__) 45 | # Fixture is a directory in the test directory 46 | return os.path.join(test_directory, 'fixtures', name) 47 | 48 | 49 | def csvimport_fixture_path(name, path): 50 | url = urllib.pathname2url(fixture_path('csv_import/%s/%s' % (name, path))) 51 | return urlparse.urljoin('file:', url) 52 | 53 | 54 | def csvimport_fixture_file(name, path): 55 | try: 56 | fp = urllib.urlopen(csvimport_fixture_path(name, path)) 57 | except IOError: 58 | if name == 'default': 59 | fp = None 60 | else: 61 | fp = csvimport_fixture_file('default', path) 62 | 63 | if fp: 64 | fp = StringIO(fp.read()) 65 | return fp 66 | 67 | 68 | def csvimport_table(name): 69 | from spendb.core import data_manager 70 | from spendb.etl.extract import validate_table, load_table 71 | 72 | package = data_manager.package(uuid.uuid4().hex) 73 | source = package.ingest(data_fixture(name)) 74 | source = validate_table(source) 75 | rows = list(load_table(source)) 76 | return source.meta.get('fields'), rows 77 | 78 | 79 | def load_fixture(name, manager=None): 80 | """ Load fixture data into the database. 
""" 81 | meta = meta_fixture(name) 82 | dataset = Dataset(meta) 83 | dataset.updated_at = datetime.utcnow() 84 | if manager is not None: 85 | dataset.managers.append(manager) 86 | fields, rows = csvimport_table(name) 87 | dataset.fields = fields 88 | db.session.add(dataset) 89 | db.session.commit() 90 | dataset.fact_table.create() 91 | dataset.fact_table.load_iter(rows) 92 | return dataset 93 | 94 | 95 | def make_account(name='test', fullname='Test User', 96 | email='test@example.com', twitter='testuser', 97 | admin=False, password='password'): 98 | from spendb.model.account import Account 99 | 100 | # First see if the account already exists and if so, return it 101 | account = Account.by_name(name) 102 | if account: 103 | return account 104 | 105 | # Account didn't exist so we create it and return it 106 | account = Account() 107 | account.name = name 108 | account.fullname = fullname 109 | account.email = email 110 | account.twitter_handle = twitter 111 | account.admin = admin 112 | account.password = generate_password_hash(password) 113 | db.session.add(account) 114 | db.session.commit() 115 | return account 116 | 117 | 118 | def init_db(app): 119 | db.create_all(app=app) 120 | 121 | 122 | def clean_db(app): 123 | db.session.rollback() 124 | db.drop_all(app=app) 125 | -------------------------------------------------------------------------------- /spendb/model/account.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | import hmac 3 | from hashlib import md5 4 | 5 | from flask.ext.login import AnonymousUserMixin 6 | 7 | from spendb.core import db, login_manager, url_for 8 | 9 | GRAVATAR = 'https://secure.gravatar.com/avatar/%s' 10 | 11 | 12 | def make_uuid(): 13 | return unicode(uuid.uuid4()) 14 | 15 | 16 | account_dataset_table = db.Table( 17 | 'account_dataset', db.metadata, 18 | db.Column('dataset_id', db.Integer, db.ForeignKey('dataset.id'), 19 | primary_key=True), 20 | db.Column('account_id', db.Integer, 
db.ForeignKey('account.id'), 21 | primary_key=True) 22 | ) 23 | 24 | 25 | class AnonymousAccount(AnonymousUserMixin): 26 | admin = False 27 | 28 | def __repr__(self): 29 | return '' 30 | 31 | login_manager.anonymous_user = AnonymousAccount 32 | 33 | 34 | @login_manager.user_loader 35 | def load_account(account_id): 36 | return Account.by_id(account_id) 37 | 38 | 39 | class Account(db.Model): 40 | __tablename__ = 'account' 41 | 42 | id = db.Column(db.Integer, primary_key=True) 43 | name = db.Column(db.Unicode(255), unique=True) 44 | fullname = db.Column(db.Unicode(2000)) 45 | email = db.Column(db.Unicode(2000)) 46 | twitter_handle = db.Column(db.Unicode(140)) 47 | public_email = db.Column(db.Boolean, default=False) 48 | public_twitter = db.Column(db.Boolean, default=False) 49 | password = db.Column(db.Unicode(2000)) 50 | api_key = db.Column(db.Unicode(2000), default=make_uuid) 51 | admin = db.Column(db.Boolean, default=False) 52 | 53 | datasets = db.relationship('Dataset', 54 | secondary=account_dataset_table, 55 | backref=db.backref('managers', lazy='dynamic')) 56 | 57 | def __init__(self): 58 | self.api_key = make_uuid() 59 | 60 | def is_authenticated(self): 61 | return True 62 | 63 | def is_anonymous(self): 64 | return False 65 | 66 | def is_active(self): 67 | return True 68 | 69 | def get_id(self): 70 | return self.id 71 | 72 | @property 73 | def display_name(self): 74 | return self.fullname or self.name 75 | 76 | @property 77 | def gravatar(self): 78 | sig = self.email or self.name 79 | sig = md5(sig.encode('utf-8')).hexdigest() 80 | return GRAVATAR % sig 81 | 82 | @property 83 | def token(self): 84 | h = hmac.new('') 85 | h.update(self.api_key) 86 | if self.password: 87 | h.update(self.password) 88 | return h.hexdigest() 89 | 90 | @classmethod 91 | def by_name(cls, name): 92 | return db.session.query(cls).filter_by(name=name).first() 93 | 94 | @classmethod 95 | def by_id(cls, id): 96 | return db.session.query(cls).filter_by(id=id).first() 97 | 98 | 
@classmethod 99 | def by_email(cls, email): 100 | return db.session.query(cls).filter_by(email=email).first() 101 | 102 | @classmethod 103 | def by_api_key(cls, api_key): 104 | return db.session.query(cls).filter_by(api_key=api_key).first() 105 | 106 | def to_dict(self): 107 | """ Return the dictionary representation of the account. """ 108 | account_dict = { 109 | 'name': self.name, 110 | 'fullname': self.fullname, 111 | 'display_name': self.display_name, 112 | 'email': self.email, 113 | 'admin': self.admin, 114 | 'gravatar': self.gravatar, 115 | 'twitter_handle': self.twitter_handle, 116 | 'api_url': url_for('account_api.view', account=self.name) 117 | } 118 | if not self.public_email: 119 | account_dict.pop('email') 120 | if not self.public_twitter: 121 | account_dict.pop('twitter_handle') 122 | return account_dict 123 | 124 | def __repr__(self): 125 | return '' % (self.id, self.name) 126 | -------------------------------------------------------------------------------- /spendb/tests/etl/test_load.py: -------------------------------------------------------------------------------- 1 | from loadkit import logger 2 | 3 | from spendb.core import db, data_manager 4 | from spendb.model import Dataset, Run 5 | from spendb.etl import tasks 6 | 7 | from spendb.tests.helpers import meta_fixture 8 | from spendb.tests.helpers import csvimport_fixture_path 9 | from spendb.tests.helpers import csvimport_fixture_file 10 | from spendb.tests.base import DatabaseTestCase 11 | 12 | 13 | class TestLoad(DatabaseTestCase): 14 | 15 | def setUp(self): 16 | super(TestLoad, self).setUp() 17 | data_manager._index = None 18 | model = meta_fixture('cra') 19 | self.ds = Dataset(model) 20 | db.session.add(self.ds) 21 | db.session.commit() 22 | self.cra_url = csvimport_fixture_path('../data', 'cra.csv') 23 | 24 | def tearDown(self): 25 | super(TestLoad, self).tearDown() 26 | 27 | def test_extract_url(self): 28 | source = tasks.extract_url(self.ds, self.cra_url) 29 | assert 'cra.csv' == 
source.name, source.name 30 | 31 | def test_extract_missing_url(self): 32 | url = csvimport_fixture_path('../data', 'xcra.csv') 33 | source = tasks.extract_url(self.ds, url) 34 | assert source is None, source 35 | 36 | run = db.session.query(Run).first() 37 | package = data_manager.package(self.ds.name) 38 | messages = list(logger.load(package, run.id)) 39 | assert len(messages) > 2, messages 40 | 41 | def test_extract_file(self): 42 | fp = csvimport_fixture_file('../data', 'cra.csv') 43 | source = tasks.extract_fileobj(self.ds, fp, 44 | file_name='cra2.csv') 45 | assert 'cra2.csv' == source.name, source.name 46 | 47 | fp = csvimport_fixture_file('../data', 'cra.csv') 48 | source = tasks.extract_fileobj(self.ds, fp, 49 | file_name='cra2 HUHU.csv') 50 | assert 'cra2-huhu.csv' == source.name, source.name 51 | 52 | def test_duplicate_file(self): 53 | fp = csvimport_fixture_file('../data', 'cra.csv') 54 | source = tasks.extract_fileobj(self.ds, fp, 55 | file_name='cra2.csv') 56 | assert 'cra2.csv' == source.name, source.name 57 | 58 | fp = csvimport_fixture_file('../data', 'cra.csv') 59 | source = tasks.extract_fileobj(self.ds, fp, 60 | file_name='cra2.csv') 61 | assert 'cra2-2.csv' == source.name, source.name 62 | 63 | def test_transform_source(self): 64 | fp = csvimport_fixture_file('../data', 'cra.csv') 65 | source = tasks.extract_fileobj(self.ds, fp, 66 | file_name='cra2.csv') 67 | src = tasks.transform_source(self.ds, source.name) 68 | assert src.name == source.name, src.name 69 | rows = list(tasks.load_table(src)) 70 | assert len(rows) == 36, rows 71 | assert 'cofog1_label' in rows[1], rows[1] 72 | assert 'cofog1.label' not in rows[1], rows[1] 73 | 74 | def test_field_detection(self): 75 | fp = csvimport_fixture_file('../data', 'cra.csv') 76 | source = tasks.extract_fileobj(self.ds, fp, 77 | file_name='cra2.csv') 78 | source = tasks.transform_source(self.ds, source.name) 79 | fields = source.meta.get('fields') 80 | assert len(fields) == 34, len(fields) 81 | 
def upgrade():
    """ Initial schema: create all core SpenDB tables.

    Tables mirror the SQLAlchemy models: ``account``, ``dataset``,
    the per-dataset ``dataset_territory`` / ``dataset_language``
    code lists, ETL ``run`` records, and the ``account_dataset``
    many-to-many link between managers and datasets.
    """
    # User accounts; ``name`` doubles as the login and must be unique.
    op.create_table('account',
    sa.Column('id', sa.Integer(), nullable=False),
    sa.Column('name', sa.Unicode(length=255), nullable=True),
    sa.Column('fullname', sa.Unicode(length=2000), nullable=True),
    sa.Column('email', sa.Unicode(length=2000), nullable=True),
    sa.Column('public_email', sa.Boolean(), nullable=True),
    sa.Column('twitter_handle', sa.Unicode(length=140), nullable=True),
    sa.Column('public_twitter', sa.Boolean(), nullable=True),
    sa.Column('password', sa.Unicode(length=2000), nullable=True),
    sa.Column('api_key', sa.Unicode(length=2000), nullable=True),
    sa.Column('admin', sa.Boolean(), nullable=True),
    sa.Column('script_root', sa.Unicode(length=2000), nullable=True),
    sa.Column('terms', sa.Boolean(), nullable=True),
    sa.PrimaryKeyConstraint('id'),
    sa.UniqueConstraint('name')
    )
    # Dataset metadata; ``data`` holds the serialized model/config blob.
    op.create_table('dataset',
    sa.Column('id', sa.Integer(), nullable=False),
    sa.Column('name', sa.Unicode(length=255), nullable=True),
    sa.Column('label', sa.Unicode(length=2000), nullable=True),
    sa.Column('description', sa.Unicode(), nullable=True),
    sa.Column('currency', sa.Unicode(), nullable=True),
    sa.Column('default_time', sa.Unicode(), nullable=True),
    sa.Column('schema_version', sa.Unicode(), nullable=True),
    sa.Column('category', sa.Unicode(), nullable=True),
    sa.Column('private', sa.Boolean(), nullable=True),
    sa.Column('created_at', sa.DateTime(), nullable=True),
    sa.Column('updated_at', sa.DateTime(), nullable=True),
    sa.Column('data', sa.Unicode(), nullable=True),
    sa.PrimaryKeyConstraint('id'),
    sa.UniqueConstraint('name')
    )
    # One territory code (e.g. country) per row, per dataset.
    op.create_table('dataset_territory',
    sa.Column('id', sa.Integer(), nullable=False),
    sa.Column('code', sa.Unicode(), nullable=True),
    sa.Column('created_at', sa.DateTime(), nullable=True),
    sa.Column('updated_at', sa.DateTime(), nullable=True),
    sa.Column('dataset_id', sa.Integer(), nullable=True),
    sa.ForeignKeyConstraint(['dataset_id'], ['dataset.id'], ),
    sa.PrimaryKeyConstraint('id')
    )
    # One language code per row, per dataset.
    op.create_table('dataset_language',
    sa.Column('id', sa.Integer(), nullable=False),
    sa.Column('code', sa.Unicode(), nullable=True),
    sa.Column('created_at', sa.DateTime(), nullable=True),
    sa.Column('updated_at', sa.DateTime(), nullable=True),
    sa.Column('dataset_id', sa.Integer(), nullable=True),
    sa.ForeignKeyConstraint(['dataset_id'], ['dataset.id'], ),
    sa.PrimaryKeyConstraint('id')
    )
    # ETL run log entries (operation, status and timing per source).
    op.create_table('run',
    sa.Column('id', sa.Integer(), nullable=False),
    sa.Column('operation', sa.Unicode(), nullable=True),
    sa.Column('status', sa.Unicode(), nullable=True),
    sa.Column('source', sa.Unicode(), nullable=True),
    sa.Column('time_start', sa.DateTime(), nullable=True),
    sa.Column('time_end', sa.DateTime(), nullable=True),
    sa.Column('dataset_id', sa.Integer(), nullable=True),
    sa.ForeignKeyConstraint(['dataset_id'], ['dataset.id'], ),
    sa.PrimaryKeyConstraint('id')
    )
    # Association table: which accounts manage which datasets.
    op.create_table('account_dataset',
    sa.Column('dataset_id', sa.Integer(), nullable=False),
    sa.Column('account_id', sa.Integer(), nullable=False),
    sa.ForeignKeyConstraint(['account_id'], ['account.id'], ),
    sa.ForeignKeyConstraint(['dataset_id'], ['dataset.id'], ),
    sa.PrimaryKeyConstraint('dataset_id', 'account_id')
    )
validate_model(ms) 47 | 48 | @raises(ValidationError) 49 | def test_measure_has_column(self): 50 | ms = self.model['model'].copy() 51 | del ms['measures']['cofinance']['column'] 52 | validate_model(ms) 53 | 54 | @raises(ValidationError) 55 | def test_date_has_column(self): 56 | ms = self.model['model'].copy() 57 | del ms['dimensions']['time']['attributes']['year']['column'] 58 | validate_model(ms) 59 | 60 | @raises(ValidationError) 61 | def test_compound_has_fields(self): 62 | ms = self.model['model'].copy() 63 | del ms['dimensions']['function']['attributes'] 64 | validate_model(ms) 65 | 66 | @raises(ValidationError) 67 | def test_compound_field_with_dash(self): 68 | ms = self.model['model'].copy() 69 | ms['dimensions']['function']['attributes']['id-col'] = \ 70 | ms['dimensions']['function']['attributes']['description'] 71 | del ms['dimensions']['function']['attributes']['description'] 72 | validate_model(ms) 73 | 74 | @raises(ValidationError) 75 | def test_compound_field_short(self): 76 | ms = self.model['model'].copy() 77 | ms['dimensions']['function']['attributes']['i'] = \ 78 | ms['dimensions']['function']['attributes']['description'] 79 | del ms['dimensions']['function']['attributes']['description'] 80 | validate_model(ms) 81 | 82 | @raises(ValidationError) 83 | def test_compound_field_invalid_name(self): 84 | ms = self.model['model'].copy() 85 | ms['dimensions']['function']['attributes']['ba nanana'] = \ 86 | ms['dimensions']['function']['attributes']['description'] 87 | del ms['dimensions']['function']['attributes']['description'] 88 | validate_model(ms) 89 | 90 | @raises(ValidationError) 91 | def test_compound_field_has_column(self): 92 | ms = self.model['model'].copy() 93 | del ms['dimensions']['function']['attributes']['description']['column'] 94 | validate_model(ms) 95 | 96 | def test_set_label_attribute(self): 97 | ms = self.model['model'].copy() 98 | ms['dimensions']['function']['label_attribute'] = 'label' 99 | 
def load_dataset(metadata, source_file, model):
    """ Push one exported dataset into a SpenDB instance.

    Three-step process against the SpenDB HTTP API: (1) create or
    update the dataset record from `metadata`, (2) upload the CSV at
    `source_file` and poll the runs endpoint until the database load
    completes, (3) apply the OLAP `model` mapping.
    """
    name = metadata.get('name')

    # Available metadata fields (name and label are required):
    config = {
        'name': name,
        'label': metadata['label'],
        'private': False,
        'category': metadata['category'],  # 'budget' or 'spending'
        'currency': metadata['currency'],  # e.g. 'USD', 'EUR'
        'languages': metadata['languages'],  # e.g. ['en', 'de']
        'territories': metadata['territories'],  # e.g. ['DE', 'FR']
    }

    # Step 1: Create (or update) the dataset.
    print '[spendb-import] Creating/updating %r' % name
    res = session.get(make_api_path(name))
    if res.status_code == 404:
        # dataset does not exist yet
        res = session.post(make_api_path(), data=json.dumps(config),
                           headers=json_headers)
        assert res.status_code == 200, res.content
    elif res.status_code == 200:
        # update the existing dataset's metadata
        res = session.post(make_api_path(name),
                           data=json.dumps(config),
                           headers=json_headers)
        assert res.status_code == 200, res.content
    else:
        # NOTE(review): this only prints and then continues with the
        # upload anyway -- confirm whether it should abort instead.
        print 'Error accessing dataset: %r' % res.content

    # Step 2: Upload a source data file.
    print '[spendb-import] Uploading %r' % source_file
    upload_url = make_api_path(name, 'sources/upload')
    # Note: there is also /api/3/datasets//sources/submit which
    # will accept a simple URL, then attempt to fetch and load that
    # data file. That API call (unlike this one) does not return a
    # source object.

    files = {'file': open(source_file, 'rb')}
    res = session.post(upload_url, files=files)
    assert res.status_code == 200, res.content

    # This is a bit ugly: we need to wait for the source data
    # to be parsed before a data model can be applied.
    runs_url = res.json().get('runs_url')
    while True:
        res = session.get(runs_url)
        runs = res.json().get('results')
        # Most recent run last; poll its status below.
        runs = sorted(runs, key=lambda r: r.get('time_start'))
        current_run = runs[-1]
        assert current_run['status'] != 'failed'

        # There are multiple operations, we want to wait for
        # the one related to database loading to complete.
        if current_run['status'] == 'complete' and \
                'database' in current_run['operation']:
            break
        print '[spendb-import] Waiting for data to be loaded...'
        time.sleep(5)

    # Step 3: Map source data columns to OLAP measures and dimensions
    print '[spendb-import] Applying model to dataset %r' % name
    res = session.post(make_api_path(name, 'model'),
                       data=json.dumps(model),
                       headers=json_headers)
    assert res.status_code == 200, res.content

    print '[spendb-import] Done.'
@blueprint.route('/datasets/<dataset>/sources/upload', methods=['POST', 'PUT'])
def upload(dataset):
    """ Accept a direct file upload for the dataset and queue it
    for loading into the database. """
    dataset = get_dataset(dataset)
    require.dataset.update(dataset)
    upload_obj = request.files.get('file')
    if not upload_obj or not upload_obj.filename:
        raise BadRequest("You need to upload a file")
    # TODO: consider copying this into a tempfile before upload to make
    # boto happy (it appears to be whacky in it's handling of flask uploads)
    source = extract_fileobj(dataset, fh=upload_obj,
                             file_name=upload_obj.filename)
    load_from_source.delay(dataset.name, source.name)
    return jsonify(source_to_dict(dataset, source))
@blueprint.route('/datasets/<dataset>/serve/<name>')
def serve(dataset, name):
    """ Stream a source's raw data, or redirect to its backing URL
    when the store exposes one (e.g. S3). """
    dataset = get_dataset(dataset)
    package = data_manager.package(dataset.name)
    source = Source(package, name)
    if source.url is not None:
        # The artifact lives behind a URL; let the client fetch it.
        return redirect(source.url)
    mime = source.meta.get('mime_type')
    return send_file(source.fh(), mimetype=mime)
def get_mappings():
    """ Yield ``(dataset_record, mapping)`` pairs for each selected
    Offener Haushalt dataset that has a column mapping defined. """
    for ds in list(engine['dataset']):
        if ds['name'] not in OFFENERHAUSHALT_SETS:
            continue
        ds['data'] = json.loads(ds['data'])

        # Attach the language / territory code lists and raw sources.
        ds['languages'] = [lang['code'] for lang in
                           engine['dataset_language'].find(dataset_id=ds['id'])]
        ds['territories'] = [terr['code'] for terr in
                             engine['dataset_territory'].find(dataset_id=ds['id'])]
        ds['sources'] = [src for src in
                         engine['source'].find(dataset_id=ds['id'])]

        # Add team members for the dataset
        query_stmt = ('SELECT account.name as username FROM account '
                      'INNER JOIN account_dataset '
                      'ON account.id = account_dataset.account_id '
                      'WHERE account_dataset.dataset_id = {dataset_id}')
        query = engine.query(query_stmt.format(dataset_id=ds['id']))
        ds['team'] = [member['username'] for member in query]

        # Datasets without a mapping cannot be exported meaningfully.
        mapping = ds['data'].get('mapping')
        if not mapping:
            continue

        yield ds, mapping
def freeze_all():
    """ Export every mapped dataset: write its metadata to
    ``exports/<name>/dataset.json`` and dump the denormalized facts
    to ``facts.csv`` via the export query. """
    out_base = 'exports'
    for ds, query in get_queries():
        try:
            # Keep the query used for the export alongside the metadata.
            ds['export_query'] = query
            path = os.path.join(out_base, ds['name'])
            if not os.path.isdir(path):
                os.makedirs(path)

            ds_path = os.path.join(path, 'dataset.json')
            with open(ds_path, 'wb') as fh:
                # json_default serializes datetime values to ISO strings.
                json.dump(ds, fh, default=json_default, indent=2)

            res = engine.query(query)
            dataset.freeze(res, filename='facts.csv', prefix=path,
                           format='csv')
        # NOTE(review): broad best-effort catch so one broken dataset
        # does not abort the whole export; failures are only printed.
        except Exception, e:
            print e
""" 15 | column = slugify(cell.column or '', sep='_') 16 | column = column.strip('_') 17 | column = 'column' if not len(column) else column 18 | name, i = column, 2 19 | # de-dupe: column, column_2, column_3, ... 20 | while name in names: 21 | name = '%s_%s' % (name, i) 22 | i += 1 23 | return name 24 | 25 | 26 | def generate_field_spec(row): 27 | """ Generate a set of metadata for each field/column in 28 | the data. This is conformant to jsontableschema. """ 29 | names = set() 30 | fields = [] 31 | for cell in row: 32 | name = column_alias(cell, names) 33 | field = { 34 | 'name': name, 35 | 'title': cell.column, 36 | 'type': celltype_as_string(cell.type), 37 | 'has_empty': False, 38 | 'samples': [] 39 | } 40 | if hasattr(cell.type, 'format'): 41 | field['format'] = cell.type.format 42 | fields.append(field) 43 | return fields 44 | 45 | 46 | def random_sample(value, field, row, num=10): 47 | """ Collect a random sample of the values in a particular 48 | field based on the reservoir sampling technique. """ 49 | # TODO: Could become a more general DQ piece. 50 | if value in field['samples']: 51 | return 52 | if value is None: 53 | field['has_empty'] = True 54 | return 55 | if len(field['samples']) < num: 56 | field['samples'].append(value) 57 | return 58 | j = random.randint(0, row) 59 | if j < (num - 1): 60 | field['samples'][j] = value 61 | 62 | 63 | def convert_row(row, fields, i): 64 | data = {} 65 | for cell, field in zip(row, fields): 66 | value = cell.value 67 | if isinstance(value, datetime): 68 | value = value.date() 69 | if isinstance(value, Decimal): 70 | # Baby jesus forgive me. 71 | value = float(value) 72 | if isinstance(value, basestring) and not len(value.strip()): 73 | value = None 74 | random_sample(value, field, i) 75 | data[field['name']] = value 76 | return data 77 | 78 | 79 | def parse_table(source): 80 | # This is a work-around because messytables hangs on boto file 81 | # handles, so we're doing it via plain old HTTP. 
def validate_table(source):
    """ Run a full parse of the source to infer field metadata and
    row/failure counts, and persist them on the source's meta.

    Returns the source with ``fields``, ``num_records`` and
    ``num_failed`` saved into its metadata.
    """
    # Pre-initialize so an empty parse (e.g. no tables found in the
    # source file) does not hit a NameError on `i`/`fields` below.
    failed = 0
    i = -1
    fields = []
    for i, (exc, fields, row) in enumerate(parse_table(source)):
        if exc is not None:
            log.warning('Error at row %s: %s', i, unicode(exc))
            failed += 1

    log.info("Converted %s rows with %s columns.", i + 1, len(fields))
    source.meta['fields'] = fields
    source.meta['num_records'] = i + 1
    source.meta['num_failed'] = failed
    source.meta.save()
    return source
-------------------------------------------------------------------------------- /spendb/tests/views/api/test_source.py: -------------------------------------------------------------------------------- 1 | import json 2 | from flask import url_for 3 | 4 | from spendb.core import db 5 | from spendb.tests.helpers import csvimport_fixture_path 6 | from spendb.tests.base import ControllerTestCase 7 | from spendb.tests.helpers import load_fixture, make_account 8 | from spendb.tests.helpers import data_fixture 9 | 10 | 11 | class TestSourceApiController(ControllerTestCase): 12 | 13 | def setUp(self): 14 | super(TestSourceApiController, self).setUp() 15 | self.cra = load_fixture('cra') 16 | self.user = make_account('test') 17 | self.auth_qs = {'api_key': self.user.api_key} 18 | self.cra.managers.append(self.user) 19 | self.cra_url = csvimport_fixture_path('../data', 'cra.csv') 20 | db.session.commit() 21 | 22 | def test_source_index(self): 23 | url = url_for('sources_api.index', dataset=self.cra.name) 24 | res = self.client.get(url) 25 | assert res.json['total'] == 0, res.json 26 | 27 | def test_source_upload_anon(self): 28 | url = url_for('sources_api.upload', dataset=self.cra.name) 29 | fh = data_fixture('cra') 30 | res = self.client.post(url, data={ 31 | 'file': (fh, 'cra.csv') 32 | }) 33 | assert '403' in res.status, res.status 34 | 35 | def test_source_upload_no_file(self): 36 | url = url_for('sources_api.upload', dataset=self.cra.name) 37 | res = self.client.post(url, data={}, query_string=self.auth_qs) 38 | assert '400' in res.status, res.status 39 | 40 | def test_source_upload(self): 41 | url = url_for('sources_api.upload', dataset=self.cra.name) 42 | fh = data_fixture('cra') 43 | res = self.client.post(url, data={ 44 | 'file': (fh, 'cra.csv') 45 | }, query_string=self.auth_qs) 46 | assert '403' not in res.status, res.status 47 | 48 | def test_source_sign(self): 49 | # TODO: how to properly test this? 
50 | url = url_for('sources_api.sign', dataset=self.cra.name) 51 | req = {'file_name': 'cra.csv'} 52 | res = self.client.post(url, data=req, 53 | query_string=self.auth_qs) 54 | assert '200' in res.status, res.status 55 | assert 'status' in res.json, res.json 56 | assert res.json['status'] == 'error', res.json 57 | 58 | def test_source_submit_anon(self): 59 | url = url_for('sources_api.submit', dataset=self.cra.name) 60 | res = self.client.post(url, data={ 61 | 'url': self.cra_url 62 | }) 63 | assert '403' in res.status, res.status 64 | 65 | def test_source_submit(self): 66 | url = url_for('sources_api.submit', dataset=self.cra.name) 67 | res = self.client.post(url, data={ 68 | 'url': self.cra_url 69 | }, query_string=self.auth_qs) 70 | assert '200' in res.status, res.status 71 | 72 | def test_source_load(self): 73 | url = url_for('sources_api.upload', dataset=self.cra.name) 74 | fh = data_fixture('cra') 75 | res = self.client.post(url, data={ 76 | 'file': (fh, 'cra.csv') 77 | }, query_string=self.auth_qs) 78 | 79 | self.client.post(url_for('sessions_api.logout')) 80 | 81 | url = url_for('sources_api.load', dataset=self.cra.name, 82 | name='cra.csv') 83 | res = self.client.post(url) 84 | assert '403' in res.status, res.status 85 | res = self.client.post(url, query_string=self.auth_qs) 86 | assert '200' in res.status, res.status 87 | 88 | def test_source_load_non_existing(self): 89 | url = url_for('sources_api.load', dataset=self.cra.name, 90 | name='foo.csv') 91 | res = self.client.post(url, query_string=self.auth_qs) 92 | assert '400' in res.status, res.json 93 | 94 | def test_source_view(self): 95 | url = url_for('sources_api.upload', dataset=self.cra.name) 96 | fh = data_fixture('cra') 97 | res = self.client.post(url, data={ 98 | 'file': (fh, 'cra.csv') 99 | }, query_string=self.auth_qs) 100 | assert res.json['extension'] == 'csv', res.json 101 | assert res.json['mime_type'] == 'text/csv', res.json 102 | url = url_for('sources_api.index', dataset=self.cra.name) 
103 | res = self.client.get(url) 104 | assert res.json['total'] == 1, res.json 105 | frst = res.json['results'][0] 106 | assert frst['extension'] == 'csv', res.json 107 | assert frst['mime_type'] == 'text/csv', res.json 108 | assert frst['api_url'], res.json 109 | 110 | def test_source_serve(self): 111 | url = url_for('sources_api.upload', dataset=self.cra.name) 112 | fh = data_fixture('cra') 113 | res = self.client.post(url, data={ 114 | 'file': (fh, 'cra.csv') 115 | }, query_string=self.auth_qs) 116 | url = url_for('sources_api.serve', dataset=self.cra.name, 117 | name=res.json['name']) 118 | res = self.client.get(url, query_string=self.auth_qs) 119 | assert 'text/csv' in res.headers['Content-Type'], res.json 120 | -------------------------------------------------------------------------------- /spendb/model/provider.py: -------------------------------------------------------------------------------- 1 | from cubes.providers import ModelProvider, link_cube 2 | from cubes.model import Cube, Measure, MeasureAggregate, Dimension 3 | from cubes.sql.store import SQLStore, OPTION_TYPES 4 | from cubes.errors import NoSuchCubeError, NoSuchDimensionError 5 | from cubes.common import coalesce_options 6 | from cubes.logging import get_logger 7 | 8 | from spendb.core import db 9 | from spendb.model import Dataset 10 | 11 | 12 | class SpendingModelProvider(ModelProvider): 13 | 14 | def __init__(self, *args, **kwargs): 15 | super(SpendingModelProvider, self).__init__(*args, **kwargs) 16 | 17 | def requires_store(self): 18 | return True 19 | 20 | def has_cube(self, name): 21 | dataset = Dataset.by_name(name) 22 | if dataset is None: 23 | return False 24 | return dataset.model is not None 25 | 26 | def cube(self, name, locale=None, namespace=None): 27 | dataset = Dataset.by_name(name) 28 | if name is None: 29 | raise NoSuchCubeError("Unknown dataset %s" % name, name) 30 | 31 | measures, dimensions, mappings = [], [], {} 32 | aggregates = [MeasureAggregate('fact_count', 33 | 
label='Number of entries', 34 | function='count')] 35 | 36 | for measure in dataset.model.measures: 37 | cubes_measure = Measure(measure.name, label=measure.label) 38 | measures.append(cubes_measure) 39 | aggregate = MeasureAggregate(measure.name + '_sum', 40 | label=measure.label, 41 | measure=measure.name, 42 | function='sum') 43 | aggregates.append(aggregate) 44 | mappings[measure.name] = measure.column_name 45 | 46 | for dimension in dataset.model.dimensions: 47 | attributes, last_col = [], None 48 | for attr in dimension.attributes: 49 | attributes.append({ 50 | 'name': attr.name, 51 | 'label': attr.label 52 | }) 53 | mappings[attr.ref] = last_col = attr.column_name 54 | 55 | # Workaround because the cubes mapper shortens references 56 | # for single-attribute dimensions to just the dimension name. 57 | if len(attributes) == 1: 58 | mappings[dimension.name] = last_col 59 | 60 | meta = { 61 | 'label': dimension.label, 62 | 'name': dimension.name, 63 | 'cardinality': dimension.cardinality_class, 64 | 'levels': [{ 65 | 'name': dimension.name, 66 | 'label': dimension.label, 67 | 'cardinality': dimension.cardinality_class, 68 | 'attributes': attributes 69 | }] 70 | } 71 | if dimension.key_attribute: 72 | meta['levels'][0]['key'] = dimension.key_attribute.name 73 | if dimension.label_attribute: 74 | meta['levels'][0]['label_attribute'] = \ 75 | dimension.label_attribute.name 76 | meta['levels'][0]['order_attribute'] = \ 77 | dimension.label_attribute.name 78 | dimensions.append(Dimension.from_metadata(meta)) 79 | 80 | cube = Cube(name=dataset.name, 81 | fact=dataset.fact_table.table.name, 82 | aggregates=aggregates, 83 | measures=measures, 84 | label=dataset.label, 85 | description=dataset.description, 86 | dimensions=dimensions, 87 | store=self.store, 88 | mappings=mappings) 89 | 90 | link_cube(cube, locale, provider=self, namespace=namespace) 91 | return cube 92 | 93 | def dimension(self, name, locale=None, templates=[]): 94 | raise NoSuchDimensionError('No global 
class SpendingStore(SQLStore):
    """SQL store backed by the application's shared SQLAlchemy engine."""

    # Name under which the matching model provider is registered with
    # cubes; returned by model_provider_name() below.
    related_model_provider = "spending"

    def model_provider_name(self):
        return self.related_model_provider

    def __init__(self, **options):
        # NOTE(review): this deliberately calls the *grandparent*
        # initializer (``super(SQLStore, self)``), bypassing
        # ``SQLStore.__init__`` — presumably so SQLStore does not create
        # its own engine, since ``connectable`` below reuses the Flask
        # app's ``db.engine``. Confirm this is intentional before
        # changing it.
        super(SQLStore, self).__init__(**options)
        options = dict(options)
        self.options = coalesce_options(options, OPTION_TYPES)
        self.logger = get_logger()
        self.schema = None
        self._metadata = None

    @property
    def connectable(self):
        # Reuse the application's shared SQLAlchemy engine.
        return db.engine

    @property
    def metadata(self):
        # Lazily create MetaData bound to the shared engine.
        if self._metadata is None:
            self._metadata = db.MetaData(bind=self.connectable)
        return self._metadata
@blueprint.route('/datasets/<name>')
def view(name):
    """Return the JSON metadata for a single dataset.

    Responds with an ETag derived from the dataset so unchanged
    responses can be served from cache.
    """
    # NOTE: the '<name>' URL converter was missing from the route
    # (stripped during extraction); restored so Flask can bind the
    # ``name`` parameter this view requires.
    dataset = get_dataset(name)
    etag_cache_keygen(dataset, private=dataset.private)
    return jsonify(dataset)
@blueprint.route('/datasets/<name>/model', methods=['POST', 'PUT'])
def update_model(name):
    """Validate and store a new analytical model for the dataset.

    Requires update permission; returns the stored model on success.
    """
    # NOTE: the '<name>' URL converter was missing from the route
    # (stripped during extraction); restored so Flask can bind ``name``.
    dataset = get_dataset(name)
    require.dataset.update(dataset)
    data = request_data()
    if isinstance(data, dict):
        # The fact table name is server-controlled metadata; always
        # overwrite whatever the client may have sent.
        data['fact_table'] = dataset.fact_table.table_name
    dataset.model = validate_model(data)
    db.session.commit()
    return model(name)
@blueprint.route('/datasets/<name>', methods=['DELETE'])
def delete(name):
    """Drop the dataset's fact table and delete its metadata record."""
    # NOTE: the '<name>' URL converter was missing from the route
    # (stripped during extraction); restored so Flask can bind ``name``.
    dataset = get_dataset(name)
    require.dataset.update(dataset)
    dataset.fact_table.drop()
    db.session.delete(dataset)
    db.session.commit()
    # 410 Gone signals the resource has been removed permanently.
    return jsonify({'status': 'deleted'}, status=410)
""" 20 | __tablename__ = 'dataset' 21 | 22 | id = Column(Integer, primary_key=True) 23 | name = Column(Unicode(255), unique=True) 24 | label = Column(Unicode(2000)) 25 | description = Column(Unicode()) 26 | currency = Column(Unicode()) 27 | category = Column(Unicode()) 28 | private = Column(Boolean, default=False) 29 | created_at = Column(DateTime, default=datetime.utcnow) 30 | updated_at = Column(DateTime, default=datetime.utcnow, 31 | onupdate=datetime.utcnow) 32 | data = Column(JSONType) 33 | 34 | languages = association_proxy('_languages', 'code') 35 | territories = association_proxy('_territories', 'code') 36 | 37 | def __init__(self, data): 38 | self.data = data.copy() 39 | dataset = self.data['dataset'] 40 | del self.data['dataset'] 41 | self.name = dataset.get('name') 42 | self.update(dataset) 43 | self._load() 44 | 45 | @reconstructor 46 | def _load(self): 47 | self.fact_table = FactTable(self) 48 | self._model = None 49 | 50 | def update(self, dataset): 51 | self.label = dataset.get('label') 52 | if 'private' in dataset: 53 | self.private = dataset.get('private') 54 | if 'description' in dataset: 55 | self.description = dataset.get('description') 56 | if 'currency' in dataset: 57 | self.currency = dataset.get('currency') 58 | if 'category' in dataset: 59 | self.category = dataset.get('category') 60 | if 'languages' in dataset: 61 | self.languages = dataset.get('languages', []) 62 | if 'territories' in dataset: 63 | self.territories = dataset.get('territories', []) 64 | 65 | @property 66 | def model(self): 67 | if self._model is None: 68 | if not self.fact_table.exists: 69 | return 70 | data = self.data.get('model') 71 | if not isinstance(data, dict): 72 | return 73 | data['fact_table'] = self.fact_table.table_name 74 | model = Model(data) 75 | if not model.exists: 76 | return 77 | self._model = model 78 | return self._model 79 | 80 | @model.setter 81 | def model(self, model): 82 | self.data['model'] = model 83 | self._model = None 84 | if self.model is 
not None: 85 | self.cube.compute_cardinalities() 86 | self.touch() 87 | 88 | @property 89 | def cube(self): 90 | """ Babbage query cube for the given dataset. """ 91 | if self.model is not None: 92 | return Cube(db.engine, self.name, self.model, 93 | fact_table=self.fact_table.table) 94 | 95 | @property 96 | def fields(self): 97 | return self.data.get('fields', {}) 98 | 99 | @fields.setter 100 | def fields(self, value): 101 | self.data['fields'] = value 102 | 103 | def touch(self): 104 | """ Update the dataset timestamp. This is used for cache 105 | invalidation. """ 106 | self.updated_at = datetime.utcnow() 107 | db.session.add(self) 108 | 109 | def __repr__(self): 110 | return "" % (self.id, self.name) 111 | 112 | def to_dict(self): 113 | return { 114 | 'label': self.label, 115 | 'name': self.name, 116 | 'description': self.description, 117 | 'currency': self.currency, 118 | 'category': self.category, 119 | 'private': self.private, 120 | 'created_at': self.created_at, 121 | 'updated_at': self.updated_at, 122 | 'languages': list(self.languages), 123 | 'territories': list(self.territories), 124 | 'has_model': self.model is not None, 125 | 'api_url': url_for('datasets_api.view', name=self.name) 126 | } 127 | 128 | def to_full_dict(self): 129 | full = self.data.copy() 130 | full['dataset'] = self.to_dict() 131 | return full 132 | 133 | @classmethod 134 | def all_by_account(cls, account, order=True): 135 | """ Query available datasets based on dataset visibility. 
""" 136 | from spendb.model.account import Account 137 | has_user = account and account.is_authenticated() 138 | has_admin = has_user and account.admin 139 | q = db.session.query(cls) 140 | if not has_admin: 141 | criteria = [cls.private == False] # noqa 142 | if has_user: 143 | criteria.append(cls.managers.any(Account.id == account.id)) 144 | q = q.filter(or_(*criteria)) 145 | 146 | if order: 147 | q = q.order_by(cls.label.asc()) 148 | return q 149 | 150 | @classmethod 151 | def by_name(cls, name): 152 | return db.session.query(cls).filter_by(name=name).first() 153 | --------------------------------------------------------------------------------