├── spendb ├── etl │ ├── __init__.py │ ├── manager.py │ ├── tasks.py │ ├── job.py │ ├── upload.py │ └── extract.py ├── lib │ ├── __init__.py │ ├── helpers.py │ └── mailer.py ├── tests │ ├── __init__.py │ ├── etl │ │ ├── __init__.py │ │ ├── test_manager.py │ │ ├── test_queue_tasks.py │ │ ├── test_import_fixtures.py │ │ └── test_load.py │ ├── fixtures │ │ ├── empty.csv │ │ ├── simple.csv │ │ ├── csv_import │ │ │ ├── default │ │ │ │ ├── meta.json │ │ │ │ └── model.json │ │ │ ├── simple │ │ │ │ ├── data.csv │ │ │ │ └── model.json │ │ │ ├── quoting │ │ │ │ ├── data.csv │ │ │ │ └── model.json │ │ │ ├── empty_additional_date │ │ │ │ ├── data.csv │ │ │ │ └── model.json │ │ │ ├── malformed │ │ │ │ └── data.csv │ │ │ ├── import_errors │ │ │ │ └── data.csv │ │ │ ├── successful_import │ │ │ │ └── data.csv │ │ │ ├── erroneous_values │ │ │ │ └── data.csv │ │ │ ├── lbhf │ │ │ │ ├── data.csv │ │ │ │ └── model.json │ │ │ ├── mexico │ │ │ │ └── data.csv │ │ │ └── sample │ │ │ │ ├── model.json │ │ │ │ └── data.csv │ │ ├── data │ │ │ └── simple.csv │ │ ├── demoloader.csv │ │ ├── validation │ │ │ ├── 2011_11_20_name_attribute.json │ │ │ ├── 2011_11_21_normalize.json │ │ │ ├── 2011_11_22_unique_keys.json │ │ │ ├── 2011_12_07_attribute_dicts.json │ │ │ └── default.json │ │ └── meta │ │ │ └── simple.json │ ├── model │ │ ├── __init__.py │ │ └── test_dataset.py │ ├── views │ │ ├── __init__.py │ │ ├── api │ │ │ ├── __init__.py │ │ │ ├── test_meta.py │ │ │ ├── test_slicer.py │ │ │ ├── test_run.py │ │ │ ├── test_session.py │ │ │ └── test_source.py │ │ ├── test_error.py │ │ ├── test_slicer.py │ │ └── test_home.py │ ├── validation │ │ ├── __init__.py │ │ ├── test_dataset.py │ │ └── test_model.py │ ├── base.py │ └── helpers.py ├── views │ ├── api │ │ ├── __init__.py │ │ ├── run.py │ │ ├── meta.py │ │ ├── session.py │ │ ├── source.py │ │ └── dataset.py │ ├── error.py │ ├── __init__.py │ ├── context.py │ └── home.py ├── validation │ ├── __init__.py │ ├── model.py │ ├── account.py │ ├── dataset.py │ 
└── common.py ├── migrate │ ├── alembic.ini │ ├── script.py.mako │ ├── env.py │ └── versions │ │ └── b5ed9296ff9_initial.py ├── wsgi.py ├── static │ └── img │ │ └── favicon.ico ├── translations │ ├── ar │ │ └── LC_MESSAGES │ │ │ └── messages.mo │ ├── de │ │ └── LC_MESSAGES │ │ │ └── messages.mo │ └── is_IS │ │ └── LC_MESSAGES │ │ └── messages.mo ├── model │ ├── __init__.py │ ├── common.py │ ├── manager.py │ ├── facets.py │ ├── run.py │ ├── fact_table.py │ ├── account.py │ ├── provider.py │ └── dataset.py ├── auth │ ├── account.py │ ├── dataset.py │ └── __init__.py ├── __init__.py ├── command │ ├── db.py │ ├── importer.py │ └── __init__.py ├── tasks.py ├── default_settings.py ├── core.py └── templates │ └── layout.html ├── .bowerrc ├── contrib ├── assets │ ├── pattern_bg.psd │ ├── noun_105482_cc.png │ ├── noun_149515_cc.png │ ├── noun_15332_cc.png │ ├── noun_155670_cc.png │ ├── noun_15772_cc.png │ ├── noun_65370_cc.png │ ├── noun_67972_cc.png │ ├── noun_84870_cc.png │ ├── noun_97430_cc.png │ ├── noun_97900_cc.png │ ├── noun_161002_cc.svg │ └── noun_29578_cc.svg ├── os_export │ ├── analyze.py │ ├── archive_sources.py │ ├── model_migrate.py │ └── export.py └── spendb_importer.py ├── Procfile ├── babel.cfg ├── pages ├── index.html ├── contact.html ├── tos.html └── about.html ├── .tx └── config ├── bower.json ├── .gitignore ├── bin └── update_translations ├── production.env.tmpl ├── swarmvars.json ├── .travis.yml ├── requirements.txt ├── Dockerfile ├── docker-compose.yml ├── prod_settings.py ├── CONTRIBUTORS ├── README.md ├── setup.py └── swarm.json /spendb/etl/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /spendb/lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /spendb/tests/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /spendb/tests/etl/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /spendb/views/api/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/empty.csv: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /spendb/tests/model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /spendb/tests/views/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /spendb/validation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /spendb/tests/validation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /spendb/tests/views/api/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.bowerrc: -------------------------------------------------------------------------------- 1 | { 2 | "directory": "spendb/static" 3 | } 4 | -------------------------------------------------------------------------------- 
/spendb/migrate/alembic.ini: -------------------------------------------------------------------------------- 1 | [alembic] 2 | script_location=. 3 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/simple.csv: -------------------------------------------------------------------------------- 1 | a,b,c 2 | 1,2,foo 3 | 3,4,bar -------------------------------------------------------------------------------- /spendb/wsgi.py: -------------------------------------------------------------------------------- 1 | from spendb.core import create_web_app 2 | 3 | app = create_web_app() 4 | -------------------------------------------------------------------------------- /contrib/assets/pattern_bg.psd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openspending/spendb/HEAD/contrib/assets/pattern_bg.psd -------------------------------------------------------------------------------- /spendb/static/img/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openspending/spendb/HEAD/spendb/static/img/favicon.ico -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | web: gunicorn -w 5 spendb.wsgi:app --log-file - 2 | worker: celery -A spendb.tasks worker -c 2 -l debug 3 | -------------------------------------------------------------------------------- /contrib/assets/noun_105482_cc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openspending/spendb/HEAD/contrib/assets/noun_105482_cc.png -------------------------------------------------------------------------------- /contrib/assets/noun_149515_cc.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/openspending/spendb/HEAD/contrib/assets/noun_149515_cc.png -------------------------------------------------------------------------------- /contrib/assets/noun_15332_cc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openspending/spendb/HEAD/contrib/assets/noun_15332_cc.png -------------------------------------------------------------------------------- /contrib/assets/noun_155670_cc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openspending/spendb/HEAD/contrib/assets/noun_155670_cc.png -------------------------------------------------------------------------------- /contrib/assets/noun_15772_cc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openspending/spendb/HEAD/contrib/assets/noun_15772_cc.png -------------------------------------------------------------------------------- /contrib/assets/noun_65370_cc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openspending/spendb/HEAD/contrib/assets/noun_65370_cc.png -------------------------------------------------------------------------------- /contrib/assets/noun_67972_cc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openspending/spendb/HEAD/contrib/assets/noun_67972_cc.png -------------------------------------------------------------------------------- /contrib/assets/noun_84870_cc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openspending/spendb/HEAD/contrib/assets/noun_84870_cc.png -------------------------------------------------------------------------------- /contrib/assets/noun_97430_cc.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/openspending/spendb/HEAD/contrib/assets/noun_97430_cc.png -------------------------------------------------------------------------------- /contrib/assets/noun_97900_cc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openspending/spendb/HEAD/contrib/assets/noun_97900_cc.png -------------------------------------------------------------------------------- /spendb/translations/ar/LC_MESSAGES/messages.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openspending/spendb/HEAD/spendb/translations/ar/LC_MESSAGES/messages.mo -------------------------------------------------------------------------------- /spendb/translations/de/LC_MESSAGES/messages.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openspending/spendb/HEAD/spendb/translations/de/LC_MESSAGES/messages.mo -------------------------------------------------------------------------------- /spendb/translations/is_IS/LC_MESSAGES/messages.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openspending/spendb/HEAD/spendb/translations/is_IS/LC_MESSAGES/messages.mo -------------------------------------------------------------------------------- /babel.cfg: -------------------------------------------------------------------------------- 1 | [python: **.py] 2 | [jinja2: **/templates/**.html] 3 | extensions=jinja2.ext.autoescape,jinja2.ext.with_,webassets.ext.jinja2.AssetsExtension 4 | 5 | -------------------------------------------------------------------------------- /pages/index.html: -------------------------------------------------------------------------------- 1 | title: Mapping the money 2 | hidden: true 3 | 4 | 5 | A simple tool 
for opening up government finances. Anyone 6 | can understand how, where and why government spends our money. 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /.tx/config: -------------------------------------------------------------------------------- 1 | [main] 2 | host = https://www.transifex.com 3 | 4 | [spendb.translations] 5 | file_filter = spendb/translations//LC_MESSAGES/messages.po 6 | source_file = spendb/translations/messages.pot 7 | source_lang = en 8 | 9 | -------------------------------------------------------------------------------- /spendb/model/__init__.py: -------------------------------------------------------------------------------- 1 | from spendb.model.account import Account # NOQA 2 | from spendb.model.dataset import Dataset # noqa 3 | from spendb.model.facets import DatasetLanguage, DatasetTerritory # noqa 4 | from spendb.model.run import Run # NOQA 5 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/csv_import/default/meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset": { 3 | "currency": "EUR", 4 | "description": "Description for Test CSV Import", 5 | "label": "Label for Test CSV Import", 6 | "name": "test-csv" 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/csv_import/simple/data.csv: -------------------------------------------------------------------------------- 1 | id,paid_by,date,amount,paid_to 2 | 1,Test From,2010-01-01,100.00,Test To 3 | 2,Test From,2010-01-01,100.00,Test To 4 | 3,Test From,2010-01-01,100.00,Test To 5 | 4,Test From,2010-01-01,100.00,Test To 6 | 5,Test From,2010-01-01,100.00,Test To 7 | -------------------------------------------------------------------------------- /bower.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": 
"spendb", 3 | "version": "0.19", 4 | "homepage": "https://github.com/spendb/spendb", 5 | "authors": [], 6 | "description": "", 7 | "license": "AGPL v3", 8 | "ignore": [ 9 | "**/.*" 10 | ], 11 | "dependencies": { 12 | "spendb.ui": "master" 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/csv_import/quoting/data.csv: -------------------------------------------------------------------------------- 1 | id,paid_by,date,amount,paid_to 2 | 1,Test From,2010-01-01,3.20E+07,"Test 3 | 4 | ""EAT"" 5 | 6 | To" 7 | 2,Test From,2010-01-01,100.00,Test To 8 | 3,Test From,2010-01-01,100.00,Test To 9 | 4,Test From,2010-01-01,100.00,Test To 10 | 5,Test From,2010-01-01,100.00,Test To 11 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/csv_import/empty_additional_date/data.csv: -------------------------------------------------------------------------------- 1 | id,paid_by,date,amount,paid_to,additional_date 2 | 1,Test From,2010-01-01,100.00,Test To, 3 | 2,Test From,2010-01-01,100.00,Test To,2010-01-01 4 | 3,Test From,2010-01-01,100.00,Test To,2010-01-01 5 | 4,Test From,2010-01-01,100.00,Test To,2010-01-01 6 | 5,Test From,2010-01-01,100.00,Test To,2010-01-01 7 | -------------------------------------------------------------------------------- /spendb/tests/views/test_error.py: -------------------------------------------------------------------------------- 1 | from spendb.tests.base import ControllerTestCase 2 | 3 | 4 | class TestErrors(ControllerTestCase): 5 | 6 | def test_error_404(self): 7 | response = self.client.get('/akhkfhdjkhf/fgfdghfdh') 8 | assert response.status_code == 404, response 9 | 10 | def test_error_403(self): 11 | response = self.client.post('/api/3/datasets') 12 | assert response.status_code == 403, response 13 | -------------------------------------------------------------------------------- /spendb/migrate/script.py.mako: 
-------------------------------------------------------------------------------- 1 | """${message} 2 | 3 | Revision ID: ${up_revision} 4 | Revises: ${down_revision} 5 | Create Date: ${create_date} 6 | 7 | """ 8 | 9 | # revision identifiers, used by Alembic. 10 | revision = ${repr(up_revision)} 11 | down_revision = ${repr(down_revision)} 12 | 13 | from alembic import op 14 | import sqlalchemy as sa 15 | ${imports if imports else ""} 16 | 17 | def upgrade(): 18 | ${upgrades if upgrades else "pass"} 19 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/data/simple.csv: -------------------------------------------------------------------------------- 1 | year,amount,field,to_name,to_label,func_name,func_label 2 | 2010,200,foo,"bcorp","Big Corp",food,Food & Nutrition 3 | 2009,190,bar,"bcorp","Big Corp",food,Food & Nutrition 4 | 2010,500,foo,"acorp","Another Corp",food,Food & Nutrition 5 | 2009,900,qux,"acorp","Another Corp",food,Food & Nutrition 6 | 2010,300,foo,"ccorp","Central Corp",school,Schools & Education 7 | 2009,600,qux,"ccorp","Central Corp",school,Schools & Education 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /pyenv 2 | /*.ini 3 | !/test_continuous.ini 4 | /*.egg-info 5 | *.pyc 6 | *.egg 7 | *.DS_Store 8 | /.noseids 9 | /linesman-enabled 10 | *.DS_Store 11 | /db 12 | .*.swp 13 | doc/_build/* 14 | spendb/static/spendb.ui 15 | spendb/static/spendb.ui/* 16 | contrib/os_export/exports/* 17 | *~ 18 | .tx/config 19 | build/* 20 | .vagrant 21 | .project 22 | .pydevproject 23 | settings.py 24 | .env 25 | tmp/* 26 | coverage/* 27 | .coverage 28 | node_modules/ 29 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/csv_import/malformed/data.csv: -------------------------------------------------------------------------------- 1 
| id,Tipologia,Settore,to_label,from_id,from_label,cofog,to_id,name,amount,description,Ordinato al 1/2/2010,date 2 | 1,Contratti,Varie,CEDAT 85 SRL,1,Camera dei Deputati,01.1.1,cedat_85_srl,"SERVIZIO DI SUPPORTO ALLE ATTIVITA' DI 3 | RESOCONTAZIONE STENOGRAF ICA","66,097.77",,0,2010 4 | 2,Contratti,Varie,TECNOCONFERENCE EUROPE SRL,1,Camera dei Deputati,01.1.1,tecnoconference_europe_srl,NOLEGGIO IMPIANTI TRADUZIONE,"130,000.00",gara in corso,0 -------------------------------------------------------------------------------- /spendb/auth/account.py: -------------------------------------------------------------------------------- 1 | from flask.ext.login import current_user 2 | 3 | 4 | def logged_in(): 5 | return current_user.is_authenticated() and current_user.is_active() 6 | 7 | 8 | def is_admin(): 9 | return logged_in() and current_user.admin 10 | 11 | 12 | def create(): 13 | return True 14 | 15 | 16 | def read(account): 17 | return True 18 | 19 | 20 | def update(account): 21 | return logged_in() 22 | 23 | 24 | def delete(account): 25 | return False 26 | -------------------------------------------------------------------------------- /bin/update_translations: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eu 4 | 5 | LOCALES=(de ar is_IS) 6 | 7 | #pybabel init -i messages.pot -d spendb/translations -l de 8 | 9 | pybabel extract -F babel.cfg -k lazy_gettext -o spendb/translations/messages.pot spendb 10 | pybabel update -i spendb/translations/messages.pot -d spendb/translations 11 | 12 | for LOCALE in ${LOCALES[*]}; do 13 | pybabel compile -f -i spendb/translations/messages.pot -D messages -l $LOCALE -d spendb/translations; 14 | done; 15 | -------------------------------------------------------------------------------- /production.env.tmpl: -------------------------------------------------------------------------------- 1 | # Configuration options injected into docker-compose, see settings.py.tmpl 2 | 
SPENDB_SITE_TITLE=SpenDB 3 | # SPENDB_SECRET=foo 4 | SPENDB_DATABASE_URL=postgresql://spendb:spendb@db/spendb 5 | SPENDB_AMQP_URL=amqp://guest:guest@rabbitmq:5672// 6 | SPENDB_MAIL_SERVER=smtp.mandrillapp.com 7 | SPENDB_MAIL_PORT=587 8 | # SPENDB_MAIL_USERNAME= 9 | # SPENDB_MAIL_PASSWORD= 10 | # SPENDB_MAIL_DEFAULT_SENDER= 11 | # SPENDB_AWS_KEY_ID= 12 | # SPENDB_AWS_SECRET= 13 | # SPENDB_AWS_DATA_BUCKET= 14 | -------------------------------------------------------------------------------- /spendb/auth/dataset.py: -------------------------------------------------------------------------------- 1 | from flask.ext.login import current_user 2 | 3 | from account import logged_in 4 | 5 | 6 | def create(): 7 | return logged_in() 8 | 9 | 10 | def read(dataset): 11 | if not dataset.private: 12 | return True 13 | return update(dataset) 14 | 15 | 16 | def update(dataset): 17 | return logged_in() and (current_user.admin or 18 | current_user in dataset.managers) 19 | 20 | 21 | def delete(dataset): 22 | return update(dataset) 23 | -------------------------------------------------------------------------------- /spendb/lib/helpers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ Helper functions """ 3 | from flask import request 4 | from apikit import obj_or_404 5 | 6 | from spendb.auth import require 7 | from spendb.model import Dataset 8 | 9 | 10 | def get_dataset(name): 11 | dataset = obj_or_404(Dataset.by_name(name)) 12 | require.dataset.read(dataset) 13 | return dataset 14 | 15 | 16 | def get_page(param='page'): 17 | try: 18 | return int(request.args.get(param)) 19 | except: 20 | return 1 21 | -------------------------------------------------------------------------------- /spendb/validation/model.py: -------------------------------------------------------------------------------- 1 | from babbage.validation import validate_model as babbage_validate 2 | 3 | from spendb.core import db 4 | 5 | 6 | TYPES = 
{ 7 | 'string': db.Unicode, 8 | 'integer': db.BigInteger, 9 | 'boolean': db.Boolean, 10 | 'number': db.Float, 11 | # FIXME: add proper support for dates 12 | # 'date': db.Date 13 | 'date': db.Unicode 14 | } 15 | 16 | 17 | def validate_model(model): 18 | """ Apply model validation. """ 19 | babbage_validate(model) 20 | return model 21 | -------------------------------------------------------------------------------- /swarmvars.json: -------------------------------------------------------------------------------- 1 | { 2 | "spendb/production": { 3 | "celery_force_root": "true", 4 | "site_title": "SpenDB", 5 | "secret": "foo", 6 | "database_url": "postgresql://spendb:spendb@db/spendb", 7 | "amqp_url": "amqp://guest:guest@rabbitmq:5672//", 8 | "mail_server": "smtp.mandrillapp.com", 9 | "mail_port": "587", 10 | "mail_username": "", 11 | "mail_password": "", 12 | "mail_default_sender": "", 13 | "aws_key_id": "", 14 | "aws_secret": "", 15 | "aws_data_bucket": "" 16 | } 17 | } -------------------------------------------------------------------------------- /spendb/tests/views/api/test_meta.py: -------------------------------------------------------------------------------- 1 | from flask import url_for 2 | 3 | from spendb.tests.base import ControllerTestCase 4 | 5 | 6 | class TestMetaApiController(ControllerTestCase): 7 | 8 | def setUp(self): 9 | super(TestMetaApiController, self).setUp() 10 | 11 | def test_reference_data(self): 12 | url = url_for('meta_api.reference_data') 13 | res = self.client.get(url) 14 | assert 'territories' in res.json, res.json 15 | assert 'currencies' in res.json, res.json 16 | assert 'languages' in res.json, res.json 17 | 18 | -------------------------------------------------------------------------------- /spendb/tests/etl/test_manager.py: -------------------------------------------------------------------------------- 1 | from spendb.tests.base import DatabaseTestCase 2 | 3 | from spendb.core import data_manager 4 | 5 | 6 | class 
TestDataManager(DatabaseTestCase): 7 | 8 | def setUp(self): 9 | data_manager._index = None 10 | super(TestDataManager, self).setUp() 11 | 12 | def tearDown(self): 13 | super(TestDataManager, self).tearDown() 14 | 15 | def test_manager(self): 16 | assert data_manager.collection is not None, data_manager.collection 17 | package = data_manager.package('cra') 18 | assert package.id == 'cra', package 19 | -------------------------------------------------------------------------------- /spendb/__init__.py: -------------------------------------------------------------------------------- 1 | # this is a namespace package 2 | try: 3 | import pkg_resources 4 | pkg_resources.declare_namespace(__name__) 5 | except ImportError: 6 | import pkgutil 7 | __path__ = pkgutil.extend_path(__path__, __name__) 8 | 9 | 10 | import warnings 11 | warnings.filterwarnings('ignore', 'Options will be ignored.') 12 | 13 | # Silence SQLAlchemy warning: 14 | import warnings 15 | warnings.filterwarnings( 16 | 'ignore', 17 | 'Unicode type received non-unicode bind param value.') 18 | warnings.filterwarnings( 19 | 'ignore', 20 | 'Unicode type received non-unicodebind param value.') 21 | 22 | 23 | __version__ = '0.19' 24 | -------------------------------------------------------------------------------- /pages/contact.html: -------------------------------------------------------------------------------- 1 | title: Contact 2 | hidden: false 3 | 4 | SpenDB is an open source community project. For most questions about the project, 5 | please check out [our wiki](https://github.com/spendb/spendb/wiki). 6 | 7 | [File an issue](https://github.com/spendb/spendb/issues) for any problems, questions 8 | or ideas you have while using this service. Please note that this discussion forum 9 | is for the software running this site, not for political discussions on individual 10 | datasets. We are also not the government and hence have the same tools for changing 11 | the budget that you do: voting. 
12 | 13 | For legal requests, please contact the [site administrators](mailto:friedrich@pudo.org). 14 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/csv_import/import_errors/data.csv: -------------------------------------------------------------------------------- 1 | id,Tipologia,Settore,to_label,from_id,from_label,cofog,to_id,name,amount,description,Ordinato al 1/2/2010,date 2 | 1,Contratti,Varie,CEDAT 85 SRL,1,Camera dei Deputati,01.1.1,cedat_85_srl,"SERVIZIO DI SUPPORTO ALLE ATTIVITA' DI 3 | RESOCONTAZIONE STENOGRAF ICA","66,097.77",,0,0 4 | 2,Contratti,Varie,TECNOCONFERENCE EUROPE SRL,1,Camera dei 5 | Deputati,01.1.1,tecnoconference_europe_srl,NOLEGGIO IMPIANTI TRADUZIONE,"130,000.00",gara in corso,0,1 6 | 3,Contratti,Varie,CEDAT 65 SRL |PROGETTO LAVORO SOC. COOP. |,1,Camera dei Deputati,01.1.1,cedat_65_srl_|progetto_lavoro_soc._coop._|,"ASSISTENZE OPERATIVE A SUPPORTO 7 | DELLA GESTIONE DOCUMENTALE E TECNICA","3,135,000.00",da gara,"2,869,548.00",22 -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "2.7" 4 | services: 5 | - rabbitmq 6 | before_install: 7 | - virtualenv ./pyenv --distribute 8 | - source ./pyenv/bin/activate 9 | install: 10 | # To install external filter binaries, we first need to install 11 | # RubyGems and Node/NPM. I'm not sure why, since it seems clear 12 | # that NVM and RVM are pre-installed (see below). 13 | - sudo apt-get install python-software-properties 14 | - pip install -r requirements.txt -e . 
15 | - pip install psycopg2 coveralls 16 | before_script: 17 | - psql -c 'create database spendb;' -U postgres 18 | - cp prod_settings.py settings.py 19 | - nosetests --version 20 | script: 21 | - nosetests --with-coverage --cover-package=spendb 22 | after_success: 23 | - coveralls 24 | -------------------------------------------------------------------------------- /spendb/model/common.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import datetime 3 | import json 4 | 5 | import sqlalchemy as sqla 6 | from sqlalchemy.ext import mutable 7 | 8 | 9 | def json_default(obj): 10 | if isinstance(obj, datetime.datetime): 11 | obj = obj.date() 12 | if isinstance(obj, datetime.date): 13 | obj = obj.isoformat() 14 | return obj 15 | 16 | 17 | class JSONType(sqla.TypeDecorator): 18 | """Enables JSON storage by encoding and decoding on the fly.""" 19 | impl = sqla.Unicode 20 | 21 | def process_bind_param(self, value, dialect): 22 | return json.dumps(value, default=json_default) 23 | 24 | def process_result_value(self, value, dialect): 25 | return json.loads(value) 26 | 27 | mutable.MutableDict.associate_with(JSONType) 28 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/csv_import/successful_import/data.csv: -------------------------------------------------------------------------------- 1 | id,Tipologia,Settore,to_label,from_id,from_label,cofog,to_id,name,amount,description,Ordinato al 1/2/2010,date 2 | 1,Contratti,Varie,CEDAT 85 SRL,1,Camera dei Deputati,01.1.1,cedat_85_srl,"SERVIZIO DI SUPPORTO ALLE ATTIVITA' DI 3 | RESOCONTAZIONE STENOGRAF ICA","66,097.77",,0,2010 4 | 2,Contratti,Varie,TECNOCONFERENCE EUROPE SRL,1,Camera dei Deputati,01.1.1,tecnoconference_europe_srl,NOLEGGIO IMPIANTI TRADUZIONE,"130,000.00",gara in corso,0,2010 5 | 3,Contratti,Varie,TECNOCONFERENCE EUROPE SRL,1,Camera dei Deputati,01.1.1,tecnoconference_europe_srl,NOLEGGIO IMPIANTI 
TRADUZIONE,"130,000.00",gara in corso,0,2010 6 | 4,Contratti,Varie,CEDAT 65 SRL |PROGETTO LAVORO SOC. COOP. |,1,Camera dei Deputati,01.1.1,cedat_65_srl_|progetto_lavoro_soc._coop._|,"ASSISTENZE OPERATIVE A SUPPORTO 7 | DELLA GESTIONE DOCUMENTALE E TECNICA","3,135,000.00",da gara,"2,869,548.00",2010 -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Babel==1.3 2 | Flask==0.10.1 3 | Flask-Babel==0.9 4 | Flask-Cache==0.13.1 5 | Flask-Login==0.2.11 6 | Flask-SQLAlchemy==2.0 7 | Flask-Script==2.0.5 8 | Flask-Testing==0.4.2 9 | Flask-Mail==0.9.1 10 | Flask-Migrate>=1.3.0 11 | Flask-FlatPages==0.6 12 | Flask-Cors==2.0.1 13 | WebHelpers==1.3 14 | celery==3.1.17 15 | colander==1.0 16 | decorator==3.4.0 17 | lxml==3.4.1 18 | json-table-schema<0.2 19 | https://github.com/pudo/messytables/tarball/allow-parser-args 20 | mock==1.0.1 21 | nose==1.3.4 22 | ordereddict==1.1 23 | requests==2.5.1 24 | cssmin==0.2.0 25 | archivekit>=0.4 26 | loadkit>=0.2 27 | apikit>=0.3.1 28 | boto>=2.38.0 29 | git+https://github.com/spendb/fiscalmodel 30 | git+https://github.com/spendb/babbage 31 | billiard 32 | markdown 33 | kombu 34 | 35 | expressions 36 | grako 37 | https://github.com/DataBrewery/cubes/tarball/master 38 | 39 | gunicorn>=19.3.0 40 | psycopg2>=2.6 41 | 42 | . 
class SpendingCubeManager(CubeManager):
    """ Adapter that lets the babbage OLAP API discover and query
    SpenDB datasets as cubes. """

    def __init__(self):
        pass

    def has_cube(self, name):
        """ A cube exists when the dataset exists and has a model. """
        dataset = Dataset.by_name(name)
        return dataset is not None and dataset.model is not None

    def get_cube(self, name):
        """ Return the babbage cube for the named dataset, or None
        when the dataset is missing or not yet modelled. """
        dataset = Dataset.by_name(name)
        if dataset is not None and dataset.model is not None:
            return dataset.cube
        return None

    def list_cubes(self):
        """ Yield the names of all queryable (modelled) datasets. """
        # TODO: authz, failing conservatively for now.
        modelled = (ds for ds in Dataset.all_by_account(None)
                    if ds.model is not None)
        for ds in modelled:
            yield ds.name
""" 13 | 14 | def __init__(self, wrapped): 15 | self.wrapped = wrapped 16 | 17 | def __getattr__(self, attr): 18 | real = getattr(self.wrapped, attr) 19 | return Requirement(real) 20 | 21 | def __call__(self, *args, **kwargs): 22 | fc = self.wrapped(*args, **kwargs) 23 | if fc is not True: 24 | raise Forbidden('Sorry, you\'re not permitted to do this.') 25 | return fc 26 | 27 | @classmethod 28 | def here(cls): 29 | module = inspect.getmodule(cls) 30 | return cls(module) 31 | 32 | require = Requirement.here() 33 | -------------------------------------------------------------------------------- /spendb/command/db.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from flask.ext.migrate import upgrade 4 | from flask.ext.script import Manager 5 | 6 | from spendb.core import db 7 | from spendb.model import Dataset 8 | 9 | log = logging.getLogger(__name__) 10 | 11 | manager = Manager() 12 | manager.__doc__ = 'Database operations' 13 | 14 | 15 | @manager.command 16 | def drop(): 17 | """ Drop database """ 18 | log.warn("Dropping database") 19 | db.metadata.reflect() 20 | db.metadata.drop_all() 21 | 22 | 23 | @manager.command 24 | def drop_dataset(name): 25 | """ Drop a dataset from the database """ 26 | log.warn("Dropping dataset '%s'", name) 27 | dataset = db.session.query(Dataset).filter_by(name=name).first() 28 | if dataset is None: 29 | raise Exception("Dataset does not exist: '%s'" % name) 30 | dataset.drop() 31 | db.session.delete(dataset) 32 | db.session.commit() 33 | 34 | 35 | @manager.command 36 | def migrate(): 37 | """ Initialize or upgrade the database """ 38 | upgrade() 39 | -------------------------------------------------------------------------------- /spendb/tests/views/test_slicer.py: -------------------------------------------------------------------------------- 1 | 2 | from flask import url_for 3 | 4 | from spendb.tests.base import ControllerTestCase 5 | from spendb.tests.helpers import 
def user_get(url, params=None):
    """ Perform an authenticated GET against the OpenSpending instance.

    ``url`` may be absolute or relative to INSTANCE. ``params`` is an
    optional dict of extra query parameters.

    Returns the ``requests`` response object.
    """
    # Copy instead of mutating: the previous mutable default ({}) was
    # shared across calls and written to below.
    params = dict(params) if params is not None else {}
    api_key = os.environ.get('OPENSPENDING_APIKEY')
    headers = {'Authorization': 'ApiKey %s' % api_key}
    if not url.startswith('http'):
        url = urljoin(INSTANCE, url)
    # Unique timestamp per request -- presumably a cache buster; confirm.
    params['__'] = datetime.utcnow().isoformat()
    return requests.get(url, params=params,
                        headers=headers)
/spendb/tests/fixtures/csv_import/erroneous_values/data.csv: -------------------------------------------------------------------------------- 1 | id,Tipologia,Settore,to_label,from_id,from_label,cofog,to_id,name,amount,description,Ordinato al 1/2/2010,date 2 | 1,Contratti,Varie,CEDAT 85 SRL,1,Camera dei Deputati,011.1,cedat_85_srl,"SERVIZIO DI SUPPORTO ALLE ATTIVITA' DI 3 | RESOCONTAZIONE STENOGRAF ICA","66,097.77",,0,2010 4 | 2,Contratti,Varie,TECNOCONFERENCE EUROPE SRL,1,Camera dei Deputati,01.1.1,tecnoconference_europe_srl,NOLEGGIO IMPIANTI TRADUZIONE,"130,000.00",gara in corso,0,2010 5 | 2,Contratti,Varie,TECNOCONFERENCE EUROPE SRL,1,Camera dei Deputati,01.1.1,tecnoconference_europe_srl,NOLEGGIO IMPIANTI TRADUZIONE,"130,000.00",gara in corso,0,2010 6 | 2,Contratti,Varie,TECNOCONFERENCE EUROPE SRL,1,Camera dei Deputati,01.1.1,tecnoconference_europe_srl,NOLEGGIO IMPIANTI TRADUZIONE,"130,000.00",gara in corso,0,2010 7 | 3,Contratti,Varie,CEDAT 65 SRL |PROGETTO LAVORO SOC. COOP. |,1,Camera dei Deputati,01.1.1,cedat_65_srl_|progetto_lavoro_soc._coop._|,"ASSISTENZE OPERATIVE A SUPPORTO 8 | DELLA GESTIONE DOCUMENTALE E TECNICA","3.135.000.00",da gara,"2,869,548.00",2010-2012 -------------------------------------------------------------------------------- /spendb/views/api/run.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from flask import Blueprint, request 4 | from apikit import jsonify, Pager, obj_or_404 5 | from loadkit import logger 6 | 7 | from spendb.core import data_manager 8 | from spendb.model import Run 9 | from spendb.lib.helpers import get_dataset 10 | 11 | 12 | log = logging.getLogger(__name__) 13 | blueprint = Blueprint('runs_api', __name__) 14 | 15 | 16 | @blueprint.route('/datasets//runs') 17 | def index(dataset): 18 | dataset = get_dataset(dataset) 19 | q = Run.all(dataset) 20 | if 'source' in request.args: 21 | q = q.filter(Run.source == request.args.get('source')) 22 | pager = 
class TestCase(FlaskTestCase):
    """ Base test case: builds a SpenDB app against an in-memory
    SQLite database with eager celery, and points the data manager
    at a throw-away temp directory. """

    def create_app(self):
        config = {
            'DEBUG': True,
            'TESTING': True,
            'SITE_TITLE': 'SpenDB',
            'SQLALCHEMY_DATABASE_URI': 'sqlite:///:memory:',
            'PRESERVE_CONTEXT_ON_EXCEPTION': False,
            'CELERY_ALWAYS_EAGER': True
        }
        app = create_web_app(**config)
        # Replace the collection so tests never touch real storage.
        data_manager._coll = open_collection(
            'test', 'file', path=tempfile.mkdtemp())
        return app

    def setUp(self):
        init_db(self.app)

    def tearDown(self):
        clean_db(self.app)
3 | command: gunicorn -w 5 -b 0.0.0.0:8000 --error-logfile /var/log/gunicorn.error.log --log-file /var/log/gunicorn.log spendb.wsgi:app 4 | ports: 5 | - "127.0.0.1:8000:8000" 6 | links: 7 | - rabbitmq 8 | - db 9 | - worker 10 | volumes: 11 | - /srv/spendb/logs:/var/log 12 | env_file: 13 | - production.env 14 | 15 | db: 16 | image: postgres:9.4 17 | environment: 18 | - POSTGRES_USER=spendb 19 | - POSTGRES_PASSWORD=spendb 20 | expose: 21 | - "5432" 22 | volumes: 23 | - /srv/spendb/db:/var/lib/postgresql/data 24 | - /srv/spendb/logs/postgresql:/var/log 25 | 26 | rabbitmq: 27 | image: rabbitmq 28 | expose: 29 | - "5672" 30 | 31 | worker: 32 | build: . 33 | command: celery -A spendb.tasks worker -c 4 -l info --logfile=/var/log/celery.log 34 | links: 35 | - rabbitmq 36 | - db 37 | volumes: 38 | - /srv/spendb/logs:/var/log 39 | env_file: 40 | - production.env 41 | environment: 42 | - C_FORCE_ROOT=true 43 | -------------------------------------------------------------------------------- /prod_settings.py: -------------------------------------------------------------------------------- 1 | import os 2 | env = os.environ.get 3 | 4 | DEBUG = False 5 | CELERY_ALWAYS_EAGER = False 6 | CACHE = not DEBUG 7 | 8 | SITE_TITLE = env('SPENDB_SITE_TITLE', 'SpenDB') 9 | SECRET_KEY = env('SPENDB_SECRET') 10 | 11 | SQLALCHEMY_DATABASE_URI = env('SPENDB_DATABASE_URL') 12 | CELERY_BROKER_URL = env('SPENDB_AMQP_URL', env('SPENDB_CLOUDAMQP_URL')) 13 | 14 | MAIL_SERVER = env('SPENDB_SMTP_HOST', 'smtp.mandrillapp.com') 15 | MAIL_PORT = int(env('SPENDB_SMTP_PORT', 587)) 16 | MAIL_USE_TLS = True 17 | MAIL_USERNAME = env('SPENDB_SMTP_USERNAME', env('SPENDB_MANDRILL_USERNAME')) 18 | MAIL_PASSWORD = env('SPENDB_SMTP_PASSWORD', env('SPENDB_MANDRILL_PASSWORD')) 19 | 20 | MAIL_DEFAULT_SENDER = env('SPENDB_MAIL_SENDER', 'SpenDB ') 21 | 22 | STORAGE_TYPE = env('SPENDB_STORAGE_TYPE', 's3') #Alternative: 'file' 23 | STORAGE_PATH = env('SPENDB_STORAGE_PATH', '/usr/local/lib/spendb') #Only used if 'file' 
class TestQueueTasks(DatabaseTestCase):
    """ Exercise the celery entry points in spendb.tasks end-to-end
    (celery runs eagerly in tests), from URL extraction into the
    dataset's archive package. """

    def setUp(self):
        super(TestQueueTasks, self).setUp()
        # NOTE(review): DataManager in this codebase shows no _index
        # attribute; presumably this clears a cached index -- confirm.
        data_manager._index = None
        self.dsn = 'cra'
        model = meta_fixture(self.dsn)
        self.ds = Dataset(model)
        db.session.add(self.ds)
        db.session.commit()
        # Local fixture path used as the "URL" to extract from.
        self.cra_url = csvimport_fixture_path('../data', 'cra.csv')

    def tearDown(self):
        super(TestQueueTasks, self).tearDown()

    def test_load_from_url(self):
        # Eager mode: this runs extraction synchronously.
        tasks.load_from_url(self.dsn, self.cra_url)
        package = data_manager.package(self.dsn)
        sources = list(package.all(Source))
        assert len(sources) == 1, sources
        src0 = sources[0]
        assert src0.meta['name'] == 'cra.csv', src0.meta.items()
It is therefore subject to the 7 | [terms of service](http://okfn.de/impressum/) that cover all services operated by OKF-DE, including 8 | the Impressum and data protection rules as mandated by German law. 9 | 10 | ### Community project notice 11 | 12 | Please further be aware that this is a community-run effort without dedicated funding and staff. As 13 | such, all hosting and API services provided by this platform are based on a best effort principle: we 14 | will keep it running as long as there is a relevant level of interest, but reserve the right 15 | to cease operating the service at any time. In such an event, we will make sure that comprehensive 16 | data exports will be made available, so that anyone can continue to operate their own instances of 17 | the service. 18 | 19 | Consider this note a call to action: open services are kept alive by the community that operates 20 | them. If you can, and you're interested - then [join that community](contact.html) and help to make 21 | sure this is a sustainable project! 
22 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/csv_import/lbhf/data.csv: -------------------------------------------------------------------------------- 1 | id,paid_by,date,transaction_id,amount,paid_to,spending_area 2 | 1,London Borough of Hammersmith and Fulham,2010-01-01,405869,898.64,ADT FIRE & SECURITY PLC,Childrens Services 3 | 2,London Borough of Hammersmith and Fulham,2010-01-01,405870,517.85,ADT FIRE & SECURITY PLC,Resident Services 4 | 3,London Borough of Hammersmith and Fulham,2010-01-01,405871,1215.97,ADT FIRE & SECURITY PLC,Regeneration and Housing Services 5 | 4,London Borough of Hammersmith and Fulham,2010-01-01,417742,112.50,ALARM LTD,Finance and Corporate Services 6 | 5,London Borough of Hammersmith and Fulham,2010-01-01,417742,562.50,ALARM LTD,Finance and Corporate Services 7 | 6,London Borough of Hammersmith and Fulham,2010-01-01,391746,1665.62,ASCOM TELE NOVA LTD,Childrens Services 8 | 7,London Borough of Hammersmith and Fulham,2010-01-01,396062,1500.00,BIW TECHNOLOGIES LIMITED,Community Services 9 | 8,London Borough of Hammersmith and Fulham,2010-01-01,392463,560.00,CAPITAL CITY COMMUNICATIONS LTD,Resident Services 10 | 9,London Borough of Hammersmith and Fulham,2010-01-01,393998,1296.00,CAPITAL CITY COMMUNICATIONS LTD,Environment Services 11 | 10,London Borough of Hammersmith and Fulham,2010-01-01,395696,171.39,CAR HIRE (DAY OF SWANSEA)LTD,Resident Services 12 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/csv_import/default/model.json: -------------------------------------------------------------------------------- 1 | { 2 | "dimensions": { 3 | "entry_id": { 4 | "attributes": { 5 | "entry_id": { 6 | "column": "id", 7 | "label": "ID" 8 | } 9 | }, 10 | "key_attribute": "entry_id", 11 | "label": "Entry ID" 12 | }, 13 | "from": { 14 | "attributes": { 15 | "label": { 16 | "column": "from_label", 17 | "label": "Label" 18 | }, 19 
| "name": { 20 | "column": "from_id", 21 | "label": "Name" 22 | } 23 | }, 24 | "key_attribute": "name", 25 | "label": "Paid by" 26 | }, 27 | "time": { 28 | "attributes": { 29 | "year": { 30 | "column": "date", 31 | "label": "Year" 32 | } 33 | }, 34 | "key_attribute": "year", 35 | "label": "Time" 36 | } 37 | }, 38 | "measures": { 39 | "amount": { 40 | "column": "amount", 41 | "label": "Amount" 42 | } 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /spendb/migrate/env.py: -------------------------------------------------------------------------------- 1 | from __future__ import with_statement 2 | from alembic import context 3 | from sqlalchemy import engine_from_config, pool 4 | from flask import current_app 5 | 6 | from spendb.core import db 7 | from spendb.model import * # noqa 8 | 9 | config = context.config 10 | config.set_main_option('sqlalchemy.url', 11 | current_app.config['SQLALCHEMY_DATABASE_URI']) 12 | target_metadata = db.metadata 13 | 14 | 15 | def run_migrations_offline(): 16 | url = config.get_main_option("sqlalchemy.url") 17 | context.configure(url=url) 18 | 19 | with context.begin_transaction(): 20 | context.run_migrations() 21 | 22 | 23 | def run_migrations_online(): 24 | engine = engine_from_config( 25 | config.get_section(config.config_ini_section), 26 | prefix='sqlalchemy.', 27 | poolclass=pool.NullPool) 28 | 29 | connection = engine.connect() 30 | context.configure( 31 | connection=connection, 32 | target_metadata=target_metadata 33 | ) 34 | 35 | try: 36 | with context.begin_transaction(): 37 | context.run_migrations() 38 | finally: 39 | connection.close() 40 | 41 | if context.is_offline_mode(): 42 | run_migrations_offline() 43 | else: 44 | run_migrations_online() 45 | -------------------------------------------------------------------------------- /spendb/etl/manager.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from archivekit 
class DataManager(object):
    """ The data manager coordinates read and write access to the
    ETL data storage. """

    def __init__(self):
        self.app = None
        self._coll = None

    @property
    def configured(self):
        """ True once init_app() has bound a Flask app. """
        return self.app is not None

    def init_app(self, app):
        self.app = app

    def package(self, dataset):
        """ Get a package for a given dataset name. """
        assert self.configured, 'Data manager not configured!'
        return self.collection.get(dataset)

    @property
    def collection(self):
        """ Lazily opened storage collection; None when unconfigured. """
        if not self.configured:
            return
        if self._coll is None:
            config = self.app.config
            options = {
                'path': config.get('STORAGE_PATH'),
                'aws_key_id': config.get('AWS_KEY_ID'),
                'aws_secret': config.get('AWS_SECRET'),
                'bucket_name': config.get('AWS_DATA_BUCKET')
            }
            self._coll = open_collection(
                'datasets', config.get('STORAGE_TYPE'), **options)
        return self._coll

', '').replace('

', '') 19 | status = exc.code 20 | title = exc.name 21 | headers = exc.get_headers(request.environ) 22 | data = { 23 | 'status': status, 24 | 'title': title, 25 | 'message': message 26 | } 27 | return jsonify(data, status=status, headers=headers) 28 | 29 | 30 | def handle_invalid(exc): 31 | if isinstance(exc.node.typ, Mapping): 32 | exc.node.name = '' 33 | data = { 34 | 'status': 400, 35 | 'errors': exc.asdict() 36 | } 37 | return jsonify(data, status=400) 38 | 39 | 40 | def handle_validation_error(exc): 41 | return jsonify({ 42 | 'status': 400, 43 | 'message': exc.message, 44 | 'value': exc.instance 45 | }, status=400) 46 | 47 | 48 | def handle_not_modified(exc): 49 | return Response(status=304) 50 | -------------------------------------------------------------------------------- /spendb/lib/mailer.py: -------------------------------------------------------------------------------- 1 | from flask import current_app 2 | from flask.ext.babel import lazy_gettext as _ 3 | from flask.ext.mail import Message 4 | 5 | from spendb.core import mail, url_for 6 | 7 | 8 | RESET_MESSAGE = '''You have requested your password on %(site_title)s to be reset. 
def dicts(d):
    """ Turn a reference mapping into a stream of row dicts.

    Plain values become {'code', 'label'}; 2-tuples additionally
    carry their second element under 'key'.
    """
    for code, value in d.items():
        entry = {'code': code}
        if isinstance(value, tuple):
            entry['label'] = value[0]
            entry['key'] = value[1]
        else:
            entry['label'] = value
        yield entry
@blueprint.route('/pages/.html')
# NOTE(review): the route converter appears stripped in this copy;
# upstream this is presumably '/pages/<path:path>.html' -- confirm.
def page(path):
    """ Serve a flat page as JSON: its metadata, its rendered HTML,
    and an index of every other page's metadata. """
    page = pages.get_or_404(path)
    data = dict(page.meta)
    data['html'] = page.html
    data['path'] = page.path + '.html'
    data['pages'] = {}
    # NB: this loop re-binds ``path``, shadowing the URL argument;
    # only safe because the argument is not used afterwards.
    for p in pages:
        path = p.path + '.html'
        data['pages'][path] = p.meta
    return jsonify(data)
assert len(res.json['dimensions']) == 12, len(res.json['dimensions']) 37 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/validation/2011_11_20_name_attribute.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset": { 3 | "name": "test", 4 | "label": "Test Dataset", 5 | "description": "This is a test dataset", 6 | "currency": "EUR" 7 | }, 8 | "mapping": { 9 | "amount": { 10 | "type": "measure", 11 | "label": "Amount", 12 | "datatype": "number", 13 | "column": "AMOUNT" 14 | }, 15 | "cofinance": { 16 | "type": "measure", 17 | "label": "Co-Financed Amount", 18 | "datatype": "number", 19 | "column": "COFIN" 20 | }, 21 | "time": { 22 | "type": "date", 23 | "label": "Time of transaction", 24 | "datatype": "date", 25 | "column": "YEAR" 26 | }, 27 | "transaction_id": { 28 | "type": "value", 29 | "label": "Transaction ID", 30 | "datatype": "id", 31 | "column": "TX" 32 | }, 33 | "function": { 34 | "type": "compound", 35 | "key": true, 36 | "label": "Function", 37 | "fields": [ 38 | {"name": "label", "datatype": "string", "column": "FUNCTION"}, 39 | {"name": "description", "datatype": "string", "column": "FUNCTION_DESC"} 40 | ] 41 | }, 42 | "supplier": { 43 | "type": "compound", 44 | "label": "Supplier", 45 | "fields": [ 46 | {"name": "name", "datatype": "id", "column": "SUPPLIER_ID"}, 47 | {"name": "label", "datatype": "string", "column": "SUPPLIER"} 48 | ] 49 | } 50 | } 51 | } 52 | 53 | -------------------------------------------------------------------------------- /spendb/tasks.py: -------------------------------------------------------------------------------- 1 | from celery.utils.log import get_task_logger 2 | 3 | from spendb.core import create_app, create_celery 4 | from spendb.model import Dataset 5 | from spendb.etl import tasks 6 | 7 | 8 | log = get_task_logger(__name__) 9 | 10 | flask_app = create_app() 11 | celery = create_celery(flask_app) 12 | 13 | 14 | 
@celery.task(ignore_result=True) 15 | def load_from_url(dataset_name, url): 16 | with flask_app.app_context(): 17 | dataset = Dataset.by_name(dataset_name) 18 | if dataset is None: 19 | log.error("Dataset not found: %s", dataset_name) 20 | return 21 | source = tasks.extract_url(dataset, url) 22 | if source is not None: 23 | load_from_source.delay(dataset_name, source.name) 24 | 25 | 26 | @celery.task(ignore_result=True) 27 | def load_from_source(dataset_name, source_name): 28 | with flask_app.app_context(): 29 | dataset = Dataset.by_name(dataset_name) 30 | if dataset is None: 31 | log.error("Dataset not found: %s", dataset_name) 32 | return 33 | if source_name is None: 34 | log.error("No source specified: %s", dataset_name) 35 | return 36 | source = tasks.transform_source(dataset, source_name) 37 | if source is None: 38 | return 39 | tasks.load(dataset, source_name=source_name) 40 | 41 | 42 | @celery.task(ignore_result=True) 43 | def ping(): 44 | with flask_app.app_context(): 45 | log.info("Pong.") 46 | -------------------------------------------------------------------------------- /pages/about.html: -------------------------------------------------------------------------------- 1 | title: About 2 | hidden: false 3 | 4 | SpenDB is a tool for understanding government financial information. Our goal is to find 5 | new ways for those interested - journalists, policy analysts, parliamentarians or even 6 | the mythical armchair auditor - to access, interpret and share data about the public purse. 7 | 8 | ### Is this just about budget visualization? 9 | 10 | While the most commonly looked-at piece of government financial data is probably budgets, 11 | there's no need to remain limited to that source. 12 | 13 | Information about government purchases of goods and services, funding for research, subsidies 14 | or even payments received for resource concessions are all pieces of information that should 15 | be accessible to the broadest possible public. 
Our tool provides a basic set of analytical utilities - just enough to answer some interesting
questions about policy, procurement outcomes or trends over time. But it also provides a rich
] 42 | }, 43 | "supplier": { 44 | "type": "compound", 45 | "label": "Supplier", 46 | "fields": [ 47 | {"name": "name", "datatype": "id", "column": "SUPPLIER_ID"}, 48 | {"name": "label", "datatype": "string", "column": "SUPPLIER"} 49 | ] 50 | } 51 | } 52 | } 53 | 54 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/validation/2011_11_22_unique_keys.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset": { 3 | "name": "test", 4 | "label": "Test Dataset", 5 | "description": "This is a test dataset", 6 | "currency": "EUR", 7 | "unique_keys": ["function.name"] 8 | }, 9 | "mapping": { 10 | "amount": { 11 | "type": "measure", 12 | "label": "Amount", 13 | "datatype": "float", 14 | "column": "AMOUNT" 15 | }, 16 | "cofinance": { 17 | "type": "measure", 18 | "label": "Co-Financed Amount", 19 | "datatype": "float", 20 | "column": "COFIN" 21 | }, 22 | "time": { 23 | "type": "date", 24 | "label": "Time of transaction", 25 | "datatype": "date", 26 | "column": "YEAR" 27 | }, 28 | "transaction_id": { 29 | "type": "value", 30 | "label": "Transaction ID", 31 | "datatype": "id", 32 | "column": "TX" 33 | }, 34 | "function": { 35 | "type": "compound", 36 | "label": "Function", 37 | "fields": [ 38 | {"name": "name", "datatype": "id", "column": "FUNCTION_ID"}, 39 | {"name": "label", "datatype": "string", "column": "FUNCTION"}, 40 | {"name": "description", "datatype": "string", "column": "FUNCTION_DESC"} 41 | ] 42 | }, 43 | "supplier": { 44 | "type": "compound", 45 | "label": "Supplier", 46 | "fields": [ 47 | {"name": "name", "datatype": "id", "column": "SUPPLIER_ID"}, 48 | {"name": "label", "datatype": "string", "column": "SUPPLIER"} 49 | ] 50 | } 51 | } 52 | } 53 | 54 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/validation/2011_12_07_attribute_dicts.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "dataset": { 3 | "name": "test", 4 | "label": "Test Dataset", 5 | "description": "This is a test dataset", 6 | "currency": "EUR", 7 | "ckan_uri": "urn:/dev/null" 8 | }, 9 | "mapping": { 10 | "amount": { 11 | "type": "measure", 12 | "label": "Amount", 13 | "datatype": "float", 14 | "column": "AMOUNT" 15 | }, 16 | "cofinance": { 17 | "type": "measure", 18 | "label": "Co-Financed Amount", 19 | "datatype": "float", 20 | "column": "COFIN" 21 | }, 22 | "time": { 23 | "type": "date", 24 | "label": "Time of transaction", 25 | "datatype": "date", 26 | "column": "YEAR" 27 | }, 28 | "transaction_id": { 29 | "type": "value", 30 | "label": "Transaction ID", 31 | "datatype": "id", 32 | "column": "TX" 33 | }, 34 | "function": { 35 | "type": "compound", 36 | "key": true, 37 | "label": "Function", 38 | "fields": [ 39 | {"name": "name", "datatype": "id", "column": "FUNCTION_ID"}, 40 | {"name": "label", "datatype": "string", "column": "FUNCTION"}, 41 | {"name": "description", "datatype": "string", "column": "FUNCTION_DESC"} 42 | ] 43 | }, 44 | "supplier": { 45 | "type": "compound", 46 | "label": "Supplier", 47 | "fields": [ 48 | {"name": "name", "datatype": "id", "column": "SUPPLIER_ID"}, 49 | {"name": "label", "datatype": "string", "column": "SUPPLIER"} 50 | ] 51 | } 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /spendb/default_settings.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | SECRET_KEY = 'foo' 4 | DEBUG = True 5 | 6 | SITE_TITLE = 'SpenDB' 7 | 8 | ASSETS_PATH_PROD = 'https://dfmbkaoi4kurm.cloudfront.net/libs/spendb.ui/latest/build' 9 | ASSETS_PATH_DEBUG = '/static/spendb.ui/build' 10 | 11 | SQLALCHEMY_DATABASE_URI = 'postgresql://localhost/spendb' 12 | 13 | BABEL_DEFAULT_LOCALE = 'en' 14 | 15 | MAIL_SERVER = 'localhost' 16 | # MAIL_PORT = 25 17 | # MAIL_USE_TLS = False 18 | 
# MAIL_USE_SSL = False 19 | # MAIL_USERNAME = None 20 | # MAIL_PASSWORD = None 21 | MAIL_DEFAULT_SENDER = 'noreply@mapthemoney.org' 22 | 23 | CACHE = False 24 | CACHE_TYPE = 'simple' 25 | 26 | PREFERRED_URL_SCHEME = 'http' 27 | 28 | ALEMBIC_DIR = os.path.join(os.path.dirname(__file__), 'migrate') 29 | ALEMBIC_DIR = os.path.abspath(ALEMBIC_DIR) 30 | 31 | FLATPAGES_ROOT = os.path.join(os.path.dirname(__file__), '..', 'pages') 32 | FLATPAGES_ROOT = os.path.abspath(FLATPAGES_ROOT) 33 | 34 | # Worker queue configuration. 35 | CELERY_BROKER_URL = 'amqp://guest:guest@localhost:5672//' 36 | 37 | # If you set ``EAGER``, processing will happen inline. 38 | CELERY_ALWAYS_EAGER = False 39 | CELERY_TASK_SERIALIZER = 'json' 40 | CELERY_ACCEPT_CONTENT = ['json'] 41 | 42 | # CELERY_DEFAULT_QUEUE = 'loading' 43 | # CELERY_QUEUES = ( 44 | # Queue('indexing', Exchange('spendb'), routing_key='spendb'), 45 | # Queue('loading', Exchange('spendb'), routing_key='spendb'), 46 | # ) 47 | 48 | # CELERY_ROUTES = { 49 | # 'spendb.tasks.load_from_url': { 50 | # 'queue': 'loading' 51 | # }, 52 | # 'spendb.tasks.index_dataset': { 53 | # 'queue': 'indexing' 54 | # }, 55 | # } 56 | -------------------------------------------------------------------------------- /spendb/validation/account.py: -------------------------------------------------------------------------------- 1 | from colander import SchemaNode, SequenceSchema, Regex, String, Length 2 | from colander import MappingSchema, Email, Boolean 3 | 4 | from spendb.validation.common import Ref 5 | 6 | REGISTER_NAME_RE = r"^[a-zA-Z0-9_\-]{3,255}$" 7 | 8 | 9 | class AccountRef(Ref): 10 | 11 | def decode(self, cstruct): 12 | from spendb.model import Account 13 | if isinstance(cstruct, basestring): 14 | return Account.by_name(cstruct) 15 | if isinstance(cstruct, dict): 16 | return self.decode(cstruct.get('name')) 17 | return None 18 | 19 | 20 | class DatasetAccounts(SequenceSchema): 21 | account = SchemaNode(AccountRef()) 22 | 23 | 24 | class 
AccountRegister(MappingSchema): 25 | name = SchemaNode(String(), validator=Regex(REGISTER_NAME_RE)) 26 | fullname = SchemaNode(String()) 27 | email = SchemaNode(String(), validator=Email()) 28 | public_email = SchemaNode(Boolean(), missing=False) 29 | password1 = SchemaNode(String(), validator=Length(min=4)) 30 | password2 = SchemaNode(String(), validator=Length(min=4)) 31 | terms = SchemaNode(Boolean()) 32 | 33 | 34 | class AccountSettings(MappingSchema): 35 | fullname = SchemaNode(String()) 36 | email = SchemaNode(String(), validator=Email()) 37 | public_email = SchemaNode(Boolean(), missing=False) 38 | twitter = SchemaNode(String(), missing=None, 39 | validator=Length(max=140)) 40 | public_twitter = SchemaNode(Boolean(), missing=False) 41 | password1 = SchemaNode(String(), missing=None, default=None) 42 | password2 = SchemaNode(String(), missing=None, default=None) 43 | -------------------------------------------------------------------------------- /spendb/validation/dataset.py: -------------------------------------------------------------------------------- 1 | from colander import Schema, SchemaNode, String, Boolean, SequenceSchema 2 | from colander import OneOf, Length, drop 3 | from fiscalmodel import CURRENCIES, LANGUAGES 4 | from fiscalmodel import COUNTRIES, CATEGORIES 5 | 6 | from spendb.validation.common import dataset_name, prepare_name 7 | from spendb.validation.account import AccountRef 8 | 9 | 10 | class DatasetLanguages(SequenceSchema): 11 | language = SchemaNode(String(), validator=OneOf(LANGUAGES.keys())) 12 | 13 | 14 | class DatasetTerritories(SequenceSchema): 15 | territory = SchemaNode(String(), validator=OneOf(COUNTRIES.keys())) 16 | 17 | 18 | class DatasetForm(Schema): 19 | label = SchemaNode(String(), preparer=prepare_name, 20 | validator=Length(min=2)) 21 | name = SchemaNode(String(), preparer=prepare_name, 22 | validator=dataset_name) 23 | description = SchemaNode(String(), missing=drop) 24 | private = SchemaNode(Boolean(), missing=drop) 
25 | currency = SchemaNode(String(), missing=drop, 26 | validator=OneOf(CURRENCIES.keys())) 27 | category = SchemaNode(String(), missing=drop, 28 | validator=OneOf(CATEGORIES.keys())) 29 | languages = DatasetLanguages(missing=drop) 30 | territories = DatasetTerritories(missing=drop) 31 | 32 | 33 | class Managers(SequenceSchema): 34 | manager = SchemaNode(AccountRef()) 35 | 36 | 37 | class ManagersForm(Schema): 38 | managers = Managers(missing=[]) 39 | 40 | 41 | def validate_dataset(data): 42 | return DatasetForm().deserialize(data) 43 | 44 | 45 | def validate_managers(data): 46 | return ManagersForm().deserialize(data) 47 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/csv_import/quoting/model.json: -------------------------------------------------------------------------------- 1 | { 2 | "dimensions": { 3 | "entry_id": { 4 | "attributes": { 5 | "entry_id": { 6 | "column": "id", 7 | "label": "ID" 8 | } 9 | }, 10 | "key_attribute": "entry_id", 11 | "label": "Entry ID" 12 | }, 13 | "from": { 14 | "attributes": { 15 | "label": { 16 | "column": "paid_by", 17 | "label": "Label" 18 | }, 19 | "name": { 20 | "column": "paid_by", 21 | "label": "Name" 22 | } 23 | }, 24 | "key_attribute": "name", 25 | "label": "Spender" 26 | }, 27 | "time": { 28 | "attributes": { 29 | "year": { 30 | "column": "date", 31 | "label": "Year" 32 | } 33 | }, 34 | "key_attribute": "year", 35 | "label": "Time" 36 | }, 37 | "to": { 38 | "attributes": { 39 | "label": { 40 | "column": "paid_to", 41 | "label": "Label" 42 | }, 43 | "name": { 44 | "column": "paid_to", 45 | "label": "Name" 46 | } 47 | }, 48 | "key_attribute": "name", 49 | "label": "Recipient" 50 | } 51 | }, 52 | "measures": { 53 | "amount": { 54 | "column": "amount", 55 | "label": "Amount" 56 | } 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /spendb/tests/views/api/test_run.py: 
-------------------------------------------------------------------------------- 1 | from flask import url_for 2 | 3 | from spendb.core import db 4 | from spendb.model import Dataset 5 | from spendb.tests.base import ControllerTestCase 6 | from spendb.tests.helpers import load_fixture, make_account 7 | from spendb.tests.helpers import data_fixture 8 | 9 | 10 | class TestRunApiController(ControllerTestCase): 11 | 12 | def setUp(self): 13 | super(TestRunApiController, self).setUp() 14 | self.cra = load_fixture('cra') 15 | self.user = make_account('test') 16 | self.auth_qs = {'api_key': self.user.api_key} 17 | self.cra.managers.append(self.user) 18 | db.session.commit() 19 | url = url_for('sources_api.upload', dataset=self.cra.name) 20 | fh = data_fixture('cra') 21 | self.source = self.client.post(url, data={ 22 | 'file': (fh, 'cra.csv') 23 | }, query_string=self.auth_qs).json 24 | 25 | def test_runs_index(self): 26 | url = url_for('runs_api.index', dataset=self.cra.name) 27 | res = self.client.get(url) 28 | assert res.json['total'] == 1, res.json 29 | frst = res.json['results'][0] 30 | assert frst['status'] == 'complete', frst 31 | assert 'messages' not in frst, frst 32 | 33 | def test_runs_index_filter(self): 34 | url = url_for('runs_api.index', dataset=self.cra.name, source='foo') 35 | res = self.client.get(url) 36 | assert res.json['total'] == 0, res.json 37 | 38 | def test_runs_view(self): 39 | url = url_for('runs_api.view', dataset=self.cra.name, id=1) 40 | res = self.client.get(url) 41 | assert res.json['status'] == 'complete', res.json 42 | assert len(res.json['messages']), res.json 43 | -------------------------------------------------------------------------------- /spendb/command/importer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | import urllib2 4 | import urlparse 5 | import json 6 | 7 | from colander import Invalid 8 | 9 | from spendb.model import Dataset 10 | from spendb.core 
import db 11 | from spendb.validation.model import validate_model 12 | 13 | log = logging.getLogger(__name__) 14 | 15 | 16 | def _is_local_file(url): 17 | """ Check to see if the provided url is a local file. """ 18 | parsed_result = urlparse.urlparse(url) 19 | return parsed_result.scheme in ['', 'file'] 20 | 21 | 22 | def json_of_url(url): 23 | if _is_local_file(url): 24 | url = url.replace('file://', '') 25 | return json.load(open(url, 'r')) 26 | else: 27 | return json.load(urllib2.urlopen(url)) 28 | 29 | 30 | def get_model(model): 31 | """ Get and validate the model. If the model doesn't validate 32 | we exit the program. """ 33 | model = json_of_url(model) 34 | 35 | # Validate the model 36 | try: 37 | log.info("Validating model") 38 | model = validate_model(model) 39 | except Invalid as i: 40 | log.error("Errors occured during model validation:") 41 | for field, error in i.asdict().items(): 42 | log.error("%s: %s", field, error) 43 | sys.exit(1) 44 | return model 45 | 46 | 47 | def get_or_create_dataset(model): 48 | """ Based on a provided model we get the model (if it doesn't 49 | exist we create it). 
""" 50 | dataset = Dataset.by_name(model['dataset']['name']) 51 | 52 | # If the dataset wasn't found we create it 53 | if dataset is None: 54 | dataset = Dataset(model) 55 | db.session.add(dataset) 56 | db.session.commit() 57 | 58 | log.info("Dataset: %s", dataset.name) 59 | return dataset 60 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/csv_import/simple/model.json: -------------------------------------------------------------------------------- 1 | { 2 | "dimensions": { 3 | "entry_id": { 4 | "attributes": { 5 | "entry_id": { 6 | "column": "id", 7 | "type": "string" 8 | } 9 | }, 10 | "label": "Unique transaction ID" 11 | }, 12 | "from": { 13 | "attributes": { 14 | "label": { 15 | "column": "paid_by", 16 | "type": "string" 17 | }, 18 | "name": { 19 | "column": "paid_by", 20 | "type": "string" 21 | } 22 | }, 23 | "description": "Payer", 24 | "label": "Payer", 25 | "type": "entity" 26 | }, 27 | "time": { 28 | "attributes": { 29 | "year": { 30 | "column": "date", 31 | "type": "integer" 32 | } 33 | }, 34 | "label": "Time" 35 | }, 36 | "to": { 37 | "attributes": { 38 | "label": { 39 | "column": "paid_to", 40 | "datatype": "string" 41 | }, 42 | "name": { 43 | "column": "paid_to", 44 | "datatype": "id" 45 | } 46 | }, 47 | "description": "Payee", 48 | "label": "Payee", 49 | "type": "entity" 50 | } 51 | }, 52 | "measures": { 53 | "amount": { 54 | "column": "amount", 55 | "description": "Amount", 56 | "label": "Amount", 57 | "type": "number" 58 | } 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /CONTRIBUTORS: -------------------------------------------------------------------------------- 1 | Alberto Rodriguez Peon 2 | Alistair Turnbull 3 | Andrew Suffield 4 | Andy Lulham 5 | Anna Powell Smith 6 | Carsten Senger 7 | Colin O'Neill 8 | David Jones 9 | Deon Bredenhann 10 | Friedrich Lindenberg 11 | garethpdx 12 | Gregor Aisch 13 | Helen ST 14 | Jake Madison 15 | John 
Wasack 16 | Jorge C. Leitão 17 | Justin Duke 18 | Kristian Glass 19 | Kristján Oddsson 20 | Martin Keegan 21 | Michael Bauer 22 | Nathan Hilbert 23 | Nick Stenning 24 | Nigel Babu 25 | Randal Moore 26 | Rufus Pollock 27 | Sander van der Waal 28 | Stefan Wehrmeyer 29 | Takashi Nishibayashi 30 | Telmo Brugnara 31 | Tony Hirst 32 | Tryggvi Björgvinsson 33 | Vitor Baptista 34 | 35 | 36 | Noun Project graphics used: 37 | 38 | Bank by Till Teenck from the Noun Project 39 | accounting by Kevin Augustine LO from the Noun Project 40 | Money by Nate Eul from the Noun Project 41 | finance by Vladislav Sergeev from the Noun Project 42 | Planning by Ivan Colic from the Noun Project 43 | Bank by anbileru adaleru from the Noun Project 44 | George Washington by Leonardo Schneider from the Noun Project 45 | bar graph by Anusha Narvekar from the Noun Project 46 | 47 | -------------------------------------------------------------------------------- /spendb/model/facets.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | from sqlalchemy.sql.expression import select, func 4 | 5 | from spendb.core import db 6 | 7 | 8 | class DatasetFacetMixin(object): 9 | 10 | @classmethod 11 | def dataset_counts(cls, datasets_q): 12 | sq = datasets_q.subquery() 13 | q = select([cls.code, func.count(cls.dataset_id)], 14 | group_by=cls.code, 15 | order_by=func.count(cls.dataset_id).desc()) 16 | q = q.where(cls.dataset_id == sq.c.id) 17 | return db.session.bind.execute(q).fetchall() 18 | 19 | 20 | class DatasetLanguage(db.Model, DatasetFacetMixin): 21 | __tablename__ = 'dataset_language' 22 | 23 | id = db.Column(db.Integer, primary_key=True) 24 | code = db.Column(db.Unicode) 25 | created_at = db.Column(db.DateTime, default=datetime.utcnow) 26 | updated_at = db.Column(db.DateTime, onupdate=datetime.utcnow) 27 | 28 | dataset_id = db.Column(db.Integer, db.ForeignKey('dataset.id')) 29 | dataset = db.relationship('Dataset', 
backref=db.backref('_languages', 30 | lazy=False)) 31 | 32 | def __init__(self, code): 33 | self.code = code 34 | 35 | 36 | class DatasetTerritory(db.Model, DatasetFacetMixin): 37 | __tablename__ = 'dataset_territory' 38 | 39 | id = db.Column(db.Integer, primary_key=True) 40 | code = db.Column(db.Unicode) 41 | created_at = db.Column(db.DateTime, default=datetime.utcnow) 42 | updated_at = db.Column(db.DateTime, onupdate=datetime.utcnow) 43 | 44 | dataset_id = db.Column(db.Integer, db.ForeignKey('dataset.id')) 45 | dataset = db.relationship('Dataset', backref=db.backref('_territories', 46 | lazy=False)) 47 | 48 | def __init__(self, code): 49 | self.code = code 50 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/csv_import/empty_additional_date/model.json: -------------------------------------------------------------------------------- 1 | { 2 | "dimensions": { 3 | "additionaldate": { 4 | "attributes": { 5 | "year": { 6 | "column": "additional_date", 7 | "type": "integer" 8 | } 9 | }, 10 | "label": "Additional Date" 11 | }, 12 | "entry_id": { 13 | "attributes": { 14 | "entry_id": { 15 | "column": "id", 16 | "type": "string" 17 | } 18 | }, 19 | "label": "Entry ID" 20 | }, 21 | "from": { 22 | "attributes": { 23 | "label": { 24 | "column": "paid_by", 25 | "type": "string" 26 | }, 27 | "name": { 28 | "column": "paid_by", 29 | "type": "string" 30 | } 31 | }, 32 | "label": "Spender" 33 | }, 34 | "time": { 35 | "attributes": { 36 | "year": { 37 | "column": "date", 38 | "type": "integer" 39 | } 40 | }, 41 | "label": "Time" 42 | }, 43 | "to": { 44 | "attributes": { 45 | "label": { 46 | "column": "paid_to", 47 | "type": "string" 48 | }, 49 | "name": { 50 | "column": "paid_to", 51 | "type": "string" 52 | } 53 | }, 54 | "label": "Recipient" 55 | } 56 | }, 57 | "measures": { 58 | "amount": { 59 | "column": "amount", 60 | "type": "number", 61 | "label": "Amount" 62 | } 63 | } 64 | } 65 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SpenDB 2 | 3 | [![Build Status](https://travis-ci.org/spendb/spendb.png?branch=master)](https://travis-ci.org/spendb/spendb) 4 | [![Coverage Status](https://coveralls.io/repos/spendb/spendb/badge.svg)](https://coveralls.io/r/spendb/spendb) 5 | [![Join the chat at https://gitter.im/pudo/spendb](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/pudo/spendb?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) 6 | 7 | SpenDB is a project to make government finances easier to explore and understand. It started out as "Where does my money go", a platform to visualize the United Kingdom's state finance, but has been renamed and restructured to allow arbitrary financial data to be loaded and displayed. 8 | 9 | * Documentation is located in the [GitHub Wiki](https://github.com/spendb/spendb/wiki). 10 | * [Conceptual overview](https://github.com/spendb/spendb/wiki/Conceptual-overview). 11 | * [Using the web API](https://github.com/spendb/spendb/wiki/Web-API). 12 | * [Developer installation](https://github.com/pudo/spendb/wiki/Developer-installation). 13 | * Please report any [issues and feature ideas](https://github.com/spendb/spendb/issues) or browse the issue tracker for tickets to start contributing. 14 | * Related codebases: 15 | * [fiscalmodel](https://github.com/spendb/fiscalmodel), metadata used for budget dataset classification in spendb. 16 | * [babbage.ui](https://github.com/spendb/babbage.ui), front-end data analysis and visualisation library. 17 | * [babbage](https://github.com/spendb/babbage), data analysis API and analytical domain model. 18 | * [cubes](https://github.com/DataBrewery/cubes), data analysis API (deprecated) 19 | 20 | 21 | ## Licensing 22 | 23 | SpenDB's code is licensed under the GNU Affero Licence except where otherwise indicated. 
A copy of this licence is available in the file ``LICENSE``. 24 | 25 | This application is based on the Open Knowledge Foundation's OpenSpending platform. 26 | -------------------------------------------------------------------------------- /spendb/etl/tasks.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from archivekit import Source 4 | 5 | from spendb.core import db 6 | from spendb.etl.job import job 7 | from spendb.etl.extract import validate_table, load_table 8 | 9 | log = logging.getLogger(__name__) 10 | 11 | 12 | @job(operation='Import from file') 13 | def extract_fileobj(job, dataset, fh, file_name=None, mime_type=None): 14 | """ Upload contents of an opened fh to the data repository. """ 15 | meta = {'source_file': file_name} 16 | if mime_type is not None: 17 | meta['mime_type'] = mime_type 18 | source = job.package.ingest(fh, meta=meta, overwrite=False) 19 | source.save() 20 | job.set_source(source) 21 | return source 22 | 23 | 24 | @job(operation='Import from URL') 25 | def extract_url(job, dataset, url): 26 | """ Upload contents of a URL to the data repository. """ 27 | source = job.package.ingest(url, overwrite=False) 28 | if source is None: 29 | return 30 | source.save() 31 | job.set_source(source) 32 | return source 33 | 34 | 35 | @job(operation='Clean up source data') 36 | def transform_source(job, dataset, source_name): 37 | """ Transform the contents of an uploaded source dataset to a 38 | well-understood file format. """ 39 | source = Source(job.package, source_name) 40 | job.set_source(source) 41 | source = validate_table(source) 42 | if source.meta.get('num_failed') > 0: 43 | return job.failed() 44 | return source 45 | 46 | 47 | @job(operation='Load to database') 48 | def load(job, dataset, source_name): 49 | """ Load the table artifact for this dataset into the fact 50 | table. 
""" 51 | source = Source(job.package, source_name) 52 | job.set_source(source) 53 | dataset.data = {} 54 | dataset.fields = source.meta.get('fields', {}) 55 | if not len(dataset.fields): 56 | raise ValueError('No columns recognized in source data.') 57 | 58 | db.session.commit() 59 | dataset.fact_table.drop() 60 | dataset.fact_table.create() 61 | dataset.fact_table.load_iter(load_table(source)) 62 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/meta/simple.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset": { 3 | "description": "I'm a banana!", 4 | "label": "Test Case Model", 5 | "name": "test" 6 | }, 7 | "model": { 8 | "dimensions": { 9 | "field": { 10 | "attributes": { 11 | "field": { 12 | "column": "field", 13 | "label": "Field" 14 | } 15 | }, 16 | "key_attribute": "field", 17 | "label": "Field 1" 18 | }, 19 | "function": { 20 | "attributes": { 21 | "label": { 22 | "column": "func_label", 23 | "label": "Label" 24 | }, 25 | "name": { 26 | "column": "func_name", 27 | "label": "Name" 28 | } 29 | }, 30 | "key_attribute": "name", 31 | "label_attribute": "label", 32 | "label": "Function code" 33 | }, 34 | "time": { 35 | "attributes": { 36 | "year": { 37 | "column": "year", 38 | "label": "Year" 39 | } 40 | }, 41 | "key_attribute": "year", 42 | "label": "Year" 43 | }, 44 | "to": { 45 | "attributes": { 46 | "label": { 47 | "column": "to_label", 48 | "label": "Label" 49 | }, 50 | "name": { 51 | "column": "to_name", 52 | "label": "Name" 53 | } 54 | }, 55 | "key_attribute": "name", 56 | "label_attribute": "label", 57 | "label": "Einzelplan" 58 | } 59 | }, 60 | "measures": { 61 | "amount": { 62 | "column": "amount", 63 | "label": "Amount" 64 | } 65 | } 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /spendb/model/run.py: -------------------------------------------------------------------------------- 1 | from 
datetime import datetime 2 | 3 | from sqlalchemy.orm import relationship, backref 4 | from sqlalchemy.schema import Column, ForeignKey 5 | from sqlalchemy.types import Integer, Unicode, DateTime 6 | 7 | from spendb.core import db, url_for 8 | from spendb.model.dataset import Dataset 9 | 10 | 11 | class Run(db.Model): 12 | """ A run is a generic grouping object for background operations 13 | that perform logging to the frontend. """ 14 | __tablename__ = 'run' 15 | 16 | # Status values 17 | STATUS_RUNNING = 'running' 18 | STATUS_COMPLETE = 'complete' 19 | STATUS_FAILED = 'failed' 20 | 21 | id = Column(Integer, primary_key=True) 22 | operation = Column(Unicode()) 23 | status = Column(Unicode()) 24 | source = Column(Unicode()) 25 | time_start = Column(DateTime, default=datetime.utcnow) 26 | time_end = Column(DateTime) 27 | 28 | dataset_id = Column(Integer, ForeignKey('dataset.id'), nullable=True) 29 | dataset = relationship(Dataset, 30 | backref=backref('runs', 31 | order_by='Run.time_start.desc()', 32 | lazy='dynamic')) 33 | 34 | def __init__(self, operation, status, dataset): 35 | self.operation = operation 36 | self.status = status 37 | self.dataset = dataset 38 | 39 | def to_dict(self): 40 | return { 41 | 'id': self.id, 42 | 'api_url': url_for('runs_api.view', dataset=self.dataset.name, 43 | id=self.id), 44 | 'operation': self.operation, 45 | 'status': self.status, 46 | 'source': self.source, 47 | 'time_start': self.time_start, 48 | 'time_end': self.time_end 49 | } 50 | 51 | @classmethod 52 | def all(cls, dataset): 53 | q = db.session.query(cls).filter_by(dataset=dataset) 54 | return q.order_by(cls.time_start.asc()) 55 | 56 | @classmethod 57 | def by_id(cls, dataset, id): 58 | return cls.all(dataset).filter_by(id=id).first() 59 | 60 | def __repr__(self): 61 | return "" % (self.source, self.id, self.status) 62 | -------------------------------------------------------------------------------- /spendb/views/__init__.py: 
-------------------------------------------------------------------------------- 1 | from cubes.server import slicer 2 | from colander import Invalid 3 | from jsonschema import ValidationError 4 | from babbage import api as babbage_api 5 | 6 | from spendb.model.manager import SpendingCubeManager 7 | from spendb.views.context import home, get_locale 8 | from spendb.views.error import NotModified, handle_not_modified 9 | from spendb.views.error import handle_error, handle_invalid 10 | from spendb.views.error import handle_validation_error 11 | from spendb.views.api.dataset import blueprint as datasets_api 12 | from spendb.views.api.meta import blueprint as meta_api 13 | from spendb.views.api.session import blueprint as session_api 14 | from spendb.views.api.source import blueprint as source_api 15 | from spendb.views.api.run import blueprint as run_api 16 | from spendb.views.api.account import blueprint as account_api 17 | 18 | 19 | def register_views(app, babel): 20 | babel.locale_selector_func = get_locale 21 | 22 | app.register_blueprint(meta_api, url_prefix='/api/3') 23 | app.register_blueprint(session_api, url_prefix='/api/3') 24 | app.register_blueprint(run_api, url_prefix='/api/3') 25 | app.register_blueprint(source_api, url_prefix='/api/3') 26 | app.register_blueprint(datasets_api, url_prefix='/api/3') 27 | app.register_blueprint(account_api, url_prefix='/api/3') 28 | 29 | # expose ``babbage``: 30 | babbage_api.configure_api(app, SpendingCubeManager()) 31 | app.register_blueprint(babbage_api.blueprint, url_prefix='/api/babbage') 32 | 33 | # expose ``cubes``: 34 | app.register_blueprint(slicer, url_prefix='/api/slicer', config={}) 35 | 36 | app.register_blueprint(home) 37 | 38 | app.error_handler_spec[None][400] = handle_error 39 | app.error_handler_spec[None][401] = handle_error 40 | app.error_handler_spec[None][402] = handle_error 41 | app.error_handler_spec[None][403] = handle_error 42 | app.error_handler_spec[None][404] = handle_error 43 | 
app.error_handler_spec[None][500] = handle_error 44 | 45 | custom = ( 46 | (Invalid, handle_invalid), 47 | (ValidationError, handle_validation_error), 48 | (NotModified, handle_not_modified) 49 | ) 50 | app.error_handler_spec[None][None] = custom 51 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup, find_packages 3 | 4 | 5 | PKG_ROOT = os.path.abspath(os.__file__) 6 | 7 | 8 | def files_in_pkgdir(pkg, dirname): 9 | pkgdir = os.path.join(PKG_ROOT, *pkg.split('.')) 10 | walkdir = os.path.join(pkgdir, dirname) 11 | walkfiles = [] 12 | for dirpath, _, files in os.walk(walkdir): 13 | fpaths = (os.path.relpath(os.path.join(dirpath, f), pkgdir) 14 | for f in files) 15 | walkfiles += fpaths 16 | return walkfiles 17 | 18 | try: 19 | import spendb 20 | release = spendb.__version__ 21 | except: 22 | release = 'dev' 23 | 24 | 25 | def package_filter(pkg): 26 | """ 27 | Filter packages so that we exclude test cases but include regular test 28 | objects available in spendb.tests' modules (all test cases are 29 | in subdirectories). 30 | """ 31 | 32 | # We want to include spendb.tests but not its subpackages 33 | # Hence we only check for things starting with spendb.tests. 
34 | # (note the trailing period to denote subpackages) 35 | return not pkg.startswith('spendb.tests.') 36 | 37 | setup( 38 | name='spendb', 39 | version=release, 40 | description='SpenDB', 41 | author='Friedrich Lindenberg (formerly OKFN)', 42 | author_email='friedrich@pudo.org', 43 | url='http://github.com/spendb/spendb', 44 | install_requires=[], 45 | setup_requires=[], 46 | packages=filter(package_filter, find_packages()), 47 | namespace_packages=['spendb'], 48 | package_data={ 49 | 'spendb': ( 50 | files_in_pkgdir('spendb', 'static') + 51 | files_in_pkgdir('spendb', 'templates') 52 | ) 53 | }, 54 | test_suite='nose.collector', 55 | zip_safe=False, 56 | entry_points={ 57 | 'console_scripts': [ 58 | 'spendb = spendb.command:main' 59 | ], 60 | 'cubes.providers': [ 61 | 'spending = spendb.model.provider:SpendingModelProvider' 62 | ], 63 | 'cubes.stores': [ 64 | 'spending = spendb.model.provider:SpendingStore' 65 | ] 66 | }, 67 | message_extractors={ 68 | 'spendb': [('**.py', 'python', None), 69 | ('templates/**.html', 'jinja2', None), 70 | ('static/**', 'ignore', None)] 71 | }, 72 | ) 73 | -------------------------------------------------------------------------------- /spendb/tests/validation/test_dataset.py: -------------------------------------------------------------------------------- 1 | from colander import Invalid 2 | from nose.tools import raises 3 | 4 | from spendb.validation.dataset import validate_dataset 5 | 6 | from spendb.tests.base import TestCase 7 | from spendb.tests.helpers import validation_fixture 8 | 9 | 10 | class TestDataset(TestCase): 11 | 12 | def setUp(self): 13 | super(TestDataset, self).setUp() 14 | self.model = validation_fixture('default') 15 | 16 | def test_basic_validate(self): 17 | try: 18 | ds = self.model['dataset'] 19 | out = validate_dataset(ds) 20 | assert sorted(out.keys()) == sorted(ds.keys()), [out, ds] 21 | except Invalid, i: 22 | assert False, i.asdict() 23 | 24 | @raises(Invalid) 25 | def test_underscore_validate(self): 
26 | ds = self.model['dataset'].copy() 27 | ds['name'] = 'test__' 28 | validate_dataset(ds) 29 | 30 | @raises(Invalid) 31 | def test_reserved_name_validate(self): 32 | ds = self.model['dataset'].copy() 33 | ds['name'] = 'entRY' 34 | validate_dataset(ds) 35 | 36 | @raises(Invalid) 37 | def test_invalid_currency(self): 38 | ds = self.model['dataset'].copy() 39 | ds['currency'] = 'glass pearls' 40 | validate_dataset(ds) 41 | 42 | @raises(Invalid) 43 | def test_invalid_category(self): 44 | ds = self.model['dataset'].copy() 45 | ds['category'] = 'giraffes' 46 | validate_dataset(ds) 47 | 48 | @raises(Invalid) 49 | def test_invalid_language(self): 50 | ds = self.model['dataset'].copy() 51 | ds['languages'].append('esperanto') 52 | validate_dataset(ds) 53 | 54 | @raises(Invalid) 55 | def test_invalid_country(self): 56 | ds = self.model['dataset'].copy() 57 | ds['territories'].append('SU') 58 | validate_dataset(ds) 59 | 60 | @raises(Invalid) 61 | def test_no_label(self): 62 | ds = self.model['dataset'].copy() 63 | del ds['label'] 64 | validate_dataset(ds) 65 | 66 | @raises(Invalid) 67 | def test_empty_label(self): 68 | ds = self.model['dataset'].copy() 69 | ds['label'] = ' ' 70 | validate_dataset(ds) 71 | 72 | def test_no_description(self): 73 | ds = self.model['dataset'].copy() 74 | del ds['description'] 75 | validate_dataset(ds) 76 | -------------------------------------------------------------------------------- /spendb/validation/common.py: -------------------------------------------------------------------------------- 1 | import re 2 | from colander import Function, All, Length, null, Invalid 3 | 4 | RESERVED_TERMS = ['entry', 'entries', 'dataset', 'datasets', 'dimension', 5 | 'dimensions', 'editor', 'meta', 'id', 'login', 'logout', 6 | 'settings', 'browser', 'explorer', 'member', 'register', 7 | 'after_login', 'after_logout', 'locale', 'reporterror', 8 | 'getinvolved', 'api', '500', 'error', 'url', 'model', 9 | 'distinct', 'views', 'new'] 10 | 11 | 12 | def 
_dataset_name(name): 13 | """ These are names that have a special meaning in URLs and 14 | cannot be used for dataset names. """ 15 | if name is not None and name.lower() in RESERVED_TERMS: 16 | return "'%s' is a reserved word and cannot be used here" % name 17 | if not re.match(r"^\w[\w\_\-]+$", name): 18 | return ("Name must include only " 19 | "letters, numbers, dashes and underscores") 20 | if '__' in name: 21 | return "Double underscores are not allowed in dataset names." 22 | return True 23 | 24 | 25 | dataset_name = All(Length(min=2, max=30), Function(_dataset_name)) 26 | 27 | 28 | def _field_name(name): 29 | """ These are names that have a special meaning in URLs and 30 | cannot be used for dataset names. """ 31 | if not re.match(r"^\w[\w\_]+$", name): 32 | return ("Name must include only letters, numbers and underscores") 33 | if '__' in name: 34 | return "Double underscores are not allowed in field names." 35 | return True 36 | 37 | 38 | field_name = All(Length(min=2, max=60), Function(_field_name)) 39 | 40 | 41 | def prepare_name(name): 42 | """ Convert a given value to a name. """ 43 | if name is None or name is null: 44 | return '' 45 | return unicode(name).strip() 46 | 47 | 48 | def require_one_child(data): 49 | if isinstance(data, dict) and len(data.keys()): 50 | return True 51 | return "Must have at least one dimension and one measure." 52 | 53 | 54 | class Ref(object): 55 | 56 | def deserialize(self, node, cstruct): 57 | if cstruct is null: 58 | return null 59 | value = self.decode(cstruct) 60 | if value is None: 61 | raise Invalid(node, 'Missing') 62 | return value 63 | 64 | def cstruct_children(self, node, cstruct): 65 | return [] 66 | -------------------------------------------------------------------------------- /spendb/command/__init__.py: -------------------------------------------------------------------------------- 1 | ''' Interface to common administrative tasks for SpenDB. 
''' 2 | import logging 3 | from flask.ext.script import Manager 4 | from flask.ext.migrate import MigrateCommand 5 | 6 | from spendb.core import create_web_app 7 | from spendb.tasks import load_from_url 8 | from spendb.command import db 9 | from spendb.command.importer import get_or_create_dataset, get_model 10 | 11 | log = logging.getLogger(__name__.split('.')[0]) 12 | app = create_web_app() 13 | manager = Manager(app, description=__doc__) 14 | 15 | manager.add_command('db', db.manager) 16 | manager.add_command('alembic', MigrateCommand) 17 | 18 | 19 | @manager.command 20 | def grantadmin(username): 21 | """ Grant admin privileges to given user """ 22 | from spendb.model import meta as db 23 | from spendb.model.account import Account 24 | 25 | account = Account.by_name(username) 26 | if account is None: 27 | raise Exception("Account `%s` not found." % username) 28 | 29 | account.admin = True 30 | db.session.add(account) 31 | db.session.commit() 32 | 33 | 34 | @manager.option('-n', '--dry-run', dest='dry_run', action='store_true', 35 | help="Perform a dry run, don't load any data.") 36 | @manager.option('-i', '--index', dest='build_indices', action='store_true', 37 | help="Suppress Solr index build.") 38 | @manager.option('--max-lines', action="store", dest='max_lines', type=int, 39 | default=None, metavar='N', 40 | help="Number of lines to import.") 41 | @manager.option('--raise-on-error', action="store_true", 42 | dest='raise_errors', default=False, 43 | help='Get full traceback on first error.') 44 | @manager.option('--model', action="store", dest='model', 45 | default=None, metavar='url', required=True, 46 | help="URL of JSON format model (metadata and mapping).") 47 | @manager.option('--visualisations', action="store", dest="views", 48 | default=None, metavar='url/file', 49 | help="URL/file of JSON format visualisations.") 50 | @manager.option('data_url', help="Data file URL") 51 | @manager.command 52 | def csvimport(**args): 53 | """ Load a CSV dataset """ 54 
| model = get_model(args['model']) 55 | dataset = get_or_create_dataset(model) 56 | load_from_url(dataset, args['data_url']) 57 | 58 | 59 | def main(): 60 | manager.run() 61 | 62 | if __name__ == "__main__": 63 | main() 64 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/csv_import/mexico/data.csv: -------------------------------------------------------------------------------- 1 | id,RAMO,TPP,GPP,IPP,PP,UR,GF,FUNC,SF,AI,TG,FF,OG,IMPORTE PEF,DATE,TO,FROM 2 | 1,01 Poder Legislativo,1 Programas Federales,2 Desempeno de las Funciones,K Proyectos de Inversion,025 Proyectos de inmuebles (oficinas administrativas),200 H. Camara de Senadores,Gobierno,Legislacion,Legislacion,Llevar a cabo el proceso Legislativo,Gasto de obra publica,Recursos fiscales,6200 Obra publica en bienes propios,580000000,2011-01-01,Society 3 | 2,01 Poder Legislativo,1 Programas Federales,2 Desempeno de las Funciones,K Proyectos de Inversion,027 Mantenimiento de Infraestructura,100 H. Camara de Diputados,Gobierno,Legislacion,Legislacion,Llevar a cabo el proceso Legislativo,Gasto de obra publica,Recursos fiscales,6200 Obra publica en bienes propios,144000000,2011-01-01,Society 4 | 3,01 Poder Legislativo,1 Programas Federales,2 Desempeno de las Funciones,R Especificos,001 Actividades derivadas del trabajo legislativo,100 H. Camara de Diputados,Gobierno,Legislacion,Legislacion,Llevar a cabo el proceso Legislativo,Gasto corriente,Recursos fiscales,1100 Remuneraciones al personal de caracter permanente,898000000,2011-01-01,Society 5 | 4,01 Poder Legislativo,1 Programas Federales,2 Desempeno de las Funciones,R Especificos,001 Actividades derivadas del trabajo legislativo,100 H. 
Camara de Diputados,Gobierno,Legislacion,Legislacion,Llevar a cabo el proceso Legislativo,Gasto corriente,Recursos fiscales,1200 Remuneraciones al personal de caracter transitorio,431000000,2011-01-01,Society 6 | 5,01 Poder Legislativo,1 Programas Federales,2 Desempeno de las Funciones,R Especificos,001 Actividades derivadas del trabajo legislativo,100 H. Camara de Diputados,Gobierno,Legislacion,Legislacion,Llevar a cabo el proceso Legislativo,Gasto corriente,Recursos fiscales,1300 Remuneraciones adicionales y especiales,358000000,2011-01-01,Society 7 | 6,01 Poder Legislativo,1 Programas Federales,2 Desempeno de las Funciones,R Especificos,001 Actividades derivadas del trabajo legislativo,100 H. Camara de Diputados,Gobierno,Legislacion,Legislacion,Llevar a cabo el proceso Legislativo,Gasto corriente,Recursos fiscales,1400 Seguridad social,187000000,2011-01-01,Society 8 | 7,01 Poder Legislativo,1 Programas Federales,2 Desempeno de las Funciones,R Especificos,001 Actividades derivadas del trabajo legislativo,100 H. 
Camara de Diputados,Gobierno,Legislacion,Legislacion,Llevar a cabo el proceso Legislativo,Gasto corriente,Recursos fiscales,1500 Otras prestaciones sociales y economicas,883000000,2011-01-01,Society 9 | -------------------------------------------------------------------------------- /spendb/views/context.py: -------------------------------------------------------------------------------- 1 | from flask import current_app, request, session 2 | from flask.ext.login import current_user 3 | from babel import Locale 4 | from apikit import cache_hash 5 | 6 | from spendb import __version__ 7 | from spendb.core import babel 8 | from spendb.views.error import NotModified 9 | from spendb.views.home import blueprint as home 10 | 11 | 12 | def get_locale(): 13 | if 'locale' in session: 14 | return Locale.parse(session.get('locale')) 15 | else: 16 | requested = request.accept_languages.values() 17 | requested = [l.replace('-', '_') for l in requested] 18 | available = map(unicode, babel.list_translations()) 19 | return Locale.negotiate(available, requested) 20 | 21 | 22 | @home.before_app_request 23 | def before_request(): 24 | current_app.cubes_workspace.flush_lookup_cache() 25 | request._http_etag = None 26 | request._http_private = False 27 | 28 | 29 | @home.after_app_request 30 | def after_request(resp): 31 | resp.headers['Server'] = 'SpenDB/%s' % __version__ 32 | 33 | if resp.is_streamed and request.endpoint != 'static': 34 | # http://wiki.nginx.org/X-accel#X-Accel-Buffering 35 | resp.headers['X-Accel-Buffering'] = 'no' 36 | 37 | # skip cache under these conditions: 38 | if not current_app.config.get('CACHE') \ 39 | or request.method not in ['GET', 'HEAD', 'OPTIONS'] \ 40 | or resp.status_code > 399: 41 | resp.cache_control.no_cache = True 42 | return resp 43 | 44 | if request.endpoint == 'static': 45 | resp.cache_control.max_age = 3600 * 6 46 | resp.cache_control.public = True 47 | 48 | if request._http_etag: 49 | if not request._http_private: 50 | 
resp.cache_control.public = True 51 | else: 52 | resp.cache_control.private = True 53 | resp.cache_control.max_age = 3600 * 6 54 | resp.cache_control.must_revalidate = True 55 | resp.set_etag(request._http_etag) 56 | 57 | return resp 58 | 59 | 60 | def etag_cache_keygen(key_obj, private=False): 61 | request._http_private = private 62 | 63 | args = sorted(set(request.args.items())) 64 | # jquery where is your god now?!? 65 | args = filter(lambda (k, v): k != '_', args) 66 | 67 | request._http_etag = cache_hash(args, current_user, 68 | key_obj, get_locale()) 69 | if request.if_none_match == request._http_etag: 70 | raise NotModified() 71 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/csv_import/lbhf/model.json: -------------------------------------------------------------------------------- 1 | { 2 | "dimensions": { 3 | "entry_id": { 4 | "attributes": { 5 | "entry_id": { 6 | "column": "id", 7 | "label": "string" 8 | } 9 | }, 10 | "key_attribute": "entry_id", 11 | "label": "Entry ID" 12 | }, 13 | "from": { 14 | "attributes": { 15 | "label": { 16 | "column": "paid_by", 17 | "label": "string" 18 | }, 19 | "name": { 20 | "column": "paid_by", 21 | "label": "string" 22 | } 23 | }, 24 | "key_attribute": "name", 25 | "description": "Payer", 26 | "label": "Payer" 27 | }, 28 | "spendingarea": { 29 | "attributes": { 30 | "spendingarea": { 31 | "column": "spending_area", 32 | "label": "string" 33 | } 34 | }, 35 | "key_attribute": "spendingarea", 36 | "description": "Spending Area", 37 | "label": "Spending Area" 38 | }, 39 | "time": { 40 | "attributes": { 41 | "year": { 42 | "column": "date", 43 | "label": "Year" 44 | } 45 | }, 46 | "key_attribute": "year", 47 | "label": "Time" 48 | }, 49 | "to": { 50 | "attributes": { 51 | "label": { 52 | "column": "paid_to", 53 | "label": "Label" 54 | }, 55 | "name": { 56 | "column": "paid_to", 57 | "label": "Name" 58 | } 59 | }, 60 | "key_attribute": "name", 61 | "description": 
"Payee", 62 | "label": "Payee" 63 | }, 64 | "transactionid": { 65 | "attributes": { 66 | "transactionid": { 67 | "column": "transaction_id", 68 | "label": "ID" 69 | } 70 | }, 71 | "key_attribute": "transactionid", 72 | "description": "Reference", 73 | "label": "Reference" 74 | } 75 | }, 76 | "measures": { 77 | "amount": { 78 | "column": "amount", 79 | "label": "Amount" 80 | } 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/csv_import/sample/model.json: -------------------------------------------------------------------------------- 1 | { 2 | "dimensions": { 3 | "entry_id": { 4 | "attributes": { 5 | "entry_id": { 6 | "column": "id", 7 | "label": "ID" 8 | } 9 | }, 10 | "key_attribute": "entry_id", 11 | "label": "Unique transaction ID" 12 | }, 13 | "from": { 14 | "attributes": { 15 | "label": { 16 | "column": "paid_by", 17 | "label": "Label" 18 | }, 19 | "name": { 20 | "column": "paid_by", 21 | "label": "name" 22 | } 23 | }, 24 | "key_attribute": "name", 25 | "description": "Payer", 26 | "label": "Payer" 27 | }, 28 | "spendingarea": { 29 | "attributes": { 30 | "spendingarea": { 31 | "column": "spending_area", 32 | "label": "string" 33 | } 34 | }, 35 | "key_attribute": "spendingarea", 36 | "description": "Spending Area", 37 | "label": "Spending Area" 38 | }, 39 | "time": { 40 | "attributes": { 41 | "year": { 42 | "column": "date", 43 | "label": "integer" 44 | } 45 | }, 46 | "key_attribute": "year", 47 | "label": "Time" 48 | }, 49 | "to": { 50 | "attributes": { 51 | "label": { 52 | "column": "paid_to", 53 | "label": "Label" 54 | }, 55 | "name": { 56 | "column": "paid_to", 57 | "label": "Name" 58 | } 59 | }, 60 | "key_attribute": "name", 61 | "description": "Payee", 62 | "label": "Payee" 63 | }, 64 | "transactionid": { 65 | "attributes": { 66 | "transactionid": { 67 | "column": "transaction_id", 68 | "label": "string" 69 | } 70 | }, 71 | "key_attribute": "transactionid", 72 | "description": 
"Reference", 73 | "label": "Reference" 74 | } 75 | }, 76 | "measures": { 77 | "amount": { 78 | "column": "amount", 79 | "label": "Amount" 80 | } 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /contrib/assets/noun_161002_cc.svg: -------------------------------------------------------------------------------- 1 | Created by Kevin Augustine LOfrom the Noun Project -------------------------------------------------------------------------------- /contrib/os_export/archive_sources.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | import sys 4 | import os 5 | 6 | 7 | def grab_source(url, output): 8 | """ 9 | Grab a source from a url and store it in an output file 10 | 11 | This creates uses requests as a dependency because I'm lazy. 12 | It probably would have taken me less time to just write it with 13 | urllib than writing this docstring 14 | """ 15 | 16 | # We use stream because these files might be huge 17 | response = requests.get(url, stream=True) 18 | 19 | # We don't do anything if there's something wrong with the url 20 | # This is basically what made urllib.urlretrieve a hassle 21 | if not response.ok: 22 | return 23 | 24 | with open(output, 'w') as output_file: 25 | for block in response.iter_content(1024): 26 | output_file.write(block) 27 | 28 | 29 | def archive(directory): 30 | """ 31 | Archive a OpenSpending dataset export directory 32 | """ 33 | 34 | # If we accidentally pass in something that's not a directory 35 | # we don't do anything 36 | if not os.path.isdir(directory): 37 | return 38 | 39 | # Check if the directory contains a dataset.json file 40 | dataset = os.path.join(directory, 'dataset.json') 41 | if not os.path.isfile(dataset): 42 | return 43 | 44 | # Open the dataset.json file and grab the sources listed in it 45 | with open(dataset) as descriptor: 46 | data = json.load(descriptor) 47 | if len(data['sources']): 48 | # 
Create an archive directory because there are some 49 | # sources we want to grab 50 | archive_directory = os.path.join(directory, 'archive') 51 | if not os.path.exists(archive_directory): 52 | os.makedirs(archive_directory) 53 | 54 | # Loop through sources, grab them and store in an output file 55 | # called .csv 56 | for source in data['sources']: 57 | filename = '{0}.csv'.format(source['id']) 58 | archive_file = os.path.join(archive_directory, filename) 59 | grab_source(source['url'], output=archive_file) 60 | 61 | # If the archive directory is empty which will happen if 62 | # grabbing the sources failed for some reason 63 | if not os.listdir(archive_directory): 64 | os.rmdir(archive_directory) 65 | 66 | 67 | if __name__ == "__main__": 68 | # Loop through each of the arguments and archive them 69 | for directory in sys.argv[1:]: 70 | archive(directory) 71 | -------------------------------------------------------------------------------- /spendb/etl/job.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from functools import wraps 3 | from datetime import datetime 4 | 5 | from archivekit import Source 6 | from loadkit.logger import capture 7 | 8 | from spendb.core import data_manager, db 9 | from spendb.model.run import Run 10 | 11 | 12 | class Job(object): 13 | 14 | def __init__(self, dataset, operation): 15 | self.log = logging.getLogger('spendb.etl') 16 | self.dataset = dataset 17 | self.operation = operation 18 | self.run = None 19 | 20 | def start(self): 21 | self.run = Run(self.operation, Run.STATUS_RUNNING, self.dataset) 22 | db.session.add(self.run) 23 | db.session.commit() 24 | 25 | self.package = data_manager.package(self.dataset.name) 26 | modules = [self.log, 'loadkit'] 27 | self.log_handler = capture(self.package, self.run.id, modules) 28 | self.log.info("Starting: %s", self.operation) 29 | 30 | def set_source(self, source): 31 | self.run.source = source.name 32 | db.session.commit() 33 | 34 | 
def end(self, status): 35 | self.run.status = status 36 | self.run.time_end = datetime.utcnow() 37 | self.dataset.touch() 38 | db.session.commit() 39 | self.log_handler.archive() 40 | 41 | @property 42 | def running(self): 43 | return self.run and self.run.status == Run.STATUS_RUNNING 44 | 45 | def complete(self): 46 | if self.running: 47 | self.log.info("Completed: %s", self.operation) 48 | self.end(Run.STATUS_COMPLETE) 49 | 50 | def failed(self): 51 | if self.running: 52 | self.log.warn("Failed: %s", self.operation) 53 | self.end(Run.STATUS_FAILED) 54 | 55 | 56 | def job(operation=None): 57 | """ Wrap an ETL job. This will handle logging, run management 58 | and other tasks. It assumes the first positional argument is 59 | the dataset that this operation is performed on, and will 60 | inject another argument before that, the ``job``. """ 61 | 62 | def decorator(fn): 63 | @wraps(fn) 64 | def wrapper(dataset, *a, **kw): 65 | job = Job(dataset, operation or fn.__name__) 66 | try: 67 | job.start() 68 | result = fn(job, dataset, *a, **kw) 69 | if job.running: 70 | job.complete() 71 | return result 72 | except Exception, e: 73 | job.log.exception(e) 74 | job.failed() 75 | finally: 76 | if job.running: 77 | job.failed() 78 | return wrapper 79 | 80 | return decorator 81 | -------------------------------------------------------------------------------- /spendb/tests/etl/test_import_fixtures.py: -------------------------------------------------------------------------------- 1 | import urllib 2 | import json 3 | 4 | from spendb.core import db, data_manager 5 | from spendb.model import Dataset, Run 6 | from spendb.etl import tasks 7 | 8 | from spendb.tests.base import DatabaseTestCase 9 | from spendb.tests.helpers import csvimport_fixture_file 10 | from spendb.tests.helpers import csvimport_fixture_path 11 | 12 | 13 | def import_fixture(name): 14 | meta_fp = csvimport_fixture_file(name, 'meta.json') 15 | model_fp = csvimport_fixture_file(name, 'model.json') 16 | meta = 
json.load(meta_fp) 17 | if model_fp: 18 | meta['model'] = json.load(model_fp) 19 | dataset = Dataset(meta) 20 | db.session.add(dataset) 21 | data_path = csvimport_fixture_path(name, 'data.csv') 22 | db.session.commit() 23 | return dataset, data_path 24 | 25 | 26 | class TestImportFixtures(DatabaseTestCase): 27 | 28 | def setUp(self): 29 | super(TestImportFixtures, self).setUp() 30 | data_manager._index = None 31 | 32 | def tearDown(self): 33 | super(TestImportFixtures, self).tearDown() 34 | 35 | def count_lines_in_stream(self, f): 36 | from StringIO import StringIO 37 | return len(list(StringIO(f.read()))) 38 | 39 | def _test_import(self, name, lines=None): 40 | dataset, url = import_fixture(name) 41 | data = urllib.urlopen(url) 42 | if lines is None: 43 | lines = self.count_lines_in_stream(data) - 1 # -1 for header row 44 | 45 | source = tasks.extract_url(dataset, url) 46 | tasks.transform_source(dataset, source.name) 47 | tasks.load(dataset, source_name=source.name) 48 | 49 | for run in db.session.query(Run).all(): 50 | assert run.status == Run.STATUS_COMPLETE, run 51 | 52 | # check correct number of entries 53 | dataset = db.session.query(Dataset).first() 54 | q = dataset.fact_table.table.select() 55 | entries = db.engine.execute(q).fetchall() 56 | assert len(entries) == lines, len(entries) 57 | 58 | def test_imports_mexico(self): 59 | self._test_import('mexico') 60 | 61 | def test_imports_lbhf(self): 62 | self._test_import('lbhf') 63 | 64 | def test_imports_sample(self): 65 | self._test_import('sample') 66 | 67 | def test_imports_quoting(self): 68 | self._test_import('quoting', lines=5) 69 | 70 | def test_missing_url(self): 71 | dataset, url = import_fixture('file:///dev/null') 72 | source = tasks.extract_url(dataset, url) 73 | assert source is None, source 74 | 75 | for run in db.session.query(Run).all(): 76 | assert run.status == Run.STATUS_FAILED, run 77 | -------------------------------------------------------------------------------- /swarm.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "name": "spendb", 3 | "components": { 4 | "web": { 5 | "image": "registry.giantswarm.io/spendb/spendb:latest", 6 | "ports": 8000, 7 | "env": { 8 | "SITE_TITLE": "$site_title", 9 | "SECRET": "$secret", 10 | "DATABASE_URL": "$database_url", 11 | "AMQP_URL": "$amqp_url", 12 | "MAIL_SERVER": "$mail_server", 13 | "MAIL_PORT": "$mail_port", 14 | "MAIL_USERNAME": "$mail_username", 15 | "MAIL_PASSWORD": "$mail_password", 16 | "MAIL_DEFAULT_SENDER": "$mail_default_sender", 17 | "AWS_KEY_ID": "$aws_key_id", 18 | "AWS_SECRET": "$aws_secret", 19 | "AWS_DATA_BUCKET": "$aws_data_bucket" 20 | }, 21 | "links": [ 22 | {"component": "rabbitmq", "target_port": "5672"}, 23 | {"component": "db", "target_port": "5432"} 24 | ], 25 | "domains": { 26 | "8000": [ 27 | "spendb.gigantic.io", 28 | "dummy.pudo.org" 29 | ] 30 | }, 31 | "entrypoint": "gunicorn", 32 | "args": [ 33 | "-w", "5", 34 | "-b", "0.0.0.0:8000", 35 | "--error-logfile", "-", 36 | "--log-file", "-", 37 | "spendb.wsgi:app" 38 | ] 39 | }, 40 | "db": { 41 | "image": "postgres:9.4", 42 | "ports": 5432, 43 | "env": { 44 | "POSTGRES_USER": "spendb", 45 | "POSTGRES_PASSWORD": "spendb" 46 | }, 47 | "volumes": [ 48 | { 49 | "path": "/var/lib/postgresql/data", 50 | "size": "4 GB" 51 | } 52 | ] 53 | }, 54 | "rabbitmq": { 55 | "image": "rabbitmq", 56 | "ports": 5672 57 | }, 58 | "worker": { 59 | "image": "registry.giantswarm.io/spendb/spendb:latest", 60 | "env": { 61 | "C_FORCE_ROOT": "$celery_force_root", 62 | "SITE_TITLE": "$site_title", 63 | "SECRET": "$secret", 64 | "DATABASE_URL": "$database_url", 65 | "AMQP_URL": "$amqp_url", 66 | "MAIL_SERVER": "$mail_server", 67 | "MAIL_PORT": "$mail_port", 68 | "MAIL_USERNAME": "$mail_username", 69 | "MAIL_PASSWORD": "$mail_password", 70 | "MAIL_DEFAULT_SENDER": "$mail_default_sender", 71 | "AWS_KEY_ID": "$aws_key_id", 72 | "AWS_SECRET": "$aws_secret", 73 | "AWS_DATA_BUCKET": "$aws_data_bucket" 74 | }, 75 
| "links": [ 76 | {"component": "rabbitmq", "target_port": "5672"}, 77 | {"component": "db", "target_port": "5432"} 78 | ], 79 | "entrypoint": "celery", 80 | "args": [ 81 | "-A", "spendb.tasks", "worker", 82 | "-c", "4", 83 | "-l", "info" 84 | ] 85 | } 86 | } 87 | } -------------------------------------------------------------------------------- /contrib/assets/noun_29578_cc.svg: -------------------------------------------------------------------------------- 1 | Created by Nate Eulfrom the Noun Project -------------------------------------------------------------------------------- /spendb/core.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from flask import Flask 3 | from flask import url_for as _url_for 4 | from flask.ext.sqlalchemy import SQLAlchemy 5 | from flask.ext.login import LoginManager 6 | from flask.ext.babel import Babel 7 | from flask.ext.cache import Cache 8 | from flask.ext.mail import Mail 9 | from flask.ext.migrate import Migrate 10 | from flask.ext.cors import CORS 11 | from flask_flatpages import FlatPages 12 | from celery import Celery 13 | from cubes import Workspace, ext 14 | 15 | from spendb import default_settings 16 | from spendb.etl.manager import DataManager 17 | 18 | logging.basicConfig(level=logging.DEBUG) 19 | 20 | # specific loggers 21 | logging.getLogger('cubes').setLevel(logging.WARNING) 22 | logging.getLogger('markdown').setLevel(logging.WARNING) 23 | logging.getLogger('boto').setLevel(logging.WARNING) 24 | logging.getLogger('spendb.core.cors').setLevel(logging.WARNING) 25 | 26 | 27 | db = SQLAlchemy() 28 | babel = Babel() 29 | login_manager = LoginManager() 30 | cache = Cache() 31 | mail = Mail() 32 | migrate = Migrate() 33 | pages = FlatPages() 34 | data_manager = DataManager() 35 | cors = CORS() 36 | 37 | 38 | def create_app(**config): 39 | app = Flask(__name__) 40 | 41 | app.config.from_object(default_settings) 42 | app.config.from_envvar('SPENDB_SETTINGS', 
silent=True) 43 | app.config.update(config) 44 | 45 | db.init_app(app) 46 | babel.init_app(app) 47 | cache.init_app(app) 48 | mail.init_app(app) 49 | login_manager.init_app(app) 50 | data_manager.init_app(app) 51 | pages.init_app(app) 52 | migrate.init_app(app, db, directory=app.config.get('ALEMBIC_DIR')) 53 | cors.init_app(app, resources=r'/api/*', supports_credentials=True, 54 | methods=['GET', 'HEAD', 'OPTIONS']) 55 | 56 | ws = Workspace() 57 | ext.model_provider("spending", metadata={}) 58 | ext.store("spending") 59 | ws.register_default_store('spending', model_provider='spending') 60 | app.cubes_workspace = ws 61 | return app 62 | 63 | 64 | def create_web_app(**config): 65 | app = create_app(**config) 66 | 67 | from spendb.views import register_views 68 | register_views(app, babel) 69 | return app 70 | 71 | 72 | def create_celery(app): 73 | celery = Celery(app.import_name, broker=app.config['CELERY_BROKER_URL']) 74 | celery.conf.update(app.config) 75 | return celery 76 | 77 | 78 | def url_for(endpoint, **kwargs): 79 | try: 80 | from flask import current_app 81 | if current_app.config.get('PREFERRED_URL_SCHEME'): 82 | kwargs['_scheme'] = current_app.config.get('PREFERRED_URL_SCHEME') 83 | url = _url_for(endpoint, _external=True, **kwargs) 84 | return url 85 | except: 86 | return None 87 | -------------------------------------------------------------------------------- /spendb/tests/fixtures/validation/default.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset": { 3 | "category": "other", 4 | "currency": "EUR", 5 | "description": "This is a test dataset", 6 | "label": "Test Dataset", 7 | "languages": [ 8 | "en" 9 | ], 10 | "name": "test", 11 | "private": false, 12 | "territories": [ 13 | "DE", 14 | "FR", 15 | "ES" 16 | ] 17 | }, 18 | "model": { 19 | "dimensions": { 20 | "function": { 21 | "attributes": { 22 | "description": { 23 | "column": "FUNCTION_DESC", 24 | "label": "string" 25 | }, 26 | "label": { 27 | 
"column": "FUNCTION", 28 | "label": "string" 29 | }, 30 | "name": { 31 | "column": "FUNCTION_ID", 32 | "label": "string" 33 | } 34 | }, 35 | "label": "Function", 36 | "key_attribute": "name" 37 | }, 38 | "supplier": { 39 | "attributes": { 40 | "label": { 41 | "column": "SUPPLIER", 42 | "label": "string" 43 | }, 44 | "name": { 45 | "column": "SUPPLIER_ID", 46 | "label": "string" 47 | } 48 | }, 49 | "label": "Supplier", 50 | "key_attribute": "name" 51 | }, 52 | "time": { 53 | "attributes": { 54 | "year": { 55 | "column": "time_from_year", 56 | "label": "string" 57 | } 58 | }, 59 | "description": "The accounting period in which the spending happened", 60 | "label": "Tax year", 61 | "key_attribute": "year" 62 | }, 63 | "transaction_id": { 64 | "attributes": { 65 | "transaction_id": { 66 | "column": "tx", 67 | "label": "string" 68 | } 69 | }, 70 | "label": "Transaction ID", 71 | "key_attribute": "transaction_id" 72 | } 73 | }, 74 | "measures": { 75 | "amount": { 76 | "column": "AMOUNT", 77 | "label": "Amount" 78 | }, 79 | "cofinance": { 80 | "column": "cofin", 81 | "label": "Co-Financed Amount" 82 | } 83 | } 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /spendb/views/api/session.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from flask import Blueprint, request 4 | from flask.ext.login import current_user, login_user, logout_user 5 | from werkzeug.security import check_password_hash 6 | from flask.ext.babel import gettext as _ 7 | from apikit import jsonify, request_data 8 | 9 | from spendb.core import login_manager 10 | from spendb.auth import dataset 11 | from spendb.model import Account, Dataset 12 | from spendb.views.context import etag_cache_keygen 13 | 14 | log = logging.getLogger(__name__) 15 | blueprint = Blueprint('sessions_api', __name__) 16 | 17 | 18 | @login_manager.request_loader 19 | def load_user_from_request(request): 20 | api_key = 
@blueprint.route('/sessions/login', methods=['POST', 'PUT'])
def login():
    """ Validate the submitted credentials and begin a session.

    Responds with the same error body for an unknown login name and a
    wrong password, so the endpoint does not leak which accounts exist.
    """
    data = request_data()
    account = Account.by_name(data.get('login'))
    # Guard against accounts without a stored password hash (e.g. created
    # through an external signup flow): check_password_hash would raise
    # on None and turn a failed login into a 500.
    if account is not None and account.password:
        if check_password_hash(account.password, data.get('password')):
            login_user(account, remember=True)
            return jsonify({
                'status': 'ok',
                'message': _("Welcome back, %(name)s!", name=account.name)
            })
    return jsonify({
        'status': 'error',
        'errors': {
            'password': _("Incorrect user name or password!")
        }
    }, status=400)
def enable_bucket_cors(bucket):
    """ For direct upload to work, the bucket needs to enable
    cross-origin request scripting. """
    try:
        cors_cfg = bucket.get_cors()
    except S3ResponseError:
        # S3 raises when the bucket has no CORS configuration yet;
        # start from an empty one.
        cors_cfg = CORSConfiguration()
    rules = [r.id for r in cors_cfg]
    changed = False
    # Rule for browser-side uploads (PUT/POST from any origin).
    if 'spendb_put' not in rules:
        cors_cfg.add_rule(['PUT', 'POST'], '*',
                          allowed_header='*',
                          id='spendb_put',
                          max_age_seconds=3000,
                          expose_header='x-amz-server-side-encryption')
        changed = True
    # Rule for reading uploaded files back from the browser.
    if 'spendb_get' not in rules:
        cors_cfg.add_rule('GET', '*', id='spendb_get')
        changed = True

    # Only issue the S3 API call when a rule was actually added.
    if changed:
        bucket.set_cors(cors_cfg)
def generate_s3_upload_policy(source, file_name, mime_type):
    """ Generate a policy and signature for uploading a file directly to
    the specified source on S3.

    Returns a dict carrying the signed browser-upload policy, or a dict
    with ``status: error`` when the storage backend is not S3. """
    obj = source._obj
    if not hasattr(obj, 'key'):
        return {
            'status': 'error',
            'message': 'Backend is not on S3, cannot generate signature.'
        }

    enable_bucket_cors(obj.store.bucket)
    url = obj.key.generate_url(expires_in=0, force_http=True,
                               query_auth=False)
    url = url.split(obj.key.name)[0]

    if 'https' in current_app.config.get('PREFERRED_URL_SCHEME'):
        url = url.replace('http://', 'https://')

    data = {
        'url': url,
        'status': 'ok',
        'key': obj.key.name,
        'source_name': source.name,
        'aws_key_id': obj.store.aws_key_id,
        'acl': 'public-read',
        'file_name': file_name,
        'mime_type': mime_type
    }
    # Drop microseconds via replace() instead of splitting isoformat() on
    # '.': when the timestamp falls on a whole second isoformat() emits no
    # fractional part and the old two-way unpack raised ValueError.
    expire = datetime.utcnow() + timedelta(days=7)
    expire = expire.replace(microsecond=0)
    policy = {
        "expiration": expire.isoformat() + "Z",
        "conditions": [
            {"bucket": obj.store.bucket_name},
            ["starts-with", "$key", data['key']],
            {"acl": data['acl']}
        ]
    }

    # AWS browser-based POST policy: base64-encode the JSON policy
    # document, then sign it with HMAC-SHA1 using the secret key.
    data['policy'] = b64encode(json.dumps(policy))
    data['signature'] = b64encode(hmac.new(obj.store.aws_secret,
                                           data['policy'],
                                           hashlib.sha1).digest())
    return data
def transform_dataset(source):
    """ Convert an OpenSpending-style column ``mapping`` into a spendb
    model dict with ``measures`` and ``dimensions`` sections. """
    mapping = source['data']['mapping']
    model = {'measures': {}, 'dimensions': {}}
    types = set()
    for name, src in mapping.items():
        norm_name = slug(name)
        if src.get('type') == 'measure':
            model['measures'][norm_name] = {
                'label': src['label'],
                # description may be absent or None in old exports
                'description': src.get('description') or '',
                'column': norm_name
            }
            continue

        dim = {
            'label': src['label'],
            'description': src.get('description') or '',
            'label_attribute': 'label',
            'key_attribute': 'label',
            'attributes': {}
        }
        if src.get('type') == 'date':
            # Date columns were exploded into one column per granularity.
            dim['attributes'] = {
                'label': {
                    'label': 'Label',
                    'column': norm_name + '_name'
                },
                'year': {
                    'label': 'Year',
                    'column': norm_name + '_year'
                },
                'month': {
                    'label': 'Month',
                    'column': norm_name + '_month'
                },
                'day': {
                    'label': 'Day',
                    'column': norm_name + '_day'
                },
                'yearmonth': {
                    'label': 'Year/Month',
                    'column': norm_name + '_yearmonth'
                }
            }
        if src.get('type') == 'attribute':
            dim['attributes'] = {
                'label': {
                    'label': 'Label',
                    'column': norm_name
                }
            }
        if src.get('type') == 'compound':
            # The inner loop previously re-bound ``name`` and shadowed the
            # outer dimension name; use a distinct variable to avoid
            # confusing later reads of ``name``.
            for attr_name, spec in src['attributes'].items():
                attr = slug(attr_name)
                dim['attributes'][attr] = {
                    'label': spec['column'],
                    'column': norm_name + '_' + attr
                }
        if 'name' in dim['attributes']:
            dim['key_attribute'] = 'name'
        model['dimensions'][norm_name] = dim
    return model
    def test_load_model_properties(self):
        # The dataset's serialized form must mirror its ORM attributes.
        assert self.ds.name == self.ds.to_dict()['name'], self.ds.name
        assert self.ds.label == self.ds.to_dict()['label'], self.ds.label
    def test_drop(self):
        # The fixture load created the fact table for dataset 'test'.
        tn = self.engine.table_names()
        assert 'test__facts' in tn, tn

        # Dropping the fact table must remove it from the database.
        self.ds.fact_table.drop()
        tn = self.engine.table_names()
        assert 'test__facts' not in tn, tn
    def test_logout(self):
        # Logout always succeeds, even for API-key based "sessions".
        url = url_for('sessions_api.logout')
        res = self.client.post(url, query_string=self.auth_qs)
        assert res.json.get('status') == 'ok', res.json
SECURITY PLC,Regeneration and Housing Services 5 | 4,London Borough of Hammersmith and Fulham,2010-01-01,417742,112.50,ALARM LTD,Finance and Corporate Services 6 | 5,London Borough of Hammersmith and Fulham,2010-01-01,417742,562.50,ALARM LTD,Finance and Corporate Services 7 | 6,London Borough of Hammersmith and Fulham,2010-01-01,391746,1665.62,ASCOM TELE NOVA LTD,Childrens Services 8 | 7,London Borough of Hammersmith and Fulham,2010-01-01,396062,1500.00,BIW TECHNOLOGIES LIMITED,Community Services 9 | 8,London Borough of Hammersmith and Fulham,2010-01-01,392463,560.00,CAPITAL CITY COMMUNICATIONS LTD,Resident Services 10 | 9,London Borough of Hammersmith and Fulham,2010-01-01,393998,1296.00,CAPITAL CITY COMMUNICATIONS LTD,Environment Services 11 | 10,London Borough of Hammersmith and Fulham,2010-01-01,395696,171.39,CAR HIRE (DAY OF SWANSEA)LTD,Resident Services 12 | 11,London Borough of Hammersmith and Fulham,2010-01-01,395696,180.68,CAR HIRE (DAY OF SWANSEA)LTD,Resident Services 13 | 12,London Borough of Hammersmith and Fulham,2010-01-01,395696,182.82,CAR HIRE (DAY OF SWANSEA)LTD,Resident Services 14 | 13,London Borough of Hammersmith and Fulham,2010-01-01,395696,185.60,CAR HIRE (DAY OF SWANSEA)LTD,Resident Services 15 | 14,London Borough of Hammersmith and Fulham,2010-01-01,395696,244.84,CAR HIRE (DAY OF SWANSEA)LTD,Resident Services 16 | 15,London Borough of Hammersmith and Fulham,2010-01-01,395696,265.49,CAR HIRE (DAY OF SWANSEA)LTD,Resident Services 17 | 16,London Borough of Hammersmith and Fulham,2010-01-01,395696,384.65,CAR HIRE (DAY OF SWANSEA)LTD,Resident Services 18 | 17,London Borough of Hammersmith and Fulham,2010-01-01,395696,148.10,CAR HIRE (DAY OF SWANSEA)LTD,Resident Services 19 | 18,London Borough of Hammersmith and Fulham,2010-01-01,395696,168.85,CAR HIRE (DAY OF SWANSEA)LTD,Resident Services 20 | 19,London Borough of Hammersmith and Fulham,2010-01-01,417549,32641.84,CB RICHARD ELLIS LTD CLIENT ACCOUNT,Environment Services 21 | 20,London Borough of 
Hammersmith and Fulham,2010-01-01,417550,8106.18,CB RICHARD ELLIS LTD CLIENT ACCOUNT,Environment Services 22 | 21,London Borough of Hammersmith and Fulham,2010-01-01,395936,527.00,CHESTERFIELD ASSOCIATES,Childrens Services 23 | 22,London Borough of Hammersmith and Fulham,2010-01-01,407426,525.52,CHESTERFIELD ASSOCIATES,Community Services 24 | 23,London Borough of Hammersmith and Fulham,2010-01-01,460450,136.97,CONSULTUS SERVICES AGENCY LTD,Community Services 25 | 24,London Borough of Hammersmith and Fulham,2010-01-01,460450,1431.85,CONSULTUS SERVICES AGENCY LTD,Community Services 26 | 25,London Borough of Hammersmith and Fulham,2010-01-01,409072,522.10,COYLE PERSONNEL PLC,Community Services 27 | 26,London Borough of Hammersmith and Fulham,2010-01-01,405998,7009.96,CRANSTOUN DRUG SERVICES,Community Services 28 | 27,London Borough of Hammersmith and Fulham,2010-01-01,409318,-1156.27,EDF ENERGY 1 LIMITED,Resident Services 29 | 28,London Borough of Hammersmith and Fulham,2010-01-01,409319,-826.32,EDF ENERGY 1 LIMITED,Resident Services 30 | 29,London Borough of Hammersmith and Fulham,2010-01-01,483559,2950.00,e-MENTORING LIMITED,Childrens Services 31 | -------------------------------------------------------------------------------- /spendb/tests/views/test_home.py: -------------------------------------------------------------------------------- 1 | import re 2 | import json 3 | from flask import url_for 4 | 5 | from spendb.core import db 6 | from spendb.model.dataset import Dataset 7 | from spendb.tests.base import ControllerTestCase 8 | from spendb.tests.helpers import make_account, load_fixture 9 | 10 | 11 | class TestHomeController(ControllerTestCase): 12 | 13 | def setUp(self): 14 | super(TestHomeController, self).setUp() 15 | self.dataset = load_fixture('cra') 16 | self.user = make_account('test') 17 | 18 | def test_index(self): 19 | response = self.client.get(url_for('home.index')) 20 | assert 'SpenDB' in response.data 21 | 22 | def test_locale(self): 23 | set_l = 
url_for('home.set_locale') 24 | data = json.dumps({'locale': 'en'}) 25 | self.client.post(set_l, data=data, 26 | headers={'Content-Type': 'application/json'}) 27 | 28 | def test_feeds(self): 29 | # Anonymous user with one public dataset 30 | response = self.client.get(url_for('home.feed_rss')) 31 | assert 'application/xml' in response.content_type 32 | assert 'Recently Created Datasets' in response.data 33 | assert 'Country Regional Analysis v2009' in response.data, response.data 34 | cra = Dataset.by_name('cra') 35 | cra.private = True 36 | db.session.add(cra) 37 | db.session.commit() 38 | 39 | # Anonymous user with one private dataset 40 | response = self.client.get(url_for('home.feed_rss')) 41 | assert 'application/xml' in response.content_type 42 | assert '<title>Recently Created Datasets' in response.data 43 | assert 'Country Regional Analysis v2009' not in response.data 44 | 45 | # Logged in user with one public dataset 46 | cra.private = False 47 | db.session.add(cra) 48 | db.session.commit() 49 | response = self.client.get(url_for('home.feed_rss'), 50 | query_string={'api_key': self.user.api_key}) 51 | assert 'application/xml' in response.content_type 52 | assert '<title>Recently Created Datasets' in response.data 53 | assert 'Country Regional Analysis v2009' in response.data 54 | 55 | # Logged in user with one private dataset 56 | cra.private = True 57 | db.session.add(cra) 58 | db.session.commit() 59 | response = self.client.get(url_for('home.feed_rss'), 60 | query_string={'api_key': self.user.api_key}) 61 | assert 'application/xml' in response.content_type 62 | assert '<title>Recently Created Datasets' in response.data 63 | assert 'Country Regional Analysis v2009' not in response.data 64 | 65 | # Logged in admin user with one private dataset 66 | admin_user = make_account('admin') 67 | admin_user.admin = True 68 | db.session.add(admin_user) 69 | db.session.commit() 70 | response = self.client.get(url_for('home.feed_rss'), 71 | query_string={'api_key': 
def asset_link(path):
    """ Build a cache-busted URL for a static asset.

    Uses the debug asset path when DEBUG is on, and appends a short hash
    of CACHE_KEY (falling back to the package version) so that deploys
    invalidate browser caches. """
    asset_path = current_app.config['ASSETS_PATH_PROD']
    if current_app.config['DEBUG']:
        asset_path = current_app.config['ASSETS_PATH_DEBUG']
    cache_key = os.environ.get('CACHE_KEY', __version__)
    # Only the first 10 hex chars are needed for cache busting.
    cache_key = sha1(cache_key).hexdigest()[:10]
    return '%s%s?_=%s' % (asset_path, path, cache_key)
@blueprint.route('/set-locale', methods=['POST'])
def set_locale():
    """ Store the user's chosen UI locale in the session cookie. """
    # get_json(silent=True) avoids a 500 when the request carries no JSON
    # body at all (request.json would be None and .get() would raise).
    data = request.get_json(silent=True) or {}
    locale = data.get('locale')

    if locale is not None:
        session['locale'] = locale
        session.modified = True
    return jsonify({'locale': locale})
-------------------------------------------------------------------------------- /spendb/templates/layout.html: -------------------------------------------------------------------------------- 1 | <!DOCTYPE html> 2 | <html lang="{{ current_language }}" ng-app="spendb"> 3 | <head> 4 | <meta charset="utf-8"> 5 | <meta name="viewport" content="width=device-width, initial-scale=1.0"> 6 | <title>{{ site_title }} 7 | 8 | 9 | 10 | 13 | 14 | 15 | 16 | {% if debug %} 17 | 18 | {% else %} 19 | 20 | {% endif %} 21 | 22 | 23 |
24 |
25 | 26 |
27 |
28 |
29 |
30 |
31 | 32 | 42 | 43 | 76 | 77 | 78 | 84 | 85 | 86 | {% if debug %} 87 | 88 | 89 | {% else %} 90 | 91 | {% endif %} 92 | 93 | 94 | 95 | -------------------------------------------------------------------------------- /spendb/model/fact_table.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from sqlalchemy import MetaData 4 | from sqlalchemy.schema import Table, Column 5 | from sqlalchemy.types import Unicode 6 | 7 | from spendb.core import db 8 | from spendb.model.common import json_default 9 | from spendb.validation.model import TYPES 10 | 11 | 12 | class FactTable(object): 13 | """ The ``FactTable`` serves as a controller object for 14 | a given ``Model``, handling the creation, filling and migration 15 | of the table schema associated with the dataset. """ 16 | 17 | def __init__(self, dataset): 18 | self.dataset = dataset 19 | self.bind = db.engine 20 | self.table_name = '%s__facts' % dataset.name 21 | self.meta = MetaData() 22 | self.meta.bind = self.bind 23 | self._table = None 24 | 25 | @property 26 | def table(self): 27 | """ Generate an appropriate table representation to mirror the 28 | fields known for this table. """ 29 | if self._table is None: 30 | self._table = Table(self.table_name, self.meta) 31 | id_col = Column('_id', Unicode(42), primary_key=True) 32 | self._table.append_column(id_col) 33 | json_col = Column('_json', Unicode()) 34 | self._table.append_column(json_col) 35 | self._fields_columns(self._table) 36 | return self._table 37 | 38 | @property 39 | def alias(self): 40 | """ An alias used for queries. 
""" 41 | if not hasattr(self, '_alias'): 42 | self._alias = self.table.alias('entry') 43 | return self._alias 44 | 45 | @property 46 | def mapping(self): 47 | if not hasattr(self, '_mapping'): 48 | self._mapping = {} 49 | for attribute in self.dataset.model.attributes: 50 | if attribute.column in self.alias.columns: 51 | col = self.alias.c[attribute.column] 52 | self._mapping[attribute.path] = col 53 | return self._mapping 54 | 55 | @property 56 | def exists(self): 57 | return db.engine.has_table(self.table.name) 58 | 59 | def _fields_columns(self, table): 60 | """ Transform the (auto-detected) fields into a set of column 61 | specifications. """ 62 | for field in self.dataset.fields: 63 | data_type = TYPES.get(field.get('type'), Unicode) 64 | col = Column(field.get('name'), data_type, nullable=True) 65 | table.append_column(col) 66 | 67 | def load_iter(self, iterable, chunk_size=1000): 68 | """ Bulk load all the data in an artifact to a matching database 69 | table. """ 70 | chunk = [] 71 | 72 | conn = self.bind.connect() 73 | tx = conn.begin() 74 | try: 75 | for i, record in enumerate(iterable): 76 | record['_id'] = i 77 | record['_json'] = json.dumps(record, default=json_default) 78 | chunk.append(record) 79 | if len(chunk) >= chunk_size: 80 | stmt = self.table.insert() 81 | conn.execute(stmt, chunk) 82 | chunk = [] 83 | 84 | if len(chunk): 85 | stmt = self.table.insert() 86 | conn.execute(stmt, chunk) 87 | tx.commit() 88 | except: 89 | tx.rollback() 90 | raise 91 | 92 | def create(self): 93 | """ Create the fact table if it does not exist. """ 94 | if not self.exists: 95 | self.table.create(self.bind) 96 | 97 | def drop(self): 98 | """ Drop the fact table if it does exist. 
""" 99 | if self.exists: 100 | self.table.drop() 101 | self._table = None 102 | 103 | def __repr__(self): 104 | return "" % (self.dataset) 105 | -------------------------------------------------------------------------------- /spendb/tests/helpers.py: -------------------------------------------------------------------------------- 1 | import urllib 2 | import os 3 | import uuid 4 | import json 5 | import urlparse 6 | from StringIO import StringIO 7 | from datetime import datetime 8 | from werkzeug.security import generate_password_hash 9 | 10 | from spendb.model.dataset import Dataset 11 | from spendb.core import db 12 | 13 | 14 | def fixture_file(name): 15 | """Return a file-like object pointing to a named fixture.""" 16 | return open(fixture_path(name)) 17 | 18 | 19 | def meta_fixture(name): 20 | meta_fp = fixture_file('meta/' + name + '.json') 21 | meta = json.load(meta_fp) 22 | meta_fp.close() 23 | return meta 24 | 25 | 26 | def validation_fixture(name): 27 | model_fp = fixture_file('validation/' + name + '.json') 28 | model = json.load(model_fp) 29 | model_fp.close() 30 | if 'fact_table' not in model['model']: 31 | model['model']['fact_table'] = 'table' 32 | return model 33 | 34 | 35 | def data_fixture(name): 36 | return fixture_file('data/' + name + '.csv') 37 | 38 | 39 | def fixture_path(name): 40 | """Return the full path to a named fixture. 41 | Use fixture_file rather than this method wherever possible. 
42 | """ 43 | # Get the directory of this file (helpers is placed in the test directory) 44 | test_directory = os.path.dirname(__file__) 45 | # Fixture is a directory in the test directory 46 | return os.path.join(test_directory, 'fixtures', name) 47 | 48 | 49 | def csvimport_fixture_path(name, path): 50 | url = urllib.pathname2url(fixture_path('csv_import/%s/%s' % (name, path))) 51 | return urlparse.urljoin('file:', url) 52 | 53 | 54 | def csvimport_fixture_file(name, path): 55 | try: 56 | fp = urllib.urlopen(csvimport_fixture_path(name, path)) 57 | except IOError: 58 | if name == 'default': 59 | fp = None 60 | else: 61 | fp = csvimport_fixture_file('default', path) 62 | 63 | if fp: 64 | fp = StringIO(fp.read()) 65 | return fp 66 | 67 | 68 | def csvimport_table(name): 69 | from spendb.core import data_manager 70 | from spendb.etl.extract import validate_table, load_table 71 | 72 | package = data_manager.package(uuid.uuid4().hex) 73 | source = package.ingest(data_fixture(name)) 74 | source = validate_table(source) 75 | rows = list(load_table(source)) 76 | return source.meta.get('fields'), rows 77 | 78 | 79 | def load_fixture(name, manager=None): 80 | """ Load fixture data into the database. 
""" 81 | meta = meta_fixture(name) 82 | dataset = Dataset(meta) 83 | dataset.updated_at = datetime.utcnow() 84 | if manager is not None: 85 | dataset.managers.append(manager) 86 | fields, rows = csvimport_table(name) 87 | dataset.fields = fields 88 | db.session.add(dataset) 89 | db.session.commit() 90 | dataset.fact_table.create() 91 | dataset.fact_table.load_iter(rows) 92 | return dataset 93 | 94 | 95 | def make_account(name='test', fullname='Test User', 96 | email='test@example.com', twitter='testuser', 97 | admin=False, password='password'): 98 | from spendb.model.account import Account 99 | 100 | # First see if the account already exists and if so, return it 101 | account = Account.by_name(name) 102 | if account: 103 | return account 104 | 105 | # Account didn't exist so we create it and return it 106 | account = Account() 107 | account.name = name 108 | account.fullname = fullname 109 | account.email = email 110 | account.twitter_handle = twitter 111 | account.admin = admin 112 | account.password = generate_password_hash(password) 113 | db.session.add(account) 114 | db.session.commit() 115 | return account 116 | 117 | 118 | def init_db(app): 119 | db.create_all(app=app) 120 | 121 | 122 | def clean_db(app): 123 | db.session.rollback() 124 | db.drop_all(app=app) 125 | -------------------------------------------------------------------------------- /spendb/model/account.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | import hmac 3 | from hashlib import md5 4 | 5 | from flask.ext.login import AnonymousUserMixin 6 | 7 | from spendb.core import db, login_manager, url_for 8 | 9 | GRAVATAR = 'https://secure.gravatar.com/avatar/%s' 10 | 11 | 12 | def make_uuid(): 13 | return unicode(uuid.uuid4()) 14 | 15 | 16 | account_dataset_table = db.Table( 17 | 'account_dataset', db.metadata, 18 | db.Column('dataset_id', db.Integer, db.ForeignKey('dataset.id'), 19 | primary_key=True), 20 | db.Column('account_id', db.Integer, 
db.ForeignKey('account.id'), 21 | primary_key=True) 22 | ) 23 | 24 | 25 | class AnonymousAccount(AnonymousUserMixin): 26 | admin = False 27 | 28 | def __repr__(self): 29 | return '' 30 | 31 | login_manager.anonymous_user = AnonymousAccount 32 | 33 | 34 | @login_manager.user_loader 35 | def load_account(account_id): 36 | return Account.by_id(account_id) 37 | 38 | 39 | class Account(db.Model): 40 | __tablename__ = 'account' 41 | 42 | id = db.Column(db.Integer, primary_key=True) 43 | name = db.Column(db.Unicode(255), unique=True) 44 | fullname = db.Column(db.Unicode(2000)) 45 | email = db.Column(db.Unicode(2000)) 46 | twitter_handle = db.Column(db.Unicode(140)) 47 | public_email = db.Column(db.Boolean, default=False) 48 | public_twitter = db.Column(db.Boolean, default=False) 49 | password = db.Column(db.Unicode(2000)) 50 | api_key = db.Column(db.Unicode(2000), default=make_uuid) 51 | admin = db.Column(db.Boolean, default=False) 52 | 53 | datasets = db.relationship('Dataset', 54 | secondary=account_dataset_table, 55 | backref=db.backref('managers', lazy='dynamic')) 56 | 57 | def __init__(self): 58 | self.api_key = make_uuid() 59 | 60 | def is_authenticated(self): 61 | return True 62 | 63 | def is_anonymous(self): 64 | return False 65 | 66 | def is_active(self): 67 | return True 68 | 69 | def get_id(self): 70 | return self.id 71 | 72 | @property 73 | def display_name(self): 74 | return self.fullname or self.name 75 | 76 | @property 77 | def gravatar(self): 78 | sig = self.email or self.name 79 | sig = md5(sig.encode('utf-8')).hexdigest() 80 | return GRAVATAR % sig 81 | 82 | @property 83 | def token(self): 84 | h = hmac.new('') 85 | h.update(self.api_key) 86 | if self.password: 87 | h.update(self.password) 88 | return h.hexdigest() 89 | 90 | @classmethod 91 | def by_name(cls, name): 92 | return db.session.query(cls).filter_by(name=name).first() 93 | 94 | @classmethod 95 | def by_id(cls, id): 96 | return db.session.query(cls).filter_by(id=id).first() 97 | 98 | 
@classmethod 99 | def by_email(cls, email): 100 | return db.session.query(cls).filter_by(email=email).first() 101 | 102 | @classmethod 103 | def by_api_key(cls, api_key): 104 | return db.session.query(cls).filter_by(api_key=api_key).first() 105 | 106 | def to_dict(self): 107 | """ Return the dictionary representation of the account. """ 108 | account_dict = { 109 | 'name': self.name, 110 | 'fullname': self.fullname, 111 | 'display_name': self.display_name, 112 | 'email': self.email, 113 | 'admin': self.admin, 114 | 'gravatar': self.gravatar, 115 | 'twitter_handle': self.twitter_handle, 116 | 'api_url': url_for('account_api.view', account=self.name) 117 | } 118 | if not self.public_email: 119 | account_dict.pop('email') 120 | if not self.public_twitter: 121 | account_dict.pop('twitter_handle') 122 | return account_dict 123 | 124 | def __repr__(self): 125 | return '' % (self.id, self.name) 126 | -------------------------------------------------------------------------------- /spendb/tests/etl/test_load.py: -------------------------------------------------------------------------------- 1 | from loadkit import logger 2 | 3 | from spendb.core import db, data_manager 4 | from spendb.model import Dataset, Run 5 | from spendb.etl import tasks 6 | 7 | from spendb.tests.helpers import meta_fixture 8 | from spendb.tests.helpers import csvimport_fixture_path 9 | from spendb.tests.helpers import csvimport_fixture_file 10 | from spendb.tests.base import DatabaseTestCase 11 | 12 | 13 | class TestLoad(DatabaseTestCase): 14 | 15 | def setUp(self): 16 | super(TestLoad, self).setUp() 17 | data_manager._index = None 18 | model = meta_fixture('cra') 19 | self.ds = Dataset(model) 20 | db.session.add(self.ds) 21 | db.session.commit() 22 | self.cra_url = csvimport_fixture_path('../data', 'cra.csv') 23 | 24 | def tearDown(self): 25 | super(TestLoad, self).tearDown() 26 | 27 | def test_extract_url(self): 28 | source = tasks.extract_url(self.ds, self.cra_url) 29 | assert 'cra.csv' == 
source.name, source.name 30 | 31 | def test_extract_missing_url(self): 32 | url = csvimport_fixture_path('../data', 'xcra.csv') 33 | source = tasks.extract_url(self.ds, url) 34 | assert source is None, source 35 | 36 | run = db.session.query(Run).first() 37 | package = data_manager.package(self.ds.name) 38 | messages = list(logger.load(package, run.id)) 39 | assert len(messages) > 2, messages 40 | 41 | def test_extract_file(self): 42 | fp = csvimport_fixture_file('../data', 'cra.csv') 43 | source = tasks.extract_fileobj(self.ds, fp, 44 | file_name='cra2.csv') 45 | assert 'cra2.csv' == source.name, source.name 46 | 47 | fp = csvimport_fixture_file('../data', 'cra.csv') 48 | source = tasks.extract_fileobj(self.ds, fp, 49 | file_name='cra2 HUHU.csv') 50 | assert 'cra2-huhu.csv' == source.name, source.name 51 | 52 | def test_duplicate_file(self): 53 | fp = csvimport_fixture_file('../data', 'cra.csv') 54 | source = tasks.extract_fileobj(self.ds, fp, 55 | file_name='cra2.csv') 56 | assert 'cra2.csv' == source.name, source.name 57 | 58 | fp = csvimport_fixture_file('../data', 'cra.csv') 59 | source = tasks.extract_fileobj(self.ds, fp, 60 | file_name='cra2.csv') 61 | assert 'cra2-2.csv' == source.name, source.name 62 | 63 | def test_transform_source(self): 64 | fp = csvimport_fixture_file('../data', 'cra.csv') 65 | source = tasks.extract_fileobj(self.ds, fp, 66 | file_name='cra2.csv') 67 | src = tasks.transform_source(self.ds, source.name) 68 | assert src.name == source.name, src.name 69 | rows = list(tasks.load_table(src)) 70 | assert len(rows) == 36, rows 71 | assert 'cofog1_label' in rows[1], rows[1] 72 | assert 'cofog1.label' not in rows[1], rows[1] 73 | 74 | def test_field_detection(self): 75 | fp = csvimport_fixture_file('../data', 'cra.csv') 76 | source = tasks.extract_fileobj(self.ds, fp, 77 | file_name='cra2.csv') 78 | source = tasks.transform_source(self.ds, source.name) 79 | fields = source.meta.get('fields') 80 | assert len(fields) == 34, len(fields) 81 | 
def upgrade():
    """ Initial schema: create all core SpenDB tables.

    Tables mirror the SQLAlchemy models: ``account``, ``dataset``,
    the per-dataset ``dataset_territory`` / ``dataset_language``
    code lists, ETL ``run`` records, and the ``account_dataset``
    many-to-many link between managers and datasets.
    """
    # User accounts; ``name`` doubles as the login and must be unique.
    op.create_table('account',
    sa.Column('id', sa.Integer(), nullable=False),
    sa.Column('name', sa.Unicode(length=255), nullable=True),
    sa.Column('fullname', sa.Unicode(length=2000), nullable=True),
    sa.Column('email', sa.Unicode(length=2000), nullable=True),
    sa.Column('public_email', sa.Boolean(), nullable=True),
    sa.Column('twitter_handle', sa.Unicode(length=140), nullable=True),
    sa.Column('public_twitter', sa.Boolean(), nullable=True),
    sa.Column('password', sa.Unicode(length=2000), nullable=True),
    sa.Column('api_key', sa.Unicode(length=2000), nullable=True),
    sa.Column('admin', sa.Boolean(), nullable=True),
    sa.Column('script_root', sa.Unicode(length=2000), nullable=True),
    sa.Column('terms', sa.Boolean(), nullable=True),
    sa.PrimaryKeyConstraint('id'),
    sa.UniqueConstraint('name')
    )
    # Dataset metadata; ``data`` holds the serialized model/config blob.
    op.create_table('dataset',
    sa.Column('id', sa.Integer(), nullable=False),
    sa.Column('name', sa.Unicode(length=255), nullable=True),
    sa.Column('label', sa.Unicode(length=2000), nullable=True),
    sa.Column('description', sa.Unicode(), nullable=True),
    sa.Column('currency', sa.Unicode(), nullable=True),
    sa.Column('default_time', sa.Unicode(), nullable=True),
    sa.Column('schema_version', sa.Unicode(), nullable=True),
    sa.Column('category', sa.Unicode(), nullable=True),
    sa.Column('private', sa.Boolean(), nullable=True),
    sa.Column('created_at', sa.DateTime(), nullable=True),
    sa.Column('updated_at', sa.DateTime(), nullable=True),
    sa.Column('data', sa.Unicode(), nullable=True),
    sa.PrimaryKeyConstraint('id'),
    sa.UniqueConstraint('name')
    )
    # One territory code (e.g. country) per row, per dataset.
    op.create_table('dataset_territory',
    sa.Column('id', sa.Integer(), nullable=False),
    sa.Column('code', sa.Unicode(), nullable=True),
    sa.Column('created_at', sa.DateTime(), nullable=True),
    sa.Column('updated_at', sa.DateTime(), nullable=True),
    sa.Column('dataset_id', sa.Integer(), nullable=True),
    sa.ForeignKeyConstraint(['dataset_id'], ['dataset.id'], ),
    sa.PrimaryKeyConstraint('id')
    )
    # One language code per row, per dataset.
    op.create_table('dataset_language',
    sa.Column('id', sa.Integer(), nullable=False),
    sa.Column('code', sa.Unicode(), nullable=True),
    sa.Column('created_at', sa.DateTime(), nullable=True),
    sa.Column('updated_at', sa.DateTime(), nullable=True),
    sa.Column('dataset_id', sa.Integer(), nullable=True),
    sa.ForeignKeyConstraint(['dataset_id'], ['dataset.id'], ),
    sa.PrimaryKeyConstraint('id')
    )
    # ETL run log entries (operation, status and timing per source).
    op.create_table('run',
    sa.Column('id', sa.Integer(), nullable=False),
    sa.Column('operation', sa.Unicode(), nullable=True),
    sa.Column('status', sa.Unicode(), nullable=True),
    sa.Column('source', sa.Unicode(), nullable=True),
    sa.Column('time_start', sa.DateTime(), nullable=True),
    sa.Column('time_end', sa.DateTime(), nullable=True),
    sa.Column('dataset_id', sa.Integer(), nullable=True),
    sa.ForeignKeyConstraint(['dataset_id'], ['dataset.id'], ),
    sa.PrimaryKeyConstraint('id')
    )
    # Association table: which accounts manage which datasets.
    op.create_table('account_dataset',
    sa.Column('dataset_id', sa.Integer(), nullable=False),
    sa.Column('account_id', sa.Integer(), nullable=False),
    sa.ForeignKeyConstraint(['account_id'], ['account.id'], ),
    sa.ForeignKeyConstraint(['dataset_id'], ['dataset.id'], ),
    sa.PrimaryKeyConstraint('dataset_id', 'account_id')
    )
validate_model(ms) 47 | 48 | @raises(ValidationError) 49 | def test_measure_has_column(self): 50 | ms = self.model['model'].copy() 51 | del ms['measures']['cofinance']['column'] 52 | validate_model(ms) 53 | 54 | @raises(ValidationError) 55 | def test_date_has_column(self): 56 | ms = self.model['model'].copy() 57 | del ms['dimensions']['time']['attributes']['year']['column'] 58 | validate_model(ms) 59 | 60 | @raises(ValidationError) 61 | def test_compound_has_fields(self): 62 | ms = self.model['model'].copy() 63 | del ms['dimensions']['function']['attributes'] 64 | validate_model(ms) 65 | 66 | @raises(ValidationError) 67 | def test_compound_field_with_dash(self): 68 | ms = self.model['model'].copy() 69 | ms['dimensions']['function']['attributes']['id-col'] = \ 70 | ms['dimensions']['function']['attributes']['description'] 71 | del ms['dimensions']['function']['attributes']['description'] 72 | validate_model(ms) 73 | 74 | @raises(ValidationError) 75 | def test_compound_field_short(self): 76 | ms = self.model['model'].copy() 77 | ms['dimensions']['function']['attributes']['i'] = \ 78 | ms['dimensions']['function']['attributes']['description'] 79 | del ms['dimensions']['function']['attributes']['description'] 80 | validate_model(ms) 81 | 82 | @raises(ValidationError) 83 | def test_compound_field_invalid_name(self): 84 | ms = self.model['model'].copy() 85 | ms['dimensions']['function']['attributes']['ba nanana'] = \ 86 | ms['dimensions']['function']['attributes']['description'] 87 | del ms['dimensions']['function']['attributes']['description'] 88 | validate_model(ms) 89 | 90 | @raises(ValidationError) 91 | def test_compound_field_has_column(self): 92 | ms = self.model['model'].copy() 93 | del ms['dimensions']['function']['attributes']['description']['column'] 94 | validate_model(ms) 95 | 96 | def test_set_label_attribute(self): 97 | ms = self.model['model'].copy() 98 | ms['dimensions']['function']['label_attribute'] = 'label' 99 | 
def load_dataset(metadata, source_file, model):
    """ Push one exported dataset into a SpenDB instance.

    Three-step process against the SpenDB HTTP API: (1) create or
    update the dataset record from `metadata`, (2) upload the CSV at
    `source_file` and poll the runs endpoint until the database load
    completes, (3) apply the OLAP `model` mapping.
    """
    name = metadata.get('name')

    # Available metadata fields (name and label are required):
    config = {
        'name': name,
        'label': metadata['label'],
        'private': False,
        'category': metadata['category'],  # 'budget' or 'spending'
        'currency': metadata['currency'],  # e.g. 'USD', 'EUR'
        'languages': metadata['languages'],  # e.g. ['en', 'de']
        'territories': metadata['territories'],  # e.g. ['DE', 'FR']
    }

    # Step 1: Create (or update) the dataset.
    print '[spendb-import] Creating/updating %r' % name
    res = session.get(make_api_path(name))
    if res.status_code == 404:
        # dataset does not exist yet
        res = session.post(make_api_path(), data=json.dumps(config),
                           headers=json_headers)
        assert res.status_code == 200, res.content
    elif res.status_code == 200:
        # update the existing dataset's metadata
        res = session.post(make_api_path(name),
                           data=json.dumps(config),
                           headers=json_headers)
        assert res.status_code == 200, res.content
    else:
        # NOTE(review): this only prints and then continues with the
        # upload anyway -- confirm whether it should abort instead.
        print 'Error accessing dataset: %r' % res.content

    # Step 2: Upload a source data file.
    print '[spendb-import] Uploading %r' % source_file
    upload_url = make_api_path(name, 'sources/upload')
    # Note: there is also /api/3/datasets//sources/submit which
    # will accept a simple URL, then attempt to fetch and load that
    # data file. That API call (unlike this one) does not return a
    # source object.

    files = {'file': open(source_file, 'rb')}
    res = session.post(upload_url, files=files)
    assert res.status_code == 200, res.content

    # This is a bit ugly: we need to wait for the source data
    # to be parsed before a data model can be applied.
    runs_url = res.json().get('runs_url')
    while True:
        res = session.get(runs_url)
        runs = res.json().get('results')
        # Most recent run last; poll its status below.
        runs = sorted(runs, key=lambda r: r.get('time_start'))
        current_run = runs[-1]
        assert current_run['status'] != 'failed'

        # There are multiple operations, we want to wait for
        # the one related to database loading to complete.
        if current_run['status'] == 'complete' and \
                'database' in current_run['operation']:
            break
        print '[spendb-import] Waiting for data to be loaded...'
        time.sleep(5)

    # Step 3: Map source data columns to OLAP measures and dimensions
    print '[spendb-import] Applying model to dataset %r' % name
    res = session.post(make_api_path(name, 'model'),
                       data=json.dumps(model),
                       headers=json_headers)
    assert res.status_code == 200, res.content

    print '[spendb-import] Done.'
@blueprint.route('/datasets/<dataset>/sources/upload', methods=['POST', 'PUT'])
def upload(dataset):
    """ Accept a direct file upload for the dataset and queue it
    for loading into the database. """
    dataset = get_dataset(dataset)
    require.dataset.update(dataset)
    upload_obj = request.files.get('file')
    if not upload_obj or not upload_obj.filename:
        raise BadRequest("You need to upload a file")
    # TODO: consider copying this into a tempfile before upload to make
    # boto happy (it appears to be whacky in it's handling of flask uploads)
    source = extract_fileobj(dataset, fh=upload_obj,
                             file_name=upload_obj.filename)
    load_from_source.delay(dataset.name, source.name)
    return jsonify(source_to_dict(dataset, source))
@blueprint.route('/datasets/<dataset>/serve/<name>')
def serve(dataset, name):
    """ Stream a source's raw data, or redirect to its backing URL
    when the store exposes one (e.g. S3). """
    dataset = get_dataset(dataset)
    package = data_manager.package(dataset.name)
    source = Source(package, name)
    if source.url is not None:
        # The artifact lives behind a URL; let the client fetch it.
        return redirect(source.url)
    mime = source.meta.get('mime_type')
    return send_file(source.fh(), mimetype=mime)
def get_mappings():
    """ Yield ``(dataset_record, mapping)`` pairs for each selected
    Offener Haushalt dataset that has a column mapping defined. """
    for ds in list(engine['dataset']):
        if ds['name'] not in OFFENERHAUSHALT_SETS:
            continue
        ds['data'] = json.loads(ds['data'])

        # Attach the language / territory code lists and raw sources.
        ds['languages'] = [lang['code'] for lang in
                           engine['dataset_language'].find(dataset_id=ds['id'])]
        ds['territories'] = [terr['code'] for terr in
                             engine['dataset_territory'].find(dataset_id=ds['id'])]
        ds['sources'] = [src for src in
                         engine['source'].find(dataset_id=ds['id'])]

        # Add team members for the dataset
        query_stmt = ('SELECT account.name as username FROM account '
                      'INNER JOIN account_dataset '
                      'ON account.id = account_dataset.account_id '
                      'WHERE account_dataset.dataset_id = {dataset_id}')
        query = engine.query(query_stmt.format(dataset_id=ds['id']))
        ds['team'] = [member['username'] for member in query]

        # Datasets without a mapping cannot be exported meaningfully.
        mapping = ds['data'].get('mapping')
        if not mapping:
            continue

        yield ds, mapping
def freeze_all():
    """ Export every mapped dataset: write its metadata to
    ``exports/<name>/dataset.json`` and dump the denormalized facts
    to ``facts.csv`` via the export query. """
    out_base = 'exports'
    for ds, query in get_queries():
        try:
            # Keep the query used for the export alongside the metadata.
            ds['export_query'] = query
            path = os.path.join(out_base, ds['name'])
            if not os.path.isdir(path):
                os.makedirs(path)

            ds_path = os.path.join(path, 'dataset.json')
            with open(ds_path, 'wb') as fh:
                # json_default serializes datetime values to ISO strings.
                json.dump(ds, fh, default=json_default, indent=2)

            res = engine.query(query)
            dataset.freeze(res, filename='facts.csv', prefix=path,
                           format='csv')
        # NOTE(review): broad best-effort catch so one broken dataset
        # does not abort the whole export; failures are only printed.
        except Exception, e:
            print e
""" 15 | column = slugify(cell.column or '', sep='_') 16 | column = column.strip('_') 17 | column = 'column' if not len(column) else column 18 | name, i = column, 2 19 | # de-dupe: column, column_2, column_3, ... 20 | while name in names: 21 | name = '%s_%s' % (name, i) 22 | i += 1 23 | return name 24 | 25 | 26 | def generate_field_spec(row): 27 | """ Generate a set of metadata for each field/column in 28 | the data. This is conformant to jsontableschema. """ 29 | names = set() 30 | fields = [] 31 | for cell in row: 32 | name = column_alias(cell, names) 33 | field = { 34 | 'name': name, 35 | 'title': cell.column, 36 | 'type': celltype_as_string(cell.type), 37 | 'has_empty': False, 38 | 'samples': [] 39 | } 40 | if hasattr(cell.type, 'format'): 41 | field['format'] = cell.type.format 42 | fields.append(field) 43 | return fields 44 | 45 | 46 | def random_sample(value, field, row, num=10): 47 | """ Collect a random sample of the values in a particular 48 | field based on the reservoir sampling technique. """ 49 | # TODO: Could become a more general DQ piece. 50 | if value in field['samples']: 51 | return 52 | if value is None: 53 | field['has_empty'] = True 54 | return 55 | if len(field['samples']) < num: 56 | field['samples'].append(value) 57 | return 58 | j = random.randint(0, row) 59 | if j < (num - 1): 60 | field['samples'][j] = value 61 | 62 | 63 | def convert_row(row, fields, i): 64 | data = {} 65 | for cell, field in zip(row, fields): 66 | value = cell.value 67 | if isinstance(value, datetime): 68 | value = value.date() 69 | if isinstance(value, Decimal): 70 | # Baby jesus forgive me. 71 | value = float(value) 72 | if isinstance(value, basestring) and not len(value.strip()): 73 | value = None 74 | random_sample(value, field, i) 75 | data[field['name']] = value 76 | return data 77 | 78 | 79 | def parse_table(source): 80 | # This is a work-around because messytables hangs on boto file 81 | # handles, so we're doing it via plain old HTTP. 
def validate_table(source):
    """ Run a full parse of the source to infer field metadata and
    row/failure counts, and persist them on the source's meta.

    Returns the source with ``fields``, ``num_records`` and
    ``num_failed`` saved into its metadata.
    """
    # Pre-initialize so an empty parse (e.g. no tables found in the
    # source file) does not hit a NameError on `i`/`fields` below.
    failed = 0
    i = -1
    fields = []
    for i, (exc, fields, row) in enumerate(parse_table(source)):
        if exc is not None:
            log.warning('Error at row %s: %s', i, unicode(exc))
            failed += 1

    log.info("Converted %s rows with %s columns.", i + 1, len(fields))
    source.meta['fields'] = fields
    source.meta['num_records'] = i + 1
    source.meta['num_failed'] = failed
    source.meta.save()
    return source
-------------------------------------------------------------------------------- /spendb/tests/views/api/test_source.py: -------------------------------------------------------------------------------- 1 | import json 2 | from flask import url_for 3 | 4 | from spendb.core import db 5 | from spendb.tests.helpers import csvimport_fixture_path 6 | from spendb.tests.base import ControllerTestCase 7 | from spendb.tests.helpers import load_fixture, make_account 8 | from spendb.tests.helpers import data_fixture 9 | 10 | 11 | class TestSourceApiController(ControllerTestCase): 12 | 13 | def setUp(self): 14 | super(TestSourceApiController, self).setUp() 15 | self.cra = load_fixture('cra') 16 | self.user = make_account('test') 17 | self.auth_qs = {'api_key': self.user.api_key} 18 | self.cra.managers.append(self.user) 19 | self.cra_url = csvimport_fixture_path('../data', 'cra.csv') 20 | db.session.commit() 21 | 22 | def test_source_index(self): 23 | url = url_for('sources_api.index', dataset=self.cra.name) 24 | res = self.client.get(url) 25 | assert res.json['total'] == 0, res.json 26 | 27 | def test_source_upload_anon(self): 28 | url = url_for('sources_api.upload', dataset=self.cra.name) 29 | fh = data_fixture('cra') 30 | res = self.client.post(url, data={ 31 | 'file': (fh, 'cra.csv') 32 | }) 33 | assert '403' in res.status, res.status 34 | 35 | def test_source_upload_no_file(self): 36 | url = url_for('sources_api.upload', dataset=self.cra.name) 37 | res = self.client.post(url, data={}, query_string=self.auth_qs) 38 | assert '400' in res.status, res.status 39 | 40 | def test_source_upload(self): 41 | url = url_for('sources_api.upload', dataset=self.cra.name) 42 | fh = data_fixture('cra') 43 | res = self.client.post(url, data={ 44 | 'file': (fh, 'cra.csv') 45 | }, query_string=self.auth_qs) 46 | assert '403' not in res.status, res.status 47 | 48 | def test_source_sign(self): 49 | # TODO: how to properly test this? 
50 | url = url_for('sources_api.sign', dataset=self.cra.name) 51 | req = {'file_name': 'cra.csv'} 52 | res = self.client.post(url, data=req, 53 | query_string=self.auth_qs) 54 | assert '200' in res.status, res.status 55 | assert 'status' in res.json, res.json 56 | assert res.json['status'] == 'error', res.json 57 | 58 | def test_source_submit_anon(self): 59 | url = url_for('sources_api.submit', dataset=self.cra.name) 60 | res = self.client.post(url, data={ 61 | 'url': self.cra_url 62 | }) 63 | assert '403' in res.status, res.status 64 | 65 | def test_source_submit(self): 66 | url = url_for('sources_api.submit', dataset=self.cra.name) 67 | res = self.client.post(url, data={ 68 | 'url': self.cra_url 69 | }, query_string=self.auth_qs) 70 | assert '200' in res.status, res.status 71 | 72 | def test_source_load(self): 73 | url = url_for('sources_api.upload', dataset=self.cra.name) 74 | fh = data_fixture('cra') 75 | res = self.client.post(url, data={ 76 | 'file': (fh, 'cra.csv') 77 | }, query_string=self.auth_qs) 78 | 79 | self.client.post(url_for('sessions_api.logout')) 80 | 81 | url = url_for('sources_api.load', dataset=self.cra.name, 82 | name='cra.csv') 83 | res = self.client.post(url) 84 | assert '403' in res.status, res.status 85 | res = self.client.post(url, query_string=self.auth_qs) 86 | assert '200' in res.status, res.status 87 | 88 | def test_source_load_non_existing(self): 89 | url = url_for('sources_api.load', dataset=self.cra.name, 90 | name='foo.csv') 91 | res = self.client.post(url, query_string=self.auth_qs) 92 | assert '400' in res.status, res.json 93 | 94 | def test_source_view(self): 95 | url = url_for('sources_api.upload', dataset=self.cra.name) 96 | fh = data_fixture('cra') 97 | res = self.client.post(url, data={ 98 | 'file': (fh, 'cra.csv') 99 | }, query_string=self.auth_qs) 100 | assert res.json['extension'] == 'csv', res.json 101 | assert res.json['mime_type'] == 'text/csv', res.json 102 | url = url_for('sources_api.index', dataset=self.cra.name) 
103 | res = self.client.get(url) 104 | assert res.json['total'] == 1, res.json 105 | frst = res.json['results'][0] 106 | assert frst['extension'] == 'csv', res.json 107 | assert frst['mime_type'] == 'text/csv', res.json 108 | assert frst['api_url'], res.json 109 | 110 | def test_source_serve(self): 111 | url = url_for('sources_api.upload', dataset=self.cra.name) 112 | fh = data_fixture('cra') 113 | res = self.client.post(url, data={ 114 | 'file': (fh, 'cra.csv') 115 | }, query_string=self.auth_qs) 116 | url = url_for('sources_api.serve', dataset=self.cra.name, 117 | name=res.json['name']) 118 | res = self.client.get(url, query_string=self.auth_qs) 119 | assert 'text/csv' in res.headers['Content-Type'], res.json 120 | -------------------------------------------------------------------------------- /spendb/model/provider.py: -------------------------------------------------------------------------------- 1 | from cubes.providers import ModelProvider, link_cube 2 | from cubes.model import Cube, Measure, MeasureAggregate, Dimension 3 | from cubes.sql.store import SQLStore, OPTION_TYPES 4 | from cubes.errors import NoSuchCubeError, NoSuchDimensionError 5 | from cubes.common import coalesce_options 6 | from cubes.logging import get_logger 7 | 8 | from spendb.core import db 9 | from spendb.model import Dataset 10 | 11 | 12 | class SpendingModelProvider(ModelProvider): 13 | 14 | def __init__(self, *args, **kwargs): 15 | super(SpendingModelProvider, self).__init__(*args, **kwargs) 16 | 17 | def requires_store(self): 18 | return True 19 | 20 | def has_cube(self, name): 21 | dataset = Dataset.by_name(name) 22 | if dataset is None: 23 | return False 24 | return dataset.model is not None 25 | 26 | def cube(self, name, locale=None, namespace=None): 27 | dataset = Dataset.by_name(name) 28 | if name is None: 29 | raise NoSuchCubeError("Unknown dataset %s" % name, name) 30 | 31 | measures, dimensions, mappings = [], [], {} 32 | aggregates = [MeasureAggregate('fact_count', 33 | 
label='Number of entries', 34 | function='count')] 35 | 36 | for measure in dataset.model.measures: 37 | cubes_measure = Measure(measure.name, label=measure.label) 38 | measures.append(cubes_measure) 39 | aggregate = MeasureAggregate(measure.name + '_sum', 40 | label=measure.label, 41 | measure=measure.name, 42 | function='sum') 43 | aggregates.append(aggregate) 44 | mappings[measure.name] = measure.column_name 45 | 46 | for dimension in dataset.model.dimensions: 47 | attributes, last_col = [], None 48 | for attr in dimension.attributes: 49 | attributes.append({ 50 | 'name': attr.name, 51 | 'label': attr.label 52 | }) 53 | mappings[attr.ref] = last_col = attr.column_name 54 | 55 | # Workaround because the cubes mapper shortens references 56 | # for single-attribute dimensions to just the dimension name. 57 | if len(attributes) == 1: 58 | mappings[dimension.name] = last_col 59 | 60 | meta = { 61 | 'label': dimension.label, 62 | 'name': dimension.name, 63 | 'cardinality': dimension.cardinality_class, 64 | 'levels': [{ 65 | 'name': dimension.name, 66 | 'label': dimension.label, 67 | 'cardinality': dimension.cardinality_class, 68 | 'attributes': attributes 69 | }] 70 | } 71 | if dimension.key_attribute: 72 | meta['levels'][0]['key'] = dimension.key_attribute.name 73 | if dimension.label_attribute: 74 | meta['levels'][0]['label_attribute'] = \ 75 | dimension.label_attribute.name 76 | meta['levels'][0]['order_attribute'] = \ 77 | dimension.label_attribute.name 78 | dimensions.append(Dimension.from_metadata(meta)) 79 | 80 | cube = Cube(name=dataset.name, 81 | fact=dataset.fact_table.table.name, 82 | aggregates=aggregates, 83 | measures=measures, 84 | label=dataset.label, 85 | description=dataset.description, 86 | dimensions=dimensions, 87 | store=self.store, 88 | mappings=mappings) 89 | 90 | link_cube(cube, locale, provider=self, namespace=namespace) 91 | return cube 92 | 93 | def dimension(self, name, locale=None, templates=[]): 94 | raise NoSuchDimensionError('No global 
class SpendingStore(SQLStore):
    """SQL store backed by the application's shared SQLAlchemy engine."""

    # Name under which the matching model provider is registered with
    # cubes; returned by model_provider_name() below.
    related_model_provider = "spending"

    def model_provider_name(self):
        return self.related_model_provider

    def __init__(self, **options):
        # NOTE(review): this deliberately calls the *grandparent*
        # initializer (``super(SQLStore, self)``), bypassing
        # ``SQLStore.__init__`` — presumably so SQLStore does not create
        # its own engine, since ``connectable`` below reuses the Flask
        # app's ``db.engine``. Confirm this is intentional before
        # changing it.
        super(SQLStore, self).__init__(**options)
        options = dict(options)
        self.options = coalesce_options(options, OPTION_TYPES)
        self.logger = get_logger()
        self.schema = None
        self._metadata = None

    @property
    def connectable(self):
        # Reuse the application's shared SQLAlchemy engine.
        return db.engine

    @property
    def metadata(self):
        # Lazily create MetaData bound to the shared engine.
        if self._metadata is None:
            self._metadata = db.MetaData(bind=self.connectable)
        return self._metadata
@blueprint.route('/datasets/<name>')
def view(name):
    """Return the JSON metadata for a single dataset.

    Responds with an ETag derived from the dataset so unchanged
    responses can be served from cache.
    """
    # NOTE: the '<name>' URL converter was missing from the route
    # (stripped during extraction); restored so Flask can bind the
    # ``name`` parameter this view requires.
    dataset = get_dataset(name)
    etag_cache_keygen(dataset, private=dataset.private)
    return jsonify(dataset)
@blueprint.route('/datasets/<name>/model', methods=['POST', 'PUT'])
def update_model(name):
    """Validate and store a new analytical model for the dataset.

    Requires update permission; returns the stored model on success.
    """
    # NOTE: the '<name>' URL converter was missing from the route
    # (stripped during extraction); restored so Flask can bind ``name``.
    dataset = get_dataset(name)
    require.dataset.update(dataset)
    data = request_data()
    if isinstance(data, dict):
        # The fact table name is server-controlled metadata; always
        # overwrite whatever the client may have sent.
        data['fact_table'] = dataset.fact_table.table_name
    dataset.model = validate_model(data)
    db.session.commit()
    return model(name)
@blueprint.route('/datasets/<name>', methods=['DELETE'])
def delete(name):
    """Drop the dataset's fact table and delete its metadata record."""
    # NOTE: the '<name>' URL converter was missing from the route
    # (stripped during extraction); restored so Flask can bind ``name``.
    dataset = get_dataset(name)
    require.dataset.update(dataset)
    dataset.fact_table.drop()
    db.session.delete(dataset)
    db.session.commit()
    # 410 Gone signals the resource has been removed permanently.
    return jsonify({'status': 'deleted'}, status=410)
""" 20 | __tablename__ = 'dataset' 21 | 22 | id = Column(Integer, primary_key=True) 23 | name = Column(Unicode(255), unique=True) 24 | label = Column(Unicode(2000)) 25 | description = Column(Unicode()) 26 | currency = Column(Unicode()) 27 | category = Column(Unicode()) 28 | private = Column(Boolean, default=False) 29 | created_at = Column(DateTime, default=datetime.utcnow) 30 | updated_at = Column(DateTime, default=datetime.utcnow, 31 | onupdate=datetime.utcnow) 32 | data = Column(JSONType) 33 | 34 | languages = association_proxy('_languages', 'code') 35 | territories = association_proxy('_territories', 'code') 36 | 37 | def __init__(self, data): 38 | self.data = data.copy() 39 | dataset = self.data['dataset'] 40 | del self.data['dataset'] 41 | self.name = dataset.get('name') 42 | self.update(dataset) 43 | self._load() 44 | 45 | @reconstructor 46 | def _load(self): 47 | self.fact_table = FactTable(self) 48 | self._model = None 49 | 50 | def update(self, dataset): 51 | self.label = dataset.get('label') 52 | if 'private' in dataset: 53 | self.private = dataset.get('private') 54 | if 'description' in dataset: 55 | self.description = dataset.get('description') 56 | if 'currency' in dataset: 57 | self.currency = dataset.get('currency') 58 | if 'category' in dataset: 59 | self.category = dataset.get('category') 60 | if 'languages' in dataset: 61 | self.languages = dataset.get('languages', []) 62 | if 'territories' in dataset: 63 | self.territories = dataset.get('territories', []) 64 | 65 | @property 66 | def model(self): 67 | if self._model is None: 68 | if not self.fact_table.exists: 69 | return 70 | data = self.data.get('model') 71 | if not isinstance(data, dict): 72 | return 73 | data['fact_table'] = self.fact_table.table_name 74 | model = Model(data) 75 | if not model.exists: 76 | return 77 | self._model = model 78 | return self._model 79 | 80 | @model.setter 81 | def model(self, model): 82 | self.data['model'] = model 83 | self._model = None 84 | if self.model is 
not None: 85 | self.cube.compute_cardinalities() 86 | self.touch() 87 | 88 | @property 89 | def cube(self): 90 | """ Babbage query cube for the given dataset. """ 91 | if self.model is not None: 92 | return Cube(db.engine, self.name, self.model, 93 | fact_table=self.fact_table.table) 94 | 95 | @property 96 | def fields(self): 97 | return self.data.get('fields', {}) 98 | 99 | @fields.setter 100 | def fields(self, value): 101 | self.data['fields'] = value 102 | 103 | def touch(self): 104 | """ Update the dataset timestamp. This is used for cache 105 | invalidation. """ 106 | self.updated_at = datetime.utcnow() 107 | db.session.add(self) 108 | 109 | def __repr__(self): 110 | return "" % (self.id, self.name) 111 | 112 | def to_dict(self): 113 | return { 114 | 'label': self.label, 115 | 'name': self.name, 116 | 'description': self.description, 117 | 'currency': self.currency, 118 | 'category': self.category, 119 | 'private': self.private, 120 | 'created_at': self.created_at, 121 | 'updated_at': self.updated_at, 122 | 'languages': list(self.languages), 123 | 'territories': list(self.territories), 124 | 'has_model': self.model is not None, 125 | 'api_url': url_for('datasets_api.view', name=self.name) 126 | } 127 | 128 | def to_full_dict(self): 129 | full = self.data.copy() 130 | full['dataset'] = self.to_dict() 131 | return full 132 | 133 | @classmethod 134 | def all_by_account(cls, account, order=True): 135 | """ Query available datasets based on dataset visibility. 
""" 136 | from spendb.model.account import Account 137 | has_user = account and account.is_authenticated() 138 | has_admin = has_user and account.admin 139 | q = db.session.query(cls) 140 | if not has_admin: 141 | criteria = [cls.private == False] # noqa 142 | if has_user: 143 | criteria.append(cls.managers.any(Account.id == account.id)) 144 | q = q.filter(or_(*criteria)) 145 | 146 | if order: 147 | q = q.order_by(cls.label.asc()) 148 | return q 149 | 150 | @classmethod 151 | def by_name(cls, name): 152 | return db.session.query(cls).filter_by(name=name).first() 153 | --------------------------------------------------------------------------------