├── .babelrc
├── .github
├── issue_template.md
└── pull_request_template.md
├── .gitignore
├── .jscsrc
├── LEAD.md
├── LICENSE
├── Makefile
├── README.md
├── app
├── backend
│ ├── controllers.js
│ ├── index.js
│ ├── middlewares.js
│ ├── routes.js
│ ├── services.js
│ └── views
│ │ ├── dashboard.jsx
│ │ ├── default.jsx
│ │ └── page.jsx
├── bootstrap.js
├── config.js
├── content
│ ├── about.md
│ └── faq.md
├── index.js
├── ui
│ ├── scripts
│ │ ├── actions
│ │ │ └── index.js
│ │ ├── components
│ │ │ ├── charts
│ │ │ │ ├── index.js
│ │ │ │ ├── main.js
│ │ │ │ ├── publisher.js
│ │ │ │ └── source.js
│ │ │ ├── overviews
│ │ │ │ ├── index.js
│ │ │ │ ├── main.js
│ │ │ │ └── publisher.js
│ │ │ └── tables
│ │ │ │ ├── filter.js
│ │ │ │ ├── head.js
│ │ │ │ ├── index.js
│ │ │ │ ├── info.js
│ │ │ │ ├── resize.js
│ │ │ │ └── table.js
│ │ ├── containers
│ │ │ ├── App.js
│ │ │ ├── Embed.js
│ │ │ ├── Main.js
│ │ │ └── Publisher.js
│ │ ├── index.js
│ │ ├── reducers
│ │ │ └── index.js
│ │ ├── store
│ │ │ └── configureStore.js
│ │ └── utils
│ │ │ ├── calc.js
│ │ │ ├── index.js
│ │ │ └── ui.js
│ └── styles
│ │ ├── _footer.scss
│ │ ├── _theme.scss
│ │ ├── _variables.scss
│ │ ├── app.scss
│ │ └── dashboard.scss
└── utils.js
├── package.json
├── public
├── scripts
│ └── .gitignore
└── styles
│ └── .gitignore
├── server.js
├── tests
└── index.js
├── webpack.config.base.js
├── webpack.config.development.js
└── webpack.config.production.js
/.babelrc:
--------------------------------------------------------------------------------
1 | {
2 | "presets": ["react", "es2015"]
3 | }
4 |
--------------------------------------------------------------------------------
/.github/issue_template.md:
--------------------------------------------------------------------------------
1 | # Overview
2 |
3 | Please replace this line with full information about your idea or problem. If it's a bug share as much as possible to reproduce it
4 |
5 | ---
6 |
7 | Please preserve this line to notify @roll (lead of this repository)
8 |
--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
1 | # Overview
2 |
3 | Please replace this line with full information about your pull request. Make sure that tests pass before publishing it
4 |
5 | ---
6 |
7 | Please preserve this line to notify @roll (lead of this repository)
8 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Python
2 | .DS_Store
3 | .idea
4 | .projectile
5 | *.sublime-project
6 | *.sublime-workspace
7 | __pycache__/
8 | *.py[cod]
9 | bower_components/*
10 | node_modules/*
11 | .publish/
12 | .sass-cache
13 | _site/*
14 | .vscode/*
15 | jsconfig.json
16 | npm-debug.log
17 |
--------------------------------------------------------------------------------
/.jscsrc:
--------------------------------------------------------------------------------
1 | {
2 | "esnext": true
3 | }
4 |
--------------------------------------------------------------------------------
/LEAD.md:
--------------------------------------------------------------------------------
1 | roll
2 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (C) 2015 Open Knowledge Foundation
2 |
3 | This program is free software: you can redistribute it and/or modify
4 | it under the terms of the GNU Affero General Public License as
5 | published by the Free Software Foundation, either version 3 of the
6 | License, or (at your option) any later version.
7 |
8 | This program is distributed in the hope that it will be useful,
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | GNU Affero General Public License for more details.
12 |
13 | You should have received a copy of the GNU Affero General Public License
14 | along with this program. If not, see <https://www.gnu.org/licenses/>.
15 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: templates
2 |
3 |
4 | LEAD := $(shell head -n 1 LEAD.md)
5 |
6 |
7 | all: list
8 |
9 | templates:
10 | sed -i -E "s/@(\w*)/@$(LEAD)/" .github/issue_template.md
11 | sed -i -E "s/@(\w*)/@$(LEAD)/" .github/pull_request_template.md
12 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Data Quality Dashboard
2 |
3 | Data Quality Dashboard provides access to, and displays statistics on, a collection of published data. This collection of data is logically related: for example, data published by a single government department, or a group of departments.
4 |
5 | The Data Quality Dashboard has been developed in order to display data quality information on the 25K spend data published by the UK Government on [data.gov.uk](https://data.gov.uk/). You can see and interact with this [instance of Data Quality Dashboard here](http://uk-25k.openspending.org/). It is powered by a static database generated with [Data Quality CLI](https://github.com/frictionlessdata/data-quality-cli) that you can find [here](https://github.com/okfn/data-quality-uk-25k-spend).
6 |
7 | The Dashboard can be used for any published collection of data by following a few key steps.
8 |
9 | ## Local development
10 |
11 | ```
12 | # Get the code
13 | git clone https://github.com/okfn/data-quality-dashboard.git
14 |
15 | # Install the dependencies
16 | npm install
17 |
18 | # Just build the sources
19 | npm run build
20 |
21 | # Just run the server
22 | npm run start
23 |
24 | # View the app in your browser
25 | open http://localhost:3000/
26 | ```
27 |
28 | See the `scripts` section in `package.json` for more available commands.
29 |
30 | Read on for details.
31 |
32 | ## Application
33 |
34 | The Data Quality Dashboard is a Node.js application written in ES6, largely using Express and React.
35 |
36 | The `app.backend` module renders the basic views (using React on the server) and is responsible for preparing the data as JSON by parsing the CSV database. It also provides some simple routes for standard pages like FAQ and About.
37 |
38 | The `app.ui` module is a React-Redux application for displaying the data to the user.
39 |
40 | The codebase is written in Node.js-style CommonJS, using ES6 syntax. The `app.ui` code is bundled by [Webpack](http://webpack.github.io/), and `app.backend` is transformed using Babel at runtime.
41 |
42 | ### Remote deployment
43 |
44 | We push to Heroku, and a `postinstall` script ensures that `app.ui` is bundled before the app is served. Make sure you set `NPM_CONFIG_PRODUCTION=false` to include `devDependencies` on Heroku.
45 |
46 | ## Data
47 |
48 | The Data Quality Dashboard reads data from a flat file storage, with data written to CSV and JSON. Any publicly available file storage will do, as long as the file naming and data structure of the files is consistent.
49 |
50 | Currently, we run the database for the UK Spend Publishing Dashboard from a public repository on GitHub. This gives easy access to the files, and enables a version history of the database.
51 |
52 | As GitHub does not support CORS, we then use a proxy that does - [RawGit](https://rawgit.com/).
53 |
54 | When the application loads, it reads the data from the database, parses the content to JSON, and stores the new data representation as JSON. This JSON representation is accessible via an API endpoint that the frontend app uses.
55 |
56 | To configure the database, the application needs to know the base path as a URL.
57 |
58 | For example:
59 |
60 | * `https://rawgit.com/okfn/data-quality-uk-25k-spend/master/data`
61 |
62 | By default, the application expects to find at that base the following files:
63 |
64 | * `instance.json`: Basic metadata for the instance
65 | * `sources.csv`: The list of data sources that are assessed for quality
66 | * `publishers.csv`: The list of publishers that produce these datasources
67 | * `results.csv`: The results as found by SPD-Admin
68 | * `performance.csv`: The performance as found by SPD-Admin
69 | * `runs.csv`: A log of the results run against these resources
70 |
71 | Of course, each of these files must conform to a certain datastructure - think of them as tables in a database. As long as you conform to the structure and expected data within that structure, it does not matter how the database is actually produced.
72 |
73 | For how to change the database see the [Configure database](#configure-database) section.
74 |
75 | ## Schema
76 |
77 | The Data Quality Dashboard expects the following schema.
78 |
79 | ### instance.json
80 |
81 | A single object with the following fields:
82 |
83 | * `name`: The name of this dashboard
84 | * `admin`: The email address of the administrator of this dashboard
85 | * `validator_url`: The URL to a GoodTables API endpoint (eg: `https://goodtables.okfnlabs.org/api/run`)
86 | * `last_modified`: Time when the data was last modified. Should be updated before each database deploy.
87 |
88 | ### sources.csv
89 |
90 | A CSV with the following columns:
91 |
92 | * `id`: A unique identifier for this data source.
93 | * `publisher_id`: The unique identifier of the publisher this data source belongs to.
94 | * `title`: A title for this data source.
95 | * `data`: The permalink URL for this data source.
96 | * `format`: The file format for this data source.
97 | * `last_modified`: The timestamp that indicates when this data source was last modified.
98 | * `period_id`: The publication period of the data source.
99 | * `schema`: The permalink URL for the schema that this data source should be validated against (if any).
100 |
101 | ### publishers.csv
102 |
103 | A CSV with the following columns:
104 |
105 | * `id`: A unique identifier for this publisher.
106 | * `title`: A proper title for this publisher.
107 | * `type`: A signifying type for this publisher.
108 | * `homepage`: The homepage of this publisher as a URL.
109 | * `contact`: The contact person for this publisher.
110 | * `email`: The contact email for this publisher.
111 | * `parent_id`: The parent publisher for this publisher (nested publishers).
112 |
113 | ### results.csv
114 |
115 | A CSV with the following columns:
116 |
117 | * `id`: A unique identifier for this result.
118 | * `source_id`: The identifier for the data source in this result.
119 | * `publisher_id`: The identifier for the publisher in this result.
120 | * `period_id`: The publication period of this result's data source.
121 | * `score`: The score for this result.
122 | * `data`: The permalink URL for this result's data source.
123 | * `schema`: The permalink URL for this result's data source schema (if any).
124 | * `summary`: A summary of this result.
125 | * `run_id`: The identifier of the run in which this result was generated.
126 | * `timestamp`: The timestamp for this result.
127 | * `report`: The base URL to a more detailed report
128 |
129 | ### performance.csv
130 |
131 | A CSV with the following columns:
132 |
133 | * `publisher_id`: The identifier for the publisher.
134 | * `period_id`: The time span for the analysis.
135 | * `files_count`: Number of files published during the above mentioned time span.
136 | * `score`: Score for the above mentioned files.
137 | * `valid`: How many of the above mentioned files are valid.
138 | * `files_count_to_date`: Total number of files published up to this period.
139 | * `score_to_date`: Score of all the files published up to this period.
140 | * `valid_to_date`: Number of valid files from all published up to this period.
141 |
142 | ### runs.csv
143 |
144 | A CSV with the following columns:
145 |
146 | * `id`
147 | * `timestamp`
148 | * `total_score`
149 |
150 |
151 | ## Configure database
152 |
153 | The database can be configured through the following environment variables:
154 |
155 | * `DATABASE_LOCATION`: Base URL for the files.
156 | * `PUBLISHER_TABLE`: Name of the file containing the publishers (relative to the DATABASE_LOCATION).
157 |
158 | Following this pattern, you can also configure `SOURCE_TABLE`, `RUN_TABLE`, `PERFORMANCE_TABLE` and `INSTANCE_TABLE`.
159 |
160 | ## Tooling
161 |
162 | In order to generate the result set for a Data Quality Dashboard, we built a command line utility that is designed to be run by a developer at regular intervals (as relevant for the data being assessed). This tool, [Data Quality CLI](https://github.com/okfn/data-quality-cli), can be configured to assess data quality based on the following metrics:
163 |
164 | * Timeliness
165 | * Structural Validity
166 | * Schema Validity
167 |
168 | Note that, like the Data Quality Dashboard itself, the CLI has currently only been tested on the UK 25K spend data.
169 |
--------------------------------------------------------------------------------
/app/backend/controllers.js:
--------------------------------------------------------------------------------
1 | 'use strict'
2 |
3 | import Promise from 'bluebird'
4 | import path from 'path'
5 | import fs from 'fs'
6 |
7 | Promise.promisifyAll(fs)
8 |
/**
 * Build an Express handler that renders a content file through the `page` view.
 *
 * The file is read with `fs.readFileAsync`, the promisified variant installed
 * on `fs` by `Promise.promisifyAll(fs)` at module load.
 *
 * @param {string} filename - File name, resolved against the `contentDir` config value.
 * @param {string} title - Title handed to the view.
 * @returns {Function} Handler `(req, res, next)` that returns the pending promise.
 */
function makePage(filename, title) {
  return function(req, res, next) {
    var config = req.app.get('config')
    var filepath = path.join(config.get('contentDir'), filename)
    var backend_config = config.get('backend')
    return fs.readFileAsync(filepath, 'utf8')
      .then(function(content) {
        return res.render('page', {
          content: content,
          title: title,
          showPricing: backend_config['showPricing']
        })
      })
      .catch(function(err) {
        console.trace(err)
        // Logging alone leaves the request without a response, so hand the
        // failure to Express (or answer directly when no `next` was supplied)
        if (typeof next === 'function') {
          return next(err)
        }
        return res.status(500).send('Internal Server Error')
      })
  }
}
24 |
/**
 * Render the stand-alone dashboard view.
 *
 * Passes `embed: false` together with the `showPricing` flag taken from the
 * `backend` configuration section.
 */
function dashboard(req, res) {
  const { showPricing } = req.app.get('config').get('backend')
  return res.render('dashboard', { embed: false, showPricing })
}
29 |
/**
 * Render the dashboard view in embed mode (`embed: true`).
 */
function embed(req, res) {
  const viewOptions = { embed: true }
  return res.render('dashboard', viewOptions)
}
33 |
/**
 * Respond with the `db` entry of the application cache as JSON.
 */
function api(req, res) {
  const cache = req.app.get('cache')
  return res.json(cache.get('db'))
}
38 |
/**
 * Redirect the client to the `pricingPageUrl` found in the `backend`
 * configuration section.
 */
function pricing(req, res) {
  const { pricingPageUrl } = req.app.get('config').get('backend')
  res.redirect(pricingPageUrl)
}
43 |
44 | export default {
45 | about: makePage('about.md', 'About'),
46 | faq: makePage('faq.md', 'FAQ'),
47 | pricing,
48 | dashboard,
49 | embed,
50 | api
51 | }
52 |
--------------------------------------------------------------------------------
/app/backend/index.js:
--------------------------------------------------------------------------------
1 | 'use strict'
2 |
3 | import controllers from './controllers'
4 | import middlewares from './middlewares'
5 | import routes from './routes'
6 | import services from './services'
7 |
8 | export { controllers, middlewares, routes, services }
9 |
--------------------------------------------------------------------------------
/app/backend/middlewares.js:
--------------------------------------------------------------------------------
1 | 'use strict'
2 |
3 | import _ from 'lodash'
4 | import services from './services'
5 |
/**
 * Middleware: fetch the instance metadata and keep the app cache in sync.
 *
 * - If no instance is cached, or `cacheData` is falsy, the fetched instance
 *   is stored.
 * - Otherwise, if `last_modified` differs from the cached copy, the whole
 *   cache is flushed and the new instance stored.
 *
 * Fetch or cache failures are logged and forwarded with `next(err)`.
 *
 * @returns {Promise} Settles once `next` has been called.
 */
function getInstance(req, res, next) {
  var cacheData = req.app.get('config').get('cacheData')
  var cache = req.app.get('cache')
  return services.getInstance()
    .then(function(result) {
      var cached = cache.get('instance')
      if (_.isEmpty(cached) || !cacheData) {
        // cache the instance
        cache.set('instance', result)
      } else if (cached.last_modified !== result.last_modified) {
        // flush cached data
        cache.flushAll()
        cache.set('instance', result)
      }
    })
    .then(function() {
      return next()
    }, function(err) {
      // Two-argument form: errors thrown by next() itself are not re-forwarded,
      // but a failed fetch no longer leaves the request hanging
      console.trace(err)
      return next(err)
    })
}
27 |
/**
 * Middleware: make sure the `db` cache entry is populated.
 *
 * The database is rebuilt with `services.makeDB()` when nothing is cached or
 * when `cacheData` is falsy; otherwise the cached copy is left untouched.
 * A failed rebuild is logged and forwarded with `next(err)`.
 */
function getDB(req, res, next) {
  var cacheData = req.app.get('config').get('cacheData')
  var cache = req.app.get('cache')
  if (!_.isEmpty(cache.get('db')) && cacheData) {
    return next()
  }
  return services.makeDB()
    .then(function(result) {
      // cache the db
      cache.set('db', result)
    })
    .then(function() {
      return next()
    }, function(err) {
      // Forward the failure so the request is answered instead of hanging
      console.trace(err)
      return next(err)
    })
}
42 |
43 |
/**
 * Middleware: expose the cached instance to the views as
 * `res.locals.instance`, falling back to an empty object when the cache entry
 * is falsy.
 */
function setLocals(req, res, next) {
  const cachedInstance = req.app.get('cache').get('instance')
  res.locals.instance = cachedInstance ? cachedInstance : {}
  return next()
}
48 |
49 | export default { getInstance, getDB, setLocals }
50 |
--------------------------------------------------------------------------------
/app/backend/routes.js:
--------------------------------------------------------------------------------
1 | 'use strict'
2 |
3 | import { Router } from 'express'
4 | import controllers from './controllers'
5 |
/**
 * Build the application router.
 *
 * A fresh Router is created on every call. The previous module-level instance
 * was shared, so calling `routes()` more than once stacked duplicate handlers
 * on it.
 *
 * Registration order matters: the catch-all `*` route must stay last so that
 * the specific pages and the embed routes are matched first.
 *
 * @returns {Router} Router with all page, API, embed and dashboard routes.
 */
export default function routes() {
  let router = Router()
  router.get('/about', controllers.about)
  router.get('/faq', controllers.faq)
  router.get('/pricing', controllers.pricing)
  router.get('/api', controllers.api)
  router.get('/embed', controllers.embed)
  router.get(/^(\/embed)\/.*/, controllers.embed)
  router.get('*', controllers.dashboard)
  return router
}
18 |
--------------------------------------------------------------------------------
/app/backend/services.js:
--------------------------------------------------------------------------------
1 | 'use strict'
2 |
3 | import Promise from 'bluebird'
4 | import _ from 'lodash'
5 | import config from '../config'
6 | import utils from '../utils'
7 |
/**
 * Request the instance metadata from the JSON endpoint named by
 * `backend.instance` in the configuration.
 */
function getInstance() {
  const { instance } = config.get('backend')
  return utils.getJSONEndpoint(instance)
}
11 |
/**
 * Request the publishers table from the CSV endpoint named by
 * `backend.publishers` in the configuration.
 */
function getPublisherData() {
  const { publishers } = config.get('backend')
  return utils.getCSVEndpoint(publishers)
}
15 |
/**
 * Request the sources table from the CSV endpoint named by `backend.sources`
 * in the configuration.
 */
function getSourceData() {
  const { sources } = config.get('backend')
  return utils.getCSVEndpoint(sources)
}
19 |
/**
 * Request the results table from the CSV endpoint named by `backend.results`
 * in the configuration.
 */
function getResultData() {
  const { results } = config.get('backend')
  return utils.getCSVEndpoint(results)
}
23 |
/**
 * Request the runs table from the CSV endpoint named by `backend.runs` in the
 * configuration.
 */
function getRunData() {
  const { runs } = config.get('backend')
  return utils.getCSVEndpoint(runs)
}
27 |
/**
 * Request the performance table from the CSV endpoint named by
 * `backend.performance` in the configuration.
 */
function getPerformanceData() {
  const { performance } = config.get('backend')
  return utils.getCSVEndpoint(performance)
}
31 |
/**
 * Start all six data requests and combine their results with `processData`
 * once every one of them has resolved (via `Promise.join`).
 */
function makeDB() {
  const pending = [
    getInstance(),
    getPublisherData(),
    getSourceData(),
    getResultData(),
    getRunData(),
    getPerformanceData()
  ]
  return Promise.join(...pending, processData)
}
36 |
/**
 * Bundle the six loaded tables under a single `data` key, the shape that is
 * cached as the database.
 *
 * @returns {{data: Object}} Object with `instance`, `publishers`, `sources`,
 *   `results`, `runs` and `performance` under `data`.
 */
function processData(instance, publishers, sources, results, runs, performance) {
  const data = { instance, publishers, sources, results, runs, performance }
  return { data }
}
49 |
50 | export default { getInstance, makeDB }
51 |
--------------------------------------------------------------------------------
/app/backend/views/dashboard.jsx:
--------------------------------------------------------------------------------
1 | 'use strict'
2 |
3 | import React, { Component } from 'react'
4 | import DefaultView from './default'
5 |
6 | class DashboardView extends Component {
7 | render() {
8 | const { instance, embed, showPricing} = this.props
9 | return (
10 |
11 |
12 |
13 | )
14 | }
15 | }
16 |
17 | export default DashboardView
18 |
--------------------------------------------------------------------------------
/app/backend/views/default.jsx:
--------------------------------------------------------------------------------
1 | 'use strict'
2 |
3 | import React, { Component } from 'react'
4 |
5 | class DefaultView extends Component {
6 | render() {
7 | const { children, instance, embed, showPricing } = this.props
8 | return (
9 |
10 |
23 |
24 | )
25 | }
26 | }
27 |
28 | export default PageView
29 |
--------------------------------------------------------------------------------
/app/bootstrap.js:
--------------------------------------------------------------------------------
1 | 'use strict'
2 |
3 | import { createEngine } from 'express-react-views'
4 | import path from 'path'
5 | import config from './config'
6 | import _ from 'lodash'
7 | import NodeCache from 'node-cache'
8 | import { middlewares, routes } from './backend'
9 |
/**
 * Configure an Express application: settings, JSX view engine, static files,
 * the data middlewares and the routes.
 *
 * @param {Object} app - Express application to configure.
 * @param {Function} express - The express module (used for `express.static`).
 * @returns {Object} The same `app`, configured.
 */
export default function bootstrap(app, express) {
  const viewsDir = path.join(__dirname, 'backend', 'views')
  const staticDir = path.join(path.dirname(__dirname), 'public')
  // NOTE: We compile ES6 at runtime, in server.js, hence transformViews is
  // false due to some weirdness in express-react-views
  // https://github.com/reactjs/express-react-views/issues/40
  const jsxEngine = createEngine({transformViews: false})
  const cache = new NodeCache()

  // Settings read back later through app.get()/req.app.get()
  app.set('config', config)
  app.set('port', config.get('port'))
  app.set('cache', cache)
  app.set('views', viewsDir)
  app.set('view engine', 'jsx')
  app.engine('jsx', jsxEngine)

  // Static assets first, then the data middlewares, then the routes
  app.use(express.static(staticDir))
  const dataMiddlewares = [
    middlewares.getInstance,
    middlewares.getDB,
    middlewares.setLocals
  ]
  app.use(dataMiddlewares)
  app.use('', routes())
  return app
}
34 |
--------------------------------------------------------------------------------
/app/config.js:
--------------------------------------------------------------------------------
1 | 'use strict'
2 |
3 | import _ from 'lodash'
4 | import path from 'path'
5 | import nconf from 'nconf'
6 | import utils from './utils'
7 |
// Values from settings.json (next to the app directory) take precedence over
// the defaults registered below.
nconf.file({
  file: path.join(path.dirname(__dirname), 'settings.json')
})

/**
 * Interpret an environment variable as a boolean flag.
 *
 * Environment variables are always strings, so `CACHE_DATA=false` used to be
 * truthy and caching could never be switched off through the environment.
 *
 * @param {string|undefined} value - Raw environment value.
 * @param {boolean} fallback - Result when the variable is unset or empty.
 * @returns {boolean} `false` for "0", "false", "no" or "off" (case-insensitive),
 *   `true` for any other non-empty value.
 */
function envFlag(value, fallback) {
  if (value === undefined || value === '') {
    return fallback
  }
  var normalized = String(value).trim().toLowerCase()
  return ['0', 'false', 'no', 'off'].indexOf(normalized) === -1
}

nconf.defaults({
  port: process.env.PORT || 3000,
  backend: utils.getBackend(),
  cacheData: envFlag(process.env.CACHE_DATA, true),
  contentDir: process.env.CONTENT_DIR || path.join(__dirname, 'content')
})
18 |
19 | export default nconf
20 |
--------------------------------------------------------------------------------
/app/content/about.md:
--------------------------------------------------------------------------------
1 | The Spend Publishing Dashboard tracks the timeliness and quality of
2 | the spend data published by UK Government Departments as part of their
3 | transparency commitments.
4 |
5 | Based on the analysis of several thousand individual data files
6 | published by more than thirty departments, it provides a simple and
7 | easy-to-understand overview of performance on key metrics such as:
8 |
9 | * Timeliness: are departments publishing data (at all) and doing so in
10 | a timely manner?
11 | * Quality: are departments publishing "good" data, that is, well
12 | structured and in the standard, prescribed format?
13 |
14 | The goal of the dashboard is to support (and drive) improvement in the
15 | quality of expenditure data published by government entities---be it
16 | local authorities, departments or others. Specifically, it aims to:
17 |
18 | * Enable policy-makers and the public to see how departments and
19 | local authorities are performing against mandated publication
20 | requirements
21 | * Allow identification of best-practice (and worst-practice) for
22 | learning and improvement
23 | * Provide a starting point for those looking to acquire and
24 | consolidate data for their own work and analysis (for example, we
25 | have a list of the nearly 12,000 spend-related files published by UK
26 | government departments)
27 |
28 | As part of the development of the dashboard we have also created
29 | various related tools including the online service
30 | ["GoodTables"][goodtables] that allows users to check the quality of
31 | their CSV or XLS spend data files by validating them against existing
32 | government recommendations such as HMT recommendations for Departments
33 | and the
34 | [Local Government Transparency Code](https://www.gov.uk/government/publications/local-government-transparency-code-2014).
35 |
36 | [goodtables]: http://goodtables.okfnlabs.org/
37 |
38 | ## Background
39 |
40 | The UK leads the world in terms of the publication of [open data][od]
41 | on public finances. Fiscal transparency and provision of open data in
42 | this area are seen as central to the government's transparency and
43 | open data strategy, helping to promote government efficiency and
44 | effectiveness and empowering citizens with an understanding of where
45 | their tax money goes.
46 |
47 | Data is published on spending at both the national and the local level
48 | along with related budgetary and financial data. Specifically, the
49 | Government requires regular publication of detailed, transactional,
50 | expenditure information by departments and local authorities - that
51 | is, information on all individual spending items from monthly mobile
52 | phone contracts to major software systems.
53 |
54 | At the national level, information on expenditure over £25k is one of
55 | the few mandated datasets that Departments must publish. Similarly at
56 | the local level, the
57 | [Local Government Transparency Code][transparency-code] requires
58 | publication of spending over £500 on a quarterly basis. Specifically,
59 | paragraph 19 requires publication of itemised spending over £500 on a
60 | quarterly basis on items such as individual invoices, grant payments,
61 | expense payments, payments for goods and services, rent, credit notes
62 | over £500, and transactions with other public bodies. Paragraph 42
63 | recommends - but does not mandate - extending this to publishing on a
64 | monthly basis covering all items over £250 and including the total
65 | amount spent on remuneration over the period, as well as classifying
66 | expenditure using the Chartered Institute of Public Finance and
67 | Accountancy Service Reporting Code of Practice to enable comparability
68 | between local authorities.
69 |
70 | [transparency-code]: https://www.gov.uk/government/publications/local-government-transparency-code-2014
71 |
72 | [od]: http://okfn.org/open
73 |
74 | ## The Problem
75 |
76 | However, whilst the volume of data being published is impressive, the quality is
77 | often less so. Poor quality data greatly reduces the usability and
78 | value of the data released - for business, for researchers, for
79 | journalists, for citizens and for government itself. Specific quality
80 | issues include:
81 |
82 | * Format: Data is frequently not provided in the recommended format.
83 | Even within a given department, data is often published in a variety
84 | of formats and structures spread over many files. For example, the
85 | Greater London Authority publishes their spend data in over 65
86 | different CSV files which between them are formatted in nearly 30
87 | different ways! This means that any user of the data must spend,
88 | literally, several days cleaning this data up in order to have a
89 | single consolidated set of data.
90 | * Timeliness: expenditure data is often not published by departments
91 | or local authorities on a timely basis. For example, the
92 | [UK Departmental Spending Report on data.gov.uk](https://data.gov.uk/data/openspending-report/index)
93 | shows that less than 15% of departments have published up-to-date
94 | spending data and some departments are more than 6 months out of
95 | date. This obviously substantially reduces the value of the data to
96 | many potential users both inside and outside government.
97 | * Missing “codings”: most interesting uses of spending data involve
98 | aggregating individual transactions by a particular attribute - for
99 | example, calculating how much was spent with a given supplier
100 | involves summing all transactions with a particular supplier or
101 | calculating spend on training would involve summing all transactions
102 | coded as being related to training. However, many published datasets
103 | lack reliable codings. In particular, most spend data does not
104 | identify suppliers with a unique identifier such as a company number
105 | even within a single data file (let alone across data files from
106 | different publishers). Similarly, most spend data does not include
107 | any useful classification of transactions such as a code from a
108 | chart of accounts or a project code (such as included in HMT PESA
109 | data). This means data users must engage in the laborious (and
110 | error-prone) task of normalising and enhancing data (for example,
111 | attempting to correct variant spellings of the same company name or
112 | adding classifiers to expenditure).
113 | * Unconsolidated: data is published in individual files on a monthly
114 | or quarterly basis per department or local authority. However, most
115 | uses of the data involve access to more consolidated data (for
116 | example, one wants to see spending over a period of time (rather
117 | than for just one month) or to compare across departments or
118 | authorities).
119 |
120 | Lastly, though not a data quality issue in the narrow sense, we would add:
121 |
122 | * Usability: many (potential) users of spending data will struggle
123 | when presented with a simple page containing dozens of CSV
124 | files. Providing a simple browser and/or visualisations of the spend
125 | data is not hard to do and would greatly enhance the usability of
126 | the spending data to a large set of actual or potential users
127 | including policy-makers and citizens (whilst also encouraging people
128 | who wanted to dig deeper into the raw data to do so). Note:
129 | provision of this kind of interface is often directly dependent on
130 | resolving the previously mentioned quality issues (e.g. you can’t
131 | provide a useful visualisation without consolidated data that has
132 | useful codings).
133 |
134 | ## The Dashboard
135 |
136 | We need to drive and support improvements in the quality and usability
137 | of spending data. This Spend Publishing Dashboard is
138 |
139 | In particular, the Spend Publishing Dashboard exists to provide a simple and
140 | easy-to-understand overview of how
141 |
142 | Like all good simple visual representations it is based on large
143 | amounts of behind the scenes work.
144 |
145 | ## FAQs
146 |
147 | ### Is the project free/open-source?
148 |
149 | Yes, all code is open-source and is published on
150 | [GitHub](https://github.com/okfn/spend-publishing-dashboard).
151 |
152 | ### Is the data open?
153 |
154 | Yes, all the data we have produced---including a database of all
155 | spending files and their quality---is open and published online.
156 |
157 | ### Why are local authorities not included?
158 |
159 | We intend to also support spend data publication by local authorities.
160 | However, unlike departmental spending which is centralized on
161 | data.gov.uk and easily locatable due to consistent tagging, local
162 | authority spending is spread across hundreds of local authority
163 | websites in the UK. Tracking down the thousands of different data
164 | files ultimately has proved too resource-intensive for our limited
165 | funding. However, it is something we are focused on for the future.
166 |
--------------------------------------------------------------------------------
/app/content/faq.md:
--------------------------------------------------------------------------------
1 | ## What is a "schema"?
2 |
3 | A schema is a description of what a given data structure (e.g. a
4 | single CSV file) should look like. Any deviation between the schema
5 | and the actual data structure could mean that manual work must be done
6 | to "correct" the data structure.
7 |
8 | ## How does the scoring system work?
9 |
10 | ## What does "correct" mean?
11 |
12 | "Correct" means 100% compliant with the specified schema.
13 |
--------------------------------------------------------------------------------
/app/index.js:
--------------------------------------------------------------------------------
1 | 'use strict'
2 |
3 | import express from 'express'
4 | import bootstrap from './bootstrap'
5 |
/**
 * Create the Express application, configure it with `bootstrap` and start
 * listening on the configured `port` setting, logging the port once the
 * server is up.
 */
export function start() {
  const server = bootstrap(express(), express)
  const announce = function() {
    console.log('Serving from port ' + server.get('port'))
  }
  server.listen(server.get('port'), announce)
}
13 |
--------------------------------------------------------------------------------
/app/ui/scripts/actions/index.js:
--------------------------------------------------------------------------------
1 | 'use strict'
2 |
3 | import 'isomorphic-fetch'
4 | import _ from 'lodash'
5 |
// Action types for loading the dataset from the `/api` endpoint.
export const REQUEST_DATA = 'REQUEST_DATA'
export const RECEIVE_DATA = 'RECEIVE_DATA'

// Action types for resolving the publisher that is currently in focus.
export const REQUEST_ACTIVE_PUBLISHER = 'REQUEST_ACTIVE_PUBLISHER'
export const RECEIVE_ACTIVE_PUBLISHER = 'RECEIVE_ACTIVE_PUBLISHER'
10 |
/**
 * Action creator announcing that a data request has started.
 *
 * @returns {{type: string, payload: null}} a REQUEST_DATA action with no payload
 */
function requestData() {
  const action = { type: REQUEST_DATA, payload: null }
  return action
}
17 |
/**
 * Action creator carrying the data received from the API.
 *
 * @param {Object} json - parsed response body; its `data` property becomes
 *   the action payload
 * @returns {{type: string, payload: *}} a RECEIVE_DATA action
 */
function receiveData(json) {
  const action = { type: RECEIVE_DATA }
  action.payload = json.data
  return action
}
24 |
/**
 * Action creator announcing that the active publisher is being resolved.
 *
 * @returns {{type: string, payload: null}} a REQUEST_ACTIVE_PUBLISHER action
 *   with no payload
 */
function requestActivePublisher() {
  const action = { type: REQUEST_ACTIVE_PUBLISHER, payload: null }
  return action
}
31 |
/**
 * Action creator carrying the data object once the active publisher has
 * been resolved.
 *
 * @param {*} data - value to use, unchanged, as the action payload
 * @returns {{type: string, payload: *}} a RECEIVE_ACTIVE_PUBLISHER action
 */
function receiveActivePublisher(data) {
  const action = { type: RECEIVE_ACTIVE_PUBLISHER }
  action.payload = data
  return action
}
38 |
/**
 * Thunk creator that loads the dataset from `/api`.
 *
 * The thunk dispatches `requestData()` immediately, then dispatches
 * `receiveData(json)` once the response body has been parsed. When `lookup`
 * is truthy, it afterwards dispatches `getActivePublisherIfNeeded(lookup)`.
 *
 * @param {*} [lookup] - optional publisher identifier to resolve after the
 *   data has been received
 * @returns {Function} thunk taking `dispatch` and returning the promise chain
 */
function fetchData(lookup) {
  return dispatch => {
    dispatch(requestData())
    const loaded = fetch('/api')
      .then(response => response.json())
      .then(json => dispatch(receiveData(json)))
    if (!lookup) {
      return loaded
    }
    return loaded.then(() => dispatch(getActivePublisherIfNeeded(lookup)))
  }
}
52 |
/**
 * Thunk creator that resolves the publisher identified by `lookup` and
 * attaches the sources, results and performance records that belong to it.
 *
 * The publisher is copied into a fresh object before the related records
 * are attached, so the entry inside `data.publishers` (which lives in the
 * store) is never mutated. If no publisher matches `lookup`, the active
 * publisher is an object holding only the three (empty or filtered) record
 * lists instead of the thunk throwing a TypeError.
 *
 * @param {Object} data - current data slice; `publishers`, `sources`,
 *   `results` and `performance` are read from it
 * @param {*} lookup - publisher identifier, matched against `id` on
 *   publishers and `publisher_id` on the related records
 * @returns {Function} thunk taking `dispatch`; returns the result of
 *   dispatching `receiveActivePublisher` with a copy of `data` extended
 *   with `activePublisher`
 */
function getActivePublisher(data, lookup) {
  return dispatch => {
    dispatch(requestActivePublisher())
    const ownedByPublisher = { 'publisher_id': lookup }
    // Object.assign skips an undefined source, so an unknown `lookup`
    // simply yields an object without publisher fields.
    const activePublisher = Object.assign(
      {},
      _.find(data.publishers, { 'id': lookup }),
      {
        sources: _.filter(data.sources, ownedByPublisher),
        results: _.filter(data.results, ownedByPublisher),
        performance: _.filter(data.performance, ownedByPublisher)
      }
    )
    const newData = Object.assign({}, data, { activePublisher: activePublisher })
    return dispatch(receiveActivePublisher(newData))
  }
}
65 |
/**
 * Decide whether the dataset should be requested.
 *
 * Returns `true` when `state.data.isEmpty` is truthy; otherwise returns
 * `false` while `state.ui.isFetching` is truthy and `true` when it is not.
 *
 * @param {{ui: Object, data: Object}} state - root state
 * @returns {boolean} whether a fetch should be started
 */
function shouldFetchData(state) {
  const { ui, data } = state
  // `ui.isFetching` is only consulted when the data is not flagged empty.
  return data.isEmpty ? true : !ui.isFetching
}
76 |
/**
 * Decide whether the active publisher has to be (re)resolved.
 *
 * @param {Object} data - data slice whose `activePublisher` is inspected
 * @param {*} lookup - identifier of the publisher that should be active
 * @returns {boolean} `false` only when a non-empty `activePublisher` with a
 *   strictly equal `id` is already present, `true` otherwise
 */
function shouldGetActivePublisher(data, lookup) {
  const current = data.activePublisher
  const alreadyActive = !_.isEmpty(current) && current.id === lookup
  return !alreadyActive
}
86 |
/**
 * Thunk creator that resolves the active publisher only when
 * `shouldGetActivePublisher` reports that it is required for the current
 * `data` slice.
 *
 * @param {*} lookup - identifier of the publisher that should be active
 * @returns {Function} thunk taking `dispatch` and `getState`; returns the
 *   result of dispatching `getActivePublisher`, or `undefined` when nothing
 *   had to be done
 */
export function getActivePublisherIfNeeded(lookup) {
  return (dispatch, getState) => {
    const slice = getState().data
    if (!shouldGetActivePublisher(slice, lookup)) {
      return undefined
    }
    return dispatch(getActivePublisher(slice, lookup))
  }
}
95 |
/**
 * Thunk creator that loads the dataset and/or resolves the active
 * publisher, depending on what the current state is missing.
 *
 * - When `shouldFetchData` asks for a fetch, `fetchData(lookup)` is
 *   dispatched; `fetchData` itself resolves the publisher afterwards when a
 *   lookup was given.
 * - Otherwise, when a lookup was given and that publisher is not already
 *   active, `getActivePublisher` is dispatched against the current data.
 * - Otherwise nothing is dispatched.
 *
 * The publisher check receives `state.data` (where `activePublisher` is
 * stored), matching `getActivePublisherIfNeeded`; passing the root state
 * made the check always report that resolution was needed. The check is
 * also skipped without a lookup, because resolving an undefined publisher
 * cannot succeed.
 *
 * @param {*} [lookup] - optional identifier of the publisher to activate
 * @returns {Function} thunk taking `dispatch` and `getState`; returns the
 *   result of the dispatched thunk, or `undefined` when nothing was needed
 */
export function fetchDataIfNeeded(lookup) {
  return (dispatch, getState) => {
    const currentState = getState()
    if (shouldFetchData(currentState)) {
      return dispatch(fetchData(lookup))
    }
    if (lookup && shouldGetActivePublisher(currentState.data, lookup)) {
      return dispatch(getActivePublisher(currentState.data, lookup))
    }
    return undefined
  }
}
117 |
--------------------------------------------------------------------------------
/app/ui/scripts/components/charts/index.js:
--------------------------------------------------------------------------------
1 | 'use strict'
2 |
// Expose the default exports of the chart modules as named exports.
export { default as Main } from './main'
export { default as Publisher } from './publisher'
7 |
--------------------------------------------------------------------------------
/app/ui/scripts/components/charts/main.js:
--------------------------------------------------------------------------------
1 | 'use strict'
2 |
3 | import React, { Component } from 'react'
4 | import { Line as LineChart } from 'react-chartjs'
5 | import { ui as UIUtils } from '../../utils'
6 | import { calc as CalcUtils } from '../../utils'
7 |
8 | class MainChart extends Component {
9 | render() {
10 | let linePayload = UIUtils.makeScoreLinePayload(
11 | this.props.results,
12 | this.props.performance
13 | )
14 | return (
15 |
16 |