28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/tools/dspltools/examples/dsplgen/dsplgen_advanced.csv:
--------------------------------------------------------------------------------
1 | date[type=date;format=MM/dd/yyyy],first_category[slice_role=dimension;rollup=true;total_val=total],second_category[slice_role=dimension;rollup=true],first_value[slice_role=metric;type=integer],second_value[slice_role=metric;type=float]
2 | 1/1/2010,red,tall,10,23
3 | 1/1/2010,red,short,90,1
4 | 1/1/2010,blue,tall,12,31
5 | 1/1/2010,blue,short,21,231
6 | 1/1/2010,green,short,20,212
7 | 1/1/2010,total,tall,10,98
8 | 1/1/2010,total,short,-30,39
9 | 1/2/2010,red,tall,10,91
10 | 1/2/2010,red,short,32,123
11 | 1/2/2010,blue,tall,22,121
12 | 1/2/2010,blue,short,20,32
13 | 1/2/2010,green,short,1,19
14 | 1/2/2010,total,short,2,10
15 | 1/3/2010,red,short,10,34
16 | 1/3/2010,red,tall,10,34
17 | 1/3/2010,blue,short,93,21
18 | 1/3/2010,blue,tall,39,12
19 | 1/3/2010,green,short,31,31
20 | 1/3/2010,green,tall,21,31
21 | 1/3/2010,total,short,13,123
22 | 1/4/2010,red,tall,40,21
23 | 1/4/2010,red,short,22,12
24 | 1/4/2010,blue,tall,39,21
25 | 1/4/2010,blue,short,10,12
26 | 1/4/2010,green,tall,30,23
27 | 1/4/2010,green,short,10,123
28 | 1/4/2010,total,tall,-10,23
29 | 1/4/2010,total,short,31,661
30 |
--------------------------------------------------------------------------------
/tools/dspltools/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2
2 | #
3 | # Copyright 2018 Google LLC
4 | #
5 | # Use of this source code is governed by a BSD-style
6 | # license that can be found in the LICENSE file or at
7 | # https://developers.google.com/open-source/licenses/bsd
8 |
9 | """Setup script for the DSPLtools suite."""
10 |
11 | import setuptools
12 | from distutils.core import setup
13 |
14 |
# Describe the dspltools distribution: the dspllib packages (rooted at
# packages/dspllib), the validation schemas and test dataset shipped as package
# data, and the command-line scripts to install.
setup(name='dspltools',
      version='0.5.0',
      description='Suite of command-line tools for generating DSPL datasets',
      author='Public Statistics',
      author_email='public-data-import-feedback@google.com',
      url='http://github.com/google/dspl',
      packages=['dspllib', 'dspllib.data_sources',
                'dspllib.model', 'dspllib.validation'],
      package_dir={'dspllib': 'packages/dspllib'},
      package_data={'dspllib.validation': ['schemas/*.xsd',
                                           'test_dataset/*.csv',
                                           'test_dataset/*.xml']},
      scripts=['scripts/dsplcheck.py', 'scripts/dsplgen.py',
               'scripts/run_all_tests.py'])
29 |
--------------------------------------------------------------------------------
/tools/dspltools/packages/dspllib/data_sources/csv_data_source_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2
2 | #
3 | # Copyright 2018 Google LLC
4 | #
5 | # Use of this source code is governed by a BSD-style
6 | # license that can be found in the LICENSE file or at
7 | # https://developers.google.com/open-source/licenses/bsd
8 |
9 | """Tests of csv_data_source module."""
10 |
11 |
12 | __author__ = 'Benjamin Yolken '
13 |
14 | import unittest
15 |
16 | import csv_data_source
17 | import csv_sources_test_suite
18 |
19 |
class CSVDataSourceTests(csv_sources_test_suite.CSVSourcesTests):
  """Tests of the CSVDataSource object.

  The test cases themselves are inherited from
  csv_sources_test_suite.CSVSourcesTests; this class only selects the data
  source implementation they run against.
  """

  def setUp(self):
    # Pick the implementation under test before delegating to the shared
    # fixture setup.
    # NOTE(review): presumably the inherited setUp reads data_source_class --
    # confirm in csv_sources_test_suite.
    self.data_source_class = csv_data_source.CSVDataSource

    super(CSVDataSourceTests, self).setUp()
27 |
28 |
class CSVDataSourceErrorTests(csv_sources_test_suite.CSVSourcesErrorTests):
  """Tests of the CSVDataSource object under various error conditions.

  The test cases themselves are inherited from
  csv_sources_test_suite.CSVSourcesErrorTests; this class only selects the
  data source implementation they run against.
  """

  def setUp(self):
    # Pick the implementation under test before delegating to the shared
    # fixture setup.
    # NOTE(review): presumably the inherited setUp reads data_source_class --
    # confirm in csv_sources_test_suite.
    self.data_source_class = csv_data_source.CSVDataSource

    super(CSVDataSourceErrorTests, self).setUp()
36 |
37 |
# Run this module's tests when the file is executed directly.
if __name__ == '__main__':
  unittest.main()
40 |
--------------------------------------------------------------------------------
/tools/dspl2/dspl2/templates/viewer.js:
--------------------------------------------------------------------------------
// Turn every first-column cell whose neighbouring cell contains a nested table
// into a click-to-expand toggle for that neighbouring cell.
for (const td of document.querySelectorAll('td:first-child')) {
  const sibling = td.nextElementSibling;
  if (!sibling || !sibling.querySelector('table')) {
    continue;
  }
  // Neighbouring cells with fewer than 20 child elements start expanded;
  // larger ones start collapsed so the page stays readable.
  if (sibling.children.length < 20) {
    td.classList.toggle('open');
  } else {
    td.classList.toggle('closed');
    sibling.classList.toggle('hidden');
  }
  td.addEventListener('click', (ev) => {
    // currentTarget is always the cell this listener is bound to, whereas
    // target may be an element nested inside the cell; toggling on target
    // would mark the wrong element and could dereference a null sibling.
    const cell = ev.currentTarget;
    cell.classList.toggle('open');
    cell.classList.toggle('closed');
    cell.nextElementSibling.classList.toggle('hidden');
  });
}
19 |
/**
 * Click handler for section headings: marks the clicked <h2> as active and
 * shows only the <div> whose id is the heading's text, trimmed and lowercased.
 *
 * @param {Event} ev click event dispatched on an <h2> element.
 */
function onclick(ev) {
  // Use currentTarget (the heading the listener is attached to) rather than
  // target, which may be an element nested inside the heading.
  const heading = ev.currentTarget;

  document.querySelectorAll('h2').forEach((elt) => {
    elt.classList.remove('active');
  });
  heading.classList.add('active');

  document.querySelectorAll('div').forEach((elt) => {
    elt.classList.add('hidden');
  });
  const panel = document.querySelector(
      'div#' + heading.textContent.trim().toLowerCase());
  // A heading may have no matching section (for example when the template
  // omits an optional one); leave everything hidden instead of throwing.
  if (panel) {
    panel.classList.remove('hidden');
  }
}
31 |
// Let every section heading switch the visible section when clicked.
for (const heading of document.querySelectorAll('h2')) {
  heading.addEventListener('click', onclick);
}
35 |
--------------------------------------------------------------------------------
/samples/us_census/retail_sales/businesses.csv:
--------------------------------------------------------------------------------
1 | "business","name","parent"
2 | "44x72","Retail and Food services",
3 | "44000","Retail services","44x72"
4 | "44100","Motor Vehicle and Parts Dealers","44000"
5 | "44200","Furniture and Home Furnishings Stores","44000"
6 | "44300","Electronics and Appliance Stores","44000"
7 | "44400","Building Material and Garden Equipment and Supplies Dealers","44000"
8 | "44500","Food and Beverage Stores","44000"
9 | "44510","Grocery Stores","44500"
10 | "44600","Health and Personal Care Stores","44000"
11 | "44700","Gasoline Stations","44000"
12 | "44800","Clothing and Clothing Accessories Stores","44000"
13 | "45100","Sporting Goods, Hobby, Book, and Music Stores","44000"
14 | "45200","General Merchandise Stores","44000"
15 | "45210","Department Stores (excluding leased department stores)","45200"
16 | "45300","Miscellaneous Store Retailers","44000"
17 | "45400","Nonstore Retailers","44000"
18 | "72200","Food Services and Drinking Places","44x72"
19 | "44xxx","Other Aggregates",
20 | "44y72","Retail and Food services (excluding motor vehicles)","44xxx"
21 | "4400a","Retail Services (excluding Motor Vehicle and Parts Dealers)","44xxx"
22 | "441x0","Auto and other Motor Vehicle","44xxx"
23 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 |
3 | We'd love to accept your patches and contributions to this project. There are
4 | just a few small guidelines you need to follow.
5 |
6 | ## Contributor License Agreement
7 |
8 | Contributions to this project must be accompanied by a Contributor License
9 | Agreement. You (or your employer) retain the copyright to your contribution;
10 | this simply gives us permission to use and redistribute your contributions as
11 | part of the project. Head over to <https://cla.developers.google.com/> to see
12 | your current agreements on file or to sign a new one.
13 |
14 | You generally only need to submit a CLA once, so if you've already submitted one
15 | (even if it was for a different project), you probably don't need to do it
16 | again.
17 |
18 | ## Code reviews
19 |
20 | All submissions, including submissions by project members, require review. We
21 | use GitHub pull requests for this purpose. Consult
22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
23 | information on using pull requests.
24 |
25 | ## Community Guidelines
26 |
27 | This project follows [Google's Open Source Community
28 | Guidelines](https://opensource.google.com/conduct/).
29 |
--------------------------------------------------------------------------------
/samples/bls/unemployment/footnotes.csv:
--------------------------------------------------------------------------------
1 | codeValue,description
2 | 1,Data affected by changes in population controls.
3 | 2,Constructed on the 2002 Census Industry Classification from data originally coded on earlier classifications. Official series was not revised.
4 | 3,2000 forward coded on the 2002 Census Occupation Classification. 1983-99 constructed from data originally coded on earlier classifications.
5 | 4,2000 forward coded on the 2002 Census Industry Classification. 1983-99 constructed from data originally coded on earlier classifications.
6 | 7,Data do not meet publication criteria.
7 | 8,This series id code has been discontinued; data are available using the database tool at www.bls.gov/webapps/legacy/cpsatab8.htm.
8 | 9,Data from 1994 through 2002 were revised in February 2014 with updated seasonal adjustments.
9 | A,Area boundaries do not reflect official OMB definitions.
10 | N,Not available.
11 | P,Preliminary.
12 | V,The survey was not conducted due to bad weather. Interpolated data were seasonally adjusted.
13 | W,The household survey was not conducted for this month due to bad weather. Data were interpolated.
14 | Y,Data reflect controlling to interpolated statewide totals because the survey was not conducted.
15 |
--------------------------------------------------------------------------------
/tools/dspltools/packages/dspllib/data_sources/csv_data_source_sqlite_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2
2 | #
3 | # Copyright 2018 Google LLC
4 | #
5 | # Use of this source code is governed by a BSD-style
6 | # license that can be found in the LICENSE file or at
7 | # https://developers.google.com/open-source/licenses/bsd
8 |
9 | """Tests of csv_data_source_sqlite module."""
10 |
11 |
12 | __author__ = 'Benjamin Yolken '
13 |
14 | import unittest
15 |
16 | import csv_data_source_sqlite
17 | import csv_sources_test_suite
18 |
19 |
class CSVDataSourceSqliteTests(csv_sources_test_suite.CSVSourcesTests):
  """Tests of the CSVDataSourceSqlite object.

  The test cases themselves are inherited from
  csv_sources_test_suite.CSVSourcesTests; this class only selects the data
  source implementation they run against.
  """

  def setUp(self):
    # Pick the implementation under test before delegating to the shared
    # fixture setup.
    # NOTE(review): presumably the inherited setUp reads data_source_class --
    # confirm in csv_sources_test_suite.
    self.data_source_class = csv_data_source_sqlite.CSVDataSourceSqlite

    super(CSVDataSourceSqliteTests, self).setUp()
27 |
28 |
class CSVDataSourceSqliteErrorTests(
    csv_sources_test_suite.CSVSourcesErrorTests):
  """Tests of the CSVDataSourceSqlite object under various error conditions.

  The test cases themselves are inherited from
  csv_sources_test_suite.CSVSourcesErrorTests; this class only selects the
  data source implementation they run against.
  """

  def setUp(self):
    # Pick the implementation under test before delegating to the shared
    # fixture setup.
    # NOTE(review): presumably the inherited setUp reads data_source_class --
    # confirm in csv_sources_test_suite.
    self.data_source_class = csv_data_source_sqlite.CSVDataSourceSqlite

    super(CSVDataSourceSqliteErrorTests, self).setUp()
37 |
38 |
# Run this module's tests when the file is executed directly.
if __name__ == '__main__':
  unittest.main()
41 |
--------------------------------------------------------------------------------
/docs/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Contributing to Data Set Publishing Language, Version 2.0
3 | author: Google
4 | ---
5 | # How to Contribute
6 |
7 | We'd love to accept your patches and contributions to this project. There are
8 | just a few small guidelines you need to follow.
9 |
10 | ## Contributor License Agreement
11 |
12 | Contributions to this project must be accompanied by a Contributor License
13 | Agreement. You (or your employer) retain the copyright to your contribution;
14 | this simply gives us permission to use and redistribute your contributions as
15 | part of the project. Head over to <https://cla.developers.google.com/> to see
16 | your current agreements on file or to sign a new one.
17 |
18 | You generally only need to submit a CLA once, so if you've already submitted one
19 | (even if it was for a different project), you probably don't need to do it
20 | again.
21 |
22 | ## Code reviews
23 |
24 | All submissions, including submissions by project members, require review. We
25 | use GitHub pull requests for this purpose. Consult
26 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
27 | information on using pull requests.
28 |
29 | ## Community Guidelines
30 |
31 | This project follows [Google's Open Source Community
32 | Guidelines](https://opensource.google.com/conduct/).
33 |
--------------------------------------------------------------------------------
/tools/dspltools/scripts/run_all_tests.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2
2 | #
3 | # Copyright 2018 Google LLC
4 | #
5 | # Use of this source code is governed by a BSD-style
6 | # license that can be found in the LICENSE file or at
7 | # https://developers.google.com/open-source/licenses/bsd
8 |
9 | """Run all tests defined in the DSPL Tools code."""
10 |
11 |
12 | __author__ = 'Benjamin Yolken '
13 |
14 | import unittest
15 |
# Dotted names of the test modules that main() loads into a single suite. Each
# name must be importable from wherever this script is run.
_TEST_MODULE_NAMES = [
    'dsplcheck_test',
    'dsplgen_test',
    'dspllib.data_sources.csv_data_source_test',
    'dspllib.data_sources.csv_data_source_sqlite_test',
    'dspllib.data_sources.data_source_test',
    'dspllib.data_sources.data_source_to_dspl_test',
    'dspllib.model.dspl_model_loader_test',
    'dspllib.model.dspl_model_test',
    'dspllib.validation.dspl_validation_test',
    'dspllib.validation.xml_validation_test']
27 |
28 |
def main():
  """Run all DSPL Tools tests and print the results to stderr.

  Every module listed in _TEST_MODULE_NAMES is loaded into one suite and run
  with the default text runner.

  Raises:
    SystemExit: with status 1 when any test fails or errors, so that shells
      and CI systems can detect a failing run. A fully successful run returns
      normally.
  """
  import sys  # Local import: only needed to report failure via exit status.

  test_suite = unittest.TestSuite()
  loader = unittest.defaultTestLoader

  for test_module_name in _TEST_MODULE_NAMES:
    test_suite.addTests(loader.loadTestsFromName(test_module_name))

  result = unittest.TextTestRunner().run(test_suite)

  # Previously the result was discarded and the script always exited with 0.
  if not result.wasSuccessful():
    sys.exit(1)
38 |
39 |
# Run the full test suite when the file is executed directly.
if __name__ == '__main__':
  main()
42 |
--------------------------------------------------------------------------------
/tools/dspl2/dspl2/templates/display.html:
--------------------------------------------------------------------------------
1 | {% from 'render.html' import render %}
2 |
3 |
4 | DSPL 2 Viewer
5 |
8 |
9 |
10 |
DSPL 2 Viewer
11 |
Dataset
12 |
Dimensions
13 |
Measures
14 |
Footnotes
15 |
Slices
16 |
17 | {{render(dataset)}}
18 |
19 |
20 |
21 |
22 |
dimensions
23 |
{{ render(dimension) }}
24 |
25 |
26 |
27 |
28 |
29 |
30 |
measures
31 |
{{ render(measure) }}
32 |
33 |
34 |
35 | {% if footnote %}
36 |
37 |
38 |
39 |
footnotes
40 |
{{ render(footnote) }}
41 |
42 |
43 |
44 | {% endif %}
45 |
46 |
47 |
48 |
slices
49 |
{{ render(slice) }}
50 |
51 |
52 |
53 |
54 |
55 |
58 |
59 |
60 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright 2018, Google Inc.
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are
6 | met:
7 |
8 | 1. Redistributions of source code must retain the above copyright
9 | notice, this list of conditions and the following disclaimer.
10 |
11 | 2. Redistributions in binary form must reproduce the above
12 | copyright notice, this list of conditions and the following
13 | disclaimer in the documentation and/or other materials provided
14 | with the distribution.
15 |
16 | 3. Neither the name of Google Inc. nor the names of its
17 | contributors may be used to endorse or promote products derived
18 | from this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 |
--------------------------------------------------------------------------------
/samples/eurostat/population_density/transform_d3dens.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright 2019 Google LLC
3 | #
4 | # Use of this source code is governed by a BSD-style
5 | # license that can be found in the LICENSE file or at
6 | # https://developers.google.com/open-source/licenses/bsd
7 | import pandas as pd
8 |
9 |
# Read the file and set the index column to the metro region.
df = pd.read_csv(
    'http://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?file=data/met_d3dens.tsv.gz',
    delimiter='\t',
    index_col='metroreg\\time')

# Stack the column headers into a single column's values, and make the metro
# region a column again.
df = df.stack().reset_index()

# Rename the columns
df.columns = ['metroreg', 'year', 'density']

# Strip surrounding whitespace from each value
for col in df.columns:
  df[col] = df[col].str.strip()

# Indicate that the year is an integer
df['year'] = df['year'].astype(int)

# Add a string-valued footnote column with default empty string.
df['density*'] = ''

# Split up any values with footnotes between the value and footnote columns.
# Series.items() is used because Series.iteritems() was removed in pandas 2.0.
has_footnote = df['density'].str.contains(' ')
for idx, density in df.loc[has_footnote, 'density'].items():
  density, footnote = density.split(' ')
  df.loc[idx, 'density'] = density
  # Each character of the footnote part is a separate flag; store the flags
  # as a ';'-separated list.
  df.loc[idx, 'density*'] = ';'.join(list(footnote))

# Remove the placeholder value of ':'
df.loc[df['density'] == ':', 'density'] = None

# Remove rows with no density
df = df[pd.notnull(df['density'])]

# And write the results to a CSV file.
df.to_csv('met_d3dens.csv', index=False)
48 |
--------------------------------------------------------------------------------
/tools/dspl2/dspl2/tests/test_jsonutil.py:
--------------------------------------------------------------------------------
1 | from dspl2.jsonutil import (AsList, GetSchemaProp, JsonToKwArgsDict,
2 | MakeIdKeyedDict, GetSchemaId, GetSchemaType, GetUrl)
3 | import unittest
4 |
5 |
class JsonUtilTests(unittest.TestCase):
  """Unit tests for the helper functions in dspl2.jsonutil."""

  def test_AsList(self):
    """None, lists and bare values all come back as lists."""
    cases = [(None, []), ([], []), ([1], [1]), (1, [1])]
    for given, expected in cases:
      self.assertEqual(AsList(given), expected)

  def test_GetSchemaProp(self):
    """A property is found with or without the 'schema:' prefix."""
    for stored_key in ('id', 'schema:id'):
      self.assertEqual(GetSchemaProp({stored_key: 'val'}, 'id'), 'val')

  def test_JsonToKwArgsDict(self):
    """Ordinary keys are collected under the 'dataset' entry."""
    cases = [
        ({'id': 'val'}, {'dataset': {'id': 'val'}}),
        ({}, {'dataset': {}}),
    ]
    for given, expected in cases:
      self.assertEqual(JsonToKwArgsDict(given), expected)

  def test_MakeIdKeyedDict(self):
    """Objects are retrievable by their '@id' value."""
    by_id = MakeIdKeyedDict([{'@id': '1'}, {'@id': '2'}])
    for obj_id in ('1', '2'):
      self.assertEqual(by_id[obj_id], {'@id': obj_id})

  def test_GetSchemaId(self):
    """The ID is read from '@id', 'id' or 'schema:id'."""
    for stored_key in ('@id', 'id', 'schema:id'):
      self.assertEqual(GetSchemaId({stored_key: 'val'}), 'val')

  def test_GetSchemaType(self):
    """The type is read from '@type', 'type' or 'schema:type'."""
    for stored_key in ('@type', 'type', 'schema:type'):
      self.assertEqual(GetSchemaType({stored_key: 'val'}), 'val')

  def test_GetUrl(self):
    """A URL is taken from an object's '@id' or from a plain string."""
    for given in ({'@id': 'val'}, 'val'):
      self.assertEqual(GetUrl(given), 'val')
40 |
41 |
# Run this module's tests when the file is executed directly.
if __name__ == '__main__':
  unittest.main()
44 |
--------------------------------------------------------------------------------
/tools/dspl2/scripts/dspl2-pretty-print.py:
--------------------------------------------------------------------------------
1 | # Copyright 2018 Google LLC
2 | #
3 | # Use of this source code is governed by a BSD-style
4 | # license that can be found in the LICENSE file or at
5 | # https://developers.google.com/open-source/licenses/bsd
6 |
7 | from absl import app
8 | from absl import flags
9 | import dspl2
10 | import jinja2
11 | from pathlib import Path
12 | import sys
13 |
14 |
15 | FLAGS = flags.FLAGS
16 | flags.DEFINE_boolean('rdf', False, 'Process the JSON-LD as RDF.')
17 |
18 |
def _RenderLocalDspl2(path, rdf):
  """Render a local DSPL 2 dataset with the library's display.html template.

  Progress messages are printed to stdout before each stage.

  Args:
    path: location of the dataset, handed to dspl2.LocalFileGetter.
    rdf: if true, expand the dataset with Dspl2RdfExpander and frame the
      resulting graph; otherwise expand it with Dspl2JsonLdExpander.

  Returns:
    The result of rendering the template with the expanded dataset.

  Raises:
    Exception: any error raised while loading the template or while loading,
      expanding or rendering the dataset propagates unchanged to the caller.
  """
  template_dir = Path(dspl2.__file__).parent / 'templates'
  env = jinja2.Environment(loader=jinja2.FileSystemLoader(
      template_dir.as_posix()))
  # The original wrapped the steps below in an `except Exception` handler that
  # immediately re-raised; the error-page code after that `raise` could never
  # run (and referred to an undefined `loader`), so it has been removed.
  print("Loading template")
  template = env.get_template('display.html')
  print("Loading DSPL2")
  getter = dspl2.LocalFileGetter(path)
  print("Expanding DSPL2")
  if rdf:
    graph = dspl2.Dspl2RdfExpander(getter).Expand()
    print("Framing DSPL2")
    json_val = dspl2.FrameGraph(graph)
  else:
    json_val = dspl2.Dspl2JsonLdExpander(getter).Expand()
  print("Rendering template")
  return template.render(**dspl2.JsonToKwArgsDict(json_val))
43 |
44 |
def main(argv):
  """Render the dataset named on the command line to an HTML file.

  Args:
    argv: command-line arguments: the program name, the input path and the
      output path, in that order.

  Raises:
    SystemExit: with status 1, after printing a usage message to stderr, when
      argv does not contain exactly three entries.
  """
  if len(argv) != 3:
    print(f'Usage: {argv[0]} [input.json] [output.html]', file=sys.stderr)
    # sys.exit is always available; the bare exit() helper is only installed
    # by the site module.
    sys.exit(1)
  # Render before opening the output so that a failed render does not leave
  # an empty or truncated output file behind.
  rendered = _RenderLocalDspl2(argv[1], FLAGS.rdf)
  with open(argv[2], 'w') as f:
    print(rendered, file=f)
51 |
52 |
# Hand control to absl, which invokes main, when run as a script.
if __name__ == '__main__':
  app.run(main)
55 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Dataset Publishing Language
2 |
3 | ## Introduction
4 | **DSPL** stands for **Dataset Publishing Language**. It is a representation
5 | format for both the metadata (information about the dataset, such as its name
6 | and provider, as well as the concepts it contains and displays) and actual data
7 | (the numbers) of datasets. Datasets described in this format can be imported
8 | into the [Google Public Data Explorer](https://www.google.com/publicdata), a
9 | tool that allows for rich, visual exploration of the data.
10 |
11 | This site hosts miscellaneous, open source content (i.e., schemas, example
12 | files, and utilities) associated with the DSPL standard. See our [documentation
13 | site](https://developers.google.com/public-data) for more details on what DSPL
14 | is and how to use it. The utilities in this repository are documented at [this
15 | site](https://developers.google.com/public-data/docs/dspltools).
16 |
17 | ## Build and install
18 | To build the tools, install `lxml`, then use the `setup.py` script in
19 | `tools/dspltools/`. You can use pip to install these:
20 |
21 | ```
22 | pip install -r tools/dspltools/requirements.txt
23 | pip install tools/dspltools
24 | ```
25 |
26 | # DSPL 2
27 | The draft of the DSPL 2 specification, which replaces the existing XML metadata
28 | format with schema.org markup, can be found at the [DSPL GitHub
29 | page](https://google.github.io/dspl). The source for the specification is at
30 | [`docs/dspl2-spec.md`](https://github.com/google/dspl/blob/master/docs/dspl2-spec.md).
31 |
32 | Some initial library and tool support is available in [`tools/dspl2`](https://github.com/google/dspl/tree/master/tools/dspl2).
33 |
34 | ## Build and install
35 | To build the tools, install the prerequisites, then use the `setup.py` script in
36 | `tools/dspl2/`. You can use pip to install these:
37 |
38 | ```
39 | pip install -r tools/dspl2/requirements.txt
40 | pip install tools/dspl2
41 | ```
42 |
--------------------------------------------------------------------------------
/tools/dspl2/dspl2/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2018 Google LLC
2 | #
3 | # Use of this source code is governed by a BSD-style
4 | # license that can be found in the LICENSE file or at
5 | # https://developers.google.com/open-source/licenses/bsd
6 |
7 | from dspl2.expander import Dspl2JsonLdExpander
8 | from dspl2.expander import Dspl2RdfExpander
9 | from dspl2.filegetter import HybridFileGetter
10 | from dspl2.filegetter import InternetFileGetter
11 | from dspl2.filegetter import LocalFileGetter
12 | from dspl2.filegetter import UploadedFileGetter
13 | from dspl2.jsonutil import AsList
14 | from dspl2.jsonutil import GetSchemaId
15 | from dspl2.jsonutil import GetSchemaProp
16 | from dspl2.jsonutil import GetSchemaType
17 | from dspl2.jsonutil import GetUrl
18 | from dspl2.jsonutil import JsonToKwArgsDict
19 | from dspl2.jsonutil import MakeIdKeyedDict
20 | from dspl2.rdfutil import LoadGraph
21 | from dspl2.rdfutil import FrameGraph
22 | from dspl2.rdfutil import MakeSparqlSelectQuery
23 | from dspl2.rdfutil import SelectFromGraph
24 | from dspl2.validator import CheckDataset
25 | from dspl2.validator import CheckDimension
26 | from dspl2.validator import CheckMeasure
27 | from dspl2.validator import CheckSlice
28 | from dspl2.validator import CheckSliceData
29 | from dspl2.validator import CheckStatisticalDataset
30 | from dspl2.validator import ValidateDspl2
31 |
# Public API of the dspl2 package: the names imported above, listed
# alphabetically.
__all__ = [
    "AsList",
    "CheckDataset",
    "CheckDimension",
    "CheckMeasure",
    "CheckSlice",
    "CheckSliceData",
    "CheckStatisticalDataset",
    "Dspl2JsonLdExpander",
    "Dspl2RdfExpander",
    "FrameGraph",
    "GetSchemaId",
    "GetSchemaProp",
    "GetSchemaType",
    "GetUrl",
    "HybridFileGetter",
    "InternetFileGetter",
    "JsonToKwArgsDict",
    "LoadGraph",
    "LocalFileGetter",
    "MakeIdKeyedDict",
    "MakeSparqlSelectQuery",
    "SelectFromGraph",
    "UploadedFileGetter",
    "ValidateDspl2",
]
58 |
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Data Set Publishing Language, Version 2.0
3 | author: Natarajan Krishnaswami
4 | ---
5 | # DSPL 2.0
6 | This is the project website for the DSPL 2.0 specification, samples, and related tools.
7 |
8 | ## Spec
9 |
10 | The draft specification is here: [dspl2-spec.html](dspl2-spec.html).
11 |
12 | To provide feedback on the draft, please create a [GitHub issue](https://github.com/google/dspl/issues), or email us at [public-data-import-feedback@google.com](mailto:public-data-import-feedback@google.com).
13 |
14 | ## Related tools
15 |
16 | Initial tools and a Python library are in the DSPL 2.0 GitHub repository under [`tools/dspl2`](https://github.com/google/dspl/tree/master/tools/dspl2).
17 |
18 | * [`dspl2-expand.py`](https://github.com/google/dspl/blob/master/tools/dspl2/scripts/dspl2-expand.py): tool to convert a DSPL 2.0 dataset with CSV references to one with only JSON-LD.
19 | * [`dspl2-validate.py`](https://github.com/google/dspl/blob/master/tools/dspl2/scripts/dspl2-validate.py): tool to do basic validation of a DSPL 2.0 dataset into an HTML file.
20 | * [`dspl2-pretty-print.py`](https://github.com/google/dspl/blob/master/tools/dspl2/scripts/dspl2-pretty-print.py): tool to pretty print a DSPL 2.0 dataset as HTML tables.
21 | * [`dspl2-pretty-print-server.py`](https://github.com/google/dspl/blob/master/tools/dspl2/scripts/dspl2-pretty-print-server.py): local web app of the above.
22 | * [`dspl2`](https://github.com/google/dspl/tree/master/tools/dspl2/dspl2): python library to load, normalize, and expand CSV files in DSPL 2.0 datasets.
23 |
24 | ## Samples
25 |
26 | Examples are in the DSPL 2.0 GitHub repository under [`samples`](https://github.com/google/dspl/tree/master/samples). Currently Eurostat unemployment and Eurostat population density samples include DSPL 2.0 metadata.
27 |
28 | ## Contributing
29 |
30 | To contribute, see the [CONTRIBUTING](CONTRIBUTING.html) file and after submitting a CLA, submit pull requests to the [DSPL GitHub repository](https://github.com/google/dspl).
31 |
--------------------------------------------------------------------------------
/samples/google/canonical/states.csv:
--------------------------------------------------------------------------------
1 | state,latitude,longitude,name
2 | AK,63.588753,-154.493062,Alaska
3 | AL,32.318231,-86.902298,Alabama
4 | AR,35.20105,-91.831833,Arkansas
5 | AZ,34.048928,-111.093731,Arizona
6 | CA,36.778261,-119.417932,California
7 | CO,39.550051,-105.782067,Colorado
8 | CT,41.603221,-73.087749,Connecticut
9 | DC,38.905985,-77.033418,"District of Columbia"
10 | DE,38.910832,-75.52767,Delaware
11 | FL,27.664827,-81.515754,Florida
12 | GA,32.157435,-82.907123,Georgia
13 | HI,19.898682,-155.665857,Hawaii
14 | IA,41.878003,-93.097702,Iowa
15 | ID,44.068202,-114.742041,Idaho
16 | IL,40.633125,-89.398528,Illinois
17 | IN,40.551217,-85.602364,Indiana
18 | KS,39.011902,-98.484246,Kansas
19 | KY,37.839333,-84.270018,Kentucky
20 | LA,31.244823,-92.145024,Louisiana
21 | MA,42.407211,-71.382437,Massachusetts
22 | MD,39.045755,-76.641271,Maryland
23 | ME,45.253783,-69.445469,Maine
24 | MI,44.314844,-85.602364,Michigan
25 | MN,46.729553,-94.6859,Minnesota
26 | MO,37.964253,-91.831833,Missouri
27 | MS,32.354668,-89.398528,Mississippi
28 | MT,46.879682,-110.362566,Montana
29 | NC,35.759573,-79.0193,"North Carolina"
30 | ND,47.551493,-101.002012,"North Dakota"
31 | NE,41.492537,-99.901813,Nebraska
32 | NH,43.193852,-71.572395,"New Hampshire"
33 | NJ,40.058324,-74.405661,"New Jersey"
34 | NM,34.97273,-105.032363,"New Mexico"
35 | NV,38.80261,-116.419389,Nevada
36 | NY,43.299428,-74.217933,"New York"
37 | OH,40.417287,-82.907123,Ohio
38 | OK,35.007752,-97.092877,Oklahoma
39 | OR,43.804133,-120.554201,Oregon
40 | PA,41.203322,-77.194525,Pennsylvania
41 | PR,18.220833,-66.590149,"Puerto Rico"
42 | RI,41.580095,-71.477429,"Rhode Island"
43 | SC,33.836081,-81.163725,"South Carolina"
44 | SD,43.969515,-99.901813,"South Dakota"
45 | TN,35.517491,-86.580447,Tennessee
46 | TX,31.968599,-99.901813,Texas
47 | UT,39.32098,-111.093731,Utah
48 | VA,37.431573,-78.656894,Virginia
49 | VT,44.558803,-72.577841,Vermont
50 | WA,47.751074,-120.740139,Washington
51 | WI,43.78444,-88.787868,Wisconsin
52 | WV,38.597626,-80.454903,"West Virginia"
53 | WY,43.075968,-107.290284,Wyoming
54 |
--------------------------------------------------------------------------------
/tools/dspl2/dspl2/jsonutil.py:
--------------------------------------------------------------------------------
1 | # Copyright 2018 Google LLC
2 | #
3 | # Use of this source code is governed by a BSD-style
4 | # license that can be found in the LICENSE file or at
5 | # https://developers.google.com/open-source/licenses/bsd
6 |
7 |
def AsList(val):
  """Return the JSON-LD value as a list.

  None becomes an empty list, a list is returned as-is (the same object), and
  any other value is wrapped in a one-element list.
  """
  if val is None:
    return []
  if isinstance(val, list):
    return val
  return [val]
16 |
17 |
def GetSchemaProp(obj, key, default=None):
  """Look up a property that may be stored with or without a 'schema:' prefix.

  Args:
    obj: mapping to read from.
    key: unprefixed property name.
    default: value returned when neither `key` nor 'schema:' + `key` is
      present.

  Returns:
    The value stored under `key` if present, otherwise the value stored under
    'schema:' + `key`, otherwise `default`.

  Raises:
    RuntimeError: if `obj` has no `get` method (for example, it is not a
      dict).
  """
  try:
    prefixed_value = obj.get('schema:' + key, default)
    return obj.get(key, prefixed_value)
  except AttributeError as e:
    raise RuntimeError(f"Unable to find key '{key}' in {obj}") from e
23 |
24 |
def JsonToKwArgsDict(json_val):
  """Build the keyword arguments for a Jinja2 template from a dataset object.

  The keys 'dimension', 'measure', 'footnote' and 'slice' are copied to the
  top level of the result; every other key of `json_val` is gathered into the
  dict stored under 'dataset'. Only these exact, unprefixed key names are
  treated specially.

  Args:
    json_val: the StatisticalDataset object as a dict.

  Returns:
    A dict that always has a 'dataset' entry, plus one entry for each special
    key present in `json_val`.
  """
  top_level_keys = {'dimension', 'measure', 'footnote', 'slice'}
  dataset_meta = {}
  kwargs = {'dataset': dataset_meta}
  for key in json_val:
    value = GetSchemaProp(json_val, key)
    destination = kwargs if key in top_level_keys else dataset_meta
    destination[key] = value
  return kwargs
39 |
40 |
def MakeIdKeyedDict(vals):
  """Index a list of JSON-LD objects by their '@id' values.

  Objects whose '@id' is missing or falsy are left out. When several objects
  share an ID, the last one in `vals` wins.

  Parameters:
    vals (list): JSON-LD objects, as dicts.

  Returns:
    dict: maps each object's ID to the object itself.
  """
  id_and_obj = ((GetSchemaProp(val, '@id'), val) for val in vals)
  return {obj_id: val for obj_id, val in id_and_obj if obj_id}
59 |
60 |
def GetSchemaId(obj):
  """Returns the object's '@id', falling back to its 'id' / 'schema:id'."""
  # The fallback is resolved up front, so GetSchemaProp runs even when '@id'
  # is present.
  fallback_id = GetSchemaProp(obj, 'id')
  return obj.get('@id', fallback_id)
63 |
64 |
def GetSchemaType(obj):
  """Returns the object's '@type', falling back to its 'type' / 'schema:type'."""
  # The fallback is resolved up front, so GetSchemaProp runs even when '@type'
  # is present.
  fallback_type = GetSchemaProp(obj, 'type')
  return obj.get('@type', fallback_type)
67 |
68 |
def GetUrl(obj):
  """Extracts a URL from a string or from a JSON-LD object.

  Args:
    obj: Either a URL string or a dict carrying an ID.

  Returns:
    The string itself, the dict's ID as resolved by GetSchemaId, or None for
    any other kind of value.
  """
  if isinstance(obj, dict):
    return GetSchemaId(obj)
  if isinstance(obj, str):
    return obj
  return None
74 |
--------------------------------------------------------------------------------
/samples/eurostat/unemployment/countries.csv:
--------------------------------------------------------------------------------
1 | "codeValue","alternateName","country_group","name@en","name@fr","name@de","latitude","longitude"
2 | "at","AT","eu","Austria","Autriche","Österreich","47.6965545","13.34598005"
3 | "be","BE","eu","Belgium","Belgique","Belgien","50.501045","4.47667405"
4 | "bg","BG","eu","Bulgaria","Bulgarie","Bulgarien","42.72567375","25.4823218"
5 | "hr","HR","non-eu","Croatia","Croatie","Kroatien","44.74664297","15.34084438"
6 | "cy","CY","eu","Cyprus","Chypre","Zypern","35.129141","33.4286823"
7 | "cz","CZ","eu","Czech Republic","République tchèque","Tschechische Republik","49.803531","15.47499805"
8 | "dk","DK","eu","Denmark","Danemark","Dänemark","55.93968425","9.51668905"
9 | "ee","EE","eu","Estonia","Estonie","Estland","58.5924685","25.8069503"
10 | "fi","FI","eu","Finland","Finlande","Finnland","64.95015875","26.06756405"
11 | "fr","FR","eu","France","France","Frankreich","46.7109945","1.7185608"
12 | "de","DE","eu","Germany (including former GDR from 1991)","Allemagne (incluant l'ancienne RDA à partir de 1991)","Deutschland (einschließlich der ehemaligen DDR seit 1991)","51.16382538","10.4540478"
13 | "gr","GR","eu","Greece","Grèce","Griechenland","39.698467","21.57725572"
14 | "hu","HU","eu","Hungary","Hongrie","Ungarn","47.16116325","19.5042648"
15 | "ie","IE","eu","Ireland","Irlande","Irland","53.41526","-8.2391222"
16 | "it","IT","eu","Italy","Italie","Italien","42.504191","12.57378705"
17 | "lv","LV","eu","Latvia","Lettonie","Lettland","56.880117","24.60655505"
18 | "lt","LT","eu","Lithuania","Lituanie","Litauen","55.173687","23.9431678"
19 | "lu","LU","eu","Luxembourg","Luxembourg","Luxemburg","49.815319","6.13335155"
20 | "mt","MT","eu","Malta","Malte","Malta","35.902422","14.4474608"
21 | "nl","NL","eu","Netherlands","Pays-Bas","Niederlande","52.10811825","5.3301983"
22 | "no","NO","non-eu","Norway","Norvège","Norwegen","64.55645975","12.66576565"
23 | "pl","PL","eu","Poland","Pologne","Polen","51.91890725","19.1343338"
24 | "pt","PT","eu","Portugal","Portugal","Portugal","39.55806875","-7.84494095"
25 | "ro","RO","eu","Romania","Roumanie","Rumänien","45.94261125","24.99015155"
26 | "sk","SK","eu","Slovakia","Slovaquie","Slowakei","48.67264375","19.7000323"
27 | "si","SI","eu","Slovenia","Slovénie","Slowenien","46.14925925","14.98661705"
28 | "es","ES","eu","Spain","Espagne","Spanien","39.8950135","-2.9882957"
29 | "se","SE","eu","Sweden","Suède","Schweden","62.1984675","14.89630657"
30 | "tr","TR","non-eu","Turkey","Turquie","Türkei","38.95294205","35.43979471"
31 | "uk","GB","eu","United Kingdom","Royaume-Uni","Vereinigtes Königreich","54.315447","-2.23261195"
32 |
--------------------------------------------------------------------------------
/tools/dspl2/scripts/dspl2-pretty-print-server.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright 2018 Google LLC
3 | #
4 | # Use of this source code is governed by a BSD-style
5 | # license that can be found in the LICENSE file or at
6 | # https://developers.google.com/open-source/licenses/bsd
7 |
8 | from flask import Flask, request, render_template
9 | import json
10 | from pathlib import Path
11 | import requests
12 |
13 | import dspl2
14 | from dspl2 import (
15 | Dspl2JsonLdExpander, Dspl2RdfExpander, InternetFileGetter,
16 | JsonToKwArgsDict, LoadGraph, FrameGraph, UploadedFileGetter)
17 |
18 |
def _Display(template, json_val):
  """Renders `template` with keyword arguments derived from `json_val`."""
  template_kwargs = JsonToKwArgsDict(json_val)
  return render_template(template, **template_kwargs)
21 |
22 |
# Templates are loaded from the 'templates' directory next to the dspl2
# package's own module file.
template_dir = Path(dspl2.__file__).parent.joinpath('templates')
app = Flask('dspl2-viewer', template_folder=template_dir.as_posix())
25 |
@app.route('/')
def Root():
  """Handles '/' by rendering the 'choose.html' template."""
  landing_template = 'choose.html'
  return render_template(landing_template)
29 |
30 |
@app.route('/render', methods=['GET', 'POST'])
def _HandleUploads():
  """Expands a DSPL 2 dataset and renders it, or renders an error page.

  For POST requests the dataset comes from the uploaded 'files[]'; otherwise
  it is fetched from the 'url' query parameter. When the 'rdf' query parameter
  equals 'on' the dataset is expanded as RDF and framed, otherwise it is
  expanded as JSON-LD.

  Returns:
    The rendered 'display.html' page on success, or 'error.html' populated
    with details of the failure.
  """
  # Bound before the try block so every handler below can reference it.
  url = None
  try:
    rdf = request.args.get('rdf') == 'on'
    url = request.args.get('url')
    if request.method == 'POST':
      files = request.files.getlist('files[]')
      getter = UploadedFileGetter(files)
    else:
      if not url:
        # NOTE(review): every other error path passes `text=`; confirm that
        # error.html actually renders a `message` variable.
        return render_template('error.html',
                               message="No URL provided")
      getter = InternetFileGetter(url)
    if rdf:
      graph = Dspl2RdfExpander(getter).Expand()
      json_val = FrameGraph(graph)
    else:
      json_val = Dspl2JsonLdExpander(getter).Expand()
    return _Display('display.html', json_val)
  except json.JSONDecodeError as e:
    # NOTE(review): `e.doc` is the JSON text being parsed, not a URL; confirm
    # this is what the template is meant to show.
    return render_template('error.html',
                           action="decoding",
                           url=e.doc or url,
                           text=str(e))
  # The requests handlers must precede `except IOError`: RequestException
  # derives from IOError, so in the opposite order they are never reached.
  except requests.exceptions.HTTPError as e:
    return render_template('error.html',
                           url=url,
                           action="retrieving",
                           status=e.response.status_code,
                           text=e.response.text)
  except requests.exceptions.RequestException as e:
    return render_template('error.html',
                           url=url,
                           action="retrieving",
                           text=str(e))
  except IOError as e:
    return render_template('error.html',
                           action="loading",
                           url=e.filename,
                           text=str(e))
  except RuntimeError as e:
    return render_template('error.html',
                           text=str(e))
  except Exception as e:
    # Last-resort boundary: report anything unexpected on the error page.
    return render_template('error.html',
                           action="processing",
                           url=url,
                           text=str(type(e)) + str(e))
79 |
80 |
# Start the Flask server only when this file is executed as a script.
if __name__ == '__main__':
  app.run()
83 |
--------------------------------------------------------------------------------
/tools/dspltools/README.rst:
--------------------------------------------------------------------------------
1 | Documentation
2 | =============
3 | See https://developers.google.com/public-data/docs/dspltools for documentation.
4 |
5 |
6 | Release Notes
7 | =============
8 | *** v0.1 ***
9 | Release date: April 11, 2011
10 |
11 | Description:
12 | ------------
13 | DSPL Tools released!
14 |
15 |
16 | *** v0.2 ***
17 | Release date: April 18, 2011
18 |
19 | Description:
20 | ------------
21 | Enhanced DSPL Check by adding significant functionality beyond XML schema
22 | validation, including the checking of internal dataset references and CSV
23 | file structure.
24 |
25 |
26 | *** v0.2.1 ***
27 | Release date: April 21, 2011
28 |
29 | Description:
30 | ------------
31 | Use column ID to distinguish between years and integers in dsplgen.
32 |
33 |
34 | *** v0.3 ***
35 | Release date: April 26, 2011
36 |
37 | Description:
38 | ------------
39 | Extended DSPL Check to validate dataset CSV data (sorting, instance IDs)
40 | and slice / table links.
41 |
42 | Added concept hierarchy support to DSPL Gen.
43 |
44 |
45 | *** v0.3.5 ***
46 | Release date: May 4, 2011
47 |
48 | Description:
49 | ------------
50 | Extended DSPL Check to support checking of:
51 | - Table column / concept type consistency
52 | - Date formats
53 | - Formatting of float and integer CSV values
54 | - Datasets where CSV columns are in different order than columns in table
55 | metadata
56 |
57 | Improved error messages when files can't be found or opened.
58 |
59 | Fixed bug in DSPL Gen naming of external concepts.
60 |
61 |
62 | *** v0.3.6 ***
63 | Release date: May 6, 2011
64 |
65 | Description:
66 | ------------
67 | Added 'checking_level' option to DSPL Check.
68 |
69 | CSV files are now loaded in 'universal newline mode' to reduce risk of parsing
70 | problems.
71 |
72 |
73 | *** v0.3.7 ***
74 | Release date: May 6, 2011
75 |
76 | Description:
77 | ------------
78 | Added zipped dataset checking to DSPL Check.
79 |
80 | Strip whitespace from CSV values (to mimic behavior of PDE importer).
81 |
82 |
83 | *** v0.4 ***
84 | Release date: May 20, 2011
85 |
86 | Description:
87 | ------------
88 | Added topic reference checking to DSPL Check.
89 |
90 | Changed schema validation process to use local XML schema files instead of
91 | calling out to W3C servers.
92 |
93 |
94 | *** v0.4.1 ***
95 | Release date: June 2, 2011
96 |
97 | Description:
98 | ------------
99 | Added test for trivial slices to DSPL Check.
100 |
101 | Improved behavior of DSPL Check when empty tables are encountered.
102 |
103 |
104 | *** v0.4.2 ***
105 | Release date: June 20, 2011
106 |
107 | Description:
108 | ------------
109 | Changed implementation of default csv_data_source to use in-memory Python
110 | objects instead of sqlite. The latter can still be used by setting the '-t'
111 | option of dsplgen to 'csv_sqlite'.
112 |
113 |
114 | *** v0.4.3 ***
115 | Release date: November 3, 2011
116 |
117 | Description:
118 | ------------
119 | Fixed some bugs around multi-level concept hierarchies.
120 |
121 | Added total_val parameter to support pre-computed rollups in data.
122 |
123 |
124 | *** v0.5.0 ***
125 | Release date: January 22, 2019
126 |
127 | Description:
128 | ------------
129 | Switch to lxml for XML parsing and schema validation.
130 |
131 |
--------------------------------------------------------------------------------
/samples/bls/unemployment/states.csv:
--------------------------------------------------------------------------------
1 | codeValue,name,identifier,alternateName,geo.latitude,geo.longitude
2 | ST0100000000000,Alabama,AL,Alabama,32.318231,-86.902298
3 | ST0200000000000,Alaska,AK,Alaska,63.588753,-154.493062
4 | ST0400000000000,Arizona,AZ,Arizona,34.048928,-111.093731
5 | ST0500000000000,Arkansas,AR,Arkansas,35.20105,-91.831833
6 | ST0600000000000,California,CA,California,36.778261,-119.417932
7 | ST0800000000000,Colorado,CO,Colorado,39.550051,-105.782067
8 | ST0900000000000,Connecticut,CT,Connecticut,41.603221,-73.087749
9 | ST1000000000000,Delaware,DE,Delaware,38.910832,-75.52767
10 | ST1100000000000,District of Columbia,DC,Washington DC,38.905985,-77.033418
11 | ST1200000000000,Florida,FL,Florida,27.664827,-81.515754
12 | ST1300000000000,Georgia,GA,Georgia,32.157435,-82.907123
13 | ST1500000000000,Hawaii,HI,Hawaii,19.898682,-155.665857
14 | ST1600000000000,Idaho,ID,Idaho,44.068202,-114.742041
15 | ST1700000000000,Illinois,IL,Illinois,40.633125,-89.398528
16 | ST1800000000000,Indiana,IN,Indiana,40.551217,-85.602364
17 | ST1900000000000,Iowa,IA,Iowa,41.878003,-93.097702
18 | ST2000000000000,Kansas,KS,Kansas,39.011902,-98.484246
19 | ST2100000000000,Kentucky,KY,Kentucky,37.839333,-84.270018
20 | ST2200000000000,Louisiana,LA,Louisiana,31.244823,-92.145024
21 | ST2300000000000,Maine,ME,Maine,45.253783,-69.445469
22 | ST2400000000000,Maryland,MD,Maryland,39.045755,-76.641271
23 | ST2500000000000,Massachusetts,MA,Massachusetts,42.407211,-71.382437
24 | ST2600000000000,Michigan,MI,Michigan,44.314844,-85.602364
25 | ST2700000000000,Minnesota,MN,Minnesota,46.729553,-94.6859
26 | ST2800000000000,Mississippi,MS,Mississippi,32.354668,-89.398528
27 | ST2900000000000,Missouri,MO,Missouri,37.964253,-91.831833
28 | ST3000000000000,Montana,MT,Montana,46.879682,-110.362566
29 | ST3100000000000,Nebraska,NE,Nebraska,41.492537,-99.901813
30 | ST3200000000000,Nevada,NV,Nevada,38.80261,-116.419389
31 | ST3300000000000,New Hampshire,NH,New Hampshire,43.193852,-71.572395
32 | ST3400000000000,New Jersey,NJ,New Jersey,40.058324,-74.405661
33 | ST3500000000000,New Mexico,NM,New Mexico,34.97273,-105.032363
34 | ST3600000000000,New York,NY,New York State,43.299428,-74.217933
35 | ST3700000000000,North Carolina,NC,N Carolina,35.759573,-79.0193
36 | ST3800000000000,North Dakota,ND,N Dakota,47.551493,-101.002012
37 | ST3900000000000,Ohio,OH,Ohio,40.417287,-82.907123
38 | ST4000000000000,Oklahoma,OK,Oklahoma,35.007752,-97.092877
39 | ST4100000000000,Oregon,OR,Oregon,43.804133,-120.554201
40 | ST4200000000000,Pennsylvania,PA,Pennsylvania,41.203322,-77.194525
41 | ST4400000000000,Rhode Island,RI,Rhode Island,41.580095,-71.477429
42 | ST4500000000000,South Carolina,SC,S Carolina,33.836081,-81.163725
43 | ST4600000000000,South Dakota,SD,S Dakota,43.969515,-99.901813
44 | ST4700000000000,Tennessee,TN,Tennessee,35.517491,-86.580447
45 | ST4800000000000,Texas,TX,Texas,31.968599,-99.901813
46 | ST4900000000000,Utah,UT,Utah,39.32098,-111.093731
47 | ST5000000000000,Vermont,VT,Vermont,44.558803,-72.577841
48 | ST5100000000000,Virginia,VA,Virginia,37.431573,-78.656894
49 | ST5300000000000,Washington,WA,Washington State,47.751074,-120.740139
50 | ST5400000000000,West Virginia,WV,W Virginia,38.597626,-80.454903
51 | ST5500000000000,Wisconsin,WI,Wisconsin,43.78444,-88.787868
52 | ST5600000000000,Wyoming,WY,Wyoming,43.075968,-107.290284
53 | ST7200000000000,Puerto Rico,PR,Puerto Rico,18.220833,-66.590149
54 |
--------------------------------------------------------------------------------
/tools/dspltools/scripts/dsplgen_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2
2 | #
3 | # Copyright 2018 Google LLC
4 | #
5 | # Use of this source code is governed by a BSD-style
6 | # license that can be found in the LICENSE file or at
7 | # https://developers.google.com/open-source/licenses/bsd
8 |
9 | """Tests of dsplgen module."""
10 |
11 |
12 | __author__ = 'Benjamin Yolken '
13 |
14 | import os
15 | import os.path
16 | import re
17 | import shutil
18 | import StringIO
19 | import sys
20 | import tempfile
21 | import unittest
22 |
23 | import dsplcheck
24 | import dsplgen
25 |
26 |
# CSV fixture that DSPLGenTests.setUp writes to 'input.csv'. The header row
# carries bracketed key=value annotations on some columns; five data rows
# follow.
_TEST_CSV_CONTENT = (
"""date[type=date;format=yyyy-MM-dd],category1,category2[concept=geo:us_state;rollup=true],metric1[extends=quantity:ratio;slice_role=metric],metric2,metric3
1980-01-01,red,california,89,321,71.21
1981-01-01,red,california,99,231,391.2
1982-01-01,blue,maine's,293,32,2.31
1983-01-01,blue,california,293,12,10.3
1984-01-01,red,maine's,932,48,10.78""")
34 |
35 |
class DSPLGenTests(unittest.TestCase):
  """Test cases for dsplgen module."""

  def setUp(self):
    """Create a temp input directory holding the CSV fixture and an output dir."""
    self.input_dir = tempfile.mkdtemp()

    # The context manager guarantees the fixture is flushed and closed even
    # if the write fails.
    with open(os.path.join(self.input_dir, 'input.csv'), 'w') as input_file:
      input_file.write(_TEST_CSV_CONTENT)

    self.output_dir = tempfile.mkdtemp()

  def tearDown(self):
    """Remove the temporary directories created in setUp."""
    shutil.rmtree(self.input_dir)
    shutil.rmtree(self.output_dir)

  def testDSPLGenEndToEnd(self):
    """A simple end-to-end test of the dsplgen application."""
    dsplgen.main(['-o', self.output_dir, '-q',
                  os.path.join(self.input_dir, 'input.csv')])

    for expected_file in ('dataset.xml', 'category1_table.csv',
                          'slice_0_table.csv', 'slice_1_table.csv'):
      self.assertTrue(
          os.path.isfile(os.path.join(self.output_dir, expected_file)))

    # Test that output validates against dsplcheck
    saved_stdout = sys.stdout
    redirected_output = StringIO.StringIO()
    sys.stdout = redirected_output
    try:
      dsplcheck.main([os.path.join(self.output_dir, 'dataset.xml')])
      captured_output = redirected_output.getvalue()
    finally:
      # Always restore stdout so a failure here cannot swallow the output of
      # later tests.
      sys.stdout = saved_stdout
      redirected_output.close()

    self.assertTrue(
        re.search(
            'validates successfully.*Parsing completed.*'
            'No issues found.*Completed',
            captured_output, re.DOTALL))

  def testCSVNotFound(self):
    """Test case in which CSV can't be opened."""
    # NOTE(review): this successful run looks unrelated to the missing-file
    # scenario; kept to preserve the existing test behavior.
    dsplgen.main(['-o', self.output_dir, '-q',
                  os.path.join(self.input_dir, 'input.csv')])

    saved_stdout = sys.stdout
    redirected_output = StringIO.StringIO()
    sys.stdout = redirected_output
    try:
      self.assertRaises(SystemExit,
                        dsplgen.main, ['-q', 'non_existent_input_file.csv'])
      captured_output = redirected_output.getvalue()
    finally:
      # Always restore stdout, even when the assertion above fails.
      sys.stdout = saved_stdout
      redirected_output.close()

    self.assertTrue('Error opening CSV file' in captured_output)
99 |
100 |
# Run the test cases when this file is executed as a script.
if __name__ == '__main__':
  unittest.main()
103 |
--------------------------------------------------------------------------------
/tools/dspltools/scripts/dsplgen.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2
2 | #
3 | # Copyright 2018 Google LLC
4 | #
5 | # Use of this source code is governed by a BSD-style
6 | # license that can be found in the LICENSE file or at
7 | # https://developers.google.com/open-source/licenses/bsd
8 |
9 | """Generate a DSPL dataset from a tabular data source via the command-line."""
10 | from __future__ import print_function
11 |
12 |
13 | __author__ = 'Benjamin Yolken '
14 |
15 | import optparse
16 | import sys
17 | import time
18 |
19 | from dspllib.data_sources import csv_data_source
20 | from dspllib.data_sources import csv_data_source_sqlite
21 | from dspllib.data_sources import data_source_to_dspl
22 |
23 |
def LoadOptionsFromFlags(argv):
  """Parse command-line arguments.

  Exactly one positional argument (the data source) is required; otherwise
  the parser reports an error and exits.

  Args:
    argv: The program argument vector (excluding the script name)

  Returns:
    A dictionary with the keys 'data_type', 'data_source', 'output_path' and
    'verbose'
  """
  parser = optparse.OptionParser(
      usage='python dsplgen.py [options] [csv file]')
  parser.add_option(
      '-o', '--output_path', dest='output_path', default='',
      help='Path to a output directory (default: current directory)')
  # Verbose output is on unless -q is given.
  parser.add_option(
      '-q', '--quiet', action='store_false', dest='verbose', default=True,
      help='Quiet mode')
  parser.add_option(
      '-t', '--data_type', dest='data_type', type='choice',
      choices=['csv', 'csv_sqlite'], default='csv',
      help='Type of data source to use (default: csv)')

  parsed_options, positional = parser.parse_args(args=argv)

  if len(positional) != 1:
    parser.error('A data source (e.g., path to CSV file) is required')

  result = {}
  result['data_type'] = parsed_options.data_type
  result['data_source'] = positional[0]
  result['output_path'] = parsed_options.output_path
  result['verbose'] = parsed_options.verbose
  return result
56 |
57 |
def main(argv):
  """Parse command-line flags and run data source to DSPL conversion process.

  Opens the CSV file named on the command line, wraps it in the selected data
  source, populates a dataset from it and writes that dataset to the output
  path. Exits with status 2 if the file cannot be opened or the data type is
  unknown.

  Args:
    argv: The program argument vector (excluding the script name)
  """
  start_time = time.time()
  options = LoadOptionsFromFlags(argv)

  # Connect to data source
  if options['data_type'] not in ['csv', 'csv_sqlite']:
    print('Error: Unknown data type: %s' % (options['data_type']))
    sys.exit(2)

  try:
    csv_file = open(options['data_source'], 'r')
  except IOError as io_error:
    print('Error opening CSV file\n\n%s' % io_error)
    sys.exit(2)

  try:
    if options['data_type'] == 'csv':
      data_source_obj = csv_data_source.CSVDataSource(
          csv_file, options['verbose'])
    else:
      data_source_obj = csv_data_source_sqlite.CSVDataSourceSqlite(
          csv_file, options['verbose'])

    # Create DSPL dataset from data source
    dataset = data_source_to_dspl.PopulateDataset(
        data_source_obj, options['verbose'])
    data_source_obj.Close()
  finally:
    # Release the input handle on success and on failure alike; previously it
    # was never closed.
    csv_file.close()

  if options['verbose']:
    print('Materializing dataset:')
    print(str(dataset))

  # Write DSPL dataset to disk
  dataset.Materialize(options['output_path'])

  if options['verbose']:
    print('Completed in %0.2f seconds' % (time.time() - start_time))
100 |
# Script entry point: pass the arguments after the script name to main().
if __name__ == '__main__':
  main(sys.argv[1:])
103 |
--------------------------------------------------------------------------------
/tools/dspl2/dspl2/filegetter.py:
--------------------------------------------------------------------------------
1 | # Copyright 2018 Google LLC
2 | #
3 | # Use of this source code is governed by a BSD-style
4 | # license that can be found in the LICENSE file or at
5 | # https://developers.google.com/open-source/licenses/bsd
6 |
7 | import extruct
8 | from io import StringIO
9 | import json
10 | from pathlib import Path
11 | import requests
12 | import sys
13 | from urllib.parse import urljoin, urlparse
14 |
15 | from dspl2.rdfutil import LoadGraph, SelectFromGraph
16 |
17 |
18 | def _ProcessDspl2File(filename, fileobj, *, type=''):
19 | if any([filename.endswith('.html'),
20 | type.startswith('text/html')]):
21 | data = extruct.extract(fileobj.read(), uniform='True')
22 | return LoadGraph({
23 | '@context': 'http://schema.org',
24 | '@graph': [
25 | subdata_elem
26 | for subdata in data.values()
27 | for subdata_elem in subdata
28 | if subdata
29 | ]
30 | }, filename)
31 | if any([filename.endswith('.json'),
32 | filename.endswith('.jsonld'),
33 | type.startswith('application/ld+json')]):
34 | json_val = json.load(fileobj)
35 | return LoadGraph(json_val, filename)
36 |
37 |
class UploadedFileGetter(object):
  """File getter backed by a collection of uploaded files.

  Exactly one of the uploads must be a DSPL 2 file (as recognized by
  _ProcessDspl2File); its graph is exposed as `graph` and its name as `base`.
  """

  def __init__(self, files):
    """Indexes `files` by name and loads the single DSPL 2 file among them.

    Args:
      files: Iterable of uploaded file objects exposing `filename`, `stream`
        and `read()`.

    Raises:
      RuntimeError: If no upload is a DSPL 2 file, or if more than one is.
    """
    self.graph = None
    self.file_map = {}
    dspl2_names = set()
    for upload in files:
      name = upload.filename
      self.file_map[name] = upload
      parsed = _ProcessDspl2File(name, upload.stream)
      if not parsed:
        continue
      dspl2_names.add(name)
      self.base = name
      self.graph = parsed
    if not self.graph:
      raise RuntimeError("DSPL 2 file not present in {}".format(
          [upload.filename for upload in self.file_map.values()]))
    if len(dspl2_names) > 1:
      raise RuntimeError("Multiple DSPL 2 files present: {}".format(dspl2_names))

  def Fetch(self, filename):
    """Returns the named upload's contents as a text stream.

    Args:
      filename: Name of an uploaded file.

    Returns:
      A StringIO holding the file's bytes decoded as UTF-8.

    Raises:
      IOError: If no upload with that name exists.
    """
    upload = self.file_map.get(filename)
    if upload:
      # Rewind first: the stream may already have been consumed.
      upload.stream.seek(0)
      return StringIO(upload.read().decode('utf-8'))
    raise IOError(None, 'File not found', filename)
62 |
63 |
class InternetFileGetter(object):
  """File getter that downloads the dataset and its files over HTTP(S)."""

  def __init__(self, url):
    """Downloads `url` and loads it as a DSPL 2 graph.

    Args:
      url: URL of the DSPL 2 file; also used as the base for relative fetches.

    Raises:
      requests.exceptions.HTTPError: If the server returns an error status.
    """
    self.base = url
    r = requests.get(self.base)
    r.raise_for_status()
    # A response may legitimately omit Content-Type; fall back to an empty
    # type so detection can still succeed from the URL suffix instead of
    # failing with KeyError.
    content_type = r.headers.get('content-type', '')
    self.graph = _ProcessDspl2File(url, StringIO(r.text), type=content_type)

  def Fetch(self, filename):
    """Downloads `filename`, resolved relative to the base URL.

    Args:
      filename: Absolute or base-relative URL of the file.

    Returns:
      A StringIO holding the response text.

    Raises:
      requests.exceptions.HTTPError: If the server returns an error status.
    """
    r = requests.get(urljoin(self.base, filename))
    r.raise_for_status()
    return StringIO(r.text)
75 |
76 |
class LocalFileGetter(object):
  """File getter that reads the dataset and its files from the local disk."""

  def __init__(self, path):
    """Opens the file at `path` and loads it with _ProcessDspl2File.

    Args:
      path: Filesystem path or URL-style path of the DSPL 2 file; only the
        path component is used to open it.
    """
    self.base = urlparse(path).path
    with Path(self.base).open() as dataset_file:
      self.graph = _ProcessDspl2File(path, dataset_file)

  def Fetch(self, filename):
    """Opens `filename`, resolved against the dataset file's directory.

    Args:
      filename: Path (or URL whose path component is used) of the file.

    Returns:
      An open file object; the caller is responsible for closing it.
    """
    relative = Path(urlparse(filename).path)
    dataset_dir = Path(self.base).parent
    return (dataset_dir / relative).resolve().open()
87 |
88 |
class HybridFileGetter(object):
  """File getter that handles both local paths and http(s) URLs."""

  @staticmethod
  def _load_file(base, rel=None):
    """Opens `base`, or `rel` resolved against `base`.

    Args:
      base: Path or URI of the dataset file.
      rel: Optional path or URI, resolved relative to `base`.

    Returns:
      An open local file for scheme-less and 'file' URIs, a StringIO with the
      response text for 'http' / 'https' URIs, and None for any other scheme.

    Raises:
      requests.exceptions.HTTPError: If an HTTP(S) fetch returns an error
        status.
    """
    location = urljoin(base, rel) if rel else base
    uri = urlparse(location)
    if not uri.scheme or uri.scheme == 'file':
      return Path(uri.path).open()
    if uri.scheme in ('http', 'https'):
      # requests needs the URL string; passing the parsed ParseResult tuple
      # (as before) made every remote fetch fail.
      r = requests.get(location)
      r.raise_for_status()
      return StringIO(r.text)
    # Unsupported scheme.
    return None

  def __init__(self, json_uri):
    """Loads the DSPL 2 file at `json_uri` with _ProcessDspl2File.

    Args:
      json_uri: Local path or http(s)/file URI of the DSPL 2 file.
    """
    self.base = json_uri
    self.graph = _ProcessDspl2File(
        json_uri,
        HybridFileGetter._load_file(json_uri))

  def Fetch(self, uri):
    """Opens `uri`, resolved relative to the dataset's own location."""
    return HybridFileGetter._load_file(self.base, uri)
110 |
--------------------------------------------------------------------------------
/samples/google/canonical/geo.us.xml:
--------------------------------------------------------------------------------
1 |
2 |
32 |
35 |
36 |
37 |
38 |
39 |
40 | US geographical concepts
41 |
42 |
43 | Canonical concepts for US geographical data.
44 |
45 |
46 | http://code.google.com/apis/publicdata/docs/canonical/geo.us.html
47 |
48 |
49 |
50 |
51 |
52 | Google Inc.
53 |
54 |
55 | Google Inc.
56 |
57 |
58 | http://www.google.com
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 | US State
67 |
68 |
69 | A US State, identified by its two letter code.
70 |
71 |
72 | States
73 |
74 |
75 | All US
76 |
77 |
78 |
79 |