├── .gitignore
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── docs
│   ├── CONTRIBUTING.md
│   ├── LICENSE.md
│   ├── _config.yml
│   ├── dspl2-chart.png
│   ├── dspl2-spec.md
│   └── index.md
├── samples
│   ├── bls
│   │   └── unemployment
│   │       ├── .gitattributes
│   │       ├── age.csv
│   │       ├── bls-unemployment.jsonld
│   │       ├── cities.csv
│   │       ├── citiesUnemploymentMonthly.csv
│   │       ├── counties.csv
│   │       ├── countiesUnemploymentMonthly.csv
│   │       ├── footnotes.csv
│   │       ├── metroAreasUnemploymentMonthly.csv
│   │       ├── metro_areas.csv
│   │       ├── states.csv
│   │       ├── statesUnemploymentMonthly.csv
│   │       ├── totalUnemploymentMonthly.csv
│   │       ├── totalUnemploymentMonthly_ByAge.csv
│   │       ├── totalUnemploymentMonthly_BySex.csv
│   │       └── totalUnemploymentMonthly_BySex_ByAge.csv
│   ├── eurostat
│   │   ├── population_density
│   │   │   ├── README.md
│   │   │   ├── eurostat_population_density-inline.json
│   │   │   ├── eurostat_population_density.html
│   │   │   ├── eurostat_population_density.json
│   │   │   ├── met_d3dens.csv
│   │   │   ├── metroreg.csv
│   │   │   ├── transform_d3dens.py
│   │   │   └── transform_metroreg.py
│   │   └── unemployment
│   │       ├── age_groups.csv
│   │       ├── countries.csv
│   │       ├── country_age.csv
│   │       ├── country_group_age.csv
│   │       ├── country_group_sex.csv
│   │       ├── country_group_sex_age.csv
│   │       ├── country_group_total.csv
│   │       ├── country_groups.csv
│   │       ├── country_sex.csv
│   │       ├── country_sex_age.csv
│   │       ├── country_total.csv
│   │       ├── eurostat-unemployment-dspl-v1-inline-small.json
│   │       ├── eurostat-unemployment-dspl-v1.json
│   │       ├── eurostat-unemployment.xml
│   │       ├── footnotes.csv
│   │       ├── seasonalities.csv
│   │       └── sexes.csv
│   ├── google
│   │   ├── canonical
│   │   │   ├── countries.csv
│   │   │   ├── currencies.csv
│   │   │   ├── entity.xml
│   │   │   ├── entity_order.csv
│   │   │   ├── geo.us.xml
│   │   │   ├── geo.xml
│   │   │   ├── granularity.csv
│   │   │   ├── quantity.xml
│   │   │   ├── states.csv
│   │   │   ├── time.xml
│   │   │   ├── unit.xml
│   │   │   ├── unit_symbol_positions.csv
│   │   │   └── us_counties.csv
│   │   └── dspl-sample
│   │       ├── countries.csv
│   │       ├── country_slice.csv
│   │       ├── dataset.xml
│   │       ├── gender_country_slice.csv
│   │       ├── genders.csv
│   │       ├── state_slice.csv
│   │       └── states.csv
│   └── us_census
│       ├── population
│       │   └── census-totpop.json
│       └── retail_sales
│           ├── businesses.csv
│           ├── census-retail-sales.xml
│           ├── retail_sales_business.csv
│           └── seasonalities.csv
├── schema
│   ├── dspl.xsd
│   └── dspl2.jsonld
└── tools
    ├── dspl2
    │   ├── dspl2
    │   │   ├── __init__.py
    │   │   ├── expander.py
    │   │   ├── filegetter.py
    │   │   ├── jsonutil.py
    │   │   ├── rdfutil.py
    │   │   ├── schema
    │   │   │   ├── jsonldcontext.json
    │   │   │   └── schema.jsonld
    │   │   ├── templates
    │   │   │   ├── choose.html
    │   │   │   ├── display.html
    │   │   │   ├── error.html
    │   │   │   ├── render.html
    │   │   │   ├── viewer.css
    │   │   │   └── viewer.js
    │   │   ├── tests
    │   │   │   ├── __init__.py
    │   │   │   ├── test_expander.py
    │   │   │   ├── test_jsonutil.py
    │   │   │   └── test_rdfutil.py
    │   │   └── validator.py
    │   ├── requirements.txt
    │   ├── scripts
    │   │   ├── dspl2-expand.py
    │   │   ├── dspl2-pretty-print-server.py
    │   │   ├── dspl2-pretty-print.py
    │   │   └── dspl2-validate.py
    │   └── setup.py
    ├── dspl2viz
    │   ├── dspl2viz.py
    │   ├── foo.jsonld
    │   ├── static
    │   │   ├── dspl2viz.css
    │   │   └── dspl2viz.js
    │   └── templates
    │       └── dspl2viz.html
    └── dspltools
        ├── PKG-INFO
        ├── README.rst
        ├── examples
        │   ├── dsplcheck
        │   │   ├── invalid_dspl
        │   │   │   ├── countries.csv
        │   │   │   ├── country_slice.csv
        │   │   │   └── invalid_dspl.xml
        │   │   ├── invalid_xml
        │   │   │   └── invalid_xml.xml
        │   │   └── valid_dataset
        │   │       ├── countries.csv
        │   │       ├── country_slice.csv
        │   │       └── valid_dataset.xml
        │   └── dsplgen
        │       ├── dsplgen_advanced.csv
        │       ├── dsplgen_hierarchies.csv
        │       ├── dsplgen_simple.csv
        │       └── dsplgen_yearly_data.csv
        ├── packages
        │   └── dspllib
        │       ├── __init__.py
        │       ├── data_sources
        │       │   ├── __init__.py
        │       │   ├── csv_data_source.py
        │       │   ├── csv_data_source_sqlite.py
        │       │   ├── csv_data_source_sqlite_test.py
        │       │   ├── csv_data_source_test.py
        │       │   ├── csv_sources_test_suite.py
        │       │   ├── csv_utilities.py
        │       │   ├── data_source.py
        │       │   ├── data_source_test.py
        │       │   ├── data_source_to_dspl.py
        │       │   └── data_source_to_dspl_test.py
        │       ├── model
        │       │   ├── __init__.py
        │       │   ├── dspl_model.py
        │       │   ├── dspl_model_loader.py
        │       │   ├── dspl_model_loader_test.py
        │       │   └── dspl_model_test.py
        │       └── validation
        │           ├── __init__.py
        │           ├── dspl_validation.py
        │           ├── dspl_validation_test.py
        │           ├── schemas
        │           │   ├── dspl.xsd
        │           │   ├── xml_1998.xsd
        │           │   └── xml_2001.xsd
        │           ├── test_dataset
        │           │   ├── countries.csv
        │           │   ├── country_slice.csv
        │           │   ├── dataset.xml
        │           │   ├── gender_country_slice.csv
        │           │   ├── genders.csv
        │           │   ├── state_slice.csv
        │           │   └── states.csv
        │           ├── xml_validation.py
        │           └── xml_validation_test.py
        ├── requirements.txt
        ├── scripts
        │   ├── dsplcheck.py
        │   ├── dsplcheck_test.py
        │   ├── dsplgen.py
        │   ├── dsplgen_test.py
        │   └── run_all_tests.py
        └── setup.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | __pycache__
3 | _site
4 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 |
3 | We'd love to accept your patches and contributions to this project. There are
4 | just a few small guidelines you need to follow.
5 |
6 | ## Contributor License Agreement
7 |
8 | Contributions to this project must be accompanied by a Contributor License
9 | Agreement. You (or your employer) retain the copyright to your contribution;
10 | this simply gives us permission to use and redistribute your contributions as
11 | part of the project. Head over to <https://cla.developers.google.com/> to see
12 | your current agreements on file or to sign a new one.
13 |
14 | You generally only need to submit a CLA once, so if you've already submitted one
15 | (even if it was for a different project), you probably don't need to do it
16 | again.
17 |
18 | ## Code reviews
19 |
20 | All submissions, including submissions by project members, require review. We
21 | use GitHub pull requests for this purpose. Consult
22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
23 | information on using pull requests.
24 |
25 | ## Community Guidelines
26 |
27 | This project follows [Google's Open Source Community
28 | Guidelines](https://opensource.google.com/conduct/).
29 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright 2018, Google Inc.
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are
6 | met:
7 |
8 | 1. Redistributions of source code must retain the above copyright
9 | notice, this list of conditions and the following disclaimer.
10 |
11 | 2. Redistributions in binary form must reproduce the above
12 | copyright notice, this list of conditions and the following
13 | disclaimer in the documentation and/or other materials provided
14 | with the distribution.
15 |
16 | 3. Neither the name of Google Inc. nor the names of its
17 | contributors may be used to endorse or promote products derived
18 | from this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Dataset Publishing Language
2 |
3 | ## Introduction
4 | **DSPL** stands for **Dataset Publishing Language**. It is a representation
5 | format for both the metadata (information about the dataset, such as its name
6 | and provider, as well as the concepts it contains and displays) and actual data
7 | (the numbers) of datasets. Datasets described in this format can be imported
8 | into the [Google Public Data Explorer](https://www.google.com/publicdata), a
9 | tool that allows for rich, visual exploration of the data.
10 |
11 | This site hosts miscellaneous, open source content (i.e., schemas, example
12 | files, and utilities) associated with the DSPL standard. See our [documentation
13 | site](https://developers.google.com/public-data) for more details on what DSPL
14 | is and how to use it. The utilities in this repository are documented at [this
15 | site](https://developers.google.com/public-data/docs/dspltools).
16 |
17 | ## Build and install
18 | To build the tools, install the prerequisite `lxml`, then install the package from
19 | `tools/dspltools/`. Both steps can be done with pip:
20 |
21 | ```
22 | pip install -r tools/dspltools/requirements.txt
23 | pip install tools/dspltools
24 | ```
25 |
26 | # DSPL 2
27 | The draft of the DSPL 2 specification, which replaces the existing XML metadata
28 | format with schema.org markup, can be found at the [DSPL GitHub
29 | page](https://google.github.io/dspl). The source for the specification is at
30 | [`docs/dspl2-spec.md`](https://github.com/google/dspl/blob/master/docs/dspl2-spec.md).
31 |
32 | Some initial library and tool support is available in [`tools/dspl2`](https://github.com/google/dspl/tree/master/tools/dspl2)
33 |
34 | ## Build and install
35 | To build the tools, install the prerequisites, then install the package from
36 | `tools/dspl2/`. Both steps can be done with pip:
37 |
38 | ```
39 | pip install -r tools/dspl2/requirements.txt
40 | pip install tools/dspl2
41 | ```
42 |
--------------------------------------------------------------------------------
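A minimal post-install smoke test is to import the library. This is a sketch, not part of the repo's documented workflow; it assumes the pip commands above succeeded and that `tools/dspl2/setup.py` installs the package under the name `dspl2` (the `tools/dspl2/dspl2` directory in this tree):

```python
# Hypothetical sanity check: confirm the dspl2 package is importable
# after `pip install tools/dspl2` (package name assumed from the
# tools/dspl2/dspl2 directory in this repository).
import dspl2

print(dspl2.__file__)  # path of the installed package
```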
/docs/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Contributing to Data Set Publishing Language, Version 2.0
3 | author: Google
4 | ---
5 | # How to Contribute
6 |
7 | We'd love to accept your patches and contributions to this project. There are
8 | just a few small guidelines you need to follow.
9 |
10 | ## Contributor License Agreement
11 |
12 | Contributions to this project must be accompanied by a Contributor License
13 | Agreement. You (or your employer) retain the copyright to your contribution;
14 | this simply gives us permission to use and redistribute your contributions as
15 | part of the project. Head over to <https://cla.developers.google.com/> to see
16 | your current agreements on file or to sign a new one.
17 |
18 | You generally only need to submit a CLA once, so if you've already submitted one
19 | (even if it was for a different project), you probably don't need to do it
20 | again.
21 |
22 | ## Code reviews
23 |
24 | All submissions, including submissions by project members, require review. We
25 | use GitHub pull requests for this purpose. Consult
26 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
27 | information on using pull requests.
28 |
29 | ## Community Guidelines
30 |
31 | This project follows [Google's Open Source Community
32 | Guidelines](https://opensource.google.com/conduct/).
33 |
--------------------------------------------------------------------------------
/docs/_config.yml:
--------------------------------------------------------------------------------
1 | include:
2 | - LICENSE.md
3 | - CONTRIBUTING.md
4 | - index.md
5 | - dspl2-spec.md
6 |
7 | theme: jekyll-theme-cayman
--------------------------------------------------------------------------------
/docs/dspl2-chart.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/dspl/db79dad685276dbf98ca44b875d1481bc240c5c1/docs/dspl2-chart.png
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Data Set Publishing Language, Version 2.0
3 | author: Natarajan Krishnaswami
4 | ---
5 | # DSPL 2.0
6 | This is the project website for the DSPL 2.0 specification, samples, and related tools.
7 |
8 | ## Spec
9 |
10 | The draft specification is here: [dspl2-spec.html](dspl2-spec.html).
11 |
12 | To provide feedback on the draft, please create a [GitHub issue](https://github.com/google/dspl/issues), or email us at [public-data-import-feedback@google.com](mailto:public-data-import-feedback@google.com).
13 |
14 | ## Related tools
15 |
16 | Initial tools and a Python library are in the DSPL 2.0 GitHub repository under [`tools/dspl2`](https://github.com/google/dspl/tree/master/tools/dspl2).
17 |
18 | * [`dspl2-expand.py`](https://github.com/google/dspl/blob/master/tools/dspl2/scripts/dspl2-expand.py): tool to convert a DSPL 2.0 dataset with CSV references to one with only JSON-LD.
19 | * [`dspl2-validate.py`](https://github.com/google/dspl/blob/master/tools/dspl2/scripts/dspl2-validate.py): tool to perform basic validation of a DSPL 2.0 dataset and write the results to an HTML file.
20 | * [`dspl2-pretty-print.py`](https://github.com/google/dspl/blob/master/tools/dspl2/scripts/dspl2-pretty-print.py): tool to pretty print a DSPL 2.0 dataset as HTML tables.
21 | * [`dspl2-pretty-print-server.py`](https://github.com/google/dspl/blob/master/tools/dspl2/scripts/dspl2-pretty-print-server.py): local web app of the above.
22 | * [`dspl2`](https://github.com/google/dspl/tree/master/tools/dspl2/dspl2): Python library to load, normalize, and expand CSV files in DSPL 2.0 datasets.
23 |
24 | ## Samples
25 |
26 | Examples are in the DSPL 2.0 GitHub repository under [`samples`](https://github.com/google/dspl/tree/master/samples). Currently the Eurostat unemployment and population density samples include DSPL 2.0 metadata.
27 |
28 | ## Contributing
29 |
30 | To contribute, see the [CONTRIBUTING](CONTRIBUTING.html) file and after submitting a CLA, submit pull requests to the [DSPL GitHub repository](https://github.com/google/dspl).
31 |
--------------------------------------------------------------------------------
/samples/bls/unemployment/.gitattributes:
--------------------------------------------------------------------------------
1 | countiesUnemploymentMonthly.csv filter=lfs diff=lfs merge=lfs -text
2 | citiesUnemploymentMonthly.csv filter=lfs diff=lfs merge=lfs -text
3 |
--------------------------------------------------------------------------------
/samples/bls/unemployment/age.csv:
--------------------------------------------------------------------------------
1 | "codeValue","name"
2 | "07","16 to 17 years"
3 | "08","16 to 19 years"
4 | "10","16 to 24 years"
5 | "13","18 to 19 years"
6 | "15","18 years and over"
7 | "17","20 years and over"
8 | "20","20 to 24 years"
9 | "28","25 years and over"
10 | "30","25 to 29 years"
11 | "31","25 to 34 years"
12 | "33","25 to 54 years"
13 | "36","30 to 34 years"
14 | "37","35 to 39 years"
15 | "38","35 to 44 years"
16 | "39","40 to 44 years"
17 | "40","45 years and over"
18 | "41","45 to 49 years"
19 | "42","45 to 54 years"
20 | "44","50 to 54 years"
21 | "45","55 years and over"
22 | "48","55 to 59 years"
23 | "49","55 to 64 years"
24 | "56","60 to 61 years"
25 | "57","60 to 64 years"
26 | "61","62 to 64 years"
27 | "65","65 years and over"
28 | "66","65 to 69 years"
29 | "72","70 years and over"
30 | "73","70 to 74 years"
31 | "78","75 years and over"
32 |
--------------------------------------------------------------------------------
/samples/bls/unemployment/citiesUnemploymentMonthly.csv:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:b2a75c4df3b7eb6b89bfb8f11227eeeb7c7b33f1f5593cc68b72c431c2e758c5
3 | size 28535697
4 |
--------------------------------------------------------------------------------
/samples/bls/unemployment/countiesUnemploymentMonthly.csv:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:54f0f35753c166fa778f6bbe943dcdc873889b3802a6fd2dae72f0fceb10666d
3 | size 53413322
4 |
--------------------------------------------------------------------------------
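The two CSV files above are Git LFS pointers rather than the data itself: each records the pointer spec version, a SHA-256 object id, and the byte size of the real file, which `git lfs pull` fetches. As a rough illustration of the key/value format (a sketch, not part of this repo's tooling):

```python
# Parse the "key value" lines of a Git LFS v1 pointer file.
def parse_lfs_pointer(text: str) -> dict:
    fields = {}
    for line in text.strip().splitlines():
        key, _, value = line.partition(' ')
        fields[key] = value
    return fields


pointer = """\
version https://git-lfs.github.com/spec/v1
oid sha256:54f0f35753c166fa778f6bbe943dcdc873889b3802a6fd2dae72f0fceb10666d
size 53413322
"""
info = parse_lfs_pointer(pointer)
print(info['oid'], int(info['size']))  # object id and size of the real CSV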
/samples/bls/unemployment/footnotes.csv:
--------------------------------------------------------------------------------
1 | codeValue,description
2 | 1,Data affected by changes in population controls.
3 | 2,Constructed on the 2002 Census Industry Classification from data originally coded on earlier classifications. Official series was not revised.
4 | 3,2000 forward coded on the 2002 Census Occupation Classification. 1983-99 constructed from data originally coded on earlier classifications.
5 | 4,2000 forward coded on the 2002 Census Industry Classification. 1983-99 constructed from data originally coded on earlier classifications.
6 | 7,Data do not meet publication criteria.
7 | 8,This series id code has been discontinued; data are available using the database tool at www.bls.gov/webapps/legacy/cpsatab8.htm.
8 | 9,Data from 1994 through 2002 were revised in February 2014 with updated seasonal adjustments.
9 | A,Area boundaries do not reflect official OMB definitions.
10 | N,Not available.
11 | P,Preliminary.
12 | V,The survey was not conducted due to bad weather. Interpolated data were seasonally adjusted.
13 | W,The household survey was not conducted for this month due to bad weather. Data were interpolated.
14 | Y,Data reflect controlling to interpolated statewide totals because the survey was not conducted.
15 |
--------------------------------------------------------------------------------
/samples/bls/unemployment/states.csv:
--------------------------------------------------------------------------------
1 | codeValue,name,identifier,alternateName,geo.latitude,geo.longitude
2 | ST0100000000000,Alabama,AL,Alabama,32.318231,-86.902298
3 | ST0200000000000,Alaska,AK,Alaska,63.588753,-154.493062
4 | ST0400000000000,Arizona,AZ,Arizona,34.048928,-111.093731
5 | ST0500000000000,Arkansas,AR,Arkansas,35.20105,-91.831833
6 | ST0600000000000,California,CA,California,36.778261,-119.417932
7 | ST0800000000000,Colorado,CO,Colorado,39.550051,-105.782067
8 | ST0900000000000,Connecticut,CT,Connecticut,41.603221,-73.087749
9 | ST1000000000000,Delaware,DE,Delaware,38.910832,-75.52767
10 | ST1100000000000,District of Columbia,DC,Washington DC,38.905985,-77.033418
11 | ST1200000000000,Florida,FL,Florida,27.664827,-81.515754
12 | ST1300000000000,Georgia,GA,Georgia,32.157435,-82.907123
13 | ST1500000000000,Hawaii,HI,Hawaii,19.898682,-155.665857
14 | ST1600000000000,Idaho,ID,Idaho,44.068202,-114.742041
15 | ST1700000000000,Illinois,IL,Illinois,40.633125,-89.398528
16 | ST1800000000000,Indiana,IN,Indiana,40.551217,-85.602364
17 | ST1900000000000,Iowa,IA,Iowa,41.878003,-93.097702
18 | ST2000000000000,Kansas,KS,Kansas,39.011902,-98.484246
19 | ST2100000000000,Kentucky,KY,Kentucky,37.839333,-84.270018
20 | ST2200000000000,Louisiana,,Louisiana,31.244823,-92.145024
21 | ST2300000000000,Maine,ME,Maine,45.253783,-69.445469
22 | ST2400000000000,Maryland,MD,Maryland,39.045755,-76.641271
23 | ST2500000000000,Massachusetts,MA,Massachusetts,42.407211,-71.382437
24 | ST2600000000000,Michigan,MI,Michigan,44.314844,-85.602364
25 | ST2700000000000,Minnesota,MN,Minnesota,46.729553,-94.6859
26 | ST2800000000000,Mississippi,MS,Mississippi,32.354668,-89.398528
27 | ST2900000000000,Missouri,MO,Missouri,37.964253,-91.831833
28 | ST3000000000000,Montana,MT,Montana,46.879682,-110.362566
29 | ST3100000000000,Nebraska,NE,Nebraska,41.492537,-99.901813
30 | ST3200000000000,Nevada,NV,Nevada,38.80261,-116.419389
31 | ST3300000000000,New Hampshire,NH,New Hampshire,43.193852,-71.572395
32 | ST3400000000000,New Jersey,NJ,New Jersey,40.058324,-74.405661
33 | ST3500000000000,New Mexico,NM,New Mexico,34.97273,-105.032363
34 | ST3600000000000,New York,NY,New York State,43.299428,-74.217933
35 | ST3700000000000,North Carolina,NC,N Carolina,35.759573,-79.0193
36 | ST3800000000000,North Dakota,ND,N Dakota,47.551493,-101.002012
37 | ST3900000000000,Ohio,OH,Ohio,40.417287,-82.907123
38 | ST4000000000000,Oklahoma,OK,Oklahoma,35.007752,-97.092877
39 | ST4100000000000,Oregon,OR,Oregon,43.804133,-120.554201
40 | ST4200000000000,Pennsylvania,PA,Pennsylvania,41.203322,-77.194525
41 | ST4400000000000,Rhode Island,RI,Rhode Island,41.580095,-71.477429
42 | ST4500000000000,South Carolina,SC,S Carolina,33.836081,-81.163725
43 | ST4600000000000,South Dakota,SD,S Dakota,43.969515,-99.901813
44 | ST4700000000000,Tennessee,TN,Tennessee,35.517491,-86.580447
45 | ST4800000000000,Texas,TX,Texas,31.968599,-99.901813
46 | ST4900000000000,Utah,UT,Utah,39.32098,-111.093731
47 | ST5000000000000,Vermont,VT,Vermont,44.558803,-72.577841
48 | ST5100000000000,Virginia,VA,Virginia,37.431573,-78.656894
49 | ST5300000000000,Washington,WA,Washington State,47.751074,-120.740139
50 | ST5400000000000,West Virginia,WV,W Virginia,38.597626,-80.454903
51 | ST5500000000000,Wisconsin,WI,Wisconsin,43.78444,-88.787868
52 | ST5600000000000,Wyoming,WY,Wyoming,43.075968,-107.290284
53 | ST7200000000000,Puerto Rico,PR,Puerto Rico,18.220833,-66.590149
54 |
--------------------------------------------------------------------------------
/samples/eurostat/population_density/README.md:
--------------------------------------------------------------------------------
1 | # Population Density
2 | This is a small example with one categorical dimension, one measure, and one slice.
3 |
4 | The formats available are:
5 |
6 | * [HTML Microdata](eurostat_population_density.html)
7 | * [JSON-LD + CSV](eurostat_population_density.json)
8 | * [JSON-LD alone](eurostat_population_density-inline.json)
9 |
--------------------------------------------------------------------------------
/samples/eurostat/population_density/eurostat_population_density.json:
--------------------------------------------------------------------------------
1 | {
2 | "@context": "http://schema.org",
3 | "@type": "StatisticalDataset",
4 | "@id": "",
5 | "url": "https://data.europa.eu/euodp/en/data/dataset/bAzn6fiusnRFOBwUeIo78w",
6 | "identifier": "met_d3dens",
7 | "name": "Eurostat Population Density",
8 | "description": "Population density by metropolitan regions",
9 | "dateCreated": "2015-10-16",
10 | "dateModified": "2019-06-18",
11 | "temporalCoverage": "1990-01-01/2016-01-01",
12 | "distribution": {
13 | "@type": "DataDownload",
14 | "contentUrl": "http://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?file=data/met_d3dens.tsv.gz&unzip=true",
15 | "encodingFormat": "text/tab-separated-values"
16 | },
17 | "spatialCoverage":{
18 | "@type":"Place",
19 | "geo":{
20 | "@type":"GeoShape",
21 | "name": "European Union",
22 | "box":"34.633285 -10.468556 70.096054 34.597916"
23 | }
24 | },
25 | "license": "https://ec.europa.eu/eurostat/about/policies/copyright",
26 | "creator":{
27 | "@type":"Organization",
28 | "url": "https://ec.europa.eu/eurostat",
29 | "name":"Eurostat"
30 | },
31 | "publisher": {
32 | "@type": "Organization",
33 | "name": "Eurostat",
34 | "url": "https://ec.europa.eu/eurostat",
35 | "contactPoint": {
36 | "@type": "ContactPoint",
37 | "contactType": "User Support",
38 | "url": "https://ec.europa.eu/eurostat/help/support"
39 | }
40 | },
41 | "dimension": [
42 | {
43 | "@type": "CategoricalDimension",
44 | "@id": "#metroreg",
45 | "dataset": {"@id": ""},
46 | "codeList": "metroreg.csv"
47 | },
48 | {
49 | "@type": "TimeDimension",
50 | "@id": "#year",
51 | "dataset": {"@id": ""},
52 | "name": "year",
53 | "equivalentType": "xsd:Year",
54 | "dateFormat": "yyyy"
55 | }
56 | ],
57 | "measure": [
58 | {
59 | "@type": "StatisticalMeasure",
60 | "@id": "#density",
61 | "dataset": {"@id": ""},
62 | "name": "Population density",
63 | "unitText": "persons per square kilometre"
64 | }
65 | ],
66 | "footnote": [
67 | {
68 | "@type": "StatisticalAnnotation",
69 | "@id": "#footnote=b",
70 | "dataset": {"@id": ""},
71 | "codeValue": "b",
72 | "description": "break in time series"
73 | },
74 | {
75 | "@type": "StatisticalAnnotation",
76 | "@id": "#footnote=c",
77 | "dataset": {"@id": ""},
78 | "codeValue": "c",
79 | "description": "confidential"
80 | },
81 | {
82 | "@type": "StatisticalAnnotation",
83 | "@id": "#footnote=d",
84 | "dataset": {"@id": ""},
85 | "codeValue": "d",
86 | "description": "definition differs, see metadata"
87 | },
88 | {
89 | "@type": "StatisticalAnnotation",
90 | "@id": "#footnote=e",
91 | "dataset": {"@id": ""},
92 | "codeValue": "e",
93 | "description": "estimated"
94 | },
95 | {
96 | "@type": "StatisticalAnnotation",
97 | "@id": "#footnote=f",
98 | "dataset": {"@id": ""},
99 | "codeValue": "f",
100 | "description": "forecast"
101 | },
102 | {
103 | "@type": "StatisticalAnnotation",
104 | "@id": "#footnote=n",
105 | "dataset": {"@id": ""},
106 | "codeValue": "n",
107 | "description": "not significant"
108 | },
109 | {
110 | "@type": "StatisticalAnnotation",
111 | "@id": "#footnote=p",
112 | "dataset": {"@id": ""},
113 | "codeValue": "p",
114 | "description": "provisional"
115 | },
116 | {
117 | "@type": "StatisticalAnnotation",
118 | "@id": "#footnote=r",
119 | "dataset": {"@id": ""},
120 | "codeValue": "r",
121 | "description": "revised"
122 | },
123 | {
124 | "@type": "StatisticalAnnotation",
125 | "@id": "#footnote=s",
126 | "dataset": {"@id": ""},
127 | "codeValue": "s",
128 | "description": "Eurostat estimate"
129 | },
130 | {
131 | "@type": "StatisticalAnnotation",
132 | "@id": "#footnote=u",
133 | "dataset": {"@id": ""},
134 | "codeValue": "u",
135 | "description": "low reliability"
136 | },
137 | {
138 | "@type": "StatisticalAnnotation",
139 | "@id": "#footnote=z",
140 | "dataset": {"@id": ""},
141 | "codeValue": "z",
142 | "description": "not applicable"
143 | }
144 | ],
145 | "slice": {
146 | "@type": "DataSlice",
147 | "@id": "#metroreg_year",
148 | "dataset": {"@id": ""},
149 | "dimension": ["#metroreg", "#year"],
150 | "measure": {"@id": "#density"},
151 | "data": {"@id": "met_d3dens.csv"}
152 | }
153 | }
154 |
--------------------------------------------------------------------------------
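The dataset above is plain JSON-LD, so its structure can be inspected without the `dspl2` library. A minimal sketch using only the standard library, assuming it runs in this sample's directory:

```python
import json

# Walk the StatisticalDataset markup above and summarize it.
with open('eurostat_population_density.json', encoding='utf-8') as f:
    ds = json.load(f)

print(ds['name'], '--', ds['description'])
for dim in ds['dimension']:
    print('dimension:', dim['@id'], f"({dim['@type']})")
for measure in ds['measure']:
    print('measure:', measure['@id'], '-', measure.get('unitText', ''))

# 'slice' is a single object in this sample, but a list in others.
slices = ds['slice'] if isinstance(ds['slice'], list) else [ds['slice']]
for sl in slices:
    print('slice:', sl['@id'], 'data from', sl['data']['@id'])
```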
/samples/eurostat/population_density/transform_d3dens.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright 2019 Google LLC
3 | #
4 | # Use of this source code is governed by a BSD-style
5 | # license that can be found in the LICENSE file or at
6 | # https://developers.google.com/open-source/licenses/bsd
7 | import pandas as pd
8 |
9 |
10 | # Read the file and set the index column to the metro region.
11 | df = pd.read_csv(
12 | 'http://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?file=data/met_d3dens.tsv.gz',
13 | delimiter='\t',
14 | index_col='metroreg\\time')
15 |
16 | # Stack the column headers into a single column's values, and make the metro
17 | # region a column again.
18 | df = df.stack().reset_index()
19 |
20 | # Rename the columns
21 | df.columns = ['metroreg', 'year', 'density']
22 |
23 | # Strip surrounding whitespace from each value
24 | for col in df.columns:
25 | df[col] = df[col].str.strip()
26 |
27 | # Indicate that the year is an integer
28 | df['year'] = df['year'].astype(int)
29 |
30 | # Add a string-valued footnote column with default empty string.
31 | df['density*'] = ''
32 |
33 | # Split up any values with footnotes between the value and footnote columns
34 | for idx, density in df.loc[df['density'].str.contains(' '),
35 |                            'density'].items():
36 | density, footnote = density.split(' ')
37 | df.loc[idx, 'density'] = density
38 | df.loc[idx, 'density*'] = ';'.join(list(footnote))
39 |
40 | # Remove the placeholder value of ':'
41 | df.loc[df['density'] == ':', 'density'] = None
42 |
43 | # Remove rows with no density
44 | df = df[pd.notnull(df['density'])]
45 |
46 | # And write the results to a CSV file.
47 | df.to_csv('met_d3dens.csv', index=False)
48 |
--------------------------------------------------------------------------------
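The key step in the script above is the wide-to-long reshape: the Eurostat TSV has one column per year, and `stack()` folds those columns into rows. A toy illustration with made-up values (the region codes and numbers here are placeholders, not Eurostat data):

```python
import pandas as pd

# Wide table shaped like the raw Eurostat download: one row per metro
# region, one string-valued column per year ('102.3 e' carries an
# 'estimated' footnote; ':' is the missing-value placeholder).
wide = pd.DataFrame(
    {'2015': ['101.0', '250.5'], '2016': ['102.3 e', ':']},
    index=pd.Index(['AT001M', 'BE001M'], name='metroreg\\time'))

# Fold the year columns into rows, as transform_d3dens.py does.
long = wide.stack().reset_index()
long.columns = ['metroreg', 'year', 'density']
print(long)
```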
/samples/eurostat/population_density/transform_metroreg.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright 2019 Google LLC
3 | #
4 | # Use of this source code is governed by a BSD-style
5 | # license that can be found in the LICENSE file or at
6 | # https://developers.google.com/open-source/licenses/bsd
7 | import pandas as pd
8 |
9 |
10 | # Read the input file.
11 | df = pd.read_csv('http://dd.eionet.europa.eu/vocabulary/eurostat/metroreg/csv')
12 |
13 | # Drop irrelevant columns
14 | df = df[['Notation', 'Label']]
15 |
16 | # Rename columns
17 | df.columns = ['codeValue', 'name']
18 |
19 | # Write output file
20 | df.to_csv('metroreg.csv', index=False)
21 |
--------------------------------------------------------------------------------
/samples/eurostat/unemployment/age_groups.csv:
--------------------------------------------------------------------------------
1 | "codeValue","name@en","name@fr","name@de"
2 | "y25-74","From 25 to 74 years","De 25 à 74 ans","25 bis 74 Jahre"
3 | "y_lt25","Less than 25 years","Moins de 25 ans","Weniger als 25 Jahre"
4 |
--------------------------------------------------------------------------------
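The `name@en`/`name@fr`/`name@de` headers above are the convention these samples use for language-tagged columns in code lists. A small sketch of pulling one language out of such a file (assumes it runs in this sample's directory):

```python
import csv

# Print the English name for each code in a language-tagged code list.
with open('age_groups.csv', newline='', encoding='utf-8') as f:
    for row in csv.DictReader(f):
        print(row['codeValue'], '->', row['name@en'])
```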
/samples/eurostat/unemployment/countries.csv:
--------------------------------------------------------------------------------
1 | "codeValue","alternateName","country_group","name@en","name@fr","name@de","latitude","longitude"
2 | "at","AT","eu","Austria","Autriche","Österreich","47.6965545","13.34598005"
3 | "be","BE","eu","Belgium","Belgique","Belgien","50.501045","4.47667405"
4 | "bg","BG","eu","Bulgaria","Bulgarie","Bulgarien","42.72567375","25.4823218"
5 | "hr","HR","non-eu","Croatia","Croatie","Kroatien","44.74664297","15.34084438"
6 | "cy","CY","eu","Cyprus","Chypre","Zypern","35.129141","33.4286823"
7 | "cz","CZ","eu","Czech Republic","République tchèque","Tschechische Republik","49.803531","15.47499805"
8 | "dk","DK","eu","Denmark","Danemark","Dänemark","55.93968425","9.51668905"
9 | "ee","EE","eu","Estonia","Estonie","Estland","58.5924685","25.8069503"
10 | "fi","FI","eu","Finland","Finlande","Finnland","64.95015875","26.06756405"
11 | "fr","FR","eu","France","France","Frankreich","46.7109945","1.7185608"
12 | "de","DE","eu","Germany (including former GDR from 1991)","Allemagne (incluant l'ancienne RDA à partir de 1991)","Deutschland (einschließlich der ehemaligen DDR seit 1991)","51.16382538","10.4540478"
13 | "gr","GR","eu","Greece","Grèce","Griechenland","39.698467","21.57725572"
14 | "hu","HU","eu","Hungary","Hongrie","Ungarn","47.16116325","19.5042648"
15 | "ie","IE","eu","Ireland","Irlande","Irland","53.41526","-8.2391222"
16 | "it","IT","eu","Italy","Italie","Italien","42.504191","12.57378705"
17 | "lv","LV","eu","Latvia","Lettonie","Lettland","56.880117","24.60655505"
18 | "lt","LT","eu","Lithuania","Lituanie","Litauen","55.173687","23.9431678"
19 | "lu","LU","eu","Luxembourg","Luxembourg","Luxemburg","49.815319","6.13335155"
20 | "mt","MT","eu","Malta","Malte","Malta","35.902422","14.4474608"
21 | "nl","NL","eu","Netherlands","Pays-Bas","Niederlande","52.10811825","5.3301983"
22 | "no","NO","non-eu","Norway","Norvège","Norwegen","64.55645975","12.66576565"
23 | "pl","PL","eu","Poland","Pologne","Polen","51.91890725","19.1343338"
24 | "pt","PT","eu","Portugal","Portugal","Portugal","39.55806875","-7.84494095"
25 | "ro","RO","eu","Romania","Roumanie","Rumänien","45.94261125","24.99015155"
26 | "sk","SK","eu","Slovakia","Slovaquie","Slowakei","48.67264375","19.7000323"
27 | "si","SI","eu","Slovenia","Slovénie","Slowenien","46.14925925","14.98661705"
28 | "es","ES","eu","Spain","Espagne","Spanien","39.8950135","-2.9882957"
29 | "se","SE","eu","Sweden","Suède","Schweden","62.1984675","14.89630657"
30 | "tr","TR","non-eu","Turkey","Turquie","Türkei","38.95294205","35.43979471"
31 | "uk","GB","eu","United Kingdom","Royaume-Uni","Vereinigtes Königreich","54.315447","-2.23261195"
32 |
--------------------------------------------------------------------------------
/samples/eurostat/unemployment/country_groups.csv:
--------------------------------------------------------------------------------
1 | codeValue,name@en,name@fr,name@de
2 | eu,"European Union","Union européenne","Europäische Union"
3 | non-eu,"Non EU countries","Pays hors Union européenne",Nicht-EU-Länder
4 |
--------------------------------------------------------------------------------
/samples/eurostat/unemployment/eurostat-unemployment-dspl-v1.json:
--------------------------------------------------------------------------------
1 | {
2 | "@context": [
3 | "http://schema.org",
4 | {
5 | "name": { "@container": "@language" },
6 | "description": { "@container": "@language" },
7 | "url": { "@container": "@language" }
8 | }
9 | ],
10 | "@type": "StatisticalDataset",
11 | "@id": "#eurostat-unemployment",
12 | "name": {
13 | "en": "Unemployment in Europe (monthly)",
14 | "de": "Arbeitslosigkeit in Europa (monatlich)",
15 | "fr": "Le Chômage en Europe (mensuel)"
16 | },
17 | "description": {
18 | "en": "Harmonized unemployment data for European countries. This dataset was prepared by Google based on data downloaded from Eurostat.",
19 | "de": "Harmonisierte Daten zur Arbeitslosigkeit für europäische Länder. Dieser Datensatz wurde von Google aufbereitet, basierend auf online Daten von Eurostat.",
20 | "fr": "Données harmonisées sur le chômage dans les pays européens. Ces données ont été préparées par Google sur la base de données téléchargées à partir d'Eurostat."
21 | },
22 | "url": {
23 | "en": "http://epp.eurostat.ec.europa.eu/portal/page/portal/lang-en/employment_unemployment_lfs/introduction",
24 | "de": "http://epp.eurostat.ec.europa.eu/portal/page/portal/lang-fr/employment_unemployment_lfs/introduction",
25 | "fr": "http://epp.eurostat.ec.europa.eu/portal/page/portal/lang-de/employment_unemployment_lfs/introduction"
26 | },
27 | "license": "https://ec.europa.eu/eurostat/about/policies/copyright",
28 | "creator":{
29 | "@type":"Organization",
30 | "url": "https://ec.europa.eu/eurostat",
31 | "name":"Eurostat",
32 | "contactPoint": [
33 | {
34 | "@type":"ContactPoint",
35 | "name": "Eurostat Multilingual User Support Network"
36 | "contactType": "Central Support",
37 | "telephone":"+352 4301 36789",
38 | },
39 | {
40 | "@type":"ContactPoint",
41 | "name": "Eurostat Multilingual User Support Network"
42 | "contactType": "Republic of Ireland",
43 | "availableLanguage": "en",
44 | "telephone":"+353 151 33080",
45 | }
46 | ]
47 | },
48 | "distribution":[
49 | {
50 | "@type":"DataDownload",
51 | "encodingFormat":"text/tab-separated-values",
52 | "contentUrl":"https://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?sort=1&file=data%2Fei_lmhu_m.tsv.gz"
53 | },
54 | {
55 | "@type":"DataDownload",
56 | "encodingFormat":"application/vnd.sdmx.genericdata+xml;version=2.0",
57 | "contentUrl":"https://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?sort=1&file=data%2Fei_lmhu_m.sdmx.zip"
58 | }
59 | ],
60 | "temporalCoverage":"1993-01/2010-12",
61 | "spatialCoverage":{
62 | "@type":"Place",
63 | "geo":{
64 | "@type":"GeoShape",
65 | "name": "European Union",
66 | "box":"34.633285 -10.468556 70.096054 34.597916"
67 | }
68 | },
69 | "measure": [
70 | {
71 | "@type": "StatisticalMeasure",
72 | "@id": "#unemployment",
73 | "sameAs": "https://www.wikidata.org/wiki/Q41171",
74 | "name": {
75 | "en": "Unemployment (monthly)",
76 | "de": "Arbeitslosigkeit (monatlich)",
77 | "fr": "Chômeurs (mensuel)"
78 | },
79 | "description": {
80 | "en": "The total number of people unemployed",
81 | "de": "Anzahl der Arbeitslosen",
82 | "fr":" Le nombre total de chômeurs"
83 | },
84 | "url": {
85 | "en": "http://ec.europa.eu/eurostat/product?code=une_nb_m&language=en",
86 | "de": "http://ec.europa.eu/eurostat/product?code=une_nb_m&language=de",
87 | "fr": "http://ec.europa.eu/eurostat/product?code=une_nb_m&language=fr"
88 | },
89 | "unitCode": "IE"
90 | },
91 | {
92 | "@type": "StatisticalMeasure",
93 | "@id": "#unemployment_rate",
94 | "sameAs": "https://www.wikidata.org/wiki/Q1787954",
95 | "name": {
96 | "en": "Unemployment rate (monthly)",
97 | "de": "Arbeitslosenquote (monatlich)",
98 | "fr": "Taux de chômage (mensuel)"
99 | },
100 | "description": {
101 | "en": "The unemployment rate represents unemployed persons as a percentage of the labour force. The labour force is the total number of people employed and unemployed.",
102 | "de": "Die Arbeitslosenquote ist definiert als der prozentuale Anteil der Arbeitslosen an den Erwerbspersonen. Die Erwerbspersonen umfassen die Erwerbstätigen und die Arbeitslosen.",
103 | "fr": "Le taux de chômage représente le pourcentage de chômeurs dans la population active. La population active représente le nombre total des personnes ayant un emploi ou étant au chômage."
104 | },
105 | "url": {
106 | "en": "http://ec.europa.eu/eurostat/product?code=une_rt_m&language=en",
107 | "de": "http://ec.europa.eu/eurostat/product?code=une_rt_m&language=de",
108 | "fr": "http://ec.europa.eu/eurostat/product?code=une_rt_m&language=fr"
109 | },
110 | "unitCode": "P1"
111 | }
112 | ],
113 | "dimension": [
114 | {
115 | "@type": "CategoricalDimension",
116 | "@id": "#country_group",
117 | "codeList": "country_groups.csv"
118 | },
119 | {
120 | "@type": "CategoricalDimension",
121 | "@id": "#country",
122 | "codeList": "countries.csv",
123 | "equivalentType": "Country"
124 | },
125 | {
126 | "@type": "CategoricalDimension",
127 | "@id": "#age_group",
128 | "codeList": "age_groups.csv"
129 | },
130 | {
131 | "@type": "CategoricalDimension",
132 | "@id": "#sex",
133 | "codeList": "sexes.csv"
134 | },
135 | {
136 | "@type": "CategoricalDimension",
137 | "@id": "#seasonality",
138 | "codeList": "seasonalities.csv"
139 | },
140 | {
141 | "@type": "TimeDimension",
142 | "@id": "#month",
143 | "equivalentType": "xsd:gYearMonth",
144 | "datePattern": "yyyy.MM"
145 | }
146 | ],
147 | "footnote": "footnotes.csv",
148 | "slice": [
149 | {
150 | "@type": "DataSlice",
151 | "@id": "#country_age",
152 | "dimension": [
153 | "#country",
154 | "#age_group",
155 | "#month"
156 | ],
157 | "measure": [
158 | "#unemployment",
159 | "#unemployment_rate"
160 | ],
161 | "observation": "country_age.csv"
162 | },
163 | {
164 | "@type": "DataSlice",
165 | "@id": "#country_group_age",
166 | "dimension": [
167 | "#country_group",
168 | "#age_group",
169 | "#month"
170 | ],
171 | "measure": [
172 | "#unemployment",
173 | "#unemployment_rate"
174 | ],
175 | "observation": "country_group_age.csv"
176 | },
177 | {
178 | "@type": "DataSlice",
179 | "@id": "#country_group_sex_age",
180 | "dimension": [
181 | "#country_group",
182 | "#sex",
183 | "#age_group",
184 | "#month"
185 | ],
186 | "measure": [
187 | "#unemployment",
188 | "#unemployment_rate"
189 | ],
190 | "observation": "country_group_sex_age.csv"
191 | },
192 | {
193 | "@type": "DataSlice",
194 | "@id": "#country_group_sex",
195 | "dimension": [
196 | "#country_group",
197 | "#sex",
198 | "#month"
199 | ],
200 | "measure": [
201 | "#unemployment",
202 | "#unemployment_rate"
203 | ],
204 | "observation": "country_group_sex.csv"
205 | },
206 | {
207 | "@type": "DataSlice",
208 | "@id": "#country_group_total",
209 | "dimension": [
210 | "#country_group",
211 | "#month"
212 | ],
213 | "measure": [
214 | "#unemployment",
215 | "#unemployment_rate"
216 | ],
217 | "observation": "country_group_total.csv"
218 | },
219 | {
220 | "@type": "DataSlice",
221 | "@id": "#country_sex_age",
222 | "dimension": [
223 | "#country",
224 | "#sex",
225 | "#age_group",
226 | "#month"
227 | ],
228 | "measure": [
229 | "#unemployment",
230 | "#unemployment_rate"
231 | ],
232 | "observation": "country_sex_age.csv"
233 | },
234 | {
235 | "@type": "DataSlice",
236 | "@id": "#country_sex",
237 | "dimension": [
238 | "#country",
239 | "#sex",
240 | "#month"
241 | ],
242 | "measure": [
243 | "#unemployment",
244 | "#unemployment_rate"
245 | ],
246 | "observation": "country_sex.csv"
247 | },
248 | {
249 | "@type": "DataSlice",
250 | "@id": "#country_total",
251 | "dimension": [
252 | "#country",
253 | "#month"
254 | ],
255 | "measure": [
256 | "#unemployment",
257 | "#unemployment_rate"
258 | ],
259 | "observation": "country_total.csv"
260 | }
261 | ]
262 | }
263 |
--------------------------------------------------------------------------------
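Since the slice definitions above refer to dimensions and measures by `@id`, a cheap consistency check is to verify that every reference resolves. This is a sketch using only the standard library, not the `dspl2` validator:

```python
import json

with open('eurostat-unemployment-dspl-v1.json', encoding='utf-8') as f:
    ds = json.load(f)

declared_dims = {d['@id'] for d in ds['dimension']}
declared_measures = {m['@id'] for m in ds['measure']}

for sl in ds['slice']:
    bad = [ref for ref in sl['dimension'] if ref not in declared_dims]
    bad += [ref for ref in sl['measure'] if ref not in declared_measures]
    print(sl['@id'], 'OK' if not bad else f'unresolved refs: {bad}')
```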
/samples/eurostat/unemployment/footnotes.csv:
--------------------------------------------------------------------------------
1 | codeValue,description
2 | p,This value is a projection
3 | r,This value has been revised
4 |
--------------------------------------------------------------------------------
/samples/eurostat/unemployment/seasonalities.csv:
--------------------------------------------------------------------------------
1 | "codeValue","name@en","name@fr","name@de"
2 | "nsa","Not seasonally adjusted data","Données non désaisonnalisées","Nichtsaisonbereinigte Daten"
3 | "sa","Seasonally adjusted data","Données désaisonnalisées","Saisonbereinigte Daten"
4 | "trend","Trend cycle","Tendance-cycle","Trend (glatte Komponente)"
5 |
--------------------------------------------------------------------------------
/samples/eurostat/unemployment/sexes.csv:
--------------------------------------------------------------------------------
1 | "codeValue","name@en","name@fr","name@de"
2 | "f","Females","Femmes","Frauen"
3 | "m","Males","Hommes","Männer"
4 |
--------------------------------------------------------------------------------
/samples/google/canonical/currencies.csv:
--------------------------------------------------------------------------------
1 | currency,name,symbol
2 | AED,"UAE Dirham",
3 | AFN,Afghani,؋
4 | ALL,Lek,Lek
5 | AMD,"Armenian Dram",
6 | ANG,"Netherlands Antillian Guilder",ƒ
7 | AOA,Kwanza,
8 | ARS,"Argentine Peso",$
9 | AUD,"Australian Dollar",$
10 | AWG,"Aruban Guilder",ƒ
11 | AZN,"Azerbaijanian Manat",ман
12 | BAM,"Convertible Marks",KM
13 | BBD,"Barbados Dollar",$
14 | BDT,Taka,
15 | BGN,"Bulgarian Lev",лв
16 | BHD,"Bahraini Dinar",
17 | BIF,"Burundi Franc",
18 | BMD,"Bermudian Dollar (customarily known as Bermuda Dollar)",$
19 | BND,"Brunei Dollar",$
20 | "BOB BOV","Boliviano Mvdol",$b
21 | BRL,"Brazilian Real",R$
22 | BSD,"Bahamian Dollar",$
23 | BWP,Pula,P
24 | BYR,"Belarussian Ruble",p.
25 | BZD,"Belize Dollar",BZ$
26 | CAD,"Canadian Dollar",$
27 | CDF,"Congolese Franc",
28 | CHF,"Swiss Franc",CHF
29 | "CLP CLF","Chilean Peso Unidades de fomento",$
30 | CNY,"Yuan Renminbi",¥
31 | "COP COU","Colombian Peso Unidad de Valor Real",$
32 | CRC,"Costa Rican Colon",₡
33 | "CUP CUC","Cuban Peso Peso Convertible",₱
34 | CVE,"Cape Verde Escudo",
35 | CZK,"Czech Koruna",Kč
36 | DJF,"Djibouti Franc",
37 | DKK,"Danish Krone",kr
38 | DOP,"Dominican Peso",RD$
39 | DZD,"Algerian Dinar",
40 | EEK,Kroon,
41 | EGP,"Egyptian Pound",£
42 | ERN,Nakfa,
43 | ETB,"Ethiopian Birr",
44 | EUR,Euro,€
45 | FJD,"Fiji Dollar",$
46 | FKP,"Falkland Islands Pound",£
47 | GBP,"Pound Sterling",£
48 | GEL,Lari,
49 | GHS,Cedi,
50 | GIP,"Gibraltar Pound",£
51 | GMD,Dalasi,
52 | GNF,"Guinea Franc",
53 | GTQ,Quetzal,Q
54 | GYD,"Guyana Dollar",$
55 | HKD,"Hong Kong Dollar",$
56 | HNL,Lempira,L
57 | HRK,"Croatian Kuna",kn
58 | "HTG USD","Gourde US Dollar",
59 | HUF,Forint,Ft
60 | IDR,Rupiah,Rp
61 | ILS,"New Israeli Sheqel",₪
62 | INR,"Indian Rupee",
63 | "INR BTN","Indian Rupee Ngultrum",
64 | IQD,"Iraqi Dinar",
65 | IRR,"Iranian Rial",﷼
66 | ISK,"Iceland Krona",kr
67 | JMD,"Jamaican Dollar",J$
68 | JOD,"Jordanian Dinar",
69 | JPY,Yen,¥
70 | KES,"Kenyan Shilling",
71 | KGS,Som,лв
72 | KHR,Riel,៛
73 | KMF,"Comoro Franc",
74 | KPW,"North Korean Won",₩
75 | KRW,Won,₩
76 | KWD,"Kuwaiti Dinar",
77 | KYD,"Cayman Islands Dollar",$
78 | KZT,Tenge,лв
79 | LAK,Kip,₭
80 | LBP,"Lebanese Pound",£
81 | LKR,"Sri Lanka Rupee",₨
82 | LRD,"Liberian Dollar",$
83 | LTL,"Lithuanian Litas",Lt
84 | LVL,"Latvian Lats",Ls
85 | LYD,"Libyan Dinar",
86 | MAD,"Moroccan Dirham",
87 | MDL,"Moldovan Leu",
88 | MGA,"Malagasy Ariary",
89 | MKD,Denar,ден
90 | MMK,Kyat,
91 | MNT,Tugrik,₮
92 | MOP,Pataca,
93 | MRO,Ouguiya,
94 | MUR,"Mauritius Rupee",₨
95 | MVR,Rufiyaa,
96 | MWK,Kwacha,
97 | "MXN MXV","Mexican Peso Mexican Unidad de Inversion (UDI)",$
98 | MYR,"Malaysian Ringgit",RM
99 | MZN,Metical,MT
100 | NGN,Naira,₦
101 | NIO,"Cordoba Oro",C$
102 | NOK,"Norwegian Krone",kr
103 | NPR,"Nepalese Rupee",₨
104 | NZD,"New Zealand Dollar",$
105 | OMR,"Rial Omani",﷼
106 | "PAB USD","Balboa US Dollar",B/.
107 | PEN,"Nuevo Sol",S/.
108 | PGK,Kina,
109 | PHP,"Philippine Peso",Php
110 | PKR,"Pakistan Rupee",₨
111 | PLN,Zloty,zł
112 | PYG,Guarani,Gs
113 | QAR,"Qatari Rial",﷼
114 | RON,"New Leu",lei
115 | RSD,"Serbian Dinar",Дин.
116 | RUB,"Russian Ruble",руб
117 | RWF,"Rwanda Franc",
118 | SAR,"Saudi Riyal",﷼
119 | SBD,"Solomon Islands Dollar",$
120 | SCR,"Seychelles Rupee",₨
121 | SDG,"Sudanese Pound",
122 | SEK,"Swedish Krona",kr
123 | SGD,"Singapore Dollar",$
124 | SHP,"Saint Helena Pound",£
125 | SLL,Leone,
126 | SOS,"Somali Shilling",S
127 | SRD,"Surinam Dollar",$
128 | STD,Dobra,
129 | "SVC USD","El Salvador Colon US Dollar",$
130 | SYP,"Syrian Pound",£
131 | SZL,Lilangeni,
132 | THB,Baht,฿
133 | TJS,Somoni,
134 | TMT,Manat,
135 | TND,"Tunisian Dinar",
136 | TOP,Pa'anga,
137 | TRY,"Turkish Lira",TL
138 | TTD,"Trinidad and Tobago Dollar",TT$
139 | TWD,"New Taiwan Dollar",NT$
140 | TZS,"Tanzanian Shilling",
141 | UAH,Hryvnia,₴
142 | UGX,"Uganda Shilling",
143 | USD,"US Dollar",$
144 | "UYU UYI","Peso Uruguayo Uruguay Peso en Unidades Indexadas",$U
145 | UZS,"Uzbekistan Sum",лв
146 | VEF,"Bolivar Fuerte",Bs
147 | VND,Dong,₫
148 | VUV,Vatu,
149 | WST,Tala,
150 | XAF,"CFA Franc BEAC",
151 | XAG,Silver,
152 | XAU,Gold,
153 | XBA,"Bond Markets Units European Composite Unit (EURCO)",
154 | XBB,"European Monetary Unit (E.M.U.-6)",
155 | XBC,"European Unit of Account 9(E.U.A.-9)",
156 | XBD,"European Unit of Account 17(E.U.A.-17)",
157 | XCD,"East Caribbean Dollar",$
158 | XDR,SDR,
159 | XFU,UIC-Franc,
160 | XOF,"CFA Franc BCEAO",
161 | XPD,Palladium,
162 | XPF,"CFP Franc",
163 | XPT,Platinum,
164 | XTS,"Codes specifically reserved for testing purposes",
165 | YER,"Yemeni Rial",﷼
166 | ZAR,Rand,R
167 | "ZAR LSL","Rand Loti",
168 | "ZAR NAD","Rand Namibia Dollar",
169 | ZMK,"Zambian Kwacha",
170 | ZWL,"Zimbabwe Dollar",
171 |
--------------------------------------------------------------------------------
/samples/google/canonical/entity_order.csv:
--------------------------------------------------------------------------------
1 | entity_order
2 | ALPHA
3 | TABLE
4 |
--------------------------------------------------------------------------------
/samples/google/canonical/geo.us.xml:
--------------------------------------------------------------------------------
[XML tags lost during extraction; only text content survived. Recoverable content: a DSPL dataset "US geographical concepts" ("Canonical concepts for US geographical data."; info URL http://code.google.com/apis/publicdata/docs/canonical/geo.us.html), provided by Google Inc. (http://www.google.com). It defines a concept "US State" ("A US State, identified by its two letter code."; plural "States"; other surviving labels "All US" and "US"), whose code list is loaded from states.csv.]
--------------------------------------------------------------------------------
/samples/google/canonical/geo.xml:
--------------------------------------------------------------------------------
[XML tags lost during extraction; only text content survived. Recoverable content: a DSPL dataset "Geographical concepts" ("Canonical concepts for geographical data."; info URL http://code.google.com/apis/publicdata/docs/canonical/geo.html), provided by Google Inc. (http://www.google.com). It defines a base concept "Location" ("Base concept for locations.") with properties "Latitude" and "Longitude", and a concept "Country or Territory" ("A country or territory, identified by its ISO-3166-1 2-letter code."; plural "Countries"; total label "World"), whose code list is loaded from countries.csv.]
--------------------------------------------------------------------------------
/samples/google/canonical/granularity.csv:
--------------------------------------------------------------------------------
1 | granularity
2 | YEARLY
3 | QUARTERLY
4 | MONTHLY
5 | WEEKLY
6 | DAILY
--------------------------------------------------------------------------------
/samples/google/canonical/states.csv:
--------------------------------------------------------------------------------
1 | state,latitude,longitude,name
2 | AK,63.588753,-154.493062,Alaska
3 | AL,32.318231,-86.902298,Alabama
4 | AR,35.20105,-91.831833,Arkansas
5 | AZ,34.048928,-111.093731,Arizona
6 | CA,36.778261,-119.417932,California
7 | CO,39.550051,-105.782067,Colorado
8 | CT,41.603221,-73.087749,Connecticut
9 | DC,38.905985,-77.033418,"District of Columbia"
10 | DE,38.910832,-75.52767,Delaware
11 | FL,27.664827,-81.515754,Florida
12 | GA,32.157435,-82.907123,Georgia
13 | HI,19.898682,-155.665857,Hawaii
14 | IA,41.878003,-93.097702,Iowa
15 | ID,44.068202,-114.742041,Idaho
16 | IL,40.633125,-89.398528,Illinois
17 | IN,40.551217,-85.602364,Indiana
18 | KS,39.011902,-98.484246,Kansas
19 | KY,37.839333,-84.270018,Kentucky
20 | LA,31.244823,-92.145024,Louisiana
21 | MA,42.407211,-71.382437,Massachusetts
22 | MD,39.045755,-76.641271,Maryland
23 | ME,45.253783,-69.445469,Maine
24 | MI,44.314844,-85.602364,Michigan
25 | MN,46.729553,-94.6859,Minnesota
26 | MO,37.964253,-91.831833,Missouri
27 | MS,32.354668,-89.398528,Mississippi
28 | MT,46.879682,-110.362566,Montana
29 | NC,35.759573,-79.0193,"North Carolina"
30 | ND,47.551493,-101.002012,"North Dakota"
31 | NE,41.492537,-99.901813,Nebraska
32 | NH,43.193852,-71.572395,"New Hampshire"
33 | NJ,40.058324,-74.405661,"New Jersey"
34 | NM,34.97273,-105.032363,"New Mexico"
35 | NV,38.80261,-116.419389,Nevada
36 | NY,43.299428,-74.217933,"New York"
37 | OH,40.417287,-82.907123,Ohio
38 | OK,35.007752,-97.092877,Oklahoma
39 | OR,43.804133,-120.554201,Oregon
40 | PA,41.203322,-77.194525,Pennsylvania
41 | PR,18.220833,-66.590149,"Puerto Rico"
42 | RI,41.580095,-71.477429,"Rhode Island"
43 | SC,33.836081,-81.163725,"South Carolina"
44 | SD,43.969515,-99.901813,"South Dakota"
45 | TN,35.517491,-86.580447,Tennessee
46 | TX,31.968599,-99.901813,Texas
47 | UT,39.32098,-111.093731,Utah
48 | VA,37.431573,-78.656894,Virginia
49 | VT,44.558803,-72.577841,Vermont
50 | WA,47.751074,-120.740139,Washington
51 | WI,43.78444,-88.787868,Wisconsin
52 | WV,38.597626,-80.454903,"West Virginia"
53 | WY,43.075968,-107.290284,Wyoming
54 |
--------------------------------------------------------------------------------
/samples/google/canonical/time.xml:
--------------------------------------------------------------------------------
[XML tags lost during extraction; only text content survived. Recoverable content: the "Google date and time" dataset ("Google date and time dataset"; info URL http://code.google.com/apis/publicdata/docs/canonical/time.html), provided by Google Inc. It defines a base concept "Point in time" ("Point in time, with a given granularity.") and derived concepts "Year date", "Quarter date", "Month date", "Week date", and "Day date", each associated with one granularity value (YEARLY, QUARTERLY, MONTHLY, WEEKLY, DAILY). The "Year date" documentation embedded an example of a yearly population-by-country slice and its table definition (referencing country_slice.csv), with sample CSV data:

country, year, population
AF, 1960, 9616353
AF, 1961, 9799379
AF, 1962, 9989846
AF, 1963, 10188299
...

A "Granularity" concept ("Granularity of time, i.e., the uncertainty that a point in time may be anywhere within some time interval.") takes its values from granularity.csv.]
--------------------------------------------------------------------------------
/samples/google/canonical/unit.xml:
--------------------------------------------------------------------------------
[XML tags lost during extraction; only text content survived. Recoverable content: the "Unit concepts" dataset ("Concepts for representing units."; info URL http://code.google.com/apis/publicdata/docs/canonical/unit.html), provided by Google Inc. It defines a "Unit" concept ("Specifies the unit associated with a metric concept.") with properties "symbol" ("The symbol associated with a unit."), "Unit text" ("Descriptive text that can be displayed next to a value."), and "Unit symbol position" ("Unit symbol position"; default END, values from unit_symbol_positions.csv). The embedded example defines an "Area in square kilometers" unit with the property values

symbol,symbol_position,unit_text
km²,END,square kilometers

and then uses it in a "Country area in square kilometers" metric concept. A "Currency unit" concept ("Specifies the currency associated with a metric concept. Each currency is identified by its 3-letter currency code (ISO 4217).") takes its values from currencies.csv.]
--------------------------------------------------------------------------------
/samples/google/canonical/unit_symbol_positions.csv:
--------------------------------------------------------------------------------
1 | symbol_position
2 | START
3 | END
4 |
--------------------------------------------------------------------------------
/samples/google/dspl-sample/countries.csv:
--------------------------------------------------------------------------------
1 | country,name,latitude,longitude
2 | AD,Andorra,42.546245,1.601554
3 | AF,Afghanistan,33.93911,67.709953
4 | AI,Anguilla,18.220554,-63.068615
5 | AL,Albania,41.153332,20.168331
6 | US,United States,37.09024,-95.712891
7 |
--------------------------------------------------------------------------------
/samples/google/dspl-sample/country_slice.csv:
--------------------------------------------------------------------------------
1 | country,year,population
2 | AF,1960,9616353
3 | AF,1961,9799379
4 | AF,1962,9989846
5 | AF,1963,10188299
6 | AD,1960,8616353
7 | AD,1961,8799379
8 | AD,1962,8989846
9 | AD,1963,9188299
10 | US,1960,19616353
11 | US,1961,19799379
12 | US,1962,19989846
13 | US,1963,110188299
--------------------------------------------------------------------------------
/samples/google/dspl-sample/gender_country_slice.csv:
--------------------------------------------------------------------------------
1 | country,gender,year,population
2 | AF,M,1960,4808176
3 | AF,M,1961,4899689
4 | AF,F,1960,4808177
5 | AF,F,1961,4899690
6 | AD,M,1960,3808176
7 | AD,M,1961,3899689
8 | AD,F,1960,3808177
9 | AD,F,1961,3899690
10 | US,M,1960,9808176
11 | US,M,1961,9899689
12 | US,F,1960,9808177
13 | US,F,1961,9899690
--------------------------------------------------------------------------------
/samples/google/dspl-sample/genders.csv:
--------------------------------------------------------------------------------
1 | gender,name
2 | M,Male
3 | F,Female
4 |
--------------------------------------------------------------------------------
/samples/google/dspl-sample/state_slice.csv:
--------------------------------------------------------------------------------
1 | state,year,population,unemployment_rate
2 | AL,1960,9616353,5.1
3 | AL,1961,9799379,5.2
4 | AL,1962,9989846,4.8
5 | AL,1963,10188299,6.9
6 | AK,1960,8616353,6.1
7 | AK,1961,8799379,6.2
8 | AK,1962,8989846,7.8
9 | AK,1963,9188299,7.9
--------------------------------------------------------------------------------
/samples/google/dspl-sample/states.csv:
--------------------------------------------------------------------------------
1 | state,name,latitude,longitude
2 | AL,Alabama,32.318231,-86.902298
3 | AK,Alaska,63.588753,-154.493062
4 | AR,Arkansas,35.20105,-91.831833
5 | AZ,Arizona,34.048928,-111.093731
6 | CA,California,36.778261,-119.417932
7 | CO,Colorado,39.550051,-105.782067
8 | CT,Connecticut,41.603221,-73.087749
9 |
--------------------------------------------------------------------------------
/samples/us_census/retail_sales/businesses.csv:
--------------------------------------------------------------------------------
1 | "business","name","parent"
2 | "44x72","Retail and Food services",
3 | "44000","Retail services","44x72"
4 | "44100","Motor Vehicle and Parts Dealers","44000"
5 | "44200","Furniture and Home Furnishings Stores","44000"
6 | "44300","Electronics and Appliance Stores","44000"
7 | "44400","Building Material and Garden Equipment and Supplies Dealers","44000"
8 | "44500","Food and Beverage Stores","44000"
9 | "44510","Grocery Stores","44500"
10 | "44600","Health and Personal Care Stores","44000"
11 | "44700","Gasoline Stations","44000"
12 | "44800","Clothing and Clothing Accessories Stores","44000"
13 | "45100","Sporting Goods, Hobby, Book, and Music Stores","44000"
14 | "45200","General Merchandise Stores","44000"
15 | "45210","Department Stores (excluding leased department stores)","45200"
16 | "45300","Miscellaneous Store Retailers","44000"
17 | "45400","Nonstore Retailers","44000"
18 | "72200","Food Services and Drinking Places","44x72"
19 | "44xxx","Other Aggregates",
20 | "44y72","Retail and Food services (excluding motor vehicles)","44xxx"
21 | "4400a","Retail Services (excluding Motor Vehicle and Parts Dealers)","44xxx"
22 | "441x0","Auto and other Motor Vehicle","44xxx"
23 |
--------------------------------------------------------------------------------
/samples/us_census/retail_sales/census-retail-sales.xml:
--------------------------------------------------------------------------------
1 |
2 |
32 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 | Retail Sales in the U.S.
48 |
49 |
50 | Monthly Retail Trade and Food Services report
51 | for the United States. This dataset was prepared by Google based
52 | on data downloaded from the U.S. Census Bureau.
53 |
54 |
55 | http://www.census.gov/retail/
56 |
57 |
58 |
59 |
60 |
61 | U.S. Census Bureau
62 |
63 |
64 | U.S. Census Bureau
65 |
66 |
67 | http://www.census.gov/retail/
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 | Industry
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 | Type of business
86 |
87 |
88 | The principal kind of business being conducted at an establishment
89 |
90 |
91 | Types of business
92 |
93 |
94 |
95 |
96 |
97 | TABLE
98 |
99 |
100 |
101 |
102 | Parent Business
103 |
104 |
105 |
106 | 44x72
107 |
108 |
109 |
110 |
111 |
112 |
113 | Seasonality
114 |
115 |
116 | Whether the values are seasonally adjusted or not
117 |
118 |
119 | Seasonalities
120 |
121 |
122 |
123 | Seasonally Adjusted
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 | Retail Sales Volume
132 |
133 |
134 | Sales include merchandise sold by establishments primarily engaged in retail trade.
135 |
136 |
137 |
138 |
139 |
140 | USD
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 | businesses.csv
165 |
166 |
167 |
168 |
169 |
170 |
171 | seasonalities.csv
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 | retail_sales_business.csv
182 |
183 |
184 |
185 |
186 |
187 |
--------------------------------------------------------------------------------
/samples/us_census/retail_sales/seasonalities.csv:
--------------------------------------------------------------------------------
1 | "seasonality"
2 | "Not Seasonally Adjusted"
3 | "Seasonally Adjusted"
4 |
--------------------------------------------------------------------------------
/tools/dspl2/dspl2/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2018 Google LLC
2 | #
3 | # Use of this source code is governed by a BSD-style
4 | # license that can be found in the LICENSE file or at
5 | # https://developers.google.com/open-source/licenses/bsd
6 |
7 | from dspl2.expander import Dspl2JsonLdExpander
8 | from dspl2.expander import Dspl2RdfExpander
9 | from dspl2.filegetter import HybridFileGetter
10 | from dspl2.filegetter import InternetFileGetter
11 | from dspl2.filegetter import LocalFileGetter
12 | from dspl2.filegetter import UploadedFileGetter
13 | from dspl2.jsonutil import AsList
14 | from dspl2.jsonutil import GetSchemaId
15 | from dspl2.jsonutil import GetSchemaProp
16 | from dspl2.jsonutil import GetSchemaType
17 | from dspl2.jsonutil import GetUrl
18 | from dspl2.jsonutil import JsonToKwArgsDict
19 | from dspl2.jsonutil import MakeIdKeyedDict
20 | from dspl2.rdfutil import LoadGraph
21 | from dspl2.rdfutil import FrameGraph
22 | from dspl2.rdfutil import MakeSparqlSelectQuery
23 | from dspl2.rdfutil import SelectFromGraph
24 | from dspl2.validator import CheckDataset
25 | from dspl2.validator import CheckDimension
26 | from dspl2.validator import CheckMeasure
27 | from dspl2.validator import CheckSlice
28 | from dspl2.validator import CheckSliceData
29 | from dspl2.validator import CheckStatisticalDataset
30 | from dspl2.validator import ValidateDspl2
31 |
32 | __all__ = [
33 | "AsList",
34 | "CheckDataset",
35 | "CheckDimension",
36 | "CheckMeasure",
37 | "CheckSlice",
38 | "CheckSliceData",
39 | "CheckStatisticalDataset",
40 | "Dspl2JsonLdExpander",
41 | "Dspl2RdfExpander",
42 | "FrameGraph",
43 | "GetSchemaId",
44 | "GetSchemaProp",
45 | "GetSchemaType",
46 | "GetUrl",
47 | "HybridFileGetter",
48 | "InternetFileGetter",
49 | "JsonToKwArgsDict",
50 | "LoadGraph",
51 | "LocalFileGetter",
52 | "MakeIdKeyedDict",
53 | "MakeSparqlSelectQuery",
54 | "SelectFromGraph",
55 | "UploadedFileGetter",
56 | "ValidateDspl2",
57 | ]
58 |
--------------------------------------------------------------------------------
/tools/dspl2/dspl2/filegetter.py:
--------------------------------------------------------------------------------
1 | # Copyright 2018 Google LLC
2 | #
3 | # Use of this source code is governed by a BSD-style
4 | # license that can be found in the LICENSE file or at
5 | # https://developers.google.com/open-source/licenses/bsd
6 |
7 | import extruct
8 | from io import StringIO
9 | import json
10 | from pathlib import Path
11 | import requests
12 | import sys
13 | from urllib.parse import urljoin, urlparse
14 |
15 | from dspl2.rdfutil import LoadGraph, SelectFromGraph
16 |
17 |
18 | def _ProcessDspl2File(filename, fileobj, *, type=''):
19 | if any([filename.endswith('.html'),
20 | type.startswith('text/html')]):
21 | data = extruct.extract(fileobj.read(), uniform=True)
22 | return LoadGraph({
23 | '@context': 'http://schema.org',
24 | '@graph': [
25 | subdata_elem
26 | for subdata in data.values()
27 | for subdata_elem in subdata
28 | if subdata
29 | ]
30 | }, filename)
31 | if any([filename.endswith('.json'),
32 | filename.endswith('.jsonld'),
33 | type.startswith('application/ld+json')]):
34 | json_val = json.load(fileobj)
35 | return LoadGraph(json_val, filename)
36 |
37 |
38 | class UploadedFileGetter(object):
39 | def __init__(self, files):
40 | json_files = set()
41 | self.graph = None
42 | self.file_map = {}
43 | for f in files:
44 | self.file_map[f.filename] = f
45 | data = _ProcessDspl2File(f.filename, f.stream)
46 | if data:
47 | json_files.add(f.filename)
48 | self.base = f.filename
49 | self.graph = data
50 | if not self.graph:
51 | raise RuntimeError("DSPL 2 file not present in {}".format(
52 | [file.filename for file in self.file_map.values()]))
53 | if len(json_files) > 1:
54 | raise RuntimeError("Multiple DSPL 2 files present: {}".format(json_files))
55 |
56 | def Fetch(self, filename):
57 | f = self.file_map.get(filename)
58 | if not f:
59 | raise IOError(None, 'File not found', filename)
60 | f.stream.seek(0)
61 | return StringIO(f.read().decode('utf-8'))
62 |
63 |
64 | class InternetFileGetter(object):
65 | def __init__(self, url):
66 | self.base = url
67 | r = requests.get(self.base)
68 | r.raise_for_status()
69 | self.graph = _ProcessDspl2File(url, StringIO(r.text), type=r.headers['content-type'])
70 |
71 | def Fetch(self, filename):
72 | r = requests.get(urljoin(self.base, filename))
73 | r.raise_for_status()
74 | return StringIO(r.text)
75 |
76 |
77 | class LocalFileGetter(object):
78 | def __init__(self, path):
79 | self.base = urlparse(path).path
80 | with Path(self.base).open() as f:
81 | self.graph = _ProcessDspl2File(path, f)
82 |
83 | def Fetch(self, filename):
84 | filename = urlparse(filename).path
85 | path = Path(self.base).parent.joinpath(Path(filename)).resolve()
86 | return path.open()
87 |
88 |
89 | class HybridFileGetter(object):
90 | @staticmethod
91 | def _load_file(base, rel=None):
92 | uri = urlparse(base)
93 | if rel:
94 | uri = urlparse(urljoin(base, rel))
95 | if not uri.scheme or uri.scheme == 'file':
96 | return Path(uri.path).open()
97 | elif uri.scheme == 'http' or uri.scheme == 'https':
98 | r = requests.get(uri.geturl())
99 | r.raise_for_status()
100 | return StringIO(r.text)
101 |
102 | def __init__(self, json_uri):
103 | self.base = json_uri
104 | self.graph = _ProcessDspl2File(
105 | json_uri,
106 | HybridFileGetter._load_file(json_uri))
107 |
108 | def Fetch(self, uri):
109 | return HybridFileGetter._load_file(self.base, uri)
110 |
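
The getters above share a small duck-typed interface: a `graph` attribute holding the
DSPL 2 metadata parsed into an rdflib graph, and a `Fetch` method returning a file
object for any file the dataset references. A minimal usage sketch, assuming a
hypothetical local `dataset.json` that references `countries.csv` beside it:

    from dspl2.filegetter import LocalFileGetter

    getter = LocalFileGetter('dataset.json')
    print(getter.graph)            # rdflib Graph parsed from the JSON-LD
    with getter.Fetch('countries.csv') as f:
        print(f.readline())        # header row of the referenced CSV
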
--------------------------------------------------------------------------------
/tools/dspl2/dspl2/jsonutil.py:
--------------------------------------------------------------------------------
1 | # Copyright 2018 Google LLC
2 | #
3 | # Use of this source code is governed by a BSD-style
4 | # license that can be found in the LICENSE file or at
5 | # https://developers.google.com/open-source/licenses/bsd
6 |
7 |
8 | def AsList(val):
9 | """Ensures the JSON-LD object is a list."""
10 | if isinstance(val, list):
11 | return val
12 | elif val is None:
13 | return []
14 | else:
15 | return [val]
16 |
17 |
18 | def GetSchemaProp(obj, key, default=None):
19 | try:
20 | return obj.get(key, obj.get('schema:' + key, default))
21 | except AttributeError as e:
22 | raise RuntimeError(f"Unable to find key '{key}' in {obj}") from e
23 |
24 |
25 | def JsonToKwArgsDict(json_val):
26 | """Turn a StatisticalDataset object into a kwargs dict for a Jinja2 template.
27 |
28 | Specifically, this collects top-level dataset metadata under a "dataset" key,
29 | and keeps dimensions, measures, footnotes, and slices as they are.
30 | """
31 | ret = {'dataset': {}}
32 | special_keys = {'dimension', 'measure', 'footnote', 'slice'}
33 | for key in json_val:
34 | if key in special_keys:
35 | ret[key] = GetSchemaProp(json_val, key)
36 | else:
37 | ret['dataset'][key] = GetSchemaProp(json_val, key)
38 | return ret
39 |
40 |
41 | def MakeIdKeyedDict(vals):
42 | """Returns a dict mapping objects' IDs to objects in the provided list.
43 |
44 | Given a list of JSON-LD objects, return a dict mapping each element's ID to the
45 | element.
46 |
47 | Parameters:
48 | vals (list): list of JSON-LD objects (dicts), each optionally carrying an '@id'
49 |
50 | Returns:
51 | dict: dict whose values are elements of `vals` and whose keys are their IDs.
52 | """
53 | ret = {}
54 | for val in vals:
55 | id = GetSchemaProp(val, '@id')
56 | if id:
57 | ret[id] = val
58 | return ret
59 |
60 |
61 | def GetSchemaId(obj):
62 | return obj.get('@id', GetSchemaProp(obj, 'id'))
63 |
64 |
65 | def GetSchemaType(obj):
66 | return obj.get('@type', GetSchemaProp(obj, 'type'))
67 |
68 |
69 | def GetUrl(obj):
70 | if isinstance(obj, str):
71 | return obj
72 | elif isinstance(obj, dict):
73 | return GetSchemaId(obj)
74 |
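
A quick illustration of these helpers on hand-made values (all inputs below are
hypothetical):

    from dspl2.jsonutil import AsList, GetSchemaProp, MakeIdKeyedDict

    AsList(None)                                  # []
    AsList('x')                                   # ['x']
    GetSchemaProp({'schema:name': 'n'}, 'name')   # 'n'
    MakeIdKeyedDict([{'@id': '#a'}, {'@id': '#b'}])
    # {'#a': {'@id': '#a'}, '#b': {'@id': '#b'}}
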
--------------------------------------------------------------------------------
/tools/dspl2/dspl2/rdfutil.py:
--------------------------------------------------------------------------------
1 | # Copyright 2018 Google LLC
2 | #
3 | # Use of this source code is governed by a BSD-style
4 | # license that can be found in the LICENSE file or at
5 | # https://developers.google.com/open-source/licenses/bsd
6 |
7 | import json
8 | from pathlib import Path
9 | from pyld import jsonld
10 | from rdflib import Graph, Namespace
11 | from rdflib.serializer import Serializer
12 | import sys
13 |
14 | from dspl2.jsonutil import AsList
15 |
16 |
17 | SCHEMA = Namespace('http://schema.org/')
18 |
19 |
20 | _Schema = {}
21 | _Context = {}
22 | _DataFileFrame = {
23 | '@context': [_Context, {'schema': 'http://schema.org/'}],
24 | '@type': 'StatisticalDataset',
25 | }
26 | _FullFrame = {
27 | '@context': [_Context, {'schema': 'http://schema.org/'}],
28 | '@type': 'StatisticalDataset',
29 | 'slice': {
30 | 'dimension': {
31 | '@embed': '@never'
32 | },
33 | 'measure': {
34 | '@embed': '@never'
35 | },
36 | 'tableMapping': {
37 | 'sourceEntity': {
38 | '@embed': '@never'
39 | }
40 | },
41 | 'data': {
42 | 'dimensionValue': {
43 | 'dimension': {
44 | '@embed': '@never'
45 | }
46 | },
47 | 'measureValue': {
48 | 'measure': {
49 | '@embed': '@never'
50 | },
51 | 'footnote': {
52 | '@embed': '@never'
53 | }
54 | }
55 | }
56 | }
57 | }
58 | _Initialized = False
59 | _Module_path = Path(__file__).parent
60 | _RdfPrefixes = {
61 | 'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
62 | 'rdfs': 'http://www.w3.org/2000/01/rdf-schema#',
63 | 'schema': 'http://schema.org/',
64 | }
65 |
66 |
67 | def _Init(context, schema):
68 | global _Context, _Schema, _Initialized
69 | if not _Initialized:
70 | with schema.open() as schema:
71 | _Schema.update(json.load(schema))
72 | with context.open() as context:
73 | _Context.update(json.load(context))
74 | del _Context['@context']['id']
75 | del _Context['@context']['type']
76 | _Initialized = True
77 |
78 |
79 | def _LoadJsonLd(json_val, public_id):
80 | _Init(_Module_path / 'schema' / 'jsonldcontext.json',
81 | _Module_path / 'schema' / 'schema.jsonld')
82 | json_val['@context'] = _Context
83 | graph = Graph().parse(
84 | data=json.dumps(json_val).encode('utf-8'),
85 | format='json-ld',
86 | publicID=public_id
87 | )
88 | return graph
89 |
90 |
91 | def LoadGraph(input, public_id):
92 | if isinstance(input, dict):
93 | data = input
94 | elif isinstance(input, str):
95 | data = json.loads(input)
96 | else:
97 | data = json.load(input)
98 |
99 | return _LoadJsonLd(data, public_id)
100 |
101 |
102 | def FrameGraph(graph, frame=_FullFrame):
103 | serialized = graph.serialize(format='json-ld')
104 | json_val = json.loads(serialized)
105 | json_val = {
106 | '@context': _Context,
107 | '@graph': AsList(json_val)
108 | }
109 | framed = jsonld.frame(json_val, frame, {'embed': '@always'})
110 | framed['@context'] = 'http://schema.org'
111 | for items in framed['@graph']:
112 | framed.update(items)
113 | del framed['@graph']
114 | return framed
115 |
116 |
117 | def _N3(obj, namespace_manager):
118 | if isinstance(obj, str):
119 | return obj
120 | return obj.n3(namespace_manager=namespace_manager)
121 |
122 |
123 | def MakeSparqlSelectQuery(*constraints,
124 | ns_manager=None,
125 | rdf_prefixes=_RdfPrefixes):
126 | ret = ''
127 | for prefix, url in rdf_prefixes.items():
128 | ret += f'PREFIX {prefix}: <{url}>\n'
129 | ret += 'SELECT * WHERE {\n'
130 | for constraint in constraints:
131 | sub, pred, obj = (_N3(field, ns_manager)
132 | for field in constraint)
133 | ret += f' {sub} {pred} {obj} .\n'
134 | ret += '}'
135 | return ret
136 |
137 |
138 | def SelectFromGraph(graph, *constraints):
139 | result = graph.query(
140 | MakeSparqlSelectQuery(
141 | *constraints,
142 | ns_manager=graph.namespace_manager))
143 | return list({str(k): str(v)
144 | for k, v in binding.items()}
145 | for binding in result.bindings)
146 |
147 |
148 | def main(args):
149 | with open(args[1]) as f:
150 | normalized = FrameGraph(LoadGraph(f, args[1]))
151 | json.dump(normalized, sys.stdout, indent=2)
152 |
153 |
154 | if __name__ == '__main__':
155 | main(sys.argv)
156 |
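
MakeSparqlSelectQuery only assembles text, so its output is easy to check by eye. For
example, the constraint used in test_rdfutil.py below produces:

    from dspl2.rdfutil import MakeSparqlSelectQuery

    print(MakeSparqlSelectQuery(
        ('?ds', 'rdf:type', 'schema:StatisticalDataset')))
    # PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    # PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    # PREFIX schema: <http://schema.org/>
    # SELECT * WHERE {
    #   ?ds rdf:type schema:StatisticalDataset .
    # }
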
--------------------------------------------------------------------------------
/tools/dspl2/dspl2/templates/choose.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | DSPL 2 Viewer
4 |
5 |
6 |
7 | DSPL 2 Viewer
8 | Dataset
9 |
18 |
19 |
20 |
--------------------------------------------------------------------------------
/tools/dspl2/dspl2/templates/display.html:
--------------------------------------------------------------------------------
1 | {% from 'render.html' import render %}
2 |
3 |
4 | DSPL 2 Viewer
5 |
8 |
9 |
10 | DSPL 2 Viewer
11 | Dataset
12 | Dimensions
13 | Measures
14 | Footnotes
15 | Slices
16 |
17 | {{render(dataset)}}
18 |
19 |
20 |
21 |
22 | dimensions
23 | {{ render(dimension) }}
24 |
25 |
26 |
27 |
28 |
29 |
30 | measures
31 | {{ render(measure) }}
32 |
33 |
34 |
35 | {% if footnote %}
36 |
44 | {% endif %}
45 |
46 |
47 |
48 | slices
49 | {{ render(slice) }}
50 |
51 |
52 |
53 |
54 |
55 |
58 |
59 |
60 |
--------------------------------------------------------------------------------
/tools/dspl2/dspl2/templates/error.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | DSPL 2 Viewer
4 |
5 |
6 |
7 | DSPL 2 Viewer
8 | Error fetching dataset
9 |
10 |
11 | {% if url %}
12 | Error {{action}} {{url}}
13 | {% endif %}
14 |
15 |
16 | {% if status %}
17 | HTTP status {{status}}:
18 | {% endif %}
19 | {% if text %}
20 | {{text}}
21 | {% endif %}
22 |
23 |
24 |
25 |
--------------------------------------------------------------------------------
/tools/dspl2/dspl2/templates/render.html:
--------------------------------------------------------------------------------
1 | {% macro render(obj) %}
2 | {% if obj.items %}
3 |
4 | {% for key, val in obj|dictsort %}
5 |
6 | {{key}}
7 | {{ render(val) }}
8 |
9 | {% endfor %}
10 |
11 | {% elif obj.append %}
12 | {% for val in obj %}
13 |
14 |
15 |
16 | {{ render(val) }}
17 |
18 |
19 | {% endfor %}
20 |
21 | {% elif obj and obj.startswith and (obj.startswith('https://') or obj.startswith('http://')) %}
22 | {{obj}}
23 | {% else %}
24 | {{obj}}
25 | {% endif %}
26 | {% endmacro %}
27 |
--------------------------------------------------------------------------------
/tools/dspl2/dspl2/templates/viewer.css:
--------------------------------------------------------------------------------
1 | body {
2 | font-family: sans-serif;
3 | }
4 |
5 | h2 {
6 | display: inline-block;
7 | padding: 1ex;
8 | border: 1px solid;
9 | margin: 0px;
10 | }
11 |
12 | h2.active {
13 | background-color: yellow;
14 | box-shadow: 2px 2px gray;
15 | }
16 |
17 | table {
18 | border-collapse: collapse;
19 | background-color: white;
20 | width: 100%
21 | }
22 |
23 | table,th,td {
24 | border: 1px solid;
25 | }
26 |
27 | td {
28 | vertical-align: top;
29 | }
30 |
31 | tr:nth-child(even) {background-color: #f2f2f2;}
32 |
33 | td:first-child {
34 | width:10%;
35 | }
36 |
37 | .hidden {
38 | display: none;
39 | }
40 |
41 | td:first-child.closed::before {
42 | content: "▶️";
43 | color: red;
44 | }
45 | td:first-child.open::before {
46 | content: "🔻";
47 | color: red;
48 | }
49 |
--------------------------------------------------------------------------------
/tools/dspl2/dspl2/templates/viewer.js:
--------------------------------------------------------------------------------
1 | for (var td of document.querySelectorAll('td:first-child')) {
2 | var sibling = td.nextElementSibling;
3 | if (sibling) {
4 | if (sibling.querySelector('table')) {
5 | if (sibling.children.length < 20) {
6 | td.classList.toggle('open');
7 | } else {
8 | td.classList.toggle('closed');
9 | sibling.classList.toggle('hidden');
10 | }
11 | td.addEventListener('click', (ev) => {
12 | ev.target.classList.toggle('open');
13 | ev.target.classList.toggle('closed');
14 | ev.target.nextElementSibling.classList.toggle('hidden');
15 | });
16 | }
17 | }
18 | }
19 |
20 | function onclick(ev) {
21 | document.querySelectorAll('h2').forEach((elt) => {
22 | elt.classList.remove('active');
23 | });
24 | ev.target.classList.add('active');
25 |
26 | document.querySelectorAll('div').forEach((elt) => {
27 | elt.classList.add('hidden');
28 | });
29 | document.querySelector('div#'+ev.target.textContent.trim().toLowerCase()).classList.remove('hidden');
30 | }
31 |
32 | document.querySelectorAll('h2').forEach((elt) => {
33 | elt.addEventListener('click', onclick);
34 | });
35 |
--------------------------------------------------------------------------------
/tools/dspl2/dspl2/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/dspl/db79dad685276dbf98ca44b875d1481bc240c5c1/tools/dspl2/dspl2/tests/__init__.py
--------------------------------------------------------------------------------
/tools/dspl2/dspl2/tests/test_expander.py:
--------------------------------------------------------------------------------
1 | from dspl2.expander import Dspl2JsonLdExpander, Dspl2RdfExpander
2 | from dspl2.rdfutil import SCHEMA
3 | from io import StringIO
4 | import rdflib
5 | import unittest
6 |
7 |
8 | class DummyGetter(object):
9 | def __init__(self, graph):
10 | self.graph = graph
11 | self.data = {}
12 |
13 | def Set(self, filename, data):
14 | self.data[filename] = StringIO(data)
15 |
16 | def Fetch(self, filename):
17 | return self.data.get(filename, StringIO(''))
18 |
19 |
20 | class ExpanderTests(unittest.TestCase):
21 | def test_Dspl2RdfExpander_ExpandDimensionValue(self):
22 | graph = rdflib.Graph()
23 | getter = DummyGetter(graph)
24 | expander = Dspl2RdfExpander(getter)
25 | dim = rdflib.URIRef('http://foo.invalid/test.json#dim')
26 | equiv_types = [SCHEMA.Place]
27 | row = {
28 | 'codeValue': 'cv',
29 | 'key1': 'val1',
30 | 'key2': 'val2',
31 | }
32 | row_id = rdflib.URIRef(str(dim) + '=' + row['codeValue'])
33 | expander._ExpandDimensionValue(dim, equiv_types, row_id, row)
34 | self.assertEqual(set(graph.objects(subject=dim, predicate=SCHEMA.codeList)),
35 | {row_id})
36 | self.assertEqual(set(graph.objects(subject=row_id,
37 | predicate=rdflib.RDF.type)),
38 | {SCHEMA.DimensionValue, SCHEMA.Place})
42 | self.assertEqual(set(graph.objects(subject=row_id, predicate=SCHEMA.key1)),
43 | {rdflib.Literal('val1')})
44 | self.assertEqual(set(graph.objects(subject=row_id, predicate=SCHEMA.key2)),
45 | {rdflib.Literal('val2')})
46 | self.assertEqual(set(graph.objects(subject=row_id,
47 | predicate=SCHEMA.codeValue)),
48 | {rdflib.Literal('cv')})
49 |
50 | def test_Dspl2RdfExpander_ExpandFootnotes(self):
51 | graph = rdflib.Graph()
52 | dim = rdflib.URIRef('#ds')
53 | graph.add((dim, rdflib.RDF.type, SCHEMA.StatisticalDataset))
54 | graph.add((dim, SCHEMA.footnote, rdflib.Literal('foo')))
55 | getter = DummyGetter(graph)
56 | getter.Set('foo', 'codeValue,name,description\np,predicted,Value is predicted rather than measured.\n')
57 | expander = Dspl2RdfExpander(getter)
58 | expander._ExpandFootnotes()
59 | for triple in graph:
60 | print(triple)
61 | footnote_id = rdflib.URIRef('#footnote=p')
62 | self.assertEqual(set(graph.objects(subject=dim,
63 | predicate=SCHEMA.footnote)),
64 | {footnote_id})
65 | self.assertEqual(set(graph.objects(subject=footnote_id,
66 | predicate=SCHEMA.description)),
67 | {rdflib.term.Literal('Value is predicted rather than measured.')})
68 | self.assertEqual(set(graph.objects(subject=footnote_id,
69 | predicate=SCHEMA.name)),
70 | {rdflib.term.Literal('predicted')})
71 | self.assertEqual(set(graph.objects(subject=footnote_id,
72 | predicate=rdflib.RDF.type)),
73 | {SCHEMA.StatisticalAnnotation})
74 | self.assertEqual(set(graph.objects(subject=footnote_id,
75 | predicate=SCHEMA.codeValue)),
76 | {rdflib.term.Literal('p')})
77 |
78 | def test_Dspl2RdfExpander_ExpandSliceData(self):
79 | pass
80 |
81 | def test_Dspl2JsonLdExpander_ExpandCodeList(self):
82 | pass
83 |
84 | def test_Dspl2JsonLdExpander_ExpandFootnotes(self):
85 | pass
86 |
87 | def test_Dspl2JsonLdExpander_ExpandSliceData(self):
88 | pass
89 |
90 |
91 | if __name__ == '__main__':
92 | unittest.main()
93 |
--------------------------------------------------------------------------------
/tools/dspl2/dspl2/tests/test_jsonutil.py:
--------------------------------------------------------------------------------
1 | from dspl2.jsonutil import (AsList, GetSchemaProp, JsonToKwArgsDict,
2 | MakeIdKeyedDict, GetSchemaId, GetSchemaType, GetUrl)
3 | import unittest
4 |
5 |
6 | class JsonUtilTests(unittest.TestCase):
7 | def test_AsList(self):
8 | self.assertEqual(AsList(None), [])
9 | self.assertEqual(AsList([]), [])
10 | self.assertEqual(AsList([1]), [1])
11 | self.assertEqual(AsList(1), [1])
12 |
13 | def test_GetSchemaProp(self):
14 | self.assertEqual(GetSchemaProp({'id': 'val'}, 'id'), 'val')
15 | self.assertEqual(GetSchemaProp({'schema:id': 'val'}, 'id'), 'val')
16 |
17 | def test_JsonToKwArgsDict(self):
18 | self.assertEqual(JsonToKwArgsDict({'id': 'val'}), {'dataset': {'id': 'val'}})
19 | self.assertEqual(JsonToKwArgsDict({}), {'dataset': {}})
20 |
21 | def test_MakeIdKeyedDict(self):
22 | objs = [{'@id': '1'}, {'@id': '2'}]
23 | lookup = MakeIdKeyedDict(objs)
24 | self.assertEqual(lookup['1'], {'@id': '1'})
25 | self.assertEqual(lookup['2'], {'@id': '2'})
26 |
27 | def test_GetSchemaId(self):
28 | self.assertEqual(GetSchemaId({'@id': 'val'}), 'val')
29 | self.assertEqual(GetSchemaId({'id': 'val'}), 'val')
30 | self.assertEqual(GetSchemaId({'schema:id': 'val'}), 'val')
31 |
32 | def test_GetSchemaType(self):
33 | self.assertEqual(GetSchemaType({'@type': 'val'}), 'val')
34 | self.assertEqual(GetSchemaType({'type': 'val'}), 'val')
35 | self.assertEqual(GetSchemaType({'schema:type': 'val'}), 'val')
36 |
37 | def test_GetUrl(self):
38 | self.assertEqual(GetUrl({'@id': 'val'}), 'val')
39 | self.assertEqual(GetUrl('val'), 'val')
40 |
41 |
42 | if __name__ == '__main__':
43 | unittest.main()
44 |
--------------------------------------------------------------------------------
/tools/dspl2/dspl2/tests/test_rdfutil.py:
--------------------------------------------------------------------------------
1 | from dspl2.rdfutil import (LoadGraph, FrameGraph, SelectFromGraph)
2 | from io import StringIO
3 | import json
4 | import rdflib
5 | import rdflib.compare
6 | import unittest
7 |
8 |
9 | _SampleJson = '''{
10 | "@context": "http://schema.org",
11 | "@type": "StatisticalDataset",
12 | "@id": "",
13 | "url": "https://data.europa.eu/euodp/en/data/dataset/bAzn6fiusnRFOBwUeIo78w",
14 | "identifier": "met_d3dens",
15 | "name": "Eurostat Population Density",
16 | "description": "Population density by metropolitan regions",
17 | "dateCreated": "2015-10-16",
18 | "dateModified": "2019-06-18",
19 | "temporalCoverage": "1990-01-01/2016-01-01",
20 | "distribution": {
21 | "@type": "DataDownload",
22 | "contentUrl": "http://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?file=data/met_d3dens.tsv.gz&unzip=true",
23 | "encodingFormat": "text/tab-separated-values"
24 | },
25 | "spatialCoverage":{
26 | "@type":"Place",
27 | "geo":{
28 | "@type":"GeoShape",
29 | "name": "European Union",
30 | "box":"34.633285 -10.468556 70.096054 34.597916"
31 | }
32 | },
33 | "license": "https://ec.europa.eu/eurostat/about/policies/copyright",
34 | "creator":{
35 | "@type":"Organization",
36 | "url": "https://ec.europa.eu/eurostat",
37 | "name":"Eurostat"
38 | },
39 | "publisher": {
40 | "@type": "Organization",
41 | "name": "Eurostat",
42 | "url": "https://ec.europa.eu/eurostat",
43 | "contactPoint": {
44 | "@type": "ContactPoint",
45 | "contactType": "User Support",
46 | "url": "https://ec.europa.eu/eurostat/help/support"
47 | }
48 | }
49 | }'''
50 |
51 |
52 | class RdfUtilTests(unittest.TestCase):
53 | def test_LoadGraph(self):
54 | graph1 = LoadGraph(_SampleJson, '')
55 | graph2 = LoadGraph(json.loads(_SampleJson), '')
56 | graph3 = LoadGraph(StringIO(_SampleJson), '')
57 | self.assertTrue(rdflib.compare.isomorphic(graph1, graph2))
58 | self.assertTrue(rdflib.compare.isomorphic(graph1, graph3))
59 |
60 | def test_FrameGraph(self):
61 | json_val = FrameGraph(LoadGraph(_SampleJson, ''))
62 | self.assertEqual(json_val['@context'], 'http://schema.org')
63 | self.assertEqual(json_val['@type'], 'StatisticalDataset')
64 | self.assertEqual(json_val['url'], 'https://data.europa.eu/euodp/en/data/dataset/bAzn6fiusnRFOBwUeIo78w')
65 | self.assertEqual(json_val['identifier'], 'met_d3dens')
66 | self.assertEqual(json_val['name'], 'Eurostat Population Density')
67 | self.assertEqual(json_val['description'], 'Population density by metropolitan regions')
68 | self.assertEqual(json_val['dateCreated'], '2015-10-16')
69 | self.assertEqual(json_val['dateModified'], '2019-06-18')
70 | self.assertEqual(json_val['temporalCoverage'], '1990-01-01/2016-01-01')
71 | self.assertEqual(json_val['distribution']['@type'], 'DataDownload')
72 | self.assertEqual(json_val['distribution']['contentUrl'], 'http://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?file=data/met_d3dens.tsv.gz&unzip=true')
73 | self.assertEqual(json_val['distribution']['encodingFormat'], 'text/tab-separated-values')
74 | self.assertEqual(json_val['spatialCoverage']['@type'], "Place")
75 | self.assertEqual(json_val['spatialCoverage']['geo']['@type'], "GeoShape")
76 | self.assertEqual(json_val['spatialCoverage']['geo']['name'], 'European Union')
77 | self.assertEqual(json_val['spatialCoverage']['geo']['box'], '34.633285 -10.468556 70.096054 34.597916')
78 | self.assertEqual(json_val['license'], 'https://ec.europa.eu/eurostat/about/policies/copyright')
79 | self.assertEqual(json_val['creator']['@type'], "Organization")
80 | self.assertEqual(json_val['creator']['url'], 'https://ec.europa.eu/eurostat')
81 | self.assertEqual(json_val['creator']['name'], 'Eurostat')
82 | self.assertEqual(json_val['publisher']['@type'], 'Organization')
83 | self.assertEqual(json_val['publisher']['name'], 'Eurostat')
84 | self.assertEqual(json_val['publisher']['url'], 'https://ec.europa.eu/eurostat')
85 | self.assertEqual(json_val['publisher']['contactPoint']['@type'], 'ContactPoint')
86 | self.assertEqual(json_val['publisher']['contactPoint']['contactType'], 'User Support')
87 | self.assertEqual(json_val['publisher']['contactPoint']['url'], 'https://ec.europa.eu/eurostat/help/support')
88 |
89 | def test_SelectFromGraph(self):
90 | graph = LoadGraph(_SampleJson, '')
91 | results = list(SelectFromGraph(
92 | graph,
93 | ('?ds', 'rdf:type', 'schema:StatisticalDataset'),
94 | ('?ds', 'schema:name', '?name')))
95 | self.assertEqual(len(results), 1)
96 | self.assertEqual(results[0]['name'], 'Eurostat Population Density')
97 |
98 |
99 | if __name__ == '__main__':
100 | unittest.main()
101 |
--------------------------------------------------------------------------------
/tools/dspl2/requirements.txt:
--------------------------------------------------------------------------------
1 | absl-py
2 | extruct
3 | flask
4 | pyicu
5 | jinja2
6 | pandas
7 | pyld
8 | rdflib
9 | rdflib-jsonld
10 | requests
11 |
--------------------------------------------------------------------------------
/tools/dspl2/scripts/dspl2-expand.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright 2018 Google LLC
3 | #
4 | # Use of this source code is governed by a BSD-style
5 | # license that can be found in the LICENSE file or at
6 | # https://developers.google.com/open-source/licenses/bsd
7 |
8 | from absl import app
9 | from absl import flags
10 | from dspl2 import (Dspl2RdfExpander, Dspl2JsonLdExpander, FrameGraph,
11 | LocalFileGetter)
12 | import json
13 | import sys
14 |
15 |
16 | flags.DEFINE_boolean('rdf', False, 'Process the JSON-LD as RDF.')
17 |
18 |
19 | def main(args):
20 | if len(args) != 2:
21 | print(f'Usage: {args[0]} [DSPL file]', file=sys.stderr)
22 | exit(1)
23 | getter = LocalFileGetter(args[1])
24 | if flags.FLAGS.rdf:
25 | graph = Dspl2RdfExpander(getter).Expand()
26 | dspl = FrameGraph(graph)
27 | else:
28 | dspl = Dspl2JsonLdExpander(getter).Expand()
29 | json.dump(dspl, sys.stdout, indent=2)
30 |
31 |
32 | if __name__ == '__main__':
33 | app.run(main)
34 |
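
The script is a thin wrapper around the library; the same expansion can be done
programmatically. A sketch assuming a hypothetical local `dataset.json`:

    import json
    import sys

    from dspl2 import Dspl2JsonLdExpander, LocalFileGetter

    getter = LocalFileGetter('dataset.json')
    dspl = Dspl2JsonLdExpander(getter).Expand()  # inline the CSV-backed parts
    json.dump(dspl, sys.stdout, indent=2)
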
--------------------------------------------------------------------------------
/tools/dspl2/scripts/dspl2-pretty-print-server.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright 2018 Google LLC
3 | #
4 | # Use of this source code is governed by a BSD-style
5 | # license that can be found in the LICENSE file or at
6 | # https://developers.google.com/open-source/licenses/bsd
7 |
8 | from flask import Flask, request, render_template
9 | import json
10 | from pathlib import Path
11 | import requests
12 |
13 | import dspl2
14 | from dspl2 import (
15 | Dspl2JsonLdExpander, Dspl2RdfExpander, InternetFileGetter,
16 | JsonToKwArgsDict, LoadGraph, FrameGraph, UploadedFileGetter)
17 |
18 |
19 | def _Display(template, json_val):
20 | return render_template(template, **JsonToKwArgsDict(json_val))
21 |
22 |
23 | template_dir = Path(dspl2.__file__).parent / 'templates'
24 | app = Flask('dspl2-viewer', template_folder=template_dir.as_posix())
25 |
26 | @app.route('/')
27 | def Root():
28 | return render_template('choose.html')
29 |
30 |
31 | @app.route('/render', methods=['GET', 'POST'])
32 | def _HandleUploads():
33 | try:
34 | rdf = request.args.get('rdf') == 'on'
35 | url = request.args.get('url')
36 | if request.method == 'POST':
37 | files = request.files.getlist('files[]')
38 | getter = UploadedFileGetter(files)
39 | else:
40 | if not url:
41 | return render_template('error.html',
42 | message="No URL provided")
43 | getter = InternetFileGetter(url)
44 | if rdf:
45 | graph = Dspl2RdfExpander(getter).Expand()
46 | json_val = FrameGraph(graph)
47 | else:
48 | json_val = Dspl2JsonLdExpander(getter).Expand()
49 | return _Display('display.html', json_val)
50 | except json.JSONDecodeError as e:
51 | return render_template('error.html',
52 | action="decoding",
53 | url=e.doc or url,
54 | text=str(e))
55 | except IOError as e:
56 | return render_template('error.html',
57 | action="loading",
58 | url=e.filename,
59 | text=str(e))
60 | except RuntimeError as e:
61 | return render_template('error.html',
62 | text=str(e))
63 | except requests.exceptions.HTTPError as e:
64 | return render_template('error.html',
65 | url=url,
66 | action="retrieving",
67 | status=e.response.status_code,
68 | text=e.response.text)
69 | except requests.exceptions.RequestException as e:
70 | return render_template('error.html',
71 | url=url,
72 | action="retrieving",
73 | text=str(e))
74 | except Exception as e:
75 | return render_template('error.html',
76 | action="processing",
77 | url=url,
78 | text=str(type(e)) + str(e))
79 |
80 |
81 | if __name__ == '__main__':
82 | app.run()
83 |
--------------------------------------------------------------------------------
/tools/dspl2/scripts/dspl2-pretty-print.py:
--------------------------------------------------------------------------------
1 | # Copyright 2018 Google LLC
2 | #
3 | # Use of this source code is governed by a BSD-style
4 | # license that can be found in the LICENSE file or at
5 | # https://developers.google.com/open-source/licenses/bsd
6 |
7 | from absl import app
8 | from absl import flags
9 | import dspl2
10 | import jinja2
11 | from pathlib import Path
12 | import sys
13 |
14 |
15 | FLAGS = flags.FLAGS
16 | flags.DEFINE_boolean('rdf', False, 'Process the JSON-LD as RDF.')
17 |
18 |
19 | def _RenderLocalDspl2(path, rdf):
20 | template_dir = Path(dspl2.__file__).parent / 'templates'
21 | env = jinja2.Environment(loader=jinja2.FileSystemLoader(
22 | template_dir.as_posix()))
23 | try:
24 | print("Loading template")
25 | template = env.get_template('display.html')
26 | print("Loading DSPL2")
27 | getter = dspl2.LocalFileGetter(path)
28 | print("Expanding DSPL2")
29 | if rdf:
30 | graph = dspl2.Dspl2RdfExpander(getter).Expand()
31 | print("Framing DSPL2")
32 | json_val = dspl2.FrameGraph(graph)
33 | else:
34 | json_val = dspl2.Dspl2JsonLdExpander(getter).Expand()
35 | print("Rendering template")
36 | return template.render(**dspl2.JsonToKwArgsDict(json_val))
37 | except Exception as e:
38 | # Fall back to the error template instead of crashing.
39 | template = env.get_template('error.html')
40 | return template.render(action="processing",
41 | url=path,
42 | text=str(type(e)) + ": " + str(e))
43 |
44 |
45 | def main(argv):
46 | if len(argv) != 3:
47 | print(f'Usage: {argv[0]} [input.json] [output.html]', file=sys.stderr)
48 | exit(1)
49 | with open(argv[2], 'w') as f:
50 | print(_RenderLocalDspl2(argv[1], FLAGS.rdf), file=f)
51 |
52 |
53 | if __name__ == '__main__':
54 | app.run(main)
55 |
--------------------------------------------------------------------------------
/tools/dspl2/scripts/dspl2-validate.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright 2018 Google LLC
3 | #
4 | # Use of this source code is governed by a BSD-style
5 | # license that can be found in the LICENSE file or at
6 | # https://developers.google.com/open-source/licenses/bsd
7 |
8 | from absl import app
9 | from absl import flags
10 | from dspl2 import (Dspl2JsonLdExpander, Dspl2RdfExpander, LocalFileGetter,
11 | FrameGraph, LoadGraph, ValidateDspl2)
12 | import sys
13 |
14 |
15 | FLAGS = flags.FLAGS
16 | flags.DEFINE_boolean('rdf', False, 'Process the JSON-LD as RDF.')
17 |
18 |
19 | def main(args):
20 | if len(args) != 2:
21 | print(f'Usage: {args[0]} [DSPL file]', file=sys.stderr)
22 | exit(1)
23 | getter = LocalFileGetter(args[1])
24 | if flags.FLAGS.rdf:
25 | graph = Dspl2RdfExpander(getter).Expand()
26 | dspl = FrameGraph(graph)
27 | else:
28 | dspl = Dspl2JsonLdExpander(getter).Expand()
29 | warnings = ValidateDspl2(dspl, getter)
30 | for warning in warnings:
31 | print(warning)
32 |
33 |
34 | if __name__ == '__main__':
35 | app.run(main)
36 |
--------------------------------------------------------------------------------
/tools/dspl2/setup.py:
--------------------------------------------------------------------------------
1 | # Copyright 2018 Google LLC
2 | #
3 | # Use of this source code is governed by a BSD-style
4 | # license that can be found in the LICENSE file or at
5 | # https://developers.google.com/open-source/licenses/bsd
6 |
7 | import setuptools
8 |
9 | setuptools.setup(
10 | name="dspl2",
11 | version="0.0.1",
12 | author="Natarajan Krishnaswami",
13 | author_email="nkrishnaswami@google.com",
14 | description="DSPL 2.0 tools",
15 | url="https://github.com/google/dspl",
16 | packages=setuptools.find_packages(),
17 | classifiers=[
18 | "Programming Language :: Python :: 3",
19 | "OSI Approved :: BSD License",
20 | "Operating System :: OS Independent",
21 | ],
22 | package_data={
23 | 'dspl2': ['templates/*', 'schema/*'],
24 | },
25 | scripts=[
26 | 'scripts/dspl2-expand.py',
27 | 'scripts/dspl2-pretty-print.py',
28 | 'scripts/dspl2-pretty-print-server.py',
29 | 'scripts/dspl2-validate.py',
30 | ],
31 | )
32 |
--------------------------------------------------------------------------------
/tools/dspl2viz/dspl2viz.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | import dspl2
3 | from flask import (
4 | Flask, render_template, request, Response)
5 | from functools import lru_cache
6 | from icu import SimpleDateFormat
7 | from io import StringIO
8 | import json
9 | import os.path
10 | import pandas as pd
11 | from urllib.parse import urlparse
12 |
13 |
14 | app = Flask(__name__)
15 |
16 |
17 | @app.route('/')
18 | def main():
19 | return render_template('dspl2viz.html')
20 |
21 |
22 | @app.route('/api/measures')
23 | def api_measures():
24 | dataset = request.args.get('dataset')
25 | if dataset is None:
26 | return Response("Dataset not specified", status=400)
27 | try:
28 | getter = dspl2.LocalFileGetter(
29 | os.path.expanduser('~/dspl/samples/bls/unemployment/bls-unemployment.jsonld'))
30 | expander = dspl2.Dspl2JsonLdExpander(getter)
31 | ds = expander.Expand(expandSlices=False)
32 | return Response(json.dumps(ds['measure'], indent=2), mimetype='application/json')
33 | except Exception as e:
34 | app.logger.warning(e)
35 | return Response("Unable to find requested dataset", status=404)
36 |
37 |
38 | @app.route('/api/dimensions')
39 | def api_dimensions():
40 | dataset = request.args.get('dataset')
41 | if dataset is None:
42 | return Response("Dataset not specified", status=400)
43 | try:
44 | getter = dspl2.HybridFileGetter(dataset)
45 | expander = dspl2.Dspl2JsonLdExpander(getter)
46 | ds = expander.Expand(expandSlices=False, expandDimensions=False)
47 | return Response(json.dumps(ds['dimension'], indent=2), mimetype='application/json')
48 | except Exception as e:
49 | app.logger.warning(e)
50 | return Response("Unable to find requested dataset", status=404)
51 |
52 |
53 | @app.route('/api/dimension_values')
54 | def api_dimension_values():
55 | dataset = request.args.get('dataset')
56 | if dataset is None:
57 | return Response("Dataset not specified", status=400)
58 | dimension = request.args.get('dimension')
59 | if dimension is None:
60 | return Response("Dimension not specified", status=400)
61 | try:
62 | getter = dspl2.HybridFileGetter(dataset)
63 | expander = dspl2.Dspl2JsonLdExpander(getter)
64 | ds = expander.Expand(expandSlices=False, expandDimensions=True)
65 | for dim in ds['dimension']:
66 | if (dimension == dspl2.GetUrl(dim) or
67 | urlparse(dimension).fragment == urlparse(dspl2.GetUrl(dim)).fragment):
68 | return Response(json.dumps(dim, indent=2), mimetype='application/json')
69 | return Response("Unable to find requested dimension", status=404)
70 | except Exception as e:
71 | app.logger.warning(e)
72 | return Response("Unable to find requested dataset", status=404)
73 |
74 |
75 | @app.route('/api/slices_for_measure')
76 | def api_slices_for_measure():
77 | dataset = request.args.get('dataset')
78 | if dataset is None:
79 | return Response("Dataset not specified", status=400)
80 | measure = request.args.get('measure')
81 | if measure is None:
82 | return Response("Measure not specified", status=400)
83 | try:
84 | getter = dspl2.HybridFileGetter(dataset)
85 | expander = dspl2.Dspl2JsonLdExpander(getter)
86 | ds = expander.Expand(expandSlices=False, expandDimensions=False)
87 | slices = []
88 | for slice in ds['slice']:
89 | for sliceMeasure in slice['measure']:
90 | if (measure == dspl2.GetUrl(sliceMeasure) or
91 | urlparse(measure).fragment == urlparse(dspl2.GetUrl(sliceMeasure)).fragment):
92 | slices.append(slice)
93 | break
94 | return Response(json.dumps(slices, indent=2),
95 | mimetype='application/json')
96 | except Exception as e:
97 | app.logger.warning(e)
98 | return Response("Unable to find requested dataset", status=404)
99 |
100 |
101 | @lru_cache(maxsize=10)
102 | def _ExpandDataset(dataset):
103 | getter = dspl2.HybridFileGetter(dataset)
104 | expander = dspl2.Dspl2JsonLdExpander(getter)
105 | return expander.Expand()
106 |
107 |
108 | def _ParseDate(text, date_pattern):
109 | df = SimpleDateFormat(date_pattern)
110 | ts = df.parse(text)
111 | return datetime.datetime.utcfromtimestamp(ts)
112 |
113 |
114 | @lru_cache(maxsize=100)
115 | def _GetDataSeries(dataset, slice, measure, dimension_value):
116 | dim_val_dict = dict([dim_val.split(':')
117 | for dim_val in dimension_value.split(',')])
118 | ds = _ExpandDataset(dataset)
119 | # Identify the time dimension's date format
120 | dateFormat = "yyyy-MM-dd" # default
121 | for dimension in ds['dimension']:
122 | if dimension['@type'] == 'TimeDimension':
123 | dateFormat = dimension.get('dateFormat', dateFormat)
124 | break
125 |
126 | for dsSlice in ds['slice']:
127 | if urlparse(dsSlice['@id']).fragment == urlparse(slice).fragment:
128 | ret = []
129 | for observation in dsSlice['data']:
130 | val = {}
131 | # Slice should have exactly the requested dims + a time dim:
132 | if len(observation['dimensionValues']) != len(dim_val_dict) + 1:
133 | continue
134 | # All the non-time dims should match the filter:
135 | matched_dims = 0
136 | for dim_val in observation['dimensionValues']:
137 | dim_id = urlparse(dim_val['dimension']).fragment
138 | if f'#{dim_id}' in dim_val_dict:
139 | if dim_val.get('codeValue') == dim_val_dict[f'#{dim_id}']:
140 | val[dim_id] = dim_val.get('codeValue')
141 | matched_dims += 1
142 | elif dim_val.get('value'):
143 | val[dim_id] = _ParseDate(dim_val.get('value'), dateFormat)
144 | if matched_dims != len(dim_val_dict):
145 | continue
146 | for meas_val in observation['measureValues']:
147 | if urlparse(meas_val['measure']).fragment == urlparse(measure).fragment:
148 | val[urlparse(measure).fragment] = meas_val['value']
149 | ret.append(val)
150 | return ret
151 |
152 | @app.route('/api/series')
153 | def api_series():
154 | dataset = request.args.get('dataset')
155 | if dataset is None:
156 | return Response("Dataset not specified", status=400)
157 | slice = request.args.get('slice')
158 | if slice is None:
159 | return Response("Slice not specified", status=400)
160 | measure = request.args.get('measure')
161 | if measure is None:
162 | return Response("Measure not specified", status=400)
163 | dimension_values = request.args.get('dimension_value')
164 | if dimension_values is None:
165 | return Response("Dimension values not specified", status=400)
166 | ret = _GetDataSeries(dataset, slice, measure, dimension_values)
167 | if ret is not None:
168 | out = StringIO()
169 | pd.DataFrame(ret).to_csv(out)
170 | return Response(out.getvalue(), mimetype="text/csv")
171 | return Response("Unable to find series for requested dimensions",
172 | status=404)
173 |
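
The `dimension_value` query parameter parsed by _GetDataSeries is a comma-separated
list of `#dimension:code` pairs, matching the URL built in dspl2viz.js. A sketch with
hypothetical values:

    dimension_value = '#seasonality:S,#state:ST0100000000000'
    dim_val_dict = dict(dv.split(':') for dv in dimension_value.split(','))
    # {'#seasonality': 'S', '#state': 'ST0100000000000'}
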
--------------------------------------------------------------------------------
/tools/dspl2viz/foo.jsonld:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/dspl/db79dad685276dbf98ca44b875d1481bc240c5c1/tools/dspl2viz/foo.jsonld
--------------------------------------------------------------------------------
/tools/dspl2viz/static/dspl2viz.css:
--------------------------------------------------------------------------------
1 | body {
2 | display: flex;
3 | height: 100vh;
4 | flex-flow: column wrap;
5 | }
6 | #vegalite-container {
7 | width: 30vw;
8 | height: 50vh;
9 | display: block;
10 | }
11 |
12 | #vegalite-input {
13 | width: 100%;
14 | height: 100%;
15 | }
16 |
17 | #dataset-explorer {
18 | width: 30vw;
19 | height: 49vh;
20 | vertical-align: top;
21 | border-style: solid;
22 | border-width: 1px;
23 | }
24 | #measure-explorer {
25 | max-height: 49%;
26 | overflow: scroll;
27 | }
28 | #dimension-explorer {
29 | max-height: 49%;
30 | overflow: scroll;
31 | }
32 |
33 | #chart-container {
34 | width: 65vw;
35 | height: 99vh;
36 | vertical-align: top;
37 | border-style: solid;
38 | border-width: 1px;
39 | }
40 |
41 | #chart {
42 | width: 100%;
43 | height: 100%;
44 | }
45 |
--------------------------------------------------------------------------------
/tools/dspl2viz/static/dspl2viz.js:
--------------------------------------------------------------------------------
1 | var View;
2 |
3 | function drawChart(event) {
4 | try {
5 | var spec = JSON.parse(event.target.value);
6 | spec.height = document.querySelector('#chart-container').scrollHeight;
7 | spec.width = document.querySelector('#chart-container').scrollWidth;
8 | event.target.value = JSON.stringify(spec, null, 2);
9 | vegaEmbed("#chart", spec)
10 | // result.view provides access to the Vega View API
11 | .then(result => { View = result; })
12 | .catch(console.warn);
13 | } catch(e) {
14 | console.warn(e);
15 | }
16 | }
17 |
18 | var DatasetId = 'file:///usr/local/google/home/nkrishnaswami/dspl/samples/bls/unemployment/bls-unemployment.jsonld';
19 | var SliceId = '#statesUnemploymentMonthly';
20 | var MeasureId = '#unemployment_rate';
21 | var DimValues = {
22 | seasonality: 'S',
23 | state: 'ST0100000000000',
24 | };
25 |
26 | function setSpec() {
27 | var vlSpec = {
28 | "$schema": "https://vega.github.io/schema/vega-lite/v4.0.0-beta.10.json",
29 | "description": "A simple bar chart with embedded data.",
30 | "autosize": {
31 | "type": "fit",
32 | "resize": true
33 | },
34 | "data": {
35 | "url": "/api/series",
36 | "format": {
37 | "type": "csv"
38 | }
39 | },
40 | "mark": "line",
41 | "encoding": {
42 | "x": {
43 | "field": "month",
44 | "type": "ordinal"
45 | },
46 | "y": {
47 | "field": "unemployment_rate",
48 | "type": "quantitative"
49 | },
50 | "color": {
51 | "field": "state",
52 | "type": "ordinal"
53 | }
54 | }
55 | }
56 | vlSpec.data.url += '?dataset=' + encodeURIComponent(DatasetId);
57 | vlSpec.data.url += '&slice=' + encodeURIComponent(SliceId);
58 | vlSpec.data.url += '&measure=' + encodeURIComponent(MeasureId);
59 | vlSpec.data.url += '&dimension_value=';
60 | for (var idx = 0; idx < Object.keys(DimValues).length; ++idx) {
61 | if (idx != 0) {
62 | vlSpec.data.url += ',';
63 | }
64 | var key = Object.keys(DimValues)[idx];
65 | var val = DimValues[key];
66 | vlSpec.data.url += encodeURIComponent(`#${key}:${val}`);
67 | }
68 |
69 | var input = document.querySelector("#vegalite-input");
70 | input.value = JSON.stringify(vlSpec, null, 2);
71 | input.dispatchEvent(new Event('change'));
72 | }
73 |
74 | function processMeasures(data) {
75 | let measure_container = document.querySelector('#measure-explorer');
76 | measure_container.innerText = "Measures:";
77 | console.log(measure_container);
78 | let ul = document.createElement('ul');
79 | measure_container.appendChild(ul);
80 | for(let measure of data) {
81 | let id = $('<a>', { href: measure['@id'] }).prop('hash').substring(1);
82 | console.log("Processing ", measure.name, 'id:', id, measure);
83 | let li = document.createElement('li');
84 | li.innerText = measure.name;
85 | if (measure.description) {
86 | li.title = measure.description;
87 | }
88 | li.addEventListener('click', function (event) {
89 | for(var elt of ul.children) {
90 | elt.style.fontWeight = 'normal';
91 | }
92 | event.target.style.fontWeight = 'bold';
93 | MeasureId = '#' + id;
94 | setSpec();
95 | });
96 | ul.appendChild(li);
97 | }
98 | }
99 |
100 | function processSlices(data) {
101 | let slice_container = document.querySelector('#slice-explorer');
102 | slice_container.innerText = "Slices:";
103 | console.log(slice_container);
104 | let ul = document.createElement('ul');
105 | slice_container.appendChild(ul);
106 | for(let slice of data) {
107 | let id = $('<a>', { href: slice['@id'] }).prop('hash').substring(1);
108 | console.log("Processing ", slice.name, 'id:', id, slice);
109 | let li = document.createElement('li');
110 | li.innerText = slice.name;
111 | if (slice.description) {
112 | li.title = slice.description;
113 | }
114 | li.addEventListener('click', function (event) {
115 | for(var elt of ul.children) {
116 | elt.style.fontWeight = 'normal';
117 | }
118 | event.target.style.fontWeight = 'bold';
119 | SliceId = '#' + id;
120 | setSpec();
121 | });
122 | ul.appendChild(li);
123 | }
124 | }
125 |
126 |
127 |
128 |
129 | function processDimensionValues(dimension) {
130 | let id = $('<a>', { href: dimension['@id'] }).prop('hash').substring(1);
131 | console.log("Processing ", dimension.name, 'id:', id);
132 | let div = document.createElement('div');
133 | let dimension_container = document.querySelector('#dimension-explorer');
134 | dimension_container.appendChild(div);
135 | div.innerText = dimension.name;
136 | if (dimension.description) {
137 | div.title = dimension.description;
138 | }
139 | let ul = document.createElement('ul');
140 | div.appendChild(ul);
141 | dimension.codes = {};
142 | for(let dimensionValue of dimension.codeList) {
143 | dimension.codes[dimensionValue.codeValue] = dimensionValue;
144 | let li = document.createElement('li');
145 | li.innerText = dimensionValue.name;
146 | if (dimensionValue.description) {
147 | li.title = dimensionValue.description;
148 | }
149 | li.addEventListener('click', function (event) {
150 | for(var elt of ul.children) {
151 | elt.style.fontWeight = 'normal';
152 | }
153 | event.target.style.fontWeight = 'bold';
154 | DimValues[id] = dimensionValue.codeValue;
155 | setSpec()
156 | });
157 | ul.appendChild(li);
158 | }
159 | }
160 |
161 |
162 | function processDimensions(data) {
163 | for(let dimension of data) {
164 | if (dimension.name != 'States' && dimension.name != 'Seasonality') {
165 | continue;
166 | }
167 | $.getJSON('/api/dimension_values?dataset=file:///usr/local/google/home/nkrishnaswami/dspl/samples/bls/unemployment/bls-unemployment.jsonld&dimension='+encodeURIComponent(dimension['@id']),
168 | processDimensionValues);
169 | }
170 | }
171 |
172 | document.querySelector("#vegalite-input").addEventListener('change', drawChart);
173 | setSpec();
174 |
175 |
176 | $.getJSON('/api/measures?dataset=' + encodeURIComponent(DatasetId), processMeasures);
177 | // $.getJSON('/api/slices_for_measure?dataset=' + encodeURIComponent(DatasetId), processMeasures);
178 | $.getJSON('/api/dimensions?dataset=' + encodeURIComponent(DatasetId), processDimensions);
179 |
--------------------------------------------------------------------------------
/tools/dspl2viz/templates/dspl2viz.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | DSPL 2 Dataset Visualizer
4 |
5 |
6 |
7 |
8 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/tools/dspltools/PKG-INFO:
--------------------------------------------------------------------------------
1 | Metadata-Version: 1.0
2 | Name: dspltools
3 | Version: 0.4.3
4 | Summary: Suite of command-line tools for generating DSPL datasets
5 | Home-page: http://code.google.com/apis/publicdata/docs/dspltools.html
6 | Author: Benjamin Yolken
7 | Author-email: yolken@google.com
8 | License: UNKNOWN
9 | Description: UNKNOWN
10 | Platform: UNKNOWN
11 |
--------------------------------------------------------------------------------
/tools/dspltools/README.rst:
--------------------------------------------------------------------------------
1 | Documentation
2 | =============
3 | See https://developers.google.com/public-data/docs/dspltools for documentation.
4 |
5 |
6 | Release Notes
7 | =============
8 | *** v0.1 ***
9 | Release date: April 11, 2011
10 |
11 | Description:
12 | ------------
13 | DSPL Tools released!
14 |
15 |
16 | *** v0.2 ***
17 | Release date: April 18, 2011
18 |
19 | Description:
20 | ------------
21 | Enhanced DSPL Check by adding significant functionality beyond XML schema
22 | validation, including the checking of internal dataset references and CSV
23 | file structure.
24 |
25 |
26 | *** v0.2.1 ***
27 | Release date: April 21, 2011
28 |
29 | Description:
30 | ------------
31 | Use column ID to distinguish between years and integers in dsplgen.
32 |
33 |
34 | *** v0.3 ***
35 | Release date: April 26, 2011
36 |
37 | Description:
38 | ------------
39 | Extended DSPL Check to validate dataset CSV data (sorting, instance IDs)
40 | and slice / table links.
41 |
42 | Added concept hierarchy support to DSPL Gen.
43 |
44 |
45 | *** v0.3.5 ***
46 | Release date: May 4, 2011
47 |
48 | Description:
49 | ------------
50 | Extended DSPL Check to support checking of:
51 | - Table column / concept type consistency
52 | - Date formats
53 | - Formatting of float and integer CSV values
54 | - Datasets where CSV columns are in different order than columns in table
55 | metadata
56 |
57 | Improved error messages when files can't be found or opened.
58 |
59 | Fixed bug in DSPL Gen naming of external concepts.
60 |
61 |
62 | *** v0.3.6 ***
63 | Release date: May 6, 2011
64 |
65 | Description:
66 | ------------
67 | Added 'checking_level' option to DSPL Check.
68 |
69 | CSV files are now loaded in 'universal newline mode' to reduce risk of parsing
70 | problems.
71 |
72 |
73 | *** v0.3.7 ***
74 | Release date: May 6, 2011
75 |
76 | Description:
77 | ------------
78 | Added zipped dataset checking to DSPL Check.
79 |
80 | Strip whitespace from CSV values (to mimic behavior of PDE importer).
81 |
82 |
83 | *** v0.4 ***
84 | Release date: May 20, 2011
85 |
86 | Description:
87 | ------------
88 | Added topic reference checking to DSPL Check.
89 |
90 | Changed schema validation process to use local XML schema files instead of
91 | calling out to W3C servers.
92 |
93 |
94 | *** v0.4.1 ***
95 | Release date: June 2, 2011
96 |
97 | Description:
98 | ------------
99 | Added test for trivial slices to DSPL Check.
100 |
101 | Improved behavior of DSPL Check when empty tables are encountered.
102 |
103 |
104 | *** v0.4.2 ***
105 | Release date: June 20, 2011
106 |
107 | Description:
108 | ------------
109 | Changed implementation of default csv_data_source to use in-memory Python
110 | objects instead of sqlite. The latter can still be used by setting the '-t'
111 | option of dsplgen to 'csv_sqlite'.
112 |
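In code terms, the two back ends correspond to the two data source classes under dspllib/data_sources (a sketch; the constructor signature follows the test suites later in this listing, and the file name is hypothetical):

from dspllib.data_sources import csv_data_source, csv_data_source_sqlite

with open('mydata.csv') as csv_file:
    # Post-0.4.2 default: in-memory handling
    source = csv_data_source.CSVDataSource(csv_file, verbose=False)
    source.Close()

with open('mydata.csv') as csv_file:
    # Equivalent of running dsplgen with -t csv_sqlite
    source = csv_data_source_sqlite.CSVDataSourceSqlite(csv_file, verbose=False)
    source.Close()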
113 |
114 | *** v0.4.3 ***
115 | Release date: November 3, 2011
116 |
117 | Description:
118 | ------------
119 | Fixed some bugs around multi-level concept hierarchies.
120 |
121 | Added total_val parameter to support pre-computed rollups in data.
122 |
123 |
124 | *** v0.5.0 ***
125 | Release date: January 22, 2019
126 |
127 | Description:
128 | ------------
129 | Switched to lxml for XML parsing and schema validation.
130 |
131 |
--------------------------------------------------------------------------------
/tools/dspltools/examples/dsplcheck/invalid_dspl/countries.csv:
--------------------------------------------------------------------------------
1 | country,name,latitude,longitude
2 | AD,Andorra,42.546245,1.601554
3 | AF,Afghanistan,33.93911,67.709953
4 | AI,Anguilla,18.220554,-63.068615
5 | AL,Albania,41.153332,20.168331
6 | US,United States,37.09024,-95.712891
7 |
--------------------------------------------------------------------------------
/tools/dspltools/examples/dsplcheck/invalid_dspl/country_slice.csv:
--------------------------------------------------------------------------------
1 | country,year,population
2 | AF,1960,9616353
3 | AF,1961,9799379
4 | AF,1963,10188299
5 | AF,1962,9989846
6 | AD,1960,8616353
7 | AD,1961,8799379
8 | AD,1962,8989846
9 | AD,1963,9188299
10 | US,1960,19616353
11 | UX,1961,19799379
12 | US,1962,392039023
13 | US,1963,110188299
14 |
--------------------------------------------------------------------------------
/tools/dspltools/examples/dsplcheck/invalid_dspl/invalid_dspl.xml:
--------------------------------------------------------------------------------
1 |
2 |
32 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 | My statistics
47 |
48 |
49 | Some very interesting statistics about countries
50 |
51 |
52 | http://www.stats-bureau.com/mystats/info.html
53 |
54 |
55 |
56 |
57 |
58 | Bureau of Statistics
59 |
60 |
61 | http://www.stats-bureau.com
62 |
63 |
64 |
65 |
66 |
67 |
68 | Geography
69 |
70 |
71 |
72 |
73 | Social indicators
74 |
75 |
76 |
77 | Population indicators
78 |
79 |
80 |
81 |
82 | Poverty & income
83 |
84 |
85 |
86 |
87 | Health
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 | Population
99 |
100 |
101 | Size of the resident population.
102 |
103 |
104 |
105 |
106 |
107 |
108 |
112 |
113 |
114 |
115 | Country
116 |
117 |
118 | My list of countries
119 |
120 |
121 |
122 |
123 |
124 | Country name
125 |
126 | The official name of the country
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 | countries.csv
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 | country_slice.csv
161 |
162 |
163 |
164 |
165 |
166 |
--------------------------------------------------------------------------------
/tools/dspltools/examples/dsplcheck/valid_dataset/countries.csv:
--------------------------------------------------------------------------------
1 | country,name,latitude,longitude
2 | AD,Andorra,42.546245,1.601554
3 | AF,Afghanistan,33.93911,67.709953
4 | AI,Anguilla,18.220554,-63.068615
5 | AL,Albania,41.153332,20.168331
6 | US,United States,37.09024,-95.712891
7 |
--------------------------------------------------------------------------------
/tools/dspltools/examples/dsplcheck/valid_dataset/country_slice.csv:
--------------------------------------------------------------------------------
1 | country,year,population
2 | AF,1960,9616353
3 | AF,1961,9799379
4 | AF,1963,10188299
5 | AF,1962,9989846
6 | AD,1960,8616353
7 | AD,1961,8799379
8 | AD,1962,8989846
9 | AD,1963,9188299
10 | US,1960,19616353
11 | US,1961,19799379
12 | US,1962,392039023
13 | US,1963,110188299
14 |
--------------------------------------------------------------------------------
/tools/dspltools/examples/dsplcheck/valid_dataset/valid_dataset.xml:
--------------------------------------------------------------------------------
1 |
2 |
32 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 | My statistics
47 |
48 |
49 | Some very interesting statistics about countries
50 |
51 |
52 | http://www.stats-bureau.com/mystats/info.html
53 |
54 |
55 |
56 |
57 |
58 | Bureau of Statistics
59 |
60 |
61 | http://www.stats-bureau.com
62 |
63 |
64 |
65 |
66 |
67 |
68 | Geography
69 |
70 |
71 |
72 |
73 | Social indicators
74 |
75 |
76 |
77 | Population indicators
78 |
79 |
80 |
81 |
82 | Poverty & income
83 |
84 |
85 |
86 |
87 | Health
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 | Population
99 |
100 |
101 | Size of the resident population.
102 |
103 |
104 |
105 |
106 |
107 |
108 |
112 |
113 |
114 |
115 | Country
116 |
117 |
118 | My list of countries
119 |
120 |
121 |
122 |
123 |
124 | Country name
125 |
126 | The official name of the country
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 | countries.csv
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 | country_slice.csv
161 |
162 |
163 |
164 |
165 |
166 |
--------------------------------------------------------------------------------
/tools/dspltools/examples/dsplgen/dsplgen_advanced.csv:
--------------------------------------------------------------------------------
1 | date[type=date;format=MM/dd/yyyy],first_category[slice_role=dimension;rollup=true;total_val=total],second_category[slice_role=dimension;rollup=true],first_value[slice_role=metric;type=integer],second_value[slice_role=metric;type=float]
2 | 1/1/2010,red,tall,10,23
3 | 1/1/2010,red,short,90,1
4 | 1/1/2010,blue,tall,12,31
5 | 1/1/2010,blue,short,21,231
6 | 1/1/2010,green,short,20,212
7 | 1/1/2010,total,tall,10,98
8 | 1/1/2010,total,short,-30,39
9 | 1/2/2010,red,tall,10,91
10 | 1/2/2010,red,short,32,123
11 | 1/2/2010,blue,tall,22,121
12 | 1/2/2010,blue,short,20,32
13 | 1/2/2010,green,short,1,19
14 | 1/2/2010,total,short,2,10
15 | 1/3/2010,red,short,10,34
16 | 1/3/2010,red,tall,10,34
17 | 1/3/2010,blue,short,93,21
18 | 1/3/2010,blue,tall,39,12
19 | 1/3/2010,green,short,31,31
20 | 1/3/2010,green,tall,21,31
21 | 1/3/2010,total,short,13,123
22 | 1/4/2010,red,tall,40,21
23 | 1/4/2010,red,short,22,12
24 | 1/4/2010,blue,tall,39,21
25 | 1/4/2010,blue,short,10,12
26 | 1/4/2010,green,tall,30,23
27 | 1/4/2010,green,short,10,123
28 | 1/4/2010,total,tall,-10,23
29 | 1/4/2010,total,short,31,661
30 |
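Each bracketed header above uses the key=value;key=value column syntax that dspltools parses in csv_utilities.py (_HeaderToColumn, later in this dump). A minimal standalone sketch of that parse, using the same regular expression:

import re

def parse_header(header):
    # Split 'name[k1=v1;k2=v2]' into (name, {k1: v1, k2: v2})
    match = re.match(r'^([^\]\[;\s]+)(?:\[(.*)\])?$', header.strip())
    name, params = match.group(1), match.group(2)
    options = dict(p.split('=') for p in params.split(';')) if params else {}
    return name, options

print(parse_header('first_category[slice_role=dimension;rollup=true;total_val=total]'))
# ('first_category', {'slice_role': 'dimension', 'rollup': 'true', 'total_val': 'total'})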
--------------------------------------------------------------------------------
/tools/dspltools/examples/dsplgen/dsplgen_hierarchies.csv:
--------------------------------------------------------------------------------
1 | year[type=date;format=yyyy],first_category[parent=third_category;rollup=true],second_category[rollup=true],third_category,first_value[type=integer],second_value[type=float]
2 | 2010,red,tall,bucket1,10,23
3 | 2010,red,short,bucket1,90,1
4 | 2010,blue,tall,bucket1,12,31
5 | 2010,blue,short,bucket1,21,231
6 | 2010,green,tall,bucket2,12,31
7 | 2010,green,short,bucket2,11,33
8 | 2011,red,tall,bucket1,12,23
9 | 2011,red,short,bucket1,93,1
10 | 2011,blue,tall,bucket1,15,31
11 | 2011,blue,short,bucket1,25,231
12 | 2011,green,tall,bucket2,13,31
13 | 2011,green,short,bucket2,15,33
14 | 2012,red,tall,bucket1,20,23
15 | 2012,red,short,bucket1,110,1
16 | 2012,blue,tall,bucket1,55,31
17 | 2012,blue,short,bucket1,77,231
18 | 2012,green,tall,bucket2,77,31
19 | 2012,green,short,bucket2,88,33
20 |
--------------------------------------------------------------------------------
/tools/dspltools/examples/dsplgen/dsplgen_simple.csv:
--------------------------------------------------------------------------------
1 | date,first_category,second_category,first_value,second_value
2 | 1/1/2010,red,tall,10,23
3 | 1/1/2010,red,short,90,1
4 | 1/1/2010,blue,tall,12,31
5 | 1/1/2010,blue,short,21,231
6 | 1/1/2010,green,short,20,212
7 | 1/2/2010,red,tall,10,91
8 | 1/2/2010,red,short,32,123
9 | 1/2/2010,blue,tall,22,121
10 | 1/2/2010,blue,short,20,32
11 | 1/2/2010,green,short,1,19
12 | 1/3/2010,red,short,10,34
13 | 1/3/2010,red,tall,10,34
14 | 1/3/2010,blue,short,93,21
15 | 1/3/2010,blue,tall,39,12
16 | 1/3/2010,green,short,31,31
17 | 1/3/2010,green,tall,21,31
18 | 1/4/2010,red,tall,40,21
19 | 1/4/2010,red,short,22,12
20 | 1/4/2010,blue,tall,39,21
21 | 1/4/2010,blue,short,10,12
22 | 1/4/2010,green,tall,30,23
23 | 1/4/2010,green,short,10,123
24 |
--------------------------------------------------------------------------------
/tools/dspltools/examples/dsplgen/dsplgen_yearly_data.csv:
--------------------------------------------------------------------------------
1 | year,first_category,second_category,first_value,second_value
2 | 2010,red,tall,10,23.5
3 | 2010,red,short,90,1.1
4 | 2010,blue,tall,12,31.3
5 | 2010,blue,short,21,231
6 | 2010,green,short,20,212
7 | 2011,red,tall,10,91
8 | 2011,red,short,32,123
9 | 2011,blue,tall,22,121
10 | 2011,blue,short,20,32
11 | 2011,green,short,1,19
12 | 2012,red,short,10,34
13 | 2012,red,tall,10,34.3
14 | 2012,blue,short,93,21
15 | 2012,blue,tall,39,12
16 | 2012,green,short,31,31
17 | 2012,green,tall,21,31
18 | 2013,red,tall,40,21
19 | 2013,red,short,22,12.55
20 | 2013,blue,tall,39,21
21 | 2013,blue,short,10,12
22 | 2013,green,tall,30,23
23 | 2013,green,short,10,123
24 |
--------------------------------------------------------------------------------
/tools/dspltools/packages/dspllib/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2
2 | #
3 | # Copyright 2018 Google LLC
4 | #
5 | # Use of this source code is governed by a BSD-style
6 | # license that can be found in the LICENSE file or at
7 | # https://developers.google.com/open-source/licenses/bsd
8 |
--------------------------------------------------------------------------------
/tools/dspltools/packages/dspllib/data_sources/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2
2 | #
3 | # Copyright 2018 Google LLC
4 | #
5 | # Use of this source code is governed by a BSD-style
6 | # license that can be found in the LICENSE file or at
7 | # https://developers.google.com/open-source/licenses/bsd
8 |
--------------------------------------------------------------------------------
/tools/dspltools/packages/dspllib/data_sources/csv_data_source_sqlite_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2
2 | #
3 | # Copyright 2018 Google LLC
4 | #
5 | # Use of this source code is governed by a BSD-style
6 | # license that can be found in the LICENSE file or at
7 | # https://developers.google.com/open-source/licenses/bsd
8 |
9 | """Tests of csv_data_source_sqlite module."""
10 |
11 |
12 | __author__ = 'Benjamin Yolken '
13 |
14 | import unittest
15 |
16 | import csv_data_source_sqlite
17 | import csv_sources_test_suite
18 |
19 |
20 | class CSVDataSourceSqliteTests(csv_sources_test_suite.CSVSourcesTests):
21 | """Tests of the CSVDataSourceSqlite object."""
22 |
23 | def setUp(self):
24 | self.data_source_class = csv_data_source_sqlite.CSVDataSourceSqlite
25 |
26 | super(CSVDataSourceSqliteTests, self).setUp()
27 |
28 |
29 | class CSVDataSourceSqliteErrorTests(
30 | csv_sources_test_suite.CSVSourcesErrorTests):
31 | """Tests of the CSVDataSourceSqlite object under various error conditions."""
32 |
33 | def setUp(self):
34 | self.data_source_class = csv_data_source_sqlite.CSVDataSourceSqlite
35 |
36 | super(CSVDataSourceSqliteErrorTests, self).setUp()
37 |
38 |
39 | if __name__ == '__main__':
40 | unittest.main()
41 |
--------------------------------------------------------------------------------
/tools/dspltools/packages/dspllib/data_sources/csv_data_source_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2
2 | #
3 | # Copyright 2018 Google LLC
4 | #
5 | # Use of this source code is governed by a BSD-style
6 | # license that can be found in the LICENSE file or at
7 | # https://developers.google.com/open-source/licenses/bsd
8 |
9 | """Tests of csv_data_source module."""
10 |
11 |
12 | __author__ = 'Benjamin Yolken '
13 |
14 | import unittest
15 |
16 | import csv_data_source
17 | import csv_sources_test_suite
18 |
19 |
20 | class CSVDataSourceTests(csv_sources_test_suite.CSVSourcesTests):
21 | """Tests of the CSVDataSource object."""
22 |
23 | def setUp(self):
24 | self.data_source_class = csv_data_source.CSVDataSource
25 |
26 | super(CSVDataSourceTests, self).setUp()
27 |
28 |
29 | class CSVDataSourceErrorTests(csv_sources_test_suite.CSVSourcesErrorTests):
30 | """Tests of the CSVDataSource object under various error conditions."""
31 |
32 | def setUp(self):
33 | self.data_source_class = csv_data_source.CSVDataSource
34 |
35 | super(CSVDataSourceErrorTests, self).setUp()
36 |
37 |
38 | if __name__ == '__main__':
39 | unittest.main()
40 |
--------------------------------------------------------------------------------
/tools/dspltools/packages/dspllib/data_sources/csv_sources_test_suite.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2
2 | #
3 | # Copyright 2018 Google LLC
4 | #
5 | # Use of this source code is governed by a BSD-style
6 | # license that can be found in the LICENSE file or at
7 | # https://developers.google.com/open-source/licenses/bsd
8 |
9 | """A set of tests useful for CSV data sources."""
10 |
11 |
12 | __author__ = 'Benjamin Yolken '
13 |
14 | import StringIO
15 | import unittest
16 |
17 | import data_source
18 |
19 |
20 | _TEST_CSV_CONTENT = (
21 | """date[type=date;format=yyyy-MM-dd],category1,category2[concept=geo:us_state;parent=category3;total_val=total],category3,metric1[extends=quantity:ratio;slice_role=metric],metric2[aggregation=avg],metric3[aggregation=count]
22 | 1980-01-01,red,california,west,89,321,71.21
23 | 1981-01-01,red,california,west,99,231,391.2
24 | 1982-01-01,blue,maine's,east,293,32,2.31
25 | 1983-01-01,blue,california,west,293,12,10.3
26 | 1984-01-01,red,maine's,east,932,48,10.78
27 | 1984-01-01,red,oregon,west,32,33,-14.34
28 | 1985-01-01,red,total,east,21,98,87.0
29 | 1986-01-01,red,total,west,33,90,-10.1""")
30 |
31 |
32 | class CSVSourcesTests(unittest.TestCase):
33 | """Basic tests of a CSV DataSource object."""
34 |
35 | def setUp(self):
36 | self.csv_file = StringIO.StringIO(_TEST_CSV_CONTENT)
37 | self.data_source_obj = self.data_source_class(self.csv_file, verbose=False)
38 |
39 | def tearDown(self):
40 | self.data_source_obj.Close()
41 | self.csv_file.close()
42 |
43 | def testColumnBundle(self):
44 | """Test that column bundle is properly generated."""
45 | column_bundle = self.data_source_obj.GetColumnBundle()
46 |
47 | self.assertEqual(
48 | [c.column_id for c in column_bundle.GetColumnIterator()],
49 | ['date', 'category1', 'category2', 'category3',
50 | 'metric1', 'metric2', 'metric3'])
51 | self.assertEqual(
52 | [c.data_type for c in column_bundle.GetColumnIterator()],
53 | ['date', 'string', 'string', 'string', 'integer', 'integer', 'float'])
54 | self.assertEqual(
55 | [c.data_format for c in column_bundle.GetColumnIterator()],
56 | ['yyyy-MM-dd', '', '', '', '', '', ''])
57 | self.assertEqual(
58 | [c.concept_ref for c in column_bundle.GetColumnIterator()],
59 | ['time:day', '', 'geo:us_state', '', '', '', ''])
60 | self.assertEqual(
61 | [c.concept_extension for c in column_bundle.GetColumnIterator()],
62 | ['', '', '', '', 'quantity:ratio', '', ''])
63 | self.assertEqual(
64 | [c.slice_role for c in column_bundle.GetColumnIterator()],
65 | ['dimension', 'dimension', 'dimension', 'dimension', 'metric', 'metric',
66 | 'metric'])
67 | self.assertEqual(
68 | [c.rollup for c in column_bundle.GetColumnIterator()],
69 | [False, False, False, True, False, False, False])
70 | self.assertEqual(
71 | [c.parent_ref for c in column_bundle.GetColumnIterator()],
72 | ['', '', 'category3', '', '', '', ''])
73 | self.assertEqual(
74 | [c.total_val for c in column_bundle.GetColumnIterator()],
75 | ['', '', 'total', '', '', '', ''])
76 |
77 | def testEntityTableGeneration(self):
78 | """Test that single-concept tables are generated correctly."""
79 | table_data = self.data_source_obj.GetTableData(
80 | data_source.QueryParameters(
81 | data_source.QueryParameters.CONCEPT_QUERY, ['category2']))
82 |
83 | # Make sure quotes are properly escaped
84 | self.assertEqual(table_data.rows,
85 | [['california'], ['maine\'s'], ['oregon']])
86 |
87 | def testMultiEntityTableGeneration(self):
88 | """Test that multi-concept tables are generated correctly."""
89 | table_data = self.data_source_obj.GetTableData(
90 | data_source.QueryParameters(
91 | data_source.QueryParameters.CONCEPT_QUERY,
92 | ['category2', 'category3']))
93 |
94 | # Make sure quotes are properly escaped
95 | self.assertEqual(table_data.rows,
96 | [['california', 'west'], ['maine\'s', 'east'],
97 | ['oregon', 'west']])
98 |
99 | def testSliceTableGeneration(self):
100 | """Test that slice tables are generated correctly."""
101 | table_data = self.data_source_obj.GetTableData(
102 | data_source.QueryParameters(
103 | data_source.QueryParameters.SLICE_QUERY,
104 | ['metric3', 'category2', 'metric1', 'metric2']))
105 |
106 | self.assertEqual(
107 | table_data.rows,
108 | [[3, 'california', 89 + 99 + 293, (321.0 + 231.0 + 12.0) / 3.0],
109 | [2, 'maine\'s', 293 + 932, (32.0 + 48.0) / 2.0],
110 | [1, 'oregon', 32, 33]])
111 |
112 | def testTotalsSliceTableGeneration(self):
113 | """Test that slice tables are generated correctly with total values."""
114 | table_data = self.data_source_obj.GetTableData(
115 | data_source.QueryParameters(
116 | data_source.QueryParameters.SLICE_QUERY,
117 | ['category1', 'metric1', 'metric2', 'metric3']))
118 |
119 | self.assertEqual(
120 | table_data.rows,
121 | [['red', 21 + 33, (98.0 + 90.0) / 2.0, 2]])
122 |
123 |
124 | class CSVSourcesErrorTests(unittest.TestCase):
125 | """Tests of a CSV DataSource object for error cases."""
126 |
127 | def setUp(self):
128 | pass
129 |
130 | def testBadHeaderKey(self):
131 | """Test that unknown key in header generates error."""
132 | csv_file = StringIO.StringIO(
133 | 'date[unknown_key=unknown_value],metric\n1990,23232')
134 |
135 | self.assertRaises(
136 | data_source.DataSourceError,
137 | self.data_source_class,
138 | csv_file, False)
139 |
140 | csv_file.close()
141 |
142 | def testBadDataType(self):
143 | """Test that unknown type value generates error."""
144 | csv_file = StringIO.StringIO('date[type=unknown_type],metric\n1990,23232')
145 |
146 | self.assertRaises(
147 | data_source.DataSourceError,
148 | self.data_source_class,
149 | csv_file, False)
150 |
151 | csv_file.close()
152 |
153 | def testBadAggregation(self):
154 | """Test that unknown aggregation operator generates error."""
155 | csv_file = StringIO.StringIO(
156 | 'date[aggregation=unknown_aggregation],metric\n1990,23232')
157 |
158 | self.assertRaises(
159 | data_source.DataSourceError,
160 | self.data_source_class,
161 | csv_file, False)
162 |
163 | csv_file.close()
164 |
165 | def testBadSliceRoleKey(self):
166 | """Test that unknown value for slice_role generates error."""
167 | csv_file = StringIO.StringIO(
168 | 'date[slice_role=unknown_role],metric\n1990,23232')
169 |
170 | self.assertRaises(
171 | data_source.DataSourceError,
172 | self.data_source_class,
173 | csv_file, False)
174 |
175 | csv_file.close()
176 |
177 | def testBadColumnID(self):
178 | """Test that a badly formatted column ID generates error."""
179 | csv_file = StringIO.StringIO('my date[type=date],metric\n1990,23232')
180 |
181 | self.assertRaises(
182 | data_source.DataSourceError,
183 | self.data_source_class,
184 | csv_file, False)
185 |
186 | csv_file.close()
187 |
188 | def testBadDataRow(self):
189 | """Test that row with wrong number of entries causes error."""
190 | csv_file = StringIO.StringIO(
191 | 'date,column\n01/01/1990,abcd,1234')
192 |
193 | self.assertRaises(
194 | data_source.DataSourceError,
195 | self.data_source_class,
196 | csv_file, False)
197 |
198 | csv_file.close()
199 |
200 | def testBadParentReference(self):
201 | """Test that illegal parent reference causes error."""
202 | csv_file = StringIO.StringIO(
203 | 'date,column[parent=unknown_parent]\n01/01/1990,abcd')
204 |
205 | self.assertRaises(
206 | data_source.DataSourceError,
207 | self.data_source_class,
208 | csv_file, False)
209 |
210 | csv_file.close()
211 |
212 | def testMultipleParents(self):
213 | """Test that having multiple parent instances causes error."""
214 | csv_file = StringIO.StringIO(
215 | 'date,column1[parent=column2],column2,column3\n'
216 | '1/1/2001,val1,parent1,323\n1/2/2001,val1,parent2,123')
217 |
218 | self.assertRaises(
219 | data_source.DataSourceError,
220 | self.data_source_class,
221 | csv_file, False)
222 |
223 | csv_file.close()
224 |
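The expected rows in testSliceTableGeneration encode the aggregation rules declared in the header: metric1 is summed, metric2 averaged, metric3 counted, rows are grouped by category2, and rows whose category2 equals the declared total_val ('total') are excluded. The same arithmetic as a standalone sketch:

from collections import defaultdict

# (category2, metric1, metric2) for the non-'total' rows of _TEST_CSV_CONTENT
rows = [('california', 89, 321.0), ('california', 99, 231.0),
        ("maine's", 293, 32.0), ('california', 293, 12.0),
        ("maine's", 932, 48.0), ('oregon', 32, 33.0)]

groups = defaultdict(list)
for state, m1, m2 in rows:
    groups[state].append((m1, m2))

for state, vals in sorted(groups.items()):
    count = len(vals)                          # metric3: count
    total = sum(v[0] for v in vals)            # metric1: sum
    average = sum(v[1] for v in vals) / count  # metric2: avg
    print([count, state, total, average])
# [3, 'california', 481, 188.0]
# [2, "maine's", 1225, 40.0]
# [1, 'oregon', 32, 33.0]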
--------------------------------------------------------------------------------
/tools/dspltools/packages/dspllib/data_sources/csv_utilities.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2
2 | #
3 | # Copyright 2018 Google LLC
4 | #
5 | # Use of this source code is governed by a BSD-style
6 | # license that can be found in the LICENSE file or at
7 | # https://developers.google.com/open-source/licenses/bsd
8 |
9 | """Utility functions useful for CSV data sources."""
10 | from __future__ import print_function
11 |
12 | __author__ = 'Benjamin Yolken '
13 |
14 |
15 | import csv
16 | import re
18 | import warnings
19 |
20 | import data_source
21 |
22 |
23 | def _HeaderToColumn(header_string):
24 | """Parse the header string for a column.
25 |
26 | Args:
27 | header_string: The complete string for the column header
28 |
29 | Returns:
30 | A DataColumn object populated based on the header data
31 |
32 | Raises:
33 | DataSourceError: If there are any errors in parsing, e.g. if an unrecognized
34 | key is found.
35 | """
36 | # The column id must be at least one character long, and cannot contain the
37 | # characters '[', ']', ';', or whitespace
38 | parameters_match = re.match(
39 | '^([^\]\[;\s]+)(?:\[(.*)\]){0,1}$',
40 | header_string.strip().replace('"', ''))
41 |
42 | if not parameters_match:
43 | raise data_source.DataSourceError(
44 | 'Formatting error for header string: %s' % header_string)
45 |
46 | column_id = parameters_match.group(1)
47 | column = data_source.DataSourceColumn(column_id, internal_parameters={})
48 |
49 | if parameters_match.group(2):
50 | # Parse the column parameters
51 | key_value_pairs = parameters_match.group(2).split(';')
52 |
53 | for key_value_pair in key_value_pairs:
54 | try:
55 | [key, value] = key_value_pair.split('=')
56 | except ValueError:
57 | raise data_source.DataSourceError(
58 | 'Formatting error for header string: %s' % header_string)
59 |
60 | # Map the key to the appropriate field of the DataSourceColumn object
61 | if key == 'type':
62 | if value not in ['date', 'float', 'integer', 'string']:
63 | raise data_source.DataSourceError(
64 | 'Unknown data type for column %s: %s' %
65 | (column.column_id, value))
66 |
67 | column.data_type = value
68 | elif key == 'format':
69 | column.data_format = value
70 | elif key == 'concept':
71 | column.concept_ref = value
72 | elif key == 'extends':
73 | column.concept_extension = value
74 | elif key == 'parent':
75 | column.parent_ref = value
76 | elif key == 'slice_role':
77 | role_value = value.lower()
78 |
79 | if role_value not in ['dimension', 'metric']:
80 | raise data_source.DataSourceError(
81 | 'Unrecognized slice_role in column %s: %s' %
82 | (column.column_id, value))
83 | else:
84 | column.slice_role = role_value
85 | elif key == 'rollup':
86 | if value.lower() == 'true':
87 | column.rollup = True
88 | elif value.lower() == 'false':
89 | column.rollup = False
90 | else:
91 | raise data_source.DataSourceError(
92 | 'Unrecognized boolean value in column %s: %s' %
93 | (column.column_id, value))
94 | elif key == 'total_val':
95 | column.total_val = value
96 | elif key == 'dropif':
97 | column.internal_parameters['dropif_val'] = value
98 | elif key == 'zeroif':
99 | column.internal_parameters['zeroif_val'] = value
100 | elif key == 'aggregation':
101 | if value.lower() not in ['sum', 'max', 'min', 'avg', 'count']:
102 | raise data_source.DataSourceError(
103 | 'Unknown aggregation for column %s: %s' %
104 | (column.column_id, value))
105 |
106 | column.internal_parameters['aggregation'] = value
107 | else:
108 | raise data_source.DataSourceError(
109 | 'Unknown parameter for column %s: %s' %
110 | (column.column_id, key))
111 | return column
112 |
113 |
114 | def ConstructColumnBundle(csv_file, verbose=True):
115 | """Construct a ColumnBundle from the header information in a CSV file.
116 |
117 | Args:
118 | csv_file: A file-like object containing the CSV data
119 | verbose: Print out extra information to stdout
120 |
121 | Returns:
122 | A data_source.ColumnBundle object populated based on the CSV header
123 |
124 | Raises:
125 | DataSourceError: If there are any parsing errors or data
126 | inconsistencies
127 | """
128 | # Get the first and second rows of the CSV
129 | header_csv_reader = csv.reader(csv_file, delimiter=',', quotechar='"')
130 | header_row_values = next(header_csv_reader)
131 | second_row_values = next(header_csv_reader)
132 | csv_file.seek(0)
133 |
134 | # Check that second row is properly formatted
135 | if len(header_row_values) != len(second_row_values):
136 | raise data_source.DataSourceError(
137 | 'Number of columns in row 2 (%d) does not match number '
138 | 'expected (%d)' % (len(second_row_values), len(header_row_values)))
139 |
140 | column_bundle = data_source.DataSourceColumnBundle()
141 |
142 | for header_element in header_row_values:
143 | column_bundle.AddColumn(_HeaderToColumn(header_element))
144 |
145 | num_date_columns = 0
146 | has_metric_column = False
147 | column_ids = [column.column_id for column in
148 | column_bundle.GetColumnIterator()]
149 |
150 | # Iterate through columns, populating and refining DataSourceColumn
151 | # parameters as necessary
152 | for c, column in enumerate(column_bundle.GetColumnIterator()):
153 | if verbose:
154 | print('\nEvaluating column %s' % column.column_id)
155 |
156 | # Check data type
157 | if not column.data_type:
158 | column.data_type = (
159 | data_source.GuessDataType(second_row_values[c], column.column_id))
160 |
161 | if verbose:
162 | print('Guessing that column %s is of type %s' % (
163 | column.column_id, column.data_type))
164 |
165 | # Check slice type
166 | if not column.slice_role:
167 | if column.data_type == 'integer' or column.data_type == 'float':
168 | column.slice_role = 'metric'
169 | else:
170 | column.slice_role = 'dimension'
171 |
172 | if verbose:
173 | print('Guessing that column %s is a %s' % (
174 | column.column_id, column.slice_role))
175 |
176 | # Check aggregation
177 | if column.slice_role == 'metric':
178 | has_metric_column = True
179 |
180 | if 'aggregation' not in column.internal_parameters:
181 | column.internal_parameters['aggregation'] = 'SUM'
182 |
183 | if verbose:
184 | print('Guessing that column %s should be aggregated by %s' % (
185 | column.column_id, column.internal_parameters['aggregation']))
186 |
187 | # Check parent
188 | if column.parent_ref:
189 | if column.parent_ref not in column_ids:
190 | raise data_source.DataSourceError(
191 | 'Column %s references a parent not defined in this dataset: %s' %
192 | (column.column_id, column.parent_ref))
193 |
194 | parent_column = column_bundle.GetColumnByID(column.parent_ref)
195 |
196 | if not parent_column.rollup:
197 | parent_column.rollup = True
198 |
199 | if verbose:
200 | print('Making column %s rollup since it is a parent to column %s'
201 | % (parent_column.column_id, column.column_id))
202 |
203 | # Check date format and concept
204 | if column.data_type == 'date':
205 | num_date_columns += 1
206 |
207 | if not column.data_format:
208 | column.data_format = (
209 | data_source.GuessDateFormat(second_row_values[c]))
210 |
211 | if not column.concept_ref:
212 | column.concept_ref = (
213 | data_source.GuessDateConcept(column.data_format))
214 |
215 | if verbose:
216 | print('Guessing that column %s is formatted as %s and '
217 | 'corresponds to %s' % (
218 | column.column_id, column.data_format, column.concept_ref))
219 |
220 | # Warn user if their file will not produce interesting DSPL visualizations
221 | if num_date_columns == 0:
222 | warnings.warn('Input file does not have a date column',
223 | data_source.DataSourceWarning)
224 |
225 | elif num_date_columns > 1:
226 | warnings.warn('Input file has more than one date column',
227 | data_source.DataSourceWarning)
228 |
229 | if not has_metric_column:
230 | warnings.warn('Input file does not have any metrics',
231 | data_source.DataSourceWarning)
232 |
233 | return column_bundle
234 |
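When a header carries no bracket annotations, ConstructColumnBundle infers the missing fields from the second CSV row, as above. A sketch of the expected inferences for a bare two-column file (input hypothetical; expectations follow the data_source tests below):

import io

csv_file = io.StringIO('year,sales\n1990,23232')
# Expected inferences:
#   year  -> data_type 'date' (from the column id), data_format 'yyyy',
#            concept_ref 'time:year', slice_role 'dimension'
#   sales -> data_type 'integer', slice_role 'metric', aggregation 'SUM'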
--------------------------------------------------------------------------------
/tools/dspltools/packages/dspllib/data_sources/data_source_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2
2 | #
3 | # Copyright 2018 Google LLC
4 | #
5 | # Use of this source code is governed by a BSD-style
6 | # license that can be found in the LICENSE file or at
7 | # https://developers.google.com/open-source/licenses/bsd
8 |
9 | """Tests of data_source module."""
10 |
11 |
12 | __author__ = 'Benjamin Yolken '
13 |
14 | import unittest
15 |
16 | import data_source
17 |
18 |
19 | class DataSourceColumnBundleTests(unittest.TestCase):
20 | """Tests of DataSourceColumnBundle object."""
21 |
22 | def setUp(self):
23 | self.column_bundle = data_source.DataSourceColumnBundle(
24 | [data_source.DataSourceColumn(column_id='col1'),
25 | data_source.DataSourceColumn(column_id='col2'),
26 | data_source.DataSourceColumn(column_id='col3')])
27 |
28 | def testAddColumn(self):
29 | self.column_bundle.AddColumn(
30 | data_source.DataSourceColumn(column_id='col4'))
31 | self.assertEqual(self.column_bundle.GetColumnByID('col4').column_id,
32 | 'col4')
33 |
34 | def testGetColumnByID(self):
35 | column = self.column_bundle.GetColumnByID('col2')
36 | self.assertEqual(column.column_id, 'col2')
37 |
38 | def testGetColumnByOrder(self):
39 | column = self.column_bundle.GetColumnByOrder(2)
40 | self.assertEqual(column.column_id, 'col3')
41 |
42 | def testGetNumColumns(self):
43 | self.assertEqual(self.column_bundle.GetNumColumns(), 3)
44 |
45 | def testGetColumnIterator(self):
46 | column_iterator = self.column_bundle.GetColumnIterator()
47 | column_id_list = [c.column_id for c in column_iterator]
48 | self.assertEqual(column_id_list, ['col1', 'col2', 'col3'])
49 |
50 |
51 | class TableDataTest(unittest.TestCase):
52 | """Tests of TableData object."""
53 |
54 | def setUp(self):
55 | self.table_data = data_source.TableData(
56 | [[1, 2, 3], [4, 5, 6]])
57 |
58 | def testMergeValues(self):
59 | another_table_data = data_source.TableData([[4, 5, 6], [6, 7, 8]])
60 | merged_table_data = self.table_data.MergeValues(
61 | another_table_data, num_columns=2)
62 | self.assertEqual(merged_table_data.rows,
63 | [[1, 2, 3, 4, 5], [4, 5, 6, 6, 7]])
64 |
65 | def testMergeConstant(self):
66 | merged_table_data = self.table_data.MergeConstant('abcd')
67 | self.assertEqual(merged_table_data.rows,
68 | [[1, 2, 3, 'abcd'], [4, 5, 6, 'abcd']])
69 |
70 |
71 | class DataGuessingTest(unittest.TestCase):
72 | """Test of data type / format guessing functions."""
73 |
74 | def setUp(self):
75 | pass
76 |
77 | def testGuessType(self):
78 | self.assertEqual(data_source.GuessDataType('312332'), 'integer')
79 | self.assertEqual(data_source.GuessDataType('1999', 'year'), 'date')
80 | self.assertEqual(data_source.GuessDataType('3123.32'), 'float')
81 | self.assertEqual(data_source.GuessDataType('-3399332'), 'integer')
82 | self.assertEqual(data_source.GuessDataType('-3.0'), 'float')
83 | self.assertEqual(data_source.GuessDataType('1/1/11'), 'date')
84 | self.assertEqual(data_source.GuessDataType('01/1932'), 'date')
85 | self.assertEqual(data_source.GuessDataType('2-3-1932'), 'date')
86 | self.assertEqual(data_source.GuessDataType('something'), 'string')
87 | self.assertEqual(data_source.GuessDataType('3278.23728.223'), 'string')
88 |
89 | def testGuessDateFormat(self):
90 | self.assertEqual(data_source.GuessDateFormat('2819'), 'yyyy')
91 | self.assertEqual(data_source.GuessDateFormat('3/1990'), 'MM/yyyy')
92 | self.assertEqual(data_source.GuessDateFormat('1990-3'), 'yyyy-MM')
93 | self.assertEqual(data_source.GuessDateFormat('01-2-1981'), 'MM-dd-yyyy')
94 | self.assertEqual(data_source.GuessDateFormat('1990/2/3'), 'yyyy/MM/dd')
95 |
96 | self.assertRaises(data_source.DataSourceError,
97 | data_source.GuessDateFormat, '1990.12')
98 | self.assertRaises(data_source.DataSourceError,
99 | data_source.GuessDateFormat, 'Jan 1981')
100 |
101 | def testGuessDateConcept(self):
102 | self.assertEqual(data_source.GuessDateConcept('yyyy'), 'time:year')
103 | self.assertEqual(data_source.GuessDateConcept('yyyy-MM'), 'time:month')
104 | self.assertEqual(data_source.GuessDateConcept('yy.MM.dd'), 'time:day')
105 | self.assertEqual(data_source.GuessDateConcept('dd/MM/yyyy'), 'time:day')
106 |
107 | self.assertRaises(data_source.DataSourceError,
108 | data_source.GuessDateConcept, 'yy-mm')
109 | self.assertRaises(data_source.DataSourceError,
110 | data_source.GuessDateConcept, 'GG yyyy')
111 |
112 |
113 | if __name__ == '__main__':
114 | unittest.main()
115 |
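A minimal reimplementation consistent with the testGuessType expectations above (a sketch only; the actual logic lives in data_source.GuessDataType):

import re

def guess_data_type(value, column_id=''):
    value = value.strip()
    if column_id == 'year' and re.match(r'^\d{4}$', value):
        return 'date'
    if re.match(r'^-?\d+$', value):
        return 'integer'
    if re.match(r'^-?\d+\.\d+$', value):
        return 'float'
    # Two or three slash- or dash-separated numeric components
    if re.match(r'^\d{1,4}([/-]\d{1,4}){1,2}$', value):
        return 'date'
    return 'string'

assert guess_data_type('1999', 'year') == 'date'
assert guess_data_type('2-3-1932') == 'date'
assert guess_data_type('3278.23728.223') == 'string'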
--------------------------------------------------------------------------------
/tools/dspltools/packages/dspllib/model/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2
2 | #
3 | # Copyright 2018 Google LLC
4 | #
5 | # Use of this source code is governed by a BSD-style
6 | # license that can be found in the LICENSE file or at
7 | # https://developers.google.com/open-source/licenses/bsd
8 |
--------------------------------------------------------------------------------
/tools/dspltools/packages/dspllib/model/dspl_model_loader_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2
2 | #
3 | # Copyright 2018 Google LLC
4 | #
5 | # Use of this source code is governed by a BSD-style
6 | # license that can be found in the LICENSE file or at
7 | # https://developers.google.com/open-source/licenses/bsd
8 |
9 | """Tests of dspl_model_loader module."""
10 |
11 |
12 | __author__ = 'Benjamin Yolken '
13 |
14 | import os
15 | import os.path
16 | import shutil
17 | import tempfile
18 | import unittest
19 |
20 | import dspl_model_loader
21 | import dspl_model_test
22 |
23 |
24 | _SLICE_CSV_DATA = (
25 | """col1,col2
26 | val1,1
27 | val2 , 2
28 | val3,3""")
29 |
30 |
31 | class DSPLModelLoaderTests(unittest.TestCase):
32 | """Basic test cases for dspl_model_loader module."""
33 |
34 | def setUp(self):
35 | self.input_dir = tempfile.mkdtemp()
36 | self.xml_file_path = os.path.join(self.input_dir, 'dataset.xml')
37 |
38 | xml_file = open(self.xml_file_path, 'w')
39 | xml_file.write(dspl_model_test.TEST_DSPL_XML)
40 | xml_file.close()
41 |
42 | slice_csv_file = open(os.path.join(self.input_dir, 'mydata.csv'), 'w')
43 | slice_csv_file.write(_SLICE_CSV_DATA)
44 | slice_csv_file.close()
45 |
46 | def tearDown(self):
47 | shutil.rmtree(self.input_dir)
48 |
49 | def testDSPLImportLoading(self):
50 | """Test that dataset is imported correctly."""
51 | dspl_dataset = dspl_model_loader.LoadDSPLFromFiles(self.xml_file_path)
52 |
53 | # Test basic info
54 | self.assertEqual(dspl_dataset.name, 'My Dataset')
55 | self.assertEqual(dspl_dataset.description, 'My Dataset Description')
56 | self.assertEqual(dspl_dataset.url, 'url1')
57 |
58 | self.assertEqual(dspl_dataset.provider_name, 'Googler')
59 | self.assertEqual(dspl_dataset.provider_url, 'url2')
60 |
61 | # Test imports
62 | self.assertEqual(len(dspl_dataset.imports), 2)
63 |
64 | self.assertEqual(dspl_dataset.imports[0].namespace_id,
65 | 'imported_namespace1')
66 | self.assertEqual(dspl_dataset.imports[0].namespace_url,
67 | 'http://imported_namespace1_url')
68 | self.assertEqual(dspl_dataset.imports[1].namespace_id,
69 | 'imported_namespace2')
70 | self.assertEqual(dspl_dataset.imports[1].namespace_url,
71 | 'http://imported_namespace2_url')
72 |
73 | # Test topics
74 | self.assertEqual(len(dspl_dataset.topics), 2)
75 |
76 | self.assertEqual(dspl_dataset.topics[0].topic_id, 'topic1')
77 | self.assertEqual(dspl_dataset.topics[0].topic_name, 'topic1_name')
78 | self.assertEqual(len(dspl_dataset.topics[0].children), 2)
79 |
80 | self.assertEqual(dspl_dataset.topics[0].children[0].topic_id, 'topic2')
81 | self.assertEqual(
82 | dspl_dataset.topics[0].children[0].topic_name, 'topic2_name')
83 | self.assertEqual(dspl_dataset.topics[0].children[1].topic_id, 'topic3')
84 | self.assertEqual(
85 | dspl_dataset.topics[0].children[1].topic_name, 'topic3_name')
86 |
87 | self.assertEqual(dspl_dataset.topics[1].topic_id, 'topic4')
88 | self.assertEqual(dspl_dataset.topics[1].topic_name, 'topic4_name')
89 | self.assertEqual(len(dspl_dataset.topics[1].children), 0)
90 |
91 | # Test concepts
92 | self.assertEqual(len(dspl_dataset.concepts), 3)
93 |
94 | self.assertEqual(dspl_dataset.concepts[0].concept_id, 'concept1')
95 | self.assertEqual(dspl_dataset.concepts[0].concept_extension_reference,
96 | 'entity:entity')
97 | self.assertEqual(dspl_dataset.concepts[0].concept_name, 'Concept 1')
98 | self.assertEqual(dspl_dataset.concepts[0].concept_description,
99 | 'Concept 1 Description')
100 | self.assertEqual(dspl_dataset.concepts[0].data_type, 'string')
101 | self.assertEqual(len(dspl_dataset.concepts[0].attributes), 1)
102 | self.assertEqual(
103 | dspl_dataset.concepts[0].attributes[0].concept_ref, 'attribute_concept')
104 | self.assertEqual(
105 | dspl_dataset.concepts[0].attributes[0].value, 'attribute_value')
106 | self.assertEqual(len(dspl_dataset.concepts[0].properties), 2)
107 | self.assertEqual(
108 | dspl_dataset.concepts[0].properties[0].concept_ref, 'property_concept')
109 | self.assertEqual(
110 | dspl_dataset.concepts[0].properties[0].is_parent, False)
111 | self.assertEqual(
112 | dspl_dataset.concepts[0].properties[1].concept_ref,
113 | 'another_property_concept')
114 | self.assertEqual(
115 | dspl_dataset.concepts[0].properties[1].is_parent, True)
116 | self.assertEqual(dspl_dataset.concepts[0].table_ref, 'table2')
117 |
118 | self.assertEqual(dspl_dataset.concepts[1].concept_id, 'concept2')
119 | self.assertEqual(dspl_dataset.concepts[1].concept_name, 'Concept 2')
120 | self.assertEqual(dspl_dataset.concepts[1].concept_description,
121 | 'Concept 2 Description')
122 | self.assertEqual(dspl_dataset.concepts[1].topic_references,
123 | ['topic1', 'topic2'])
124 | self.assertEqual(dspl_dataset.concepts[1].data_type, 'integer')
125 | self.assertEqual(len(dspl_dataset.concepts[1].attributes), 0)
126 | self.assertEqual(len(dspl_dataset.concepts[1].properties), 0)
127 |
128 | self.assertEqual(dspl_dataset.concepts[2].concept_id, 'geo:country')
129 | self.assertEqual(dspl_dataset.concepts[2].concept_reference, 'geo:country')
130 |
131 | # Test slices
132 | self.assertEqual(len(dspl_dataset.slices), 1)
133 |
134 | self.assertEqual(dspl_dataset.slices[0].slice_id, 'data_slice')
135 | self.assertEqual(dspl_dataset.slices[0].dimension_refs,
136 | ['concept1', 'geo:country'])
137 | self.assertEqual(dspl_dataset.slices[0].metric_refs, ['concept2'])
138 | self.assertEqual(dspl_dataset.slices[0].table_ref, 'table3')
139 | self.assertEqual(
140 | sorted(dspl_dataset.slices[0].dimension_map.items()),
141 | sorted([('concept1', 'concept_column1'),
142 | ('geo:country', 'concept_column3')]))
143 | self.assertEqual(
144 | dspl_dataset.slices[0].metric_map.items(),
145 | [('concept2', 'concept_column2')])
146 |
147 | # Test tables
148 | self.assertEqual(len(dspl_dataset.tables), 1)
149 |
150 | self.assertEqual(dspl_dataset.tables[0].table_id, 'table')
151 | self.assertEqual(dspl_dataset.tables[0].file_name, 'mydata.csv')
152 |
153 | self.assertEqual(len(dspl_dataset.tables[0].columns), 2)
154 | self.assertEqual(dspl_dataset.tables[0].columns[0].column_id, 'col1')
155 | self.assertEqual(dspl_dataset.tables[0].columns[0].data_type, 'string')
156 | self.assertEqual(dspl_dataset.tables[0].columns[1].column_id, 'col2')
157 | self.assertEqual(dspl_dataset.tables[0].columns[1].data_type, 'integer')
158 |
159 | expected_table_rows = _SLICE_CSV_DATA.splitlines()
160 | expected_table_data = []
161 |
162 | for row in expected_table_rows:
163 | split_row = row.split(',')
164 | cleaned_row = [r.strip() for r in split_row]
165 |
166 | expected_table_data.append(cleaned_row)
167 |
168 | self.assertEqual(dspl_dataset.tables[0].table_data, expected_table_data)
169 |
170 | def testBadFileReference(self):
171 | """Test case in which CSV file does not exist."""
172 | os.remove(os.path.join(self.input_dir, 'mydata.csv'))
173 |
174 | self.assertRaises(
175 | dspl_model_loader.DSPLModelLoaderError,
176 | dspl_model_loader.LoadDSPLFromFiles,
177 | self.xml_file_path)
178 |
179 | def testPartialFileLoading(self):
180 | """Test case in which load_all_data is set to False."""
181 | dspl_dataset = dspl_model_loader.LoadDSPLFromFiles(
182 | self.xml_file_path, load_all_data=False)
183 |
184 | expected_table_rows = _SLICE_CSV_DATA.splitlines()[0:2]
185 | expected_table_data = [r.split(',') for r in expected_table_rows]
186 |
187 | self.assertEqual(dspl_dataset.tables[0].table_data, expected_table_data)
188 |
189 |
190 | if __name__ == '__main__':
191 | unittest.main()
192 |
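The partial-loading case corresponds to a call like the following (a sketch; paths as created in setUp above):

import dspl_model_loader

dataset = dspl_model_loader.LoadDSPLFromFiles('dataset.xml', load_all_data=False)
# tables[0].table_data now holds only the CSV header row plus the first data
# row, enough for column type checks without reading entire files.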
--------------------------------------------------------------------------------
/tools/dspltools/packages/dspllib/validation/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2
2 | #
3 | # Copyright 2018 Google LLC
4 | #
5 | # Use of this source code is governed by a BSD-style
6 | # license that can be found in the LICENSE file or at
7 | # https://developers.google.com/open-source/licenses/bsd
8 |
--------------------------------------------------------------------------------
/tools/dspltools/packages/dspllib/validation/schemas/xml_1998.xsd:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | See http://www.w3.org/XML/1998/namespace.html and
7 | http://www.w3.org/TR/REC-xml for information about this namespace.
8 |
9 | This schema document describes the XML namespace, in a form
10 | suitable for import by other schema documents.
11 |
12 | Note that local names in this namespace are intended to be defined
13 | only by the World Wide Web Consortium or its subgroups. The
14 | following names are currently defined in this namespace and should
15 | not be used with conflicting semantics by any Working Group,
16 | specification, or document instance:
17 |
18 | base (as an attribute name): denotes an attribute whose value
19 | provides a URI to be used as the base for interpreting any
20 | relative URIs in the scope of the element on which it
21 | appears; its value is inherited. This name is reserved
22 | by virtue of its definition in the XML Base specification.
23 |
24 | lang (as an attribute name): denotes an attribute whose value
25 | is a language code for the natural language of the content of
26 | any element; its value is inherited. This name is reserved
27 | by virtue of its definition in the XML specification.
28 |
29 | space (as an attribute name): denotes an attribute whose
30 | value is a keyword indicating what whitespace processing
31 | discipline is intended for the content of the element; its
32 | value is inherited. This name is reserved by virtue of its
33 | definition in the XML specification.
34 |
35 | Father (in any context at all): denotes Jon Bosak, the chair of
36 | the original XML Working Group. This name is reserved by
37 | the following decision of the W3C XML Plenary and
38 | XML Coordination groups:
39 |
40 | In appreciation for his vision, leadership and dedication
41 | the W3C XML Plenary on this 10th day of February, 2000
42 | reserves for Jon Bosak in perpetuity the XML name
43 | xml:Father
44 |
45 |
46 |
47 |
48 | This schema defines attributes and an attribute group
49 | suitable for use by
50 | schemas wishing to allow xml:base, xml:lang or xml:space attributes
51 | on elements they define.
52 |
53 | To enable this, such a schema must import this schema
54 | for the XML namespace, e.g. as follows:
55 | <schema . . .>
56 | . . .
57 | <import namespace="http://www.w3.org/XML/1998/namespace"
58 | schemaLocation="http://www.w3.org/2001/03/xml.xsd"/>
59 |
60 | Subsequently, qualified reference to any of the attributes
61 | or the group defined below will have the desired effect, e.g.
62 |
63 | <type . . .>
64 | . . .
65 | <attributeGroup ref="xml:specialAttrs"/>
66 |
67 | will define a type which will schema-validate an instance
68 | element with any of those attributes
69 |
70 |
71 |
72 | In keeping with the XML Schema WG's standard versioning
73 | policy, this schema document will persist at
74 | http://www.w3.org/2001/03/xml.xsd.
75 | At the date of issue it can also be found at
76 | http://www.w3.org/2001/xml.xsd.
77 | The schema document at that URI may however change in the future,
78 | in order to remain compatible with the latest version of XML Schema
79 | itself. In other words, if the XML Schema namespace changes, the version
80 | of this document at
81 | http://www.w3.org/2001/xml.xsd will change
82 | accordingly; the version at
83 | http://www.w3.org/2001/03/xml.xsd will not change.
84 |
85 |
86 |
87 |
88 |
89 | In due course, we should install the relevant ISO 2- and 3-letter
90 | codes as the enumerated possible values . . .
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 | See http://www.w3.org/TR/xmlbase/ for
106 | information about this attribute.
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
--------------------------------------------------------------------------------
/tools/dspltools/packages/dspllib/validation/test_dataset/countries.csv:
--------------------------------------------------------------------------------
1 | country,name,latitude,longitude
2 | AD,Andorra,42.546245,1.601554
3 | AF,Afghanistan,33.93911,67.709953
4 | AI,Anguilla,18.220554,-63.068615
5 | AL,Albania,41.153332,20.168331
6 | US,United States,37.09024,-95.712891
7 |
--------------------------------------------------------------------------------
/tools/dspltools/packages/dspllib/validation/test_dataset/country_slice.csv:
--------------------------------------------------------------------------------
1 | country,year,population
2 | AF,1960,9616353
3 | AF,1961,9799379
4 | AF,1962,9989846
5 | AF,1963,10188299
6 | AD,1960,8616353
7 | AD,1961,8799379
8 | AD,1962,8989846
9 | AD,1963,9188299
10 | US,1960,19616353
11 | US,1961,19799379
12 | US,1962,19989846
13 | US,1963,110188299
--------------------------------------------------------------------------------
/tools/dspltools/packages/dspllib/validation/test_dataset/gender_country_slice.csv:
--------------------------------------------------------------------------------
1 | country,gender,year,population
2 | AF,M,1960,4808176
3 | AF,M,1961,4899689
4 | AF,F,1960,4808177
5 | AF,F,1961,4899690
6 | AD,M,1960,3808176
7 | AD,M,1961,3899689
8 | AD,F,1960,3808177
9 | AD,F,1961,3899690
10 | US,M,1960,9808176
11 | US,M,1961,9899689
12 | US,F,1960,9808177
13 | US,F,1961,9899690
--------------------------------------------------------------------------------
/tools/dspltools/packages/dspllib/validation/test_dataset/genders.csv:
--------------------------------------------------------------------------------
1 | gender,name
2 | M,Male
3 | F,Female
4 |
--------------------------------------------------------------------------------
/tools/dspltools/packages/dspllib/validation/test_dataset/state_slice.csv:
--------------------------------------------------------------------------------
1 | state,year,population,unemployment_rate
2 | AL,1960,9616353,5.1
3 | AL,1961,9799379,5.2
4 | AL,1962,9989846,4.8
5 | AL,1963,10188299,6.9
6 | AK,1960,8616353,6.1
7 | AK,1961,8799379,6.2
8 | AK,1962,8989846,7.8
9 | AK,1963,9188299,7.9
--------------------------------------------------------------------------------
/tools/dspltools/packages/dspllib/validation/test_dataset/states.csv:
--------------------------------------------------------------------------------
1 | state,name,latitude,longitude
2 | AL,Alabama,32.318231,-86.902298
3 | AK,Alaska,63.588753,-154.493062
4 | AR,Arkansas,35.20105,-91.831833
5 | AZ,Arizona,34.048928,-111.093731
6 | CA,California,36.778261,-119.417932
7 | CO,Colorado,39.550051,-105.782067
8 | CT,Connecticut,41.603221,-73.087749
9 |
--------------------------------------------------------------------------------
/tools/dspltools/packages/dspllib/validation/xml_validation.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2
2 | #
3 | # Copyright 2018 Google LLC
4 | #
5 | # Use of this source code is governed by a BSD-style
6 | # license that can be found in the LICENSE file or at
7 | # https://developers.google.com/open-source/licenses/bsd
8 |
9 | """Validate a DSPL XML file."""
10 |
11 |
12 | __author__ = 'Benjamin Yolken '
13 |
14 | from lxml import etree
15 | import os.path
16 | import re
17 |
18 |
19 | # The number of lines of context to show around XML errors
20 | _CONTEXT_LINES = 3
21 |
22 | _SCHEMA_PATH = os.path.join(os.path.split(__file__)[0], 'schemas')
23 | _DSPL_SCHEMA_FILE = 'dspl.xsd'
24 |
25 |
26 | def GetErrorContext(xml_string, error_line_number):
27 | """Generate a string that shows the context of an XML error.
28 |
29 | Args:
30 | xml_string: String containing the contents of an XML file
31 | error_line_number: 1-indexed line number on which error has been detected
32 |
33 | Returns:
34 | A pretty-printed string containing the lines around the error
35 | """
36 | min_error_start_line = (error_line_number - 1) - _CONTEXT_LINES
37 | max_error_end_line = (error_line_number - 1) + _CONTEXT_LINES
38 |
39 | error_context_lines = []
40 |
41 | for l, line in enumerate(xml_string.splitlines()):
42 | if l >= min_error_start_line:
43 | line_string = '%5d' % (l + 1)
44 |
45 | # Highlight the error line with asterisks
46 | if (l + 1) == error_line_number:
47 | line_string = line_string.replace(' ', '*')
48 |
49 | error_context_lines.append('%s: %s' % (line_string, line.rstrip()))
50 |
51 | if l >= max_error_end_line:
52 | break
53 |
54 | return '\n'.join(error_context_lines)
55 |
56 |
57 | def GetErrorLineNumber(error_string):
58 | """Parse out the line number from a minixsv error message.
59 |
60 | Args:
61 | error_string: String returned by minixsv exception
62 |
63 | Returns:
64 | Integer line number on which error was detected
65 | """
66 | line_match = re.search(': line ([0-9]+)', error_string)
67 |
68 | return int(line_match.group(1))
69 |
70 |
71 | def RunValidation(xml_file, schema_file=None, verbose=True):
72 | """Run the validation process and return a message with the result.
73 |
74 | Args:
75 | xml_file: An XML input file
76 | schema_file: A DSPL schema file; if not given, the default 'dspl.xsd' is
77 | used.
78 | verbose: Include helpful, extra information about validation
79 |
80 | Returns:
81 | String containing result of validation process
82 | """
83 | result = ''
84 |
85 | xml_file_text = xml_file.read()
86 |
87 | if schema_file:
88 | schema_file_text = schema_file.read()
89 | else:
90 | schema_file = open(os.path.join(_SCHEMA_PATH, _DSPL_SCHEMA_FILE), 'r')
91 | schema_file_text = schema_file.read()
92 | schema_file.close()
93 |
94 | # Insert proper paths into XSD schemaLocation tags
95 | substitution_function = (
96 | lambda m: 'schemaLocation="%s"' % os.path.join(_SCHEMA_PATH, m.group(1)))
97 |
98 | schema_file_text = re.sub(
99 | 'schemaLocation="([a-zA-Z_0-9.]+)"',
100 | substitution_function,
101 | schema_file_text, 2)
102 |
103 | # Parse the schema file into an etree
104 | schema_file_xml = etree.XML(schema_file_text)
105 |
106 | try:
107 | schema = etree.XMLSchema(schema_file_xml)
108 | parser = etree.XMLParser(schema=schema)
109 | etree.fromstring(xml_file_text, parser)
110 | except etree.XMLSyntaxError as xml_error:
111 | # XML parsing error
112 | error_string = str(xml_error)
113 | if verbose:
114 | result = ('Input does not validate against DSPL schema\n\n%s\n%s' %
115 | (error_string, GetErrorContext(
116 | xml_file_text,
117 | xml_error.lineno)))
118 | else:
119 | result = error_string
120 | else:
121 | if verbose:
122 | result = 'XML file validates successfully!'
123 |
124 | return result
125 |
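For reference, a sketch of the context string GetErrorContext builds for an error on line 3 of a five-line document; the highlighted line's padded number is filled with asterisks:

from xml_validation import GetErrorContext  # assumes the module is importable

print(GetErrorContext('a\nb\nbad line\nd\ne', 3))
#     1: a
#     2: b
# ****3: bad line
#     4: d
#     5: e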
--------------------------------------------------------------------------------
/tools/dspltools/packages/dspllib/validation/xml_validation_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2
2 | #
3 | # Copyright 2018 Google LLC
4 | #
5 | # Use of this source code is governed by a BSD-style
6 | # license that can be found in the LICENSE file or at
7 | # https://developers.google.com/open-source/licenses/bsd
8 |
9 | """Tests of xml_validation module."""
10 |
11 |
12 | __author__ = 'Benjamin Yolken '
13 |
14 | import re
15 | import StringIO
16 | import unittest
17 |
18 | import xml_validation
19 |
20 |
21 | _DSPL_CONTENT_VALID = (
22 | """
23 |
25 |
26 |
27 |
28 | Dataset Name
29 |
30 |
31 |
32 |
33 | Provider Name
34 |
35 |
36 | """)
37 |
38 |
39 | _DSPL_CONTENT_XML_ERROR = (
40 | """
41 |
43 |
44 |
45 |
46 | Dataset Name
47 |
48 |
49 |
50 |
51 | Provider Name
52 |
53 |
54 | """)
55 |
56 |
57 | _DSPL_CONTENT_SCHEMA_ERROR = (
58 | """
59 |
61 |
62 |
63 |
64 | Dataset Name
65 |
66 |
67 |
68 |
69 | Provider Name
70 |
71 |
72 | """)
73 |
74 | _DSPL_BILLION_LAUGHS = (
75 | """
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 | ]>
87 |
88 |
89 |
90 | &lol9;
91 |
92 |
93 |
94 |
95 | Provider Name
96 |
97 |
98 | """)
99 |
100 |
101 | class XMLValidationTests(unittest.TestCase):
102 | """Test case for xml_validation module."""
103 |
104 | def setUp(self):
105 | pass
106 |
107 | def testXMLValidationGoodXML(self):
108 | """A simple end-to-end test of the valid XML case."""
109 | valid_input_file = StringIO.StringIO(_DSPL_CONTENT_VALID)
110 |
111 | result = xml_validation.RunValidation(valid_input_file)
112 | self.assertTrue(re.search('validates successfully', result))
113 |
114 | valid_input_file.close()
115 |
116 | def testXMLValidationXMLError(self):
117 | """A simple end-to-end test of the bad XML case."""
118 | xml_error_input_file = StringIO.StringIO(_DSPL_CONTENT_XML_ERROR)
119 |
120 | result = xml_validation.RunValidation(xml_error_input_file)
121 | self.assertTrue(
122 | re.search('XML declaration allowed only.*line 1', result, flags=re.DOTALL))
123 |
124 | xml_error_input_file.close()
125 |
126 | def testXMLValidationSchemaError(self):
127 | """A simple end-to-end test of the non-conforming XML case."""
128 | schema_error_input_file = StringIO.StringIO(_DSPL_CONTENT_SCHEMA_ERROR)
129 |
130 | result = xml_validation.RunValidation(schema_error_input_file)
131 | # TODO: this validation failure has lineno 0; look into why lxml is not
132 | # returning the right location.
133 | self.assertTrue(re.search('The attribute \'illegalproperty\' is not allowed',
134 | result, flags=re.DOTALL))
135 |
136 | schema_error_input_file.close()
137 |
138 | def testXMLBillionLaughsAttack(self):
139 | """A simple test to verify that the validation routine is not susceptible
140 | to the billion laughs attack.
141 | """
142 | billion_laughs_input_file = StringIO.StringIO(_DSPL_BILLION_LAUGHS)
143 | result = xml_validation.RunValidation(billion_laughs_input_file)
144 | self.assertTrue(re.search('Detected an entity reference loop', result))
145 |
146 | billion_laughs_input_file.close()
147 |
148 |
149 | if __name__ == '__main__':
150 | unittest.main()
151 |
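152 | # Usage sketch (illustrative, not part of the original file): RunValidation
153 | # accepts any file-like object and returns a human-readable result string,
154 | # so the module under test can also be driven interactively:
155 | #
156 | #   >>> import StringIO, xml_validation
157 | #   >>> from xml_validation_test import _DSPL_CONTENT_VALID
158 | #   >>> xml_validation.RunValidation(StringIO.StringIO(_DSPL_CONTENT_VALID))
159 | #   'XML file validates successfully!'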
--------------------------------------------------------------------------------
/tools/dspltools/requirements.txt:
--------------------------------------------------------------------------------
1 | lxml
2 |
--------------------------------------------------------------------------------
/tools/dspltools/scripts/dsplcheck.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2
2 | #
3 | # Copyright 2018 Google LLC
4 | #
5 | # Use of this source code is governed by a BSD-style
6 | # license that can be found in the LICENSE file or at
7 | # https://developers.google.com/open-source/licenses/bsd
8 |
9 | """Check a DSPL dataset for likely import errors."""
10 | from __future__ import print_function
11 |
12 |
13 | __author__ = 'Benjamin Yolken <yolken@google.com>'
14 |
15 | import optparse
16 | import os
17 | import shutil
18 | import sys
19 | import tempfile
20 | import time
21 | import zipfile
22 |
23 | from dspllib.model import dspl_model_loader
24 | from dspllib.validation import dspl_validation
25 | from dspllib.validation import xml_validation
26 |
27 |
28 | def LoadOptionsFromFlags(argv):
29 | """Parse command-line arguments.
30 |
31 | Args:
32 | argv: The program argument vector (excluding the script name)
33 |
34 | Returns:
35 | A dictionary with key-value pairs for each of the options
36 | """
37 | usage_string = 'python dsplcheck.py [options] [DSPL XML file or zip archive]'
38 |
39 | parser = optparse.OptionParser(usage=usage_string)
40 |
41 | parser.set_defaults(verbose=True)
42 | parser.add_option('-q', '--quiet',
43 | action='store_false', dest='verbose',
44 | help='Quiet mode')
45 |
46 | parser.add_option(
47 | '-l', '--checking_level', dest='checking_level', type='choice',
48 | choices=['schema_only', 'schema_and_model', 'full'], default='full',
49 | help='Level of checking to do (default: full)')
50 |
51 | (options, args) = parser.parse_args(args=argv)
52 |
53 |   if len(args) != 1:
54 | parser.error('An XML file or DSPL zip archive is required')
55 |
56 | return {'verbose': options.verbose,
57 | 'checking_level': options.checking_level,
58 | 'file_path': args[0]}
59 |
60 |
61 | def GetInputFilePath(input_file_path):
62 | """Parse the input file path, extracting a zip file if necessary.
63 |
64 | Args:
65 | input_file_path: String path to dsplcheck input file
66 |
67 | Returns:
68 | Dictionary containing final XML file path (post-extraction) and directory
69 | into which zip was extracted (or '' if input was not a zip).
70 | """
71 | if zipfile.is_zipfile(input_file_path):
72 | # Extract files to temporary directory and search for dataset XML
73 | zip_dir = tempfile.mkdtemp()
74 |
75 | zip_file = zipfile.ZipFile(input_file_path, 'r')
76 | zip_file.extractall(zip_dir)
77 |
78 | xml_file_paths = []
79 |
80 | for (dirpath, unused_dirnames, filenames) in os.walk(zip_dir):
81 | for file_name in filenames:
82 |         if file_name.endswith('.xml'):
83 | xml_file_paths.append(os.path.join(dirpath, file_name))
84 |
85 | if not xml_file_paths:
86 | print('Error: zip does not have any XML files')
87 | sys.exit(2)
88 | elif len(xml_file_paths) > 1:
89 | print('Error: zip contains multiple XML files')
90 | sys.exit(2)
91 | else:
92 | xml_file_path = xml_file_paths[0]
93 |
94 | zip_file.close()
95 | else:
96 | xml_file_path = input_file_path
97 | zip_dir = ''
98 |
99 | return {'xml_file_path': xml_file_path,
100 | 'zip_dir': zip_dir}
101 |
102 |
103 | def main(argv):
104 | """Parse command-line flags and run XML validator.
105 |
106 | Args:
107 | argv: The program argument vector (excluding the script name)
108 | """
109 | start_time = time.time()
110 |
111 | options = LoadOptionsFromFlags(argv)
112 | file_paths = GetInputFilePath(options['file_path'])
113 |
114 | try:
115 | xml_file = open(file_paths['xml_file_path'], 'r')
116 | except IOError as io_error:
117 | print('Error opening XML file\n\n%s' % io_error)
118 | sys.exit(2)
119 |
120 | if options['verbose']:
121 | print('==== Checking XML file against DSPL schema....')
122 |
123 | result = xml_validation.RunValidation(
124 | xml_file,
125 | verbose=options['verbose'])
126 |
127 | print(result)
128 |
129 | if 'validates successfully' not in result:
130 | # Stop if XML validation not successful
131 | sys.exit(2)
132 |
133 | if options['checking_level'] != 'schema_only':
134 | if options['verbose']:
135 | print('\n==== Parsing DSPL dataset....')
136 |
137 | if options['checking_level'] == 'full':
138 | full_data_check = True
139 | else:
140 | full_data_check = False
141 |
142 | try:
143 | dataset = dspl_model_loader.LoadDSPLFromFiles(
144 | file_paths['xml_file_path'], load_all_data=full_data_check)
145 | except dspl_model_loader.DSPLModelLoaderError as loader_error:
146 | print('Error while trying to parse DSPL dataset\n\n%s' % loader_error)
147 | sys.exit(2)
148 |
149 | if options['verbose']:
150 | print('Parsing completed.')
151 |
152 | if full_data_check:
153 | print('\n==== Checking DSPL model and data....')
154 | else:
155 | print('\n==== Checking DSPL model....')
156 |
157 | dspl_validator = dspl_validation.DSPLDatasetValidator(
158 | dataset, full_data_check=full_data_check)
159 |
160 | print(dspl_validator.RunValidation(options['verbose']))
161 |
162 | xml_file.close()
163 |
164 |   if file_paths['zip_dir']:
165 |     # Clean up the temporary directory created for zip extraction
166 |     shutil.rmtree(file_paths['zip_dir'])
167 |
168 | if options['verbose']:
169 | print('\nCompleted in %0.2f seconds' % (time.time() - start_time))
170 |
171 |
172 | if __name__ == '__main__':
173 | main(sys.argv[1:])
174 |
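175 | # Usage sketch (illustrative; the dataset file names are hypothetical):
176 | #
177 | #   python dsplcheck.py mydataset.xml                      # full check
178 | #   python dsplcheck.py -l schema_only mydataset.xml       # XSD check only
179 | #   python dsplcheck.py -q -l schema_and_model dataset.zip # skip data checks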
--------------------------------------------------------------------------------
/tools/dspltools/scripts/dsplcheck_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2
2 | #
3 | # Copyright 2018 Google LLC
4 | #
5 | # Use of this source code is governed by a BSD-style
6 | # license that can be found in the LICENSE file or at
7 | # https://developers.google.com/open-source/licenses/bsd
8 |
9 | """Tests of dsplcheck module."""
10 |
11 |
12 | __author__ = 'Benjamin Yolken <yolken@google.com>'
13 |
14 | import os
15 | import os.path
16 | import re
17 | import shutil
18 | import StringIO
19 | import sys
20 | import tempfile
21 | import unittest
22 | import zipfile
23 |
24 | import dsplcheck
25 |
26 |
27 | _DSPL_CONTENT = (
28 |     """<?xml version="1.0" encoding="UTF-8"?>
29 | <dspl xmlns="http://schemas.google.com/dspl/2010"
30 |     targetNamespace="http://www.google.com/events/dataset">
31 |   <info>
32 |     <name>
33 |       <value>Dataset Name</value>
34 |     </name>
35 |   </info>
36 |   <provider>
37 |     <name>
38 |       <value>Provider Name</value>
39 |     </name>
40 |   </provider>
41 | </dspl>
42 | """)
43 |
44 |
45 | _DSPL_CONTENT_BAD_CSV_PATH = (
46 |     """<?xml version="1.0" encoding="UTF-8"?>
47 | <dspl xmlns="http://schemas.google.com/dspl/2010"
48 |     targetNamespace="http://www.google.com/events/dataset">
49 |   <info>
50 |     <name>
51 |       <value>Dataset Name</value>
52 |     </name>
53 |   </info>
54 |   <provider>
55 |     <name>
56 |       <value>Provider Name</value>
57 |     </name>
58 |   </provider>
59 |   <tables>
60 |     <table id="bad_csv_table">
61 |       <column id="col1" type="string"/>
62 |       <column id="col2" type="integer"/>
63 |       <data>
64 |         <file format="csv">non_existent_file.csv</file>
65 |       </data>
66 |     </table>
67 |   </tables>
68 | </dspl>
69 | """)
70 |
71 |
72 | class DSPLCheckTests(unittest.TestCase):
73 | """Test case for dsplcheck module."""
74 |
75 | def setUp(self):
76 | self.input_dir = tempfile.mkdtemp()
77 | self.valid_dspl_file_path = (
78 | os.path.join(self.input_dir, 'valid_dataset.xml'))
79 |
80 | self.valid_dspl_file = open(
81 | self.valid_dspl_file_path, 'w')
82 | self.valid_dspl_file.write(_DSPL_CONTENT)
83 | self.valid_dspl_file.close()
84 |
85 | def tearDown(self):
86 | shutil.rmtree(self.input_dir)
87 |
88 | def testValidDataset(self):
89 | """Test basic case of dataset that validates and parses correctly."""
90 | self._StdoutTestHelper(
91 | dsplcheck.main, [self.valid_dspl_file_path],
92 | 'validates successfully.*Parsing completed.*'
93 | 'Checking DSPL model and data.*Completed')
94 |
95 | def testBadXMLFilePath(self):
96 | """Test case where bad XML file path is passed in."""
97 | self._StdoutTestHelper(
98 | dsplcheck.main, ['nonexistent_input_file.xml'],
99 | 'Error opening XML file', expect_exit=True)
100 |
101 | def testBadCSVFilePath(self):
102 | """Test case where DSPL file has bad CSV reference."""
103 | bad_csv_dspl_file_path = (
104 | os.path.join(self.input_dir, 'invalid_csv_dataset.xml'))
105 |
106 | bad_csv_dspl_file = open(bad_csv_dspl_file_path, 'w')
107 | bad_csv_dspl_file.write(_DSPL_CONTENT_BAD_CSV_PATH)
108 | bad_csv_dspl_file.close()
109 |
110 | self._StdoutTestHelper(
111 | dsplcheck.main, [bad_csv_dspl_file_path],
112 | 'Error while trying to parse', expect_exit=True)
113 |
114 | def testSchemaOnlyOption(self):
115 | """Test that 'schema only' checking level option works correctly."""
116 | self._StdoutTestHelper(
117 | dsplcheck.main, [self.valid_dspl_file_path, '-l', 'schema_only'],
118 | 'validates successfully\W*Completed')
119 |
120 | def testSchemaAndModelOption(self):
121 | """Test that 'schema and model' checking level option works correctly."""
122 | self._StdoutTestHelper(
123 | dsplcheck.main, [self.valid_dspl_file_path, '-l', 'schema_and_model'],
124 | 'Checking DSPL model(?! and data)')
125 |
126 | def testZipInput(self):
127 | """Test that module properly handles zipped input."""
128 | zip_path = os.path.join(self.input_dir, 'dataset.zip')
129 |
130 | zip_file = zipfile.ZipFile(zip_path, 'w')
131 | zip_file.write(self.valid_dspl_file_path)
132 | zip_file.close()
133 |
134 | self._StdoutTestHelper(
135 | dsplcheck.main, [zip_path],
136 | 'validates successfully.*Parsing completed.*'
137 | 'Checking DSPL model and data.*Completed')
138 |
139 | def testZipMissingXML(self):
140 | """Test that zip file without an XML file produces error."""
141 | zip_path = os.path.join(self.input_dir, 'dataset.zip')
142 |
143 | zip_file = zipfile.ZipFile(zip_path, 'w')
144 | zip_file.writestr('test.txt', 'Text')
145 | zip_file.close()
146 |
147 | self._StdoutTestHelper(
148 | dsplcheck.main, [zip_path],
149 | 'does not have any XML', expect_exit=True)
150 |
151 | def testZipMultipleXMLFiles(self):
152 | """Test that zip file with multiple XML files produces error."""
153 | zip_path = os.path.join(self.input_dir, 'dataset.zip')
154 |
155 | zip_file = zipfile.ZipFile(zip_path, 'w')
156 | zip_file.writestr('test.xml', 'Text')
157 | zip_file.writestr('test2.xml', 'Text')
158 | zip_file.close()
159 |
160 | self._StdoutTestHelper(
161 | dsplcheck.main, [zip_path],
162 | 'multiple XML files', expect_exit=True)
163 |
164 | def _StdoutTestHelper(self, function, args,
165 | expected_output, expect_exit=False):
166 | """Check the stdout output of a function against its expected value.
167 |
168 | Args:
169 | function: A function to execute
170 | args: The arguments to pass to the function
171 | expected_output: A regular expression expected to match the stdout output
172 | expect_exit: Boolean indicating whether the function execution should
173 | trigger a system exit
174 | """
175 | saved_stdout = sys.stdout
176 |
177 | redirected_output = StringIO.StringIO()
178 | sys.stdout = redirected_output
179 |
180 | if expect_exit:
181 | self.assertRaises(SystemExit, function, args)
182 | else:
183 | function(args)
184 |
185 | self.assertTrue(
186 | re.search(expected_output, redirected_output.getvalue(), re.DOTALL))
187 |
188 | redirected_output.close()
189 | sys.stdout = saved_stdout
190 |
191 |
192 | if __name__ == '__main__':
193 | unittest.main()
194 |
--------------------------------------------------------------------------------
/tools/dspltools/scripts/dsplgen.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2
2 | #
3 | # Copyright 2018 Google LLC
4 | #
5 | # Use of this source code is governed by a BSD-style
6 | # license that can be found in the LICENSE file or at
7 | # https://developers.google.com/open-source/licenses/bsd
8 |
9 | """Generate a DSPL dataset from a tabular data source via the command-line."""
10 | from __future__ import print_function
11 |
12 |
13 | __author__ = 'Benjamin Yolken <yolken@google.com>'
14 |
15 | import optparse
16 | import sys
17 | import time
18 |
19 | from dspllib.data_sources import csv_data_source
20 | from dspllib.data_sources import csv_data_source_sqlite
21 | from dspllib.data_sources import data_source_to_dspl
22 |
23 |
24 | def LoadOptionsFromFlags(argv):
25 | """Parse command-line arguments.
26 |
27 | Args:
28 | argv: The program argument vector (excluding the script name)
29 |
30 | Returns:
31 | A dictionary with key-value pairs for each of the options
32 | """
33 | usage_string = 'python dsplgen.py [options] [csv file]'
34 |
35 | parser = optparse.OptionParser(usage=usage_string)
36 | parser.set_defaults(verbose=True)
37 | parser.add_option('-o', '--output_path', dest='output_path', default='',
38 |                     help=('Path to an output directory '
39 | '(default: current directory)'))
40 | parser.add_option('-q', '--quiet',
41 | action='store_false', dest='verbose',
42 | help='Quiet mode')
43 | parser.add_option('-t', '--data_type', dest='data_type', type='choice',
44 | choices=['csv', 'csv_sqlite'], default='csv',
45 | help='Type of data source to use (default: csv)')
46 |
47 | (options, args) = parser.parse_args(args=argv)
48 |
49 |   if len(args) != 1:
50 | parser.error('A data source (e.g., path to CSV file) is required')
51 |
52 | return {'data_type': options.data_type,
53 | 'data_source': args[0],
54 | 'output_path': options.output_path,
55 | 'verbose': options.verbose}
56 |
57 |
58 | def main(argv):
59 | """Parse command-line flags and run data source to DSPL conversion process.
60 |
61 | Args:
62 | argv: The program argument vector (excluding the script name)
63 | """
64 | start_time = time.time()
65 | options = LoadOptionsFromFlags(argv)
66 |
67 | # Connect to data source
68 | if options['data_type'] in ['csv', 'csv_sqlite']:
69 | try:
70 | csv_file = open(options['data_source'], 'r')
71 | except IOError as io_error:
72 | print('Error opening CSV file\n\n%s' % io_error)
73 | sys.exit(2)
74 |
75 | if options['data_type'] == 'csv':
76 | data_source_obj = csv_data_source.CSVDataSource(
77 | csv_file, options['verbose'])
78 | else:
79 | data_source_obj = csv_data_source_sqlite.CSVDataSourceSqlite(
80 | csv_file, options['verbose'])
81 | else:
82 | print('Error: Unknown data type: %s' % (options['data_type']))
83 | sys.exit(2)
84 |
85 | # Create DSPL dataset from data source
86 | dataset = data_source_to_dspl.PopulateDataset(
87 | data_source_obj, options['verbose'])
88 | data_source_obj.Close()
89 |
90 | if options['verbose']:
91 | print('Materializing dataset:')
92 | print(str(dataset))
93 |
94 | # Write DSPL dataset to disk
95 | dataset.Materialize(options['output_path'])
96 |
97 | if options['verbose']:
98 | print('Completed in %0.2f seconds' % (time.time() - start_time))
99 |
100 |
101 | if __name__ == '__main__':
102 | main(sys.argv[1:])
103 |
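104 | # Usage sketch (illustrative; the input path is hypothetical):
105 | #
106 | #   python dsplgen.py -o /tmp/dspl_out -t csv_sqlite input.csv
107 | #
108 | # Column metadata is read from bracketed key=value pairs in the CSV header,
109 | # e.g. date[type=date;format=yyyy-MM-dd]; see dsplgen_test.py for a sample.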
--------------------------------------------------------------------------------
/tools/dspltools/scripts/dsplgen_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2
2 | #
3 | # Copyright 2018 Google LLC
4 | #
5 | # Use of this source code is governed by a BSD-style
6 | # license that can be found in the LICENSE file or at
7 | # https://developers.google.com/open-source/licenses/bsd
8 |
9 | """Tests of dsplgen module."""
10 |
11 |
12 | __author__ = 'Benjamin Yolken <yolken@google.com>'
13 |
14 | import os
15 | import os.path
16 | import re
17 | import shutil
18 | import StringIO
19 | import sys
20 | import tempfile
21 | import unittest
22 |
23 | import dsplcheck
24 | import dsplgen
25 |
26 |
27 | _TEST_CSV_CONTENT = (
28 | """date[type=date;format=yyyy-MM-dd],category1,category2[concept=geo:us_state;rollup=true],metric1[extends=quantity:ratio;slice_role=metric],metric2,metric3
29 | 1980-01-01,red,california,89,321,71.21
30 | 1981-01-01,red,california,99,231,391.2
31 | 1982-01-01,blue,maine's,293,32,2.31
32 | 1983-01-01,blue,california,293,12,10.3
33 | 1984-01-01,red,maine's,932,48,10.78""")
34 |
35 |
36 | class DSPLGenTests(unittest.TestCase):
37 | """Test cases for dsplgen module."""
38 |
39 | def setUp(self):
40 | self.input_dir = tempfile.mkdtemp()
41 |
42 | input_file = open(os.path.join(self.input_dir, 'input.csv'), 'w')
43 | input_file.write(_TEST_CSV_CONTENT)
44 | input_file.close()
45 |
46 | self.output_dir = tempfile.mkdtemp()
47 |
48 | def tearDown(self):
49 | shutil.rmtree(self.input_dir)
50 | shutil.rmtree(self.output_dir)
51 |
52 | def testDSPLGenEndToEnd(self):
53 | """A simple end-to-end test of the dsplgen application."""
54 | dsplgen.main(['-o', self.output_dir, '-q',
55 | os.path.join(self.input_dir, 'input.csv')])
56 |
57 | self.assertTrue(
58 | os.path.isfile(os.path.join(self.output_dir, 'dataset.xml')))
59 | self.assertTrue(
60 | os.path.isfile(os.path.join(self.output_dir, 'category1_table.csv')))
61 | self.assertTrue(
62 | os.path.isfile(os.path.join(self.output_dir, 'slice_0_table.csv')))
63 | self.assertTrue(
64 | os.path.isfile(os.path.join(self.output_dir, 'slice_1_table.csv')))
65 |
66 | # Test that output validates against dsplcheck
67 | saved_stdout = sys.stdout
68 |
69 | redirected_output = StringIO.StringIO()
70 | sys.stdout = redirected_output
71 |
72 | dsplcheck.main([os.path.join(self.output_dir, 'dataset.xml')])
73 |
74 | self.assertTrue(
75 | re.search(
76 | 'validates successfully.*Parsing completed.*'
77 | 'No issues found.*Completed',
78 | redirected_output.getvalue(), re.DOTALL))
79 |
80 | redirected_output.close()
81 |
82 | sys.stdout = saved_stdout
83 |
84 |   def testCSVNotFound(self):
85 |     """Test that pointing dsplgen at a non-existent CSV file prints an
86 |     error and exits."""
87 | 
88 |     # Redirect stdout so the error message can be inspected
89 |     saved_stdout = sys.stdout
90 |     redirected_output = StringIO.StringIO()
91 |     sys.stdout = redirected_output
92 |
93 | self.assertRaises(SystemExit,
94 | dsplgen.main, ['-q', 'non_existent_input_file.csv'])
95 | self.assertTrue('Error opening CSV file' in redirected_output.getvalue())
96 |
97 | redirected_output.close()
98 | sys.stdout = saved_stdout
99 |
100 |
101 | if __name__ == '__main__':
102 | unittest.main()
103 |
--------------------------------------------------------------------------------
/tools/dspltools/scripts/run_all_tests.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2
2 | #
3 | # Copyright 2018 Google LLC
4 | #
5 | # Use of this source code is governed by a BSD-style
6 | # license that can be found in the LICENSE file or at
7 | # https://developers.google.com/open-source/licenses/bsd
8 |
9 | """Run all tests defined in the DSPL Tools code."""
10 |
11 |
12 | __author__ = 'Benjamin Yolken <yolken@google.com>'
13 |
14 | import unittest
15 |
16 | _TEST_MODULE_NAMES = [
17 | 'dsplcheck_test',
18 | 'dsplgen_test',
19 | 'dspllib.data_sources.csv_data_source_test',
20 | 'dspllib.data_sources.csv_data_source_sqlite_test',
21 | 'dspllib.data_sources.data_source_test',
22 | 'dspllib.data_sources.data_source_to_dspl_test',
23 | 'dspllib.model.dspl_model_loader_test',
24 | 'dspllib.model.dspl_model_test',
25 | 'dspllib.validation.dspl_validation_test',
26 | 'dspllib.validation.xml_validation_test']
27 |
28 |
29 | def main():
30 | """Run all DSPL Tools tests and print the results to stderr."""
31 | test_suite = unittest.TestSuite()
32 |
33 | for test_module_name in _TEST_MODULE_NAMES:
34 | test_suite.addTests(
35 | unittest.defaultTestLoader.loadTestsFromName(test_module_name))
36 |
37 | unittest.TextTestRunner().run(test_suite)
38 |
39 |
40 | if __name__ == '__main__':
41 | main()
42 |
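43 | # Usage sketch (illustrative): the module names above are resolved against the
44 | # working directory and PYTHONPATH, so run this from the scripts/ directory
45 | # with the dspllib package installed or on the path:
46 | #
47 | #   cd tools/dspltools/scripts && python2 run_all_tests.py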
--------------------------------------------------------------------------------
/tools/dspltools/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2
2 | #
3 | # Copyright 2018 Google LLC
4 | #
5 | # Use of this source code is governed by a BSD-style
6 | # license that can be found in the LICENSE file or at
7 | # https://developers.google.com/open-source/licenses/bsd
8 |
9 | """Setup script for the DSPLtools suite."""
10 |
11 | import setuptools  # imported first so it can patch the distutils machinery
12 | from distutils.core import setup
13 |
14 |
15 | setup(name='dspltools',
16 | version='0.5.0',
17 | description='Suite of command-line tools for generating DSPL datasets',
18 | author='Public Statistics',
19 | author_email='public-data-import-feedback@google.com',
20 | url='http://github.com/google/dspl',
21 | packages=['dspllib', 'dspllib.data_sources',
22 | 'dspllib.model', 'dspllib.validation'],
23 | package_dir={'dspllib': 'packages/dspllib'},
24 | package_data={'dspllib.validation': ['schemas/*.xsd',
25 | 'test_dataset/*.csv',
26 | 'test_dataset/*.xml']},
27 | scripts=['scripts/dsplcheck.py', 'scripts/dsplgen.py',
28 | 'scripts/run_all_tests.py'])
29 |
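30 | # Installation sketch (illustrative): from the tools/dspltools directory, run
31 | #
32 | #   python2 setup.py install
33 | #
34 | # to install the dspllib packages and place dsplcheck.py, dsplgen.py, and
35 | # run_all_tests.py on the PATH.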
--------------------------------------------------------------------------------