├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── docs ├── CONTRIBUTING.md ├── LICENSE.md ├── _config.yml ├── dspl2-chart.png ├── dspl2-spec.md └── index.md ├── samples ├── bls │ └── unemployment │ │ ├── .gitattributes │ │ ├── age.csv │ │ ├── bls-unemployment.jsonld │ │ ├── cities.csv │ │ ├── citiesUnemploymentMonthly.csv │ │ ├── counties.csv │ │ ├── countiesUnemploymentMonthly.csv │ │ ├── footnotes.csv │ │ ├── metroAreasUnemploymentMonthly.csv │ │ ├── metro_areas.csv │ │ ├── states.csv │ │ ├── statesUnemploymentMonthly.csv │ │ ├── totalUnemploymentMonthly.csv │ │ ├── totalUnemploymentMonthly_ByAge.csv │ │ ├── totalUnemploymentMonthly_BySex.csv │ │ └── totalUnemploymentMonthly_BySex_ByAge.csv ├── eurostat │ ├── population_density │ │ ├── README.md │ │ ├── eurostat_population_density-inline.json │ │ ├── eurostat_population_density.html │ │ ├── eurostat_population_density.json │ │ ├── met_d3dens.csv │ │ ├── metroreg.csv │ │ ├── transform_d3dens.py │ │ └── transform_metroreg.py │ └── unemployment │ │ ├── age_groups.csv │ │ ├── countries.csv │ │ ├── country_age.csv │ │ ├── country_group_age.csv │ │ ├── country_group_sex.csv │ │ ├── country_group_sex_age.csv │ │ ├── country_group_total.csv │ │ ├── country_groups.csv │ │ ├── country_sex.csv │ │ ├── country_sex_age.csv │ │ ├── country_total.csv │ │ ├── eurostat-unemployment-dspl-v1-inline-small.json │ │ ├── eurostat-unemployment-dspl-v1.json │ │ ├── eurostat-unemployment.xml │ │ ├── footnotes.csv │ │ ├── seasonalities.csv │ │ └── sexes.csv ├── google │ ├── canonical │ │ ├── countries.csv │ │ ├── currencies.csv │ │ ├── entity.xml │ │ ├── entity_order.csv │ │ ├── geo.us.xml │ │ ├── geo.xml │ │ ├── granularity.csv │ │ ├── quantity.xml │ │ ├── states.csv │ │ ├── time.xml │ │ ├── unit.xml │ │ ├── unit_symbol_positions.csv │ │ └── us_counties.csv │ └── dspl-sample │ │ ├── countries.csv │ │ ├── country_slice.csv │ │ ├── dataset.xml │ │ ├── gender_country_slice.csv │ │ ├── genders.csv │ │ ├── state_slice.csv │ │ └── states.csv └── us_census │ ├── population │ └── census-totpop.json │ └── retail_sales │ ├── businesses.csv │ ├── census-retail-sales.xml │ ├── retail_sales_business.csv │ └── seasonalities.csv ├── schema ├── dspl.xsd └── dspl2.jsonld └── tools ├── dspl2 ├── dspl2 │ ├── __init__.py │ ├── expander.py │ ├── filegetter.py │ ├── jsonutil.py │ ├── rdfutil.py │ ├── schema │ │ ├── jsonldcontext.json │ │ └── schema.jsonld │ ├── templates │ │ ├── choose.html │ │ ├── display.html │ │ ├── error.html │ │ ├── render.html │ │ ├── viewer.css │ │ └── viewer.js │ ├── tests │ │ ├── __init__.py │ │ ├── test_expander.py │ │ ├── test_jsonutil.py │ │ └── test_rdfutil.py │ └── validator.py ├── requirements.txt ├── scripts │ ├── dspl2-expand.py │ ├── dspl2-pretty-print-server.py │ ├── dspl2-pretty-print.py │ └── dspl2-validate.py └── setup.py ├── dspl2viz ├── dspl2viz.py ├── foo.jsonld ├── static │ ├── dspl2viz.css │ └── dspl2viz.js └── templates │ └── dspl2viz.html └── dspltools ├── PKG-INFO ├── README.rst ├── examples ├── dsplcheck │ ├── invalid_dspl │ │ ├── countries.csv │ │ ├── country_slice.csv │ │ └── invalid_dspl.xml │ ├── invalid_xml │ │ └── invalid_xml.xml │ └── valid_dataset │ │ ├── countries.csv │ │ ├── country_slice.csv │ │ └── valid_dataset.xml └── dsplgen │ ├── dsplgen_advanced.csv │ ├── dsplgen_hierarchies.csv │ ├── dsplgen_simple.csv │ └── dsplgen_yearly_data.csv ├── packages └── dspllib │ ├── __init__.py │ ├── data_sources │ ├── __init__.py │ ├── csv_data_source.py │ ├── csv_data_source_sqlite.py │ ├── csv_data_source_sqlite_test.py │ 
├── csv_data_source_test.py │ ├── csv_sources_test_suite.py │ ├── csv_utilities.py │ ├── data_source.py │ ├── data_source_test.py │ ├── data_source_to_dspl.py │ └── data_source_to_dspl_test.py │ ├── model │ ├── __init__.py │ ├── dspl_model.py │ ├── dspl_model_loader.py │ ├── dspl_model_loader_test.py │ └── dspl_model_test.py │ └── validation │ ├── __init__.py │ ├── dspl_validation.py │ ├── dspl_validation_test.py │ ├── schemas │ ├── dspl.xsd │ ├── xml_1998.xsd │ └── xml_2001.xsd │ ├── test_dataset │ ├── countries.csv │ ├── country_slice.csv │ ├── dataset.xml │ ├── gender_country_slice.csv │ ├── genders.csv │ ├── state_slice.csv │ └── states.csv │ ├── xml_validation.py │ └── xml_validation_test.py ├── requirements.txt ├── scripts ├── dsplcheck.py ├── dsplcheck_test.py ├── dsplgen.py ├── dsplgen_test.py └── run_all_tests.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | __pycache__ 3 | _site 4 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement. You (or your employer) retain the copyright to your contribution; 10 | this simply gives us permission to use and redistribute your contributions as 11 | part of the project. Head over to <https://cla.developers.google.com/> to see 12 | your current agreements on file or to sign a new one. 13 | 14 | You generally only need to submit a CLA once, so if you've already submitted one 15 | (even if it was for a different project), you probably don't need to do it 16 | again. 17 | 18 | ## Code reviews 19 | 20 | All submissions, including submissions by project members, require review. We 21 | use GitHub pull requests for this purpose. Consult 22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 23 | information on using pull requests. 24 | 25 | ## Community Guidelines 26 | 27 | This project follows [Google's Open Source Community 28 | Guidelines](https://opensource.google.com/conduct/). 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2018, Google Inc. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are 6 | met: 7 | 8 | 1. Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | 3. Neither the name of Google Inc. nor the names of its 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission.
19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Dataset Publishing Language 2 | 3 | ## Introduction 4 | **DSPL** stands for **Dataset Publishing Language**. It is a representation 5 | format for both the metadata (information about the dataset, such as its name 6 | and provider, as well as the concepts it contains and displays) and actual data 7 | (the numbers) of datasets. Datasets described in this format can be imported 8 | into the [Google Public Data Explorer](https://www.google.com/publicdata), a 9 | tool that allows for rich, visual exploration of the data. 10 | 11 | This site hosts miscellaneous, open source content (i.e., schemas, example 12 | files, and utilities) associated with the DSPL standard. See our [documentation 13 | site](https://developers.google.com/public-data) for more details on what DSPL 14 | is and how to use it. The utilities in this repository are documented at [this 15 | site](https://developers.google.com/public-data/docs/dspltools). 16 | 17 | ## Build and install 18 | To build the tools, install `lxml`, then use the `setup.py` script in 19 | `tools/dspltools/`. You can use pip to install both the prerequisite and the package: 20 | 21 | ``` 22 | pip install -r tools/dspltools/requirements.txt 23 | pip install tools/dspltools 24 | ``` 25 | 26 | # DSPL 2 27 | The draft of the DSPL 2 specification, which replaces the existing XML metadata 28 | format with schema.org markup, can be found at the [DSPL GitHub 29 | page](https://google.github.io/dspl). The source for the specification is at 30 | [`docs/dspl2-spec.md`](https://github.com/google/dspl/blob/master/docs/dspl2-spec.md). 31 | 32 | Some initial library and tool support is available in [`tools/dspl2`](https://github.com/google/dspl/tree/master/tools/dspl2). 33 | 34 | ## Build and install 35 | To build the tools, install the prerequisites, then use the `setup.py` script in 36 | `tools/dspl2/`. You can use pip to install both the prerequisites and the package: 37 | 38 | ``` 39 | pip install -r tools/dspl2/requirements.txt 40 | pip install tools/dspl2 41 | ``` 42 |
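As a quick check that the install worked, the `dspl2` package can also be driven directly from Python. The snippet below is only a sketch: `LocalFileGetter` and `ValidateDspl2` are real names exported by the package, but the call shapes shown here are assumptions, so treat `tools/dspl2/scripts/dspl2-validate.py` as the authoritative example.

```python
# Sketch only: LocalFileGetter and ValidateDspl2 are exported by dspl2,
# but the argument and return shapes below are assumptions, not confirmed API.
from dspl2 import LocalFileGetter, ValidateDspl2

getter = LocalFileGetter(
    'samples/eurostat/population_density/eurostat_population_density.json')
for message in ValidateDspl2(getter):  # assumed to yield validation messages
    print(message)
```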
-------------------------------------------------------------------------------- /docs/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Contributing to Data Set Publishing Language, Version 2.0 3 | author: Google 4 | --- 5 | # How to Contribute 6 | 7 | We'd love to accept your patches and contributions to this project. There are 8 | just a few small guidelines you need to follow. 9 | 10 | ## Contributor License Agreement 11 | 12 | Contributions to this project must be accompanied by a Contributor License 13 | Agreement. You (or your employer) retain the copyright to your contribution; 14 | this simply gives us permission to use and redistribute your contributions as 15 | part of the project. Head over to <https://cla.developers.google.com/> to see 16 | your current agreements on file or to sign a new one. 17 | 18 | You generally only need to submit a CLA once, so if you've already submitted one 19 | (even if it was for a different project), you probably don't need to do it 20 | again. 21 | 22 | ## Code reviews 23 | 24 | All submissions, including submissions by project members, require review. We 25 | use GitHub pull requests for this purpose. Consult 26 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 27 | information on using pull requests. 28 | 29 | ## Community Guidelines 30 | 31 | This project follows [Google's Open Source Community 32 | Guidelines](https://opensource.google.com/conduct/). 33 | -------------------------------------------------------------------------------- /docs/_config.yml: -------------------------------------------------------------------------------- 1 | include: 2 | - LICENSE.md 3 | - CONTRIBUTING.md 4 | - index.md 5 | - dspl2-spec.md 6 | 7 | theme: jekyll-theme-cayman -------------------------------------------------------------------------------- /docs/dspl2-chart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/dspl/db79dad685276dbf98ca44b875d1481bc240c5c1/docs/dspl2-chart.png -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Data Set Publishing Language, Version 2.0 3 | author: Natarajan Krishnaswami 4 | --- 5 | # DSPL 2.0 6 | This is the project website for the DSPL 2.0 specification, samples, and related tools. 7 | 8 | ## Spec 9 | 10 | The draft specification is here: [dspl2-spec.html](dspl2-spec.html). 11 | 12 | To provide feedback on the draft, please create a [GitHub issue](https://github.com/google/dspl/issues), or email us at [public-data-import-feedback@google.com](mailto:public-data-import-feedback@google.com). 13 | 14 | ## Related tools 15 | 16 | Initial tools and a Python library are in the DSPL 2.0 GitHub repository under [`tools/dspl2`](https://github.com/google/dspl/tree/master/tools/dspl2). 17 | 18 | * [`dspl2-expand.py`](https://github.com/google/dspl/blob/master/tools/dspl2/scripts/dspl2-expand.py): tool to convert a DSPL 2.0 dataset with CSV references to one with only JSON-LD. 19 | * [`dspl2-validate.py`](https://github.com/google/dspl/blob/master/tools/dspl2/scripts/dspl2-validate.py): tool to do basic validation of a DSPL 2.0 dataset. 20 | * [`dspl2-pretty-print.py`](https://github.com/google/dspl/blob/master/tools/dspl2/scripts/dspl2-pretty-print.py): tool to pretty print a DSPL 2.0 dataset as HTML tables. 21 | * [`dspl2-pretty-print-server.py`](https://github.com/google/dspl/blob/master/tools/dspl2/scripts/dspl2-pretty-print-server.py): local web app version of the pretty printer. 22 | * [`dspl2`](https://github.com/google/dspl/tree/master/tools/dspl2/dspl2): Python library to load, normalize, and expand CSV files in DSPL 2.0 datasets; a minimal usage sketch follows below.
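To give a feel for the library in the last item, a CSV-to-JSON-LD expansion might look like the sketch below. `LocalFileGetter` and `Dspl2JsonLdExpander` are actual exports of the `dspl2` package, but the constructor arguments and the `Expand()` call are assumptions here; `dspl2-expand.py` shows the real invocation.

```python
# Sketch under assumed signatures; see dspl2-expand.py for the real usage.
from dspl2 import Dspl2JsonLdExpander, LocalFileGetter

getter = LocalFileGetter(
    'samples/eurostat/unemployment/eurostat-unemployment-dspl-v1.json')
dataset = Dspl2JsonLdExpander(getter).Expand()  # inlines the CSV-backed tables
```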
23 | 24 | ## Samples 25 | 26 | Examples are in the DSPL 2.0 GitHub repository under [`samples`](https://github.com/google/dspl/tree/master/samples). Currently, the Eurostat unemployment and Eurostat population density samples include DSPL 2.0 metadata. 27 | 28 | ## Contributing 29 | 30 | To contribute, see the [CONTRIBUTING](CONTRIBUTING.html) file and, after submitting a CLA, submit pull requests to the [DSPL GitHub repository](https://github.com/google/dspl). 31 | -------------------------------------------------------------------------------- /samples/bls/unemployment/.gitattributes: -------------------------------------------------------------------------------- 1 | countiesUnemploymentMonthly.csv filter=lfs diff=lfs merge=lfs -text 2 | citiesUnemploymentMonthly.csv filter=lfs diff=lfs merge=lfs -text 3 | -------------------------------------------------------------------------------- /samples/bls/unemployment/age.csv: -------------------------------------------------------------------------------- 1 | "codeValue","name" 2 | "07","16 to 17 years" 3 | "08","16 to 19 years" 4 | "10","16 to 24 years" 5 | "13","18 to 19 years" 6 | "15","18 years and over" 7 | "17","20 years and over" 8 | "20","20 to 24 years" 9 | "28","25 years and over" 10 | "30","25 to 29 years" 11 | "31","25 to 34 years" 12 | "33","25 to 54 years" 13 | "36","30 to 34 years" 14 | "37","35 to 39 years" 15 | "38","35 to 44 years" 16 | "39","40 to 44 years" 17 | "40","45 years and over" 18 | "41","45 to 49 years" 19 | "42","45 to 54 years" 20 | "44","50 to 54 years" 21 | "45","55 years and over" 22 | "48","55 to 59 years" 23 | "49","55 to 64 years" 24 | "56","60 to 61 years" 25 | "57","60 to 64 years" 26 | "61","62 to 64 years" 27 | "65","65 years and over" 28 | "66","65 to 69 years" 29 | "72","70 years and over" 30 | "73","70 to 74 years" 31 | "78","75 years and over" 32 | -------------------------------------------------------------------------------- /samples/bls/unemployment/citiesUnemploymentMonthly.csv: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:b2a75c4df3b7eb6b89bfb8f11227eeeb7c7b33f1f5593cc68b72c431c2e758c5 3 | size 28535697 4 | -------------------------------------------------------------------------------- /samples/bls/unemployment/countiesUnemploymentMonthly.csv: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:54f0f35753c166fa778f6bbe943dcdc873889b3802a6fd2dae72f0fceb10666d 3 | size 53413322 4 | -------------------------------------------------------------------------------- /samples/bls/unemployment/footnotes.csv: -------------------------------------------------------------------------------- 1 | codeValue,description 2 | 1,Data affected by changes in population controls. 3 | 2,Constructed on the 2002 Census Industry Classification from data originally coded on earlier classifications. Official series was not revised. 4 | 3,2000 forward coded on the 2002 Census Occupation Classification. 1983-99 constructed from data originally coded on earlier classifications. 5 | 4,2000 forward coded on the 2002 Census Industry Classification. 1983-99 constructed from data originally coded on earlier classifications. 6 | 7,Data do not meet publication criteria. 7 | 8,This series id code has been discontinued; data are available using the database tool at www.bls.gov/webapps/legacy/cpsatab8.htm. 8 | 9,Data from 1994 through 2002 were revised in February 2014 with updated seasonal adjustments. 9 | A,Area boundaries do not reflect official OMB definitions. 10 | N,Not available.
11 | P,Preliminary. 12 | V,The survey was not conducted due to bad weather. Interpolated data were seasonally adjusted. 13 | W,The household survey was not conducted for this month due to bad weather. Data were interpolated. 14 | Y,Data reflect controlling to interpolated statewide totals because the survey was not conducted. 15 | -------------------------------------------------------------------------------- /samples/bls/unemployment/states.csv: -------------------------------------------------------------------------------- 1 | codeValue,name,identifier,alternateName,geo.latitude,geo.longitude 2 | ST0100000000000,Alabama,AL,Alabama,32.318231,-86.902298 3 | ST0200000000000,Alaska,AK,Alaska,63.588753,-154.493062 4 | ST0400000000000,Arizona,AZ,Arizona,34.048928,-111.093731 5 | ST0500000000000,Arkansas,AR,Arkansas,35.20105,-91.831833 6 | ST0600000000000,California,CA,California,36.778261,-119.417932 7 | ST0800000000000,Colorado,CO,Colorado,39.550051,-105.782067 8 | ST0900000000000,Connecticut,CT,Connecticut,41.603221,-73.087749 9 | ST1000000000000,Delaware,DE,Delaware,38.910832,-75.52767 10 | ST1100000000000,District of Columbia,DC,Washington DC,38.905985,-77.033418 11 | ST1200000000000,Florida,FL,Florida,27.664827,-81.515754 12 | ST1300000000000,Georgia,GA,Georgia,32.157435,-82.907123 13 | ST1500000000000,Hawaii,HI,Hawaii,19.898682,-155.665857 14 | ST1600000000000,Idaho,ID,Idaho,44.068202,-114.742041 15 | ST1700000000000,Illinois,IL,Illinois,40.633125,-89.398528 16 | ST1800000000000,Indiana,IN,Indiana,40.551217,-85.602364 17 | ST1900000000000,Iowa,IA,Iowa,41.878003,-93.097702 18 | ST2000000000000,Kansas,KS,Kansas,39.011902,-98.484246 19 | ST2100000000000,Kentucky,KY,Kentucky,37.839333,-84.270018 20 | ST2200000000000,Louisiana,LA,Louisiana,31.244823,-92.145024 21 | ST2300000000000,Maine,ME,Maine,45.253783,-69.445469 22 | ST2400000000000,Maryland,MD,Maryland,39.045755,-76.641271 23 | ST2500000000000,Massachusetts,MA,Massachusetts,42.407211,-71.382437 24 | ST2600000000000,Michigan,MI,Michigan,44.314844,-85.602364 25 | ST2700000000000,Minnesota,MN,Minnesota,46.729553,-94.6859 26 | ST2800000000000,Mississippi,MS,Mississippi,32.354668,-89.398528 27 | ST2900000000000,Missouri,MO,Missouri,37.964253,-91.831833 28 | ST3000000000000,Montana,MT,Montana,46.879682,-110.362566 29 | ST3100000000000,Nebraska,NE,Nebraska,41.492537,-99.901813 30 | ST3200000000000,Nevada,NV,Nevada,38.80261,-116.419389 31 | ST3300000000000,New Hampshire,NH,New Hampshire,43.193852,-71.572395 32 | ST3400000000000,New Jersey,NJ,New Jersey,40.058324,-74.405661 33 | ST3500000000000,New Mexico,NM,New Mexico,34.97273,-105.032363 34 | ST3600000000000,New York,NY,New York State,43.299428,-74.217933 35 | ST3700000000000,North Carolina,NC,N Carolina,35.759573,-79.0193 36 | ST3800000000000,North Dakota,ND,N Dakota,47.551493,-101.002012 37 | ST3900000000000,Ohio,OH,Ohio,40.417287,-82.907123 38 | ST4000000000000,Oklahoma,OK,Oklahoma,35.007752,-97.092877 39 | ST4100000000000,Oregon,OR,Oregon,43.804133,-120.554201 40 | ST4200000000000,Pennsylvania,PA,Pennsylvania,41.203322,-77.194525 41 | ST4400000000000,Rhode Island,RI,Rhode Island,41.580095,-71.477429 42 | ST4500000000000,South Carolina,SC,S Carolina,33.836081,-81.163725 43 | ST4600000000000,South Dakota,SD,S Dakota,43.969515,-99.901813 44 | ST4700000000000,Tennessee,TN,Tennessee,35.517491,-86.580447 45 | ST4800000000000,Texas,TX,Texas,31.968599,-99.901813 46 | ST4900000000000,Utah,UT,Utah,39.32098,-111.093731 47 | ST5000000000000,Vermont,VT,Vermont,44.558803,-72.577841 48 | 
ST5100000000000,Virginia,VA,Virginia,37.431573,-78.656894 49 | ST5300000000000,Washington,WA,Washington State,47.751074,-120.740139 50 | ST5400000000000,West Virginia,WV,W Virginia,38.597626,-80.454903 51 | ST5500000000000,Wisconsin,WI,Wisconsin,43.78444,-88.787868 52 | ST5600000000000,Wyoming,WY,Wyoming,43.075968,-107.290284 53 | ST7200000000000,Puerto Rico,PR,Puerto Rico,18.220833,-66.590149 54 | -------------------------------------------------------------------------------- /samples/eurostat/population_density/README.md: -------------------------------------------------------------------------------- 1 | # Population Density 2 | This is a small example with one categorical dimension, one measure, and one slice. 3 | 4 | The formats available are: 5 | 6 | * [HTML Microdata](eurostat_population_density.html) 7 | * [JSON-LD + CSV](eurostat_population_density.json) 8 | * [JSON-LD alone](eurostat_population_density-inline.json) 9 | -------------------------------------------------------------------------------- /samples/eurostat/population_density/eurostat_population_density.json: -------------------------------------------------------------------------------- 1 | { 2 | "@context": "http://schema.org", 3 | "@type": "StatisticalDataset", 4 | "@id": "", 5 | "url": "https://data.europa.eu/euodp/en/data/dataset/bAzn6fiusnRFOBwUeIo78w", 6 | "identifier": "met_d3dens", 7 | "name": "Eurostat Population Density", 8 | "description": "Population density by metropolitan regions", 9 | "dateCreated": "2015-10-16", 10 | "dateModified": "2019-06-18", 11 | "temporalCoverage": "1990-01-01/2016-01-01", 12 | "distribution": { 13 | "@type": "DataDownload", 14 | "contentUrl": "http://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?file=data/met_d3dens.tsv.gz&unzip=true", 15 | "encodingFormat": "text/tab-separated-values" 16 | }, 17 | "spatialCoverage":{ 18 | "@type":"Place", 19 | "geo":{ 20 | "@type":"GeoShape", 21 | "name": "European Union", 22 | "box":"34.633285 -10.468556 70.096054 34.597916" 23 | } 24 | }, 25 | "license": "https://ec.europa.eu/eurostat/about/policies/copyright", 26 | "creator":{ 27 | "@type":"Organization", 28 | "url": "https://ec.europa.eu/eurostat", 29 | "name":"Eurostat" 30 | }, 31 | "publisher": { 32 | "@type": "Organization", 33 | "name": "Eurostat", 34 | "url": "https://ec.europa.eu/eurostat", 35 | "contactPoint": { 36 | "@type": "ContactPoint", 37 | "contactType": "User Support", 38 | "url": "https://ec.europa.eu/eurostat/help/support" 39 | } 40 | }, 41 | "dimension": [ 42 | { 43 | "@type": "CategoricalDimension", 44 | "@id": "#metroreg", 45 | "dataset": {"@id": ""}, 46 | "codeList": "metroreg.csv" 47 | }, 48 | { 49 | "@type": "TimeDimension", 50 | "@id": "#year", 51 | "dataset": {"@id": ""}, 52 | "name": "year", 53 | "equivalentType": "xsd:Year", 54 | "dateFormat": "yyyy" 55 | } 56 | ], 57 | "measure": [ 58 | { 59 | "@type": "StatisticalMeasure", 60 | "@id": "#density", 61 | "dataset": {"@id": ""}, 62 | "name": "Population density", 63 | "unitText": "persons per square kilometre" 64 | } 65 | ], 66 | "footnote": [ 67 | { 68 | "@type": "StatisticalAnnotation", 69 | "@id": "#footnote=b", 70 | "dataset": {"@id": ""}, 71 | "codeValue": "b", 72 | "description": "break in time series" 73 | }, 74 | { 75 | "@type": "StatisticalAnnotation", 76 | "@id": "#footnote=c", 77 | "dataset": {"@id": ""}, 78 | "codeValue": "c", 79 | "description": "confidential" 80 | }, 81 | { 82 | "@type": "StatisticalAnnotation", 83 | "@id": "#footnote=d", 84 | "dataset": {"@id": ""}, 85 | 
"codeValue": "d", 86 | "description": "definition differs, see metadata" 87 | }, 88 | { 89 | "@type": "StatisticalAnnotation", 90 | "@id": "#footnote=e", 91 | "dataset": {"@id": ""}, 92 | "codeValue": "e", 93 | "description": "estimated" 94 | }, 95 | { 96 | "@type": "StatisticalAnnotation", 97 | "@id": "#footnote=f", 98 | "dataset": {"@id": ""}, 99 | "codeValue": "f", 100 | "description": "forecast" 101 | }, 102 | { 103 | "@type": "StatisticalAnnotation", 104 | "@id": "#footnote=n", 105 | "dataset": {"@id": ""}, 106 | "codeValue": "n", 107 | "description": "not significant" 108 | }, 109 | { 110 | "@type": "StatisticalAnnotation", 111 | "@id": "#footnote=p", 112 | "dataset": {"@id": ""}, 113 | "codeValue": "p", 114 | "description": "provisional" 115 | }, 116 | { 117 | "@type": "StatisticalAnnotation", 118 | "@id": "#footnote=r", 119 | "dataset": {"@id": ""}, 120 | "codeValue": "r", 121 | "description": "revised" 122 | }, 123 | { 124 | "@type": "StatisticalAnnotation", 125 | "@id": "#footnote=s", 126 | "dataset": {"@id": ""}, 127 | "codeValue": "s", 128 | "description": "Eurostat estimate" 129 | }, 130 | { 131 | "@type": "StatisticalAnnotation", 132 | "@id": "#footnote=u", 133 | "dataset": {"@id": ""}, 134 | "codeValue": "u", 135 | "description": "low reliability" 136 | }, 137 | { 138 | "@type": "StatisticalAnnotation", 139 | "@id": "#footnote=z", 140 | "dataset": {"@id": ""}, 141 | "codeValue": "z", 142 | "description": "not applicable" 143 | } 144 | ], 145 | "slice": { 146 | "@type": "DataSlice", 147 | "@id": "#metroreg_year", 148 | "dataset": {"@id": ""}, 149 | "dimension": ["#metroreg", "#year"], 150 | "measure": {"@id": "#density"}, 151 | "data": {"@id": "met_d3dens.csv"} 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /samples/eurostat/population_density/transform_d3dens.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2019 Google LLC 3 | # 4 | # Use of this source code is governed by a BSD-style 5 | # license that can be found in the LICENSE file or at 6 | # https://developers.google.com/open-source/licenses/bsd 7 | import pandas as pd 8 | 9 | 10 | # Read the file and set the index column to the metro region. 11 | df = pd.read_csv( 12 | 'http://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?file=data/met_d3dens.tsv.gz', 13 | delimiter='\t', 14 | index_col='metroreg\\time') 15 | 16 | # Stack the column headers into a single column's values, and make the metro 17 | # region a column again. 18 | df = df.stack().reset_index() 19 | 20 | # Rename the columns 21 | df.columns = ['metroreg', 'year', 'density'] 22 | 23 | # Strip surrounding whitespace from each value 24 | for col in df.columns: 25 | df[col] = df[col].str.strip() 26 | 27 | # Indicate that the year is an integer 28 | df['year'] = df['year'].astype(int) 29 | 30 | # Add a string-valued footnote column with default empty string. 
31 | df['density*'] = '' 32 | 33 | # Split up any values with footnotes between the value and footnote columns 34 | for idx, density in df.loc[df['density'].str.contains(' '), 35 | 'density'].iteritems(): 36 | density, footnote = density.split(' ') 37 | df.loc[idx, 'density'] = density 38 | df.loc[idx, 'density*'] = ';'.join(list(footnote)) 39 | 40 | # Remove the placeholder value of ':' 41 | df.loc[df['density'] == ':', 'density'] = None 42 | 43 | # Remove rows with no density 44 | df = df[pd.notnull(df['density'])] 45 | 46 | # And write the results to a CSV file. 47 | df.to_csv('met_d3dens.csv', index=False) 48 | -------------------------------------------------------------------------------- /samples/eurostat/population_density/transform_metroreg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2019 Google LLC 3 | # 4 | # Use of this source code is governed by a BSD-style 5 | # license that can be found in the LICENSE file or at 6 | # https://developers.google.com/open-source/licenses/bsd 7 | import pandas as pd 8 | 9 | 10 | # Read the input file. 11 | df = pd.read_csv('http://dd.eionet.europa.eu/vocabulary/eurostat/metroreg/csv') 12 | 13 | # Drop irrelevant columns 14 | df = df[['Notation', 'Label']] 15 | 16 | # Rename columns 17 | df.columns = ['codeValue', 'name'] 18 | 19 | # Write output file 20 | df.to_csv('metroreg.csv', index=False) 21 | -------------------------------------------------------------------------------- /samples/eurostat/unemployment/age_groups.csv: -------------------------------------------------------------------------------- 1 | "codeValue","name@en","name@fr","name@de" 2 | "y25-74","From 25 to 74 years","De 25 à 74 ans","25 bis 74 Jahre" 3 | "y_lt25","Less than 25 years","Moins de 25 ans","Weniger als 25 Jahre" 4 | -------------------------------------------------------------------------------- /samples/eurostat/unemployment/countries.csv: -------------------------------------------------------------------------------- 1 | "codeValue","alternateName","country_group","name@en","name@fr","name@de","latitude","longitude" 2 | "at","AT","eu","Austria","Autriche","Österreich","47.6965545","13.34598005" 3 | "be","BE","eu","Belgium","Belgique","Belgien","50.501045","4.47667405" 4 | "bg","BG","eu","Bulgaria","Bulgarie","Bulgarien","42.72567375","25.4823218" 5 | "hr","HR","non-eu","Croatia","Croatie","Kroatien","44.74664297","15.34084438" 6 | "cy","CY","eu","Cyprus","Chypre","Zypern","35.129141","33.4286823" 7 | "cz","CZ","eu","Czech Republic","République tchèque","Tschechische Republik","49.803531","15.47499805" 8 | "dk","DK","eu","Denmark","Danemark","Dänemark","55.93968425","9.51668905" 9 | "ee","EE","eu","Estonia","Estonie","Estland","58.5924685","25.8069503" 10 | "fi","FI","eu","Finland","Finlande","Finnland","64.95015875","26.06756405" 11 | "fr","FR","eu","France","France","Frankreich","46.7109945","1.7185608" 12 | "de","DE","eu","Germany (including former GDR from 1991)","Allemagne (incluant l'ancienne RDA à partir de 1991)","Deutschland (einschließlich der ehemaligen DDR seit 1991)","51.16382538","10.4540478" 13 | "gr","GR","eu","Greece","Grèce","Griechenland","39.698467","21.57725572" 14 | "hu","HU","eu","Hungary","Hongrie","Ungarn","47.16116325","19.5042648" 15 | "ie","IE","eu","Ireland","Irlande","Irland","53.41526","-8.2391222" 16 | "it","IT","eu","Italy","Italie","Italien","42.504191","12.57378705" 17 | "lv","LV","eu","Latvia","Lettonie","Lettland","56.880117","24.60655505" 18 | 
"lt","LT","eu","Lithuania","Lituanie","Litauen","55.173687","23.9431678" 19 | "lu","LU","eu","Luxembourg","Luxembourg","Luxemburg","49.815319","6.13335155" 20 | "mt","MT","eu","Malta","Malte","Malta","35.902422","14.4474608" 21 | "nl","NL","eu","Netherlands","Pays-Bas","Niederlande","52.10811825","5.3301983" 22 | "no","NO","non-eu","Norway","Norvège","Norwegen","64.55645975","12.66576565" 23 | "pl","PL","eu","Poland","Pologne","Polen","51.91890725","19.1343338" 24 | "pt","PT","eu","Portugal","Portugal","Portugal","39.55806875","-7.84494095" 25 | "ro","RO","eu","Romania","Roumanie","Rumänien","45.94261125","24.99015155" 26 | "sk","SK","eu","Slovakia","Slovaquie","Slowakei","48.67264375","19.7000323" 27 | "si","SI","eu","Slovenia","Slovénie","Slowenien","46.14925925","14.98661705" 28 | "es","ES","eu","Spain","Espagne","Spanien","39.8950135","-2.9882957" 29 | "se","SE","eu","Sweden","Suède","Schweden","62.1984675","14.89630657" 30 | "tr","TR","non-eu","Turkey","Turquie","Türkei","38.95294205","35.43979471" 31 | "uk","GB","eu","United Kingdom","Royaume-Uni","Vereinigtes Königreich","54.315447","-2.23261195" 32 | -------------------------------------------------------------------------------- /samples/eurostat/unemployment/country_groups.csv: -------------------------------------------------------------------------------- 1 | codeValue,name@en,name@fr,name@de 2 | eu,"European Union","Union européenne","Europäische Union" 3 | non-eu,"Non EU countries","Pays hors Union européenne",Nicht-EU-Länder 4 | -------------------------------------------------------------------------------- /samples/eurostat/unemployment/eurostat-unemployment-dspl-v1.json: -------------------------------------------------------------------------------- 1 | { 2 | "@context": [ 3 | "http://schema.org", 4 | { 5 | "name": { "@container": "@language" }, 6 | "description": { "@container": "@language" }, 7 | "url": { "@container": "@language" } 8 | } 9 | ], 10 | "@type": "StatisticalDataset", 11 | "@id": "#eurostat-unemployment", 12 | "name": { 13 | "en": "Unemployment in Europe (monthly)", 14 | "de": "Arbeitslosigkeit in Europa (monatlich)", 15 | "fr": "Le Chômage en Europe (mensuel)" 16 | }, 17 | "description": { 18 | "en": "Harmonized unemployment data for European countries. This dataset was prepared by Google based on data downloaded from Eurostat.", 19 | "de": "Harmonisierte Daten zur Arbeitslosigkeit für europäische Länder. Dieser Datensatz wurde von Google aufbereitet, basierend auf online Daten von Eurostat.", 20 | "fr": "Données harmonisées sur le chômage dans les pays européens. Ces données ont été préparées par Google sur la base de données téléchargées à partir d'Eurostat." 
21 | }, 22 | "url": { 23 | "en": "http://epp.eurostat.ec.europa.eu/portal/page/portal/lang-en/employment_unemployment_lfs/introduction", 24 | "de": "http://epp.eurostat.ec.europa.eu/portal/page/portal/lang-de/employment_unemployment_lfs/introduction", 25 | "fr": "http://epp.eurostat.ec.europa.eu/portal/page/portal/lang-fr/employment_unemployment_lfs/introduction" 26 | }, 27 | "license": "https://ec.europa.eu/eurostat/about/policies/copyright", 28 | "creator":{ 29 | "@type":"Organization", 30 | "url": "https://ec.europa.eu/eurostat", 31 | "name":"Eurostat", 32 | "contactPoint": [ 33 | { 34 | "@type":"ContactPoint", 35 | "name": "Eurostat Multilingual User Support Network", 36 | "contactType": "Central Support", 37 | "telephone":"+352 4301 36789" 38 | }, 39 | { 40 | "@type":"ContactPoint", 41 | "name": "Eurostat Multilingual User Support Network", 42 | "contactType": "Republic of Ireland", 43 | "availableLanguage": "en", 44 | "telephone":"+353 151 33080" 45 | } 46 | ] 47 | }, 48 | "distribution":[ 49 | { 50 | "@type":"DataDownload", 51 | "encodingFormat":"text/tab-separated-values", 52 | "contentUrl":"https://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?sort=1&file=data%2Fei_lmhu_m.tsv.gz" 53 | }, 54 | { 55 | "@type":"DataDownload", 56 | "encodingFormat":"application/vnd.sdmx.genericdata+xml;version=2.0", 57 | "contentUrl":"https://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?sort=1&file=data%2Fei_lmhu_m.sdmx.zip" 58 | } 59 | ], 60 | "temporalCoverage":"1993-01/2010-12", 61 | "spatialCoverage":{ 62 | "@type":"Place", 63 | "geo":{ 64 | "@type":"GeoShape", 65 | "name": "European Union", 66 | "box":"34.633285 -10.468556 70.096054 34.597916" 67 | } 68 | }, 69 | "measure": [ 70 | { 71 | "@type": "StatisticalMeasure", 72 | "@id": "#unemployment", 73 | "sameAs": "https://www.wikidata.org/wiki/Q41171", 74 | "name": { 75 | "en": "Unemployment (monthly)", 76 | "de": "Arbeitslosigkeit (monatlich)", 77 | "fr": "Chômeurs (mensuel)" 78 | }, 79 | "description": { 80 | "en": "The total number of people unemployed", 81 | "de": "Anzahl der Arbeitslosen", 82 | "fr": "Le nombre total de chômeurs" 83 | }, 84 | "url": { 85 | "en": "http://ec.europa.eu/eurostat/product?code=une_nb_m&language=en", 86 | "de": "http://ec.europa.eu/eurostat/product?code=une_nb_m&language=de", 87 | "fr": "http://ec.europa.eu/eurostat/product?code=une_nb_m&language=fr" 88 | }, 89 | "unitCode": "IE" 90 | }, 91 | { 92 | "@type": "StatisticalMeasure", 93 | "@id": "#unemployment_rate", 94 | "sameAs": "https://www.wikidata.org/wiki/Q1787954", 95 | "name": { 96 | "en": "Unemployment rate (monthly)", 97 | "de": "Arbeitslosenquote (monatlich)", 98 | "fr": "Taux de chômage (mensuel)" 99 | }, 100 | "description": { 101 | "en": "The unemployment rate represents unemployed persons as a percentage of the labour force. The labour force is the total number of people employed and unemployed.", 102 | "de": "Die Arbeitslosenquote ist definiert als der prozentuale Anteil der Arbeitslosen an den Erwerbspersonen. Die Erwerbspersonen umfassen die Erwerbstätigen und die Arbeitslosen.", 103 | "fr": "Le taux de chômage représente le pourcentage de chômeurs dans la population active. La population active représente le nombre total des personnes ayant un emploi ou étant au chômage."
104 | }, 105 | "url": { 106 | "en": "http://ec.europa.eu/eurostat/product?code=une_rt_m&language=en", 107 | "de": "http://ec.europa.eu/eurostat/product?code=une_rt_m&language=de", 108 | "fr": "http://ec.europa.eu/eurostat/product?code=une_rt_m&language=fr" 109 | }, 110 | "unitCode": "P1" 111 | } 112 | ], 113 | "dimension": [ 114 | { 115 | "@type": "CategoricalDimension", 116 | "@id": "#country_group", 117 | "codeList": "country_groups.csv" 118 | }, 119 | { 120 | "@type": "CategoricalDimension", 121 | "@id": "#country", 122 | "codeList": "countries.csv", 123 | "equivalentType": "Country" 124 | }, 125 | { 126 | "@type": "CategoricalDimension", 127 | "@id": "#age_group", 128 | "codeList": "age_groups.csv" 129 | }, 130 | { 131 | "@type": "CategoricalDimension", 132 | "@id": "#sex", 133 | "codeList": "sexes.csv" 134 | }, 135 | { 136 | "@type": "CategoricalDimension", 137 | "@id": "#seasonality", 138 | "codeList": "seasonalities.csv" 139 | }, 140 | { 141 | "@type": "TimeDimension", 142 | "@id": "#month", 143 | "equivalentType": "xsd:gYearMonth", 144 | "datePattern": "yyyy.MM" 145 | } 146 | ], 147 | "footnote": "footnotes.csv", 148 | "slice": [ 149 | { 150 | "@type": "DataSlice", 151 | "@id": "#country_age", 152 | "dimension": [ 153 | "#country", 154 | "#age_group", 155 | "#month" 156 | ], 157 | "measure": [ 158 | "#unemployment", 159 | "#unemployment_rate" 160 | ], 161 | "observation": "country_age.csv" 162 | }, 163 | { 164 | "@type": "DataSlice", 165 | "@id": "#country_group_age", 166 | "dimension": [ 167 | "#country_group", 168 | "#age_group", 169 | "#month" 170 | ], 171 | "measure": [ 172 | "#unemployment", 173 | "#unemployment_rate" 174 | ], 175 | "observation": "country_group_age.csv" 176 | }, 177 | { 178 | "@type": "DataSlice", 179 | "@id": "#country_group_sex_age", 180 | "dimension": [ 181 | "#country_group", 182 | "#sex", 183 | "#age_group", 184 | "#month" 185 | ], 186 | "measure": [ 187 | "#unemployment", 188 | "#unemployment_rate" 189 | ], 190 | "observation": "country_group_sex_age.csv" 191 | }, 192 | { 193 | "@type": "DataSlice", 194 | "@id": "#country_group_sex", 195 | "dimension": [ 196 | "#country_group", 197 | "#sex", 198 | "#month" 199 | ], 200 | "measure": [ 201 | "#unemployment", 202 | "#unemployment_rate" 203 | ], 204 | "observation": "country_group_sex.csv" 205 | }, 206 | { 207 | "@type": "DataSlice", 208 | "@id": "#country_group_total", 209 | "dimension": [ 210 | "#country_group", 211 | "#month" 212 | ], 213 | "measure": [ 214 | "#unemployment", 215 | "#unemployment_rate" 216 | ], 217 | "observation": "country_group_total.csv" 218 | }, 219 | { 220 | "@type": "DataSlice", 221 | "@id": "#country_sex_age", 222 | "dimension": [ 223 | "#country", 224 | "#sex", 225 | "#age_group", 226 | "#month" 227 | ], 228 | "measure": [ 229 | "#unemployment", 230 | "#unemployment_rate" 231 | ], 232 | "observation": "country_sex_age.csv" 233 | }, 234 | { 235 | "@type": "DataSlice", 236 | "@id": "#country_sex", 237 | "dimension": [ 238 | "#country", 239 | "#sex", 240 | "#month" 241 | ], 242 | "measure": [ 243 | "#unemployment", 244 | "#unemployment_rate" 245 | ], 246 | "observation": "country_sex.csv" 247 | }, 248 | { 249 | "@type": "DataSlice", 250 | "@id": "#country_total", 251 | "dimension": [ 252 | "#country", 253 | "#month" 254 | ], 255 | "measure": [ 256 | "#unemployment", 257 | "#unemployment_rate" 258 | ], 259 | "observation": "country_total.csv" 260 | } 261 | ] 262 | } 263 | -------------------------------------------------------------------------------- 
/samples/eurostat/unemployment/footnotes.csv: -------------------------------------------------------------------------------- 1 | codeValue,description 2 | p,This value is a projection 3 | r,This value has been revised 4 | -------------------------------------------------------------------------------- /samples/eurostat/unemployment/seasonalities.csv: -------------------------------------------------------------------------------- 1 | "codeValue","name@en","name@fr","name@de" 2 | "nsa","Not seasonally adjusted data","Données non désaisonnalisées","Nichtsaisonbereinigte Daten" 3 | "sa","Seasonally adjusted data","Données désaisonnalisées","Saisonbereinigte Daten" 4 | "trend","Trend cycle","Tendance-cycle","Trend (glatte Komponente)" 5 | -------------------------------------------------------------------------------- /samples/eurostat/unemployment/sexes.csv: -------------------------------------------------------------------------------- 1 | "codeValue","name@en","name@fr","name@de" 2 | "f","Females","Femmes","Frauen" 3 | "m","Males","Hommes","Männer" 4 | -------------------------------------------------------------------------------- /samples/google/canonical/currencies.csv: -------------------------------------------------------------------------------- 1 | currency,name,symbol 2 | AED,"UAE Dirham", 3 | AFN,Afghani,؋ 4 | ALL,Lek,Lek 5 | AMD,"Armenian Dram", 6 | ANG,"Netherlands Antillian Guilder",ƒ 7 | AOA,Kwanza, 8 | ARS,"Argentine Peso",$ 9 | AUD,"Australian Dollar",$ 10 | AWG,"Aruban Guilder",ƒ 11 | AZN,"Azerbaijanian Manat",ман 12 | BAM,"Convertible Marks",KM 13 | BBD,"Barbados Dollar",$ 14 | BDT,Taka, 15 | BGN,"Bulgarian Lev",лв 16 | BHD,"Bahraini Dinar", 17 | BIF,"Burundi Franc", 18 | BMD,"Bermudian Dollar (customarily known as Bermuda Dollar)",$ 19 | BND,"Brunei Dollar",$ 20 | "BOB BOV","Boliviano Mvdol",$b 21 | BRL,"Brazilian Real",R$ 22 | BSD,"Bahamian Dollar",$ 23 | BWP,Pula,P 24 | BYR,"Belarussian Ruble",p. 
25 | BZD,"Belize Dollar",BZ$ 26 | CAD,"Canadian Dollar",$ 27 | CDF,"Congolese Franc", 28 | CHF,"Swiss Franc",CHF 29 | "CLP CLF","Chilean Peso Unidades de fomento",$ 30 | CNY,"Yuan Renminbi",¥ 31 | "COP COU","Colombian Peso Unidad de Valor Real",$ 32 | CRC,"Costa Rican Colon",₡ 33 | "CUP CUC","Cuban Peso Peso Convertible",₱ 34 | CVE,"Cape Verde Escudo", 35 | CZK,"Czech Koruna",Kč 36 | DJF,"Djibouti Franc", 37 | DKK,"Danish Krone",kr 38 | DOP,"Dominican Peso",RD$ 39 | DZD,"Algerian Dinar", 40 | EEK,Kroon, 41 | EGP,"Egyptian Pound",£ 42 | ERN,Nakfa, 43 | ETB,"Ethiopian Birr", 44 | EUR,Euro,€ 45 | FJD,"Fiji Dollar",$ 46 | FKP,"Falkland Islands Pound",£ 47 | GBP,"Pound Sterling",£ 48 | GEL,Lari, 49 | GHS,Cedi, 50 | GIP,"Gibraltar Pound",£ 51 | GMD,Dalasi, 52 | GNF,"Guinea Franc", 53 | GTQ,Quetzal,Q 54 | GYD,"Guyana Dollar",$ 55 | HKD,"Hong Kong Dollar",$ 56 | HNL,Lempira,L 57 | HRK,"Croatian Kuna",kn 58 | "HTG USD","Gourde US Dollar", 59 | HUF,Forint,Ft 60 | IDR,Rupiah,Rp 61 | ILS,"New Israeli Sheqel",₪ 62 | INR,"Indian Rupee", 63 | "INR BTN","Indian Rupee Ngultrum", 64 | IQD,"Iraqi Dinar", 65 | IRR,"Iranian Rial",﷼ 66 | ISK,"Iceland Krona",kr 67 | JMD,"Jamaican Dollar",J$ 68 | JOD,"Jordanian Dinar", 69 | JPY,Yen,¥ 70 | KES,"Kenyan Shilling", 71 | KGS,Som,лв 72 | KHR,Riel,៛ 73 | KMF,"Comoro Franc", 74 | KPW,"North Korean Won",₩ 75 | KRW,Won,₩ 76 | KWD,"Kuwaiti Dinar", 77 | KYD,"Cayman Islands Dollar",$ 78 | KZT,Tenge,лв 79 | LAK,Kip,₭ 80 | LBP,"Lebanese Pound",£ 81 | LKR,"Sri Lanka Rupee",₨ 82 | LRD,"Liberian Dollar",$ 83 | LTL,"Lithuanian Litas",Lt 84 | LVL,"Latvian Lats",Ls 85 | LYD,"Libyan Dinar", 86 | MAD,"Moroccan Dirham", 87 | MDL,"Moldovan Leu", 88 | MGA,"Malagasy Ariary", 89 | MKD,Denar,ден 90 | MMK,Kyat, 91 | MNT,Tugrik,₮ 92 | MOP,Pataca, 93 | MRO,Ouguiya, 94 | MUR,"Mauritius Rupee",₨ 95 | MVR,Rufiyaa, 96 | MWK,Kwacha, 97 | "MXN MXV","Mexican Peso Mexican Unidad de Inversion (UDI)",$ 98 | MYR,"Malaysian Ringgit",RM 99 | MZN,Metical,MT 100 | NGN,Naira,₦ 101 | NIO,"Cordoba Oro",C$ 102 | NOK,"Norwegian Krone",kr 103 | NPR,"Nepalese Rupee",₨ 104 | NZD,"New Zealand Dollar",$ 105 | OMR,"Rial Omani",﷼ 106 | "PAB USD","Balboa US Dollar",B/. 107 | PEN,"Nuevo Sol",S/. 108 | PGK,Kina, 109 | PHP,"Philippine Peso",Php 110 | PKR,"Pakistan Rupee",₨ 111 | PLN,Zloty,zł 112 | PYG,Guarani,Gs 113 | QAR,"Qatari Rial",﷼ 114 | RON,"New Leu",lei 115 | RSD,"Serbian Dinar",Дин. 
116 | RUB,"Russian Ruble",руб 117 | RWF,"Rwanda Franc", 118 | SAR,"Saudi Riyal",﷼ 119 | SBD,"Solomon Islands Dollar",$ 120 | SCR,"Seychelles Rupee",₨ 121 | SDG,"Sudanese Pound", 122 | SEK,"Swedish Krona",kr 123 | SGD,"Singapore Dollar",$ 124 | SHP,"Saint Helena Pound",£ 125 | SLL,Leone, 126 | SOS,"Somali Shilling",S 127 | SRD,"Surinam Dollar",$ 128 | STD,Dobra, 129 | "SVC USD","El Salvador Colon US Dollar",$ 130 | SYP,"Syrian Pound",£ 131 | SZL,Lilangeni, 132 | THB,Baht,฿ 133 | TJS,Somoni, 134 | TMT,Manat, 135 | TND,"Tunisian Dinar", 136 | TOP,Pa'anga, 137 | TRY,"Turkish Lira",TL 138 | TTD,"Trinidad and Tobago Dollar",TT$ 139 | TWD,"New Taiwan Dollar",NT$ 140 | TZS,"Tanzanian Shilling", 141 | UAH,Hryvnia,₴ 142 | UGX,"Uganda Shilling", 143 | USD,"US Dollar",$ 144 | "UYU UYI","Peso Uruguayo Uruguay Peso en Unidades Indexadas",$U 145 | UZS,"Uzbekistan Sum",лв 146 | VEF,"Bolivar Fuerte",Bs 147 | VND,Dong,₫ 148 | VUV,Vatu, 149 | WST,Tala, 150 | XAF,"CFA Franc BEAC", 151 | XAG,Silver, 152 | XAU,Gold, 153 | XBA,"Bond Markets Units European Composite Unit (EURCO)", 154 | XBB,"European Monetary Unit (E.M.U.-6)", 155 | XBC,"European Unit of Account 9(E.U.A.-9)", 156 | XBD,"European Unit of Account 17(E.U.A.-17)", 157 | XCD,"East Caribbean Dollar",$ 158 | XDR,SDR, 159 | XFU,UIC-Franc, 160 | XOF,"CFA Franc BCEAO", 161 | XPD,Palladium, 162 | XPF,"CFP Franc", 163 | XPT,Platinum, 164 | XTS,"Codes specifically reserved for testing purposes", 165 | YER,"Yemeni Rial",﷼ 166 | ZAR,Rand,R 167 | "ZAR LSL","Rand Loti", 168 | "ZAR NAD","Rand Namibia Dollar", 169 | ZMK,"Zambian Kwacha", 170 | ZWL,"Zimbabwe Dollar", 171 | -------------------------------------------------------------------------------- /samples/google/canonical/entity_order.csv: -------------------------------------------------------------------------------- 1 | entity_order 2 | ALPHA 3 | TABLE 4 | -------------------------------------------------------------------------------- /samples/google/canonical/geo.us.xml: -------------------------------------------------------------------------------- 1 | 2 | 32 | 35 | 36 | 37 | 38 | 39 | 40 | US geographical concepts 41 | 42 | 43 | Canonical concepts for US geographical data. 44 | 45 | 46 | http://code.google.com/apis/publicdata/docs/canonical/geo.us.html 47 | 48 | 49 | 50 | 51 | 52 | Google Inc. 53 | 54 | 55 | Google Inc. 56 | 57 | 58 | http://www.google.com 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | US State 67 | 68 | 69 | A US State, identified by its two letter code. 70 | 71 | 72 | States 73 | 74 | 75 | All US 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 |
85 | 86 | 87 | 88 | 89 | 90 | US 91 | 92 | 93 | states.csv 94 | 95 |
96 | 97 |
98 | -------------------------------------------------------------------------------- /samples/google/canonical/geo.xml: -------------------------------------------------------------------------------- 1 | 2 | 32 | 35 | 36 | 37 | 38 | 39 | 40 | Geographical concepts 41 | 42 | 43 | Canonical concepts for geographical data. 44 | 45 | 46 | http://code.google.com/apis/publicdata/docs/canonical/geo.html 47 | 48 | 49 | 50 | 51 | 52 | Google Inc. 53 | 54 | 55 | Google Inc. 56 | 57 | 58 | http://www.google.com 59 | 60 | 61 | 62 | 63 | 64 | 65 | Location 66 | 67 | Base concept for locations. 68 | 69 | 70 | 71 | 72 | 73 | Latitude 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | Longitude 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | Country or Territory 92 | 93 | 94 | A country or territory, identified by its ISO-3166-1 2-letter code. 95 | 96 | 97 | Countries 98 | 99 | 100 | World 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 |
109 | 110 | 111 | 112 | 113 | 114 | countries.csv 115 | 116 |
117 | 118 |
119 | -------------------------------------------------------------------------------- /samples/google/canonical/granularity.csv: -------------------------------------------------------------------------------- 1 | granularity 2 | YEARLY 3 | QUARTERLY 4 | MONTHLY 5 | WEEKLY 6 | DAILY -------------------------------------------------------------------------------- /samples/google/canonical/states.csv: -------------------------------------------------------------------------------- 1 | state,latitude,longitude,name 2 | AK,63.588753,-154.493062,Alaska 3 | AL,32.318231,-86.902298,Alabama 4 | AR,35.20105,-91.831833,Arkansas 5 | AZ,34.048928,-111.093731,Arizona 6 | CA,36.778261,-119.417932,California 7 | CO,39.550051,-105.782067,Colorado 8 | CT,41.603221,-73.087749,Connecticut 9 | DC,38.905985,-77.033418,"District of Columbia" 10 | DE,38.910832,-75.52767,Delaware 11 | FL,27.664827,-81.515754,Florida 12 | GA,32.157435,-82.907123,Georgia 13 | HI,19.898682,-155.665857,Hawaii 14 | IA,41.878003,-93.097702,Iowa 15 | ID,44.068202,-114.742041,Idaho 16 | IL,40.633125,-89.398528,Illinois 17 | IN,40.551217,-85.602364,Indiana 18 | KS,39.011902,-98.484246,Kansas 19 | KY,37.839333,-84.270018,Kentucky 20 | LA,31.244823,-92.145024,Louisiana 21 | MA,42.407211,-71.382437,Massachusetts 22 | MD,39.045755,-76.641271,Maryland 23 | ME,45.253783,-69.445469,Maine 24 | MI,44.314844,-85.602364,Michigan 25 | MN,46.729553,-94.6859,Minnesota 26 | MO,37.964253,-91.831833,Missouri 27 | MS,32.354668,-89.398528,Mississippi 28 | MT,46.879682,-110.362566,Montana 29 | NC,35.759573,-79.0193,"North Carolina" 30 | ND,47.551493,-101.002012,"North Dakota" 31 | NE,41.492537,-99.901813,Nebraska 32 | NH,43.193852,-71.572395,"New Hampshire" 33 | NJ,40.058324,-74.405661,"New Jersey" 34 | NM,34.97273,-105.032363,"New Mexico" 35 | NV,38.80261,-116.419389,Nevada 36 | NY,43.299428,-74.217933,"New York" 37 | OH,40.417287,-82.907123,Ohio 38 | OK,35.007752,-97.092877,Oklahoma 39 | OR,43.804133,-120.554201,Oregon 40 | PA,41.203322,-77.194525,Pennsylvania 41 | PR,18.220833,-66.590149,"Puerto Rico" 42 | RI,41.580095,-71.477429,"Rhode Island" 43 | SC,33.836081,-81.163725,"South Carolina" 44 | SD,43.969515,-99.901813,"South Dakota" 45 | TN,35.517491,-86.580447,Tennessee 46 | TX,31.968599,-99.901813,Texas 47 | UT,39.32098,-111.093731,Utah 48 | VA,37.431573,-78.656894,Virginia 49 | VT,44.558803,-72.577841,Vermont 50 | WA,47.751074,-120.740139,Washington 51 | WI,43.78444,-88.787868,Wisconsin 52 | WV,38.597626,-80.454903,"West Virginia" 53 | WY,43.075968,-107.290284,Wyoming 54 | -------------------------------------------------------------------------------- /samples/google/canonical/time.xml: -------------------------------------------------------------------------------- 1 | 2 | 32 | 34 | 35 | 36 | 37 | Google date and time 38 | 39 | 40 | Google date and time dataset 41 | 42 | 43 | http://code.google.com/apis/publicdata/docs/canonical/time.html 44 | 45 | 46 | 47 | Google Inc. 48 | Google Inc. 49 | http://www.google.com 50 | 51 | 52 | 53 | 54 | 55 | Point in time 56 | Point in time, with a given granularity. 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | Year date 65 | 66 | 67 | A date with yearly granularity. 68 | 69 | The year concept is usually used directly in a slice definition to define a dimension that 70 | contains year. For example, a slice for yearly population by country would be defined 71 | as follows: 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | ]]> 81 | 82 | The table definition for this slice would be defined as follows: 83 | 84 | 85 | ... 86 |
87 | 88 | 89 | 90 | 91 | country_slice.csv 92 | 93 |
94 | ... 95 | ]]> 96 | 97 | 98 | And the data contained in the CSV file for this table would look like: 99 | 100 | country, year, population 101 | AF, 1960, 9616353 102 | AF, 1961, 9799379 103 | AF, 1962, 9989846 104 | AF, 1963, 10188299 105 | ... 106 |
107 |
108 |
109 | 110 | YEARLY 111 | 112 |
113 | 114 | 115 | 116 | Quarter date 117 | 118 | 119 | A date with quarterly granularity. 120 | See the example for the year concept above. 121 | 122 | 123 | 124 | 125 | QUARTERLY 126 | 127 | 128 | 129 | 130 | 131 | 132 | Month date 133 | 134 | 135 | A date with monthly granularity. 136 | See the example for the year concept above. 137 | 138 | 139 | 140 | 141 | MONTHLY 142 | 143 | 144 | 145 | 146 | 147 | Week date 148 | 149 | 150 | A date with weekly granularity. 151 | See the example for the year concept above. 152 | 153 | 154 | 155 | 156 | WEEKLY 157 | 158 | 159 | 160 | 161 | 162 | Day date 163 | 164 | 165 | A date with daily granularity. 166 | See the example for the year concept above. 167 | 168 | 169 | 170 | 171 | DAILY 172 | 173 | 174 | 175 | 176 | 177 | Granularity 178 | 179 | 180 | Granularity of time, i.e., the uncertainty that 181 | a point in time may be anywhere within some time interval. 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 |
192 | 193 | 194 | granularity.csv 195 | 196 |
197 | 198 |
199 | -------------------------------------------------------------------------------- /samples/google/canonical/unit.xml: -------------------------------------------------------------------------------- 1 | 2 | 32 | 35 | 36 | 37 | 38 | 39 | 40 | Unit concepts 41 | 42 | 43 | Concepts for representing units. 44 | 45 | 46 | http://code.google.com/apis/publicdata/docs/canonical/unit.html 47 | 48 | 49 | 50 | 51 | 52 | Google Inc. 53 | 54 | 55 | Google Inc. 56 | 57 | 58 | http://www.google.com 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | Unit 67 | 68 | 69 | 70 | Specifies the unit associated with a metric concept. 71 | 72 | Example: 73 | 74 | 75 | 76 | 77 | Area in square kilometers 78 | 79 | 80 | 81 | ]]> 82 | 83 | The table contains a single row with the property values: 84 | 85 | symbol,symbol_position,unit_text 86 | km²,END,square kilometers 87 | 88 | One can then use this unit in defining a metric concept: 89 | 90 | 91 | 92 | 93 | Country area in square kilometers 94 | 95 | 96 | 97 |
98 | ]]> 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | The symbol associated with a unit. 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | Unit text 116 | 117 | 118 | 119 | Descriptive text that can be displayed next to a value. 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | Unit symbol position 130 | 131 | 132 | Unit symbol position 133 | 134 | 135 | 136 | END 137 |
138 | 139 | 140 | 141 | 142 | 143 | Currency unit 144 | 145 | 146 | 147 | Specifies the currency associated with a metric concept. 148 | Each currency is identified by its 3-letter currency code (ISO 4217). 149 | 150 | 151 | 152 |
153 | 154 | 155 | 156 | 157 |
158 | 159 | 160 | unit_symbol_positions.csv 161 | 162 |
163 | 164 | 165 | 166 | 167 | 168 | 169 | currencies.csv 170 | 171 |
172 | 173 |
174 | -------------------------------------------------------------------------------- /samples/google/canonical/unit_symbol_positions.csv: -------------------------------------------------------------------------------- 1 | symbol_position 2 | START 3 | END 4 | -------------------------------------------------------------------------------- /samples/google/dspl-sample/countries.csv: -------------------------------------------------------------------------------- 1 | country,name,latitude,longitude 2 | AD,Andorra,42.546245,1.601554 3 | AF,Afghanistan,33.93911,67.709953 4 | AI,Anguilla,18.220554,-63.068615 5 | AL,Albania,41.153332,20.168331 6 | US,United States,37.09024,-95.712891 7 | -------------------------------------------------------------------------------- /samples/google/dspl-sample/country_slice.csv: -------------------------------------------------------------------------------- 1 | country,year,population 2 | AF,1960,9616353 3 | AF,1961,9799379 4 | AF,1962,9989846 5 | AF,1963,10188299 6 | AD,1960,8616353 7 | AD,1961,8799379 8 | AD,1962,8989846 9 | AD,1963,9188299 10 | US,1960,19616353 11 | US,1961,19799379 12 | US,1962,19989846 13 | US,1963,110188299 -------------------------------------------------------------------------------- /samples/google/dspl-sample/gender_country_slice.csv: -------------------------------------------------------------------------------- 1 | country,gender,year,population 2 | AF,M,1960,4808176 3 | AF,M,1961,4899689 4 | AF,F,1960,4808177 5 | AF,F,1961,4899690 6 | AD,M,1960,3808176 7 | AD,M,1961,3899689 8 | AD,F,1960,3808177 9 | AD,F,1961,3899690 10 | US,M,1960,9808176 11 | US,M,1961,9899689 12 | US,F,1960,9808177 13 | US,F,1961,9899690 -------------------------------------------------------------------------------- /samples/google/dspl-sample/genders.csv: -------------------------------------------------------------------------------- 1 | gender,name 2 | M,Male 3 | F,Female 4 | -------------------------------------------------------------------------------- /samples/google/dspl-sample/state_slice.csv: -------------------------------------------------------------------------------- 1 | state,year,population,unemployment_rate 2 | AL,1960,9616353,5.1 3 | AL,1961,9799379,5.2 4 | AL,1962,9989846,4.8 5 | AL,1963,10188299,6.9 6 | AK,1960,8616353,6.1 7 | AK,1961,8799379,6.2 8 | AK,1962,8989846,7.8 9 | AK,1963,9188299,7.9 -------------------------------------------------------------------------------- /samples/google/dspl-sample/states.csv: -------------------------------------------------------------------------------- 1 | state,name,latitude,longitude 2 | AL,Alabama,32.318231,-86.902298 3 | AK,Alaska,63.588753,-154.493062 4 | AR,Arkansas,35.20105,-91.831833 5 | AZ,Arizona,34.048928,-111.093731 6 | CA,California,36.778261,-119.417932 7 | CO,Colorado,39.550051,-105.782067 8 | CT,Connecticut,41.603221,-73.087749 9 | -------------------------------------------------------------------------------- /samples/us_census/retail_sales/businesses.csv: -------------------------------------------------------------------------------- 1 | "business","name","parent" 2 | "44x72","Retail and Food services", 3 | "44000","Retail services","44x72" 4 | "44100","Motor Vehicle and Parts Dealers","44000" 5 | "44200","Furniture and Home Furnishings Stores","44000" 6 | "44300","Electronics and Appliance Stores","44000" 7 | "44400","Building Material and Garden Equipment and Supplies Dealers","44000" 8 | "44500","Food and Beverage Stores","44000" 9 | "44510","Grocery Stores","44500" 10 | 
"44600","Health and Personal Care Stores","44000" 11 | "44700","Gasoline Stations","44000" 12 | "44800","Clothing and Clothing Accessories Stores","44000" 13 | "45100","Sporting Goods, Hobby, Book, and Music Stores","44000" 14 | "45200","General Merchandise Stores","44000" 15 | "45210","Department Stores (excluding leased department stores)","45200" 16 | "45300","Miscellaneous Store Retailers","44000" 17 | "45400","Nonstore Retailers","44000" 18 | "72200","Food Services and Drinking Places","44x72" 19 | "44xxx","Other Aggregates", 20 | "44y72","Retail and Food services (excluding motor vehicles)","44xxx" 21 | "4400a","Retail Services (excluding Motor Vehicle and Parts Dealers)","44xxx" 22 | "441x0","Auto and other Motor Vehicle","44xxx" 23 | -------------------------------------------------------------------------------- /samples/us_census/retail_sales/census-retail-sales.xml: -------------------------------------------------------------------------------- 1 | 2 | 32 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | Retail Sales in the U.S. 48 | 49 | 50 | Monthly Retail Trade and Food Services report 51 | for the United States. This dataset was prepared by Google based 52 | on data downloaded from the U.S. Census Bureau. 53 | 54 | 55 | http://www.census.gov/retail/ 56 | 57 | 58 | 59 | 60 | 61 | U.S. Census Bureau 62 | 63 | 64 | U.S. Census Bureau 65 | 66 | 67 | http://www.census.gov/retail/ 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | Industry 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | Type of business 86 | 87 | 88 | The principal kind of business being conducted at an establishment 89 | 90 | 91 | Types of business 92 | 93 | 94 | 95 | 96 | 97 | TABLE 98 | 99 | 100 | 101 | 102 | Parent Business 103 | 104 | 105 | 106 | 44x72 107 | 108 | 109 | 110 | 111 | 112 | 113 | Seasonality 114 | 115 | 116 | Are values seasonally adjusted or not 117 | 118 | 119 | Seasonalities 120 | 121 | 122 | 123 | Seasonally Adjusted 124 |
125 | 126 | 127 | 128 | 129 | 130 | 131 | Retail Sales Volume 132 | 133 | 134 | Sales include merchandise sold by establishments primarily engaged in retail trade. 135 | 136 | 137 | 138 | 139 | 140 | USD 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 |
152 | 153 | 154 |
155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | businesses.csv 165 | 166 |
167 | 168 | 169 | 170 | 171 | seasonalities.csv 172 | 173 |
174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | retail_sales_business.csv 182 | 183 |
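<!-- A hedged sketch (not part of the original dataset file): in DSPL XML, each of the three table definitions above binds a CSV file to typed columns, roughly as follows; the table id and column types here are assumptions based on the businesses.csv header.
       <table id="businesses_table">
         <column id="business" type="string"/>
         <column id="name" type="string"/>
         <column id="parent" type="string"/>
         <data><file format="csv" encoding="utf-8">businesses.csv</file></data>
       </table>
-->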
184 | 185 |
186 |
187 | -------------------------------------------------------------------------------- /samples/us_census/retail_sales/seasonalities.csv: -------------------------------------------------------------------------------- 1 | "seasonality" 2 | "Not Seasonally Adjusted" 3 | "Seasonally Adjusted" 4 | -------------------------------------------------------------------------------- /tools/dspl2/dspl2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google LLC 2 | # 3 | # Use of this source code is governed by a BSD-style 4 | # license that can be found in the LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | from dspl2.expander import Dspl2JsonLdExpander 8 | from dspl2.expander import Dspl2RdfExpander 9 | from dspl2.filegetter import HybridFileGetter 10 | from dspl2.filegetter import InternetFileGetter 11 | from dspl2.filegetter import LocalFileGetter 12 | from dspl2.filegetter import UploadedFileGetter 13 | from dspl2.jsonutil import AsList 14 | from dspl2.jsonutil import GetSchemaId 15 | from dspl2.jsonutil import GetSchemaProp 16 | from dspl2.jsonutil import GetSchemaType 17 | from dspl2.jsonutil import GetUrl 18 | from dspl2.jsonutil import JsonToKwArgsDict 19 | from dspl2.jsonutil import MakeIdKeyedDict 20 | from dspl2.rdfutil import LoadGraph 21 | from dspl2.rdfutil import FrameGraph 22 | from dspl2.rdfutil import MakeSparqlSelectQuery 23 | from dspl2.rdfutil import SelectFromGraph 24 | from dspl2.validator import CheckDataset 25 | from dspl2.validator import CheckDimension 26 | from dspl2.validator import CheckMeasure 27 | from dspl2.validator import CheckSlice 28 | from dspl2.validator import CheckSliceData 29 | from dspl2.validator import CheckStatisticalDataset 30 | from dspl2.validator import ValidateDspl2 31 | 32 | __all__ = [ 33 | "AsList", 34 | "CheckDataset", 35 | "CheckDimension", 36 | "CheckMeasure", 37 | "CheckSlice", 38 | "CheckSliceData", 39 | "CheckStatisticalDataset", 40 | "Dspl2JsonLdExpander", 41 | "Dspl2RdfExpander", 42 | "FrameGraph", 43 | "GetSchemaId", 44 | "GetSchemaProp", 45 | "GetSchemaType", 46 | "GetUrl", 47 | "HybridFileGetter", 48 | "InternetFileGetter", 49 | "JsonToKwArgsDict", 50 | "LoadGraph", 51 | "LocalFileGetter", 52 | "MakeIdKeyedDict", 53 | "MakeSparqlSelectQuery", 54 | "SelectFromGraph", 55 | "UploadedFileGetter", 56 | "ValidateDspl2", 57 | ] 58 | -------------------------------------------------------------------------------- /tools/dspl2/dspl2/filegetter.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google LLC 2 | # 3 | # Use of this source code is governed by a BSD-style 4 | # license that can be found in the LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | import extruct 8 | from io import StringIO 9 | import json 10 | from pathlib import Path 11 | import requests 12 | import sys 13 | from urllib.parse import urljoin, urlparse 14 | 15 | from dspl2.rdfutil import LoadGraph, SelectFromGraph 16 | 17 | 18 | def _ProcessDspl2File(filename, fileobj, *, type=''): 19 | if any([filename.endswith('.html'), 20 | type.startswith('text/html')]): 21 | data = extruct.extract(fileobj.read(), uniform=True) 22 | return LoadGraph({ 23 | '@context': 'http://schema.org', 24 | '@graph': [ 25 | subdata_elem 26 | for subdata in data.values() 27 | for subdata_elem in subdata 28 | if subdata 29 | ] 30 | }, filename) 31 | if any([filename.endswith('.json'),
filename.endswith('.jsonld'), 33 | type.startswith('application/ld+json')]): 34 | json_val = json.load(fileobj) 35 | return LoadGraph(json_val, filename) 36 | 37 | 38 | class UploadedFileGetter(object): 39 | def __init__(self, files): 40 | json_files = set() 41 | self.graph = None 42 | self.file_map = {} 43 | for f in files: 44 | self.file_map[f.filename] = f 45 | data = _ProcessDspl2File(f.filename, f.stream) 46 | if data: 47 | json_files.add(f.filename) 48 | self.base = f.filename 49 | self.graph = data 50 | if not self.graph: 51 | raise RuntimeError("DSPL 2 file not present in {}".format( 52 | [file.filename for file in self.file_map.values()])) 53 | if len(json_files) > 1: 54 | raise RuntimeError("Multiple DSPL 2 files present: {}".format(json_files)) 55 | 56 | def Fetch(self, filename): 57 | f = self.file_map.get(filename) 58 | if not f: 59 | raise IOError(None, 'File not found', filename) 60 | f.stream.seek(0) 61 | return StringIO(f.read().decode('utf-8')) 62 | 63 | 64 | class InternetFileGetter(object): 65 | def __init__(self, url): 66 | self.base = url 67 | r = requests.get(self.base) 68 | r.raise_for_status() 69 | self.graph = _ProcessDspl2File(url, StringIO(r.text), type=r.headers['content-type']) 70 | 71 | def Fetch(self, filename): 72 | r = requests.get(urljoin(self.base, filename)) 73 | r.raise_for_status() 74 | return StringIO(r.text) 75 | 76 | 77 | class LocalFileGetter(object): 78 | def __init__(self, path): 79 | self.base = urlparse(path).path 80 | with Path(self.base).open() as f: 81 | self.graph = _ProcessDspl2File(path, f) 82 | 83 | def Fetch(self, filename): 84 | filename = urlparse(filename).path 85 | path = Path(self.base).parent.joinpath(Path(filename)).resolve() 86 | return path.open() 87 | 88 | 89 | class HybridFileGetter(object): 90 | @staticmethod 91 | def _load_file(base, rel=None): 92 | uri = urlparse(base) 93 | if rel: 94 | uri = urlparse(urljoin(base, rel)) 95 | if not uri.scheme or uri.scheme == 'file': 96 | return Path(uri.path).open() 97 | elif uri.scheme == 'http' or uri.scheme == 'https': 98 | r = requests.get(uri.geturl()) 99 | r.raise_for_status() 100 | return StringIO(r.text) 101 | 102 | def __init__(self, json_uri): 103 | self.base = json_uri 104 | self.graph = _ProcessDspl2File( 105 | json_uri, 106 | HybridFileGetter._load_file(json_uri)) 107 | 108 | def Fetch(self, uri): 109 | return HybridFileGetter._load_file(self.base, uri) 110 | -------------------------------------------------------------------------------- /tools/dspl2/dspl2/jsonutil.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google LLC 2 | # 3 | # Use of this source code is governed by a BSD-style 4 | # license that can be found in the LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | 8 | def AsList(val): 9 | """Ensures the JSON-LD object is a list.""" 10 | if isinstance(val, list): 11 | return val 12 | elif val is None: 13 | return [] 14 | else: 15 | return [val] 16 | 17 | 18 | def GetSchemaProp(obj, key, default=None): 19 | try: 20 | return obj.get(key, obj.get('schema:' + key, default)) 21 | except AttributeError as e: 22 | raise RuntimeError(f"Unable to find key '{key}' in {obj}") from e 23 | 24 | 25 | def JsonToKwArgsDict(json_val): 26 | """Turn a StatisticalDataset object into a kwargs dict for a Jinja2 template. 27 | 28 | Specifically, this collects top-level dataset metadata under a "dataset" key, 29 | and keeps dimensions, measures, footnotes, and slices as they are.
30 | """ 31 | ret = {'dataset': {}} 32 | special_keys = {'dimension', 'measure', 'footnote', 'slice'} 33 | for key in json_val: 34 | if key in special_keys: 35 | ret[key] = GetSchemaProp(json_val, key) 36 | else: 37 | ret['dataset'][key] = GetSchemaProp(json_val, key) 38 | return ret 39 | 40 | 41 | def MakeIdKeyedDict(vals): 42 | """Returns a dict mapping objects' IDs to objects in the provided list. 43 | 44 | Given a list of JSON-LD objects, return a dict mapping each element's ID to the 45 | element. 46 | 47 | Parameters: 48 | vals (list): list of JSON-LD objects with IDs as dicts 49 | 50 | Returns 51 | dict:dict whose values are elements of `vals` and whose keys are their IDs. 52 | """ 53 | ret = {} 54 | for val in vals: 55 | id = GetSchemaProp(val, '@id') 56 | if id: 57 | ret[id] = val 58 | return ret 59 | 60 | 61 | def GetSchemaId(obj): 62 | return obj.get('@id', GetSchemaProp(obj, 'id')) 63 | 64 | 65 | def GetSchemaType(obj): 66 | return obj.get('@type', GetSchemaProp(obj, 'type')) 67 | 68 | 69 | def GetUrl(obj): 70 | if isinstance(obj, str): 71 | return obj 72 | elif isinstance(obj, dict): 73 | return GetSchemaId(obj) 74 | -------------------------------------------------------------------------------- /tools/dspl2/dspl2/rdfutil.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google LLC 2 | # 3 | # Use of this source code is governed by a BSD-style 4 | # license that can be found in the LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | import json 8 | from pathlib import Path 9 | from pyld import jsonld 10 | from rdflib import Graph, Namespace 11 | from rdflib.serializer import Serializer 12 | import sys 13 | 14 | from dspl2.jsonutil import AsList 15 | 16 | 17 | SCHEMA = Namespace('http://schema.org/') 18 | 19 | 20 | _Schema = {} 21 | _Context = {} 22 | _DataFileFrame = { 23 | '@context': [_Context, {'schema': 'http://schema.org/'}], 24 | '@type': 'StatisticalDataset', 25 | } 26 | _FullFrame = { 27 | '@context': [_Context, {'schema': 'http://schema.org/'}], 28 | '@type': 'StatisticalDataset', 29 | 'slice': { 30 | 'dimension': { 31 | '@embed': '@never' 32 | }, 33 | 'measure': { 34 | '@embed': '@never' 35 | }, 36 | 'tableMapping': { 37 | 'sourceEntity': { 38 | '@embed': '@never' 39 | } 40 | }, 41 | 'data': { 42 | 'dimensionValue': { 43 | 'dimension': { 44 | '@embed': '@never' 45 | } 46 | }, 47 | 'measureValue': { 48 | 'measure': { 49 | '@embed': '@never' 50 | }, 51 | 'footnote': { 52 | '@embed': '@never' 53 | } 54 | } 55 | } 56 | } 57 | } 58 | _Initialized = False 59 | _Module_path = Path(__file__).parent 60 | _RdfPrefixes = { 61 | 'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#', 62 | 'rdfs': 'http://www.w3.org/2000/01/rdf-schema#', 63 | 'schema': 'http://schema.org/', 64 | } 65 | 66 | 67 | def _Init(context, schema): 68 | global _Context, _Schema, _Initialized 69 | if not _Initialized: 70 | with schema.open() as schema: 71 | _Schema.update(json.load(schema)) 72 | with context.open() as context: 73 | _Context.update(json.load(context)) 74 | del _Context['@context']['id'] 75 | del _Context['@context']['type'] 76 | _Initialized = True 77 | 78 | 79 | def _LoadJsonLd(json_val, public_id): 80 | _Init(_Module_path / 'schema' / 'jsonldcontext.json', 81 | _Module_path / 'schema' / 'schema.jsonld') 82 | json_val['@context'] = _Context 83 | graph = Graph().parse( 84 | data=json.dumps(json_val).encode('utf-8'), 85 | format='json-ld', 86 | publicID=public_id 87 | ) 88 | return graph 89 | 90 | 91 | 
def LoadGraph(input, public_id): 92 | if isinstance(input, dict): 93 | data = input 94 | elif isinstance(input, str): 95 | data = json.loads(input) 96 | else: 97 | data = json.load(input) 98 | 99 | return _LoadJsonLd(data, public_id) 100 | 101 | 102 | def FrameGraph(graph, frame=_FullFrame): 103 | serialized = graph.serialize(format='json-ld') 104 | json_val = json.loads(serialized) 105 | json_val = { 106 | '@context': _Context, 107 | '@graph': AsList(json_val) 108 | } 109 | framed = jsonld.frame(json_val, frame, {'embed': '@always'}) 110 | framed['@context'] = 'http://schema.org' 111 | for items in framed['@graph']: 112 | framed.update(items) 113 | del framed['@graph'] 114 | return framed 115 | 116 | 117 | def _N3(obj, namespace_manager): 118 | if isinstance(obj, str): 119 | return obj 120 | return obj.n3(namespace_manager=namespace_manager) 121 | 122 | 123 | def MakeSparqlSelectQuery(*constraints, 124 | ns_manager=None, 125 | rdf_prefixes=_RdfPrefixes): 126 | ret = '' 127 | for prefix, url in rdf_prefixes.items(): 128 | ret += f'PREFIX {prefix}: <{url}>\n' 129 | ret += 'SELECT * WHERE {\n' 130 | for constraint in constraints: 131 | sub, pred, obj = (_N3(field, ns_manager) 132 | for field in constraint) 133 | ret += f' {sub} {pred} {obj} .\n' 134 | ret += '}' 135 | return ret 136 | 137 | 138 | def SelectFromGraph(graph, *constraints): 139 | result = graph.query( 140 | MakeSparqlSelectQuery( 141 | *constraints, 142 | ns_manager=graph.namespace_manager)) 143 | return list({str(k): str(v) 144 | for k, v in binding.items()} 145 | for binding in result.bindings) 146 | 147 | 148 | def main(args): 149 | with open(args[1]) as f: 150 | normalized = FrameGraph(LoadGraph(f, args[1])) 151 | json.dump(normalized, sys.stdout, indent=2) 152 | 153 | 154 | if __name__ == '__main__': 155 | main(sys.argv) 156 | -------------------------------------------------------------------------------- /tools/dspl2/dspl2/templates/choose.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | DSPL 2 Viewer 4 | 5 | 6 | 7 |

DSPL 2 Viewer

8 |

Dataset

9 |
10 | 11 | 12 |
13 | 14 | 15 |
16 | Process as RDF (slow) 17 |
18 | 19 | 20 | -------------------------------------------------------------------------------- /tools/dspl2/dspl2/templates/display.html: -------------------------------------------------------------------------------- 1 | {% from 'render.html' import render %} 2 | 3 | 4 | DSPL 2 Viewer 5 | 8 | 9 | 10 |

DSPL 2 Viewer

11 |

Dataset

12 |

Dimensions

13 |

Measures

14 |

Footnotes

15 |

Slices

16 |
17 | {{render(dataset)}} 18 |
19 | 27 | 35 | {% if footnote %} 36 | 44 | {% endif %} 45 | 53 | 54 | 55 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /tools/dspl2/dspl2/templates/error.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | DSPL 2 Viewer 4 | 5 | 6 | 7 |

DSPL 2 Viewer

8 |

Error fetching dataset

9 |
10 |
11 | {% if url %} 12 | Error {{action}} {{url}}
13 | {% endif %} 14 |
15 |
16 | {% if status %} 17 | HTTP status {{status}}: 18 | {% endif %} 19 | {% if text %} 20 | {{text}} 21 | {% endif %} 22 |
23 | 24 | 25 | -------------------------------------------------------------------------------- /tools/dspl2/dspl2/templates/render.html: -------------------------------------------------------------------------------- 1 | {% macro render(obj) %} 2 | {% if obj.items %} 3 | 4 | {% for key, val in obj|dictsort %} 5 | 6 | 7 | 8 | 9 | {% endfor %} 10 |
{{key}}{{ render(val) }}
11 | {% elif obj.append %} 12 | {% for val in obj %} 13 | 14 | 15 | 18 | 19 | {% endfor %} 20 |
16 | {{ render(val) }} 17 |
21 | {% elif obj and obj.startswith and (obj.startswith('https://') or obj.startswith('http://')) %} 22 | {{obj}} 23 | {% else %} 24 | {{obj}} 25 | {% endif %} 26 | {% endmacro %} 27 | -------------------------------------------------------------------------------- /tools/dspl2/dspl2/templates/viewer.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: sans-serif; 3 | } 4 | 5 | h2 { 6 | display: inline-block; 7 | padding: 1ex; 8 | border: 1px solid; 9 | margin: 0px; 10 | } 11 | 12 | h2.active { 13 | background-color: yellow; 14 | box-shadow: 2px 2px gray; 15 | } 16 | 17 | table { 18 | border-collapse: collapse; 19 | background-color: white; 20 | width: 100%; 21 | } 22 | 23 | table,th,td { 24 | border: 1px solid; 25 | } 26 | 27 | td { 28 | vertical-align: top; 29 | } 30 | 31 | tr:nth-child(even) {background-color: #f2f2f2;} 32 | 33 | td:first-child { 34 | width:10%; 35 | } 36 | 37 | .hidden { 38 | display: none; 39 | } 40 | 41 | td:first-child.closed::before { 42 | content: "▶️"; 43 | color: red; 44 | } 45 | td:first-child.open::before { 46 | content: "🔻"; 47 | color: red; 48 | } 49 | -------------------------------------------------------------------------------- /tools/dspl2/dspl2/templates/viewer.js: -------------------------------------------------------------------------------- 1 | for (var td of document.querySelectorAll('td:first-child')) { 2 | var sibling = td.nextElementSibling; 3 | if (sibling) { 4 | if (sibling.querySelector('table')) { 5 | if (sibling.children.length < 20) { 6 | td.classList.toggle('open'); 7 | } else { 8 | td.classList.toggle('closed'); 9 | sibling.classList.toggle('hidden'); 10 | } 11 | td.addEventListener('click', (ev) => { 12 | ev.target.classList.toggle('open'); 13 | ev.target.classList.toggle('closed'); 14 | ev.target.nextElementSibling.classList.toggle('hidden'); 15 | }); 16 | } 17 | } 18 | } 19 | 20 | function onclick(ev) { 21 | document.querySelectorAll('h2').forEach((elt) => { 22 | elt.classList.remove('active'); 23 | }); 24 | ev.target.classList.add('active'); 25 | 26 | document.querySelectorAll('div').forEach((elt) => { 27 | elt.classList.add('hidden'); 28 | }); 29 | document.querySelector('div#'+ev.target.textContent.trim().toLowerCase()).classList.remove('hidden'); 30 | } 31 | 32 | document.querySelectorAll('h2').forEach((elt) => { 33 | elt.addEventListener('click', onclick); 34 | }); 35 | -------------------------------------------------------------------------------- /tools/dspl2/dspl2/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/dspl/db79dad685276dbf98ca44b875d1481bc240c5c1/tools/dspl2/dspl2/tests/__init__.py -------------------------------------------------------------------------------- /tools/dspl2/dspl2/tests/test_expander.py: -------------------------------------------------------------------------------- 1 | from dspl2.expander import Dspl2JsonLdExpander, Dspl2RdfExpander 2 | from dspl2.rdfutil import SCHEMA 3 | from io import StringIO 4 | import rdflib 5 | import unittest 6 | 7 | 8 | class DummyGetter(object): 9 | def __init__(self, graph): 10 | self.graph = graph 11 | self.data = {} 12 | 13 | def Set(self, filename, data): 14 | self.data[filename] = StringIO(data) 15 | 16 | def Fetch(self, filename): 17 | return self.data.get(filename, StringIO('')) 18 | 19 | 20 | class ExpanderTests(unittest.TestCase): 21 | def test_Dspl2RdfExpander_ExpandDimensionValue(self): 22 | graph = 
rdflib.Graph() 23 | getter = DummyGetter(graph) 24 | expander = Dspl2RdfExpander(getter) 25 | dim = rdflib.URIRef('http://foo.invalid/test.json#dim') 26 | equiv_types = [SCHEMA.Place] 27 | row = { 28 | 'codeValue': 'cv', 29 | 'key1': 'val1', 30 | 'key2': 'val2', 31 | } 32 | row_id = rdflib.URIRef(str(dim) + '=' + row['codeValue']) 33 | expander._ExpandDimensionValue(dim, equiv_types, row_id, row) 34 | self.assertEqual(set(graph.objects(subject=dim, predicate=SCHEMA.codeList)), 35 | {row_id}) 36 | self.assertEqual(set(graph.objects(subject=row_id, 37 | predicate=rdflib.RDF.type)), 38 | {SCHEMA.DimensionValue, SCHEMA.Place}) 39 | 40 | 41 | 42 | self.assertEqual(set(graph.objects(subject=row_id, predicate=SCHEMA.key1)), 43 | {rdflib.Literal('val1')}) 44 | self.assertEqual(set(graph.objects(subject=row_id, predicate=SCHEMA.key2)), 45 | {rdflib.Literal('val2')}) 46 | self.assertEqual(set(graph.objects(subject=row_id, 47 | predicate=SCHEMA.codeValue)), 48 | {rdflib.Literal('cv')}) 49 | 50 | def test_Dspl2RdfExpander_ExpandFootnotes(self): 51 | graph = rdflib.Graph() 52 | dim = rdflib.URIRef('#ds') 53 | graph.add((dim, rdflib.RDF.type, SCHEMA.StatisticalDataset)) 54 | graph.add((dim, SCHEMA.footnote, rdflib.Literal('foo'))) 55 | getter = DummyGetter(graph) 56 | getter.Set('foo', 'codeValue,name,description\np,predicted,Value is predicted rather than measured.\n') 57 | expander = Dspl2RdfExpander(getter) 58 | expander._ExpandFootnotes() 59 | for triple in graph: 60 | print(triple) 61 | footnote_id = rdflib.URIRef('#footnote=p') 62 | self.assertEqual(set(graph.objects(subject=dim, 63 | predicate=SCHEMA.footnote)), 64 | {footnote_id}) 65 | self.assertEqual(set(graph.objects(subject=footnote_id, 66 | predicate=SCHEMA.description)), 67 | {rdflib.term.Literal('Value is predicted rather than measured.')}) 68 | self.assertEqual(set(graph.objects(subject=footnote_id, 69 | predicate=SCHEMA.name)), 70 | {rdflib.term.Literal('predicted')}) 71 | self.assertEqual(set(graph.objects(subject=footnote_id, 72 | predicate=rdflib.RDF.type)), 73 | {SCHEMA.StatisticalAnnotation}) 74 | self.assertEqual(set(graph.objects(subject=footnote_id, 75 | predicate=SCHEMA.codeValue)), 76 | {rdflib.term.Literal('p')}) 77 | 78 | def test_Dspl2RdfExpander_ExpandSliceData(self): 79 | pass 80 | 81 | def test_Dspl2JsonLdExpander_ExpandCodeList(self): 82 | pass 83 | 84 | def test_Dspl2JsonLdExpander_ExpandFootnotes(self): 85 | pass 86 | 87 | def test_Dspl2JsonLdExpander_ExpandSliceData(self): 88 | pass 89 | 90 | 91 | if __name__ == '__main__': 92 | unittest.main() 93 | -------------------------------------------------------------------------------- /tools/dspl2/dspl2/tests/test_jsonutil.py: -------------------------------------------------------------------------------- 1 | from dspl2.jsonutil import (AsList, GetSchemaProp, JsonToKwArgsDict, 2 | MakeIdKeyedDict, GetSchemaId, GetSchemaType, GetUrl) 3 | import unittest 4 | 5 | 6 | class JsonUtilTests(unittest.TestCase): 7 | def test_AsList(self): 8 | self.assertEqual(AsList(None), []) 9 | self.assertEqual(AsList([]), []) 10 | self.assertEqual(AsList([1]), [1]) 11 | self.assertEqual(AsList(1), [1]) 12 | 13 | def test_GetSchemaProp(self): 14 | self.assertEqual(GetSchemaProp({'id': 'val'}, 'id'), 'val') 15 | self.assertEqual(GetSchemaProp({'schema:id': 'val'}, 'id'), 'val') 16 | 17 | def test_JsonToKwArgsDict(self): 18 | self.assertEqual(JsonToKwArgsDict({'id': 'val'}), 
{'dataset': {'id': 'val'}}) 19 | self.assertEqual(JsonToKwArgsDict({}), {'dataset': {}}) 20 | 21 | def test_MakeIdKeyedDict(self): 22 | objs = [{'@id': '1'}, {'@id': '2'}] 23 | lookup = MakeIdKeyedDict(objs) 24 | self.assertEqual(lookup['1'], {'@id': '1'}) 25 | self.assertEqual(lookup['2'], {'@id': '2'}) 26 | 27 | def test_GetSchemaId(self): 28 | self.assertEqual(GetSchemaId({'@id': 'val'}), 'val') 29 | self.assertEqual(GetSchemaId({'id': 'val'}), 'val') 30 | self.assertEqual(GetSchemaId({'schema:id': 'val'}), 'val') 31 | 32 | def test_GetSchemaType(self): 33 | self.assertEqual(GetSchemaType({'@type': 'val'}), 'val') 34 | self.assertEqual(GetSchemaType({'type': 'val'}), 'val') 35 | self.assertEqual(GetSchemaType({'schema:type': 'val'}), 'val') 36 | 37 | def test_GetUrl(self): 38 | self.assertEqual(GetUrl({'@id': 'val'}), 'val') 39 | self.assertEqual(GetUrl('val'), 'val') 40 | 41 | 42 | if __name__ == '__main__': 43 | unittest.main() 44 | -------------------------------------------------------------------------------- /tools/dspl2/dspl2/tests/test_rdfutil.py: -------------------------------------------------------------------------------- 1 | from dspl2.rdfutil import (LoadGraph, FrameGraph, SelectFromGraph) 2 | from io import StringIO 3 | import json 4 | import rdflib 5 | import rdflib.compare 6 | import unittest 7 | 8 | 9 | _SampleJson = '''{ 10 | "@context": "http://schema.org", 11 | "@type": "StatisticalDataset", 12 | "@id": "", 13 | "url": "https://data.europa.eu/euodp/en/data/dataset/bAzn6fiusnRFOBwUeIo78w", 14 | "identifier": "met_d3dens", 15 | "name": "Eurostat Population Density", 16 | "description": "Population density by metropolitan regions", 17 | "dateCreated": "2015-10-16", 18 | "dateModified": "2019-06-18", 19 | "temporalCoverage": "1990-01-01/2016-01-01", 20 | "distribution": { 21 | "@type": "DataDownload", 22 | "contentUrl": "http://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?file=data/met_d3dens.tsv.gz&unzip=true", 23 | "encodingFormat": "text/tab-separated-values" 24 | }, 25 | "spatialCoverage":{ 26 | "@type":"Place", 27 | "geo":{ 28 | "@type":"GeoShape", 29 | "name": "European Union", 30 | "box":"34.633285 -10.468556 70.096054 34.597916" 31 | } 32 | }, 33 | "license": "https://ec.europa.eu/eurostat/about/policies/copyright", 34 | "creator":{ 35 | "@type":"Organization", 36 | "url": "https://ec.europa.eu/eurostat", 37 | "name":"Eurostat" 38 | }, 39 | "publisher": { 40 | "@type": "Organization", 41 | "name": "Eurostat", 42 | "url": "https://ec.europa.eu/eurostat", 43 | "contactPoint": { 44 | "@type": "ContactPoint", 45 | "contactType": "User Support", 46 | "url": "https://ec.europa.eu/eurostat/help/support" 47 | } 48 | } 49 | }''' 50 | 51 | 52 | class RdfUtilTests(unittest.TestCase): 53 | def test_LoadGraph(self): 54 | graph1 = LoadGraph(_SampleJson, '') 55 | graph2 = LoadGraph(json.loads(_SampleJson), '') 56 | graph3 = LoadGraph(StringIO(_SampleJson), '') 57 | self.assertTrue(rdflib.compare.isomorphic(graph1, graph2)) 58 | self.assertTrue(rdflib.compare.isomorphic(graph1, graph3)) 59 | 60 | def test_FrameGraph(self): 61 | json_val = FrameGraph(LoadGraph(_SampleJson, '')) 62 | self.assertEqual(json_val['@context'], 'http://schema.org') 63 | self.assertEqual(json_val['@type'], 'StatisticalDataset') 64 | self.assertEqual(json_val['url'], 'https://data.europa.eu/euodp/en/data/dataset/bAzn6fiusnRFOBwUeIo78w') 65 | self.assertEqual(json_val['identifier'], 'met_d3dens') 66 | self.assertEqual(json_val['name'], 'Eurostat Population Density') 67 | 
self.assertEqual(json_val['description'], 'Population density by metropolitan regions') 68 | self.assertEqual(json_val['dateCreated'], '2015-10-16') 69 | self.assertEqual(json_val['dateModified'], '2019-06-18') 70 | self.assertEqual(json_val['temporalCoverage'], '1990-01-01/2016-01-01') 71 | self.assertEqual(json_val['distribution']['@type'], 'DataDownload') 72 | self.assertEqual(json_val['distribution']['contentUrl'], 'http://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?file=data/met_d3dens.tsv.gz&unzip=true') 73 | self.assertEqual(json_val['distribution']['encodingFormat'], 'text/tab-separated-values') 74 | self.assertEqual(json_val['spatialCoverage']['@type'], "Place") 75 | self.assertEqual(json_val['spatialCoverage']['geo']['@type'], "GeoShape") 76 | self.assertEqual(json_val['spatialCoverage']['geo']['name'], 'European Union') 77 | self.assertEqual(json_val['spatialCoverage']['geo']['box'], '34.633285 -10.468556 70.096054 34.597916') 78 | self.assertEqual(json_val['license'], 'https://ec.europa.eu/eurostat/about/policies/copyright') 79 | self.assertEqual(json_val['creator']['@type'], "Organization") 80 | self.assertEqual(json_val['creator']['url'], 'https://ec.europa.eu/eurostat') 81 | self.assertEqual(json_val['creator']['name'], 'Eurostat') 82 | self.assertEqual(json_val['publisher']['@type'], 'Organization') 83 | self.assertEqual(json_val['publisher']['name'], 'Eurostat') 84 | self.assertEqual(json_val['publisher']['url'], 'https://ec.europa.eu/eurostat') 85 | self.assertEqual(json_val['publisher']['contactPoint']['@type'], 'ContactPoint') 86 | self.assertEqual(json_val['publisher']['contactPoint']['contactType'], 'User Support') 87 | self.assertEqual(json_val['publisher']['contactPoint']['url'], 'https://ec.europa.eu/eurostat/help/support') 88 | 89 | def test_SelectFromGraph(self): 90 | graph = LoadGraph(_SampleJson, '') 91 | results = list(SelectFromGraph( 92 | graph, 93 | ('?ds', 'rdf:type', 'schema:StatisticalDataset'), 94 | ('?ds', 'schema:name', '?name'))) 95 | self.assertEqual(len(results), 1) 96 | self.assertEqual(results[0]['name'], 'Eurostat Population Density') 97 | 98 | 99 | if __name__ == '__main__': 100 | unittest.main() 101 | -------------------------------------------------------------------------------- /tools/dspl2/requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py 2 | extruct 3 | flask 4 | pyicu 5 | jinja2 6 | pandas 7 | pyld 8 | rdflib 9 | rdflib-jsonld 10 | requests 11 | -------------------------------------------------------------------------------- /tools/dspl2/scripts/dspl2-expand.py: -------------------------------------------------------------------------------- 1 | #!/bin/env python3 2 | # Copyright 2018 Google LLC 3 | # 4 | # Use of this source code is governed by a BSD-style 5 | # license that can be found in the LICENSE file or at 6 | # https://developers.google.com/open-source/licenses/bsd 7 | 8 | from absl import app 9 | from absl import flags 10 | from dspl2 import (Dspl2RdfExpander, Dspl2JsonLdExpander, FrameGraph, 11 | LocalFileGetter) 12 | import json 13 | import sys 14 | 15 | 16 | flags.DEFINE_boolean('rdf', False, 'Process the JSON-LD as RDF.') 17 | 18 | 19 | def main(args): 20 | if len(args) != 2: 21 | print(f'Usage: {args[0]} [DSPL file]', file=sys.stderr) 22 | exit(1) 23 | getter = LocalFileGetter(args[1]) 24 | if flags.FLAGS.rdf: 25 | graph = Dspl2RdfExpander(getter).Expand() 26 | dspl = FrameGraph(getter.graph) 27 | else: 28 | dspl = 
Dspl2JsonLdExpander(getter).Expand() 29 | json.dump(dspl, sys.stdout, indent=2) 30 | 31 | 32 | if __name__ == '__main__': 33 | app.run(main) 34 | -------------------------------------------------------------------------------- /tools/dspl2/scripts/dspl2-pretty-print-server.py: -------------------------------------------------------------------------------- 1 | #!/bin/env python3 2 | # Copyright 2018 Google LLC 3 | # 4 | # Use of this source code is governed by a BSD-style 5 | # license that can be found in the LICENSE file or at 6 | # https://developers.google.com/open-source/licenses/bsd 7 | 8 | from flask import Flask, request, render_template 9 | import json 10 | from pathlib import Path 11 | import requests 12 | 13 | import dspl2 14 | from dspl2 import ( 15 | Dspl2JsonLdExpander, Dspl2RdfExpander, InternetFileGetter, 16 | JsonToKwArgsDict, LoadGraph, FrameGraph, UploadedFileGetter) 17 | 18 | 19 | def _Display(template, json_val): 20 | return render_template(template, **JsonToKwArgsDict(json_val)) 21 | 22 | 23 | template_dir = Path(dspl2.__file__).parent / 'templates' 24 | app = Flask('dspl2-viewer', template_folder=template_dir.as_posix()) 25 | 26 | @app.route('/') 27 | def Root(): 28 | return render_template('choose.html') 29 | 30 | 31 | @app.route('/render', methods=['GET', 'POST']) 32 | def _HandleUploads(): 33 | try: 34 | rdf = request.args.get('rdf') == 'on' 35 | url = request.args.get('url') 36 | if request.method == 'POST': 37 | files = request.files.getlist('files[]') 38 | getter = UploadedFileGetter(files) 39 | else: 40 | if not url: 41 | return render_template('error.html', 42 | message="No URL provided") 43 | getter = InternetFileGetter(url) 44 | if rdf: 45 | graph = Dspl2RdfExpander(getter).Expand() 46 | json_val = FrameGraph(graph) 47 | else: 48 | json_val = Dspl2JsonLdExpander(getter).Expand() 49 | return _Display('display.html', json_val) 50 | except json.JSONDecodeError as e: 51 | return render_template('error.html', 52 | action="decoding", 53 | url=e.doc or url, 54 | text=str(e)) 55 | except IOError as e: 56 | return render_template('error.html', 57 | action="loading", 58 | url=e.filename, 59 | text=str(e)) 60 | except RuntimeError as e: 61 | return render_template('error.html', 62 | text=str(e)) 63 | except requests.exceptions.HTTPError as e: 64 | return render_template('error.html', 65 | url=url, 66 | action="retrieving", 67 | status=e.response.status_code, 68 | text=e.response.text) 69 | except requests.exceptions.RequestException as e: 70 | return render_template('error.html', 71 | url=url, 72 | action="retrieving", 73 | text=str(e)) 74 | except Exception as e: 75 | return render_template('error.html', 76 | action="processing", 77 | url=url, 78 | text=str(type(e)) + str(e)) 79 | 80 | 81 | if __name__ == '__main__': 82 | app.run() 83 | -------------------------------------------------------------------------------- /tools/dspl2/scripts/dspl2-pretty-print.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google LLC 2 | # 3 | # Use of this source code is governed by a BSD-style 4 | # license that can be found in the LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | from absl import app 8 | from absl import flags 9 | import dspl2 10 | import jinja2 11 | from pathlib import Path 12 | import sys 13 | 14 | 15 | FLAGS = flags.FLAGS 16 | flags.DEFINE_boolean('rdf', False, 'Process the JSON-LD as RDF.') 17 | 18 | 19 | def _RenderLocalDspl2(path, rdf): 20 | template_dir = 
Path(dspl2.__file__).parent / 'templates' 21 | env = jinja2.Environment(loader=jinja2.FileSystemLoader( 22 | template_dir.as_posix())) 23 | try: 24 | print("Loading template") 25 | template = env.get_template('display.html') 26 | print("Loading DSPL2") 27 | getter = dspl2.LocalFileGetter(path) 28 | print("Expanding DSPL2") 29 | if rdf: 30 | graph = dspl2.Dspl2RdfExpander(getter).Expand() 31 | print("Framing DSPL2") 32 | json_val = dspl2.FrameGraph(graph) 33 | else: 34 | json_val = dspl2.Dspl2JsonLdExpander(getter).Expand() 35 | print("Rendering template") 36 | return template.render(**dspl2.JsonToKwArgsDict(json_val)) 37 | except Exception as e: 38 | template = env.get_template('error.html') 39 | return template.render(action="processing", 40 | url=path, 41 | text=str(type(e)) + ": " + str(e)) 42 | 43 | 44 | 45 | def main(argv): 46 | if len(argv) != 3: 47 | print(f'Usage: {argv[0]} [input.json] [output.html]', file=sys.stderr) 48 | exit(1) 49 | with open(argv[2], 'w') as f: 50 | print(_RenderLocalDspl2(argv[1], FLAGS.rdf), file=f) 51 | 52 | 53 | if __name__ == '__main__': 54 | app.run(main) 55 | -------------------------------------------------------------------------------- /tools/dspl2/scripts/dspl2-validate.py: -------------------------------------------------------------------------------- 1 | #!/bin/env python3 2 | # Copyright 2018 Google LLC 3 | # 4 | # Use of this source code is governed by a BSD-style 5 | # license that can be found in the LICENSE file or at 6 | # https://developers.google.com/open-source/licenses/bsd 7 | 8 | from absl import app 9 | from absl import flags 10 | from dspl2 import (Dspl2JsonLdExpander, Dspl2RdfExpander, LocalFileGetter, 11 | FrameGraph, LoadGraph, ValidateDspl2) 12 | import sys 13 | 14 | 15 | FLAGS = flags.FLAGS 16 | flags.DEFINE_boolean('rdf', False, 'Process the JSON-LD as RDF.') 17 | 18 | 19 | def main(args): 20 | if len(args) != 2: 21 | print(f'Usage: {args[0]} [DSPL file]', file=sys.stderr) 22 | exit(1) 23 | getter = LocalFileGetter(args[1]) 24 | if flags.FLAGS.rdf: 25 | graph = Dspl2RdfExpander(getter).Expand() 26 | dspl = FrameGraph(getter.graph) 27 | else: 28 | dspl = Dspl2JsonLdExpander(getter).Expand() 29 | warnings = ValidateDspl2(dspl, getter) 30 | for warning in warnings: 31 | print(warning) 32 | 33 | 34 | if __name__ == '__main__': 35 | app.run(main) 36 | -------------------------------------------------------------------------------- /tools/dspl2/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google LLC 2 | # 3 | # Use of this source code is governed by a BSD-style 4 | # license that can be found in the LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | import setuptools 8 | 9 | setuptools.setup( 10 | name="dspl2", 11 | version="0.0.1", 12 | author="Natarajan Krishnaswami", 13 | author_email="nkrishnaswami@google.com", 14 | description="DSPL 2.0 tools", 15 | url="https://github.com/google/dspl", 16 | packages=setuptools.find_packages(), 17 | classifiers=[ 18 | "Programming Language :: Python :: 3", 19 | "License :: OSI Approved :: BSD License", 20 | "Operating System :: OS Independent", 21 | ], 22 | package_data={ 23 | 'dspl2': ['templates/*', 'schema/*'], 24 | }, 25 | scripts=[ 26 | 'scripts/dspl2-expand.py', 27 | 'scripts/dspl2-pretty-print.py', 28 | 'scripts/dspl2-pretty-print-server.py', 29 | 'scripts/dspl2-validate.py', 30 | ], 31 | ) 32 | -------------------------------------------------------------------------------- 
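The console scripts registered by setup.py above share one calling convention: a single DSPL 2 JSON-LD input (plus an output file for the pretty-printer) and an optional --rdf flag defined via absl. A hedged sketch of typical invocations; the input and output file names here are assumptions, not files shipped with the tools:

    python3 scripts/dspl2-expand.py dataset.jsonld > expanded.jsonld
    python3 scripts/dspl2-validate.py --rdf dataset.jsonld
    python3 scripts/dspl2-pretty-print.py dataset.jsonld output.html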
/tools/dspl2viz/dspl2viz.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import dspl2 3 | from flask import ( 4 | Flask, render_template, request, Response) 5 | from functools import lru_cache 6 | from icu import SimpleDateFormat 7 | from io import StringIO 8 | import json 9 | import os.path 10 | import pandas as pd 11 | from urllib.parse import urlparse 12 | 13 | 14 | app = Flask(__name__) 15 | 16 | 17 | @app.route('/') 18 | def main(): 19 | return render_template('dspl2viz.html') 20 | 21 | 22 | @app.route('/api/measures') 23 | def api_measures(): 24 | dataset = request.args.get('dataset') 25 | if dataset is None: 26 | return Response("Dataset not specified", status=400) 27 | try: 28 | getter = dspl2.HybridFileGetter(dataset) 29 | 30 | expander = dspl2.Dspl2JsonLdExpander(getter) 31 | ds = expander.Expand(expandSlices=False) 32 | return Response(json.dumps(ds['measure'], indent=2), mimetype='application/json') 33 | except Exception as e: 34 | app.logger.warn(e) 35 | return Response("Unable to find requested dataset", status=404) 36 | 37 | 38 | @app.route('/api/dimensions') 39 | def api_dimensions(): 40 | dataset = request.args.get('dataset') 41 | if dataset is None: 42 | return Response("Dataset not specified", status=400) 43 | try: 44 | getter = dspl2.HybridFileGetter(dataset) 45 | expander = dspl2.Dspl2JsonLdExpander(getter) 46 | ds = expander.Expand(expandSlices=False, expandDimensions=False) 47 | return Response(json.dumps(ds['dimension'], indent=2), mimetype='application/json') 48 | except Exception as e: 49 | app.logger.warn(e) 50 | return Response("Unable to find requested dataset", status=404) 51 | 52 | 53 | @app.route('/api/dimension_values') 54 | def api_dimension_values(): 55 | dataset = request.args.get('dataset') 56 | if dataset is None: 57 | return Response("Dataset not specified", status=400) 58 | dimension = request.args.get('dimension') 59 | if dimension is None: 60 | return Response("Dimension not specified", status=400) 61 | try: 62 | getter = dspl2.HybridFileGetter(dataset) 63 | expander = dspl2.Dspl2JsonLdExpander(getter) 64 | ds = expander.Expand(expandSlices=False, expandDimensions=True) 65 | for dim in ds['dimension']: 66 | if (dimension == dspl2.GetUrl(dim) or 67 | urlparse(dimension).fragment == urlparse(dspl2.GetUrl(dim)).fragment): 68 | return Response(json.dumps(dim, indent=2), mimetype='application/json') 69 | return Response("Unable to find requested dimension", status=404) 70 | except Exception as e: 71 | app.logger.warn(e) 72 | return Response("Unable to find requested dataset", status=404) 73 | 74 | 75 | @app.route('/api/slices_for_measure') 76 | def api_slices_for_measure(): 77 | dataset = request.args.get('dataset') 78 | if dataset is None: 79 | return Response("Dataset not specified", status=400) 80 | measure = request.args.get('measure') 81 | if measure is None: 82 | return Response("Measure not specified", status=400) 83 | try: 84 | getter = dspl2.HybridFileGetter(dataset) 85 | expander = dspl2.Dspl2JsonLdExpander(getter) 86 | ds = expander.Expand(expandSlices=False, expandDimensions=False) 87 | slices = [] 88 | for slice in ds['slice']: 89 | for sliceMeasure in slice['measure']: 90 | if (measure == dspl2.GetUrl(sliceMeasure) or 91 | urlparse(measure).fragment == urlparse(dspl2.GetUrl(sliceMeasure)).fragment): 92 | slices.append(slice) 93 | break 94 | return Response(json.dumps(slices, indent=2), 95 | 
mimetype='application/json') 96 | except Exception as e: 97 | app.logger.warn(e) 98 | return Response("Unable to find requested dataset", status=404) 99 | 100 | 101 | @lru_cache(maxsize=10) 102 | def _ExpandDataset(dataset): 103 | getter = dspl2.HybridFileGetter(dataset) 104 | expander = dspl2.Dspl2JsonLdExpander(getter) 105 | return expander.Expand() 106 | 107 | 108 | def _ParseDate(text, date_pattern): 109 | df = SimpleDateFormat(date_pattern) 110 | ts = df.parse(text) 111 | return datetime.datetime.utcfromtimestamp(ts) 112 | 113 | 114 | @lru_cache(maxsize=100) 115 | def _GetDataSeries(dataset, slice, measure, dimension_value): 116 | dim_val_dict = dict([dim_val.split(':') 117 | for dim_val in dimension_value.split(',')]) 118 | ds = _ExpandDataset(dataset) 119 | # Identify the time dimension's date format 120 | dateFormat = "yyyy-MM-dd" # default 121 | for dimension in ds['dimension']: 122 | if dimension['@type'] == 'TimeDimension': 123 | dateFormat = dimension.get('dateFormat') 124 | break 125 | 126 | for dsSlice in ds['slice']: 127 | if urlparse(dsSlice['@id']).fragment == urlparse(slice).fragment: 128 | ret = [] 129 | for observation in dsSlice['data']: 130 | val = {} 131 | # Slice should have exactly the requested dims + a time dim: 132 | if len(observation['dimensionValues']) != len(dim_val_dict) + 1: 133 | continue 134 | # All the non-time dims should match the filter: 135 | matched_dims = 0 136 | for dim_val in observation['dimensionValues']: 137 | dim_id = urlparse(dim_val['dimension']).fragment 138 | if f'#{dim_id}' in dim_val_dict: 139 | if dim_val.get('codeValue') == dim_val_dict[f'#{dim_id}']: 140 | val[dim_id] = dim_val.get('codeValue') 141 | matched_dims += 1 142 | elif dim_val.get('value'): 143 | val[dim_id] = _ParseDate(dim_val.get('value'), dateFormat) 144 | if matched_dims != len(dim_val_dict): 145 | continue 146 | for meas_val in observation['measureValues']: 147 | if urlparse(meas_val['measure']).fragment == urlparse(measure).fragment: 148 | val[urlparse(measure).fragment] = meas_val['value'] 149 | ret.append(val) 150 | return ret 151 | 152 | @app.route('/api/series') 153 | def api_series(): 154 | dataset = request.args.get('dataset') 155 | if dataset is None: 156 | return Response("Dataset not specified", status=400) 157 | slice = request.args.get('slice') 158 | if slice is None: 159 | return Response("Slice not specified", status=400) 160 | measure = request.args.get('measure') 161 | if measure is None: 162 | return Response("Measure not specified", status=400) 163 | dimension_values = request.args.get('dimension_value') 164 | if dimension_values is None: 165 | return Response("Dimension values not specified", status=400) 166 | ret = _GetDataSeries(dataset, slice, measure, dimension_values) 167 | if ret is not None: 168 | out = StringIO() 169 | pd.DataFrame(ret).to_csv(out) 170 | return Response(out.getvalue(), mimetype="text/csv") 171 | return Response("Unable to find series for requested dimensions", 172 | status=404) 173 | -------------------------------------------------------------------------------- /tools/dspl2viz/foo.jsonld: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/dspl/db79dad685276dbf98ca44b875d1481bc240c5c1/tools/dspl2viz/foo.jsonld -------------------------------------------------------------------------------- /tools/dspl2viz/static/dspl2viz.css: -------------------------------------------------------------------------------- 1 | body { 2 | display: flex; 3 | height: 100vh; 
4 | flex-flow: column wrap; 5 | } 6 | #vegalite-container { 7 | width: 30vw; 8 | height: 50vh; 9 | display: block; 10 | } 11 | 12 | #vegalite-input { 13 | width: 100%; 14 | height: 100%; 15 | } 16 | 17 | #dataset-explorer { 18 | width: 30vw; 19 | height: 49vh; 20 | vertical-align: top; 21 | border-style: solid; 22 | border-width: 1px; 23 | } 24 | #measure-explorer { 25 | max-height: 49%; 26 | overflow: scroll; 27 | } 28 | #dimension-explorer { 29 | max-height: 49%; 30 | overflow: scroll; 31 | } 32 | 33 | #chart-container { 34 | width: 65vw; 35 | height: 99vh; 36 | vertical-align: top; 37 | border-style: solid; 38 | border-width: 1px; 39 | } 40 | 41 | #chart { 42 | width: 100%; 43 | height: 100%; 44 | } 45 | -------------------------------------------------------------------------------- /tools/dspl2viz/static/dspl2viz.js: -------------------------------------------------------------------------------- 1 | var View; 2 | 3 | function drawChart(event) { 4 | try { 5 | var spec = JSON.parse(event.target.value); 6 | spec.height = document.querySelector('#chart-container').scrollHeight; 7 | spec.width = document.querySelector('#chart-container').scrollWidth; 8 | event.target.value = JSON.stringify(spec, null, 2); 9 | vegaEmbed("#chart", spec) 10 | // result.view provides access to the Vega View API 11 | .then(result => {View = result}) 12 | .catch(console.warn); 13 | } catch(e) { 14 | console.warn(e); 15 | } 16 | } 17 | 18 | var DatasetId = 'file:///usr/local/google/home/nkrishnaswami/dspl/samples/bls/unemployment/bls-unemployment.jsonld'; 19 | var SliceId = '#statesUnemploymentMonthly'; 20 | var MeasureId = '#unemployment_rate'; 21 | var DimValues = { 22 | seasonality: 'S', 23 | state: 'ST0100000000000', 24 | }; 25 | 26 | function setSpec() { 27 | var vlSpec = { 28 | "$schema": "https://vega.github.io/schema/vega-lite/v4.0.0-beta.10.json", 29 | "description": "A simple bar chart with embedded data.", 30 | "autosize": { 31 | "type": "fit", 32 | "resize": true 33 | }, 34 | "data": { 35 | "url": "/api/series", 36 | "format": { 37 | "type": "csv" 38 | } 39 | }, 40 | "mark": "line", 41 | "encoding": { 42 | "x": { 43 | "field": "month", 44 | "type": "ordinal" 45 | }, 46 | "y": { 47 | "field": "unemployment_rate", 48 | "type": "quantitative" 49 | }, 50 | "color": { 51 | "field": "state", 52 | "type": "ordinal" 53 | } 54 | } 55 | } 56 | vlSpec.data.url += '?dataset=' + encodeURIComponent(DatasetId); 57 | vlSpec.data.url += '&slice=' + encodeURIComponent(SliceId); 58 | vlSpec.data.url += '&measure=' + encodeURIComponent(MeasureId); 59 | vlSpec.data.url += '&dimension_value='; 60 | for (var idx = 0; idx < Object.keys(DimValues).length; ++idx) { 61 | if (idx != 0) { 62 | vlSpec.data.url += ','; 63 | } 64 | var key = Object.keys(DimValues)[idx]; 65 | var val = DimValues[key]; 66 | vlSpec.data.url += encodeURIComponent(`#${key}:${val}`); 67 | } 68 | 69 | var input = document.querySelector("#vegalite-input"); 70 | input.value = JSON.stringify(vlSpec, null, 2); 71 | input.dispatchEvent(new Event('change')); 72 | } 73 | 74 | function processMeasures(data) { 75 | let measure_container = document.querySelector('#measure-explorer'); 76 | measure_container.innerText = "Measures:"; 77 | console.log(measure_container); 78 | let ul = document.createElement('ul'); 79 | measure_container.appendChild(ul); 80 | for(let measure of data) { 81 | let id = $('<a>', { href: measure['@id'] }).prop('hash').substring(1); 82 | console.log("Processing ", measure.name, 'id:', id, measure); 83 | let li = document.createElement('li'); 84 | 
li.innerText = measure.name; 85 | if (measure.description) { 86 | li.title = measure.description; 87 | } 88 | li.addEventListener('click', function (event) { 89 | for(var elt of ul.children) { 90 | elt.style.fontWeight = 'normal'; 91 | } 92 | event.target.style.fontWeight = 'bold'; 93 | MeasureId = '#' + id; 94 | setSpec(); 95 | }); 96 | ul.appendChild(li); 97 | } 98 | } 99 | 100 | function processSlices(data) { 101 | let slice_container = document.querySelector('#slice-explorer'); 102 | slice_container.innerText = "Slices:"; 103 | console.log(slice_container); 104 | let ul = document.createElement('ul'); 105 | slice_container.appendChild(ul); 106 | for(let slice of data) { 107 | let id = $('<a>', { href: slice['@id'] }).prop('hash').substring(1); 108 | console.log("Processing ", slice.name, 'id:', id, slice); 109 | let li = document.createElement('li'); 110 | li.innerText = slice.name; 111 | if (slice.description) { 112 | li.title = slice.description; 113 | } 114 | li.addEventListener('click', function (event) { 115 | for(var elt of ul.children) { 116 | elt.style.fontWeight = 'normal'; 117 | } 118 | event.target.style.fontWeight = 'bold'; 119 | SliceId = '#' + id; 120 | setSpec(); 121 | }); 122 | ul.appendChild(li); 123 | } 124 | } 125 | 126 | 127 | 128 | 129 | function processDimensionValues(dimension) { 130 | let id = $('<a>', { href: dimension['@id'] }).prop('hash').substring(1); 131 | console.log("Processing ", dimension.name, 'id:', id); 132 | let div = document.createElement('div'); 133 | let dimension_container = document.querySelector('#dimension-explorer'); 134 | dimension_container.appendChild(div); 135 | div.innerText = dimension.name; 136 | if (dimension.description) { 137 | div.title = dimension.description; 138 | } 139 | let ul = document.createElement('ul'); 140 | div.appendChild(ul); 141 | dimension.codes = {}; 142 | for(let dimensionValue of dimension.codeList) { 143 | dimension.codes[dimensionValue.codeValue] = dimensionValue; 144 | let li = document.createElement('li'); 145 | li.innerText = dimensionValue.name; 146 | if (dimensionValue.description) { 147 | li.title = dimensionValue.description; 148 | } 149 | li.addEventListener('click', function (event) { 150 | for(var elt of ul.children) { 151 | elt.style.fontWeight = 'normal'; 152 | } 153 | event.target.style.fontWeight = 'bold'; 154 | DimValues[id] = dimensionValue.codeValue; 155 | setSpec(); 156 | }); 157 | ul.appendChild(li); 158 | } 159 | } 160 | 161 | 162 | function processDimensions(data) { 163 | for(let dimension of data) { 164 | if (dimension.name != 'States' && dimension.name != 'Seasonality') { 165 | continue; 166 | } 167 | $.getJSON('/api/dimension_values?dataset=' + encodeURIComponent(DatasetId) + '&dimension=' + encodeURIComponent(dimension['@id']), 168 | processDimensionValues); 169 | } 170 | } 171 | 172 | document.querySelector("#vegalite-input").addEventListener('change', drawChart); 173 | setSpec(); 174 | 175 | 176 | $.getJSON('/api/measures?dataset=' + encodeURIComponent(DatasetId), processMeasures); 177 | // $.getJSON('/api/slices_for_measure?dataset=' + encodeURIComponent(DatasetId), processMeasures); 178 | $.getJSON('/api/dimensions?dataset=' + encodeURIComponent(DatasetId), processDimensions); 179 | -------------------------------------------------------------------------------- /tools/dspl2viz/templates/dspl2viz.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | DSPL 2 Dataset Visualizer 4 | 5 | 6 
| 7 | 8 | 12 | 13 | 14 |
15 | 16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 | 24 |
25 |
26 |
27 |
28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /tools/dspltools/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 1.0 2 | Name: dspltools 3 | Version: 0.4.3 4 | Summary: Suite of command-line tools for generating DSPL datasets 5 | Home-page: http://code.google.com/apis/publicdata/docs/dspltools.html 6 | Author: Benjamin Yolken 7 | Author-email: yolken@google.com 8 | License: UNKNOWN 9 | Description: UNKNOWN 10 | Platform: UNKNOWN 11 | -------------------------------------------------------------------------------- /tools/dspltools/README.rst: -------------------------------------------------------------------------------- 1 | Documentation 2 | ============= 3 | See https://developers.google.com/public-data/docs/dspltools for documentation. 4 | 5 | 6 | Release Notes 7 | ============= 8 | *** v0.1 *** 9 | Release date: April 11, 2011 10 | 11 | Description: 12 | ------------ 13 | DSPL Tools released! 14 | 15 | 16 | *** v0.2 *** 17 | Release date: April 18, 2011 18 | 19 | Description: 20 | ------------ 21 | Enhanced DSPL Check by adding significant functionality beyond XML schema 22 | validation, including the checking of internal dataset references and CSV 23 | file structure. 24 | 25 | 26 | *** v0.2.1 *** 27 | Release date: April 21, 2011 28 | 29 | Description: 30 | ------------ 31 | Use column ID to distinguish between years and integers in dsplgen. 32 | 33 | 34 | *** v0.3 *** 35 | Release date: April 26, 2011 36 | 37 | Description: 38 | ------------ 39 | Extended DSPL Check to validate dataset CSV data (sorting, instance IDs) 40 | and slice / table links. 41 | 42 | Added concept hierarchy support to DSPL Gen. 43 | 44 | 45 | *** v0.3.5 *** 46 | Release date: May 4, 2011 47 | 48 | Description: 49 | ------------ 50 | Extended DSPL Check to support checking of: 51 | - Table column / concept type consistency 52 | - Date formats 53 | - Formatting of float and integer CSV values 54 | - Datasets where CSV columns are in different order than columns in table 55 | metadata 56 | 57 | Improved error messages when files can't be found or opened. 58 | 59 | Fixed bug in DSPL Gen naming of external concepts. 60 | 61 | 62 | *** v0.3.6 *** 63 | Release date: May 6, 2011 64 | 65 | Description: 66 | ------------ 67 | Added 'checking_level' option to DSPL Check. 68 | 69 | CSV files are now loaded in 'universal newline mode' to reduce risk of parsing 70 | problems. 71 | 72 | 73 | *** v0.3.7 *** 74 | Release date: May 6, 2011 75 | 76 | Description: 77 | ------------ 78 | Added zipped dataset checking to DSPL Check. 79 | 80 | Strip whitespace from CSV values (to mimic behavior of PDE importer). 81 | 82 | 83 | *** v0.4 *** 84 | Release date: May 20, 2011 85 | 86 | Description: 87 | ------------ 88 | Added topic reference checking to DSPL Check. 89 | 90 | Changed schema validation process to use local XML schema files instead of 91 | calling out to W3C servers. 92 | 93 | 94 | *** v0.4.1 *** 95 | Release date: June 2, 2011 96 | 97 | Description: 98 | ------------ 99 | Added test for trivial slices to DSPL Check. 100 | 101 | Improved behavior of DSPL Check when empty tables are encountered. 102 | 103 | 104 | *** v0.4.2 *** 105 | Release date: June 20, 2011 106 | 107 | Description: 108 | ------------ 109 | Changed implementation of default csv_data_source to use in-memory Python 110 | objects instead of sqlite. The latter can still be used by setting the '-t' 111 | option of dsplgen to 'csv_sqlite'. 
112 | 113 | 114 | *** v0.4.3 *** 115 | Release date: November 3, 2011 116 | 117 | Description: 118 | ------------ 119 | Fixed some bugs around multi-level concept hierarchies. 120 | 121 | Added total_val parameter to support pre-computed rollups in data. 122 | 123 | 124 | *** v0.5.0 *** 125 | Release date: January 22, 2019 126 | 127 | Description: 128 | ------------ 129 | Switch to lxml for XML parsing and schema validation. 130 | 131 | -------------------------------------------------------------------------------- /tools/dspltools/examples/dsplcheck/invalid_dspl/countries.csv: -------------------------------------------------------------------------------- 1 | country,name,latitude,longitude 2 | AD,Andorra,42.546245,1.601554 3 | AF,Afghanistan,33.93911,67.709953 4 | AI,Anguilla,18.220554,-63.068615 5 | AL,Albania,41.153332,20.168331 6 | US,United States,37.09024,-95.712891 7 | -------------------------------------------------------------------------------- /tools/dspltools/examples/dsplcheck/invalid_dspl/country_slice.csv: -------------------------------------------------------------------------------- 1 | country,year,population 2 | AF,1960,9616353 3 | AF,1961,9799379 4 | AF,1963,10188299 5 | AF,1962,9989846 6 | AD,1960,8616353 7 | AD,1961,8799379 8 | AD,1962,8989846 9 | AD,1963,9188299 10 | US,1960,19616353 11 | UX,1961,19799379 12 | US,1962,392039023 13 | US,1963,110188299 14 | -------------------------------------------------------------------------------- /tools/dspltools/examples/dsplcheck/invalid_dspl/invalid_dspl.xml: -------------------------------------------------------------------------------- 1 | 2 | 32 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | My statistics 47 | 48 | 49 | Some very interesting statistics about countries 50 | 51 | 52 | http://www.stats-bureau.com/mystats/info.html 53 | 54 | 55 | 56 | 57 | 58 | Bureau of Statistics 59 | 60 | 61 | http://www.stats-bureau.com 62 | 63 | 64 | 65 | 66 | 67 | 68 | Geography 69 | 70 | 71 | 72 | 73 | Social indicators 74 | 75 | 76 | 77 | Population indicators 78 | 79 | 80 | 81 | 82 | Poverty & income 83 | 84 | 85 | 86 | 87 | Health 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | Population 99 | 100 | 101 | Size of the resident population. 102 | 103 | 104 | 105 | 106 | 107 | 108 | 112 | 113 | 114 | 115 | Country 116 | 117 | 118 | My list of countries 119 | 120 | 121 | 122 | 123 | 124 | Country name 125 | 126 | The official name of the country 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 |
141 | 142 | 143 | 144 | 145 |
146 | 147 | 148 | 149 | 150 | 151 | countries.csv 152 | 153 |
154 | 155 | 156 | 157 | 158 | 159 | 160 | country_slice.csv 161 | 162 |
163 | 164 | 165 |
166 | -------------------------------------------------------------------------------- /tools/dspltools/examples/dsplcheck/valid_dataset/countries.csv: -------------------------------------------------------------------------------- 1 | country,name,latitude,longitude 2 | AD,Andorra,42.546245,1.601554 3 | AF,Afghanistan,33.93911,67.709953 4 | AI,Anguilla,18.220554,-63.068615 5 | AL,Albania,41.153332,20.168331 6 | US,United States,37.09024,-95.712891 7 | -------------------------------------------------------------------------------- /tools/dspltools/examples/dsplcheck/valid_dataset/country_slice.csv: -------------------------------------------------------------------------------- 1 | country,year,population 2 | AF,1960,9616353 3 | AF,1961,9799379 4 | AF,1963,10188299 5 | AF,1962,9989846 6 | AD,1960,8616353 7 | AD,1961,8799379 8 | AD,1962,8989846 9 | AD,1963,9188299 10 | US,1960,19616353 11 | US,1961,19799379 12 | US,1962,392039023 13 | US,1963,110188299 14 | -------------------------------------------------------------------------------- /tools/dspltools/examples/dsplcheck/valid_dataset/valid_dataset.xml: -------------------------------------------------------------------------------- 1 | 2 | 32 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | My statistics 47 | 48 | 49 | Some very interesting statistics about countries 50 | 51 | 52 | http://www.stats-bureau.com/mystats/info.html 53 | 54 | 55 | 56 | 57 | 58 | Bureau of Statistics 59 | 60 | 61 | http://www.stats-bureau.com 62 | 63 | 64 | 65 | 66 | 67 | 68 | Geography 69 | 70 | 71 | 72 | 73 | Social indicators 74 | 75 | 76 | 77 | Population indicators 78 | 79 | 80 | 81 | 82 | Poverty & income 83 | 84 | 85 | 86 | 87 | Health 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | Population 99 | 100 | 101 | Size of the resident population. 102 | 103 | 104 | 105 | 106 | 107 | 108 | 112 | 113 | 114 | 115 | Country 116 | 117 | 118 | My list of countries 119 | 120 | 121 | 122 | 123 | 124 | Country name 125 | 126 | The official name of the country 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 |
141 | 142 | 143 | 144 | 145 |
146 | 147 | 148 | 149 | 150 | 151 | countries.csv 152 | 153 |
154 | 155 | 156 | 157 | 158 | 159 | 160 | country_slice.csv 161 | 162 |
163 | 164 | 165 |
166 | -------------------------------------------------------------------------------- /tools/dspltools/examples/dsplgen/dsplgen_advanced.csv: -------------------------------------------------------------------------------- 1 | date[type=date;format=MM/dd/yyyy],first_category[slice_role=dimension;rollup=true;total_val=total],second_category[slice_role=dimension;rollup=true],first_value[slice_role=metric;type=integer],second_value[slice_role=metric;type=float] 2 | 1/1/2010,red,tall,10,23 3 | 1/1/2010,red,short,90,1 4 | 1/1/2010,blue,tall,12,31 5 | 1/1/2010,blue,short,21,231 6 | 1/1/2010,green,short,20,212 7 | 1/1/2010,total,tall,10,98 8 | 1/1/2010,total,short,-30,39 9 | 1/2/2010,red,tall,10,91 10 | 1/2/2010,red,short,32,123 11 | 1/2/2010,blue,tall,22,121 12 | 1/2/2010,blue,short,20,32 13 | 1/2/2010,green,short,1,19 14 | 1/2/2010,total,short,2,10 15 | 1/3/2010,red,short,10,34 16 | 1/3/2010,red,tall,10,34 17 | 1/3/2010,blue,short,93,21 18 | 1/3/2010,blue,tall,39,12 19 | 1/3/2010,green,short,31,31 20 | 1/3/2010,green,tall,21,31 21 | 1/3/2010,total,short,13,123 22 | 1/4/2010,red,tall,40,21 23 | 1/4/2010,red,short,22,12 24 | 1/4/2010,blue,tall,39,21 25 | 1/4/2010,blue,short,10,12 26 | 1/4/2010,green,tall,30,23 27 | 1/4/2010,green,short,10,123 28 | 1/4/2010,total,tall,-10,23 29 | 1/4/2010,total,short,31,661 30 | -------------------------------------------------------------------------------- /tools/dspltools/examples/dsplgen/dsplgen_hierarchies.csv: -------------------------------------------------------------------------------- 1 | year[type=date;format=yyyy],first_category[parent=third_category;rollup=true],second_category[rollup=true],third_category,first_value[type=integer],second_value[type=float] 2 | 2010,red,tall,bucket1,10,23 3 | 2010,red,short,bucket1,90,1 4 | 2010,blue,tall,bucket1,12,31 5 | 2010,blue,short,bucket1,21,231 6 | 2010,green,tall,bucket2,12,31 7 | 2010,green,short,bucket2,11,33 8 | 2011,red,tall,bucket1,12,23 9 | 2011,red,short,bucket1,93,1 10 | 2011,blue,tall,bucket1,15,31 11 | 2011,blue,short,bucket1,25,231 12 | 2011,green,tall,bucket2,13,31 13 | 2011,green,short,bucket2,15,33 14 | 2012,red,tall,bucket1,20,23 15 | 2012,red,short,bucket1,110,1 16 | 2012,blue,tall,bucket1,55,31 17 | 2012,blue,short,bucket1,77,231 18 | 2012,green,tall,bucket2,77,31 19 | 2012,green,short,bucket2,88,33 20 | -------------------------------------------------------------------------------- /tools/dspltools/examples/dsplgen/dsplgen_simple.csv: -------------------------------------------------------------------------------- 1 | date,first_category,second_category,first_value,second_value 2 | 1/1/2010,red,tall,10,23 3 | 1/1/2010,red,short,90,1 4 | 1/1/2010,blue,tall,12,31 5 | 1/1/2010,blue,short,21,231 6 | 1/1/2010,green,short,20,212 7 | 1/2/2010,red,tall,10,91 8 | 1/2/2010,red,short,32,123 9 | 1/2/2010,blue,tall,22,121 10 | 1/2/2010,blue,short,20,32 11 | 1/2/2010,green,short,1,19 12 | 1/3/2010,red,short,10,34 13 | 1/3/2010,red,tall,10,34 14 | 1/3/2010,blue,short,93,21 15 | 1/3/2010,blue,tall,39,12 16 | 1/3/2010,green,short,31,31 17 | 1/3/2010,green,tall,21,31 18 | 1/4/2010,red,tall,40,21 19 | 1/4/2010,red,short,22,12 20 | 1/4/2010,blue,tall,39,21 21 | 1/4/2010,blue,short,10,12 22 | 1/4/2010,green,tall,30,23 23 | 1/4/2010,green,short,10,123 24 | -------------------------------------------------------------------------------- /tools/dspltools/examples/dsplgen/dsplgen_yearly_data.csv: -------------------------------------------------------------------------------- 1 | 
year,first_category,second_category,first_value,second_value 2 | 2010,red,tall,10,23.5 3 | 2010,red,short,90,1.1 4 | 2010,blue,tall,12,31.3 5 | 2010,blue,short,21,231 6 | 2010,green,short,20,212 7 | 2011,red,tall,10,91 8 | 2011,red,short,32,123 9 | 2011,blue,tall,22,121 10 | 2011,blue,short,20,32 11 | 2011,green,short,1,19 12 | 2012,red,short,10,34 13 | 2012,red,tall,10,34.3 14 | 2012,blue,short,93,21 15 | 2012,blue,tall,39,12 16 | 2012,green,short,31,31 17 | 2012,green,tall,21,31 18 | 2013,red,tall,40,21 19 | 2013,red,short,22,12.55 20 | 2013,blue,tall,39,21 21 | 2013,blue,short,10,12 22 | 2013,green,tall,30,23 23 | 2013,green,short,10,123 24 | -------------------------------------------------------------------------------- /tools/dspltools/packages/dspllib/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # 3 | # Copyright 2018 Google LLC 4 | # 5 | # Use of this source code is governed by a BSD-style 6 | # license that can be found in the LICENSE file or at 7 | # https://developers.google.com/open-source/licenses/bsd 8 | -------------------------------------------------------------------------------- /tools/dspltools/packages/dspllib/data_sources/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # 3 | # Copyright 2018 Google LLC 4 | # 5 | # Use of this source code is governed by a BSD-style 6 | # license that can be found in the LICENSE file or at 7 | # https://developers.google.com/open-source/licenses/bsd 8 | -------------------------------------------------------------------------------- /tools/dspltools/packages/dspllib/data_sources/csv_data_source_sqlite_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # 3 | # Copyright 2018 Google LLC 4 | # 5 | # Use of this source code is governed by a BSD-style 6 | # license that can be found in the LICENSE file or at 7 | # https://developers.google.com/open-source/licenses/bsd 8 | 9 | """Tests of csv_data_source_sqlite module.""" 10 | 11 | 12 | __author__ = 'Benjamin Yolken ' 13 | 14 | import unittest 15 | 16 | import csv_data_source_sqlite 17 | import csv_sources_test_suite 18 | 19 | 20 | class CSVDataSourceSqliteTests(csv_sources_test_suite.CSVSourcesTests): 21 | """Tests of the CSVDataSourceSqlite object.""" 22 | 23 | def setUp(self): 24 | self.data_source_class = csv_data_source_sqlite.CSVDataSourceSqlite 25 | 26 | super(CSVDataSourceSqliteTests, self).setUp() 27 | 28 | 29 | class CSVDataSourceSqliteErrorTests( 30 | csv_sources_test_suite.CSVSourcesErrorTests): 31 | """Tests of the CSVDataSourceSqlite object under various error conditions.""" 32 | 33 | def setUp(self): 34 | self.data_source_class = csv_data_source_sqlite.CSVDataSourceSqlite 35 | 36 | super(CSVDataSourceSqliteErrorTests, self).setUp() 37 | 38 | 39 | if __name__ == '__main__': 40 | unittest.main() 41 | -------------------------------------------------------------------------------- /tools/dspltools/packages/dspllib/data_sources/csv_data_source_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # 3 | # Copyright 2018 Google LLC 4 | # 5 | # Use of this source code is governed by a BSD-style 6 | # license that can be found in the LICENSE file or at 7 | # https://developers.google.com/open-source/licenses/bsd 8 | 9 | """Tests of csv_data_source module.""" 10 | 11 | 12 | __author__ = 'Benjamin Yolken ' 13 | 14 
| import unittest 15 | 16 | import csv_data_source 17 | import csv_sources_test_suite 18 | 19 | 20 | class CSVDataSourceTests(csv_sources_test_suite.CSVSourcesTests): 21 | """Tests of the CSVDataSource object.""" 22 | 23 | def setUp(self): 24 | self.data_source_class = csv_data_source.CSVDataSource 25 | 26 | super(CSVDataSourceTests, self).setUp() 27 | 28 | 29 | class CSVDataSourceErrorTests(csv_sources_test_suite.CSVSourcesErrorTests): 30 | """Tests of the CSVDataSource object under various error conditions.""" 31 | 32 | def setUp(self): 33 | self.data_source_class = csv_data_source.CSVDataSource 34 | 35 | super(CSVDataSourceErrorTests, self).setUp() 36 | 37 | 38 | if __name__ == '__main__': 39 | unittest.main() 40 | -------------------------------------------------------------------------------- /tools/dspltools/packages/dspllib/data_sources/csv_sources_test_suite.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # 3 | # Copyright 2018 Google LLC 4 | # 5 | # Use of this source code is governed by a BSD-style 6 | # license that can be found in the LICENSE file or at 7 | # https://developers.google.com/open-source/licenses/bsd 8 | 9 | """A set of tests useful for CSV data sources.""" 10 | 11 | 12 | __author__ = 'Benjamin Yolken ' 13 | 14 | import StringIO 15 | import unittest 16 | 17 | import data_source 18 | 19 | 20 | _TEST_CSV_CONTENT = ( 21 | """date[type=date;format=yyyy-MM-dd],category1,category2[concept=geo:us_state;parent=category3;total_val=total],category3,metric1[extends=quantity:ratio;slice_role=metric],metric2[aggregation=avg],metric3[aggregation=count] 22 | 1980-01-01,red,california,west,89,321,71.21 23 | 1981-01-01,red,california,west,99,231,391.2 24 | 1982-01-01,blue,maine's,east,293,32,2.31 25 | 1983-01-01,blue,california,west,293,12,10.3 26 | 1984-01-01,red,maine's,east,932,48,10.78 27 | 1984-01-01,red,oregon,west,32,33,-14.34 28 | 1985-01-01,red,total,east,21,98,87.0 29 | 1986-01-01,red,total,west,33,90,-10.1""") 30 | 31 | 32 | class CSVSourcesTests(unittest.TestCase): 33 | """Basic tests of a CSV DataSource object.""" 34 | 35 | def setUp(self): 36 | self.csv_file = StringIO.StringIO(_TEST_CSV_CONTENT) 37 | self.data_source_obj = self.data_source_class(self.csv_file, verbose=False) 38 | 39 | def tearDown(self): 40 | self.data_source_obj.Close() 41 | self.csv_file.close() 42 | 43 | def testColumnBundle(self): 44 | """Test that column bundle is properly generated.""" 45 | column_bundle = self.data_source_obj.GetColumnBundle() 46 | 47 | self.assertEqual( 48 | [c.column_id for c in column_bundle.GetColumnIterator()], 49 | ['date', 'category1', 'category2', 'category3', 50 | 'metric1', 'metric2', 'metric3']) 51 | self.assertEqual( 52 | [c.data_type for c in column_bundle.GetColumnIterator()], 53 | ['date', 'string', 'string', 'string', 'integer', 'integer', 'float']) 54 | self.assertEqual( 55 | [c.data_format for c in column_bundle.GetColumnIterator()], 56 | ['yyyy-MM-dd', '', '', '', '', '', '']) 57 | self.assertEqual( 58 | [c.concept_ref for c in column_bundle.GetColumnIterator()], 59 | ['time:day', '', 'geo:us_state', '', '', '', '']) 60 | self.assertEqual( 61 | [c.concept_extension for c in column_bundle.GetColumnIterator()], 62 | ['', '', '', '', 'quantity:ratio', '', '']) 63 | self.assertEqual( 64 | [c.slice_role for c in column_bundle.GetColumnIterator()], 65 | ['dimension', 'dimension', 'dimension', 'dimension', 'metric', 'metric', 66 | 'metric']) 67 | self.assertEqual( 68 | [c.rollup for c in 
column_bundle.GetColumnIterator()], 69 | [False, False, False, True, False, False, False]) 70 | self.assertEqual( 71 | [c.parent_ref for c in column_bundle.GetColumnIterator()], 72 | ['', '', 'category3', '', '', '', '']) 73 | self.assertEqual( 74 | [c.total_val for c in column_bundle.GetColumnIterator()], 75 | ['', '', 'total', '', '', '', '']) 76 | 77 | def testEntityTableGeneration(self): 78 | """Test that single-concept tables are generated correctly.""" 79 | table_data = self.data_source_obj.GetTableData( 80 | data_source.QueryParameters( 81 | data_source.QueryParameters.CONCEPT_QUERY, ['category2'])) 82 | 83 | # Make sure quotes are properly escaped 84 | self.assertEqual(table_data.rows, 85 | [['california'], ['maine\'s'], ['oregon']]) 86 | 87 | def testMultiEntityTableGeneration(self): 88 | """Test that multi-concept tables are generated correctly.""" 89 | table_data = self.data_source_obj.GetTableData( 90 | data_source.QueryParameters( 91 | data_source.QueryParameters.CONCEPT_QUERY, 92 | ['category2', 'category3'])) 93 | 94 | # Make sure quotes are properly escaped 95 | self.assertEqual(table_data.rows, 96 | [['california', 'west'], ['maine\'s', 'east'], 97 | ['oregon', 'west']]) 98 | 99 | def testSliceTableGeneration(self): 100 | """Test that slice tables are generated correctly.""" 101 | table_data = self.data_source_obj.GetTableData( 102 | data_source.QueryParameters( 103 | data_source.QueryParameters.SLICE_QUERY, 104 | ['metric3', 'category2', 'metric1', 'metric2'])) 105 | 106 | self.assertEqual( 107 | table_data.rows, 108 | [[3, 'california', 89 + 99 + 293, (321.0 + 231.0 + 12.0) / 3.0], 109 | [2, 'maine\'s', 293 + 932, (32.0 + 48.0) / 2.0], 110 | [1, 'oregon', 32, 33]]) 111 | 112 | def testTotalsSliceTableGeneration(self): 113 | """Test that slice tables are generated correctly with total values.""" 114 | table_data = self.data_source_obj.GetTableData( 115 | data_source.QueryParameters( 116 | data_source.QueryParameters.SLICE_QUERY, 117 | ['category1', 'metric1', 'metric2', 'metric3'])) 118 | 119 | self.assertEqual( 120 | table_data.rows, 121 | [['red', 21 + 33, (98.0 + 90.0) / 2.0, 2]]) 122 | 123 | 124 | class CSVSourcesErrorTests(unittest.TestCase): 125 | """Tests of a CSV DataSource object for error cases.""" 126 | 127 | def setUp(self): 128 | pass 129 | 130 | def testBadHeaderKey(self): 131 | """Test that unknown key in header generates error.""" 132 | csv_file = StringIO.StringIO( 133 | 'date[unknown_key=unknown_value],metric\n1990,23232') 134 | 135 | self.assertRaises( 136 | data_source.DataSourceError, 137 | self.data_source_class, 138 | csv_file, False) 139 | 140 | csv_file.close() 141 | 142 | def testBadDataType(self): 143 | """Test that unknown type value generates error.""" 144 | csv_file = StringIO.StringIO('date[type=unknown_type],metric\n1990,23232') 145 | 146 | self.assertRaises( 147 | data_source.DataSourceError, 148 | self.data_source_class, 149 | csv_file, False) 150 | 151 | csv_file.close() 152 | 153 | def testBadAggregation(self): 154 | """Test that unknown aggregation operator generates error.""" 155 | csv_file = StringIO.StringIO( 156 | 'date[aggregation=unknown_aggregation],metric\n1990,23232') 157 | 158 | self.assertRaises( 159 | data_source.DataSourceError, 160 | self.data_source_class, 161 | csv_file, False) 162 | 163 | csv_file.close() 164 | 165 | def testBadSliceRoleKey(self): 166 | """Test that unknown value for slice_role generates error.""" 167 | csv_file = StringIO.StringIO( 168 | 'date[slice_role=unknown_role],metric\n1990,23232') 169 | 170 | 
self.assertRaises( 171 | data_source.DataSourceError, 172 | self.data_source_class, 173 | csv_file, False) 174 | 175 | csv_file.close() 176 | 177 | def testBadColumnID(self): 178 | """Test that a badly formatted column ID generates error.""" 179 | csv_file = StringIO.StringIO('my date[type=date],metric\n1990,23232') 180 | 181 | self.assertRaises( 182 | data_source.DataSourceError, 183 | self.data_source_class, 184 | csv_file, False) 185 | 186 | csv_file.close() 187 | 188 | def testBadDataRow(self): 189 | """Test that row with wrong number of entries causes error.""" 190 | csv_file = StringIO.StringIO( 191 | 'date,column\n01/01/1990,abcd,1234') 192 | 193 | self.assertRaises( 194 | data_source.DataSourceError, 195 | self.data_source_class, 196 | csv_file, False) 197 | 198 | csv_file.close() 199 | 200 | def testBadParentReference(self): 201 | """Test that illegal parent reference causes error.""" 202 | csv_file = StringIO.StringIO( 203 | 'date,column[parent=unknown_parent]\n01/01/1990,abcd') 204 | 205 | self.assertRaises( 206 | data_source.DataSourceError, 207 | self.data_source_class, 208 | csv_file, False) 209 | 210 | csv_file.close() 211 | 212 | def testMultipleParents(self): 213 | """Test that having multiple parent instances causes error.""" 214 | csv_file = StringIO.StringIO( 215 | 'date,column1[parent=column2],column2,column3\n' 216 | '1/1/2001,val1,parent1,323\n1/2/2001,val1,parent2,123') 217 | 218 | self.assertRaises( 219 | data_source.DataSourceError, 220 | self.data_source_class, 221 | csv_file, False) 222 | 223 | csv_file.close() 224 | -------------------------------------------------------------------------------- /tools/dspltools/packages/dspllib/data_sources/csv_utilities.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # 3 | # Copyright 2018 Google LLC 4 | # 5 | # Use of this source code is governed by a BSD-style 6 | # license that can be found in the LICENSE file or at 7 | # https://developers.google.com/open-source/licenses/bsd 8 | 9 | """Utility functions useful for CSV data sources.""" 10 | from __future__ import print_function 11 | 12 | __author__ = 'Benjamin Yolken ' 13 | 14 | 15 | import csv 16 | import re 17 | import string 18 | import warnings 19 | 20 | import data_source 21 | 22 | 23 | def _HeaderToColumn(header_string): 24 | """Parse the header string for a column. 25 | 26 | Args: 27 | header_string: The complete string for the column header 28 | 29 | Returns: 30 | A DataColumn object populated based on the header data 31 | 32 | Raises: 33 | DataSourceError: If there are any errors in parsing, e.g. if an unrecognized 34 | key is found. 
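  For example, the header string 'date[type=date;format=MM/dd/yyyy]' (as used
  in the dsplgen example CSVs) is parsed into a column with id 'date',
  data_type 'date', and data_format 'MM/dd/yyyy'.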
35 | """ 36 | # The column id must be at least one character long, and cannot contain the 37 | # characters '[', ']', ';', or whitespace 38 | parameters_match = re.match( 39 | '^([^\]\[;\s]+)(?:\[(.*)\]){0,1}$', 40 | header_string.strip().replace('"', '')) 41 | 42 | if not parameters_match: 43 | raise data_source.DataSourceError( 44 | 'Formatting error for header string: %s' % header_string) 45 | 46 | column_id = parameters_match.group(1) 47 | column = data_source.DataSourceColumn(column_id, internal_parameters={}) 48 | 49 | if parameters_match.group(2): 50 | # Parse the column parameters 51 | key_value_pairs = parameters_match.group(2).split(';') 52 | 53 | for key_value_pair in key_value_pairs: 54 | try: 55 | [key, value] = key_value_pair.split('=') 56 | except ValueError: 57 | raise data_source.DataSourceError( 58 | 'Formatting error for header string: %s' % header_string) 59 | 60 | # Map the key to the appropriate field of the DataSourceColumn object 61 | if key == 'type': 62 | if value not in ['date', 'float', 'integer', 'string']: 63 | raise data_source.DataSourceError( 64 | 'Unknown data type for column %s: %s' % 65 | (column.column_id, value)) 66 | 67 | column.data_type = value 68 | elif key == 'format': 69 | column.data_format = value 70 | elif key == 'concept': 71 | column.concept_ref = value 72 | elif key == 'extends': 73 | column.concept_extension = value 74 | elif key == 'parent': 75 | column.parent_ref = value 76 | elif key == 'slice_role': 77 | role_value = value.lower() 78 | 79 | if role_value not in ['dimension', 'metric']: 80 | raise data_source.DataSourceError( 81 | 'Unrecognized slice_roll in column %s: %s' % 82 | (column.column_id, value)) 83 | else: 84 | column.slice_role = role_value 85 | elif key == 'rollup': 86 | if value.lower() == 'true': 87 | column.rollup = True 88 | elif value.lower() == 'false': 89 | column.rollup = False 90 | else: 91 | raise data_source.DataSourceError( 92 | 'Unrecognized boolean value in column %s: %s' % 93 | (column.column_id, value)) 94 | elif key == 'total_val': 95 | column.total_val = value 96 | elif key == 'dropif': 97 | column.internal_parameters['dropif_val'] = value 98 | elif key == 'zeroif': 99 | column.internal_parameters['zeroif_val'] = value 100 | elif key == 'aggregation': 101 | if string.lower(value) not in ['sum', 'max', 'min', 'avg', 'count']: 102 | raise data_source.DataSourceError( 103 | 'Unknown aggregation for column %s: %s' % 104 | (column.column_id, value)) 105 | 106 | column.internal_parameters['aggregation'] = value 107 | else: 108 | raise data_source.DataSourceError( 109 | 'Unknown parameter for column %s: %s' % 110 | (column.column_id, key)) 111 | return column 112 | 113 | 114 | def ConstructColumnBundle(csv_file, verbose=True): 115 | """Construct a ColumnBundle from the header information in a CSV file. 
116 | 117 | Args: 118 | csv_file: The complete string for the column header 119 | verbose: Print out extra information to stdout 120 | 121 | Returns: 122 | A data_source.ColumnBundle object populated based on the CSV header 123 | 124 | Raises: 125 | DataSourceError: If there are any parsing errors or data 126 | inconsistencies 127 | """ 128 | # Get the first and second rows of the CSV 129 | header_csv_reader = csv.reader(csv_file, delimiter=',', quotechar='"') 130 | header_row_values = next(header_csv_reader) 131 | second_row_values = next(header_csv_reader) 132 | csv_file.seek(0) 133 | 134 | # Check that second row is properly formatted 135 | if len(header_row_values) != len(second_row_values): 136 | raise data_source.DataSourceError( 137 | 'Number of columns in row 2 (%d) does not match number ' 138 | 'expected (%d)' % (len(second_row_values), len(header_row_values))) 139 | 140 | column_bundle = data_source.DataSourceColumnBundle() 141 | 142 | for header_element in header_row_values: 143 | column_bundle.AddColumn(_HeaderToColumn(header_element)) 144 | 145 | num_date_columns = 0 146 | has_metric_column = False 147 | column_ids = [column.column_id for column in 148 | column_bundle.GetColumnIterator()] 149 | 150 | # Iterate through columns, populating and refining DataSourceColumn 151 | # parameters as necessary 152 | for c, column in enumerate(column_bundle.GetColumnIterator()): 153 | if verbose: 154 | print('\nEvaluating column %s' % column.column_id) 155 | 156 | # Check data type 157 | if not column.data_type: 158 | column.data_type = ( 159 | data_source.GuessDataType(second_row_values[c], column.column_id)) 160 | 161 | if verbose: 162 | print('Guessing that column %s is of type %s' % ( 163 | column.column_id, column.data_type)) 164 | 165 | # Check slice type 166 | if not column.slice_role: 167 | if column.data_type == 'integer' or column.data_type == 'float': 168 | column.slice_role = 'metric' 169 | else: 170 | column.slice_role = 'dimension' 171 | 172 | if verbose: 173 | print('Guessing that column %s is a %s' % ( 174 | column.column_id, column.slice_role)) 175 | 176 | # Check aggregation 177 | if column.slice_role == 'metric': 178 | has_metric_column = True 179 | 180 | if 'aggregation' not in column.internal_parameters: 181 | column.internal_parameters['aggregation'] = 'SUM' 182 | 183 | if verbose: 184 | print('Guessing that column %s should be aggregated by %s' % ( 185 | column.column_id, column.internal_parameters['aggregation'])) 186 | 187 | # Check parent 188 | if column.parent_ref: 189 | if column.parent_ref not in column_ids: 190 | raise data_source.DataSourceError( 191 | 'Column %s references a parent not defined in this dataset: %s' % 192 | (column.column_id, column.parent_ref)) 193 | 194 | parent_column = column_bundle.GetColumnByID(column.parent_ref) 195 | 196 | if not parent_column.rollup: 197 | parent_column.rollup = True 198 | 199 | if verbose: 200 | print('Making column %s rollup since it is a parent to column %s' 201 | % (parent_column.column_id, column.column_id)) 202 | 203 | # Check date format and concept 204 | if column.data_type == 'date': 205 | num_date_columns += 1 206 | 207 | if not column.data_format: 208 | column.data_format = ( 209 | data_source.GuessDateFormat(second_row_values[c])) 210 | 211 | if not column.concept_ref: 212 | column.concept_ref = ( 213 | data_source.GuessDateConcept(column.data_format)) 214 | 215 | if verbose: 216 | print('Guessing that column %s is formatted as %s and ' 217 | 'corresponds to %s' % ( 218 | column.column_id, 
column.data_format, column.concept_ref))
219 | 
220 |   # Warn user if their file will not produce interesting DSPL visualizations
221 |   if num_date_columns == 0:
222 |     warnings.warn('Input file does not have a date column',
223 |                   data_source.DataSourceWarning)
224 | 
225 |   elif num_date_columns > 1:
226 |     warnings.warn('Input file has more than one date column',
227 |                   data_source.DataSourceWarning)
228 | 
229 |   if not has_metric_column:
230 |     warnings.warn('Input file does not have any metrics',
231 |                   data_source.DataSourceWarning)
232 | 
233 |   return column_bundle
234 | 
--------------------------------------------------------------------------------
/tools/dspltools/packages/dspllib/data_sources/data_source_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2
2 | #
3 | # Copyright 2018 Google LLC
4 | #
5 | # Use of this source code is governed by a BSD-style
6 | # license that can be found in the LICENSE file or at
7 | # https://developers.google.com/open-source/licenses/bsd
8 | 
9 | """Tests of data_source module."""
10 | 
11 | 
12 | __author__ = 'Benjamin Yolken <yolken@google.com>'
13 | 
14 | import unittest
15 | 
16 | import data_source
17 | 
18 | 
19 | class DataSourceColumnBundleTests(unittest.TestCase):
20 |   """Tests of DataSourceColumnBundle object."""
21 | 
22 |   def setUp(self):
23 |     self.column_bundle = data_source.DataSourceColumnBundle(
24 |         [data_source.DataSourceColumn(column_id='col1'),
25 |          data_source.DataSourceColumn(column_id='col2'),
26 |          data_source.DataSourceColumn(column_id='col3')])
27 | 
28 |   def testAddColumn(self):
29 |     self.column_bundle.AddColumn(
30 |         data_source.DataSourceColumn(column_id='col4'))
31 |     self.assertEqual(self.column_bundle.GetColumnByID('col4').column_id,
32 |                      'col4')
33 | 
34 |   def testGetColumnByID(self):
35 |     column = self.column_bundle.GetColumnByID('col2')
36 |     self.assertEqual(column.column_id, 'col2')
37 | 
38 |   def testGetColumnByOrder(self):
39 |     column = self.column_bundle.GetColumnByOrder(2)
40 |     self.assertEqual(column.column_id, 'col3')
41 | 
42 |   def testGetNumColumns(self):
43 |     self.assertEqual(self.column_bundle.GetNumColumns(), 3)
44 | 
45 |   def testGetColumnIterator(self):
46 |     column_iterator = self.column_bundle.GetColumnIterator()
47 |     column_id_list = [c.column_id for c in column_iterator]
48 |     self.assertEqual(column_id_list, ['col1', 'col2', 'col3'])
49 | 
50 | 
51 | class TableDataTest(unittest.TestCase):
52 |   """Tests of TableData object."""
53 | 
54 |   def setUp(self):
55 |     self.table_data = data_source.TableData(
56 |         [[1, 2, 3], [4, 5, 6]])
57 | 
58 |   def testMergeValues(self):
59 |     another_table_data = data_source.TableData([[4, 5, 6], [6, 7, 8]])
60 |     merged_table_data = self.table_data.MergeValues(
61 |         another_table_data, num_columns=2)
62 |     self.assertEqual(merged_table_data.rows,
63 |                      [[1, 2, 3, 4, 5], [4, 5, 6, 6, 7]])
64 | 
65 |   def testMergeConstant(self):
66 |     merged_table_data = self.table_data.MergeConstant('abcd')
67 |     self.assertEqual(merged_table_data.rows,
68 |                      [[1, 2, 3, 'abcd'], [4, 5, 6, 'abcd']])
69 | 
70 | 
71 | class DataGuessingTest(unittest.TestCase):
72 |   """Test of data type / format guessing functions."""
73 | 
74 |   def setUp(self):
75 |     pass
76 | 
77 |   def testGuessType(self):
78 |     self.assertEqual(data_source.GuessDataType('312332'), 'integer')
79 |     self.assertEqual(data_source.GuessDataType('1999', 'year'), 'date')
80 |     self.assertEqual(data_source.GuessDataType('3123.32'), 'float')
81 |     self.assertEqual(data_source.GuessDataType('-3399332'), 'integer')
82 | 
self.assertEqual(data_source.GuessDataType('-3.0'), 'float') 83 | self.assertEqual(data_source.GuessDataType('1/1/11'), 'date') 84 | self.assertEqual(data_source.GuessDataType('01/1932'), 'date') 85 | self.assertEqual(data_source.GuessDataType('2-3-1932'), 'date') 86 | self.assertEqual(data_source.GuessDataType('something'), 'string') 87 | self.assertEqual(data_source.GuessDataType('3278.23728.223'), 'string') 88 | 89 | def testGuessDateFormat(self): 90 | self.assertEqual(data_source.GuessDateFormat('2819'), 'yyyy') 91 | self.assertEqual(data_source.GuessDateFormat('3/1990'), 'MM/yyyy') 92 | self.assertEqual(data_source.GuessDateFormat('1990-3'), 'yyyy-MM') 93 | self.assertEqual(data_source.GuessDateFormat('01-2-1981'), 'MM-dd-yyyy') 94 | self.assertEqual(data_source.GuessDateFormat('1990/2/3'), 'yyyy/MM/dd') 95 | 96 | self.assertRaises(data_source.DataSourceError, 97 | data_source.GuessDateFormat, '1990.12') 98 | self.assertRaises(data_source.DataSourceError, 99 | data_source.GuessDateFormat, 'Jan 1981') 100 | 101 | def testGuessDateConcept(self): 102 | self.assertEqual(data_source.GuessDateConcept('yyyy'), 'time:year') 103 | self.assertEqual(data_source.GuessDateConcept('yyyy-MM'), 'time:month') 104 | self.assertEqual(data_source.GuessDateConcept('yy.MM.dd'), 'time:day') 105 | self.assertEqual(data_source.GuessDateConcept('dd/MM/yyyy'), 'time:day') 106 | 107 | self.assertRaises(data_source.DataSourceError, 108 | data_source.GuessDateConcept, 'yy-mm') 109 | self.assertRaises(data_source.DataSourceError, 110 | data_source.GuessDateConcept, 'GG yyyy') 111 | 112 | 113 | if __name__ == '__main__': 114 | unittest.main() 115 | -------------------------------------------------------------------------------- /tools/dspltools/packages/dspllib/model/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # 3 | # Copyright 2018 Google LLC 4 | # 5 | # Use of this source code is governed by a BSD-style 6 | # license that can be found in the LICENSE file or at 7 | # https://developers.google.com/open-source/licenses/bsd 8 | -------------------------------------------------------------------------------- /tools/dspltools/packages/dspllib/model/dspl_model_loader_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # 3 | # Copyright 2018 Google LLC 4 | # 5 | # Use of this source code is governed by a BSD-style 6 | # license that can be found in the LICENSE file or at 7 | # https://developers.google.com/open-source/licenses/bsd 8 | 9 | """Tests of dspl_model_loader module.""" 10 | 11 | 12 | __author__ = 'Benjamin Yolken ' 13 | 14 | import os 15 | import os.path 16 | import shutil 17 | import tempfile 18 | import unittest 19 | 20 | import dspl_model_loader 21 | import dspl_model_test 22 | 23 | 24 | _SLICE_CSV_DATA = ( 25 | """col1,col2 26 | val1,1 27 | val2 , 2 28 | val3,3""") 29 | 30 | 31 | class DSPLModelLoaderTests(unittest.TestCase): 32 | """Basic test cases for dspl_model_loader module.""" 33 | 34 | def setUp(self): 35 | self.input_dir = tempfile.mkdtemp() 36 | self.xml_file_path = os.path.join(self.input_dir, 'dataset.xml') 37 | 38 | xml_file = open(self.xml_file_path, 'w') 39 | xml_file.write(dspl_model_test.TEST_DSPL_XML) 40 | xml_file.close() 41 | 42 | slice_csv_file = open(os.path.join(self.input_dir, 'mydata.csv'), 'w') 43 | slice_csv_file.write(_SLICE_CSV_DATA) 44 | slice_csv_file.close() 45 | 46 | def tearDown(self): 47 | shutil.rmtree(self.input_dir) 48 | 49 | def 
testDSPLImportLoading(self): 50 | """Test that dataset is imported correctly.""" 51 | dspl_dataset = dspl_model_loader.LoadDSPLFromFiles(self.xml_file_path) 52 | 53 | # Test basic info 54 | self.assertEqual(dspl_dataset.name, 'My Dataset') 55 | self.assertEqual(dspl_dataset.description, 'My Dataset Description') 56 | self.assertEqual(dspl_dataset.url, 'url1') 57 | 58 | self.assertEqual(dspl_dataset.provider_name, 'Googler') 59 | self.assertEqual(dspl_dataset.provider_url, 'url2') 60 | 61 | # Test imports 62 | self.assertEqual(len(dspl_dataset.imports), 2) 63 | 64 | self.assertEqual(dspl_dataset.imports[0].namespace_id, 65 | 'imported_namespace1') 66 | self.assertEqual(dspl_dataset.imports[0].namespace_url, 67 | 'http://imported_namespace1_url') 68 | self.assertEqual(dspl_dataset.imports[1].namespace_id, 69 | 'imported_namespace2') 70 | self.assertEqual(dspl_dataset.imports[1].namespace_url, 71 | 'http://imported_namespace2_url') 72 | 73 | # Test topics 74 | self.assertEqual(len(dspl_dataset.topics), 2) 75 | 76 | self.assertEqual(dspl_dataset.topics[0].topic_id, 'topic1') 77 | self.assertEqual(dspl_dataset.topics[0].topic_name, 'topic1_name') 78 | self.assertEqual(len(dspl_dataset.topics[0].children), 2) 79 | 80 | self.assertEqual(dspl_dataset.topics[0].children[0].topic_id, 'topic2') 81 | self.assertEqual( 82 | dspl_dataset.topics[0].children[0].topic_name, 'topic2_name') 83 | self.assertEqual(dspl_dataset.topics[0].children[1].topic_id, 'topic3') 84 | self.assertEqual( 85 | dspl_dataset.topics[0].children[1].topic_name, 'topic3_name') 86 | 87 | self.assertEqual(dspl_dataset.topics[1].topic_id, 'topic4') 88 | self.assertEqual(dspl_dataset.topics[1].topic_name, 'topic4_name') 89 | self.assertEqual(len(dspl_dataset.topics[1].children), 0) 90 | 91 | # Test concepts 92 | self.assertEqual(len(dspl_dataset.concepts), 3) 93 | 94 | self.assertEqual(dspl_dataset.concepts[0].concept_id, 'concept1') 95 | self.assertEqual(dspl_dataset.concepts[0].concept_extension_reference, 96 | 'entity:entity') 97 | self.assertEqual(dspl_dataset.concepts[0].concept_name, 'Concept 1') 98 | self.assertEqual(dspl_dataset.concepts[0].concept_description, 99 | 'Concept 1 Description') 100 | self.assertEqual(dspl_dataset.concepts[0].data_type, 'string') 101 | self.assertEqual(len(dspl_dataset.concepts[0].attributes), 1) 102 | self.assertEqual( 103 | dspl_dataset.concepts[0].attributes[0].concept_ref, 'attribute_concept') 104 | self.assertEqual( 105 | dspl_dataset.concepts[0].attributes[0].value, 'attribute_value') 106 | self.assertEqual(len(dspl_dataset.concepts[0].properties), 2) 107 | self.assertEqual( 108 | dspl_dataset.concepts[0].properties[0].concept_ref, 'property_concept') 109 | self.assertEqual( 110 | dspl_dataset.concepts[0].properties[0].is_parent, False) 111 | self.assertEqual( 112 | dspl_dataset.concepts[0].properties[1].concept_ref, 113 | 'another_property_concept') 114 | self.assertEqual( 115 | dspl_dataset.concepts[0].properties[1].is_parent, True) 116 | self.assertEqual(dspl_dataset.concepts[0].table_ref, 'table2') 117 | 118 | self.assertEqual(dspl_dataset.concepts[1].concept_id, 'concept2') 119 | self.assertEqual(dspl_dataset.concepts[1].concept_name, 'Concept 2') 120 | self.assertEqual(dspl_dataset.concepts[1].concept_description, 121 | 'Concept 2 Description') 122 | self.assertEqual(dspl_dataset.concepts[1].topic_references, 123 | ['topic1', 'topic2']) 124 | self.assertEqual(dspl_dataset.concepts[1].data_type, 'integer') 125 | self.assertEqual(len(dspl_dataset.concepts[1].attributes), 0) 126 | 
self.assertEqual(len(dspl_dataset.concepts[1].properties), 0) 127 | 128 | self.assertEqual(dspl_dataset.concepts[2].concept_id, 'geo:country') 129 | self.assertEqual(dspl_dataset.concepts[2].concept_reference, 'geo:country') 130 | 131 | # Test slices 132 | self.assertEqual(len(dspl_dataset.slices), 1) 133 | 134 | self.assertEqual(dspl_dataset.slices[0].slice_id, 'data_slice') 135 | self.assertEqual(dspl_dataset.slices[0].dimension_refs, 136 | ['concept1', 'geo:country']) 137 | self.assertEqual(dspl_dataset.slices[0].metric_refs, ['concept2']) 138 | self.assertEqual(dspl_dataset.slices[0].table_ref, 'table3') 139 | self.assertEqual( 140 | sorted(dspl_dataset.slices[0].dimension_map.items()), 141 | sorted([('concept1', 'concept_column1'), 142 | ('geo:country', 'concept_column3')])) 143 | self.assertEqual( 144 | dspl_dataset.slices[0].metric_map.items(), 145 | [('concept2', 'concept_column2')]) 146 | 147 | # Test tables 148 | self.assertEqual(len(dspl_dataset.tables), 1) 149 | 150 | self.assertEqual(dspl_dataset.tables[0].table_id, 'table') 151 | self.assertEqual(dspl_dataset.tables[0].file_name, 'mydata.csv') 152 | 153 | self.assertEqual(len(dspl_dataset.tables[0].columns), 2) 154 | self.assertEqual(dspl_dataset.tables[0].columns[0].column_id, 'col1') 155 | self.assertEqual(dspl_dataset.tables[0].columns[0].data_type, 'string') 156 | self.assertEqual(dspl_dataset.tables[0].columns[1].column_id, 'col2') 157 | self.assertEqual(dspl_dataset.tables[0].columns[1].data_type, 'integer') 158 | 159 | expected_table_rows = _SLICE_CSV_DATA.splitlines() 160 | expected_table_data = [] 161 | 162 | for row in expected_table_rows: 163 | split_row = row.split(',') 164 | cleaned_row = [r.strip() for r in split_row] 165 | 166 | expected_table_data.append(cleaned_row) 167 | 168 | self.assertEqual(dspl_dataset.tables[0].table_data, expected_table_data) 169 | 170 | def testBadFileReference(self): 171 | """Test case in which CSV file does not exist.""" 172 | os.remove(os.path.join(self.input_dir, 'mydata.csv')) 173 | 174 | self.assertRaises( 175 | dspl_model_loader.DSPLModelLoaderError, 176 | dspl_model_loader.LoadDSPLFromFiles, 177 | self.xml_file_path) 178 | 179 | def testPartialFileLoading(self): 180 | """Test case in which load_all_data is set to False.""" 181 | dspl_dataset = dspl_model_loader.LoadDSPLFromFiles( 182 | self.xml_file_path, load_all_data=False) 183 | 184 | expected_table_rows = _SLICE_CSV_DATA.splitlines()[0:2] 185 | expected_table_data = [r.split(',') for r in expected_table_rows] 186 | 187 | self.assertEqual(dspl_dataset.tables[0].table_data, expected_table_data) 188 | 189 | 190 | if __name__ == '__main__': 191 | unittest.main() 192 | -------------------------------------------------------------------------------- /tools/dspltools/packages/dspllib/validation/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # 3 | # Copyright 2018 Google LLC 4 | # 5 | # Use of this source code is governed by a BSD-style 6 | # license that can be found in the LICENSE file or at 7 | # https://developers.google.com/open-source/licenses/bsd 8 | -------------------------------------------------------------------------------- /tools/dspltools/packages/dspllib/validation/schemas/xml_1998.xsd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | See http://www.w3.org/XML/1998/namespace.html and 7 | http://www.w3.org/TR/REC-xml for information about this namespace. 
8 | 9 | This schema document describes the XML namespace, in a form 10 | suitable for import by other schema documents. 11 | 12 | Note that local names in this namespace are intended to be defined 13 | only by the World Wide Web Consortium or its subgroups. The 14 | following names are currently defined in this namespace and should 15 | not be used with conflicting semantics by any Working Group, 16 | specification, or document instance: 17 | 18 | base (as an attribute name): denotes an attribute whose value 19 | provides a URI to be used as the base for interpreting any 20 | relative URIs in the scope of the element on which it 21 | appears; its value is inherited. This name is reserved 22 | by virtue of its definition in the XML Base specification. 23 | 24 | lang (as an attribute name): denotes an attribute whose value 25 | is a language code for the natural language of the content of 26 | any element; its value is inherited. This name is reserved 27 | by virtue of its definition in the XML specification. 28 | 29 | space (as an attribute name): denotes an attribute whose 30 | value is a keyword indicating what whitespace processing 31 | discipline is intended for the content of the element; its 32 | value is inherited. This name is reserved by virtue of its 33 | definition in the XML specification. 34 | 35 | Father (in any context at all): denotes Jon Bosak, the chair of 36 | the original XML Working Group. This name is reserved by 37 | the following decision of the W3C XML Plenary and 38 | XML Coordination groups: 39 | 40 | In appreciation for his vision, leadership and dedication 41 | the W3C XML Plenary on this 10th day of February, 2000 42 | reserves for Jon Bosak in perpetuity the XML name 43 | xml:Father 44 | 45 | 46 | 47 | 48 | This schema defines attributes and an attribute group 49 | suitable for use by 50 | schemas wishing to allow xml:base, xml:lang or xml:space attributes 51 | on elements they define. 52 | 53 | To enable this, such a schema must import this schema 54 | for the XML namespace, e.g. as follows: 55 | <schema . . .> 56 | . . . 57 | <import namespace="http://www.w3.org/XML/1998/namespace" 58 | schemaLocation="http://www.w3.org/2001/03/xml.xsd"/> 59 | 60 | Subsequently, qualified reference to any of the attributes 61 | or the group defined below will have the desired effect, e.g. 62 | 63 | <type . . .> 64 | . . . 65 | <attributeGroup ref="xml:specialAttrs"/> 66 | 67 | will define a type which will schema-validate an instance 68 | element with any of those attributes 69 | 70 | 71 | 72 | In keeping with the XML Schema WG's standard versioning 73 | policy, this schema document will persist at 74 | http://www.w3.org/2001/03/xml.xsd. 75 | At the date of issue it can also be found at 76 | http://www.w3.org/2001/xml.xsd. 77 | The schema document at that URI may however change in the future, 78 | in order to remain compatible with the latest version of XML Schema 79 | itself. In other words, if the XML Schema namespace changes, the version 80 | of this document at 81 | http://www.w3.org/2001/xml.xsd will change 82 | accordingly; the version at 83 | http://www.w3.org/2001/03/xml.xsd will not change. 84 | 85 | 86 | 87 | 88 | 89 | In due course, we should install the relevant ISO 2- and 3-letter 90 | codes as the enumerated possible values . . . 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | See http://www.w3.org/TR/xmlbase/ for 106 | information about this attribute. 
107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | -------------------------------------------------------------------------------- /tools/dspltools/packages/dspllib/validation/test_dataset/countries.csv: -------------------------------------------------------------------------------- 1 | country,name,latitude,longitude 2 | AD,Andorra,42.546245,1.601554 3 | AF,Afghanistan,33.93911,67.709953 4 | AI,Anguilla,18.220554,-63.068615 5 | AL,Albania,41.153332,20.168331 6 | US,United States,37.09024,-95.712891 7 | -------------------------------------------------------------------------------- /tools/dspltools/packages/dspllib/validation/test_dataset/country_slice.csv: -------------------------------------------------------------------------------- 1 | country,year,population 2 | AF,1960,9616353 3 | AF,1961,9799379 4 | AF,1962,9989846 5 | AF,1963,10188299 6 | AD,1960,8616353 7 | AD,1961,8799379 8 | AD,1962,8989846 9 | AD,1963,9188299 10 | US,1960,19616353 11 | US,1961,19799379 12 | US,1962,19989846 13 | US,1963,110188299 -------------------------------------------------------------------------------- /tools/dspltools/packages/dspllib/validation/test_dataset/gender_country_slice.csv: -------------------------------------------------------------------------------- 1 | country,gender,year,population 2 | AF,M,1960,4808176 3 | AF,M,1961,4899689 4 | AF,F,1960,4808177 5 | AF,F,1961,4899690 6 | AD,M,1960,3808176 7 | AD,M,1961,3899689 8 | AD,F,1960,3808177 9 | AD,F,1961,3899690 10 | US,M,1960,9808176 11 | US,M,1961,9899689 12 | US,F,1960,9808177 13 | US,F,1961,9899690 -------------------------------------------------------------------------------- /tools/dspltools/packages/dspllib/validation/test_dataset/genders.csv: -------------------------------------------------------------------------------- 1 | gender,name 2 | M,Male 3 | F,Female 4 | -------------------------------------------------------------------------------- /tools/dspltools/packages/dspllib/validation/test_dataset/state_slice.csv: -------------------------------------------------------------------------------- 1 | state,year,population,unemployment_rate 2 | AL,1960,9616353,5.1 3 | AL,1961,9799379,5.2 4 | AL,1962,9989846,4.8 5 | AL,1963,10188299,6.9 6 | AK,1960,8616353,6.1 7 | AK,1961,8799379,6.2 8 | AK,1962,8989846,7.8 9 | AK,1963,9188299,7.9 -------------------------------------------------------------------------------- /tools/dspltools/packages/dspllib/validation/test_dataset/states.csv: -------------------------------------------------------------------------------- 1 | state,name,latitude,longitude 2 | AL,Alabama,32.318231,-86.902298 3 | AK,Alaska,63.588753,-154.493062 4 | AR,Arkansas,35.20105,-91.831833 5 | AZ,Arizona,34.048928,-111.093731 6 | CA,California,36.778261,-119.417932 7 | CO,Colorado,39.550051,-105.782067 8 | CT,Connecticut,41.603221,-73.087749 9 | -------------------------------------------------------------------------------- /tools/dspltools/packages/dspllib/validation/xml_validation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # 3 | # Copyright 2018 Google LLC 4 | # 5 | # Use of this source code is governed by a BSD-style 6 | # license that can be found in the LICENSE file or at 7 | # https://developers.google.com/open-source/licenses/bsd 8 | 9 | """Validate a DSPL XML file.""" 10 | 11 | 12 | __author__ = 'Benjamin Yolken ' 13 | 14 | from lxml import etree 15 | import os.path 16 | import re 17 | 18 | 19 | # The number of lines of context to 
show around XML errors
20 | _CONTEXT_LINES = 3
21 | 
22 | _SCHEMA_PATH = os.path.join(os.path.split(__file__)[0], 'schemas')
23 | _DSPL_SCHEMA_FILE = 'dspl.xsd'
24 | 
25 | 
26 | def GetErrorContext(xml_string, error_line_number):
27 |   """Generate a string that shows the context of an XML error.
28 | 
29 |   Args:
30 |     xml_string: String containing the contents of an XML file
31 |     error_line_number: 1-indexed line number on which error has been detected
32 | 
33 |   Returns:
34 |     A pretty-printed string containing the lines around the error
35 |   """
36 |   min_error_start_line = (error_line_number - 1) - _CONTEXT_LINES
37 |   max_error_end_line = (error_line_number - 1) + _CONTEXT_LINES
38 | 
39 |   error_context_lines = []
40 | 
41 |   for l, line in enumerate(xml_string.splitlines()):
42 |     if l >= min_error_start_line:
43 |       line_string = '%5d' % (l + 1)
44 | 
45 |       # Highlight the error line with asterisks
46 |       if (l + 1) == error_line_number:
47 |         line_string = line_string.replace(' ', '*')
48 | 
49 |       error_context_lines.append('%s: %s' % (line_string, line.rstrip()))
50 | 
51 |       if l >= max_error_end_line:
52 |         break
53 | 
54 |   return '\n'.join(error_context_lines)
55 | 
56 | 
57 | def GetErrorLineNumber(error_string):
58 |   """Parse out the line number from an XML validation error message.
59 | 
60 |   Args:
61 |     error_string: String representation of a validation error
62 | 
63 |   Returns:
64 |     Integer line number on which error was detected
65 |   """
66 |   line_match = re.search(': line ([0-9]+)', error_string)
67 | 
68 |   return int(line_match.group(1))
69 | 
70 | 
71 | def RunValidation(xml_file, schema_file=None, verbose=True):
72 |   """Run the validation process and return a message with the result.
73 | 
74 |   Args:
75 |     xml_file: An XML input file
76 |     schema_file: A DSPL schema file; if not given, the default 'dspl.xsd' is
77 |         used.
78 |     verbose: Include helpful, extra information about validation
79 | 
80 |   Returns:
81 |     String containing result of validation process
82 |   """
83 |   result = ''
84 | 
85 |   xml_file_text = xml_file.read()
86 | 
87 |   if schema_file:
88 |     schema_file_text = schema_file.read()
89 |   else:
90 |     schema_file = open(os.path.join(_SCHEMA_PATH, _DSPL_SCHEMA_FILE), 'r')
91 |     schema_file_text = schema_file.read()
92 |     schema_file.close()
93 | 
94 |   # Insert proper paths into XSD schemaLocation tags
95 |   substitution_function = (
96 |       lambda m: 'schemaLocation="%s"' % os.path.join(_SCHEMA_PATH, m.group(1)))
97 | 
98 |   schema_file_text = re.sub(
99 |       'schemaLocation="([a-zA-Z_0-9.]+)"',
100 |       substitution_function,
101 |       schema_file_text, 2)
102 | 
103 |   # Parse the schema file into an etree
104 |   schema_file_xml = etree.XML(schema_file_text)
105 | 
106 |   try:
107 |     schema = etree.XMLSchema(schema_file_xml)
108 |     parser = etree.XMLParser(schema=schema)
109 |     etree.fromstring(xml_file_text, parser)
110 |   except etree.XMLSyntaxError as xml_error:
111 |     # XML parsing error
112 |     error_string = str(xml_error)
113 |     if verbose:
114 |       result = ('Input does not validate against DSPL schema\n\n%s\n%s' %
115 |                 (error_string, GetErrorContext(
116 |                     xml_file_text,
117 |                     xml_error.lineno)))
118 |     else:
119 |       result = error_string
120 |   else:
121 |     if verbose:
122 |       result = 'XML file validates successfully!'
123 | 124 | return result 125 | -------------------------------------------------------------------------------- /tools/dspltools/packages/dspllib/validation/xml_validation_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # 3 | # Copyright 2018 Google LLC 4 | # 5 | # Use of this source code is governed by a BSD-style 6 | # license that can be found in the LICENSE file or at 7 | # https://developers.google.com/open-source/licenses/bsd 8 | 9 | """Tests of xml_validation module.""" 10 | 11 | 12 | __author__ = 'Benjamin Yolken ' 13 | 14 | import re 15 | import StringIO 16 | import unittest 17 | 18 | import xml_validation 19 | 20 | 21 | _DSPL_CONTENT_VALID = ( 22 | """ 23 | 25 | 26 | 27 | 28 | Dataset Name 29 | 30 | 31 | 32 | 33 | Provider Name 34 | 35 | 36 | """) 37 | 38 | 39 | _DSPL_CONTENT_XML_ERROR = ( 40 | """ 41 | 43 | 44 | 45 | 46 | Dataset Name 47 | 48 | 49 | 50 | 51 | Provider Name 52 | 53 | 54 | """) 55 | 56 | 57 | _DSPL_CONTENT_SCHEMA_ERROR = ( 58 | """ 59 | 61 | 62 | 63 | 64 | Dataset Name 65 | 66 | 67 | 68 | 69 | Provider Name 70 | 71 | 72 | """) 73 | 74 | _DSPL_BILLION_LAUGHS = ( 75 | """ 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | ]> 87 | 88 | 89 | 90 | &lol9; 91 | 92 | 93 | 94 | 95 | Provider Name 96 | 97 | 98 | """) 99 | 100 | 101 | class XMLValidationTests(unittest.TestCase): 102 | """Test case for xml_validation module.""" 103 | 104 | def setUp(self): 105 | pass 106 | 107 | def testXMLValidationGoodXML(self): 108 | """A simple end-to-end test of the valid XML case.""" 109 | valid_input_file = StringIO.StringIO(_DSPL_CONTENT_VALID) 110 | 111 | result = xml_validation.RunValidation(valid_input_file) 112 | self.assertTrue(re.search('validates successfully', result)) 113 | 114 | valid_input_file.close() 115 | 116 | def testXMLValidationXMLError(self): 117 | """A simple end-to-end test of the bad XML case.""" 118 | xml_error_input_file = StringIO.StringIO(_DSPL_CONTENT_XML_ERROR) 119 | 120 | result = xml_validation.RunValidation(xml_error_input_file) 121 | self.assertTrue( 122 | re.search('XML declaration allowed only.*line 1', result, flags=re.DOTALL)) 123 | 124 | xml_error_input_file.close() 125 | 126 | def testXMLValidationSchemaError(self): 127 | """A simple end-to-end test of the non-conforming XML case.""" 128 | schema_error_input_file = StringIO.StringIO(_DSPL_CONTENT_SCHEMA_ERROR) 129 | 130 | result = xml_validation.RunValidation(schema_error_input_file) 131 | # TODO: this validation failure has lineno 0; look into why lxml is not 132 | # returning the right location. 133 | self.assertTrue(re.search('The attribute \'illegalproperty\' is not allowed', 134 | result, flags=re.DOTALL)) 135 | 136 | schema_error_input_file.close() 137 | 138 | def testXMLBillionLaughsAttack(self): 139 | """A simple test to verify that the validation routine is not susceptible 140 | to the billion laughs attack. 
141 | """ 142 | billion_laughs_input_file = StringIO.StringIO(_DSPL_BILLION_LAUGHS) 143 | result = xml_validation.RunValidation(billion_laughs_input_file) 144 | self.assertTrue(re.search('Detected an entity reference loop', result)) 145 | 146 | billion_laughs_input_file.close() 147 | 148 | 149 | if __name__ == '__main__': 150 | unittest.main() 151 | -------------------------------------------------------------------------------- /tools/dspltools/requirements.txt: -------------------------------------------------------------------------------- 1 | lxml 2 | -------------------------------------------------------------------------------- /tools/dspltools/scripts/dsplcheck.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # 3 | # Copyright 2018 Google LLC 4 | # 5 | # Use of this source code is governed by a BSD-style 6 | # license that can be found in the LICENSE file or at 7 | # https://developers.google.com/open-source/licenses/bsd 8 | 9 | """Check a DSPL dataset for likely import errors.""" 10 | from __future__ import print_function 11 | 12 | 13 | __author__ = 'Benjamin Yolken ' 14 | 15 | import optparse 16 | import os 17 | import shutil 18 | import sys 19 | import tempfile 20 | import time 21 | import zipfile 22 | 23 | from dspllib.model import dspl_model_loader 24 | from dspllib.validation import dspl_validation 25 | from dspllib.validation import xml_validation 26 | 27 | 28 | def LoadOptionsFromFlags(argv): 29 | """Parse command-line arguments. 30 | 31 | Args: 32 | argv: The program argument vector (excluding the script name) 33 | 34 | Returns: 35 | A dictionary with key-value pairs for each of the options 36 | """ 37 | usage_string = 'python dsplcheck.py [options] [DSPL XML file or zip archive]' 38 | 39 | parser = optparse.OptionParser(usage=usage_string) 40 | 41 | parser.set_defaults(verbose=True) 42 | parser.add_option('-q', '--quiet', 43 | action='store_false', dest='verbose', 44 | help='Quiet mode') 45 | 46 | parser.add_option( 47 | '-l', '--checking_level', dest='checking_level', type='choice', 48 | choices=['schema_only', 'schema_and_model', 'full'], default='full', 49 | help='Level of checking to do (default: full)') 50 | 51 | (options, args) = parser.parse_args(args=argv) 52 | 53 | if not len(args) == 1: 54 | parser.error('An XML file or DSPL zip archive is required') 55 | 56 | return {'verbose': options.verbose, 57 | 'checking_level': options.checking_level, 58 | 'file_path': args[0]} 59 | 60 | 61 | def GetInputFilePath(input_file_path): 62 | """Parse the input file path, extracting a zip file if necessary. 63 | 64 | Args: 65 | input_file_path: String path to dsplcheck input file 66 | 67 | Returns: 68 | Dictionary containing final XML file path (post-extraction) and directory 69 | into which zip was extracted (or '' if input was not a zip). 
70 | """ 71 | if zipfile.is_zipfile(input_file_path): 72 | # Extract files to temporary directory and search for dataset XML 73 | zip_dir = tempfile.mkdtemp() 74 | 75 | zip_file = zipfile.ZipFile(input_file_path, 'r') 76 | zip_file.extractall(zip_dir) 77 | 78 | xml_file_paths = [] 79 | 80 | for (dirpath, unused_dirnames, filenames) in os.walk(zip_dir): 81 | for file_name in filenames: 82 | if file_name[-4:] == '.xml': 83 | xml_file_paths.append(os.path.join(dirpath, file_name)) 84 | 85 | if not xml_file_paths: 86 | print('Error: zip does not have any XML files') 87 | sys.exit(2) 88 | elif len(xml_file_paths) > 1: 89 | print('Error: zip contains multiple XML files') 90 | sys.exit(2) 91 | else: 92 | xml_file_path = xml_file_paths[0] 93 | 94 | zip_file.close() 95 | else: 96 | xml_file_path = input_file_path 97 | zip_dir = '' 98 | 99 | return {'xml_file_path': xml_file_path, 100 | 'zip_dir': zip_dir} 101 | 102 | 103 | def main(argv): 104 | """Parse command-line flags and run XML validator. 105 | 106 | Args: 107 | argv: The program argument vector (excluding the script name) 108 | """ 109 | start_time = time.time() 110 | 111 | options = LoadOptionsFromFlags(argv) 112 | file_paths = GetInputFilePath(options['file_path']) 113 | 114 | try: 115 | xml_file = open(file_paths['xml_file_path'], 'r') 116 | except IOError as io_error: 117 | print('Error opening XML file\n\n%s' % io_error) 118 | sys.exit(2) 119 | 120 | if options['verbose']: 121 | print('==== Checking XML file against DSPL schema....') 122 | 123 | result = xml_validation.RunValidation( 124 | xml_file, 125 | verbose=options['verbose']) 126 | 127 | print(result) 128 | 129 | if 'validates successfully' not in result: 130 | # Stop if XML validation not successful 131 | sys.exit(2) 132 | 133 | if options['checking_level'] != 'schema_only': 134 | if options['verbose']: 135 | print('\n==== Parsing DSPL dataset....') 136 | 137 | if options['checking_level'] == 'full': 138 | full_data_check = True 139 | else: 140 | full_data_check = False 141 | 142 | try: 143 | dataset = dspl_model_loader.LoadDSPLFromFiles( 144 | file_paths['xml_file_path'], load_all_data=full_data_check) 145 | except dspl_model_loader.DSPLModelLoaderError as loader_error: 146 | print('Error while trying to parse DSPL dataset\n\n%s' % loader_error) 147 | sys.exit(2) 148 | 149 | if options['verbose']: 150 | print('Parsing completed.') 151 | 152 | if full_data_check: 153 | print('\n==== Checking DSPL model and data....') 154 | else: 155 | print('\n==== Checking DSPL model....') 156 | 157 | dspl_validator = dspl_validation.DSPLDatasetValidator( 158 | dataset, full_data_check=full_data_check) 159 | 160 | print(dspl_validator.RunValidation(options['verbose'])) 161 | 162 | xml_file.close() 163 | 164 | if file_paths['zip_dir']: 165 | 166 | shutil.rmtree(file_paths['zip_dir']) 167 | 168 | if options['verbose']: 169 | print('\nCompleted in %0.2f seconds' % (time.time() - start_time)) 170 | 171 | 172 | if __name__ == '__main__': 173 | main(sys.argv[1:]) 174 | -------------------------------------------------------------------------------- /tools/dspltools/scripts/dsplcheck_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # 3 | # Copyright 2018 Google LLC 4 | # 5 | # Use of this source code is governed by a BSD-style 6 | # license that can be found in the LICENSE file or at 7 | # https://developers.google.com/open-source/licenses/bsd 8 | 9 | """Tests of dsplcheck module.""" 10 | 11 | 12 | __author__ = 'Benjamin Yolken ' 13 | 
--------------------------------------------------------------------------------
/tools/dspltools/scripts/dsplcheck_test.py:
--------------------------------------------------------------------------------
#!/usr/bin/python2
#
# Copyright 2018 Google LLC
#
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file or at
# https://developers.google.com/open-source/licenses/bsd

"""Tests of dsplcheck module."""


__author__ = 'Benjamin Yolken'

import os
import os.path
import re
import shutil
import StringIO
import sys
import tempfile
import unittest
import zipfile

import dsplcheck


_DSPL_CONTENT = (
    """<?xml version="1.0" encoding="UTF-8"?>
<dspl xmlns="http://schemas.google.com/dspl/2010">

  <info>
    <name>
      <value>Dataset Name</value>
    </name>
  </info>

  <provider>
    <name>
      <value>Provider Name</value>
    </name>
  </provider>
</dspl>""")


_DSPL_CONTENT_BAD_CSV_PATH = (
    """<?xml version="1.0" encoding="UTF-8"?>
<dspl xmlns="http://schemas.google.com/dspl/2010">

  <info>
    <name>
      <value>Dataset Name</value>
    </name>
  </info>

  <provider>
    <name>
      <value>Provider Name</value>
    </name>
  </provider>

  <tables>
    <table id="my_table">
      <column id="my_column" type="string"/>
      <data>
        <file format="csv">non_existent_file.csv</file>
      </data>
    </table>
  </tables>
</dspl>""")


class DSPLCheckTests(unittest.TestCase):
  """Test case for dsplcheck module."""

  def setUp(self):
    self.input_dir = tempfile.mkdtemp()
    self.valid_dspl_file_path = (
        os.path.join(self.input_dir, 'valid_dataset.xml'))

    self.valid_dspl_file = open(
        self.valid_dspl_file_path, 'w')
    self.valid_dspl_file.write(_DSPL_CONTENT)
    self.valid_dspl_file.close()

  def tearDown(self):
    shutil.rmtree(self.input_dir)

  def testValidDataset(self):
    """Test basic case of a dataset that validates and parses correctly."""
    self._StdoutTestHelper(
        dsplcheck.main, [self.valid_dspl_file_path],
        'validates successfully.*Parsing completed.*'
        'Checking DSPL model and data.*Completed')

  def testBadXMLFilePath(self):
    """Test case where a bad XML file path is passed in."""
    self._StdoutTestHelper(
        dsplcheck.main, ['nonexistent_input_file.xml'],
        'Error opening XML file', expect_exit=True)

  def testBadCSVFilePath(self):
    """Test case where the DSPL file has a bad CSV reference."""
    bad_csv_dspl_file_path = (
        os.path.join(self.input_dir, 'invalid_csv_dataset.xml'))

    bad_csv_dspl_file = open(bad_csv_dspl_file_path, 'w')
    bad_csv_dspl_file.write(_DSPL_CONTENT_BAD_CSV_PATH)
    bad_csv_dspl_file.close()

    self._StdoutTestHelper(
        dsplcheck.main, [bad_csv_dspl_file_path],
        'Error while trying to parse', expect_exit=True)

  def testSchemaOnlyOption(self):
    """Test that the 'schema only' checking level option works correctly."""
    self._StdoutTestHelper(
        dsplcheck.main, [self.valid_dspl_file_path, '-l', 'schema_only'],
        'validates successfully\W*Completed')

  def testSchemaAndModelOption(self):
    """Test that the 'schema and model' checking level option works."""
    self._StdoutTestHelper(
        dsplcheck.main, [self.valid_dspl_file_path, '-l', 'schema_and_model'],
        'Checking DSPL model(?! and data)')

  def testZipInput(self):
    """Test that the module properly handles zipped input."""
    zip_path = os.path.join(self.input_dir, 'dataset.zip')

    zip_file = zipfile.ZipFile(zip_path, 'w')
    zip_file.write(self.valid_dspl_file_path)
    zip_file.close()

    self._StdoutTestHelper(
        dsplcheck.main, [zip_path],
        'validates successfully.*Parsing completed.*'
        'Checking DSPL model and data.*Completed')

  def testZipMissingXML(self):
    """Test that a zip file without an XML file produces an error."""
    zip_path = os.path.join(self.input_dir, 'dataset.zip')

    zip_file = zipfile.ZipFile(zip_path, 'w')
    zip_file.writestr('test.txt', 'Text')
    zip_file.close()

    self._StdoutTestHelper(
        dsplcheck.main, [zip_path],
        'does not have any XML', expect_exit=True)

  def testZipMultipleXMLFiles(self):
    """Test that a zip file with multiple XML files produces an error."""
    zip_path = os.path.join(self.input_dir, 'dataset.zip')

    zip_file = zipfile.ZipFile(zip_path, 'w')
    zip_file.writestr('test.xml', 'Text')
    zip_file.writestr('test2.xml', 'Text')
    zip_file.close()

    self._StdoutTestHelper(
        dsplcheck.main, [zip_path],
        'multiple XML files', expect_exit=True)

  def _StdoutTestHelper(self, function, args,
                        expected_output, expect_exit=False):
    """Check the stdout output of a function against its expected value.

    Args:
      function: A function to execute
      args: The arguments to pass to the function
      expected_output: A regular expression expected to match the stdout output
      expect_exit: Boolean indicating whether the function execution should
          trigger a system exit
    """
    saved_stdout = sys.stdout

    # Temporarily redirect stdout to an in-memory buffer so that the output
    # of the function under test can be matched against expected_output
    redirected_output = StringIO.StringIO()
    sys.stdout = redirected_output

    if expect_exit:
      self.assertRaises(SystemExit, function, args)
    else:
      function(args)

    self.assertTrue(
        re.search(expected_output, redirected_output.getvalue(), re.DOTALL))

    redirected_output.close()
    sys.stdout = saved_stdout


if __name__ == '__main__':
  unittest.main()
--------------------------------------------------------------------------------
/tools/dspltools/scripts/dsplgen.py:
--------------------------------------------------------------------------------
#!/usr/bin/python2
#
# Copyright 2018 Google LLC
#
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file or at
# https://developers.google.com/open-source/licenses/bsd

"""Generate a DSPL dataset from a tabular data source via the command line."""
from __future__ import print_function


__author__ = 'Benjamin Yolken'

import optparse
import sys
import time

from dspllib.data_sources import csv_data_source
from dspllib.data_sources import csv_data_source_sqlite
from dspllib.data_sources import data_source_to_dspl


def LoadOptionsFromFlags(argv):
  """Parse command-line arguments.

  Args:
    argv: The program argument vector (excluding the script name)

  Returns:
    A dictionary with key-value pairs for each of the options
  """
  usage_string = 'python dsplgen.py [options] [csv file]'

  parser = optparse.OptionParser(usage=usage_string)
  parser.set_defaults(verbose=True)
  parser.add_option('-o', '--output_path', dest='output_path', default='',
                    help=('Path to an output directory '
                          '(default: current directory)'))
  parser.add_option('-q', '--quiet',
                    action='store_false', dest='verbose',
                    help='Quiet mode')
  parser.add_option('-t', '--data_type', dest='data_type', type='choice',
                    choices=['csv', 'csv_sqlite'], default='csv',
                    help='Type of data source to use (default: csv)')

  (options, args) = parser.parse_args(args=argv)

  if len(args) != 1:
    parser.error('A data source (e.g., path to a CSV file) is required')

  return {'data_type': options.data_type,
          'data_source': args[0],
          'output_path': options.output_path,
          'verbose': options.verbose}

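# For example (illustrative values, not taken from the original source):
#
#   LoadOptionsFromFlags(['-t', 'csv_sqlite', '-o', '/tmp/out', 'data.csv'])
#
# returns:
#
#   {'data_type': 'csv_sqlite', 'data_source': 'data.csv',
#    'output_path': '/tmp/out', 'verbose': True}
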
def main(argv):
  """Parse command-line flags and run the data source to DSPL conversion.

  Args:
    argv: The program argument vector (excluding the script name)
  """
  start_time = time.time()
  options = LoadOptionsFromFlags(argv)

  # Connect to the data source
  if options['data_type'] in ['csv', 'csv_sqlite']:
    try:
      csv_file = open(options['data_source'], 'r')
    except IOError as io_error:
      print('Error opening CSV file\n\n%s' % io_error)
      sys.exit(2)

    if options['data_type'] == 'csv':
      data_source_obj = csv_data_source.CSVDataSource(
          csv_file, options['verbose'])
    else:
      data_source_obj = csv_data_source_sqlite.CSVDataSourceSqlite(
          csv_file, options['verbose'])
  else:
    print('Error: Unknown data type: %s' % (options['data_type']))
    sys.exit(2)

  # Create a DSPL dataset from the data source
  dataset = data_source_to_dspl.PopulateDataset(
      data_source_obj, options['verbose'])
  data_source_obj.Close()

  if options['verbose']:
    print('Materializing dataset:')
    print(str(dataset))

  # Write the DSPL dataset to disk
  dataset.Materialize(options['output_path'])

  if options['verbose']:
    print('Completed in %0.2f seconds' % (time.time() - start_time))


if __name__ == '__main__':
  main(sys.argv[1:])
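
# Example invocations (illustrative):
#
#   python dsplgen.py -o /tmp/dspl_output mydata.csv
#   python dsplgen.py -t csv_sqlite -q mydata.csv
#
# Column metadata is embedded in the CSV header row, as in the tests below,
# e.g. date[type=date;format=yyyy-MM-dd] or
# category2[concept=geo:us_state;rollup=true].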
--------------------------------------------------------------------------------
/tools/dspltools/scripts/dsplgen_test.py:
--------------------------------------------------------------------------------
#!/usr/bin/python2
#
# Copyright 2018 Google LLC
#
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file or at
# https://developers.google.com/open-source/licenses/bsd

"""Tests of dsplgen module."""


__author__ = 'Benjamin Yolken'

import os
import os.path
import re
import shutil
import StringIO
import sys
import tempfile
import unittest

import dsplcheck
import dsplgen


_TEST_CSV_CONTENT = (
    """date[type=date;format=yyyy-MM-dd],category1,category2[concept=geo:us_state;rollup=true],metric1[extends=quantity:ratio;slice_role=metric],metric2,metric3
1980-01-01,red,california,89,321,71.21
1981-01-01,red,california,99,231,391.2
1982-01-01,blue,maine's,293,32,2.31
1983-01-01,blue,california,293,12,10.3
1984-01-01,red,maine's,932,48,10.78""")


class DSPLGenTests(unittest.TestCase):
  """Test cases for dsplgen module."""

  def setUp(self):
    self.input_dir = tempfile.mkdtemp()

    input_file = open(os.path.join(self.input_dir, 'input.csv'), 'w')
    input_file.write(_TEST_CSV_CONTENT)
    input_file.close()

    self.output_dir = tempfile.mkdtemp()

  def tearDown(self):
    shutil.rmtree(self.input_dir)
    shutil.rmtree(self.output_dir)

  def testDSPLGenEndToEnd(self):
    """A simple end-to-end test of the dsplgen application."""
    dsplgen.main(['-o', self.output_dir, '-q',
                  os.path.join(self.input_dir, 'input.csv')])

    self.assertTrue(
        os.path.isfile(os.path.join(self.output_dir, 'dataset.xml')))
    self.assertTrue(
        os.path.isfile(os.path.join(self.output_dir, 'category1_table.csv')))
    self.assertTrue(
        os.path.isfile(os.path.join(self.output_dir, 'slice_0_table.csv')))
    self.assertTrue(
        os.path.isfile(os.path.join(self.output_dir, 'slice_1_table.csv')))

    # Test that the output validates against dsplcheck
    saved_stdout = sys.stdout

    redirected_output = StringIO.StringIO()
    sys.stdout = redirected_output

    dsplcheck.main([os.path.join(self.output_dir, 'dataset.xml')])

    self.assertTrue(
        re.search(
            'validates successfully.*Parsing completed.*'
            'No issues found.*Completed',
            redirected_output.getvalue(), re.DOTALL))

    redirected_output.close()
    sys.stdout = saved_stdout

  def testCSVNotFound(self):
    """Test case in which the CSV can't be opened."""
    saved_stdout = sys.stdout
    redirected_output = StringIO.StringIO()
    sys.stdout = redirected_output

    self.assertRaises(SystemExit,
                      dsplgen.main, ['-q', 'non_existent_input_file.csv'])
    self.assertTrue('Error opening CSV file' in redirected_output.getvalue())

    redirected_output.close()
    sys.stdout = saved_stdout


if __name__ == '__main__':
  unittest.main()
--------------------------------------------------------------------------------
/tools/dspltools/scripts/run_all_tests.py:
--------------------------------------------------------------------------------
#!/usr/bin/python2
#
# Copyright 2018 Google LLC
#
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file or at
# https://developers.google.com/open-source/licenses/bsd

"""Run all tests defined in the DSPL Tools code."""


__author__ = 'Benjamin Yolken'

import unittest

_TEST_MODULE_NAMES = [
    'dsplcheck_test',
    'dsplgen_test',
    'dspllib.data_sources.csv_data_source_test',
    'dspllib.data_sources.csv_data_source_sqlite_test',
    'dspllib.data_sources.data_source_test',
    'dspllib.data_sources.data_source_to_dspl_test',
    'dspllib.model.dspl_model_loader_test',
    'dspllib.model.dspl_model_test',
    'dspllib.validation.dspl_validation_test',
    'dspllib.validation.xml_validation_test']


def main():
  """Run all DSPL Tools tests and print the results to stderr."""
  test_suite = unittest.TestSuite()

  for test_module_name in _TEST_MODULE_NAMES:
    test_suite.addTests(
        unittest.defaultTestLoader.loadTestsFromName(test_module_name))

  unittest.TextTestRunner().run(test_suite)


if __name__ == '__main__':
  main()
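
# Note: the dspllib.* test modules above can only be loaded if the dspllib
# package is importable, e.g. after it has been installed via the setup.py
# script in the parent directory.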
--------------------------------------------------------------------------------
/tools/dspltools/setup.py:
--------------------------------------------------------------------------------
#!/usr/bin/python2
#
# Copyright 2018 Google LLC
#
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file or at
# https://developers.google.com/open-source/licenses/bsd

"""Setup script for the DSPL Tools suite."""

import setuptools  # Imported before distutils for its side effects on setup()
from distutils.core import setup


setup(name='dspltools',
      version='0.5.0',
      description='Suite of command-line tools for generating DSPL datasets',
      author='Public Statistics',
      author_email='public-data-import-feedback@google.com',
      url='http://github.com/google/dspl',
      packages=['dspllib', 'dspllib.data_sources',
                'dspllib.model', 'dspllib.validation'],
      package_dir={'dspllib': 'packages/dspllib'},
      package_data={'dspllib.validation': ['schemas/*.xsd',
                                           'test_dataset/*.csv',
                                           'test_dataset/*.xml']},
      scripts=['scripts/dsplcheck.py', 'scripts/dsplgen.py',
               'scripts/run_all_tests.py'])
--------------------------------------------------------------------------------