├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── docs ├── CONTRIBUTING.md ├── LICENSE.md ├── _config.yml ├── dspl2-chart.png ├── dspl2-spec.md └── index.md ├── samples ├── bls │ └── unemployment │ │ ├── .gitattributes │ │ ├── age.csv │ │ ├── bls-unemployment.jsonld │ │ ├── cities.csv │ │ ├── citiesUnemploymentMonthly.csv │ │ ├── counties.csv │ │ ├── countiesUnemploymentMonthly.csv │ │ ├── footnotes.csv │ │ ├── metroAreasUnemploymentMonthly.csv │ │ ├── metro_areas.csv │ │ ├── states.csv │ │ ├── statesUnemploymentMonthly.csv │ │ ├── totalUnemploymentMonthly.csv │ │ ├── totalUnemploymentMonthly_ByAge.csv │ │ ├── totalUnemploymentMonthly_BySex.csv │ │ └── totalUnemploymentMonthly_BySex_ByAge.csv ├── eurostat │ ├── population_density │ │ ├── README.md │ │ ├── eurostat_population_density-inline.json │ │ ├── eurostat_population_density.html │ │ ├── eurostat_population_density.json │ │ ├── met_d3dens.csv │ │ ├── metroreg.csv │ │ ├── transform_d3dens.py │ │ └── transform_metroreg.py │ └── unemployment │ │ ├── age_groups.csv │ │ ├── countries.csv │ │ ├── country_age.csv │ │ ├── country_group_age.csv │ │ ├── country_group_sex.csv │ │ ├── country_group_sex_age.csv │ │ ├── country_group_total.csv │ │ ├── country_groups.csv │ │ ├── country_sex.csv │ │ ├── country_sex_age.csv │ │ ├── country_total.csv │ │ ├── eurostat-unemployment-dspl-v1-inline-small.json │ │ ├── eurostat-unemployment-dspl-v1.json │ │ ├── eurostat-unemployment.xml │ │ ├── footnotes.csv │ │ ├── seasonalities.csv │ │ └── sexes.csv ├── google │ ├── canonical │ │ ├── countries.csv │ │ ├── currencies.csv │ │ ├── entity.xml │ │ ├── entity_order.csv │ │ ├── geo.us.xml │ │ ├── geo.xml │ │ ├── granularity.csv │ │ ├── quantity.xml │ │ ├── states.csv │ │ ├── time.xml │ │ ├── unit.xml │ │ ├── unit_symbol_positions.csv │ │ └── us_counties.csv │ └── dspl-sample │ │ ├── countries.csv │ │ ├── country_slice.csv │ │ ├── dataset.xml │ │ ├── gender_country_slice.csv │ │ ├── genders.csv │ │ ├── state_slice.csv │ │ └── states.csv └── us_census │ ├── population │ └── census-totpop.json │ └── retail_sales │ ├── businesses.csv │ ├── census-retail-sales.xml │ ├── retail_sales_business.csv │ └── seasonalities.csv ├── schema ├── dspl.xsd └── dspl2.jsonld └── tools ├── dspl2 ├── dspl2 │ ├── __init__.py │ ├── expander.py │ ├── filegetter.py │ ├── jsonutil.py │ ├── rdfutil.py │ ├── schema │ │ ├── jsonldcontext.json │ │ └── schema.jsonld │ ├── templates │ │ ├── choose.html │ │ ├── display.html │ │ ├── error.html │ │ ├── render.html │ │ ├── viewer.css │ │ └── viewer.js │ ├── tests │ │ ├── __init__.py │ │ ├── test_expander.py │ │ ├── test_jsonutil.py │ │ └── test_rdfutil.py │ └── validator.py ├── requirements.txt ├── scripts │ ├── dspl2-expand.py │ ├── dspl2-pretty-print-server.py │ ├── dspl2-pretty-print.py │ └── dspl2-validate.py └── setup.py ├── dspl2viz ├── dspl2viz.py ├── foo.jsonld ├── static │ ├── dspl2viz.css │ └── dspl2viz.js └── templates │ └── dspl2viz.html └── dspltools ├── PKG-INFO ├── README.rst ├── examples ├── dsplcheck │ ├── invalid_dspl │ │ ├── countries.csv │ │ ├── country_slice.csv │ │ └── invalid_dspl.xml │ ├── invalid_xml │ │ └── invalid_xml.xml │ └── valid_dataset │ │ ├── countries.csv │ │ ├── country_slice.csv │ │ └── valid_dataset.xml └── dsplgen │ ├── dsplgen_advanced.csv │ ├── dsplgen_hierarchies.csv │ ├── dsplgen_simple.csv │ └── dsplgen_yearly_data.csv ├── packages └── dspllib │ ├── __init__.py │ ├── data_sources │ ├── __init__.py │ ├── csv_data_source.py │ ├── csv_data_source_sqlite.py │ ├── csv_data_source_sqlite_test.py │ 
├── csv_data_source_test.py │ ├── csv_sources_test_suite.py │ ├── csv_utilities.py │ ├── data_source.py │ ├── data_source_test.py │ ├── data_source_to_dspl.py │ └── data_source_to_dspl_test.py │ ├── model │ ├── __init__.py │ ├── dspl_model.py │ ├── dspl_model_loader.py │ ├── dspl_model_loader_test.py │ └── dspl_model_test.py │ └── validation │ ├── __init__.py │ ├── dspl_validation.py │ ├── dspl_validation_test.py │ ├── schemas │ ├── dspl.xsd │ ├── xml_1998.xsd │ └── xml_2001.xsd │ ├── test_dataset │ ├── countries.csv │ ├── country_slice.csv │ ├── dataset.xml │ ├── gender_country_slice.csv │ ├── genders.csv │ ├── state_slice.csv │ └── states.csv │ ├── xml_validation.py │ └── xml_validation_test.py ├── requirements.txt ├── scripts ├── dsplcheck.py ├── dsplcheck_test.py ├── dsplgen.py ├── dsplgen_test.py └── run_all_tests.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | __pycache__ 3 | _site 4 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement. You (or your employer) retain the copyright to your contribution; 10 | this simply gives us permission to use and redistribute your contributions as 11 | part of the project. Head over to <https://cla.developers.google.com/> to see 12 | your current agreements on file or to sign a new one. 13 | 14 | You generally only need to submit a CLA once, so if you've already submitted one 15 | (even if it was for a different project), you probably don't need to do it 16 | again. 17 | 18 | ## Code reviews 19 | 20 | All submissions, including submissions by project members, require review. We 21 | use GitHub pull requests for this purpose. Consult 22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 23 | information on using pull requests. 24 | 25 | ## Community Guidelines 26 | 27 | This project follows [Google's Open Source Community 28 | Guidelines](https://opensource.google.com/conduct/). 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2018, Google Inc. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are 6 | met: 7 | 8 | 1. Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | 3. Neither the name of Google Inc. nor the names of its 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission.
19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Dataset Publishing Language 2 | 3 | ## Introduction 4 | **DSPL** stands for **Dataset Publishing Language**. It is a representation 5 | format for both the metadata (information about the dataset, such as its name 6 | and provider, as well as the concepts it contains and displays) and actual data 7 | (the numbers) of datasets. Datasets described in this format can be imported 8 | into the [Google Public Data Explorer](https://www.google.com/publicdata), a 9 | tool that allows for rich, visual exploration of the data. 10 | 11 | This site hosts miscellaneous, open source content (i.e., schemas, example 12 | files, and utilities) associated with the DSPL standard. See our [documentation 13 | site](https://developers.google.com/public-data) for more details on what DSPL 14 | is and how to use it. The utilities in this repository are documented at [this 15 | site](https://developers.google.com/public-data/docs/dspltools). 16 | 17 | ## Build and install 18 | To build the tools, install `lxml`, then use the `setup.py` script in 19 | `tools/dspltools/`. You can use pip to install both the prerequisite and the package: 20 | 21 | ``` 22 | pip install -r tools/dspltools/requirements.txt 23 | pip install tools/dspltools 24 | ``` 25 | 26 | # DSPL 2 27 | The draft of the DSPL 2 specification, which replaces the existing XML metadata 28 | format with schema.org markup, can be found at the [DSPL GitHub 29 | page](https://google.github.io/dspl). The source for the specification is at 30 | [`docs/dspl2-spec.md`](https://github.com/google/dspl/blob/master/docs/dspl2-spec.md). 31 | 32 | Some initial library and tool support is available in [`tools/dspl2`](https://github.com/google/dspl/tree/master/tools/dspl2). 33 | 34 | ## Build and install 35 | To build the tools, install the prerequisites, then use the `setup.py` script in 36 | `tools/dspl2/`. You can use pip to install both the prerequisites and the package: 37 | 38 | ``` 39 | pip install -r tools/dspl2/requirements.txt 40 | pip install tools/dspl2 41 | ``` 42 |
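As a quick check that the install worked, the `dspl2` package can also be driven directly from Python. The snippet below is only a sketch: `LocalFileGetter` and `ValidateDspl2` are real names exported by the package, but the call shapes shown here are assumptions, so treat `tools/dspl2/scripts/dspl2-validate.py` as the authoritative example.

```python
# Sketch only: LocalFileGetter and ValidateDspl2 are exported by dspl2,
# but the argument and return shapes below are assumptions, not confirmed API.
from dspl2 import LocalFileGetter, ValidateDspl2

getter = LocalFileGetter(
    'samples/eurostat/population_density/eurostat_population_density.json')
for message in ValidateDspl2(getter):  # assumed to yield validation messages
    print(message)
```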
-------------------------------------------------------------------------------- /docs/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Contributing to Data Set Publishing Language, Version 2.0 3 | author: Google 4 | --- 5 | # How to Contribute 6 | 7 | We'd love to accept your patches and contributions to this project. There are 8 | just a few small guidelines you need to follow. 9 | 10 | ## Contributor License Agreement 11 | 12 | Contributions to this project must be accompanied by a Contributor License 13 | Agreement. You (or your employer) retain the copyright to your contribution; 14 | this simply gives us permission to use and redistribute your contributions as 15 | part of the project. Head over to <https://cla.developers.google.com/> to see 16 | your current agreements on file or to sign a new one. 17 | 18 | You generally only need to submit a CLA once, so if you've already submitted one 19 | (even if it was for a different project), you probably don't need to do it 20 | again. 21 | 22 | ## Code reviews 23 | 24 | All submissions, including submissions by project members, require review. We 25 | use GitHub pull requests for this purpose. Consult 26 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 27 | information on using pull requests. 28 | 29 | ## Community Guidelines 30 | 31 | This project follows [Google's Open Source Community 32 | Guidelines](https://opensource.google.com/conduct/). 33 | -------------------------------------------------------------------------------- /docs/_config.yml: -------------------------------------------------------------------------------- 1 | include: 2 | - LICENSE.md 3 | - CONTRIBUTING.md 4 | - index.md 5 | - dspl2-spec.md 6 | 7 | theme: jekyll-theme-cayman -------------------------------------------------------------------------------- /docs/dspl2-chart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/dspl/db79dad685276dbf98ca44b875d1481bc240c5c1/docs/dspl2-chart.png -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Data Set Publishing Language, Version 2.0 3 | author: Natarajan Krishnaswami 4 | --- 5 | # DSPL 2.0 6 | This is the project website for the DSPL 2.0 specification, samples, and related tools. 7 | 8 | ## Spec 9 | 10 | The draft specification is here: [dspl2-spec.html](dspl2-spec.html). 11 | 12 | To provide feedback on the draft, please create a [GitHub issue](https://github.com/google/dspl/issues), or email us at [public-data-import-feedback@google.com](mailto:public-data-import-feedback@google.com). 13 | 14 | ## Related tools 15 | 16 | Initial tools and a Python library are in the DSPL 2.0 GitHub repository under [`tools/dspl2`](https://github.com/google/dspl/tree/master/tools/dspl2). 17 | 18 | * [`dspl2-expand.py`](https://github.com/google/dspl/blob/master/tools/dspl2/scripts/dspl2-expand.py): tool to convert a DSPL 2.0 dataset with CSV references to one with only JSON-LD. 19 | * [`dspl2-validate.py`](https://github.com/google/dspl/blob/master/tools/dspl2/scripts/dspl2-validate.py): tool to do basic validation of a DSPL 2.0 dataset. 20 | * [`dspl2-pretty-print.py`](https://github.com/google/dspl/blob/master/tools/dspl2/scripts/dspl2-pretty-print.py): tool to pretty print a DSPL 2.0 dataset as HTML tables. 21 | * [`dspl2-pretty-print-server.py`](https://github.com/google/dspl/blob/master/tools/dspl2/scripts/dspl2-pretty-print-server.py): local web app version of the pretty printer. 22 | * [`dspl2`](https://github.com/google/dspl/tree/master/tools/dspl2/dspl2): Python library to load, normalize, and expand CSV files in DSPL 2.0 datasets; a minimal usage sketch follows below.
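To give a feel for the library in the last item, a CSV-to-JSON-LD expansion might look like the sketch below. `LocalFileGetter` and `Dspl2JsonLdExpander` are actual exports of the `dspl2` package, but the constructor arguments and the `Expand()` call are assumptions here; `dspl2-expand.py` shows the real invocation.

```python
# Sketch under assumed signatures; see dspl2-expand.py for the real usage.
from dspl2 import Dspl2JsonLdExpander, LocalFileGetter

getter = LocalFileGetter(
    'samples/eurostat/unemployment/eurostat-unemployment-dspl-v1.json')
dataset = Dspl2JsonLdExpander(getter).Expand()  # inlines the CSV-backed tables
```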
23 | 24 | ## Samples 25 | 26 | Examples are in the DSPL 2.0 GitHub repository under [`samples`](https://github.com/google/dspl/tree/master/samples). Currently, the Eurostat unemployment and Eurostat population density samples include DSPL 2.0 metadata. 27 | 28 | ## Contributing 29 | 30 | To contribute, see the [CONTRIBUTING](CONTRIBUTING.html) file and, after submitting a CLA, submit pull requests to the [DSPL GitHub repository](https://github.com/google/dspl). 31 | -------------------------------------------------------------------------------- /samples/bls/unemployment/.gitattributes: -------------------------------------------------------------------------------- 1 | countiesUnemploymentMonthly.csv filter=lfs diff=lfs merge=lfs -text 2 | citiesUnemploymentMonthly.csv filter=lfs diff=lfs merge=lfs -text 3 | -------------------------------------------------------------------------------- /samples/bls/unemployment/age.csv: -------------------------------------------------------------------------------- 1 | "codeValue","name" 2 | "07","16 to 17 years" 3 | "08","16 to 19 years" 4 | "10","16 to 24 years" 5 | "13","18 to 19 years" 6 | "15","18 years and over" 7 | "17","20 years and over" 8 | "20","20 to 24 years" 9 | "28","25 years and over" 10 | "30","25 to 29 years" 11 | "31","25 to 34 years" 12 | "33","25 to 54 years" 13 | "36","30 to 34 years" 14 | "37","35 to 39 years" 15 | "38","35 to 44 years" 16 | "39","40 to 44 years" 17 | "40","45 years and over" 18 | "41","45 to 49 years" 19 | "42","45 to 54 years" 20 | "44","50 to 54 years" 21 | "45","55 years and over" 22 | "48","55 to 59 years" 23 | "49","55 to 64 years" 24 | "56","60 to 61 years" 25 | "57","60 to 64 years" 26 | "61","62 to 64 years" 27 | "65","65 years and over" 28 | "66","65 to 69 years" 29 | "72","70 years and over" 30 | "73","70 to 74 years" 31 | "78","75 years and over" 32 | -------------------------------------------------------------------------------- /samples/bls/unemployment/citiesUnemploymentMonthly.csv: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:b2a75c4df3b7eb6b89bfb8f11227eeeb7c7b33f1f5593cc68b72c431c2e758c5 3 | size 28535697 4 | -------------------------------------------------------------------------------- /samples/bls/unemployment/countiesUnemploymentMonthly.csv: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:54f0f35753c166fa778f6bbe943dcdc873889b3802a6fd2dae72f0fceb10666d 3 | size 53413322 4 | -------------------------------------------------------------------------------- /samples/bls/unemployment/footnotes.csv: -------------------------------------------------------------------------------- 1 | codeValue,description 2 | 1,Data affected by changes in population controls. 3 | 2,Constructed on the 2002 Census Industry Classification from data originally coded on earlier classifications. Official series was not revised. 4 | 3,2000 forward coded on the 2002 Census Occupation Classification. 1983-99 constructed from data originally coded on earlier classifications. 5 | 4,2000 forward coded on the 2002 Census Industry Classification. 1983-99 constructed from data originally coded on earlier classifications. 6 | 7,Data do not meet publication criteria. 7 | 8,This series id code has been discontinued; data are available using the database tool at www.bls.gov/webapps/legacy/cpsatab8.htm. 8 | 9,Data from 1994 through 2002 were revised in February 2014 with updated seasonal adjustments. 9 | A,Area boundaries do not reflect official OMB definitions. 10 | N,Not available.
11 | P,Preliminary. 12 | V,The survey was not conducted due to bad weather. Interpolated data were seasonally adjusted. 13 | W,The household survey was not conducted for this month due to bad weather. Data were interpolated. 14 | Y,Data reflect controlling to interpolated statewide totals because the survey was not conducted. 15 | -------------------------------------------------------------------------------- /samples/bls/unemployment/states.csv: -------------------------------------------------------------------------------- 1 | codeValue,name,identifier,alternateName,geo.latitude,geo.longitude 2 | ST0100000000000,Alabama,AL,Alabama,32.318231,-86.902298 3 | ST0200000000000,Alaska,AK,Alaska,63.588753,-154.493062 4 | ST0400000000000,Arizona,AZ,Arizona,34.048928,-111.093731 5 | ST0500000000000,Arkansas,AR,Arkansas,35.20105,-91.831833 6 | ST0600000000000,California,CA,California,36.778261,-119.417932 7 | ST0800000000000,Colorado,CO,Colorado,39.550051,-105.782067 8 | ST0900000000000,Connecticut,CT,Connecticut,41.603221,-73.087749 9 | ST1000000000000,Delaware,DE,Delaware,38.910832,-75.52767 10 | ST1100000000000,District of Columbia,DC,Washington DC,38.905985,-77.033418 11 | ST1200000000000,Florida,FL,Florida,27.664827,-81.515754 12 | ST1300000000000,Georgia,GA,Georgia,32.157435,-82.907123 13 | ST1500000000000,Hawaii,HI,Hawaii,19.898682,-155.665857 14 | ST1600000000000,Idaho,ID,Idaho,44.068202,-114.742041 15 | ST1700000000000,Illinois,IL,Illinois,40.633125,-89.398528 16 | ST1800000000000,Indiana,IN,Indiana,40.551217,-85.602364 17 | ST1900000000000,Iowa,IA,Iowa,41.878003,-93.097702 18 | ST2000000000000,Kansas,KS,Kansas,39.011902,-98.484246 19 | ST2100000000000,Kentucky,KY,Kentucky,37.839333,-84.270018 20 | ST2200000000000,Louisiana,LA,Louisiana,31.244823,-92.145024 21 | ST2300000000000,Maine,ME,Maine,45.253783,-69.445469 22 | ST2400000000000,Maryland,MD,Maryland,39.045755,-76.641271 23 | ST2500000000000,Massachusetts,MA,Massachusetts,42.407211,-71.382437 24 | ST2600000000000,Michigan,MI,Michigan,44.314844,-85.602364 25 | ST2700000000000,Minnesota,MN,Minnesota,46.729553,-94.6859 26 | ST2800000000000,Mississippi,MS,Mississippi,32.354668,-89.398528 27 | ST2900000000000,Missouri,MO,Missouri,37.964253,-91.831833 28 | ST3000000000000,Montana,MT,Montana,46.879682,-110.362566 29 | ST3100000000000,Nebraska,NE,Nebraska,41.492537,-99.901813 30 | ST3200000000000,Nevada,NV,Nevada,38.80261,-116.419389 31 | ST3300000000000,New Hampshire,NH,New Hampshire,43.193852,-71.572395 32 | ST3400000000000,New Jersey,NJ,New Jersey,40.058324,-74.405661 33 | ST3500000000000,New Mexico,NM,New Mexico,34.97273,-105.032363 34 | ST3600000000000,New York,NY,New York State,43.299428,-74.217933 35 | ST3700000000000,North Carolina,NC,N Carolina,35.759573,-79.0193 36 | ST3800000000000,North Dakota,ND,N Dakota,47.551493,-101.002012 37 | ST3900000000000,Ohio,OH,Ohio,40.417287,-82.907123 38 | ST4000000000000,Oklahoma,OK,Oklahoma,35.007752,-97.092877 39 | ST4100000000000,Oregon,OR,Oregon,43.804133,-120.554201 40 | ST4200000000000,Pennsylvania,PA,Pennsylvania,41.203322,-77.194525 41 | ST4400000000000,Rhode Island,RI,Rhode Island,41.580095,-71.477429 42 | ST4500000000000,South Carolina,SC,S Carolina,33.836081,-81.163725 43 | ST4600000000000,South Dakota,SD,S Dakota,43.969515,-99.901813 44 | ST4700000000000,Tennessee,TN,Tennessee,35.517491,-86.580447 45 | ST4800000000000,Texas,TX,Texas,31.968599,-99.901813 46 | ST4900000000000,Utah,UT,Utah,39.32098,-111.093731 47 | ST5000000000000,Vermont,VT,Vermont,44.558803,-72.577841 48 | 
ST5100000000000,Virginia,VA,Virginia,37.431573,-78.656894 49 | ST5300000000000,Washington,WA,Washington State,47.751074,-120.740139 50 | ST5400000000000,West Virginia,WV,W Virginia,38.597626,-80.454903 51 | ST5500000000000,Wisconsin,WI,Wisconsin,43.78444,-88.787868 52 | ST5600000000000,Wyoming,WY,Wyoming,43.075968,-107.290284 53 | ST7200000000000,Puerto Rico,PR,Puerto Rico,18.220833,-66.590149 54 | -------------------------------------------------------------------------------- /samples/eurostat/population_density/README.md: -------------------------------------------------------------------------------- 1 | # Population Density 2 | This is a small example with one categorical dimension, one measure, and one slice. 3 | 4 | The formats available are: 5 | 6 | * [HTML Microdata](eurostat_population_density.html) 7 | * [JSON-LD + CSV](eurostat_population_density.json) 8 | * [JSON-LD alone](eurostat_population_density-inline.json) 9 | -------------------------------------------------------------------------------- /samples/eurostat/population_density/eurostat_population_density.json: -------------------------------------------------------------------------------- 1 | { 2 | "@context": "http://schema.org", 3 | "@type": "StatisticalDataset", 4 | "@id": "", 5 | "url": "https://data.europa.eu/euodp/en/data/dataset/bAzn6fiusnRFOBwUeIo78w", 6 | "identifier": "met_d3dens", 7 | "name": "Eurostat Population Density", 8 | "description": "Population density by metropolitan regions", 9 | "dateCreated": "2015-10-16", 10 | "dateModified": "2019-06-18", 11 | "temporalCoverage": "1990-01-01/2016-01-01", 12 | "distribution": { 13 | "@type": "DataDownload", 14 | "contentUrl": "http://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?file=data/met_d3dens.tsv.gz&unzip=true", 15 | "encodingFormat": "text/tab-separated-values" 16 | }, 17 | "spatialCoverage":{ 18 | "@type":"Place", 19 | "geo":{ 20 | "@type":"GeoShape", 21 | "name": "European Union", 22 | "box":"34.633285 -10.468556 70.096054 34.597916" 23 | } 24 | }, 25 | "license": "https://ec.europa.eu/eurostat/about/policies/copyright", 26 | "creator":{ 27 | "@type":"Organization", 28 | "url": "https://ec.europa.eu/eurostat", 29 | "name":"Eurostat" 30 | }, 31 | "publisher": { 32 | "@type": "Organization", 33 | "name": "Eurostat", 34 | "url": "https://ec.europa.eu/eurostat", 35 | "contactPoint": { 36 | "@type": "ContactPoint", 37 | "contactType": "User Support", 38 | "url": "https://ec.europa.eu/eurostat/help/support" 39 | } 40 | }, 41 | "dimension": [ 42 | { 43 | "@type": "CategoricalDimension", 44 | "@id": "#metroreg", 45 | "dataset": {"@id": ""}, 46 | "codeList": "metroreg.csv" 47 | }, 48 | { 49 | "@type": "TimeDimension", 50 | "@id": "#year", 51 | "dataset": {"@id": ""}, 52 | "name": "year", 53 | "equivalentType": "xsd:Year", 54 | "dateFormat": "yyyy" 55 | } 56 | ], 57 | "measure": [ 58 | { 59 | "@type": "StatisticalMeasure", 60 | "@id": "#density", 61 | "dataset": {"@id": ""}, 62 | "name": "Population density", 63 | "unitText": "persons per square kilometre" 64 | } 65 | ], 66 | "footnote": [ 67 | { 68 | "@type": "StatisticalAnnotation", 69 | "@id": "#footnote=b", 70 | "dataset": {"@id": ""}, 71 | "codeValue": "b", 72 | "description": "break in time series" 73 | }, 74 | { 75 | "@type": "StatisticalAnnotation", 76 | "@id": "#footnote=c", 77 | "dataset": {"@id": ""}, 78 | "codeValue": "c", 79 | "description": "confidential" 80 | }, 81 | { 82 | "@type": "StatisticalAnnotation", 83 | "@id": "#footnote=d", 84 | "dataset": {"@id": ""}, 85 | 
"codeValue": "d", 86 | "description": "definition differs, see metadata" 87 | }, 88 | { 89 | "@type": "StatisticalAnnotation", 90 | "@id": "#footnote=e", 91 | "dataset": {"@id": ""}, 92 | "codeValue": "e", 93 | "description": "estimated" 94 | }, 95 | { 96 | "@type": "StatisticalAnnotation", 97 | "@id": "#footnote=f", 98 | "dataset": {"@id": ""}, 99 | "codeValue": "f", 100 | "description": "forecast" 101 | }, 102 | { 103 | "@type": "StatisticalAnnotation", 104 | "@id": "#footnote=n", 105 | "dataset": {"@id": ""}, 106 | "codeValue": "n", 107 | "description": "not significant" 108 | }, 109 | { 110 | "@type": "StatisticalAnnotation", 111 | "@id": "#footnote=p", 112 | "dataset": {"@id": ""}, 113 | "codeValue": "p", 114 | "description": "provisional" 115 | }, 116 | { 117 | "@type": "StatisticalAnnotation", 118 | "@id": "#footnote=r", 119 | "dataset": {"@id": ""}, 120 | "codeValue": "r", 121 | "description": "revised" 122 | }, 123 | { 124 | "@type": "StatisticalAnnotation", 125 | "@id": "#footnote=s", 126 | "dataset": {"@id": ""}, 127 | "codeValue": "s", 128 | "description": "Eurostat estimate" 129 | }, 130 | { 131 | "@type": "StatisticalAnnotation", 132 | "@id": "#footnote=u", 133 | "dataset": {"@id": ""}, 134 | "codeValue": "u", 135 | "description": "low reliability" 136 | }, 137 | { 138 | "@type": "StatisticalAnnotation", 139 | "@id": "#footnote=z", 140 | "dataset": {"@id": ""}, 141 | "codeValue": "z", 142 | "description": "not applicable" 143 | } 144 | ], 145 | "slice": { 146 | "@type": "DataSlice", 147 | "@id": "#metroreg_year", 148 | "dataset": {"@id": ""}, 149 | "dimension": ["#metroreg", "#year"], 150 | "measure": {"@id": "#density"}, 151 | "data": {"@id": "met_d3dens.csv"} 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /samples/eurostat/population_density/transform_d3dens.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2019 Google LLC 3 | # 4 | # Use of this source code is governed by a BSD-style 5 | # license that can be found in the LICENSE file or at 6 | # https://developers.google.com/open-source/licenses/bsd 7 | import pandas as pd 8 | 9 | 10 | # Read the file and set the index column to the metro region. 11 | df = pd.read_csv( 12 | 'http://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?file=data/met_d3dens.tsv.gz', 13 | delimiter='\t', 14 | index_col='metroreg\\time') 15 | 16 | # Stack the column headers into a single column's values, and make the metro 17 | # region a column again. 18 | df = df.stack().reset_index() 19 | 20 | # Rename the columns 21 | df.columns = ['metroreg', 'year', 'density'] 22 | 23 | # Strip surrounding whitespace from each value 24 | for col in df.columns: 25 | df[col] = df[col].str.strip() 26 | 27 | # Indicate that the year is an integer 28 | df['year'] = df['year'].astype(int) 29 | 30 | # Add a string-valued footnote column with default empty string. 
31 | df['density*'] = '' 32 | 33 | # Split up any values with footnotes between the value and footnote columns 34 | for idx, density in df.loc[df['density'].str.contains(' '), 35 | 'density'].iteritems(): 36 | density, footnote = density.split(' ') 37 | df.loc[idx, 'density'] = density 38 | df.loc[idx, 'density*'] = ';'.join(list(footnote)) 39 | 40 | # Remove the placeholder value of ':' 41 | df.loc[df['density'] == ':', 'density'] = None 42 | 43 | # Remove rows with no density 44 | df = df[pd.notnull(df['density'])] 45 | 46 | # And write the results to a CSV file. 47 | df.to_csv('met_d3dens.csv', index=False) 48 | -------------------------------------------------------------------------------- /samples/eurostat/population_density/transform_metroreg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2019 Google LLC 3 | # 4 | # Use of this source code is governed by a BSD-style 5 | # license that can be found in the LICENSE file or at 6 | # https://developers.google.com/open-source/licenses/bsd 7 | import pandas as pd 8 | 9 | 10 | # Read the input file. 11 | df = pd.read_csv('http://dd.eionet.europa.eu/vocabulary/eurostat/metroreg/csv') 12 | 13 | # Drop irrelevant columns 14 | df = df[['Notation', 'Label']] 15 | 16 | # Rename columns 17 | df.columns = ['codeValue', 'name'] 18 | 19 | # Write output file 20 | df.to_csv('metroreg.csv', index=False) 21 | -------------------------------------------------------------------------------- /samples/eurostat/unemployment/age_groups.csv: -------------------------------------------------------------------------------- 1 | "codeValue","name@en","name@fr","name@de" 2 | "y25-74","From 25 to 74 years","De 25 à 74 ans","25 bis 74 Jahre" 3 | "y_lt25","Less than 25 years","Moins de 25 ans","Weniger als 25 Jahre" 4 | -------------------------------------------------------------------------------- /samples/eurostat/unemployment/countries.csv: -------------------------------------------------------------------------------- 1 | "codeValue","alternateName","country_group","name@en","name@fr","name@de","latitude","longitude" 2 | "at","AT","eu","Austria","Autriche","Österreich","47.6965545","13.34598005" 3 | "be","BE","eu","Belgium","Belgique","Belgien","50.501045","4.47667405" 4 | "bg","BG","eu","Bulgaria","Bulgarie","Bulgarien","42.72567375","25.4823218" 5 | "hr","HR","non-eu","Croatia","Croatie","Kroatien","44.74664297","15.34084438" 6 | "cy","CY","eu","Cyprus","Chypre","Zypern","35.129141","33.4286823" 7 | "cz","CZ","eu","Czech Republic","République tchèque","Tschechische Republik","49.803531","15.47499805" 8 | "dk","DK","eu","Denmark","Danemark","Dänemark","55.93968425","9.51668905" 9 | "ee","EE","eu","Estonia","Estonie","Estland","58.5924685","25.8069503" 10 | "fi","FI","eu","Finland","Finlande","Finnland","64.95015875","26.06756405" 11 | "fr","FR","eu","France","France","Frankreich","46.7109945","1.7185608" 12 | "de","DE","eu","Germany (including former GDR from 1991)","Allemagne (incluant l'ancienne RDA à partir de 1991)","Deutschland (einschließlich der ehemaligen DDR seit 1991)","51.16382538","10.4540478" 13 | "gr","GR","eu","Greece","Grèce","Griechenland","39.698467","21.57725572" 14 | "hu","HU","eu","Hungary","Hongrie","Ungarn","47.16116325","19.5042648" 15 | "ie","IE","eu","Ireland","Irlande","Irland","53.41526","-8.2391222" 16 | "it","IT","eu","Italy","Italie","Italien","42.504191","12.57378705" 17 | "lv","LV","eu","Latvia","Lettonie","Lettland","56.880117","24.60655505" 18 | 
"lt","LT","eu","Lithuania","Lituanie","Litauen","55.173687","23.9431678" 19 | "lu","LU","eu","Luxembourg","Luxembourg","Luxemburg","49.815319","6.13335155" 20 | "mt","MT","eu","Malta","Malte","Malta","35.902422","14.4474608" 21 | "nl","NL","eu","Netherlands","Pays-Bas","Niederlande","52.10811825","5.3301983" 22 | "no","NO","non-eu","Norway","Norvège","Norwegen","64.55645975","12.66576565" 23 | "pl","PL","eu","Poland","Pologne","Polen","51.91890725","19.1343338" 24 | "pt","PT","eu","Portugal","Portugal","Portugal","39.55806875","-7.84494095" 25 | "ro","RO","eu","Romania","Roumanie","Rumänien","45.94261125","24.99015155" 26 | "sk","SK","eu","Slovakia","Slovaquie","Slowakei","48.67264375","19.7000323" 27 | "si","SI","eu","Slovenia","Slovénie","Slowenien","46.14925925","14.98661705" 28 | "es","ES","eu","Spain","Espagne","Spanien","39.8950135","-2.9882957" 29 | "se","SE","eu","Sweden","Suède","Schweden","62.1984675","14.89630657" 30 | "tr","TR","non-eu","Turkey","Turquie","Türkei","38.95294205","35.43979471" 31 | "uk","GB","eu","United Kingdom","Royaume-Uni","Vereinigtes Königreich","54.315447","-2.23261195" 32 | -------------------------------------------------------------------------------- /samples/eurostat/unemployment/country_groups.csv: -------------------------------------------------------------------------------- 1 | codeValue,name@en,name@fr,name@de 2 | eu,"European Union","Union européenne","Europäische Union" 3 | non-eu,"Non EU countries","Pays hors Union européenne",Nicht-EU-Länder 4 | -------------------------------------------------------------------------------- /samples/eurostat/unemployment/eurostat-unemployment-dspl-v1.json: -------------------------------------------------------------------------------- 1 | { 2 | "@context": [ 3 | "http://schema.org", 4 | { 5 | "name": { "@container": "@language" }, 6 | "description": { "@container": "@language" }, 7 | "url": { "@container": "@language" } 8 | } 9 | ], 10 | "@type": "StatisticalDataset", 11 | "@id": "#eurostat-unemployment", 12 | "name": { 13 | "en": "Unemployment in Europe (monthly)", 14 | "de": "Arbeitslosigkeit in Europa (monatlich)", 15 | "fr": "Le Chômage en Europe (mensuel)" 16 | }, 17 | "description": { 18 | "en": "Harmonized unemployment data for European countries. This dataset was prepared by Google based on data downloaded from Eurostat.", 19 | "de": "Harmonisierte Daten zur Arbeitslosigkeit für europäische Länder. Dieser Datensatz wurde von Google aufbereitet, basierend auf online Daten von Eurostat.", 20 | "fr": "Données harmonisées sur le chômage dans les pays européens. Ces données ont été préparées par Google sur la base de données téléchargées à partir d'Eurostat." 
21 | }, 22 | "url": { 23 | "en": "http://epp.eurostat.ec.europa.eu/portal/page/portal/lang-en/employment_unemployment_lfs/introduction", 24 | "de": "http://epp.eurostat.ec.europa.eu/portal/page/portal/lang-de/employment_unemployment_lfs/introduction", 25 | "fr": "http://epp.eurostat.ec.europa.eu/portal/page/portal/lang-fr/employment_unemployment_lfs/introduction" 26 | }, 27 | "license": "https://ec.europa.eu/eurostat/about/policies/copyright", 28 | "creator":{ 29 | "@type":"Organization", 30 | "url": "https://ec.europa.eu/eurostat", 31 | "name":"Eurostat", 32 | "contactPoint": [ 33 | { 34 | "@type":"ContactPoint", 35 | "name": "Eurostat Multilingual User Support Network", 36 | "contactType": "Central Support", 37 | "telephone":"+352 4301 36789" 38 | }, 39 | { 40 | "@type":"ContactPoint", 41 | "name": "Eurostat Multilingual User Support Network", 42 | "contactType": "Republic of Ireland", 43 | "availableLanguage": "en", 44 | "telephone":"+353 151 33080" 45 | } 46 | ] 47 | }, 48 | "distribution":[ 49 | { 50 | "@type":"DataDownload", 51 | "encodingFormat":"text/tab-separated-values", 52 | "contentUrl":"https://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?sort=1&file=data%2Fei_lmhu_m.tsv.gz" 53 | }, 54 | { 55 | "@type":"DataDownload", 56 | "encodingFormat":"application/vnd.sdmx.genericdata+xml;version=2.0", 57 | "contentUrl":"https://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?sort=1&file=data%2Fei_lmhu_m.sdmx.zip" 58 | } 59 | ], 60 | "temporalCoverage":"1993-01/2010-12", 61 | "spatialCoverage":{ 62 | "@type":"Place", 63 | "geo":{ 64 | "@type":"GeoShape", 65 | "name": "European Union", 66 | "box":"34.633285 -10.468556 70.096054 34.597916" 67 | } 68 | }, 69 | "measure": [ 70 | { 71 | "@type": "StatisticalMeasure", 72 | "@id": "#unemployment", 73 | "sameAs": "https://www.wikidata.org/wiki/Q41171", 74 | "name": { 75 | "en": "Unemployment (monthly)", 76 | "de": "Arbeitslosigkeit (monatlich)", 77 | "fr": "Chômeurs (mensuel)" 78 | }, 79 | "description": { 80 | "en": "The total number of people unemployed", 81 | "de": "Anzahl der Arbeitslosen", 82 | "fr": "Le nombre total de chômeurs" 83 | }, 84 | "url": { 85 | "en": "http://ec.europa.eu/eurostat/product?code=une_nb_m&language=en", 86 | "de": "http://ec.europa.eu/eurostat/product?code=une_nb_m&language=de", 87 | "fr": "http://ec.europa.eu/eurostat/product?code=une_nb_m&language=fr" 88 | }, 89 | "unitCode": "IE" 90 | }, 91 | { 92 | "@type": "StatisticalMeasure", 93 | "@id": "#unemployment_rate", 94 | "sameAs": "https://www.wikidata.org/wiki/Q1787954", 95 | "name": { 96 | "en": "Unemployment rate (monthly)", 97 | "de": "Arbeitslosenquote (monatlich)", 98 | "fr": "Taux de chômage (mensuel)" 99 | }, 100 | "description": { 101 | "en": "The unemployment rate represents unemployed persons as a percentage of the labour force. The labour force is the total number of people employed and unemployed.", 102 | "de": "Die Arbeitslosenquote ist definiert als der prozentuale Anteil der Arbeitslosen an den Erwerbspersonen. Die Erwerbspersonen umfassen die Erwerbstätigen und die Arbeitslosen.", 103 | "fr": "Le taux de chômage représente le pourcentage de chômeurs dans la population active. La population active représente le nombre total des personnes ayant un emploi ou étant au chômage."
104 | }, 105 | "url": { 106 | "en": "http://ec.europa.eu/eurostat/product?code=une_rt_m&language=en", 107 | "de": "http://ec.europa.eu/eurostat/product?code=une_rt_m&language=de", 108 | "fr": "http://ec.europa.eu/eurostat/product?code=une_rt_m&language=fr" 109 | }, 110 | "unitCode": "P1" 111 | } 112 | ], 113 | "dimension": [ 114 | { 115 | "@type": "CategoricalDimension", 116 | "@id": "#country_group", 117 | "codeList": "country_groups.csv" 118 | }, 119 | { 120 | "@type": "CategoricalDimension", 121 | "@id": "#country", 122 | "codeList": "countries.csv", 123 | "equivalentType": "Country" 124 | }, 125 | { 126 | "@type": "CategoricalDimension", 127 | "@id": "#age_group", 128 | "codeList": "age_groups.csv" 129 | }, 130 | { 131 | "@type": "CategoricalDimension", 132 | "@id": "#sex", 133 | "codeList": "sexes.csv" 134 | }, 135 | { 136 | "@type": "CategoricalDimension", 137 | "@id": "#seasonality", 138 | "codeList": "seasonalities.csv" 139 | }, 140 | { 141 | "@type": "TimeDimension", 142 | "@id": "#month", 143 | "equivalentType": "xsd:gYearMonth", 144 | "datePattern": "yyyy.MM" 145 | } 146 | ], 147 | "footnote": "footnotes.csv", 148 | "slice": [ 149 | { 150 | "@type": "DataSlice", 151 | "@id": "#country_age", 152 | "dimension": [ 153 | "#country", 154 | "#age_group", 155 | "#month" 156 | ], 157 | "measure": [ 158 | "#unemployment", 159 | "#unemployment_rate" 160 | ], 161 | "observation": "country_age.csv" 162 | }, 163 | { 164 | "@type": "DataSlice", 165 | "@id": "#country_group_age", 166 | "dimension": [ 167 | "#country_group", 168 | "#age_group", 169 | "#month" 170 | ], 171 | "measure": [ 172 | "#unemployment", 173 | "#unemployment_rate" 174 | ], 175 | "observation": "country_group_age.csv" 176 | }, 177 | { 178 | "@type": "DataSlice", 179 | "@id": "#country_group_sex_age", 180 | "dimension": [ 181 | "#country_group", 182 | "#sex", 183 | "#age_group", 184 | "#month" 185 | ], 186 | "measure": [ 187 | "#unemployment", 188 | "#unemployment_rate" 189 | ], 190 | "observation": "country_group_sex_age.csv" 191 | }, 192 | { 193 | "@type": "DataSlice", 194 | "@id": "#country_group_sex", 195 | "dimension": [ 196 | "#country_group", 197 | "#sex", 198 | "#month" 199 | ], 200 | "measure": [ 201 | "#unemployment", 202 | "#unemployment_rate" 203 | ], 204 | "observation": "country_group_sex.csv" 205 | }, 206 | { 207 | "@type": "DataSlice", 208 | "@id": "#country_group_total", 209 | "dimension": [ 210 | "#country_group", 211 | "#month" 212 | ], 213 | "measure": [ 214 | "#unemployment", 215 | "#unemployment_rate" 216 | ], 217 | "observation": "country_group_total.csv" 218 | }, 219 | { 220 | "@type": "DataSlice", 221 | "@id": "#country_sex_age", 222 | "dimension": [ 223 | "#country", 224 | "#sex", 225 | "#age_group", 226 | "#month" 227 | ], 228 | "measure": [ 229 | "#unemployment", 230 | "#unemployment_rate" 231 | ], 232 | "observation": "country_sex_age.csv" 233 | }, 234 | { 235 | "@type": "DataSlice", 236 | "@id": "#country_sex", 237 | "dimension": [ 238 | "#country", 239 | "#sex", 240 | "#month" 241 | ], 242 | "measure": [ 243 | "#unemployment", 244 | "#unemployment_rate" 245 | ], 246 | "observation": "country_sex.csv" 247 | }, 248 | { 249 | "@type": "DataSlice", 250 | "@id": "#country_total", 251 | "dimension": [ 252 | "#country", 253 | "#month" 254 | ], 255 | "measure": [ 256 | "#unemployment", 257 | "#unemployment_rate" 258 | ], 259 | "observation": "country_total.csv" 260 | } 261 | ] 262 | } 263 | -------------------------------------------------------------------------------- 
/samples/eurostat/unemployment/footnotes.csv: -------------------------------------------------------------------------------- 1 | codeValue,description 2 | p,This value is a projection 3 | r,This value has been revised 4 | -------------------------------------------------------------------------------- /samples/eurostat/unemployment/seasonalities.csv: -------------------------------------------------------------------------------- 1 | "codeValue","name@en","name@fr","name@de" 2 | "nsa","Not seasonally adjusted data","Données non désaisonnalisées","Nichtsaisonbereinigte Daten" 3 | "sa","Seasonally adjusted data","Données désaisonnalisées","Saisonbereinigte Daten" 4 | "trend","Trend cycle","Tendance-cycle","Trend (glatte Komponente)" 5 | -------------------------------------------------------------------------------- /samples/eurostat/unemployment/sexes.csv: -------------------------------------------------------------------------------- 1 | "codeValue","name@en","name@fr","name@de" 2 | "f","Females","Femmes","Frauen" 3 | "m","Males","Hommes","Männer" 4 | -------------------------------------------------------------------------------- /samples/google/canonical/currencies.csv: -------------------------------------------------------------------------------- 1 | currency,name,symbol 2 | AED,"UAE Dirham", 3 | AFN,Afghani,؋ 4 | ALL,Lek,Lek 5 | AMD,"Armenian Dram", 6 | ANG,"Netherlands Antillian Guilder",ƒ 7 | AOA,Kwanza, 8 | ARS,"Argentine Peso",$ 9 | AUD,"Australian Dollar",$ 10 | AWG,"Aruban Guilder",ƒ 11 | AZN,"Azerbaijanian Manat",ман 12 | BAM,"Convertible Marks",KM 13 | BBD,"Barbados Dollar",$ 14 | BDT,Taka, 15 | BGN,"Bulgarian Lev",лв 16 | BHD,"Bahraini Dinar", 17 | BIF,"Burundi Franc", 18 | BMD,"Bermudian Dollar (customarily known as Bermuda Dollar)",$ 19 | BND,"Brunei Dollar",$ 20 | "BOB BOV","Boliviano Mvdol",$b 21 | BRL,"Brazilian Real",R$ 22 | BSD,"Bahamian Dollar",$ 23 | BWP,Pula,P 24 | BYR,"Belarussian Ruble",p. 
25 | BZD,"Belize Dollar",BZ$ 26 | CAD,"Canadian Dollar",$ 27 | CDF,"Congolese Franc", 28 | CHF,"Swiss Franc",CHF 29 | "CLP CLF","Chilean Peso Unidades de fomento",$ 30 | CNY,"Yuan Renminbi",¥ 31 | "COP COU","Colombian Peso Unidad de Valor Real",$ 32 | CRC,"Costa Rican Colon",₡ 33 | "CUP CUC","Cuban Peso Peso Convertible",₱ 34 | CVE,"Cape Verde Escudo", 35 | CZK,"Czech Koruna",Kč 36 | DJF,"Djibouti Franc", 37 | DKK,"Danish Krone",kr 38 | DOP,"Dominican Peso",RD$ 39 | DZD,"Algerian Dinar", 40 | EEK,Kroon, 41 | EGP,"Egyptian Pound",£ 42 | ERN,Nakfa, 43 | ETB,"Ethiopian Birr", 44 | EUR,Euro,€ 45 | FJD,"Fiji Dollar",$ 46 | FKP,"Falkland Islands Pound",£ 47 | GBP,"Pound Sterling",£ 48 | GEL,Lari, 49 | GHS,Cedi, 50 | GIP,"Gibraltar Pound",£ 51 | GMD,Dalasi, 52 | GNF,"Guinea Franc", 53 | GTQ,Quetzal,Q 54 | GYD,"Guyana Dollar",$ 55 | HKD,"Hong Kong Dollar",$ 56 | HNL,Lempira,L 57 | HRK,"Croatian Kuna",kn 58 | "HTG USD","Gourde US Dollar", 59 | HUF,Forint,Ft 60 | IDR,Rupiah,Rp 61 | ILS,"New Israeli Sheqel",₪ 62 | INR,"Indian Rupee", 63 | "INR BTN","Indian Rupee Ngultrum", 64 | IQD,"Iraqi Dinar", 65 | IRR,"Iranian Rial",﷼ 66 | ISK,"Iceland Krona",kr 67 | JMD,"Jamaican Dollar",J$ 68 | JOD,"Jordanian Dinar", 69 | JPY,Yen,¥ 70 | KES,"Kenyan Shilling", 71 | KGS,Som,лв 72 | KHR,Riel,៛ 73 | KMF,"Comoro Franc", 74 | KPW,"North Korean Won",₩ 75 | KRW,Won,₩ 76 | KWD,"Kuwaiti Dinar", 77 | KYD,"Cayman Islands Dollar",$ 78 | KZT,Tenge,лв 79 | LAK,Kip,₭ 80 | LBP,"Lebanese Pound",£ 81 | LKR,"Sri Lanka Rupee",₨ 82 | LRD,"Liberian Dollar",$ 83 | LTL,"Lithuanian Litas",Lt 84 | LVL,"Latvian Lats",Ls 85 | LYD,"Libyan Dinar", 86 | MAD,"Moroccan Dirham", 87 | MDL,"Moldovan Leu", 88 | MGA,"Malagasy Ariary", 89 | MKD,Denar,ден 90 | MMK,Kyat, 91 | MNT,Tugrik,₮ 92 | MOP,Pataca, 93 | MRO,Ouguiya, 94 | MUR,"Mauritius Rupee",₨ 95 | MVR,Rufiyaa, 96 | MWK,Kwacha, 97 | "MXN MXV","Mexican Peso Mexican Unidad de Inversion (UDI)",$ 98 | MYR,"Malaysian Ringgit",RM 99 | MZN,Metical,MT 100 | NGN,Naira,₦ 101 | NIO,"Cordoba Oro",C$ 102 | NOK,"Norwegian Krone",kr 103 | NPR,"Nepalese Rupee",₨ 104 | NZD,"New Zealand Dollar",$ 105 | OMR,"Rial Omani",﷼ 106 | "PAB USD","Balboa US Dollar",B/. 107 | PEN,"Nuevo Sol",S/. 108 | PGK,Kina, 109 | PHP,"Philippine Peso",Php 110 | PKR,"Pakistan Rupee",₨ 111 | PLN,Zloty,zł 112 | PYG,Guarani,Gs 113 | QAR,"Qatari Rial",﷼ 114 | RON,"New Leu",lei 115 | RSD,"Serbian Dinar",Дин. 
116 | RUB,"Russian Ruble",руб 117 | RWF,"Rwanda Franc", 118 | SAR,"Saudi Riyal",﷼ 119 | SBD,"Solomon Islands Dollar",$ 120 | SCR,"Seychelles Rupee",₨ 121 | SDG,"Sudanese Pound", 122 | SEK,"Swedish Krona",kr 123 | SGD,"Singapore Dollar",$ 124 | SHP,"Saint Helena Pound",£ 125 | SLL,Leone, 126 | SOS,"Somali Shilling",S 127 | SRD,"Surinam Dollar",$ 128 | STD,Dobra, 129 | "SVC USD","El Salvador Colon US Dollar",$ 130 | SYP,"Syrian Pound",£ 131 | SZL,Lilangeni, 132 | THB,Baht,฿ 133 | TJS,Somoni, 134 | TMT,Manat, 135 | TND,"Tunisian Dinar", 136 | TOP,Pa'anga, 137 | TRY,"Turkish Lira",TL 138 | TTD,"Trinidad and Tobago Dollar",TT$ 139 | TWD,"New Taiwan Dollar",NT$ 140 | TZS,"Tanzanian Shilling", 141 | UAH,Hryvnia,₴ 142 | UGX,"Uganda Shilling", 143 | USD,"US Dollar",$ 144 | "UYU UYI","Peso Uruguayo Uruguay Peso en Unidades Indexadas",$U 145 | UZS,"Uzbekistan Sum",лв 146 | VEF,"Bolivar Fuerte",Bs 147 | VND,Dong,₫ 148 | VUV,Vatu, 149 | WST,Tala, 150 | XAF,"CFA Franc BEAC", 151 | XAG,Silver, 152 | XAU,Gold, 153 | XBA,"Bond Markets Units European Composite Unit (EURCO)", 154 | XBB,"European Monetary Unit (E.M.U.-6)", 155 | XBC,"European Unit of Account 9(E.U.A.-9)", 156 | XBD,"European Unit of Account 17(E.U.A.-17)", 157 | XCD,"East Caribbean Dollar",$ 158 | XDR,SDR, 159 | XFU,UIC-Franc, 160 | XOF,"CFA Franc BCEAO", 161 | XPD,Palladium, 162 | XPF,"CFP Franc", 163 | XPT,Platinum, 164 | XTS,"Codes specifically reserved for testing purposes", 165 | YER,"Yemeni Rial",﷼ 166 | ZAR,Rand,R 167 | "ZAR LSL","Rand Loti", 168 | "ZAR NAD","Rand Namibia Dollar", 169 | ZMK,"Zambian Kwacha", 170 | ZWL,"Zimbabwe Dollar", 171 | -------------------------------------------------------------------------------- /samples/google/canonical/entity_order.csv: -------------------------------------------------------------------------------- 1 | entity_order 2 | ALPHA 3 | TABLE 4 | -------------------------------------------------------------------------------- /samples/google/canonical/geo.us.xml: -------------------------------------------------------------------------------- 1 | 2 | 32 | 35 | 36 | 37 | 38 | 39 | 40 | US geographical concepts 41 | 42 | 43 | Canonical concepts for US geographical data. 44 | 45 | 46 | http://code.google.com/apis/publicdata/docs/canonical/geo.us.html 47 | 48 | 49 | 50 | 51 | 52 | Google Inc. 53 | 54 | 55 | Google Inc. 56 | 57 | 58 | http://www.google.com 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | US State 67 | 68 | 69 | A US State, identified by its two letter code. 70 | 71 | 72 | States 73 | 74 | 75 | All US 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 |
85 | 86 | 87 | 88 | 89 | 90 | US 91 | 92 | 93 | states.csv 94 | 95 |
96 | 97 |
98 | -------------------------------------------------------------------------------- /samples/google/canonical/geo.xml: -------------------------------------------------------------------------------- 1 | 2 | 32 | 35 | 36 | 37 | 38 | 39 | 40 | Geographical concepts 41 | 42 | 43 | Canonical concepts for geographical data. 44 | 45 | 46 | http://code.google.com/apis/publicdata/docs/canonical/geo.html 47 | 48 | 49 | 50 | 51 | 52 | Google Inc. 53 | 54 | 55 | Google Inc. 56 | 57 | 58 | http://www.google.com 59 | 60 | 61 | 62 | 63 | 64 | 65 | Location 66 | 67 | Base concept for locations. 68 | 69 | 70 | 71 | 72 | 73 | Latitude 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | Longitude 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | Country or Territory 92 | 93 | 94 | A country or territory, identified by its ISO-3166-1 2-letter code. 95 | 96 | 97 | Countries 98 | 99 | 100 | World 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 |
109 | 110 | 111 | 112 | 113 | 114 | countries.csv 115 | 116 |
117 | 118 |
119 | -------------------------------------------------------------------------------- /samples/google/canonical/granularity.csv: -------------------------------------------------------------------------------- 1 | granularity 2 | YEARLY 3 | QUARTERLY 4 | MONTHLY 5 | WEEKLY 6 | DAILY -------------------------------------------------------------------------------- /samples/google/canonical/states.csv: -------------------------------------------------------------------------------- 1 | state,latitude,longitude,name 2 | AK,63.588753,-154.493062,Alaska 3 | AL,32.318231,-86.902298,Alabama 4 | AR,35.20105,-91.831833,Arkansas 5 | AZ,34.048928,-111.093731,Arizona 6 | CA,36.778261,-119.417932,California 7 | CO,39.550051,-105.782067,Colorado 8 | CT,41.603221,-73.087749,Connecticut 9 | DC,38.905985,-77.033418,"District of Columbia" 10 | DE,38.910832,-75.52767,Delaware 11 | FL,27.664827,-81.515754,Florida 12 | GA,32.157435,-82.907123,Georgia 13 | HI,19.898682,-155.665857,Hawaii 14 | IA,41.878003,-93.097702,Iowa 15 | ID,44.068202,-114.742041,Idaho 16 | IL,40.633125,-89.398528,Illinois 17 | IN,40.551217,-85.602364,Indiana 18 | KS,39.011902,-98.484246,Kansas 19 | KY,37.839333,-84.270018,Kentucky 20 | LA,31.244823,-92.145024,Louisiana 21 | MA,42.407211,-71.382437,Massachusetts 22 | MD,39.045755,-76.641271,Maryland 23 | ME,45.253783,-69.445469,Maine 24 | MI,44.314844,-85.602364,Michigan 25 | MN,46.729553,-94.6859,Minnesota 26 | MO,37.964253,-91.831833,Missouri 27 | MS,32.354668,-89.398528,Mississippi 28 | MT,46.879682,-110.362566,Montana 29 | NC,35.759573,-79.0193,"North Carolina" 30 | ND,47.551493,-101.002012,"North Dakota" 31 | NE,41.492537,-99.901813,Nebraska 32 | NH,43.193852,-71.572395,"New Hampshire" 33 | NJ,40.058324,-74.405661,"New Jersey" 34 | NM,34.97273,-105.032363,"New Mexico" 35 | NV,38.80261,-116.419389,Nevada 36 | NY,43.299428,-74.217933,"New York" 37 | OH,40.417287,-82.907123,Ohio 38 | OK,35.007752,-97.092877,Oklahoma 39 | OR,43.804133,-120.554201,Oregon 40 | PA,41.203322,-77.194525,Pennsylvania 41 | PR,18.220833,-66.590149,"Puerto Rico" 42 | RI,41.580095,-71.477429,"Rhode Island" 43 | SC,33.836081,-81.163725,"South Carolina" 44 | SD,43.969515,-99.901813,"South Dakota" 45 | TN,35.517491,-86.580447,Tennessee 46 | TX,31.968599,-99.901813,Texas 47 | UT,39.32098,-111.093731,Utah 48 | VA,37.431573,-78.656894,Virginia 49 | VT,44.558803,-72.577841,Vermont 50 | WA,47.751074,-120.740139,Washington 51 | WI,43.78444,-88.787868,Wisconsin 52 | WV,38.597626,-80.454903,"West Virginia" 53 | WY,43.075968,-107.290284,Wyoming 54 | -------------------------------------------------------------------------------- /samples/google/canonical/time.xml: -------------------------------------------------------------------------------- 1 | 2 | 32 | 34 | 35 | 36 | 37 | Google date and time 38 | 39 | 40 | Google date and time dataset 41 | 42 | 43 | http://code.google.com/apis/publicdata/docs/canonical/time.html 44 | 45 | 46 | 47 | Google Inc. 48 | Google Inc. 49 | http://www.google.com 50 | 51 | 52 | 53 | 54 | 55 | Point in time 56 | Point in time, with a given granularity. 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | Year date 65 | 66 | 67 | A date with yearly granularity. 68 | 69 | The year concept is usually used directly in a slice definition to define a dimension that 70 | contains year. For example, a slice for yearly population by country would be defined 71 | as follows: 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | ]]> 81 | 82 | The table definition for this slice would be defined as follows: 83 | 84 | 85 | ... 86 |
87 | 88 | 89 | 90 | 91 | country_slice.csv 92 | 93 |
94 | ... 95 | ]]> 96 | 97 | 98 | And the data contained in the CSV file for this table would look like: 99 | 100 | country, year, population 101 | AF, 1960, 9616353 102 | AF, 1961, 9799379 103 | AF, 1962, 9989846 104 | AF, 1963, 10188299 105 | ... 106 |
107 |
108 |
109 | 110 | YEARLY 111 | 112 |
113 | 114 | 115 | 116 | Quarter date 117 | 118 | 119 | A date with quarterly granularity. 120 | See the example for the year concept above. 121 | 122 | 123 | 124 | 125 | QUARTERLY 126 | 127 | 128 | 129 | 130 | 131 | 132 | Month date 133 | 134 | 135 | A date with monthly granularity. 136 | See the example for the year concept above. 137 | 138 | 139 | 140 | 141 | MONTHLY 142 | 143 | 144 | 145 | 146 | 147 | Week date 148 | 149 | 150 | A date with weekly granularity. 151 | See the example for the year concept above. 152 | 153 | 154 | 155 | 156 | WEEKLY 157 | 158 | 159 | 160 | 161 | 162 | Day date 163 | 164 | 165 | A date with daily granularity. 166 | See the example for the year concept above. 167 | 168 | 169 | 170 | 171 | DAILY 172 | 173 | 174 | 175 | 176 | 177 | Granularity 178 | 179 | 180 | Granularity of time, i.e., the uncertainty that 181 | a point in time may be anywhere within some time interval. 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 |
192 | 193 | 194 | granularity.csv 195 | 196 |
197 | 198 |
199 | -------------------------------------------------------------------------------- /samples/google/canonical/unit.xml: -------------------------------------------------------------------------------- 1 | 2 | 32 | 35 | 36 | 37 | 38 | 39 | 40 | Unit concepts 41 | 42 | 43 | Concepts for representing units. 44 | 45 | 46 | http://code.google.com/apis/publicdata/docs/canonical/unit.html 47 | 48 | 49 | 50 | 51 | 52 | Google Inc. 53 | 54 | 55 | Google Inc. 56 | 57 | 58 | http://www.google.com 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | Unit 67 | 68 | 69 | 70 | Specifies the unit associated with a metric concept. 71 | 72 | Example: 73 | 74 | 75 | 76 | 77 | Area in square kilometers 78 | 79 | 80 | 81 | ]]> 82 | 83 | The table contains a single row with the property values: 84 | 85 | symbol,symbol_position,unit_text 86 | km²,END,square kilometers 87 | 88 | One can then use this unit in defining a metric concept: 89 | 90 | 91 | 92 | 93 | Country area in square kilometers 94 | 95 | 96 | 97 |
98 | ]]> 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | The symbol associated with a unit. 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | Unit text 116 | 117 | 118 | 119 | Descriptive text that can be displayed next to a value. 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | Unit symbol position 130 | 131 | 132 | Unit symbol position 133 | 134 | 135 | 136 | END 137 |
138 | 139 | 140 | 141 | 142 | 143 | Currency unit 144 | 145 | 146 | 147 | Specifies the currency associated with a metric concept. 148 | Each currency is identified by its 3-letter currency code (ISO 4217). 149 | 150 | 151 | 152 |
153 | 154 | 155 | 156 | 157 |
158 | 159 | 160 | unit_symbol_positions.csv 161 | 162 |
163 | 164 | 165 | 166 | 167 | 168 | 169 | currencies.csv 170 | 171 |
172 | 173 |
174 | -------------------------------------------------------------------------------- /samples/google/canonical/unit_symbol_positions.csv: -------------------------------------------------------------------------------- 1 | symbol_position 2 | START 3 | END 4 | -------------------------------------------------------------------------------- /samples/google/dspl-sample/countries.csv: -------------------------------------------------------------------------------- 1 | country,name,latitude,longitude 2 | AD,Andorra,42.546245,1.601554 3 | AF,Afghanistan,33.93911,67.709953 4 | AI,Anguilla,18.220554,-63.068615 5 | AL,Albania,41.153332,20.168331 6 | US,United States,37.09024,-95.712891 7 | -------------------------------------------------------------------------------- /samples/google/dspl-sample/country_slice.csv: -------------------------------------------------------------------------------- 1 | country,year,population 2 | AF,1960,9616353 3 | AF,1961,9799379 4 | AF,1962,9989846 5 | AF,1963,10188299 6 | AD,1960,8616353 7 | AD,1961,8799379 8 | AD,1962,8989846 9 | AD,1963,9188299 10 | US,1960,19616353 11 | US,1961,19799379 12 | US,1962,19989846 13 | US,1963,110188299 -------------------------------------------------------------------------------- /samples/google/dspl-sample/gender_country_slice.csv: -------------------------------------------------------------------------------- 1 | country,gender,year,population 2 | AF,M,1960,4808176 3 | AF,M,1961,4899689 4 | AF,F,1960,4808177 5 | AF,F,1961,4899690 6 | AD,M,1960,3808176 7 | AD,M,1961,3899689 8 | AD,F,1960,3808177 9 | AD,F,1961,3899690 10 | US,M,1960,9808176 11 | US,M,1961,9899689 12 | US,F,1960,9808177 13 | US,F,1961,9899690 -------------------------------------------------------------------------------- /samples/google/dspl-sample/genders.csv: -------------------------------------------------------------------------------- 1 | gender,name 2 | M,Male 3 | F,Female 4 | -------------------------------------------------------------------------------- /samples/google/dspl-sample/state_slice.csv: -------------------------------------------------------------------------------- 1 | state,year,population,unemployment_rate 2 | AL,1960,9616353,5.1 3 | AL,1961,9799379,5.2 4 | AL,1962,9989846,4.8 5 | AL,1963,10188299,6.9 6 | AK,1960,8616353,6.1 7 | AK,1961,8799379,6.2 8 | AK,1962,8989846,7.8 9 | AK,1963,9188299,7.9 -------------------------------------------------------------------------------- /samples/google/dspl-sample/states.csv: -------------------------------------------------------------------------------- 1 | state,name,latitude,longitude 2 | AL,Alabama,32.318231,-86.902298 3 | AK,Alaska,63.588753,-154.493062 4 | AR,Arkansas,35.20105,-91.831833 5 | AZ,Arizona,34.048928,-111.093731 6 | CA,California,36.778261,-119.417932 7 | CO,Colorado,39.550051,-105.782067 8 | CT,Connecticut,41.603221,-73.087749 9 | -------------------------------------------------------------------------------- /samples/us_census/retail_sales/businesses.csv: -------------------------------------------------------------------------------- 1 | "business","name","parent" 2 | "44x72","Retail and Food services", 3 | "44000","Retail services","44x72" 4 | "44100","Motor Vehicle and Parts Dealers","44000" 5 | "44200","Furniture and Home Furnishings Stores","44000" 6 | "44300","Electronics and Appliance Stores","44000" 7 | "44400","Building Material and Garden Equipment and Supplies Dealers","44000" 8 | "44500","Food and Beverage Stores","44000" 9 | "44510","Grocery Stores","44500" 10 | 
"44600","Health and Personal Care Stores","44000" 11 | "44700","Gasoline Stations","44000" 12 | "44800","Clothing and Clothing Accessories Stores","44000" 13 | "45100","Sporting Goods, Hobby, Book, and Music Stores","44000" 14 | "45200","General Merchandise Stores","44000" 15 | "45210","Department Stores (excluding leased department stores)","45200" 16 | "45300","Miscellaneous Store Retailers","44000" 17 | "45400","Nonstore Retailers","44000" 18 | "72200","Food Services and Drinking Places","44x72" 19 | "44xxx","Other Aggregates", 20 | "44y72","Retail and Food services (excluding motor vehicles)","44xxx" 21 | "4400a","Retail Services (excluding Motor Vehicle and Parts Dealers)","44xxx" 22 | "441x0","Auto and other Motor Vehicle","44xxx" 23 | -------------------------------------------------------------------------------- /samples/us_census/retail_sales/census-retail-sales.xml: -------------------------------------------------------------------------------- 1 | 2 | 32 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | Retail Sales in the U.S. 48 | 49 | 50 | Monthly Retail Trade and Food Services report 51 | for the United States. This dataset was prepared by Google based 52 | on data downloaded from the U.S. Census Bureau. 53 | 54 | 55 | http://www.census.gov/retail/ 56 | 57 | 58 | 59 | 60 | 61 | U.S. Census Bureau 62 | 63 | 64 | U.S. Census Bureau 65 | 66 | 67 | http://www.census.gov/retail/ 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | Industry 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | Type of business 86 | 87 | 88 | The principal kind of business being conducted at an establishment 89 | 90 | 91 | Types of business 92 | 93 | 94 | 95 | 96 | 97 | TABLE 98 | 99 | 100 | 101 | 102 | Parent Business 103 | 104 | 105 | 106 | 44x72 107 | 108 | 109 | 110 | 111 | 112 | 113 | Seasonality 114 | 115 | 116 | Are values seasonally adjusted or not 117 | 118 | 119 | Seasonalities 120 | 121 | 122 | 123 | Seasonally Adjusted 124 |
125 | 126 | 127 | 128 | 129 | 130 | 131 | Retail Sales Volume 132 | 133 | 134 | Sales include merchandise sold by establishments primarily engaged in retail trade. 135 | 136 | 137 | 138 | 139 | 140 | USD 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 |
152 | 153 | 154 |
155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | businesses.csv 165 | 166 |
167 | 168 | 169 | 170 | 171 | seasonalities.csv 172 | 173 |
174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | retail_sales_business.csv 182 | 183 |
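<!-- A hedged sketch (not part of the original dataset file): in DSPL XML, each of the three table definitions above binds a CSV file to typed columns, roughly as follows; the table id and column types here are assumptions based on the businesses.csv header.
       <table id="businesses_table">
         <column id="business" type="string"/>
         <column id="name" type="string"/>
         <column id="parent" type="string"/>
         <data><file format="csv" encoding="utf-8">businesses.csv</file></data>
       </table>
-->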
184 | 185 |
186 |
187 | -------------------------------------------------------------------------------- /samples/us_census/retail_sales/seasonalities.csv: -------------------------------------------------------------------------------- 1 | "seasonality" 2 | "Not Seasonally Adjusted" 3 | "Seasonally Adjusted" 4 | -------------------------------------------------------------------------------- /tools/dspl2/dspl2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google LLC 2 | # 3 | # Use of this source code is governed by a BSD-style 4 | # license that can be found in the LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | from dspl2.expander import Dspl2JsonLdExpander 8 | from dspl2.expander import Dspl2RdfExpander 9 | from dspl2.filegetter import HybridFileGetter 10 | from dspl2.filegetter import InternetFileGetter 11 | from dspl2.filegetter import LocalFileGetter 12 | from dspl2.filegetter import UploadedFileGetter 13 | from dspl2.jsonutil import AsList 14 | from dspl2.jsonutil import GetSchemaId 15 | from dspl2.jsonutil import GetSchemaProp 16 | from dspl2.jsonutil import GetSchemaType 17 | from dspl2.jsonutil import GetUrl 18 | from dspl2.jsonutil import JsonToKwArgsDict 19 | from dspl2.jsonutil import MakeIdKeyedDict 20 | from dspl2.rdfutil import LoadGraph 21 | from dspl2.rdfutil import FrameGraph 22 | from dspl2.rdfutil import MakeSparqlSelectQuery 23 | from dspl2.rdfutil import SelectFromGraph 24 | from dspl2.validator import CheckDataset 25 | from dspl2.validator import CheckDimension 26 | from dspl2.validator import CheckMeasure 27 | from dspl2.validator import CheckSlice 28 | from dspl2.validator import CheckSliceData 29 | from dspl2.validator import CheckStatisticalDataset 30 | from dspl2.validator import ValidateDspl2 31 | 32 | __all__ = [ 33 | "AsList", 34 | "CheckDataset", 35 | "CheckDimension", 36 | "CheckMeasure", 37 | "CheckSlice", 38 | "CheckSliceData", 39 | "CheckStatisticalDataset", 40 | "Dspl2JsonLdExpander", 41 | "Dspl2RdfExpander", 42 | "FrameGraph", 43 | "GetSchemaId", 44 | "GetSchemaProp", 45 | "GetSchemaType", 46 | "GetUrl", 47 | "HybridFileGetter", 48 | "InternetFileGetter", 49 | "JsonToKwArgsDict", 50 | "LoadGraph", 51 | "LocalFileGetter", 52 | "MakeIdKeyedDict", 53 | "MakeSparqlSelectQuery", 54 | "SelectFromGraph", 55 | "UploadedFileGetter", 56 | "ValidateDspl2", 57 | ] 58 | -------------------------------------------------------------------------------- /tools/dspl2/dspl2/filegetter.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google LLC 2 | # 3 | # Use of this source code is governed by a BSD-style 4 | # license that can be found in the LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | import extruct 8 | from io import StringIO 9 | import json 10 | from pathlib import Path 11 | import requests 12 | import sys 13 | from urllib.parse import urljoin, urlparse 14 | 15 | from dspl2.rdfutil import LoadGraph, SelectFromGraph 16 | 17 | 18 | def _ProcessDspl2File(filename, fileobj, *, type=''): 19 | if any([filename.endswith('.html'), 20 | type.startswith('text/html')]): 21 | data = extruct.extract(fileobj.read(), uniform=True) 22 | return LoadGraph({ 23 | '@context': 'http://schema.org', 24 | '@graph': [ 25 | subdata_elem 26 | for subdata in data.values() 27 | for subdata_elem in subdata 28 | if subdata 29 | ] 30 | }, filename) 31 | if any([filename.endswith('.json'),
filename.endswith('.jsonld'), 33 | type.startswith('application/ld+json')]): 34 | json_val = json.load(fileobj) 35 | return LoadGraph(json_val, filename) 36 | 37 | 38 | class UploadedFileGetter(object): 39 | def __init__(self, files): 40 | json_files = set() 41 | self.graph = None 42 | self.file_map = {} 43 | for f in files: 44 | self.file_map[f.filename] = f 45 | data = _ProcessDspl2File(f.filename, f.stream) 46 | if data: 47 | json_files.add(f.filename) 48 | self.base = f.filename 49 | self.graph = data 50 | if not self.graph: 51 | raise RuntimeError("DSPL 2 file not present in {}".format( 52 | [file.filename for file in self.file_map.values()])) 53 | if len(json_files) > 1: 54 | raise RuntimeError("Multiple DSPL 2 files present: {}".format(json_files)) 55 | 56 | def Fetch(self, filename): 57 | f = self.file_map.get(filename) 58 | if not f: 59 | raise IOError(None, 'File not found', filename) 60 | f.stream.seek(0) 61 | return StringIO(f.read().decode('utf-8')) 62 | 63 | 64 | class InternetFileGetter(object): 65 | def __init__(self, url): 66 | self.base = url 67 | r = requests.get(self.base) 68 | r.raise_for_status() 69 | self.graph = _ProcessDspl2File(url, StringIO(r.text), type=r.headers['content-type']) 70 | 71 | def Fetch(self, filename): 72 | r = requests.get(urljoin(self.base, filename)) 73 | r.raise_for_status() 74 | return StringIO(r.text) 75 | 76 | 77 | class LocalFileGetter(object): 78 | def __init__(self, path): 79 | self.base = urlparse(path).path 80 | with Path(self.base).open() as f: 81 | self.graph = _ProcessDspl2File(path, f) 82 | 83 | def Fetch(self, filename): 84 | filename = urlparse(filename).path 85 | path = Path(self.base).parent.joinpath(Path(filename)).resolve() 86 | return path.open() 87 | 88 | 89 | class HybridFileGetter(object): 90 | @staticmethod 91 | def _load_file(base, rel=None): 92 | uri = urlparse(base) 93 | if rel: 94 | uri = urlparse(urljoin(base, rel)) 95 | if not uri.scheme or uri.scheme == 'file': 96 | return Path(uri.path).open() 97 | elif uri.scheme == 'http' or uri.scheme == 'https': 98 | r = requests.get(uri.geturl()) 99 | r.raise_for_status() 100 | return StringIO(r.text) 101 | 102 | def __init__(self, json_uri): 103 | self.base = json_uri 104 | self.graph = _ProcessDspl2File( 105 | json_uri, 106 | HybridFileGetter._load_file(json_uri)) 107 | 108 | def Fetch(self, uri): 109 | return HybridFileGetter._load_file(self.base, uri) 110 | -------------------------------------------------------------------------------- /tools/dspl2/dspl2/jsonutil.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google LLC 2 | # 3 | # Use of this source code is governed by a BSD-style 4 | # license that can be found in the LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | 8 | def AsList(val): 9 | """Ensures the JSON-LD object is a list.""" 10 | if isinstance(val, list): 11 | return val 12 | elif val is None: 13 | return [] 14 | else: 15 | return [val] 16 | 17 | 18 | def GetSchemaProp(obj, key, default=None): 19 | try: 20 | return obj.get(key, obj.get('schema:' + key, default)) 21 | except AttributeError as e: 22 | raise RuntimeError(f"Unable to find key '{key}' in {obj}") from e 23 | 24 | 25 | def JsonToKwArgsDict(json_val): 26 | """Turn a StatisticalDataset object into a kwargs dict for a Jinja2 template. 27 | 28 | Specifically, this collects top-level dataset metadata under a "dataset" key, 29 | and keeps dimensions, measures, footnotes, and slices as they are.
30 | """ 31 | ret = {'dataset': {}} 32 | special_keys = {'dimension', 'measure', 'footnote', 'slice'} 33 | for key in json_val: 34 | if key in special_keys: 35 | ret[key] = GetSchemaProp(json_val, key) 36 | else: 37 | ret['dataset'][key] = GetSchemaProp(json_val, key) 38 | return ret 39 | 40 | 41 | def MakeIdKeyedDict(vals): 42 | """Returns a dict mapping objects' IDs to objects in the provided list. 43 | 44 | Given a list of JSON-LD objects, return a dict mapping each element's ID to the 45 | element. 46 | 47 | Parameters: 48 | vals (list): list of JSON-LD objects with IDs as dicts 49 | 50 | Returns 51 | dict:dict whose values are elements of `vals` and whose keys are their IDs. 52 | """ 53 | ret = {} 54 | for val in vals: 55 | id = GetSchemaProp(val, '@id') 56 | if id: 57 | ret[id] = val 58 | return ret 59 | 60 | 61 | def GetSchemaId(obj): 62 | return obj.get('@id', GetSchemaProp(obj, 'id')) 63 | 64 | 65 | def GetSchemaType(obj): 66 | return obj.get('@type', GetSchemaProp(obj, 'type')) 67 | 68 | 69 | def GetUrl(obj): 70 | if isinstance(obj, str): 71 | return obj 72 | elif isinstance(obj, dict): 73 | return GetSchemaId(obj) 74 | -------------------------------------------------------------------------------- /tools/dspl2/dspl2/rdfutil.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google LLC 2 | # 3 | # Use of this source code is governed by a BSD-style 4 | # license that can be found in the LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | import json 8 | from pathlib import Path 9 | from pyld import jsonld 10 | from rdflib import Graph, Namespace 11 | from rdflib.serializer import Serializer 12 | import sys 13 | 14 | from dspl2.jsonutil import AsList 15 | 16 | 17 | SCHEMA = Namespace('http://schema.org/') 18 | 19 | 20 | _Schema = {} 21 | _Context = {} 22 | _DataFileFrame = { 23 | '@context': [_Context, {'schema': 'http://schema.org/'}], 24 | '@type': 'StatisticalDataset', 25 | } 26 | _FullFrame = { 27 | '@context': [_Context, {'schema': 'http://schema.org/'}], 28 | '@type': 'StatisticalDataset', 29 | 'slice': { 30 | 'dimension': { 31 | '@embed': '@never' 32 | }, 33 | 'measure': { 34 | '@embed': '@never' 35 | }, 36 | 'tableMapping': { 37 | 'sourceEntity': { 38 | '@embed': '@never' 39 | } 40 | }, 41 | 'data': { 42 | 'dimensionValue': { 43 | 'dimension': { 44 | '@embed': '@never' 45 | } 46 | }, 47 | 'measureValue': { 48 | 'measure': { 49 | '@embed': '@never' 50 | }, 51 | 'footnote': { 52 | '@embed': '@never' 53 | } 54 | } 55 | } 56 | } 57 | } 58 | _Initialized = False 59 | _Module_path = Path(__file__).parent 60 | _RdfPrefixes = { 61 | 'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#', 62 | 'rdfs': 'http://www.w3.org/2000/01/rdf-schema#', 63 | 'schema': 'http://schema.org/', 64 | } 65 | 66 | 67 | def _Init(context, schema): 68 | global _Context, _Schema, _Initialized 69 | if not _Initialized: 70 | with schema.open() as schema: 71 | _Schema.update(json.load(schema)) 72 | with context.open() as context: 73 | _Context.update(json.load(context)) 74 | del _Context['@context']['id'] 75 | del _Context['@context']['type'] 76 | _Initialized = True 77 | 78 | 79 | def _LoadJsonLd(json_val, public_id): 80 | _Init(_Module_path / 'schema' / 'jsonldcontext.json', 81 | _Module_path / 'schema' / 'schema.jsonld') 82 | json_val['@context'] = _Context 83 | graph = Graph().parse( 84 | data=json.dumps(json_val).encode('utf-8'), 85 | format='json-ld', 86 | publicID=public_id 87 | ) 88 | return graph 89 | 90 | 91 | 
def LoadGraph(input, public_id): 92 | if isinstance(input, dict): 93 | data = input 94 | elif isinstance(input, str): 95 | data = json.loads(input) 96 | else: 97 | data = json.load(input) 98 | 99 | return _LoadJsonLd(data, public_id) 100 | 101 | 102 | def FrameGraph(graph, frame=_FullFrame): 103 | serialized = graph.serialize(format='json-ld') 104 | json_val = json.loads(serialized) 105 | json_val = { 106 | '@context': _Context, 107 | '@graph': AsList(json_val) 108 | } 109 | framed = jsonld.frame(json_val, frame, {'embed': '@always'}) 110 | framed['@context'] = 'http://schema.org' 111 | for items in framed['@graph']: 112 | framed.update(items) 113 | del framed['@graph'] 114 | return framed 115 | 116 | 117 | def _N3(obj, namespace_manager): 118 | if isinstance(obj, str): 119 | return obj 120 | return obj.n3(namespace_manager=namespace_manager) 121 | 122 | 123 | def MakeSparqlSelectQuery(*constraints, 124 | ns_manager=None, 125 | rdf_prefixes=_RdfPrefixes): 126 | ret = '' 127 | for prefix, url in rdf_prefixes.items(): 128 | ret += f'PREFIX {prefix}: <{url}>\n' 129 | ret += 'SELECT * WHERE {\n' 130 | for constraint in constraints: 131 | sub, pred, obj = (_N3(field, ns_manager) 132 | for field in constraint) 133 | ret += f' {sub} {pred} {obj} .\n' 134 | ret += '}' 135 | return ret 136 | 137 | 138 | def SelectFromGraph(graph, *constraints): 139 | result = graph.query( 140 | MakeSparqlSelectQuery( 141 | *constraints, 142 | ns_manager=graph.namespace_manager)) 143 | return list({str(k): str(v) 144 | for k, v in binding.items()} 145 | for binding in result.bindings) 146 | 147 | 148 | def main(args): 149 | with open(args[1]) as f: 150 | normalized = FrameGraph(LoadGraph(f, args[1])) 151 | json.dump(normalized, sys.stdout, indent=2) 152 | 153 | 154 | if __name__ == '__main__': 155 | main(sys.argv) 156 | -------------------------------------------------------------------------------- /tools/dspl2/dspl2/templates/choose.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | DSPL 2 Viewer 4 | 5 | 6 | 7 |

DSPL 2 Viewer

8 |

Dataset

9 |
10 | 11 | 12 |
13 | 14 | 15 |
16 | Process as RDF (slow) 17 |
18 | 19 | 20 | -------------------------------------------------------------------------------- /tools/dspl2/dspl2/templates/display.html: -------------------------------------------------------------------------------- 1 | {% from 'render.html' import render %} 2 | 3 | 4 | DSPL 2 Viewer 5 | 8 | 9 | 10 |

DSPL 2 Viewer

11 |

Dataset

12 |

Dimensions

13 |

Measures

14 |

Footnotes

15 |

Slices

16 |
17 | {{render(dataset)}} 18 |
19 | 27 | 35 | {% if footnote %} 36 | 44 | {% endif %} 45 | 53 | 54 | 55 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /tools/dspl2/dspl2/templates/error.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | DSPL 2 Viewer 4 | 5 | 6 | 7 |

DSPL 2 Viewer

8 |

Error fetching dataset

9 |
10 |
11 | {% if url %} 12 | Error {{action}} {{url}}
13 | {% endif %} 14 |
15 |
16 | {% if status %} 17 | HTTP status {{status}}: 18 | {% endif %} 19 | {% if text %} 20 | {{text}} 21 | {% endif %} 22 |
23 | 24 | 25 | -------------------------------------------------------------------------------- /tools/dspl2/dspl2/templates/render.html: -------------------------------------------------------------------------------- 1 | {% macro render(obj) %} 2 | {% if obj.items %} 3 | 4 | {% for key, val in obj|dictsort %} 5 | 6 | 7 | 8 | 9 | {% endfor %} 10 |
{{key}}{{ render(val) }}
11 | {% elif obj.append %} 12 | {% for val in obj %} 13 | 14 | 15 | 18 | 19 | {% endfor %} 20 |
16 | {{ render(val) }} 17 |
21 | {% elif obj and obj.startswith and (obj.startswith('https://') or obj.startswith('http://')) %} 22 | {{obj}} 23 | {% else %} 24 | {{obj}} 25 | {% endif %} 26 | {% endmacro %} 27 | -------------------------------------------------------------------------------- /tools/dspl2/dspl2/templates/viewer.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: sans-serif; 3 | } 4 | 5 | h2 { 6 | display: inline-block; 7 | padding: 1ex; 8 | border: 1px solid; 9 | margin: 0px; 10 | } 11 | 12 | h2.active { 13 | background-color: yellow; 14 | box-shadow: 2px 2px gray; 15 | } 16 | 17 | table { 18 | border-collapse: collapse; 19 | background-color: white; 20 | width: 100%; 21 | } 22 | 23 | table,th,td { 24 | border: 1px solid; 25 | } 26 | 27 | td { 28 | vertical-align: top; 29 | } 30 | 31 | tr:nth-child(even) {background-color: #f2f2f2;} 32 | 33 | td:first-child { 34 | width:10%; 35 | } 36 | 37 | .hidden { 38 | display: none; 39 | } 40 | 41 | td:first-child.closed::before { 42 | content: "▶️"; 43 | color: red; 44 | } 45 | td:first-child.open::before { 46 | content: "🔻"; 47 | color: red; 48 | } 49 | -------------------------------------------------------------------------------- /tools/dspl2/dspl2/templates/viewer.js: -------------------------------------------------------------------------------- 1 | for (var td of document.querySelectorAll('td:first-child')) { 2 | var sibling = td.nextElementSibling; 3 | if (sibling) { 4 | if (sibling.querySelector('table')) { 5 | if (sibling.children.length < 20) { 6 | td.classList.toggle('open'); 7 | } else { 8 | td.classList.toggle('closed'); 9 | sibling.classList.toggle('hidden'); 10 | } 11 | td.addEventListener('click', (ev) => { 12 | ev.target.classList.toggle('open'); 13 | ev.target.classList.toggle('closed'); 14 | ev.target.nextElementSibling.classList.toggle('hidden'); 15 | }); 16 | } 17 | } 18 | } 19 | 20 | function onclick(ev) { 21 | document.querySelectorAll('h2').forEach((elt) => { 22 | elt.classList.remove('active'); 23 | }); 24 | ev.target.classList.add('active'); 25 | 26 | document.querySelectorAll('div').forEach((elt) => { 27 | elt.classList.add('hidden'); 28 | }); 29 | document.querySelector('div#'+ev.target.textContent.trim().toLowerCase()).classList.remove('hidden'); 30 | } 31 | 32 | document.querySelectorAll('h2').forEach((elt) => { 33 | elt.addEventListener('click', onclick); 34 | }); 35 | -------------------------------------------------------------------------------- /tools/dspl2/dspl2/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/dspl/db79dad685276dbf98ca44b875d1481bc240c5c1/tools/dspl2/dspl2/tests/__init__.py -------------------------------------------------------------------------------- /tools/dspl2/dspl2/tests/test_expander.py: -------------------------------------------------------------------------------- 1 | from dspl2.expander import Dspl2JsonLdExpander, Dspl2RdfExpander 2 | from dspl2.rdfutil import SCHEMA 3 | from io import StringIO 4 | import rdflib 5 | import unittest 6 | 7 | 8 | class DummyGetter(object): 9 | def __init__(self, graph): 10 | self.graph = graph 11 | self.data = {} 12 | 13 | def Set(self, filename, data): 14 | self.data[filename] = StringIO(data) 15 | 16 | def Fetch(self, filename): 17 | return self.data.get(filename, StringIO('')) 18 | 19 | 20 | class ExpanderTests(unittest.TestCase): 21 | def test_Dspl2RdfExpander_ExpandDimensionValue(self): 22 | graph = 
rdflib.Graph() 23 | getter = DummyGetter(graph) 24 | expander = Dspl2RdfExpander(getter) 25 | dim = rdflib.URIRef('http://foo.invalid/test.json#dim') 26 | equiv_types = [SCHEMA.Place] 27 | row = { 28 | 'codeValue': 'cv', 29 | 'key1': 'val1', 30 | 'key2': 'val2', 31 | } 32 | row_id = rdflib.URIRef(str(dim) + '=' + row['codeValue']) 33 | expander._ExpandDimensionValue(dim, equiv_types, row_id, row) 34 | self.assertEqual(set(graph.objects(subject=dim, predicate=SCHEMA.codeList)), 35 | {row_id}) 36 | self.assertEqual(set(graph.objects(subject=row_id, 37 | predicate=rdflib.RDF.type)), 38 | {SCHEMA.DimensionValue, SCHEMA.Place}) 39 | 40 | 41 | 42 | self.assertEqual(set(graph.objects(subject=row_id, predicate=SCHEMA.key1)), 43 | {rdflib.Literal('val1')}) 44 | self.assertEqual(set(graph.objects(subject=row_id, predicate=SCHEMA.key2)), 45 | {rdflib.Literal('val2')}) 46 | self.assertEqual(set(graph.objects(subject=row_id, 47 | predicate=SCHEMA.codeValue)), 48 | {rdflib.Literal('cv')}) 49 | 50 | def test_Dspl2RdfExpander_ExpandFootnotes(self): 51 | graph = rdflib.Graph() 52 | dim = rdflib.URIRef('#ds') 53 | graph.add((dim, rdflib.RDF.type, SCHEMA.StatisticalDataset)) 54 | graph.add((dim, SCHEMA.footnote, rdflib.Literal('foo'))) 55 | getter = DummyGetter(graph) 56 | getter.Set('foo', 'codeValue,name,description\np,predicted,Value is predicted rather than measured.\n') 57 | expander = Dspl2RdfExpander(getter) 58 | expander._ExpandFootnotes() 59 | for triple in graph: 60 | print(triple) 61 | footnote_id = rdflib.URIRef('#footnote=p') 62 | self.assertEqual(set(graph.objects(subject=dim, 63 | predicate=SCHEMA.footnote)), 64 | {footnote_id}) 65 | self.assertEqual(set(graph.objects(subject=footnote_id, 66 | predicate=SCHEMA.description)), 67 | {rdflib.term.Literal('Value is predicted rather than measured.')}) 68 | self.assertEqual(set(graph.objects(subject=footnote_id, 69 | predicate=SCHEMA.name)), 70 | {rdflib.term.Literal('predicted')}) 71 | self.assertEqual(set(graph.objects(subject=footnote_id, 72 | predicate=rdflib.RDF.type)), 73 | {SCHEMA.StatisticalAnnotation}) 74 | self.assertEqual(set(graph.objects(subject=footnote_id, 75 | predicate=SCHEMA.codeValue)), 76 | {rdflib.term.Literal('p')}) 77 | 78 | def test_Dspl2RdfExpander_ExpandSliceData(self): 79 | pass 80 | 81 | def test_Dspl2JsonLdExpander_ExpandCodeList(self): 82 | pass 83 | 84 | def test_Dspl2JsonLdExpander_ExpandFootnotes(self): 85 | pass 86 | 87 | def test_Dspl2JsonLdExpander_ExpandSliceData(self): 88 | pass 89 | 90 | 91 | if __name__ == '__main__': 92 | unittest.main() 93 | -------------------------------------------------------------------------------- /tools/dspl2/dspl2/tests/test_jsonutil.py: -------------------------------------------------------------------------------- 1 | from dspl2.jsonutil import (AsList, GetSchemaProp, JsonToKwArgsDict, 2 | MakeIdKeyedDict, GetSchemaId, GetSchemaType, GetUrl) 3 | import unittest 4 | 5 | 6 | class JsonUtilTests(unittest.TestCase): 7 | def test_AsList(self): 8 | self.assertEqual(AsList(None), []) 9 | self.assertEqual(AsList([]), []) 10 | self.assertEqual(AsList([1]), [1]) 11 | self.assertEqual(AsList(1), [1]) 12 | 13 | def test_GetSchemaProp(self): 14 | self.assertEqual(GetSchemaProp({'id': 'val'}, 'id'), 'val') 15 | self.assertEqual(GetSchemaProp({'schema:id': 'val'}, 'id'), 'val') 16 | 17 | def test_JsonToKwArgsDict(self): 18 | self.assertEqual(JsonToKwArgsDict({'id': 'val'}), 
{'dataset': {'id': 'val'}}) 19 | self.assertEqual(JsonToKwArgsDict({}), {'dataset': {}}) 20 | 21 | def test_MakeIdKeyedDict(self): 22 | objs = [{'@id': '1'}, {'@id': '2'}] 23 | lookup = MakeIdKeyedDict(objs) 24 | self.assertEqual(lookup['1'], {'@id': '1'}) 25 | self.assertEqual(lookup['2'], {'@id': '2'}) 26 | 27 | def test_GetSchemaId(self): 28 | self.assertEqual(GetSchemaId({'@id': 'val'}), 'val') 29 | self.assertEqual(GetSchemaId({'id': 'val'}), 'val') 30 | self.assertEqual(GetSchemaId({'schema:id': 'val'}), 'val') 31 | 32 | def test_GetSchemaType(self): 33 | self.assertEqual(GetSchemaType({'@type': 'val'}), 'val') 34 | self.assertEqual(GetSchemaType({'type': 'val'}), 'val') 35 | self.assertEqual(GetSchemaType({'schema:type': 'val'}), 'val') 36 | 37 | def test_GetUrl(self): 38 | self.assertEqual(GetUrl({'@id': 'val'}), 'val') 39 | self.assertEqual(GetUrl('val'), 'val') 40 | 41 | 42 | if __name__ == '__main__': 43 | unittest.main() 44 | -------------------------------------------------------------------------------- /tools/dspl2/dspl2/tests/test_rdfutil.py: -------------------------------------------------------------------------------- 1 | from dspl2.rdfutil import (LoadGraph, FrameGraph, SelectFromGraph) 2 | from io import StringIO 3 | import json 4 | import rdflib 5 | import rdflib.compare 6 | import unittest 7 | 8 | 9 | _SampleJson = '''{ 10 | "@context": "http://schema.org", 11 | "@type": "StatisticalDataset", 12 | "@id": "", 13 | "url": "https://data.europa.eu/euodp/en/data/dataset/bAzn6fiusnRFOBwUeIo78w", 14 | "identifier": "met_d3dens", 15 | "name": "Eurostat Population Density", 16 | "description": "Population density by metropolitan regions", 17 | "dateCreated": "2015-10-16", 18 | "dateModified": "2019-06-18", 19 | "temporalCoverage": "1990-01-01/2016-01-01", 20 | "distribution": { 21 | "@type": "DataDownload", 22 | "contentUrl": "http://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?file=data/met_d3dens.tsv.gz&unzip=true", 23 | "encodingFormat": "text/tab-separated-values" 24 | }, 25 | "spatialCoverage":{ 26 | "@type":"Place", 27 | "geo":{ 28 | "@type":"GeoShape", 29 | "name": "European Union", 30 | "box":"34.633285 -10.468556 70.096054 34.597916" 31 | } 32 | }, 33 | "license": "https://ec.europa.eu/eurostat/about/policies/copyright", 34 | "creator":{ 35 | "@type":"Organization", 36 | "url": "https://ec.europa.eu/eurostat", 37 | "name":"Eurostat" 38 | }, 39 | "publisher": { 40 | "@type": "Organization", 41 | "name": "Eurostat", 42 | "url": "https://ec.europa.eu/eurostat", 43 | "contactPoint": { 44 | "@type": "ContactPoint", 45 | "contactType": "User Support", 46 | "url": "https://ec.europa.eu/eurostat/help/support" 47 | } 48 | } 49 | }''' 50 | 51 | 52 | class RdfUtilTests(unittest.TestCase): 53 | def test_LoadGraph(self): 54 | graph1 = LoadGraph(_SampleJson, '') 55 | graph2 = LoadGraph(json.loads(_SampleJson), '') 56 | graph3 = LoadGraph(StringIO(_SampleJson), '') 57 | self.assertTrue(rdflib.compare.isomorphic(graph1, graph2)) 58 | self.assertTrue(rdflib.compare.isomorphic(graph1, graph3)) 59 | 60 | def test_FrameGraph(self): 61 | json_val = FrameGraph(LoadGraph(_SampleJson, '')) 62 | self.assertEqual(json_val['@context'], 'http://schema.org') 63 | self.assertEqual(json_val['@type'], 'StatisticalDataset') 64 | self.assertEqual(json_val['url'], 'https://data.europa.eu/euodp/en/data/dataset/bAzn6fiusnRFOBwUeIo78w') 65 | self.assertEqual(json_val['identifier'], 'met_d3dens') 66 | self.assertEqual(json_val['name'], 'Eurostat Population Density') 67 | 
self.assertEqual(json_val['description'], 'Population density by metropolitan regions') 68 | self.assertEqual(json_val['dateCreated'], '2015-10-16') 69 | self.assertEqual(json_val['dateModified'], '2019-06-18') 70 | self.assertEqual(json_val['temporalCoverage'], '1990-01-01/2016-01-01') 71 | self.assertEqual(json_val['distribution']['@type'], 'DataDownload') 72 | self.assertEqual(json_val['distribution']['contentUrl'], 'http://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?file=data/met_d3dens.tsv.gz&unzip=true') 73 | self.assertEqual(json_val['distribution']['encodingFormat'], 'text/tab-separated-values') 74 | self.assertEqual(json_val['spatialCoverage']['@type'], "Place") 75 | self.assertEqual(json_val['spatialCoverage']['geo']['@type'], "GeoShape") 76 | self.assertEqual(json_val['spatialCoverage']['geo']['name'], 'European Union') 77 | self.assertEqual(json_val['spatialCoverage']['geo']['box'], '34.633285 -10.468556 70.096054 34.597916') 78 | self.assertEqual(json_val['license'], 'https://ec.europa.eu/eurostat/about/policies/copyright') 79 | self.assertEqual(json_val['creator']['@type'], "Organization") 80 | self.assertEqual(json_val['creator']['url'], 'https://ec.europa.eu/eurostat') 81 | self.assertEqual(json_val['creator']['name'], 'Eurostat') 82 | self.assertEqual(json_val['publisher']['@type'], 'Organization') 83 | self.assertEqual(json_val['publisher']['name'], 'Eurostat') 84 | self.assertEqual(json_val['publisher']['url'], 'https://ec.europa.eu/eurostat') 85 | self.assertEqual(json_val['publisher']['contactPoint']['@type'], 'ContactPoint') 86 | self.assertEqual(json_val['publisher']['contactPoint']['contactType'], 'User Support') 87 | self.assertEqual(json_val['publisher']['contactPoint']['url'], 'https://ec.europa.eu/eurostat/help/support') 88 | 89 | def test_SelectFromGraph(self): 90 | graph = LoadGraph(_SampleJson, '') 91 | results = list(SelectFromGraph( 92 | graph, 93 | ('?ds', 'rdf:type', 'schema:StatisticalDataset'), 94 | ('?ds', 'schema:name', '?name'))) 95 | self.assertEqual(len(results), 1) 96 | self.assertEqual(results[0]['name'], 'Eurostat Population Density') 97 | 98 | 99 | if __name__ == '__main__': 100 | unittest.main() 101 | -------------------------------------------------------------------------------- /tools/dspl2/requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py 2 | extruct 3 | flask 4 | pyicu 5 | jinja2 6 | pandas 7 | pyld 8 | rdflib 9 | rdflib-jsonld 10 | requests 11 | -------------------------------------------------------------------------------- /tools/dspl2/scripts/dspl2-expand.py: -------------------------------------------------------------------------------- 1 | #!/bin/env python3 2 | # Copyright 2018 Google LLC 3 | # 4 | # Use of this source code is governed by a BSD-style 5 | # license that can be found in the LICENSE file or at 6 | # https://developers.google.com/open-source/licenses/bsd 7 | 8 | from absl import app 9 | from absl import flags 10 | from dspl2 import (Dspl2RdfExpander, Dspl2JsonLdExpander, FrameGraph, 11 | LocalFileGetter) 12 | import json 13 | import sys 14 | 15 | 16 | flags.DEFINE_boolean('rdf', False, 'Process the JSON-LD as RDF.') 17 | 18 | 19 | def main(args): 20 | if len(args) != 2: 21 | print(f'Usage: {args[0]} [DSPL file]', file=sys.stderr) 22 | exit(1) 23 | getter = LocalFileGetter(args[1]) 24 | if flags.FLAGS.rdf: 25 | graph = Dspl2RdfExpander(getter).Expand() 26 | dspl = FrameGraph(getter.graph) 27 | else: 28 | dspl = 
Dspl2JsonLdExpander(getter).Expand() 29 | json.dump(dspl, sys.stdout, indent=2) 30 | 31 | 32 | if __name__ == '__main__': 33 | app.run(main) 34 | -------------------------------------------------------------------------------- /tools/dspl2/scripts/dspl2-pretty-print-server.py: -------------------------------------------------------------------------------- 1 | #!/bin/env python3 2 | # Copyright 2018 Google LLC 3 | # 4 | # Use of this source code is governed by a BSD-style 5 | # license that can be found in the LICENSE file or at 6 | # https://developers.google.com/open-source/licenses/bsd 7 | 8 | from flask import Flask, request, render_template 9 | import json 10 | from pathlib import Path 11 | import requests 12 | 13 | import dspl2 14 | from dspl2 import ( 15 | Dspl2JsonLdExpander, Dspl2RdfExpander, InternetFileGetter, 16 | JsonToKwArgsDict, LoadGraph, FrameGraph, UploadedFileGetter) 17 | 18 | 19 | def _Display(template, json_val): 20 | return render_template(template, **JsonToKwArgsDict(json_val)) 21 | 22 | 23 | template_dir = Path(dspl2.__file__).parent / 'templates' 24 | app = Flask('dspl2-viewer', template_folder=template_dir.as_posix()) 25 | 26 | @app.route('/') 27 | def Root(): 28 | return render_template('choose.html') 29 | 30 | 31 | @app.route('/render', methods=['GET', 'POST']) 32 | def _HandleUploads(): 33 | try: 34 | rdf = request.args.get('rdf') == 'on' 35 | url = request.args.get('url') 36 | if request.method == 'POST': 37 | files = request.files.getlist('files[]') 38 | getter = UploadedFileGetter(files) 39 | else: 40 | if not url: 41 | return render_template('error.html', 42 | message="No URL provided") 43 | getter = InternetFileGetter(url) 44 | if rdf: 45 | graph = Dspl2RdfExpander(getter).Expand() 46 | json_val = FrameGraph(graph) 47 | else: 48 | json_val = Dspl2JsonLdExpander(getter).Expand() 49 | return _Display('display.html', json_val) 50 | except json.JSONDecodeError as e: 51 | return render_template('error.html', 52 | action="decoding", 53 | url=e.doc or url, 54 | text=str(e)) 55 | except IOError as e: 56 | return render_template('error.html', 57 | action="loading", 58 | url=e.filename, 59 | text=str(e)) 60 | except RuntimeError as e: 61 | return render_template('error.html', 62 | text=str(e)) 63 | except requests.exceptions.HTTPError as e: 64 | return render_template('error.html', 65 | url=url, 66 | action="retrieving", 67 | status=e.response.status_code, 68 | text=e.response.text) 69 | except requests.exceptions.RequestException as e: 70 | return render_template('error.html', 71 | url=url, 72 | action="retrieving", 73 | text=str(e)) 74 | except Exception as e: 75 | return render_template('error.html', 76 | action="processing", 77 | url=url, 78 | text=str(type(e)) + str(e)) 79 | 80 | 81 | if __name__ == '__main__': 82 | app.run() 83 | -------------------------------------------------------------------------------- /tools/dspl2/scripts/dspl2-pretty-print.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google LLC 2 | # 3 | # Use of this source code is governed by a BSD-style 4 | # license that can be found in the LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | from absl import app 8 | from absl import flags 9 | import dspl2 10 | import jinja2 11 | from pathlib import Path 12 | import sys 13 | 14 | 15 | FLAGS = flags.FLAGS 16 | flags.DEFINE_boolean('rdf', False, 'Process the JSON-LD as RDF.') 17 | 18 | 19 | def _RenderLocalDspl2(path, rdf): 20 | template_dir = 
Path(dspl2.__file__).parent / 'templates' 21 | env = jinja2.Environment(loader=jinja2.FileSystemLoader( 22 | template_dir.as_posix())) 23 | try: 24 | print("Loading template") 25 | template = env.get_template('display.html') 26 | print("Loading DSPL2") 27 | getter = dspl2.LocalFileGetter(path) 28 | print("Expanding DSPL2") 29 | if rdf: 30 | graph = dspl2.Dspl2RdfExpander(getter).Expand() 31 | print("Framing DSPL2") 32 | json_val = dspl2.FrameGraph(graph) 33 | else: 34 | json_val = dspl2.Dspl2JsonLdExpander(getter).Expand() 35 | print("Rendering template") 36 | return template.render(**dspl2.JsonToKwArgsDict(json_val)) 37 | except Exception as e: 38 | template = env.get_template('error.html') 39 | return template.render(action="processing", 40 | url=path, 41 | text=str(type(e)) + ": " + str(e)) 42 | 43 | 44 | 45 | def main(argv): 46 | if len(argv) != 3: 47 | print(f'Usage: {argv[0]} [input.json] [output.html]', file=sys.stderr) 48 | exit(1) 49 | with open(argv[2], 'w') as f: 50 | print(_RenderLocalDspl2(argv[1], FLAGS.rdf), file=f) 51 | 52 | 53 | if __name__ == '__main__': 54 | app.run(main) 55 | -------------------------------------------------------------------------------- /tools/dspl2/scripts/dspl2-validate.py: -------------------------------------------------------------------------------- 1 | #!/bin/env python3 2 | # Copyright 2018 Google LLC 3 | # 4 | # Use of this source code is governed by a BSD-style 5 | # license that can be found in the LICENSE file or at 6 | # https://developers.google.com/open-source/licenses/bsd 7 | 8 | from absl import app 9 | from absl import flags 10 | from dspl2 import (Dspl2JsonLdExpander, Dspl2RdfExpander, LocalFileGetter, 11 | FrameGraph, LoadGraph, ValidateDspl2) 12 | import sys 13 | 14 | 15 | FLAGS = flags.FLAGS 16 | flags.DEFINE_boolean('rdf', False, 'Process the JSON-LD as RDF.') 17 | 18 | 19 | def main(args): 20 | if len(args) != 2: 21 | print(f'Usage: {args[0]} [DSPL file]', file=sys.stderr) 22 | exit(1) 23 | getter = LocalFileGetter(args[1]) 24 | if flags.FLAGS.rdf: 25 | graph = Dspl2RdfExpander(getter).Expand() 26 | dspl = FrameGraph(getter.graph) 27 | else: 28 | dspl = Dspl2JsonLdExpander(getter).Expand() 29 | warnings = ValidateDspl2(dspl, getter) 30 | for warning in warnings: 31 | print(warning) 32 | 33 | 34 | if __name__ == '__main__': 35 | app.run(main) 36 | -------------------------------------------------------------------------------- /tools/dspl2/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google LLC 2 | # 3 | # Use of this source code is governed by a BSD-style 4 | # license that can be found in the LICENSE file or at 5 | # https://developers.google.com/open-source/licenses/bsd 6 | 7 | import setuptools 8 | 9 | setuptools.setup( 10 | name="dspl2", 11 | version="0.0.1", 12 | author="Natarajan Krishnaswami", 13 | author_email="nkrishnaswami@google.com", 14 | description="DSPL 2.0 tools", 15 | url="https://github.com/google/dspl", 16 | packages=setuptools.find_packages(), 17 | classifiers=[ 18 | "Programming Language :: Python :: 3", 19 | "License :: OSI Approved :: BSD License", 20 | "Operating System :: OS Independent", 21 | ], 22 | package_data={ 23 | 'dspl2': ['templates/*', 'schema/*'], 24 | }, 25 | scripts=[ 26 | 'scripts/dspl2-expand.py', 27 | 'scripts/dspl2-pretty-print.py', 28 | 'scripts/dspl2-pretty-print-server.py', 29 | 'scripts/dspl2-validate.py', 30 | ], 31 | ) 32 | -------------------------------------------------------------------------------- 
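The console scripts registered by setup.py above share one calling convention: a single DSPL 2 JSON-LD input (plus an output file for the pretty-printer) and an optional --rdf flag defined via absl. A hedged sketch of typical invocations; the input and output file names here are assumptions, not files shipped with the tools:

    python3 scripts/dspl2-expand.py dataset.jsonld > expanded.jsonld
    python3 scripts/dspl2-validate.py --rdf dataset.jsonld
    python3 scripts/dspl2-pretty-print.py dataset.jsonld output.html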
/tools/dspl2viz/dspl2viz.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import dspl2 3 | from flask import ( 4 | Flask, render_template, request, Response) 5 | from functools import lru_cache 6 | from icu import SimpleDateFormat 7 | from io import StringIO 8 | import json 9 | import os.path 10 | import pandas as pd 11 | from urllib.parse import urlparse 12 | 13 | 14 | app = Flask(__name__) 15 | 16 | 17 | @app.route('/') 18 | def main(): 19 | return render_template('dspl2viz.html') 20 | 21 | 22 | @app.route('/api/measures') 23 | def api_measures(): 24 | dataset = request.args.get('dataset') 25 | if dataset is None: 26 | return Response("Dataset not specified", status=400) 27 | try: 28 | getter = dspl2.HybridFileGetter(dataset) 29 | 30 | expander = dspl2.Dspl2JsonLdExpander(getter) 31 | ds = expander.Expand(expandSlices=False) 32 | return Response(json.dumps(ds['measure'], indent=2), mimetype='application/json') 33 | except Exception as e: 34 | app.logger.warn(e) 35 | return Response("Unable to find requested dataset", status=404) 36 | 37 | 38 | @app.route('/api/dimensions') 39 | def api_dimensions(): 40 | dataset = request.args.get('dataset') 41 | if dataset is None: 42 | return Response("Dataset not specified", status=400) 43 | try: 44 | getter = dspl2.HybridFileGetter(dataset) 45 | expander = dspl2.Dspl2JsonLdExpander(getter) 46 | ds = expander.Expand(expandSlices=False, expandDimensions=False) 47 | return Response(json.dumps(ds['dimension'], indent=2), mimetype='application/json') 48 | except Exception as e: 49 | app.logger.warn(e) 50 | return Response("Unable to find requested dataset", status=404) 51 | 52 | 53 | @app.route('/api/dimension_values') 54 | def api_dimension_values(): 55 | dataset = request.args.get('dataset') 56 | if dataset is None: 57 | return Response("Dataset not specified", status=400) 58 | dimension = request.args.get('dimension') 59 | if dimension is None: 60 | return Response("Dimension not specified", status=400) 61 | try: 62 | getter = dspl2.HybridFileGetter(dataset) 63 | expander = dspl2.Dspl2JsonLdExpander(getter) 64 | ds = expander.Expand(expandSlices=False, expandDimensions=True) 65 | for dim in ds['dimension']: 66 | if (dimension == dspl2.GetUrl(dim) or 67 | urlparse(dimension).fragment == urlparse(dspl2.GetUrl(dim)).fragment): 68 | return Response(json.dumps(dim, indent=2), mimetype='application/json') 69 | return Response("Unable to find requested dimension", status=404) 70 | except Exception as e: 71 | app.logger.warn(e) 72 | return Response("Unable to find requested dataset", status=404) 73 | 74 | 75 | @app.route('/api/slices_for_measure') 76 | def api_slices_for_measure(): 77 | dataset = request.args.get('dataset') 78 | if dataset is None: 79 | return Response("Dataset not specified", status=400) 80 | measure = request.args.get('measure') 81 | if measure is None: 82 | return Response("Measure not specified", status=400) 83 | try: 84 | getter = dspl2.HybridFileGetter(dataset) 85 | expander = dspl2.Dspl2JsonLdExpander(getter) 86 | ds = expander.Expand(expandSlices=False, expandDimensions=False) 87 | slices = [] 88 | for slice in ds['slice']: 89 | for sliceMeasure in slice['measure']: 90 | if (measure == dspl2.GetUrl(sliceMeasure) or 91 | urlparse(measure).fragment == urlparse(dspl2.GetUrl(sliceMeasure)).fragment): 92 | slices.append(slice) 93 | break 94 | return Response(json.dumps(slices, indent=2), 95 | 
mimetype='application/json') 96 | except Exception as e: 97 | app.logger.warn(e) 98 | return Response("Unable to find requested dataset", status=404) 99 | 100 | 101 | @lru_cache(maxsize=10) 102 | def _ExpandDataset(dataset): 103 | getter = dspl2.HybridFileGetter(dataset) 104 | expander = dspl2.Dspl2JsonLdExpander(getter) 105 | return expander.Expand() 106 | 107 | 108 | def _ParseDate(text, date_pattern): 109 | df = SimpleDateFormat(date_pattern) 110 | ts = df.parse(text) 111 | return datetime.datetime.utcfromtimestamp(ts) 112 | 113 | 114 | @lru_cache(maxsize=100) 115 | def _GetDataSeries(dataset, slice, measure, dimension_value): 116 | dim_val_dict = dict([dim_val.split(':') 117 | for dim_val in dimension_value.split(',')]) 118 | ds = _ExpandDataset(dataset) 119 | # Identify the time dimension's date format 120 | dateFormat = "yyyy-MM-dd" # default 121 | for dimension in ds['dimension']: 122 | if dimension['@type'] == 'TimeDimension': 123 | dateFormat = dimension.get('dateFormat') 124 | break 125 | 126 | for dsSlice in ds['slice']: 127 | if urlparse(dsSlice['@id']).fragment == urlparse(slice).fragment: 128 | ret = [] 129 | for observation in dsSlice['data']: 130 | val = {} 131 | # Slice should have exactly the requested dims + a time dim: 132 | if len(observation['dimensionValues']) != len(dim_val_dict) + 1: 133 | continue 134 | # All the non-time dims should match the filter: 135 | matched_dims = 0 136 | for dim_val in observation['dimensionValues']: 137 | dim_id = urlparse(dim_val['dimension']).fragment 138 | if f'#{dim_id}' in dim_val_dict: 139 | if dim_val.get('codeValue') == dim_val_dict[f'#{dim_id}']: 140 | val[dim_id] = dim_val.get('codeValue') 141 | matched_dims += 1 142 | elif dim_val.get('value'): 143 | val[dim_id] = _ParseDate(dim_val.get('value'), dateFormat) 144 | if matched_dims != len(dim_val_dict): 145 | continue 146 | for meas_val in observation['measureValues']: 147 | if urlparse(meas_val['measure']).fragment == urlparse(measure).fragment: 148 | val[urlparse(measure).fragment] = meas_val['value'] 149 | ret.append(val) 150 | return ret 151 | 152 | @app.route('/api/series') 153 | def api_series(): 154 | dataset = request.args.get('dataset') 155 | if dataset is None: 156 | return Response("Dataset not specified", status=400) 157 | slice = request.args.get('slice') 158 | if slice is None: 159 | return Response("Slice not specified", status=400) 160 | measure = request.args.get('measure') 161 | if measure is None: 162 | return Response("Measure not specified", status=400) 163 | dimension_values = request.args.get('dimension_value') 164 | if dimension_values is None: 165 | return Response("Dimension values not specified", status=400) 166 | ret = _GetDataSeries(dataset, slice, measure, dimension_values) 167 | if ret is not None: 168 | out = StringIO() 169 | pd.DataFrame(ret).to_csv(out) 170 | return Response(out.getvalue(), mimetype="text/csv") 171 | return Response("Unable to find series for requested dimensions", 172 | status=404) 173 | -------------------------------------------------------------------------------- /tools/dspl2viz/foo.jsonld: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/dspl/db79dad685276dbf98ca44b875d1481bc240c5c1/tools/dspl2viz/foo.jsonld -------------------------------------------------------------------------------- /tools/dspl2viz/static/dspl2viz.css: -------------------------------------------------------------------------------- 1 | body { 2 | display: flex; 3 | height: 100vh; 
4 | flex-flow: column wrap; 5 | } 6 | #vegalite-container { 7 | width: 30vw; 8 | height: 50vh; 9 | display: block; 10 | } 11 | 12 | #vegalite-input { 13 | width: 100%; 14 | height: 100%; 15 | } 16 | 17 | #dataset-explorer { 18 | width: 30vw; 19 | height: 49vh; 20 | vertical-align: top; 21 | border-style: solid; 22 | border-width: 1px; 23 | } 24 | #measure-explorer { 25 | max-height: 49%; 26 | overflow: scroll; 27 | } 28 | #dimension-explorer { 29 | max-height: 49%; 30 | overflow: scroll; 31 | } 32 | 33 | #chart-container { 34 | width: 65vw; 35 | height: 99vh; 36 | vertical-align: top; 37 | border-style: solid; 38 | border-width: 1px; 39 | } 40 | 41 | #chart { 42 | width: 100%; 43 | height: 100%; 44 | } 45 | -------------------------------------------------------------------------------- /tools/dspl2viz/static/dspl2viz.js: -------------------------------------------------------------------------------- 1 | var View; 2 | 3 | function drawChart(event) { 4 | try { 5 | var spec = JSON.parse(event.target.value); 6 | spec.height = document.querySelector('#chart-container').scrollHeight; 7 | spec.width = document.querySelector('#chart-container').scrollWidth; 8 | event.target.value = JSON.stringify(spec, null, 2); 9 | vegaEmbed("#chart", spec) 10 | // result.view provides access to the Vega View API 11 | .then(result => {View = result}) 12 | .catch(console.warn); 13 | } catch(e) { 14 | console.warn(e); 15 | } 16 | } 17 | 18 | var DatasetId = 'file:///usr/local/google/home/nkrishnaswami/dspl/samples/bls/unemployment/bls-unemployment.jsonld'; 19 | var SliceId = '#statesUnemploymentMonthly'; 20 | var MeasureId = '#unemployment_rate'; 21 | var DimValues = { 22 | seasonality: 'S', 23 | state: 'ST0100000000000', 24 | }; 25 | 26 | function setSpec() { 27 | var vlSpec = { 28 | "$schema": "https://vega.github.io/schema/vega-lite/v4.0.0-beta.10.json", 29 | "description": "A simple bar chart with embedded data.", 30 | "autosize": { 31 | "type": "fit", 32 | "resize": true 33 | }, 34 | "data": { 35 | "url": "/api/series", 36 | "format": { 37 | "type": "csv" 38 | } 39 | }, 40 | "mark": "line", 41 | "encoding": { 42 | "x": { 43 | "field": "month", 44 | "type": "ordinal" 45 | }, 46 | "y": { 47 | "field": "unemployment_rate", 48 | "type": "quantitative" 49 | }, 50 | "color": { 51 | "field": "state", 52 | "type": "ordinal" 53 | } 54 | } 55 | } 56 | vlSpec.data.url += '?dataset=' + encodeURIComponent(DatasetId); 57 | vlSpec.data.url += '&slice=' + encodeURIComponent(SliceId); 58 | vlSpec.data.url += '&measure=' + encodeURIComponent(MeasureId); 59 | vlSpec.data.url += '&dimension_value='; 60 | for (var idx = 0; idx < Object.keys(DimValues).length; ++idx) { 61 | if (idx != 0) { 62 | vlSpec.data.url += ','; 63 | } 64 | var key = Object.keys(DimValues)[idx]; 65 | var val = DimValues[key]; 66 | vlSpec.data.url += encodeURIComponent(`#${key}:${val}`); 67 | } 68 | 69 | var input = document.querySelector("#vegalite-input"); 70 | input.value = JSON.stringify(vlSpec, null, 2); 71 | input.dispatchEvent(new Event('change')); 72 | } 73 | 74 | function processMeasures(data) { 75 | let measure_container = document.querySelector('#measure-explorer'); 76 | measure_container.innerText = "Measures:"; 77 | console.log(measure_container); 78 | let ul = document.createElement('ul'); 79 | measure_container.appendChild(ul); 80 | for(let measure of data) { 81 | let id = $('<a>', { href: measure['@id'] }).prop('hash').substring(1); 82 | console.log("Processing ", measure.name, 'id:', id, measure); 83 | let li = document.createElement('li'); 84 | 
li.innerText = measure.name; 85 | if (measure.description) { 86 | li.title = measure.description; 87 | } 88 | li.addEventListener('click', function (event) { 89 | for(var elt of ul.children) { 90 | elt.style.fontWeight = 'normal'; 91 | } 92 | event.target.style.fontWeight = 'bold'; 93 | MeasureId = '#' + id; 94 | setSpec(); 95 | }); 96 | ul.appendChild(li); 97 | } 98 | } 99 | 100 | function processSlices(data) { 101 | let slice_container = document.querySelector('#slice-explorer'); 102 | slice_container.innerText = "Slices:"; 103 | console.log(slice_container); 104 | let ul = document.createElement('ul'); 105 | slice_container.appendChild(ul); 106 | for(let slice of data) { 107 | let id = $('<a>', { href: slice['@id'] }).prop('hash').substring(1); 108 | console.log("Processing ", slice.name, 'id:', id, slice); 109 | let li = document.createElement('li'); 110 | li.innerText = slice.name; 111 | if (slice.description) { 112 | li.title = slice.description; 113 | } 114 | li.addEventListener('click', function (event) { 115 | for(var elt of ul.children) { 116 | elt.style.fontWeight = 'normal'; 117 | } 118 | event.target.style.fontWeight = 'bold'; 119 | SliceId = '#' + id; 120 | setSpec(); 121 | }); 122 | ul.appendChild(li); 123 | } 124 | } 125 | 126 | 127 | 128 | 129 | function processDimensionValues(dimension) { 130 | let id = $('<a>', { href: dimension['@id'] }).prop('hash').substring(1); 131 | console.log("Processing ", dimension.name, 'id:', id); 132 | let div = document.createElement('div'); 133 | let dimension_container = document.querySelector('#dimension-explorer'); 134 | dimension_container.appendChild(div); 135 | div.innerText = dimension.name; 136 | if (dimension.description) { 137 | div.title = dimension.description; 138 | } 139 | let ul = document.createElement('ul'); 140 | div.appendChild(ul); 141 | dimension.codes = {}; 142 | for(let dimensionValue of dimension.codeList) { 143 | dimension.codes[dimensionValue.codeValue] = dimensionValue; 144 | let li = document.createElement('li'); 145 | li.innerText = dimensionValue.name; 146 | if (dimensionValue.description) { 147 | li.title = dimensionValue.description; 148 | } 149 | li.addEventListener('click', function (event) { 150 | for(var elt of ul.children) { 151 | elt.style.fontWeight = 'normal'; 152 | } 153 | event.target.style.fontWeight = 'bold'; 154 | DimValues[id] = dimensionValue.codeValue; 155 | setSpec(); 156 | }); 157 | ul.appendChild(li); 158 | } 159 | } 160 | 161 | 162 | function processDimensions(data) { 163 | for(let dimension of data) { 164 | if (dimension.name != 'States' && dimension.name != 'Seasonality') { 165 | continue; 166 | } 167 | $.getJSON('/api/dimension_values?dataset=' + encodeURIComponent(DatasetId) + '&dimension=' + encodeURIComponent(dimension['@id']), 168 | processDimensionValues); 169 | } 170 | } 171 | 172 | document.querySelector("#vegalite-input").addEventListener('change', drawChart); 173 | setSpec(); 174 | 175 | 176 | $.getJSON('/api/measures?dataset=' + encodeURIComponent(DatasetId), processMeasures); 177 | // $.getJSON('/api/slices_for_measure?dataset=' + encodeURIComponent(DatasetId), processMeasures); 178 | $.getJSON('/api/dimensions?dataset=' + encodeURIComponent(DatasetId), processDimensions); 179 | -------------------------------------------------------------------------------- /tools/dspl2viz/templates/dspl2viz.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | DSPL 2 Dataset Visualizer 4 | 5 | 6 
| 7 | 8 | 12 | 13 | 14 |
15 | 16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 | 24 |
25 |
26 |
27 |
28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /tools/dspltools/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 1.0 2 | Name: dspltools 3 | Version: 0.4.3 4 | Summary: Suite of command-line tools for generating DSPL datasets 5 | Home-page: http://code.google.com/apis/publicdata/docs/dspltools.html 6 | Author: Benjamin Yolken 7 | Author-email: yolken@google.com 8 | License: UNKNOWN 9 | Description: UNKNOWN 10 | Platform: UNKNOWN 11 | -------------------------------------------------------------------------------- /tools/dspltools/README.rst: -------------------------------------------------------------------------------- 1 | Documentation 2 | ============= 3 | See https://developers.google.com/public-data/docs/dspltools for documentation. 4 | 5 | 6 | Release Notes 7 | ============= 8 | *** v0.1 *** 9 | Release date: April 11, 2011 10 | 11 | Description: 12 | ------------ 13 | DSPL Tools released! 14 | 15 | 16 | *** v0.2 *** 17 | Release date: April 18, 2011 18 | 19 | Description: 20 | ------------ 21 | Enhanced DSPL Check by adding significant functionality beyond XML schema 22 | validation, including the checking of internal dataset references and CSV 23 | file structure. 24 | 25 | 26 | *** v0.2.1 *** 27 | Release date: April 21, 2011 28 | 29 | Description: 30 | ------------ 31 | Use column ID to distinguish between years and integers in dsplgen. 32 | 33 | 34 | *** v0.3 *** 35 | Release date: April 26, 2011 36 | 37 | Description: 38 | ------------ 39 | Extended DSPL Check to validate dataset CSV data (sorting, instance IDs) 40 | and slice / table links. 41 | 42 | Added concept hierarchy support to DSPL Gen. 43 | 44 | 45 | *** v0.3.5 *** 46 | Release date: May 4, 2011 47 | 48 | Description: 49 | ------------ 50 | Extended DSPL Check to support checking of: 51 | - Table column / concept type consistency 52 | - Date formats 53 | - Formatting of float and integer CSV values 54 | - Datasets where CSV columns are in different order than columns in table 55 | metadata 56 | 57 | Improved error messages when files can't be found or opened. 58 | 59 | Fixed bug in DSPL Gen naming of external concepts. 60 | 61 | 62 | *** v0.3.6 *** 63 | Release date: May 6, 2011 64 | 65 | Description: 66 | ------------ 67 | Added 'checking_level' option to DSPL Check. 68 | 69 | CSV files are now loaded in 'universal newline mode' to reduce risk of parsing 70 | problems. 71 | 72 | 73 | *** v0.3.7 *** 74 | Release date: May 6, 2011 75 | 76 | Description: 77 | ------------ 78 | Added zipped dataset checking to DSPL Check. 79 | 80 | Strip whitespace from CSV values (to mimic behavior of PDE importer). 81 | 82 | 83 | *** v0.4 *** 84 | Release date: May 20, 2011 85 | 86 | Description: 87 | ------------ 88 | Added topic reference checking to DSPL Check. 89 | 90 | Changed schema validation process to use local XML schema files instead of 91 | calling out to W3C servers. 92 | 93 | 94 | *** v0.4.1 *** 95 | Release date: June 2, 2011 96 | 97 | Description: 98 | ------------ 99 | Added test for trivial slices to DSPL Check. 100 | 101 | Improved behavior of DSPL Check when empty tables are encountered. 102 | 103 | 104 | *** v0.4.2 *** 105 | Release date: June 20, 2011 106 | 107 | Description: 108 | ------------ 109 | Changed implementation of default csv_data_source to use in-memory Python 110 | objects instead of sqlite. The latter can still be used by setting the '-t' 111 | option of dsplgen to 'csv_sqlite'. 
112 | 113 | 114 | *** v0.4.3 *** 115 | Release date: November 3, 2011 116 | 117 | Description: 118 | ------------ 119 | Fixed some bugs around multi-level concept hierarchies. 120 | 121 | Added total_val parameter to support pre-computed rollups in data. 122 | 123 | 124 | *** v0.5.0 *** 125 | Release date: January 22, 2019 126 | 127 | Description: 128 | ------------ 129 | Switch to lxml for XML parsing and schema validation. 130 | 131 | -------------------------------------------------------------------------------- /tools/dspltools/examples/dsplcheck/invalid_dspl/countries.csv: -------------------------------------------------------------------------------- 1 | country,name,latitude,longitude 2 | AD,Andorra,42.546245,1.601554 3 | AF,Afghanistan,33.93911,67.709953 4 | AI,Anguilla,18.220554,-63.068615 5 | AL,Albania,41.153332,20.168331 6 | US,United States,37.09024,-95.712891 7 | -------------------------------------------------------------------------------- /tools/dspltools/examples/dsplcheck/invalid_dspl/country_slice.csv: -------------------------------------------------------------------------------- 1 | country,year,population 2 | AF,1960,9616353 3 | AF,1961,9799379 4 | AF,1963,10188299 5 | AF,1962,9989846 6 | AD,1960,8616353 7 | AD,1961,8799379 8 | AD,1962,8989846 9 | AD,1963,9188299 10 | US,1960,19616353 11 | UX,1961,19799379 12 | US,1962,392039023 13 | US,1963,110188299 14 | -------------------------------------------------------------------------------- /tools/dspltools/examples/dsplcheck/invalid_dspl/invalid_dspl.xml: -------------------------------------------------------------------------------- 1 | 2 | 32 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | My statistics 47 | 48 | 49 | Some very interesting statistics about countries 50 | 51 | 52 | http://www.stats-bureau.com/mystats/info.html 53 | 54 | 55 | 56 | 57 | 58 | Bureau of Statistics 59 | 60 | 61 | http://www.stats-bureau.com 62 | 63 | 64 | 65 | 66 | 67 | 68 | Geography 69 | 70 | 71 | 72 | 73 | Social indicators 74 | 75 | 76 | 77 | Population indicators 78 | 79 | 80 | 81 | 82 | Poverty & income 83 | 84 | 85 | 86 | 87 | Health 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | Population 99 | 100 | 101 | Size of the resident population. 102 | 103 | 104 | 105 | 106 | 107 | 108 | 112 | 113 | 114 | 115 | Country 116 | 117 | 118 | My list of countries 119 | 120 | 121 | 122 | 123 | 124 | Country name 125 | 126 | The official name of the country 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 |
141 | 142 | 143 | 144 | 145 |
146 | 147 | 148 | 149 | 150 | 151 | countries.csv 152 | 153 |
154 | 155 | 156 | 157 | 158 | 159 | 160 | country_slice.csv 161 | 162 |
163 | 164 | 165 |
166 | -------------------------------------------------------------------------------- /tools/dspltools/examples/dsplcheck/valid_dataset/countries.csv: -------------------------------------------------------------------------------- 1 | country,name,latitude,longitude 2 | AD,Andorra,42.546245,1.601554 3 | AF,Afghanistan,33.93911,67.709953 4 | AI,Anguilla,18.220554,-63.068615 5 | AL,Albania,41.153332,20.168331 6 | US,United States,37.09024,-95.712891 7 | -------------------------------------------------------------------------------- /tools/dspltools/examples/dsplcheck/valid_dataset/country_slice.csv: -------------------------------------------------------------------------------- 1 | country,year,population 2 | AF,1960,9616353 3 | AF,1961,9799379 4 | AF,1963,10188299 5 | AF,1962,9989846 6 | AD,1960,8616353 7 | AD,1961,8799379 8 | AD,1962,8989846 9 | AD,1963,9188299 10 | US,1960,19616353 11 | US,1961,19799379 12 | US,1962,392039023 13 | US,1963,110188299 14 | -------------------------------------------------------------------------------- /tools/dspltools/examples/dsplcheck/valid_dataset/valid_dataset.xml: -------------------------------------------------------------------------------- 1 | 2 | 32 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | My statistics 47 | 48 | 49 | Some very interesting statistics about countries 50 | 51 | 52 | http://www.stats-bureau.com/mystats/info.html 53 | 54 | 55 | 56 | 57 | 58 | Bureau of Statistics 59 | 60 | 61 | http://www.stats-bureau.com 62 | 63 | 64 | 65 | 66 | 67 | 68 | Geography 69 | 70 | 71 | 72 | 73 | Social indicators 74 | 75 | 76 | 77 | Population indicators 78 | 79 | 80 | 81 | 82 | Poverty & income 83 | 84 | 85 | 86 | 87 | Health 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | Population 99 | 100 | 101 | Size of the resident population. 102 | 103 | 104 | 105 | 106 | 107 | 108 | 112 | 113 | 114 | 115 | Country 116 | 117 | 118 | My list of countries 119 | 120 | 121 | 122 | 123 | 124 | Country name 125 | 126 | The official name of the country 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 |
141 | 142 | 143 | 144 | 145 |
146 | 147 | 148 | 149 | 150 | 151 | countries.csv 152 | 153 |
154 | 155 | 156 | 157 | 158 | 159 | 160 | country_slice.csv 161 | 162 |
163 | 164 | 165 |
166 | -------------------------------------------------------------------------------- /tools/dspltools/examples/dsplgen/dsplgen_advanced.csv: -------------------------------------------------------------------------------- 1 | date[type=date;format=MM/dd/yyyy],first_category[slice_role=dimension;rollup=true;total_val=total],second_category[slice_role=dimension;rollup=true],first_value[slice_role=metric;type=integer],second_value[slice_role=metric;type=float] 2 | 1/1/2010,red,tall,10,23 3 | 1/1/2010,red,short,90,1 4 | 1/1/2010,blue,tall,12,31 5 | 1/1/2010,blue,short,21,231 6 | 1/1/2010,green,short,20,212 7 | 1/1/2010,total,tall,10,98 8 | 1/1/2010,total,short,-30,39 9 | 1/2/2010,red,tall,10,91 10 | 1/2/2010,red,short,32,123 11 | 1/2/2010,blue,tall,22,121 12 | 1/2/2010,blue,short,20,32 13 | 1/2/2010,green,short,1,19 14 | 1/2/2010,total,short,2,10 15 | 1/3/2010,red,short,10,34 16 | 1/3/2010,red,tall,10,34 17 | 1/3/2010,blue,short,93,21 18 | 1/3/2010,blue,tall,39,12 19 | 1/3/2010,green,short,31,31 20 | 1/3/2010,green,tall,21,31 21 | 1/3/2010,total,short,13,123 22 | 1/4/2010,red,tall,40,21 23 | 1/4/2010,red,short,22,12 24 | 1/4/2010,blue,tall,39,21 25 | 1/4/2010,blue,short,10,12 26 | 1/4/2010,green,tall,30,23 27 | 1/4/2010,green,short,10,123 28 | 1/4/2010,total,tall,-10,23 29 | 1/4/2010,total,short,31,661 30 | -------------------------------------------------------------------------------- /tools/dspltools/examples/dsplgen/dsplgen_hierarchies.csv: -------------------------------------------------------------------------------- 1 | year[type=date;format=yyyy],first_category[parent=third_category;rollup=true],second_category[rollup=true],third_category,first_value[type=integer],second_value[type=float] 2 | 2010,red,tall,bucket1,10,23 3 | 2010,red,short,bucket1,90,1 4 | 2010,blue,tall,bucket1,12,31 5 | 2010,blue,short,bucket1,21,231 6 | 2010,green,tall,bucket2,12,31 7 | 2010,green,short,bucket2,11,33 8 | 2011,red,tall,bucket1,12,23 9 | 2011,red,short,bucket1,93,1 10 | 2011,blue,tall,bucket1,15,31 11 | 2011,blue,short,bucket1,25,231 12 | 2011,green,tall,bucket2,13,31 13 | 2011,green,short,bucket2,15,33 14 | 2012,red,tall,bucket1,20,23 15 | 2012,red,short,bucket1,110,1 16 | 2012,blue,tall,bucket1,55,31 17 | 2012,blue,short,bucket1,77,231 18 | 2012,green,tall,bucket2,77,31 19 | 2012,green,short,bucket2,88,33 20 | -------------------------------------------------------------------------------- /tools/dspltools/examples/dsplgen/dsplgen_simple.csv: -------------------------------------------------------------------------------- 1 | date,first_category,second_category,first_value,second_value 2 | 1/1/2010,red,tall,10,23 3 | 1/1/2010,red,short,90,1 4 | 1/1/2010,blue,tall,12,31 5 | 1/1/2010,blue,short,21,231 6 | 1/1/2010,green,short,20,212 7 | 1/2/2010,red,tall,10,91 8 | 1/2/2010,red,short,32,123 9 | 1/2/2010,blue,tall,22,121 10 | 1/2/2010,blue,short,20,32 11 | 1/2/2010,green,short,1,19 12 | 1/3/2010,red,short,10,34 13 | 1/3/2010,red,tall,10,34 14 | 1/3/2010,blue,short,93,21 15 | 1/3/2010,blue,tall,39,12 16 | 1/3/2010,green,short,31,31 17 | 1/3/2010,green,tall,21,31 18 | 1/4/2010,red,tall,40,21 19 | 1/4/2010,red,short,22,12 20 | 1/4/2010,blue,tall,39,21 21 | 1/4/2010,blue,short,10,12 22 | 1/4/2010,green,tall,30,23 23 | 1/4/2010,green,short,10,123 24 | -------------------------------------------------------------------------------- /tools/dspltools/examples/dsplgen/dsplgen_yearly_data.csv: -------------------------------------------------------------------------------- 1 | 
year,first_category,second_category,first_value,second_value 2 | 2010,red,tall,10,23.5 3 | 2010,red,short,90,1.1 4 | 2010,blue,tall,12,31.3 5 | 2010,blue,short,21,231 6 | 2010,green,short,20,212 7 | 2011,red,tall,10,91 8 | 2011,red,short,32,123 9 | 2011,blue,tall,22,121 10 | 2011,blue,short,20,32 11 | 2011,green,short,1,19 12 | 2012,red,short,10,34 13 | 2012,red,tall,10,34.3 14 | 2012,blue,short,93,21 15 | 2012,blue,tall,39,12 16 | 2012,green,short,31,31 17 | 2012,green,tall,21,31 18 | 2013,red,tall,40,21 19 | 2013,red,short,22,12.55 20 | 2013,blue,tall,39,21 21 | 2013,blue,short,10,12 22 | 2013,green,tall,30,23 23 | 2013,green,short,10,123 24 | -------------------------------------------------------------------------------- /tools/dspltools/packages/dspllib/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # 3 | # Copyright 2018 Google LLC 4 | # 5 | # Use of this source code is governed by a BSD-style 6 | # license that can be found in the LICENSE file or at 7 | # https://developers.google.com/open-source/licenses/bsd 8 | -------------------------------------------------------------------------------- /tools/dspltools/packages/dspllib/data_sources/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # 3 | # Copyright 2018 Google LLC 4 | # 5 | # Use of this source code is governed by a BSD-style 6 | # license that can be found in the LICENSE file or at 7 | # https://developers.google.com/open-source/licenses/bsd 8 | -------------------------------------------------------------------------------- /tools/dspltools/packages/dspllib/data_sources/csv_data_source_sqlite_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # 3 | # Copyright 2018 Google LLC 4 | # 5 | # Use of this source code is governed by a BSD-style 6 | # license that can be found in the LICENSE file or at 7 | # https://developers.google.com/open-source/licenses/bsd 8 | 9 | """Tests of csv_data_source_sqlite module.""" 10 | 11 | 12 | __author__ = 'Benjamin Yolken ' 13 | 14 | import unittest 15 | 16 | import csv_data_source_sqlite 17 | import csv_sources_test_suite 18 | 19 | 20 | class CSVDataSourceSqliteTests(csv_sources_test_suite.CSVSourcesTests): 21 | """Tests of the CSVDataSourceSqlite object.""" 22 | 23 | def setUp(self): 24 | self.data_source_class = csv_data_source_sqlite.CSVDataSourceSqlite 25 | 26 | super(CSVDataSourceSqliteTests, self).setUp() 27 | 28 | 29 | class CSVDataSourceSqliteErrorTests( 30 | csv_sources_test_suite.CSVSourcesErrorTests): 31 | """Tests of the CSVDataSourceSqlite object under various error conditions.""" 32 | 33 | def setUp(self): 34 | self.data_source_class = csv_data_source_sqlite.CSVDataSourceSqlite 35 | 36 | super(CSVDataSourceSqliteErrorTests, self).setUp() 37 | 38 | 39 | if __name__ == '__main__': 40 | unittest.main() 41 | -------------------------------------------------------------------------------- /tools/dspltools/packages/dspllib/data_sources/csv_data_source_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # 3 | # Copyright 2018 Google LLC 4 | # 5 | # Use of this source code is governed by a BSD-style 6 | # license that can be found in the LICENSE file or at 7 | # https://developers.google.com/open-source/licenses/bsd 8 | 9 | """Tests of csv_data_source module.""" 10 | 11 | 12 | __author__ = 'Benjamin Yolken ' 13 | 14 
| import unittest 15 | 16 | import csv_data_source 17 | import csv_sources_test_suite 18 | 19 | 20 | class CSVDataSourceTests(csv_sources_test_suite.CSVSourcesTests): 21 | """Tests of the CSVDataSource object.""" 22 | 23 | def setUp(self): 24 | self.data_source_class = csv_data_source.CSVDataSource 25 | 26 | super(CSVDataSourceTests, self).setUp() 27 | 28 | 29 | class CSVDataSourceErrorTests(csv_sources_test_suite.CSVSourcesErrorTests): 30 | """Tests of the CSVDataSource object under various error conditions.""" 31 | 32 | def setUp(self): 33 | self.data_source_class = csv_data_source.CSVDataSource 34 | 35 | super(CSVDataSourceErrorTests, self).setUp() 36 | 37 | 38 | if __name__ == '__main__': 39 | unittest.main() 40 | -------------------------------------------------------------------------------- /tools/dspltools/packages/dspllib/data_sources/csv_sources_test_suite.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # 3 | # Copyright 2018 Google LLC 4 | # 5 | # Use of this source code is governed by a BSD-style 6 | # license that can be found in the LICENSE file or at 7 | # https://developers.google.com/open-source/licenses/bsd 8 | 9 | """A set of tests useful for CSV data sources.""" 10 | 11 | 12 | __author__ = 'Benjamin Yolken ' 13 | 14 | import StringIO 15 | import unittest 16 | 17 | import data_source 18 | 19 | 20 | _TEST_CSV_CONTENT = ( 21 | """date[type=date;format=yyyy-MM-dd],category1,category2[concept=geo:us_state;parent=category3;total_val=total],category3,metric1[extends=quantity:ratio;slice_role=metric],metric2[aggregation=avg],metric3[aggregation=count] 22 | 1980-01-01,red,california,west,89,321,71.21 23 | 1981-01-01,red,california,west,99,231,391.2 24 | 1982-01-01,blue,maine's,east,293,32,2.31 25 | 1983-01-01,blue,california,west,293,12,10.3 26 | 1984-01-01,red,maine's,east,932,48,10.78 27 | 1984-01-01,red,oregon,west,32,33,-14.34 28 | 1985-01-01,red,total,east,21,98,87.0 29 | 1986-01-01,red,total,west,33,90,-10.1""") 30 | 31 | 32 | class CSVSourcesTests(unittest.TestCase): 33 | """Basic tests of a CSV DataSource object.""" 34 | 35 | def setUp(self): 36 | self.csv_file = StringIO.StringIO(_TEST_CSV_CONTENT) 37 | self.data_source_obj = self.data_source_class(self.csv_file, verbose=False) 38 | 39 | def tearDown(self): 40 | self.data_source_obj.Close() 41 | self.csv_file.close() 42 | 43 | def testColumnBundle(self): 44 | """Test that column bundle is properly generated.""" 45 | column_bundle = self.data_source_obj.GetColumnBundle() 46 | 47 | self.assertEqual( 48 | [c.column_id for c in column_bundle.GetColumnIterator()], 49 | ['date', 'category1', 'category2', 'category3', 50 | 'metric1', 'metric2', 'metric3']) 51 | self.assertEqual( 52 | [c.data_type for c in column_bundle.GetColumnIterator()], 53 | ['date', 'string', 'string', 'string', 'integer', 'integer', 'float']) 54 | self.assertEqual( 55 | [c.data_format for c in column_bundle.GetColumnIterator()], 56 | ['yyyy-MM-dd', '', '', '', '', '', '']) 57 | self.assertEqual( 58 | [c.concept_ref for c in column_bundle.GetColumnIterator()], 59 | ['time:day', '', 'geo:us_state', '', '', '', '']) 60 | self.assertEqual( 61 | [c.concept_extension for c in column_bundle.GetColumnIterator()], 62 | ['', '', '', '', 'quantity:ratio', '', '']) 63 | self.assertEqual( 64 | [c.slice_role for c in column_bundle.GetColumnIterator()], 65 | ['dimension', 'dimension', 'dimension', 'dimension', 'metric', 'metric', 66 | 'metric']) 67 | self.assertEqual( 68 | [c.rollup for c in 
column_bundle.GetColumnIterator()], 69 | [False, False, False, True, False, False, False]) 70 | self.assertEqual( 71 | [c.parent_ref for c in column_bundle.GetColumnIterator()], 72 | ['', '', 'category3', '', '', '', '']) 73 | self.assertEqual( 74 | [c.total_val for c in column_bundle.GetColumnIterator()], 75 | ['', '', 'total', '', '', '', '']) 76 | 77 | def testEntityTableGeneration(self): 78 | """Test that single-concept tables are generated correctly.""" 79 | table_data = self.data_source_obj.GetTableData( 80 | data_source.QueryParameters( 81 | data_source.QueryParameters.CONCEPT_QUERY, ['category2'])) 82 | 83 | # Make sure quotes are properly escaped 84 | self.assertEqual(table_data.rows, 85 | [['california'], ['maine\'s'], ['oregon']]) 86 | 87 | def testMultiEntityTableGeneration(self): 88 | """Test that multi-concept tables are generated correctly.""" 89 | table_data = self.data_source_obj.GetTableData( 90 | data_source.QueryParameters( 91 | data_source.QueryParameters.CONCEPT_QUERY, 92 | ['category2', 'category3'])) 93 | 94 | # Make sure quotes are properly escaped 95 | self.assertEqual(table_data.rows, 96 | [['california', 'west'], ['maine\'s', 'east'], 97 | ['oregon', 'west']]) 98 | 99 | def testSliceTableGeneration(self): 100 | """Test that slice tables are generated correctly.""" 101 | table_data = self.data_source_obj.GetTableData( 102 | data_source.QueryParameters( 103 | data_source.QueryParameters.SLICE_QUERY, 104 | ['metric3', 'category2', 'metric1', 'metric2'])) 105 | 106 | self.assertEqual( 107 | table_data.rows, 108 | [[3, 'california', 89 + 99 + 293, (321.0 + 231.0 + 12.0) / 3.0], 109 | [2, 'maine\'s', 293 + 932, (32.0 + 48.0) / 2.0], 110 | [1, 'oregon', 32, 33]]) 111 | 112 | def testTotalsSliceTableGeneration(self): 113 | """Test that slice tables are generated correctly with total values.""" 114 | table_data = self.data_source_obj.GetTableData( 115 | data_source.QueryParameters( 116 | data_source.QueryParameters.SLICE_QUERY, 117 | ['category1', 'metric1', 'metric2', 'metric3'])) 118 | 119 | self.assertEqual( 120 | table_data.rows, 121 | [['red', 21 + 33, (98.0 + 90.0) / 2.0, 2]]) 122 | 123 | 124 | class CSVSourcesErrorTests(unittest.TestCase): 125 | """Tests of a CSV DataSource object for error cases.""" 126 | 127 | def setUp(self): 128 | pass 129 | 130 | def testBadHeaderKey(self): 131 | """Test that unknown key in header generates error.""" 132 | csv_file = StringIO.StringIO( 133 | 'date[unknown_key=unknown_value],metric\n1990,23232') 134 | 135 | self.assertRaises( 136 | data_source.DataSourceError, 137 | self.data_source_class, 138 | csv_file, False) 139 | 140 | csv_file.close() 141 | 142 | def testBadDataType(self): 143 | """Test that unknown type value generates error.""" 144 | csv_file = StringIO.StringIO('date[type=unknown_type],metric\n1990,23232') 145 | 146 | self.assertRaises( 147 | data_source.DataSourceError, 148 | self.data_source_class, 149 | csv_file, False) 150 | 151 | csv_file.close() 152 | 153 | def testBadAggregation(self): 154 | """Test that unknown aggregation operator generates error.""" 155 | csv_file = StringIO.StringIO( 156 | 'date[aggregation=unknown_aggregation],metric\n1990,23232') 157 | 158 | self.assertRaises( 159 | data_source.DataSourceError, 160 | self.data_source_class, 161 | csv_file, False) 162 | 163 | csv_file.close() 164 | 165 | def testBadSliceRoleKey(self): 166 | """Test that unknown value for slice_role generates error.""" 167 | csv_file = StringIO.StringIO( 168 | 'date[slice_role=unknown_role],metric\n1990,23232') 169 | 170 | 
self.assertRaises( 171 | data_source.DataSourceError, 172 | self.data_source_class, 173 | csv_file, False) 174 | 175 | csv_file.close() 176 | 177 | def testBadColumnID(self): 178 | """Test that a badly formatted column ID generates error.""" 179 | csv_file = StringIO.StringIO('my date[type=date],metric\n1990,23232') 180 | 181 | self.assertRaises( 182 | data_source.DataSourceError, 183 | self.data_source_class, 184 | csv_file, False) 185 | 186 | csv_file.close() 187 | 188 | def testBadDataRow(self): 189 | """Test that row with wrong number of entries causes error.""" 190 | csv_file = StringIO.StringIO( 191 | 'date,column\n01/01/1990,abcd,1234') 192 | 193 | self.assertRaises( 194 | data_source.DataSourceError, 195 | self.data_source_class, 196 | csv_file, False) 197 | 198 | csv_file.close() 199 | 200 | def testBadParentReference(self): 201 | """Test that illegal parent reference causes error.""" 202 | csv_file = StringIO.StringIO( 203 | 'date,column[parent=unknown_parent]\n01/01/1990,abcd') 204 | 205 | self.assertRaises( 206 | data_source.DataSourceError, 207 | self.data_source_class, 208 | csv_file, False) 209 | 210 | csv_file.close() 211 | 212 | def testMultipleParents(self): 213 | """Test that having multiple parent instances causes error.""" 214 | csv_file = StringIO.StringIO( 215 | 'date,column1[parent=column2],column2,column3\n' 216 | '1/1/2001,val1,parent1,323\n1/2/2001,val1,parent2,123') 217 | 218 | self.assertRaises( 219 | data_source.DataSourceError, 220 | self.data_source_class, 221 | csv_file, False) 222 | 223 | csv_file.close() 224 | -------------------------------------------------------------------------------- /tools/dspltools/packages/dspllib/data_sources/csv_utilities.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # 3 | # Copyright 2018 Google LLC 4 | # 5 | # Use of this source code is governed by a BSD-style 6 | # license that can be found in the LICENSE file or at 7 | # https://developers.google.com/open-source/licenses/bsd 8 | 9 | """Utility functions useful for CSV data sources.""" 10 | from __future__ import print_function 11 | 12 | __author__ = 'Benjamin Yolken ' 13 | 14 | 15 | import csv 16 | import re 17 | import string 18 | import warnings 19 | 20 | import data_source 21 | 22 | 23 | def _HeaderToColumn(header_string): 24 | """Parse the header string for a column. 25 | 26 | Args: 27 | header_string: The complete string for the column header 28 | 29 | Returns: 30 | A DataColumn object populated based on the header data 31 | 32 | Raises: 33 | DataSourceError: If there are any errors in parsing, e.g. if an unrecognized 34 | key is found. 
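  For example, the header string 'date[type=date;format=MM/dd/yyyy]' (as used
  in the dsplgen example CSVs) is parsed into a column with id 'date',
  data_type 'date', and data_format 'MM/dd/yyyy'.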
35 | """ 36 | # The column id must be at least one character long, and cannot contain the 37 | # characters '[', ']', ';', or whitespace 38 | parameters_match = re.match( 39 | '^([^\]\[;\s]+)(?:\[(.*)\]){0,1}$', 40 | header_string.strip().replace('"', '')) 41 | 42 | if not parameters_match: 43 | raise data_source.DataSourceError( 44 | 'Formatting error for header string: %s' % header_string) 45 | 46 | column_id = parameters_match.group(1) 47 | column = data_source.DataSourceColumn(column_id, internal_parameters={}) 48 | 49 | if parameters_match.group(2): 50 | # Parse the column parameters 51 | key_value_pairs = parameters_match.group(2).split(';') 52 | 53 | for key_value_pair in key_value_pairs: 54 | try: 55 | [key, value] = key_value_pair.split('=') 56 | except ValueError: 57 | raise data_source.DataSourceError( 58 | 'Formatting error for header string: %s' % header_string) 59 | 60 | # Map the key to the appropriate field of the DataSourceColumn object 61 | if key == 'type': 62 | if value not in ['date', 'float', 'integer', 'string']: 63 | raise data_source.DataSourceError( 64 | 'Unknown data type for column %s: %s' % 65 | (column.column_id, value)) 66 | 67 | column.data_type = value 68 | elif key == 'format': 69 | column.data_format = value 70 | elif key == 'concept': 71 | column.concept_ref = value 72 | elif key == 'extends': 73 | column.concept_extension = value 74 | elif key == 'parent': 75 | column.parent_ref = value 76 | elif key == 'slice_role': 77 | role_value = value.lower() 78 | 79 | if role_value not in ['dimension', 'metric']: 80 | raise data_source.DataSourceError( 81 | 'Unrecognized slice_roll in column %s: %s' % 82 | (column.column_id, value)) 83 | else: 84 | column.slice_role = role_value 85 | elif key == 'rollup': 86 | if value.lower() == 'true': 87 | column.rollup = True 88 | elif value.lower() == 'false': 89 | column.rollup = False 90 | else: 91 | raise data_source.DataSourceError( 92 | 'Unrecognized boolean value in column %s: %s' % 93 | (column.column_id, value)) 94 | elif key == 'total_val': 95 | column.total_val = value 96 | elif key == 'dropif': 97 | column.internal_parameters['dropif_val'] = value 98 | elif key == 'zeroif': 99 | column.internal_parameters['zeroif_val'] = value 100 | elif key == 'aggregation': 101 | if string.lower(value) not in ['sum', 'max', 'min', 'avg', 'count']: 102 | raise data_source.DataSourceError( 103 | 'Unknown aggregation for column %s: %s' % 104 | (column.column_id, value)) 105 | 106 | column.internal_parameters['aggregation'] = value 107 | else: 108 | raise data_source.DataSourceError( 109 | 'Unknown parameter for column %s: %s' % 110 | (column.column_id, key)) 111 | return column 112 | 113 | 114 | def ConstructColumnBundle(csv_file, verbose=True): 115 | """Construct a ColumnBundle from the header information in a CSV file. 
116 | 117 | Args: 118 | csv_file: The complete string for the column header 119 | verbose: Print out extra information to stdout 120 | 121 | Returns: 122 | A data_source.ColumnBundle object populated based on the CSV header 123 | 124 | Raises: 125 | DataSourceError: If there are any parsing errors or data 126 | inconsistencies 127 | """ 128 | # Get the first and second rows of the CSV 129 | header_csv_reader = csv.reader(csv_file, delimiter=',', quotechar='"') 130 | header_row_values = next(header_csv_reader) 131 | second_row_values = next(header_csv_reader) 132 | csv_file.seek(0) 133 | 134 | # Check that second row is properly formatted 135 | if len(header_row_values) != len(second_row_values): 136 | raise data_source.DataSourceError( 137 | 'Number of columns in row 2 (%d) does not match number ' 138 | 'expected (%d)' % (len(second_row_values), len(header_row_values))) 139 | 140 | column_bundle = data_source.DataSourceColumnBundle() 141 | 142 | for header_element in header_row_values: 143 | column_bundle.AddColumn(_HeaderToColumn(header_element)) 144 | 145 | num_date_columns = 0 146 | has_metric_column = False 147 | column_ids = [column.column_id for column in 148 | column_bundle.GetColumnIterator()] 149 | 150 | # Iterate through columns, populating and refining DataSourceColumn 151 | # parameters as necessary 152 | for c, column in enumerate(column_bundle.GetColumnIterator()): 153 | if verbose: 154 | print('\nEvaluating column %s' % column.column_id) 155 | 156 | # Check data type 157 | if not column.data_type: 158 | column.data_type = ( 159 | data_source.GuessDataType(second_row_values[c], column.column_id)) 160 | 161 | if verbose: 162 | print('Guessing that column %s is of type %s' % ( 163 | column.column_id, column.data_type)) 164 | 165 | # Check slice type 166 | if not column.slice_role: 167 | if column.data_type == 'integer' or column.data_type == 'float': 168 | column.slice_role = 'metric' 169 | else: 170 | column.slice_role = 'dimension' 171 | 172 | if verbose: 173 | print('Guessing that column %s is a %s' % ( 174 | column.column_id, column.slice_role)) 175 | 176 | # Check aggregation 177 | if column.slice_role == 'metric': 178 | has_metric_column = True 179 | 180 | if 'aggregation' not in column.internal_parameters: 181 | column.internal_parameters['aggregation'] = 'SUM' 182 | 183 | if verbose: 184 | print('Guessing that column %s should be aggregated by %s' % ( 185 | column.column_id, column.internal_parameters['aggregation'])) 186 | 187 | # Check parent 188 | if column.parent_ref: 189 | if column.parent_ref not in column_ids: 190 | raise data_source.DataSourceError( 191 | 'Column %s references a parent not defined in this dataset: %s' % 192 | (column.column_id, column.parent_ref)) 193 | 194 | parent_column = column_bundle.GetColumnByID(column.parent_ref) 195 | 196 | if not parent_column.rollup: 197 | parent_column.rollup = True 198 | 199 | if verbose: 200 | print('Making column %s rollup since it is a parent to column %s' 201 | % (parent_column.column_id, column.column_id)) 202 | 203 | # Check date format and concept 204 | if column.data_type == 'date': 205 | num_date_columns += 1 206 | 207 | if not column.data_format: 208 | column.data_format = ( 209 | data_source.GuessDateFormat(second_row_values[c])) 210 | 211 | if not column.concept_ref: 212 | column.concept_ref = ( 213 | data_source.GuessDateConcept(column.data_format)) 214 | 215 | if verbose: 216 | print('Guessing that column %s is formatted as %s and ' 217 | 'corresponds to %s' % ( 218 | column.column_id, 
column.data_format, column.concept_ref))
219 | 
220 |   # Warn user if their file will not produce interesting DSPL visualizations
221 |   if num_date_columns == 0:
222 |     warnings.warn('Input file does not have a date column',
223 |                   data_source.DataSourceWarning)
224 | 
225 |   elif num_date_columns > 1:
226 |     warnings.warn('Input file has more than one date column',
227 |                   data_source.DataSourceWarning)
228 | 
229 |   if not has_metric_column:
230 |     warnings.warn('Input file does not have any metrics',
231 |                   data_source.DataSourceWarning)
232 | 
233 |   return column_bundle
234 | 
--------------------------------------------------------------------------------
/tools/dspltools/packages/dspllib/data_sources/data_source_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2
2 | #
3 | # Copyright 2018 Google LLC
4 | #
5 | # Use of this source code is governed by a BSD-style
6 | # license that can be found in the LICENSE file or at
7 | # https://developers.google.com/open-source/licenses/bsd
8 | 
9 | """Tests of data_source module."""
10 | 
11 | 
12 | __author__ = 'Benjamin Yolken <yolken@google.com>'
13 | 
14 | import unittest
15 | 
16 | import data_source
17 | 
18 | 
19 | class DataSourceColumnBundleTests(unittest.TestCase):
20 |   """Tests of DataSourceColumnBundle object."""
21 | 
22 |   def setUp(self):
23 |     self.column_bundle = data_source.DataSourceColumnBundle(
24 |         [data_source.DataSourceColumn(column_id='col1'),
25 |          data_source.DataSourceColumn(column_id='col2'),
26 |          data_source.DataSourceColumn(column_id='col3')])
27 | 
28 |   def testAddColumn(self):
29 |     self.column_bundle.AddColumn(
30 |         data_source.DataSourceColumn(column_id='col4'))
31 |     self.assertEqual(self.column_bundle.GetColumnByID('col4').column_id,
32 |                      'col4')
33 | 
34 |   def testGetColumnByID(self):
35 |     column = self.column_bundle.GetColumnByID('col2')
36 |     self.assertEqual(column.column_id, 'col2')
37 | 
38 |   def testGetColumnByOrder(self):
39 |     column = self.column_bundle.GetColumnByOrder(2)
40 |     self.assertEqual(column.column_id, 'col3')
41 | 
42 |   def testGetNumColumns(self):
43 |     self.assertEqual(self.column_bundle.GetNumColumns(), 3)
44 | 
45 |   def testGetColumnIterator(self):
46 |     column_iterator = self.column_bundle.GetColumnIterator()
47 |     column_id_list = [c.column_id for c in column_iterator]
48 |     self.assertEqual(column_id_list, ['col1', 'col2', 'col3'])
49 | 
50 | 
51 | class TableDataTest(unittest.TestCase):
52 |   """Tests of TableData object."""
53 | 
54 |   def setUp(self):
55 |     self.table_data = data_source.TableData(
56 |         [[1, 2, 3], [4, 5, 6]])
57 | 
58 |   def testMergeValues(self):
59 |     another_table_data = data_source.TableData([[4, 5, 6], [6, 7, 8]])
60 |     merged_table_data = self.table_data.MergeValues(
61 |         another_table_data, num_columns=2)
62 |     self.assertEqual(merged_table_data.rows,
63 |                      [[1, 2, 3, 4, 5], [4, 5, 6, 6, 7]])
64 | 
65 |   def testMergeConstant(self):
66 |     merged_table_data = self.table_data.MergeConstant('abcd')
67 |     self.assertEqual(merged_table_data.rows,
68 |                      [[1, 2, 3, 'abcd'], [4, 5, 6, 'abcd']])
69 | 
70 | 
71 | class DataGuessingTest(unittest.TestCase):
72 |   """Test of data type / format guessing functions."""
73 | 
74 |   def setUp(self):
75 |     pass
76 | 
77 |   def testGuessType(self):
78 |     self.assertEqual(data_source.GuessDataType('312332'), 'integer')
79 |     self.assertEqual(data_source.GuessDataType('1999', 'year'), 'date')
80 |     self.assertEqual(data_source.GuessDataType('3123.32'), 'float')
81 |     self.assertEqual(data_source.GuessDataType('-3399332'), 'integer')
82 | 
self.assertEqual(data_source.GuessDataType('-3.0'), 'float') 83 | self.assertEqual(data_source.GuessDataType('1/1/11'), 'date') 84 | self.assertEqual(data_source.GuessDataType('01/1932'), 'date') 85 | self.assertEqual(data_source.GuessDataType('2-3-1932'), 'date') 86 | self.assertEqual(data_source.GuessDataType('something'), 'string') 87 | self.assertEqual(data_source.GuessDataType('3278.23728.223'), 'string') 88 | 89 | def testGuessDateFormat(self): 90 | self.assertEqual(data_source.GuessDateFormat('2819'), 'yyyy') 91 | self.assertEqual(data_source.GuessDateFormat('3/1990'), 'MM/yyyy') 92 | self.assertEqual(data_source.GuessDateFormat('1990-3'), 'yyyy-MM') 93 | self.assertEqual(data_source.GuessDateFormat('01-2-1981'), 'MM-dd-yyyy') 94 | self.assertEqual(data_source.GuessDateFormat('1990/2/3'), 'yyyy/MM/dd') 95 | 96 | self.assertRaises(data_source.DataSourceError, 97 | data_source.GuessDateFormat, '1990.12') 98 | self.assertRaises(data_source.DataSourceError, 99 | data_source.GuessDateFormat, 'Jan 1981') 100 | 101 | def testGuessDateConcept(self): 102 | self.assertEqual(data_source.GuessDateConcept('yyyy'), 'time:year') 103 | self.assertEqual(data_source.GuessDateConcept('yyyy-MM'), 'time:month') 104 | self.assertEqual(data_source.GuessDateConcept('yy.MM.dd'), 'time:day') 105 | self.assertEqual(data_source.GuessDateConcept('dd/MM/yyyy'), 'time:day') 106 | 107 | self.assertRaises(data_source.DataSourceError, 108 | data_source.GuessDateConcept, 'yy-mm') 109 | self.assertRaises(data_source.DataSourceError, 110 | data_source.GuessDateConcept, 'GG yyyy') 111 | 112 | 113 | if __name__ == '__main__': 114 | unittest.main() 115 | -------------------------------------------------------------------------------- /tools/dspltools/packages/dspllib/model/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # 3 | # Copyright 2018 Google LLC 4 | # 5 | # Use of this source code is governed by a BSD-style 6 | # license that can be found in the LICENSE file or at 7 | # https://developers.google.com/open-source/licenses/bsd 8 | -------------------------------------------------------------------------------- /tools/dspltools/packages/dspllib/model/dspl_model_loader_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # 3 | # Copyright 2018 Google LLC 4 | # 5 | # Use of this source code is governed by a BSD-style 6 | # license that can be found in the LICENSE file or at 7 | # https://developers.google.com/open-source/licenses/bsd 8 | 9 | """Tests of dspl_model_loader module.""" 10 | 11 | 12 | __author__ = 'Benjamin Yolken ' 13 | 14 | import os 15 | import os.path 16 | import shutil 17 | import tempfile 18 | import unittest 19 | 20 | import dspl_model_loader 21 | import dspl_model_test 22 | 23 | 24 | _SLICE_CSV_DATA = ( 25 | """col1,col2 26 | val1,1 27 | val2 , 2 28 | val3,3""") 29 | 30 | 31 | class DSPLModelLoaderTests(unittest.TestCase): 32 | """Basic test cases for dspl_model_loader module.""" 33 | 34 | def setUp(self): 35 | self.input_dir = tempfile.mkdtemp() 36 | self.xml_file_path = os.path.join(self.input_dir, 'dataset.xml') 37 | 38 | xml_file = open(self.xml_file_path, 'w') 39 | xml_file.write(dspl_model_test.TEST_DSPL_XML) 40 | xml_file.close() 41 | 42 | slice_csv_file = open(os.path.join(self.input_dir, 'mydata.csv'), 'w') 43 | slice_csv_file.write(_SLICE_CSV_DATA) 44 | slice_csv_file.close() 45 | 46 | def tearDown(self): 47 | shutil.rmtree(self.input_dir) 48 | 49 | def 
testDSPLImportLoading(self): 50 | """Test that dataset is imported correctly.""" 51 | dspl_dataset = dspl_model_loader.LoadDSPLFromFiles(self.xml_file_path) 52 | 53 | # Test basic info 54 | self.assertEqual(dspl_dataset.name, 'My Dataset') 55 | self.assertEqual(dspl_dataset.description, 'My Dataset Description') 56 | self.assertEqual(dspl_dataset.url, 'url1') 57 | 58 | self.assertEqual(dspl_dataset.provider_name, 'Googler') 59 | self.assertEqual(dspl_dataset.provider_url, 'url2') 60 | 61 | # Test imports 62 | self.assertEqual(len(dspl_dataset.imports), 2) 63 | 64 | self.assertEqual(dspl_dataset.imports[0].namespace_id, 65 | 'imported_namespace1') 66 | self.assertEqual(dspl_dataset.imports[0].namespace_url, 67 | 'http://imported_namespace1_url') 68 | self.assertEqual(dspl_dataset.imports[1].namespace_id, 69 | 'imported_namespace2') 70 | self.assertEqual(dspl_dataset.imports[1].namespace_url, 71 | 'http://imported_namespace2_url') 72 | 73 | # Test topics 74 | self.assertEqual(len(dspl_dataset.topics), 2) 75 | 76 | self.assertEqual(dspl_dataset.topics[0].topic_id, 'topic1') 77 | self.assertEqual(dspl_dataset.topics[0].topic_name, 'topic1_name') 78 | self.assertEqual(len(dspl_dataset.topics[0].children), 2) 79 | 80 | self.assertEqual(dspl_dataset.topics[0].children[0].topic_id, 'topic2') 81 | self.assertEqual( 82 | dspl_dataset.topics[0].children[0].topic_name, 'topic2_name') 83 | self.assertEqual(dspl_dataset.topics[0].children[1].topic_id, 'topic3') 84 | self.assertEqual( 85 | dspl_dataset.topics[0].children[1].topic_name, 'topic3_name') 86 | 87 | self.assertEqual(dspl_dataset.topics[1].topic_id, 'topic4') 88 | self.assertEqual(dspl_dataset.topics[1].topic_name, 'topic4_name') 89 | self.assertEqual(len(dspl_dataset.topics[1].children), 0) 90 | 91 | # Test concepts 92 | self.assertEqual(len(dspl_dataset.concepts), 3) 93 | 94 | self.assertEqual(dspl_dataset.concepts[0].concept_id, 'concept1') 95 | self.assertEqual(dspl_dataset.concepts[0].concept_extension_reference, 96 | 'entity:entity') 97 | self.assertEqual(dspl_dataset.concepts[0].concept_name, 'Concept 1') 98 | self.assertEqual(dspl_dataset.concepts[0].concept_description, 99 | 'Concept 1 Description') 100 | self.assertEqual(dspl_dataset.concepts[0].data_type, 'string') 101 | self.assertEqual(len(dspl_dataset.concepts[0].attributes), 1) 102 | self.assertEqual( 103 | dspl_dataset.concepts[0].attributes[0].concept_ref, 'attribute_concept') 104 | self.assertEqual( 105 | dspl_dataset.concepts[0].attributes[0].value, 'attribute_value') 106 | self.assertEqual(len(dspl_dataset.concepts[0].properties), 2) 107 | self.assertEqual( 108 | dspl_dataset.concepts[0].properties[0].concept_ref, 'property_concept') 109 | self.assertEqual( 110 | dspl_dataset.concepts[0].properties[0].is_parent, False) 111 | self.assertEqual( 112 | dspl_dataset.concepts[0].properties[1].concept_ref, 113 | 'another_property_concept') 114 | self.assertEqual( 115 | dspl_dataset.concepts[0].properties[1].is_parent, True) 116 | self.assertEqual(dspl_dataset.concepts[0].table_ref, 'table2') 117 | 118 | self.assertEqual(dspl_dataset.concepts[1].concept_id, 'concept2') 119 | self.assertEqual(dspl_dataset.concepts[1].concept_name, 'Concept 2') 120 | self.assertEqual(dspl_dataset.concepts[1].concept_description, 121 | 'Concept 2 Description') 122 | self.assertEqual(dspl_dataset.concepts[1].topic_references, 123 | ['topic1', 'topic2']) 124 | self.assertEqual(dspl_dataset.concepts[1].data_type, 'integer') 125 | self.assertEqual(len(dspl_dataset.concepts[1].attributes), 0) 126 | 
self.assertEqual(len(dspl_dataset.concepts[1].properties), 0) 127 | 128 | self.assertEqual(dspl_dataset.concepts[2].concept_id, 'geo:country') 129 | self.assertEqual(dspl_dataset.concepts[2].concept_reference, 'geo:country') 130 | 131 | # Test slices 132 | self.assertEqual(len(dspl_dataset.slices), 1) 133 | 134 | self.assertEqual(dspl_dataset.slices[0].slice_id, 'data_slice') 135 | self.assertEqual(dspl_dataset.slices[0].dimension_refs, 136 | ['concept1', 'geo:country']) 137 | self.assertEqual(dspl_dataset.slices[0].metric_refs, ['concept2']) 138 | self.assertEqual(dspl_dataset.slices[0].table_ref, 'table3') 139 | self.assertEqual( 140 | sorted(dspl_dataset.slices[0].dimension_map.items()), 141 | sorted([('concept1', 'concept_column1'), 142 | ('geo:country', 'concept_column3')])) 143 | self.assertEqual( 144 | dspl_dataset.slices[0].metric_map.items(), 145 | [('concept2', 'concept_column2')]) 146 | 147 | # Test tables 148 | self.assertEqual(len(dspl_dataset.tables), 1) 149 | 150 | self.assertEqual(dspl_dataset.tables[0].table_id, 'table') 151 | self.assertEqual(dspl_dataset.tables[0].file_name, 'mydata.csv') 152 | 153 | self.assertEqual(len(dspl_dataset.tables[0].columns), 2) 154 | self.assertEqual(dspl_dataset.tables[0].columns[0].column_id, 'col1') 155 | self.assertEqual(dspl_dataset.tables[0].columns[0].data_type, 'string') 156 | self.assertEqual(dspl_dataset.tables[0].columns[1].column_id, 'col2') 157 | self.assertEqual(dspl_dataset.tables[0].columns[1].data_type, 'integer') 158 | 159 | expected_table_rows = _SLICE_CSV_DATA.splitlines() 160 | expected_table_data = [] 161 | 162 | for row in expected_table_rows: 163 | split_row = row.split(',') 164 | cleaned_row = [r.strip() for r in split_row] 165 | 166 | expected_table_data.append(cleaned_row) 167 | 168 | self.assertEqual(dspl_dataset.tables[0].table_data, expected_table_data) 169 | 170 | def testBadFileReference(self): 171 | """Test case in which CSV file does not exist.""" 172 | os.remove(os.path.join(self.input_dir, 'mydata.csv')) 173 | 174 | self.assertRaises( 175 | dspl_model_loader.DSPLModelLoaderError, 176 | dspl_model_loader.LoadDSPLFromFiles, 177 | self.xml_file_path) 178 | 179 | def testPartialFileLoading(self): 180 | """Test case in which load_all_data is set to False.""" 181 | dspl_dataset = dspl_model_loader.LoadDSPLFromFiles( 182 | self.xml_file_path, load_all_data=False) 183 | 184 | expected_table_rows = _SLICE_CSV_DATA.splitlines()[0:2] 185 | expected_table_data = [r.split(',') for r in expected_table_rows] 186 | 187 | self.assertEqual(dspl_dataset.tables[0].table_data, expected_table_data) 188 | 189 | 190 | if __name__ == '__main__': 191 | unittest.main() 192 | -------------------------------------------------------------------------------- /tools/dspltools/packages/dspllib/validation/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # 3 | # Copyright 2018 Google LLC 4 | # 5 | # Use of this source code is governed by a BSD-style 6 | # license that can be found in the LICENSE file or at 7 | # https://developers.google.com/open-source/licenses/bsd 8 | -------------------------------------------------------------------------------- /tools/dspltools/packages/dspllib/validation/schemas/xml_1998.xsd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | See http://www.w3.org/XML/1998/namespace.html and 7 | http://www.w3.org/TR/REC-xml for information about this namespace. 
8 | 9 | This schema document describes the XML namespace, in a form 10 | suitable for import by other schema documents. 11 | 12 | Note that local names in this namespace are intended to be defined 13 | only by the World Wide Web Consortium or its subgroups. The 14 | following names are currently defined in this namespace and should 15 | not be used with conflicting semantics by any Working Group, 16 | specification, or document instance: 17 | 18 | base (as an attribute name): denotes an attribute whose value 19 | provides a URI to be used as the base for interpreting any 20 | relative URIs in the scope of the element on which it 21 | appears; its value is inherited. This name is reserved 22 | by virtue of its definition in the XML Base specification. 23 | 24 | lang (as an attribute name): denotes an attribute whose value 25 | is a language code for the natural language of the content of 26 | any element; its value is inherited. This name is reserved 27 | by virtue of its definition in the XML specification. 28 | 29 | space (as an attribute name): denotes an attribute whose 30 | value is a keyword indicating what whitespace processing 31 | discipline is intended for the content of the element; its 32 | value is inherited. This name is reserved by virtue of its 33 | definition in the XML specification. 34 | 35 | Father (in any context at all): denotes Jon Bosak, the chair of 36 | the original XML Working Group. This name is reserved by 37 | the following decision of the W3C XML Plenary and 38 | XML Coordination groups: 39 | 40 | In appreciation for his vision, leadership and dedication 41 | the W3C XML Plenary on this 10th day of February, 2000 42 | reserves for Jon Bosak in perpetuity the XML name 43 | xml:Father 44 | 45 | 46 | 47 | 48 | This schema defines attributes and an attribute group 49 | suitable for use by 50 | schemas wishing to allow xml:base, xml:lang or xml:space attributes 51 | on elements they define. 52 | 53 | To enable this, such a schema must import this schema 54 | for the XML namespace, e.g. as follows: 55 | <schema . . .> 56 | . . . 57 | <import namespace="http://www.w3.org/XML/1998/namespace" 58 | schemaLocation="http://www.w3.org/2001/03/xml.xsd"/> 59 | 60 | Subsequently, qualified reference to any of the attributes 61 | or the group defined below will have the desired effect, e.g. 62 | 63 | <type . . .> 64 | . . . 65 | <attributeGroup ref="xml:specialAttrs"/> 66 | 67 | will define a type which will schema-validate an instance 68 | element with any of those attributes 69 | 70 | 71 | 72 | In keeping with the XML Schema WG's standard versioning 73 | policy, this schema document will persist at 74 | http://www.w3.org/2001/03/xml.xsd. 75 | At the date of issue it can also be found at 76 | http://www.w3.org/2001/xml.xsd. 77 | The schema document at that URI may however change in the future, 78 | in order to remain compatible with the latest version of XML Schema 79 | itself. In other words, if the XML Schema namespace changes, the version 80 | of this document at 81 | http://www.w3.org/2001/xml.xsd will change 82 | accordingly; the version at 83 | http://www.w3.org/2001/03/xml.xsd will not change. 84 | 85 | 86 | 87 | 88 | 89 | In due course, we should install the relevant ISO 2- and 3-letter 90 | codes as the enumerated possible values . . . 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | See http://www.w3.org/TR/xmlbase/ for 106 | information about this attribute. 
107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | -------------------------------------------------------------------------------- /tools/dspltools/packages/dspllib/validation/test_dataset/countries.csv: -------------------------------------------------------------------------------- 1 | country,name,latitude,longitude 2 | AD,Andorra,42.546245,1.601554 3 | AF,Afghanistan,33.93911,67.709953 4 | AI,Anguilla,18.220554,-63.068615 5 | AL,Albania,41.153332,20.168331 6 | US,United States,37.09024,-95.712891 7 | -------------------------------------------------------------------------------- /tools/dspltools/packages/dspllib/validation/test_dataset/country_slice.csv: -------------------------------------------------------------------------------- 1 | country,year,population 2 | AF,1960,9616353 3 | AF,1961,9799379 4 | AF,1962,9989846 5 | AF,1963,10188299 6 | AD,1960,8616353 7 | AD,1961,8799379 8 | AD,1962,8989846 9 | AD,1963,9188299 10 | US,1960,19616353 11 | US,1961,19799379 12 | US,1962,19989846 13 | US,1963,110188299 -------------------------------------------------------------------------------- /tools/dspltools/packages/dspllib/validation/test_dataset/gender_country_slice.csv: -------------------------------------------------------------------------------- 1 | country,gender,year,population 2 | AF,M,1960,4808176 3 | AF,M,1961,4899689 4 | AF,F,1960,4808177 5 | AF,F,1961,4899690 6 | AD,M,1960,3808176 7 | AD,M,1961,3899689 8 | AD,F,1960,3808177 9 | AD,F,1961,3899690 10 | US,M,1960,9808176 11 | US,M,1961,9899689 12 | US,F,1960,9808177 13 | US,F,1961,9899690 -------------------------------------------------------------------------------- /tools/dspltools/packages/dspllib/validation/test_dataset/genders.csv: -------------------------------------------------------------------------------- 1 | gender,name 2 | M,Male 3 | F,Female 4 | -------------------------------------------------------------------------------- /tools/dspltools/packages/dspllib/validation/test_dataset/state_slice.csv: -------------------------------------------------------------------------------- 1 | state,year,population,unemployment_rate 2 | AL,1960,9616353,5.1 3 | AL,1961,9799379,5.2 4 | AL,1962,9989846,4.8 5 | AL,1963,10188299,6.9 6 | AK,1960,8616353,6.1 7 | AK,1961,8799379,6.2 8 | AK,1962,8989846,7.8 9 | AK,1963,9188299,7.9 -------------------------------------------------------------------------------- /tools/dspltools/packages/dspllib/validation/test_dataset/states.csv: -------------------------------------------------------------------------------- 1 | state,name,latitude,longitude 2 | AL,Alabama,32.318231,-86.902298 3 | AK,Alaska,63.588753,-154.493062 4 | AR,Arkansas,35.20105,-91.831833 5 | AZ,Arizona,34.048928,-111.093731 6 | CA,California,36.778261,-119.417932 7 | CO,Colorado,39.550051,-105.782067 8 | CT,Connecticut,41.603221,-73.087749 9 | -------------------------------------------------------------------------------- /tools/dspltools/packages/dspllib/validation/xml_validation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # 3 | # Copyright 2018 Google LLC 4 | # 5 | # Use of this source code is governed by a BSD-style 6 | # license that can be found in the LICENSE file or at 7 | # https://developers.google.com/open-source/licenses/bsd 8 | 9 | """Validate a DSPL XML file.""" 10 | 11 | 12 | __author__ = 'Benjamin Yolken ' 13 | 14 | from lxml import etree 15 | import os.path 16 | import re 17 | 18 | 19 | # The number of lines of context to 
show around XML errors
20 | _CONTEXT_LINES = 3
21 | 
22 | _SCHEMA_PATH = os.path.join(os.path.split(__file__)[0], 'schemas')
23 | _DSPL_SCHEMA_FILE = 'dspl.xsd'
24 | 
25 | 
26 | def GetErrorContext(xml_string, error_line_number):
27 |   """Generate a string that shows the context of an XML error.
28 | 
29 |   Args:
30 |     xml_string: String containing the contents of an XML file
31 |     error_line_number: 1-indexed line number on which error has been detected
32 | 
33 |   Returns:
34 |     A pretty-printed string containing the lines around the error
35 |   """
36 |   min_error_start_line = (error_line_number - 1) - _CONTEXT_LINES
37 |   max_error_end_line = (error_line_number - 1) + _CONTEXT_LINES
38 | 
39 |   error_context_lines = []
40 | 
41 |   for l, line in enumerate(xml_string.splitlines()):
42 |     if l >= min_error_start_line:
43 |       line_string = '%5d' % (l + 1)
44 | 
45 |       # Highlight the error line with asterisks
46 |       if (l + 1) == error_line_number:
47 |         line_string = line_string.replace(' ', '*')
48 | 
49 |       error_context_lines.append('%s: %s' % (line_string, line.rstrip()))
50 | 
51 |       if l >= max_error_end_line:
52 |         break
53 | 
54 |   return '\n'.join(error_context_lines)
55 | 
56 | 
57 | def GetErrorLineNumber(error_string):
58 |   """Parse out the line number from an XML validation error message.
59 | 
60 |   Args:
61 |     error_string: String representation of a validation error
62 | 
63 |   Returns:
64 |     Integer line number on which error was detected
65 |   """
66 |   line_match = re.search(': line ([0-9]+)', error_string)
67 | 
68 |   return int(line_match.group(1))
69 | 
70 | 
71 | def RunValidation(xml_file, schema_file=None, verbose=True):
72 |   """Run the validation process and return a message with the result.
73 | 
74 |   Args:
75 |     xml_file: An XML input file
76 |     schema_file: A DSPL schema file; if not given, the default 'dspl.xsd' is
77 |         used.
78 |     verbose: Include helpful, extra information about validation
79 | 
80 |   Returns:
81 |     String containing result of validation process
82 |   """
83 |   result = ''
84 | 
85 |   xml_file_text = xml_file.read()
86 | 
87 |   if schema_file:
88 |     schema_file_text = schema_file.read()
89 |   else:
90 |     schema_file = open(os.path.join(_SCHEMA_PATH, _DSPL_SCHEMA_FILE), 'r')
91 |     schema_file_text = schema_file.read()
92 |     schema_file.close()
93 | 
94 |   # Insert proper paths into XSD schemaLocation tags
95 |   substitution_function = (
96 |       lambda m: 'schemaLocation="%s"' % os.path.join(_SCHEMA_PATH, m.group(1)))
97 | 
98 |   schema_file_text = re.sub(
99 |       'schemaLocation="([a-zA-Z_0-9.]+)"',
100 |       substitution_function,
101 |       schema_file_text, 2)
102 | 
103 |   # Parse the schema file into an etree
104 |   schema_file_xml = etree.XML(schema_file_text)
105 | 
106 |   try:
107 |     schema = etree.XMLSchema(schema_file_xml)
108 |     parser = etree.XMLParser(schema=schema)
109 |     etree.fromstring(xml_file_text, parser)
110 |   except etree.XMLSyntaxError as xml_error:
111 |     # XML parsing error
112 |     error_string = str(xml_error)
113 |     if verbose:
114 |       result = ('Input does not validate against DSPL schema\n\n%s\n%s' %
115 |                 (error_string, GetErrorContext(
116 |                     xml_file_text,
117 |                     xml_error.lineno)))
118 |     else:
119 |       result = error_string
120 |   else:
121 |     if verbose:
122 |       result = 'XML file validates successfully!'
123 | 124 | return result 125 | -------------------------------------------------------------------------------- /tools/dspltools/packages/dspllib/validation/xml_validation_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # 3 | # Copyright 2018 Google LLC 4 | # 5 | # Use of this source code is governed by a BSD-style 6 | # license that can be found in the LICENSE file or at 7 | # https://developers.google.com/open-source/licenses/bsd 8 | 9 | """Tests of xml_validation module.""" 10 | 11 | 12 | __author__ = 'Benjamin Yolken ' 13 | 14 | import re 15 | import StringIO 16 | import unittest 17 | 18 | import xml_validation 19 | 20 | 21 | _DSPL_CONTENT_VALID = ( 22 | """ 23 | 25 | 26 | 27 | 28 | Dataset Name 29 | 30 | 31 | 32 | 33 | Provider Name 34 | 35 | 36 | """) 37 | 38 | 39 | _DSPL_CONTENT_XML_ERROR = ( 40 | """ 41 | 43 | 44 | 45 | 46 | Dataset Name 47 | 48 | 49 | 50 | 51 | Provider Name 52 | 53 | 54 | """) 55 | 56 | 57 | _DSPL_CONTENT_SCHEMA_ERROR = ( 58 | """ 59 | 61 | 62 | 63 | 64 | Dataset Name 65 | 66 | 67 | 68 | 69 | Provider Name 70 | 71 | 72 | """) 73 | 74 | _DSPL_BILLION_LAUGHS = ( 75 | """ 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | ]> 87 | 88 | 89 | 90 | &lol9; 91 | 92 | 93 | 94 | 95 | Provider Name 96 | 97 | 98 | """) 99 | 100 | 101 | class XMLValidationTests(unittest.TestCase): 102 | """Test case for xml_validation module.""" 103 | 104 | def setUp(self): 105 | pass 106 | 107 | def testXMLValidationGoodXML(self): 108 | """A simple end-to-end test of the valid XML case.""" 109 | valid_input_file = StringIO.StringIO(_DSPL_CONTENT_VALID) 110 | 111 | result = xml_validation.RunValidation(valid_input_file) 112 | self.assertTrue(re.search('validates successfully', result)) 113 | 114 | valid_input_file.close() 115 | 116 | def testXMLValidationXMLError(self): 117 | """A simple end-to-end test of the bad XML case.""" 118 | xml_error_input_file = StringIO.StringIO(_DSPL_CONTENT_XML_ERROR) 119 | 120 | result = xml_validation.RunValidation(xml_error_input_file) 121 | self.assertTrue( 122 | re.search('XML declaration allowed only.*line 1', result, flags=re.DOTALL)) 123 | 124 | xml_error_input_file.close() 125 | 126 | def testXMLValidationSchemaError(self): 127 | """A simple end-to-end test of the non-conforming XML case.""" 128 | schema_error_input_file = StringIO.StringIO(_DSPL_CONTENT_SCHEMA_ERROR) 129 | 130 | result = xml_validation.RunValidation(schema_error_input_file) 131 | # TODO: this validation failure has lineno 0; look into why lxml is not 132 | # returning the right location. 133 | self.assertTrue(re.search('The attribute \'illegalproperty\' is not allowed', 134 | result, flags=re.DOTALL)) 135 | 136 | schema_error_input_file.close() 137 | 138 | def testXMLBillionLaughsAttack(self): 139 | """A simple test to verify that the validation routine is not susceptible 140 | to the billion laughs attack. 
141 | """ 142 | billion_laughs_input_file = StringIO.StringIO(_DSPL_BILLION_LAUGHS) 143 | result = xml_validation.RunValidation(billion_laughs_input_file) 144 | self.assertTrue(re.search('Detected an entity reference loop', result)) 145 | 146 | billion_laughs_input_file.close() 147 | 148 | 149 | if __name__ == '__main__': 150 | unittest.main() 151 | -------------------------------------------------------------------------------- /tools/dspltools/requirements.txt: -------------------------------------------------------------------------------- 1 | lxml 2 | -------------------------------------------------------------------------------- /tools/dspltools/scripts/dsplcheck.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # 3 | # Copyright 2018 Google LLC 4 | # 5 | # Use of this source code is governed by a BSD-style 6 | # license that can be found in the LICENSE file or at 7 | # https://developers.google.com/open-source/licenses/bsd 8 | 9 | """Check a DSPL dataset for likely import errors.""" 10 | from __future__ import print_function 11 | 12 | 13 | __author__ = 'Benjamin Yolken ' 14 | 15 | import optparse 16 | import os 17 | import shutil 18 | import sys 19 | import tempfile 20 | import time 21 | import zipfile 22 | 23 | from dspllib.model import dspl_model_loader 24 | from dspllib.validation import dspl_validation 25 | from dspllib.validation import xml_validation 26 | 27 | 28 | def LoadOptionsFromFlags(argv): 29 | """Parse command-line arguments. 30 | 31 | Args: 32 | argv: The program argument vector (excluding the script name) 33 | 34 | Returns: 35 | A dictionary with key-value pairs for each of the options 36 | """ 37 | usage_string = 'python dsplcheck.py [options] [DSPL XML file or zip archive]' 38 | 39 | parser = optparse.OptionParser(usage=usage_string) 40 | 41 | parser.set_defaults(verbose=True) 42 | parser.add_option('-q', '--quiet', 43 | action='store_false', dest='verbose', 44 | help='Quiet mode') 45 | 46 | parser.add_option( 47 | '-l', '--checking_level', dest='checking_level', type='choice', 48 | choices=['schema_only', 'schema_and_model', 'full'], default='full', 49 | help='Level of checking to do (default: full)') 50 | 51 | (options, args) = parser.parse_args(args=argv) 52 | 53 | if not len(args) == 1: 54 | parser.error('An XML file or DSPL zip archive is required') 55 | 56 | return {'verbose': options.verbose, 57 | 'checking_level': options.checking_level, 58 | 'file_path': args[0]} 59 | 60 | 61 | def GetInputFilePath(input_file_path): 62 | """Parse the input file path, extracting a zip file if necessary. 63 | 64 | Args: 65 | input_file_path: String path to dsplcheck input file 66 | 67 | Returns: 68 | Dictionary containing final XML file path (post-extraction) and directory 69 | into which zip was extracted (or '' if input was not a zip). 
70 | """ 71 | if zipfile.is_zipfile(input_file_path): 72 | # Extract files to temporary directory and search for dataset XML 73 | zip_dir = tempfile.mkdtemp() 74 | 75 | zip_file = zipfile.ZipFile(input_file_path, 'r') 76 | zip_file.extractall(zip_dir) 77 | 78 | xml_file_paths = [] 79 | 80 | for (dirpath, unused_dirnames, filenames) in os.walk(zip_dir): 81 | for file_name in filenames: 82 | if file_name[-4:] == '.xml': 83 | xml_file_paths.append(os.path.join(dirpath, file_name)) 84 | 85 | if not xml_file_paths: 86 | print('Error: zip does not have any XML files') 87 | sys.exit(2) 88 | elif len(xml_file_paths) > 1: 89 | print('Error: zip contains multiple XML files') 90 | sys.exit(2) 91 | else: 92 | xml_file_path = xml_file_paths[0] 93 | 94 | zip_file.close() 95 | else: 96 | xml_file_path = input_file_path 97 | zip_dir = '' 98 | 99 | return {'xml_file_path': xml_file_path, 100 | 'zip_dir': zip_dir} 101 | 102 | 103 | def main(argv): 104 | """Parse command-line flags and run XML validator. 105 | 106 | Args: 107 | argv: The program argument vector (excluding the script name) 108 | """ 109 | start_time = time.time() 110 | 111 | options = LoadOptionsFromFlags(argv) 112 | file_paths = GetInputFilePath(options['file_path']) 113 | 114 | try: 115 | xml_file = open(file_paths['xml_file_path'], 'r') 116 | except IOError as io_error: 117 | print('Error opening XML file\n\n%s' % io_error) 118 | sys.exit(2) 119 | 120 | if options['verbose']: 121 | print('==== Checking XML file against DSPL schema....') 122 | 123 | result = xml_validation.RunValidation( 124 | xml_file, 125 | verbose=options['verbose']) 126 | 127 | print(result) 128 | 129 | if 'validates successfully' not in result: 130 | # Stop if XML validation not successful 131 | sys.exit(2) 132 | 133 | if options['checking_level'] != 'schema_only': 134 | if options['verbose']: 135 | print('\n==== Parsing DSPL dataset....') 136 | 137 | if options['checking_level'] == 'full': 138 | full_data_check = True 139 | else: 140 | full_data_check = False 141 | 142 | try: 143 | dataset = dspl_model_loader.LoadDSPLFromFiles( 144 | file_paths['xml_file_path'], load_all_data=full_data_check) 145 | except dspl_model_loader.DSPLModelLoaderError as loader_error: 146 | print('Error while trying to parse DSPL dataset\n\n%s' % loader_error) 147 | sys.exit(2) 148 | 149 | if options['verbose']: 150 | print('Parsing completed.') 151 | 152 | if full_data_check: 153 | print('\n==== Checking DSPL model and data....') 154 | else: 155 | print('\n==== Checking DSPL model....') 156 | 157 | dspl_validator = dspl_validation.DSPLDatasetValidator( 158 | dataset, full_data_check=full_data_check) 159 | 160 | print(dspl_validator.RunValidation(options['verbose'])) 161 | 162 | xml_file.close() 163 | 164 | if file_paths['zip_dir']: 165 | 166 | shutil.rmtree(file_paths['zip_dir']) 167 | 168 | if options['verbose']: 169 | print('\nCompleted in %0.2f seconds' % (time.time() - start_time)) 170 | 171 | 172 | if __name__ == '__main__': 173 | main(sys.argv[1:]) 174 | -------------------------------------------------------------------------------- /tools/dspltools/scripts/dsplcheck_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | # 3 | # Copyright 2018 Google LLC 4 | # 5 | # Use of this source code is governed by a BSD-style 6 | # license that can be found in the LICENSE file or at 7 | # https://developers.google.com/open-source/licenses/bsd 8 | 9 | """Tests of dsplcheck module.""" 10 | 11 | 12 | __author__ = 'Benjamin Yolken ' 13 | 
--------------------------------------------------------------------------------
/tools/dspltools/scripts/dsplcheck_test.py:
--------------------------------------------------------------------------------
#!/usr/bin/python2
#
# Copyright 2018 Google LLC
#
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file or at
# https://developers.google.com/open-source/licenses/bsd

"""Tests of dsplcheck module."""


__author__ = 'Benjamin Yolken'

import os
import os.path
import re
import shutil
import StringIO
import sys
import tempfile
import unittest
import zipfile

import dsplcheck


_DSPL_CONTENT = (
    """<?xml version="1.0" encoding="UTF-8"?>
<dspl xmlns="http://schemas.google.com/dspl/2010">

  <info>
    <name>
      <value>Dataset Name</value>
    </name>
  </info>

  <provider>
    <name>
      <value>Provider Name</value>
    </name>
  </provider>
</dspl>""")


_DSPL_CONTENT_BAD_CSV_PATH = (
    """<?xml version="1.0" encoding="UTF-8"?>
<dspl xmlns="http://schemas.google.com/dspl/2010">

  <info>
    <name>
      <value>Dataset Name</value>
    </name>
  </info>

  <provider>
    <name>
      <value>Provider Name</value>
    </name>
  </provider>

  <tables>
    <table id="my_table">
      <column id="my_column" type="string"/>
      <data>
        <file format="csv">non_existent_file.csv</file>
      </data>
    </table>
  </tables>
</dspl>""")


class DSPLCheckTests(unittest.TestCase):
  """Test case for dsplcheck module."""

  def setUp(self):
    self.input_dir = tempfile.mkdtemp()
    self.valid_dspl_file_path = (
        os.path.join(self.input_dir, 'valid_dataset.xml'))

    self.valid_dspl_file = open(
        self.valid_dspl_file_path, 'w')
    self.valid_dspl_file.write(_DSPL_CONTENT)
    self.valid_dspl_file.close()

  def tearDown(self):
    shutil.rmtree(self.input_dir)

  def testValidDataset(self):
    """Test basic case of a dataset that validates and parses correctly."""
    self._StdoutTestHelper(
        dsplcheck.main, [self.valid_dspl_file_path],
        'validates successfully.*Parsing completed.*'
        'Checking DSPL model and data.*Completed')

  def testBadXMLFilePath(self):
    """Test case where a bad XML file path is passed in."""
    self._StdoutTestHelper(
        dsplcheck.main, ['nonexistent_input_file.xml'],
        'Error opening XML file', expect_exit=True)

  def testBadCSVFilePath(self):
    """Test case where the DSPL file has a bad CSV reference."""
    bad_csv_dspl_file_path = (
        os.path.join(self.input_dir, 'invalid_csv_dataset.xml'))

    bad_csv_dspl_file = open(bad_csv_dspl_file_path, 'w')
    bad_csv_dspl_file.write(_DSPL_CONTENT_BAD_CSV_PATH)
    bad_csv_dspl_file.close()

    self._StdoutTestHelper(
        dsplcheck.main, [bad_csv_dspl_file_path],
        'Error while trying to parse', expect_exit=True)

  def testSchemaOnlyOption(self):
    """Test that the 'schema only' checking level option works correctly."""
    self._StdoutTestHelper(
        dsplcheck.main, [self.valid_dspl_file_path, '-l', 'schema_only'],
        'validates successfully\W*Completed')

  def testSchemaAndModelOption(self):
    """Test that the 'schema and model' checking level option works."""
    self._StdoutTestHelper(
        dsplcheck.main, [self.valid_dspl_file_path, '-l', 'schema_and_model'],
        'Checking DSPL model(?! and data)')

  def testZipInput(self):
    """Test that the module properly handles zipped input."""
    zip_path = os.path.join(self.input_dir, 'dataset.zip')

    zip_file = zipfile.ZipFile(zip_path, 'w')
    zip_file.write(self.valid_dspl_file_path)
    zip_file.close()

    self._StdoutTestHelper(
        dsplcheck.main, [zip_path],
        'validates successfully.*Parsing completed.*'
        'Checking DSPL model and data.*Completed')

  def testZipMissingXML(self):
    """Test that a zip file without an XML file produces an error."""
    zip_path = os.path.join(self.input_dir, 'dataset.zip')

    zip_file = zipfile.ZipFile(zip_path, 'w')
    zip_file.writestr('test.txt', 'Text')
    zip_file.close()

    self._StdoutTestHelper(
        dsplcheck.main, [zip_path],
        'does not have any XML', expect_exit=True)

  def testZipMultipleXMLFiles(self):
    """Test that a zip file with multiple XML files produces an error."""
    zip_path = os.path.join(self.input_dir, 'dataset.zip')

    zip_file = zipfile.ZipFile(zip_path, 'w')
    zip_file.writestr('test.xml', 'Text')
    zip_file.writestr('test2.xml', 'Text')
    zip_file.close()

    self._StdoutTestHelper(
        dsplcheck.main, [zip_path],
        'multiple XML files', expect_exit=True)

  def _StdoutTestHelper(self, function, args,
                        expected_output, expect_exit=False):
    """Check the stdout output of a function against its expected value.

    Args:
      function: A function to execute
      args: The arguments to pass to the function
      expected_output: A regular expression expected to match the stdout output
      expect_exit: Boolean indicating whether the function execution should
          trigger a system exit
    """
    saved_stdout = sys.stdout

    # Temporarily redirect stdout to an in-memory buffer so that the output
    # of the function under test can be matched against expected_output
    redirected_output = StringIO.StringIO()
    sys.stdout = redirected_output

    if expect_exit:
      self.assertRaises(SystemExit, function, args)
    else:
      function(args)

    self.assertTrue(
        re.search(expected_output, redirected_output.getvalue(), re.DOTALL))

    redirected_output.close()
    sys.stdout = saved_stdout


if __name__ == '__main__':
  unittest.main()
--------------------------------------------------------------------------------
/tools/dspltools/scripts/dsplgen.py:
--------------------------------------------------------------------------------
#!/usr/bin/python2
#
# Copyright 2018 Google LLC
#
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file or at
# https://developers.google.com/open-source/licenses/bsd

"""Generate a DSPL dataset from a tabular data source via the command line."""
from __future__ import print_function


__author__ = 'Benjamin Yolken'

import optparse
import sys
import time

from dspllib.data_sources import csv_data_source
from dspllib.data_sources import csv_data_source_sqlite
from dspllib.data_sources import data_source_to_dspl


def LoadOptionsFromFlags(argv):
  """Parse command-line arguments.

  Args:
    argv: The program argument vector (excluding the script name)

  Returns:
    A dictionary with key-value pairs for each of the options
  """
  usage_string = 'python dsplgen.py [options] [csv file]'

  parser = optparse.OptionParser(usage=usage_string)
  parser.set_defaults(verbose=True)
  parser.add_option('-o', '--output_path', dest='output_path', default='',
                    help=('Path to an output directory '
                          '(default: current directory)'))
  parser.add_option('-q', '--quiet',
                    action='store_false', dest='verbose',
                    help='Quiet mode')
  parser.add_option('-t', '--data_type', dest='data_type', type='choice',
                    choices=['csv', 'csv_sqlite'], default='csv',
                    help='Type of data source to use (default: csv)')

  (options, args) = parser.parse_args(args=argv)

  if len(args) != 1:
    parser.error('A data source (e.g., path to a CSV file) is required')

  return {'data_type': options.data_type,
          'data_source': args[0],
          'output_path': options.output_path,
          'verbose': options.verbose}

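# For example (illustrative values, not taken from the original source):
#
#   LoadOptionsFromFlags(['-t', 'csv_sqlite', '-o', '/tmp/out', 'data.csv'])
#
# returns:
#
#   {'data_type': 'csv_sqlite', 'data_source': 'data.csv',
#    'output_path': '/tmp/out', 'verbose': True}
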
def main(argv):
  """Parse command-line flags and run the data source to DSPL conversion.

  Args:
    argv: The program argument vector (excluding the script name)
  """
  start_time = time.time()
  options = LoadOptionsFromFlags(argv)

  # Connect to the data source
  if options['data_type'] in ['csv', 'csv_sqlite']:
    try:
      csv_file = open(options['data_source'], 'r')
    except IOError as io_error:
      print('Error opening CSV file\n\n%s' % io_error)
      sys.exit(2)

    if options['data_type'] == 'csv':
      data_source_obj = csv_data_source.CSVDataSource(
          csv_file, options['verbose'])
    else:
      data_source_obj = csv_data_source_sqlite.CSVDataSourceSqlite(
          csv_file, options['verbose'])
  else:
    print('Error: Unknown data type: %s' % (options['data_type']))
    sys.exit(2)

  # Create a DSPL dataset from the data source
  dataset = data_source_to_dspl.PopulateDataset(
      data_source_obj, options['verbose'])
  data_source_obj.Close()

  if options['verbose']:
    print('Materializing dataset:')
    print(str(dataset))

  # Write the DSPL dataset to disk
  dataset.Materialize(options['output_path'])

  if options['verbose']:
    print('Completed in %0.2f seconds' % (time.time() - start_time))


if __name__ == '__main__':
  main(sys.argv[1:])
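
# Example invocations (illustrative):
#
#   python dsplgen.py -o /tmp/dspl_output mydata.csv
#   python dsplgen.py -t csv_sqlite -q mydata.csv
#
# Column metadata is embedded in the CSV header row, as in the tests below,
# e.g. date[type=date;format=yyyy-MM-dd] or
# category2[concept=geo:us_state;rollup=true].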
--------------------------------------------------------------------------------
/tools/dspltools/scripts/dsplgen_test.py:
--------------------------------------------------------------------------------
#!/usr/bin/python2
#
# Copyright 2018 Google LLC
#
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file or at
# https://developers.google.com/open-source/licenses/bsd

"""Tests of dsplgen module."""


__author__ = 'Benjamin Yolken'

import os
import os.path
import re
import shutil
import StringIO
import sys
import tempfile
import unittest

import dsplcheck
import dsplgen


_TEST_CSV_CONTENT = (
    """date[type=date;format=yyyy-MM-dd],category1,category2[concept=geo:us_state;rollup=true],metric1[extends=quantity:ratio;slice_role=metric],metric2,metric3
1980-01-01,red,california,89,321,71.21
1981-01-01,red,california,99,231,391.2
1982-01-01,blue,maine's,293,32,2.31
1983-01-01,blue,california,293,12,10.3
1984-01-01,red,maine's,932,48,10.78""")


class DSPLGenTests(unittest.TestCase):
  """Test cases for dsplgen module."""

  def setUp(self):
    self.input_dir = tempfile.mkdtemp()

    input_file = open(os.path.join(self.input_dir, 'input.csv'), 'w')
    input_file.write(_TEST_CSV_CONTENT)
    input_file.close()

    self.output_dir = tempfile.mkdtemp()

  def tearDown(self):
    shutil.rmtree(self.input_dir)
    shutil.rmtree(self.output_dir)

  def testDSPLGenEndToEnd(self):
    """A simple end-to-end test of the dsplgen application."""
    dsplgen.main(['-o', self.output_dir, '-q',
                  os.path.join(self.input_dir, 'input.csv')])

    self.assertTrue(
        os.path.isfile(os.path.join(self.output_dir, 'dataset.xml')))
    self.assertTrue(
        os.path.isfile(os.path.join(self.output_dir, 'category1_table.csv')))
    self.assertTrue(
        os.path.isfile(os.path.join(self.output_dir, 'slice_0_table.csv')))
    self.assertTrue(
        os.path.isfile(os.path.join(self.output_dir, 'slice_1_table.csv')))

    # Test that the output validates against dsplcheck
    saved_stdout = sys.stdout

    redirected_output = StringIO.StringIO()
    sys.stdout = redirected_output

    dsplcheck.main([os.path.join(self.output_dir, 'dataset.xml')])

    self.assertTrue(
        re.search(
            'validates successfully.*Parsing completed.*'
            'No issues found.*Completed',
            redirected_output.getvalue(), re.DOTALL))

    redirected_output.close()
    sys.stdout = saved_stdout

  def testCSVNotFound(self):
    """Test case in which the CSV can't be opened."""
    saved_stdout = sys.stdout
    redirected_output = StringIO.StringIO()
    sys.stdout = redirected_output

    self.assertRaises(SystemExit,
                      dsplgen.main, ['-q', 'non_existent_input_file.csv'])
    self.assertTrue('Error opening CSV file' in redirected_output.getvalue())

    redirected_output.close()
    sys.stdout = saved_stdout


if __name__ == '__main__':
  unittest.main()
--------------------------------------------------------------------------------
/tools/dspltools/scripts/run_all_tests.py:
--------------------------------------------------------------------------------
#!/usr/bin/python2
#
# Copyright 2018 Google LLC
#
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file or at
# https://developers.google.com/open-source/licenses/bsd

"""Run all tests defined in the DSPL Tools code."""


__author__ = 'Benjamin Yolken'

import unittest

_TEST_MODULE_NAMES = [
    'dsplcheck_test',
    'dsplgen_test',
    'dspllib.data_sources.csv_data_source_test',
    'dspllib.data_sources.csv_data_source_sqlite_test',
    'dspllib.data_sources.data_source_test',
    'dspllib.data_sources.data_source_to_dspl_test',
    'dspllib.model.dspl_model_loader_test',
    'dspllib.model.dspl_model_test',
    'dspllib.validation.dspl_validation_test',
    'dspllib.validation.xml_validation_test']


def main():
  """Run all DSPL Tools tests and print the results to stderr."""
  test_suite = unittest.TestSuite()

  for test_module_name in _TEST_MODULE_NAMES:
    test_suite.addTests(
        unittest.defaultTestLoader.loadTestsFromName(test_module_name))

  unittest.TextTestRunner().run(test_suite)


if __name__ == '__main__':
  main()
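
# Note: the dspllib.* test modules above can only be loaded if the dspllib
# package is importable, e.g. after it has been installed via the setup.py
# script in the parent directory.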
--------------------------------------------------------------------------------
/tools/dspltools/setup.py:
--------------------------------------------------------------------------------
#!/usr/bin/python2
#
# Copyright 2018 Google LLC
#
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file or at
# https://developers.google.com/open-source/licenses/bsd

"""Setup script for the DSPL Tools suite."""

import setuptools  # Imported before distutils for its side effects on setup()
from distutils.core import setup


setup(name='dspltools',
      version='0.5.0',
      description='Suite of command-line tools for generating DSPL datasets',
      author='Public Statistics',
      author_email='public-data-import-feedback@google.com',
      url='http://github.com/google/dspl',
      packages=['dspllib', 'dspllib.data_sources',
                'dspllib.model', 'dspllib.validation'],
      package_dir={'dspllib': 'packages/dspllib'},
      package_data={'dspllib.validation': ['schemas/*.xsd',
                                           'test_dataset/*.csv',
                                           'test_dataset/*.xml']},
      scripts=['scripts/dsplcheck.py', 'scripts/dsplgen.py',
               'scripts/run_all_tests.py'])
--------------------------------------------------------------------------------