├── metatab
    ├── test
    │   ├── __init__.py
    │   ├── test-data
    │   │   ├── __init__.py
    │   │   ├── scripts
    │   │   │   ├── __init__.py
    │   │   │   ├── programsource.py
    │   │   │   ├── Py3Notebook.ipynb
    │   │   │   └── complex-text.txt
    │   │   ├── include3.csv
    │   │   ├── declare-only.csv
    │   │   ├── json
    │   │   │   ├── include3.json
    │   │   │   ├── include2.json
    │   │   │   ├── include1.json
    │   │   │   ├── datapackage_ex1.json
    │   │   │   ├── datapackage_ex1_web.json
    │   │   │   ├── issue1.json
    │   │   │   ├── children2.json
    │   │   │   ├── children.json
    │   │   │   ├── datapackage_ex2.json
    │   │   │   ├── example2.json
    │   │   │   ├── example1-web.json
    │   │   │   └── example1.json
    │   │   ├── include1.csv
    │   │   ├── include2.csv
    │   │   ├── line
    │   │   │   ├── line-oriented-doc-contacts.txt
    │   │   │   ├── line-oriented-doc-root.txt
    │   │   │   ├── line-oriented-doc-references-1.txt
    │   │   │   ├── line-oriented-doc-bib.txt
    │   │   │   ├── line-oriented-doc-references-2.txt
    │   │   │   └── line-oriented-doc.txt
    │   │   ├── short.csv
    │   │   ├── childpropertytype.csv
    │   │   ├── headers.csv
    │   │   ├── name.csv
    │   │   ├── name2.csv
    │   │   ├── nested.csv
    │   │   ├── errors
    │   │   │   ├── bad_include.csv
    │   │   │   ├── bad_declare.csv
    │   │   │   └── errors2.csv
    │   │   ├── children.csv
    │   │   ├── issue1.csv
    │   │   ├── children2.csv
    │   │   ├── census.csv
    │   │   ├── children3.csv
    │   │   ├── url_classes.csv
    │   │   ├── programsource.csv
    │   │   ├── resolve_urls.csv
    │   │   ├── packages
    │   │   │   └── example.com-test_package
    │   │   │   │   ├── metadata.csv
    │   │   │   │   └── notebooks
    │   │   │   │       └── Test_Notebook.ipynb
    │   │   ├── simple-text.txt
    │   │   ├── example2.csv
    │   │   ├── simple1.csv
    │   │   ├── resources.csv
    │   │   ├── short-declare.csv
    │   │   ├── datapackage_ex1.csv
    │   │   ├── datapackage_ex1_web.csv
    │   │   ├── geo.csv
    │   │   ├── datapackage_ex2.csv
    │   │   ├── schema.csv
    │   │   ├── yaml
    │   │   │   ├── yaml-example-1.csv
    │   │   │   └── yaml-example-1.yaml
    │   │   ├── notebooks
    │   │   │   ├── ImportTest.ipynb
    │   │   │   ├── CellExecuteError.ipynb
    │   │   │   └── SimpleMagicsTest.ipynb
    │   │   ├── example1-web.csv
    │   │   ├── example1.csv
    │   │   ├── example1-headers.csv
    │   │   ├── example1.txt
    │   │   ├── properties.csv
    │   │   ├── almost-everything.csv
    │   │   └── civicknowledge.com-rcfe_affordability-2015.csv
    │   ├── Dockerfile
    │   ├── core.py
    │   ├── Makefile
    │   ├── outputs
    │   │   ├── datapackage.json
    │   │   └── metadata.json
    │   └── test_doc.py
    ├── templates
    │   ├── __init__.py
    │   ├── datapackage.csv
    │   └── metatab.csv
    ├── __init__.py
    ├── exc.py
    ├── resolver.py
    ├── datapackage.py
    ├── appurl.py
    ├── rowgen.py
    ├── util.py
    └── cli.py
├── requirements.txt
├── MANIFEST.in
├── pyproject.toml
├── .travis.yml
├── docker
    ├── Dockerfile
    └── Makefile
├── examples
    ├── pandas-reporter.py
    └── Pandas Reporter Example.ipynb
├── develop.sh
├── Makefile
├── LICENSE
├── .gitignore
├── setup.py
├── docs
    ├── Census.rst
    ├── GeneratingRowsWithPrograms.rst
    ├── PrivateDatasets.rst
    └── Wrangling packages.rst
└── README.rst


/metatab/test/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/metatab/templates/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/scripts/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/metatab/test/Dockerfile:
--------------------------------------------------------------------------------
1 | # Test image: built on top of the n42org/tox base image.
2 | FROM n42org/tox
3 | 
4 | # Work from /code; the Makefile in this directory bind-mounts a host directory there.
5 | WORKDIR /code


--------------------------------------------------------------------------------
/metatab/test/test-data/include3.csv:
--------------------------------------------------------------------------------
1 | "Note","Include File 3"
2 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/declare-only.csv:
--------------------------------------------------------------------------------
1 | "Declare","metadata.csv",,,


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | metatabdecl
2 | rowgenerators>=0.7.0
3 | tabulate


--------------------------------------------------------------------------------
/metatab/test/test-data/scripts/programsource.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include requirements.txt
2 | include README.rst
3 | include LICENSE
4 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/json/include3.json:
--------------------------------------------------------------------------------
1 | {
2 |     "note": "Include File 3"
3 | }


--------------------------------------------------------------------------------
/metatab/test/test-data/include1.csv:
--------------------------------------------------------------------------------
1 | Note,Include File 1
2 | Include,include2.csv
3 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools", "wheel"]
3 | build-backend = "setuptools.build_meta:__legacy__"
4 | 
5 | [tool.setuptools_scm]


--------------------------------------------------------------------------------
/metatab/test/test-data/include2.csv:
--------------------------------------------------------------------------------
1 | "Note","Include File 2"
2 | "Include","https://raw.githubusercontent.com/CivicKnowledge/structured_tables/master/test/data/include3.csv"
3 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/line/line-oriented-doc-contacts.txt:
--------------------------------------------------------------------------------
1 | Section: Contacts
2 | Wrangler: Eric Busboom
3 | Wrangler.Email: eric@civicknowledge.com
4 | Wrangler.Organization: Civic Knowledge
5 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | python:
 3 |   - "3.6"
 4 | install:
 5 |   - pip install -r requirements.txt
 6 | script: python setup.py test
 7 | branches:
 8 |   only:
 9 |     - master
10 | 
11 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/short.csv:
--------------------------------------------------------------------------------
1 | "Declare","short-declare.csv"
2 | "include","include3.csv"
3 | "Title","Title1"
4 | ".Language","en"
5 | "Section","Section1"
6 | "Title","Title2"
7 | "Include","include3.csv"
8 | "Title","Title3"
9 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/childpropertytype.csv:
--------------------------------------------------------------------------------
1 | ,,
2 | "ChildPropertyType","Parent.Child","scalar"
3 | ,,
4 | "Parent","parent",
5 | "Parent.Child","child1",
6 | "Parent.Child","child2",
7 | "Parent.Child","child3",
8 | "Parent.Child","child4",
9 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/json/include2.json:
--------------------------------------------------------------------------------
1 | {
2 |     "note": [
3 |         "Include File 2",
4 |         "Include File 3"
5 |     ],
6 |     "include": "https://raw.githubusercontent.com/CivicKnowledge/structured_tables/master/test/data/include3.csv"
7 | }


--------------------------------------------------------------------------------
/metatab/test/test-data/line/line-oriented-doc-root.txt:
--------------------------------------------------------------------------------
1 | Identifier: 47bc1089-7584-41f0-b804-602ec42f1249
2 | Origin: civicknowledge.com
3 | Dataset: rcfe_affordability
4 | Version: 4
5 | Time: 2015
6 | Name: civicknowledge.com-rcfe_affordability-2015-4
7 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/headers.csv:
--------------------------------------------------------------------------------
 1 | "Section ","One",,
 2 | "Header","A","B","C"
 3 | "one",1,2,3
 4 | "two",4,5,6
 5 | "three",7,8,9
 6 | ,,,
 7 | "Section ","One",,
 8 | "Header","D","E","F"
 9 | "one",10,11,12
10 | "two",13,14,15
11 | "three",16,17,18
12 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/name.csv:
--------------------------------------------------------------------------------
 1 | "Declare","metatab-latest"
 2 | "Title","Registered Voters, By County"
 3 | "Name","this_name_should_be_replaced"
 4 | "Dataset","FooBar"
 5 | "Version",1
 6 | "Origin","example.com"
 7 | "Time",2017
 8 | "Space","CA"
 9 | "Grain","people"
10 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/name2.csv:
--------------------------------------------------------------------------------
 1 | "Declare","metatab-latest"
 2 | "Title","Registered Voters, By County"
 3 | "Name","this_name_should_be_replaced"
 4 | "Dataset","FooBar"
 5 | "Version",1
 6 | "Origin","example.com"
 7 | "Time",2017
 8 | "Space","CA"
 9 | "Grain","people"
10 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/nested.csv:
--------------------------------------------------------------------------------
 1 | "Section","Nesting",
 2 | "A",1,
 3 | ".B",2,
 4 | ".B",3,
 5 | "X",4,
 6 | ".Y",5,
 7 | ".Y",6,
 8 | ,,
 9 | "Section","More Nesting","Alt"
10 | "A",1,"Alt"
11 | ".B",2,"b"
12 | ".B",3,"c"
13 | "X",4,"d"
14 | ".Y",5,"e"
15 | ".Y",6,"f"
16 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/line/line-oriented-doc-references-1.txt:
--------------------------------------------------------------------------------
1 | 
2 | Section: References
3 | 
4 | Reference: censusreporter:B09020/140/05000US06073
5 | Reference.Name: B09020
6 | Reference.Description: Relationship by Household Type (Including Living Alone) for Population 65 Years and Over
7 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/errors/bad_include.csv:
--------------------------------------------------------------------------------
1 | "Include","doesntexist.csv"
2 | "Title","Registered Voters, By County"
3 | "Description","Percent of the eligible population registered to vote and the percent who voted in statewide elections."
4 | "Identifier","cdph.ca.gov-hci-registered_voters-county"
5 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/errors/bad_declare.csv:
--------------------------------------------------------------------------------
1 | "Declare","http://example.com/doesntexist.csv"
2 | "Title","Registered Voters, By County"
3 | "Description","Percent of the eligible population registered to vote and the percent who voted in statewide elections."
4 | "Identifier","cdph.ca.gov-hci-registered_voters-county"
5 | 


--------------------------------------------------------------------------------
/docker/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Alpine-based Python 3.6 image with metatab-py installed from the GitHub master archive.
 2 | FROM python:3.6.1-alpine
 3 | 
 4 | # MAINTAINER is deprecated in the Dockerfile reference; a `maintainer` label
 5 | # records the same information.
 6 | LABEL maintainer="Eric Busboom <eric@civicknowledge.com>"
 7 | 
 8 | VOLUME /opt/metatab
 9 | 
10 | # Compilers, Python and libxslt headers, plus bash and git.
11 | # NOTE(review): presumably required to build C extensions pulled in by the
12 | # pip install below -- confirm before trimming this list.
13 | RUN apk add --update --no-cache g++ gcc python-dev py-lxml libxslt-dev==1.1.29-r0 bash git
14 | 
15 | # NOTE(review): the trailing `# 9` looks like a manual cache-busting counter
16 | # that forces this layer to rebuild when bumped -- confirm.
17 | RUN pip install https://github.com/CivicKnowledge/metatab-py/archive/master.zip # 9
18 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/json/include1.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "note": [
 3 |         "Include File 1",
 4 |         "Include File 2",
 5 |         "Include File 3"
 6 |     ],
 7 |     "include": [
 8 |         "include2.csv",
 9 |         "https://raw.githubusercontent.com/CivicKnowledge/structured_tables/master/test/data/include3.csv"
10 |     ]
11 | }


--------------------------------------------------------------------------------
/examples/pandas-reporter.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import pandas as pd
 3 | import numpy as np
 4 | import pandasreporter as pr
 5 | 
 6 | b17001 = pr.get_dataframe('B17001', '140',  '05000US06073', cache=True)
 7 | b17024 = pr.get_dataframe('B17024', '140',  '05000US06073', cache=True)
 8 | b17017 = pr.get_dataframe('B17017', '140',  '05000US06073', cache=True)
 9 | 
10 | print df.head(2)


--------------------------------------------------------------------------------
/metatab/test/core.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2017 Civic Knowledge. This file is licensed under the terms of the
 2 | # Revised BSD License, included in this distribution as LICENSE
 3 | 
 4 | """
 5 | Shared helpers for locating the files under the ``test-data`` directory.
 6 | """
 7 | 
 8 | 
 9 | def test_data(*paths):
10 |     from os.path import dirname, join, abspath
11 | 
12 |     return abspath(join(dirname(abspath(__file__)), 'test-data', *paths))


--------------------------------------------------------------------------------
/metatab/test/test-data/children.csv:
--------------------------------------------------------------------------------
 1 | "NOte","This is a note",,
 2 | ,,,
 3 | "Section","Arguments","prop1","prop2"
 4 | "Parent","parent","prop1","prop2"
 5 | ,,,
 6 | "Section","ExplicitChildren",,
 7 | "Parent","parent",,
 8 | "Parent.Prop1","prop1",,
 9 | "Parent.Prop2","prop2",,
10 | ,,,
11 | "Section","ElidedChildren",,
12 | "Parent","parent",,
13 | ".Prop1","prop1",,
14 | ".Prop2","prop2",,
15 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/json/datapackage_ex1.json:
--------------------------------------------------------------------------------
1 | {
2 |     "declare": "datapackage-latest.csv",
3 |     "title": "Registered Voters, By County",
4 |     "description": "Percent of the eligible population registered to vote and the percent who voted in statewide elections.",
5 |     "name": "cdph.ca.gov-hci-registered_voters-county",
6 |     "version": "1.3.4",
7 |     "section": [
8 |         {
9 |             "section": 


--------------------------------------------------------------------------------
/metatab/templates/datapackage.csv:
--------------------------------------------------------------------------------
1 | "# ","Declarations for producing package.json files",,,,,
2 | ,,,,,,
3 | "Section","DeclaredTerms","TermValueName","ChildPropertyType","Section","Synonym","ValueSet"
4 | "DeclareTerm","resources","url",,,,
5 | "DeclareTerm","resource",,,,"resources",
6 | "DeclareTerm","schema",,,,"resources.schema",
7 | "DeclareTerm","field","name",,,"schema.fields",
8 | "DeclareTerm","schema.fields","name",,,,
9 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/issue1.csv:
--------------------------------------------------------------------------------
1 | ,,,,
2 | "Section","Resources","table","Grain","Title"
3 | "Documentation","https://www.cdph.ca.gov/programs/Documents/HCI_RegisteredVoters_653_Narrative_and_examples_6-2-14.pdf","Indicator Documentation for Voter Registration / Participation",,
4 | ".description","Voter Registration/Participation: Percent of the eligible population registered to vote and the percent who voted in statewide elections",,,
5 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/json/datapackage_ex1_web.json:
--------------------------------------------------------------------------------
1 | {
2 |     "declare": "http://assets.metatab.org/datapackage.csv",
3 |     "title": "Registered Voters, By County",
4 |     "description": "Percent of the eligible population registered to vote and the percent who voted in statewide elections.",
5 |     "name": "cdph.ca.gov-hci-registered_voters-county",
6 |     "version": "1.3.4",
7 |     "section": [
8 |         {
9 |             "section": 


--------------------------------------------------------------------------------
/metatab/test/test-data/children2.csv:
--------------------------------------------------------------------------------
 1 | "# Like children.csv, but with different values for debugging. ",,,
 2 | ,,,
 3 | "Section","Arguments","prop1","prop2"
 4 | "Parent","parent","prop11","prop12"
 5 | ,,,
 6 | "Section","ExplicitChildren",,
 7 | "Parent","parent",,
 8 | "Parent.Prop1","prop21",,
 9 | "Parent.Prop2","prop22",,
10 | ,,,
11 | "Section","ElidedChildren",,
12 | "Parent","parent",,
13 | ".Prop1","prop31",,
14 | ".Prop2","prop32",,
15 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/census.csv:
--------------------------------------------------------------------------------
 1 | Section,DeclaredSections,,,,
 2 | DeclareSection,Section,Schema,title,column_ref,indent
 3 | ,,,,,
 4 | ,,,,,
 5 | Section,DeclaredTerms,,,,
 6 | Header,Term,TermValueName,ChildPropertyType,Section,
 7 | DeclareTerm,Table,Name,,Schema,
 8 | DeclareTerm,Table.Universe,,,Root,
 9 | DeclareTerm,Table.Segment,,,Root,
10 | DeclareTerm,Table.Topics,,,,
11 | DeclareTerm,Table.Subject,,,,
12 | DeclareTerm,Table.Column,Name,,,
13 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/json/issue1.json:
--------------------------------------------------------------------------------
1 | {
2 |     "documentation": {
3 |         "table": "Indicator Documentation for Voter Registration / Participation",
4 |         "description": "Voter Registration/Participation: Percent of the eligible population registered to vote and the percent who voted in statewide elections",
5 |         "@value": "https://www.cdph.ca.gov/programs/Documents/HCI_RegisteredVoters_653_Narrative_and_examples_6-2-14.pdf"
6 |     }
7 | }


--------------------------------------------------------------------------------
/metatab/test/test-data/children3.csv:
--------------------------------------------------------------------------------
 1 | ,,,
 2 | ,,,
 3 | ,,,
 4 | "Section","Arguments","child1","child2"
 5 | "Parent","parent","child1","child2"
 6 | ,,,
 7 | "Section","ExplicitChildren",,
 8 | "Parent","parent",,
 9 | "Parent.Child1","child1",,
10 | "Parent.Child2","child2",,
11 | ,,,
12 | "Section","ElidedChildren",,
13 | "Parent","parent",,
14 | ".Child1","child1",,
15 | ".Child2","child2",,
16 | "Child1.grand1","grand1",,
17 | "Child2.grand2","grand2",,
18 | "Grand1.Great1","great1",,
19 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/json/children2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "parent": [
 3 |         {
 4 |             "prop1": "prop11",
 5 |             "prop2": "prop12",
 6 |             "@value": "parent"
 7 |         },
 8 |         {
 9 |             "prop1": "prop21",
10 |             "prop2": "prop22",
11 |             "@value": "parent"
12 |         },
13 |         {
14 |             "prop1": "prop31",
15 |             "prop2": "prop32",
16 |             "@value": "parent"
17 |         }
18 |     ]
19 | }


--------------------------------------------------------------------------------
/metatab/templates/metatab.csv:
--------------------------------------------------------------------------------
 1 | Declare,metatab-latest,,,
 2 | Title,,,,
 3 | Description,,,,
 4 | Identifier,,,,
 5 | Name,,,,
 6 | Dataset,,,,
 7 | Origin,,,,
 8 | Space,,,,
 9 | Time,,,,
10 | Grain,,,,
11 | Variant,,,,
12 | Version,1,,,
13 | ,,,,
14 | Section,References,Name,Description,
15 | ,,,,
16 | Section,Resources,Name,Description,
17 | ,,,,
18 | Section ,Documentation,Title,Description,
19 | ,,,,
20 | Section,Contacts,Email,Organization,Url
21 | ,,,,
22 | Section,Schema,DataType,AltName,Description
23 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/json/children.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "note": "This is a note",
 3 |     "parent": [
 4 |         {
 5 |             "prop1": "prop1",
 6 |             "prop2": "prop2",
 7 |             "@value": "parent"
 8 |         },
 9 |         {
10 |             "prop1": "prop1",
11 |             "prop2": "prop2",
12 |             "@value": "parent"
13 |         },
14 |         {
15 |             "prop1": "prop1",
16 |             "prop2": "prop2",
17 |             "@value": "parent"
18 |         }
19 |     ]
20 | }


--------------------------------------------------------------------------------
/metatab/test/test-data/url_classes.csv:
--------------------------------------------------------------------------------
1 | in_url,download_file,download_format,download_url,encoding,file_segment,is_archive,proto,target_file,target_format,url
2 | http://example.com/simple-example-altnames.csv,simple-example-altnames.csv,csv,http://example.com/simple-example-altnames.csv,,,False,http,simple-example-altnames.csv,csv,http://example.com/simple-example-altnames.csv
3 | http://example.com/test_data.zip,test_data.zip,zip,http://example.com/test_data.zip,,,True,http,test_data.zip,zip,http://example.com/test_data.zip
4 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/line/line-oriented-doc-bib.txt:
--------------------------------------------------------------------------------
 1 | Section: Bibliography
 2 | Citation: ipums
 3 | Citation.Type: dataset
 4 | Citation.Author: Steven Ruggles; Katie Genadek; Ronald Goeken; Josiah Grover; Matthew Sobek
 5 | Citation.Title: Integrated Public Use Microdata Series
 6 | Citation.Year: 2017
 7 | Citation.Publisher: University of Minnesota
 8 | Citation.Version: 7.0
 9 | Citation.AccessDate: 20170718
10 | Citation.Url: https://usa.ipums.org/usa/index.shtml
11 | Citation.Doi: https://doi.org/10.18128/D010.V7.0
12 | 
13 | 


--------------------------------------------------------------------------------
/develop.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash 
 2 | # Clone the CivicKnowledge projects listed below into the current directory
 3 | # and run `python setup.py develop` inside each checkout.
 4 | git clone https://github.com/CivicKnowledge/rowpipe.git && (cd rowpipe && python setup.py develop)
 5 | git clone https://github.com/CivicKnowledge/tableintuit.git && (cd tableintuit && python setup.py develop)
 6 | git clone https://github.com/CivicKnowledge/rowgenerators.git && (cd rowgenerators && python setup.py develop)
 7 | git clone https://github.com/CivicKnowledge/pandas-reporter.git && (cd pandas-reporter && python setup.py develop)
 8 | # NOTE(review): unlike the lines above, this one joins with `;` rather than `&&`,
 9 | # so the develop step is attempted even when the clone fails -- confirm intent.
10 | git clone https://github.com/CivicKnowledge/metatab-py.git; (cd metatab-py && python setup.py develop)
11 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/programsource.csv:
--------------------------------------------------------------------------------
 1 | "Declare","metatab-latest",,,
 2 | "Title","Program Source Text",,,
 3 | "Description","Test using a program to generate the data",,,
 4 | "Identifier","6e5cc47a-b712-4868-afc1-76a5797d1e98",,,
 5 | "Name","program_source-1",,,
 6 | "Name.Origin",,,,
 7 | "Name.Space",,,,
 8 | "Name.Time",,,,
 9 | "Name.Dataset","program-source",,,
10 | "Name.Version",1,,,
11 | "Name.Grain",,,,
12 | ,,,,
13 | "Section","Resources","Name","VarName","GeoType"
14 | "Datafile","program:scripts/dumpvar.py","Obesity","OBESEA","ZCTA"
15 | ,,,,
16 | ,,,,
17 | "Section","Schema","DataType","AltName","Description"
18 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/resolve_urls.csv:
--------------------------------------------------------------------------------
 1 | "doc","base_url","resource_url","url"
 2 | "example1.csv",,"c/d.csv","file:<base>/c/d.csv"
 3 | "example1.csv","http://example/a/b","c/d.csv","http://example/a/c/d.csv"
 4 | "example1.csv",,"program:c/d.csv","program+file:<base>/c/d.csv"
 5 | "example1.csv","http://example/a/b","program:c/d.csv","program+http://example/a/c/d.csv"
 6 | "example1.csv",,"/c/d.csv","file:/c/d.csv"
 7 | "example1.csv","http://example/a/b","/c/d.csv","http://example/c/d.csv"
 8 | "example1.csv",,"program:/c/d.csv","program+file:/c/d.csv"
 9 | "example1.csv","http://example/a/b","program:/c/d.csv","program+http://example/c/d.csv"
10 | 


--------------------------------------------------------------------------------
/metatab/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2016 Civic Knowledge. This file is licensed under the terms of the
 2 | # Revised BSD License, included in this distribution as LICENSE
 3 | """
 4 | Record objects for the Simple Data Package format.
 5 | """
 6 | 
 7 | # default metadata file
 8 | DEFAULT_METATAB_FILE = 'metadata.csv'
 9 | LINES_METATAB_FILE = 'metadata.txt'
10 | IPYNB_METATAB_FILE = 'metadata.ipynb'
11 | 
12 | from .parser import *
13 | from .exc import *
14 | from .doc import MetatabDoc
15 | from .resolver import WebResolver
16 | 
17 | from pkg_resources import get_distribution, DistributionNotFound
18 | try:
19 |     __version__ = get_distribution(__name__).version
20 | except DistributionNotFound:
21 |     # package is not installed
22 |     pass
23 | 
24 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/packages/example.com-test_package/metadata.csv:
--------------------------------------------------------------------------------
 1 | Declare,metatab-latest
 2 | Title,Test Package
 3 | Description,Package for Testing
 4 | Identifier,
 5 | Identifier,36c7e945-943c-435e-923c-1af21d831b3b
 6 | Name,example.com-test_package-1
 7 | Dataset,test_package
 8 | Origin,example.com
 9 | Time,
10 | Space,
11 | Grain,
12 | Version,1
13 | Created,2017-08-03T21:15:56
14 | Modified,2017-08-03T21:15:56
15 | Modified,2017-08-03T21:16:42
16 | Giturl,https://github.com/CivicKnowledge/metatab-py.git
17 | 
18 | Section,Resources,Name,Description,
19 | Datafile,http://example.com/data.csv,,,
20 | 
21 | Section,Documentation,Title,Description,
22 | Note,,,,
23 | 
24 | Section,Contacts,Email,Organization,Url
25 | 
26 | Section,Schema,DataType,AltName,Description
27 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/json/datapackage_ex2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "declare": "datapackage-latest",
 3 |     "title": "Country, Regional and World GDP (Gross Domestic Product)",
 4 |     "description": "Country, regional and world GDP in current US Dollars ($). Regional means collections of countries e.g. Europe & Central Asia. Data is sourced from the World Bank and turned into a standard normalized CSV.",
 5 |     "name": "gdp",
 6 |     "version": "2011",
 7 |     "license": "PDDL-1.0",
 8 |     "keyword": [
 9 |         "GDP",
10 |         "World",
11 |         "Gross Domestic Product",
12 |         "Time series"
13 |     ],
14 |     "image": "http://assets.okfn.org/p/opendatahandbook/img/data-wrench.png",
15 |     "last-updated": "2011-09-21",
16 |     "section": [
17 |         {
18 |             "section": 


--------------------------------------------------------------------------------
/metatab/test/test-data/simple-text.txt:
--------------------------------------------------------------------------------
 1 | Declare: metatab-latest
 2 | Title: Registered Voters, By County
 3 | Description: An Example Whatever.
 4 | Origin: example.com
 5 | Dataset: foobar.com
 6 | 
 7 | Section: Contacts
 8 | Wrangler: Eric Busboom
 9 | Wrangler.Email: eric@civicknowledge.com
10 | 
11 | Section: Resources
12 | Datafile: http://public.source.civicknowledge.com/example.com/sources/renter_cost.csv
13 | Datafile.Name: resource
14 | Datafile.Title: The First Example Data File
15 | Datafile.Startline: 5
16 | Datafile.HeaderLines: 3,4
17 | 
18 | Section: References
19 | Reference: http://public.source.civicknowledge.com/example.com/sources/renter_cost.csv
20 | Reference.Name: reference
21 | Reference.Title: The First Example Data File
22 | Reference.Startline: 5
23 | Reference.HeaderLines: 3,4
24 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/example2.csv:
--------------------------------------------------------------------------------
 1 | "Term","value",
 2 | "Title","Registered Voters, By County",
 3 | "Description","Percent of the eligible population registered to vote and the percent who voted in statewide elections.",
 4 | "Identifier","cdph.ca.gov-hci-registered_voters-county",
 5 | "Version",201404,
 6 | ,,
 7 | "Section","documentation","title"
 8 | "Homepage","https://www.cdph.ca.gov/programs/pages/healthycommunityindicators.aspx","Healthy Communities Data and Indicators Project (HCI)"
 9 | "Documentation","https://www.cdph.ca.gov/programs/Documents/HCI_RegisteredVoters_653_Narrative_and_examples_6-2-14.pdf","Indicator Documentation for Voter Registration / Participation"
10 | ".description","Voter Registration/Participation: Percent of the eligible population registered to vote and the percent who voted in statewide elections",
11 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/simple1.csv:
--------------------------------------------------------------------------------
 1 | "Declare","metatab-latest",
 2 | "Title","Registered Voters, By County",
 3 | "Name","cdph.ca.gov-hci-registered_voters-county",
 4 | ,,
 5 | "Section","Resources","Name"
 6 | "Datafile","http://example.com/example1.csv","namea"
 7 | "Datafile","http://example.com/example2.csv","nameb"
 8 | "Homepage","https://www.cdph.ca.gov/programs/pages/healthycommunityindicators.aspx","namec"
 9 | "Documentation","https://www.cdph.ca.gov/programs/Documents/HCI_RegisteredVoters_653_Narrative_and_examples_6-2-14.pdf","named"
10 | ".description","Voter Registration/Participation: Percent of the eligible population registered to vote and the percent who voted in statewide elections","namee"
11 | ,,
12 | ,,
13 | "Section","Schema","datatype"
14 | "Table","registered_voters",
15 | "Table.Column","reportyear","int"
16 | "Table.Column","type","str"
17 | 


--------------------------------------------------------------------------------
/metatab/test/Makefile:
--------------------------------------------------------------------------------
 1 | 
 2 | NS = civicknowledge.com
 3 | VERSION = latest
 4 | 
 5 | REPO = tox
 6 | NAME = tox
 7 | INSTANCE = default
 8 | DOCKER ?= docker
 9 | 
10 | .PHONY: test build push shell run start stop restart reload rm rmf release
11 | CWD = $(notdir $(shell pwd))
12 | 
13 | VOLUMES=-v $(abspath $(CWD)/../../..):/code
14 | 
15 | test:
16 | 	$(DOCKER) run --rm --name $(NAME) $(PORTS) $(VOLUMES) $(ENV) $(NS)/$(REPO):$(VERSION) tox
17 | 
18 | build:
19 | 	$(DOCKER) build -t $(NS)/$(REPO):$(VERSION) .
20 | 
21 | push:
22 | 	$(DOCKER) push $(NS)/$(REPO):$(VERSION)
23 | 
24 | shell:
25 | 	$(DOCKER) run --rm -i -t $(PORTS) $(VOLUMES) $(ENV) $(NS)/$(REPO):$(VERSION) /bin/bash
26 | 
27 | logs:
28 | 	$(DOCKER) logs -f $(NAME) 
29 | 
30 | rmf:
31 | 	$(DOCKER) rm -f $(NAME)
32 | 
33 | rm:
34 | 	$(DOCKER) rm $(NAME)
35 | 
36 | release: build
37 | 	make push -e VERSION=$(VERSION)
38 | 
39 | default: test


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | .PHONY: default install reset check test tox readme docs publish clean
 2 | 
 3 | MAKE := $(MAKE) --no-print-directory
 4 | 	
 5 | test:
 6 | 	python setup.py test
 7 | 	
 8 | develop: 
 9 | 	python setup.py develop 
10 | 	
11 | publish: 
12 | 	$(MAKE) clean
13 | 	python setup.py sdist 
14 | 	twine upload dist/*
15 | 	$(MAKE) clean
16 | 	
17 | clean:
18 | 	@rm -Rf *.egg .cache .coverage .tox build dist docs/build htmlcov
19 | 	@find -depth -type d -name __pycache__ -exec rm -Rf {} \;
20 | 	@find -type f -name '*.pyc' -delete
21 | test:
22 | 	python setup.py test
23 | 	
24 | develop: 
25 | 	python setup.py develop 
26 | 	
27 | publish: 
28 | 	git push --tags origin
29 | 	$(MAKE) clean
30 | 	python setup.py sdist 
31 | 	twine upload dist/*
32 | 	$(MAKE) clean
33 | 	
34 | clean:
35 | 	@rm -Rf *.egg .cache .coverage .tox build dist docs/build htmlcov
36 | 	#@find . -type d -name __pycache__ -exec rm -Rf {} \;
37 | 	#@find . -type f -name '*.pyc' -delete


--------------------------------------------------------------------------------
/metatab/test/test-data/json/example2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "term": "value",
 3 |     "title": "Registered Voters, By County",
 4 |     "description": "Percent of the eligible population registered to vote and the percent who voted in statewide elections.",
 5 |     "identifier": "cdph.ca.gov-hci-registered_voters-county",
 6 |     "version": "201404",
 7 |     "homepage": {
 8 |         "title": "Healthy Communities Data and Indicators Project (HCI)",
 9 |         "@value": "https://www.cdph.ca.gov/programs/pages/healthycommunityindicators.aspx"
10 |     },
11 |     "documentation": {
12 |         "title": "Indicator Documentation for Voter Registration / Participation",
13 |         "description": "Voter Registration/Participation: Percent of the eligible population registered to vote and the percent who voted in statewide elections",
14 |         "@value": "https://www.cdph.ca.gov/programs/Documents/HCI_RegisteredVoters_653_Narrative_and_examples_6-2-14.pdf"
15 |     }
16 | }


--------------------------------------------------------------------------------
/metatab/exc.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2016 Civic Knowledge. This file is licensed under the terms of the
 2 | # Revised BSD License, included in this distribution as LICENSE
 3 | 
 4 | """
 5 | 
 6 | """
 7 | 
 8 | 
 9 | class MetatabError(Exception):
10 |     pass
11 | 
12 | 
13 | 
14 | 
15 | class ReferenceError(MetatabError):
16 |     pass
17 | 
18 | 
19 | class ParserError(MetatabError):
20 |     def __init__(self, *args, **kwargs):
21 |         super(ParserError, self).__init__(*args, **kwargs)
22 |         self.term = kwargs.get('term', None)
23 | 
24 | 
25 | class IncludeError(MetatabError):
26 |     def __init__(self, *args, **kwargs):
27 |         self.message = ''
28 |         super(IncludeError, self).__init__(*args, **kwargs)
29 | 
30 | 
31 | class DeclarationError(ParserError):
32 |     pass
33 | 
34 | 
35 | class GenerateError(MetatabError):
36 |     pass
37 | 
38 | 
39 | class ConversionError(MetatabError):
40 |     pass
41 | 
42 | class FormatError(MetatabError):
43 |     pass
44 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/resources.csv:
--------------------------------------------------------------------------------
 1 | "Declare","metatab-latest",,,,
 2 | "Title","Resource test",,,,
 3 | "Name","resource-test",,,,
 4 | "Description","Percent of the eligible population registered to vote and the percent who voted in statewide elections.",,,,
 5 | "Identifier","cdph.ca.gov-hci-registered_voters-county",,,,
 6 | ,,,,,
 7 | "Section","Resources",,,,
 8 | "Header","url","name",,,"Title"
 9 | "Datafile","http://example.com/example1.csv","example1",,,"The First Example Data File"
10 | "Datafile","http://example.com/example3.csv","example2",,,"The Second Example Data File"
11 | "Reference","http://example.com/example3.csv","example3",,,
12 | "Reference","http://example.com/example4.csv","example4",,,
13 | "Documentation","http://example.com/example5.csv","example5",,,
14 | "Documentation","http://example.com/example6.csv","example6",,,
15 | "Homepage","http://example.com/example7.csv","example7",,,
16 | "Homepage","http://example.com/example8.csv","example8",,,
17 | "Citation","example9",,,,
18 | "Citation","example10",,,,
19 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/short-declare.csv:
--------------------------------------------------------------------------------
 1 | "Section","DeclaredSections",,,,
 2 | "DeclareSection","DeclaredSections","Arg0","Arg1","Arg2",
 3 | "DeclareSection","Root",,,,
 4 | "DeclareSection","DeclaredTerms","TermValueName","ChildPropertyType","Section",
 5 | "DeclareSection","Resources","Table","Grain","Title",
 6 | "DeclareSection","Contacts","Email",,,
 7 | "DeclareSection","Schemas","DataType","ValueType","Description",
 8 | ,,,,,
 9 | "Section","DeclaredTerms","TermValueName","ChildPropertyType","Section","InheritsFrom"
10 | "DeclareTerm","DeclareTerm","Term",,"DeclaredTerms",
11 | "DeclareTerm","Declare",,,"Root",
12 | "DeclareTerm","Include",,,"Root",
13 | "DeclareTerm","Section","Name","sequence","Root",
14 | "DeclareTerm","DeclareSection","Section","sequence","DeclaredSections",
15 | ,,,,,
16 | "Section ","DeclaredTerms","TermValueName","InheritsFrom","Section",
17 | "# Top Level Dataset Terms",,,,,
18 | "DeclareTerm","Root.Title",,,"Root",
19 | "DeclareTerm","Title.Language",,,"Root",
20 | "DeclareTerm","Root.Summary",,"Root.Title","Root",
21 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/datapackage_ex1.csv:
--------------------------------------------------------------------------------
 1 | "Declare","datapackage-latest.csv",,
 2 | "title","Registered Voters, By County",,
 3 | "description","Percent of the eligible population registered to vote and the percent who voted in statewide elections.",,
 4 | "name","cdph.ca.gov-hci-registered_voters-county",,
 5 | "version","1.3.4",,
 6 | ,,,
 7 | ,,,
 8 | "Section","Resources","type","description"
 9 | "resource","http://example.com/resource1.csv",,
10 | ".title","First Resource",,
11 | ".name","the-first-resource",,
12 | ".mediatype","text/csv",,
13 | ".format","csv",,
14 | ,,,
15 | "schema",,,
16 | "field","id","string","description"
17 | "field","state","string","description"
18 | "field","income","string","description"
19 | ,,,
20 | "resource","http://example.com/resource2.csv",,
21 | ".title","Second Resource",,
22 | ".name","the-second-resource",,
23 | ".mediatype","text/csv",,
24 | ".format","csv",,
25 | ,,,
26 | "schema",,,
27 | "field","id","string","description"
28 | "field","country","string","description"
29 | "field","gdp","string","description"
30 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/scripts/Py3Notebook.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "code",
 5 |    "execution_count": 1,
 6 |    "metadata": {},
 7 |    "outputs": [],
 8 |    "source": [
 9 |     "from string import ascii_uppercase\n",
10 |     "\n",
11 |     "lst = [ascii_uppercase[:11] ] + [ list(range(10))+ [mult(i)] for i in range(10)]"
12 |    ]
13 |   },
14 |   {
15 |    "cell_type": "code",
16 |    "execution_count": null,
17 |    "metadata": {},
18 |    "outputs": [],
19 |    "source": [
20 |     ""
21 |    ]
22 |   }
23 |  ],
24 |  "metadata": {
25 |   "kernelspec": {
26 |    "display_name": "Python 3",
27 |    "language": "python",
28 |    "name": "python3"
29 |   },
30 |   "language_info": {
31 |    "codemirror_mode": {
32 |     "name": "ipython",
33 |     "version": 3.0
34 |    },
35 |    "file_extension": ".py",
36 |    "mimetype": "text/x-python",
37 |    "name": "python",
38 |    "nbconvert_exporter": "python",
39 |    "pygments_lexer": "ipython3",
40 |    "version": "3.5.0"
41 |   }
42 |  },
43 |  "nbformat": 4,
44 |  "nbformat_minor": 2
45 | }


--------------------------------------------------------------------------------
/metatab/test/test-data/datapackage_ex1_web.csv:
--------------------------------------------------------------------------------
 1 | "Declare","http://assets.metatab.org/datapackage.csv",,
 2 | "title","Registered Voters, By County",,
 3 | "description","Percent of the eligible population registered to vote and the percent who voted in statewide elections.",,
 4 | "name","cdph.ca.gov-hci-registered_voters-county",,
 5 | "version","1.3.4",,
 6 | ,,,
 7 | ,,,
 8 | "Section","Resources","type","description"
 9 | "resource","http://example.com/resource1.csv",,
10 | ".title","First Resource",,
11 | ".name","the-first-resource",,
12 | ".mediatype","text/csv",,
13 | ".format","csv",,
14 | ,,,
15 | "schema",,,
16 | "field","country","string","description"
17 | "field","country","string","description"
18 | "field","country","string","description"
19 | ,,,
20 | "resource","http://example.com/resource2.csv",,
21 | ".title","Second Resource",,
22 | ".name","the-second-resource",,
23 | ".mediatype","text/csv",,
24 | ".format","csv",,
25 | ,,,
26 | "schema",,,
27 | "field","country","string","description"
28 | "field","country","string","description"
29 | "field","country","string","description"
30 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/geo.csv:
--------------------------------------------------------------------------------
 1 | Declare,metatab-latest
 2 | Title,US States
 3 | Description,US States
 4 | Identifier,11585edd-20f4-4b15-a0da-9b5197b5ecc5
 5 | Name,us_states-1
 6 | Name.Time,
 7 | Name.Version,1
 8 | Name.Dataset,us-states
 9 | Name.Origin,
10 | Name.Grain,
11 | Name.Space,
12 | 
13 | Section,Resources,Name,Description,
14 | Datafile,shape+http://s3.amazonaws.com/test.library.civicknowledge.com/census/tl_2016_us_state.geojson.zip,us_states,,
15 | 
16 | Section,Schema,DataType,AltName,Description
17 | Table,us_states,,,
18 | Table.Column,id,integer,,
19 | Table.Column,REGION,integer,region,
20 | Table.Column,DIVISION,integer,division,
21 | Table.Column,STATEFP,integer,statefp,
22 | Table.Column,STATENS,integer,statens,
23 | Table.Column,GEOID,integer,geoid,
24 | Table.Column,STUSPS,text,stusps,
25 | Table.Column,NAME,text,name,
26 | Table.Column,LSAD,integer,lsad,
27 | Table.Column,MTFCC,text,mtfcc,
28 | Table.Column,FUNCSTAT,text,funcstat,
29 | Table.Column,ALAND,integer,aland,
30 | Table.Column,AWATER,integer,awater,
31 | Table.Column,INTPTLAT,number,intptlat,
32 | Table.Column,INTPTLON,number,intptlon,
33 | Table.Column,geometry,text,,
34 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/line/line-oriented-doc-references-2.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | Section: References
 3 | 
 4 | #
 5 | # Tract crosswalk
 6 | #
 7 | Reference: metatab+http://s3.amazonaws.com/library.metatab.org/sangis.org-census_regions-2010-sandiego-7.csv#tract-sra-msa-xwalk
 8 | Reference.Name: tracts
 9 | Reference.Description: Crosswalk between crosswalks, tracts, zip codes and SRAs in San Diego County
10 | 
11 | #
12 | # Tract boundaries
13 | #
14 | Reference: metatab+http://s3.amazonaws.com/library.metatab.org/sangis.org-census_regions-2010-sandiego-7.csv#tracts
15 | Reference.Name: tracts_geo
16 | Reference.Description: Geographics Boundaries for Tracts
17 | 
18 | #
19 | # SRA boundaries
20 | #
21 | Reference: metatab+http://s3.amazonaws.com/library.metatab.org/sangis.org-census_regions-2010-sandiego-7.csv#sra
22 | Reference.Name: sra_geo
23 | Reference.Description: Geographics Boundaries for SRAs
24 | 
25 | #
26 | # IPUMS Housing and Income Data
27 | #
28 | # Need to use the ZIP version b/c we need to import the Python Code
29 | Reference: metatab+http://s3.amazonaws.com/library.metatab.org/ipums.org-income_homevalue-5.zip#income_homeval
30 | Reference.Name: incv
31 | Reference.Description: Income and Home value records from IPUMS for San Diego County
32 | 


--------------------------------------------------------------------------------
/docker/Makefile:
--------------------------------------------------------------------------------
 1 | 
 2 | INSTANCE = default
 3 | DOCKER ?= docker
 4 | 
 5 | NS = civicknowledge
 6 | VERSION = latest
 7 | 
 8 | REPO = metatab
 9 | NAME = metatab
10 | 
11 | DOCKER ?= docker
12 | 
13 | PORTS =
14 | 
15 | VOLUMES= -v /data
16 | 
17 | ENV =
18 | 
19 | 
20 | .PHONY: build rebuild push shell run start stop restart reload rm rmf release test
21 | 
22 | build:
23 | 	$(DOCKER) build -t $(NS)/$(REPO):$(VERSION) .
24 | 
25 | rebuild:
26 | 	$(DOCKER) build --no-cache=true -t $(NS)/$(REPO):$(VERSION) .
27 | 
28 | push:
29 | 	$(DOCKER) push $(NS)/$(REPO):$(VERSION)
30 | 
31 | shell:
32 | 	$(DOCKER) run --rm  -i -t $(PORTS) $(VOLUMES) $(LINKS) $(ENV) $(NS)/$(REPO):$(VERSION) /bin/bash
33 | 
34 | run:
35 | 	$(DOCKER) run --rm --name $(NAME) $(PORTS) $(VOLUMES) $(LINKS) $(ENV) $(NS)/$(REPO):$(VERSION)
36 | 
37 | logs:
38 | 	$(DOCKER) logs -f $(NAME) 
39 | 
40 | start:
41 | 	$(DOCKER) run -d --name $(NAME) $(PORTS) $(VOLUMES) $(LINKS) $(ENV) $(NS)/$(REPO):$(VERSION)
42 | 
43 | stop:
44 | 	$(DOCKER) stop $(NAME)
45 | 	
46 | restart: stop start
47 | 
48 | reload: build rmf start
49 | 
50 | rmf:
51 | 	$(DOCKER) rm -f $(NAME)
52 | 
53 | rm:
54 | 	$(DOCKER) rm $(NAME)
55 | 
56 | release: build
57 | 	make push -e VERSION=$(VERSION)
58 | 
59 | default: build
60 | 
61 | 


--------------------------------------------------------------------------------
/metatab/resolver.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2016 Civic Knowledge. This file is licensed under the terms of the
 2 | # Revised BSD License, included in this distribution as LICENSE
 3 | 
 4 | """
 5 | Generate rows from a variety of paths, references or other input
 6 | """
 7 | 
 8 | from .exc import IncludeError, GenerateError
 9 | 
class WebResolver(object):
    """Resolve references into row generators and declaration documents."""

    def fetch_row_source(self, url):
        """Placeholder hook; does nothing and returns None."""
        pass

    def find_decl_doc(self, name):
        """Locate the declaration document called ``name``.

        This resolver performs no lookup: it always raises.

        :param name: name of the declaration document being requested.
        :raises IncludeError: always, with ``name`` as its argument.
        """
        # The code that used to follow this raise was unreachable and referred
        # to an undefined METATAB_ASSETS_URL, so it has been removed.
        raise IncludeError(name)

    def get_row_generator(self, ref, cache=None):
        """Return a row generator for a reference.

        :param ref: reference handed to ``rowgenerators.get_generator``.
        :param cache: accepted for interface compatibility; not used here.
        :raises GenerateError: if no generator is returned for ``ref``.
        """
        # Imported lazily so rowgenerators is only needed when rows are
        # actually requested.
        from rowgenerators import get_generator

        g = get_generator(ref)

        if not g:
            raise GenerateError("Cant figure out how to generate rows from {} ref: {}".format(type(ref), ref))

        return g
46 | 
47 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/datapackage_ex2.csv:
--------------------------------------------------------------------------------
 1 | "Declare","datapackage-latest",,,,
 2 | "title","Country, Regional and World GDP (Gross Domestic Product)",,,,
 3 | "description","Country, regional and world GDP in current US Dollars ($). Regional means collections of countries e.g. Europe & Central Asia. Data is sourced from the World Bank and turned into a standard normalized CSV.",,,,
 4 | "name","gdp",,,,
 5 | "version",2011,,,,
 6 | "license","PDDL-1.0",,,,
 7 | "keyword","GDP",,,,
 8 | "keyword","World",,,,
 9 | "keyword","Gross Domestic Product",,,,
10 | "keyword","Time series",,,,
11 | "image","http://assets.okfn.org/p/opendatahandbook/img/data-wrench.png",,,,
12 | "last-updated","2011-09-21",,,,
13 | ,,,,,
14 | "Section","Sources","web",,,
15 | "Source","World Bank and OECD","http://data.worldbank.org/indicator/NY.GDP.MKTP.CD",,,
16 | ,,,,,
17 | "Section ","Resources","type","format","foreignkey","description"
18 | "resource","gdp",,,,
19 | "resource.path","data/gdp.csv",,,,
20 | "schema",,,,,
21 | "field","Country Name","string",,,
22 | "field","Contry Code","string",,"iso-3-geo-codes/id",
23 | "field","Year","date","yyyy",,
24 | "field","Value","number",,,"GDP in current USD"
25 | ,,,,,
26 | "resource","another_gdp_resource",,,,
27 | "resource.path","data/other_gdp.csv",,,,
28 | "schema",,,,,
29 | "field","Country Name","string",,,
30 | "field","Contry Code","string",,"iso-3-geo-codes/id",
31 | "field","Year","date","yyyy",,
32 | "field","Value","number",,,"GDP in current USD"
33 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2017, Civic Knowledge
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without
 5 | modification, are permitted provided that the following conditions are met:
 6 | 
 7 | * Redistributions of source code must retain the above copyright notice, this
 8 |   list of conditions and the following disclaimer.
 9 | 
10 | * Redistributions in binary form must reproduce the above copyright notice,
11 |   this list of conditions and the following disclaimer in the documentation
12 |   and/or other materials provided with the distribution.
13 | 
14 | * Neither the name of Civic Knowledge nor the names of its
15 |   contributors may be used to endorse or promote products derived from
16 |   this software without specific prior written permission.
17 | 
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | 
29 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | 
 2 | ##
 3 | ## Python Ignores
 4 | ##
 5 | 
 6 | *.py[cod]
 7 | 
 8 | # C extensions
 9 | *.so
10 | 
11 | # Packages
12 | *.egg
13 | *.egg-info
14 | *.eggs
15 | *.cache
16 | dist
17 | build
18 | eggs
19 | parts
20 | var
21 | sdist
22 | develop-eggs
23 | .installed.cfg
24 | lib
25 | lib64
26 | __pycache__
27 | 
28 | # Installer logs
29 | pip-log.txt
30 | 
31 | # Unit test / coverage reports
32 | .coverage
33 | .tox
34 | nosetests.xml
35 | htmlcov/*
36 | 
37 | # Translations
38 | *.mo
39 | 
40 | # Mr Developer
41 | .mr.developer.cfg
42 | .project
43 | .pydevproject
44 | .idea
45 | test/testbundle/build-save
46 | test/bundles/testbundle/meta/schema-old.csv
47 | bundle.yaml.old
48 | schema-revised.csv
49 | build-save
50 | *.sqlite3
51 | 
52 | test/coverage
53 | meta/coverage.yaml
54 | 
55 | ##
56 | ## Javascript Ignores
57 | ##
58 | 
59 | # Logs
60 | logs
61 | *.log
62 | npm-debug.log*
63 | 
64 | # Runtime data
65 | pids
66 | *.pid
67 | *.seed
68 | 
69 | # Directory for instrumented libs generated by jscoverage/JSCover
70 | lib-cov
71 | 
72 | # Coverage directory used by tools like istanbul
73 | coverage
74 | 
75 | # nyc test coverage
76 | .nyc_output
77 | 
78 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
79 | .grunt
80 | 
81 | # node-waf configuration
82 | .lock-wscript
83 | 
84 | # Compiled binary addons (http://nodejs.org/api/addons.html)
85 | build/Release
86 | 
87 | # Dependency directories
88 | node_modules
89 | jspm_packages
90 | 
91 | # Optional npm cache directory
92 | .npm
93 | 
94 | # Optional REPL history
95 | .node_repl_history
96 | 
97 | _metapack
98 | .DS_Store
99 | 


--------------------------------------------------------------------------------
/metatab/datapackage.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2016 Civic Knowledge. This file is licensed under the terms of the
 2 | # Revised BSD License, included in this distribution as LICENSE
 3 | 
 4 | """
 5 | Convert Metatab terms into datapackage.json file
 6 | """
 7 | 
 8 | from metatab.exc import ConversionError
 9 | 
# Translation table from Metatab datatype names to datapackage field types.
# Lookups in this module fall back to the original datatype name when it has
# no entry here.
type_map = dict(
    str='string',
    text='string',
    unicode='string',
    int='integer',
    float='number',
)
17 | 
18 | 
def _column_to_field(c):
    """Build one datapackage field descriptor from a schema column dict."""
    d = {}

    # Copy only the descriptive properties that are present and non-empty.
    for prop in ('name', 'title', 'description'):
        if c.get(prop):
            d[prop] = c[prop]

    # Translate the datatype; names without a mapping pass through unchanged.
    d['type'] = type_map.get(c.get('datatype'), c.get('datatype'))

    return d


def convert_to_datapackage(doc):
    """Convert a Metatab document into a datapackage.json-style dict.

    :param doc: object indexable by section name ('root', 'identity',
        'schema', 'resources'). Sections must provide ``as_dict()`` and
        iterate over terms exposing ``value``, ``as_dict()``, ``arg_props``
        and ``term_is()``.
    :return: dict built from the root (and, if present, identity) section,
        with a ``resources`` list holding one entry per datafile resource
        for which a table schema was found.
    :raises ConversionError: if neither a name nor an identifier is
        available, or if the table schemas cannot be read.
    """
    dp = doc['root'].as_dict()

    # The identity section is optional.
    try:
        dp.update(doc['identity'].as_dict())
    except KeyError:
        pass

    if 'name' not in dp:
        # Fall back to the identifier. The key was previously misspelled
        # 'indentifier', so this fallback could never succeed.
        if 'identifier' in dp:
            dp['name'] = dp['identifier']
        else:
            raise ConversionError("Datapackage.json requires a Name or Identifier term")

    try:
        table_schemas = {t.value: t.as_dict()['column'] for t in doc['schema']}
    except KeyError as e:
        raise ConversionError("Failed to get schemas: " + str(e))

    file_resources = [fr.arg_props for fr in doc['resources'] if fr.term_is('root.datafile')]

    dp['resources'] = []

    for r in file_resources:

        # Prefer the schema named after the resource; otherwise use the schema
        # named by its 'table' property. Resources with no schema are skipped.
        try:
            if r.get('name', '<none>') in table_schemas:
                columns = table_schemas[r['name']]
            else:
                columns = table_schemas[r['table']]
        except KeyError:
            continue

        dp['resources'].append(dict(
            path=r['url'],
            name=r['name'],
            schema={'fields': [_column_to_field(c) for c in columns]}
        ))

    return dp
70 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/schema.csv:
--------------------------------------------------------------------------------
 1 | "Declare","metatab-0.1",,,
 2 | "Title","Registered Voters, By County",,,
 3 | "Description","Percent of the eligible population registered to vote and the percent who voted in statewide elections.",,,
 4 | ,,,,
 5 | "Section","Resources","table","Grain","Title"
 6 | "Datafile","http://example.com/example1.csv","registered_voters","County","The First Example Data File"
 7 | "Datafile","http://example.com/example2.csv","registered_voters","Tract","The Second Example Data File"
 8 | "Homepage","https://www.cdph.ca.gov/programs/pages/healthycommunityindicators.aspx","Healthy Communities Data and Indicators Project (HCI)",,
 9 | "Documentation","https://www.cdph.ca.gov/programs/Documents/HCI_RegisteredVoters_653_Narrative_and_examples_6-2-14.pdf","Indicator Documentation for Voter Registration / Participation",,
10 | ".description","Voter Registration/Participation: Percent of the eligible population registered to vote and the percent who voted in statewide elections",,,
11 | ,,,,
12 | "Section ","Contacts","email",,
13 | "Creator","Office of Health Equity","HCIOHE@cdph.ca.gov",,
14 | "Wrangler","Eric Busboom","eric@civicknowledge.com",,
15 | ,,,,
16 | "Section","Schema","datatype","valuetype","description"
17 | "Table","Table1",,,"HCI Indicator 653.0: Percent of adults age 18 years and older who are registered voters"
18 | "Column","Column1","int","year range","Year or years that indicator was reported"
19 | "Column","Column2","str","dimension","Type of record"
20 | "Column","Column3","str","gvid","GVid version of the geotype and geotypeval"
21 | "Column","Column4","str","label for gvid","Census name of geographic area"
22 | "Table","Table1",,,"HCI Indicator 653.0: Percent of adults age 18 years and older who are registered voters"
23 | "Column","Column1","int","year range","Year or years that indicator was reported"
24 | "Column","Column2","str","dimension","Type of record"
25 | "Column","Column3","str","gvid","GVid version of the geotype and geotypeval"
26 | "Column","Column4","str","label for gvid","Census name of geographic area"
27 | "Column.Foo","Bingo",,,
28 | ,,"Bingo 1","BIngo 2",
29 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | 
 5 | import os
 6 | import sys
 7 | from setuptools import setup
 8 | 
 9 | if sys.argv[-1] == 'publish':
10 |     os.system('python setup.py sdist upload')
11 |     sys.exit()
12 | 
13 | with open(os.path.join(os.path.dirname(__file__), 'README.rst')) as f:
14 |     readme = f.read()
15 | 
16 | classifiers = [
17 |     'Development Status :: 4 - Beta',
18 |     'Intended Audience :: Developers',
19 |     'License :: OSI Approved :: BSD License',
20 |     'Operating System :: OS Independent',
21 |     'Programming Language :: Python',
22 |     'Programming Language :: Python :: 3.6',
23 |     'Topic :: Software Development :: Libraries :: Python Modules',
24 | ]
25 | 
26 | # Setup a directory for a fake package for importing plugins
27 | 
28 | setup(
29 |     name='metatab',
30 |     version='0.8.2',
31 |     description='Data format for storing structured data in spreadsheet tables',
32 |     long_description=readme,
33 |     packages=['metatab','metatab.templates', 'metatab.test', 'metatab.test.test-data'],
34 | 
35 |     package_data={
36 |         '': ['*.csv','*.json','*.txt','*.ipynb',''],
37 |     },
38 | 
39 |     install_requires=[
40 |         'metatabdecl',
41 |         'rowgenerators',
42 |     ],
43 | 
44 |     # test_suite='appurl.test.test_suite',
45 |     test_suite='nose.collector',
46 |     tests_require=['nose', 'tabulate'],
47 | 
48 |     entry_points={
49 |         'console_scripts': [
50 |             'metatab=metatab.cli:metatab'
51 |         ],
52 | 
53 |         'appurl.urls': [
54 |             "metatab+ = metatab.appurl:MetatabUrl",
55 |         ],
56 | 
57 |         'rowgenerators': [
58 |             "metatab+.txt =  metatab.rowgenerators:TextRowGenerator",
59 |             ".yaml =  metatab.rowgenerators:YamlMetatabSource"
60 |         ]
61 |     },
62 | 
63 |     author='Eric Busboom',
64 |     author_email='eric@civicknowledge.com',
65 |     url='https://github.com/Metatab/metatab-py.git',
66 |     license='BSD',
67 |     classifiers=classifiers,
68 |     extras_require={
69 |        'datapackage': ['datapackage'],
70 |     }
71 | )
72 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/line/line-oriented-doc.txt:
--------------------------------------------------------------------------------
 1 | Identifier: 47bc1089-7584-41f0-b804-602ec42f1249
 2 | Origin: civicknowledge.com
 3 | Dataset: rcfe_affordability
 4 | Version: 4
 5 | Time: 2015
 6 | Name: civicknowledge.com-rcfe_affordability-2015-4
 7 | 
 8 | Section: Contacts
 9 | Wrangler: Eric Busboom
10 | Wrangler.Email: eric@civicknowledge.com
11 | Wrangler.Organization: Civic Knowledge
12 | 
13 | Section: References
14 | 
15 | Reference: censusreporter:B09020/140/05000US06073
16 | Reference.Name: B09020
17 | Reference.Description: Relationship by Household Type (Including Living Alone) for Population 65 Years and Over
18 | 
19 | 
20 | #
21 | # Tract crosswalk
22 | #
23 | Reference: metatab+http://s3.amazonaws.com/library.metatab.org/sangis.org-census_regions-2010-sandiego-7.csv#tract-sra-msa-xwalk
24 | Reference.Name: tracts
25 | Reference.Description: Crosswalk between crosswalks, tracts, zip codes and SRAs in San Diego County
26 | 
27 | #
28 | # Tract boundaries
29 | #
30 | Reference: metatab+http://s3.amazonaws.com/library.metatab.org/sangis.org-census_regions-2010-sandiego-7.csv#tracts
31 | Reference.Name: tracts_geo
32 | Reference.Description: Geographics Boundaries for Tracts
33 | 
34 | #
35 | # SRA boundaries
36 | #
37 | Reference: metatab+http://s3.amazonaws.com/library.metatab.org/sangis.org-census_regions-2010-sandiego-7.csv#sra
38 | Reference.Name: sra_geo
39 | Reference.Description: Geographics Boundaries for SRAs
40 | 
41 | #
42 | # IPUMS Housing and Income Data
43 | #
44 | # Need to use the ZIP version b/c we need to import the Python Code
45 | Reference: metatab+http://s3.amazonaws.com/library.metatab.org/ipums.org-income_homevalue-5.zip#income_homeval
46 | Reference.Name: incv
47 | Reference.Description: Income and Home value records from IPUMS for San Diego County
48 | 
49 | 
50 | ==== Bibliography
51 | Citation: ipums
52 | Citation.Type: dataset
53 | Citation.Author: Steven Ruggles; Katie Genadek; Ronald Goeken; Josiah Grover; Matthew Sobek
54 | Citation.Title: Integrated Public Use Microdata Series
55 | Citation.Year: 2017
56 | Citation.Publisher: University of Minnesota
57 | Citation.Version: 7.0
58 | Citation.AccessDate: 20170718
59 | Citation.Url: https://usa.ipums.org/usa/index.shtml
60 | Citation.Doi: https://doi.org/10.18128/D010.V7.0
61 | 
62 | 


--------------------------------------------------------------------------------
/metatab/test/outputs/datapackage.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "resources": [
 3 |         {
 4 |             "name": "the-first-resource",
 5 |             "format": "csv",
 6 |             "url": "http://example.com/resource1.csv",
 7 |             "title": "First Resource",
 8 |             "mediatype": "text/csv",
 9 |             "schema": {
10 |                 "fields": [
11 |                     {
12 |                         "type": "string",
13 |                         "description": "description",
14 |                         "name": "country"
15 |                     },
16 |                     {
17 |                         "type": "string",
18 |                         "description": "description",
19 |                         "name": "country"
20 |                     },
21 |                     {
22 |                         "type": "string",
23 |                         "description": "description",
24 |                         "name": "country"
25 |                     }
26 |                 ]
27 |             }
28 |         },
29 |         {
30 |             "name": "the-second-resource",
31 |             "format": "csv",
32 |             "url": "http://example.com/resource2.csv",
33 |             "title": "Second Resource",
34 |             "mediatype": "text/csv",
35 |             "schema": {
36 |                 "fields": [
37 |                     {
38 |                         "type": "string",
39 |                         "description": "description",
40 |                         "name": "country"
41 |                     },
42 |                     {
43 |                         "type": "string",
44 |                         "description": "description",
45 |                         "name": "country"
46 |                     },
47 |                     {
48 |                         "type": "string",
49 |                         "description": "description",
50 |                         "name": "country"
51 |                     }
52 |                 ]
53 |             }
54 |         }
55 |     ],
56 |     "version": "1.3.4",
57 |     "description": "Percent of the eligible population registered to vote and the percent who voted in statewide elections.",
58 |     "name": "cdph.ca.gov-hci-registered_voters-county",
59 |     "title": "Registered Voters, By County"
60 | }
61 | 


--------------------------------------------------------------------------------
/docs/Census.rst:
--------------------------------------------------------------------------------
 1 | Loading Census Data With Pandas Reporter
 2 | ========================================
 3 | 
 4 | The general process for creating a census package is similar to the package process described in the `Getting Started tutorial, <https://github.com/CivicKnowledge/metatab-py/blob/master/docs/GettingStarted.rst>`_ but with a ``DataFile`` term that uses a program to fetch data from Census Reporter. First we'll create the program, then link it into a Metatab package. The program uses the `pandas-reporter` module, so the creation process is very similar to the `Pandas-Reporter tutorial. <https://github.com/CivicKnowledge/pandas-reporter/blob/master/test/Pandas%20Reporter%20Examples.ipynb>`_
 5 | 
 6 | Creating a Pandas-Reporter program
 7 | ----------------------------------
 8 | 
 9 | First, read the `Pandas-Reporter tutorial. <https://github.com/CivicKnowledge/pandas-reporter/blob/master/test/Pandas%20Reporter%20Examples.ipynb>`_ You'll need to install the `pandasreporter` python module.
10 | 
11 | Then, visit `Census Reporter <http://censusreporter.org>`_ to locate information about tables, regions and summary levels.
12 | 
13 | For this tutorial, we will use these tables:
14 | 
15 | - B17001, Poverty Status by Sex by Age
16 | - B17024, Age by Ratio of Income to Poverty Level
17 | - B17017, Poverty Status by Household Type by Age of Householder
18 | 
19 | For the geography, we will use tracts in San Diego County.
20 | 
21 | To find the geoid code for San Diego County, visit the main page at `Census Reporter <http://censusreporter.org>`_ and search for San Diego County. You should get a `profile page for the county <https://censusreporter.org/profiles/05000US06073-san-diego-county-ca/>`_. In the URL for the page, you should see the code `05000US06073`. This code is the geoid for San Diego County.
22 | 
23 | Next, visit the page for `Cartographic Boundary File Summary Level Codes <https://www.census.gov/geo/maps-data/data/summary_level.html>`_ to get the summary level code for tracts. It is actually listed by all of its components, in this case, "State-County-Census Tract." It is code "140". (BTW, that is a string, not a number.)
24 | 
25 | The start of our program is similar to the program in the `Pandas-Reporter tutorial <https://github.com/CivicKnowledge/pandas-reporter/blob/master/test/Pandas%20Reporter%20Examples.ipynb>`_, except using the table, summary level and region codes for this example:
26 | 
27 | .. code-block:: bash
28 | 
29 |     $ mkdir example-data-package
30 |     $ cd example-data-package
31 |     $ metapack -c


--------------------------------------------------------------------------------
/metatab/test/test-data/yaml/yaml-example-1.csv:
--------------------------------------------------------------------------------
 1 | Declare,metatab-latest,,,
 2 | Title,San Diego County Weather,,,
 3 | Description,Daily summaries from a selection of San Diego county weather stations,,,
 4 | Identifier,2dc83efa-e6da-4561-bdf9-63263360ccf0,,,
 5 | Name,noaa.gov-daily_summary-1998e-san-1,,,
 6 | Dataset,daily_summary,,,
 7 | Origin,noaa.gov,,,
 8 | Time,1998e,,,
 9 | Space,san,,,
10 | Grain,,,,
11 | Variant,,,,
12 | Version,1,,,
13 | Created,2018-08-17T15:44:24,,,
14 | Modified,2018-08-17T16:18:19,,,
15 | Giturl,https://github.com/san-diego-water-quality/water-datasets.git,,,
16 | ,,,,
17 | Section,Contacts,Email,Organization,Url
18 | Wrangler,Eric Busboom,eric@civicknowledge.com,Civic Knowledge,http://civicknowledge.com
19 | ,,,,
20 | ,,,,
21 | Section,Documentation,Title,Description,
22 | Documentation,file:README.md,README,,
23 | Documentation,https://www1.ncdc.noaa.gov/pub/data/cdo/documentation/GHCND_documentation.pdf,Documentation,Main documentation,
24 | ,,,,
25 | Section,Resources,Name,Description,
26 | Datafile,http://ds.civicknowledge.org.s3.amazonaws.com/noaa.gov/daily-summary-1998-2018-san.csv,daily_summary_san,Daily weather summaries,
27 | Datafile,http://ds.civicknowledge.org.s3.amazonaws.com/noaa.gov/daily-summary-1998-2018-san.csv,daily_summary_la,Daily weather summaries,
28 | ,,,,
29 | ,,,,
30 | ,,,,
31 | Section,Schema,DataType,AltName,Description
32 | Table,daily_summary_san,,,
33 | Table.Column,STATION,string,station,
34 | Table.Column,NAME,string,name,Station code
35 | Table.Column,LATITUDE,number,latitude,Station name
36 | Table.Column,LONGITUDE,number,longitude,Station lattitude
37 | Table.Column,ELEVATION,number,elevation,Station longitude
38 | Table.Column,DATE,date,date,Station elevation
39 | Table.Column,AWND,number,awnd,Measurement date
40 | Table.Column,DAPR,string,dapr,Average daily wind speed (meters per second or miles per hour as per user preference
41 | Table.Column,FMTM,integer,fmtm,Number of days included in the multiday precipitation total (MDPR)
42 | Table.Column,MDPR,string,mdpr,"Time of fastest mile or fastest 1-minute wind (hours and minutes, i.e., HHMM)"
43 | Table.Column,PGTM,string,pgtm,"Multiday precipitation total (mm or inches as per user preference; use with DAPR and DWPR, if available)"
44 | Table.Column,PRCP,number,prcp,"Peak gust time (hours and minutes, i.e., HHMM)"
45 | Table.Column,SNOW,integer,snow,"Precipitation (mm or inches as per user preference, inches to hundredths on Daily Form pdf file)"
46 | Table.Column,SNWD,integer,snwd,"Snowfall (mm or inches as per user preference, inches to tenths on Daily Form pdf file)"
47 | Table.Column,TAVG,string,tavg,"Snow depth (mm or inches as per user preference, inches on Daily Form pdf file)"
48 | Table.Column,TMAX,integer,tmax,Average temerature
49 | Table.Column,TMIN,integer,tmin,"Maximum temperature (Fahrenheit or Celsius as per user preference, Fahrenheit to tenths on Daily Form pdf file"
50 | 


--------------------------------------------------------------------------------
/docs/GeneratingRowsWithPrograms.rst:
--------------------------------------------------------------------------------
 1 | 
 2 | Row Generating Programs
 3 | =======================
 4 | 
 5 | Metatab Datafile terms can reference programs and IPython notebooks to generate rows. 
 6 | 
 7 | To reference a program, the ``Root.Datafile`` must be a URL with a ``program`` scheme and a relative path. Usually, the file is placed in a subdirectory named 'scripts' at the same level as the ``metadata.csv`` file. It must be an executable program, and may be any executable program. 
 8 | 
 9 | When a data package is created, regardless of the type, a filesystem package is created first, then other types of packages are created from the filesystem package. This means that the row-generating program is only run once per resource when multiple packages are created, and also that the program can open the Metatab package being used to run the program to access previously created resource files. 
10 | 
11 | Program Inputs
12 | **************
13 | 
14 | The program can receive information from Metatab through program options and environmental variables, and must print CSV formatted lines to std out. 
15 | 
16 | There are two broad sources for inputs to the program. The first is a set of values that are passed into the program regardless of the configuration of the ``Root.DataFile`` term. The second is the set of properties of the ``Root.DataFile`` term. 
17 | 
18 | The inputs for all programs are: 
19 | 
20 | - METATAB_DOC: An env var that holds the URL for the Metatab document being processed
21 | - METATAB_PACKAGE: An env var that holds the metatab document's package URL. ( Which is usually the same as the document URL )
22 | - METATAB_WORKING_DIR: An env var that holds the path to the directory holding the metatab file. 
23 | - PROPERTIES: An env var that holds a JSON encoded dict with the three previous env values, along with the ``properties`` dict for the ``Root.DataFile`` term. 
24 | 
25 | Additionally, the program receives the ``Root.DataFile`` properties in these forms:
26 | 
27 | - Properties that have names that are all uppercased are assigned to env variables. 
28 | - Properties that have names that begin with '-' are assigned to program options.
29 | 
30 | 
31 | Common Patterns
32 | ***************
33 | 
34 | It is very common for a program to open the Metatab document that is being used to run the program. In Python:
35 | 
36 | .. code-block:: python 
37 | 
38 |     import metatab as mt
39 |     doc = mt.MetatabDoc(environ['METATAB_DOC'])
40 | 
41 | Since the program must output CSV formatted lines, a CSV writer can be constructed on ``sys.stdout``:
42 | 
43 | .. code-block:: python 
44 | 
45 |      import sys
46 |      import csv
47 |      
48 |      w = csv.writer(sys.stdout)
49 |      
50 |      w.writerow(...)
51 |      
52 |      
53 | If the program generates logging or warnings, they must be printed to ``sys.stderr``
54 | 
55 | .. code-block:: python 
56 | 
57 |      import sys
58 |      
59 |      print("ERROR!", file=sys.stderr)
60 |      
61 |      


--------------------------------------------------------------------------------
/metatab/test/test_doc.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | 
  3 | import unittest
  4 | from os.path import join, dirname
  5 | 
  6 | from metatab import MetatabDoc
  7 | from metatab.rowgenerators import TextRowGenerator
  8 | from metatab.test.core import test_data
  9 | 
 10 | 
 11 | class TestDoc(unittest.TestCase):
 12 | 
 13 |     def test_open(self):
 14 | 
 15 |         doc = MetatabDoc(test_data('almost-everything.csv'))
 16 | 
 17 |         self.assertEquals('9FC11204-B291-4E0E-A841-5372090ADEC0', doc.find_first_value('Root.Identifier'))
 18 | 
 19 |         self.assertEquals('9FC11204-B291-4E0E-A841-5372090ADEC0', doc['Root'].find_first_value('Root.Identifier'))
 20 | 
 21 | 
 22 |     def test_new(self):
 23 | 
 24 |         import metatab.templates as tmpl
 25 | 
 26 |         template_path = join(dirname(tmpl.__file__), 'metatab.csv')
 27 | 
 28 |         doc = MetatabDoc(template_path)
 29 |         doc.cleanse()
 30 | 
 31 |         print(doc.as_csv()[:200])
 32 | 
 33 |     def test_version(self):
 34 | 
 35 |         from textwrap import dedent
 36 | 
 37 | 
 38 |         doc = MetatabDoc(TextRowGenerator(
 39 |             dedent(
 40 |             """
 41 |             Root.Version:
 42 |             """)))
 43 | 
 44 |         # None because there are no Minor, Major, Patch value
 45 |         self.assertIsNone(doc.update_version())
 46 | 
 47 |         self.assertFalse(doc._has_semver())
 48 | 
 49 |         doc = MetatabDoc(TextRowGenerator(
 50 |             dedent(
 51 |                 """
 52 |                 Root.Version: 10
 53 |                 """)))
 54 | 
 55 |         # None because there are no Minor, Major, Patch value
 56 |         self.assertEqual("10", doc.update_version())
 57 |         self.assertFalse(doc._has_semver())
 58 | 
 59 |         doc = MetatabDoc(TextRowGenerator(
 60 |             dedent(
 61 |                 """
 62 |                 Root.Version: 10
 63 |                 Version.Patch: 5
 64 |                 """)))
 65 | 
 66 |         # None because there are no Minor, Major, Patch value
 67 |         self.assertEqual("0.0.5", doc.update_version())
 68 |         self.assertTrue(doc._has_semver())
 69 | 
 70 |         doc = MetatabDoc(TextRowGenerator(
 71 |             dedent(
 72 |                 """
 73 |                 Root.Version: 10
 74 |                 Version.Major: 2
 75 |                 Version.Patch: 5
 76 |                 """)))
 77 | 
 78 |         # None because there are no Minor, Major, Patch value
 79 |         self.assertEqual("2.0.5", doc.update_version())
 80 | 
 81 |         doc = MetatabDoc(TextRowGenerator(
 82 |             dedent(
 83 |                 """
 84 |                 Root.Name:
 85 |                 Root.Origin: example.com
 86 |                 Root.Dataset: foobar
 87 |                 Root.Version:
 88 |                 Version.Minor: 24
 89 |                 Version.Major: 2
 90 |                 Version.Patch: 5
 91 |                 """)))
 92 | 
 93 |         # None because there are no Minor, Major, Patch value
 94 |         self.assertEqual("2.24.5", doc.update_version())
 95 | 
 96 |         doc.update_name()
 97 |         self.assertEqual('example.com-foobar-2.24', doc.get_value('Root.Name'))
 98 | 
 99 | if __name__ == '__main__':
100 |     unittest.main()
101 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/packages/example.com-test_package/notebooks/Test_Notebook.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 4,
  6 |    "metadata": {},
  7 |    "outputs": [
  8 |     {
  9 |      "name": "stdout",
 10 |      "output_type": "stream",
 11 |      "text": [
 12 |       "The metatab extension is already loaded. To reload it, use:\n",
 13 |       "  %reload_ext metatab\n",
 14 |       "The autoreload extension is already loaded. To reload it, use:\n",
 15 |       "  %reload_ext autoreload\n"
 16 |      ]
 17 |     }
 18 |    ],
 19 |    "source": [
 20 |     "%matplotlib inline\n",
 21 |     "%load_ext metatab\n",
 22 |     "\n",
 23 |     "%load_ext autoreload\n",
 24 |     "%autoreload 2\n",
 25 |     "\n",
 26 |     "import pandas as pd\n",
 27 |     "import numpy as np \n",
 28 |     "import metatab as mt"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "code",
 33 |    "execution_count": 5,
 34 |    "metadata": {},
 35 |    "outputs": [],
 36 |    "source": [
 37 |     "%mt_open_package\n",
 38 |     "assert mt_pkg.path.endswith('metatab-py/test-data/packages/example.com-test_package/metadata.csv')\n",
 39 |     "orig_path = mt_pkg.path.endswith"
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "code",
 44 |    "execution_count": 6,
 45 |    "metadata": {},
 46 |    "outputs": [],
 47 |    "source": [
 48 |     "from metatab.pands import MetatabDataFrame\n",
 49 |     "\n",
 50 |     "odf = MetatabDataFrame({ 'cola':range(10), 'colb': range(10)})\n",
 51 |     "\n",
 52 |     "odf.name = 'income_homeval'\n",
 53 |     "odf.title = 'Income and Home Value Records for San Diego County'\n",
 54 |     "odf.cola.description = 'Household income'\n",
 55 |     "odf.colb.description = 'Home value'\n",
 56 |     "\n",
 57 |     "%mt_add_dataframe odf  --materialize\n",
 58 |     "\n",
 59 |     "cols = list(mt_pkg.resource('income_homeval').columns())\n",
 60 |     "assert 'cola' in [ c['name'] for c in cols]\n",
 61 |     "assert 'colb' in [ c['name'] for c in cols]"
 62 |    ]
 63 |   },
 64 |   {
 65 |    "cell_type": "code",
 66 |    "execution_count": null,
 67 |    "metadata": {
 68 |     "collapsed": true
 69 |    },
 70 |    "outputs": [],
 71 |    "source": []
 72 |   }
 73 |  ],
 74 |  "metadata": {
 75 |   "kernelspec": {
 76 |    "display_name": "Python 3",
 77 |    "language": "python",
 78 |    "name": "python3"
 79 |   },
 80 |   "language_info": {
 81 |    "codemirror_mode": {
 82 |     "name": "ipython",
 83 |     "version": 3
 84 |    },
 85 |    "file_extension": ".py",
 86 |    "mimetype": "text/x-python",
 87 |    "name": "python",
 88 |    "nbconvert_exporter": "python",
 89 |    "pygments_lexer": "ipython3",
 90 |    "version": "3.6.1"
 91 |   },
 92 |   "varInspector": {
 93 |    "cols": {
 94 |     "lenName": 16,
 95 |     "lenType": 16,
 96 |     "lenVar": 40
 97 |    },
 98 |    "kernels_config": {
 99 |     "python": {
100 |      "delete_cmd_postfix": "",
101 |      "delete_cmd_prefix": "del ",
102 |      "library": "var_list.py",
103 |      "varRefreshCmd": "print(var_dic_list())"
104 |     },
105 |     "r": {
106 |      "delete_cmd_postfix": ") ",
107 |      "delete_cmd_prefix": "rm(",
108 |      "library": "var_list.r",
109 |      "varRefreshCmd": "cat(var_dic_list()) "
110 |     }
111 |    },
112 |    "types_to_exclude": [
113 |     "module",
114 |     "function",
115 |     "builtin_function_or_method",
116 |     "instance",
117 |     "_Feature"
118 |    ],
119 |    "window_display": false
120 |   }
121 |  },
122 |  "nbformat": 4,
123 |  "nbformat_minor": 2
124 | }
125 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/notebooks/ImportTest.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "%load_ext metatab\n",
 12 |     "%mt_lib_dir lib\n",
 13 |     "\n",
 14 |     "import file\n"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 2,
 20 |    "metadata": {
 21 |     "collapsed": true
 22 |    },
 23 |    "outputs": [],
 24 |    "source": [
 25 |     "assert file.__file__.endswith('test-data/notebooks/lib/file.py')"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "code",
 30 |    "execution_count": 3,
 31 |    "metadata": {
 32 |     "collapsed": true
 33 |    },
 34 |    "outputs": [],
 35 |    "source": [
 36 |     "%mt_lib_dir http://s3.amazonaws.com/library.metatab.org/ipums.org-income_homevalue-5.zip"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "code",
 41 |    "execution_count": 4,
 42 |    "metadata": {
 43 |     "collapsed": true
 44 |    },
 45 |    "outputs": [],
 46 |    "source": [
 47 |     "import lib.incomedist"
 48 |    ]
 49 |   },
 50 |   {
 51 |    "cell_type": "code",
 52 |    "execution_count": 5,
 53 |    "metadata": {
 54 |     "collapsed": true
 55 |    },
 56 |    "outputs": [],
 57 |    "source": [
 58 |     "assert lib.incomedist.__file__.endswith('ipums.org-income_homevalue-5.zip/ipums.org-income_homevalue-5/lib/incomedist.py')"
 59 |    ]
 60 |   },
 61 |   {
 62 |    "cell_type": "code",
 63 |    "execution_count": 6,
 64 |    "metadata": {
 65 |     "collapsed": true
 66 |    },
 67 |    "outputs": [],
 68 |    "source": [
 69 |     "%%metatab\n",
 70 |     "Identifier: 47bc1089-7584-41f0-b804-602ec42f1249\n",
 71 |     "Name: FooBarBaz\n",
 72 |     "\n",
 73 |     "Section: References \n",
 74 |     "Reference: metatab+http://s3.amazonaws.com/library.metatab.org/ipums.org-income_homevalue-5.zip#income_homeval\n",
 75 |     "Reference.Name: incv\n",
 76 |     "Reference.Description: Income and Home value records from IPUMS for San Diego County\n",
 77 |     "\n"
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "code",
 82 |    "execution_count": 7,
 83 |    "metadata": {
 84 |     "collapsed": true
 85 |    },
 86 |    "outputs": [],
 87 |    "source": [
 88 |     "%mt_lib_dir incv"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "code",
 93 |    "execution_count": null,
 94 |    "metadata": {
 95 |     "collapsed": true
 96 |    },
 97 |    "outputs": [],
 98 |    "source": []
 99 |   }
100 |  ],
101 |  "metadata": {
102 |   "kernelspec": {
103 |    "display_name": "Python 3",
104 |    "language": "python",
105 |    "name": "python3"
106 |   },
107 |   "language_info": {
108 |    "codemirror_mode": {
109 |     "name": "ipython",
110 |     "version": 3
111 |    },
112 |    "file_extension": ".py",
113 |    "mimetype": "text/x-python",
114 |    "name": "python",
115 |    "nbconvert_exporter": "python",
116 |    "pygments_lexer": "ipython3",
117 |    "version": "3.6.1"
118 |   },
119 |   "varInspector": {
120 |    "cols": {
121 |     "lenName": 16,
122 |     "lenType": 16,
123 |     "lenVar": 40
124 |    },
125 |    "kernels_config": {
126 |     "python": {
127 |      "delete_cmd_postfix": "",
128 |      "delete_cmd_prefix": "del ",
129 |      "library": "var_list.py",
130 |      "varRefreshCmd": "print(var_dic_list())"
131 |     },
132 |     "r": {
133 |      "delete_cmd_postfix": ") ",
134 |      "delete_cmd_prefix": "rm(",
135 |      "library": "var_list.r",
136 |      "varRefreshCmd": "cat(var_dic_list()) "
137 |     }
138 |    },
139 |    "types_to_exclude": [
140 |     "module",
141 |     "function",
142 |     "builtin_function_or_method",
143 |     "instance",
144 |     "_Feature"
145 |    ],
146 |    "window_display": false
147 |   }
148 |  },
149 |  "nbformat": 4,
150 |  "nbformat_minor": 2
151 | }
152 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/notebooks/CellExecuteError.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 3,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "%load_ext metatab"
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": 4,
 17 |    "metadata": {
 18 |     "collapsed": true
 19 |    },
 20 |    "outputs": [],
 21 |    "source": [
 22 |     "%%metatab\n",
 23 |     "Origin: example.com\n",
 24 |     "Dataset: foobar.com \n",
 25 |     "Identifier: de097279-28ef-42f5-a4f5-0eaac53b7dc4\n",
 26 |     "Name: example.com-foobar.com \n",
 27 |     "\n",
 28 |     "Section: Contacts\n",
 29 |     "Wrangler: Eric Busboom\n",
 30 |     "Wrangler.Email: eric@civicknowledge.com\n",
 31 |     "\n",
 32 |     "Section: References\n",
 33 |     "Reference: http://public.source.civicknowledge.com/example.com/sources/renter_cost.csv\n",
 34 |     "Reference.Name: reference\n",
 35 |     "Reference.Title: The First Example Data File\n",
 36 |     "Reference.Startline: 5\n",
 37 |     "Reference.HeaderLines: 3,4\n",
 38 |     "    \n",
 39 |     "Section: Resources\n",
 40 |     "Datafile: http://public.source.civicknowledge.com/example.com/sources/renter_cost.csv\n",
 41 |     "Datafile.Name: ext_resource\n",
 42 |     "Datafile.Title: An Extern CSV Resource\n",
 43 |     "Datafile.Startline: 5\n",
 44 |     "Datafile.HeaderLines: 3,4"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "code",
 49 |    "execution_count": 5,
 50 |    "metadata": {},
 51 |    "outputs": [
 52 |     {
 53 |      "ename": "ZeroDivisionError",
 54 |      "evalue": "division by zero",
 55 |      "output_type": "error",
 56 |      "traceback": [
 57 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
 58 |       "\u001b[0;31mZeroDivisionError\u001b[0m                         Traceback (most recent call last)",
 59 |       "\u001b[0;32m<ipython-input-5-05c9758a9c21>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;36m1\u001b[0m\u001b[0;34m/\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
 60 |       "\u001b[0;31mZeroDivisionError\u001b[0m: division by zero"
 61 |      ]
 62 |     }
 63 |    ],
 64 |    "source": [
 65 |     "1/0"
 66 |    ]
 67 |   },
 68 |   {
 69 |    "cell_type": "code",
 70 |    "execution_count": null,
 71 |    "metadata": {
 72 |     "collapsed": true
 73 |    },
 74 |    "outputs": [],
 75 |    "source": []
 76 |   }
 77 |  ],
 78 |  "metadata": {
 79 |   "kernelspec": {
 80 |    "display_name": "Python 3",
 81 |    "language": "python",
 82 |    "name": "python3"
 83 |   },
 84 |   "language_info": {
 85 |    "codemirror_mode": {
 86 |     "name": "ipython",
 87 |     "version": 3
 88 |    },
 89 |    "file_extension": ".py",
 90 |    "mimetype": "text/x-python",
 91 |    "name": "python",
 92 |    "nbconvert_exporter": "python",
 93 |    "pygments_lexer": "ipython3",
 94 |    "version": "3.6.1"
 95 |   },
 96 |   "varInspector": {
 97 |    "cols": {
 98 |     "lenName": 16,
 99 |     "lenType": 16,
100 |     "lenVar": 40
101 |    },
102 |    "kernels_config": {
103 |     "python": {
104 |      "delete_cmd_postfix": "",
105 |      "delete_cmd_prefix": "del ",
106 |      "library": "var_list.py",
107 |      "varRefreshCmd": "print(var_dic_list())"
108 |     },
109 |     "r": {
110 |      "delete_cmd_postfix": ") ",
111 |      "delete_cmd_prefix": "rm(",
112 |      "library": "var_list.r",
113 |      "varRefreshCmd": "cat(var_dic_list()) "
114 |     }
115 |    },
116 |    "types_to_exclude": [
117 |     "module",
118 |     "function",
119 |     "builtin_function_or_method",
120 |     "instance",
121 |     "_Feature"
122 |    ],
123 |    "window_display": false
124 |   }
125 |  },
126 |  "nbformat": 4,
127 |  "nbformat_minor": 2
128 | }
129 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/errors/errors2.csv:
--------------------------------------------------------------------------------
 1 | "Declare","http://doesntexist.csv",,,
 2 | "Title","Registered Voters, By County",,,
 3 | "Description","Percent of the eligible population registered to vote and the percent who voted in statewide elections.",,,
 4 | "Identifier","cdph.ca.gov-hci-registered_voters-county",,,
 5 | ,201404,,,
 6 | ,"cdph.ca.gov-hci-registered_voters-county-201304",,,
 7 | "Format","excel",,,
 8 | "Spatial","California <04000US06>",,,
 9 | "Time","2002-2014",,,
10 | "SpatialGrain","County <05000US>",,,
11 | ,,,,
12 | "Section","Resources","table","Grain","Title"
13 | "Datafile","http://example.com/example1.csv","registered_voters","County","The First Example Data File"
14 | "Datafile","http://example.com/example2.csv","registered_voters","Tract","The Second Example Data File"
15 | ,,"Healthy Communities Data and Indicators Project (HCI)",,
16 | ,,"Indicator Documentation for Voter Registration / Participation",,
17 | ".description","Voter Registration/Participation: Percent of the eligible population registered to vote and the percent who voted in statewide elections",,,
18 | ,,,,
19 | "Section ","Contacts","email",,
20 | "Creator","Office of Health Equity","HCIOHE@cdph.ca.gov",,
21 | "Wrangler","Eric Busboom","eric@civicknowledge.com",,
22 | ,,,,
23 | "Section ","Notes",,,
24 | "Note","This file is an example of a data bundle, a simple format for linking data to metadata using spreadsheets. See the specification for more details. ",,,
25 | "Documentation","https://docs.google.com/document/d/16tb7x73AyF8pJ6e6IBcaIJAioEZCNBGDEksKYTXfdfg/edit#",,,
26 | ".title","Data Bundles Packaging Specification",,,
27 | ,,,,
28 | "Section","Schema",,"valuetype","description"
29 | "Table","registered_voters",,,"HCI Indicator 653.0: Percent of adults age 18 years and older who are registered voters"
30 | "Column","reportyear","int","year range","Year or years that indicator was reported"
31 | "Column","type","str","dimension","Type of record"
32 | "Column","gvid","str","gvid","GVid version of the geotype and geotypeval"
33 | "Column","geoname","str","label for gvid","Census name of geographic area"
34 | "Column","geotype","str","label","Code for type of geographic area"
35 | "Column","geotypevalue","str","census","Census geoid code"
36 | "Column","county_fips","str","FIPS county code","County FIPS code"
37 | "Column","county_name","str","label for counrty_fips","County name"
38 | "Column","region_code","str","census code","Numeric code of region"
39 | "Column","region_name","str","label for region_code","Name of region"
40 | "Column","raceth","str","raceth/civick","Civic Knowledge race / ethnicity code."
41 | "Column","raceth_name","str","label for raceeth","Race / Ethnicity Name"
42 | "Column","race_eth_code","str","raceth/hci","Race / ethnicity code"
43 | "Column","race_eth_name","str","label for race_eth_code","Race / ethnicity name"
44 | "Column","numerator","int","count","Adults who are registered to vote, or who voted, depending on type of record"
45 | "Column","denominator","int","count","Population of Adults, 18 years or older"
46 | "Column","percent","float","percent of numerator over denominator","Percent of adults who are registered to vote, or who voted, depending on type of record"
47 | "Column","ll_95ci","float","ci95l for percent","Lower bound of 95% confidence interval"
48 | "Column","ul_95ci","float","ci95u for percent","Upper bound of 95% confidence interval"
49 | "Column","se","float","se for percent","Standard error"
50 | "Column","rse","float","rse for percent","Relative standard error (se/percent * 100) expressed as a percent"
51 | "Column","ca_decile","float","decile","Statewide decile ranking"
52 | "Column","ca_rr","float","ratio","Ratio of indicator to state average"
53 | "Column","vap","float","measure","Voter age population, from CA Department of Finance."
54 | "Column","ind_id","str","dimension",
55 | "Column","ind_definition","str","dimension",
56 | "Column","version","str","other",
57 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/example1-web.csv:
--------------------------------------------------------------------------------
 1 | "Declare","http://assets.metatab.org/metatab-0.1.csv",,,
 2 | "Title","Registered Voters, By County",,,
 3 | "Description","Percent of the eligible population registered to vote and the percent who voted in statewide elections.",,,
 4 | "Identifier","cdph.ca.gov-hci-registered_voters-county",,,
 5 | "Version",201404,,,
 6 | "Obsoletes","cdph.ca.gov-hci-registered_voters-county-201304",,,
 7 | "Format","excel",,,
 8 | "Spatial","California <04000US06>",,,
 9 | "Time","2002-2014",,,
10 | "SpatialGrain","County <05000US>",,,
11 | ,,,,
12 | "Section","Resources","table","Grain","Title"
13 | "Datafile","http://example.com/example1.csv","registered_voters","County","The First Example Data File"
14 | "Datafile","http://example.com/example2.csv","registered_voters","Tract","The Second Example Data File"
15 | "Homepage","https://www.cdph.ca.gov/programs/pages/healthycommunityindicators.aspx","Healthy Communities Data and Indicators Project (HCI)",,
16 | "Documentation","https://www.cdph.ca.gov/programs/Documents/HCI_RegisteredVoters_653_Narrative_and_examples_6-2-14.pdf","Indicator Documentation for Voter Registration / Participation",,
17 | ".description","Voter Registration/Participation: Percent of the eligible population registered to vote and the percent who voted in statewide elections",,,
18 | ,,,,
19 | "Section ","Contacts","email",,
20 | "Creator","Office of Health Equity","HCIOHE@cdph.ca.gov",,
21 | "Wrangler","Eric Busboom","eric@civicknowledge.com",,
22 | ,,,,
23 | "Section ","Notes",,,
24 | "Note","This file is an example of a data bundle, a simple format for linking data to metadata using spreadsheets. See the specification for more details. ",,,
25 | "Documentation","https://docs.google.com/document/d/16tb7x73AyF8pJ6e6IBcaIJAioEZCNBGDEksKYTXfdfg/edit#",,,
26 | ".title","Data Bundles Packaging Specification",,,
27 | ,,,,
28 | "Section","Schema","datatype","valuetype","description"
29 | "Table","registered_voters",,,"HCI Indicator 653.0: Percent of adults age 18 years and older who are registered voters"
30 | "Column","reportyear","int","year range","Year or years that indicator was reported"
31 | "Column","type","str","dimension","Type of record"
32 | "Column","gvid","str","gvid","GVid version of the geotype and geotypeval"
33 | "Column","geoname","str","label for gvid","Census name of geographic area"
34 | "Column","geotype","str","label","Code for type of geographic area"
35 | "Column","geotypevalue","str","census","Census geoid code"
36 | "Column","county_fips","str","FIPS county code","County FIPS code"
37 | "Column","county_name","str","label for counrty_fips","County name"
38 | "Column","region_code","str","census code","Numeric code of region"
39 | "Column","region_name","str","label for region_code","Name of region"
40 | "Column","raceth","str","raceth/civick","Civic Knowledge race / ethnicity code."
41 | "Column","raceth_name","str","label for raceeth","Race / Ethnicity Name"
42 | "Column","race_eth_code","str","raceth/hci","Race / ethnicity code"
43 | "Column","race_eth_name","str","label for race_eth_code","Race / ethnicity name"
44 | "Column","numerator","int","count","Adults who are registered to vote, or who voted, depending on type of record"
45 | "Column","denominator","int","count","Population of Adults, 18 years or older"
46 | "Column","percent","float","percent of numerator over denominator","Percent of adults who are registered to vote, or who voted, depending on type of record"
47 | "Column","ll_95ci","float","ci95l for percent","Lower bound of 95% confidence interval"
48 | "Column","ul_95ci","float","ci95u for percent","Upper bound of 95% confidence interval"
49 | "Column","se","float","se for percent","Standard error"
50 | "Column","rse","float","rse for percent","Relative standard error (se/percent * 100) expressed as a percent"
51 | "Column","ca_decile","float","decile","Statewide decile ranking"
52 | "Column","ca_rr","float","ratio","Ratio of indicator to state average"
53 | "Column","vap","float","measure","Voter age population, from CA Department of Finance."
54 | "Column","ind_id","str","dimension",
55 | "Column","ind_definition","str","dimension",
56 | "Column","version","str","other",
57 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
  1 | Metatab
  2 | =======
  3 | 
  4 | .. image:: https://travis-ci.org/Metatab/metatab.svg?branch=master
  5 |     :target: https://travis-ci.org/Metatab/metatab
  6 | 
  7 | Parse and manipulate structured data and metadata in a tabular format.
  8 | 
  9 | `Metatab <http://metatab.org>`_ is a data format that allows structured
 10 | metadata -- the sort you'd normally store in JSON, YAML or XML -- to be stored
 11 | and edited in tabular forms like CSV or Excel. Metatab files look exactly like
 12 | you'd expect, so they are very easy for non-technical users to read and edit,
 13 | using tools they already have. Metatab is an excellent format for creating,
 14 | storing and transmitting metadata. For more information about metatab, visit
 15 | http://metatab.org.
 16 | 
 17 | This repository has a Python module and executable. For a Javascript version,
 18 | see the `metatab-js <https://github.com/CivicKnowledge/metatab-js>`_ repository.
 19 | 
 20 | What is Metatab For?
 21 | --------------------
 22 | 
 23 | Metatab is a tabular format for storing metadata about demographics, health
 24 | and research datasets. The tabular format is much easier for data creators to
 25 | write and for data consumers to read, and it allows a complete data package to
 26 | be stored in a single Excel file.
 27 | 
 28 | 
 29 | Install
 30 | -------
 31 | 
 32 | 
 33 | 
 34 | Install the package from PyPI with:
 35 | 
 36 | .. code-block:: bash
 37 | 
 38 |     $ pip install metatab
 39 | 
 40 | Or, install the master branch from github with:
 41 | 
 42 | .. code-block:: bash
 43 | 
 44 |     $ pip install git+https://github.com/CivicKnowledge/metatab.git
 45 | 
 46 | Then test parsing using a remote file with:
 47 | 
 48 | .. code-block:: bash
 49 | 
 50 |     $ metatab -j https://raw.githubusercontent.com/CivicKnowledge/metatab/master/test-data/example1.csv
 51 | 
 52 | Run ``metatab -h`` to get other program options.
 53 | 
 54 | The ``test-data`` directory has test files that also serve as examples to
 55 | parse. You can either clone the repo and parse the local files or, on the
 56 | GitHub page for a file, click the ``raw`` button to get the raw view of the
 57 | file, then copy the URL.
 58 | 
 59 | 
 60 | Running tests
 61 | +++++++++++++
 62 | 
 63 | Run ``python setup.py test`` to run normal development tests. You can also run
 64 | ``tox``, which will try to run the tests with python 3.4, 3.5 and 3.6, ignoring
 65 | non-existent interpreters.
 66 | 
 67 | 
 68 | Development Testing with Docker
 69 | +++++++++++++++++++++++++++++++
 70 | 
 71 | Testing during development for other versions of Python is a bit of a pain,
 72 | since you have to install the alternate version, and Tox will run all of the
 73 | tests, not just the one you want.
 74 | 
 75 | One way to deal with this is to install Docker locally, then run the docker
 76 | test container on the source directory. This is done automatically from the
 77 | Makefile in metatab/test, just run:
 78 | 
 79 | .. code-block:: bash
 80 | 
 81 |     $ cd metatab/test
 82 |     $ make build # to create the container image
 83 |     $ make test
 84 |     # or just ..
 85 |     $ make
 86 | 
 87 | You can also run the container shell, and run tests from the command line.
 88 | 
 89 | .. code-block:: bash
 90 | 
 91 |     $ cd metatab/test
 92 |     $ make build # to create the container image
 93 |     $ make shell # to run bash in the container
 94 | 
 95 | You now have a docker container where the /code directory is the metatab source dir.
 96 | 
 97 | Now, run tox to build the tox virtual environments, then enter the specific version you want to
 98 | run tests for and activate the virtual environment.
 99 | 
100 | .. code-block:: bash
101 | 
102 |     # tox
103 |     # cd .tox/py34
104 |     # source bin/activate # Activate the python 3.4 virtual env
105 |     # cd ../../
106 |     # python setup.py test # Cause test deps to get installed
107 |     #
108 |     # python -munittest metatab.test.test_parser.TestParser.test_parse_everything  # Run one test
109 | 
110 | Note that your development environment is mounted into the Docker container, so you can edit local
111 | files and test the changes in Docker.
112 | 
113 | 
114 | 
115 | 
116 | 
117 | 
118 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/yaml/yaml-example-1.yaml:
--------------------------------------------------------------------------------
  1 | declare: metatab-latest
  2 | title: San Diego County Weather
  3 | description: Daily summaries from a selection of San Diego county weather stations
  4 | identifier: 2dc83efa-e6da-4561-bdf9-63263360ccf0
  5 | name: noaa.gov-daily_summary-1998e-san-1
  6 | dataset: daily_summary
  7 | origin: noaa.gov
  8 | time: 1998e
  9 | space: san
 10 | grain: null
 11 | variant: null
 12 | version: '1'
 13 | created: '2018-08-17T15:44:24'
 14 | modified: '2018-08-17T16:18:19'
 15 | giturl: https://github.com/san-diego-water-quality/water-datasets.git
 16 | wrangler:
 17 | -   email: eric@civicknowledge.com
 18 |     organization: Civic Knowledge
 19 |     url: http://civicknowledge.com
 20 |     name: Eric Busboom
 21 | documentation:
 22 | -   title: README
 23 |     url: file:README.md
 24 | -   title: Documentation
 25 |     description: Main documentation
 26 |     url: https://www1.ncdc.noaa.gov/pub/data/cdo/documentation/GHCND_documentation.pdf
 27 | datafile:
 28 | -   name: daily_summary_san
 29 |     description: Daily weather summaries
 30 |     url: http://ds.civicknowledge.org.s3.amazonaws.com/noaa.gov/daily-summary-1998-2018-san.csv
 31 | -   name: daily_summary_la
 32 |     description: Daily weather summaries
 33 |     url: http://ds.civicknowledge.org.s3.amazonaws.com/noaa.gov/daily-summary-1998-2018-san.csv
 34 | table:
 35 | -   column:
 36 |     -   datatype: string
 37 |         altname: station
 38 |         name: STATION
 39 |     -   datatype: string
 40 |         altname: name
 41 |         description: Station code
 42 |         name: NAME
 43 |     -   datatype: number
 44 |         altname: latitude
 45 |         description: Station name
 46 |         name: LATITUDE
 47 |     -   datatype: number
 48 |         altname: longitude
 49 |         description: Station lattitude
 50 |         name: LONGITUDE
 51 |     -   datatype: number
 52 |         altname: elevation
 53 |         description: Station longitude
 54 |         name: ELEVATION
 55 |     -   datatype: date
 56 |         altname: date
 57 |         description: Station elevation
 58 |         name: DATE
 59 |     -   datatype: number
 60 |         altname: awnd
 61 |         description: Measurement date
 62 |         name: AWND
 63 |     -   datatype: string
 64 |         altname: dapr
 65 |         description: Average daily wind speed (meters per second or miles per hour
 66 |             as per user preference
 67 |         name: DAPR
 68 |     -   datatype: integer
 69 |         altname: fmtm
 70 |         description: Number of days included in the multiday precipitation total (MDPR)
 71 |         name: FMTM
 72 |     -   datatype: string
 73 |         altname: mdpr
 74 |         description: Time of fastest mile or fastest 1-minute wind (hours and minutes,
 75 |             i.e., HHMM)
 76 |         name: MDPR
 77 |     -   datatype: string
 78 |         altname: pgtm
 79 |         description: Multiday precipitation total (mm or inches as per user preference;
 80 |             use with DAPR and DWPR, if available)
 81 |         name: PGTM
 82 |     -   datatype: number
 83 |         altname: prcp
 84 |         description: Peak gust time (hours and minutes, i.e., HHMM)
 85 |         name: PRCP
 86 |     -   datatype: integer
 87 |         altname: snow
 88 |         description: Precipitation (mm or inches as per user preference, inches to
 89 |             hundredths on Daily Form pdf file)
 90 |         name: SNOW
 91 |     -   datatype: integer
 92 |         altname: snwd
 93 |         description: Snowfall (mm or inches as per user preference, inches to tenths
 94 |             on Daily Form pdf file)
 95 |         name: SNWD
 96 |     -   datatype: string
 97 |         altname: tavg
 98 |         description: Snow depth (mm or inches as per user preference, inches on Daily
 99 |             Form pdf file)
100 |         name: TAVG
101 |     -   datatype: integer
102 |         altname: tmax
103 |         description: Average temerature
104 |         name: TMAX
105 |     -   datatype: integer
106 |         altname: tmin
107 |         description: Maximum temperature (Fahrenheit or Celsius as per user preference,
108 |             Fahrenheit to tenths on Daily Form pdf file
109 |         name: TMIN
110 |     name: daily_summary_san
111 | 
112 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/example1.csv:
--------------------------------------------------------------------------------
 1 | "Declare","metatab-latest",,,,
 2 | "Title","Registered Voters, By County",,,,
 3 | "Name","cdph.ca.gov-hci-registered_voters-county",,,,
 4 | "Description","Percent of the eligible population registered to vote and the percent who voted in statewide elections.",,,,
 5 | "Identifier","cdph.ca.gov-hci-registered_voters-county",,,,
 6 | "Version",201404,,,,
 7 | "Obsoletes","cdph.ca.gov-hci-registered_voters-county-201304",,,,
 8 | "Dataset","voters",,,,
 9 | "Origin","example.com",,,,
10 | "Space","Ca",,,,
11 | "Time","2002-2014",,,,
12 | "Grain","County",,,,
13 | "Format","excel",,,,
14 | ,,,,,
15 | ,,,,,
16 | "Section","Resources",,,,
17 | "Header","url","name","schema","Grain","Title"
18 | "Datafile","http://example.com/example1.csv","example1","registered_voters","County","The First Example Data File"
19 | "Datafile","http://example.com/example2.csv","example2","registered_voters","Tract","The Second Example Data File"
20 | "Homepage","https://www.cdph.ca.gov/programs/pages/healthycommunityindicators.aspx",,"Healthy Communities Data and Indicators Project (HCI)",,
21 | "Documentation","https://www.cdph.ca.gov/programs/Documents/HCI_RegisteredVoters_653_Narrative_and_examples_6-2-14.pdf",,"Indicator Documentation for Voter Registration / Participation",,
22 | ".description","Voter Registration/Participation: Percent of the eligible population registered to vote and the percent who voted in statewide elections",,,,
23 | ,,,,,
24 | "Section ","Contacts",,"email",,
25 | "Creator","Office of Health Equity",,"HCIOHE@cdph.ca.gov",,
26 | "Wrangler","Eric Busboom",,"eric@civicknowledge.com",,
27 | ,,,,,
28 | "Section ","Notes",,,,
29 | "Note","This file is an example of a data bundle, a simple format for linking data to metadata using spreadsheets. See the specification for more details. ",,,,
30 | "Documentation","https://docs.google.com/document/d/16tb7x73AyF8pJ6e6IBcaIJAioEZCNBGDEksKYTXfdfg/edit#",,,,
31 | ".Title","Data Bundles Packaging Specification",,,,
32 | ,,,,,
33 | "Section","Schema",,"datatype","valuetype","description"
34 | "Table","registered_voters",,,,"HCI Indicator 653.0: Percent of adults age 18 years and older who are registered voters"
35 | "Table.Column","reportyear",,"int","year range","Year or years that indicator was reported"
36 | "Table.Column","type",,"str","dimension","Type of record"
37 | "Table.Column","gvid",,"str","gvid","GVid version of the geotype and geotypeval"
38 | "Table.Column","geoname",,"str","label for gvid","Census name of geographic area"
39 | "Table.Column","geotype",,"str","label","Code for type of geographic area"
40 | "Table.Column","geotypevalue",,"str","census","Census geoid code"
41 | "Table.Column","county_fips",,"str","FIPS county code","County FIPS code"
42 | "Table.Column","county_name",,"str","label for counrty_fips","County name"
43 | "Table.Column","region_code",,"str","census code","Numeric code of region"
44 | "Table.Column","region_name",,"str","label for region_code","Name of region"
45 | "Table.Column","raceth",,"str","raceth/civick","Civic Knowledge race / ethnicity code."
46 | "Table.Column","raceth_name",,"str","label for raceeth","Race / Ethnicity Name"
47 | "Table.Column","race_eth_code",,"str","raceth/hci","Race / ethnicity code"
48 | "Table.Column","race_eth_name",,"str","label for race_eth_code","Race / ethnicity name"
49 | "Table.Column","numerator",,"int","count","Adults who are registered to vote, or who voted, depending on type of record"
50 | "Table.Column","denominator",,"int","count","Population of Adults, 18 years or older"
51 | "Table.Column","percent",,"float","percent of numerator over denominator","Percent of adults who are registered to vote, or who voted, depending on type of record"
52 | "Table.Column","ll_95ci",,"float","ci95l for percent","Lower bound of 95% confidence interval"
53 | "Table.Column","ul_95ci",,"float","ci95u for percent","Upper bound of 95% confidence interval"
54 | "Table.Column","se",,"float","se for percent","Standard error"
55 | "Table.Column","rse",,"float","rse for percent","Relative standard error (se/percent * 100) expressed as a percent"
56 | "Table.Column","ca_decile",,"float","decile","Statewide decile ranking"
57 | "Table.Column","ca_rr",,"float","ratio","Ratio of indicator to state average"
58 | "Table.Column","vap",,"float","measure","Voter age population, from CA Department of Finance."
59 | "Table.Column","ind_id",,"str","dimension",
60 | "Table.Column","ind_definition",,"str","dimension",
61 | "Table.Column","version",,"str","other",
62 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/example1-headers.csv:
--------------------------------------------------------------------------------
 1 | "Declare","metatab-latest",,,,
 2 | "Title","Registered Voters, By County",,,,
 3 | "Name","cdph.ca.gov-hci-registered_voters-county",,,,
 4 | "Description","Percent of the eligible population registered to vote and the percent who voted in statewide elections.",,,,
 5 | "Identifier","cdph.ca.gov-hci-registered_voters-county",,,,
 6 | "Version",201404,,,,
 7 | "Obsoletes","cdph.ca.gov-hci-registered_voters-county-201304",,,,
 8 | "Dataset","voters",,,,
 9 | "Origin","example.com",,,,
10 | "Space","Ca",,,,
11 | "Time","2002-2014",,,,
12 | "Grain","County",,,,
13 | "Format","excel",,,,
14 | ,,,,,
15 | ,,,,,
16 | ,,,,,
17 | "Section","Resources",,,,
18 | "Header","url","name","schema","Grain","Title"
19 | "Datafile","http://example.com/example1.csv","example1","registered_voters","County","The First Example Data File"
20 | "Datafile","http://example.com/example2.csv","example2","registered_voters","Tract","The Second Example Data File"
21 | "Homepage","https://www.cdph.ca.gov/programs/pages/healthycommunityindicators.aspx",,"Healthy Communities Data and Indicators Project (HCI)",,
22 | "Documentation","https://www.cdph.ca.gov/programs/Documents/HCI_RegisteredVoters_653_Narrative_and_examples_6-2-14.pdf",,"Indicator Documentation for Voter Registration / Participation",,
23 | ".description","Voter Registration/Participation: Percent of the eligible population registered to vote and the percent who voted in statewide elections",,,,
24 | ,,,,,
25 | "Section ","Contacts",,"email",,
26 | "Creator","Office of Health Equity",,"HCIOHE@cdph.ca.gov",,
27 | "Wrangler","Eric Busboom",,"eric@civicknowledge.com",,
28 | ,,,,,
29 | "Section ","Notes",,,,
30 | "Note","This file is an example of a data bundle, a simple format for linking data to metadata using spreadsheets. See the specification for more details. ",,,,
31 | "Documentation","https://docs.google.com/document/d/16tb7x73AyF8pJ6e6IBcaIJAioEZCNBGDEksKYTXfdfg/edit#",,,,
32 | ".Title","Data Bundles Packaging Specification",,,,
33 | ,,,,,
34 | "Section","Schema",,"datatype","valuetype","description"
35 | "Table","registered_voters",,,,"HCI Indicator 653.0: Percent of adults age 18 years and older who are registered voters"
36 | "Table.Column","reportyear",,"int","year range","Year or years that indicator was reported"
37 | "Table.Column","type",,"str","dimension","Type of record"
38 | "Table.Column","gvid",,"str","gvid","GVid version of the geotype and geotypeval"
39 | "Table.Column","geoname",,"str","label for gvid","Census name of geographic area"
40 | "Table.Column","geotype",,"str","label","Code for type of geographic area"
41 | "Table.Column","geotypevalue",,"str","census","Census geoid code"
42 | "Table.Column","county_fips",,"str","FIPS county code","County FIPS code"
43 | "Table.Column","county_name",,"str","label for counrty_fips","County name"
44 | "Table.Column","region_code",,"str","census code","Numeric code of region"
45 | "Table.Column","region_name",,"str","label for region_code","Name of region"
46 | "Table.Column","raceth",,"str","raceth/civick","Civic Knowledge race / ethnicity code."
47 | "Table.Column","raceth_name",,"str","label for raceeth","Race / Ethnicity Name"
48 | "Table.Column","race_eth_code",,"str","raceth/hci","Race / ethnicity code"
49 | "Table.Column","race_eth_name",,"str","label for race_eth_code","Race / ethnicity name"
50 | "Table.Column","numerator",,"int","count","Adults who are registered to vote, or who voted, depending on type of record"
51 | "Table.Column","denominator",,"int","count","Population of Adults, 18 years or older"
52 | "Table.Column","percent",,"float","percent of numerator over denominator","Percent of adults who are registered to vote, or who voted, depending on type of record"
53 | "Table.Column","ll_95ci",,"float","ci95l for percent","Lower bound of 95% confidence interval"
54 | "Table.Column","ul_95ci",,"float","ci95u for percent","Upper bound of 95% confidence interval"
55 | "Table.Column","se",,"float","se for percent","Standard error"
56 | "Table.Column","rse",,"float","rse for percent","Relative standard error (se/percent * 100) expressed as a percent"
57 | "Table.Column","ca_decile",,"float","decile","Statewide decile ranking"
58 | "Table.Column","ca_rr",,"float","ratio","Ratio of indicator to state average"
59 | "Table.Column","vap",,"float","measure","Voter age population, from CA Department of Finance."
60 | "Table.Column","ind_id",,"str","dimension",
61 | "Table.Column","ind_definition",,"str","dimension",
62 | "Table.Column","version",,"str","other",
63 | 


--------------------------------------------------------------------------------
/metatab/appurl.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2017 Civic Knowledge. This file is licensed under the terms of the
  2 | # Revised BSD License, included in this distribution as LICENSE
  3 | 
  4 | """
  5 | 
  6 | """
  7 | 
  8 | from metatab import DEFAULT_METATAB_FILE
  9 | from os.path import basename, join
 10 | from rowgenerators import Url
 11 | from rowgenerators.appurl.file.file import InnerFile
 12 | from rowgenerators.appurl.util import file_ext
 13 | from rowgenerators.appurl.web.web import WebUrl
 14 | 
 15 | class MetatabUrl(InnerFile, Url):
 16 |     match_priority = WebUrl.match_priority - 1
 17 | 
 18 |     simple_file_formats = ('csv', 'txt', 'ipynb')
 19 | 
 20 |     def __init__(self, url=None, downloader=None, **kwargs):
 21 |         kwargs['proto'] = 'metatab'
 22 | 
 23 |         u = Url(url, **kwargs)
 24 | 
 25 |         assert downloader
 26 | 
 27 |         # If there is no file with an extension in the path, assume that this
 28 |         # is a filesystem package, and that the path should have DEFAULT_METATAB_FILE
 29 |         if file_ext(basename(u.path)) not in ('zip', 'xlsx') + self.simple_file_formats:
 30 |             u.path = join(u.path, DEFAULT_METATAB_FILE)
 31 | 
 32 |         super().__init__(str(u), downloader=downloader, **kwargs)
 33 | 
 34 |         self.scheme_extension = 'metatab'
 35 | 
 36 |         if basename(self.path) == DEFAULT_METATAB_FILE:
 37 |             frag = ''
 38 |         elif self.resource_format in self.simple_file_formats:
 39 |             frag = ''
 40 |         elif self.resource_format == 'xlsx':
 41 |             frag = 'meta'
 42 |         elif self.resource_format == 'zip':
 43 |             frag = DEFAULT_METATAB_FILE
 44 | 
 45 |         self.fragment = [frag, None]
 46 | 
 47 |     @classmethod
 48 |     def _match(cls, url, **kwargs):
 49 |         return url.proto == 'metatab'
 50 | 
 51 |     @property
 52 |     def resource_format(self):
 53 | 
 54 |         resource_format = file_ext(basename(self.path))
 55 | 
 56 |         assert resource_format, self.path  # Should have either a definite file, or have added one in __init__
 57 | 
 58 |         return resource_format
 59 | 
 60 |     @property
 61 |     def resource_file(self):
 62 | 
 63 |         assert basename(self.resource_url)
 64 | 
 65 |         return basename(self.resource_url)
 66 | 
 67 |     @property
 68 |     def target_file(self):
 69 |         if self.path.endswith(DEFAULT_METATAB_FILE):
 70 |             return DEFAULT_METATAB_FILE
 71 |         elif self.resource_format in self.simple_file_formats:
 72 |             return self.resource_file
 73 |         elif self.resource_format == 'xlsx':
 74 |             return 'meta'
 75 |         elif self.resource_format == 'zip':
 76 |             return 'metadata.csv'
 77 |         else:
 78 |             return self.resource_file
 79 | 
 80 |     @property
 81 |     def target_format(self):
 82 |         if self.resource_format in self.simple_file_formats:
 83 |             return self.resource_format
 84 |         elif self.resource_format == 'xlsx':
 85 |             return 'xlsx'
 86 |         elif self.resource_format == 'zip':
 87 |             return 'csv'
 88 |         else:
 89 |             return 'csv'
 90 | 
 91 |     @property
 92 |     def doc(self):
 93 |         """Return the metatab document for the URL"""
 94 |         from metatab import MetatabDoc
 95 |         t = self.get_resource().get_target()
 96 |         return MetatabDoc(t.inner)
 97 | 
 98 |     @property
 99 |     def generator(self):
100 | 
101 |         from rowgenerators import get_generator
102 | 
103 |         ##
104 |         ## Hack! This used to be
105 |         ## target = self.get_resource().get_target().inner
106 | 
107 |         target = self.get_resource().get_target()
108 | 
109 |         return get_generator(target)
110 | 
111 |     def get_resource(self):
112 | 
113 |         if self.scheme == 'file':
114 |             u = self
115 |         else:
116 |             u = WebUrl(str(self), downloader=self._downloader).get_resource()
117 | 
118 |         return MetatabUrl(str(u), downloader=self._downloader)
119 | 
120 |     def get_target(self):
121 |         return MetatabUrl(str(self.inner.get_target()), downloader=self._downloader)
122 | 
123 |     def join_target(self, tf):
124 | 
125 |         print("Type=", type(self))
126 | 
127 |         if self.target_file == DEFAULT_METATAB_FILE:
128 |             return self.inner.join_dir(tf)
129 |         else:
130 |             return self.inner.join_target(tf)
131 | 
132 |     def exists(self):
133 |         return self.inner.exists()
134 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/notebooks/SimpleMagicsTest.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# This is the Title of the Notebook"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "And this is the description"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 1,
 20 |    "metadata": {
 21 |     "collapsed": true
 22 |    },
 23 |    "outputs": [],
 24 |    "source": [
 25 |     "from metatab.jupyter.script import get_ipython\n",
 26 |     "import pandas as pd\n",
 27 |     "from os.path import exists\n",
 28 |     "from os import remove"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "code",
 33 |    "execution_count": 2,
 34 |    "metadata": {},
 35 |    "outputs": [
 36 |     {
 37 |      "name": "stdout",
 38 |      "output_type": "stream",
 39 |      "text": [
 40 |       "This is a Bash cell\n"
 41 |      ]
 42 |     }
 43 |    ],
 44 |    "source": [
 45 |     "%%bash\n",
 46 |     "echo \"This is a Bash cell\"\n",
 47 |     "touch /tmp/footouched"
 48 |    ]
 49 |   },
 50 |   {
 51 |    "cell_type": "code",
 52 |    "execution_count": null,
 53 |    "metadata": {
 54 |     "collapsed": true
 55 |    },
 56 |    "outputs": [],
 57 |    "source": [
 58 |     "assert exists('/tmp/footouched')\n",
 59 |     "remove('/tmp/footouched')\n",
 60 |     "assert not exists('/tmp/footouched')"
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "code",
 65 |    "execution_count": 2,
 66 |    "metadata": {},
 67 |    "outputs": [
 68 |     {
 69 |      "name": "stdout",
 70 |      "output_type": "stream",
 71 |      "text": [
 72 |       "MagicsTest.ipynb\r\n"
 73 |      ]
 74 |     }
 75 |    ],
 76 |    "source": [
 77 |     "!ls"
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "code",
 82 |    "execution_count": 2,
 83 |    "metadata": {
 84 |     "collapsed": true
 85 |    },
 86 |    "outputs": [],
 87 |    "source": [
 88 |     "%%metatab -p . \n",
 89 |     "Origin: example.com\n",
 90 |     "Dataset: foobar.com \n",
 91 |     "Identifier: de097279-28ef-42f5-a4f5-0eaac53b7dc4\n",
 92 |     "Name: example.com-foobar.com"
 93 |    ]
 94 |   },
 95 |   {
 96 |    "cell_type": "code",
 97 |    "execution_count": null,
 98 |    "metadata": {
 99 |     "collapsed": true
100 |    },
101 |    "outputs": [],
102 |    "source": [
103 |     "assert mt_pkg.find_first_value('Root.Name') == 'example.com-foobar.com'"
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "code",
108 |    "execution_count": 1,
109 |    "metadata": {},
110 |    "outputs": [
111 |     {
112 |      "data": {
113 |       "text/plain": [
114 |        "20"
115 |       ]
116 |      },
117 |      "execution_count": 1,
118 |      "metadata": {},
119 |      "output_type": "execute_result"
120 |     }
121 |    ],
122 |    "source": [
123 |     "foo = 10\n",
124 |     "bar = 20\n",
125 |     "print(bar)"
126 |    ]
127 |   },
128 |   {
129 |    "cell_type": "code",
130 |    "execution_count": null,
131 |    "metadata": {
132 |     "collapsed": true
133 |    },
134 |    "outputs": [],
135 |    "source": [
136 |     "print(\"DIsplaying Locals\")\n",
137 |     "print(locals())"
138 |    ]
139 |   }
140 |  ],
141 |  "metadata": {
142 |   "celltoolbar": "Tags",
143 |   "kernelspec": {
144 |    "display_name": "Python 3",
145 |    "language": "python",
146 |    "name": "python3"
147 |   },
148 |   "language_info": {
149 |    "codemirror_mode": {
150 |     "name": "ipython",
151 |     "version": 3
152 |    },
153 |    "file_extension": ".py",
154 |    "mimetype": "text/x-python",
155 |    "name": "python",
156 |    "nbconvert_exporter": "python",
157 |    "pygments_lexer": "ipython3",
158 |    "version": "3.6.1"
159 |   },
160 |   "varInspector": {
161 |    "cols": {
162 |     "lenName": 16,
163 |     "lenType": 16,
164 |     "lenVar": 40
165 |    },
166 |    "kernels_config": {
167 |     "python": {
168 |      "delete_cmd_postfix": "",
169 |      "delete_cmd_prefix": "del ",
170 |      "library": "var_list.py",
171 |      "varRefreshCmd": "print(var_dic_list())"
172 |     },
173 |     "r": {
174 |      "delete_cmd_postfix": ") ",
175 |      "delete_cmd_prefix": "rm(",
176 |      "library": "var_list.r",
177 |      "varRefreshCmd": "cat(var_dic_list()) "
178 |     }
179 |    },
180 |    "types_to_exclude": [
181 |     "module",
182 |     "function",
183 |     "builtin_function_or_method",
184 |     "instance",
185 |     "_Feature"
186 |    ],
187 |    "window_display": false
188 |   }
189 |  },
190 |  "nbformat": 4,
191 |  "nbformat_minor": 2
192 | }
193 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/scripts/complex-text.txt:
--------------------------------------------------------------------------------
  1 | Identifier: 47bc1089-7584-41f0-b804-602ec42f1249
  2 | Origin: civicknowledge.com
  3 | Dataset: rcfe_affordability
  4 | Version: 4
  5 | Time: 2015
  6 | Name: civicknowledge.com-rcfe_affordability-2015-4
  7 | 
  8 | Section: Contacts
  9 | Wrangler: Eric Busboom
 10 | Wrangler.Email: eric@civicknowledge.com
 11 | Wrangler.Organization: Civic Knowledge
 12 | 
 13 | Section: References
 14 | 
 15 | Reference: censusreporter:B09020/140/05000US06073
 16 | Reference.Name: B09020
 17 | Reference.Description: Relationship by Household Type (Including Living Alone) for Population 65 Years and Over
 18 | 
 19 | Reference: censusreporter:B25007/140/05000US06073
 20 | Reference.Name: B25007
 21 | Reference.Description: Tenure by Age of Householder
 22 | 
 23 | #
 24 | # Household Income
 25 | #
 26 | 
 27 | Reference: censusreporter:B19049/140/05000US06073
 28 | Reference.Name: B19049
 29 | Reference.Description: Median Household Income by Age of Householder
 30 | 
 31 | 
 32 | # For whole county
 33 | 
 34 | Reference: censusreporter:B19049/050/05000US06073
 35 | Reference.Name: B19049_county
 36 | Reference.Description: Median Household Income by Age of Householder
 37 | 
 38 | #
 39 | # Home value distributions, by tract
 40 | #
 41 | 
 42 | Reference: censusreporter:B25076/140/05000US06073
 43 | Reference.Name: B25076
 44 | Reference.Description: Lower Value Quartile (Dollars)
 45 | 
 46 | Reference: censusreporter:B25077/140/05000US06073
 47 | Reference.Name: B25077
 48 | Reference.Description: Median Value
 49 | 
 50 | Reference: censusreporter:B25078/140/05000US06073
 51 | Reference.Name: B25078
 52 | Reference.Description: Upper Value Quartile (Dollars)
 53 | 
 54 | #
 55 | # Home  value distributions, for SD County
 56 | #
 57 | Reference: censusreporter:B25076/050/05000US06073
 58 | Reference.Name: B25076_county
 59 | Reference.Description: Lower Value Quartile (Dollars)
 60 | 
 61 | Reference: censusreporter:B25077/050/05000US06073
 62 | Reference.Name: B25077_county
 63 | Reference.Description: Median Value
 64 | 
 65 | Reference: censusreporter:B25078/050/05000US06073
 66 | Reference.Name: B25078_county
 67 | Reference.Description: Upper Value Quartile (Dollars)
 68 | 
 69 | #
 70 | # Tract crosswalk
 71 | #
 72 | Reference: metatab+http://s3.amazonaws.com/library.metatab.org/sangis.org-census_regions-2010-sandiego-7.csv#tract-sra-msa-xwalk
 73 | Reference.Name: tracts
 74 | Reference.Description: Crosswalk between crosswalks, tracts, zip codes and SRAs in San Diego County
 75 | 
 76 | #
 77 | # Tract boundaries
 78 | #
 79 | Reference: metatab+http://s3.amazonaws.com/library.metatab.org/sangis.org-census_regions-2010-sandiego-7.csv#tracts
 80 | Reference.Name: tracts_geo
 81 | Reference.Description: Geographics Boundaries for Tracts
 82 | 
 83 | #
 84 | # SRA boundaries
 85 | #
 86 | Reference: metatab+http://s3.amazonaws.com/library.metatab.org/sangis.org-census_regions-2010-sandiego-7.csv#sra
 87 | Reference.Name: sra_geo
 88 | Reference.Description: Geographics Boundaries for SRAs
 89 | 
 90 | #
 91 | # IPUMS Housing and Income Data
 92 | #
 93 | # Need to use the ZIP version b/c we need to import the Python Code
 94 | Reference: metatab+http://s3.amazonaws.com/library.metatab.org/ipums.org-income_homevalue-5.zip#income_homeval
 95 | Reference.Name: incv
 96 | Reference.Description: Income and Home value records from IPUMS for San Diego County
 97 | Section: Resources
 98 | 
 99 | 
100 | Section: Bibliography
101 | Citation: ipums
102 | Citation.Type: dataset
103 | Citation.Author: Steven Ruggles; Katie Genadek; Ronald Goeken; Josiah Grover; Matthew Sobek
104 | Citation.Title: Integrated Public Use Microdata Series
105 | Citation.Year: 2017
106 | Citation.Publisher: University of Minnesota
107 | Citation.Version: 7.0
108 | Citation.AccessDate: 20170718
109 | Citation.Url: https://usa.ipums.org/usa/index.shtml
110 | Citation.Doi: https://doi.org/10.18128/D010.V7.0
111 | 
112 | Citation: bordley
113 | Citation.Type: article
114 | Citation.Author: Robert F. Bordley; James B. McDonald; Anand Mantrala
115 | Citation.Title: Something New, Something Old: Parametric Models for the Size of Distribution of Income
116 | Citation.Year: 1997
117 | Citation.Month: June
118 | Citation.Journal: Journal of Income Distribution
119 | Citation.Volume: 6
120 | Citation.Number: 1
121 | Citation.Pages: 5-5
122 | Citation.Url: https://ideas.repec.org/a/jid/journl/y1997v06i1p5-5.html
123 | 
124 | Citation: mcdonald
125 | Citation.Type: article
126 | Citation.Author: McDonald, James B.;  Mantrala, Anand
127 | Citation.Title: The distribution of personal income: Revisited
128 | Citation.Journal: Journal of Applied Econometrics
129 | Citation.Volume: 10
130 | Citation.Number: 2
131 | Citation.Publisher: Wiley Subscription Services, Inc., A Wiley Company
132 | Citation.Issn: 1099-1255
133 | Citation.Doi: 10.1002/jae.3950100208
134 | Citation.Pages: 201--204,
135 | Citation.Year: 1995
136 | 
137 | Citation: majumder
138 | Citation.Type: article
139 | Citation.Author: Majumder, Amita; Chakravarty, Satya Ranjan
140 | Citation.Title: Distribution of personal income: Development of a new model and its application to U.S. income data
141 | Citation.Journal: Journal of Applied Econometrics
142 | Citation.Volume: 5
143 | Citation.Number: 2
144 | Citation.Publisher: Wiley Subscription Services, Inc., A Wiley Company
145 | Citation.Issn: 1099-1255
146 | Citation.Doi: 10.1002/jae.3950050206
147 | Citation.Pages: 189--196
148 | Citation.Year: 1990


--------------------------------------------------------------------------------
/docs/PrivateDatasets.rst:
--------------------------------------------------------------------------------
  1 | 
  2 | Private Datasets
  3 | ================
  4 | 
  5 | Datasets that should be protected from unauthorized access can be written to S3 with a private ACL and accessed using S3 credentials. To use private datasets:
  6 | 
  7 | - Use the **metaaws** program to set up an S3 bucket with a policy and users
  8 | - Add a ``Root.Access`` term to the dataset's metatab document.
  9 | - Synchronize the dataset to S3 with **metasync**
 10 | - Set up credentials for an S3 user
 11 | - Access the dataset using an S3 url.
 12 | 
 13 | Setup The S3 Bucket
 14 | -------------------
 15 | 
 16 | Suppose we want to store datasets in a bucket ``bucket.example.com``. After creating the bucket, initialize it with subdirectories and policies with the **metaaws** program.
 17 | 
 18 | .. code-block:: bash
 19 | 
 20 |     $ metaaws init-bucket bucket.example.com
 21 | 
 22 | 
 23 | 
 24 | Configure and Sync a Dataset
 25 | ----------------------------
 26 | 
 27 | To make a dataset private, add a ``Root.Access`` term to the ``Root`` section, with a value of ``private``.
 28 | 
 29 | 
 30 | 
 31 | Create S3 Users
 32 | ---------------
 33 | 
 34 | Use the **metaaws** program to create users and add permissions to the bucket. First, initialize a bucket with the appropriate policies:
 35 | 
 36 | .. code-block:: bash
 37 | 
 38 |     $ metaaws init-bucket bucket.example.com
 39 | 
 40 | Then, create a new user.
 41 | 
 42 | .. code-block:: bash
 43 | 
 44 |     $ metaaws new-user foobar
 45 |     Created user : foobar
 46 |     arn          : arn:aws:iam::095555823111:user/metatab/foobar
 47 |     Access Key   : AKIAJXMFAP3X5TRYYQ5Q
 48 |     Secret Key   : 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
 49 | 
 50 | The secret key and access key should be given to the user, to set up according to the next
 51 | section.
 52 | 
 53 | Setup S3 Credentials
 54 | --------------------
 55 | 
 56 | The access and secret keys should be stored in a boto configuration file, such as ``~/.aws/credentials``. See
 57 | the `boto3 configuration documentation <http://boto3.readthedocs.io/en/latest/guide/configuration.html>`_ for details. Here is an example of a ``credentials`` file:
 58 | 
 59 | .. code-block:: ini
 60 | 
 61 |     [default]
 62 |     aws_access_key_id = AKIAJXMFAP3X5TRYYQ5Q
 63 |     aws_secret_access_key = b81zw4LRDKVILzrZbS0B8KMn88xbY9BEEnwzKrz2
 64 | 
 65 | 
 66 | If you have multiple credentials, you can put them in different sections by changing ``[default]`` to the name of another profile. For instance, here is a credentials file with a default and alternate profile:
 67 | 
 68 | .. code-block:: ini
 69 | 
 70 |     [default]
 71 |     aws_access_key_id = AKIAJXMFAP3X5TRYYQ5Q
 72 |     aws_secret_access_key = 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
 73 |     [fooprofile]
 74 |     aws_access_key_id = AKIAX5TRYYQ5QJXMFAP3
 75 |     aws_secret_access_key = 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
 76 | 
 77 | To use the alternate credentials with the ``metasync`` program, use the ``-p`` option:
 78 | 
 79 | .. code-block:: bash
 80 | 
 81 |     $ metasync -p fooprofile -S library.metatab.org
 82 | 
 83 | To use the alternate credentials with the ``open_package()`` function, you will need to set them in the shell before you run any programs. The ``metasync -C`` program will display the credentials in a form that can be shell eval'd, and the ``-p`` option can select an alternate profile.
 84 | 
 85 | .. code-block:: bash
 86 | 
 87 |     $ metasync -C -p fooprofile
 88 |     export AWS_ACCESS_KEY_ID=AKIAX5TRYYQ5QJXMFAP3
 89 |     export AWS_SECRET_ACCESS_KEY=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
 90 |     # Run  'eval $(metasync -C -p fooprofile )' to configure credentials in a shell
 91 | 
 92 | The last line of the output shows the command to run to set the credentials in the shell:
 93 | 
 94 | .. code-block:: bash
 95 | 
 96 |     $ eval $(metasync -C -p fooprofile )
 97 | 
 98 | Setting credentials in the shell is only required if you access the private dataset via ``open_package()``, although it should also work when using the ``metasync`` and ``metapack`` programs.
 99 | 
100 | Using Private Files
101 | -------------------
102 | 
103 | Private files can't be easily downloaded using a web browser, but there are a few other ways to fetch them.
104 | 
105 | * Use an S3 client, such as CyberDuck, S3 Browser, CloudBerry or S3 Tools.
106 | * Use the ``metapack`` program to dump a CSV file.
107 | 
108 | To use the ``metapack`` program, first list the resources in the remote package:
109 | 
110 | .. code-block:: bash
111 | 
112 |     $ metapack -r s3://library.civicknowledge.com/private/carr/civicknowledge.com-rcfe_health-1.csv
113 |     seniors s3://library.civicknowledge.com/private/carr/civicknowledge.com-rcfe_health-1/data/seniors.csv
114 |     rcfe_tract s3://library.civicknowledge.com/private/carr/civicknowledge.com-rcfe_health-1/data/rcfe_tract.csv
115 |     rcfe_sra s3://library.civicknowledge.com/private/carr/civicknowledge.com-rcfe_health-1/data/rcfe_sra.csv
116 |     rcfe_seniors_tract s3://library.civicknowledge.com/private/carr/civicknowledge.com-rcfe_health-1/data/rcfe_seniors_tract.csv
117 | 
118 | Then, run the same command again, but appending a fragment to the url, and redirecting to a csv file. For instance, for the 'seniors' file, append ``#seniors`` to the url:
119 | 
120 | 
121 | .. code-block:: bash
122 | 
123 |     $ metapack -r s3://.../civicknowledge.com-rcfe_health-1.csv#seniors > seniors.csv
124 | 
125 | You can also fetch the entire data package, downloading all of the data files, by creating a local file system, zip or excel package. The easiest to use is the Filesystem package, created with ``metapack -f``:
126 | 
127 | .. code-block:: bash
128 | 
129 |     $ metapack -f s3://.../civicknowledge.com-rcfe_health-1.csv
130 | 
131 | The command will create a complete data package with unpacked CSV files in the ``_packages`` subdirectory. 
132 | 
133 | 
134 | 
135 | 
136 | 
137 | 
138 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/example1.txt:
--------------------------------------------------------------------------------
  1 | Declare: metatab-latest
  2 | Title: Registered Voters, By County
  3 | Name: cdph.ca.gov-hci-registered_voters-county
  4 | Description: Percent of the eligible population registered to vote and the percent who voted in statewide elections.
  5 | Identifier: cdph.ca.gov-hci-registered_voters-county
  6 | Version: 201404
  7 | Obsoletes: cdph.ca.gov-hci-registered_voters-county-201304
  8 | Format: excel
  9 | Spatial: California <04000US06>
 10 | Time: 2002-2014
 11 | Spatialgrain: County <05000US>
 12 | Section: Resources
 13 | Datafile: http://example.com/example1.csv
 14 | Datafile.Name: example1
 15 | Datafile.Schema: registered_voters
 16 | Datafile.Grain: County
 17 | Datafile.Title: The First Example Data File
 18 | Datafile: http://example.com/example2.csv
 19 | Datafile.Name: example2
 20 | Datafile.Schema: registered_voters
 21 | Datafile.Grain: Tract
 22 | Datafile.Title: The Second Example Data File
 23 | Homepage: https://www.cdph.ca.gov/programs/pages/healthycommunityindicators.aspx
 24 | Homepage.Schema: Healthy Communities Data and Indicators Project (HCI)
 25 | Documentation: https://www.cdph.ca.gov/programs/Documents/HCI_RegisteredVoters_653_Narrative_and_examples_6-2-14.pdf
 26 | Documentation.Schema: Indicator Documentation for Voter Registration / Participation
 27 | Documentation.Description: Voter Registration/Participation: Percent of the eligible population registered to vote and the percent who voted in statewide elections
 28 | Section: Contacts
 29 | Creator: Office of Health Equity
 30 | Creator.Email: HCIOHE@cdph.ca.gov
 31 | Wrangler: Eric Busboom
 32 | Wrangler.Email: eric@civicknowledge.com
 33 | Section: Notes
 34 | Note: This file is an example of a data bundle, a simple format for linking data to metadata using spreadsheets. See the specification for more details.
 35 | Documentation: https://docs.google.com/document/d/16tb7x73AyF8pJ6e6IBcaIJAioEZCNBGDEksKYTXfdfg/edit#
 36 | Documentation.Title: Data Bundles Packaging Specification
 37 | Section: Schema
 38 | Table: registered_voters
 39 | Table.Description: HCI Indicator 653.0: Percent of adults age 18 years and older who are registered voters
 40 | Table.Column: reportyear
 41 | Column.Datatype: int
 42 | Column.Valuetype: year range
 43 | Column.Description: Year or years that indicator was reported
 44 | Table.Column: type
 45 | Column.Datatype: str
 46 | Column.Valuetype: dimension
 47 | Column.Description: Type of record
 48 | Table.Column: gvid
 49 | Column.Datatype: str
 50 | Column.Valuetype: gvid
 51 | Column.Description: GVid version of the geotype and geotypeval
 52 | Table.Column: geoname
 53 | Column.Datatype: str
 54 | Column.Valuetype: label for gvid
 55 | Column.Description: Census name of geographic area
 56 | Table.Column: geotype
 57 | Column.Datatype: str
 58 | Column.Valuetype: label
 59 | Column.Description: Code for type of geographic area
 60 | Table.Column: geotypevalue
 61 | Column.Datatype: str
 62 | Column.Valuetype: census
 63 | Column.Description: Census geoid code
 64 | Table.Column: county_fips
 65 | Column.Datatype: str
 66 | Column.Valuetype: FIPS county code
 67 | Column.Description: County FIPS code
 68 | Table.Column: county_name
 69 | Column.Datatype: str
 70 | Column.Valuetype: label for counrty_fips
 71 | Column.Description: County name
 72 | Table.Column: region_code
 73 | Column.Datatype: str
 74 | Column.Valuetype: census code
 75 | Column.Description: Numeric code of region
 76 | Table.Column: region_name
 77 | Column.Datatype: str
 78 | Column.Valuetype: label for region_code
 79 | Column.Description: Name of region
 80 | Table.Column: raceth
 81 | Column.Datatype: str
 82 | Column.Valuetype: raceth/civick
 83 | Column.Description: Civic Knowledge race / ethnicity code.
 84 | Table.Column: raceth_name
 85 | Column.Datatype: str
 86 | Column.Valuetype: label for raceeth
 87 | Column.Description: Race / Ethnicity Name
 88 | Table.Column: race_eth_code
 89 | Column.Datatype: str
 90 | Column.Valuetype: raceth/hci
 91 | Column.Description: Race / ethnicity code
 92 | Table.Column: race_eth_name
 93 | Column.Datatype: str
 94 | Column.Valuetype: label for race_eth_code
 95 | Column.Description: Race / ethnicity name
 96 | Table.Column: numerator
 97 | Column.Datatype: int
 98 | Column.Valuetype: count
 99 | Column.Description: Adults who are registered to vote, or who voted, depending on type of record
100 | Table.Column: denominator
101 | Column.Datatype: int
102 | Column.Valuetype: count
103 | Column.Description: Population of Adults, 18 years or older
104 | Table.Column: percent
105 | Column.Datatype: float
106 | Column.Valuetype: percent of numerator over denominator
107 | Column.Description: Percent of adults who are registered to vote, or who voted, depending on type of record
108 | Table.Column: ll_95ci
109 | Column.Datatype: float
110 | Column.Valuetype: ci95l for percent
111 | Column.Description: Lower bound of 95% confidence interval
112 | Table.Column: ul_95ci
113 | Column.Datatype: float
114 | Column.Valuetype: ci95u for percent
115 | Column.Description: Upper bound of 95% confidence interval
116 | Table.Column: se
117 | Column.Datatype: float
118 | Column.Valuetype: se for percent
119 | Column.Description: Standard error
120 | Table.Column: rse
121 | Column.Datatype: float
122 | Column.Valuetype: rse for percent
123 | Column.Description: Relative standard error (se/percent * 100) expressed as a percent
124 | Table.Column: ca_decile
125 | Column.Datatype: float
126 | Column.Valuetype: decile
127 | Column.Description: Statewide decile ranking
128 | Table.Column: ca_rr
129 | Column.Datatype: float
130 | Column.Valuetype: ratio
131 | Column.Description: Ratio of indicator to state average
132 | Table.Column: vap
133 | Column.Datatype: float
134 | Column.Valuetype: measure
135 | Column.Description: Voter age population, from CA Department of Finance.
136 | Table.Column: ind_id
137 | Column.Datatype: str
138 | Column.Valuetype: dimension
139 | Table.Column: ind_definition
140 | Column.Datatype: str
141 | Column.Valuetype: dimension
142 | Table.Column: version
143 | Column.Datatype: str
144 | Column.Valuetype: other
145 | 


--------------------------------------------------------------------------------
/metatab/rowgen.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2017 Civic Knowledge. This file is licensed under the terms of the
  2 | # MIT License, included in this distribution as LICENSE.txt
  3 | 
  4 | """ """
  5 | from rowgenerators import Source
  6 | from rowgenerators.source import Source
  7 | from rowgenerators import SourceError
  8 | 
  9 | class YamlMetatabSource(Source):
 10 |     """Turn a metatab-formated YAML file into Metatab rows."""
 11 | 
 12 |     def __init__(self, ref, table=None, cache=None, working_dir=None, env=None, **kwargs):
 13 |         super().__init__(ref, cache, working_dir, **kwargs)
 14 | 
 15 |         self.url = ref
 16 |         self.section_map = {}
 17 |         self.sections = {}
 18 | 
 19 |     def yield_dict(self, doc, d, parent=None):
 20 | 
 21 |         for k, v in d.items():
 22 | 
 23 |             tn = "{}.{}".format((parent or 'Root').split('.')[-1], k).lower()
 24 |             t = doc.decl_terms.get(tn,{})
 25 |             vtn = t.get('termvaluename','').lower()
 26 | 
 27 |             if isinstance(v, list):
 28 |                 for e in v:
 29 |                     try:
 30 |                         value = e[vtn]
 31 |                         del e[vtn]
 32 |                         yield (tn, value, parent)
 33 |                     except KeyError:
 34 |                         pass
 35 | 
 36 |                     yield from self.yield_dict(doc, e, tn)
 37 |             elif isinstance(v, dict):
 38 |                 yield from self.yield_dict(doc, v, tn)
 39 |             else:
 40 |                 yield (tn,v, parent)
 41 | 
 42 | 
 43 |     def __iter__(self):
 44 |         """Iterate over all of the lines in the file"""
 45 | 
 46 |         import yaml
 47 |         from metatab import MetatabDoc
 48 | 
 49 |         with open(self.url.fspath) as f:
 50 |             d = yaml.load(f)
 51 | 
 52 |         decl = d.get('declare', 'metatab-latest')
 53 | 
 54 |         doc = MetatabDoc(decl=decl)
 55 | 
 56 |         #yield from doc.rows
 57 | 
 58 |         section_names = ['root','contacts','documentation','resources','references','schema']
 59 | 
 60 |         for section_name in section_names:
 61 |             section =  doc.decl_sections[section_name]
 62 |             #print(section_name, section)
 63 | 
 64 |             for tn in  section.get('terms',[]):
 65 |                 self.section_map[tn.lower()] = section_name
 66 | 
 67 |             self.sections[section_name] = doc.get_or_new_section(section_name, section['args'])
 68 | 
 69 |         last_section = None
 70 |         last_term = { }
 71 |         for term_name, value, parent in self.yield_dict(doc, d):
 72 | 
 73 |             print(term_name, value, parent)
 74 | 
 75 |             section = self.sections.get(self.section_map.get(term_name) or 'root')
 76 | 
 77 |             if parent is None:
 78 |                 term = section.new_term(term_name, value)
 79 |             else:
 80 | 
 81 |                 parent_term = last_term[parent]
 82 |                 term = parent_term.new_child(term_name, value)
 83 | 
 84 |             last_term[term_name] = term
 85 | 
 86 | 
 87 | 
 88 | 
 89 |         yield from doc.rows
 90 | 
 91 | 
 92 | class MetatabRowGenerator(Source):
 93 |     """An object that generates rows. The current implementation mostly just a wrapper around
 94 |     csv.reader, but it adds a path property so term interperters know where the terms are coming from
 95 |     """
 96 | 
 97 |     def __init__(self, ref, cache=None, working_dir=None, path = None, **kwargs):
 98 |         super().__init__(ref, cache, working_dir, **kwargs)
 99 | 
100 |         self._rows = ref
101 |         self._path = path or '<none>'
102 | 
103 |     @property
104 |     def path(self):
105 |         return self._path
106 | 
107 |     def open(self):
108 |         pass
109 | 
110 |     def close(self):
111 |         pass
112 | 
113 |     def __iter__(self):
114 |         for row in self._rows:
115 |             yield row
116 | 
117 | 
class TextRowGenerator(MetatabRowGenerator):
    """Return lines of text of a line-oriented metatab file, breaking them to
    be used as Metatab rows. This is the core of the Lines format
    implementation.

    ``path``, ``open()`` and ``close()`` are inherited unchanged from
    ``MetatabRowGenerator``.
    """

    def __init__(self, ref, cache=None, working_dir=None, path = None, **kwargs):
        """Read the whole text behind ``ref`` and split it into lines.

        :param ref: One of: an object with an ``open()`` method (such as a
            ``pathlib.Path``), an object with a ``read()`` method, an object
            whose ``inner.fspath`` can be opened, a file name, or the text
            itself. The forms are tried in that order.
        :param cache: Passed through to the parent constructor.
        :param working_dir: Passed through to the parent constructor.
        :param path: Label reported by ``path``; ``'<none>'`` when falsy.
        :param kwargs: Passed through to the parent constructor.
        :raises SourceError: If none of the forms applies to ``ref``.
        """
        super().__init__(ref, cache, working_dir, path, **kwargs)

        text = self._read_text(ref)

        self._text = text
        self._text_lines = text.splitlines()
        self._path = path or '<none>'

    @staticmethod
    def _read_text(ref):
        """Return the text behind ``ref``, trying each supported form in turn.

        A failed attempt only means ``ref`` is not of that form, so its
        exception is discarded and the next form is tried. ``Exception`` is
        caught rather than everything, so KeyboardInterrupt and SystemExit
        still propagate.
        """

        try:
            # Pathlib Path
            with ref.open() as r:
                return r.read()
        except Exception:
            pass

        try:
            # Filehandle
            return ref.read()
        except Exception:
            pass

        try:
            # Url
            with ref.inner.fspath.open() as f:
                return f.read()
        except Exception:
            pass

        try:
            # File name
            with open(ref) as r:
                return r.read()
        except Exception:
            pass

        try:
            # The text itself: anything that can be split into lines
            ref.splitlines()
        except AttributeError:
            raise SourceError("Can't handle ref of type {}".format(type(ref))) from None

        return ref

    def __iter__(self):
        """Yield one row (a list of strings) per non-comment line.

        Each line is split at its first ``:`` into a term and a value, both
        stripped of surrounding whitespace. The value is then split into
        columns at every ``|`` that is not preceded by a backslash, and
        ``\\|`` in a column is replaced by a literal ``|``. A line without a
        ``:`` yields a single-element row.
        """
        import re

        # Compiled once per iteration instead of once per line.
        comment_pat = re.compile(r'^\s*#')
        heading_pat = re.compile(r'^=*')
        pipe_pat = re.compile(r'(?<!\\)\|')

        for line in self._text_lines:
            if comment_pat.match(line):  # Skip comments
                continue

            # Special handling for ====, which implies a section:
            #   ==== Schema
            # is also
            #   Section: Schema

            if line.startswith('===='):
                line = heading_pat.sub('Section:', line)

            row = [e.strip() for e in line.split(':', 1)]

            # Pipe characters separate columns
            if len(row) > 1:
                row = [row[0]] + [e.replace('\\|', '|') for e in pipe_pat.split(row[1])]

            yield row


--------------------------------------------------------------------------------
/metatab/test/test-data/properties.csv:
--------------------------------------------------------------------------------
 1 | "Declare","metatab-latest",,,,,,,,,,,,,
 2 | "Identifier","47bc1089-7584-41f0-b804-602ec42f1249",,,,,,,,,,,,,
 3 | "Origin","civicknowledge.com",,,,,,,,,,,,,
 4 | "Dataset","rcfe_affordability",,,,,,,,,,,,,
 5 | "Version",2,,,,,,,,,,,,,
 6 | "Time",2015,,,,,,,,,,,,,
 7 | "Name","civicknowledge.com-rcfe_affordability-2015-2",,,,,,,,,,,,,
 8 | "Title","San Diego RCFE Affordability Index",,,,,,,,,,,,,
 9 | "Description","A collection of synthetic datasets that estimate the number of households in San Diego County with one or more seniors which can afford 22 months of residential care.",,,,,,,,,,,,,
10 | ,,,,,,,,,,,,,,
11 | "Section","Contacts","Email",,,,,,,,,,,,
12 | "Wrangler","Eric Busboom","eric@civicknowledge.com",,,,,,,,,,,,
13 | "Wrangler.Organization","Civic Knowledge",,,,,,,,,,,,,
14 | "Wrangler.Tel","555-555-1234",,,,,,,,,,,,,
15 | "Wrangler.Url","http://civicknowledge.com",,,,,,,,,,,,,
16 | ,,,,,,,,,,,,,,
17 | "Section","References","Name","Title","Description","StartLine","HeaderLines","Encoding",,,,,,,
18 | "Reference","censusreporter:B09020/140/05000US06073","B09020",,"Relationship by Household Type (Including Living Alone) for Population 65 Years and Over",,,,,,,,,,
19 | "Reference","censusreporter:B25007/140/05000US06073","B25007",,"Tenure by Age of Householder",,,,,,,,,,
20 | "Reference","censusreporter:B19049/140/05000US06073","B19049",,"Median Household Income by Age of Householder",,,,,,,,,,
21 | ,,,,,,,,,,,,,,
22 | "Section","Resources","Name","Title","Description","StartLine","HeaderLines","Encoding",,,,,,,
23 | "Datafile","file:data/affordability.csv","affordability","Number of seniors who can afford RCFE care. Full final dataset with all columns",,,,,,,,,,,
24 | "Datafile","file:data/afford_tracts.csv","afford_tracts","Number of seniors who can afford RCFE care by tract",,,,,,,,,,,
25 | ,,,,,,,,,,,,,,
26 | "Section","Bibliography","Type","Month","Publisher","Journal","Version","Volume","Number","Pages","AccessDate","Location","Url","Doi","Issn"
27 | "Citation","ipums","dataset",,"University of Minnesota",,7,,,,20170718,,"https://usa.ipums.org/usa/index.shtml","https://doi.org/10.18128/D010.V7.0",
28 | ".Author","Steven Ruggles; Katie Genadek; Ronald Goeken; Josiah Grover; Matthew Sobek",,,,,,,,,,,,,
29 | ".Title","Integrated Public Use Microdata Series",,,,,,,,,,,,,
30 | ".Year",2017,,,,,,,,,,,,,
31 | "Citation","bordley","article","June",,"Journal of Income Distribution",,6,1,"5-5",,,"https://ideas.repec.org/a/jid/journl/y1997v06i1p5-5.html",,
32 | "Citation","mcdonald","article",,"Wiley Subscription Services, Inc., A Wiley Company","Journal of Applied Econometrics",,10,2,"201--204,",,,,"10.1002/jae.3950100208","1099-1255"
33 | "Citation","majumder","article",,"Wiley Subscription Services, Inc., A Wiley Company","Journal of Applied Econometrics",,5,2,"189--196",,,,"10.1002/jae.3950050206","1099-1255"
34 | ,,,,,,,,,,,,,,
35 | "Section","Schema","DataType","ValueType","Description",,,,,,,,,,
36 | "Table","affordability",,,,,,,,,,,,,
37 | "Table.Column","Index","text",,,,,,,,,,,,
38 | "Column.Altname","index",,,,,,,,,,,,,
39 | "Table.Column","senior_pop","integer",,,,,,,,,,,,
40 | "Table.Column","senior_pop_m90","integer",,,,,,,,,,,,
41 | "Table.Column","homeown_65","integer",,,,,,,,,,,,
42 | "Table.Column","homeown_65_m90","number",,,,,,,,,,,,
43 | "Table.Column","renters_65","integer",,,,,,,,,,,,
44 | "Table.Column","median_income","number",,,,,,,,,,,,
45 | "Table.Column","median_income_25_m90","number",,,,,,,,,,,,
46 | "Table.Column","hv_25","integer",,,,,,,,,,,,
47 | "Table.Column","hv_25_m90","number",,,,,,,,,,,,
48 | "Table.Column","hv_50","integer",,,,,,,,,,,,
49 | "Table.Column","hv_50_m90","number",,,,,,,,,,,,
50 | "Table.Column","hv_75","number",,,,,,,,,,,,
51 | "Table.Column","hv_75_m90","number",,,,,,,,,,,,
52 | "Table.Column","tract","number",,,,,,,,,,,,
53 | "Table.Column","tract_census_geoid","integer",,,,,,,,,,,,
54 | "Table.Column","sra","integer",,,,,,,,,,,,
55 | "Table.Column","sra_name","text",,,,,,,,,,,,
56 | "Table.Column","msa","integer",,,,,,,,,,,,
57 | "Table.Column","msa_name","text",,,,,,,,,,,,
58 | "Table.Column","lon","number",,,,,,,,,,,,
59 | "Table.Column","lat","number",,,,,,,,,,,,
60 | "Table.Column","can_afford","integer",,,,,,,,,,,,
61 | "Table.Column","counts","integer",,,,,,,,,,,,
62 | "Table.Column","cant_afford","integer",,,,,,,,,,,,
63 | "Table.Column","afford_pct","integer",,,,,,,,,,,,
64 | "Table","afford_tracts",,,,,,,,,,,,,
65 | "Table.Column","Index","text",,,,,,,,,,,,
66 | "Column.Altname","index",,,,,,,,,,,,,
67 | "Table.Column","senior_pop","integer",,"Number of seniors, aged 65+",,,,,,,,,,
68 | "Table.Column","can_afford","integer",,"Number of seniors who can afford RCFE care",,,,,,,,,,
69 | "Table.Column","afford_pct","integer",,"Percentage of seniors who can afford RCFE care",,,,,,,,,,
70 | "Table.Column","score","integer",,"Affordability Score",,,,,,,,,,
71 | "Table","afford_sra",,,,,,,,,,,,,
72 | "Table.Column","Index","text",,,,,,,,,,,,
73 | "Column.Altname","index",,,,,,,,,,,,,
74 | "Table.Column","can_afford","integer",,"Number of seniors who can afford RCFE care",,,,,,,,,,
75 | "Table.Column","senior_pop","integer",,"Number of seniors, aged 65+",,,,,,,,,,
76 | "Table.Column","afford_pct","number",,"Percentage of seniors who can afford RCFE care",,,,,,,,,,
77 | "Table.Column","score","number",,"Affordability Score",,,,,,,,,,
78 | "Table","afford_msa",,,,,,,,,,,,,
79 | "Table.Column","Index","text",,,,,,,,,,,,
80 | "Column.Altname","index",,,,,,,,,,,,,
81 | "Table.Column","can_afford","integer",,"Number of seniors who can afford RCFE care",,,,,,,,,,
82 | "Table.Column","senior_pop","integer",,"Number of seniors, aged 65+",,,,,,,,,,
83 | "Table.Column","afford_pct","integer",,"Percentage of seniors who can afford RCFE care",,,,,,,,,,
84 | "Table.Column","score","integer",,"Affordability Score",,,,,,,,,,
85 | ,,,,,,,,,,,,,,
86 | "Section","Documentation","Name","Title","Description",,,,,,,,,,
87 | "Documentation","docs/notebook.html","notebook.html","Jupyter Notebook (HTML)",,,,,,,,,,,
88 | "Image","docs/image_7_0.png",,"Image for HTML Documentation",,,,,,,,,,,
89 | "Image","docs/image_9_0.png",,"Image for HTML Documentation",,,,,,,,,,,
90 | "Image","docs/image_25_0.png",,"Image for HTML Documentation",,,,,,,,,,,
91 | "Image","docs/image_33_0.png",,"Image for HTML Documentation",,,,,,,,,,,
92 | "Image","docs/image_34_0.png",,"Image for HTML Documentation",,,,,,,,,,,
93 | "Image","docs/image_35_0.png",,"Image for HTML Documentation",,,,,,,,,,,
94 | "Image","docs/image_36_0.png",,"Image for HTML Documentation",,,,,,,,,,,
95 | "Documentation","docs/documentation.md",,"Documentation (Markdown)",,,,,,,,,,,
96 | "Documentation","docs/documentation.html",,"Primary Documentation (HTML)",,,,,,,,,,,
97 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/almost-everything.csv:
--------------------------------------------------------------------------------
 1 | "Declare","metatab-latest",,,,,,,,,
 2 | "Title","Almost Everything",,,,,,,,,
 3 | "Name","cdph.ca.gov-hci-registered_voters-county",,,,,,,,,
 4 | "Description","Almost all of the terms",,,,,,,,,
 5 | "Identifier","9FC11204-B291-4E0E-A841-5372090ADEC0",,,,,,,,,
 6 | "Version",56,,,,,,,,,
 7 | "Obsoletes","previous-almost-everything",,,,,,,,,
 8 | "Format","excel",,,,,,,,,
 9 | ,,,,,,,,,,
10 | "Origin","example.com",,,,,,,,,
11 | "Time",2017,,,,,,,,,
12 | "Space","CA",,,,,,,,,
13 | ,,,,,,,,,,
14 | "Section","Resources",,,,,,,,,
15 | "Header","url","name","schema","Grain","Title","Description",,,,
16 | "Datafile","http://example.com/example1.csv","example1","registered_voters","County","The First Example Data File",,,,,
17 | "Datafile","http://example.com/example2.csv","example2","registered_voters","Tract","The Second Example Data File",,,,,
18 | "Reference","censusreporter:B09020/140/05000US06073","B09020",,,,"Relationship by Household Type (Including Living Alone) for Population 65 Years and Over",,,,
19 | "Reference","metatab+http://s3.amazonaws.com/library.metatab.org/sangis.org-census_regions-2010-sandiego-7.csv#tract-sra-msa-xwalk","tracts",,,,"Crosswalk between crosswalks, tracts, zip codes and SRAs in San Diego County",,,,
20 | "Homepage","https://www.cdph.ca.gov/programs/pages/healthycommunityindicators.aspx",,"Healthy Communities Data and Indicators Project (HCI)",,,,,,,
21 | "Documentation","https://www.cdph.ca.gov/programs/Documents/HCI_RegisteredVoters_653_Narrative_and_examples_6-2-14.pdf",,"Indicator Documentation for Voter Registration / Participation",,,,,,,
22 | ".description","Voter Registration/Participation: Percent of the eligible population registered to vote and the percent who voted in statewide elections",,,,,,,,,
23 | ,,,,,,,,,,
24 | "Section","References","Name","Title","Description",,,,,,
25 | "Reference","censusreporter:B25007/140/05000US06073","B25007",,"Tenure by Age of Householder",,,,,,
26 | "Reference","censusreporter:B19049/140/05000US06073","B19049",,"Median Household Income by Age of Householder",,,,,,
27 | "Reference","censusreporter:B19049/050/05000US06073","B19049_county",,"Median Household Income by Age of Householder",,,,,,
28 | ,,,,,,,,,,
29 | "Section","Contacts","Email","Organization","Tel","Url",,,,,
30 | "Wrangler","Eric Busboom","eric@civicknowledge.com","Civic Knowledge",,,,,,,
31 | "Creator","Office of Health Equity",,"HCIOHE@cdph.ca.gov",,,,,,,
32 | ,,,,,,,,,,
33 | "Section ","Notes",,,,,,,,,
34 | "Note","This file is an example of a data bundle, a simple format for linking data to metadata using spreadsheets. See the specification for more details. ",,,,,,,,,
35 | "Documentation","https://docs.google.com/document/d/16tb7x73AyF8pJ6e6IBcaIJAioEZCNBGDEksKYTXfdfg/edit#",,,,,,,,,
36 | ".Title","Data Bundles Packaging Specification",,,,,,,,,
37 | ,,,,,,,,,,
38 | "Section","Schema",,"datatype","valuetype","description",,,,,
39 | "Table","registered_voters",,,,"HCI Indicator 653.0: Percent of adults age 18 years and older who are registered voters",,,,,
40 | "Table.Column","reportyear",,"int","year range","Year or years that indicator was reported",,,,,
41 | "Table.Column","type",,"str","dimension","Type of record",,,,,
42 | "Table.Column","gvid",,"str","gvid","GVid version of the geotype and geotypeval",,,,,
43 | "Table.Column","geoname",,"str","label for gvid","Census name of geographic area",,,,,
44 | "Table.Column","geotype",,"str","label","Code for type of geographic area",,,,,
45 | "Table.Column","geotypevalue",,"str","census","Census geoid code",,,,,
46 | "Table.Column","county_fips",,"str","FIPS county code","County FIPS code",,,,,
47 | "Table.Column","county_name",,"str","label for counrty_fips","County name",,,,,
48 | "Table.Column","region_code",,"str","census code","Numeric code of region",,,,,
49 | "Table.Column","region_name",,"str","label for region_code","Name of region",,,,,
50 | "Table.Column","raceth",,"str","raceth/civick","Civic Knowledge race / ethnicity code.",,,,,
51 | "Table.Column","raceth_name",,"str","label for raceeth","Race / Ethnicity Name",,,,,
52 | "Table.Column","race_eth_code",,"str","raceth/hci","Race / ethnicity code",,,,,
53 | "Table.Column","race_eth_name",,"str","label for race_eth_code","Race / ethnicity name",,,,,
54 | "Table.Column","numerator",,"int","count","Adults who are registered to vote, or who voted, depending on type of record",,,,,
55 | "Table.Column","denominator",,"int","count","Population of Adults, 18 years or older",,,,,
56 | "Table.Column","percent",,"float","percent of numerator over denominator","Percent of adults who are registered to vote, or who voted, depending on type of record",,,,,
57 | "Table.Column","ll_95ci",,"float","ci95l for percent","Lower bound of 95% confidence interval",,,,,
58 | "Table.Column","ul_95ci",,"float","ci95u for percent","Upper bound of 95% confidence interval",,,,,
59 | "Table.Column","se",,"float","se for percent","Standard error",,,,,
60 | "Table.Column","rse",,"float","rse for percent","Relative standard error (se/percent * 100) expressed as a percent",,,,,
61 | "Table.Column","ca_decile",,"float","decile","Statewide decile ranking",,,,,
62 | "Table.Column","ca_rr",,"float","ratio","Ratio of indicator to state average",,,,,
63 | "Table.Column","vap",,"float","measure","Voter age population, from CA Department of Finance.",,,,,
64 | "Table.Column","ind_id",,"str","dimension",,,,,,
65 | "Table.Column","ind_definition",,"str","dimension",,,,,,
66 | "Table.Column","version",,"str","other",,,,,,
67 | ,,,,,,,,,,
68 | ,,,,,,,,,,
69 | "Section","Bibliography","Name","Type","Author","Title","PublicationYear","Publisher","Version","AccessDate","Location"
70 | "Citation","ipums",,"dataset","Steven Ruggles; Katie Genadek; Ronald Goeken; Josiah Grover; Matthew Sobek","Integrated Public Use Microdata Series",,"University of Minnesota",7,20170718,
71 | "Citation.Year",2017,,,,,,,,,
72 | "Citation.Url","https://usa.ipums.org/usa/index.shtml",,,,,,,,,
73 | "Citation.Doi","https://doi.org/10.18128/D010.V7.0",,,,,,,,,
74 | "Citation","bordley",,"article","Robert F. Bordley; James B. McDonald; Anand Mantrala","Something New, Something Old: Parametric Models for the Size of Distribution of Income",,,,,
75 | "Citation.Year",1997,,,,,,,,,
76 | "Citation.Month","June",,,,,,,,,
77 | "Citation.Journal","Journal of Income Distribution",,,,,,,,,
78 | "Citation.Volume",6,,,,,,,,,
79 | "Citation.Number",1,,,,,,,,,
80 | "Citation.Pages","5-5",,,,,,,,,
81 | "Citation.Url","https://ideas.repec.org/a/jid/journl/y1997v06i1p5-5.html",,,,,,,,,
82 | "Citation","mcdonald",,"article","McDonald, James B.;  Mantrala, Anand","The distribution of personal income: Revisited",,"Wiley Subscription Services, Inc., A Wiley Company",,,
83 | "Citation.Journal","Journal of Applied Econometrics",,,,,,,,,
84 | "Citation.Volume",10,,,,,,,,,
85 | "Citation.Number",2,,,,,,,,,
86 | "Citation.Issn","1099-1255",,,,,,,,,
87 | "Citation.Doi","10.1002/jae.3950100208",,,,,,,,,
88 | "Citation.Pages","201--204,",,,,,,,,,
89 | "Citation.Year",1995,,,,,,,,,
90 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/civicknowledge.com-rcfe_affordability-2015.csv:
--------------------------------------------------------------------------------
  1 | Declare,metatab-latest
  2 | Identifier,47bc1089-7584-41f0-b804-602ec42f1249
  3 | Origin,civicknowledge.com
  4 | Dataset,rcfe_affordability
  5 | Version,2
  6 | Time,2015
  7 | Name,civicknowledge.com-rcfe_affordability-2015-2
  8 | Title,San Diego RCFE Affordability Index
  9 | Description,A collection of synthetic datasets that estimate the number of households in San Diego County with one or more seniors which can afford 22 months of residential care.
 10 | 
 11 | Section,Contacts,Email,Organization,Tel,Url
 12 | Wrangler,Eric Busboom,eric@civicknowledge.com,,,
 13 | 
 14 | Section,References,Name,Title,Description,StartLine,HeaderLines,Encoding
 15 | Reference,censusreporter:B09020/140/05000US06073,B09020,,Relationship by Household Type (Including Living Alone) for Population 65 Years and Over,,,
 16 | Reference,censusreporter:B25007/140/05000US06073,B25007,,Tenure by Age of Householder,,,
 17 | Reference,censusreporter:B19049/140/05000US06073,B19049,,Median Household Income by Age of Householder,,,
 18 | Reference,censusreporter:B19049/050/05000US06073,B19049_county,,Median Household Income by Age of Householder,,,
 19 | Reference,censusreporter:B25076/140/05000US06073,B25076,,Lower Value Quartile (Dollars),,,
 20 | Reference,censusreporter:B25077/140/05000US06073,B25077,,Median Value,,,
 21 | Reference,censusreporter:B25078/140/05000US06073,B25078,,Upper Value Quartile (Dollars),,,
 22 | Reference,censusreporter:B25076/050/05000US06073,B25076_county,,Lower Value Quartile (Dollars),,,
 23 | Reference,censusreporter:B25077/050/05000US06073,B25077_county,,Median Value,,,
 24 | Reference,censusreporter:B25078/050/05000US06073,B25078_county,,Upper Value Quartile (Dollars),,,
 25 | Reference,metatab+http://s3.amazonaws.com/library.metatab.org/sangis.org-census_regions-2010-sandiego-7.csv#tract-sra-msa-xwalk,tracts,,"Crosswalk between crosswalks, tracts, zip codes and SRAs in San Diego County",,,
 26 | Reference,metatab+http://s3.amazonaws.com/library.metatab.org/sangis.org-census_regions-2010-sandiego-7.csv#tracts,tracts_geo,,Geographics Boundaries for Tracts,,,
 27 | Reference,metatab+http://s3.amazonaws.com/library.metatab.org/sangis.org-census_regions-2010-sandiego-7.csv#sra,sra_geo,,Geographics Boundaries for SRAs,,,
 28 | Reference,metatab+http://s3.amazonaws.com/library.metatab.org/ipums.org-income_homevalue-5.zip#income_homeval,incv,,Income and Home value records from IPUMS for San Diego County,,,
 29 | 
 30 | Section,Resources,Name,Title,Description,StartLine,HeaderLines,Encoding
 31 | Datafile,file:data/affordability.csv,affordability,Number of seniors who can afford RCFE care. Full final dataset with all columns,,,,
 32 | Datafile,file:data/afford_tracts.csv,afford_tracts,Number of seniors who can afford RCFE care by tract,,,,
 33 | Datafile,file:data/afford_sra.csv,afford_sra,Number of seniors who can afford RCFE care by SRA,,,,
 34 | Datafile,file:data/afford_msa.csv,afford_msa,Number of seniors who can afford RCFE care by MSA,,,,
 35 | 
 36 | Section,Bibliography,Name,Type,Author,Title,Year,Month,Publisher,Journal,Version,Volume,Number,Pages,AccessDate,Location,Url,Doi,Issn
 37 | Citation,ipums,,dataset,Steven Ruggles; Katie Genadek; Ronald Goeken; Josiah Grover; Matthew Sobek,Integrated Public Use Microdata Series,2017,,University of Minnesota,,7.0,,,,20170718,,https://usa.ipums.org/usa/index.shtml,https://doi.org/10.18128/D010.V7.0,
 38 | Citation,bordley,,article,Robert F. Bordley; James B. McDonald; Anand Mantrala,"Something New, Something Old: Parametric Models for the Size of Distribution of Income",1997,June,,Journal of Income Distribution,,6,1,5-5,,,https://ideas.repec.org/a/jid/journl/y1997v06i1p5-5.html,,
 39 | Citation,mcdonald,,article,"McDonald, James B.;  Mantrala, Anand",The distribution of personal income: Revisited,1995,,"Wiley Subscription Services, Inc., A Wiley Company",Journal of Applied Econometrics,,10,2,"201--204,",,,,10.1002/jae.3950100208,1099-1255
 40 | Citation,majumder,,article,"Majumder, Amita; Chakravarty, Satya Ranjan",Distribution of personal income: Development of a new model and its application to U.S. income data,1990,,"Wiley Subscription Services, Inc., A Wiley Company",Journal of Applied Econometrics,,5,2,189--196,,,,10.1002/jae.3950050206,1099-1255
 41 | 
 42 | Section,Schema,DataType,ValueType,Description
 43 | Table,affordability,,,
 44 | Table.Column,Index,text,,
 45 | Column.Altname,index
 46 | Table.Column,senior_pop,integer,,
 47 | Table.Column,senior_pop_m90,integer,,
 48 | Table.Column,homeown_65,integer,,
 49 | Table.Column,homeown_65_m90,number,,
 50 | Table.Column,renters_65,integer,,
 51 | Table.Column,median_income,number,,
 52 | Table.Column,median_income_25_m90,number,,
 53 | Table.Column,hv_25,integer,,
 54 | Table.Column,hv_25_m90,number,,
 55 | Table.Column,hv_50,integer,,
 56 | Table.Column,hv_50_m90,number,,
 57 | Table.Column,hv_75,number,,
 58 | Table.Column,hv_75_m90,number,,
 59 | Table.Column,tract,number,,
 60 | Table.Column,tract_census_geoid,integer,,
 61 | Table.Column,sra,integer,,
 62 | Table.Column,sra_name,text,,
 63 | Table.Column,msa,integer,,
 64 | Table.Column,msa_name,text,,
 65 | Table.Column,lon,number,,
 66 | Table.Column,lat,number,,
 67 | Table.Column,can_afford,integer,,
 68 | Table.Column,counts,integer,,
 69 | Table.Column,cant_afford,integer,,
 70 | Table.Column,afford_pct,integer,,
 71 | Table,afford_tracts,,,
 72 | Table.Column,Index,text,,
 73 | Column.Altname,index
 74 | Table.Column,senior_pop,integer,,"Number of seniors, aged 65+"
 75 | Table.Column,can_afford,integer,,Number of seniors who can afford RCFE care
 76 | Table.Column,afford_pct,integer,,Percentage of seniors who can afford RCFE care
 77 | Table.Column,score,integer,,Affordability Score
 78 | Table,afford_sra,,,
 79 | Table.Column,Index,text,,
 80 | Column.Altname,index
 81 | Table.Column,can_afford,integer,,Number of seniors who can afford RCFE care
 82 | Table.Column,senior_pop,integer,,"Number of seniors, aged 65+"
 83 | Table.Column,afford_pct,number,,Percentage of seniors who can afford RCFE care
 84 | Table.Column,score,number,,Affordability Score
 85 | Table,afford_msa,,,
 86 | Table.Column,Index,text,,
 87 | Column.Altname,index
 88 | Table.Column,can_afford,integer,,Number of seniors who can afford RCFE care
 89 | Table.Column,senior_pop,integer,,"Number of seniors, aged 65+"
 90 | Table.Column,afford_pct,integer,,Percentage of seniors who can afford RCFE care
 91 | Table.Column,score,integer,,Affordability Score
 92 | 
 93 | Section,Documentation,Name,Title,Description
 94 | Documentation,docs/notebook.html,notebook.html,Jupyter Notebook (HTML),
 95 | Image,docs/image_7_0.png,,Image for HTML Documentation,
 96 | Image,docs/image_9_0.png,,Image for HTML Documentation,
 97 | Image,docs/image_25_0.png,,Image for HTML Documentation,
 98 | Image,docs/image_33_0.png,,Image for HTML Documentation,
 99 | Image,docs/image_34_0.png,,Image for HTML Documentation,
100 | Image,docs/image_35_0.png,,Image for HTML Documentation,
101 | Image,docs/image_36_0.png,,Image for HTML Documentation,
102 | Documentation,docs/documentation.md,,Documentation (Markdown),
103 | Documentation,docs/documentation.html,,Primary Documentation (HTML),
104 | 


--------------------------------------------------------------------------------
/docs/Wrangling packages.rst:
--------------------------------------------------------------------------------
  1 | Guide to Wrangling Metatab Packages
  2 | ===================================
  3 | 
  4 | 
  5 | Setting the Name
  6 | ----------------
  7 | 
  8 | For any non-trivial use, the ``Root.Name`` term is critical; most Metatab programs require it to be set. It can be set directly, but it is much more useful to allow ``metapack`` to set it, by aggregating other terms. The other terms that ``metapack`` will combine to create a name are:
  9 | 
 10 | - Dataset. The base name of the dataset.
 11 | - Origin. A part of a domain name ( like 'usgs.gov' or 'census.gov' ) for the source of the data.
 12 | - Version. An integer version number
 13 | - Space. The name of the region that the data covers. 
 14 | - Time. A year, year range, or other time interval for the temporal coverage of the data. 
 15 | - Grain. The name of what each row is about, such as a 'school' or a 'county' or a 'person'
 16 | 
 17 | The ``Space``, ``Time`` and ``Grain`` terms are usually only used to distinguish this package from other packages. If there is only one package for a particular ``Dataset`` value, these three terms are rarely used. 
 18 | 
 19 | Setting the ``Dataset`` term triggers rebuilding the ``Name`` term; if ``Dataset`` is not set, ``metapack`` will not update the ``Name`` term. You can run ``metapack -u`` to force regenerating the name.
 20 | 
 21 | Adding Properties to Sections
 22 | -----------------------------
 23 | 
 24 | ``Root.Section`` terms introduce Sections, which both group terms and set the headings for term properties. In the Section row, all of the values in the 3rd and later columns set the property name for child property terms. For instance, the default ``Schema`` section is:
 25 | 
 26 | ::
 27 | 
 28 |     A       B       C           D       E
 29 |     Section	Schema	DataType	AltName	Description
 30 | 
 31 | The B column is the section name, and the C, D, and E columns cause the parser to interpret values in those columns as being child values of terms on the row, with a term name given by the header in the ``Section`` Line. So, for a row that starts with a ``Table.Column`` term, the value in the C column is the value for a ``Column.DataType`` property.
 32 | 
 33 | You can re-order these header values, and can create new ones, but in some cases, the ``metapack`` program will expect some properties to exist. For instance, every ``Table.Column`` term must have a ``Column.DataType`` term.
 34 | 
 35 | 
 36 | Groups and Tags
 37 | ---------------
 38 | 
 39 | When creating entries in a data repository like CKAN or Data.World, the ``metakan`` and ``metaworld`` programs  may categorize the dataset entry with groups and tags. Metatab treats these term values as simple strings, so refer to the data repository documentation for specifics about how groups and tags are used.
 40 | 
 41 | For tags, set a value for the ``Root.Tag`` term, and for groups, use ``Root.Group``.
 42 | 
 43 | 
 44 | Schemas
 45 | -------
 46 | 
 47 | Schemas are the ``Root.Table`` terms in the ``Schema`` section of the metatab document, along with their ``Table.Column`` children. The value of the ``Root.Table`` term is the name of the schema, and this value can be referenced from the ``Root.DataSet`` entries in the ``Resources`` section either by being set to the ``Dataset.Name`` for the entry, or by being set as the ``Dataset.Schema``. Using ``Dataset.Name`` is the default case, but using this method of linking only allows one resource per schema. If there are multiple resources that should share the same schema, link the two with the ``Dataset.Schema`` property.
 48 | 
 49 | 
 50 | Column Names
 51 | ++++++++++++
 52 | 
 53 | The value of a ``Table.Column`` term is the primary name of a column, most often the column header from the original resource.
 54 | 
 55 | The ``Column.AltName`` term sets an alternate name for the column, which will be used whenever the resource is copied into a new package. The alternate name is set when the primary name is not a well formed column name. For instance, if the header value from the original resource is 'Date & Time', the ``Table.Column`` value will be 'Date & Time', but 'Column.AltName' will also be set and will be 'date_time'.
 56 | 
 57 | When a resource is copied, such as building a package with ``metatab`` or ``metasync``, the data file will have the header value from ``Column.AltName`` when it exists and from ``Table.Column`` when it doesn't. The header values will be moved into the new package's schema as the ``Table.Column`` values. Because all of the ``Column.AltName`` values will have been "made official" when packaging, the Altname column is removed from the schema after packaging.
 58 | 
 59 | Because the header can come from either ``Column.AltName`` or ``Table.Column`` values, you only need to set the ``Column.AltName`` when the ``Table.Column`` value is an ill-formed header.
 60 | 
 61 | 
 62 | DataTypes
 63 | +++++++++
 64 | 
 65 | Every ``Table.Column`` term must have a ``Column.Datatype`` to be useful. The values for these terms are free-form, but most processing programs will expect them to be one of:
 66 | 
 67 | ::
 68 | 
 69 |   integer
 70 |   number
 71 |   text
 72 | 
 73 | These are the same values as are used in Tabular Data Packages. The value of ``number`` is a general real or floating point number.
 74 | 
 75 | Testing Packages
 76 | ----------------
 77 | 
 78 | When you are working on a package where the ``metadata.csv`` file is stored on Github or a similar VCS system, you are working on a "source" Metatab file, since the Metatab file will directly reference data files. To test that the file is what you want, you should occasionally build a filesystem package from this file, using ``metatab -F -f``. The ``-F`` option will force the new package to be built, although if you want to be completely sure, you can delete the ``_packages`` directory in the current directory.
 79 | 
 80 | The first tests should be done by building the package, then inspecting the data files to see that they have the columns that you expect. Then open the ``index.html`` file to ensure that all of the documentation you want has been generated.
 81 | 
 82 | When the package looks correct from direct inspection, you can open it in Jupyter Notebook to check the documentation.
 83 | 
 84 | Start Jupyter Notebook in the current directory, with the source ``metadata.csv`` file. Then enter this in a cell:
 85 | 
 86 | .. code-block:: python
 87 | 
 88 |     import metatab
 89 |     doc = metatab.open_package('./metadata.csv')
 90 |     doc
 91 | 
 92 | You should get a pretty HTML version of the package documentation. Alternately, you can dump the docs for the package and the data dictionaries for all of the resources with:
 93 | 
 94 | .. code-block:: python
 95 | 
 96 |     import metatab
 97 |     from IPython.display import display_html
 98 | 
 99 |     doc = metatab.open_package('./metadata.csv')
100 |     display_html(doc)
101 | 
102 |     for r in doc.resources():
103 |         display_html(r)
104 | 
105 | 
106 | The previous code is displaying the documentation generated from the source Metatab document. You may also want to view the documentation generated from the file system package you build with ``metapack -F -f``. In that case, open the package document with:
107 | 
108 | .. code-block:: python
109 | 
110 |     doc = metatab.open_package('./_packages/<package_name>/')
111 | 
112 | The result should be the same documentation, but with different URLs.


--------------------------------------------------------------------------------
/metatab/util.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2016 Civic Knowledge. This file is licensed under the terms of the
  2 | # Revised BSD License, included in this distribution as LICENSE
  3 | 
  4 | """Classes to build a Metatab document
  5 | """
  6 | import logging
  7 | import os
  8 | import shutil
  9 | import sys
 10 | from genericpath import exists, isfile
 11 | from os import makedirs
 12 | from os.path import join, basename, dirname, isdir, abspath
 13 | 
 14 | #from rowgenerators import reparse_url, parse_url_to_dict, unparse_url_dict, Url
 15 | 
 16 | from metatab import DEFAULT_METATAB_FILE
 17 | from rowgenerators import get_cache
 18 | 
 19 | 
 20 | def declaration_path(name):
 21 |     """Return the path to an included declaration"""
 22 |     from os.path import dirname, join, exists
 23 |     import  metatabdecl
 24 |     from metatab.exc import IncludeError
 25 | 
 26 |     d = dirname(metatabdecl.__file__)
 27 | 
 28 |     path = join(d, name)
 29 | 
 30 |     if not exists(path):
 31 |         path = join(d, name + '.csv')
 32 | 
 33 |     if not exists(path):
 34 |         raise IncludeError("No local declaration file for name '{}' ".format(name))
 35 | 
 36 |     return path
 37 | 
 38 | 
 39 | # From http://stackoverflow.com/a/295466
 40 | def slugify(value):
 41 |     """
 42 |     Normalizes string, converts to lowercase, removes non-alpha characters,
 43 |     and converts spaces to hyphens.
 44 |     """
 45 |     import re
 46 |     import unicodedata
 47 |     value = str(value)
 48 |     value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode('utf8').strip().lower()
 49 |     value = re.sub(r'[^\w\s\-\.]', '', value)
 50 |     value = re.sub(r'[-\s]+', '-', value)
 51 |     return value
 52 | 
 53 | 
 54 | def flatten(d, sep='.'):
 55 |     """Flatten a data structure into tuples"""
 56 | 
 57 |     def _flatten(e, parent_key='', sep='.'):
 58 |         import collections
 59 | 
 60 |         prefix = parent_key + sep if parent_key else ''
 61 | 
 62 |         if isinstance(e, collections.MutableMapping):
 63 |             return tuple((prefix + k2, v2) for k, v in e.items() for k2, v2 in _flatten(v, k, sep))
 64 |         elif isinstance(e, collections.MutableSequence):
 65 |             return tuple((prefix + k2, v2) for i, v in enumerate(e) for k2, v2 in _flatten(v, str(i), sep))
 66 |         else:
 67 |             return (parent_key, (e,)),
 68 | 
 69 |     return tuple((k, v[0]) for k, v in _flatten(d, '', sep))
 70 | 
 71 | 
 72 | # From http://stackoverflow.com/a/2597440
 73 | class Bunch(object):
 74 |     def __init__(self, adict):
 75 |         self.__dict__.update(adict)
 76 | 
 77 | 
 78 | MP_DIR = '_metapack'
 79 | DOWNLOAD_DIR = join(MP_DIR, 'download')
 80 | PACKAGE_DIR = join(MP_DIR, 'package')
 81 | OLD_DIR = join(MP_DIR, 'old')
 82 | 
 83 | 
 84 | def make_dir_structure(base_dir):
 85 |     """Make the build directory structure. """
 86 | 
 87 |     def maybe_makedir(*args):
 88 | 
 89 |         p = join(base_dir, *args)
 90 | 
 91 |         if exists(p) and not isdir(p):
 92 |             raise IOError("File '{}' exists but is not a directory ".format(p))
 93 | 
 94 |         if not exists(p):
 95 |             makedirs(p)
 96 | 
 97 |     maybe_makedir(DOWNLOAD_DIR)
 98 |     maybe_makedir(PACKAGE_DIR)
 99 |     maybe_makedir(OLD_DIR)
100 | 
101 | 
def make_metatab_file(template='metatab'):
    """Build a ``MetatabDoc`` from a template in the ``metatab.templates`` package.

    :param template: Base name of the template file, without the ``.csv``
        extension.
    :return: The ``MetatabDoc`` constructed from the template's path.
    """
    from os.path import dirname
    from rowgenerators.util import fs_join
    import metatab.templates
    from metatab.doc import MetatabDoc

    templates_dir = dirname(metatab.templates.__file__)

    return MetatabDoc(fs_join(templates_dir, template + '.csv'))
113 | 
114 | 
115 | 
116 | import mimetypes
117 | 
# Invert the extension -> MIME type table so a MIME type maps to an extension
# without dots around it. When several extensions share a type, the last one
# iterated wins; the explicit entries below then add or override three types.
mimetypes.init()
mime_map = dict(
    (mime_type, extension.strip('.'))
    for extension, mime_type in mimetypes.types_map.items()
)
mime_map.update({
    'application/x-zip-compressed': 'zip',
    'application/vnd.ms-excel': 'xls',
    'text/html': 'html',
})
123 | 
124 | 
125 | # From https://gist.github.com/zdavkeos/1098474
def walk_up(bottom):
    """Mimic ``os.walk``, but climb from *bottom* toward the filesystem root.

    :param bottom: Directory to start from; it is resolved with ``realpath``.
    :return: A generator of ``(directory, dirs, nondirs)`` tuples, one per
        level, where the two lists hold the names of the subdirectories and
        of the other entries of ``directory``. Errors from ``os.listdir``
        propagate to the caller.
    """
    import os
    from os import path

    current = path.realpath(bottom)

    while True:
        # Split the entries of the current directory into dirs and the rest.
        dirs, nondirs = [], []
        for entry in os.listdir(current):
            bucket = dirs if path.isdir(path.join(current, entry)) else nondirs
            bucket.append(entry)

        yield current, dirs, nondirs

        parent = path.realpath(path.join(current, '..'))

        # At the top, the parent resolves to the directory itself.
        if parent == current:
            return

        current = parent
159 | 
160 | 
def ensure_dir(path):
    """Create *path*, with any missing parents, unless it is empty or exists.

    :param path: Directory path; a falsy value is ignored.
    """
    if not path or exists(path):
        return

    makedirs(path)
164 | 
165 | 
def copytree(src, dst, symlinks=False, ignore=None):
    """Copy every entry of directory *src* into directory *dst*.

    Subdirectories are copied with ``shutil.copytree`` and everything else
    with ``shutil.copy2``. This function does not create *dst* itself.

    :param src: Directory whose entries are copied.
    :param dst: Directory receiving the copies.
    :param symlinks: Forwarded to ``shutil.copytree`` for subdirectories.
    :param ignore: Forwarded to ``shutil.copytree`` for subdirectories.
    """
    for entry in os.listdir(src):
        source_path = os.path.join(src, entry)
        target_path = os.path.join(dst, entry)

        if not os.path.isdir(source_path):
            shutil.copy2(source_path, target_path)
            continue

        shutil.copytree(source_path, target_path, symlinks, ignore)
174 | 
175 | 
# Named loggers: one for user-facing output, one for CLI errors, one for debugging.
logger, logger_err, debug_logger = (
    logging.getLogger(channel) for channel in ('user', 'cli-errors', 'debug')
)
179 | 
180 | 
def cli_init(log_level=logging.INFO):
    """Attach stream handlers to the ``user`` and ``cli-errors`` loggers.

    The ``user`` logger writes bare messages to stdout at *log_level*. The
    ``cli-errors`` logger writes ``LEVEL: message`` lines to stderr at WARN
    and above. Every call adds a new pair of handlers.

    :param log_level: Level applied to the ``user`` logger and its handler.
    """
    wiring = (
        (logger, sys.stdout, '%(message)s', log_level),
        (logger_err, sys.stderr, '%(levelname)s: %(message)s', logging.WARN),
    )

    for target, stream, fmt, level in wiring:
        handler = logging.StreamHandler(stream)
        handler.setFormatter(logging.Formatter(fmt))
        handler.setLevel(level)
        target.addHandler(handler)
        target.setLevel(level)
193 | 
194 | 
def prt(*args, **kwargs):
    """Log the space-joined ``str()`` forms of *args* at INFO level.

    :param args: Values to print; each is converted with ``str``.
    :param kwargs: Passed through to the logger's ``info`` call.
    """
    message = ' '.join(map(str, args))
    logger.info(message, **kwargs)
197 | 
198 | 
def warn(*args, **kwargs):
    """Log the space-joined ``str()`` forms of *args* as a warning.

    :param args: Values to report; each is converted with ``str``.
    :param kwargs: Passed through to the logger's ``warning`` call.
    """
    # Logger.warn is a deprecated alias; Logger.warning is the supported name.
    logger_err.warning(' '.join(str(e) for e in args), **kwargs)
201 | 
202 | 
def err(*args, **kwargs):
    """Log the space-joined ``str()`` forms of *args* as critical, then exit.

    :param args: Values to report; each is converted with ``str``.
    :param kwargs: Passed through to the logger's ``critical`` call.
    :raises SystemExit: Always, with exit status 1.
    """
    message = ' '.join(map(str, args))
    logger_err.critical(message, **kwargs)
    sys.exit(1)
206 | 
207 | 
def import_name_or_class(name):
    """Resolve a fully qualified, dotted name to the object it names.

    :param name: Either a string such as ``'package.module.Object'``, or an
        object that is already the thing wanted.
    :return: For a string, the attribute named by the last component, looked
        up on the module named by everything before the last dot. Any
        non-string argument is returned unchanged.
    :raises ValueError: If *name* is a string without a module part.
    :raises ImportError: If the module part cannot be imported.
    :raises AttributeError: If the module has no attribute with that name.
    """
    from importlib import import_module

    if not isinstance(name, str):
        return name  # Assume it is already the thing we want to import

    # for "a.b.c.d" -> ( 'a.b.c', 'd' )
    module_name, _, object_name = name.rpartition('.')

    if not module_name:
        raise ValueError("Expected a dotted name like 'module.object', got '{}'".format(name))

    # import_module returns the named submodule itself. Walking down from the
    # top-level package with getattr can land on a package attribute that
    # shadows the submodule instead.
    return getattr(import_module(module_name), object_name)
227 | 
228 | 
def md5_file(filePath):
    """Compute the MD5 hex digest of a file, reading it in 8192-byte chunks.

    :param filePath: Path of the file to hash.
    :return: The hexadecimal digest string, or ``None`` when the path does
        not exist or is a directory.
    """
    import hashlib

    try:
        with open(filePath, 'rb') as fh:
            digest = hashlib.md5()
            # read() returns b'' at end of file, which stops the iteration.
            for chunk in iter(lambda: fh.read(8192), b''):
                digest.update(chunk)
            return digest.hexdigest()
    except (FileNotFoundError, IsADirectoryError):
        return None


--------------------------------------------------------------------------------
/metatab/cli.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2017 Civic Knowledge. This file is licensed under the terms of the
  2 | # Revised BSD License, included in this distribution as LICENSE
  3 | 
  4 | """
  5 | CLI program for managing Metatab files
  6 | """
  7 | 
  8 | import json
  9 | import sys
 10 | from genericpath import exists
 11 | 
 12 | from metatab import  DEFAULT_METATAB_FILE, MetatabDoc, parse_app_url
 13 | from rowgenerators.util import get_cache, clean_cache
 14 | from os.path import dirname
 15 | from rowgenerators.util import fs_join as join
 16 | 
 17 | import logging
 18 | 
 19 | logger = logging.getLogger('user')
 20 | logger_err = logging.getLogger('cli-errors')
 21 | debug_logger = logging.getLogger('debug')
 22 | 
 23 | cache = get_cache()
 24 | 
 25 | def metatab():
 26 |     import argparse
 27 |     parser = argparse.ArgumentParser(
 28 |         prog='metatab',
 29 |         description='Matatab file parser',
 30 |         epilog='Cache dir: {}\n'.format(str(cache.getsyspath('/') ) ))
 31 | 
 32 |     g = parser.add_mutually_exclusive_group()
 33 | 
 34 |     g.add_argument('-C', '--create', action='store', nargs='?', default=False,
 35 |                    help="Create a new metatab file, from named template. With no argument, uses the 'metatab' template ")
 36 | 
 37 |     g.add_argument('-t', '--terms', default=False, action='store_const', dest='out_type', const='terms',
 38 |                    help='Parse a file and print out the stream of terms, before interpretation')
 39 | 
 40 |     g.add_argument('-j', '--json', default=False, action='store_const', dest='out_type', const='json',
 41 |                    help='Parse a file and print out a JSON representation')
 42 | 
 43 |     g.add_argument('-y', '--yaml', default=False, action='store_const', dest='out_type', const='yaml',
 44 |                    help='Parse a file and print out a YAML representation')
 45 | 
 46 |     g.add_argument('-l', '--line', default=False, action='store_const', dest='out_type', const='line',
 47 |                    help='Parse a file and print out a Metatab Line representation')
 48 | 
 49 |     g.add_argument('-c', '--csv', default=False, action='store_const', dest='out_type', const='csv',
 50 |                    help='Parse a file and print out a Metatab Line representation')
 51 | 
 52 |     g.add_argument('-p', '--prety', default=False, action='store_const', dest='out_type', const='prety',
 53 |                    help='Pretty print the python Dict representation ')
 54 | 
 55 |     parser.add_argument('-W', '--write-in-place',
 56 |                    help='When outputting as yaml, json, csv or line, write the file instead of printing it, '
 57 |                         'to a file with same base name and appropriate extension ', action='store_true')
 58 | 
 59 |     parser.set_defaults(out_type='csv')
 60 | 
 61 |     parser.add_argument('-f', '--find-first',
 62 |                    help='Find and print the first value for a fully qualified term name')
 63 | 
 64 |     parser.add_argument('-d', '--show-declaration', default=False, action='store_true',
 65 |                         help='Parse a declaration file and print out declaration dict. Use -j or -y for the format')
 66 | 
 67 |     parser.add_argument('file', nargs='?', default=DEFAULT_METATAB_FILE, help='Path to a Metatab file')
 68 | 
 69 |     cli_init()
 70 | 
 71 |     args = parser.parse_args(sys.argv[1:])
 72 | 
 73 |     # Specing a fragment screws up setting the default metadata file name
 74 |     if args.file.startswith('#'):
 75 |         args.file = DEFAULT_METATAB_FILE + args.file
 76 | 
 77 |     if args.create is not False:
 78 |         if new_metatab_file(args.file, args.create):
 79 |             prt("Created ", args.file)
 80 |         else:
 81 |             warn("File",args.file,'already exists.')
 82 | 
 83 |         exit(0)
 84 | 
 85 |     metadata_url = parse_app_url(args.file, proto='metatab')
 86 |     try:
 87 |         doc = MetatabDoc(metadata_url, cache=cache)
 88 |     except IOError as e:
 89 | 
 90 |         err("Failed to open '{}': {}".format(metadata_url, e))
 91 | 
 92 |     def write_or_print(t):
 93 |         from pathlib import Path
 94 | 
 95 |         if metadata_url.scheme != 'file':
 96 |             err("Can only use -w with local files")
 97 |             return
 98 | 
 99 |         ext = 'txt' if args.out_type == 'line' else args.out_type
100 | 
101 |         if args.write_in_place:
102 |             with metadata_url.fspath.with_suffix('.'+ext).open('w') as f:
103 |                 f.write(t)
104 |         else:
105 |             print(t)
106 | 
107 | 
108 | 
109 |     if args.show_declaration:
110 | 
111 |         decl_doc = MetatabDoc('', cache=cache, decl=metadata_url.path)
112 | 
113 |         d = {
114 |             'terms': decl_doc.decl_terms,
115 |             'sections': decl_doc.decl_sections
116 |         }
117 | 
118 |         if args.out_type == 'json':
119 |             print(json.dumps(d, indent=4))
120 | 
121 |         elif args.out_type == 'yaml':
122 |             import yaml
123 |             print(yaml.safe_dump(d, default_flow_style=False, indent=4))
124 | 
125 |     elif args.find_first:
126 | 
127 |         t = doc.find_first(args.find_first)
128 |         print(t.value)
129 | 
130 | 
131 |     elif args.out_type == 'terms':
132 |         for t in doc._term_parser:
133 |             print(t)
134 | 
135 |     elif args.out_type == 'json':
136 |         write_or_print(json.dumps(doc.as_dict(), indent=4))
137 | 
138 |     elif args.out_type == 'yaml':
139 |         import yaml
140 |         from collections import OrderedDict
141 | 
142 |         def ordered_dump(data, stream=None, Dumper=yaml.Dumper, **kwds):
143 |             class OrderedDumper(Dumper):
144 |                 pass
145 | 
146 |             def _dict_representer(dumper, data):
147 |                 return dumper.represent_mapping(
148 |                     yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG,
149 |                     data.items())
150 | 
151 |             OrderedDumper.add_representer(OrderedDict, _dict_representer)
152 |             return yaml.dump(data, stream, OrderedDumper, **kwds)
153 | 
154 |         write_or_print(ordered_dump(doc.as_dict(), default_flow_style=False, indent=4, Dumper=yaml.SafeDumper))
155 | 
156 |     elif args.out_type == 'line':
157 |         write_or_print(doc.as_lines())
158 | 
159 |     elif args.out_type == 'csv':
160 |         write_or_print(doc.as_csv())
161 | 
162 |     elif args.out_type == 'prety':
163 |         from pprint import pprint
164 |         pprint(doc.as_dict())
165 | 
166 |     exit(0)
167 | 
168 | 
169 | 
170 | 
171 | 
def cli_init(log_level=logging.INFO):
    """Attach console handlers to the two CLI loggers.

    The user logger gets a stdout handler that emits the bare message at
    ``log_level``. The error logger gets a stderr handler that prefixes the
    level name and is fixed at ``logging.WARN``.

    :param log_level: Level applied to the user logger and its stdout handler.
    """
    # (logger, stream, format string, level) for each console channel.
    wiring = (
        (logger, sys.stdout, '%(message)s', log_level),
        (logger_err, sys.stderr, '%(levelname)s: %(message)s', logging.WARN),
    )

    for target, stream, fmt, level in wiring:
        handler = logging.StreamHandler(stream)
        handler.setFormatter(logging.Formatter(fmt))
        handler.setLevel(level)
        target.addHandler(handler)
        target.setLevel(level)
185 | 
def prt(*args, **kwargs):
    """Log the space-joined string forms of ``args`` at INFO on the user logger.

    ``kwargs`` are passed through to the logging call unchanged.
    """
    message = ' '.join(map(str, args))
    logger.info(message, **kwargs)
188 | 
def warn(*args, **kwargs):
    """Log the space-joined string forms of ``args`` as a warning on the error logger.

    ``kwargs`` are passed through to the logging call unchanged.
    """
    # Logger.warn() is a deprecated alias that newer Python versions no longer
    # provide; Logger.warning() is the supported spelling.
    logger_err.warning(' '.join(str(e) for e in args), **kwargs)
191 | 
def err(*args, **kwargs):
    """Log the space-joined string forms of ``args`` at CRITICAL, then exit.

    ``kwargs`` are passed through to the logging call unchanged. The process
    is terminated with exit status 1, so this function does not return.
    """
    message = ' '.join(map(str, args))
    logger_err.critical(message, **kwargs)
    sys.exit(1)
195 | 
196 | 
def make_metatab_file(template='metatab'):
    """Build a MetatabDoc from a CSV template in the ``metatab.templates`` package.

    :param template: Base name of the template file, without the ``.csv`` suffix.
    :return: The document constructed from the template file.
    """
    import metatab.templates as tmpl

    # Templates sit in the same directory as the templates package module.
    templates_dir = dirname(tmpl.__file__)
    csv_name = template + '.csv'

    return MetatabDoc(join(templates_dir, csv_name))
205 | 
206 | 
207 | 
def new_metatab_file(mt_file, template):
    """Create ``mt_file`` from a template unless it already exists.

    :param mt_file: Path of the Metatab file to create.
    :param template: Template name; any falsy value selects ``'metatab'``.
    :return: True when the file was written, False when it was already there.
    """
    if exists(mt_file):
        # Never overwrite an existing file.
        return False

    make_metatab_file(template or 'metatab').write_csv(mt_file)
    return True
221 | 
222 | 
def get_table(doc, name):
    """Return the first ``Root.Table`` term in ``doc`` whose value is ``name``.

    When there is no such term, the names of the tables that are present are
    reported through ``err``, which exits the process.
    """
    table = doc.find_first('Root.Table', value=name)

    if table:
        return table

    # Collect the quoted names of every table, for the error message.
    quoted = []
    for term in doc.find('Root.Table'):
        quoted.append("'" + term.value + "'")

    known = quoted if quoted else ["<No Tables>"]

    err("Did not find schema for table name '{}' Tables are: {}"
        .format(name, " ".join(known)))

    return table
237 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/json/example1-web.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "declare": "http://assets.metatab.org/metatab-0.1.csv",
  3 |     "title": "Registered Voters, By County",
  4 |     "description": "Percent of the eligible population registered to vote and the percent who voted in statewide elections.",
  5 |     "identifier": "cdph.ca.gov-hci-registered_voters-county",
  6 |     "version": "201404",
  7 |     "obsoletes": "cdph.ca.gov-hci-registered_voters-county-201304",
  8 |     "format": "excel",
  9 |     "spatial": "California <04000US06>",
 10 |     "time": "2002-2014",
 11 |     "spatialgrain": "County <05000US>",
 12 |     "datafile": [
 13 |         {
 14 |             "table": "registered_voters",
 15 |             "grain": "County",
 16 |             "title": "The First Example Data File",
 17 |             "url": "http://example.com/example1.csv"
 18 |         },
 19 |         {
 20 |             "table": "registered_voters",
 21 |             "grain": "Tract",
 22 |             "title": "The Second Example Data File",
 23 |             "url": "http://example.com/example2.csv"
 24 |         }
 25 |     ],
 26 |     "homepage": {
 27 |         "table": "Healthy Communities Data and Indicators Project (HCI)",
 28 |         "url": "https://www.cdph.ca.gov/programs/pages/healthycommunityindicators.aspx"
 29 |     },
 30 |     "documentation": [
 31 |         {
 32 |             "table": "Indicator Documentation for Voter Registration / Participation",
 33 |             "description": "Voter Registration/Participation: Percent of the eligible population registered to vote and the percent who voted in statewide elections",
 34 |             "url": "https://www.cdph.ca.gov/programs/Documents/HCI_RegisteredVoters_653_Narrative_and_examples_6-2-14.pdf"
 35 |         },
 36 |         {
 37 |             "title": "Data Bundles Packaging Specification",
 38 |             "url": "https://docs.google.com/document/d/16tb7x73AyF8pJ6e6IBcaIJAioEZCNBGDEksKYTXfdfg/edit#"
 39 |         }
 40 |     ],
 41 |     "creator": {
 42 |         "email": "HCIOHE@cdph.ca.gov",
 43 |         "name": "Office of Health Equity"
 44 |     },
 45 |     "wrangler": {
 46 |         "email": "eric@civicknowledge.com",
 47 |         "name": "Eric Busboom"
 48 |     },
 49 |     "note": "This file is an example of a data bundle, a simple format for linking data to metadata using spreadsheets. See the specification for more details.",
 50 |     "table": {
 51 |         "description": "HCI Indicator 653.0: Percent of adults age 18 years and older who are registered voters",
 52 |         "column": [
 53 |             {
 54 |                 "datatype": "int",
 55 |                 "valuetype": "year range",
 56 |                 "description": "Year or years that indicator was reported",
 57 |                 "name": "reportyear"
 58 |             },
 59 |             {
 60 |                 "datatype": "str",
 61 |                 "valuetype": "dimension",
 62 |                 "description": "Type of record",
 63 |                 "name": "type"
 64 |             },
 65 |             {
 66 |                 "datatype": "str",
 67 |                 "valuetype": "gvid",
 68 |                 "description": "GVid version of the geotype and geotypeval",
 69 |                 "name": "gvid"
 70 |             },
 71 |             {
 72 |                 "datatype": "str",
 73 |                 "valuetype": "label for gvid",
 74 |                 "description": "Census name of geographic area",
 75 |                 "name": "geoname"
 76 |             },
 77 |             {
 78 |                 "datatype": "str",
 79 |                 "valuetype": "label",
 80 |                 "description": "Code for type of geographic area",
 81 |                 "name": "geotype"
 82 |             },
 83 |             {
 84 |                 "datatype": "str",
 85 |                 "valuetype": "census",
 86 |                 "description": "Census geoid code",
 87 |                 "name": "geotypevalue"
 88 |             },
 89 |             {
 90 |                 "datatype": "str",
 91 |                 "valuetype": "FIPS county code",
 92 |                 "description": "County FIPS code",
 93 |                 "name": "county_fips"
 94 |             },
 95 |             {
 96 |                 "datatype": "str",
 97 |                 "valuetype": "label for counrty_fips",
 98 |                 "description": "County name",
 99 |                 "name": "county_name"
100 |             },
101 |             {
102 |                 "datatype": "str",
103 |                 "valuetype": "census code",
104 |                 "description": "Numeric code of region",
105 |                 "name": "region_code"
106 |             },
107 |             {
108 |                 "datatype": "str",
109 |                 "valuetype": "label for region_code",
110 |                 "description": "Name of region",
111 |                 "name": "region_name"
112 |             },
113 |             {
114 |                 "datatype": "str",
115 |                 "valuetype": "raceth/civick",
116 |                 "description": "Civic Knowledge race / ethnicity code.",
117 |                 "name": "raceth"
118 |             },
119 |             {
120 |                 "datatype": "str",
121 |                 "valuetype": "label for raceeth",
122 |                 "description": "Race / Ethnicity Name",
123 |                 "name": "raceth_name"
124 |             },
125 |             {
126 |                 "datatype": "str",
127 |                 "valuetype": "raceth/hci",
128 |                 "description": "Race / ethnicity code",
129 |                 "name": "race_eth_code"
130 |             },
131 |             {
132 |                 "datatype": "str",
133 |                 "valuetype": "label for race_eth_code",
134 |                 "description": "Race / ethnicity name",
135 |                 "name": "race_eth_name"
136 |             },
137 |             {
138 |                 "datatype": "int",
139 |                 "valuetype": "count",
140 |                 "description": "Adults who are registered to vote, or who voted, depending on type of record",
141 |                 "name": "numerator"
142 |             },
143 |             {
144 |                 "datatype": "int",
145 |                 "valuetype": "count",
146 |                 "description": "Population of Adults, 18 years or older",
147 |                 "name": "denominator"
148 |             },
149 |             {
150 |                 "datatype": "float",
151 |                 "valuetype": "percent of numerator over denominator",
152 |                 "description": "Percent of adults who are registered to vote, or who voted, depending on type of record",
153 |                 "name": "percent"
154 |             },
155 |             {
156 |                 "datatype": "float",
157 |                 "valuetype": "ci95l for percent",
158 |                 "description": "Lower bound of 95% confidence interval",
159 |                 "name": "ll_95ci"
160 |             },
161 |             {
162 |                 "datatype": "float",
163 |                 "valuetype": "ci95u for percent",
164 |                 "description": "Upper bound of 95% confidence interval",
165 |                 "name": "ul_95ci"
166 |             },
167 |             {
168 |                 "datatype": "float",
169 |                 "valuetype": "se for percent",
170 |                 "description": "Standard error",
171 |                 "name": "se"
172 |             },
173 |             {
174 |                 "datatype": "float",
175 |                 "valuetype": "rse for percent",
176 |                 "description": "Relative standard error (se/percent * 100) expressed as a percent",
177 |                 "name": "rse"
178 |             },
179 |             {
180 |                 "datatype": "float",
181 |                 "valuetype": "decile",
182 |                 "description": "Statewide decile ranking",
183 |                 "name": "ca_decile"
184 |             },
185 |             {
186 |                 "datatype": "float",
187 |                 "valuetype": "ratio",
188 |                 "description": "Ratio of indicator to state average",
189 |                 "name": "ca_rr"
190 |             },
191 |             {
192 |                 "datatype": "float",
193 |                 "valuetype": "measure",
194 |                 "description": "Voter age population, from CA Department of Finance.",
195 |                 "name": "vap"
196 |             },
197 |             {
198 |                 "datatype": "str",
199 |                 "valuetype": "dimension",
200 |                 "name": "ind_id"
201 |             },
202 |             {
203 |                 "datatype": "str",
204 |                 "valuetype": "dimension",
205 |                 "name": "ind_definition"
206 |             },
207 |             {
208 |                 "datatype": "str",
209 |                 "valuetype": "other",
210 |                 "name": "version"
211 |             }
212 |         ],
213 |         "name": "registered_voters"
214 |     }
215 | }


--------------------------------------------------------------------------------
/metatab/test/outputs/metadata.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "sections": {
  3 |         "contacts": {
  4 |             "terms": [
  5 |                 "Wrangler",
  6 |                 "Wrangler.Email",
  7 |                 "Wrangler.Name",
  8 |                 "Creator",
  9 |                 "Creator.Email",
 10 |                 "Creator.Name",
 11 |                 "Publisher",
 12 |                 "Publisher.Email",
 13 |                 "Publisher.Name"
 14 |             ],
 15 |             "args": [
 16 |                 "email"
 17 |             ]
 18 |         },
 19 |         "declaredterms": {
 20 |             "terms": [
 21 |                 "DeclareTerm"
 22 |             ],
 23 |             "args": []
 24 |         },
 25 |         "resources": {
 26 |             "terms": [
 27 |                 "Datafile",
 28 |                 "Datafile.Grain",
 29 |                 "Datafile.Table",
 30 |                 "Datafile.Title",
 31 |                 "Datafile.Url",
 32 |                 "Documentation",
 33 |                 "Documentation.Description",
 34 |                 "Documentation.Title",
 35 |                 "Documentation.Url",
 36 |                 "Homepage",
 37 |                 "Homepage.Title",
 38 |                 "Homepage.Url"
 39 |             ],
 40 |             "args": [
 41 |                 "table",
 42 |                 "grain",
 43 |                 "title"
 44 |             ]
 45 |         },
 46 |         "declaredsections": {
 47 |             "terms": [
 48 |                 "DeclareSection"
 49 |             ],
 50 |             "args": []
 51 |         },
 52 |         "schema": {
 53 |             "terms": [
 54 |                 "Table",
 55 |                 "Table.Column",
 56 |                 "Table.Description",
 57 |                 "Table.Name",
 58 |                 "Column",
 59 |                 "Column.Datatype",
 60 |                 "Column.Description",
 61 |                 "Column.Name",
 62 |                 "Column.Valuetype"
 63 |             ],
 64 |             "args": [
 65 |                 "datatype",
 66 |                 "valuetype",
 67 |                 "description"
 68 |             ]
 69 |         },
 70 |         "root": {
 71 |             "terms": [
 72 |                 "Declare",
 73 |                 "Include",
 74 |                 "Section",
 75 |                 "Synonym",
 76 |                 "Title",
 77 |                 "Description",
 78 |                 "Time",
 79 |                 "Version",
 80 |                 "Format",
 81 |                 "Identifier",
 82 |                 "Note",
 83 |                 "Obsoletes",
 84 |                 "Spatial",
 85 |                 "SpatialGrain"
 86 |             ],
 87 |             "args": []
 88 |         }
 89 |     },
 90 |     "terms": {
 91 |         "<no_term>.include": {
 92 |             "term_name": "Include",
 93 |             "section": "root"
 94 |         },
 95 |         "<no_term>.declare": {
 96 |             "term_name": "Declare",
 97 |             "section": "root"
 98 |         },
 99 |         "<no_term>.title": {
100 |             "term_name": "Title",
101 |             "section": "root"
102 |         },
103 |         "datafile.grain": {
104 |             "term_name": "Datafile.Grain",
105 |             "section": "resources"
106 |         },
107 |         "homepage.url": {
108 |             "term_name": "Homepage.Url",
109 |             "section": "resources"
110 |         },
111 |         "<no_term>.synonym": {
112 |             "term_name": "Synonym",
113 |             "childpropertytype": "sequence",
114 |             "termvaluename": "term",
115 |             "section": "root"
116 |         },
117 |         "homepage.title": {
118 |             "term_name": "Homepage.Title",
119 |             "section": "resources"
120 |         },
121 |         "<no_term>.datafile": {
122 |             "term_name": "Datafile",
123 |             "termvaluename": "url",
124 |             "section": "resources"
125 |         },
126 |         "<no_term>.obsoletes": {
127 |             "term_name": "Obsoletes",
128 |             "section": "root"
129 |         },
130 |         "documentation.url": {
131 |             "term_name": "Documentation.Url",
132 |             "section": "resources"
133 |         },
134 |         "table.description": {
135 |             "term_name": "Table.Description",
136 |             "section": "schema"
137 |         },
138 |         "<no_term>.table": {
139 |             "term_name": "Table",
140 |             "termvaluename": "name",
141 |             "section": "schema"
142 |         },
143 |         "documentation.description": {
144 |             "term_name": "Documentation.Description",
145 |             "section": "resources"
146 |         },
147 |         "<no_term>.publisher": {
148 |             "term_name": "Publisher",
149 |             "termvaluename": "name",
150 |             "section": "contacts"
151 |         },
152 |         "wrangler.email": {
153 |             "term_name": "Wrangler.Email",
154 |             "section": "contacts"
155 |         },
156 |         "publisher.name": {
157 |             "term_name": "Publisher.Name",
158 |             "section": "contacts"
159 |         },
160 |         "<no_term>.note": {
161 |             "term_name": "Note",
162 |             "section": "root"
163 |         },
164 |         "<no_term>.description": {
165 |             "term_name": "Description",
166 |             "section": "root"
167 |         },
168 |         "creator.email": {
169 |             "term_name": "Creator.Email",
170 |             "section": "contacts"
171 |         },
172 |         "column.valuetype": {
173 |             "term_name": "Column.Valuetype",
174 |             "section": "schema"
175 |         },
176 |         "<no_term>.declareterm": {
177 |             "term_name": "DeclareTerm",
178 |             "termvaluename": "term",
179 |             "section": "DeclaredTerms"
180 |         },
181 |         "datafile.table": {
182 |             "term_name": "Datafile.Table",
183 |             "section": "resources"
184 |         },
185 |         "table.column": {
186 |             "term_name": "Table.Column",
187 |             "childpropertytype": "sequence",
188 |             "termvaluename": "name",
189 |             "section": "schema"
190 |         },
191 |         "<no_term>.documentation": {
192 |             "term_name": "Documentation",
193 |             "section": "resources"
194 |         },
195 |         "wrangler.name": {
196 |             "term_name": "Wrangler.Name",
197 |             "section": "contacts"
198 |         },
199 |         "column.description": {
200 |             "term_name": "Column.Description",
201 |             "section": "schema"
202 |         },
203 |         "documentation.title": {
204 |             "term_name": "Documentation.Title",
205 |             "section": "resources"
206 |         },
207 |         "<no_term>.column": {
208 |             "term_name": "Column",
209 |             "termvaluename": "name",
210 |             "synonym": "Table.Column",
211 |             "section": "schema"
212 |         },
213 |         "<no_term>.identifier": {
214 |             "term_name": "Identifier",
215 |             "section": "root"
216 |         },
217 |         "column.datatype": {
218 |             "term_name": "Column.Datatype",
219 |             "section": "schema"
220 |         },
221 |         "creator.name": {
222 |             "term_name": "Creator.Name",
223 |             "section": "contacts"
224 |         },
225 |         "column.name": {
226 |             "term_name": "Column.Name",
227 |             "section": "schema"
228 |         },
229 |         "<no_term>.format": {
230 |             "term_name": "Format",
231 |             "section": "root"
232 |         },
233 |         "<no_term>.spatialgrain": {
234 |             "term_name": "SpatialGrain",
235 |             "section": "root"
236 |         },
237 |         "<no_term>.section": {
238 |             "term_name": "Section",
239 |             "childpropertytype": "sequence",
240 |             "termvaluename": "name",
241 |             "section": "root"
242 |         },
243 |         "<no_term>.declaresection": {
244 |             "term_name": "DeclareSection",
245 |             "childpropertytype": "sequence",
246 |             "termvaluename": "section",
247 |             "section": "DeclaredSections"
248 |         },
249 |         "datafile.url": {
250 |             "term_name": "Datafile.Url",
251 |             "section": "resources"
252 |         },
253 |         "table.name": {
254 |             "term_name": "Table.Name",
255 |             "section": "schema"
256 |         },
257 |         "<no_term>.time": {
258 |             "term_name": "Time",
259 |             "section": "root"
260 |         },
261 |         "datafile.title": {
262 |             "term_name": "Datafile.Title",
263 |             "section": "resources"
264 |         },
265 |         "<no_term>.creator": {
266 |             "term_name": "Creator",
267 |             "termvaluename": "name",
268 |             "section": "contacts"
269 |         },
270 |         "<no_term>.homepage": {
271 |             "term_name": "Homepage",
272 |             "termvaluename": "url",
273 |             "section": "resources"
274 |         },
275 |         "<no_term>.spatial": {
276 |             "term_name": "Spatial",
277 |             "section": "root"
278 |         },
279 |         "<no_term>.wrangler": {
280 |             "term_name": "Wrangler",
281 |             "termvaluename": "name",
282 |             "section": "contacts"
283 |         },
284 |         "publisher.email": {
285 |             "term_name": "Publisher.Email",
286 |             "section": "contacts"
287 |         },
288 |         "<no_term>.version": {
289 |             "term_name": "Version",
290 |             "section": "root"
291 |         }
292 |     }
293 | }
294 | 


--------------------------------------------------------------------------------
/metatab/test/test-data/json/example1.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "declare": "metatab-latest",
  3 |     "title": "Registered Voters, By County",
  4 |     "name": "cdph.ca.gov-hci-registered_voters-county",
  5 |     "description": "Percent of the eligible population registered to vote and the percent who voted in statewide elections.",
  6 |     "identifier": "cdph.ca.gov-hci-registered_voters-county",
  7 |     "version": "201404",
  8 |     "obsoletes": "cdph.ca.gov-hci-registered_voters-county-201304",
  9 |     "dataset": "voters",
 10 |     "origin": "example.com",
 11 |     "space": "Ca",
 12 |     "time": "2002-2014",
 13 |     "grain": "County",
 14 |     "format": "excel",
 15 |     "datafile": [
 16 |         {
 17 |             "name": "example1",
 18 |             "schema": "registered_voters",
 19 |             "grain": "County",
 20 |             "title": "The First Example Data File",
 21 |             "url": "http://example.com/example1.csv"
 22 |         },
 23 |         {
 24 |             "name": "example2",
 25 |             "schema": "registered_voters",
 26 |             "grain": "Tract",
 27 |             "title": "The Second Example Data File",
 28 |             "url": "http://example.com/example2.csv"
 29 |         }
 30 |     ],
 31 |     "homepage": [
 32 |         {
 33 |             "schema": "Healthy Communities Data and Indicators Project (HCI)",
 34 |             "url": "https://www.cdph.ca.gov/programs/pages/healthycommunityindicators.aspx"
 35 |         }
 36 |     ],
 37 |     "documentation": [
 38 |         {
 39 |             "schema": "Indicator Documentation for Voter Registration / Participation",
 40 |             "description": "Voter Registration/Participation: Percent of the eligible population registered to vote and the percent who voted in statewide elections",
 41 |             "url": "https://www.cdph.ca.gov/programs/Documents/HCI_RegisteredVoters_653_Narrative_and_examples_6-2-14.pdf"
 42 |         },
 43 |         {
 44 |             "title": "Data Bundles Packaging Specification",
 45 |             "url": "https://docs.google.com/document/d/16tb7x73AyF8pJ6e6IBcaIJAioEZCNBGDEksKYTXfdfg/edit#"
 46 |         }
 47 |     ],
 48 |     "creator": [
 49 |         {
 50 |             "email": "HCIOHE@cdph.ca.gov",
 51 |             "name": "Office of Health Equity"
 52 |         }
 53 |     ],
 54 |     "wrangler": [
 55 |         {
 56 |             "email": "eric@civicknowledge.com",
 57 |             "name": "Eric Busboom"
 58 |         }
 59 |     ],
 60 |     "note": [
 61 |         "This file is an example of a data bundle, a simple format for linking data to metadata using spreadsheets. See the specification for more details."
 62 |     ],
 63 |     "table": [
 64 |         {
 65 |             "description": "HCI Indicator 653.0: Percent of adults age 18 years and older who are registered voters",
 66 |             "column": [
 67 |                 {
 68 |                     "datatype": "int",
 69 |                     "valuetype": "year range",
 70 |                     "description": "Year or years that indicator was reported",
 71 |                     "name": "reportyear"
 72 |                 },
 73 |                 {
 74 |                     "datatype": "str",
 75 |                     "valuetype": "dimension",
 76 |                     "description": "Type of record",
 77 |                     "name": "type"
 78 |                 },
 79 |                 {
 80 |                     "datatype": "str",
 81 |                     "valuetype": "gvid",
 82 |                     "description": "GVid version of the geotype and geotypeval",
 83 |                     "name": "gvid"
 84 |                 },
 85 |                 {
 86 |                     "datatype": "str",
 87 |                     "valuetype": "label for gvid",
 88 |                     "description": "Census name of geographic area",
 89 |                     "name": "geoname"
 90 |                 },
 91 |                 {
 92 |                     "datatype": "str",
 93 |                     "valuetype": "label",
 94 |                     "description": "Code for type of geographic area",
 95 |                     "name": "geotype"
 96 |                 },
 97 |                 {
 98 |                     "datatype": "str",
 99 |                     "valuetype": "census",
100 |                     "description": "Census geoid code",
101 |                     "name": "geotypevalue"
102 |                 },
103 |                 {
104 |                     "datatype": "str",
105 |                     "valuetype": "FIPS county code",
106 |                     "description": "County FIPS code",
107 |                     "name": "county_fips"
108 |                 },
109 |                 {
110 |                     "datatype": "str",
111 |                     "valuetype": "label for counrty_fips",
112 |                     "description": "County name",
113 |                     "name": "county_name"
114 |                 },
115 |                 {
116 |                     "datatype": "str",
117 |                     "valuetype": "census code",
118 |                     "description": "Numeric code of region",
119 |                     "name": "region_code"
120 |                 },
121 |                 {
122 |                     "datatype": "str",
123 |                     "valuetype": "label for region_code",
124 |                     "description": "Name of region",
125 |                     "name": "region_name"
126 |                 },
127 |                 {
128 |                     "datatype": "str",
129 |                     "valuetype": "raceth/civick",
130 |                     "description": "Civic Knowledge race / ethnicity code.",
131 |                     "name": "raceth"
132 |                 },
133 |                 {
134 |                     "datatype": "str",
135 |                     "valuetype": "label for raceeth",
136 |                     "description": "Race / Ethnicity Name",
137 |                     "name": "raceth_name"
138 |                 },
139 |                 {
140 |                     "datatype": "str",
141 |                     "valuetype": "raceth/hci",
142 |                     "description": "Race / ethnicity code",
143 |                     "name": "race_eth_code"
144 |                 },
145 |                 {
146 |                     "datatype": "str",
147 |                     "valuetype": "label for race_eth_code",
148 |                     "description": "Race / ethnicity name",
149 |                     "name": "race_eth_name"
150 |                 },
151 |                 {
152 |                     "datatype": "int",
153 |                     "valuetype": "count",
154 |                     "description": "Adults who are registered to vote, or who voted, depending on type of record",
155 |                     "name": "numerator"
156 |                 },
157 |                 {
158 |                     "datatype": "int",
159 |                     "valuetype": "count",
160 |                     "description": "Population of Adults, 18 years or older",
161 |                     "name": "denominator"
162 |                 },
163 |                 {
164 |                     "datatype": "float",
165 |                     "valuetype": "percent of numerator over denominator",
166 |                     "description": "Percent of adults who are registered to vote, or who voted, depending on type of record",
167 |                     "name": "percent"
168 |                 },
169 |                 {
170 |                     "datatype": "float",
171 |                     "valuetype": "ci95l for percent",
172 |                     "description": "Lower bound of 95% confidence interval",
173 |                     "name": "ll_95ci"
174 |                 },
175 |                 {
176 |                     "datatype": "float",
177 |                     "valuetype": "ci95u for percent",
178 |                     "description": "Upper bound of 95% confidence interval",
179 |                     "name": "ul_95ci"
180 |                 },
181 |                 {
182 |                     "datatype": "float",
183 |                     "valuetype": "se for percent",
184 |                     "description": "Standard error",
185 |                     "name": "se"
186 |                 },
187 |                 {
188 |                     "datatype": "float",
189 |                     "valuetype": "rse for percent",
190 |                     "description": "Relative standard error (se/percent * 100) expressed as a percent",
191 |                     "name": "rse"
192 |                 },
193 |                 {
194 |                     "datatype": "float",
195 |                     "valuetype": "decile",
196 |                     "description": "Statewide decile ranking",
197 |                     "name": "ca_decile"
198 |                 },
199 |                 {
200 |                     "datatype": "float",
201 |                     "valuetype": "ratio",
202 |                     "description": "Ratio of indicator to state average",
203 |                     "name": "ca_rr"
204 |                 },
205 |                 {
206 |                     "datatype": "float",
207 |                     "valuetype": "measure",
208 |                     "description": "Voter age population, from CA Department of Finance.",
209 |                     "name": "vap"
210 |                 },
211 |                 {
212 |                     "datatype": "str",
213 |                     "valuetype": "dimension",
214 |                     "name": "ind_id"
215 |                 },
216 |                 {
217 |                     "datatype": "str",
218 |                     "valuetype": "dimension",
219 |                     "name": "ind_definition"
220 |                 },
221 |                 {
222 |                     "datatype": "str",
223 |                     "valuetype": "other",
224 |                     "name": "version"
225 |                 }
226 |             ],
227 |             "name": "registered_voters"
228 |         }
229 |     ]
230 | }


--------------------------------------------------------------------------------
/examples/Pandas Reporter Example.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import pandas as pd\n",
 10 |     "import numpy as np\n",
 11 |     "import pandasreporter as pr\n",
 12 |     "\n",
 13 |     "\n",
 14 |     "# B17001, Poverty Status by Sex by Age\n",
 15 |     "b17001 = pr.get_dataframe('B17001', '140', '05000US06073', cache=True).ct_columns\n",
 16 |     "# B17024, Age by Ratio of Income to Poverty Level\n",
 17 |     "b17024 = pr.get_dataframe('B17024', '140', '05000US06073', cache=True).ct_columns\n",
 18 |     "# B17017, Poverty Status by Household Type by Age of Householder\n",
 19 |     "b17017 = pr.get_dataframe('B17017', '140', '05000US06073', cache=True).ct_columns"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "markdown",
 24 |    "metadata": {},
 25 |    "source": [
 26 |     "# B17001 Poverty Status by Sex by Age\n",
 27 |     "\n",
 28 |     "For the [Poverty Status by Sex by Age](https://censusreporter.org/tables/B17001/) table, we'll select the columns for males and females who are below the poverty level and 65 or older.\n",
 29 |     "\n",
 30 |     "**NOTE:** If you want to get seniors of a particular race, use the tables `C17001a-g`, the condensed race iterations. The 'C' tables have fewer age ranges, but there is no 'C' table for all races: there is a `C17001a` for Whites, a condensed version of `B17001a`, but there is no `C17001` as a condensed version of `B17001`.\n",
 31 |     "\n"
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "code",
 36 |    "execution_count": 2,
 37 |    "metadata": {},
 38 |    "outputs": [
 39 |     {
 40 |      "data": {
 41 |       "text/plain": [
 42 |        "['B17001015 Total Income in the past 12 months below poverty level Male 65 to 74 years',\n",
 43 |        " 'Margins for B17001015 Total Income in the past 12 months below poverty level Male 65 to 74 years',\n",
 44 |        " 'B17001016 Total Income in the past 12 months below poverty level Male 75 years and over',\n",
 45 |        " 'Margins for B17001016 Total Income in the past 12 months below poverty level Male 75 years and over',\n",
 46 |        " 'B17001029 Total Income in the past 12 months below poverty level Female 65 to 74 years',\n",
 47 |        " 'Margins for B17001029 Total Income in the past 12 months below poverty level Female 65 to 74 years',\n",
 48 |        " 'B17001030 Total Income in the past 12 months below poverty level Female 75 years and over',\n",
 49 |        " 'Margins for B17001030 Total Income in the past 12 months below poverty level Female 75 years and over',\n",
 50 |        " 'B17001044 Total Income in the past 12 months at or above poverty level Male 65 to 74 years',\n",
 51 |        " 'Margins for B17001044 Total Income in the past 12 months at or above poverty level Male 65 to 74 years',\n",
 52 |        " 'B17001045 Total Income in the past 12 months at or above poverty level Male 75 years and over',\n",
 53 |        " 'Margins for B17001045 Total Income in the past 12 months at or above poverty level Male 75 years and over',\n",
 54 |        " 'B17001058 Total Income in the past 12 months at or above poverty level Female 65 to 74 years',\n",
 55 |        " 'Margins for B17001058 Total Income in the past 12 months at or above poverty level Female 65 to 74 years',\n",
 56 |        " 'B17001059 Total Income in the past 12 months at or above poverty level Female 75 years and over',\n",
 57 |        " 'Margins for B17001059 Total Income in the past 12 months at or above poverty level Female 75 years and over']"
 58 |       ]
 59 |      },
 60 |      "execution_count": 2,
 61 |      "output_type": "execute_result",
 62 |      "metadata": {}
 63 |     }
 64 |    ],
 65 |    "source": [
 66 |     "[e for e in b17001.columns if '65 to 74' in str(e) or '75 years' in str(e) ]"
 67 |    ]
 68 |   },
 69 |   {
 70 |    "cell_type": "code",
 71 |    "execution_count": 3,
 72 |    "metadata": {},
 73 |    "outputs": [
 74 |     {
 75 |      "data": {
 76 |       "text/html": [
 77 |        "<div>\n",
 78 |        "<table border=\"1\" class=\"dataframe\">\n",
 79 |        "  <thead>\n",
 80 |        "    <tr style=\"text-align: right;\">\n",
 81 |        "      <th></th>\n",
 82 |        "      <th>geoid</th>\n",
 83 |        "      <th>B17001015 Total Income in the past 12 months below poverty level Male 65 to 74 years</th>\n",
 84 |        "      <th>Margins for B17001015 Total Income in the past 12 months below poverty level Male 65 to 74 years</th>\n",
 85 |        "      <th>B17001016 Total Income in the past 12 months below poverty level Male 75 years and over</th>\n",
 86 |        "      <th>Margins for B17001016 Total Income in the past 12 months below poverty level Male 75 years and over</th>\n",
 87 |        "      <th>B17001029 Total Income in the past 12 months below poverty level Female 65 to 74 years</th>\n",
 88 |        "      <th>Margins for B17001029 Total Income in the past 12 months below poverty level Female 65 to 74 years</th>\n",
 89 |        "      <th>B17001030 Total Income in the past 12 months below poverty level Female 75 years and over</th>\n",
 90 |        "      <th>Margins for B17001030 Total Income in the past 12 months below poverty level Female 75 years and over</th>\n",
 91 |        "    </tr>\n",
 92 |        "  </thead>\n",
 93 |        "  <tbody>\n",
 94 |        "    <tr>\n",
 95 |        "      <th>0</th>\n",
 96 |        "      <td>14000US06073004501</td>\n",
 97 |        "      <td>10.0</td>\n",
 98 |        "      <td>18.0</td>\n",
 99 |        "      <td>0.0</td>\n",
100 |        "      <td>12.0</td>\n",
101 |        "      <td>13.0</td>\n",
102 |        "      <td>22.0</td>\n",
103 |        "      <td>7.0</td>\n",
104 |        "      <td>12.0</td>\n",
105 |        "    </tr>\n",
106 |        "    <tr>\n",
107 |        "      <th>1</th>\n",
108 |        "      <td>14000US06073019803</td>\n",
109 |        "      <td>0.0</td>\n",
110 |        "      <td>12.0</td>\n",
111 |        "      <td>0.0</td>\n",
112 |        "      <td>12.0</td>\n",
113 |        "      <td>8.0</td>\n",
114 |        "      <td>12.0</td>\n",
115 |        "      <td>11.0</td>\n",
116 |        "      <td>17.0</td>\n",
117 |        "    </tr>\n",
118 |        "    <tr>\n",
119 |        "      <th>2</th>\n",
120 |        "      <td>14000US06073006000</td>\n",
121 |        "      <td>18.0</td>\n",
122 |        "      <td>30.0</td>\n",
123 |        "      <td>0.0</td>\n",
124 |        "      <td>12.0</td>\n",
125 |        "      <td>0.0</td>\n",
126 |        "      <td>12.0</td>\n",
127 |        "      <td>0.0</td>\n",
128 |        "      <td>12.0</td>\n",
129 |        "    </tr>\n",
130 |        "    <tr>\n",
131 |        "      <th>3</th>\n",
132 |        "      <td>14000US06073008364</td>\n",
133 |        "      <td>0.0</td>\n",
134 |        "      <td>17.0</td>\n",
135 |        "      <td>7.0</td>\n",
136 |        "      <td>18.0</td>\n",
137 |        "      <td>7.0</td>\n",
138 |        "      <td>17.0</td>\n",
139 |        "      <td>0.0</td>\n",
140 |        "      <td>17.0</td>\n",
141 |        "    </tr>\n",
142 |        "    <tr>\n",
143 |        "      <th>4</th>\n",
144 |        "      <td>14000US06073008507</td>\n",
145 |        "      <td>0.0</td>\n",
146 |        "      <td>17.0</td>\n",
147 |        "      <td>67.0</td>\n",
148 |        "      <td>61.0</td>\n",
149 |        "      <td>17.0</td>\n",
150 |        "      <td>26.0</td>\n",
151 |        "      <td>26.0</td>\n",
152 |        "      <td>41.0</td>\n",
153 |        "    </tr>\n",
154 |        "  </tbody>\n",
155 |        "</table>\n",
156 |        "</div>"
157 |       ]
158 |      },
159 |      "output_type": "execute_result",
160 |      "metadata": {}
161 |     }
162 |    ],
163 |    "source": [
164 |     "# Now create a subset dataframe with just the columns we need. \n",
165 |     "b17001s = b17001[['geoid', 'B17001015', 'B17001016','B17001029','B17001030']]\n",
166 |     "b17001s.head()"
167 |    ]
168 |   },
169 |   {
170 |    "cell_type": "markdown",
171 |    "metadata": {},
172 |    "source": [
173 |     "## Senior poverty rates\n",
174 |     "\n",
175 |     "Creating the sums for the senior below-poverty rates at the tract level is easy, but there is a *serious problem* with the results: the numbers are completely unstable. The minimum relative standard error (RSE) is 22%, and the median is about 60%. These are useless results."
176 |    ]
177 |   },
178 |   {
179 |    "cell_type": "code",
180 |    "execution_count": 4,
181 |    "metadata": {},
182 |    "outputs": [
183 |     {
184 |      "data": {
185 |       "text/plain": [
186 |        "count     576.000000\n",
187 |        "mean       87.621218\n",
188 |        "std       156.710591\n",
189 |        "min        22.150407\n",
190 |        "25%        43.645038\n",
191 |        "50%        58.919310\n",
192 |        "75%        82.136436\n",
193 |        "max      1806.402183\n",
194 |        "dtype: float64"
195 |       ]
196 |      },
197 |      "execution_count": 4,
198 |      "output_type": "execute_result",
199 |      "metadata": {}
200 |     }
201 |    ],
202 |    "source": [
203 |     "b17001_65mf = pr.CensusDataFrame()\n",
204 |     "b17001_65mf['geoid'] = b17001['geoid']\n",
205 |     "b17001_65mf['poverty_65'], b17001_65mf['poverty_65_m90'] = b17001.sum_m('B17001015', 'B17001016','B17001029','B17001030')\n",
206 |     "b17001_65mf.add_rse('poverty_65')\n",
207 |     "b17001_65mf.poverty_65_rse.replace([np.inf, -np.inf], np.nan).dropna().describe()"
208 |    ]
209 |   },
210 |   {
211 |    "cell_type": "code",
212 |    "execution_count": null,
213 |    "metadata": {},
214 |    "outputs": [],
215 |    "source": [
216 |     ""
217 |    ]
218 |   }
219 |  ],
220 |  "metadata": {
221 |   "kernelspec": {
222 |    "display_name": "Python 3",
223 |    "language": "python",
224 |    "name": "python3"
225 |   },
226 |   "language_info": {
227 |    "codemirror_mode": {
228 |     "name": "ipython",
229 |     "version": 3.0
230 |    },
231 |    "file_extension": ".py",
232 |    "mimetype": "text/x-python",
233 |    "name": "python",
234 |    "nbconvert_exporter": "python",
235 |    "pygments_lexer": "ipython3",
236 |    "version": "3.5.0"
237 |   }
238 |  },
239 |  "nbformat": 4,
240 |  "nbformat_minor": 2
241 | }


--------------------------------------------------------------------------------