├── ckanext ├── geodatagov │ ├── saml2 │ │ ├── __init__.py │ │ ├── pki │ │ │ └── README │ │ ├── attributemaps │ │ │ └── saml_uri.py │ │ ├── pkitestcrt │ │ │ ├── mykey.pem │ │ │ └── mycert.pem │ │ └── sp_config.py.template │ ├── __init__.py │ ├── tests │ │ ├── data-samples │ │ │ ├── waf-trim-tags │ │ │ │ └── index.html │ │ │ ├── waf-collection2 │ │ │ │ └── index.html │ │ │ ├── waf-collection1 │ │ │ │ └── index.html │ │ │ ├── waf-fgdc │ │ │ │ └── index.html │ │ │ ├── waf-gmi │ │ │ │ └── index.html │ │ │ ├── waf1 │ │ │ │ └── index.html │ │ │ ├── sample6_bad_data.json │ │ │ └── sample5_data.json │ │ ├── conftest.py │ │ ├── test_fix_dataset.py │ │ ├── test_fix_packages.py │ │ ├── test_json_export.py │ │ ├── test_tracking.py │ │ ├── test_s3test.py │ │ ├── test_update_geo.py │ │ ├── test_category_tags.py │ │ ├── utils.py │ │ ├── test_logic.py │ │ ├── factories.py │ │ ├── test_waf_GMI.py │ │ ├── test_datajson.py │ │ ├── test_sitemap_creation.py │ │ ├── test_fix_spatial.py │ │ ├── test_relink.py │ │ └── test_waf-collection.py │ ├── auth.py │ ├── templates │ │ ├── organization │ │ │ ├── read.html │ │ │ └── snippets │ │ │ │ └── organization_form.html │ │ ├── package │ │ │ ├── search.html │ │ │ └── read.html │ │ ├── snippets │ │ │ └── related_collection.html │ │ └── source │ │ │ └── geodatagov_source_form.html │ ├── harvesters │ │ ├── __init__.py │ │ ├── z3950.py │ │ └── waf_collection.py │ ├── search.py │ ├── helpers.py │ ├── validation │ │ ├── __init__.py │ │ └── xml │ │ │ ├── fgdc-std-001.1-1999 │ │ │ └── fgdc-std-001.1-1999.xsd │ │ │ ├── fgdc-std-012-2002 │ │ │ ├── fgdc-std-001-1998-sect03.xsd │ │ │ ├── fgdc-std-012-2002-sect03.xsd │ │ │ ├── fgdc-std-012-2002-sect05.xsd │ │ │ ├── fgdc-std-012-2002-locainfo.xsd │ │ │ ├── fgdc-std-001-1998-sect05.xsd │ │ │ └── fgdc-std-001-1998-sect09.xsd │ │ │ └── fgdc-std-001.2-2001 │ │ │ ├── fgdc-std-001.2-2001.xsd │ │ │ └── fgdc-std-001.2-2001-sect09.xsd │ ├── rebuild.py │ └── bin │ │ └── scrapewaf.py └── __init__.py ├── docker-entrypoint.d └── 10-setup-db.sh ├── MANIFEST.in ├── dev-requirements.txt ├── solr └── README ├── setup.cfg ├── .gitignore ├── .github ├── pull_request_template.md └── workflows │ ├── test.yml │ └── deploy.yml ├── requirements.txt ├── Dockerfile ├── test.sh ├── test.ini ├── Makefile ├── docker-compose.yml ├── setup.py ├── scripts └── sql │ ├── make_pk.sql │ └── what_to_alter.sql ├── LICENSE.md ├── CONTRIBUTING.md ├── .env ├── README.md └── ADR.md /ckanext/geodatagov/saml2/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'rohe0002' 2 | -------------------------------------------------------------------------------- /ckanext/geodatagov/saml2/pki/README: -------------------------------------------------------------------------------- 1 | Add pki files in this folder named: 2 | mycert.pem 3 | mykey.pem 4 | 5 | -------------------------------------------------------------------------------- /docker-entrypoint.d/10-setup-db.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "Init Harvest database tables" 4 | ckan harvester initdb 5 | 6 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE.md 3 | include requirements.txt 4 | recursive-include ckanext/geodatagov *.html *.xsd *.pem *.xslt 5 | 
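The recursive-include line above is what pulls the XSD validation schemas, PEM test certificates, and HTML fixtures into the source distribution. A quick way to confirm nothing was dropped is to list those data files from a freshly built sdist; a minimal sketch in Python (the archive name mirrors the version pinned in setup.py and is illustrative):

    import tarfile

    # Assumes `python -m build --sdist` has already produced the archive;
    # the exact filename below is illustrative.
    with tarfile.open("dist/ckanext-geodatagov-0.3.6.tar.gz") as sdist:
        for name in sorted(sdist.getnames()):
            if name.endswith((".xsd", ".pem", ".html", ".xslt")):
                print(name)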
--------------------------------------------------------------------------------
/dev-requirements.txt:
--------------------------------------------------------------------------------
1 | cryptography==44.0.1
2 | factory-boy==2.12.0
3 | mock==1.0.1
4 | flake8
5 | pycodestyle
6 | pytest
7 | pytest-ckan
8 | pytest-cov
9 | 
--------------------------------------------------------------------------------
/solr/README:
--------------------------------------------------------------------------------
1 | This file has been copied from the schema-2.0.xml file in CKAN core,
2 | and includes the following modifications:
3 | 
4 | * Add 'spatial_geom' field to index geometries for the spatial query
5 | 
--------------------------------------------------------------------------------
/ckanext/__init__.py:
--------------------------------------------------------------------------------
1 | # this is a namespace package
2 | try:
3 |     import pkg_resources
4 |     pkg_resources.declare_namespace(__name__)
5 | except ImportError:
6 |     import pkgutil
7 |     __path__ = pkgutil.extend_path(__path__, __name__)
8 | 
--------------------------------------------------------------------------------
/ckanext/geodatagov/__init__.py:
--------------------------------------------------------------------------------
1 | # this is a namespace package
2 | try:
3 |     import pkg_resources
4 |     pkg_resources.declare_namespace(__name__)
5 | except ImportError:
6 |     import pkgutil
7 |     __path__ = pkgutil.extend_path(__path__, __name__)
8 | 
--------------------------------------------------------------------------------
/ckanext/geodatagov/tests/data-samples/waf-trim-tags/index.html:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | 
4 | 
7 | 
8 | 
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [coverage:run]
2 | dynamic_context = test_function
3 | omit=ckanext/geodatagov/tests/*
4 | 
5 | [flake8]
6 | max-line-length = 127
7 | # TODO disable once future.standard_library is removed
8 | ignore = E402
9 | 
10 | [tool:pytest]
11 | norecursedirs=ckanext/geodatagov/tests/nose
12 | 
--------------------------------------------------------------------------------
/ckanext/geodatagov/auth.py:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | def related_create(context, data_dict=None):
4 |     return {'success': False}
5 | 
6 | 
7 | def related_update(context, data_dict=None):
8 |     return {'success': False}
9 | 
10 | 
11 | def group_catagory_tag_update(context, data_dict=None):
12 |     return {'success': False}
13 | 
--------------------------------------------------------------------------------
/ckanext/geodatagov/tests/data-samples/waf-collection2/index.html:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | 
4 | Index of /waf-collection2
5 | 
6 | 
7 | 

Index of /waf

8 | 2013_county.ea.iso.xml 9 | 10 | 11 | -------------------------------------------------------------------------------- /ckanext/geodatagov/templates/organization/read.html: -------------------------------------------------------------------------------- 1 | {% ckan_extends %} 2 | 3 | {% block primary_content_inner %} 4 | {% if request.args.get('collection_package_id') %} 5 | {% snippet "snippets/related_collection.html", collection_package_id=request.args.get('collection_package_id') %} 6 | {% endif %} 7 | {{ super() }} 8 | {% endblock %} 9 | -------------------------------------------------------------------------------- /ckanext/geodatagov/tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import utils 4 | 5 | 6 | @pytest.fixture(scope="session", autouse=True) 7 | def run_once_for_all_tests(): 8 | utils.simple_http_server() 9 | 10 | 11 | @pytest.fixture(scope="class", autouse=True) 12 | def run_for_every_test_class(): 13 | utils.reset_db_and_solr() 14 | -------------------------------------------------------------------------------- /ckanext/geodatagov/tests/data-samples/waf-collection1/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Index of /waf-collection1 5 | 6 | 7 |

Index of /waf

8 | tl_2013_us_county.shp.iso.xml 9 | 10 | 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | syntax: glob 3 | *.pyc 4 | *.egg-info 5 | .eggs/* 6 | *.swo 7 | *.swp 8 | *.bak 9 | .coverage 10 | conversiontool/run.sh 11 | conversiontool/errors.log 12 | conversiontool/debug.xml 13 | conversiontool/lib/saxon-license.lic 14 | conversiontool/lib/*.jar 15 | .vscode/ 16 | PyZ3950_parsetab.py 17 | src 18 | venv/* 19 | tmp/localstack/* 20 | .vim/* 21 | -------------------------------------------------------------------------------- /ckanext/geodatagov/tests/data-samples/waf-fgdc/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Index of CSDGM/FGDC sample data 6 | 7 | 8 |

Index of CSDGM/FGDC sample data

9 | 12 | -------------------------------------------------------------------------------- /ckanext/geodatagov/templates/package/search.html: -------------------------------------------------------------------------------- 1 | {% ckan_extends %} 2 | 3 | {% block primary_content %} 4 | {% if request.args.get('collection_package_id') %} 5 |
6 | {% snippet "snippets/related_collection.html", collection_package_id=request.args.get('collection_package_id') %} 7 |
8 | {% endif %} 9 | 10 | {{ super() }} 11 | 12 | {% endblock %} 13 | 14 | {{ super() }} -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | # Pull Request 2 | 3 | Related to [LINK TO ISSUE] 4 | 5 | ## About 6 | 7 | 8 | 9 | ## PR TASKS 10 | 11 | - [ ] The actual code changes. 12 | - [ ] Tests written and passed. 13 | - [ ] Any changes to docs? 14 | - [ ] Bumped version number in [setup.py](https://github.com/GSA/ckanext-geodatagov/blob/main/setup.py#L13) (also checked on [PyPi](https://pypi.org/project/ckanext-geodatagov/#history)). 15 | -------------------------------------------------------------------------------- /ckanext/geodatagov/tests/data-samples/waf-gmi/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Index of /data/existing/decennial/GEO/CPMB/boundary/2015gz/CartographicShapefiles/necta_500k 6 | 7 | 8 |

Index of /data/existing/decennial/GEO/CPMB/boundary/2015gz/CartographicShapefiles/necta_500k

9 | 12 | -------------------------------------------------------------------------------- /ckanext/geodatagov/tests/test_fix_dataset.py: -------------------------------------------------------------------------------- 1 | from ckanext.geodatagov.logic import fix_dataset 2 | 3 | 4 | def test_fix_dataset(): 5 | data_dict = { 6 | "title": "test dataset", 7 | "extras": [ 8 | {"key": "test-key", "value": "test value"}, 9 | {"key": "tags", "value": "taG*01, tag (test) 02"} 10 | ] 11 | } 12 | 13 | data_dict = fix_dataset(data_dict) 14 | 15 | assert "tag01" in [t['name'] for t in data_dict['tags']] 16 | assert "tag-test-02" in [t['name'] for t in data_dict['tags']] 17 | -------------------------------------------------------------------------------- /ckanext/geodatagov/saml2/attributemaps/saml_uri.py: -------------------------------------------------------------------------------- 1 | __author__ = 'rolandh' 2 | 3 | EDUPERSON_OID = "urn:oid:1.3.6.1.4.1.5923.1.1.1." 4 | X500ATTR_OID = "urn:oid:2.5.4." 5 | NOREDUPERSON_OID = "urn:oid:1.3.6.1.4.1.2428.90.1." 6 | NETSCAPE_LDAP = "urn:oid:2.16.840.1.113730.3.1." 7 | UCL_DIR_PILOT = 'urn:oid:0.9.2342.19200300.100.1.' 8 | PKCS_9 = "urn:oid:1.2.840.113549.1.9.1." 9 | UMICH = "urn:oid:1.3.6.1.4.1.250.1.57." 10 | 11 | MAP = { 12 | "identifier": "urn:oasis:names:tc:SAML:2.0:attrname-format:uri", 13 | "fro": { 14 | }, 15 | "to": { 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /ckanext/geodatagov/templates/snippets/related_collection.html: -------------------------------------------------------------------------------- 1 | {% set collection_package = h.get_collection_package(collection_package_id) %} 2 | {% set title = title or _('Collection:') %} 3 | {% set wrapper_class = wrapper_class or "well" %} 4 | 5 | {% if collection_package %} 6 |
7 |
8 |

{{ title }}

9 | 12 |
13 |
14 | {% endif %} 15 | -------------------------------------------------------------------------------- /ckanext/geodatagov/tests/data-samples/waf1/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Index of /data/existing/decennial/GEO/CPMB/boundary/2016Cartographic/division_500 5 | 6 | 7 |

Index of /data/existing/decennial/GEO/CPMB/boundary/2016Cartographic/division_500

8 | 12 | 13 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -e git+https://github.com/ckan/ckanext-harvest.git#egg=ckanext_harvest 2 | -e git+https://github.com/ckan/ckanext-spatial.git#egg=ckanext-spatial 3 | -e git+https://github.com/asl2/PyZ3950.git#egg=PyZ3950 4 | 5 | pyutilib 6 | 7 | # ckanext-harvest dependencies 8 | ckantoolkit>=0.0.7 9 | pika>=1.1.0 10 | pyOpenSSL>22.10 #pinning to fix error with crypto (https://levelup.gitconnected.com/fix-attributeerror-module-lib-has-no-attribute-openssl-521a35d83769) 11 | # redis==2.10.6 # included in ckan core 12 | # requests>=2.11.1 # included in ckan core 13 | 14 | # ckanext-spatial 15 | # ckantoolkit # included as dep of ckanext-harvest 16 | Shapely==2.0.6 17 | OWSLib==0.32.1 18 | lxml>=2.3 19 | argparse 20 | pyparsing>=2.1.10 21 | # requests>=1.1.0 # included in ckan-core 22 | six 23 | geojson==3.1.0 24 | 25 | # PyZ3950 26 | pyasn1 27 | # ply #required in setup.py 28 | 29 | # other requirments 30 | boto3 31 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ARG CKAN_VERSION=2.11 2 | FROM ckan/ckan-dev:${CKAN_VERSION} 3 | ARG CKAN_VERSION 4 | 5 | USER root 6 | 7 | RUN apt-get update && apt-get install -y postgresql-client openjdk-17-jre libgeos-dev 8 | 9 | # Download Saxon jar for FGDC2ISO transform (geodatagov) 10 | ARG saxon_ver=9.9.1-7 11 | ADD \ 12 | https://repo1.maven.org/maven2/net/sf/saxon/Saxon-HE/${saxon_ver}/Saxon-HE-${saxon_ver}.jar \ 13 | /usr/lib/jvm/java-11-openjdk/saxon/saxon.jar 14 | 15 | ENV CLASSPATH=${CLASSPATH}:/usr/lib/jvm/java-11-openjdk/saxon/saxon.jar 16 | 17 | # Pinned for build issue: https://github.com/pyproj4/pyproj/issues/1321 18 | RUN pip install --upgrade pip 19 | # RUN python3 -m pip install 'cython<3' 20 | # RUN python3 -m pip install --no-use-pep517 pyproj==3.4.1 21 | RUN python3 -m pip install pyproj 22 | 23 | COPY . $APP_DIR/ 24 | 25 | RUN pip install -r $APP_DIR/requirements.txt -r $APP_DIR/dev-requirements.txt -e $APP_DIR/. 
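The Saxon jar added above backs the FGDC-to-ISO transform the harvesters shell out to; the Makefile's java-test target further down shows the raw java invocation. A minimal sketch of the same call from Python (paths are the ones used in that Makefile target; passing -cp explicitly is an assumption for running outside the image, where the CLASSPATH set above already covers it):

    import subprocess

    # Transform an FGDC/CSDGM record into ISO 19115-2 using Saxon.
    result = subprocess.run(
        [
            "java", "-cp", "/usr/lib/jvm/java-11-openjdk/saxon/saxon.jar",
            "net.sf.saxon.Transform",
            "-s:/app/ckanext/geodatagov/tests/data-samples/waf-fgdc/fgdc-csdgm_sample.xml",
            "-xsl:/app/ckanext/geodatagov/harvesters/fgdcrse2iso19115-2.xslt",
        ],
        capture_output=True, text=True, check=True,
    )
    iso_xml = result.stdout  # the transformed ISO record as a string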
26 | -------------------------------------------------------------------------------- /ckanext/geodatagov/harvesters/__init__.py: -------------------------------------------------------------------------------- 1 | # this is a namespace package 2 | try: 3 | import pkg_resources 4 | pkg_resources.declare_namespace(__name__) 5 | except ImportError: 6 | import pkgutil 7 | __path__ = pkgutil.extend_path(__path__, __name__) 8 | 9 | from ckanext.geodatagov.harvesters.base import GeoDataGovHarvester # NOQA F401 10 | from ckanext.geodatagov.harvesters.base import GeoDataGovCSWHarvester # NOQA F401 11 | from ckanext.geodatagov.harvesters.base import GeoDataGovWAFHarvester # NOQA F401 12 | from ckanext.geodatagov.harvesters.base import GeoDataGovDocHarvester # NOQA F401 13 | from ckanext.geodatagov.harvesters.base import GeoDataGovGeoportalHarvester # NOQA F401 14 | from ckanext.geodatagov.harvesters.waf_collection import WAFCollectionHarvester # NOQA F401 15 | from ckanext.geodatagov.harvesters.z3950 import Z3950Harvester # NOQA F401 16 | from ckanext.geodatagov.harvesters.arcgis import ArcGISHarvester # NOQA F401 17 | -------------------------------------------------------------------------------- /ckanext/geodatagov/saml2/pkitestcrt/mykey.pem: -------------------------------------------------------------------------------- 1 | -----BEGIN RSA PRIVATE KEY----- 2 | MIICXAIBAAKBgQDkJWP7bwOxtH+E15VTaulNzVQ/0cSbM5G7abqeqSNSs0l0veHr 3 | 6/ROgW96ZeQ57fzVy2MCFiQRw2fzBs0n7leEmDJyVVtBTavYlhAVXDNa3stgvh43 4 | qCfLx+clUlOvtnsoMiiRmo7qf0BoPKTj7c0uLKpDpEbAHQT4OF1HRYVxMwIDAQAB 5 | AoGAbx9rKH91DCw/ZEPhHsVXJ6cYHxGcMoAWvnMMC9WUN+bNo4gNL205DLfsxXA1 6 | jqXFXZj3+38vSFumGPA6IvXrN+Wyp3+Lz3QGc4K5OdHeBtYlxa6EsrxPgvuxYDUB 7 | vx3xdWPMjy06G/ML+pR9XHnRaPNubXQX3UxGBuLjwNXVmyECQQD2/D84tYoCGWoq 8 | 5FhUBxFUy2nnOLKYC/GGxBTX62iLfMQ3fbQcdg2pJsB5rrniyZf7UL+9FOsAO9k1 9 | 8DO7G12DAkEA7Hkdg1KEw4ZfjnnjEa+KqpyLTLRQ91uTVW6kzR+4zY719iUJ/PXE 10 | PxJqm1ot7mJd1LW+bWtjLpxs7jYH19V+kQJBAIEpn2JnxdmdMuFlcy/WVmDy09pg 11 | 0z0imdexeXkFmjHAONkQOv3bWv+HzYaVMo8AgCOksfEPHGqN4eUMTfFeuUMCQF+5 12 | E1JSd/2yCkJhYqKJHae8oMLXByNqRXTCyiFioutK4JPYIHfugJdLfC4QziD+Xp85 13 | RrGCU+7NUWcIJhqfiJECQAIgUAzfzhdj5AyICaFPaOQ+N8FVMLcTyqeTXP0sIlFk 14 | JStVibemTRCbxdXXM7OVipz1oW3PBVEO3t/VyjiaGGg= 15 | -----END RSA PRIVATE KEY----- 16 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: [push] 4 | env: 5 | CODE_COVERAGE_THRESHOLD_REQUIRED: 33 6 | 7 | jobs: 8 | lint: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v3 12 | - uses: actions/setup-python@v4 13 | with: 14 | python-version: '3.8' 15 | - name: Install requirements 16 | run: pip install flake8 pycodestyle pytest pytest-ckan pytest-cov 17 | - name: Run flake8 18 | run: flake8 . 
--count --max-line-length=127 --statistics --exclude ckan 19 | 20 | test: 21 | needs: lint 22 | strategy: 23 | matrix: 24 | ckan-version: ['2.11'] 25 | fail-fast: false 26 | 27 | name: CKAN ${{ matrix.ckan-version }} 28 | runs-on: ubuntu-latest 29 | 30 | steps: 31 | - uses: actions/checkout@v3 32 | - name: Run everything 33 | run: source .env && CKAN_VERSION=${{ matrix.ckan-version }} make clean build ci test 34 | # - name: Setup tmate session on fail 35 | # if: ${{ failure() }} 36 | # uses: mxschmitt/action-tmate@v3 37 | -------------------------------------------------------------------------------- /.github/workflows/deploy.yml: -------------------------------------------------------------------------------- 1 | name: Publish to PyPI 2 | on: 3 | pull_request: 4 | branches: [main] 5 | types: [closed] 6 | workflow_dispatch: 7 | inputs: 8 | version_no: 9 | description: 'Release Version:' 10 | required: true 11 | 12 | jobs: 13 | deploy: 14 | name: Publish to PyPI 15 | runs-on: ubuntu-latest 16 | if: github.event.pull_request.merged == true || github.event_name == 'workflow_dispatch' 17 | steps: 18 | - name: checkout 19 | uses: actions/checkout@v4 20 | - name: Update setup.py if manual release 21 | if: github.event_name == 'workflow_dispatch' 22 | run: | 23 | sed -i "s/version='[0-9]\{1,2\}.[0-9]\{1,4\}.[0-9]\{1,4\}',/version='${{github.event.inputs.version_no}}',/g" setup.py 24 | - name: Create packages 25 | run: | 26 | pip install build 27 | python -m build 28 | - name: pypi-publish 29 | uses: pypa/gh-action-pypi-publish@release/v1 30 | with: 31 | user: __token__ 32 | password: ${{ secrets.PYPI_API_TOKEN }} 33 | -------------------------------------------------------------------------------- /ckanext/geodatagov/saml2/pkitestcrt/mycert.pem: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIIC8jCCAlugAwIBAgIJAJHg2V5J31I8MA0GCSqGSIb3DQEBBQUAMFoxCzAJBgNV 3 | BAYTAlNFMQ0wCwYDVQQHEwRVbWVhMRgwFgYDVQQKEw9VbWVhIFVuaXZlcnNpdHkx 4 | EDAOBgNVBAsTB0lUIFVuaXQxEDAOBgNVBAMTB1Rlc3QgU1AwHhcNMDkxMDI2MTMz 5 | MTE1WhcNMTAxMDI2MTMzMTE1WjBaMQswCQYDVQQGEwJTRTENMAsGA1UEBxMEVW1l 6 | YTEYMBYGA1UEChMPVW1lYSBVbml2ZXJzaXR5MRAwDgYDVQQLEwdJVCBVbml0MRAw 7 | DgYDVQQDEwdUZXN0IFNQMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDkJWP7 8 | bwOxtH+E15VTaulNzVQ/0cSbM5G7abqeqSNSs0l0veHr6/ROgW96ZeQ57fzVy2MC 9 | FiQRw2fzBs0n7leEmDJyVVtBTavYlhAVXDNa3stgvh43qCfLx+clUlOvtnsoMiiR 10 | mo7qf0BoPKTj7c0uLKpDpEbAHQT4OF1HRYVxMwIDAQABo4G/MIG8MB0GA1UdDgQW 11 | BBQ7RgbMJFDGRBu9o3tDQDuSoBy7JjCBjAYDVR0jBIGEMIGBgBQ7RgbMJFDGRBu9 12 | o3tDQDuSoBy7JqFepFwwWjELMAkGA1UEBhMCU0UxDTALBgNVBAcTBFVtZWExGDAW 13 | BgNVBAoTD1VtZWEgVW5pdmVyc2l0eTEQMA4GA1UECxMHSVQgVW5pdDEQMA4GA1UE 14 | AxMHVGVzdCBTUIIJAJHg2V5J31I8MAwGA1UdEwQFMAMBAf8wDQYJKoZIhvcNAQEF 15 | BQADgYEAMuRwwXRnsiyWzmRikpwinnhTmbooKm5TINPE7A7gSQ710RxioQePPhZO 16 | zkM27NnHTrCe2rBVg0EGz7QTd1JIwLPvgoj4VTi/fSha/tXrYUaqc9AqU1kWI4WN 17 | +vffBGQ09mo+6CffuFTZYeOhzP/2stAPwCTU4kxEoiy0KpZMANI= 18 | -----END CERTIFICATE----- 19 | -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Setup and run extension tests. This script should be run in a _clean_ CKAN 3 | # environment. e.g.: 4 | # 5 | # $ docker compose run --rm app ./test.sh 6 | # 7 | 8 | set -o errexit 9 | set -o pipefail 10 | 11 | test_ini=/srv/app/test.ini 12 | 13 | # Database is listening, but still unavailable. Just keep trying... 14 | while ! 
ckan -c $test_ini db init; do
15 |     echo Retrying in 5 seconds...
16 |     sleep 5
17 | done
18 | 
19 | HOST=db
20 | DB_NAME=ckan
21 | DB_USER=ckan
22 | PASS=ckan
23 | 
24 | # Uncomment if you would like to rapid-prototype with the spatial extension
25 | # Note: make sure the correct branch is referenced in the relevant requirements file
26 | # cd /srv/app/src/ckanext-spatial/
27 | # git pull
28 | # cd -
29 | 
30 | ckan -c $test_ini db upgrade -p harvest
31 | 
32 | 
33 | pytest --ckan-ini=test.ini --cov=ckanext.geodatagov --disable-warnings ckanext/geodatagov/tests/
34 | 
35 | # Run this pytest command if only testing a single test
36 | # pytest --ckan-ini=$test_ini --cov=ckanext.geodatagov --disable-warnings ckanext/geodatagov/tests/test_category_tags.py
37 | # pytest --ckan-ini=$test_ini --cov=ckanext.geodatagov --disable-warnings ckanext/geodatagov/tests/test_category_tags.py ckanext/geodatagov/tests/test_waf-collection.py
38 | 
--------------------------------------------------------------------------------
/ckanext/geodatagov/tests/test_fix_packages.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import logging
3 | 
4 | from ckan.tests import factories
5 | 
6 | 
7 | log = logging.getLogger(__name__)
8 | 
9 | 
10 | @pytest.mark.usefixtures("with_plugins")
11 | class TestFixPkg(object):
12 | 
13 |     @classmethod
14 |     def setup_class(cls):
15 |         cls.organization = factories.Organization()
16 | 
17 |     def test_fix_tags(self):
18 |         dataset_extras = [
19 |             {
20 |                 "key": "tags",
21 |                 "value": "tag01, tag02"
22 |             }
23 |         ]
24 |         dataset = factories.Dataset(
25 |             owner_org=self.organization['id'],
26 |             extras=dataset_extras)
27 | 
28 |         assert "tag01" in [t['name'] for t in dataset['tags']]
29 |         assert "tag02" in [t['name'] for t in dataset['tags']]
30 | 
31 |     def test_avoid_duplicated_tags(self):
32 |         dataset_extras = [
33 |             {
34 |                 "key": "tags",
35 |                 "value": "tag01, tag02"
36 |             }
37 |         ]
38 |         dataset = factories.Dataset(
39 |             owner_org=self.organization['id'],
40 |             extras=dataset_extras,
41 |             tags=[{'name': 'tag01'}])
42 | 
43 |         assert len(dataset['tags']) == 2
44 |         assert "tag01" in [t['name'] for t in dataset['tags']]
45 |         assert "tag02" in [t['name'] for t in dataset['tags']]
46 | 
--------------------------------------------------------------------------------
/ckanext/geodatagov/templates/package/read.html:
--------------------------------------------------------------------------------
1 | {% ckan_extends %}
2 | 
3 | {% set pkg_dict = c.pkg_dict %}
4 | 
5 | {% block collection_resources %}
6 | {% set collection_package_id = h.get_pkg_dict_extra(c.pkg_dict, 'collection_package_id', '') %}
7 | {% if h.get_pkg_dict_extra(c.pkg_dict, 'collection_metadata', '') %}
8 | 
9 |

{{ _('Collection') }}

10 |

{{ _('This dataset is a collection of other datasets.') }}

11 |

{{ _('Search datasets within this collection') }}

12 |
13 | {% elif collection_package_id %} 14 | {% set collection_package = h.get_collection_package(collection_package_id) %} 15 |
16 |

{{ _('Collection') }}

17 | {% if collection_package %} 18 |

{{ _('This dataset is part of the following collection:') }}

19 | 22 | {% else %} 23 |

{{ _('This dataset is part of a deleted collection.') }}

24 |

{{ _('Search other datasets within the same collection') }}

25 | {% endif %} 26 |
27 | {% endif %} 28 | {% endblock %} 29 | -------------------------------------------------------------------------------- /ckanext/geodatagov/tests/test_json_export.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from ckan.tests import factories 4 | 5 | 6 | # import json 7 | # from ckan.common import config 8 | # from ckanext.geodatagov.commands import GeoGovCommand 9 | 10 | log = logging.getLogger(__name__) 11 | 12 | 13 | class TestJSONExport(object): 14 | 15 | def create_datasets(self): 16 | 17 | org_extras = [{'key': 'organization_type', 'value': 'Federal Government'}] 18 | organization = factories.Organization(extras=org_extras) 19 | dataset1 = factories.Dataset(owner_org=organization['id']) # NOQA 20 | dataset2 = factories.Dataset(owner_org=organization['id']) # NOQA 21 | 22 | # TODO: Fix this test when `jsonl_export` is no longer defunct 23 | ''' 24 | def test_json_output(self): 25 | """ run json_export and analyze results """ 26 | 27 | self.create_datasets() 28 | 29 | # skip AWS bucket if exists 30 | config['ckanext.geodatagov.aws_bucket_name'] = None 31 | 32 | cmd = GeoGovCommand() 33 | path, _ = cmd.jsonl_export() 34 | 35 | parsed_lines = 0 36 | with open(path, 'r') as f: 37 | line = f.readline() 38 | while line: 39 | data = json.loads(line) # NOQA 40 | parsed_lines += 1 41 | line = f.readline() 42 | 43 | log.info('Data is JSON valid: {} parsed lines'.format(parsed_lines)) 44 | assert parsed_lines > 0 45 | ''' 46 | -------------------------------------------------------------------------------- /test.ini: -------------------------------------------------------------------------------- 1 | [DEFAULT] 2 | debug = true 3 | # Uncomment and replace with the address which should receive any error reports 4 | #email_to = you@yourdomain.com 5 | smtp_server = localhost 6 | error_email_from = paste@localhost 7 | 8 | [app:main] 9 | use = config:/srv/app/src/ckan/test-core.ini 10 | ckan.site_title = My Test CKAN Site 11 | ckan.site_description = A test site for testing my CKAN extension 12 | ckan.plugins = tracking harvest geodatagov datagov_harvest ckan_harvester geodatagov_geoportal_harvester z3950_harvester arcgis_harvester waf_harvester_collection geodatagov_csw_harvester geodatagov_doc_harvester geodatagov_waf_harvester spatial_metadata spatial_query resource_proxy spatial_harvest_metadata_api datajson_harvest envvars 13 | ckan.legacy_templates = no 14 | ckan.spatial.validator.profiles = iso19139ngdc 15 | ckanext.spatial.search_backend = solr-bbox 16 | 17 | # Logging configuration 18 | [loggers] 19 | keys = root, ckan, sqlalchemy 20 | 21 | [handlers] 22 | keys = console 23 | 24 | [formatters] 25 | keys = generic 26 | 27 | [logger_root] 28 | level = WARN 29 | handlers = console 30 | 31 | [logger_ckan] 32 | qualname = ckan 33 | handlers = 34 | level = INFO 35 | 36 | [logger_sqlalchemy] 37 | handlers = 38 | qualname = sqlalchemy.engine 39 | level = WARN 40 | 41 | [handler_console] 42 | class = StreamHandler 43 | args = (sys.stdout,) 44 | level = NOTSET 45 | formatter = generic 46 | 47 | [formatter_generic] 48 | format = %(asctime)s %(levelname)-5.5s [%(name)s] %(message)s 49 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CKAN_VERSION ?= 2.11 2 | COMPOSE_FILE ?= docker-compose.yml 3 | 4 | build: ## Build the docker containers 5 | CKAN_VERSION=$(CKAN_VERSION) docker compose -f $(COMPOSE_FILE) build 6 
| debug:
7 | 	CKAN_VERSION=$(CKAN_VERSION) docker compose run --service-ports app
8 | 
9 | lint: ## Lint the code
10 | 	CKAN_VERSION=$(CKAN_VERSION) docker compose -f docker-compose.yml run --rm app flake8 /srv/app/ckanext/ --count --max-line-length=127 --show-source --statistics --exclude ckan
11 | 
12 | clean: ## Clean workspace and containers
13 | 	find . -name *.pyc -delete
14 | 	CKAN_VERSION=$(CKAN_VERSION) docker compose -f $(COMPOSE_FILE) down -v --remove-orphans
15 | 
16 | test: ## Run tests in a new container
17 | 	CKAN_VERSION=$(CKAN_VERSION) docker compose -f $(COMPOSE_FILE) run --rm app /srv/app/test.sh
18 | 
19 | java-test: ## Test java transformation command (java + saxon installed)
20 | 	CKAN_VERSION=$(CKAN_VERSION) docker compose -f $(COMPOSE_FILE) run --rm app bash -c "java net.sf.saxon.Transform -s:/app/ckanext/geodatagov/tests/data-samples/waf-fgdc/fgdc-csdgm_sample.xml -xsl:/app/ckanext/geodatagov/harvesters/fgdcrse2iso19115-2.xslt"
21 | 
22 | up: ## Start the containers
23 | 	CKAN_VERSION=$(CKAN_VERSION) docker compose -f $(COMPOSE_FILE) up
24 | 
25 | down: ## Stop the containers
26 | 	CKAN_VERSION=$(CKAN_VERSION) docker compose -f $(COMPOSE_FILE) down
27 | 
28 | ci: ## Start the containers in the background
29 | 	CKAN_VERSION=$(CKAN_VERSION) docker compose -f $(COMPOSE_FILE) up -d
30 | 
31 | .DEFAULT_GOAL := help
32 | .PHONY: build clean help lint test up
33 | 
34 | # Output documentation for top-level targets
35 | # Thanks to https://marmelab.com/blog/2016/02/29/auto-documented-makefile.html
36 | help: ## This help
37 | 	@awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf "\033[36m%-10s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST)
38 | 
--------------------------------------------------------------------------------
/ckanext/geodatagov/tests/test_tracking.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import pytest
3 | 
4 | import ckan.model as model
5 | from ckan.tests import factories, helpers
6 | from click.testing import CliRunner
7 | 
8 | import ckanext.geodatagov.cli as cli
9 | 
10 | 
11 | log = logging.getLogger(__name__)
12 | 
13 | 
14 | @pytest.mark.usefixtures("with_plugins")
15 | class TestTracking(object):
16 | 
17 |     def create_datasets(self):
18 | 
19 |         organization = factories.Organization()
20 |         self.dataset = factories.Dataset(owner_org=organization["id"])
21 | 
22 |         # total view should be 0 for a new dataset
23 |         package = helpers.call_action("package_show", id=self.dataset["id"], include_tracking=True)
24 |         assert package['tracking_summary']['total'] == 0
25 | 
26 |         # insert two raw tracking data rows
27 |         sql = (
28 |             "INSERT INTO tracking_raw (user_key, url, tracking_type, access_timestamp) VALUES"
29 |             "('aaa','/dataset/{0}','page','2020-10-10'),"
30 |             "('bbb','/dataset/{0}','page','2021-11-11')"
31 |         ).format(self.dataset["name"])
32 | 
33 |         model.Session.execute(sql)
34 |         model.Session.commit()
35 | 
36 |     @pytest.fixture
37 |     def cli_result(self):
38 |         self.create_datasets()
39 | 
40 |         runner = CliRunner()
41 |         raw_cli_output = runner.invoke(
42 |             cli.tracking_update,
43 |             args=[],
44 |         )
45 | 
46 |         return raw_cli_output
47 | 
48 |     def test_tracking_data_in_package_show(self, cli_result):
49 | 
50 |         assert cli_result.exit_code == 0
51 | 
52 |         package = helpers.call_action("package_show", id=self.dataset["id"], include_tracking=True)
53 |         assert package['tracking_summary']['total'] == 2
54 |         assert package['tracking_summary']['recent'] == 1
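The test above seeds tracking_raw by hand and lets the click command roll the rows up before package_show reports them. When debugging the rollup locally, the aggregated rows can be inspected the same way; a hedged sketch in the test's own raw-SQL style (the tracking_summary column names are assumed from the CKAN core schema, and the dataset name is illustrative):

    import ckan.model as model

    dataset_name = "my-dataset"  # illustrative; the test uses self.dataset["name"]

    # The '/dataset/<name>' url form matches what the test inserts into tracking_raw.
    sql = (
        "SELECT tracking_date, count, running_total, recent_views "
        "FROM tracking_summary WHERE url = '/dataset/{0}'"
    ).format(dataset_name)

    for row in model.Session.execute(sql):
        print(tuple(row))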
--------------------------------------------------------------------------------
/ckanext/geodatagov/templates/organization/snippets/organization_form.html:
--------------------------------------------------------------------------------
1 | {% ckan_extends %}
2 | 
3 | {% block custom_fields %}
4 |   {% set extras_email = [] %}
5 |   {% set extras_except_email = [] %}
6 |   {% for extra in data.extras %}
7 |     {% if extra.key == 'email_list' %}
8 |       {# there is only one email value, but using a list here to bypass jinja variable scope limitation #}
9 |       {% do extras_email.append(extra.value) %}
10 |     {% else %}
11 |       {% do extras_except_email.append(extra) %}
12 |     {% endif %}
13 |   {% endfor %}
14 |   {{ form.hidden('extras__0__key', value='email_list') }}
15 |   {{ form.textarea('extras__0__value', label=_('Harvest report email list'), id='field-extras-0-value', value=extras_email[0], error=errors[prefix ~ 'value']) }}
16 | 
17 |   {% for extra in extras_except_email %}
18 |     {% set prefix = 'extras__%d__' % loop.index %}
19 |     {{ form.custom(
20 |       names=(prefix ~ 'key', prefix ~ 'value', prefix ~ 'deleted'),
21 |       id='field-extras-%d' % loop.index,
22 |       label=_('Custom Field'),
23 |       values=(extra.key, extra.value, extra.deleted),
24 |       error=errors[prefix ~ 'key'] or errors[prefix ~ 'value']
25 |     ) }}
26 |   {% endfor %}
27 | 
28 |   {# Add a max of 3 empty columns #}
29 |   {% for extra in range(extras_except_email|count, 3) %}
30 |     {% set index = (loop.index + extras_except_email|count) %}
31 |     {% set prefix = 'extras__%d__' % index %}
32 |     {{ form.custom(
33 |       names=(prefix ~ 'key', prefix ~ 'value', prefix ~ 'deleted'),
34 |       id='field-extras-%d' % index,
35 |       label=_('Custom Field'),
36 |       values=(extra.key, extra.value, extra.deleted),
37 |       error=errors[prefix ~ 'key'] or errors[prefix ~ 'value']
38 |     ) }}
39 |   {% endfor %}
40 | {% endblock %}
41 | 
42 | 
--------------------------------------------------------------------------------
/ckanext/geodatagov/search.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from ckan.common import config
3 | # from ckan.lib.search.common import make_connection
4 | # from ckan.lib.search.query import SearchQuery
5 | 
6 | from ckan.lib.search import make_connection, PackageSearchQuery  # , SolrSettings
7 | 
8 | 
9 | log = logging.getLogger(__name__)
10 | 
11 | 
12 | class GeoPackageSearchQuery(PackageSearchQuery):
13 |     def get_count(self):
14 |         """
15 |         Return the count of all indexed packages.
16 |         """
17 |         query = "*: *"
18 |         fq = "+site_id: \"%s\" " % config.get('ckan.site_id')
19 |         fq += "+state: active "
20 | 
21 |         conn = make_connection()
22 | 
23 |         try:
24 |             data = conn.search(query, fq=fq, rows=0)
25 |         except Exception as e:
26 |             error = 'Error in GeoPackageSearchQuery.get_count: {}'.format(e)
27 |             log.error(error)
28 |             print(error)
29 | 
30 |         return data.hits
31 | 
32 |     def get_paginated_entity_name_modtime(self, max_results=1000, start=0):
33 |         """
34 |         Return a list of the names and metadata_modified values of indexed packages.
35 | """ 36 | query = "*: *" 37 | fq = "+site_id: \"%s\" " % config.get('ckan.site_id') 38 | fq += "+state: active " 39 | 40 | conn = make_connection() 41 | try: 42 | data = conn.search(query, 43 | fq=fq, 44 | rows=max_results, 45 | fl='name,metadata_modified', 46 | start=start, 47 | sort='metadata_created asc') 48 | except Exception as e: 49 | error = 'Error in GeoPackageSearchQuery.get_paginated_entity_name_modtime: {}'.format(e) 50 | log.error(error) 51 | print(error) 52 | 53 | return [{'name': r.get('name'), 54 | 'metadata_modified': r.get('metadata_modified')} 55 | for r in data.docs] 56 | -------------------------------------------------------------------------------- /ckanext/geodatagov/helpers.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | 4 | from ckan import plugins as p 5 | from ckanext.harvest.model import HarvestSource 6 | from ckan.logic import NotFound, NotAuthorized 7 | 8 | log = logging.getLogger(__name__) 9 | 10 | try: 11 | from ckanext.geodatagov.harvesters.base import VALIDATION_PROFILES 12 | except ImportError as e: 13 | log.critical('Harvester not available %s' % str(e)) 14 | 15 | 16 | def get_validation_profiles(): 17 | return VALIDATION_PROFILES 18 | 19 | 20 | def get_validation_schema(): 21 | try: 22 | from ckanext.datajson.harvester_base import VALIDATION_SCHEMA 23 | except ImportError: 24 | return None 25 | 26 | return VALIDATION_SCHEMA 27 | 28 | 29 | def get_harvest_source_type(harvester_id): 30 | source_type = None 31 | try: 32 | package = p.toolkit.get_action('harvest_source_show')({}, {'id': harvester_id}) 33 | source_type = package['source_type'] 34 | except BaseException: 35 | pass 36 | 37 | return source_type 38 | 39 | 40 | def get_harvest_source_config(harvester_id): 41 | source_config = {} 42 | keys_lookfor = [ 43 | 'default_groups', 44 | 'private_datasets', 45 | 'validator_profiles', 46 | ] 47 | try: 48 | harvest_source = HarvestSource.get(harvester_id) 49 | source_config = json.loads(harvest_source.config) 50 | except BaseException: 51 | pass 52 | 53 | # convert single string element list to string 54 | if source_config: 55 | for key in keys_lookfor: 56 | value = source_config.get(key, '') 57 | if type(value) is list: 58 | source_config[key] = value[0] 59 | return source_config 60 | 61 | 62 | def get_collection_package(collection_package_id): 63 | try: 64 | package = p.toolkit.get_action('package_show')({}, {'id': collection_package_id}) 65 | return package 66 | except (NotFound, NotAuthorized): 67 | pass 68 | 69 | 70 | def string(value): 71 | return str(value) 72 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | app: 3 | image: datagov/ckanext-geodatagov:${CKAN_VERSION} # ensures docker-compose will rebuild the right image in case we change CKAN_VERSION 4 | build: 5 | context: . 
6 | args: 7 | CKAN_VERSION: ${CKAN_VERSION} 8 | env_file: 9 | - .env 10 | environment: 11 | CKAN_SOLR_URL: http://solr:8983/solr/ckan 12 | CKAN_REDIS_URL: redis://redis:6379/1 13 | CKAN_DATAPUSHER_URL: http://localhost:8080/ # datapusher is not really enabled 14 | PYTHONDONTWRITEBYTECODE: 1 15 | ports: 16 | - "5000:5000" 17 | depends_on: 18 | - db 19 | - redis 20 | - solr 21 | - localstack-container 22 | volumes: 23 | - ./ckanext:/srv/app/ckanext/ 24 | - ./test.sh:/srv/app/test.sh 25 | - ./test.ini:/srv/app/test.ini 26 | - ./setup.py:/srv/app/setup.py 27 | - ckan_storage:/var/lib/ckan 28 | - ./docker-entrypoint.d:/docker-entrypoint.d 29 | 30 | db: 31 | image: datagov/catalog.data.gov.db:latest 32 | env_file: 33 | - .env 34 | healthcheck: 35 | test: ["CMD", "pg_isready --username=postgres"] 36 | interval: 10s 37 | timeout: 5s 38 | retries: 5 39 | ports: 40 | - "5432:5432" 41 | volumes: 42 | - pg_data:/var/lib/postgresql/data 43 | 44 | redis: 45 | image: redis:alpine 46 | 47 | solr: 48 | image: datagov/catalog.data.gov.solr:latest 49 | ports: 50 | - "8983:8983" 51 | 52 | localstack-container: 53 | container_name: "localstack-container" 54 | privileged: true 55 | image: localstack/localstack:1.1.0 56 | ports: 57 | - "4566-4583:4566-4583" 58 | - "8081:8081" 59 | environment: 60 | - SERVICES=s3 61 | - DEBUG=1 62 | - DATA_DIR=/tmp/localstack/data 63 | - HOSTNAME= 64 | - DOCKER_HOST=unix:///var/run/docker.sock 65 | - DEFAULT_REGION=us-east-1 66 | - START_WEB=1 67 | volumes: 68 | - "./tmp/localstack:/var/lib/localstack" 69 | - "./tmp/localstack/run/docker.sock:/var/run/docker.sock" 70 | 71 | volumes: 72 | ckan_storage: 73 | pg_data: 74 | solr_data: 75 | -------------------------------------------------------------------------------- /ckanext/geodatagov/tests/test_s3test.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import pytest 3 | import requests 4 | from click.testing import CliRunner, Result 5 | 6 | from ckan.common import config 7 | 8 | import ckanext.geodatagov.cli as cli 9 | 10 | 11 | log = logging.getLogger(__name__) 12 | 13 | 14 | class TestS3TestCommand(object): 15 | @pytest.fixture 16 | def txt_cli_result(self) -> Result: 17 | 18 | runner = CliRunner() 19 | raw_cli_output = runner.invoke( 20 | cli.s3_test, 21 | args=['txt'], 22 | ) 23 | 24 | return raw_cli_output 25 | 26 | @pytest.fixture 27 | def html_cli_result(self) -> Result: 28 | 29 | runner = CliRunner() 30 | raw_cli_output = runner.invoke( 31 | cli.s3_test, 32 | args=['html'], 33 | ) 34 | 35 | return raw_cli_output 36 | 37 | def test_s3_upload_txt(self, txt_cli_result): 38 | """upload test.txt to s3 and make sure there's no errors""" 39 | # check successful cli run 40 | assert txt_cli_result.exit_code == 0 41 | 42 | endpoint_url = config.get("ckanext.s3sitemap.endpoint_url") 43 | bucket = config.get("ckanext.s3sitemap.aws_bucket_name") 44 | 45 | s3_response = requests.get(f"{endpoint_url}/{bucket}/test.txt") 46 | assert txt_cli_result.output.strip("\n") == s3_response.content.decode("utf8") 47 | 48 | # check content-type 49 | assert 'text/plain' == s3_response.headers['content-type'] 50 | 51 | def test_s3_upload_html(self, html_cli_result): 52 | """upload test.html to s3 and make sure there's no errors""" 53 | # check successful cli run 54 | assert html_cli_result.exit_code == 0 55 | 56 | endpoint_url = config.get("ckanext.s3sitemap.endpoint_url") 57 | bucket = config.get("ckanext.s3sitemap.aws_bucket_name") 58 | 59 | # chcek content 60 | s3_response = 
requests.get(f"{endpoint_url}/{bucket}/test.html") 61 | assert html_cli_result.output.strip("\n") == s3_response.content.decode("utf8") 62 | 63 | # check content-type 64 | assert 'application/html' == s3_response.headers['content-type'] 65 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | from codecs import open # To use a consistent encoding 3 | from os import path 4 | 5 | here = path.abspath(path.dirname(__file__)) 6 | 7 | # Get the long description from the relevant file 8 | with open(path.join(here, "README.md"), encoding="utf-8") as f: 9 | long_description = f.read() 10 | 11 | setup( 12 | name="ckanext-geodatagov", 13 | version="0.3.6", 14 | description="", 15 | long_description=long_description, 16 | long_description_content_type="text/markdown", 17 | classifiers=[ 18 | "Programming Language :: Python :: 3" 19 | ], # Get strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers 20 | keywords="", 21 | author="Data.gov", 22 | author_email="datagovhelp@gsa.gov", 23 | url="https://github.com/GSA/ckanext-geodatagov", 24 | license="", 25 | packages=find_packages(exclude=["ez_setup", "examples", "tests"]), 26 | namespace_packages=["ckanext", "ckanext.geodatagov"], 27 | include_package_data=True, 28 | zip_safe=False, 29 | install_requires=[ 30 | # -*- Extra requirements: -*- 31 | "ckanext-datajson>=0.1.19", 32 | "boto3", 33 | "ply>=3.4", 34 | ], 35 | setup_requires=["wheel"], 36 | entry_points=""" 37 | [ckan.plugins] 38 | # Add plugins here, eg 39 | geodatagov=ckanext.geodatagov.plugin:Demo 40 | s3test=ckanext.geodatagov.plugin:S3Test 41 | datagov_harvest=ckanext.geodatagov.plugin:DataGovHarvest 42 | 43 | geodatagov_csw_harvester=ckanext.geodatagov.harvesters:GeoDataGovCSWHarvester 44 | geodatagov_waf_harvester=ckanext.geodatagov.harvesters:GeoDataGovWAFHarvester 45 | geodatagov_doc_harvester=ckanext.geodatagov.harvesters:GeoDataGovDocHarvester 46 | geodatagov_geoportal_harvester=ckanext.geodatagov.harvesters:GeoDataGovGeoportalHarvester 47 | waf_harvester_collection=ckanext.geodatagov.harvesters:WAFCollectionHarvester 48 | arcgis_harvester=ckanext.geodatagov.harvesters:ArcGISHarvester 49 | z3950_harvester=ckanext.geodatagov.harvesters:Z3950Harvester 50 | 51 | [paste.paster_command] 52 | geodatagov=ckanext.geodatagov.commands:GeoGovCommand 53 | """, 54 | ) 55 | -------------------------------------------------------------------------------- /scripts/sql/make_pk.sql: -------------------------------------------------------------------------------- 1 | drop table old_new_source_id_mapping; 2 | drop table harvest_source_after_load; 3 | drop table tmp_to_delete; 4 | 5 | 6 | ALTER TABLE activity 7 | ADD CONSTRAINT activity_pkey PRIMARY KEY (id); 8 | 9 | ALTER TABLE activity_detail 10 | ADD CONSTRAINT activity_detail_pkey PRIMARY KEY (id); 11 | 12 | ALTER TABLE group_extra_revision 13 | ADD CONSTRAINT group_extra_revision_pkey PRIMARY KEY (id, revision_id); 14 | 15 | ALTER TABLE group_revision 16 | ADD CONSTRAINT group_revision_pkey PRIMARY KEY (id, revision_id); 17 | 18 | ALTER TABLE harvest_object_extra 19 | ADD CONSTRAINT harvest_object_extra_pkey PRIMARY KEY (id); 20 | 21 | ALTER TABLE member_revision 22 | ADD CONSTRAINT member_revision_pkey PRIMARY KEY (id, revision_id); 23 | 24 | ALTER TABLE package_extra 25 | ADD CONSTRAINT package_extra_pkey PRIMARY KEY (id); 26 | 27 | ALTER TABLE package_extra_revision 28 
| ADD CONSTRAINT package_extra_revision_pkey PRIMARY KEY (id, revision_id); 29 | 30 | ALTER TABLE package_relationship_revision 31 | ADD CONSTRAINT package_relationship_revision_pkey PRIMARY KEY (id, revision_id); 32 | 33 | ALTER TABLE package_revision 34 | ADD CONSTRAINT package_revision_pkey PRIMARY KEY (id, revision_id); 35 | 36 | ALTER TABLE package_tag 37 | ADD CONSTRAINT package_tag_pkey PRIMARY KEY (id); 38 | 39 | ALTER TABLE package_tag_revision 40 | ADD CONSTRAINT package_tag_revision_pkey PRIMARY KEY (id, revision_id); 41 | 42 | ALTER TABLE resource_group_revision 43 | ADD CONSTRAINT resource_group_revision_pkey PRIMARY KEY (id, revision_id); 44 | 45 | ALTER TABLE resource_revision 46 | ADD CONSTRAINT resource_revision_pkey PRIMARY KEY (id, revision_id); 47 | 48 | ALTER TABLE revision 49 | ADD CONSTRAINT revision_pkey PRIMARY KEY (id); 50 | 51 | ALTER TABLE system_info_revision 52 | ADD CONSTRAINT system_info_revision_pkey PRIMARY KEY (id, revision_id); 53 | 54 | ALTER TABLE term_translation 55 | ADD CONSTRAINT term_translation_pkey PRIMARY KEY (term, term_translation); 56 | 57 | ALTER TABLE tracking_raw 58 | ADD CONSTRAINT tracking_raw_pkey PRIMARY KEY (user_key, access_timestamp); 59 | 60 | ALTER TABLE tracking_summary 61 | ADD CONSTRAINT tracking_summary_pkey PRIMARY KEY (url, tracking_type, package_id, tracking_date); 62 | 63 | -------------------------------------------------------------------------------- /ckanext/geodatagov/saml2/sp_config.py.template: -------------------------------------------------------------------------------- 1 | import os.path 2 | 3 | from saml2 import BINDING_HTTP_REDIRECT 4 | from saml2.saml import NAME_FORMAT_URI 5 | 6 | BASE= 'https://saml-test.datagov.ckan.org/' 7 | #BASE = 'http://localhost:5000/' 8 | CONFIG_PATH = os.path.dirname(__file__) 9 | 10 | CONFIG = { 11 | 'entityid' : 'urn:mace:umu.se:saml:ckan:sp', 12 | 'description': 'CKAN saml2 authorizor', 13 | 'service': { 14 | 'sp': { 15 | 'name' : 'CKAN SP', 16 | 'endpoints': { 17 | 'assertion_consumer_service': [BASE], 18 | 'single_logout_service' : [(BASE + 'slo', 19 | BINDING_HTTP_REDIRECT)], 20 | }, 21 | 'required_attributes': [ 22 | 'uid', 23 | 'name', 24 | 'mail', 25 | 'status', 26 | 'roles', 27 | 'field_display_name', 28 | 'realname', 29 | 'field_unique_id', 30 | 'field_type_of_user', 31 | 'field_organization_type', 32 | 'field_agency', 33 | 'field_organization', 34 | ], 35 | 'allow_unsolicited': True, 36 | 'optional_attributes': [], 37 | 'idp': ['urn:mace:umu.se:saml:ckan:idp'], 38 | } 39 | }, 40 | 'debug': 0, 41 | 'key_file': CONFIG_PATH + '/pki/mykey.pem', 42 | 'cert_file': CONFIG_PATH + '/pki/mycert.pem', 43 | 'attribute_map_dir': CONFIG_PATH + '/attributemaps', 44 | 'metadata': { 45 | 'local': [CONFIG_PATH + '/idp.xml'], 46 | }, 47 | # -- below used by make_metadata -- 48 | 'organization': { 49 | 'name': 'Exempel AB', 50 | 'display_name': [('Exempel AB','se'),('Example Co.','en')], 51 | 'url':'http://www.example.com/ckan', 52 | }, 53 | 'contact_person': [{ 54 | 'given_name':'John', 55 | 'sur_name': 'Smith', 56 | 'email_address': ['john.smith@example.com'], 57 | 'contact_type': 'technical', 58 | }, 59 | ], 60 | 'name_form': NAME_FORMAT_URI, 61 | 'logger': { 62 | 'rotating': { 63 | 'filename': '/tmp/sp.log', 64 | 'maxBytes': 100000, 65 | 'backupCount': 5, 66 | }, 67 | 'loglevel': 'error', 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /ckanext/geodatagov/tests/test_update_geo.py: 
-------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from ckanext.geodatagov.logic import translate_spatial 4 | 5 | from utils import populate_locations_table 6 | 7 | 8 | log = logging.getLogger(__name__) 9 | 10 | 11 | class TestUpdateGeo(object): 12 | 13 | def setup_method(self): 14 | populate_locations_table() 15 | 16 | def test_translations(self): 17 | """ test translate_spatial function """ 18 | 19 | # Test place in locations table 20 | us = ('{"type":"Polygon","coordinates":[[[-124.733253,24.544245],[-124.733253,49.388611],' 21 | '[-66.954811,49.388611],[-66.954811,24.544245],[-124.733253,24.544245]]]}') 22 | assert translate_spatial('United States') == us 23 | california = ('{"type":"Polygon","coordinates":[[[-124.3926,32.5358],[-124.3926,42.0022],' 24 | '[-114.1252,42.0022],[-114.1252,32.5358],[-124.3926,32.5358]]]}') 25 | assert translate_spatial('California') == california 26 | 27 | # test numeric versions 28 | assert translate_spatial('1.0,2.0,3.5,5.5') == ('{"type": "Polygon", "coordinates": ' 29 | '[[[1.0, 2.0], [1.0, 5.5], [3.5, 5.5], ' 30 | '[3.5, 2.0], [1.0, 2.0]]]}') 31 | # Test not existent places 32 | assert translate_spatial('not exists') is None 33 | assert translate_spatial('1.0,3.0') is None 34 | assert translate_spatial('US, Virginia, Fairfax, Reston') is None 35 | assert translate_spatial( 36 | '["CARTESIAN", [{"WestBoundingCoordinate": -69.864167, "NorthBoundingCoordinate": 70.843889, ' 37 | '"EastBoundingCoordinate": -69.864167, "SouthBoundingCoordinate": 70.843889}, ' 38 | '{"WestBoundingCoordinate": -68.156667, "NorthBoundingCoordinate": 70.313889, ' 39 | '"EastBoundingCoordinate": -68.156667, "SouthBoundingCoordinate": 70.313889}, ' 40 | '{"WestBoundingCoordinate": -70.52, "NorthBoundingCoordinate": 69.846667, ' 41 | '"EastBoundingCoordinate": -70.52, "SouthBoundingCoordinate": 69.846667}, ' 42 | '{"WestBoundingCoordinate": -70.52007, "NorthBoundingCoordinate": 70.843889, ' 43 | '"EastBoundingCoordinate": -68.15668, "SouthBoundingCoordinate": 69.84673}]]' 44 | ) is None 45 | -------------------------------------------------------------------------------- /scripts/sql/what_to_alter.sql: -------------------------------------------------------------------------------- 1 | create index idx_harvest_object_guid on harvest_object(guid); 2 | create index idx_harvest_object_pkg_id on harvest_object(package_id); 3 | create index idx_harvest_object_id on harvest_object_extra(harvest_object_id); 4 | create index idx_harvest_object_err on harvest_object_error(harvest_object_id); 5 | create index idx_package_extend_pkg_id on package_extent(package_id); 6 | 7 | create index idx_package_extra_revision_pkg_id on package_extra_revision(package_id); 8 | create index idx_package_extra_revision on package_extra_revision(id); 9 | 10 | 11 | --special 12 | create index idx_revision_id on revision(id); 13 | drop index idx_package_resource_pkg_id_resource_id; 14 | 15 | create index idx_resource_name on resource(name); 16 | 17 | 18 | create index idx_resource_group_pkg_id on resource_group(package_id); 19 | create index idx_resource_group_revision_pkg_id on resource_group_revision(package_id); 20 | create index idx_resource_group_revision_rev_id on resource_group_revision(revision_id); 21 | create index idx_resource_group_revision on resource_group_revision(id); 22 | 23 | create index idx_resource_revision on resource_revision(id); 24 | create index idx_resource_revision_res_grp_id on resource_revision(resource_group_id); 25 | create 
index idx_member_revision_id on member_revision(id); 26 | create index idx_member_revision_group_id on member_revision(group_id); 27 | 28 | 29 | 30 | 31 | drop INDEX idx_package_extra_current; 32 | drop INDEX idx_package_extra_period; 33 | drop INDEX idx_package_extra_period_package; 34 | drop index idx_extra_id_pkg_id; 35 | 36 | drop INDEX idx_package_tag_id ; 37 | 38 | drop INDEX idx_package_tag_current ; 39 | drop INDEX idx_package_tag_revision_pkg_id_tag_id ; 40 | drop INDEX idx_period_package_tag ; 41 | 42 | drop INDEX idx_resource_group_period ; 43 | drop INDEX idx_resource_group_period_package ; 44 | drop INDEX idx_resource_group_current ; 45 | 46 | drop INDEX idx_resource_period; 47 | drop INDEX idx_resource_current; 48 | drop INDEX idx_resource_period_resource_group; 49 | 50 | drop index idx_package_group_period_package_group; 51 | drop index "idx_package_group_current"; 52 | 53 | 54 | drop index idx_pkg_id; 55 | drop index idx_pkg_name; 56 | drop index idx_pkg_rev_id; 57 | drop index idx_pkg_sid; 58 | drop index idx_pkg_slname; 59 | drop index idx_pkg_sname; 60 | drop index idx_pkg_srev_id; 61 | drop index idx_pkg_stitle; 62 | drop index idx_pkg_suname; 63 | drop index idx_pkg_title; 64 | drop index idx_pkg_uname; 65 | -------------------------------------------------------------------------------- /ckanext/geodatagov/tests/test_category_tags.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import json 3 | import pytest 4 | 5 | from ckan import plugins as p 6 | from ckan.tests import factories 7 | 8 | 9 | log = logging.getLogger(__name__) 10 | 11 | 12 | @pytest.mark.usefixtures("with_plugins") 13 | class TestCategoryTags(object): 14 | 15 | def create_datasets(self): 16 | organization = factories.Organization() 17 | self.group1 = factories.Group() 18 | self.group2 = factories.Group() 19 | self.dataset1 = factories.Dataset(owner_org=organization['id'], groups=[{"name": self.group1["name"]}]) 20 | self.dataset2 = factories.Dataset(owner_org=organization['id'], groups=[{"name": self.group2["name"]}]) 21 | sysadmin = factories.SysadminWithToken() 22 | self.user_name = sysadmin['name'] 23 | 24 | def test_group_catagory_tag_update(self): 25 | self.create_datasets() 26 | context = {'user': self.user_name, 'ignore_auth': True} 27 | 28 | self.dataset1['categories'] = '["cat1"]' 29 | self.dataset1['group_id'] = self.group1["id"] 30 | p.toolkit.get_action('group_catagory_tag_update')(context, self.dataset1) 31 | expected_extra = {"key": "__category_tag_{}".format(self.group1["id"]), 32 | "value": json.dumps(self.dataset1['categories'])} 33 | pkg_dict = p.toolkit.get_action('package_show')(context, {'id': self.dataset1["id"]}) 34 | assert expected_extra in pkg_dict["extras"] 35 | 36 | # test if we preserve category tag extras while we update the dataset 37 | pkg_dict['Title'] = 'Change title 02' 38 | pkg_dict = p.toolkit.get_action('package_update')(context, pkg_dict) 39 | assert expected_extra in pkg_dict["extras"] 40 | 41 | self.dataset2['categories'] = '["cat2"]' 42 | self.dataset2['group_id'] = self.group2["id"] 43 | p.toolkit.get_action('group_catagory_tag_update')(context, self.dataset2) 44 | expected_extra = {"key": "__category_tag_{}".format(self.group2["id"]), 45 | "value": json.dumps(self.dataset2['categories'])} 46 | pkg_dict = p.toolkit.get_action('package_show')(context, {'id': self.dataset2["id"]}) 47 | assert expected_extra in pkg_dict["extras"] 48 | 49 | # test if we preserve category tag extras while we update the 
dataset
50 |         pkg_dict['Title'] = 'Change title 03'
51 |         pkg_dict = p.toolkit.get_action('package_update')(context, pkg_dict)
52 |         assert expected_extra in pkg_dict["extras"]
53 | 
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | This project utilizes code from http://ckan.org/. Therefore, all code and content created by CKAN is [licensed under the GNU Affero General Public License](https://github.com/ckan/ckan/blob/master/LICENSE.txt). All contributions and code added to this project are [dedicated to the public domain worldwide](https://creativecommons.org/publicdomain/zero/1.0/).
2 | 
3 | ## Public Domain
4 | 
5 | This project constitutes a work of the United States Government and is not subject to domestic copyright protection under 17 USC § 105. Additionally, we waive copyright and related rights in the work worldwide through the [CC0 1.0 Universal public domain dedication](https://creativecommons.org/publicdomain/zero/1.0/).
6 | 
7 | All contributions to this project will be released under the CC0 dedication. By submitting a pull request, you are agreeing to comply with this waiver of copyright interest. See [CONTRIBUTING](https://github.com/GSA/ckanext-geodatagov/blob/master/CONTRIBUTING.md) for more information.
8 | 
9 | ## GNU Affero General Public License
10 | 
11 | This project utilizes code [licensed under the terms of the GNU Affero General Public License](https://github.com/ckan/ckan/blob/master/LICENSE.txt).
12 | 
13 | CKAN is free software: you can redistribute it and/or modify
14 | it under the terms of the GNU Affero General Public License as
15 | published by the Free Software Foundation, either version 3 of the
16 | License, or (at your option) any later version.
17 | 
18 | CKAN is distributed in the hope that it will be useful,
19 | but WITHOUT ANY WARRANTY; without even the implied warranty of
20 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 | GNU Affero General Public License for more details.
22 | 
23 | Visit http://www.gnu.org/licenses/ to learn more about the GNU Affero General Public License.
24 | 
25 | ### Note
26 | 
27 | CKAN is sometimes packaged directly with other software (listed in
28 | requirements.txt and dev-requirements.txt).
29 | In these cases, we are required to list the licenses of the packaged software
30 | too. They are all AGPL compatible and listed in the [CKAN licensing.txt](https://github.com/ckan/ckan/blob/master/LICENSE.txt).
31 | 
32 | 
33 | ## Other Information
34 | 
35 | In no way are the patent or trademark rights of any person affected by CC0, nor are the rights that other persons may have in the work or in how the work is used, such as publicity or privacy rights.
36 | 
37 | Unless expressly stated otherwise, the person who associated a work with this deed makes no warranties about the work, and disclaims liability for all uses of the work, to the fullest extent permitted by applicable law. When using or citing the work, you should not imply endorsement by the author or the affirmer.
38 | 
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | ## Welcome!
2 | 
3 | We're so glad you're thinking about contributing to Data.gov!
4 |
5 | Before contributing to this extension, we encourage you to read our CONTRIBUTING guide (you are here), our [LICENSE](https://github.com/GSA/ckanext-geodatagov/blob/master/LICENSE.md), and our [README](https://github.com/GSA/ckanext-geodatagov/blob/master/README.md), all of which should be in this repository. If you have any questions, you can email the Data.gov team at [datagov@gsa.gov](mailto:datagov@gsa.gov).
6 |
7 | ## Ways to Contribute
8 |
9 | **The Data.gov team manages all project-wide Data.gov updates, bugs, and feature additions via the public [GSA Data.gov issue tracker](https://github.com/GSA/data.gov/issues).**
10 |
11 | **Please limit issues submitted to this repository to discrete issues with this extension.**
12 |
13 | If you do not already have a GitHub account, you can [sign up for GitHub here](https://github.com/). In the spirit of open source software, everyone is encouraged to help improve this project. Here are some ways you can contribute:
14 | - by reporting bugs
15 | - by suggesting new features
16 | - by translating content to a new language
17 | - by writing or editing documentation
18 | - by writing specifications
19 | - by writing code and documentation (**no pull request is too small**: fix typos, add code comments, clean up inconsistent whitespace)
20 | - by reviewing [pull requests](https://github.com/GSA/ckanext-geodatagov/pulls)
21 | - by closing issues
22 |
23 | #### Submit Great Issues
24 | * Submit project-wide issues to the [GSA Data.gov issue tracker](https://github.com/GSA/data.gov/issues). When in doubt, submit issues in that repo.
25 | * Before submitting a new [issue](https://github.com/GSA/ckanext-geodatagov/issues), check to make sure [a similar issue isn't already open](https://github.com/GSA/ckanext-geodatagov/issues?q=is%3Aissue+is%3Aopen). If one is, contribute to that issue thread with your feedback.
26 | * When submitting a bug report, please try to provide as much detail as possible, e.g. a screenshot or [gist](https://gist.github.com/) that demonstrates the problem, the technology you are using, and any relevant links.
27 |
28 | #### Ready for your Help
29 | Issues labeled :sparkles:[`help wanted`](https://github.com/GSA/ckanext-geodatagov/labels/help%20wanted):sparkles: make it easy for you to find ways you can contribute today.
30 |
31 | ## Public Domain
32 |
33 | This project constitutes a work of the United States Government and is not subject to domestic copyright protection under 17 USC § 105. Additionally, we waive copyright and related rights in the work worldwide through the [CC0 1.0 Universal public domain dedication](https://creativecommons.org/publicdomain/zero/1.0/).
34 |
35 | All contributions to this project will be released under the CC0
36 | dedication. By submitting a pull request, you are agreeing to comply
37 | with this waiver of copyright interest.
38 |
--------------------------------------------------------------------------------
/ckanext/geodatagov/tests/utils.py:
--------------------------------------------------------------------------------
1 | import http.server
2 | import logging
3 | import socketserver
4 | from threading import Thread
5 | import os
6 |
7 | from ckan.tests.helpers import reset_db
8 | from ckan.model.meta import Session, metadata
9 | import ckan.lib.search as search
10 |
11 |
12 | log = logging.getLogger(__name__)
13 |
14 | PORT = 8999
15 |
16 |
17 | def simple_http_server(port=PORT):
18 |     '''Serves test XML files over HTTP'''
19 |
20 |     # Make sure we serve from the tests' XML directory
21 |     os.chdir(os.path.join(os.path.dirname(os.path.abspath(__file__)),
22 |                           'data-samples'))
23 |
24 |     Handler = http.server.SimpleHTTPRequestHandler
25 |
26 |     class TestServer(socketserver.TCPServer):
27 |         allow_reuse_address = True
28 |
29 |     skip_connection = False
30 |     try:
31 |         httpd = TestServer(("", port), Handler)
32 |     except Exception as e:
33 |         print('Serve error {}'.format(e))
34 |         skip_connection = True
35 |
36 |     if skip_connection is False:
37 |         info = 'Serving test HTTP server at port {}'.format(port)
38 |         print(info)
39 |         log.info(info)
40 |
41 |         httpd_thread = Thread(target=httpd.serve_forever)
42 |         httpd_thread.daemon = True  # setDaemon() is deprecated since Python 3.10
43 |         httpd_thread.start()
44 |
45 |
46 | def populate_locations_table():
47 |     # download locations.sql.gz if not present
48 |     if not os.path.exists('/tmp/locations.sql.gz'):
49 |         os.system(
50 |             "wget https://github.com/GSA/datagov-deploy/raw/71936f004be1882a506362670b82c710c64ef796/"
51 |             "ansible/roles/software/ec2/ansible/files/locations.sql.gz "
52 |             "-O /tmp/locations.sql.gz"
53 |         )
54 |     # echo "Creating locations table"
55 |     os.system("PGPASSWORD=ckan psql -h db -U ckan -d ckan -c 'DROP TABLE IF EXISTS locations;'")
56 |     os.system("PGPASSWORD=ckan psql -h db -U ckan -d ckan -c 'DROP SEQUENCE IF EXISTS locations_id_seq;'")
57 |     os.system("gunzip -c /tmp/locations.sql.gz | PGPASSWORD=ckan psql -h db -U ckan -d ckan -v ON_ERROR_STOP=1")
58 |
59 |
60 | def reset_db_and_solr():
61 |     # https://github.com/ckan/ckan/issues/4764
62 |     # drop extension postgis so we can reset db
63 |     try:
64 |         os.system(
65 |             "PGPASSWORD=ckan psql -h db -U ckan -d ckan -c "
66 |             "'SELECT pg_terminate_backend(pg_stat_activity.pid) "
67 |             " FROM pg_stat_activity WHERE "
68 |             " datname = current_database() AND"
69 |             " pid <> pg_backend_pid();'"
70 |         )
71 |     except Exception:
72 |         pass
73 |     os.system("PGPASSWORD=ckan psql -h db -U ckan -d ckan -c 'drop extension IF EXISTS postgis cascade;'")
74 |     try:
75 |         reset_db()
76 |     except Exception:
77 |         pass
78 |     os.system("PGPASSWORD=ckan psql -h db -U ckan -d ckan -c 'create extension postgis;'")
79 |     # add back tables from extensions
80 |     metadata.create_all(bind=Session.bind)
81 |
82 |     search.clear_all()
--------------------------------------------------------------------------------
/ckanext/geodatagov/tests/data-samples/sample6_bad_data.json:
--------------------------------------------------------------------------------
1 | {
2 |   "@context": "https://project-open-data.cio.gov/v1.1/schema/catalog.jsonld",
3 |   "@type": "dcat:Catalog",
4 |   "conformsTo": "https://project-open-data.cio.gov/v1.1/schema",
5 |   "describedBy": "https://project-open-data.cio.gov/v1.1/schema/catalog.json",
6 |   "dataset": [
7 |     {
8 |       "@type": "dcat:Dataset",
9 |       "accessLevel": "public",
10 |       "accrualPeriodicity": "R/P1D",
11 |       "bureauCode": [
12 |         "581:00"
13 |       ],
14 |       "contactPoint": {
15 |         "@type":
"vcard:Contact", 16 | "fn": "devops@cfpb.gov", 17 | "hasEmail": "mailto:devops@cfpb.gov" 18 | }, 19 | "describedBy": "https://cfpb.github.io/api/ccdb/api.html", 20 | "description": "The Consumer Complaint Database is a collection of complaints about consumer financial products and services that we sent to companies for response. Complaints are published after the company responds, confirming a commercial relationship with the consumer, or after 15 days, whichever comes first. Complaints referred to other regulators, such as complaints about depository institutions with less than $10 billion in assets, are not published in the Consumer Complaint Database. The database generally updates daily.", 21 | "distribution": [ 22 | { 23 | "@type": "dcat:Distribution", 24 | "downloadURL": "https://files.consumerfinance.gov/ccdb/complaints.csv.zip", 25 | "mediaType": "text/csv" 26 | }, 27 | { 28 | "@type": "dcat:Distribution", 29 | "downloadURL": "https://files.consumerfinance.gov/ccdb/complaints.json.zip", 30 | "mediaType": "application/json" 31 | }, 32 | { 33 | "@type": "dcat:Distribution", 34 | "format": "API", 35 | "accessURL": "https://www.consumerfinance.gov/data-research/consumer-complaints/search/api/v1/" 36 | } 37 | ], 38 | "identifier": "CCDB", 39 | "keyword": [ 40 | "consumer", 41 | "finance", 42 | "complaint", 43 | "bank account", 44 | "bank service", 45 | "credit card", 46 | "credit report", 47 | "debt collection", 48 | "money transfer", 49 | "mortgage", 50 | "student loan", 51 | "loan" 52 | ], 53 | "landingPage": "https://www.consumerfinance.gov/data-research/consumer-complaints/", 54 | "modified": "2020-01-13", 55 | "programCode": [ 56 | "000:000" 57 | ], 58 | "publisher": { 59 | "@type": "org:Organization", 60 | "name": "Consumer Financial Protection Bureau" 61 | }, 62 | "spatial": { 63 | "type": "Polygon", 64 | "coordinates": [ 65 | [ 66 | [ 67 | -124.733253, 68 | 24.544245 69 | ], 70 | [ 71 | -124.733253, 72 | 49.388611 73 | ], 74 | [ 75 | -66.954811, 76 | 49.388611 77 | ], 78 | [ 79 | -66.954811, 80 | 24.544245 81 | ] 82 | [ 83 | -124.733253, 84 | 24.544245 85 | ] 86 | ] 87 | ] 88 | }, 89 | "title": "Consumer Complaint Database" 90 | } 91 | ] 92 | } -------------------------------------------------------------------------------- /ckanext/geodatagov/tests/test_logic.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from ckan.tests.helpers import FunctionalTestBase 4 | from ckan.tests import factories 5 | from ckanext.geodatagov.logic import rollup_save_action 6 | 7 | from utils import populate_locations_table 8 | 9 | 10 | class TestLogic(FunctionalTestBase): 11 | 12 | def setup_method(self): 13 | populate_locations_table() 14 | 15 | def create_datasets(self): 16 | self.group1 = factories.Group() 17 | organization = factories.Organization() 18 | 19 | self.dataset1 = factories.Dataset( # NOQA 20 | title="Dataset 1", 21 | owner_org=organization['id'], 22 | groups=[ 23 | {"name": self.group1['name']}, 24 | ], 25 | extras=[]) 26 | 27 | sysadmin = factories.SysadminWithToken() 28 | self.user_name = sysadmin['name'] 29 | 30 | def test_rollup_save_action(self): 31 | """ test rollup_save_action for expected results """ 32 | test_data = [ 33 | {'key': 'harvest_object_id', 'value': 'to_be_ignored'}, 34 | {'key': 'spatial', 'value': 'US'}, 35 | {'key': 'extras_rollup', 'value': '{"some_extras_rollup": 123}'}, 36 | {'key': 'everything_else', 'value': 'others'} 37 | ] 38 | ignored_extra = test_data[0] 39 | # spatial_extra = test_data[1] 40 
| rollup_extra = test_data[2] 41 | other_extra = test_data[3] 42 | 43 | self.create_datasets() 44 | context = {'user': self.user_name, 'ignore_auth': True} 45 | 46 | self.dataset1['extras'] = test_data 47 | 48 | rollup_save_action(context, self.dataset1) 49 | # print(self.dataset1['extras']) 50 | # [ 51 | # {'value': 'to_be_ignored', 'key': 'harvest_object_id'}, 52 | # {'value': u'{"type":"Polygon","coordinates":[[...]]}', 53 | # 'key': 'spatial'}, 54 | # {'value': '{"some_extras_rollup": 1, 55 | # "everything_else": "others", 56 | # "old-spatial": "US" 57 | # }', 58 | # 'key': 'extras_rollup'} 59 | # ] 60 | new_extras = self.dataset1['extras'] 61 | new_extras_rollup = json.loads(next( 62 | item for item in new_extras if item['key'] == 'extras_rollup' 63 | )['value']) 64 | 65 | # harvest_object_id in one of EXTRAS_ROLLUP_KEY_IGNORE 66 | # it should not go into new_extras_rollup 67 | assert ignored_extra in new_extras 68 | assert ignored_extra['key'] not in new_extras_rollup.keys() 69 | 70 | # old spatial sees translation 71 | assert 'old-spatial' in new_extras_rollup.keys() 72 | assert 'Polygon' in next( 73 | item for item in new_extras if item['key'] == 'spatial' 74 | )['value'] 75 | 76 | # all others should go into new_extras_rollup 77 | assert json.loads(rollup_extra['value'])['some_extras_rollup'] \ 78 | == new_extras_rollup['some_extras_rollup'] 79 | assert other_extra['key'] in new_extras_rollup.keys() 80 | -------------------------------------------------------------------------------- /.env: -------------------------------------------------------------------------------- 1 | # DB image settings 2 | POSTGRES_PASSWORD=ckan 3 | POSTGRES_USER=ckan 4 | POSTGRES_DB=ckan 5 | DATASTORE_READONLY_PASSWORD=datastore 6 | 7 | # Basic 8 | CKAN_SITE_ID=default 9 | CKAN_SITE_URL=http://ckan:5000 10 | CKAN_PORT=5000 11 | CKAN_SYSADMIN_NAME=admin 12 | CKAN_SYSADMIN_PASSWORD=password 13 | CKAN_SYSADMIN_EMAIL=your_email@example.com 14 | TZ=UTC 15 | 16 | # Database connections (TODO: avoid duplication) 17 | CKAN_SQLALCHEMY_URL=postgresql://ckan:ckan@db/ckan 18 | # CKAN_SQLALCHEMY_URL=postgresql://ckan_default:pass@db/ckan_test # ckan/ckan-postgres-dev: 19 | CKAN_DATASTORE_WRITE_URL=postgresql://ckan:ckan@db/datastore 20 | CKAN_DATASTORE_READ_URL=postgresql://ckan:ckan@db/datastore 21 | 22 | # Test database connections 23 | TEST_CKAN_SQLALCHEMY_URL=postgres://ckan:ckan@db/ckan_test 24 | # TEST_CKAN_SQLALCHEMY_URL=postgres://ckan_default:pass@db/ckan_test # ckan/ckan-postgres-dev: 25 | TEST_CKAN_DATASTORE_WRITE_URL=postgresql://ckan:ckan@db/datastore_test 26 | TEST_CKAN_DATASTORE_READ_URL=postgresql://ckan:ckan@db/datastore_test 27 | 28 | # Other services connections 29 | CKAN_SOLR_URL=http://solr:8983/solr/ckan 30 | CKAN_REDIS_URL=redis://redis:6379/1 31 | CKAN_DATAPUSHER_URL=http://datapusher:8800 32 | CKAN__DATAPUSHER__CALLBACK_URL_BASE=http://ckan:5000 33 | 34 | TEST_CKAN_SOLR_URL=http://solr:8983/solr/ckan 35 | TEST_CKAN_REDIS_URL=redis://redis:6379/1 36 | 37 | # Core settings 38 | CKAN__STORAGE_PATH=/var/lib/ckan 39 | 40 | CKAN_SMTP_SERVER=smtp.corporateict.domain:25 41 | CKAN_SMTP_STARTTLS=True 42 | CKAN_SMTP_USER=user 43 | CKAN_SMTP_PASSWORD=pass 44 | CKAN_SMTP_MAIL_FROM=ckan@localhost 45 | 46 | # Extensions 47 | CKAN__PLUGINS=tracking harvest datagov_harvest ckan_harvester geodatagov z3950_harvester arcgis_harvester geodatagov_geoportal_harvester waf_harvester_collection geodatagov_csw_harvester geodatagov_doc_harvester geodatagov_waf_harvester spatial_metadata spatial_query s3test datajson 
datajson_harvest envvars 48 | 49 | # Harvest settings 50 | CKAN__HARVEST__MQ__TYPE=redis 51 | CKAN__HARVEST__MQ__HOSTNAME=redis 52 | CKAN__HARVEST__MQ__PORT=6379 53 | CKAN__HARVEST__MQ__REDIS_DB=1 54 | CKAN__HARVEST__LOG_LEVEL=info 55 | CKAN__HARVEST__LOG_SCOPE=0 56 | 57 | CKAN__HARVEST__STATUS_MAIL__ALL=True 58 | 59 | CKANEXT__GEODATAGOV__BUREAU_CSV__URL=https://resources.data.gov/schemas/dcat-us/v1.1/omb_bureau_codes.csv 60 | CKANEXT__GEODATAGOV__BUREAU_CSV__URL_DEFAULT=https://resources.data.gov/schemas/dcat-us/v1.1/omb_bureau_codes.csv 61 | 62 | CKAN__SPATIAL__SRID=4326 63 | CKAN__SPATIAL__VALIDATOR__PROFILES=iso19139ngdc 64 | 65 | CKAN___BROKER_BACKEND=redis 66 | CKAN___BROKER_HOST=redis://redis/1 67 | CKAN___CELERY_RESULT_BACKEND=redis 68 | CKAN___REDIS_HOST=redis 69 | CKAN___REDIS_PORT=6379 70 | CKAN___REDIS_DB=0 71 | CKAN___REDIS_CONNECT_RETRY=True 72 | 73 | ## S3 settings 74 | # The maximum content size, in bytes, for uploads 75 | CKAN__STORAGE__MAX_CONTENT_LENGTH=650000000 76 | CKAN_STORAGE_PATH=/var/lib/ckan/files 77 | CKANEXT__S3SITEMAP__AWS_ACCESS_KEY_ID=_placeholder 78 | CKANEXT__S3SITEMAP__AWS_BUCKET_NAME=catalog-sitemap 79 | CKANEXT__S3SITEMAP__AWS_S3_URL=_placeholder 80 | CKANEXT__S3SITEMAP__AWS_SECRET_ACCESS_KEY=_placeholder 81 | CKANEXT__S3SITEMAP__AWS_STORAGE_PATH=local 82 | CKANEXT__S3SITEMAP__REGION_NAME=us-east-1 83 | CKANEXT__S3SITEMAP__HOST_NAME=http://localstack-container:4566 84 | CKANEXT__S3SITEMAP__PUBLIC_HOST_NAME=http://localhost:4566 85 | # endpoint used to create boto3.resource('s3') 86 | CKANEXT__S3SITEMAP__ENDPOINT_URL=http://localstack-container:4566 87 | CKANEXT__S3SITEMAP__SIGNATURE_VERSION=s3v4 88 | -------------------------------------------------------------------------------- /ckanext/geodatagov/validation/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from ckanext.spatial.validation import BaseValidator, XsdValidator, FGDCSchema 4 | 5 | 6 | class MinimalFGDCValidator(BaseValidator): 7 | 8 | name = 'fgdc_minimal' 9 | title = 'FGDC Minimal Validation' 10 | 11 | _elements = [ 12 | ('Identification Citation Title', '/metadata/idinfo/citation/citeinfo/title'), 13 | ('Identification Citation Originator', '/metadata/idinfo/citation/citeinfo/origin'), 14 | ('Identification Citation Publication Date', '/metadata/idinfo/citation/citeinfo/pubdate'), 15 | ('Identification Description Abstract', '/metadata/idinfo/descript/abstract'), 16 | ('Identification Spatial Domain West Bounding Coordinate', '/metadata/idinfo/spdom/bounding/westbc'), 17 | ('Identification Spatial Domain East Bounding Coordinate', '/metadata/idinfo/spdom/bounding/eastbc'), 18 | ('Identification Spatial Domain North Bounding Coordinate', '/metadata/idinfo/spdom/bounding/northbc'), 19 | ('Identification Spatial Domain South Bounding Coordinate', '/metadata/idinfo/spdom/bounding/southbc'), 20 | ('Metadata Reference Information Contact Address Type', '/metadata/metainfo/metc/cntinfo/cntaddr/addrtype'), 21 | ('Metadata Reference Information Contact Address State', '/metadata/metainfo/metc/cntinfo/cntaddr/state'), 22 | ] 23 | 24 | @classmethod 25 | def is_valid(cls, xml): 26 | 27 | errors = [] 28 | 29 | for title, xpath in cls._elements: 30 | element = xml.xpath(xpath) 31 | if len(element) == 0 or not element[0].text: 32 | errors.append(('Element not found: {0}'.format(title), None)) 33 | if len(errors): 34 | return False, errors 35 | 36 | return True, [] 37 | 38 | 39 | class FGDCValidator(XsdValidator): 40 | ''' 41 | Base 
class for FGDC XSD validators
42 |     '''
43 |
44 |     @classmethod
45 |     def is_valid(cls, xml):
46 |         xsd_filepath = os.path.join(os.path.dirname(__file__),
47 |                                     cls._xsd_path, cls._xsd_file)
48 |         return cls._is_valid(xml, xsd_filepath, 'FGDC Schema ({0})'.format(cls._xsd_file))
49 |
50 |
51 | class FGDC1998Schema(FGDCSchema):
52 |     '''
53 |     XSD based validation for FGDC metadata documents, version FGDC-STD-001-1998
54 |
55 |     This is the same version present on ckanext-spatial
56 |
57 |     '''
58 |
59 |     name = 'fgdc_std_001_1998'
60 |     title = 'FGDC CSDGM Version 2.0, 1998 (FGDC-STD-001-1998)'
61 |
62 |
63 | class FGDC1999Schema(FGDCValidator):
64 |     '''
65 |     XSD based validation for FGDC metadata documents, version FGDC-STD-001.1-1999
66 |
67 |     Source: http://www.ncddc.noaa.gov/metadata-standards/metadata-xml/
68 |
69 |     '''
70 |     _xsd_path = 'xml/fgdc-std-001.1-1999'
71 |     _xsd_file = 'fgdc-std-001.1-1999.xsd'
72 |
73 |     name = 'fgdc_std_001.1_1999'
74 |     title = 'FGDC CSDGM Biological Data Profile (FGDC-STD-001.1-1999)'
75 |
76 |
77 | class FGDC2001Schema(FGDCValidator):
78 |     '''
79 |     XSD based validation for FGDC metadata documents, version FGDC-STD-001.2-2001
80 |
81 |     Source: http://www.ncddc.noaa.gov/metadata-standards/metadata-xml/
82 |
83 |     '''
84 |     _xsd_path = 'xml/fgdc-std-001.2-2001'
85 |     _xsd_file = 'fgdc-std-001.2-2001.xsd'
86 |
87 |     name = 'fgdc_std_001.2_2001'
88 |     title = 'FGDC CSDGM Metadata Profile for Shoreline Data (FGDC-STD-001.2-2001)'
89 |
90 |
91 | class FGDC2002Schema(FGDCValidator):
92 |     '''
93 |     XSD based validation for FGDC metadata documents, version FGDC-STD-012-2002
94 |
95 |     Source: http://www.ncddc.noaa.gov/metadata-standards/metadata-xml/
96 |
97 |     '''
98 |     _xsd_path = 'xml/fgdc-std-012-2002'
99 |     _xsd_file = 'fgdc-std-012-2002.xsd'
100 |
101 |     name = 'fgdc_std_012_2002'
102 |     title = 'FGDC Extensions for Remote Sensing (FGDC-STD-012-2002)'
--------------------------------------------------------------------------------
/ckanext/geodatagov/rebuild.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from typing import Collection, Optional
3 |
4 | import ckan.logic as logic
5 | import ckan.model as model
6 | from ckan.lib.search import index_for, query_for, text_traceback
7 | from ckan.lib.search.common import config
8 | from ckan.types import Context
9 |
10 | log = logging.getLogger(__name__)
11 |
12 |
13 | def rebuild(
14 |     package_id: Optional[str] = None,
15 |     only_missing: bool = False,
16 |     force: bool = False,
17 |     defer_commit: bool = False,
18 |     package_ids: Optional[Collection[str]] = None,
19 |     quiet: bool = False,
20 |     clear: bool = False,
21 | ):
22 |     """
23 |     Rebuilds the search index.
24 |
25 |     If a dataset id is provided, only this dataset will be reindexed.
26 |     When reindexing all datasets, if only_missing is True, only the
27 |     datasets not already indexed will be processed. If force is True
28 |     and an exception occurs, the exception will be logged and the
29 |     process will carry on.
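
    Example (a sketch; assumes this module is importable as
    ckanext.geodatagov.rebuild, per this repository's layout):

        from ckanext.geodatagov.rebuild import rebuild

        rebuild(only_missing=True)        # index only datasets missing from Solr
        rebuild(package_id='my-dataset')  # reindex a single dataset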
30 | """ 31 | log.info("Rebuilding search index...") 32 | 33 | package_index = index_for(model.Package) 34 | context: Context = {"ignore_auth": True, "validate": False, "use_cache": False} 35 | 36 | if package_id: 37 | pkg_dict = logic.get_action("package_show")(context, {"id": package_id}) 38 | log.info("Indexing package %r...", pkg_dict["name"]) 39 | package_index.remove_dict(pkg_dict) 40 | package_index.insert_dict(pkg_dict) 41 | elif package_ids is not None: 42 | for package_id in package_ids: 43 | pkg_dict = logic.get_action("package_show")(context, {"id": package_id}) 44 | log.info("Indexing package %r...", pkg_dict["name"]) 45 | try: 46 | package_index.update_dict(pkg_dict, True) 47 | except Exception as e: 48 | log.error("Error while indexing package %s: %s" % (package_id, repr(e))) 49 | if force: 50 | log.error(text_traceback()) 51 | continue 52 | else: 53 | raise 54 | # If no package_id or package_ids is provided, rebuild the index for all packages 55 | else: 56 | packages = model.Session.query(model.Package.id) 57 | if config.get("ckan.search.remove_deleted_packages"): 58 | packages = packages.filter(model.Package.state != "deleted") 59 | 60 | package_ids = [r[0] for r in packages.all()] 61 | 62 | if only_missing: 63 | log.info("Indexing only missing packages...") 64 | package_query = query_for(model.Package) 65 | indexed_pkg_ids = set( 66 | package_query.get_all_entity_ids(max_results=len(package_ids)) 67 | ) 68 | # Packages not indexed 69 | package_ids = set(package_ids) - indexed_pkg_ids 70 | 71 | if len(package_ids) == 0: 72 | log.info("All datasets are already indexed") 73 | return 74 | else: 75 | log.info("Rebuilding the whole index...") 76 | # When refreshing, the index is not previously cleared 77 | if clear: 78 | package_index.clear() 79 | 80 | total_packages = len(package_ids) 81 | for counter, pkg_id in enumerate(package_ids): 82 | if not quiet: 83 | log.info( 84 | "\rIndexing dataset {0}/{1}".format(counter + 1, total_packages) 85 | ) 86 | try: 87 | package_index.update_dict( 88 | logic.get_action("package_show")(context, {"id": pkg_id}), 89 | defer_commit, 90 | ) 91 | except Exception as e: 92 | log.error("Error while indexing dataset %s: %s" % (pkg_id, repr(e))) 93 | if force: 94 | log.error(text_traceback()) 95 | continue 96 | else: 97 | raise 98 | 99 | model.Session.commit() 100 | log.info("Finished rebuilding search index.") 101 | -------------------------------------------------------------------------------- /ckanext/geodatagov/harvesters/z3950.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import hashlib 3 | from PyZ3950 import zoom 4 | 5 | from ckan import model 6 | 7 | from ckan.plugins.core import SingletonPlugin, implements 8 | from ckan.plugins import IConfigurer 9 | 10 | from ckanext.harvest.interfaces import IHarvester 11 | from ckanext.harvest.model import HarvestObject 12 | from ckanext.harvest.model import HarvestObjectExtra as HOExtra 13 | 14 | from ckanext.geodatagov.harvesters import GeoDataGovHarvester 15 | 16 | from ckan.lib.navl.validators import not_empty, convert_int, ignore_empty 17 | from ckan.logic.validators import boolean_validator 18 | 19 | from ckan.plugins.toolkit import add_template_directory, add_resource, requires_ckan_version 20 | from ckanext.geodatagov.helpers import string 21 | 22 | requires_ckan_version("2.9") 23 | 24 | 25 | class Z3950Harvester(GeoDataGovHarvester, SingletonPlugin): 26 | ''' 27 | A Harvester for z3950. 
28 |     '''
29 |
30 |     implements(IConfigurer)
31 |     implements(IHarvester)
32 |
33 |     # IConfigurer
34 |     def update_config(self, config):
35 |         add_template_directory(config, 'templates')
36 |         add_resource('fanstatic_library', 'geodatagov')
37 |
38 |     def info(self):
39 |         return {
40 |             'name': 'z3950',
41 |             'title': 'Z39.50',
42 |             'description': 'A remote database supporting the Z39.50 protocol'
43 |         }
44 |
45 |     def extra_schema(self):
46 |         return {'private_datasets': [ignore_empty, boolean_validator],
47 |                 'database': [not_empty, string],
48 |                 'port': [not_empty, convert_int]}
49 |
50 |     def gather_stage(self, harvest_job):
51 |
52 |         log = logging.getLogger(__name__ + '.z3950.gather')
53 |         log.debug('z3950Harvester gather_stage for job: %r', harvest_job)
54 |
55 |         self.harvest_job = harvest_job
56 |
57 |         # Get source URL
58 |         source_url = harvest_job.source.url
59 |
60 |         self._set_source_config(harvest_job.source.config)
61 |
62 |         # get current objects out of db
63 |         query = model.Session.query(HarvestObject.guid, HarvestObject.package_id).\
64 |             filter(HarvestObject.current.is_(True)).\
65 |             filter(HarvestObject.harvest_source_id == harvest_job.source.id)
66 |
67 |         guid_to_package_id = dict((res[0], res[1]) for res in query)
68 |         current_guids = set(guid_to_package_id.keys())
69 |         current_guids_in_harvest = set()
70 |
71 |         # Get contents
72 |         try:
73 |             conn = zoom.Connection(source_url, int(self.source_config.get('port', 210)))
74 |             conn.databaseName = self.source_config.get('database', '')
75 |             conn.preferredRecordSyntax = 'XML'
76 |             conn.elementSetName = 'T'
77 |             query = zoom.Query('CCL', 'metadata')
78 |             res = conn.search(query)
79 |             ids = []
80 |             for num, result in enumerate(res):
81 |                 hash = hashlib.md5(result.data).hexdigest()
82 |                 if hash in current_guids:
83 |                     current_guids_in_harvest.add(hash)
84 |                 else:
85 |                     obj = HarvestObject(job=harvest_job, guid=hash, extras=[
86 |                         HOExtra(key='status', value='new'),
87 |                         HOExtra(key='original_document', value=result.data.decode('latin-1')),
88 |                         HOExtra(key='original_format', value='fgdc')
89 |                     ])
90 |                     obj.save()
91 |                     ids.append(obj.id)
92 |             for guid in (current_guids - current_guids_in_harvest):
93 |                 obj = HarvestObject(job=harvest_job,
94 |                                     guid=guid,
95 |                                     package_id=guid_to_package_id[guid],
96 |                                     extras=[HOExtra(key='status', value='delete')])
97 |                 obj.save()
98 |                 ids.append(obj.id)
99 |             return ids
100 |         except Exception as e:
101 |             self._save_gather_error('Unable to get content for URL: %s: %r' %
102 |                                     (source_url, e), harvest_job)
103 |             return None
104 |
105 |     def fetch_stage(self, harvest_object):
106 |         return True
--------------------------------------------------------------------------------
/ckanext/geodatagov/tests/factories.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import factory
3 |
4 | import ckanext.harvest.model as harvest_model
5 | try:
6 |     from ckan.new_tests.factories import _get_action_user_name
7 | except ImportError:
8 |     from ckan.tests.factories import _get_action_user_name
9 | from ckan.plugins import toolkit
10 |
11 |
12 | log = logging.getLogger(__name__)
13 |
14 |
15 | class HarvestSource(factory.Factory):
16 |     FACTORY_FOR = harvest_model.HarvestSource
17 |     _return_type = 'dict'
18 |
19 |     class Meta:
20 |         model = harvest_model.HarvestSource
21 |
22 |     name = factory.Sequence(lambda n: 'test_source_{n}'.format(n=n))
23 |     title = factory.Sequence(lambda n: 'test title {n}'.format(n=n))
24 |     url = factory.Sequence(lambda n:
'http://{n}.test.com'.format(n=n)) 25 | source_type = 'undefined' 26 | id = '{0}_id'.format(name).lower() 27 | 28 | @classmethod 29 | def _create(cls, target_class, *args, **kwargs): 30 | if args: 31 | assert False, "Positional args aren't supported, use keyword args." 32 | context = {'user': _get_action_user_name(kwargs)} 33 | if kwargs.get('owner_org', False): 34 | context['owner_org'] = kwargs['owner_org'] 35 | # If there is an existing source for this URL, and we can't create 36 | # another source with that URL, just return the original one. 37 | log.info('Factory HarvestSource : {} : {}'.format(context, kwargs)) 38 | try: 39 | source_dict = toolkit.get_action('harvest_source_show')( 40 | context, dict(url=kwargs['url'])) 41 | except (KeyError, toolkit.ObjectNotFound): 42 | source_dict = toolkit.get_action('harvest_source_create')( 43 | context, kwargs) 44 | if cls._return_type == 'dict': 45 | return source_dict 46 | else: 47 | return cls.FACTORY_FOR.get(source_dict['id']) 48 | 49 | 50 | class HarvestSourceObj(HarvestSource): 51 | _return_type = 'obj' 52 | 53 | 54 | class CSWHarvestSourceObj(HarvestSourceObj): 55 | source_type = 'csw' 56 | 57 | 58 | class WafCollectionHarvestSourceObj(HarvestSourceObj): 59 | source_type = 'waf-collection' 60 | 61 | 62 | class WafHarvestSourceObj(HarvestSourceObj): 63 | source_type = 'waf' 64 | 65 | 66 | class DataJsonHarvestSourceObj(HarvestSourceObj): 67 | source_type = 'datajson' 68 | 69 | 70 | class HarvestJob(factory.Factory): 71 | FACTORY_FOR = harvest_model.HarvestJob 72 | _return_type = 'dict' 73 | 74 | class Meta: 75 | model = harvest_model.HarvestJob 76 | 77 | source = factory.SubFactory(HarvestSourceObj) 78 | 79 | @classmethod 80 | def _create(cls, target_class, *args, **kwargs): 81 | if args: 82 | assert False, "Positional args aren't supported, use keyword args." 83 | context = {'user': _get_action_user_name(kwargs)} 84 | if 'source_id' not in kwargs: 85 | kwargs['source_id'] = kwargs['source'].id 86 | if 'run' not in kwargs: 87 | kwargs['run'] = False 88 | job_dict = toolkit.get_action('harvest_job_create')( 89 | context, kwargs) 90 | if cls._return_type == 'dict': 91 | return job_dict 92 | else: 93 | return cls.FACTORY_FOR.get(job_dict['id']) 94 | 95 | 96 | class HarvestJobObj(HarvestJob): 97 | _return_type = 'obj' 98 | 99 | 100 | class HarvestObject(factory.Factory): 101 | FACTORY_FOR = harvest_model.HarvestObject 102 | _return_type = 'dict' 103 | 104 | class Meta: 105 | model = harvest_model.HarvestObject 106 | 107 | # source = factory.SubFactory(HarvestSourceObj) 108 | job = factory.SubFactory(HarvestJobObj) 109 | 110 | @classmethod 111 | def _create(cls, target_class, *args, **kwargs): 112 | if args: 113 | assert False, "Positional args aren't supported, use keyword args." 
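        # As with HarvestJob above: resolve the acting user, derive the
        # job/source ids from the parent job, then delegate creation to
        # ckanext-harvest's harvest_object_create action.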
114 | context = {'user': _get_action_user_name(kwargs)} 115 | if 'job_id' not in kwargs: 116 | kwargs['job_id'] = kwargs['job'].id 117 | kwargs['source_id'] = kwargs['job'].source.id 118 | # Remove 'job' to avoid it getting added as a HarvestObjectExtra 119 | if 'job' in kwargs: 120 | kwargs.pop('job') 121 | job_dict = toolkit.get_action('harvest_object_create')( 122 | context, kwargs) 123 | if cls._return_type == 'dict': 124 | return job_dict 125 | else: 126 | return cls.FACTORY_FOR.get(job_dict['id']) 127 | 128 | 129 | class HarvestObjectObj(HarvestObject): 130 | _return_type = 'obj' 131 | -------------------------------------------------------------------------------- /ckanext/geodatagov/tests/data-samples/sample5_data.json: -------------------------------------------------------------------------------- 1 | { 2 | "@context": "https://project-open-data.cio.gov/v1.1/schema/catalog.jsonld", 3 | "@type": "dcat:Catalog", 4 | "conformsTo": "https://project-open-data.cio.gov/v1.1/schema", 5 | "describedBy": "https://project-open-data.cio.gov/v1.1/schema/catalog.json", 6 | "dataset": [ 7 | { 8 | "@type": "dcat:Dataset", 9 | "accessLevel": "public", 10 | "accrualPeriodicity": "R/P1D", 11 | "bureauCode": [ 12 | "581:00" 13 | ], 14 | "contactPoint": { 15 | "@type": "vcard:Contact", 16 | "fn": "devops@cfpb.gov", 17 | "hasEmail": "mailto:devops@cfpb.gov" 18 | }, 19 | "describedBy": "https://cfpb.github.io/api/ccdb/api.html", 20 | "description": "The Consumer Complaint Database is a collection of complaints about consumer financial products and services that we sent to companies for response. Complaints are published after the company responds, confirming a commercial relationship with the consumer, or after 15 days, whichever comes first. Complaints referred to other regulators, such as complaints about depository institutions with less than $10 billion in assets, are not published in the Consumer Complaint Database. 
The database generally updates daily.", 21 | "distribution": [ 22 | { 23 | "@type": "dcat:Distribution", 24 | "downloadURL": "https://files.consumerfinance.gov/ccdb/complaints.csv.zip", 25 | "mediaType": "text/csv" 26 | }, 27 | { 28 | "@type": "dcat:Distribution", 29 | "downloadURL": "https://files.consumerfinance.gov/ccdb/complaints.json.zip", 30 | "mediaType": "application/json" 31 | }, 32 | { 33 | "@type": "dcat:Distribution", 34 | "format": "API", 35 | "accessURL": "https://www.consumerfinance.gov/data-research/consumer-complaints/search/api/v1/" 36 | } 37 | ], 38 | "identifier": "CCDB", 39 | "keyword": [ 40 | "consumer", 41 | "finance", 42 | "complaint", 43 | "bank account", 44 | "bank service", 45 | "credit card", 46 | "credit report", 47 | "debt collection", 48 | "money transfer", 49 | "mortgage", 50 | "student loan", 51 | "loan" 52 | ], 53 | "landingPage": "https://www.consumerfinance.gov/data-research/consumer-complaints/", 54 | "modified": "2020-01-13", 55 | "programCode": [ 56 | "000:000" 57 | ], 58 | "publisher": { 59 | "@type": "org:Organization", 60 | "name": "Consumer Financial Protection Bureau" 61 | }, 62 | "spatial": "United States", 63 | "title": "Consumer Complaint Database" 64 | }, 65 | { 66 | "@type": "dcat:Dataset", 67 | "accessLevel": "public", 68 | "accrualPeriodicity": "R/P1Y", 69 | "bureauCode": [ 70 | "581:00" 71 | ], 72 | "contactPoint": { 73 | "@type": "vcard:Contact", 74 | "fn": "devops@cfpb.gov", 75 | "hasEmail": "mailto:devops@cfpb.gov" 76 | }, 77 | "describedBy": "https://api.consumerfinance.gov/data/hmda", 78 | "description": "The Home Mortgage Disclosure Act (HMDA) requires many financial institutions to maintain, report, and publicly disclose information about mortgages", 79 | "distribution": [ 80 | { 81 | "@type": "dcat:Distribution", 82 | "downloadURL": "https://api.consumerfinance.gov/data/hmda/slice/hmda_lar.csv", 83 | "mediaType": "text/csv" 84 | }, 85 | { 86 | "@type": "dcat:Distribution", 87 | "downloadURL": "https://api.consumerfinance.gov/data/hmda/slice/hmda_lar.csv", 88 | "mediaType": "text/csv" 89 | }, 90 | { 91 | "@type": "dcat:Distribution", 92 | "downloadURL": "https://api.consumerfinance.gov/data/hmda/slice/hmda_lar.json", 93 | "mediaType": "application/json" 94 | }, 95 | { 96 | "@type": "dcat:Distribution", 97 | "downloadURL": "https://api.consumerfinance.gov/data/hmda/slice/hmda_lar.xml", 98 | "mediaType": "application/xml" 99 | }, 100 | { 101 | "@type": "dcat:Distribution", 102 | "format": "API", 103 | "accessURL": "https://api.consumerfinance.gov/data/hmda/slice/hmda_lar.json" 104 | } 105 | ], 106 | "identifier": "hmda_lar", 107 | "keyword": [ 108 | "consumer", 109 | "finance", 110 | "mortgage", 111 | "HMDA", 112 | "Home Mortgage Disclosure Act", 113 | "loan" 114 | ], 115 | "landingPage": "https://www.consumerfinance.gov/hmda/", 116 | "modified": "2014-09-22", 117 | "programCode": [ 118 | "000:000" 119 | ], 120 | "publisher": { 121 | "@type": "org:Organization", 122 | "name": "Consumer Financial Protection Bureau" 123 | }, 124 | "spatial": "United States", 125 | "temporal": "2007-02-01T00:00:00Z/2014-12-31T00:00:00Z", 126 | "title": "Home Mortgage Disclosure Act Data for the years 2007-2014" 127 | } 128 | ] 129 | } -------------------------------------------------------------------------------- /ckanext/geodatagov/tests/test_waf_GMI.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import pytest 4 | 5 | import ckanext.harvest.model as harvest_model 6 | from 
ckan import model
7 | from ckanext.geodatagov.harvesters.base import GeoDataGovWAFHarvester
8 | from ckan.tests.factories import Organization
9 |
10 | from factories import HarvestJobObj, WafHarvestSourceObj
11 | from utils import PORT, reset_db_and_solr
12 |
13 | log = logging.getLogger(__name__)
14 |
15 |
16 | @pytest.mark.usefixtures("with_plugins")
17 | class TestWafHarvester(object):
18 |
19 |     def setup_method(self):
20 |         reset_db_and_solr()
21 |
22 |         self.organization = Organization()
23 |
24 |     def run_gather(self, url, source_config):
25 |         sc = json.loads(source_config)
26 |
27 |         source = WafHarvestSourceObj(url=url,
28 |                                      owner_org=self.organization['id'],
29 |                                      config=source_config,
30 |                                      **sc)
31 |
32 |         log.info('Created source {}'.format(repr(source)))
33 |         self.job = HarvestJobObj(source=source)
34 |         self.harvester = GeoDataGovWAFHarvester()
35 |
36 |         # gather stage
37 |         log.info('GATHERING %s', url)
38 |         obj_ids = self.harvester.gather_stage(self.job)
39 |         log.info('job.gather_errors=%s', self.job.gather_errors)
40 |         if len(self.job.gather_errors) > 0:
41 |             raise Exception(self.job.gather_errors[0])
42 |
43 |         log.info('obj_ids=%s', obj_ids)
44 |         if obj_ids is None or len(obj_ids) == 0:
45 |             # nothing to see
46 |             return
47 |
48 |         self.harvest_objects = []
49 |         for obj_id in obj_ids:
50 |             harvest_object = harvest_model.HarvestObject.get(obj_id)
51 |             log.info('ho guid=%s', harvest_object.guid)
52 |             log.info('ho content=%s', harvest_object.content)
53 |             self.harvest_objects.append(harvest_object)
54 |
55 |         # this is a list of harvest object IDs, one per dataset
56 |         return obj_ids
57 |
58 |     def run_fetch(self):
59 |         # fetch stage
60 |         for harvest_object in self.harvest_objects:
61 |             log.info('FETCHING %s' % harvest_object.id)
62 |             result = self.harvester.fetch_stage(harvest_object)
63 |
64 |             log.info('ho errors=%s', harvest_object.errors)
65 |             log.info('result 1=%s', result)
66 |             if len(harvest_object.errors) > 0:
67 |                 raise Exception(harvest_object.errors[0])
68 |
69 |     def run_import(self):
70 |         # import stage
71 |         datasets = []
72 |         for harvest_object in self.harvest_objects:
73 |             log.info('IMPORTING %s' % harvest_object.id)
74 |             result = self.harvester.import_stage(harvest_object)
75 |
76 |             log.info('ho errors 2=%s', harvest_object.errors)
77 |             log.info('result 2=%s', result)
78 |             if len(harvest_object.errors) > 0:
79 |                 raise Exception(harvest_object.errors[0])
80 |
81 |             log.info('ho pkg id=%s', harvest_object.package_id)
82 |             dataset = model.Package.get(harvest_object.package_id)
83 |             datasets.append(dataset)
84 |             log.info('dataset name=%s', dataset.name)
85 |
86 |         return datasets
87 |
88 |     def get_datasets_from_waf_gmi_sample(self):
89 |         """ harvest waf-gmi/ folder as waf source """
90 |         url = f'http://127.0.0.1:{PORT}/waf-gmi/index.html'
91 |
92 |         self.config1 = '{"private_datasets": "false"}'
93 |         self.run_gather(url=url, source_config=self.config1)
94 |         self.run_fetch()
95 |         datasets = self.run_import()
96 |
97 |         return datasets
98 |
99 |     def test_waf_gmi_datasets_count(self):
100 |         """ Get datasets from waf-gmi/ folder as waf source
101 |         and test we have one dataset with the expected name """
102 |
103 |         datasets = self.get_datasets_from_waf_gmi_sample()
104 |         assert len(datasets) == 1
105 |
106 |     def test_waf_gmi_datasets_privacy(self):
107 |         """ Harvest waf-gmi/ folder as waf source and check the datasets are public """
108 |
109 |         datasets = self.get_datasets_from_waf_gmi_sample()
110 |         for dataset in datasets:
111 |             assert dataset.private is False
112 |
113 |     def test_waf_gmi_names(self):
114 |         """ Harvest waf-gmi/ folder as waf source and test we have the names we expect """
115 |
116 |         expected_names = [
117 |             '2014-cartographic-boundary-file-new-england-city-and-town-area-for-united-states-1-500000'
118 |         ]
119 |         datasets = self.get_datasets_from_waf_gmi_sample()
120 |         for dataset in datasets:
121 |             assert dataset.name in expected_names
--------------------------------------------------------------------------------
/ckanext/geodatagov/validation/xml/fgdc-std-001.1-1999/fgdc-std-001.1-1999.xsd:
--------------------------------------------------------------------------------
1 | FGDC Biological Data Working Group, and USGS Biological Resources Division. 1999. Content Standard for Digital Geospatial Metadata - Biological Data Profile, FGDC-STD-001.1-1999. Federal Geographic Data Committee.
--------------------------------------------------------------------------------
/ckanext/geodatagov/tests/test_datajson.py:
--------------------------------------------------------------------------------
1 | import json
2 | import pytest
3 | import logging
4 |
5 | from ckan.tests.factories import Organization
6 | from ckan import model
7 | import ckanext.harvest.model as harvest_model
8 | from ckanext.datajson.harvester_datajson import DataJsonHarvester
9 |
10 | from factories import (DataJsonHarvestSourceObj,
11 |                        HarvestJobObj)
12 | from utils import PORT, populate_locations_table
13 |
14 | log = logging.getLogger(__name__)
15 |
16 |
17 | @pytest.mark.usefixtures("with_plugins")
18 | class TestDataJsonHarvester(object):
19 |
20 |     @classmethod
21 |     def setup_class(cls):
22 |         populate_locations_table()
23 |
24 |     def run_gather(self, url):
25 |         source = DataJsonHarvestSourceObj(url=url, owner_org=self.organization['id'])
26 |         job = HarvestJobObj(source=source)
27 |
28 |         self.harvester = DataJsonHarvester()
29 |
30 |         # gather stage
31 |         log.info('GATHERING %s', url)
32 |         obj_ids = self.harvester.gather_stage(job)
33 |         log.info('job.gather_errors=%s', job.gather_errors)
34 |         if len(job.gather_errors) > 0:
35 |             raise Exception(job.gather_errors[0])
36 |
37 |         log.info('obj_ids=%s', obj_ids)
38 |         if obj_ids is None or len(obj_ids) == 0:
39 |             # nothing to see
40 |             return
41 |
42 |         self.harvest_objects = []
43 |         for obj_id in obj_ids:
44 |             harvest_object = harvest_model.HarvestObject.get(obj_id)
45 |             log.info('ho guid=%s', harvest_object.guid)
46 |             log.info('ho content=%s', harvest_object.content)
47 |             self.harvest_objects.append(harvest_object)
48 |
49 |         # this is a list of harvest object IDs, one per dataset
50 |         return obj_ids
51 |
52 |     def run_fetch(self):
53 |         # fetch stage
54 |         for harvest_object in self.harvest_objects:
55 |             log.info('FETCHING %s' % harvest_object.id)
56 |             result = self.harvester.fetch_stage(harvest_object)
57 |
58 |             log.info('ho errors=%s', harvest_object.errors)
59 |             log.info('result 1=%s', result)
60 |             if len(harvest_object.errors) > 0:
61 |                 raise Exception(harvest_object.errors[0])
62 |
63 |     def run_import(self):
64 |         # import stage
65 |         datasets = []
66 |         for harvest_object in self.harvest_objects:
67 |             log.info('IMPORTING %s' % harvest_object.id)
68 |             result = self.harvester.import_stage(harvest_object)
69 |
70 |             log.info('ho errors 2=%s', harvest_object.errors)
71 |             log.info('result 2=%s', result)
72 |             if len(harvest_object.errors) > 0:
73 |                 raise Exception(harvest_object.errors[0])
74 |
75 |             log.info('ho pkg id=%s', harvest_object.package_id)
76 |             dataset = model.Package.get(harvest_object.package_id)
77 |             datasets.append(dataset)
78 |             log.info('dataset name=%s', dataset.name)
79 |
80 |         return datasets
81 |
82 |     def test_sample5_data(self):
83 |         self.organization = Organization()
84 |
85 |         # testing with data from https://www.consumerfinance.gov/data.json
86 |         url = f'http://127.0.0.1:{PORT}/sample5_data.json'
87 |         obj_ids = self.run_gather(url=url)
88 |         assert len(obj_ids) == 2
89 |         self.run_fetch()
90 |         datasets = self.run_import()
91 |         assert len(datasets) == 2
92 |         titles = ['Consumer Complaint Database',
93 |                   'Home Mortgage Disclosure Act Data for the years 2007-2014']
94 |         for dataset in datasets:
95 |             assert dataset.title in titles
96 |             # test we get the spatial as we want: https://github.com/GSA/catalog.data.gov/issues/55
97 |             # we expect a data transformation here
98 |             pkg = dataset.as_dict()
99 |             extras = json.loads(pkg["extras"]['extras_rollup'])
100 |
101 |             assert pkg["extras"]["spatial"] == ('{"type":"Polygon",'
102 |                                                 '"coordinates":[[[-124.733253,24.544245],'
103 |                                                 '[-124.733253,49.388611],'
104 |                                                 '[-66.954811,49.388611],'
105 |                                                 '[-66.954811,24.544245],'
106 |                                                 '[-124.733253,24.544245]]]}')
107 |             assert extras['old-spatial'] == 'United States'
108 |             assert extras['programCode'] == ['000:000']
109 |
110 |     def test_bad_data_JSONDecodeError(self):
111 |         """
112 |         Test for JSONDecodeError when the data.json file is not valid JSON.
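        (sample6_bad_data.json omits the comma between the last two
        coordinate pairs of its "spatial" polygon, so json.loads fails
        while the gather stage parses the catalog.)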
113 | """ 114 | self.organization = Organization() 115 | 116 | # testing with data from https://www.consumerfinance.gov/data.json 117 | url = f"http://127.0.0.1:{PORT}/sample6_bad_data.json" 118 | with pytest.raises(Exception) as error: 119 | self.run_gather(url=url) 120 | 121 | assert "JSONDecodeError" in error.value.args[0].message 122 | -------------------------------------------------------------------------------- /ckanext/geodatagov/harvesters/waf_collection.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | log = logging.getLogger(__name__) 4 | import hashlib 5 | 6 | import requests 7 | from ckan import model 8 | from ckan.lib.navl.validators import not_empty # , ignore_empty 9 | 10 | import ckanext.harvest.queue as queue 11 | from ckanext.geodatagov.harvesters.base import ( 12 | GeoDataGovWAFHarvester, 13 | ) # , validate_profiles; , validate_profiles 14 | from ckanext.harvest.model import HarvestObject 15 | from ckanext.harvest.model import HarvestObjectExtra as HOExtra 16 | from ckanext.geodatagov.helpers import string 17 | 18 | 19 | class WAFCollectionHarvester(GeoDataGovWAFHarvester): 20 | def info(self): 21 | return { 22 | "name": "waf-collection", 23 | "title": "Web Accessible Folder (WAF) Homogeneous Collection", 24 | "description": "A Web Accessible Folder (WAF) displaying a list" 25 | "of spatial metadata documents with a collection record", 26 | } 27 | 28 | def extra_schema(self): 29 | extra_schema = super(WAFCollectionHarvester, self).extra_schema() 30 | extra_schema["collection_metadata_url"] = [not_empty, string] 31 | log.debug( 32 | "Getting extra schema for WAFCollectionHarvester: {}".format(extra_schema) 33 | ) 34 | return extra_schema 35 | 36 | def get_package_dict(self, iso_values, harvest_object): 37 | 38 | package_dict = super(WAFCollectionHarvester, self).get_package_dict( 39 | iso_values, harvest_object 40 | ) 41 | if not package_dict: 42 | return None 43 | 44 | collection_package_id = self._get_object_extra( 45 | harvest_object, "collection_package_id" 46 | ) 47 | if collection_package_id: 48 | package_dict["extras"].append( 49 | dict(key="collection_package_id", value=collection_package_id) 50 | ) 51 | 52 | collection_metadata = self._get_object_extra( 53 | harvest_object, "collection_metadata" 54 | ) 55 | if collection_metadata: 56 | package_dict["extras"].append( 57 | dict(key="collection_metadata", value=collection_metadata) 58 | ) 59 | status = self._get_object_extra(harvest_object, "status") 60 | if status == "change": 61 | self.force_import = True 62 | else: 63 | self.force_import = False 64 | 65 | return package_dict 66 | 67 | def gather_stage(self, harvest_job): 68 | log.debug("WafHarvester gather_stage for job: %r", harvest_job) 69 | 70 | self.harvest_job = harvest_job 71 | 72 | # Get source URL 73 | source_url = harvest_job.source.url 74 | 75 | self._set_source_config(harvest_job.source.config) 76 | 77 | collection_metadata_url = self.source_config.get("collection_metadata_url") 78 | 79 | if not collection_metadata_url: 80 | self._save_gather_error("collection url does not exist", harvest_job) 81 | return None 82 | 83 | try: 84 | # Ignore F841 unused variable because if commented, code does nothing 85 | response = requests.get(source_url, timeout=60) # NOQA 86 | content = response.content # NOQA 87 | except Exception as e: 88 | self._save_gather_error( 89 | "Unable to get content for URL: %s: %r" % (source_url, e), harvest_job 90 | ) 91 | return None 92 | 93 | guid = 
93 |         guid = hashlib.md5(collection_metadata_url.encode("utf8", "ignore")).hexdigest()
94 |
95 |         existing_harvest_object = (
96 |             model.Session.query(
97 |                 HarvestObject.guid, HarvestObject.package_id, HOExtra.value
98 |             )
99 |             .join(HOExtra, HarvestObject.extras)
100 |             .filter(HOExtra.key == "collection_metadata")
101 |             .filter(HOExtra.value == "true")
102 |             .filter(HarvestObject.current.is_(True))
103 |             .filter(HarvestObject.harvest_source_id == harvest_job.source.id)
104 |             .first()
105 |         )
106 |
107 |         if existing_harvest_object:
108 |             status = "change"
109 |             guid = existing_harvest_object.guid
110 |             package_id = existing_harvest_object.package_id
111 |         else:
112 |             status, package_id = "new", None
113 |
114 |         obj = HarvestObject(
115 |             job=harvest_job,
116 |             extras=[
117 |                 HOExtra(key="collection_metadata", value="true"),
118 |                 HOExtra(key="waf_location", value=collection_metadata_url),
119 |                 HOExtra(key="status", value=status),
120 |             ],
121 |             guid=guid,
122 |             package_id=package_id,
123 |         )
124 |
125 |         queue.fetch_and_import_stages(self, obj)
126 |
127 |         if obj.state == "ERROR":
128 |             self._save_gather_error(
129 |                 "Collection object failed to harvest, not harvesting", harvest_job
130 |             )
131 |             return None
132 |
133 |         return GeoDataGovWAFHarvester.gather_stage(
134 |             self, harvest_job, collection_package_id=obj.package_id
135 |         )
--------------------------------------------------------------------------------
/ckanext/geodatagov/tests/test_sitemap_creation.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import xml.etree.ElementTree as ET
3 | import pytest
4 |
5 | from ckan.tests import factories
6 | from click.testing import CliRunner, Result
7 |
8 | import ckanext.geodatagov.cli as cli
9 |
10 |
11 | log = logging.getLogger(__name__)
12 |
13 | # TODO - test for output, test checking complete s3 cycle
14 |
15 |
16 | class TestSitemapExport(object):
17 |
18 |     def create_datasets(self) -> None:
19 |
20 |         organization = factories.Organization()
21 |         self.dataset1 = factories.Dataset(owner_org=organization["id"])
22 |         self.dataset2 = factories.Dataset(owner_org=organization["id"])
23 |         self.dataset3 = factories.Dataset(owner_org=organization["id"])
24 |         self.dataset4 = factories.Dataset(owner_org=organization["id"])
25 |
26 |     @pytest.fixture
27 |     def cli_result(self) -> Result:
28 |         self.create_datasets()
29 |
30 |         runner = CliRunner()
31 |         raw_cli_output = runner.invoke(
32 |             cli.sitemap_to_s3,
33 |             args=[
34 |                 "--upload_to_s3",
35 |                 "False",
36 |                 "--page_size",
37 |                 "100",
38 |                 "--max_per_page",
39 |                 "100",
40 |             ],
41 |         )
42 |
43 |         return raw_cli_output
44 |
45 |     @staticmethod
46 |     def test_cli_output(cli_result: Result) -> None:
47 |         # check successful cli run
48 |         assert cli_result.exit_code == 0
49 |
50 |         # the example output I have only has one element in it;
51 |         # this and _handle_cli_output will need to be updated for examples with more elements.
52 |         # checks only one list element in output string
53 |         assert cli_result.output.count("file_num") == 1
54 |
55 |     @staticmethod
56 |     def _handle_cli_output(cli_result: Result) -> list:
57 |         """Parses cli output Result to an iterable file_list"""
58 |
59 |         file_list = cli_result.output.split("}\"\n")
60 |         file_list = list(set([f + "}\"" for f in file_list]) - {'}\"'})
61 |
62 |         return file_list
63 |
64 |     def test_create_sitemap(self, cli_result):
65 |         """run sitemap-to-s3 and analyze results"""
66 |
67 |         file_list = self._handle_cli_output(cli_result)
68 |
69 |         files = 0
70 |         datasets = 0
71 |         for site_file in file_list:
72 |             # site_file is dumped as string
73 |             site_file = eval(eval(site_file))
74 |
75 |             files += 1
76 |             """ expected something like
77 |             <?xml version="1.0" encoding="UTF-8"?>
78 |             <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
79 |                 <url>
80 |                     <loc>http://ckan:5000/dataset/test_dataset_01</loc>
81 |                     <lastmod>2020-09-29</lastmod>
82 |                 </url>
83 |                 <url>
84 |                     <loc>http://ckan:5000/dataset/test_dataset_02</loc>
85 |                     <lastmod>2020-09-29</lastmod>
86 |                 </url>
87 |                 ...
88 |             </urlset>
89 |             """
90 |             log.info("Opening file {}".format(site_file["filename_s3"]))
91 |             root = ET.fromstring(site_file["xml"])
92 |             log.info("XML Root {}".format(root))
93 |             assert root.tag == "{http://www.sitemaps.org/schemas/sitemap/0.9}urlset"
94 |
95 |             prev_last_mod = ""
96 |
97 |             dataset1_found = False
98 |             dataset2_found = False
99 |             dataset3_found = False
100 |             dataset4_found = False
101 |
102 |             for url in root:
103 |                 for child in url:
104 |                     if child.tag == "{http://www.sitemaps.org/schemas/sitemap/0.9}loc":
105 |                         dataset_url = child.text
106 |                         dataset_name = dataset_url.split("/")[-1]
107 |                         if dataset_name == self.dataset1["name"]:
108 |                             dataset1_found = True
109 |                         elif dataset_name == self.dataset2["name"]:
110 |                             dataset2_found = True
111 |                         elif dataset_name == self.dataset3["name"]:
112 |                             dataset3_found = True
113 |                         elif dataset_name == self.dataset4["name"]:
114 |                             dataset4_found = True
115 |                         datasets += 1
116 |                     elif (
117 |                         child.tag == "{http://www.sitemaps.org/schemas/sitemap/0.9}lastmod"
118 |                     ):
119 |                         last_mod = child.text
120 |                         log.info("{} >= {} ".format(prev_last_mod, last_mod))
121 |                         assert last_mod >= prev_last_mod
122 |                         prev_last_mod = last_mod
123 |                     else:
124 |                         raise Exception("Unexpected tag")
125 |
126 |         assert files == 1
127 |         assert site_file["filename_s3"] == "sitemap-0.xml"
128 |         assert datasets >= 4  # at least these four
129 |         assert dataset1_found
130 |         assert dataset2_found
131 |         assert dataset3_found
132 |         assert dataset4_found
--------------------------------------------------------------------------------
/ckanext/geodatagov/bin/scrapewaf.py:
--------------------------------------------------------------------------------
1 | import csv
2 |
3 | import requests
4 | import pyparsing as parse
5 | import urllib.parse
6 | import dateutil.parser
7 | from ckanext.spatial.harvesters.base import guess_standard
8 |
9 |
10 | def add_status():
11 |     records = open('wafurls.txt')
12 |     results = open('wafurlsstatus.txt', 'w+')
13 |     headers = 'count,count_with_date,server,status_code,error,standard,id,unapproved,url'
14 |     results.write(headers + '\n')
15 |     writer = csv.DictWriter(
16 |         results, headers.split(',')
17 |     )
18 |
19 |     for row in records:
20 |         row_dict = dict(zip('id unapproved url'.split(), row.split()))
21 |         try:
22 |             response = requests.get(row_dict['url'], timeout=60)
23 |             content = response.content
24 |             server = str(response.headers.get('server'))
25 |             if server == 'Microsoft-IIS/7.5':
26 |                 scraper = 'iis'
27 |             elif 'apache' in server.lower() or 'nginx' in server.lower() or not response.headers.get('server'):
28 |                 scraper = 'apache'
29 |             else:
30 |                 scraper = 'other'
31 |
32 |             row_dict['status_code'] = str(response.status_code)
33 |             row_dict['server'] = server
34 |
35 |             if content and response.status_code == 200:
36 |                 extracted_waf = extract_waf(content, row_dict['url'], scraper)
37 |                 row_dict['count'] = str(len(extracted_waf))
38 |                 row_dict['count_with_date'] = str(len([i for i in extracted_waf if i[1]]))
39 |                 if extracted_waf:
40 |                     try:
41 |                         content_doc = requests.get(extracted_waf[0][0], timeout=60).content
42 |                         standard = guess_standard(content_doc)
43 |                         row_dict['standard'] = standard
44 |                     except Exception as e:
45 |                         print('Error guessing format. Error is', e)
46 |             else:
47 |                 row_dict['count'] = "0"
48 |                 row_dict['count_with_date'] = "0"
49 |         except Exception as e:
50 |             row_dict['error'] = str(e)
51 |             row_dict['count'] = "0"
52 |             row_dict['count_with_date'] = "0"
53 |
54 |         writer.writerow(row_dict)
55 |         results.flush()
56 |
57 |
58 | apache = parse.SkipTo(parse.CaselessLiteral("<a href="), include=True).suppress() \
59 |     + parse.quotedString.setParseAction(parse.removeQuotes).setResultsName('url') \
60 |     + parse.SkipTo("</a>", include=True).suppress() \
61 |     + parse.Optional(parse.Literal('</td><td align="right">')).suppress() \
62 |     + parse.Optional(parse.Combine(
63 |         parse.Word(parse.alphanums + '-') + parse.Word(parse.alphanums + ':'),
64 |         adjacent=False, joinString=' ').setResultsName('date'))
65 |
66 | iis = parse.SkipTo("<br>").suppress() \
67 |     + parse.OneOrMore("<br>").suppress() \
68 |     + parse.Optional(parse.Combine(
69 |         parse.Word(parse.alphanums + '/') + parse.Word(parse.alphanums + ':') + parse.Word(parse.alphas),
70 |         adjacent=False, joinString=' ').setResultsName('date')) \
71 |     + parse.Word(parse.nums).suppress() \
72 |     + parse.Literal('<A HREF=').suppress() \
73 |     + parse.quotedString.setParseAction(parse.removeQuotes).setResultsName('url')
--------------------------------------------------------------------------------
/ckanext/geodatagov/templates/source/geodatagov_source_form.html:
--------------------------------------------------------------------------------
1 | {% import 'macros/form.html' as form %}
2 |
3 | {% block extra_config %}
4 |
5 | {% set database = source_config.get('database') or data.database %}
6 | {% set port = source_config.get('port') or data.port %}
7 | {% set collection_metadata_url = source_config.get('collection_metadata_url') or data.collection_metadata_url %}
8 | {% set extra_search_criteria = source_config.get('extra_search_criteria') or data.extra_search_criteria %}
9 |
10 | {{ form.input('database', id='field-database', label=_('Database'), placeholder=_('eg. My Database'), value=database, error=errors.database, classes=['control-full', 'control-group']) }}
11 | {{ form.input('port', id='field-port', label=_('Port'), placeholder=_('eg. 3452'), value=port, error=errors.port, classes=['control-full', 'control-group']) }}
12 |
13 | {{ form.input('collection_metadata_url', id='field-collection_metadata_url', label=_('Collection Metadata Url'), placeholder=_('eg. http://example.com/collection.xml'), value=collection_metadata_url, error=errors.collection_metadata_url, classes=['control-full', 'control-group']) }}
14 | {{ form.input('extra_search_criteria', id='field-extra_search_criteria', label=_('Extra Search Criteria'), placeholder=_('eg. accountid:0123456789ABCDEF'), value=extra_search_criteria, error=errors.extra_search_criteria, classes=['control-full', 'control-group']) }}
15 |
16 | {% set validator_profiles = source_config.get('validator_profiles') or data.validator_profiles %}
17 | {% set validator_schema = source_config.get('validator_schema') or data.validator_schema %}
18 |
19 | <div class="control-group">
20 |   <label class="control-label" for="field-validator_profiles">{{ _('Validation Profile') }}</label>
21 |   <div class="controls">
22 |     <select id="field-validator_profiles" name="validator_profiles">
23 |       <option value=""></option>
24 |       {% for key, value in h.get_validation_profiles() %}
25 |       {% set checked = key == (validator_profiles or '') %}
26 |       <option value="{{ key }}"{% if checked %} selected{% endif %}>{{ value }}</option>
27 |       {% endfor %}
28 |     </select>
29 |   </div>
30 | </div>
31 |
32 | <div class="control-group">
33 |   <label class="control-label" for="field-validator_schema">{{ _('Validation Schema') }}</label>
34 |   <div class="controls">
35 |     <select id="field-validator_schema" name="validator_schema">
36 |       <option value=""></option>
37 |       {% if h.get_validation_schema() %}
38 |       {% for key, value in h.get_validation_schema() %}
39 |       {% set checked = key == (validator_schema or '') %}
40 |       <option value="{{ key }}"{% if checked %} selected{% endif %}>{{ value }}</option>
41 |       {% endfor %}
42 |       {% endif %}
43 |     </select>
44 |   </div>
45 | </div>
46 |
47 | {% set private_datasets = data.private_datasets or source_config.get('private_datasets') %}
48 |
49 | <div class="control-group">
50 |   <label class="control-label" for="field-private_datasets">{{ _('Private datasets') }}</label>
51 |   <div class="controls">
52 |     <select id="field-private_datasets" name="private_datasets">
53 |       <option value="False"{% if private_datasets in ('False', 'false') %} selected{% endif %}>{{ _('False') }}</option>
54 |       <option value="True"{% if private_datasets in ('True', 'true') %} selected{% endif %}>{{ _('True') }}</option>
55 |     </select>
56 |   </div>
57 | </div>
58 |
59 | {% set existing_group = source_config.default_groups or data.default_groups %}
60 | {% set groups_available = h.groups_available() %}
61 | {% if groups_available %}
62 | <div class="control-group">
63 |   <label class="control-label" for="field-default_groups">{{ _('Default groups') }}</label>
64 |   <div class="controls">
65 |     <select id="field-default_groups" name="default_groups">
66 |       <option value=""></option>
67 |       {% for group in groups_available %}
68 |       <option value="{{ group.id }}"{% if existing_group == group.id %} selected{% endif %}>{{ group.display_name }}</option>
69 |       {% endfor %}
70 |     </select>
71 |   </div>
72 | </div>
73 | {% endif %}
74 |
75 | {% endblock extra_config %}
76 |
77 |
78 | {% block delete_button %}
79 |   {% set locale = h.dump_json({'content': _('Warning: Apart from deleting this source, this command will remove all its datasets, as well as all previous job reports. Are you sure you want to continue?')}) %}
80 |   <a class="btn btn-danger pull-left" data-module="confirm-action" data-module-i18n="{{ locale }}">
{% block delete_button_text %}{{ _('Delete') }}{% endblock %} 94 | 95 | {% endblock %} 96 | -------------------------------------------------------------------------------- /ckanext/geodatagov/validation/xml/fgdc-std-012-2002/fgdc-std-001-1998-sect03.xsd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | -------------------------------------------------------------------------------- /ckanext/geodatagov/tests/test_fix_spatial.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import ckan.plugins as p 4 | import ckan.tests.factories as factories 5 | import ckan.tests.helpers as helpers 6 | 7 | from utils import populate_locations_table 8 | 9 | 10 | @pytest.mark.usefixtures("with_plugins") 11 | class TestSpatialField(object): 12 | 13 | @classmethod 14 | def setup_class(cls): 15 | populate_locations_table() 16 | cls.user = factories.Sysadmin(name='spatial_user') 17 | 18 | def test_numeric_spatial_transformation(self): 19 | old_geo = '10.0,0.0,15.0,5.0' 20 | 21 | context = {'user': self.user['name'], 'ignore_auth': True} 22 | pkg = { 23 | 'title': 'Spatial num', 24 | 'name': 'spatial-num', 25 | 'extras': [ 26 | {'key': 'spatial', 'value': old_geo} 27 | ] 28 | } 29 | dataset = p.toolkit.get_action('package_create')(context, pkg) 30 | 31 | expected_spatial = ('{"type": "Polygon", "coordinates": [[[10.0, 0.0], [10.0, 5.0], [15.0, 5.0], ' 32 | '[15.0, 0.0], [10.0, 0.0]]]}') 33 | 34 | spatial_extra_exists = False 35 | for extra in dataset['extras']: 36 | if extra['key'] == 'spatial': 37 | spatial_extra_exists = True 38 | assert extra['value'] == expected_spatial 39 | 40 | assert spatial_extra_exists is True 41 | 42 | result = helpers.call_action( 43 | 'package_search', 44 | extras={'ext_bbox': '9,-1,16,4'}) 45 | assert result['count'] == 1 46 | assert result['results'][0]['id'] == dataset['id'] 47 | 48 | def test_string_spatial_transformation(self): 49 | 50 | old_geo = 'California' 51 | # require locations table to be installed 52 | 53 | context = {'user': self.user['name'], 'ignore_auth': True} 54 | pkg = { 55 | 'title': 'Spatial String', 56 | 'name': 'spatial-str', 57 | 'extras': [ 58 | {'key': 'spatial', 'value': old_geo} 59 | ] 60 | } 61 | dataset = p.toolkit.get_action('package_create')(context, pkg) 62 | 63 | expected_spatial = ('{"type":"Polygon",' 64 | '"coordinates":[[[-124.3926,32.5358],[-124.3926,42.0022],[-114.1252,42.0022],' 65 | '[-114.1252,32.5358],[-124.3926,32.5358]]]}') 66 | spatial_extra_exists = False 67 | for extra in dataset['extras']: 68 | if extra['key'] == 'spatial': 69 | spatial_extra_exists = True 70 | assert extra['value'] == expected_spatial 71 | 72 | 
assert spatial_extra_exists is True 73 | 74 | result = helpers.call_action( 75 | 'package_search', 76 | extras={'ext_bbox': '-125,31,-113,43'}) 77 | 78 | assert result['count'] == 1 79 | assert result['results'][0]['id'] == dataset['id'] 80 | 81 | def test_list_spatial_transformation(self): 82 | 83 | old_geo = '[[20.0, 10.0], [25.0, 15.0]]' 84 | 85 | context = {'user': self.user['name'], 'ignore_auth': True} 86 | pkg = { 87 | 'title': 'Spatial List', 88 | 'name': 'spatial-list', 89 | 'extras': [ 90 | {'key': 'spatial', 'value': old_geo} 91 | ] 92 | } 93 | dataset = p.toolkit.get_action('package_create')(context, pkg) 94 | 95 | expected_spatial = ('{"type": "Polygon", "coordinates": [[[20.0, 10.0], [20.0, 15.0], [25.0, 15.0], ' 96 | '[25.0, 10.0], [20.0, 10.0]]]}') 97 | spatial_extra_exists = False 98 | for extra in dataset['extras']: 99 | if extra['key'] == 'spatial': 100 | spatial_extra_exists = True 101 | assert extra['value'] == expected_spatial 102 | 103 | assert spatial_extra_exists is True 104 | 105 | result = helpers.call_action( 106 | 'package_search', 107 | extras={'ext_bbox': '19,9,26,16'}) 108 | 109 | assert result['count'] == 1 110 | assert result['results'][0]['id'] == dataset['id'] 111 | 112 | def test_spatial_plus_sign(self): 113 | 114 | old_geo = '-179.231086,-14.601813,+179.859681,+71.441059' 115 | 116 | context = {'user': self.user['name'], 'ignore_auth': True} 117 | pkg = { 118 | 'title': 'Spatial Plus Sign', 119 | 'name': 'spatial-plus', 120 | 'extras': [ 121 | {'key': 'spatial', 'value': old_geo} 122 | ] 123 | } 124 | dataset = p.toolkit.get_action('package_create')(context, pkg) 125 | 126 | expected_spatial = ('{"type": "Polygon", "coordinates": [[[-179.231086, -14.601813], ' 127 | '[-179.231086, 71.441059], [179.859681, 71.441059], [179.859681, ' 128 | '-14.601813], [-179.231086, -14.601813]]]}') 129 | spatial_extra_exists = False 130 | for extra in dataset['extras']: 131 | if extra['key'] == 'spatial': 132 | spatial_extra_exists = True 133 | assert extra['value'] == expected_spatial 134 | 135 | assert spatial_extra_exists is True 136 | 137 | def test_bad_string_transformation(self): 138 | 139 | old_geo = 'US Domestic' 140 | # require locations table to be installed 141 | 142 | context = {'user': self.user['name'], 'ignore_auth': True} 143 | pkg = { 144 | 'title': 'Spatial US Domestic', 145 | 'name': 'spatial-usd', 146 | 'extras': [ 147 | {'key': 'spatial', 'value': old_geo} 148 | ] 149 | } 150 | dataset = p.toolkit.get_action('package_create')(context, pkg) 151 | 152 | expected_spatial = "" 153 | spatial_extra_exists = False 154 | for extra in dataset['extras']: 155 | if extra['key'] == 'spatial': 156 | spatial_extra_exists = True 157 | assert extra['value'] == expected_spatial 158 | if extra['key'] == 'old-spatial': 159 | assert extra['value'] == old_geo 160 | 161 | assert spatial_extra_exists is True 162 | -------------------------------------------------------------------------------- /ckanext/geodatagov/validation/xml/fgdc-std-012-2002/fgdc-std-012-2002-sect03.xsd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | Bit representation of data value in raster cell. 
35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | Specification for the independent axes in the coordinate system in which spatial data are located. 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | Number of axes used in spatial data matrix. 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | Description of individual axis in spatial data matrix. 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | Designation assigned to an axis. 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | The maximum number of data points along the corresponding axis. 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Github Actions](https://github.com/GSA/ckanext-geodatagov/actions/workflows/test.yml/badge.svg)](https://github.com/GSA/ckanext-geodatagov/actions) 2 | [![PyPI version](https://badge.fury.io/py/ckanext-geodatagov.svg)](https://badge.fury.io/py/ckanext-geodatagov) 3 | 4 | # Data.gov 5 | 6 | [Data.gov](http://data.gov) is an open data website created by the [U.S. General Services Administration](https://github.com/GSA/) that is based on two robust open source projects: [CKAN](http://ckan.org) and [WordPress](http://wordpress.org). The data catalog at [catalog.data.gov](https://catalog.data.gov) is powered by CKAN, while the content seen at [Data.gov](https://data.gov) is powered by WordPress. 7 | 8 | **For all code, bugs, and feature requests related to Data.gov, see the project-wide Data.gov [issue tracker](https://github.com/GSA/data.gov/issues).** 9 | 10 | Currently this repository is used only for version control of the CKAN extension for geospatial data, but you can see all of the Data.gov-relevant repos listed in the [GSA Data.gov README file](https://github.com/GSA/data.gov/blob/master/README.md). 11 | 12 | ## CKAN Extension for Geospatial Data 13 | 14 | Most Data.gov-specific CKAN customizations are contained within this extension, but the extension also provides additional geospatial capabilities. 15 | 16 | ### Customization 17 | 18 | Due to the CKAN 2.3 and 2.8 migrations, some features should be removed or moved to the official community versions: 19 | - [Stop rolling up the extras](https://github.com/GSA/ckanext-geodatagov/issues/178) 20 | - [Move to the official search by geolocation](https://github.com/GSA/datagov-deploy/issues/2440) (probably sharing our version, which has improvements) 21 | - Do a general analysis of this extension to detect other customized functionality that should be discontinued. 22 | 23 | ### Requirements 24 | 25 | Package | Notes 26 | ---------------------------------------------------------------------- | ------------- 27 | [ckanext-harvest](https://github.com/ckan/ckanext-harvest/) | -- 28 | [ckanext-spatial](https://github.com/ckan/ckanext-spatial) | -- 29 | [PyZ3950](https://github.com/asl2/PyZ3950) | -- 30 | [werkzeug](https://github.com/nickumia-reisys/werkzeug) | This only affects the tests. For all intents and purposes, this should be tracking [upstream](https://github.com/pallets/werkzeug) 31 | 32 | This extension is compatible with these versions of CKAN.
33 | 34 | CKAN version | Compatibility 35 | ------------ | ------------- 36 | <=2.8 | no 37 | 2.9 | 0.1.37 (last supported) 38 | 2.10 | >=0.2.0 39 | 40 | ## Tests 41 | 42 | All the tests live in the [/ckanext/geodatagov/tests](/ckanext/geodatagov/tests) folder. [GitHub Actions](https://github.com/GSA/ckanext-geodatagov/blob/main/.github/workflows/test.yml) is configured to run the tests against CKAN 2.10 when you open a pull request. 43 | 44 | ## Using the Docker Dev Environment 45 | 46 | ### Build Environment 47 | 48 | To start the environment, run: 49 | ```docker compose build``` 50 | ```docker compose up``` 51 | 52 | CKAN will start at localhost:5000 53 | 54 | To shut down the environment, run: 55 | 56 | ```docker compose down``` 57 | 58 | To docker exec into the CKAN image, run: 59 | 60 | ```docker compose exec app /bin/bash``` 61 | 62 | ### Testing 63 | 64 | The tests follow the guidelines for [testing CKAN 65 | extensions](https://docs.ckan.org/en/2.10/extensions/testing-extensions.html#testing-extensions). 66 | 67 | To run the extension tests, start the containers with `make up`, then: 68 | 69 | $ make test 70 | 71 | Lint the code: 72 | 73 | $ make lint 74 | 75 | ### Debugging 76 | 77 | We have not determined a good way to do native IDE debugging; however, you can use the built-in 78 | Python pdb debugger. Simply run `make debug`, which will run docker with an interactive shell. 79 | Add `import pdb; pdb.set_trace()` anywhere you want to start debugging, and if the code is triggered 80 | you should see a command prompt waiting in the shell. While you are learning, use a pdb cheat sheet 81 | like [this one](https://kapeli.com/cheat_sheets/Python_Debugger.docset/Contents/Resources/Documents/index). 82 | 83 | When you edit/add/remove code, the server is smart enough to restart. If you are editing logic that is 84 | not part of the webserver (ckan command, etc.) then you should be able to run the command after edits 85 | and get the same debugger prompt. 86 | 87 | ### Matrix builds 88 | 89 | The existing development environment assumes a full catalog.data.gov test setup. This makes 90 | it difficult to develop and test against new versions of CKAN (or really any 91 | dependency) because everything is tightly coupled and would require us to 92 | upgrade everything at once, which doesn't really work. A new make target 93 | `test-new` is introduced with a new docker-compose file. 94 | 95 | The "new" development environment drops as many dependencies as possible. It is 96 | not meant to have feature parity with 97 | [GSA/catalog.data.gov](https://github.com/GSA/catalog.data.gov/). Tests should 98 | mock external dependencies where possible. 99 | 100 | In order to support multiple versions of CKAN, or even upgrade to new versions 101 | of CKAN, we support development and testing through the `CKAN_VERSION` 102 | environment variable. 103 | 104 | $ make CKAN_VERSION=2.11 test 105 | 106 | ### Command line interface 107 | 108 | The following operations can be run from the command line as described below: 109 | 110 | geodatagov sitemap-to-s3 [{upload_to_s3}] [{page_size}] [{max_per_page}] 111 | - Generates sitemap and uploads to s3 112 | 113 | geodatagov db-solr-sync [{dryrun}] [{cleanup_solr}] [{update_solr}] 114 | - DB Solr sync. 115 | 116 | geodatagov tracking-update [{start_date}] 117 | - ckan tracking update with customized options and output 118 | For example, you can run the sitemap export from inside the app container (a hedged invocation: it assumes these subcommands are registered under the standard `ckan` CLI, and it passes no optional arguments):
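    # Hedged example: run the sitemap export from inside the app container;
    # with no optional arguments supplied, the command's defaults apply.
    $ docker compose exec app ckan geodatagov sitemap-to-s3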
119 | ## Credit / Copying 120 | 121 | Original work written by the HealthData.gov team. It has been modified in support of Data.gov. 122 | 123 | As a work of the United States Government, this package is in the public 124 | domain within the United States. Additionally, we waive copyright and 125 | related rights in the work worldwide through the CC0 1.0 Universal 126 | public domain dedication (which can be found at http://creativecommons.org/publicdomain/zero/1.0/). 127 | 128 | ## Ways to Contribute 129 | We're so glad you're thinking about contributing to ckanext-geodatagov! 130 | 131 | Before contributing to ckanext-geodatagov we encourage you to read our 132 | [CONTRIBUTING](CONTRIBUTING.md) guide, our [LICENSE](LICENSE.md), and our README 133 | (you are here), all of which should be in this repository. If you have any 134 | questions, you can email the Data.gov team at 135 | [datagov@gsa.gov](mailto:datagov@gsa.gov). 136 | -------------------------------------------------------------------------------- /ADR.md: -------------------------------------------------------------------------------- 1 | 2 | ADRs for CKANEXT_GEODATAGOV 3 | ============================================== 4 | 5 | # 1. Fix encoding issue for waf harvester 6 | 7 | Date: 2021-07-16 8 | 9 | ## Status 10 | 11 | Accepted 12 | 13 | ## Context 14 | 15 | We are using the upstream ckan version of ckanext-spatial. They upgraded the extension to PY3; however, their harvester tests were removed. The waf harvester was not being encoded properly to support PY2 and PY3, so our tests were failing. 16 | 17 | ## Decision 18 | 19 | We decided to fix the bug and submit a PR [upstream](https://github.com/ckan/ckanext-spatial/pull/252). 20 | 21 | ## Consequences 22 | 23 | - Until the fix is merged upstream, the ckanext-geodatagov repo will be tracking a pinned version of a ckanext-spatial fork, which adds complexity. 24 | - All of the customization of the GSA fork of ckanext-spatial is disregarded. The GSA fork was messy already. 25 | 26 | 27 | 28 | # 2. Fix JSON Serialization of dictionary 29 | 30 | Date: 2021-07-19 31 | 32 | ## Status 33 | 34 | Accepted 35 | 36 | ## Context 37 | 38 | We are using the upstream ckan version of ckanext-harvest. They upgraded the extension to PY3; however, there was a PY3-compatibility issue that caused our tests to fail. 39 | 40 | ## Decision 41 | 42 | We decided to fix the bug and submit a PR [upstream](https://github.com/ckan/ckanext-harvest/pull/450). 43 | 44 | ## Consequences 45 | 46 | - Until the fix is merged upstream, the ckanext-geodatagov repo will be tracking a pinned version of a ckanext-harvest fork, 47 | which adds complexity. 48 | - All of the customization of the GSA fork of ckanext-harvest is disregarded. The GSA fork was messy already. 49 | 50 | 51 | # 3. Use catalog.data.gov Solr Image 52 | 53 | Date: 2021-06-21 54 | 55 | ## Status 56 | 57 | Accepted 58 | 59 | ## Context 60 | 61 | The Solr dev image that ckanext-datajson uses was incompatible with ckanext-geodatagov. There was a 'solrsearch issue' that popped up with no clear resolution. 62 | 63 | ## Decision 64 | 65 | Using the catalog.data.gov Solr image stopped Solr from throwing exceptions. 66 | 67 | ## Consequences 68 | 69 | - Consequences unknown. 70 | - The ckanext repos shouldn't each be using varying versions of Solr/Postgres/etc. 71 | 72 | 73 | # 4. Fix CKAN Test Suite, specifically reset_db() 74 | 75 | Date: 2021-06-24 76 | 77 | ## Status 78 | 79 | Accepted 80 | 81 | ## Context 82 | 83 | If all of the tables are not initialized, the 'reset_db' function attempts to delete all of the tables and reinitialize everything.
Because geodatagov requires the postgis tables, which have a complicated initialization, the ckan function doesn't support their maintenance (the current code doesn't support it; that doesn't mean it can't or won't). This is the [logic](https://github.com/ckan/ckan/blob/e2d9d1610e63d2256739a09ba2a18e59a29a45db/ckan/model/__init__.py#L225-L236) that breaks it. Either way, if reset_db() is called too early, the postgis tables will be deleted and will break the code. If reset_db() is called too late, the db can't initialize and the code breaks. 84 | 85 | ## Decision 86 | 87 | Implement two customizations. 88 | - https://github.com/GSA/ckanext-geodatagov/pull/190/commits/627a8ad689d50b446527ea39ff4b9290203929a9 89 | - https://github.com/GSA/ckanext-geodatagov/pull/190/commits/8e34ee0164ac1ce454d4c8944ee5fbc5d025b2ed 90 | 91 | ## Consequences 92 | 93 | - Consequences unknown. 94 | - If the commands called in test_category_tags.py are called anywhere else, the tests fail. 95 | - If the commands are repeated in multiple files, the tests fail. 96 | - If any test needs to be run in isolation, the test_category_tags.py test needs to precede it; otherwise the independent test will fail. 97 | 98 | 99 | # 5. Track PY2 pip requirements separately from PY3 100 | 101 | Date: 2021-07-08 102 | 103 | ## Status 104 | 105 | Accepted 106 | 107 | ## Context 108 | 109 | There are a few libraries that either operate differently in py2 and py3 or have different levels of support for py2 and py3, so we needed to track two separate versions. 110 | 111 | PY2: 112 | - https://github.com/asl2/PyZ3950.git#egg=PyZ3950 113 | - OWSLib == 0.8.6 114 | - pyproj 1.9.6 115 | - factory-boy==2.1.1 116 | - werkzeug (no customization; it installed based on other dependencies) 117 | 118 | PY3: 119 | - https://github.com/danizen/PyZ3950.git#egg=PyZ3950 120 | - OWSLib >= 0.18.0 121 | - pyproj 2.6.1 122 | - factory-boy==2.12.0 123 | - https://github.com/nickumia-reisys/werkzeug@e1f6527604ab30e4b46b5430a5fb97e7a7055cd7#egg=werkzeug 124 | 125 | The PY3 upgrades for ckanext-harvest and ckanext-spatial had small bugs that were submitted as PRs upstream; until they are accepted, the local changes need to be tracked. 126 | - https://github.com/nickumia-reisys/ckanext-harvest.git@9d1f647d247c16b6c3acba26e321e9500cafb18c#egg=ckanext-harvest 127 | - https://github.com/GSA/ckanext-spatial.git@93c430ffc36ba7e306652fd511efd0d1e7081381#egg=ckanext-spatial 128 | 129 | ## Decision 130 | 131 | See [commit](https://github.com/GSA/ckanext-geodatagov/pull/190/commits/0cbd146d286fc1467fd2f3fba4800f7ba66b76ce) 132 | 133 | ## Consequences 134 | 135 | - A lot of specificity 136 | 137 | 138 | # 6. Remove csw harvester tests 139 | 140 | Date: 2021-07-16 141 | 142 | ## Status 143 | 144 | Accepted 145 | 146 | ## Context 147 | 148 | We don't have any customizations to the csw harvesting capability, so we no longer need to test our unique cases. 149 | 150 | ## Decision 151 | 152 | Remove [tests](https://github.com/GSA/ckanext-geodatagov/pull/190/commits/18927273785a8b2f06939c259f909c0d1ae36faf). 153 | 154 | ## Consequences 155 | 156 | - Neither ckanext-spatial nor ckanext-harvest tests csw harvesting, so there are missing tests overall. 157 | 158 | 159 | # 7. Rewrite source form test 160 | 161 | Date: 2021-07-19 162 | 163 | ## Status 164 | 165 | Unreviewed 166 | 167 | ## Context 168 | 169 | The CKAN test suite no longer supports forms in web pages, so custom parsing needs to be done to extract form options and data. A minimal sketch of the idea, using only the standard library parser (hypothetical code, not the project's actual helper), looks like this:
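```python
# Hypothetical sketch, not the project's actual helper: collect the
# value of every <option> tag from a rendered harvest-source form page.
from html.parser import HTMLParser


class OptionCollector(HTMLParser):

    def __init__(self):
        super().__init__()
        self.values = []

    def handle_starttag(self, tag, attrs):
        # attrs arrives as a list of (name, value) pairs
        if tag == "option":
            self.values.append(dict(attrs).get("value"))


parser = OptionCollector()
parser.feed('<select name="profile"><option value="iso19139ngdc">ISO</option></select>')
assert parser.values == ["iso19139ngdc"]
```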
The new tests leverage [this](https://docs.python.org/3/library/html.parser.html). The CKAN test suite changed the return type of the test app from [2.8](https://github.com/ckan/ckan/blob/2.8/ckan/tests/helpers.py#L147-L159) to [2.9](https://github.com/ckan/ckan/blob/2.9/ckan/tests/helpers.py#L194-L240). 170 | 171 | ## Decision 172 | 173 | Write [custom test functions](https://github.com/GSA/ckanext-geodatagov/pull/190/commits/18927273785a8b2f06939c259f909c0d1ae36faf). 174 | 175 | ## Consequences 176 | 177 | - Neither ckanext-spatial nor ckanext-harvest tests csw harvesting, so there are missing tests overall. 178 | 179 | 180 | # 8. Remove test_source_form test 181 | 182 | Date: 2022-12-12 183 | 184 | ## Status 185 | 186 | Unreviewed 187 | 188 | ## Context 189 | 190 | The test was trying to create a harvest source with a post request to `/harvest/new`; however, we suspect something in ckanext-harvest changed and broke this functionality. Since we are doing harvest tests in catalog.data.gov, we thought it was acceptable to remove this test altogether. 191 | 192 | ## Decision 193 | 194 | Remove the test. 195 | 196 | ## Consequences 197 | 198 | - Fewer tests? 199 | -------------------------------------------------------------------------------- /ckanext/geodatagov/validation/xml/fgdc-std-012-2002/fgdc-std-012-2002-sect05.xsd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | Function converting set of values on one scale to another. 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | A function in successive powers of the independent variable, or the ratio of such functions, used in a transformation, one example of which is scaling, derivation of a set of values on one scale or coordinate system from the value in another, in the sense derived value = polynomial (initial value). 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | The polynomial function when not a ratio, and the dividend of the ratio when it is. 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | The number of nonzero terms in the numerator of the polynomial. 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | The divisor of a polynomial function that is a ratio. (<i>Note: if absent, assumed equal to 1.</i>) 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | The number of nonzero terms in the denominator of the polynomial. 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | The coefficient of one term in the numerator or denominator of a polynomial function.(<i>Note: For a polynomial numerator or denominator of order m, there will be m+1 coefficients. Any of these coefficients, except that of the m power term, may be zero. When the function is linear, the coefficient of the zero-power term is the offset and the coefficient of the first power term is the scale factor.</i>) 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | Text description of the function used to derive a set of values on one scale from their value in another, using a function that is not a polynomial.
140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | -------------------------------------------------------------------------------- /ckanext/geodatagov/validation/xml/fgdc-std-001.2-2001/fgdc-std-001.2-2001.xsd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | Federal Geographic Data Committee's Shoreline Metadata Profile of the Content Standard for Digital Geospatial Metadata (FGDC-STD-001.2-2001), June 2001 version of the standard. Data about the content, quality, condition, and other characteristics of data. 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /ckanext/geodatagov/validation/xml/fgdc-std-012-2002/fgdc-std-012-2002-locainfo.xsd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Information about the location of a set of one or more points.(<i>Note: this section provides a means of describing position in a coordinate system relevant to the calling element and is used by other sections of the metadata extensions. This section is never used alone. It differs from the Spatial Reference Information in that it provides positions in a coordinate system relevant to metadata elements, whereas the Spatial Reference Information refers only to positions at which the data are located.</i>) 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | Number of coordinate positions. 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | Definition of axes of coordinate system in which location of positions is provided. 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | Coordinate system which is not georeferenced and for which georeferencing information is unavailable or irrelevant. 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | Coordinate system that can be georeferenced. 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | Physical dimension corresponding to value of unity in x and y coordinate directions as defined in Coordinate System or referencing element, where the coordinates correspond to physical dimensions. 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | Physical dimension corresponding to value of unity in z coordinate directions Coordinate System or referencing element, where the coordinates correspond to physical dimensions. 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | Location of a coordinate point described by the referencing element. 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | Location of point along x-axis. 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | Location of point along y-axis. 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | Location of point along z-axis. 
150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | -------------------------------------------------------------------------------- /ckanext/geodatagov/tests/test_relink.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import datetime 4 | import pytest 5 | 6 | from ckan.common import config 7 | from ckan.lib.search.common import make_connection 8 | import ckan.model as model 9 | from ckanext.geodatagov.rebuild import rebuild 10 | from ckan.tests import factories 11 | from click.testing import CliRunner 12 | from ckanext.harvest.model import HarvestObject 13 | from ckanext.harvest.tests import factories as harvest_factories 14 | from ckanext.harvest.logic import HarvestJobExists 15 | 16 | import ckanext.geodatagov.cli as cli 17 | 18 | 19 | log = logging.getLogger(__name__) 20 | 21 | 22 | class TestRelink(object): 23 | 24 | @classmethod 25 | def setup_class(cls): 26 | organization = factories.Organization() 27 | # create two harvest sources 28 | cls.source1 = harvest_factories.HarvestSourceObj( 29 | url="http://test1", 30 | name="test-ho-id1", 31 | title="Test relink 1", 32 | source_type="ckan", 33 | frequency="MANUAL" 34 | ) 35 | cls.source2 = harvest_factories.HarvestSourceObj( 36 | url="http://test2", 37 | name="test-ho-id2", 38 | title="Test relink 2", 39 | source_type="ckan", 40 | frequency="MANUAL" 41 | ) 42 | 43 | # dataset 1 is for source 1 44 | cls.dataset1 = factories.Dataset(owner_org=organization["id"]) 45 | # with false hoid1 and true hoid2 46 | cls.dataset1_hoid1 = HarvestObject( 47 | package_id=cls.dataset1['id'], 48 | job=create_harvest_job(cls.source1), 49 | import_finished=datetime.datetime.utcnow(), 50 | state='COMPLETE', 51 | report_status='', 52 | current=False 53 | ) 54 | cls.dataset1_hoid2 = HarvestObject( 55 | package_id=cls.dataset1['id'], 56 | job=create_harvest_job(cls.source2), 57 | import_finished=datetime.datetime.utcnow(), 58 | state='COMPLETE', 59 | current=True 60 | ) 61 | cls.dataset1_hoid1.save() 62 | cls.dataset1_hoid2.save() 63 | 64 | # dataset 2 is for source 2 65 | cls.dataset2 = factories.Dataset(owner_org=organization["id"]) 66 | # with false hoid1 and true hoid2 67 | cls.dataset2_hoid1 = HarvestObject( 68 | package_id=cls.dataset2['id'], 69 | job=create_harvest_job(cls.source2), 70 | import_finished=datetime.datetime.utcnow(), 71 | state='COMPLETE', 72 | report_status='', 73 | current=False 74 | ) 75 | cls.dataset2_hoid2 = HarvestObject( 76 | package_id=cls.dataset2['id'], 77 | job=create_harvest_job(cls.source2), 78 | import_finished=datetime.datetime.utcnow(), 79 | state='COMPLETE', 80 | current=True 81 | ) 82 | cls.dataset2_hoid1.save() 83 | cls.dataset2_hoid2.save() 84 | 85 | rebuild() 86 | 87 | # check solr is using the current=True harvest object hoid2 88 | assert get_solr_hoid(cls.dataset1['id']) == cls.dataset1_hoid2.id 89 | assert get_solr_hoid(cls.dataset2['id']) == cls.dataset2_hoid2.id 90 | 91 | # make all harvest objects current=False, but hoid1 with newer import_finished 92 | cls.dataset1_hoid1.current = False 93 | cls.dataset1_hoid1.import_finished = datetime.datetime.utcnow() 94 | cls.dataset1_hoid1.save() 95 | cls.dataset1_hoid2.current = False 96 | cls.dataset1_hoid2.save() 97 | 98 | cls.dataset2_hoid1.current = False 99 | cls.dataset2_hoid1.import_finished = datetime.datetime.utcnow() 100 | cls.dataset2_hoid1.save() 101 | cls.dataset2_hoid2.current = False 102 | cls.dataset2_hoid2.save() 103 | 104 | @pytest.fixture 105 | def cli_result_source1(self): 106 | 
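"""Invoke the harvest_object_relink CLI for source1 only and return the raw click result."""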
runner = CliRunner() 107 | raw_cli_output = runner.invoke( 108 | cli.harvest_object_relink, 109 | args=[self.source1.id], 110 | ) 111 | 112 | return raw_cli_output 113 | 114 | @pytest.fixture 115 | def cli_result_all(self): 116 | runner = CliRunner() 117 | raw_cli_output = runner.invoke( 118 | cli.harvest_object_relink, 119 | args=[], 120 | ) 121 | 122 | return raw_cli_output 123 | 124 | @pytest.mark.order1 125 | def test_relink_source1(self, cli_result_source1): 126 | """run harvest_object_relink and analyze results""" 127 | # check successful cli run 128 | assert cli_result_source1.exit_code == 0 129 | 130 | # check harvest object with newer import_finished is now current 131 | assert get_hoid_current(self.dataset1_hoid1.id) is True 132 | assert get_hoid_current(self.dataset1_hoid2.id) is False 133 | 134 | # check that solr has current harvest object for source1 dataset 135 | assert get_solr_hoid(self.dataset1['id']) == self.dataset1_hoid1.id 136 | 137 | # check that solr has not changed for source2 dataset 138 | assert get_solr_hoid(self.dataset2['id']) == self.dataset2_hoid2.id 139 | 140 | @pytest.mark.order2 141 | def test_relink_all(self, cli_result_all): 142 | """run harvest_object_relink and analyze results""" 143 | # check successful cli run 144 | assert cli_result_all.exit_code == 0 145 | 146 | # check harvest object with newer import_finished is now current 147 | assert get_hoid_current(self.dataset2_hoid1.id) is True 148 | assert get_hoid_current(self.dataset2_hoid2.id) is False 149 | 150 | # check that solr has current harvest object for both sources' datasets 151 | assert get_solr_hoid(self.dataset1['id']) == self.dataset1_hoid1.id 152 | assert get_solr_hoid(self.dataset2['id']) == self.dataset2_hoid1.id 153 | 154 | 155 | def get_hoid_current(id): 156 | """ 157 | Return the current value for a particular harvest object in DB. 158 | """ 159 | return model.Session.query( 160 | HarvestObject.current).filter(HarvestObject.id == id).first()[0] 161 | 162 | 163 | def get_solr_hoid(id): 164 | """ 165 | Return the harvest_object_id for a particular package id in Solr. 
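Queries Solr directly (via make_connection) and reads the harvest_object_id extra out of the indexed validated_data_dict.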
166 | """ 167 | query = "*:*" 168 | fq = "+site_id:\"%s\" " % config.get('ckan.site_id') 169 | fq += "+state:active " 170 | fq += "+id:%s" % (id) 171 | 172 | conn = make_connection() 173 | data = conn.search(query, fq=fq, rows=10, fl='validated_data_dict') 174 | 175 | harvest_object_id = None 176 | if data.docs: 177 | data_dict = json.loads(data.docs[0].get("validated_data_dict")) 178 | for extra in data_dict.get("extras", []): 179 | if extra["key"] == "harvest_object_id": 180 | harvest_object_id = extra["value"] 181 | break 182 | 183 | return harvest_object_id 184 | 185 | 186 | def create_harvest_job(source): 187 | """ 188 | Create a fictitious harvest job object and return it 189 | """ 190 | try: 191 | job = harvest_factories.HarvestJobObj(source=source) 192 | except HarvestJobExists: # not sure why 193 | job = source.get_jobs()[0] 194 | 195 | job.save() 196 | 197 | return job 198 | -------------------------------------------------------------------------------- /ckanext/geodatagov/validation/xml/fgdc-std-012-2002/fgdc-std-001-1998-sect05.xsd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | -------------------------------------------------------------------------------- /ckanext/geodatagov/validation/xml/fgdc-std-012-2002/fgdc-std-001-1998-sect09.xsd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Information about the date and time of an event. 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | Means of encoding a single date and time. 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | The year (and optionally month, or month and day). 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | The hour (and optionally minute, or minute and second) of the day. 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | Means of encoding multiple individual dates and times. 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | Means of encoding a range of dates and times. 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | The first year (and optionally month, or month and day) of the event. 
109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | The first hour (and optionally minute, or minute and second) of the day for the event. 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | The last year (and optionally month, or month and day) for the event. 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | The last hour (and optionally minute, or minute and second) of the day for the event. 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | -------------------------------------------------------------------------------- /ckanext/geodatagov/tests/test_waf-collection.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import pytest 4 | 5 | from ckan import model 6 | from ckanext.geodatagov.harvesters.waf_collection import WAFCollectionHarvester 7 | from ckanext.spatial.validation import all_validators 8 | import ckanext.harvest.model as harvest_model 9 | from ckan.tests.factories import Organization 10 | from ckan.tests.helpers import call_action 11 | 12 | from factories import HarvestJobObj, WafCollectionHarvestSourceObj 13 | from utils import PORT, reset_db_and_solr 14 | 15 | 16 | log = logging.getLogger(__name__) 17 | 18 | 19 | @pytest.mark.usefixtures("with_plugins") 20 | class TestWafCollectionHarvester(object): 21 | 22 | def setup_method(self): 23 | reset_db_and_solr() 24 | 25 | self.organization = Organization() 26 | 27 | def run_gather(self, url, source_config): 28 | 29 | sc = json.loads(source_config) 30 | existing_profiles = [v.name for v in all_validators] 31 | log.info('Existing validator profiles: {}'.format(existing_profiles)) 32 | source = WafCollectionHarvestSourceObj(url=url, 33 | owner_org=self.organization['id'], 34 | # config=source_config, 35 | **sc) 36 | self.job = HarvestJobObj(source=source) 37 | 38 | self.harvester = WAFCollectionHarvester() 39 | 40 | # gather stage 41 | log.info('GATHERING %s', url) 42 | obj_ids = self.harvester.gather_stage(self.job) 43 | log.info('job.gather_errors=%s', self.job.gather_errors) 44 | if len(self.job.gather_errors) > 0: 45 | raise Exception(self.job.gather_errors[0]) 46 | 47 | log.info('obj_ids=%s', obj_ids) 48 | if obj_ids is None or len(obj_ids) == 0: 49 | # nothing to see 50 | return 51 | 52 | self.harvest_objects = [] 53 | for obj_id in obj_ids: 54 | harvest_object = harvest_model.HarvestObject.get(obj_id) 55 | log.info('ho guid=%s', harvest_object.guid) 56 | log.info('ho content=%s', harvest_object.content) 57 | self.harvest_objects.append(harvest_object) 58 | 59 | # this is a list of harvestObjects IDs. 
One for each dataset 60 | return obj_ids 61 | 62 | def run_fetch(self): 63 | # fetch stage 64 | for harvest_object in self.harvest_objects: 65 | log.info('FETCHING %s' % harvest_object.id) 66 | result = self.harvester.fetch_stage(harvest_object) 67 | 68 | log.info('ho errors=%s', harvest_object.errors) 69 | log.info('result 1=%s', result) 70 | if len(harvest_object.errors) > 0: 71 | raise Exception(harvest_object.errors[0]) 72 | 73 | def run_import(self): 74 | # import stage 75 | datasets = [] 76 | for harvest_object in self.harvest_objects: 77 | log.info('IMPORTING %s' % harvest_object.id) 78 | result = self.harvester.import_stage(harvest_object) 79 | 80 | log.info('ho errors 2=%s', harvest_object.errors) 81 | log.info('result 2=%s', result) 82 | if len(harvest_object.errors) > 0: 83 | raise Exception(harvest_object.errors[0]) 84 | 85 | log.info('ho pkg id=%s', harvest_object.package_id) 86 | dataset = model.Package.get(harvest_object.package_id) 87 | datasets.append(dataset) 88 | log.info('dataset name=%s', dataset.name) 89 | 90 | return datasets 91 | 92 | def get_datasets_from_waf_collection1_sample(self): 93 | """ harvest waf-collection1/ folder as waf-collection source """ 94 | url = f'http://127.0.0.1:{PORT}/waf-collection1/index.html' 95 | 96 | collection_metadata = f"http://127.0.0.1:{PORT}/waf-collection1/cfg/SeriesCollection_tl_2013_county.shp.iso.xml" 97 | config = '{"collection_metadata_url": "%s", "validator_profiles": ["iso19139ngdc"], "private_datasets": false}' %\ 98 | collection_metadata 99 | self.run_gather(url=url, source_config=config) 100 | self.run_fetch() 101 | datasets = self.run_import() 102 | self.job.status = 'Finished' 103 | self.job.save() 104 | 105 | return datasets 106 | 107 | def test_waf_collection1_datasets_count(self): 108 | """ Get datasets from waf-collection1/ folder as waf-collection source 109 | and test we have one dataset with the expected name """ 110 | 111 | datasets = self.get_datasets_from_waf_collection1_sample() 112 | assert len(datasets) == 1 113 | dataset = datasets[0] 114 | assert dataset.name == 'tiger-line-shapefile-2013-nation-u-s-current-county-and-equivalent-national-shapefile' 115 | 116 | def test_waf_collection1_datasets_as_child(self): 117 | """ Harvest waf-collection1/ folder as waf-collection source 118 | and test we get one dataset and this dataset is a "child" (it has a "collection_package_id" extra) 119 | and is not a "parent" (it does not include the collection_metadata extra) """ 120 | 121 | datasets = self.get_datasets_from_waf_collection1_sample() 122 | dataset = datasets[0] 123 | 124 | extras = json.loads(dataset.extras['extras_rollup']) 125 | print(f'extras: {extras}') 126 | keys = list(extras.keys()) 127 | assert 'collection_package_id' in keys 128 | assert 'collection_metadata' not in keys 129 | 130 | def test_waf_collection1_parent_exists(self): 131 | """ Harvest waf-collection1/ folder as waf-collection source 132 | and test parent dataset exists (includes the collection_metadata=true extra) """ 133 | 134 | datasets = self.get_datasets_from_waf_collection1_sample() 135 | dataset = datasets[0] 136 | extras = json.loads(dataset.extras['extras_rollup']) 137 | 138 | parent = call_action('package_show', context={'user': 'dummy'}, id=extras['collection_package_id']) 139 | parent_keys = [extra['key'] for extra in parent['extras']] 140 | assert 'collection_metadata' in parent_keys 141 | assert 'true' == [extra['value'] for extra in parent['extras'] if extra['key'] == 'collection_metadata'][0] 142 | 143 | def
test_waf_collection1_parent_title(self): 144 | """ Harvest waf-collection1/ folder as waf-collection source 145 | and test parent dataset has the expected title and name """ 146 | 147 | datasets = self.get_datasets_from_waf_collection1_sample() 148 | dataset = datasets[0] 149 | extras = json.loads(dataset.extras['extras_rollup']) 150 | 151 | parent = call_action('package_show', context={'user': 'dummy'}, id=extras['collection_package_id']) 152 | 153 | assert parent['title'] == ('TIGER/Line Shapefile, 2013, ' 154 | 'Series Information File for the Current county and Equivalent National Shapefile') 155 | assert parent['name'] == ('tiger-line-shapefile-2013-' 156 | 'series-information-file-for-the-current-county-and-equivalent-nationa') 157 | 158 | def test_waf_collection_transformation_failed(self): 159 | url = f'http://127.0.0.1:{PORT}/waf-collection2/index.html' 160 | 161 | collection_metadata = f"http://127.0.0.1:{PORT}/waf-collection2/cfg/SeriesCollection_tl_2013_county.shp.iso.xml" 162 | config = '{"collection_metadata_url": "%s", "validator_profiles": ["iso19139ngdc"], "private_datasets": false}' %\ 163 | collection_metadata 164 | self.run_gather(url=url, source_config=config) 165 | 166 | self.run_fetch() 167 | 168 | # we don't handle ISO 19110 169 | with pytest.raises(Exception) as e: 170 | self.run_import() 171 | assert 'Transformation to ISO failed' in str(e.value) 172 | -------------------------------------------------------------------------------- /ckanext/geodatagov/validation/xml/fgdc-std-001.2-2001/fgdc-std-001.2-2001-sect09.xsd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Information about the date and time of an event. 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | Means of encoding a single date and time. 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | The year (and optionally month, or month and day). 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | The hour and minute, and (optionally second) of the day. 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | Means of encoding multiple individual dates and times. 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | Means of encoding a range of dates and times. 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | The first year (and optionally month, or month and day) of the event. 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | The first hour and minute, or (optionally second) of the day for the event. 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | The last year (and optionally month, or month and day) for the event. 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | The last hour and minute, or (optionally second) of the day for the event. 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | --------------------------------------------------------------------------------