├── docs ├── source │ ├── _static │ │ ├── .keep │ │ ├── images │ │ │ ├── code.png │ │ │ ├── grid.png │ │ │ ├── line.png │ │ │ ├── logo.png │ │ │ ├── YAMLtab.png │ │ │ ├── favicon.ico │ │ │ ├── gui_add.png │ │ │ ├── gui_plot.png │ │ │ ├── terminal.png │ │ │ ├── gui_remove.png │ │ │ ├── gui_search.png │ │ │ ├── custom_button.png │ │ │ ├── gui_add_local.png │ │ │ ├── gui_builtin.png │ │ │ ├── gui_plot_yaml.png │ │ │ ├── gui_add_remote.png │ │ │ ├── gui_search_cat.png │ │ │ ├── plotting_violin.png │ │ │ ├── gui_search_inputs.png │ │ │ └── logo.svg │ │ └── css │ │ │ └── custom.css │ ├── api.rst │ ├── reference.rst │ ├── start.rst │ ├── guide.rst │ ├── transforms.rst │ ├── api_base.rst │ ├── api_user.rst │ ├── roadmap.rst │ ├── community.rst │ └── api_other.rst ├── requirements.txt ├── environment.yml ├── Makefile ├── README.md └── make.bat ├── intake ├── interface │ ├── catalog │ │ ├── tests │ │ │ ├── __init__.py │ │ │ ├── test_catalog_search.py │ │ │ ├── test_gui.py │ │ │ ├── test_catalog_add.py │ │ │ └── test_select.py │ │ └── __init__.py │ ├── source │ │ ├── tests │ │ │ ├── __init__.py │ │ │ └── test_gui.py │ │ ├── __init__.py │ │ └── description.py │ ├── icons │ │ ├── logo.png │ │ ├── baseline-check-24px.svg │ │ └── baseline-error-24px.svg │ ├── tests │ │ ├── catalogs │ │ │ ├── catalog1.yaml │ │ │ ├── parent.yaml │ │ │ └── catalog2.yaml │ │ ├── __init__.py │ │ ├── test_init_gui.py │ │ └── test_base.py │ ├── server.py │ ├── conftest.py │ └── __init__.py ├── catalog │ ├── tests │ │ ├── data_source_non_dict.yml │ │ ├── catalog_non_dict.yml │ │ ├── plugins_non_dict.yml │ │ ├── data_source_missing.yml │ │ ├── data_source_name_non_string.yml │ │ ├── data_source_value_non_dict.yml │ │ ├── params_missing_required.yml │ │ ├── plugins_source_missing.yml │ │ ├── plugins_source_non_list.yml │ │ ├── catalog_search │ │ │ ├── example_packages │ │ │ │ ├── ep │ │ │ │ │ └── __init__.py │ │ │ │ └── ep-0.1.dist-info │ │ │ │ │ └── entry_points.txt │ │ │ └── yaml.yml │ │ ├── 
obsolete_data_source_list.yml │ │ ├── params_non_dict.yml │ │ ├── plugins_source_non_dict.yml │ │ ├── plugins_source_non_string.yml │ │ ├── small.npy │ │ ├── entry1_1.csv │ │ ├── entry1_2.csv │ │ ├── params_value_non_dict.yml │ │ ├── plugins_source_missing_key.yml │ │ ├── obsolete_params_list.yml │ │ ├── params_name_non_string.yml │ │ ├── params_value_bad_type.yml │ │ ├── params_value_bad_choice.yml │ │ ├── catalog.yml │ │ ├── catalog_union_1.yml │ │ ├── conftest.py │ │ ├── multi_plugins2.yaml │ │ ├── catalog_named.yml │ │ ├── __init__.py │ │ ├── test_core.py │ │ ├── catalog_dup_sources.yml │ │ ├── catalog_dup_parameters.yml │ │ ├── example1_source.py │ │ ├── example_plugin_dir │ │ │ └── example2_source.py │ │ ├── catalog_hierarchy.yml │ │ ├── dot-nest.yaml │ │ ├── test_default.py │ │ ├── catalog_union_2.yml │ │ ├── util.py │ │ ├── test_alias.py │ │ ├── catalog_alias.yml │ │ ├── test_discovery.py │ │ ├── test_persist.py │ │ ├── test_gui.py │ │ ├── test_utils.py │ │ ├── multi_plugins.yaml │ │ ├── catalog1.yml │ │ ├── test_auth_integration.py │ │ └── catalog_caching.yml │ ├── __init__.py │ ├── exceptions.py │ ├── default.py │ └── zarr.py ├── source │ ├── tests │ │ ├── plugin_searchpath │ │ │ ├── driver_with_entrypoints │ │ │ │ └── __init__.py │ │ │ ├── driver_with_entrypoints-0.1.dist-info │ │ │ │ └── entry_points.txt │ │ │ ├── collision_foo │ │ │ │ └── __init__.py │ │ │ ├── collision_foo2 │ │ │ │ └── __init__.py │ │ │ ├── intake_foo │ │ │ │ └── __init__.py │ │ │ └── not_intake_foo │ │ │ │ └── __init__.py │ │ ├── sample1.csv │ │ ├── calvert_uk.zip │ │ ├── sample2_1.csv │ │ ├── sample2_2.csv │ │ ├── sample3_2.csv │ │ ├── data.zarr │ │ │ ├── 0 │ │ │ └── .zarray │ │ ├── calvert_uk_filter.tar.gz │ │ ├── sources.yaml │ │ ├── der.yaml │ │ ├── __init__.py │ │ ├── alias.yaml │ │ ├── test_derived.py │ │ ├── util.py │ │ ├── cached.yaml │ │ └── test_npy.py │ ├── decompress.py │ ├── zarr.py │ └── __init__.py ├── cli │ ├── client │ │ ├── tests │ │ │ ├── entry1_1.csv │ │ │ ├── 
entry1_2.csv │ │ │ ├── __init__.py │ │ │ ├── catalog1.yml │ │ │ ├── test_conf.py │ │ │ └── test_cache.py │ │ ├── __init__.py │ │ ├── subcommands │ │ │ ├── __init__.py │ │ │ ├── exists.py │ │ │ ├── get.py │ │ │ ├── describe.py │ │ │ ├── discover.py │ │ │ ├── list.py │ │ │ ├── precache.py │ │ │ ├── info.py │ │ │ ├── example.py │ │ │ ├── config.py │ │ │ └── cache.py │ │ └── __main__.py │ ├── server │ │ ├── tests │ │ │ ├── entry1_1.csv │ │ │ ├── entry1_2.csv │ │ │ ├── __init__.py │ │ │ ├── catalog1.yml │ │ │ └── test_serializer.py │ │ ├── __init__.py │ │ ├── templates │ │ │ └── index.html │ │ └── __main__.py │ ├── sample │ │ └── us_states.yml │ ├── tests │ │ ├── __init__.py │ │ └── test_util.py │ ├── __init__.py │ ├── bootstrap.py │ └── util.py ├── tests │ ├── catalog1.yml │ ├── catalog2.yml │ ├── __init__.py │ └── test_utils.py ├── auth │ ├── __init__.py │ ├── tests │ │ ├── __init__.py │ │ └── test_auth.py │ ├── secret.py │ └── base.py ├── container │ ├── tests │ │ ├── __init__.py │ │ ├── test_generics.py │ │ └── test_persist.py │ └── semistructured.py ├── compat.py └── util_tests.py ├── .gitattributes ├── .ci-coveragerc ├── readthedocs.yml ├── logo-small.png ├── requirements.txt ├── test_requirements.txt ├── .coveragerc ├── templates ├── data_package │ ├── {{cookiecutter.package_name}} │ │ ├── {{cookiecutter.dataset_name}}.yaml │ │ ├── build.sh │ │ └── meta.yaml │ ├── cookiecutter.json │ └── hooks │ │ └── post_gen_project.py └── README.md ├── appveyor.yml ├── MANIFEST.in ├── scripts └── ci │ ├── environment-py37.yml │ ├── environment-py38.yml │ ├── environment-py39.yml │ └── environment-pip.yml ├── setup.cfg ├── .github └── workflows │ ├── pypipublish.yaml │ └── main.yaml ├── LICENSE ├── .gitignore ├── README.md └── setup.py /docs/source/_static/.keep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /intake/interface/catalog/tests/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /intake/interface/source/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | intake/_version.py export-subst 2 | -------------------------------------------------------------------------------- /.ci-coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = *tests/*, */_version.py 3 | -------------------------------------------------------------------------------- /intake/catalog/tests/data_source_non_dict.yml: -------------------------------------------------------------------------------- 1 | sources: foo 2 | -------------------------------------------------------------------------------- /readthedocs.yml: -------------------------------------------------------------------------------- 1 | conda: 2 | file: docs/environment.yml 3 | -------------------------------------------------------------------------------- /intake/catalog/tests/catalog_non_dict.yml: -------------------------------------------------------------------------------- 1 | - 1 2 | - 2 3 | - 3 4 | -------------------------------------------------------------------------------- /intake/catalog/tests/plugins_non_dict.yml: -------------------------------------------------------------------------------- 1 | plugins: 0 2 | sources: {} 3 | -------------------------------------------------------------------------------- /intake/catalog/tests/data_source_missing.yml: -------------------------------------------------------------------------------- 1 | plugins: 2 | source: [] 3 | -------------------------------------------------------------------------------- 
/intake/catalog/tests/data_source_name_non_string.yml: -------------------------------------------------------------------------------- 1 | sources: 2 | 1: foo 3 | -------------------------------------------------------------------------------- /intake/catalog/tests/data_source_value_non_dict.yml: -------------------------------------------------------------------------------- 1 | sources: 2 | foo: 1 3 | -------------------------------------------------------------------------------- /logo-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andersy005/intake/master/logo-small.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | appdirs 2 | dask 3 | entrypoints 4 | pyyaml 5 | fsspec >=0.7.4 6 | -------------------------------------------------------------------------------- /intake/catalog/tests/params_missing_required.yml: -------------------------------------------------------------------------------- 1 | sources: 2 | a: 3 | description: A 4 | -------------------------------------------------------------------------------- /intake/catalog/tests/plugins_source_missing.yml: -------------------------------------------------------------------------------- 1 | plugins: 2 | s0urce: [] 3 | sources: {} 4 | -------------------------------------------------------------------------------- /intake/catalog/tests/plugins_source_non_list.yml: -------------------------------------------------------------------------------- 1 | plugins: 2 | source: module 3 | sources: {} 4 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | sphinx_rtd_theme 3 | numpydoc 4 | msgpack_numpy 5 | panel 6 | 7 | 
-------------------------------------------------------------------------------- /intake/catalog/tests/catalog_search/example_packages/ep/__init__.py: -------------------------------------------------------------------------------- 1 | class TestCatalog: 2 | ... 3 | -------------------------------------------------------------------------------- /intake/catalog/tests/obsolete_data_source_list.yml: -------------------------------------------------------------------------------- 1 | sources: 2 | - name: a 3 | driver: csv 4 | -------------------------------------------------------------------------------- /intake/catalog/tests/params_non_dict.yml: -------------------------------------------------------------------------------- 1 | sources: 2 | a: 3 | driver: csv 4 | parameters: b 5 | -------------------------------------------------------------------------------- /intake/catalog/tests/plugins_source_non_dict.yml: -------------------------------------------------------------------------------- 1 | plugins: 2 | source: 3 | - module 4 | sources: {} 5 | -------------------------------------------------------------------------------- /intake/catalog/tests/plugins_source_non_string.yml: -------------------------------------------------------------------------------- 1 | plugins: 2 | source: 3 | - module: 0 4 | sources: {} 5 | -------------------------------------------------------------------------------- /intake/source/tests/plugin_searchpath/driver_with_entrypoints/__init__.py: -------------------------------------------------------------------------------- 1 | class SomeTestDriver: 2 | ... 
3 | -------------------------------------------------------------------------------- /intake/catalog/tests/small.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andersy005/intake/master/intake/catalog/tests/small.npy -------------------------------------------------------------------------------- /intake/interface/icons/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andersy005/intake/master/intake/interface/icons/logo.png -------------------------------------------------------------------------------- /intake/source/tests/sample1.csv: -------------------------------------------------------------------------------- 1 | name,score,rank 2 | Alice,100.5,1 3 | Bob,50.3,2 4 | Charlie,25,3 5 | Eve,25,3 6 | -------------------------------------------------------------------------------- /intake/catalog/tests/entry1_1.csv: -------------------------------------------------------------------------------- 1 | name,score,rank 2 | Alice1,100.5,1 3 | Bob1,50.3,2 4 | Charlie1,25,3 5 | Eve1,25,3 6 | -------------------------------------------------------------------------------- /intake/catalog/tests/entry1_2.csv: -------------------------------------------------------------------------------- 1 | name,score,rank 2 | Alice2,100.5,1 3 | Bob2,50.3,2 4 | Charlie2,25,3 5 | Eve2,25,3 6 | -------------------------------------------------------------------------------- /intake/catalog/tests/params_value_non_dict.yml: -------------------------------------------------------------------------------- 1 | sources: 2 | a: 3 | driver: csv 4 | parameters: 5 | b: 1 6 | -------------------------------------------------------------------------------- /intake/catalog/tests/plugins_source_missing_key.yml: -------------------------------------------------------------------------------- 1 | plugins: 2 | source: 3 | - directory: /tmp 4 | sources: {} 5 | 
-------------------------------------------------------------------------------- /intake/cli/client/tests/entry1_1.csv: -------------------------------------------------------------------------------- 1 | name,score,rank 2 | Alice1,100.5,1 3 | Bob1,50.3,2 4 | Charlie1,25,3 5 | Eve1,25,3 6 | -------------------------------------------------------------------------------- /intake/cli/client/tests/entry1_2.csv: -------------------------------------------------------------------------------- 1 | name,score,rank 2 | Alice2,100.5,1 3 | Bob2,50.3,2 4 | Charlie2,25,3 5 | Eve2,25,3 6 | -------------------------------------------------------------------------------- /intake/cli/server/tests/entry1_1.csv: -------------------------------------------------------------------------------- 1 | name,score,rank 2 | Alice1,100.5,1 3 | Bob1,50.3,2 4 | Charlie1,25,3 5 | Eve1,25,3 6 | -------------------------------------------------------------------------------- /intake/cli/server/tests/entry1_2.csv: -------------------------------------------------------------------------------- 1 | name,score,rank 2 | Alice2,100.5,1 3 | Bob2,50.3,2 4 | Charlie2,25,3 5 | Eve2,25,3 6 | -------------------------------------------------------------------------------- /intake/source/tests/calvert_uk.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andersy005/intake/master/intake/source/tests/calvert_uk.zip -------------------------------------------------------------------------------- /intake/source/tests/sample2_1.csv: -------------------------------------------------------------------------------- 1 | name,score,rank 2 | Alice1,100.5,1 3 | Bob1,50.3,2 4 | Charlie1,25,3 5 | Eve1,25,3 6 | -------------------------------------------------------------------------------- /intake/source/tests/sample2_2.csv: -------------------------------------------------------------------------------- 1 | name,score,rank 2 | Alice2,100.5,1 3 | 
Bob2,50.3,2 4 | Charlie2,25,3 5 | Eve2,25,3 6 | -------------------------------------------------------------------------------- /intake/source/tests/sample3_2.csv: -------------------------------------------------------------------------------- 1 | name,score,rank 2 | Alice3,100.5,1 3 | Bob3,50.3,2 4 | Charlie3,25,3 5 | Eve3,25,3 6 | -------------------------------------------------------------------------------- /docs/source/_static/images/code.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andersy005/intake/master/docs/source/_static/images/code.png -------------------------------------------------------------------------------- /docs/source/_static/images/grid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andersy005/intake/master/docs/source/_static/images/grid.png -------------------------------------------------------------------------------- /docs/source/_static/images/line.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andersy005/intake/master/docs/source/_static/images/line.png -------------------------------------------------------------------------------- /docs/source/_static/images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andersy005/intake/master/docs/source/_static/images/logo.png -------------------------------------------------------------------------------- /intake/catalog/tests/obsolete_params_list.yml: -------------------------------------------------------------------------------- 1 | sources: 2 | a: 3 | driver: csv 4 | parameters: 5 | - name: b 6 | -------------------------------------------------------------------------------- /intake/catalog/tests/params_name_non_string.yml: 
-------------------------------------------------------------------------------- 1 | sources: 2 | a: 3 | driver: csv 4 | parameters: 5 | 1: {} 6 | -------------------------------------------------------------------------------- /intake/source/tests/data.zarr/0: -------------------------------------------------------------------------------- 1 | 3PP`Ib.& YH -------------------------------------------------------------------------------- /docs/source/_static/images/YAMLtab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andersy005/intake/master/docs/source/_static/images/YAMLtab.png -------------------------------------------------------------------------------- /docs/source/_static/images/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andersy005/intake/master/docs/source/_static/images/favicon.ico -------------------------------------------------------------------------------- /docs/source/_static/images/gui_add.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andersy005/intake/master/docs/source/_static/images/gui_add.png -------------------------------------------------------------------------------- /docs/source/_static/images/gui_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andersy005/intake/master/docs/source/_static/images/gui_plot.png -------------------------------------------------------------------------------- /docs/source/_static/images/terminal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andersy005/intake/master/docs/source/_static/images/terminal.png -------------------------------------------------------------------------------- 
/intake/catalog/tests/catalog_search/example_packages/ep-0.1.dist-info/entry_points.txt: -------------------------------------------------------------------------------- 1 | [intake.catalogs] 2 | ep1 = ep:TestCatalog 3 | -------------------------------------------------------------------------------- /docs/source/_static/images/gui_remove.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andersy005/intake/master/docs/source/_static/images/gui_remove.png -------------------------------------------------------------------------------- /docs/source/_static/images/gui_search.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andersy005/intake/master/docs/source/_static/images/gui_search.png -------------------------------------------------------------------------------- /docs/source/_static/images/custom_button.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andersy005/intake/master/docs/source/_static/images/custom_button.png -------------------------------------------------------------------------------- /docs/source/_static/images/gui_add_local.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andersy005/intake/master/docs/source/_static/images/gui_add_local.png -------------------------------------------------------------------------------- /docs/source/_static/images/gui_builtin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andersy005/intake/master/docs/source/_static/images/gui_builtin.png -------------------------------------------------------------------------------- /docs/source/_static/images/gui_plot_yaml.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/andersy005/intake/master/docs/source/_static/images/gui_plot_yaml.png -------------------------------------------------------------------------------- /intake/source/tests/calvert_uk_filter.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andersy005/intake/master/intake/source/tests/calvert_uk_filter.tar.gz -------------------------------------------------------------------------------- /docs/source/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | div.prompt { 2 | display: none 3 | } 4 | 5 | div.logo-block img { 6 | display: none !important 7 | } 8 | -------------------------------------------------------------------------------- /docs/source/_static/images/gui_add_remote.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andersy005/intake/master/docs/source/_static/images/gui_add_remote.png -------------------------------------------------------------------------------- /docs/source/_static/images/gui_search_cat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andersy005/intake/master/docs/source/_static/images/gui_search_cat.png -------------------------------------------------------------------------------- /docs/source/_static/images/plotting_violin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andersy005/intake/master/docs/source/_static/images/plotting_violin.png -------------------------------------------------------------------------------- /docs/source/_static/images/gui_search_inputs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andersy005/intake/master/docs/source/_static/images/gui_search_inputs.png 
-------------------------------------------------------------------------------- /test_requirements.txt: -------------------------------------------------------------------------------- 1 | intake-parquet 2 | zarr 3 | notebook 4 | panel==0.8.0 5 | hvplot==0.5.2 6 | bokeh<2.0.0 7 | fsspec 8 | aiohttp 9 | requests 10 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = 3 | */tests/* 4 | */test_*.py 5 | *_version.py 6 | source = 7 | intake 8 | [report] 9 | show_missing = True 10 | -------------------------------------------------------------------------------- /intake/catalog/tests/params_value_bad_type.yml: -------------------------------------------------------------------------------- 1 | sources: 2 | a: 3 | driver: csv 4 | parameters: 5 | b: 6 | description: 1 7 | type: str 8 | -------------------------------------------------------------------------------- /intake/catalog/tests/params_value_bad_choice.yml: -------------------------------------------------------------------------------- 1 | sources: 2 | a: 3 | driver: csv 4 | parameters: 5 | b: 6 | description: B 7 | type: string 8 | -------------------------------------------------------------------------------- /intake/source/tests/plugin_searchpath/driver_with_entrypoints-0.1.dist-info/entry_points.txt: -------------------------------------------------------------------------------- 1 | [intake.drivers] 2 | some_test_driver = driver_with_entrypoints:SomeTestDriver 3 | -------------------------------------------------------------------------------- /docs/source/api.rst: -------------------------------------------------------------------------------- 1 | API 2 | === 3 | 4 | Auto-generated reference 5 | 6 | .. 
toctree:: 7 | :maxdepth: 1 8 | 9 | api_user.rst 10 | api_base.rst 11 | api_other.rst 12 | -------------------------------------------------------------------------------- /docs/source/reference.rst: -------------------------------------------------------------------------------- 1 | Reference 2 | --------- 3 | 4 | 5 | .. toctree:: 6 | :maxdepth: 1 7 | 8 | api.rst 9 | making-plugins.rst 10 | auth-plugins.rst 11 | data-packages.rst 12 | -------------------------------------------------------------------------------- /intake/catalog/tests/catalog.yml: -------------------------------------------------------------------------------- 1 | plugins: 2 | source: 3 | - module: intake.catalog.tests.example1_source 4 | sources: 5 | use_example1: 6 | description: example1 source plugin 7 | driver: example1 8 | args: {} -------------------------------------------------------------------------------- /intake/interface/icons/baseline-check-24px.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /intake/catalog/tests/catalog_search/yaml.yml: -------------------------------------------------------------------------------- 1 | plugins: 2 | source: 3 | - module: intake.catalog.tests.example1_source 4 | sources: 5 | use_example1: 6 | description: example1 source plugin 7 | driver: example1 8 | args: {} -------------------------------------------------------------------------------- /intake/catalog/tests/catalog_union_1.yml: -------------------------------------------------------------------------------- 1 | plugins: 2 | source: 3 | - module: intake.catalog.tests.example1_source 4 | sources: 5 | use_example1: 6 | description: example1 source plugin 7 | driver: example1 8 | args: {} 9 | -------------------------------------------------------------------------------- /intake/source/tests/sources.yaml: 
-------------------------------------------------------------------------------- 1 | sources: 2 | zarr1: 3 | driver: ndzarr 4 | args: 5 | urlpath: "{{CATALOG_DIR}}/data.zarr" 6 | sometext: 7 | driver: textfiles 8 | args: 9 | urlpath: "{{CATALOG_DIR}}/*.py" 10 | -------------------------------------------------------------------------------- /intake/catalog/tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import pytest 3 | from intake import open_catalog 4 | 5 | 6 | @pytest.fixture 7 | def catalog1(): 8 | path = os.path.dirname(__file__) 9 | return open_catalog(os.path.join(path, 'catalog1.yml')) 10 | -------------------------------------------------------------------------------- /intake/interface/icons/baseline-error-24px.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /intake/interface/tests/catalogs/catalog1.yaml: -------------------------------------------------------------------------------- 1 | sources: 2 | crime: 3 | description: Fake flight data - doesn't matter 4 | driver: csv 5 | args: 6 | urlpath: '{{ CATALOG_DIR }}../data/crime.csv' 7 | 8 | fake1: 9 | driver: fake 10 | args: 11 | urlpath: '' 12 | -------------------------------------------------------------------------------- /intake/tests/catalog1.yml: -------------------------------------------------------------------------------- 1 | sources: 2 | ex1: 3 | description: this source doesn't work 4 | driver: csv 5 | args: {} 6 | ex2: 7 | description: this source doesn't work 8 | metadata: 9 | foo: 'bar' 10 | bar: [1, 2, 3] 11 | driver: csv 12 | args: {} 13 | -------------------------------------------------------------------------------- /intake/tests/catalog2.yml: -------------------------------------------------------------------------------- 1 | sources: 2 | ex3: 3 | description: this source doesn't work 4 | 
driver: csv 5 | args: {} 6 | ex4: 7 | description: this source doesn't work 8 | metadata: 9 | foo: 'bar' 10 | bar: [1, 2, 3] 11 | driver: csv 12 | args: {} 13 | -------------------------------------------------------------------------------- /intake/catalog/tests/multi_plugins2.yaml: -------------------------------------------------------------------------------- 1 | sources: 2 | tables6: 3 | args: 4 | urlpath: "{{ CATALOG_DIR }}/files*" 5 | description: "incompatible plugins" 6 | driver: 7 | myplug: 8 | class: csv 9 | myplug2: 10 | class: numpy 11 | metadata: {} 12 | -------------------------------------------------------------------------------- /intake/source/tests/der.yaml: -------------------------------------------------------------------------------- 1 | sources: 2 | base: 3 | driver: csv 4 | args: 5 | urlpath: "{{CATALOG_DIR}}/sample1.csv" 6 | cols: 7 | driver: intake.source.derived.Columns 8 | args: 9 | targets: 10 | - base 11 | columns: 12 | - score 13 | - rank 14 | -------------------------------------------------------------------------------- /templates/data_package/{{cookiecutter.package_name}}/{{cookiecutter.dataset_name}}.yaml: -------------------------------------------------------------------------------- 1 | metadata: 2 | version: 1 3 | sources: 4 | mydataset: 5 | description: The Dataset Description 6 | driver: parquet 7 | args: 8 | urlpath: 's3://not_a_real_bucket/star_facts.parq' 9 | storage_options: {'anon': True} 10 | -------------------------------------------------------------------------------- /intake/cli/sample/us_states.yml: -------------------------------------------------------------------------------- 1 | sources: 2 | states: 3 | description: US state information from [CivilServices](https://civil.services/) 4 | driver: csv 5 | args: 6 | urlpath: '{{ CATALOG_DIR }}/states_*.csv' 7 | metadata: 8 | origin_url: 'https://github.com/CivilServiceUSA/us-states/blob/v1.0.0/data/states.csv' 9 | 
-------------------------------------------------------------------------------- /intake/catalog/tests/catalog_named.yml: -------------------------------------------------------------------------------- 1 | name: name_in_spec 2 | description: This is a catalog with a description in the yaml 3 | metadata: 4 | some: thing 5 | plugins: 6 | source: 7 | - module: intake.catalog.tests.example1_source 8 | sources: 9 | use_example1: 10 | description: example1 source plugin 11 | driver: example1 12 | args: {} -------------------------------------------------------------------------------- /intake/auth/__init__.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 6 | #----------------------------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /templates/data_package/cookiecutter.json: -------------------------------------------------------------------------------- 1 | { 2 | "dataset_name": "starfacts", 3 | "package_name": "data-{{cookiecutter.dataset_name}}", 4 | "description": "Star Facts", 5 | "full_name": "Your Name", 6 | "email": "you@example.com", 7 | "required_intake_plugins": "intake_parquet", 8 | "install_local_data_files": ["no", "yes"] 9 | } 10 | -------------------------------------------------------------------------------- /intake/cli/tests/__init__.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 
4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 6 | #----------------------------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /intake/tests/__init__.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 6 | #----------------------------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /intake/auth/tests/__init__.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 6 | #----------------------------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /intake/catalog/tests/__init__.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 
6 | #----------------------------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /intake/cli/client/__init__.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 6 | #----------------------------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /intake/cli/server/__init__.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 6 | #----------------------------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /intake/interface/catalog/__init__.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2019, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 
6 | #----------------------------------------------------------------------------- -------------------------------------------------------------------------------- /intake/interface/source/__init__.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2019, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 6 | #----------------------------------------------------------------------------- -------------------------------------------------------------------------------- /intake/source/tests/__init__.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 6 | #----------------------------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /intake/cli/client/tests/__init__.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 
6 | #----------------------------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /intake/cli/server/tests/__init__.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 6 | #----------------------------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /intake/interface/tests/__init__.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2019, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 6 | #----------------------------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /docs/source/start.rst: -------------------------------------------------------------------------------- 1 | .. _start: 2 | 3 | Start here 4 | ---------- 5 | 6 | These documents will familiarise you with Intake, show you some basic usage and examples, 7 | and describe Intake's place in the wider python data world. 8 | 9 | .. 
toctree:: 10 | :maxdepth: 1 11 | 12 | quickstart.rst 13 | use_cases.rst 14 | overview.rst 15 | examples.rst 16 | deployments.rst 17 | -------------------------------------------------------------------------------- /intake/container/tests/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #----------------------------------------------------------------------------- 3 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 4 | # All rights reserved. 5 | # 6 | # The full license is in the LICENSE file, distributed with this software. 7 | #----------------------------------------------------------------------------- 8 | -------------------------------------------------------------------------------- /intake/interface/tests/catalogs/parent.yaml: -------------------------------------------------------------------------------- 1 | sources: 2 | child1: 3 | args: 4 | path: "{{CATALOG_DIR}}/catalog1.yaml" 5 | driver: intake.catalog.local.YAMLFileCatalog 6 | 7 | child2: 8 | args: 9 | path: "{{CATALOG_DIR}}/catalog2.yaml" 10 | driver: intake.catalog.local.YAMLFileCatalog 11 | 12 | entry1: 13 | driver: blah 14 | args: 15 | urlpath: '' 16 | -------------------------------------------------------------------------------- /intake/catalog/tests/test_core.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from intake.catalog.base import Catalog 3 | 4 | 5 | def test_no_entry(): 6 | cat = Catalog() 7 | cat2 = cat.configure_new() 8 | assert isinstance(cat2, Catalog) 9 | assert cat.auth is None 10 | assert cat2.auth is None 11 | 12 | 13 | def test_regression(): 14 | with pytest.raises(ValueError): 15 | Catalog("URI") 16 | -------------------------------------------------------------------------------- /docs/source/guide.rst: -------------------------------------------------------------------------------- 1 | User Guide 2 | ---------- 3 | 4 | 
More detailed information about specific parts of Intake, such as how to author catalogs, 5 | how to use the graphical interface, plotting, etc. 6 | 7 | .. toctree:: 8 | :maxdepth: 1 9 | 10 | gui.rst 11 | catalog.rst 12 | tools.rst 13 | persisting.rst 14 | plotting.rst 15 | plugin-directory.rst 16 | server.rst 17 | transforms.rst 18 | -------------------------------------------------------------------------------- /intake/source/tests/data.zarr/.zarray: -------------------------------------------------------------------------------- 1 | { 2 | "chunks": [ 3 | 10 4 | ], 5 | "compressor": { 6 | "blocksize": 0, 7 | "clevel": 5, 8 | "cname": "lz4", 9 | "id": "blosc", 10 | "shuffle": 1 11 | }, 12 | "dtype": "=0.5.2 19 | - panel >=0.8.0 20 | - bokeh <2.0.0 21 | - sphinx 22 | - sphinx_rtd_theme 23 | - numpydoc 24 | -------------------------------------------------------------------------------- /templates/data_package/{{cookiecutter.package_name}}/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | INTAKE_CATALOG_DIR=$PREFIX/share/intake/ 3 | mkdir -p $INTAKE_CATALOG_DIR # create the catalog directory named by the variable set above 4 | 5 | {% if cookiecutter.install_local_data_files == 'yes' %} 6 | DATA_DIR=$INTAKE_CATALOG_DIR/{{cookiecutter.dataset_name}} 7 | mkdir -p $DATA_DIR 8 | cp -a $RECIPE_DIR/src/ $DATA_DIR/ 9 | {% endif %} 10 | 11 | cp $RECIPE_DIR/{{cookiecutter.dataset_name}}.yaml $INTAKE_CATALOG_DIR 12 | -------------------------------------------------------------------------------- /intake/cli/__init__.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 
6 | #----------------------------------------------------------------------------- 7 | 8 | import os 9 | 10 | if os.getenv("COVERAGE_PROCESS_START", False): 11 | import coverage 12 | coverage.process_startup() 13 | -------------------------------------------------------------------------------- /intake/source/tests/alias.yaml: -------------------------------------------------------------------------------- 1 | sources: 2 | csvs: 3 | driver: textfiles 4 | args: 5 | urlpath: '{{ CATALOG_DIR }}/*.csv' 6 | yamls: 7 | driver: textfiles 8 | args: 9 | urlpath: '{{ CATALOG_DIR }}/*.yaml' 10 | alias1: 11 | driver: intake.source.derived.AliasSource 12 | args: 13 | mapping: 14 | first: csvs 15 | second: yamls 16 | target: first 17 | alias2: 18 | driver: intake.source.derived.AliasSource 19 | args: 20 | target: csvs 21 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | # Based on bokeh appveyor set up 2 | build: false 3 | 4 | platform: 5 | - x64 6 | 7 | environment: 8 | matrix: 9 | - MINICONDA: C:\Miniconda36-x64 10 | 11 | skip_branch_with_pr: true 12 | clone_depth: 5 13 | skip_tags: true 14 | 15 | init: 16 | - cmd: set PATH=%MINICONDA%;%MINICONDA%\\Scripts;%MINICONDA%\\Library\\bin;%PATH% 17 | - cmd: echo %path% 18 | 19 | install: 20 | - powershell .\\scripts\\ci\\appveyor\\install.ps1 21 | 22 | build_script: 23 | - powershell .\\scripts\\ci\\appveyor\\build.ps1 24 | 25 | test_script: 26 | - py.test 27 | -------------------------------------------------------------------------------- /intake/catalog/tests/catalog_dup_sources.yml: -------------------------------------------------------------------------------- 1 | sources: 2 | entry1_part: 3 | description: entry1 part 4 | parameters: 5 | part: 6 | description: a 7 | type: str 8 | driver: csv 9 | args: # passed to the open() method 10 | urlpath: '{{ CATALOG_DIR }}/entry1_{{ part }}.csv' 11 | 
entry1_part: 12 | description: entry1 part 13 | parameters: 14 | part: 15 | description: a 16 | type: str 17 | driver: csv 18 | args: # passed to the open() method 19 | urlpath: '{{ CATALOG_DIR }}/entry1_{{ part }}.csv' 20 | -------------------------------------------------------------------------------- /intake/interface/server.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2019, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 6 | #----------------------------------------------------------------------------- 7 | """ 8 | The simplest possible panel server. To launch a panel server containing the intake gui 9 | run: 10 | 11 | panel serve intake/interface/server.py 12 | 13 | """ 14 | 15 | import intake 16 | intake.interface.servable() 17 | -------------------------------------------------------------------------------- /intake/source/tests/plugin_searchpath/collision_foo/__init__.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 
6 | #----------------------------------------------------------------------------- 7 | 8 | from intake.source.base import DataSource 9 | 10 | 11 | class FooPlugin(DataSource): 12 | name = 'foo' 13 | version = '0.1' 14 | container = 'dataframe' 15 | partition_access = False 16 | -------------------------------------------------------------------------------- /intake/source/tests/plugin_searchpath/collision_foo2/__init__.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 6 | #----------------------------------------------------------------------------- 7 | 8 | from intake.source.base import DataSource 9 | 10 | 11 | class FooPlugin(DataSource): 12 | name = 'foo' 13 | version = '0.1' 14 | container = 'dataframe' 15 | partition_access = False 16 | -------------------------------------------------------------------------------- /intake/catalog/tests/catalog_dup_parameters.yml: -------------------------------------------------------------------------------- 1 | sources: 2 | entry1_part: 3 | description: entry1 part 4 | parameters: 5 | part: 6 | description: a 7 | type: str 8 | part: 9 | description: b 10 | type: int 11 | driver: csv 12 | args: # passed to the open() method 13 | urlpath: '{{ CATALOG_DIR }}/entry1_{{ part }}.csv' 14 | entry2_part: 15 | description: entry2 part 16 | parameters: 17 | part: 18 | description: a 19 | type: str 20 | driver: csv 21 | args: # passed to the open() method 22 | urlpath: '{{ CATALOG_DIR }}/entry2_{{ part }}.csv' 23 | -------------------------------------------------------------------------------- /templates/README.md: -------------------------------------------------------------------------------- 1 | # Cookiecutter Templates 2 | 3 | This 
directory contains 4 | [Cookiecutter](https://cookiecutter.readthedocs.io/en/latest/) templates for 5 | making new Intake plugins and data packages. 6 | 7 | To use these templates, install cookiecutter: 8 | ``` 9 | conda install -c defaults -c conda-forge cookiecutter 10 | ``` 11 | or 12 | ``` 13 | pip install cookiecutter 14 | ``` 15 | 16 | For a new plugin: 17 | ``` 18 | cookiecutter gh:intake/intake/templates/plugin 19 | ``` 20 | 21 | And for a new conda data package: 22 | ``` 23 | cookiecutter gh:intake/intake/templates/data_package 24 | ``` 25 | 26 | The template will prompt for parameters. 27 | -------------------------------------------------------------------------------- /intake/source/tests/plugin_searchpath/intake_foo/__init__.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 
6 | #----------------------------------------------------------------------------- 7 | 8 | from intake.source.base import DataSource 9 | 10 | 11 | class FooPlugin(DataSource): 12 | name = 'foo' 13 | version = '0.1' 14 | container = 'dataframe' 15 | partition_access = False 16 | 17 | def __init__(self, **kwargs): 18 | pass 19 | -------------------------------------------------------------------------------- /templates/data_package/{{cookiecutter.package_name}}/meta.yaml: -------------------------------------------------------------------------------- 1 | package: 2 | version: '1.0' # update version number if data contents change 3 | name: {{cookiecutter.package_name}} 4 | 5 | build: 6 | number: 0 # update build number for minor catalog fixes 7 | noarch: generic 8 | 9 | requirements: 10 | run: 11 | - intake 12 | {%- for plugin_name in cookiecutter.required_intake_plugins.split(',') %} 13 | - {{plugin_name}} 14 | {%- endfor %} 15 | # Add additional plugins here 16 | build: [] 17 | 18 | about: 19 | description: {{cookiecutter.description}} 20 | 21 | extra: 22 | maintainers: 23 | - {{cookiecutter.full_name}} <{{cookiecutter.email}}> -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include intake *.html 2 | recursive-include intake *.csv 3 | recursive-include intake *.npy 4 | recursive-include intake *.yml 5 | recursive-include intake *.yaml 6 | recursive-include intake *.zip 7 | recursive-include intake *.png 8 | recursive-include intake/source/tests/plugin_searchpath *.py 9 | recursive-include intake/catalog/tests/catalog_search * 10 | recursive-include docs/source * 11 | include docs/Makefile docs/make.bat 12 | 13 | recursive-include templates * 14 | 15 | include versioneer.py 16 | include intake/_version.py 17 | include intake/catalog/tests/example_plugin_dir/example2_source.py 18 | include requirements.txt 19 | include LICENSE 
20 | -------------------------------------------------------------------------------- /intake/source/tests/plugin_searchpath/not_intake_foo/__init__.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 6 | #----------------------------------------------------------------------------- 7 | 8 | from intake.source.base import DataSource 9 | 10 | 11 | class FooPlugin(DataSource): 12 | name = 'otherfoo' 13 | version = '0.1' 14 | container = 'dataframe' 15 | partition_access = False 16 | 17 | def __init__(self, **kwargs): 18 | pass 19 | -------------------------------------------------------------------------------- /intake/catalog/tests/example1_source.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 
6 | #----------------------------------------------------------------------------- 7 | 8 | from intake.source.base import DataSource 9 | 10 | 11 | class ExampleSource(DataSource): 12 | name = 'example1' 13 | version = '0.1' 14 | container = 'dataframe' 15 | partition_access = True 16 | 17 | def __init__(self, **kwargs): 18 | super(ExampleSource, self).__init__() 19 | -------------------------------------------------------------------------------- /intake/source/tests/test_derived.py: -------------------------------------------------------------------------------- 1 | import os 2 | import intake 3 | 4 | catfile = os.path.join(os.path.dirname(__file__), "..", "..", 5 | "catalog", "tests", "catalog_alias.yml") 6 | 7 | 8 | def test_columns(): 9 | cat = intake.open_catalog(catfile) 10 | df1 = cat.input_data.read() 11 | df2 = cat.derive_cols.read() 12 | assert df1[["state", "slug"]].equals(df2) 13 | 14 | 15 | def _pick_columns(df, columns): 16 | return df[columns] 17 | 18 | 19 | def test_df_transform(): 20 | cat = intake.open_catalog(catfile) 21 | df1 = cat.input_data.read() 22 | df2 = cat.derive_cols_func.read() 23 | assert df1[["state", "slug"]].equals(df2) 24 | -------------------------------------------------------------------------------- /intake/catalog/tests/example_plugin_dir/example2_source.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 
6 | #----------------------------------------------------------------------------- 7 | 8 | from intake.source.base import DataSource 9 | 10 | 11 | class Ex2Plugin(DataSource): 12 | name = 'example2' 13 | version = '0.1' 14 | container = 'dataframe' 15 | partition_access = True 16 | 17 | def __init__(self): 18 | super(Ex2Plugin, self).__init__() 19 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = intake 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /scripts/ci/environment-py37.yml: -------------------------------------------------------------------------------- 1 | name: test_env 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - python=3.7 7 | - aiohttp 8 | - flask 9 | - appdirs 10 | - dask 11 | - jinja2 12 | - numpy 13 | - pyyaml 14 | - requests 15 | - msgpack-numpy 16 | - pytest-cov 17 | - coveralls 18 | - pytest 19 | - fsspec 20 | - intake-parquet 21 | - zarr 22 | - notebook 23 | - panel 24 | - hvplot 25 | - bokeh 26 | - dask 27 | - h5netcdf 28 | - intake 29 | - netcdf4 30 | - pip 31 | - pydap 32 | - pytest 33 | - rasterio 34 | - s3fs 35 | - scikit-image 36 | - xarray 37 | - zarr 38 | - moto 39 | - pip: 40 | - rangehttpserver 41 | -------------------------------------------------------------------------------- /scripts/ci/environment-py38.yml: -------------------------------------------------------------------------------- 1 | name: test_env 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - python=3.8 7 | - aiohttp 8 | - flask 9 | - appdirs 10 | - dask 11 | - jinja2 12 | - numpy 13 | - pyyaml 14 | - requests 15 | - msgpack-numpy 16 | - pytest-cov 17 | - coveralls 18 | - pytest 19 | - fsspec 20 | - intake-parquet 21 | - zarr 22 | - notebook 23 | - panel 24 | - hvplot 25 | - bokeh 26 | - dask 27 | - h5netcdf 28 | - intake 29 | - netcdf4 30 | - pip 31 | - pydap 32 | - pytest 33 | - rasterio 34 | - s3fs 35 | - scikit-image 36 | - xarray 37 | - zarr 38 | - moto 39 | - pip: 40 | - rangehttpserver 41 | -------------------------------------------------------------------------------- /scripts/ci/environment-py39.yml: -------------------------------------------------------------------------------- 1 | name: test_env 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - python=3.9 7 | 
- aiohttp 8 | - flask 9 | - appdirs 10 | - dask 11 | - jinja2 12 | - numpy 13 | - pyyaml 14 | - requests 15 | - msgpack-numpy 16 | - pytest-cov 17 | - coveralls 18 | - pytest 19 | - fsspec 20 | - intake-parquet 21 | - zarr 22 | - notebook 23 | - panel 24 | - hvplot 25 | - bokeh 26 | - dask 27 | - h5netcdf 28 | - intake 29 | - netcdf4 30 | - pip 31 | - pydap 32 | - pytest 33 | - rasterio 34 | - s3fs 35 | - scikit-image 36 | - xarray 37 | - zarr 38 | - moto 39 | - pip: 40 | - rangehttpserver 41 | -------------------------------------------------------------------------------- /intake/catalog/tests/catalog_hierarchy.yml: -------------------------------------------------------------------------------- 1 | sources: 2 | a.b.c: 3 | description: abc 4 | driver: csv 5 | args: 6 | urlpath: '{{ CATALOG_DIR }}/entry1_*.csv' 7 | a.b.d: 8 | description: abc 9 | driver: csv 10 | args: 11 | urlpath: '{{ CATALOG_DIR }}/entry1_*.csv' 12 | c: 13 | description: abc 14 | driver: csv 15 | args: 16 | urlpath: '{{ CATALOG_DIR }}/entry1_*.csv' 17 | a.c: 18 | description: abc 19 | driver: csv 20 | parameters: 21 | part: 22 | description: part of filename 23 | type: str 24 | default: "1" 25 | allowed: ["1", "2"] 26 | driver: csv 27 | args: 28 | urlpath: '{{ CATALOG_DIR }}/entry1_{{ part }}.csv' 29 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | # References: 3 | # http://flake8.readthedocs.org/en/latest/config.html 4 | # http://flake8.readthedocs.org/en/latest/warnings.html#error-codes 5 | # 6 | # Style checks turned on: 7 | # F - all pyflakes errors 8 | # E101 - indentation contains mixed spaces and tabs 9 | # E111 - indentation is not a multiple of four 10 | # E501 - line too long (see max-line-length) 11 | 12 | # Note: there cannot be spaces after commas here 13 | exclude = __init__.py 14 | ignore = E,W 15 | select = F,E101,E111,E501 16 | 
max-line-length = 165 17 | 18 | [versioneer] 19 | VCS = git 20 | style = pep440 21 | versionfile_source = intake/_version.py 22 | versionfile_build = intake/_version.py 23 | tag_prefix = 24 | parentdir_prefix = intake- 25 | -------------------------------------------------------------------------------- /intake/catalog/tests/dot-nest.yaml: -------------------------------------------------------------------------------- 1 | sources: 2 | self: 3 | description: this cat 4 | driver: yaml_file_cat 5 | args: 6 | path: "{{CATALOG_DIR}}/dot-nest.yaml" 7 | selfdot.dot: 8 | description: this cat 9 | driver: yaml_file_cat 10 | args: 11 | path: "{{CATALOG_DIR}}/dot-nest.yaml" 12 | self.dot: 13 | description: this cat 14 | driver: yaml_file_cat 15 | args: 16 | path: "{{CATALOG_DIR}}/dot-nest.yaml" 17 | leaf: 18 | description: leaf 19 | driver: csv 20 | args: 21 | urlpath: "" 22 | leafdot.dot: 23 | description: leaf-dot 24 | driver: csv 25 | args: 26 | urlpath: "" 27 | leaf.dot: 28 | description: leaf-dot 29 | driver: csv 30 | args: 31 | urlpath: "" 32 | -------------------------------------------------------------------------------- /.github/workflows/pypipublish.yaml: -------------------------------------------------------------------------------- 1 | name: Upload Python Package 2 | 3 | on: 4 | release: 5 | types: [created] 6 | 7 | jobs: 8 | deploy: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v2 12 | - name: Set up Python 13 | uses: actions/setup-python@v2 14 | with: 15 | python-version: "3.x" 16 | - name: Install dependencies 17 | run: | 18 | python -m pip install --upgrade pip 19 | pip install setuptools setuptools-scm wheel twine 20 | - name: Build and publish 21 | env: 22 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 23 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 24 | run: | 25 | python setup.py sdist bdist_wheel 26 | twine upload dist/* 27 | -------------------------------------------------------------------------------- 
/scripts/ci/environment-pip.yml: -------------------------------------------------------------------------------- 1 | name: test_env 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - python=3.8 7 | - pip 8 | - pip: 9 | - rangehttpserver 10 | - aiohttp 11 | - flask 12 | - appdirs 13 | - dask 14 | - jinja2 15 | - numpy 16 | - pyyaml 17 | - requests 18 | - msgpack-numpy 19 | - pytest-cov 20 | - coveralls 21 | - pytest 22 | - fsspec 23 | - intake-parquet 24 | - zarr 25 | - notebook 26 | - panel 27 | - hvplot 28 | - bokeh 29 | - dask 30 | - h5netcdf 31 | - intake 32 | - netcdf4 33 | - pip 34 | - pydap 35 | - pytest 36 | - rasterio 37 | - s3fs 38 | - scikit-image 39 | - xarray 40 | - zarr 41 | - moto 42 | -------------------------------------------------------------------------------- /intake/catalog/tests/test_default.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 
6 | #----------------------------------------------------------------------------- 7 | 8 | from pathlib import Path 9 | import sys 10 | from intake.catalog import default 11 | from intake.catalog.base import Catalog 12 | 13 | 14 | def test_which(): 15 | p = default.which('python') 16 | assert Path(p).resolve() == Path(sys.executable).resolve() 17 | 18 | 19 | def test_load(): 20 | cat = default.load_user_catalog() 21 | assert isinstance(cat, Catalog) 22 | cat = default.load_global_catalog() 23 | assert isinstance(cat, Catalog) 24 | -------------------------------------------------------------------------------- /intake/cli/client/subcommands/__init__.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 6 | #----------------------------------------------------------------------------- 7 | 8 | from .cache import Cache 9 | from .config import Config 10 | from .describe import Describe 11 | from .discover import Discover 12 | from .example import Example 13 | from .exists import Exists 14 | from .get import Get 15 | from .info import Info 16 | from .list import List 17 | from .precache import Precache 18 | from .drivers import Drivers 19 | 20 | all = (Cache, Config, Describe, Discover, Example, Exists, Get, Info, List, 21 | Precache, Drivers) 22 | -------------------------------------------------------------------------------- /templates/data_package/hooks/post_gen_project.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 
4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 6 | #----------------------------------------------------------------------------- 7 | 8 | from __future__ import print_function 9 | 10 | import os 11 | 12 | install_local_data_files = "{{cookiecutter.install_local_data_files}}" == "yes" 13 | 14 | print("Don't forget to edit {{cookiecutter.package_name}}/{{cookiecutter.dataset_name}}.yaml to add your data sources!") 15 | 16 | if install_local_data_files: 17 | os.mkdir('src') 18 | print("Put your data files in the {{cookiecutter.package_name}}/src/ directory to be included in the package.") 19 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Building Documentation 2 | 3 | An environment with several prerequisites is needed to build the 4 | documentation. Create this with: 5 | 6 | ## First option for environment 7 | 8 | ```bash 9 | conda create -n intake python=3.6 pandas dask python-snappy appdirs -c conda-forge -y 10 | conda activate intake 11 | ``` 12 | 13 | Additional pip packages, listed in `./requirements.txt`, are required to 14 | build the docs: 15 | 16 | ```bash 17 | pip install -r requirements.txt 18 | ``` 19 | 20 | ## Second option for environment 21 | 22 | A conda environment with pip packages included is in `environment.yml` of the current directory, and you may create it with: 23 | 24 | ```bash 25 | conda env create 26 | conda activate intake 27 | ``` 28 | 29 | ## Build docs 30 | 31 | To make HTML documentation: 32 | 33 | ```bash 34 | make html 35 | ``` 36 | 37 | Outputs to `build/html/index.html` 38 | -------------------------------------------------------------------------------- /intake/catalog/tests/catalog_union_2.yml: -------------------------------------------------------------------------------- 1 | plugins: 2 | source: 3 | - module: intake.catalog.tests.example1_source 4 | 
- module: intake.catalog.tests.example2_source 5 | sources: 6 | entry1: 7 | description: entry1 full 8 | metadata: 9 | foo: 'bar' 10 | bar: [1, 2, 3] 11 | driver: csv 12 | # direct_access is "forbid" by default 13 | args: # passed to the open() method 14 | urlpath: '{{ CATALOG_DIR }}/entry1_*.csv' 15 | entry1_part: 16 | description: entry1 part 17 | parameters: # User defined parameters 18 | part: 19 | description: part of filename 20 | type: str 21 | default: "1" 22 | allowed: ["1", "2"] 23 | metadata: 24 | foo: 'baz' 25 | bar: [2, 4, 6] 26 | driver: csv 27 | direct_access: "allow" 28 | args: # passed to the open() method 29 | urlpath: '{{ CATALOG_DIR }}/entry1_{{ part }}.csv' 30 | -------------------------------------------------------------------------------- /intake/interface/tests/catalogs/catalog2.yaml: -------------------------------------------------------------------------------- 1 | sources: 2 | us_crime: 3 | description: US Crime data [UCRDataTool](https://www.ucrdatatool.gov/Search/Crime/State/StatebyState.cfm) 4 | driver: csv 5 | args: 6 | urlpath: '{{ CATALOG_DIR }}../data/crime{{selector}}.csv' 7 | parameters: 8 | selector: 9 | type: str 10 | description: "none" 11 | allowed: ["2", ""] 12 | default: "" 13 | metadata: 14 | plots: 15 | line_example: 16 | kind: line 17 | y: ['Robbery', 'Burglary'] 18 | x: 'Year' 19 | violin_example: 20 | kind: violin 21 | y: ['Burglary rate', 'Larceny-theft rate', 22 | 'Robbery rate', 'Violent Crime rate'] 23 | group_label: 'Type of crime' 24 | value_label: 'Rate per 100k' 25 | invert: True 26 | 27 | fake2: 28 | driver: fake 29 | args: 30 | urlpath: '' 31 | -------------------------------------------------------------------------------- /intake/source/tests/util.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. 
and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 6 | #----------------------------------------------------------------------------- 7 | 8 | 9 | def verify_plugin_interface(plugin): 10 | assert isinstance(plugin.version, str) 11 | assert isinstance(plugin.container, str) 12 | assert isinstance(plugin.partition_access, bool) 13 | 14 | 15 | def verify_datasource_interface(source): 16 | for attr in ['container', 'description', 'dtype', 'shape', 17 | 'npartitions', 'metadata']: 18 | assert hasattr(source, attr) 19 | 20 | for method in ['discover', 'read', 'read_chunked', 'read_partition', 21 | 'to_dask', 'close']: 22 | assert hasattr(source, method) 23 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | set SPHINXPROJ=intake 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 20 | echo.installed, then set the SPHINXBUILD environment variable to point 21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 22 | echo.may add the Sphinx directory to PATH. 23 | echo. 
24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /.github/workflows/main.yaml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: "*" 6 | pull_request: 7 | branches: master 8 | 9 | jobs: 10 | test: 11 | name: ${{ matrix.CONDA_ENV }}-pytest 12 | runs-on: ubuntu-latest 13 | strategy: 14 | fail-fast: false 15 | matrix: 16 | CONDA_ENV: [py37, py38, pip] 17 | steps: 18 | - name: Checkout 19 | uses: actions/checkout@v2 20 | 21 | - name: Setup Miniconda 22 | uses: conda-incubator/setup-miniconda@v2 23 | with: 24 | auto-update-conda: true 25 | auto-activate-base: false 26 | activate-environment: test_env 27 | environment-file: scripts/ci/environment-${{ matrix.CONDA_ENV }}.yml 28 | 29 | - name: pip-install 30 | shell: bash -l {0} 31 | run: | 32 | pip install -e . --no-deps 33 | 34 | - name: Run Tests 35 | shell: bash -l {0} 36 | run: | 37 | pytest --verbose --cov=intake 38 | -------------------------------------------------------------------------------- /intake/catalog/__init__.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 
6 | #----------------------------------------------------------------------------- 7 | 8 | from .base import Catalog 9 | from .local import MergedCatalog, EntrypointsCatalog 10 | from .default import load_combo_catalog 11 | 12 | 13 | def _make_builtin(): 14 | return MergedCatalog( 15 | [EntrypointsCatalog(), load_combo_catalog()], 16 | name='builtin', 17 | description='Generated from data packages found on your intake search path') 18 | 19 | 20 | def __getattr__(name): 21 | """Only make the builtin catalog on request""" 22 | global builtin 23 | if name == "builtin": 24 | builtin = _make_builtin() 25 | return builtin 26 | raise AttributeError(name) 27 | -------------------------------------------------------------------------------- /intake/container/tests/test_generics.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #----------------------------------------------------------------------------- 3 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 4 | # All rights reserved. 5 | # 6 | # The full license is in the LICENSE file, distributed with this software. 
7 | #----------------------------------------------------------------------------- 8 | 9 | import os 10 | import posixpath 11 | import pytest 12 | from intake.container.dataframe import GenericDataFrame 13 | here = os.path.abspath(os.path.dirname(__file__)) 14 | 15 | 16 | def test_generic_dataframe(): 17 | pd = pytest.importorskip('pandas') 18 | 19 | def make_a_part(openfile): 20 | return pd.DataFrame([[0]], columns=['x']) 21 | 22 | url = posixpath.join(here, '*.py') 23 | s = GenericDataFrame(url, reader=make_a_part) 24 | ddf = s.to_dask() 25 | assert ddf.compute().x.unique() == [0] 26 | df = s.read() 27 | assert len(df) == len(ddf) 28 | 29 | 30 | -------------------------------------------------------------------------------- /intake/cli/client/tests/catalog1.yml: -------------------------------------------------------------------------------- 1 | plugins: 2 | source: 3 | - module: intake.catalog.tests.example1_source 4 | - module: intake.catalog.tests.example_plugin_dir.example2_source 5 | sources: 6 | use_example1: 7 | description: example1 source plugin 8 | driver: example1 9 | args: {} 10 | entry1: 11 | description: entry1 full 12 | metadata: 13 | foo: 'bar' 14 | bar: [1, 2, 3] 15 | driver: csv 16 | # direct_access is "forbid" by default 17 | args: # passed to the open() method 18 | urlpath: '{{ CATALOG_DIR }}/entry1_*.csv' 19 | entry1_part: 20 | description: entry1 part 21 | parameters: # User defined parameters 22 | part: 23 | description: part of filename 24 | type: str 25 | default: "1" 26 | allowed: ["1", "2"] 27 | metadata: 28 | foo: 'baz' 29 | bar: [2, 4, 6] 30 | driver: csv 31 | direct_access: "allow" 32 | args: # passed to the open() method 33 | urlpath: '{{ CATALOG_DIR }}/entry1_{{ part }}.csv' 34 | -------------------------------------------------------------------------------- /intake/cli/server/tests/catalog1.yml: -------------------------------------------------------------------------------- 1 | plugins: 2 | source: 3 | - module: 
intake.catalog.tests.example1_source 4 | - module: intake.catalog.tests.example_plugin_dir.example2_source 5 | sources: 6 | use_example1: 7 | description: example1 source plugin 8 | driver: example1 9 | args: {} 10 | entry1: 11 | description: entry1 full 12 | metadata: 13 | foo: 'bar' 14 | bar: [1, 2, 3] 15 | driver: csv 16 | # direct_access is "forbid" by default 17 | args: # passed to the open() method 18 | urlpath: '{{ CATALOG_DIR }}/entry1_*.csv' 19 | entry1_part: 20 | description: entry1 part 21 | parameters: # User defined parameters 22 | part: 23 | description: part of filename 24 | type: str 25 | default: "1" 26 | allowed: ["1", "2"] 27 | metadata: 28 | foo: 'baz' 29 | bar: [2, 4, 6] 30 | driver: csv 31 | direct_access: "allow" 32 | args: # passed to the open() method 33 | urlpath: '{{ CATALOG_DIR }}/entry1_{{ part }}.csv' 34 | -------------------------------------------------------------------------------- /intake/compat.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 
6 | #----------------------------------------------------------------------------- 7 | 8 | import msgpack 9 | from .utils import encode_datetime, decode_datetime 10 | 11 | 12 | if msgpack.version >= (0, 5, 2): 13 | unpack_kwargs = {'raw': False} 14 | else: 15 | unpack_kwargs = {'encoding': 'utf-8'} 16 | 17 | unpack_kwargs["object_hook"] = decode_datetime 18 | 19 | 20 | pack_kwargs = dict( 21 | default=encode_datetime, 22 | use_bin_type=True, 23 | ) 24 | 25 | try: 26 | import msgpack_numpy 27 | np_unpack_kwargs = dict( 28 | object_hook=lambda obj: decode_datetime(msgpack_numpy.decode(obj)), 29 | ) 30 | np_pack_kwargs = dict( 31 | default=lambda obj: encode_datetime(msgpack_numpy.encode(obj)), 32 | ) 33 | except ImportError: 34 | pass 35 | 36 | 37 | -------------------------------------------------------------------------------- /docs/source/transforms.rst: -------------------------------------------------------------------------------- 1 | Dataset Transforms 2 | ------------------ 3 | 4 | aka. derived datasets. 5 | 6 | (experimental) 7 | 8 | Intake allows for the definition of data sources which take as their input 9 | another source in the same directory, so that you have the opportunity to 10 | present *processing* to the user of the catalog. 11 | 12 | Example 13 | ~~~~~~~ 14 | 15 | This example is taken from the Intake test suite. 16 | 17 | Text to come, watch this space... 18 | 19 | API 20 | ~~~ 21 | 22 | .. autosummary:: 23 | intake.source.derived.DerivedSource 24 | intake.source.derived.Alias 25 | intake.source.derived.GenericTransform 26 | intake.source.derived.DataFrameTransform 27 | intake.source.derived.Columns 28 | 29 | .. autoclass:: intake.source.derived.DerivedSource 30 | :members: 31 | .. autoclass:: intake.source.derived.Alias 32 | :members: 33 | .. autoclass:: intake.source.derived.GenericTransform 34 | :members: 35 | .. autoclass:: intake.source.derived.DataFrameTransform 36 | :members: 37 | .. 
autoclass:: intake.source.derived.Columns 38 | :members: 39 | -------------------------------------------------------------------------------- /intake/catalog/tests/util.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 6 | #----------------------------------------------------------------------------- 7 | 8 | from intake.source import base, registry 9 | 10 | 11 | def assert_items_equal(a, b): 12 | assert len(a) == len(b) and sorted(a) == sorted(b) 13 | 14 | 15 | class TestingSource(base.DataSource): 16 | """A source that gives back whatever parameters were passed to it""" 17 | name = 'test' 18 | version = '0.0.1' 19 | container = 'python' 20 | partition_access = False 21 | 22 | def __init__(self, *args, **kwargs): 23 | self.args = args 24 | self.kwargs = kwargs 25 | super(TestingSource, self).__init__('python') 26 | self.npartitions = 1 27 | 28 | def _load_metadata(self): 29 | pass 30 | 31 | def _get_partition(self, _): 32 | return self.args, self.kwargs 33 | 34 | 35 | def register(): 36 | registry['test'] = TestingSource 37 | -------------------------------------------------------------------------------- /intake/catalog/tests/test_alias.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 
6 | #----------------------------------------------------------------------------- 7 | 8 | import intake 9 | import os 10 | 11 | here = os.path.abspath(os.path.dirname(__file__)) 12 | fn = os.path.join(here, 'catalog_alias.yml') 13 | 14 | 15 | def test_simple(): 16 | cat = intake.open_catalog(fn) 17 | s = cat.alias0() 18 | assert s.container == 'other' 19 | out = str(s.discover()) 20 | assert s.container == 'dataframe' 21 | assert "state" in out 22 | 23 | 24 | def test_mapping(): 25 | cat = intake.open_catalog(fn) 26 | s = cat.alias1() 27 | assert s.container == 'other' 28 | out = str(s.discover()) 29 | assert s.container == 'dataframe' 30 | assert "state" in out 31 | 32 | s = cat.alias1(choice='second') 33 | assert s.container == 'other' 34 | out = str(s.discover()) 35 | assert s.container == 'ndarray' 36 | assert "int64" in out 37 | -------------------------------------------------------------------------------- /docs/source/_static/images/logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | 7 | 8 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /intake/catalog/tests/catalog_alias.yml: -------------------------------------------------------------------------------- 1 | sources: 2 | input_data: 3 | description: a local data file 4 | driver: csv 5 | args: 6 | urlpath: '{{ CATALOG_DIR }}cache_data/states.csv' 7 | arr_cache: 8 | description: small array 9 | driver: numpy 10 | args: 11 | path: "{{ CATALOG_DIR }}/small.npy" 12 | chunks: 5 13 | alias0: 14 | driver: intake.source.derived.AliasSource 15 | args: 16 | target: input_data 17 | alias1: 18 | driver: alias 19 | args: 20 | target: "{{choice}}" 21 | mapping: 22 | first: input_data 23 | second: arr_cache 24 | parameters: 25 | choice: 26 | description: which to alias 27 | type: str 28 | default: first 29 | allowed: ["first", "second"] 30 | derive_cols: 31 | 
driver: intake.source.derived.Columns 32 | args: 33 | targets: 34 | - input_data 35 | columns: ["state", "slug"] 36 | derive_cols_func: 37 | driver: intake.source.derived.DataFrameTransform 38 | args: 39 | targets: 40 | - input_data 41 | transform: "intake.source.tests.test_derived._pick_columns" 42 | transform_kwargs: 43 | columns: ["state", "slug"] 44 | 45 | -------------------------------------------------------------------------------- /intake/cli/client/__main__.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 6 | #---------------------------------------------------------------------------- 7 | 8 | 9 | #----------------------------------------------------------------------------- 10 | # Imports 11 | #----------------------------------------------------------------------------- 12 | 13 | # Standard library imports 14 | import sys 15 | 16 | # External imports 17 | 18 | # Intake imports 19 | from . import subcommands 20 | import logging 21 | log = logging.getLogger('intake') 22 | 23 | #----------------------------------------------------------------------------- 24 | # API 25 | #----------------------------------------------------------------------------- 26 | 27 | 28 | def main(argv=None): 29 | """ Execute the "intake" command line program. 
30 | 31 | """ 32 | from intake.cli.bootstrap import main as _main 33 | 34 | return _main('Intake Catalog CLI', subcommands.all, argv or sys.argv) 35 | 36 | 37 | if __name__ == "__main__": 38 | sys.exit(main(sys.argv)) 39 | -------------------------------------------------------------------------------- /intake/source/tests/cached.yaml: -------------------------------------------------------------------------------- 1 | sources: 2 | calvert: 3 | driver: csv 4 | args: 5 | urlpath: '{{ CATALOG_DIR }}/calvert_uk.zip' 6 | cache: 7 | - type: compressed 8 | argkey: urlpath 9 | calvert_infer: 10 | driver: csv 11 | args: 12 | urlpath: '{{ CATALOG_DIR }}/calvert_uk.zip' 13 | cache: 14 | - type: compressed 15 | argkey: urlpath 16 | decomp: infer 17 | calvert_badkey: 18 | driver: csv 19 | args: 20 | urlpath: '{{ CATALOG_DIR }}/calvert_uk.zip' 21 | cache: 22 | - type: compressed 23 | argkey: urlpath 24 | decomp: unknown 25 | calvert_filter: 26 | driver: csv 27 | args: 28 | urlpath: '{{ CATALOG_DIR }}/calvert_uk_filter.tar.gz' 29 | cache: 30 | - type: compressed 31 | argkey: urlpath 32 | regex_filter: '.*calvert_uk_research2017_nodes.csv' 33 | dirs: 34 | driver: textfiles 35 | args: 36 | urlpath: '{{ CATALOG_DIR }}/main' 37 | cache: 38 | - type: dir 39 | argkey: urlpath 40 | depth: 2 41 | dat_data: 42 | driver: textfiles 43 | args: 44 | urlpath: 'dat://66ef52101a2543e1721c901e84d2dd7a758c94283b8501d34a691abefe3fb3d6/*.json' 45 | decoder: json.loads 46 | cache: 47 | - type: dat 48 | -------------------------------------------------------------------------------- /intake/catalog/tests/test_discovery.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import glob 3 | import os 4 | import pytest 5 | import sys 6 | 7 | from ..local import YAMLFilesCatalog, MergedCatalog, EntrypointsCatalog 8 | 9 | 10 | def test_catalog_discovery(): 11 | basedir = os.path.dirname(__file__) 12 | yaml_glob = os.path.join(basedir, 'catalog_search', 
'*.yml') 13 | example_packages = os.path.join(basedir, 'catalog_search', 'example_packages') 14 | 15 | test_catalog = MergedCatalog([EntrypointsCatalog(paths=[example_packages]), 16 | YAMLFilesCatalog(path=[yaml_glob])]) 17 | 18 | assert 'use_example1' in test_catalog 19 | assert 'ep1' in test_catalog 20 | 21 | 22 | def test_deferred_import(): 23 | "See https://github.com/intake/intake/pull/541" 24 | # We are going to mess with sys.modules here, so to be safe let's put it 25 | # back the way it was at the end. 26 | import intake.catalog 27 | intake.catalog.builtin = None 28 | mods = sys.modules.copy() 29 | try: 30 | sys.modules.pop("intake") 31 | sys.modules.pop("intake.catalog") 32 | intake.catalog.__dict__.pop('builtin') 33 | assert 'builtin' not in intake.catalog.__dict__ 34 | assert intake.cat is not None 35 | finally: 36 | sys.modules.update(mods) 37 | -------------------------------------------------------------------------------- /intake/interface/tests/test_init_gui.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2019, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 
6 | #----------------------------------------------------------------------------- 7 | import pytest 8 | 9 | 10 | def panel_importable(): 11 | try: 12 | import panel as pn 13 | return True 14 | except: 15 | return False 16 | 17 | 18 | @pytest.mark.skipif(panel_importable(), reason="panel is importable, so skip") 19 | def test_no_panel_does_not_raise_errors(cat1_url): 20 | import intake 21 | cat = intake.open_catalog(cat1_url) 22 | assert cat.name == 'catalog1' 23 | 24 | 25 | @pytest.mark.skipif(panel_importable(), reason="panel is importable, so skip") 26 | def test_no_panel_display_init_gui(): 27 | import intake 28 | with pytest.raises(RuntimeError, match=('Please install panel to use the GUI ' 29 | '`conda install -c conda-forge panel')): 30 | repr(intake.gui) 31 | 32 | 33 | def test_display_init_gui(): 34 | pytest.importorskip('panel') 35 | import intake 36 | assert repr(intake.gui).startswith('Column') 37 | -------------------------------------------------------------------------------- /intake/interface/tests/test_base.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2019, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 
6 | #----------------------------------------------------------------------------- 7 | 8 | import pytest 9 | 10 | pn = pytest.importorskip('panel') 11 | 12 | from ..base import Base 13 | 14 | 15 | class BaseClass(Base): 16 | def __init__(self, **kwargs): 17 | self.panel = pn.Row() 18 | super().__init__(**kwargs) 19 | 20 | def setup(self): 21 | self.children = ['fake content'] 22 | 23 | 24 | def test_base_with_panel_gets_populated_when_visible_is_set_to_true(): 25 | base = BaseClass(visible=True) 26 | assert base.children == ['fake content'] 27 | assert len(base.panel.objects) == 1 28 | 29 | base.visible = False 30 | assert len(base.panel.objects) == 0 31 | assert base.children == ['fake content'] 32 | 33 | 34 | def test_base_with_panel_gets_populated_when_visible_is_changed_to_true(): 35 | base = BaseClass(visible=False) 36 | assert len(base.panel.objects) == 0 37 | 38 | base.visible = True 39 | assert base.children == ['fake content'] 40 | assert len(base.panel.objects) == 1 41 | 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017, Anaconda, Inc. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are 6 | met: 7 | 8 | Redistributions of source code must retain the above copyright notice, 9 | this list of conditions and the following disclaimer. 10 | 11 | Redistributions in binary form must reproduce the above copyright 12 | notice, this list of conditions and the following disclaimer in the 13 | documentation and/or other materials provided with the distribution. 
14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 15 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 16 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 17 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 18 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 19 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 20 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /intake/catalog/tests/test_persist.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #----------------------------------------------------------------------------- 3 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 4 | # All rights reserved. 5 | # 6 | # The full license is in the LICENSE file, distributed with this software. 
7 | #----------------------------------------------------------------------------- 8 | 9 | import os 10 | import os.path 11 | import pytest 12 | 13 | import intake 14 | 15 | 16 | path = os.path.dirname(__file__) 17 | 18 | 19 | def test_idempotent(temp_cache): 20 | pytest.importorskip('zarr') 21 | cat = intake.open_catalog(os.path.abspath( 22 | os.path.join(path, '..', '..', 'source', 'tests', 'sources.yaml'))) 23 | s = cat.zarr1() 24 | assert not s.has_been_persisted 25 | s2 = s.persist() 26 | assert s.has_been_persisted 27 | assert not s.is_persisted 28 | assert not s2.has_been_persisted 29 | assert s2.is_persisted 30 | s3 = s.persist() 31 | assert s3 == s2 32 | 33 | assert s.get_persisted() == cat.zarr1() 34 | with pytest.raises(ValueError): 35 | s2.persist() 36 | 37 | 38 | def test_parquet(temp_cache): 39 | inp = pytest.importorskip('intake_parquet') 40 | cat = intake.open_catalog(os.path.abspath( 41 | os.path.join(path, 'catalog1.yml'))) 42 | s = cat.entry1() 43 | s2 = s.persist() 44 | assert isinstance(s2, inp.ParquetSource) 45 | -------------------------------------------------------------------------------- /intake/cli/client/subcommands/exists.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 
6 | #----------------------------------------------------------------------------- 7 | ''' 8 | 9 | ''' 10 | 11 | import logging 12 | log = logging.getLogger(__name__) 13 | 14 | #----------------------------------------------------------------------------- 15 | # Imports 16 | #----------------------------------------------------------------------------- 17 | 18 | # Standard library imports 19 | 20 | # External imports 21 | 22 | # Intake imports 23 | from intake import open_catalog 24 | from intake.cli.util import Subcommand 25 | 26 | #----------------------------------------------------------------------------- 27 | # API 28 | #----------------------------------------------------------------------------- 29 | 30 | class Exists(Subcommand): 31 | ''' Check for the existence of a catalog entry 32 | 33 | ''' 34 | 35 | name = "exists" 36 | 37 | def initialize(self): 38 | self.parser.add_argument('uri', metavar='URI', type=str, help='Catalog URI') 39 | self.parser.add_argument('name', metavar='NAME', type=str, help='Catalog name') 40 | 41 | def invoke(self, args): 42 | catalog = open_catalog(args.uri) 43 | print(args.name in catalog) 44 | -------------------------------------------------------------------------------- /intake/cli/client/subcommands/get.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 
6 | #----------------------------------------------------------------------------- 7 | ''' 8 | 9 | ''' 10 | 11 | import logging 12 | log = logging.getLogger(__name__) 13 | 14 | #----------------------------------------------------------------------------- 15 | # Imports 16 | #----------------------------------------------------------------------------- 17 | 18 | # Standard library imports 19 | 20 | # External imports 21 | 22 | # Intake imports 23 | from intake import open_catalog 24 | from intake.cli.util import Subcommand 25 | 26 | #----------------------------------------------------------------------------- 27 | # API 28 | #----------------------------------------------------------------------------- 29 | 30 | class Get(Subcommand): 31 | ''' Get a catalog entry 32 | 33 | ''' 34 | 35 | name = "get" 36 | 37 | def initialize(self): 38 | self.parser.add_argument('uri', metavar='URI', type=str, help='Catalog URI') 39 | self.parser.add_argument('name', metavar='NAME', type=str, help='Catalog name') 40 | 41 | def invoke(self, args): 42 | catalog = open_catalog(args.uri) 43 | with catalog[args.name] as f: 44 | print(f.read()) 45 | -------------------------------------------------------------------------------- /intake/cli/client/subcommands/describe.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 
6 | #----------------------------------------------------------------------------- 7 | ''' 8 | 9 | ''' 10 | 11 | import logging 12 | log = logging.getLogger(__name__) 13 | 14 | #----------------------------------------------------------------------------- 15 | # Imports 16 | #----------------------------------------------------------------------------- 17 | 18 | # Standard library imports 19 | 20 | # External imports 21 | 22 | # Intake imports 23 | from intake import open_catalog 24 | from intake.cli.util import print_entry_info, Subcommand 25 | 26 | #----------------------------------------------------------------------------- 27 | # API 28 | #----------------------------------------------------------------------------- 29 | 30 | class Describe(Subcommand): 31 | ''' Describe a catalog entry. 32 | 33 | ''' 34 | 35 | name = "describe" 36 | 37 | def initialize(self): 38 | self.parser.add_argument('uri', metavar='URI', type=str, help='Catalog URI') 39 | self.parser.add_argument('name', metavar='NAME', type=str, help='Catalog name') 40 | 41 | def invoke(self, args): 42 | catalog = open_catalog(args.uri) 43 | print_entry_info(catalog, args.name) 44 | -------------------------------------------------------------------------------- /intake/cli/client/subcommands/discover.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 
class Discover(Subcommand):
    ''' Discover a catalog entry

    Opens the catalog found at ``URI``, looks up the entry called ``NAME``
    and prints the result of its ``discover()`` call.

    '''

    name = "discover"

    def initialize(self):
        ''' Register the two positional arguments, ``URI`` and ``NAME``.

        '''
        self.parser.add_argument('uri', metavar='URI', type=str, help='Catalog URI')
        # NAME is used as a key into the opened catalog (see ``invoke``), so
        # the help text describes an entry rather than the catalog itself.
        self.parser.add_argument('name', metavar='NAME', type=str, help='Catalog entry name')

    def invoke(self, args):
        ''' Print ``discover()`` for entry ``args.name`` of ``args.uri``.

        '''
        catalog = open_catalog(args.uri)
        # The entry is used as a context manager while it is being inspected.
        with catalog[args.name] as f:
            print(f.discover())
def test_windows_file_path():
    """Backslash separators in a Windows path are turned into slashes."""
    converted = make_path_posix('C:\\Users\\user\\fake.file')
    assert converted == 'C:/Users/user/fake.file'


def test_make_path_posix_removes_double_sep():
    """A doubled separator collapses to a single one."""
    converted = make_path_posix('user//fake.file')
    assert converted == 'user/fake.file'


@pytest.mark.parametrize('path', [
    '~/fake.file',
    'https://example.com',
])
def test_noops(path):
    """For non windows style paths, make_path_posix should be a noop"""
    unchanged = make_path_posix(path)
    assert unchanged == path


def test_roundtrip_file_path():
    """The converted directory has no backslashes and is the same file."""
    this_dir = os.path.dirname(__file__)
    converted = make_path_posix(this_dir)
    assert '\\' not in converted
    assert os.path.samefile(converted, this_dir)


def test_yaml_tuples():
    """A dumped tuple loads back equal inside ``no_duplicate_yaml``."""
    original = (1, 2)
    dumped = yaml.dump(original)
    with no_duplicate_yaml():
        loaded = yaml.safe_load(dumped)
        assert loaded == original
class List(Subcommand):
    ''' Show catalog listing

    Prints the name of every entry in the catalog at ``URI``. With
    ``--full``, ``print_entry_info`` is called for each entry instead.

    '''

    name = "list"

    def initialize(self):
        ''' Register the ``--full`` flag and the positional ``URI``.

        '''
        parser = self.parser
        parser.add_argument('--full', action='store_true')
        parser.add_argument('uri', metavar='URI', type=str, help='Catalog URI')

    def invoke(self, args):
        ''' Walk the catalog once and emit one item per entry.

        '''
        catalog = open_catalog(args.uri)
        # Take the entry names up front, before anything is printed.
        entry_names = list(catalog)

        if args.full:
            for entry_name in entry_names:
                print_entry_info(catalog, entry_name)
            return

        for entry_name in entry_names:
            print(entry_name)
autoclass:: intake.source.base.DataSource 21 | :members: 22 | 23 | .. attribute:: plot 24 | 25 | Accessor for HVPlot methods. See :doc:`plotting` for more details. 26 | 27 | .. autoclass:: intake.catalog.Catalog 28 | :members: 29 | 30 | .. autoclass:: intake.catalog.entry.CatalogEntry 31 | :members: 32 | 33 | .. autoclass:: intake.container.base.RemoteSource 34 | :members: 35 | 36 | .. autoclass:: intake.catalog.local.UserParameter 37 | :members: 38 | 39 | .. autoclass:: intake.auth.base.BaseAuth 40 | :members: 41 | 42 | .. autoclass:: intake.source.cache.BaseCache 43 | :members: 44 | 45 | .. autoclass:: intake.source.base.AliasSource 46 | :members: __init__, _get_source 47 | 48 | .. autoclass:: intake.source.base.PatternMixin 49 | :members: 50 | 51 | .. autoclass:: intake.source.base.Schema 52 | :members: 53 | 54 | .. autoclass:: intake.container.persist.PersistStore 55 | :members: add, get_tok, remove, backtrack, refresh, needs_refresh 56 | -------------------------------------------------------------------------------- /intake/cli/client/subcommands/precache.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 
class Precache(Subcommand):
    ''' Populate caching for catalog entries that define caching.

    Entries whose ``cache`` attribute is empty are left alone. An entry
    that raises while being looked up or read is reported and skipped, so
    one bad entry does not stop the remaining ones from being processed.
    '''

    name = "precache"

    def initialize(self):
        ''' Register the positional ``URI`` argument.
        '''
        self.parser.add_argument('uri', metavar='URI', type=str, help='Catalog URI')

    def invoke(self, args):
        ''' Read every cache-defining entry of the catalog at ``args.uri``.
        '''
        catalog = open_catalog(args.uri)
        for entry in list(catalog):
            try:
                s = catalog[entry]
                # Check for a cache definition *before* reading: pulling the
                # full data of an entry that has nothing to cache is wasted
                # work and contradicts what this command says it does.
                # NOTE(review): presumably the read is what fills the cache.
                if s.cache:
                    print("Caching for entry %s" % entry)
                    s.read()
            except Exception as e:
                # Deliberate best effort: report and carry on with the rest.
                print("Skipping {} due to {}".format(entry, e))
def _catalog_file(filename):
    """Absolute path of *filename* inside the ``tests/catalogs`` directory."""
    return os.path.join(here, 'tests', 'catalogs', filename)


@pytest.fixture
def cat1_url():
    """Path of ``catalog1.yaml``."""
    return _catalog_file('catalog1.yaml')


@pytest.fixture
def cat2_url():
    """Path of ``catalog2.yaml``."""
    return _catalog_file('catalog2.yaml')


@pytest.fixture
def parent_cat_url():
    """Path of ``parent.yaml``."""
    return _catalog_file('parent.yaml')


@pytest.fixture
def cat1(cat1_url):
    """Catalog opened from ``cat1_url``."""
    opened = intake.open_catalog(cat1_url)
    return opened


@pytest.fixture
def cat2(cat2_url):
    """Catalog opened from ``cat2_url``."""
    opened = intake.open_catalog(cat2_url)
    return opened


@pytest.fixture
def parent_cat(parent_cat_url):
    """Catalog opened from ``parent_cat_url``."""
    opened = intake.open_catalog(parent_cat_url)
    return opened


@pytest.fixture
def cat_browser(cat1):
    """``CatSelector`` holding only ``cat1``."""
    # Imported lazily, inside the fixture, as in the rest of this module.
    from .catalog.select import CatSelector
    catalogs = [cat1]
    return CatSelector(cats=catalogs)


@pytest.fixture
def sources1(cat1):
    """List of the values stored in ``cat1._entries``."""
    entries = cat1._entries
    return list(entries.values())


@pytest.fixture
def sources2(cat2):
    """List of the values stored in ``cat2._entries``."""
    entries = cat2._entries
    return list(entries.values())


@pytest.fixture
def source_browser(sources1):
    """``SourceSelector`` built from ``sources1``."""
    from .source.select import SourceSelector
    selector = SourceSelector(sources=sources1)
    return selector
python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | .pytest_cache 45 | nosetests.xml 46 | coverage.xml 47 | *.cover 48 | .hypothesis/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # Jupyter Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # SageMath parsed files 81 | *.sage.py 82 | 83 | # dotenv 84 | .env 85 | 86 | # virtualenv 87 | .venv 88 | venv/ 89 | ENV/ 90 | 91 | # Spyder project settings 92 | .spyderproject 93 | .spyproject 94 | 95 | # Rope project settings 96 | .ropeproject 97 | 98 | # mkdocs documentation 99 | /site 100 | 101 | # mypy 102 | .mypy_cache/ 103 | 104 | # jetbrains/pycharm 105 | .idea/ -------------------------------------------------------------------------------- /intake/catalog/tests/test_gui.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2019, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 
def panel_importable():
    """Return True when ``import panel`` succeeds, False when it fails.

    Any ordinary exception raised by the import counts as "not importable",
    so a broken installation is treated like a missing one. Exceptions that
    do not derive from ``Exception`` (``KeyboardInterrupt``, ``SystemExit``)
    are allowed to propagate instead of being mistaken for a missing
    package.
    """
    try:
        import panel  # noqa: F401 -- imported only to probe availability
    except Exception:
        return False
    return True
def test_expand_templates():
    """A template string in a flat mapping is rendered from the context."""
    context = {'b': 1, 'par': 'ho'}
    pars = {'a': "{{par}} hi"}

    rendered = utils.expand_templates(pars, context)
    assert rendered['a'] == 'ho hi'

    # With the third argument set, index 1 of the result is compared to {'b'}.
    second = utils.expand_templates(pars, context, True)[1]
    assert second == {'b'}


def test_expand_nested_template():
    """Templates nested inside lists, dicts, sets and tuples are rendered."""
    context = {'b': 1, 'par': 'ho'}
    pars = {'a': ["{{par}} hi"]}
    rendered = utils.expand_templates(pars, context)
    assert rendered['a'] == ['ho hi']
    second = utils.expand_templates(pars, context, True)[1]
    assert second == {'b'}

    context = {'b': 1, 'par': 'ho'}
    pars = {'a': {'k': {("{{par}} hi", )}}}
    rendered = utils.expand_templates(pars, context)
    assert rendered['a'] == {'k': {("ho hi", )}}
    second = utils.expand_templates(pars, context, True)[1]
    assert second == {'b'}


@pytest.mark.parametrize("test_input,expected", [
    (None, pd.Timestamp('1970-01-01 00:00:00')),
    (1, pd.Timestamp('1970-01-01 00:00:00.000000001')),
    ("1988-02-24T13:37+0100", pd.Timestamp("1988-02-24 13:37+0100")),
    ({"__datetime__": True, "as_str": "1988-02-24T13:37+0100"}, pd.Timestamp("1988-02-24T13:37+0100")),
])
def test_coerce_datetime(test_input, expected):
    """Each input form coerces to the paired ``pd.Timestamp``."""
    coerced = utils.coerce_datetime(test_input)
    assert coerced == expected


def test_flatten():
    """A list of one-element lists flattens to the elements in order."""
    nested = [["hi"], ["oi"]]
    flat = list(utils.flatten(nested))
    assert flat == ['hi', 'oi']
tables2: 17 | args: 18 | urlpath: "{{ CATALOG_DIR }}/files*" 19 | description: "same plugin twice" 20 | driver: 21 | - csv 22 | - intake.source.csv.CSVSource 23 | metadata: {} 24 | tables3: 25 | args: 26 | urlpath: "{{ CATALOG_DIR }}/files*" 27 | description: "user's choice with extra param" 28 | driver: 29 | myplug: 30 | class: intake.source.csv.CSVSource 31 | myplug2: 32 | class: intake.source.csv.CSVSource 33 | args: 34 | csv_kwargs: true 35 | metadata: {} 36 | tables4: 37 | args: 38 | urlpath: "{{ CATALOG_DIR }}/files*" 39 | description: "neither plugins exist" 40 | driver: 41 | myplug: 42 | class: doesnotexist 43 | myplug2: 44 | class: also.none.Class 45 | metadata: {} 46 | tables5: 47 | args: 48 | urlpath: "{{ CATALOG_DIR }}/files*" 49 | description: "only second plugin exists" 50 | driver: 51 | myplug: 52 | class: doesnotexist 53 | myplug2: 54 | class: csv 55 | metadata: {} 56 | tables6: 57 | args: 58 | urlpath: "{{ CATALOG_DIR }}/files*" 59 | description: "no valid plugin in list" 60 | driver: 61 | myplug: 62 | class: doesnotexist 63 | metadata: {} 64 | tables7: 65 | args: 66 | urlpath: "{{ CATALOG_DIR }}/files*" 67 | description: "no valid plugin" 68 | driver: doesnotexist 69 | metadata: {} 70 | 71 | -------------------------------------------------------------------------------- /intake/cli/client/subcommands/info.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 
6 | #----------------------------------------------------------------------------- 7 | ''' 8 | 9 | ''' 10 | 11 | import logging 12 | log = logging.getLogger(__name__) 13 | 14 | #----------------------------------------------------------------------------- 15 | # Imports 16 | #----------------------------------------------------------------------------- 17 | 18 | # Standard library imports 19 | from importlib import import_module 20 | import sys 21 | 22 | # External imports 23 | 24 | # Intake imports 25 | from intake import __version__ 26 | from intake.cli.util import Subcommand 27 | 28 | #----------------------------------------------------------------------------- 29 | # API 30 | #----------------------------------------------------------------------------- 31 | 32 | class Info(Subcommand): 33 | ''' Display runtime information related to Intake 34 | 35 | ''' 36 | 37 | name = "info" 38 | 39 | def initialize(self): 40 | pass 41 | 42 | def invoke(self, args): 43 | print("Python version : %s" % sys.version.split('\n')[0]) 44 | print("IPython version : %s" % _version_from_module('IPython')) 45 | print("Tornado version : %s" % _version_from_module('tornado', 'version')) 46 | print("Dask version : %s" % _version_from_module('dask')) 47 | print("Pandas version : %s" % _version_from_module('pandas')) 48 | print("Numpy version : %s" % _version_from_module('numpy')) 49 | print("Intake version : %s" % __version__) 50 | 51 | def _version_from_module(modname, version_attr="__version__"): 52 | try: 53 | mod = import_module(modname) 54 | return getattr(mod, version_attr) 55 | except ImportError: 56 | return "(not installed)" 57 | -------------------------------------------------------------------------------- /intake/util_tests.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. 
@contextmanager
def tempdir():
    """Yield the path of a new temporary directory and remove it on exit.

    The directory is deleted when the ``with`` block ends, whether or not
    the block raised. If it no longer exists by then, nothing is removed.
    """
    d = tempfile.mkdtemp()
    try:
        yield d
    finally:
        if os.path.exists(d):
            shutil.rmtree(d)


@contextmanager
def temp_conf(conf):
    """Dump ``conf`` as YAML to ``conf.yaml`` in a temp dir; yield its path.

    The file lives inside a ``tempdir()`` and disappears together with it
    when the ``with`` block ends.
    """
    with tempdir() as d:
        fn = os.path.join(d, 'conf.yaml')
        with open(fn, 'w') as f:
            yaml.dump(conf, f)
        yield fn


@contextmanager
def server(args=None, cat=None, env=None, wait=None, timeout=25):
    """Run ``intake.cli.server`` in a subprocess for the ``with`` block.

    Parameters
    ----------
    args : iterable of str, optional
        Extra command-line arguments, placed before the catalog path.
    cat : str, optional
        Catalog passed to the server; defaults to the module-level
        ``defcat``.
    env : dict, optional
        Environment of the subprocess; defaults to an empty environment.
    wait : int, optional
        Port to poll. When given, ``http://localhost:<wait>/v1/info`` is
        requested every 0.1 seconds until a request succeeds.
    timeout : float
        Seconds to keep polling before giving up.

    Yields
    ------
    subprocess.Popen
        The server process; its stderr is merged into the stdout pipe.

    Raises
    ------
    AssertionError
        If ``wait`` is given and no request succeeds within ``timeout``.
    """
    cat = defcat if cat is None else cat
    args = [] if args is None else list(args)
    env = {} if env is None else env
    cmd = [ex, '-m', 'intake.cli.server'] + args + [cat]
    p = subprocess.Popen(cmd, env=env, stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT)
    # Everything after the spawn lives inside try/finally so that a server
    # that never becomes reachable is still terminated, not left running.
    try:
        if wait is not None:
            while True:
                try:
                    requests.get('http://localhost:%i/v1/info' % wait)
                    break
                except requests.exceptions.RequestException:
                    time.sleep(0.1)
                    timeout -= 0.1
                    # Raised explicitly (not via ``assert``) so the limit
                    # still applies when Python runs with -O.
                    if timeout <= 0:
                        raise AssertionError(
                            'server did not respond on port %i' % wait)
        yield p
    finally:
        p.terminate()
        # Reap the child so it does not linger as a zombie process.
        p.wait()
# One parametrized run per registered serializer / compressor.
all_serializers = pytest.mark.parametrize(
    'ser', serializer.format_registry.values())
all_compressors = pytest.mark.parametrize(
    'comp', serializer.compression_registry.values())


@all_serializers
def test_dataframe(ser):
    """A dataframe read from ``entry1_1.csv`` survives encode -> decode."""
    pd = pytest.importorskip('pandas')
    pytest.importorskip('pyarrow')
    here = os.path.dirname(__file__)
    expected_df = pd.read_csv(os.path.join(here, 'entry1_1.csv'))

    # Check roundtrip
    encoded = ser.encode(expected_df, 'dataframe')
    df = ser.decode(encoded, 'dataframe')

    assert expected_df.equals(df)


@all_serializers
def test_ndarray(ser):
    """A 5x7 integer array survives encode -> decode."""
    expected_array = np.arange(35).reshape((5, 7))

    # Check roundtrip
    encoded = ser.encode(expected_array, 'ndarray')
    array = ser.decode(encoded, 'ndarray')
    np.testing.assert_array_equal(expected_array, array)


@all_serializers
def test_python(ser):
    """A list of plain dicts survives encode -> decode."""
    expected = [dict(a=1, b=[1, 2], c='str'), dict(a=[1, 2], b='str', d=None)]
    encoded = ser.encode(expected, 'python')
    actual = ser.decode(encoded, 'python')

    assert expected == actual


@all_compressors
def test_compression_roundtrip(comp):
    """Compressing then decompressing returns the original bytes."""
    data = b'1234\x01\x02'
    compressed = comp.compress(data)

    assert data == comp.decompress(compressed)


def test_none_compress():
    """``NoneCompressor`` hands the bytes back unchanged both ways."""
    comp = serializer.NoneCompressor()
    data = b'1234\x01\x02'

    # None should be no-op
    assert data == comp.decompress(data)
    assert data == comp.compress(data)
gl = globals()


def do_import():
    """Create the GUI and store it in this module's globals as ``instance``.

    Imports ``panel``, checks it is at least version 0.9.5, registers the
    ``scrolling`` CSS class, loads the panel extension and instantiates
    ``GUI``.

    Raises
    ------
    RuntimeError
        If ``panel`` cannot be imported or is older than 0.9.5.
    """
    min_version = "0.9.5"
    install_hint = ("Please install panel to use the GUI `conda "
                    "install -c conda-forge panel>=%s`." % min_version)
    try:
        import panel as pn
    except ImportError as e:
        raise RuntimeError("%s Import failed with error: %s"
                           % (install_hint, e))

    # An importable but outdated panel gets its own message; the import did
    # not fail, so there is no import error to report.
    if LooseVersion(pn.__version__) < LooseVersion(min_version):
        raise RuntimeError("%s Found panel %s, which is too old."
                           % (install_hint, pn.__version__))

    from .gui import GUI
    css = """
    .scrolling {
      overflow: scroll;
    }
    """
    pn.config.raw_css.append(css)  # add scrolling class from css (panel GH#383, GH#384)
    pn.extension()
    gl['instance'] = GUI()


def __getattr__(attr):
    """Module-level attribute hook that builds ``instance`` on first use.

    Only ``instance`` is handled here. Every other missing name raises
    ``AttributeError``, which is what ``hasattr``, ``getattr`` with a
    default and ``from ... import *`` rely on.
    """
    if attr != 'instance':
        raise AttributeError(
            "module %r has no attribute %r" % (__name__, attr))
    do_import()
    return gl['instance']
class SecretAuth(BaseAuth):
    """A very simple auth mechanism using a shared secret

    Parameters
    ----------
    secret: str
        The string that must be matched in the requests. If None, a random UUID
        is generated and logged.
    key: str
        Header entry in which to seek the secret
    """

    def __init__(self, secret=None, key='intake-secret'):
        if secret is None:
            # uuid4 is random; uuid1 is built from the host address and the
            # clock, so it is not the random value the docstring promises.
            secret = uuid.uuid4().hex
            logger.info('Random server secret: %s', secret)
        self.secret = secret
        self.key = key

    def _secret_matches(self, header):
        """Return True when ``header`` carries the expected secret.

        The value is looked up with ``get_case_insensitive`` under
        ``self.key``. Any ordinary error during lookup or comparison is
        treated as a failed match, so a malformed header denies access.
        """
        import hmac

        try:
            supplied = self.get_case_insensitive(header, self.key, '')
            if isinstance(supplied, str) and isinstance(self.secret, str):
                # Constant-time comparison so response timing does not
                # reveal how much of a guessed secret was correct.
                return hmac.compare_digest(supplied.encode('utf-8'),
                                           self.secret.encode('utf-8'))
            # Non-string values keep the plain equality semantics.
            return supplied == self.secret
        except Exception:
            return False

    def allow_connect(self, header):
        """Return True if ``header`` holds the shared secret."""
        return self._secret_matches(header)

    def allow_access(self, header, source, catalog):
        """Return True if ``header`` holds the shared secret.

        ``source`` and ``catalog`` are accepted but not consulted: the
        decision depends on the secret alone.
        """
        return self._secret_matches(header)


class SecretClientAuth(BaseClientAuth):
    """Matching client auth plugin to SecretAuth

    Parameters
    ----------
    secret: str
        The string that must be included in requests.
    key: str
        HTTP Header key for the shared secret
    """

    def __init__(self, secret, key='intake-secret'):
        self.secret = secret
        self.key = key
        super(SecretClientAuth, self).__init__()

    def get_headers(self):
        """Return a one-item dict mapping ``self.key`` to ``self.secret``."""
        return {self.key: self.secret}
class DummyDataframe(DataSource):
    """Source stub whose ``read`` returns a fixed one-row dataframe."""

    name = 'dummy'
    container = 'dataframe'

    def __init__(self, *args):
        # Positional arguments are accepted and ignored.
        super(DummyDataframe, self).__init__()

    def read(self):
        """Return a dataframe with column ``a`` holding the single value 0."""
        from pandas import DataFrame
        return DataFrame({'a': [0]})


def test_undask_persist(temp_cache):
    """Persisting ``DummyDataframe`` gives a source that reads the same data."""
    pytest.importorskip('intake_parquet')
    source = DummyDataframe()
    persisted = source.persist()
    expected = source.read().to_dict()
    actual = persisted.read().to_dict()
    assert expected == actual
'states_1.csv', 'states_2.csv'] 43 | for filename in files: 44 | if exists(filename): 45 | print('Cannot create example catalog in current directory.\n' 46 | '%s already exists.' % filename) 47 | return 1 48 | 49 | src_dir = join(dirname(__file__), '..', '..', 'sample') 50 | 51 | for filename in files: 52 | src_name = join(src_dir, filename) 53 | dest_name = filename 54 | dest_dir = dirname(filename) 55 | print(' Writing %s' % filename) 56 | if dest_dir != '' and not exists(dest_dir): 57 | os.mkdir(dest_dir) 58 | shutil.copyfile(src_name, dest_name) 59 | 60 | print('''\nTo load the catalog: 61 | >>> import intake 62 | >>> cat = intake.open_catalog('%s') 63 | ''' % files[0]) 64 | -------------------------------------------------------------------------------- /docs/source/api_user.rst: -------------------------------------------------------------------------------- 1 | End User 2 | -------- 3 | 4 | These are reference class and function definitions likely to be useful to everyone. 5 | 6 | .. autosummary:: 7 | intake.open_catalog 8 | intake.registry 9 | intake.register_driver 10 | intake.unregister_driver 11 | intake.upload 12 | intake.open_ 13 | intake.source.csv.CSVSource 14 | intake.source.textfiles.TextFilesSource 15 | intake.source.npy.NPySource 16 | intake.source.zarr.ZarrArraySource 17 | intake.catalog.local.YAMLFileCatalog 18 | intake.catalog.local.YAMLFilesCatalog 19 | intake.catalog.zarr.ZarrGroupCatalog 20 | intake.interface.gui.GUI 21 | 22 | .. autofunction:: 23 | intake.open_catalog 24 | 25 | .. attribute:: intake.registry 26 | 27 | Mapping from plugin names to the DataSource classes that implement them. These are the 28 | names that should appear in the ``driver:`` key of each source definition in a 29 | catalog. See :doc:`plugin-directory` for more details. 30 | 31 | .. attribute:: intake.open_ 32 | 33 | Set of functions, one for each plugin, for direct opening of a data source. 
The names are derived from the names of 34 | the plugins in the registry at import time. 35 | 36 | .. autofunction:: 37 | intake.upload 38 | 39 | .. autoclass:: intake.interface.gui.GUI 40 | :members: 41 | 42 | Source classes 43 | '''''''''''''' 44 | 45 | .. autoclass:: intake.source.csv.CSVSource 46 | :members: __init__, discover, read_partition, read, to_dask, persist, export 47 | 48 | .. autoclass:: intake.source.zarr.ZarrArraySource 49 | :members: __init__, discover, read_partition, read, to_dask, persist, export 50 | 51 | .. autoclass:: intake.source.textfiles.TextFilesSource 52 | :members: __init__, discover, read_partition, read, to_dask, persist, export 53 | 54 | .. autoclass:: intake.source.npy.NPySource 55 | :members: __init__, discover, read_partition, read, to_dask, persist, export 56 | 57 | .. autoclass:: intake.catalog.local.YAMLFileCatalog 58 | :members: __init__, reload, search, walk, persist, export 59 | 60 | .. autoclass:: intake.catalog.local.YAMLFilesCatalog 61 | :members: __init__, reload, search, walk, persist, export 62 | 63 | .. 
autoclass:: intake.catalog.zarr.ZarrGroupCatalog 64 | :members: __init__, reload, search, walk, persist, export, to_zarr 65 | -------------------------------------------------------------------------------- /intake/catalog/tests/catalog1.yml: -------------------------------------------------------------------------------- 1 | name: name_in_cat 2 | metadata: 3 | test: true 4 | plugins: 5 | source: 6 | - module: intake.catalog.tests.example1_source 7 | - module: intake.catalog.tests.example_plugin_dir.example2_source 8 | sources: 9 | use_example1: 10 | description: example1 source plugin 11 | driver: example1 12 | args: {} 13 | nested: 14 | description: around again 15 | driver: yaml_file_cat 16 | args: 17 | path: '{{ CATALOG_DIR }}/catalog1.yml' 18 | entry1: 19 | description: entry1 full 20 | metadata: 21 | foo: 'bar' 22 | bar: [1, 2, 3] 23 | driver: csv 24 | # Default direct_access is "forbid" by default 25 | args: # passed to the open() method 26 | urlpath: '{{ CATALOG_DIR }}/entry1_*.csv' 27 | entry1_part: 28 | description: entry1 part 29 | parameters: # User defined parameters 30 | part: 31 | description: part of filename 32 | type: str 33 | default: "1" 34 | allowed: ["1", "2"] 35 | metadata: 36 | foo: 'baz' 37 | bar: [2, 4, 6] 38 | driver: csv 39 | direct_access: "allow" 40 | args: # passed to the open() method 41 | urlpath: '{{ CATALOG_DIR }}/entry1_{{ part }}.csv' 42 | remote_env: 43 | description: env gets interpreted in server 44 | driver: intake.conftest.TestSource 45 | args: 46 | urlpath: 'path-{{intake_test}}' 47 | parameters: 48 | intake_test: 49 | description: none 50 | type: str 51 | default: 'env(INTAKE_TEST)' 52 | local_env: 53 | description: env gets interpreted in client 54 | driver: csv 55 | args: 56 | urlpath: 'path-{{intake_test}}' 57 | parameters: 58 | intake_test: 59 | description: none 60 | type: str 61 | default: 'client_env(INTAKE_TEST)' 62 | text: 63 | description: textfiles in this dir 64 | driver: textfiles 65 | args: 66 | urlpath: "{{ 
CATALOG_DIR }}/*.yml" 67 | arr: 68 | description: small array 69 | driver: numpy 70 | args: 71 | path: "{{ CATALOG_DIR }}/small.npy" 72 | chunks: 5 73 | datetime: 74 | description: datetime parameters 75 | driver: intake.conftest.TestSource 76 | args: 77 | urlpath: "{{time}}" 78 | parameters: 79 | time: 80 | description: some time 81 | type: datetime 82 | -------------------------------------------------------------------------------- /intake/cli/server/templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Intake Catalog Browser 9 | 10 | 11 | 12 | 13 | 14 | 15 | 19 | 20 | 21 | 22 |
23 | 24 |

Intake Catalog Browser

25 | 26 |

Data Sources

27 | 28 | {% for source in sources %} 29 |

{{ source.name }}

30 |

{{ source.description.description }}

31 |

Returns: {{ source.description.container }}

32 |

Parameters:

33 | 42 | {% endfor %} 43 | 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /intake/cli/bootstrap.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 6 | #----------------------------------------------------------------------------- 7 | ''' Provide a ``main`` function to run intake commands. 8 | 9 | ''' 10 | 11 | import logging 12 | log = logging.getLogger(__name__) 13 | 14 | #----------------------------------------------------------------------------- 15 | # Imports 16 | #----------------------------------------------------------------------------- 17 | 18 | # Standard library imports 19 | import argparse 20 | 21 | # External imports 22 | 23 | # Intake imports 24 | from intake import __version__ 25 | from intake.cli.util import die, nice_join 26 | 27 | #----------------------------------------------------------------------------- 28 | # API 29 | #----------------------------------------------------------------------------- 30 | 31 | def main(description, subcommands, argv): 32 | ''' Execute an intake command. 
33 | 34 | Args: 35 | description (str) : 36 | A description for this top-level command 37 | 38 | subcommands (seq[SubCommand]) : 39 | A list of subcommands to configure for argparse 40 | 41 | argv (seq[str]) : 42 | A list of command line arguments to process 43 | 44 | Returns: 45 | None 46 | 47 | ''' 48 | if len(argv) == 1: 49 | die("ERROR: Must specify subcommand, one of: %s" % nice_join(x.name for x in subcommands)) 50 | 51 | parser = argparse.ArgumentParser( 52 | prog=argv[0], 53 | description=description, 54 | epilog="See ' --help' to read about a specific subcommand.") 55 | 56 | parser.add_argument('-v', '--version', action='version', version=__version__) 57 | 58 | subs = parser.add_subparsers(help="Sub-commands") 59 | 60 | for cls in subcommands: 61 | subparser = subs.add_parser(cls.name, help=cls.__doc__.strip()) 62 | subcommand = cls(parser=subparser) 63 | subparser.set_defaults(invoke=subcommand.invoke) 64 | 65 | args = parser.parse_args(argv[1:]) 66 | try: 67 | return args.invoke(args) or 0 # convert None to 0 68 | except Exception as e: 69 | die("ERROR: " + repr(e)) 70 | -------------------------------------------------------------------------------- /intake/interface/source/tests/test_gui.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2019, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 
6 | #----------------------------------------------------------------------------- 7 | from distutils.version import LooseVersion 8 | import pytest 9 | pn = pytest.importorskip('panel') 10 | too_old = LooseVersion(pn.__version__) < LooseVersion("0.9.5") 11 | 12 | 13 | @pytest.mark.skipif(too_old, reason="Use with latest panel") 14 | @pytest.fixture 15 | def gui(sources1): 16 | from ..gui import SourceGUI 17 | return SourceGUI(sources=sources1) 18 | 19 | 20 | def test_gui_attribute(sources1): 21 | assert sources1[0].gui 22 | 23 | 24 | def test_gui(gui, sources1): 25 | assert gui.select.items == sources1 26 | assert gui.sources == [sources1[0]] 27 | 28 | assert not gui.plot.watchers 29 | assert gui.plot.visible is False 30 | assert gui.plot_widget.disabled is False 31 | 32 | 33 | def test_gui_close_and_open_select(gui, sources1): 34 | gui.select.selected = [sources1[1]] 35 | gui.select.visible = False 36 | assert not gui.select.watchers 37 | 38 | gui.select.visible = True 39 | assert len(gui.select.watchers) == 1 40 | assert gui.select.selected == [sources1[1]] 41 | assert gui.plot_widget.disabled is False 42 | 43 | 44 | def test_gui_getstate(gui, sources1): 45 | state = gui.__getstate__() 46 | 47 | assert state['visible']is True 48 | assert state['plot']['visible'] is False 49 | assert state['select']['selected'] == [sources1[0].name] 50 | 51 | 52 | def test_gui_state_roundtrip(gui, sources1): 53 | from ..gui import SourceGUI 54 | other = SourceGUI.from_state(gui.__getstate__()) 55 | 56 | assert other.select.items == sources1 57 | assert other.sources == [sources1[0]] 58 | assert other.plot.visible is False 59 | assert other.description.visible is True 60 | 61 | 62 | def test_gui_state_roundtrip_with_subpanels(gui, sources1): 63 | from ..gui import SourceGUI 64 | gui.plot.visible = True 65 | 66 | other = SourceGUI.from_state(gui.__getstate__()) 67 | 68 | assert other.select.items == sources1 69 | assert other.sources == [sources1[0]] 70 | assert other.plot.visible 
is True 71 | assert other.plot_widget.value is True 72 | -------------------------------------------------------------------------------- /intake/source/decompress.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 6 | #----------------------------------------------------------------------------- 7 | 8 | import os 9 | from intake.utils import make_path_posix 10 | 11 | 12 | def unzip(f, outpath): 13 | import zipfile 14 | z = zipfile.ZipFile(f, 'r') 15 | z.extractall(outpath) 16 | out = [make_path_posix(os.path.join(outpath, fn.filename)) 17 | for fn in z.filelist] 18 | z.close() 19 | return out 20 | 21 | 22 | def untargz(f, outpath): 23 | import tarfile 24 | tar = tarfile.open(f, "r:gz") 25 | out = [make_path_posix(os.path.join(outpath, fn.name)) 26 | for fn in tar.getmembers()] 27 | tar.extractall(outpath) 28 | tar.close() 29 | return out 30 | 31 | 32 | def untarbz(f, outpath): 33 | import tarfile 34 | tar = tarfile.open(f, "r:bz2") 35 | out = [make_path_posix(os.path.join(outpath, fn.name)) 36 | for fn in tar.getmembers()] 37 | tar.extractall(outpath) 38 | tar.close() 39 | return out 40 | 41 | 42 | def untar(f, outpath): 43 | import tarfile 44 | tar = tarfile.open(f, "r:") 45 | out = [make_path_posix(os.path.join(outpath, fn.name)) 46 | for fn in tar.getmembers()] 47 | tar.extractall(outpath) 48 | tar.close() 49 | return out 50 | 51 | 52 | def ungzip(f, outpath): 53 | import gzip 54 | z = gzip.open(f) 55 | fn = os.path.basename(f)[:-3] 56 | with open(os.path.join(outpath, fn), 'wb') as fout: 57 | data = True 58 | while data: 59 | data = z.read(2**15) 60 | fout.write(data) 61 | return [make_path_posix(os.path.join(outpath, fn))] 62 | 63 | 64 | def unbzip(f, outpath): 65 
| import bz2 66 | z = bz2.open(f) 67 | fn = os.path.basename(f)[:-3] 68 | with open(os.path.join(outpath, fn), 'wb') as fout: 69 | data = True 70 | while data: 71 | data = z.read(2 ** 15) 72 | fout.write(data) 73 | return [make_path_posix(os.path.join(outpath, fn))] 74 | 75 | 76 | decomp = {'zip': unzip, 77 | 'tgz': untargz, 78 | 'tbz': untarbz, 79 | 'tar': untar, 80 | 'gz': ungzip, 81 | 'bz': unbzip} 82 | -------------------------------------------------------------------------------- /intake/interface/source/description.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2019, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 6 | #----------------------------------------------------------------------------- 7 | import panel as pn 8 | from ..base import BaseView 9 | from ...utils import pretty_describe 10 | 11 | 12 | class Description(BaseView): 13 | """ 14 | Class for displaying a textual description of a data source. 15 | 16 | Parameters 17 | ---------- 18 | source: intake catalog entry, or list of same 19 | source to describe in this object 20 | 21 | Attributes 22 | ---------- 23 | contents: str 24 | string representation of the source's description 25 | label: str 26 | label to display at top of panel - contains name of source 27 | children: list of panel objects 28 | children that will be used to populate the panel when visible 29 | panel: panel layout object 30 | instance of a panel layout (row or column) that contains children 31 | when visible 32 | watchers: list of param watchers 33 | watchers that are set on children - cleaned up when visible 34 | is set to false. 
35 | """ 36 | main_pane = None 37 | 38 | def __init__(self, source=None, **kwargs): 39 | self.source = source 40 | self.panel = pn.Column(name='Description', width_policy='max', 41 | margin=0, height=240, sizing_mode='stretch_width', 42 | scroll=True) 43 | super().__init__(**kwargs) 44 | 45 | def setup(self): 46 | self.main_pane = pn.pane.Markdown(self.contents) 47 | self.children = [self.main_pane] 48 | 49 | @BaseView.source.setter 50 | def source(self, source): 51 | """When the source gets updated, update the pane object""" 52 | BaseView.source.fset(self, source) 53 | if self.main_pane: 54 | self.main_pane.object = """```yaml\n{}\n```""".format(self.contents) 55 | 56 | @property 57 | def contents(self): 58 | """String representation of the source's description""" 59 | if not self._source: 60 | return 'name: ' + "⠀" * 30 61 | contents = self.source.describe() 62 | return pretty_describe(contents) 63 | -------------------------------------------------------------------------------- /intake/interface/catalog/tests/test_catalog_search.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2019, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 
6 | #----------------------------------------------------------------------------- 7 | import pytest 8 | pytest.importorskip('panel') 9 | 10 | 11 | @pytest.fixture 12 | def search_inputs(cat1, cat2): 13 | from ..search import SearchInputs 14 | return SearchInputs() 15 | 16 | 17 | def test_search_inputs(search_inputs): 18 | assert search_inputs.visible 19 | assert len(search_inputs.children) == 4 20 | assert len(search_inputs.panel.objects) == 4 21 | 22 | 23 | def test_search_inputs_text_prop_equal_to_widget_value(search_inputs): 24 | search_inputs.text = 'some text' 25 | assert search_inputs.text_widget.value == 'some text' 26 | 27 | 28 | def test_search_inputs_depth_prop_parses_to_int(search_inputs): 29 | search_inputs.depth = '2' 30 | assert search_inputs.depth == 2 31 | 32 | search_inputs.depth = 'All' 33 | assert search_inputs.depth == 99 34 | 35 | 36 | @pytest.fixture 37 | def search(cat1, cat2): 38 | from ..search import Search 39 | return Search(cats=[cat1, cat2]) 40 | 41 | 42 | def test_search(search): 43 | assert search.visible 44 | assert len(search.children) == 2 45 | assert len(search.panel.objects) == 2 46 | 47 | 48 | def test_search_watchers_gets_populated(search): 49 | assert len(search.watchers) == 1 50 | 51 | 52 | def test_search_widget_click_tries_to_run_callback(search): 53 | search.inputs.text = 'flight' 54 | with pytest.raises(TypeError, match="'NoneType' object is not callable"): 55 | search.widget.clicks = 1 56 | 57 | 58 | def test_search_unwatch_watchers_get_cleaned_up(search): 59 | search.unwatch() 60 | assert len(search.watchers) == 0 61 | search.inputs.text = 'flight' 62 | 63 | # does not try to run callback 64 | search.widget.clicks = 2 65 | 66 | 67 | def test_callback_gets_right_input(search): 68 | def callback(new_cats): 69 | """Raises an error if called""" 70 | raise ValueError('New catalogs', new_cats) 71 | 72 | search.inputs.text = 'flight' 73 | search.done_callback = callback 74 | with pytest.raises(ValueError, match=''): 75 | 
search.widget.clicks = 3 76 | -------------------------------------------------------------------------------- /intake/catalog/tests/test_auth_integration.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 6 | #----------------------------------------------------------------------------- 7 | 8 | import os 9 | import os.path 10 | import shutil 11 | import tempfile 12 | import time 13 | 14 | import pytest 15 | 16 | from intake import open_catalog 17 | 18 | from intake.auth.secret import SecretClientAuth 19 | from intake.auth.base import AuthenticationFailure 20 | 21 | TMP_DIR = tempfile.mkdtemp() 22 | CONF_DIR = os.path.join(TMP_DIR, 'conf') 23 | os.mkdir(CONF_DIR) 24 | 25 | TEST_CATALOG_PATH = [TMP_DIR] 26 | YAML_FILENAME = 'intake_test_catalog.yml' 27 | 28 | 29 | # Create server configuration using shared-secret Auth 30 | TEST_SERVER_CONF = os.path.join(CONF_DIR, 'config.yaml') 31 | conf = ''' 32 | auth: 33 | cls: intake.auth.secret.SecretAuth 34 | kwargs: 35 | secret: test_secret 36 | ''' 37 | with open(TEST_SERVER_CONF, 'w') as f: 38 | f.write(conf) 39 | 40 | 41 | @pytest.fixture 42 | def intake_server_with_auth(intake_server): 43 | fullname = os.path.join(TMP_DIR, YAML_FILENAME) 44 | 45 | try: 46 | os.makedirs(os.path.join(TMP_DIR, 'data')) 47 | except: 48 | pass 49 | with open(fullname, 'w') as f: 50 | f.write(''' 51 | sources: 52 | example: 53 | description: example1 source plugin 54 | driver: csv 55 | args: 56 | urlpath: "{{ CATALOG_DIR }}/data/example.csv" 57 | ''') 58 | 59 | csv_name = os.path.join(TMP_DIR, 'data', 'example.csv') 60 | with open(csv_name, 'w') as f: 61 | f.write('a,b,c\n1,2,3\n4,5,6') 62 | time.sleep(2) 63 | 64 | yield intake_server 65 | 66 | 
try: 67 | shutil.rmtree(TMP_DIR) 68 | except: 69 | pass 70 | 71 | 72 | def test_secret_auth(intake_server_with_auth): 73 | auth = SecretClientAuth(secret='test_secret') 74 | catalog = open_catalog(intake_server_with_auth, auth=auth) 75 | 76 | entries = list(catalog) 77 | assert entries == ['example'] 78 | 79 | catalog.example.read() 80 | 81 | 82 | def test_secret_auth_fail(intake_server_with_auth): 83 | auth = SecretClientAuth(secret='test_wrong_secret') 84 | with pytest.raises(AuthenticationFailure): 85 | list(open_catalog(intake_server_with_auth, auth=auth)) 86 | -------------------------------------------------------------------------------- /intake/source/tests/test_npy.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 
6 | #----------------------------------------------------------------------------- 7 | 8 | import os 9 | import numpy as np 10 | import posixpath 11 | import pytest 12 | import intake 13 | from ..npy import NPySource 14 | 15 | here = os.path.abspath(os.path.dirname(__file__)) 16 | 17 | 18 | @pytest.mark.parametrize('shape', [(1, ), (1, 1), (10, ), (5, 2), (3, 3, 3)]) 19 | def test_one_file(tempdir, shape): 20 | size = 1 21 | for s in shape: 22 | size *= s 23 | data = np.random.randint(1, 100, size=size).reshape(shape) 24 | fn = os.path.join(tempdir, 'out.npy') 25 | np.save(fn, data) 26 | s = NPySource(fn) 27 | out = s.read() 28 | assert (out == data).all() 29 | s = NPySource(fn, chunks=1) 30 | out = s.read() 31 | assert (out == data).all() 32 | s = NPySource(fn, shape=shape, dtype='int', chunks=1) 33 | out = s.read() 34 | assert (out == data).all() 35 | 36 | 37 | @pytest.mark.parametrize('shape', [(1, ), (1, 1), (10, ), (5, 2), (3, 3, 3)]) 38 | def test_multi_file(tempdir, shape): 39 | size = 1 40 | for s in shape: 41 | size *= s 42 | data0 = np.random.randint(1, 100, size=size).reshape(shape) 43 | fn0 = os.path.join(tempdir, 'out0.npy') 44 | np.save(fn0, data0) 45 | data1 = np.random.randint(1, 100, size=size).reshape(shape) 46 | fn1 = os.path.join(tempdir, 'out1.npy') 47 | np.save(fn1, data1) 48 | data = np.stack([data0, data1]) 49 | fn = [fn0, fn1] 50 | s = NPySource(fn) 51 | out = s.read() 52 | assert (out == data).all() 53 | s = NPySource(fn, chunks=1) 54 | out = s.read() 55 | assert (out == data).all() 56 | s = NPySource(fn, shape=shape, dtype='int', chunks=1) 57 | out = s.read() 58 | assert (out == data).all() 59 | s = NPySource(os.path.join(tempdir, 'out*.npy')) 60 | out = s.read() 61 | assert (out == data).all() 62 | 63 | 64 | def test_zarr_minimal(): 65 | pytest.importorskip('zarr') 66 | cat = intake.open_catalog(posixpath.join(here, 'sources.yaml')) 67 | s = cat.zarr1() 68 | assert s.container == 'ndarray' 69 | assert s.read().tolist() == [73, 98, 46, 
38, 20, 12, 31, 8, 89, 72] 70 | assert s.npartitions == 1 71 | assert s.dtype == 'int' 72 | assert s.shape == (10,) 73 | assert (s.read_partition((0, )) == s.read()).all() 74 | -------------------------------------------------------------------------------- /intake/cli/tests/test_util.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 6 | #----------------------------------------------------------------------------- 7 | 8 | 9 | import pytest 10 | 11 | # module under test 12 | import intake.cli.util as m 13 | 14 | # TODO 15 | def test_print_entry_info(): 16 | pass 17 | 18 | def test_die(capsys): 19 | with pytest.raises(SystemExit): 20 | m.die("foo") 21 | out, err = capsys.readouterr() 22 | assert err == "foo\n" 23 | assert out == "" 24 | 25 | class Test_nice_join(object): 26 | 27 | def test_default(self): 28 | assert m.nice_join(["one"]) == "one" 29 | assert m.nice_join(["one", "two"]) == "one or two" 30 | assert m.nice_join(["one", "two", "three"]) == "one, two or three" 31 | assert m.nice_join(["one", "two", "three", "four"]) == "one, two, three or four" 32 | 33 | def test_string_conjunction(self): 34 | assert m.nice_join(["one"], conjunction="and") == "one" 35 | assert m.nice_join(["one", "two"], conjunction="and") == "one and two" 36 | assert m.nice_join(["one", "two", "three"], conjunction="and") == "one, two and three" 37 | assert m.nice_join(["one", "two", "three", "four"], conjunction="and") == "one, two, three and four" 38 | 39 | def test_None_conjunction(self): 40 | assert m.nice_join(["one"], conjunction=None) == "one" 41 | assert m.nice_join(["one", "two"], conjunction=None) == "one, two" 42 | assert m.nice_join(["one", "two", "three"], conjunction=None) == 
"one, two, three" 43 | assert m.nice_join(["one", "two", "three", "four"], conjunction=None) == "one, two, three, four" 44 | 45 | def test_sep(self): 46 | assert m.nice_join(["one"], sep='; ') == "one" 47 | assert m.nice_join(["one", "two"], sep='; ') == "one or two" 48 | assert m.nice_join(["one", "two", "three"], sep='; ') == "one; two or three" 49 | assert m.nice_join(["one", "two", "three", "four"], sep="; ") == "one; two; three or four" 50 | 51 | class TestSubcommand(object): 52 | 53 | def test_initialize_abstract(self): 54 | with pytest.raises(NotImplementedError): 55 | obj = m.Subcommand("parser") 56 | obj.initialize() 57 | 58 | def test_invoke_abstract(self): 59 | with pytest.raises(NotImplementedError): 60 | obj = m.Subcommand("parser") 61 | obj.invoke("args") 62 | -------------------------------------------------------------------------------- /docs/source/roadmap.rst: -------------------------------------------------------------------------------- 1 | .. _roadmap: 2 | 3 | Roadmap 4 | ======= 5 | 6 | Some high-level work that we expect to be achieved on the time-scale of months. This list 7 | is not exhaustive, but rather aims to whet the appetite for what Intake can be in the future. 8 | 9 | Since Intake aims to be a community of data-oriented pythoneers, nothing written here is laid in 10 | stone, and users and devs are encouraged to make their opinions known! 11 | 12 | See also the `wiki page <https://github.com/intake/intake/wiki/Community-News>`_ on the latest Intake 13 | community news. 14 | 15 | Broaden the coverage of formats 16 | ------------------------------- 17 | 18 | Data-type drivers are easy to write, but still require some effort, and therefore reasonable 19 | impetus to get the work done. Conversations over the coming months can help determine the 20 | drivers that should be created by the Intake team, and those that might be contributed by the 21 | community. 
22 | 23 | The next type that we would specifically like to consider is machine learning 24 | model artifacts. 25 | 26 | Streaming Source 27 | ---------------- 28 | 29 | Many data sources are inherently time-sensitive and event-wise. These are not covered well by existing 30 | Python tools, but the ``streamz`` library may present a nice way to model them. From the Intake point of 31 | view, the task would be to develop a streaming type, and at least one data driver that uses it. 32 | 33 | The most obvious place to start would be to read a file: every time a new line appears in the file, an event 34 | is emitted. This is appropriate, for instance, for watching the log files of a web-server, and indeed could 35 | be extended to read from an arbitrary socket. 36 | 37 | Streamz has seen renewed development recently and a new version is coming soon. 38 | 39 | Server publish hooks 40 | -------------------- 41 | 42 | To add API endpoints to the server, so that a user (with sufficient privilege) can post data 43 | specifications to a running server, optionally saving the specs to a catalog server-side. Furthermore, 44 | we will consider the possibility of being able to upload and/or transform data 45 | (rather than refer to it in a third-party location), so that you would have a one-line "publish" 46 | ability from the client. 47 | 48 | Simplify dependencies and class hierarchy 49 | ----------------------------------------- 50 | 51 | We would like to make it easier to write Intake drivers which don't need any 52 | persist or GUI functionality, and to be able to install Intake core 53 | functionality (driver registry, data loading and catalog traversal) without 54 | needing many other packages at all. 
55 | -------------------------------------------------------------------------------- /intake/cli/client/tests/test_conf.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 6 | #----------------------------------------------------------------------------- 7 | 8 | import os 9 | import posixpath 10 | import subprocess 11 | 12 | 13 | def test_reset(env): 14 | subprocess.call(['intake', 'config', 'reset'], 15 | env=env, universal_newlines=True) 16 | confdir = env['INTAKE_CONF_DIR'] 17 | fn = posixpath.join(confdir, 'conf.yaml') 18 | assert os.path.isfile(fn) 19 | txt = open(fn).read() 20 | assert 'port: 5000' in txt 21 | 22 | 23 | def test_info(tempdir): # if envs is used, conf file will already exist 24 | env = os.environ.copy() 25 | env["INTAKE_CONF_DIR"] = confdir = tempdir 26 | out = subprocess.check_output(['intake', 'config', 'info'], 27 | env=env, universal_newlines=True) 28 | fn = posixpath.join(confdir, 'conf.yaml') 29 | assert fn in out 30 | assert 'INTAKE_CONF_DIR' in out 31 | assert 'INTAKE_CONF_FILE' not in out 32 | assert "(does not exist)" in out 33 | with open(fn, 'w') as f: 34 | f.write('port: 5000') 35 | out = subprocess.check_output(['intake', 'config', 'info'], 36 | env=env, universal_newlines=True) 37 | assert "(does not exist)" not in out 38 | 39 | 40 | def test_defaults(): 41 | out = subprocess.check_output(['intake', 'config', 'list-defaults'], 42 | universal_newlines=True) 43 | assert 'port: 5000' in out 44 | 45 | 46 | def test_get(env): 47 | confdir = env['INTAKE_CONF_DIR'] 48 | fn = posixpath.join(confdir, 'conf.yaml') 49 | with open(fn, 'w') as f: 50 | f.write('port: 5001') 51 | out = subprocess.check_output(['intake', 'config', 'get'], 52 | 
env=env, universal_newlines=True) 53 | assert 'port: 5001' in out 54 | out = subprocess.check_output(['intake', 'config', 'get', 'port'], 55 | env=env, universal_newlines=True) 56 | assert out.startswith('5001') 57 | 58 | 59 | def test_log_level(): 60 | env = os.environ.copy() 61 | env['INTAKE_LOG_LEVEL'] = 'DEBUG' 62 | out = subprocess.check_output(['intake', 'config', 'info'], 63 | stderr=subprocess.STDOUT, 64 | env=env, universal_newlines=True) 65 | assert "logger set to debug" in out 66 | -------------------------------------------------------------------------------- /intake/auth/base.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 6 | #----------------------------------------------------------------------------- 7 | 8 | from ..utils import DictSerialiseMixin 9 | 10 | 11 | class AuthenticationFailure(Exception): 12 | pass 13 | 14 | 15 | class BaseAuth(DictSerialiseMixin): 16 | """Base class for authorization 17 | 18 | Subclass this and override the methods to implement a new type of auth. 19 | 20 | This basic class allows all access. 21 | """ 22 | 23 | def __init__(self, *args): 24 | self.args = args 25 | 26 | def allow_connect(self, header): 27 | """Is the requests header given allowed to talk to the server 28 | 29 | Parameters 30 | ---------- 31 | header: dict 32 | The HTTP header from the incoming request 33 | """ 34 | return True 35 | 36 | def allow_access(self, header, source, catalog): 37 | """Is the given HTTP header allowed to access given data source 38 | 39 | Parameters 40 | ---------- 41 | header: dict 42 | The HTTP header from the incoming request 43 | source: CatalogEntry 44 | The data source the user wants to access. 
45 | catalog: Catalog 46 | The catalog object containing this data source. 47 | """ 48 | return True 49 | 50 | def get_case_insensitive(self, dictionary, key, default=None): 51 | """Case-insensitive search of a dictionary for key. 52 | 53 | Returns the value if key match is found, otherwise default. 54 | """ 55 | lower_key = key.lower() 56 | for k, v in dictionary.items(): 57 | if lower_key == k.lower(): 58 | return v 59 | else: 60 | return default 61 | 62 | 63 | class BaseClientAuth(object): 64 | """Base class for client-side setting of authorization headers 65 | 66 | This basic class adds no headers to remote catalog reqests 67 | """ 68 | 69 | def __init__(self, *args): 70 | self.args = args 71 | 72 | def __dask_tokenize__(self): 73 | return hash(self) 74 | 75 | @property 76 | def _tok(self): 77 | from dask.base import tokenize 78 | return tokenize({'cls': type(self).__name__, 'args': self.args}) 79 | 80 | def __hash__(self): 81 | return int(self._tok, 16) 82 | 83 | def get_headers(self): 84 | """Returns a dictionary of HTTP headers for the remote catalog request. 85 | """ 86 | return {} 87 | -------------------------------------------------------------------------------- /intake/interface/catalog/tests/test_gui.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2019, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 
def test_gui_getstate(gui, cat1, sources1):
    """The serialised GUI state records visibility flags and the selection."""
    snapshot = gui.__getstate__()

    # the GUI itself is visible while both sub-panels are hidden
    assert snapshot['visible'] is True
    for panel in ('add', 'search'):
        assert snapshot[panel]['visible'] is False
    # the selection is stored by catalog name
    assert snapshot['select']['selected'] == [cat1.name]
cat2] 72 | assert other.cats == [cat1] 73 | assert other.search.visible is True 74 | assert other.search_widget.value is True 75 | assert other.search.inputs.text == 'foo' 76 | assert other.search.inputs.depth == 3 77 | 78 | assert other.add.visible is True 79 | assert other.add_widget.value is True 80 | assert other.add.widget.disabled is False 81 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Intake: A general interface for loading data 2 | 3 | ![Logo](https://github.com/intake/intake/raw/master/logo-small.png) 4 | 5 | [![Build Status](https://github.com/intake/intake/workflows/CI/badge.svg)](https://github.com/intake/intake/actions) 6 | [![Documentation Status](https://readthedocs.org/projects/intake/badge/?version=latest)](http://intake.readthedocs.io/en/latest/?badge=latest) 7 | [![Join the chat at https://gitter.im/ContinuumIO/intake](https://badges.gitter.im/ContinuumIO/intake.svg)](https://gitter.im/ContinuumIO/intake?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) 8 | 9 | 10 | Intake is a lightweight set of tools for loading and sharing data in data science projects. 11 | Intake helps you: 12 | 13 | * Load data from a variety of formats (see the [current list of known plugins](http://intake.readthedocs.io/en/latest/plugin-directory.html)) into containers you already know, like Pandas dataframes, Python lists, NumPy arrays, and more. 14 | * Convert boilerplate data loading code into reusable Intake plugins 15 | * Describe data sets in catalog files for easy reuse and sharing between projects and with others. 16 | * Share catalog information (and data sets) over the network with the Intake server 17 | 18 | Documentation is available at [Read the Docs](http://intake.readthedocs.io/en/latest). 
19 | 20 | Status of intake and related packages is available at [Status Dashboard](https://intake.github.io/status) 21 | 22 | Weekly news about this repo and other related projects can be found on the 23 | [wiki](https://github.com/intake/intake/wiki/Community-News) 24 | 25 | Install 26 | ------- 27 | 28 | Recommended method using conda: 29 | ```bash 30 | conda install -c conda-forge intake 31 | ``` 32 | 33 | You can also install using `pip`, in which case you have a choice as to how many of the optional 34 | dependencies you install, with the simplest having least requirements 35 | 36 | ```bash 37 | pip install intake 38 | ``` 39 | 40 | and additional sections `[server]`, `[plot]` and `[dataframe]`, or to include everything: 41 | 42 | ```bash 43 | pip install intake[complete] 44 | ``` 45 | 46 | Note that you may well need specific drivers and other plugins, which usually have additional 47 | dependencies of their own. 48 | 49 | Development 50 | ----------- 51 | * Create development Python environment, ideally with `conda`. The requirements can be found in the 52 | recipe in the `conda/` directory of this repo or in the sister 53 | [feedstock](https://github.com/conda-forge/intake-feedstock) 54 | * Install using `pip install -e .[complete]` 55 | * Add `pytest` to the environment to be able to run tests 56 | * Create a fork on github to be able to submit PRs. 57 | * We respect, but do not enforce, pep8 standards; all new code should be covered by tests. 58 | -------------------------------------------------------------------------------- /intake/auth/tests/test_auth.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 
def test_base_get_case_insensitive():
    """Lookups succeed regardless of the case of the stored key or the query."""
    auth = BaseAuth()
    headers = {'foo': 1, 'BAR': 2}

    for query in ('foo', 'Foo', 'FOO'):
        assert auth.get_case_insensitive(headers, query) == 1

    for query in ('bar', 'Bar', 'BAR'):
        assert auth.get_case_insensitive(headers, query) == 2

    # a missing key gives None unless an explicit default is passed
    assert auth.get_case_insensitive(headers, 'no') is None
    assert auth.get_case_insensitive(headers, 'no', '') == ''
class CatalogException(Exception):
    """Basic exception for errors raised by catalog"""


class PermissionDenied(CatalogException):
    """Raised when the user requests functionality they are not permitted
    to access.
    """


class ShellPermissionDenied(PermissionDenied):
    """The user does not have permission to execute shell commands."""

    def __init__(self, msg=None):
        # fall back to a generic explanation when no message is supplied
        text = ("Additional permissions needed to execute shell commands."
                if msg is None else msg)
        super().__init__(text)


class EnvironmentPermissionDenied(PermissionDenied):
    """The user does not have permission to read environment variables."""

    def __init__(self, msg=None):
        # fall back to a generic explanation when no message is supplied
        text = ("Additional permissions needed to read environment variables."
                if msg is None else msg)
        super().__init__(text)


class ValidationError(CatalogException):
    """Something's wrong with the catalog spec

    The ``errors`` argument is kept on the instance as ``self.errors``.
    """

    def __init__(self, message, errors):
        super().__init__(message)
        self.errors = errors


class DuplicateKeyError(ValidationError):
    """Catalog contains key duplications

    Only ``problem_mark`` is used: its zero-based ``line`` and ``column``
    are reported one-based in the message. The error list is empty.
    """

    def __init__(self, context, context_mark, problem, problem_mark):
        row = problem_mark.line + 1
        col = problem_mark.column + 1
        super().__init__(
            "duplicate key found on line {}, column {}".format(row, col), [])


class ObsoleteError(ValidationError):
    """Common base of the errors raised for old catalog syntax."""


class ObsoleteParameterError(ObsoleteError):
    """Old list-style ``parameters`` syntax was detected."""

    def __init__(self):
        # NOTE(review): indentation inside the YAML example is reconstructed;
        # confirm against the original file.
        msg = """Detected old syntax. See details for upgrade instructions to new syntax:

[old syntax]

parameters:
  - name: abc
    type: str

[new syntax]

parameters:
  abc:
    type: str
"""
        super().__init__(msg, [])


class ObsoleteDataSourceError(ObsoleteError):
    """Old list-style ``sources`` syntax was detected."""

    def __init__(self):
        # NOTE(review): indentation inside the YAML example is reconstructed;
        # confirm against the original file.
        msg = """Detected old syntax. See details for upgrade instructions to new syntax:

[old syntax]

sources:
  - name: abc
    driver: csv

[new syntax]

sources:
  abc:
    driver: csv
"""
        super().__init__(msg, [])
class Config(Subcommand):
    ''' Configuration functions

    Registers the ``list-defaults``, ``reset``, ``info`` and ``get``
    actions; each action is bound to a handler through the sub-parser's
    ``invoke`` default.
    '''

    name = "config"

    def initialize(self):
        ''' Create one sub-parser per action and attach its handler. '''
        sub_parser = self.parser.add_subparsers()

        # named conf_list (not ``list``) so the builtin is not shadowed
        conf_list = sub_parser.add_parser('list-defaults', help='Show all builtin defaults')
        conf_list.set_defaults(invoke=self._list_defaults)

        conf_reset = sub_parser.add_parser('reset', help='Set config file to defaults')
        conf_reset.set_defaults(invoke=self._reset)

        conf_info = sub_parser.add_parser('info', help='Show config settings')
        conf_info.set_defaults(invoke=self._info)

        conf_get = sub_parser.add_parser('get', help='Get current config, specific key or all')
        conf_get.add_argument('key', type=str, help='Key in config dictionary', nargs='?')
        conf_get.set_defaults(invoke=self._get)

    def invoke(self, args):
        ''' Print the help text; used when no action handler was selected. '''
        self.parser.print_help()

    def _get(self, args):
        ''' Print one config value, or the whole config as YAML if no key. '''
        from intake.config import conf
        if args.key:
            print(conf[args.key])
        else:
            print(yaml.dump(conf, default_flow_style=False))

    def _info(self, args):
        ''' Print config-related environment variables and the file in use. '''
        from intake.config import cfile
        if 'INTAKE_CONF_DIR' in os.environ:
            print('INTAKE_CONF_DIR: ', os.environ['INTAKE_CONF_DIR'])
        if 'INTAKE_CONF_FILE' in os.environ:
            print('INTAKE_CONF_FILE: ', os.environ['INTAKE_CONF_FILE'])
        # resolve the path once so the existence check and the printout agree
        path = cfile()
        ex = "" if os.path.isfile(path) else "(does not exist)"
        print('Using: ', path, ex)

    def _list_defaults(self, args):
        ''' Print the builtin default configuration as YAML. '''
        from intake.config import defaults
        print(yaml.dump(defaults, default_flow_style=False))

    def _reset(self, args):
        ''' Reset the configuration and save it. '''
        from intake.config import reset_conf, save_conf
        reset_conf()
        save_conf()
from setuptools import setup, find_packages
import sys
import versioneer

# Runtime dependencies are read from requirements.txt. Blank lines and
# comment lines carry no requirement and are skipped; the file is closed
# as soon as it has been read.
with open('requirements.txt') as req_file:
    requires = [line.strip() for line in req_file
                if line.strip() and not line.lstrip().startswith("#")]

extras_require = {
    'server': ['tornado', 'python-snappy'],
    'plot': ['hvplot', 'panel >= 0.7.0', 'bokeh < 2.0'],
    'dataframe': ['dask[dataframe]', 'msgpack-numpy', 'pyarrow'],
}
# "complete" is the de-duplicated union of every optional section
extras_require['complete'] = sorted(set(sum(extras_require.values(), [])))

# Only include pytest-runner in setup_requires if we're invoking tests
if {'pytest', 'test', 'ptr'}.intersection(sys.argv):
    setup_requires = ['pytest-runner']
else:
    setup_requires = []

# The README doubles as the long description shown on the package index
with open('README.md', encoding='utf-8') as readme_file:
    long_description = readme_file.read()

setup(
    name='intake',
    version=versioneer.get_version(),
    cmdclass=versioneer.get_cmdclass(),
    description='Data load and catalog system',
    url='https://github.com/intake/intake',
    maintainer='Martin Durant',
    maintainer_email='mdurant@anaconda.com',
    license='BSD',
    package_data={'': ['*.csv', '*.yml', '*.yaml', '*.html']},
    include_package_data=True,
    install_requires=requires,
    packages=find_packages(),
    entry_points={
        'console_scripts': [
            'intake-server = intake.cli.server.__main__:main',
            'intake = intake.cli.client.__main__:main'
        ],
        'intake.drivers': [
            'yaml_file_cat = intake.catalog.local:YAMLFileCatalog',
            'yaml_files_cat = intake.catalog.local:YAMLFilesCatalog',
            'csv = intake.source.csv:CSVSource',
            'textfiles = intake.source.textfiles:TextFilesSource',
            'catalog = intake.catalog.base:Catalog',
            'intake_remote = intake.catalog.remote:RemoteCatalog',
            'numpy = intake.source.npy:NPySource',
            'ndzarr = intake.source.zarr:ZarrArraySource',
            'zarr_cat = intake.catalog.zarr:ZarrGroupCatalog',
            'alias = intake.source.derived:AliasSource',
        ]
    },
    classifiers=[
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.6",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
    ],
    python_requires=">=3.6",
    long_description=long_description,
    long_description_content_type="text/markdown",
    tests_require=['pytest'],
    extras_require=extras_require,
    zip_safe=False,
)
If given, assume 38 | the URL points to a group, and descend the group to find the 39 | array at this location in the hierarchy. 40 | kwargs : passed on to dask.array.from_zarr 41 | """ 42 | self.urlpath = urlpath 43 | self.storage_options = storage_options or {} 44 | self.component = component 45 | self.kwargs = kwargs 46 | self.chunks = None 47 | self._arr = None 48 | super(ZarrArraySource, self).__init__(metadata=metadata) 49 | 50 | def _get_schema(self): 51 | import dask.array as da 52 | if self._arr is None: 53 | self._arr = da.from_zarr(self.urlpath, component=self.component, 54 | storage_options=self.storage_options, 55 | **self.kwargs) 56 | self.chunks = self._arr.chunks 57 | self.shape = self._arr.shape 58 | self.dtype = self._arr.dtype 59 | self.npartitions = self._arr.npartitions 60 | return Schema(dtype=str(self.dtype), shape=self.shape, 61 | extra_metadata=self.metadata, 62 | npartitions=self.npartitions, 63 | chunks=self.chunks) 64 | 65 | def _get_partition(self, i): 66 | if isinstance(i, list): 67 | i = tuple(i) 68 | return self._arr.blocks[i].compute() 69 | 70 | def read_partition(self, i): 71 | self._get_schema() 72 | return self._get_partition(i) 73 | 74 | def to_dask(self): 75 | self._get_schema() 76 | return self._arr 77 | 78 | def read(self): 79 | self._get_schema() 80 | return self._arr.compute() 81 | 82 | def _close(self): 83 | self._arr = None 84 | self._mapper = None 85 | -------------------------------------------------------------------------------- /intake/catalog/tests/catalog_caching.yml: -------------------------------------------------------------------------------- 1 | metadata: 2 | test: true 3 | plugins: 4 | source: 5 | - module: intake.catalog.tests.example1_source 6 | - module: intake.catalog.tests.example2_source 7 | sources: 8 | test_cache: 9 | description: cache a csv file from the local filesystem 10 | driver: csv 11 | cache: 12 | - argkey: urlpath 13 | regex: '{{ CATALOG_DIR }}/cache_data' 14 | type: file 15 | args: 16 | 
urlpath: '{{ CATALOG_DIR }}/cache_data/states.csv' 17 | test_cache_new: 18 | description: cache a csv file from the local filesystem 19 | driver: csv 20 | args: 21 | urlpath: 'filecache://{{ CATALOG_DIR }}/cache_data/states.csv' 22 | storage_options: 23 | target_protocol: 'file' 24 | cache_storage: "{{env(TEST_CACHE_DIR)}}" 25 | 26 | test_multiple_cache: 27 | description: testing what happens when there are multiple cache specs 28 | driver: csv 29 | cache: 30 | - argkey: urlpath 31 | regex: '{{ CATALOG_DIR }}/cache_data' 32 | type: file 33 | - argkey: urlpath 34 | regex: '{{ CATALOG_DIR }}' 35 | type: file 36 | args: 37 | urlpath: '{{ CATALOG_DIR }}/cache_data/states.csv' 38 | test_list_cache: 39 | description: testing what happens when there are multiple cache specs 40 | driver: csv 41 | cache: 42 | - argkey: urlpath 43 | regex: '{{ CATALOG_DIR }}cache_data' 44 | type: file 45 | args: 46 | urlpath: ['{{ CATALOG_DIR }}cache_data/states.csv', '{{ CATALOG_DIR }}cache_data/states.csv'] 47 | test_bad_type_cache_spec: 48 | description: cache a csv file from the local filesystem 49 | driver: csv 50 | cache: 51 | - argkey: urlpath 52 | regex: '{{ CATALOG_DIR }}/cache_data' 53 | type: noidea 54 | args: 55 | urlpath: '{{ CATALOG_DIR }}/cache_data/states.csv' 56 | text_cache: 57 | description: textfiles in this dir 58 | driver: textfiles 59 | cache: 60 | - argkey: urlpath 61 | regex: '{{ CATALOG_DIR }}' 62 | type: file 63 | args: 64 | urlpath: "{{ CATALOG_DIR }}/*.yml" 65 | arr_cache: 66 | description: small array 67 | driver: numpy 68 | cache: 69 | - argkey: path 70 | regex: '{{ CATALOG_DIR }}' 71 | type: file 72 | args: 73 | path: "{{ CATALOG_DIR }}/small.npy" 74 | chunks: 5 75 | test_no_regex: 76 | description: cache a csv file from the local filesystem 77 | driver: csv 78 | cache: 79 | - argkey: urlpath 80 | type: file 81 | args: 82 | urlpath: '{{ CATALOG_DIR }}/cache_data/states.csv' 83 | test_regex_no_match: 84 | description: regex does not match urlpath 85 | driver: 
class DriverRegistry(dict):
    """Dict of driver name to DataSource class, with lazy loading.

    A value stored as an ``entrypoints.EntryPoint`` is loaded (which performs
    the import) when it is accessed; the loaded object then replaces the
    entry point under the same key.
    """

    def __getitem__(self, item):
        value = super().__getitem__(item)
        if isinstance(value, entrypoints.EntryPoint):
            # resolve the entry point and keep the result for later lookups
            self[item] = value.load()
            value = super().__getitem__(item)
        return value
def import_name(name):
    """Resolve a dotted ``package.module.Attr`` path to the object it names.

    The text before the final dot is imported as a module, and the text
    after it is fetched as an attribute of that module.
    """
    from importlib import import_module

    module_path, attr = name.rsplit('.', 1)
    return getattr(import_module(module_path), attr)
def main(argv=None):
    """Parse the command line, open the catalog(s) and serve them.

    Parameters
    ----------
    argv: list of str, optional
        Full argument vector, program name first; ``sys.argv`` is used
        when omitted.
    """
    from intake.config import conf
    from intake import open_catalog

    argv = sys.argv if argv is None else argv

    parser = argparse.ArgumentParser(description='Intake Catalog Server')
    parser.add_argument('-p', '--port', type=int, default=conf['port'],
                        help='port number for server to listen on')
    parser.add_argument('--list-entries', action='store_true',
                        help='list catalog entries at startup')
    parser.add_argument('--sys-exit-on-sigterm', action='store_true',
                        help='internal flag used during unit testing to ensure '
                             '.coverage file is written')
    parser.add_argument('catalog_args', metavar='FILE', type=str, nargs='+',
                        help='Name of catalog YAML file')
    parser.add_argument('--flatten', dest='flatten', action='store_true')
    parser.add_argument('--no-flatten', dest='flatten', action='store_false')
    parser.add_argument('--ttl', dest='ttl', type=int, default=60)
    parser.add_argument('-a', '--address', type=str,
                        default=conf.get('address', 'localhost'),
                        help='address to use as a host, defaults to the address '
                             'in the configuration file, if provided otherwise localhost')
    parser.set_defaults(flatten=True)
    options = parser.parse_args(argv[1:])

    if options.sys_exit_on_sigterm:
        signal.signal(signal.SIGTERM, call_exit_on_sigterm)

    sources = options.catalog_args
    logger.info('Creating catalog from:')
    for source in sources:
        logger.info(' - %s' % source)

    # a single source is opened directly; several are passed as a list,
    # together with the flatten option
    if len(sources) == 1:
        target = sources[0]
        catalog = open_catalog(target, ttl=options.ttl)
    else:
        target = sources
        catalog = open_catalog(target, flatten=options.flatten,
                               ttl=options.ttl)
    logger.info("catalog_args: %s" % target)

    if options.list_entries:
        # This is not a good idea if the Catalog is huge.
        logger.info('Entries:' + ','.join(list(catalog)))

    logger.info('Listening on %s:%d' % (options.address, options.port))

    server = IntakeServer(catalog)
    app = server.make_app()
    server.start_periodic_functions(close_idle_after=3600.0)
    app.listen(options.port, address=options.address)

    loop = tornado.ioloop.IOLoop.current()
    try:
        loop.start()
    except KeyboardInterrupt:
        logger.critical("Exiting")
    except Exception as e:
        logger.critical("Exiting due to %s" % e)
def test_list_keys(temp_cache, env):
    """``intake cache list-keys`` is empty at first and lists a key after a cached read."""
    command = ['intake', 'cache', 'list-keys']

    before = subprocess.check_output(command, env=env,
                                     universal_newlines=True)
    assert before.startswith('[]')  # empty cache

    # Reading the cached source populates the cache.
    catalog = intake.open_catalog(cpath)
    catalog.test_cache.read()

    after = subprocess.check_output(command, env=env,
                                    universal_newlines=True)
    assert 'states.csv' in after
def test_clear_one(temp_cache, env):
    """Clearing one key through the CLI leaves the other cached key in place."""
    catalog = intake.open_catalog(cpath)
    for source in (catalog.test_cache, catalog.arr_cache):
        source.read()

    cached_keys = list(CacheMetadata())
    assert len(cached_keys) == 2

    subprocess.call(['intake', 'cache', 'clear', cached_keys[0]],
                    env=env)

    # Re-read the metadata: only the key that was not cleared may remain.
    remaining = CacheMetadata()
    assert len(remaining) == 1
    assert list(remaining)[0] == cached_keys[1]
Note that 22 | because Gitter chat is not searchable by future users we discourage usage 23 | questions and bug reports on Gitter and instead ask people to use Stack 24 | Overflow or GitHub. 25 | 4. **Monthly developer meeting** happens the first Thursday of the month at 26 | 9:00 US Central Time in `this video meeting `_. 27 | Meeting notes are available at 28 | https://docs.google.com/document/d/1GVcqubeTGICQzPGq_BZxn2Bdtf31lnvcrRxMY3GPCPs/edit 29 | 30 | .. raw:: html 31 | 32 | 33 | 34 | You can subscribe to this calendar to be notified of changes: 35 | 36 | * `Google Calendar `__ 37 | * `iCal `__ 38 | 39 | .. _`Stack Overflow with the #intake tag`: https://stackoverflow.com/questions/tagged/intake 40 | .. _`GitHub issue tracker`: https://github.com/intake/intake/issues/ 41 | 42 | 43 | Asking for help 44 | --------------- 45 | 46 | We welcome usage questions and bug reports from all users, even those who are 47 | new to using the project. There are a few things you can do to improve the 48 | likelihood of quickly getting a good answer. 49 | 50 | 1. **Ask questions in the right place**: We strongly prefer the use 51 | of Stack Overflow or GitHub issues over Gitter chat. GitHub and 52 | Stack Overflow are more easily searchable by future users, and are therefore a more 53 | efficient use of everyone's time. Gitter chat is strictly reserved for 54 | developer and community discussion. 55 | 56 | If you have a general question about how something should work or 57 | want best practices then use Stack Overflow. If you think you have found a 58 | bug then use GitHub. 59 | 60 | 2. **Ask only in one place**: Please restrict yourself to posting your 61 | question in only one place (likely Stack Overflow or GitHub) and don't post 62 | in both. 63 | 64 | 3. **Create a minimal example**: It is ideal to create `minimal, complete, 65 | verifiable examples `_.
def conda_prefix():
    """Fallback: ask conda in PATH for its prefix

    Runs ``conda info --json`` and reads the ``default_prefix`` entry of the
    JSON document it prints.

    Returns
    -------
    str or False
        The reported prefix, or ``False`` when conda cannot be executed,
        exits with an error, or prints output that cannot be decoded, is
        not valid JSON, or does not contain ``default_prefix``.
    """
    try:
        out = subprocess.check_output(['conda', 'info', '--json'])
        return json.loads(out.decode())["default_prefix"]
    except (subprocess.CalledProcessError, OSError,
            ValueError, KeyError, TypeError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError;
        # KeyError/TypeError cover well-formed JSON of an unexpected shape.
        # This is a best-effort fallback, so every failure means "no prefix".
        return False
def which(program):
    """Emulate posix ``which``

    Parameters
    ----------
    program : str
        Name of the executable to look up on ``PATH`` (or a path to it).

    Returns
    -------
    str or None
        Path of the executable, or ``None`` if it cannot be found.
    """
    # shutil.which replaces distutils.spawn.find_executable: distutils was
    # removed from the standard library in Python 3.12, so the old import
    # raises ModuleNotFoundError there. Imported locally, as the original did.
    import shutil
    return shutil.which(program)
version = '0.0.1' 10 | container = 'catalog' 11 | partition_access = None 12 | name = 'zarr_cat' 13 | 14 | def __init__(self, urlpath, storage_options=None, component=None, metadata=None, 15 | consolidated=False, name=None): 16 | """ 17 | 18 | Parameters 19 | ---------- 20 | urlpath : str 21 | Location of data file(s), possibly including protocol information 22 | storage_options : dict, optional 23 | Passed on to storage backend for remote files 24 | component : str, optional 25 | If None, build a catalog from the root group. If given, build the 26 | catalog from the group at this location in the hierarchy. 27 | metadata : dict, optional 28 | Catalog metadata. If not provided, will be populated from Zarr 29 | group attributes. 30 | consolidated : bool, optional 31 | If True, assume Zarr metadata has been consolidated. 32 | """ 33 | self._urlpath = urlpath 34 | self._storage_options = storage_options or {} 35 | self._component = component 36 | self._consolidated = consolidated 37 | self._grp = None 38 | self.name = name 39 | super().__init__(metadata=metadata) 40 | 41 | def _load(self): 42 | import zarr 43 | 44 | if self._grp is None: 45 | 46 | # obtain the zarr root group 47 | if isinstance(self._urlpath, zarr.hierarchy.Group): 48 | # use already-opened group, allows support for nested groups 49 | # as catalogs 50 | root = self._urlpath 51 | 52 | else: 53 | 54 | # obtain store 55 | if isinstance(self._urlpath, str): 56 | # open store from url 57 | from fsspec import get_mapper 58 | store = get_mapper(self._urlpath, **self._storage_options) 59 | else: 60 | # assume store passed directly 61 | store = self._urlpath 62 | 63 | # open root group 64 | if self._consolidated: 65 | # use consolidated metadata 66 | root = zarr.open_consolidated(store=store, mode='r') 67 | else: 68 | root = zarr.open_group(store=store, mode='r') 69 | 70 | # deal with component path 71 | if self._component is None: 72 | self._grp = root 73 | else: 74 | self._grp = root[self._component] 75 | 76 
| # use zarr attributes as metadata 77 | self.metadata.update(self._grp.attrs.asdict()) 78 | 79 | # build catalog entries 80 | entries = {} 81 | for k, v in self._grp.items(): 82 | if isinstance(v, zarr.core.Array): 83 | entry = LocalCatalogEntry(name=k, 84 | description='', 85 | driver='ndzarr', 86 | args=dict(urlpath=v), 87 | catalog=self) 88 | else: 89 | entry = LocalCatalogEntry(name=k, 90 | description='', 91 | driver='zarr_cat', 92 | args=dict(urlpath=v)) 93 | entries[k] = entry 94 | self._entries = entries 95 | 96 | def to_zarr(self): 97 | return self._grp 98 | -------------------------------------------------------------------------------- /intake/cli/util.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 6 | #----------------------------------------------------------------------------- 7 | ''' Provide a ``main`` function to run intake commands. 8 | 9 | ''' 10 | 11 | import logging 12 | log = logging.getLogger(__name__) 13 | 14 | #----------------------------------------------------------------------------- 15 | # Imports 16 | #----------------------------------------------------------------------------- 17 | 18 | # Standard library imports 19 | import sys 20 | 21 | # External imports 22 | 23 | # Intake imports 24 | 25 | 26 | #----------------------------------------------------------------------------- 27 | # API 28 | #----------------------------------------------------------------------------- 29 | 30 | def die(message, status=1): 31 | ''' Print an error message and exit. 32 | This function will call ``sys.exit`` with the given ``status`` and the 33 | process will terminate. 
def nice_join(seq, sep=", ", conjunction="or"):
    ''' Build an English-style enumeration from a sequence of items.

    Every item is converted with ``str``. All items but the last are joined
    with ``sep``; the last one is attached with ``conjunction``.

    Args:
        seq (seq[str]) : the items to join

        sep (str, optional) : delimiter placed between items (default: ", ")

        conjunction (str or None, optional) : word placed before the last
            item, or None to fall back to a plain ``sep.join``
            (default: "or")

    Returns:
        a joined string

    Examples:
        >>> nice_join(["a", "b", "c"])
        'a, b or c'

    '''
    items = list(map(str, seq))

    # A conjunction only makes sense with at least two items.
    if conjunction is not None and len(items) > 1:
        leading = sep.join(items[:-1])
        return "%s %s %s" % (leading, conjunction, items[-1])

    return sep.join(items)
def callback(args):
    """Always raise ``ValueError`` carrying the arguments it was called with"""
    failure = ValueError('Callback provided:', args)
    raise failure
def test_file_selector_move_down(file_selector):
    """Selecting a directory in the main widget moves the selector into it."""
    start_path = os.getcwd() + '/'
    # name of the current directory, with the trailing slash
    child = start_path.rsplit('/', 2)[-2] + '/'

    # go up one level first, so that moving back down into the
    # initial directory is guaranteed to be possible
    file_selector.move_up()

    # assigning to the main widget is what triggers the move down
    file_selector.main.value = [child]
    assert file_selector.path == start_path

    # the selection on the main widget should have been emptied
    assert file_selector.main.value == []
def assert_widget_matches(browser):
    """Check that ``browser`` and its widget agree on options and selection."""
    mirrored = (('options', 'options'), ('selected', 'value'))
    for browser_attr, widget_attr in mirrored:
        assert getattr(browser, browser_attr) == getattr(browser.widget, widget_attr)
def test_catalog_browser_add_nested_catalog(cat_browser, parent_cat):
    """Adding a catalog with children selects it and lists the children under it."""
    expected_labels = ['catalog1', 'parent', '└── child1', '└── child2']

    cat_browser.add(parent_cat)

    assert parent_cat.name in cat_browser.options
    assert cat_browser.selected == [parent_cat]
    shown_labels = list(cat_browser.options.keys())
    assert shown_labels == expected_labels
    assert_widget_matches(cat_browser)
assert cat_browser.selected == [parent_cat] 100 | assert list(cat_browser.options.keys()) == ['catalog1', 'parent', '└── child1', '└── child2'] 101 | cat_browser.remove_selected() 102 | assert list(cat_browser.options.keys()) == ['catalog1'] 103 | assert_widget_matches(cat_browser) 104 | -------------------------------------------------------------------------------- /docs/source/api_other.rst: -------------------------------------------------------------------------------- 1 | Other Classes 2 | ============= 3 | 4 | Cache Types 5 | ----------- 6 | 7 | .. autosummary:: 8 | 9 | intake.source.cache.FileCache 10 | intake.source.cache.DirCache 11 | intake.source.cache.CompressedCache 12 | intake.source.cache.DATCache 13 | intake.source.cache.CacheMetadata 14 | 15 | .. autoclass:: intake.source.cache.FileCache 16 | :members: 17 | 18 | .. autoclass:: intake.source.cache.DirCache 19 | :members: 20 | 21 | .. autoclass:: intake.source.cache.CompressedCache 22 | :members: 23 | 24 | .. autoclass:: intake.source.cache.DATCache 25 | :members: 26 | 27 | .. autoclass:: intake.source.cache.CacheMetadata 28 | :members: 29 | 30 | Auth 31 | ---- 32 | 33 | .. autosummary:: 34 | 35 | intake.auth.secret.SecretAuth 36 | intake.auth.secret.SecretClientAuth 37 | 38 | .. autoclass:: intake.auth.secret.SecretAuth 39 | :members: 40 | 41 | .. autoclass:: intake.auth.secret.SecretClientAuth 42 | :members: 43 | 44 | Containers 45 | ---------- 46 | 47 | .. autosummary:: 48 | 49 | intake.container.dataframe.RemoteDataFrame 50 | intake.container.ndarray.RemoteArray 51 | intake.container.semistructured.RemoteSequenceSource 52 | 53 | .. autoclass:: intake.container.dataframe.RemoteDataFrame 54 | :members: 55 | 56 | .. autoclass:: intake.container.ndarray.RemoteArray 57 | :members: 58 | 59 | .. autoclass:: intake.container.semistructured.RemoteSequenceSource 60 | :members: 61 | 62 | Server 63 | ------ 64 | 65 | .. 
autosummary:: 66 | 67 | intake.cli.server.server.IntakeServer 68 | intake.cli.server.server.ServerInfoHandler 69 | intake.cli.server.server.SourceCache 70 | intake.cli.server.server.ServerSourceHandler 71 | 72 | .. autoclass:: intake.cli.server.server.IntakeServer 73 | :members: 74 | 75 | .. autoclass:: intake.cli.server.server.ServerInfoHandler 76 | :members: 77 | 78 | .. autoclass:: intake.cli.server.server.SourceCache 79 | :members: 80 | 81 | .. autoclass:: intake.cli.server.server.ServerSourceHandler 82 | :members: 83 | 84 | GUI 85 | --- 86 | 87 | .. autosummary:: 88 | 89 | intake.interface.base.Base 90 | intake.interface.base.BaseSelector 91 | intake.interface.base.BaseView 92 | intake.interface.catalog.add.FileSelector 93 | intake.interface.catalog.add.URLSelector 94 | intake.interface.catalog.add.CatAdder 95 | intake.interface.catalog.gui.CatGUI 96 | intake.interface.catalog.search.Search 97 | intake.interface.catalog.search.SearchInputs 98 | intake.interface.catalog.select.CatSelector 99 | intake.interface.source.defined_plots.Plots 100 | intake.interface.source.gui.SourceGUI 101 | intake.interface.source.description.Description 102 | intake.interface.source.select.SourceSelector 103 | 104 | .. autoclass:: intake.interface.base.Base 105 | :members: 106 | 107 | .. autoclass:: intake.interface.base.BaseSelector 108 | :members: 109 | 110 | .. autoclass:: intake.interface.base.BaseView 111 | :members: 112 | 113 | .. autoclass:: intake.interface.catalog.add.FileSelector 114 | :members: 115 | 116 | .. autoclass:: intake.interface.catalog.add.URLSelector 117 | :members: 118 | 119 | .. autoclass:: intake.interface.catalog.add.CatAdder 120 | :members: 121 | 122 | .. autoclass:: intake.interface.catalog.gui.CatGUI 123 | :members: 124 | 125 | .. autoclass:: intake.interface.catalog.search.Search 126 | :members: 127 | 128 | .. autoclass:: intake.interface.catalog.search.SearchInputs 129 | :members: 130 | 131 | .. 
autoclass:: intake.interface.catalog.select.CatSelector 132 | :members: 133 | 134 | .. autoclass:: intake.interface.source.defined_plots.Plots 135 | :members: 136 | 137 | .. autoclass:: intake.interface.source.gui.SourceGUI 138 | :members: 139 | 140 | .. autoclass:: intake.interface.source.description.Description 141 | :members: 142 | 143 | .. autoclass:: intake.interface.source.select.SourceSelector 144 | :members: 145 | 146 | -------------------------------------------------------------------------------- /intake/container/semistructured.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors 3 | # All rights reserved. 4 | # 5 | # The full license is in the LICENSE file, distributed with this software. 6 | #----------------------------------------------------------------------------- 7 | 8 | import datetime 9 | from intake.container.base import RemoteSource, get_partition 10 | from intake.source.base import Schema 11 | 12 | 13 | class RemoteSequenceSource(RemoteSource): 14 | """Sequence-of-things source on an Intake server""" 15 | name = 'remote_sequence' 16 | container = 'python' 17 | 18 | def __init__(self, url, headers, **kwargs): 19 | self.url = url 20 | self.npartitions = kwargs.get('npartition', 1) 21 | self.partition_access = self.npartitions > 1 22 | self.headers = headers 23 | self.metadata = kwargs.get('metadata', {}) 24 | self._schema = Schema(npartitions=self.npartitions, 25 | extra_metadata=self.metadata) 26 | self.bag = None 27 | super(RemoteSequenceSource, self).__init__(url, headers, **kwargs) 28 | 29 | def _load_metadata(self): 30 | import dask.bag as db 31 | import dask 32 | if self.bag is None: 33 | self.parts = [dask.delayed(get_partition)( 34 | self.url, self.headers, self._source_id, self.container, i 35 | ) 36 | for i in range(self.npartitions)] 37 | self.bag 
def write_file(data, fo, encoder):
    """Encode each item of ``data`` and write it to the file behind ``fo``.

    ``fo`` is entered as a context manager; the object it yields receives one
    ``write`` call per item, in iteration order, with ``encoder(item)`` as the
    argument. Nothing is written between items.
    """
    with fo as handle:
        for encoded in map(encoder, data):
            handle.write(encoded)
--------------------------------------------------------------------------------