├── sparkxarray
    ├── tests
    │   ├── __init__.py
    │   └── test_reader.py
    ├── utils
    │   ├── __init__.py
    │   └── vis_utils.py
    ├── applications
    │   ├── __init__.py
    │   └── shapefile_masking.py
    ├── __init__.py
    └── reader.py
├── setup.cfg
├── MANIFEST.in
├── ci
    ├── docs-requirements.txt
    ├── requirements-py35.yml
    ├── requirements-py36.yml
    ├── bin
    │   └── download_travis_dependencies.sh
    └── install_python.ps1
├── datasets
    ├── air.sig995.2012.nc
    └── NCEP
    │   ├── ersst.v4.185401.nc
    │   ├── ersst.v4.185402.nc
    │   ├── ersst.v4.185403.nc
    │   └── ersst.v4.185404.nc
├── readthedocs.yml
├── scripts
    └── cleanup.sh
├── examples
    └── bias
    │   ├── knmi_bias_compared_to_wrf.png
    │   ├── wrf_bias_compared_to_knmi.png
    │   └── bias.ipynb
├── .github
    ├── PULL_REQUEST_TEMPLATE.md
    └── ISSUE_TEMPLATE.md
├── mkdocs.yml
├── appveyor.yml
├── .travis.yml
├── .gitignore
├── setup.py
├── README.md
├── LICENSE
└── bias.ipynb


/sparkxarray/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sparkxarray/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/sparkxarray/applications/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [bdist_wheel]
2 | universal = 1


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.md
2 | include LICENSE


--------------------------------------------------------------------------------
/ci/docs-requirements.txt:
--------------------------------------------------------------------------------
1 | pymdown-extensions
2 | 


--------------------------------------------------------------------------------
/datasets/air.sig995.2012.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andersy005/spark-xarray/HEAD/datasets/air.sig995.2012.nc


--------------------------------------------------------------------------------
/readthedocs.yml:
--------------------------------------------------------------------------------
1 | conda:
2 |     file: doc/environment.yml
3 | python:
4 |    version: 3
5 | setup_py_install: true


--------------------------------------------------------------------------------
/datasets/NCEP/ersst.v4.185401.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andersy005/spark-xarray/HEAD/datasets/NCEP/ersst.v4.185401.nc


--------------------------------------------------------------------------------
/datasets/NCEP/ersst.v4.185402.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andersy005/spark-xarray/HEAD/datasets/NCEP/ersst.v4.185402.nc


--------------------------------------------------------------------------------
/datasets/NCEP/ersst.v4.185403.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andersy005/spark-xarray/HEAD/datasets/NCEP/ersst.v4.185403.nc


--------------------------------------------------------------------------------
/datasets/NCEP/ersst.v4.185404.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andersy005/spark-xarray/HEAD/datasets/NCEP/ersst.v4.185404.nc


--------------------------------------------------------------------------------
/scripts/cleanup.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | rm -r -f metastore_db
4 | rm -r -f derby.log
5 | rm -r -f spark-warehouse


--------------------------------------------------------------------------------
/examples/bias/knmi_bias_compared_to_wrf.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andersy005/spark-xarray/HEAD/examples/bias/knmi_bias_compared_to_wrf.png


--------------------------------------------------------------------------------
/examples/bias/wrf_bias_compared_to_knmi.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andersy005/spark-xarray/HEAD/examples/bias/wrf_bias_compared_to_knmi.png


--------------------------------------------------------------------------------
/sparkxarray/utils/vis_utils.py:
--------------------------------------------------------------------------------
1 | """Utilities related to model visualization."""
2 | #import holoviews as hv
3 | #import geoviews as gv
4 | #import geoviews.feature as gf
5 | #from cartopy import crs
6 | 
7 | #hv.notebook_extension()
8 | 


--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 |  - [ ] closes #xxxx
2 |  - [ ] tests added / passed
3 |  - [ ] docs reflect changes
4 |  - [ ] passes ``flake8 downscale tests docs``
5 |  - [ ] entry in HISTORY.rst
6 | 
7 | [summarize your pull request here]
8 | 


--------------------------------------------------------------------------------
/sparkxarray/__init__.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import
 2 | 
 3 | # List to define the behaviour of imports of the form:
 4 | #           from sparkxarray import *
 5 | 
 6 | __all__ = []
 7 | 
 8 | # Package version number.
 9 | __version__ = version = '0.1.dev0'
10 | 
11 | 
12 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | * spark-xarray version:
 2 | * Python version:
 3 | * Operating System:
 4 | 
 5 | ### Description
 6 | 
 7 | Describe what you were trying to get done.
 8 | Tell us what happened, what went wrong, and what you expected to happen.
 9 | 
10 | ### What I Did
11 | 
12 | ```
13 | Paste the command(s) you ran and the output.
14 | If there was a crash, please include the traceback here.
15 | ```
16 | 


--------------------------------------------------------------------------------
/ci/requirements-py35.yml:
--------------------------------------------------------------------------------
 1 | name: test_env
 2 | channels:
 3 | - conda-forge
 4 | dependencies:
 5 | - python=3.5
 6 | - dask
 7 | - h5py
 8 | - h5netcdf
 9 | - toolz
10 | - seaborn
11 | - numpy
12 | - scipy
13 | - pandas
14 | - netcdf4
15 | - xarray
16 | - matplotlib
17 | - pyspark
18 | - pytest
19 | - geopandas
20 | - pip:
21 |   - coverage
22 |   - coveralls
23 |   - codecov
24 |   - pytest-cov
25 |   - pymdown-extensions


--------------------------------------------------------------------------------
/ci/requirements-py36.yml:
--------------------------------------------------------------------------------
 1 | name: test_env
 2 | channels:
 3 | - conda-forge
 4 | dependencies:
 5 | - python=3.6
 6 | - dask
 7 | - h5py
 8 | - h5netcdf
 9 | - toolz
10 | - seaborn
11 | - numpy
12 | - scipy
13 | - pandas
14 | - netcdf4
15 | - xarray
16 | - matplotlib
17 | - pyspark
18 | - pytest
19 | - geopandas
20 | - pip:
21 |   - coverage
22 |   - coveralls
23 |   - codecov
24 |   - pytest-cov
25 |   - pymdown-extensions


--------------------------------------------------------------------------------
/ci/bin/download_travis_dependencies.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | echo "Downloading Spark if necessary"
 3 | echo "Spark version = $SPARK_VERSION"
 4 | echo "Spark build = $SPARK_BUILD"
 5 | echo "Spark build URL = $SPARK_BUILD_URL"
 6 | mkdir -p $HOME/.cache/spark-versions
 7 | filename="$HOME/.cache/spark-versions/$SPARK_BUILD.tgz"
 8 | if ! [ -f $filename ]; then
 9 | 	echo "Downloading file..."
10 | 	echo `which curl`
11 | 	curl "$SPARK_BUILD_URL" > $filename
12 | 	echo "Content of directory:"
13 | 	ls -la $HOME/.cache/spark-versions/*
14 | 	tar xvf $filename --directory $HOME/.cache/spark-versions > /dev/null
15 | fi
16 | 


--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
 1 | site_name: spark-xarray
 2 | theme: readthedocs
 3 | repo_url: https://github.com/andersy005/spark-xarray
 4 | site_url: https://andersy005.github.io/spark-xarray/
 5 | 
 6 | pages:
 7 | - Home: index.md
 8 | 
 9 | markdown_extensions:
10 |   - pymdownx.arithmatex
11 |   - pymdownx.betterem(smart_enable=all)
12 |   - pymdownx.caret
13 |   - pymdownx.critic
14 |   - pymdownx.inlinehilite
15 |   - pymdownx.magiclink
16 |   - pymdownx.mark
17 |   - pymdownx.smartsymbols
18 |   - pymdownx.superfences
19 |   - pymdownx.tasklist(custom_checkbox=true)
20 |   - pymdownx.tilde
21 |   - codehilite
22 |   - footnotes
23 |   - toc(permalink=true)
24 | 
25 | 
26 | extra:
27 |   palette:
28 |     primary: blue
29 |     accent: blue
30 | 
31 |   font:
32 |     text: Roboto
33 |     code: Roboto Mono
34 | extra_javascript:
35 |   - https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMS_HTML
36 |   - 'javascripts/mathjax.js'
37 | 


--------------------------------------------------------------------------------
/sparkxarray/tests/test_reader.py:
--------------------------------------------------------------------------------
 1 | from pyspark.sql import SparkSession 
 2 | from sparkxarray.reader import ncread
 3 | import os
 4 | 
 5 | 
 6 | spark = SparkSession.builder.appName('spark-tests').getOrCreate()
 7 | sc = spark.sparkContext
 8 | print(os.getcwd())
 9 | filename = os.path.abspath('sparkxarray/tests/data/air.sig995.2012.nc')
10 | print(filename)
11 | paths = os.path.abspath('sparkxarray/tests/data/NCEP/*.nc')
12 | print(paths)
13 | 
14 | ### Tests for single file
15 | rdd1 = ncread(sc, filename, mode='single', partition_on=['lat', 'lon'], partitions=300)
16 | print(rdd1.count())
17 | print(rdd1.first())
18 | print(rdd1.getNumPartitions())
19 |     
20 | 
21 | rdd2 = ncread(sc, filename, mode='single', partition_on=['time'], partitions=80)
22 | print(rdd2.count())
23 | print(rdd2.first())
24 | 
25 | 
26 | ### Tests for Multiple files 
27 | rdd3 = ncread(sc, paths, mode='multi', partition_on=['lat', 'lon'], partitions=300)
28 | print(rdd3.count())
29 | print(rdd3.first())
30 | 
31 | rdd4 = ncread(sc, paths, mode='multi', partition_on=['lat', 'lon', 'time', 'nv'], partitions=1000)
32 | print(rdd4.count())
33 | print(rdd4.first())
34 | 
35 | 
36 | 
37 | 
38 | 
39 | 
40 | 
41 | 
42 | 


--------------------------------------------------------------------------------
/appveyor.yml:
--------------------------------------------------------------------------------
 1 | # CI on Windows via appveyor
 2 | # Adapted (minorly) from xarray's appveyor.yml, which itself
 3 | # was based on Olivier Grisel's python-appveyor-demo
 4 | 
 5 | environment:
 6 | 
 7 |   matrix:
 8 | 
 9 |     - PYTHON: "C:\\Python35-conda64"
10 |       PYTHON_VERSION: "3.5"
11 |       PYTHON_ARCH: "64"
12 |       CONDA_ENV: "py35"
13 | 
14 |     - PYTHON: "C:\\Python36-conda64"
15 |       PYTHON_VERSION: "3.6"
16 |       PYTHON_ARCH: "64"
17 |       CONDA_ENV: "py36"
18 | 
19 | install:
20 |   # Install miniconda Python
21 |   - "powershell ./ci/install_python.ps1"
22 | 
23 |   # Prepend newly installed Python to the PATH of this build (this cannot be
24 |   # done from inside the powershell script as it would require to restart
25 |   # the parent CMD process).
26 |   - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%"
27 | 
28 |   # Check that we have the expected version and architecture for Python
29 |   - "python --version"
30 |   - "python -c \"import struct; print(struct.calcsize('P') * 8)\""
31 | 
32 |   # install xarray and depenencies
33 |   - "conda env create -f ./ci/requirements-%CONDA_ENV%.yml"
34 |   - "activate test_env"
35 |   # Install the package:
36 |   - "python setup.py install"
37 |   #- git clone https://github.com/andersy005/spark-xarray.git
38 |   #- cd spark-xarray
39 |   #- pip install -e .
40 | 
41 | build: false
42 | 
43 | test_script:
44 |   - "coverage run sparkxarray/tests/test_reader.py"
45 | 
46 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | notifications:
 2 |   email: false
 3 | 
 4 | sudo: required
 5 | 
 6 | dist: trusty
 7 | 
 8 | language: python
 9 | 
10 | matrix:
11 |   include:
12 |   - python: 3.5
13 |     env: CONDA_ENV=py35
14 | 
15 |   - python: 3.6
16 |     env: CONDA_ENV=py36
17 | 
18 |  
19 | before_install:
20 |   - sudo apt-get update
21 |   # We do this conditionally because it saves us some downloading if the
22 |   # version is the same.
23 |   - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then
24 |       wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda.sh;
25 |     else
26 |       wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh;
27 |     fi
28 |   - bash miniconda.sh -b -p $HOME/miniconda
29 |   - export PATH="$HOME/miniconda/bin:$PATH"
30 |   - hash -r
31 |   - conda config --set always_yes yes --set changeps1 no
32 |   - conda update -q conda
33 |   # Useful for debugging any issues with conda
34 |   - conda info -a
35 | 
36 | 
37 | install:
38 | 
39 |   #- conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION --file ./ci/requirements.txt
40 |   - conda env create -f ci/requirements-$CONDA_ENV.yml
41 |   - source activate test_env
42 |   # Install the package:
43 |   - python setup.py install
44 |   #- git clone https://github.com/andersy005/spark-xarray.git
45 |   #- cd spark-xarray
46 |   #- pip install -e .
47 | 
48 | script:
49 |   - coverage run sparkxarray/tests/test_reader.py
50 | 
51 | after_success:
52 |   - bash <(curl -s https://codecov.io/bash)
53 |   - codecov
54 | 
55 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | *.ipynb_checkpoints
  2 | *.pyc
  3 | /lab
  4 | 
  5 | # Byte-compiled / optimized / DLL files
  6 | __pycache__/
  7 | *.py[cod]
  8 | *$py.class
  9 | 
 10 | # C extensions
 11 | *.so
 12 | 
 13 | # Distribution / packaging
 14 | .Python
 15 | env/
 16 | build/
 17 | develop-eggs/
 18 | dist/
 19 | downloads/
 20 | eggs/
 21 | .eggs/
 22 | lib/
 23 | lib64/
 24 | parts/
 25 | sdist/
 26 | var/
 27 | wheels/
 28 | *.egg-info/
 29 | .installed.cfg
 30 | *.egg
 31 | 
 32 | # PyInstaller
 33 | #  Usually these files are written by a python script from a template
 34 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 35 | *.manifest
 36 | *.spec
 37 | 
 38 | # Installer logs
 39 | pip-log.txt
 40 | pip-delete-this-directory.txt
 41 | 
 42 | # Unit test / coverage reports
 43 | htmlcov/
 44 | .tox/
 45 | .coverage
 46 | .coverage.*
 47 | .cache
 48 | nosetests.xml
 49 | coverage.xml
 50 | *.cover
 51 | .hypothesis/
 52 | 
 53 | # Translations
 54 | *.mo
 55 | *.pot
 56 | 
 57 | # Django stuff:
 58 | *.log
 59 | local_settings.py
 60 | 
 61 | # Flask stuff:
 62 | instance/
 63 | .webassets-cache
 64 | 
 65 | # Scrapy stuff:
 66 | .scrapy
 67 | 
 68 | # Sphinx documentation
 69 | docs/_build/
 70 | 
 71 | # PyBuilder
 72 | target/
 73 | 
 74 | # Jupyter Notebook
 75 | .ipynb_checkpoints
 76 | 
 77 | # pyenv
 78 | .python-version
 79 | 
 80 | # celery beat schedule file
 81 | celerybeat-schedule
 82 | 
 83 | # SageMath parsed files
 84 | *.sage.py
 85 | 
 86 | # dotenv
 87 | .env
 88 | 
 89 | # virtualenv
 90 | .venv
 91 | venv/
 92 | ENV/
 93 | 
 94 | # Spyder project settings
 95 | .spyderproject
 96 | .spyproject
 97 | 
 98 | # Rope project settings
 99 | .ropeproject
100 | 
101 | # mkdocs documentation
102 | /site
103 | 
104 | # mypy
105 | .mypy_cache/
106 | 
107 | /tests


--------------------------------------------------------------------------------
/sparkxarray/applications/shapefile_masking.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """ Masking an area in a netCDF dataset using a geographical outline.
 3 | This module reads a shapefile using geopandas. The user have the option 
 4 | of masking the area inside or outside the geographical outline.
 5 | """
 6 | 
 7 | import warnings
 8 | warnings.filterwarnings('ignore')
 9 | import xarray as xr 
10 | import geopandas as gpd 
11 | from geopandas import GeoDataFrame  # Loading boundaries data
12 | from shapely.geometry import Point, Polygon, shape # For creating geospatial data
13 | import time
14 | from functools import partial 
15 | 
16 | 
17 | def _shift_lon_values(dset):
18 |     from shapely.geometry import Point 
19 |     lat = dset.lat.values 
20 |     lon = dset.lon.values 
21 | 
22 |     if lon >= 180:
23 |         lon = lon - 360.
24 | 
25 |     coordinate = Point(lon, lat)
26 |     return coordinate, dset 
27 |     
28 | 
29 | 
30 | def masking(sc, rdd, shapefile_path, mask_area='in'):
31 | 
32 |     print("Loading and broadcasting the shapefile....\n\n")
33 |     shape = GeoDataFrame.from_file(shapefile_path)
34 | 
35 |     my_shape = sc.broadcast(shape)
36 |     print("Successfully loaded the shapefile....\n\n")
37 | 
38 |     print("Masking the data against the shapefile in progress....\n\n")
39 |     start = time.time()
40 |     masked_rdd = rdd.map(_shift_lon_values).filter(partial(_point_look_up, my_shape))\
41 |               .collect()
42 |     masked_data = [item[1] for item in masked_rdd]
43 |     
44 |     dset = xr.auto_combine(masked_data, concat_dim=None)
45 |     
46 |     stop = time.time()
47 |     total_time = stop - start 
48 |     print("Successfully masked the data in {} seconds\n".format(round(total_time, 3)))
49 |     return dset
50 |     
51 | 
52 | 
53 | def _point_look_up(my_shape, element):
54 |     grid_point = element[0]
55 |     dset = element[1]
56 | 
57 |     # Access the broadcasted shape on the workers
58 |     gdf = my_shape.value 
59 | 
60 |     # See if the grid point i s inside the shape
61 |     check = gdf.contains(grid_point).unique()
62 | 
63 | 
64 |     if True in check:
65 |         return True
66 |     else:
67 |         return False
68 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | from setuptools import setup
 3 | from setuptools import find_packages
 4 | import os
 5 | from ast import parse
 6 | 
 7 | LONG_DESCRIPTION = """
 8 | **spark-xarray**: 
 9 |       
10 | Spark-xarray is a high level, Apache Spark and xarray-based Python library for working 
11 | with netCDF climate model data with Apache Spark.
12 | 
13 |  Important links
14 | ------------------
15 | 
16 | - Official source code repo: https://github.com/andersy005/spark-xarray
17 | - Issue tracker: https://github.com/andersy005/spark-xarray/issues
18 | 
19 | """
20 | 
21 | NAME = 'spark-xarray'
22 | 
23 | def version():
24 |       """Return version string."""
25 |       with open(os.path.join(os.path.abspath(os.path.dirname(__file__)),'sparkxarray', '__init__.py')) as input_file:
26 |             for line in input_file:
27 |                   if line.startswith('__version__'):
28 |                         return parse(line).body[0].value.s
29 | 
30 | 
31 | #for line in open('sparkxarray/__init__.py').readlines():
32 | #      if line.startswith('__version__'):
33 | #            exec(line)
34 | 
35 | INSTALL_REQUIRES = (['numpy >= 1.7',
36 |                       'scipy >= 0.16',
37 |                       'pandas >= 0.15.0',
38 |                       'netCDF4 >= 1.2',
39 |                       'xarray>=0.9.5', 
40 |                       'dask >= 0.14',
41 |                       'distributed >= 1.16.1',
42 |                       'geopandas >= 0.3.0', 
43 |                       'toolz>=0.8.2',
44 |                       'cloudpickle >= 0.2.1'])
45 | 
46 | packages = ['sparkxarray', 'sparkxarray.tests']
47 | 
48 | package_data = {'sparkxarray': ['data/*.nc']}
49 | 
50 | setup(name=NAME,
51 |       version=version(),
52 |       author='Anderson Banihirwe, Kevin Paul',
53 |       author_email='axbanihirwe@gmail.com',
54 |       description='Big Atmospheric & Oceanic Data Analysis with Apache Spark + xarray',
55 |       url='https://github.com/andersy005/spark-xarray',
56 |       long_description=LONG_DESCRIPTION,
57 |       install_requires=INSTALL_REQUIRES,
58 |       packages=packages,
59 |       package_data=package_data,
60 |       keywords=[' Climate Science', 'xarray', 'Apache Spark', 'Distributed', 'netCDF', 'Parallel'],
61 |       classifiers=[
62 |         'Development Status :: 1 - Beta',
63 |         'Intended Audience :: Science/Research',
64 |         'License :: OSI Approved :: Apache',
65 |         'Natural Language :: English',
66 |         'Operating System :: OS Independent',
67 |         'Programming Language :: Python :: 2.7',
68 |         'Programming Language :: Python :: 3',
69 |         'Programming Language :: Python :: 3.4',
70 |         'Programming Language :: Python :: 3.5',
71 |         'Programming Language :: Python :: 3.6',
72 |         'Topic :: Scientific/Engineering :: Atmospheric Science'
73 |        ],
74 |       zip_safe=False,
75 |       
76 | )
77 | 


--------------------------------------------------------------------------------
/ci/install_python.ps1:
--------------------------------------------------------------------------------
 1 | # Sample script to install Python and pip under Windows
 2 | # Authors: Olivier Grisel, Jonathan Helmus and Kyle Kastner
 3 | # License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/
 4 | 
 5 | $MINICONDA_URL = "http://repo.continuum.io/miniconda/"
 6 | $BASE_URL = "https://www.python.org/ftp/python/"
 7 | 
 8 | 
 9 | function DownloadMiniconda ($python_version, $platform_suffix) {
10 |     $webclient = New-Object System.Net.WebClient
11 |     if ($python_version -match "3.6") {
12 |         $filename = "Miniconda3-latest-Windows-" + $platform_suffix + ".exe"
13 |     } else {
14 |         $filename = "Miniconda2-latest-Windows-" + $platform_suffix + ".exe"
15 |     }
16 |     $url = $MINICONDA_URL + $filename
17 | 
18 |     $basedir = $pwd.Path + "\"
19 |     $filepath = $basedir + $filename
20 |     if (Test-Path $filename) {
21 |         Write-Host "Reusing" $filepath
22 |         return $filepath
23 |     }
24 | 
25 |     # Download and retry up to 3 times in case of network transient errors.
26 |     Write-Host "Downloading" $filename "from" $url
27 |     $retry_attempts = 2
28 |     for($i=0; $i -lt $retry_attempts; $i++){
29 |         try {
30 |             $webclient.DownloadFile($url, $filepath)
31 |             break
32 |         }
33 |         Catch [Exception]{
34 |             Start-Sleep 1
35 |         }
36 |    }
37 |    if (Test-Path $filepath) {
38 |        Write-Host "File saved at" $filepath
39 |    } else {
40 |        # Retry once to get the error message if any at the last try
41 |        $webclient.DownloadFile($url, $filepath)
42 |    }
43 |    return $filepath
44 | }
45 | 
46 | 
47 | function InstallMiniconda ($python_version, $architecture, $python_home) {
48 |     Write-Host "Installing Python" $python_version "for" $architecture "bit architecture to" $python_home
49 |     if (Test-Path $python_home) {
50 |         Write-Host $python_home "already exists, skipping."
51 |         return $false
52 |     }
53 |     if ($architecture -match "32") {
54 |         $platform_suffix = "x86"
55 |     } else {
56 |         $platform_suffix = "x86_64"
57 |     }
58 | 
59 |     $filepath = DownloadMiniconda $python_version $platform_suffix
60 |     Write-Host "Installing" $filepath "to" $python_home
61 |     $install_log = $python_home + ".log"
62 |     $args = "/S /D=$python_home"
63 |     Write-Host $filepath $args
64 |     Start-Process -FilePath $filepath -ArgumentList $args -Wait -Passthru
65 |     if (Test-Path $python_home) {
66 |         Write-Host "Python $python_version ($architecture) installation complete"
67 |     } else {
68 |         Write-Host "Failed to install Python in $python_home"
69 |         Get-Content -Path $install_log
70 |         Exit 1
71 |     }
72 | }
73 | 
74 | 
75 | function InstallCondaPackages ($python_home, $spec) {
76 |     $conda_path = $python_home + "\Scripts\conda.exe"
77 |     $args = "install --yes " + $spec
78 |     Write-Host ("conda " + $args)
79 |     Start-Process -FilePath "$conda_path" -ArgumentList $args -Wait -Passthru
80 | }
81 | 
82 | function UpdateConda ($python_home) {
83 |     $conda_path = $python_home + "\Scripts\conda.exe"
84 |     Write-Host "Updating conda..."
85 |     $args = "update --yes conda"
86 |     Write-Host $conda_path $args
87 |     Start-Process -FilePath "$conda_path" -ArgumentList $args -Wait -Passthru
88 | }
89 | 
90 | 
91 | function main () {
92 |     InstallMiniconda $env:PYTHON_VERSION $env:PYTHON_ARCH $env:PYTHON
93 |     UpdateConda $env:PYTHON
94 |     InstallCondaPackages $env:PYTHON "conda-build jinja2 anaconda-client"
95 | }
96 | 
97 | main


--------------------------------------------------------------------------------
/sparkxarray/reader.py:
--------------------------------------------------------------------------------
  1 | """ Interface for Data Ingestion.
  2 | """
  3 | # Licensed to the Apache Software Foundation (ASF) under one or more
  4 | # contributor license agreements.  See the NOTICE file distributed with
  5 | # this work for additional information regarding copyright ownership.
  6 | # The ASF licenses this file to You under the Apache License, Version 2.0
  7 | # (the "License"); you may not use this file except in compliance with
  8 | # the License.  You may obtain a copy of the License at
  9 | #
 10 | #    http://www.apache.org/licenses/LICENSE-2.0
 11 | #
 12 | # Unless required by applicable law or agreed to in writing, software
 13 | # distributed under the License is distributed on an "AS IS" BASIS,
 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15 | # See the License for the specific language governing permissions and
 16 | # limitations under the License.
 17 | 
 18 | 
 19 | from __future__ import print_function
 20 | from __future__ import absolute_import
 21 | import os
 22 | import numpy as np
 23 | import pandas as pd 
 24 | import xarray as xr
 25 | import itertools
 26 | from glob import glob
 27 | # from pyspark.sql import SparkSession # Removing this line simply makes the library compatible with Spark 1.6.3 !
 28 | 
 29 | def ncread(sc, paths, mode='single', **kwargs):
 30 |     """Calls sparkxarray netcdf read function based on the mode parameter.
 31 | 
 32 |     ============ ==============================
 33 |     Mode          Reading Function
 34 |     ------------ ------------------------------
 35 |     single       : read_nc_single
 36 |     multi        : read_nc_multi
 37 |     Anything else: Throw an exception
 38 |     ============= ==============================
 39 | 
 40 |     Parameters
 41 |     ----------
 42 | 
 43 |     sc       :  sparkContext object
 44 | 
 45 |     paths    :  str or sequence
 46 |                 Either a string glob in the form "path/to/my/files/*.nc" or an explicit
 47 |                 list of files to open
 48 | 
 49 |     mode     : str
 50 |                'single' for a single file
 51 |                'multi' for multiple files
 52 | 
 53 |     **kwargs : dict
 54 |                partitioning options to be passed on to the actual read function.
 55 |             
 56 |     
 57 |     """
 58 | 
 59 |     if 'partitions' not in kwargs:
 60 |         kwargs['partitions'] = None
 61 | 
 62 |     if 'partition_on' not in kwargs:
 63 |         kwargs['partition_on'] = ['time']
 64 |     
 65 |     if 'decode_times' not in kwargs:
 66 |         kwargs['decode_times'] = True
 67 | 
 68 |     error_msg = ("You specified a mode that is not implemented.")
 69 | 
 70 |     if (mode == 'single'):
 71 |         return _read_nc_single(sc, paths, **kwargs)
 72 | 
 73 |     elif (mode == 'multi'):
 74 |         return _read_nc_multi(sc, paths, **kwargs)
 75 |     else:
 76 |         raise NotImplementedError(error_msg)
 77 | 
 78 |         
 79 | def _read_nc_single(sc, paths, **kwargs):
 80 |     """ Read a single netCDF file
 81 | 
 82 |     Parameters
 83 |     -----------
 84 |     sc       :  sparkContext object
 85 | 
 86 |     paths    :  str
 87 |                 an explicit filename to open
 88 |     
 89 | 
 90 |     **kwargs : dict
 91 |                Additional arguments for partitioning 
 92 | 
 93 |     """
 94 |     partition_on = kwargs.get('partition_on')
 95 |     partitions = kwargs.get('partitions')
 96 |     decode_times=kwargs.get('decode_times')
 97 | 
 98 |     dset = xr.open_dataset(paths, autoclose=True, decode_times=decode_times)
 99 | 
100 |     # D = {'dim_1': dim_1_size, 'dim_2': dim_2_size, ...}
101 |     D = {dset[dimension].name:dset[dimension].size for dimension in partition_on}
102 |     
103 |     # dim_sizes = [range(dim_1_size), range(dim_2_size), range(...)]
104 |     dim_ranges = [range(dim_size) for dim_size in D.values()]
105 |     
106 | 
107 |     dim_cartesian_product_indices = [element for element in itertools.product(*dim_ranges)]
108 | 
109 |     # create a list of dictionaries for  positional indexing
110 |     positional_indices = [dict(zip(partition_on, ij)) for ij in dim_cartesian_product_indices]
111 | 
112 |     if not partitions:
113 |         partitions = len(dim_cartesian_product_indices)
114 | 
115 |     if partitions > len(dim_cartesian_product_indices):
116 |         partitions = len(dim_cartesian_product_indices)
117 | 
118 |     
119 |     # Create an RDD
120 |     rdd = sc.parallelize(positional_indices, partitions).map(lambda x: _readone_slice(dset, x))
121 | 
122 |     return rdd
123 | 
124 | 
125 | def _readone_slice(dset, positional_indices):
126 |     """Read a slice from an xarray.Dataset.
127 | 
128 |     Parameters
129 |     ----------
130 | 
131 |     dset                : file_object
132 |                          xarray.Dataset object
133 |     positional_indices  : dict
134 |                           dict containing positional indices for each dimension
135 |                           e.g. {'lat': 0, 'lon': 0}
136 | 
137 |     Returns
138 |     ---------
139 |     chunk               : xarray.Dataset
140 |                          a subset of the Xarray Dataset
141 | 
142 |     """
143 | 
144 |     # Change the positional indices into slice objects
145 |     # e.g {'lat': 0, 'lon': 0} ---> {'lat': slice(0, 1, None),  'lon': slice(0, 1, None)}
146 |     positional_slices = {dim: slice(positional_indices[dim], positional_indices[dim]+1) 
147 |                                                          for dim in positional_indices}
148 | 
149 |     # Read a slice for the given positional_slices
150 |     chunk = dset[positional_slices]
151 |     return chunk
152 | 
153 | 
154 | def _read_nc_multi(sc, paths, **kwargs):
155 |     """ Read multiple netCDF files
156 | 
157 |     Parameters
158 |     -----------
159 |     sc       :  sparkContext object
160 | 
161 |     paths    :  str or sequence
162 |                 Either a string glob in the form "path/to/my/files/*.nc" or an explicit
163 |                 list of files to open
164 | 
165 |     **kwargs : dict
166 |                Additional arguments for partitioning 
167 | 
168 |     """
169 | 
170 |     partition_on = kwargs.get('partition_on')
171 |     partitions = kwargs.get('partitions')
172 | 
173 |     dset = xr.open_mfdataset(paths, autoclose=True)
174 | 
175 |     # D = {'dim_1': dim_1_size, 'dim_2': dim_2_size, ...}
176 |     D ={dset[dimension].name:dset[dimension].size for dimension in partition_on}
177 |     
178 |     # dim_sizes = [range(dim_1_size), range(dim_2_size), range(...)]
179 |     dim_ranges = [range(dim_size) for dim_size in D.values()]
180 | 
181 |     dim_cartesian_product_indices = [element for element in itertools.product(*dim_ranges)]
182 | 
183 |     # create a list of dictionaries for positional indexing
184 |     positional_indices = [dict(zip(partition_on, ij)) for ij in dim_cartesian_product_indices]
185 | 
186 |     if not partitions:
187 |         partitions = len(dim_cartesian_product_indices) / 50
188 | 
189 |     if partitions > len(dim_cartesian_product_indices):
190 |         partitions = len(dim_cartesian_product_indices)
191 | 
192 |     
193 |     # Create an RDD
194 |     rdd = sc.parallelize(positional_indices, partitions).map(lambda x: readone_slice(dset, x))
195 | 
196 |     return rdd


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | [![Build Status](https://travis-ci.org/andersy005/spark-xarray.svg?branch=master)](https://travis-ci.org/andersy005/spark-xarray)
  3 | [![codecov](https://codecov.io/gh/andersy005/spark-xarray/branch/master/graph/badge.svg)](https://codecov.io/gh/andersy005/spark-xarray)
  4 | [![Build status](https://ci.appveyor.com/api/projects/status/93dmqmctpjcgnbcs/branch/master?svg=true)](https://ci.appveyor.com/project/andersy005/spark-xarray/branch/master)
  5 | [![License: MIT](https://img.shields.io/badge/License-MIT-red.svg)](https://opensource.org/licenses/MIT) [![PyPI](https://img.shields.io/pypi/pyversions/Django.svg)]()
  6 | 
  7 | # spark-xarray
  8 | 
  9 | spark-xarray is an open source project and Python package that seeks to integrate PySpark and xarray for Climate Data Analysis. It is built on top of [PySpark - Spark Python API](https://spark.apache.org/docs/latest/api/python/index.html) and [xarray](http://xarray.pydata.org/en/stable/).
 10 | 
 11 | spark-xarray was originally conceived during the Summer of 2017 as part of [PySpark for "Big" Atmospheric & Oceanic Data Analysis](https://ncar.github.io/PySpark4Climate/) - [A CISL/SIParCS Research Project](https://www2.cisl.ucar.edu/siparcs).
 12 | 
 13 | It is currently maintained by [Anderson Banihirwe](https://github.com/andersy005).
 14 | 
 15 | Documentation is available at https://andersy005.github.io/spark-xarray/.
 16 | ## Installation
 17 | 
 18 | We will guide you how to install spark-xarray. However, we will assume that an Apache Spark installation is available.
 19 | 
 20 | 
 21 | ### Install
 22 | 
 23 | #### Requirements
 24 | 
 25 | For the installation of ```spark-xarray```, the following packages are required:
 26 | 
 27 | 
 28 | - [Spark 2.0+](https://spark.apache.org/)
 29 | - [netcdf4-python (>=1.2.8)](https://unidata.github.io/netcdf4-python/)
 30 | - ```xarray (>=0.9.5)```
 31 | - ```dask (>=0.15.1)```
 32 | - ```toolz (>=0.8.2)```
 33 | 
 34 | #### Install
 35 | 
 36 | Clone the repository directly from GitHub and install it aftwards using ```$ python setup.py```. This will also resolve possible missing dependencies.
 37 | 
 38 | ```sh
 39 | $ git clone https://github.com/andersy005/spark-xarray.git
 40 | $ cd spark-xarray
 41 | $ python setup.py install
 42 | ```
 43 | 
 44 | ## Development
 45 | 
 46 | We welcome new contributors of all experience levels.
 47 | 
 48 | ### Important links
 49 | 
 50 | - Official source code repo: https://github.com/andersy005/spark-xarray
 51 | - Issue tracker: https://github.com/andersy005/spark-xarray/issues
 52 | 
 53 | ## Examples
 54 | 
 55 | ### Single file
 56 | 
 57 | ```python
 58 | >>> from sparkxarray.reader import ncread
 59 | >>> from pyspark.sql import SparkSession
 60 | >>> spark = SparkSession.builder.appName('spark-rdd').getOrCreate()
 61 | >>> sc = spark.SparkContext
 62 | >>> filepath='spark-xarray/sparkxarray/tests/data/air.sig995.2012.nc'
 63 | >>> # Create an RDD
 64 | >>> rdd = ncread(sc, filepath, mode='single', partition_on=['time'], partitions=100)
 65 | >>> rdd.first()  # Get the first element
 66 | <xarray.Dataset>
 67 | Dimensions:  (lat: 73, lon: 144, time: 1)
 68 | Coordinates:
 69 |   * lat      (lat) float32 90.0 87.5 85.0 82.5 80.0 77.5 75.0 72.5 70.0 67.5 ...
 70 |   * lon      (lon) float32 0.0 2.5 5.0 7.5 10.0 12.5 15.0 17.5 20.0 22.5 ...
 71 |   * time     (time) datetime64[ns] 2012-01-01
 72 | Data variables:
 73 |     air      (time, lat, lon) float64 234.5 234.5 234.5 234.5 234.5 234.5 ...
 74 | Attributes:
 75 |     Conventions:  COARDS
 76 |     title:        mean daily NMC reanalysis (2012)
 77 |     history:      created 2011/12 by Hoop (netCDF2.3)
 78 |     description:  Data is from NMC initialized reanalysis\n(4x/day).  These a...
 79 |     platform:     Model
 80 |     references:   http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...
 81 | >>> rdd.count()   # Get a count of elements in the rdd
 82 | 366
 83 | >>> # The count above corresponds to number of timesteps in the netCDF file 
 84 | >>> rdd.getNumPartitions()  # Get the number of partitions
 85 | 100
 86 | >>> # Compute the daily average for each day (element) in RDD
 87 | >>> daily_average = rdd.map(lambda x: x.mean(dim=['lat', 'lon']))
 88 | >>> daily_average.take(3)
 89 | [<xarray.Dataset>
 90 | Dimensions:  (time: 1)
 91 | Coordinates:
 92 |   * time     (time) datetime64[ns] 2012-01-01
 93 | Data variables:
 94 |     air      (time) float64 277.0, <xarray.Dataset>
 95 | Dimensions:  (time: 1)
 96 | Coordinates:
 97 |   * time     (time) datetime64[ns] 2012-01-02
 98 | Data variables:
 99 |     air      (time) float64 276.8, <xarray.Dataset>
100 | Dimensions:  (time: 1)
101 | Coordinates:
102 |   * time     (time) datetime64[ns] 2012-01-03
103 | Data variables:
104 |     air     
105 | ```
106 | 
107 | ### Multiple files
108 | 
109 | ```python
110 | >>> from sparkxarray.reader import ncread
111 | >>> from pyspark.sql import SparkSession
112 | >>> spark = SparkSession.builder.appName('spark-rdd').getOrCreate()
113 | >>> sc = spark.SparkContext
114 | >>> paths='spark-xarray/sparkxarray/tests/data/NCEP/*.nc'
115 | >>> multi_rdd = ncread(sc, paths, mode='multi', partition_on=['lat', 'lon'], partitions=300)
116 | >>> multi_rdd.count()
117 | 16020
118 | >>> multi_rdd.first()
119 | <xarray.Dataset>
120 | Dimensions:   (lat: 1, lon: 1, nv: 2, time: 4, zlev: 1)
121 | Coordinates:
122 |   * zlev      (zlev) float32 0.0
123 |   * lat       (lat) float32 -88.0
124 |   * lon       (lon) float32 0.0
125 |   * time      (time) datetime64[ns] 1854-01-15 1854-02-15 1854-03-15 1854-04-15
126 | Dimensions without coordinates: nv
127 | Data variables:
128 |     lat_bnds  (time, lat, nv) float32 -89.0 -87.0 -89.0 -87.0 -89.0 -87.0 ...
129 |     lon_bnds  (time, lon, nv) float32 -1.0 1.0 -1.0 1.0 -1.0 1.0 -1.0 1.0
130 |     sst       (time, zlev, lat, lon) float64 nan nan nan nan
131 |     anom      (time, zlev, lat, lon) float64 nan nan nan nan
132 | Attributes:
133 |     Conventions:                CF-1.6
134 |     Metadata_Conventions:       CF-1.6, Unidata Dataset Discovery v1.0
135 |     metadata_link:              C00884
136 |     id:                         ersst.v4.185401
137 |     naming_authority:           gov.noaa.ncdc
138 |     title:                      NOAA Extended Reconstructed Sea Surface Tempe...
139 |     summary:                    ERSST.v4 is developped based on v3b after rev...
140 |     institution:                NOAA/NESDIS/NCDC
141 |     creator_name:               Boyin Huang
142 |     creator_email:              boyin.huang@noaa.gov
143 |     date_created:               2014-10-24
144 |     production_version:         Beta Version 4
145 |     history:                    Version 4 based on Version 3b
146 |     publisher_name:             Boyin Huang
147 |     publisher_email:            boyin.huang@noaa.gov
148 |     publisher_url:              http://www.ncdc.noaa.gov
149 |     creator_url:                http://www.ncdc.noaa.gov
150 |     license:                    No constraints on data access or use
151 |     time_coverage_start:        1854-01-15T000000Z
152 |     time_coverage_end:          1854-01-15T000000Z
153 |     geospatial_lon_min:         -1.0f
154 |     geospatial_lon_max:         359.0f
155 |     geospatial_lat_min:         -89.0f
156 |     geospatial_lat_max:         89.0f
157 |     geospatial_lat_units:       degrees_north
158 |     geospatial_lat_resolution:  2.0
159 |     geospatial_lon_units:       degrees_east
160 |     geospatial_lon_resolution:  2.0
161 |     spatial_resolution:         2.0 degree grid
162 |     cdm_data_type:              Grid
163 |     processing_level:           L4
164 |     standard_name_vocabulary:   CF Standard Name Table v27
165 |     keywords:                   Earth Science &gt; Oceans &gt; Ocean Temperat...
166 |     keywords_vocabulary:        NASA Global Change Master Directory (GCMD) Sc...
167 |     project:                    NOAA Extended Reconstructed Sea Surface Tempe...
168 |     platform:                   Ship and Buoy SSTs from ICOADS R2.5 and NCEP GTS
169 |     instrument:                 Conventional thermometers
170 |     source:                     ICOADS R2.5 SST, NCEP GTS SST, HadISST ice, N...
171 |     comment:                    SSTs were observed by conventional thermomete...
172 |     references:                 Huang et al, 2014: Extended Reconstructed Sea...
173 |     climatology:                Climatology is based on 1971-2000 SST, Xue, Y...
174 |     description:                In situ data: ICOADS2.5 before 2007 and NCEP ...
175 | ```
176 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "{}"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright {yyyy} {name of copyright owner}
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/bias.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "toc": true
  7 |    },
  8 |    "source": [
  9 |     "<h1>Table of Contents<span class=\"tocSkip\"></span></h1>\n",
 10 |     "<div class=\"toc\" style=\"margin-top: 1em;\"><ul class=\"toc-item\"></ul></div>"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": 1,
 16 |    "metadata": {
 17 |     "ExecuteTime": {
 18 |      "end_time": "2017-11-16T19:28:46.494511Z",
 19 |      "start_time": "2017-11-16T19:28:45.846305Z"
 20 |     }
 21 |    },
 22 |    "outputs": [],
 23 |    "source": [
 24 |     "# import reader module from sparkxarray\n",
 25 |     "from sparkxarray import reader\n",
 26 |     "from pyspark.sql import SparkSession"
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "code",
 31 |    "execution_count": 2,
 32 |    "metadata": {
 33 |     "ExecuteTime": {
 34 |      "end_time": "2017-11-16T19:28:51.948198Z",
 35 |      "start_time": "2017-11-16T19:28:47.038329Z"
 36 |     }
 37 |    },
 38 |    "outputs": [],
 39 |    "source": [
 40 |     "# Create sparksession\n",
 41 |     "spark = SparkSession.builder.appName(\"bias\").getOrCreate()\n",
 42 |     "sc = spark.sparkContext"
 43 |    ]
 44 |   },
 45 |   {
 46 |    "cell_type": "code",
 47 |    "execution_count": 3,
 48 |    "metadata": {
 49 |     "ExecuteTime": {
 50 |      "end_time": "2017-11-16T19:28:51.953696Z",
 51 |      "start_time": "2017-11-16T19:28:51.950303Z"
 52 |     }
 53 |    },
 54 |    "outputs": [],
 55 |    "source": [
 56 |     "FILE_1 = \"/home/abanihi/Documents/Github/spark-xarray/datasets/AFRICA_KNMI-RACMO2.2b_CTL_ERAINT_MM_50km_1989-2008_tasmax.nc\"\n",
 57 |     "FILE_2 = \"/home/abanihi/Documents/Github/spark-xarray/datasets/AFRICA_UC-WRF311_CTL_ERAINT_MM_50km-rg_1989-2008_tasmax.nc\"\n"
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "code",
 62 |    "execution_count": 4,
 63 |    "metadata": {
 64 |     "ExecuteTime": {
 65 |      "end_time": "2017-11-16T19:28:52.495714Z",
 66 |      "start_time": "2017-11-16T19:28:51.959272Z"
 67 |     }
 68 |    },
 69 |    "outputs": [],
 70 |    "source": [
 71 |     "knmi = reader.ncread(sc, FILE_1, mode='single', partition_on=['rlat', 'rlon'], partitions=500, decode_times=False)"
 72 |    ]
 73 |   },
 74 |   {
 75 |    "cell_type": "code",
 76 |    "execution_count": 5,
 77 |    "metadata": {
 78 |     "ExecuteTime": {
 79 |      "end_time": "2017-11-16T19:28:55.702752Z",
 80 |      "start_time": "2017-11-16T19:28:53.427679Z"
 81 |     }
 82 |    },
 83 |    "outputs": [
 84 |     {
 85 |      "data": {
 86 |       "text/plain": [
 87 |        "<xarray.Dataset>\n",
 88 |        "Dimensions:       (bnds: 2, height: 1, rlat: 1, rlon: 1, time: 240)\n",
 89 |        "Coordinates:\n",
 90 |        "  * time          (time) float64 468.0 469.0 470.0 471.0 472.0 473.0 474.0 ...\n",
 91 |        "  * rlon          (rlon) float32 -24.64\n",
 92 |        "    lon           (rlat, rlon) float32 -24.64\n",
 93 |        "  * rlat          (rlat) float32 -45.76\n",
 94 |        "    lat           (rlat, rlon) float32 -45.76\n",
 95 |        "  * height        (height) float32 2.0\n",
 96 |        "Dimensions without coordinates: bnds\n",
 97 |        "Data variables:\n",
 98 |        "    rotated_pole  |S1 b''\n",
 99 |        "    time_bnds     (time, bnds) float64 468.0 469.0 469.0 470.0 470.0 471.0 ...\n",
100 |        "    tasmax        (time, height, rlat, rlon) float64 283.4 284.2 284.2 284.6 ...\n",
101 |        "Attributes:\n",
102 |        "    institution:     KNMI\n",
103 |        "    Conventions:     CF-1.0\n",
104 |        "    conventionsURL:  http://www.cgd.ucar.edu/cms/eaton/cf-metadata/index.html\n",
105 |        "    source:          RACMO2.2b\n",
106 |        "    project_id:      ENSEMBLES\n",
107 |        "    experiment_id:   ERAIN CORDEX-Africa-50km\n",
108 |        "    realization:     1\n",
109 |        "    comments:        beta-version RACMO2.2 with default physics from ECMWF CY...\n",
110 |        "    creation_date:   2010-04-09 13:53:22"
111 |       ]
112 |      },
113 |      "execution_count": 5,
114 |      "metadata": {},
115 |      "output_type": "execute_result"
116 |     }
117 |    ],
118 |    "source": [
119 |     "knmi.first()"
120 |    ]
121 |   },
122 |   {
123 |    "cell_type": "code",
124 |    "execution_count": 6,
125 |    "metadata": {
126 |     "ExecuteTime": {
127 |      "end_time": "2017-11-16T19:28:55.835670Z",
128 |      "start_time": "2017-11-16T19:28:55.706696Z"
129 |     }
130 |    },
131 |    "outputs": [],
132 |    "source": [
133 |     "wrf = reader.ncread(sc, FILE_2, mode='single', partition_on=['rlat', 'rlon'], partitions=500, decode_times=False)"
134 |    ]
135 |   },
136 |   {
137 |    "cell_type": "code",
138 |    "execution_count": 7,
139 |    "metadata": {
140 |     "ExecuteTime": {
141 |      "end_time": "2017-11-16T19:28:56.969431Z",
142 |      "start_time": "2017-11-16T19:28:55.840045Z"
143 |     }
144 |    },
145 |    "outputs": [
146 |     {
147 |      "data": {
148 |       "text/plain": [
149 |        "<xarray.Dataset>\n",
150 |        "Dimensions:       (bnds: 2, height: 1, rlat: 1, rlon: 1, time: 240)\n",
151 |        "Coordinates:\n",
152 |        "    lon           (rlat, rlon) float64 -24.64\n",
153 |        "    lat           (rlat, rlon) float64 -45.76\n",
154 |        "  * height        (height) float32 2.0\n",
155 |        "  * time          (time) float64 1.426e+04 1.429e+04 1.432e+04 1.435e+04 ...\n",
156 |        "  * rlat          (rlat) float64 -45.76\n",
157 |        "  * rlon          (rlon) float64 -24.64\n",
158 |        "Dimensions without coordinates: bnds\n",
159 |        "Data variables:\n",
160 |        "    tasmax        (time, height, rlat, rlon) float64 283.4 284.2 284.3 284.6 ...\n",
161 |        "    rotated_pole  |S1 b''\n",
162 |        "    time_bnds     (time, bnds) float64 1.424e+04 1.428e+04 1.428e+04 ...\n",
163 |        "Attributes:\n",
164 |        "    Conventions:               CF-1.4\n",
165 |        "    institution:               Universidad de Cantabria (Spain)\n",
166 |        "    title:                     CORDEX Africa Sensitivity Run\n",
167 |        "    comment:                   The simulation was forced with ERA-Interim 2x2...\n",
168 |        "    nco_openmp_thread_number:  1"
169 |       ]
170 |      },
171 |      "execution_count": 7,
172 |      "metadata": {},
173 |      "output_type": "execute_result"
174 |     }
175 |    ],
176 |    "source": [
177 |     "wrf.first()"
178 |    ]
179 |   },
180 |   {
181 |    "cell_type": "code",
182 |    "execution_count": 8,
183 |    "metadata": {
184 |     "ExecuteTime": {
185 |      "end_time": "2017-11-16T19:29:42.654411Z",
186 |      "start_time": "2017-11-16T19:29:07.174825Z"
187 |     }
188 |    },
189 |    "outputs": [
190 |     {
191 |      "name": "stdout",
192 |      "output_type": "stream",
193 |      "text": [
194 |       "CPU times: user 72 ms, sys: 28 ms, total: 100 ms\n",
195 |       "Wall time: 35.5 s\n"
196 |      ]
197 |     },
198 |     {
199 |      "data": {
200 |       "text/plain": [
201 |        "38994"
202 |       ]
203 |      },
204 |      "execution_count": 8,
205 |      "metadata": {},
206 |      "output_type": "execute_result"
207 |     }
208 |    ],
209 |    "source": [
210 |     "%time wrf.count()"
211 |    ]
212 |   },
213 |   {
214 |    "cell_type": "code",
215 |    "execution_count": 12,
216 |    "metadata": {
217 |     "ExecuteTime": {
218 |      "end_time": "2017-11-16T19:30:31.565338Z",
219 |      "start_time": "2017-11-16T19:30:31.556340Z"
220 |     }
221 |    },
222 |    "outputs": [],
223 |    "source": [
224 |     "def create_indices(element):\n",
225 |     "    lat = round(float(element.rlat.data), 1)\n",
226 |     "    lon = round(float(element.rlon.data), 1)\n",
227 |     "    key = (lat, lon)\n",
228 |     "    return (key, element)"
229 |    ]
230 |   },
231 |   {
232 |    "cell_type": "code",
233 |    "execution_count": 13,
234 |    "metadata": {
235 |     "ExecuteTime": {
236 |      "end_time": "2017-11-16T19:30:32.584974Z",
237 |      "start_time": "2017-11-16T19:30:32.491836Z"
238 |     }
239 |    },
240 |    "outputs": [
241 |     {
242 |      "data": {
243 |       "text/plain": [
244 |        "((-45.8, -24.6), <xarray.Dataset>\n",
245 |        " Dimensions:       (bnds: 2, height: 1, rlat: 1, rlon: 1, time: 240)\n",
246 |        " Coordinates:\n",
247 |        "   * time          (time) float64 468.0 469.0 470.0 471.0 472.0 473.0 474.0 ...\n",
248 |        "   * rlon          (rlon) float32 -24.64\n",
249 |        "     lon           (rlat, rlon) float32 -24.64\n",
250 |        "   * rlat          (rlat) float32 -45.76\n",
251 |        "     lat           (rlat, rlon) float32 -45.76\n",
252 |        "   * height        (height) float32 2.0\n",
253 |        " Dimensions without coordinates: bnds\n",
254 |        " Data variables:\n",
255 |        "     rotated_pole  |S1 b''\n",
256 |        "     time_bnds     (time, bnds) float64 468.0 469.0 469.0 470.0 470.0 471.0 ...\n",
257 |        "     tasmax        (time, height, rlat, rlon) float64 283.4 284.2 284.2 284.6 ...\n",
258 |        " Attributes:\n",
259 |        "     institution:     KNMI\n",
260 |        "     Conventions:     CF-1.0\n",
261 |        "     conventionsURL:  http://www.cgd.ucar.edu/cms/eaton/cf-metadata/index.html\n",
262 |        "     source:          RACMO2.2b\n",
263 |        "     project_id:      ENSEMBLES\n",
264 |        "     experiment_id:   ERAIN CORDEX-Africa-50km\n",
265 |        "     realization:     1\n",
266 |        "     comments:        beta-version RACMO2.2 with default physics from ECMWF CY...\n",
267 |        "     creation_date:   2010-04-09 13:53:22)"
268 |       ]
269 |      },
270 |      "execution_count": 13,
271 |      "metadata": {},
272 |      "output_type": "execute_result"
273 |     }
274 |    ],
275 |    "source": [
276 |     "knmi2 = knmi.map(create_indices)\n",
277 |     "knmi2.first()"
278 |    ]
279 |   },
280 |   {
281 |    "cell_type": "code",
282 |    "execution_count": 14,
283 |    "metadata": {
284 |     "ExecuteTime": {
285 |      "end_time": "2017-11-16T19:30:35.180079Z",
286 |      "start_time": "2017-11-16T19:30:35.094491Z"
287 |     }
288 |    },
289 |    "outputs": [
290 |     {
291 |      "data": {
292 |       "text/plain": [
293 |        "((-45.8, -24.6), <xarray.Dataset>\n",
294 |        " Dimensions:       (bnds: 2, height: 1, rlat: 1, rlon: 1, time: 240)\n",
295 |        " Coordinates:\n",
296 |        "     lon           (rlat, rlon) float64 -24.64\n",
297 |        "     lat           (rlat, rlon) float64 -45.76\n",
298 |        "   * height        (height) float32 2.0\n",
299 |        "   * time          (time) float64 1.426e+04 1.429e+04 1.432e+04 1.435e+04 ...\n",
300 |        "   * rlat          (rlat) float64 -45.76\n",
301 |        "   * rlon          (rlon) float64 -24.64\n",
302 |        " Dimensions without coordinates: bnds\n",
303 |        " Data variables:\n",
304 |        "     tasmax        (time, height, rlat, rlon) float64 283.4 284.2 284.3 284.6 ...\n",
305 |        "     rotated_pole  |S1 b''\n",
306 |        "     time_bnds     (time, bnds) float64 1.424e+04 1.428e+04 1.428e+04 ...\n",
307 |        " Attributes:\n",
308 |        "     Conventions:               CF-1.4\n",
309 |        "     institution:               Universidad de Cantabria (Spain)\n",
310 |        "     title:                     CORDEX Africa Sensitivity Run\n",
311 |        "     comment:                   The simulation was forced with ERA-Interim 2x2...\n",
312 |        "     nco_openmp_thread_number:  1)"
313 |       ]
314 |      },
315 |      "execution_count": 14,
316 |      "metadata": {},
317 |      "output_type": "execute_result"
318 |     }
319 |    ],
320 |    "source": [
321 |     "wrf2 = wrf.map(create_indices)\n",
322 |     "wrf2.first()"
323 |    ]
324 |   },
325 |   {
326 |    "cell_type": "code",
327 |    "execution_count": 15,
328 |    "metadata": {
329 |     "ExecuteTime": {
330 |      "end_time": "2017-11-16T19:33:35.868775Z",
331 |      "start_time": "2017-11-16T19:31:08.958021Z"
332 |     }
333 |    },
334 |    "outputs": [
335 |     {
336 |      "data": {
337 |       "text/plain": [
338 |        "((-44.4, -14.1), (<xarray.Dataset>\n",
339 |        "  Dimensions:       (bnds: 2, height: 1, rlat: 1, rlon: 1, time: 240)\n",
340 |        "  Coordinates:\n",
341 |        "      lon           (rlat, rlon) float64 -14.08\n",
342 |        "      lat           (rlat, rlon) float64 -44.44\n",
343 |        "    * height        (height) float32 2.0\n",
344 |        "    * time          (time) float64 1.426e+04 1.429e+04 1.432e+04 1.435e+04 ...\n",
345 |        "    * rlat          (rlat) float64 -44.44\n",
346 |        "    * rlon          (rlon) float64 -14.08\n",
347 |        "  Dimensions without coordinates: bnds\n",
348 |        "  Data variables:\n",
349 |        "      tasmax        (time, height, rlat, rlon) float64 284.4 285.3 284.2 285.0 ...\n",
350 |        "      rotated_pole  |S1 b''\n",
351 |        "      time_bnds     (time, bnds) float64 1.424e+04 1.428e+04 1.428e+04 ...\n",
352 |        "  Attributes:\n",
353 |        "      Conventions:               CF-1.4\n",
354 |        "      institution:               Universidad de Cantabria (Spain)\n",
355 |        "      title:                     CORDEX Africa Sensitivity Run\n",
356 |        "      comment:                   The simulation was forced with ERA-Interim 2x2...\n",
357 |        "      nco_openmp_thread_number:  1, <xarray.Dataset>\n",
358 |        "  Dimensions:       (bnds: 2, height: 1, rlat: 1, rlon: 1, time: 240)\n",
359 |        "  Coordinates:\n",
360 |        "    * time          (time) float64 468.0 469.0 470.0 471.0 472.0 473.0 474.0 ...\n",
361 |        "    * rlon          (rlon) float32 -14.08\n",
362 |        "      lon           (rlat, rlon) float32 -14.08\n",
363 |        "    * rlat          (rlat) float32 -44.44\n",
364 |        "      lat           (rlat, rlon) float32 -44.44\n",
365 |        "    * height        (height) float32 2.0\n",
366 |        "  Dimensions without coordinates: bnds\n",
367 |        "  Data variables:\n",
368 |        "      rotated_pole  |S1 b''\n",
369 |        "      time_bnds     (time, bnds) float64 468.0 469.0 469.0 470.0 470.0 471.0 ...\n",
370 |        "      tasmax        (time, height, rlat, rlon) float64 284.1 285.2 284.2 285.0 ...\n",
371 |        "  Attributes:\n",
372 |        "      institution:     KNMI\n",
373 |        "      Conventions:     CF-1.0\n",
374 |        "      conventionsURL:  http://www.cgd.ucar.edu/cms/eaton/cf-metadata/index.html\n",
375 |        "      source:          RACMO2.2b\n",
376 |        "      project_id:      ENSEMBLES\n",
377 |        "      experiment_id:   ERAIN CORDEX-Africa-50km\n",
378 |        "      realization:     1\n",
379 |        "      comments:        beta-version RACMO2.2 with default physics from ECMWF CY...\n",
380 |        "      creation_date:   2010-04-09 13:53:22))"
381 |       ]
382 |      },
383 |      "execution_count": 15,
384 |      "metadata": {},
385 |      "output_type": "execute_result"
386 |     }
387 |    ],
388 |    "source": [
389 |     "rdd = wrf2.join(knmi2, numPartitions=500)\n",
390 |     "rdd.first()"
391 |    ]
392 |   },
393 |   {
394 |    "cell_type": "code",
395 |    "execution_count": 16,
396 |    "metadata": {
397 |     "ExecuteTime": {
398 |      "end_time": "2017-11-16T19:33:35.877366Z",
399 |      "start_time": "2017-11-16T19:33:35.871317Z"
400 |     }
401 |    },
402 |    "outputs": [
403 |     {
404 |      "data": {
405 |       "text/plain": [
406 |        "500"
407 |       ]
408 |      },
409 |      "execution_count": 16,
410 |      "metadata": {},
411 |      "output_type": "execute_result"
412 |     }
413 |    ],
414 |    "source": [
415 |     "rdd.getNumPartitions()"
416 |    ]
417 |   },
418 |   {
419 |    "cell_type": "code",
420 |    "execution_count": 17,
421 |    "metadata": {
422 |     "ExecuteTime": {
423 |      "end_time": "2017-11-16T19:34:15.591937Z",
424 |      "start_time": "2017-11-16T19:33:35.879862Z"
425 |     }
426 |    },
427 |    "outputs": [
428 |     {
429 |      "data": {
430 |       "text/plain": [
431 |        "38994"
432 |       ]
433 |      },
434 |      "execution_count": 17,
435 |      "metadata": {},
436 |      "output_type": "execute_result"
437 |     }
438 |    ],
439 |    "source": [
440 |     "rdd.count()"
441 |    ]
442 |   },
443 |   {
444 |    "cell_type": "code",
445 |    "execution_count": 18,
446 |    "metadata": {
447 |     "ExecuteTime": {
448 |      "end_time": "2017-11-16T19:40:36.261659Z",
449 |      "start_time": "2017-11-16T19:40:35.204744Z"
450 |     }
451 |    },
452 |    "outputs": [
453 |     {
454 |      "data": {
455 |       "text/plain": [
456 |        "((-44.4, -14.1), (<xarray.Dataset>\n",
457 |        "  Dimensions:       (bnds: 2, height: 1, rlat: 1, rlon: 1, time: 240)\n",
458 |        "  Coordinates:\n",
459 |        "      lon           (rlat, rlon) float64 -14.08\n",
460 |        "      lat           (rlat, rlon) float64 -44.44\n",
461 |        "    * height        (height) float32 2.0\n",
462 |        "    * time          (time) float64 1.426e+04 1.429e+04 1.432e+04 1.435e+04 ...\n",
463 |        "    * rlat          (rlat) float64 -44.44\n",
464 |        "    * rlon          (rlon) float64 -14.08\n",
465 |        "  Dimensions without coordinates: bnds\n",
466 |        "  Data variables:\n",
467 |        "      tasmax        (time, height, rlat, rlon) float64 284.4 285.3 284.2 285.0 ...\n",
468 |        "      rotated_pole  |S1 b''\n",
469 |        "      time_bnds     (time, bnds) float64 1.424e+04 1.428e+04 1.428e+04 ...\n",
470 |        "  Attributes:\n",
471 |        "      Conventions:               CF-1.4\n",
472 |        "      institution:               Universidad de Cantabria (Spain)\n",
473 |        "      title:                     CORDEX Africa Sensitivity Run\n",
474 |        "      comment:                   The simulation was forced with ERA-Interim 2x2...\n",
475 |        "      nco_openmp_thread_number:  1, <xarray.Dataset>\n",
476 |        "  Dimensions:       (bnds: 2, height: 1, rlat: 1, rlon: 1, time: 240)\n",
477 |        "  Coordinates:\n",
478 |        "    * time          (time) float64 468.0 469.0 470.0 471.0 472.0 473.0 474.0 ...\n",
479 |        "    * rlon          (rlon) float32 -14.08\n",
480 |        "      lon           (rlat, rlon) float32 -14.08\n",
481 |        "    * rlat          (rlat) float32 -44.44\n",
482 |        "      lat           (rlat, rlon) float32 -44.44\n",
483 |        "    * height        (height) float32 2.0\n",
484 |        "  Dimensions without coordinates: bnds\n",
485 |        "  Data variables:\n",
486 |        "      rotated_pole  |S1 b''\n",
487 |        "      time_bnds     (time, bnds) float64 468.0 469.0 469.0 470.0 470.0 471.0 ...\n",
488 |        "      tasmax        (time, height, rlat, rlon) float64 284.1 285.2 284.2 285.0 ...\n",
489 |        "  Attributes:\n",
490 |        "      institution:     KNMI\n",
491 |        "      Conventions:     CF-1.0\n",
492 |        "      conventionsURL:  http://www.cgd.ucar.edu/cms/eaton/cf-metadata/index.html\n",
493 |        "      source:          RACMO2.2b\n",
494 |        "      project_id:      ENSEMBLES\n",
495 |        "      experiment_id:   ERAIN CORDEX-Africa-50km\n",
496 |        "      realization:     1\n",
497 |        "      comments:        beta-version RACMO2.2 with default physics from ECMWF CY...\n",
498 |        "      creation_date:   2010-04-09 13:53:22))"
499 |       ]
500 |      },
501 |      "execution_count": 18,
502 |      "metadata": {},
503 |      "output_type": "execute_result"
504 |     }
505 |    ],
506 |    "source": [
507 |     "a = rdd.first()\n",
508 |     "a"
509 |    ]
510 |   },
511 |   {
512 |    "cell_type": "code",
513 |    "execution_count": 139,
514 |    "metadata": {
515 |     "ExecuteTime": {
516 |      "end_time": "2017-11-16T20:49:26.115527Z",
517 |      "start_time": "2017-11-16T20:49:25.997131Z"
518 |     }
519 |    },
520 |    "outputs": [],
521 |    "source": [
522 |     "def bias_correct(element):\n",
523 |     "    import numpy as np\n",
524 |     "    obs = element[1][1].tasmax.values.ravel()\n",
525 |     "    mod = element[1][0].tasmax.values.ravel()\n",
526 |     "    \n",
527 |     "    cdfn = 30.0\n",
528 |     "    \n",
529 |     "    obs = np.sort(obs)\n",
530 |     "    mod = np.sort(mod)\n",
531 |     "    \n",
532 |     "    global_max = max(np.amax(obs), np.amax(mod))\n",
533 |     "    \n",
534 |     "    wide = global_max / cdfn\n",
535 |     "    \n",
536 |     "    xbins = np.arange(0.0, global_max+wide, wide)\n",
537 |     "    \n",
538 |     "    pdfobs, bins = np.histogram(obs, bins=xbins)\n",
539 |     "    pdfmod, bins = np.histogram(mod, bins=xbins)\n",
540 |     "    \n",
541 |     "    cdfobs = np.insert(np.cumsum(pdfobs), 0, 0.0)\n",
542 |     "    cdfmod = np.insert(np.cumsum(pdfmod), 0, 0.0) \n",
543 |     "    \n",
544 |     "    vals = [150., 256.6, 100000]\n",
545 |     "    \n",
546 |     "    def bias_map(vals, xbins, cdfmod, cdfobs):\n",
547 |     "        xbins = xbins\n",
548 |     "        cdfmod = cdfmod\n",
549 |     "        cdfobs = cdfobs\n",
550 |     "        \n",
551 |     "        cdf1 = np.interp(vals, xbins, cdfmod)\n",
552 |     "        \n",
553 |     "        corrected = np.interp(cdf1, cdfobs, xbins)\n",
554 |     "        \n",
555 |     "        return corrected \n",
556 |     "\n",
557 |     "    results = bias_map(vals, xbins, cdfmod, cdfobs)\n",
558 |     "        \n",
559 |     "    return results "
560 |    ]
561 |   },
562 |   {
563 |    "cell_type": "code",
564 |    "execution_count": 140,
565 |    "metadata": {
566 |     "ExecuteTime": {
567 |      "end_time": "2017-11-16T20:49:26.644420Z",
568 |      "start_time": "2017-11-16T20:49:26.640479Z"
569 |     }
570 |    },
571 |    "outputs": [],
572 |    "source": [
573 |     "bias_corrected = rdd.map(bias_correct)"
574 |    ]
575 |   },
576 |   {
577 |    "cell_type": "code",
578 |    "execution_count": 142,
579 |    "metadata": {
580 |     "ExecuteTime": {
581 |      "end_time": "2017-11-16T20:51:21.227339Z",
582 |      "start_time": "2017-11-16T20:51:20.369483Z"
583 |     }
584 |    },
585 |    "outputs": [
586 |     {
587 |      "data": {
588 |       "text/plain": [
589 |        "[array([ 276.68960063,  276.68960063,  286.23062134]),\n",
590 |        " array([ 277.6843516 ,  277.6843516 ,  287.25967407]),\n",
591 |        " array([ 280.57859904,  280.57859904,  290.25372314]),\n",
592 |        " array([ 280.19751383,  280.19751383,  299.52148031]),\n",
593 |        " array([ 283.49803975,  283.49803975,  293.27383423]),\n",
594 |        " array([ 285.22876485,  285.22876485,  304.89971415]),\n",
595 |        " array([ 285.08433126,  285.08433126,  294.91482544]),\n",
596 |        " array([ 283.94258016,  283.94258016,  293.73370361]),\n",
597 |        " array([ 283.80044759,  283.80044759,  293.58666992]),\n",
598 |        " array([ 288.61755575,  288.61755575,  298.56988525])]"
599 |       ]
600 |      },
601 |      "execution_count": 142,
602 |      "metadata": {},
603 |      "output_type": "execute_result"
604 |     }
605 |    ],
606 |    "source": [
607 |     "bias_corrected.take(10)"
608 |    ]
609 |   },
610 |   {
611 |    "cell_type": "code",
612 |    "execution_count": 102,
613 |    "metadata": {
614 |     "ExecuteTime": {
615 |      "end_time": "2017-11-16T20:37:53.698087Z",
616 |      "start_time": "2017-11-16T20:37:53.528502Z"
617 |     }
618 |    },
619 |    "outputs": [
620 |     {
621 |      "data": {
622 |       "text/plain": [
623 |        "282.32231330871582"
624 |       ]
625 |      },
626 |      "execution_count": 102,
627 |      "metadata": {},
628 |      "output_type": "execute_result"
629 |     }
630 |    ],
631 |    "source": [
632 |     "bias_corrected.first().mean()"
633 |    ]
634 |   },
635 |   {
636 |    "cell_type": "code",
637 |    "execution_count": null,
638 |    "metadata": {},
639 |    "outputs": [],
640 |    "source": []
641 |   }
642 |  ],
643 |  "metadata": {
644 |   "kernelspec": {
645 |    "display_name": "Python 3",
646 |    "language": "python",
647 |    "name": "python3"
648 |   },
649 |   "language_info": {
650 |    "codemirror_mode": {
651 |     "name": "ipython",
652 |     "version": 3
653 |    },
654 |    "file_extension": ".py",
655 |    "mimetype": "text/x-python",
656 |    "name": "python",
657 |    "nbconvert_exporter": "python",
658 |    "pygments_lexer": "ipython3",
659 |    "version": "3.6.3"
660 |   },
661 |   "toc": {
662 |    "nav_menu": {},
663 |    "number_sections": true,
664 |    "sideBar": true,
665 |    "skip_h1_title": false,
666 |    "toc_cell": true,
667 |    "toc_position": {},
668 |    "toc_section_display": "block",
669 |    "toc_window_display": false
670 |   }
671 |  },
672 |  "nbformat": 4,
673 |  "nbformat_minor": 2
674 | }
675 | 


--------------------------------------------------------------------------------
/examples/bias/bias.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "code",
   5 |    "execution_count": 1,
   6 |    "metadata": {},
   7 |    "outputs": [
   8 |     {
   9 |      "data": {
  10 |       "text/html": [
  11 |        "\n",
  12 |        "<script src=\"https://code.jquery.com/ui/1.10.4/jquery-ui.min.js\" type=\"text/javascript\"></script>\n",
  13 |        "<script type=\"text/javascript\">function HoloViewsWidget(){\n",
  14 |        "}\n",
  15 |        "\n",
  16 |        "HoloViewsWidget.comms = {};\n",
  17 |        "HoloViewsWidget.comm_state = {};\n",
  18 |        "\n",
  19 |        "HoloViewsWidget.prototype.init_slider = function(init_val){\n",
  20 |        "\tif(this.load_json) {\n",
  21 |        "\t\tthis.from_json()\n",
  22 |        "\t} else {\n",
  23 |        "\t\tthis.update_cache();\n",
  24 |        "\t}\n",
  25 |        "}\n",
  26 |        "\n",
  27 |        "HoloViewsWidget.prototype.populate_cache = function(idx){\n",
  28 |        "    this.cache[idx].html(this.frames[idx]);\n",
  29 |        "    if (this.embed) {\n",
  30 |        "        delete this.frames[idx];\n",
  31 |        "    }\n",
  32 |        "}\n",
  33 |        "\n",
  34 |        "HoloViewsWidget.prototype.process_error = function(msg){\n",
  35 |        "\n",
  36 |        "}\n",
  37 |        "\n",
  38 |        "HoloViewsWidget.prototype.from_json = function() {\n",
  39 |        "\tvar data_url = this.json_path + this.id + '.json';\n",
  40 |        "\t$.getJSON(data_url, $.proxy(function(json_data) {\n",
  41 |        "\t\tthis.frames = json_data;\n",
  42 |        "\t\tthis.update_cache();\n",
  43 |        "\t\tthis.update(0);\n",
  44 |        "\t}, this));\n",
  45 |        "}\n",
  46 |        "\n",
  47 |        "HoloViewsWidget.prototype.dynamic_update = function(current){\n",
  48 |        "\tif (current === undefined) {\n",
  49 |        "\t\treturn\n",
  50 |        "\t}\n",
  51 |        "\tif(this.dynamic) {\n",
  52 |        "\t\tcurrent = JSON.stringify(current);\n",
  53 |        "\t}\n",
  54 |        "\tfunction callback(initialized, msg){\n",
  55 |        "\t\t/* This callback receives data from Python as a string\n",
  56 |        "\t\t   in order to parse it correctly quotes are sliced off*/\n",
  57 |        "\t\tif (msg.content.ename != undefined) {\n",
  58 |        "\t\t\tthis.process_error(msg);\n",
  59 |        "\t\t}\n",
  60 |        "\t\tif (msg.msg_type != \"execute_result\") {\n",
  61 |        "\t\t\tconsole.log(\"Warning: HoloViews callback returned unexpected data for key: (\", current, \") with the following content:\", msg.content)\n",
  62 |        "\t\t\tthis.time = undefined;\n",
  63 |        "\t\t\tthis.wait = false;\n",
  64 |        "\t\t\treturn\n",
  65 |        "\t\t}\n",
  66 |        "\t\tthis.timed = (Date.now() - this.time) * 1.1;\n",
  67 |        "\t\tif (msg.msg_type == \"execute_result\") {\n",
  68 |        "\t\t\tif (msg.content.data['text/plain'].includes('Complete')) {\n",
  69 |        "\t\t\t\tthis.wait = false;\n",
  70 |        "\t\t\t\tif (this.queue.length > 0) {\n",
  71 |        "\t\t\t\t\tthis.time = Date.now();\n",
  72 |        "\t\t\t\t\tthis.dynamic_update(this.queue[this.queue.length-1]);\n",
  73 |        "\t\t\t\t\tthis.queue = [];\n",
  74 |        "\t\t\t\t}\n",
  75 |        "\t\t\t\treturn\n",
  76 |        "\t\t\t}\n",
  77 |        "\t\t}\n",
  78 |        "\t}\n",
  79 |        "\tthis.current = current;\n",
  80 |        "\tif ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null)) {\n",
  81 |        "\t\tvar kernel = Jupyter.notebook.kernel;\n",
  82 |        "\t\tcallbacks = {iopub: {output: $.proxy(callback, this, this.initialized)}};\n",
  83 |        "\t\tvar cmd = \"holoviews.plotting.widgets.NdWidget.widgets['\" + this.id + \"'].update(\" + current + \")\";\n",
  84 |        "\t\tkernel.execute(\"import holoviews;\" + cmd, callbacks, {silent : false});\n",
  85 |        "\t}\n",
  86 |        "}\n",
  87 |        "\n",
  88 |        "HoloViewsWidget.prototype.update_cache = function(force){\n",
  89 |        "    var frame_len = Object.keys(this.frames).length;\n",
  90 |        "    for (var i=0; i<frame_len; i++) {\n",
  91 |        "        if(!this.load_json || this.dynamic)  {\n",
  92 |        "            frame = Object.keys(this.frames)[i];\n",
  93 |        "        } else {\n",
  94 |        "            frame = i;\n",
  95 |        "        }\n",
  96 |        "        if(!(frame in this.cache) || force) {\n",
  97 |        "\t\t\tif ((frame in this.cache) && force) { this.cache[frame].remove() }\n",
  98 |        "\t\t\tthis.cache[frame] = $('<div />').appendTo(\"#\"+\"_anim_img\"+this.id).hide();\n",
  99 |        "\t\t\tvar cache_id = \"_anim_img\"+this.id+\"_\"+frame;\n",
 100 |        "\t\t\tthis.cache[frame].attr(\"id\", cache_id);\n",
 101 |        "\t\t\tthis.populate_cache(frame);\n",
 102 |        "        }\n",
 103 |        "    }\n",
 104 |        "}\n",
 105 |        "\n",
 106 |        "HoloViewsWidget.prototype.update = function(current){\n",
 107 |        "    if(current in this.cache) {\n",
 108 |        "        $.each(this.cache, function(index, value) {\n",
 109 |        "            value.hide();\n",
 110 |        "        });\n",
 111 |        "        this.cache[current].show();\n",
 112 |        "\t\tthis.wait = false;\n",
 113 |        "    }\n",
 114 |        "}\n",
 115 |        "\n",
 116 |        "HoloViewsWidget.prototype.init_comms = function() {\n",
 117 |        "\tif ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel !== undefined)) {\n",
 118 |        "\t\tvar widget = this;\n",
 119 |        "\t\tcomm_manager = Jupyter.notebook.kernel.comm_manager;\n",
 120 |        "        comm_manager.register_target(this.id, function (comm) {\n",
 121 |        "\t\t\tcomm.on_msg(function (msg) { widget.process_msg(msg) });\n",
 122 |        "\t\t});\n",
 123 |        "\t}\n",
 124 |        "}\n",
 125 |        "\n",
 126 |        "HoloViewsWidget.prototype.process_msg = function(msg) {\n",
 127 |        "}\n",
 128 |        "\n",
 129 |        "function SelectionWidget(frames, id, slider_ids, keyMap, dim_vals, notFound, load_json, mode, cached, json_path, dynamic){\n",
 130 |        "    this.frames = frames;\n",
 131 |        "    this.id = id;\n",
 132 |        "    this.slider_ids = slider_ids;\n",
 133 |        "    this.keyMap = keyMap\n",
 134 |        "    this.current_frame = 0;\n",
 135 |        "    this.current_vals = dim_vals;\n",
 136 |        "    this.load_json = load_json;\n",
 137 |        "    this.mode = mode;\n",
 138 |        "    this.notFound = notFound;\n",
 139 |        "    this.cached = cached;\n",
 140 |        "    this.dynamic = dynamic;\n",
 141 |        "    this.cache = {};\n",
 142 |        "\tthis.json_path = json_path;\n",
 143 |        "    this.init_slider(this.current_vals[0]);\n",
 144 |        "\tthis.queue = [];\n",
 145 |        "\tthis.wait = false;\n",
 146 |        "\tif (!this.cached || this.dynamic) {\n",
 147 |        "\t\tthis.init_comms()\n",
 148 |        "\t}\n",
 149 |        "}\n",
 150 |        "\n",
 151 |        "SelectionWidget.prototype = new HoloViewsWidget;\n",
 152 |        "\n",
 153 |        "\n",
 154 |        "SelectionWidget.prototype.get_key = function(current_vals) {\n",
 155 |        "\tvar key = \"(\";\n",
 156 |        "    for (var i=0; i<this.slider_ids.length; i++)\n",
 157 |        "    {\n",
 158 |        "        val = this.current_vals[i];\n",
 159 |        "        if (!(typeof val === 'string')) {\n",
 160 |        "            if (val % 1 === 0) { val = val.toFixed(1); }\n",
 161 |        "            else { val = val.toFixed(10); val = val.slice(0, val.length-1);}\n",
 162 |        "        }\n",
 163 |        "        key += \"'\" + val + \"'\";\n",
 164 |        "        if(i != this.slider_ids.length-1) { key += ', ';}\n",
 165 |        "        else if(this.slider_ids.length == 1) { key += ',';}\n",
 166 |        "    }\n",
 167 |        "    key += \")\";\n",
 168 |        "\treturn this.keyMap[key];\n",
 169 |        "}\n",
 170 |        "\n",
 171 |        "SelectionWidget.prototype.set_frame = function(dim_val, dim_idx){\n",
 172 |        "\tthis.current_vals[dim_idx] = dim_val;\n",
 173 |        "    var current = this.get_key(this.current_vals);\n",
 174 |        "    if(current === undefined && !this.dynamic) {\n",
 175 |        "        return\n",
 176 |        "    }\n",
 177 |        "\tif (this.dynamic || !this.cached) {\n",
 178 |        "\t\tif (this.time === undefined) {\n",
 179 |        "\t\t\t// Do nothing the first time\n",
 180 |        "\t\t} else if ((this.timed === undefined) || ((this.time + this.timed) > Date.now())) {\n",
 181 |        "\t\t\tvar key = this.current_vals;\n",
 182 |        "\t\t\tif (!this.dynamic) {\n",
 183 |        "\t\t\t\tkey = this.get_key(key);\n",
 184 |        "\t\t\t}\n",
 185 |        "\t\t\tthis.queue.push(key);\n",
 186 |        "\t\t\treturn\n",
 187 |        "\t\t}\n",
 188 |        "\t}\n",
 189 |        "\tthis.queue = [];\n",
 190 |        "\tthis.time = Date.now();\n",
 191 |        "\tthis.current_frame = current;\n",
 192 |        "    if(this.dynamic) {\n",
 193 |        "        this.dynamic_update(this.current_vals)\n",
 194 |        "    } else if(this.cached) {\n",
 195 |        "        this.update(current)\n",
 196 |        "    } else {\n",
 197 |        "        this.dynamic_update(current)\n",
 198 |        "    }\n",
 199 |        "}\n",
 200 |        "\n",
 201 |        "\n",
 202 |        "/* Define the ScrubberWidget class */\n",
 203 |        "function ScrubberWidget(frames, num_frames, id, interval, load_json, mode, cached, json_path, dynamic){\n",
 204 |        "    this.slider_id = \"_anim_slider\" + id;\n",
 205 |        "    this.loop_select_id = \"_anim_loop_select\" + id;\n",
 206 |        "    this.id = id;\n",
 207 |        "    this.interval = interval;\n",
 208 |        "    this.current_frame = 0;\n",
 209 |        "    this.direction = 0;\n",
 210 |        "    this.dynamic = dynamic;\n",
 211 |        "    this.timer = null;\n",
 212 |        "    this.load_json = load_json;\n",
 213 |        "    this.mode = mode;\n",
 214 |        "    this.cached = cached;\n",
 215 |        "    this.frames = frames;\n",
 216 |        "    this.cache = {};\n",
 217 |        "    this.length = num_frames;\n",
 218 |        "\tthis.json_path = json_path;\n",
 219 |        "    document.getElementById(this.slider_id).max = this.length - 1;\n",
 220 |        "    this.init_slider(0);\n",
 221 |        "\tthis.wait = false;\n",
 222 |        "\tthis.queue = [];\n",
 223 |        "\tif (!this.cached || this.dynamic) {\n",
 224 |        "\t\tthis.init_comms()\n",
 225 |        "\t}\n",
 226 |        "}\n",
 227 |        "\n",
 228 |        "ScrubberWidget.prototype = new HoloViewsWidget;\n",
 229 |        "\n",
 230 |        "ScrubberWidget.prototype.set_frame = function(frame){\n",
 231 |        "\tthis.current_frame = frame;\n",
 232 |        "\twidget = document.getElementById(this.slider_id);\n",
 233 |        "    if (widget === null) {\n",
 234 |        "        this.pause_animation();\n",
 235 |        "        return\n",
 236 |        "    }\n",
 237 |        "    widget.value = this.current_frame;\n",
 238 |        "    if(this.cached) {\n",
 239 |        "        this.update(frame)\n",
 240 |        "    } else {\n",
 241 |        "        this.dynamic_update(frame)\n",
 242 |        "    }\n",
 243 |        "}\n",
 244 |        "\n",
 245 |        "\n",
 246 |        "ScrubberWidget.prototype.process_error = function(msg){\n",
 247 |        "\tif (msg.content.ename === 'StopIteration') {\n",
 248 |        "\t\tthis.pause_animation();\n",
 249 |        "\t\tthis.stopped = true;\n",
 250 |        "\t\tvar keys = Object.keys(this.frames)\n",
 251 |        "\t\tthis.length = keys.length;\n",
 252 |        "\t\tdocument.getElementById(this.slider_id).max = this.length-1;\n",
 253 |        "\t\tdocument.getElementById(this.slider_id).value = this.length-1;\n",
 254 |        "\t\tthis.current_frame = this.length-1;\n",
 255 |        "\t}\n",
 256 |        "}\n",
 257 |        "\n",
 258 |        "\n",
 259 |        "ScrubberWidget.prototype.get_loop_state = function(){\n",
 260 |        "    var button_group = document[this.loop_select_id].state;\n",
 261 |        "    for (var i = 0; i < button_group.length; i++) {\n",
 262 |        "        var button = button_group[i];\n",
 263 |        "        if (button.checked) {\n",
 264 |        "            return button.value;\n",
 265 |        "        }\n",
 266 |        "    }\n",
 267 |        "    return undefined;\n",
 268 |        "}\n",
 269 |        "\n",
 270 |        "\n",
 271 |        "ScrubberWidget.prototype.next_frame = function() {\n",
 272 |        "\tthis.set_frame(Math.min(this.length - 1, this.current_frame + 1));\n",
 273 |        "}\n",
 274 |        "\n",
 275 |        "ScrubberWidget.prototype.previous_frame = function() {\n",
 276 |        "    this.set_frame(Math.max(0, this.current_frame - 1));\n",
 277 |        "}\n",
 278 |        "\n",
 279 |        "ScrubberWidget.prototype.first_frame = function() {\n",
 280 |        "    this.set_frame(0);\n",
 281 |        "}\n",
 282 |        "\n",
 283 |        "ScrubberWidget.prototype.last_frame = function() {\n",
 284 |        "    this.set_frame(this.length - 1);\n",
 285 |        "}\n",
 286 |        "\n",
 287 |        "ScrubberWidget.prototype.slower = function() {\n",
 288 |        "    this.interval /= 0.7;\n",
 289 |        "    if(this.direction > 0){this.play_animation();}\n",
 290 |        "    else if(this.direction < 0){this.reverse_animation();}\n",
 291 |        "}\n",
 292 |        "\n",
 293 |        "ScrubberWidget.prototype.faster = function() {\n",
 294 |        "    this.interval *= 0.7;\n",
 295 |        "    if(this.direction > 0){this.play_animation();}\n",
 296 |        "    else if(this.direction < 0){this.reverse_animation();}\n",
 297 |        "}\n",
 298 |        "\n",
 299 |        "ScrubberWidget.prototype.anim_step_forward = function() {\n",
 300 |        "    if(this.current_frame < this.length - 1){\n",
 301 |        "        this.next_frame();\n",
 302 |        "    }else{\n",
 303 |        "        var loop_state = this.get_loop_state();\n",
 304 |        "        if(loop_state == \"loop\"){\n",
 305 |        "            this.first_frame();\n",
 306 |        "        }else if(loop_state == \"reflect\"){\n",
 307 |        "            this.last_frame();\n",
 308 |        "            this.reverse_animation();\n",
 309 |        "        }else{\n",
 310 |        "            this.pause_animation();\n",
 311 |        "            this.last_frame();\n",
 312 |        "        }\n",
 313 |        "    }\n",
 314 |        "}\n",
 315 |        "\n",
 316 |        "ScrubberWidget.prototype.anim_step_reverse = function() {\n",
 317 |        "    if(this.current_frame > 0){\n",
 318 |        "        this.previous_frame();\n",
 319 |        "    } else {\n",
 320 |        "        var loop_state = this.get_loop_state();\n",
 321 |        "        if(loop_state == \"loop\"){\n",
 322 |        "            this.last_frame();\n",
 323 |        "        }else if(loop_state == \"reflect\"){\n",
 324 |        "            this.first_frame();\n",
 325 |        "            this.play_animation();\n",
 326 |        "        }else{\n",
 327 |        "            this.pause_animation();\n",
 328 |        "            this.first_frame();\n",
 329 |        "        }\n",
 330 |        "    }\n",
 331 |        "}\n",
 332 |        "\n",
 333 |        "ScrubberWidget.prototype.pause_animation = function() {\n",
 334 |        "    this.direction = 0;\n",
 335 |        "    if (this.timer){\n",
 336 |        "        clearInterval(this.timer);\n",
 337 |        "        this.timer = null;\n",
 338 |        "    }\n",
 339 |        "}\n",
 340 |        "\n",
 341 |        "ScrubberWidget.prototype.play_animation = function() {\n",
 342 |        "    this.pause_animation();\n",
 343 |        "    this.direction = 1;\n",
 344 |        "    var t = this;\n",
 345 |        "    if (!this.timer) this.timer = setInterval(function(){t.anim_step_forward();}, this.interval);\n",
 346 |        "}\n",
 347 |        "\n",
 348 |        "ScrubberWidget.prototype.reverse_animation = function() {\n",
 349 |        "    this.pause_animation();\n",
 350 |        "    this.direction = -1;\n",
 351 |        "    var t = this;\n",
 352 |        "    if (!this.timer) this.timer = setInterval(function(){t.anim_step_reverse();}, this.interval);\n",
 353 |        "}\n",
 354 |        "\n",
 355 |        "function extend(destination, source) {\n",
 356 |        "    for (var k in source) {\n",
 357 |        "        if (source.hasOwnProperty(k)) {\n",
 358 |        "            destination[k] = source[k];\n",
 359 |        "        }\n",
 360 |        "    }\n",
 361 |        "    return destination;\n",
 362 |        "}\n",
 363 |        "\n",
 364 |        "function update_widget(widget, values) {\n",
 365 |        "\tif (widget.hasClass(\"ui-slider\")) {\n",
 366 |        "\t\twidget.slider('option',\n",
 367 |        "\t\t\t\t\t  {'min': 0, 'max': values.length-1,\n",
 368 |        "\t\t\t\t\t   'dim_vals': values, 'value': 0,\n",
 369 |        "\t\t\t\t\t   'dim_labels': values})\n",
 370 |        "\t\twidget.slider('option', 'slide').call(widget, event, {'value': 0})\n",
 371 |        "\t} else {\n",
 372 |        "\t\twidget.empty();\n",
 373 |        "\t\tfor (var i=0; i<values.length; i++){\n",
 374 |        "\t\t\twidget.append($(\"<option>\", {\n",
 375 |        "\t\t\t\tvalue: i,\n",
 376 |        "\t\t\t\ttext: values[i]\n",
 377 |        "\t\t\t}))};\n",
 378 |        "\t\twidget.data('values', values);\n",
 379 |        "\t\twidget.data('value', 0);\n",
 380 |        "\t\twidget.trigger(\"change\");\n",
 381 |        "\t};\n",
 382 |        "}\n",
 383 |        "\n",
 384 |        "// Define Bokeh specific subclasses\n",
 385 |        "function BokehSelectionWidget() {\n",
 386 |        "\tSelectionWidget.apply(this, arguments);\n",
 387 |        "}\n",
 388 |        "\n",
 389 |        "function BokehScrubberWidget() {\n",
 390 |        "\tScrubberWidget.apply(this, arguments);\n",
 391 |        "}\n",
 392 |        "\n",
 393 |        "// Let them inherit from the baseclasses\n",
 394 |        "BokehSelectionWidget.prototype = Object.create(SelectionWidget.prototype);\n",
 395 |        "BokehScrubberWidget.prototype = Object.create(ScrubberWidget.prototype);\n",
 396 |        "\n",
 397 |        "// Define methods to override on widgets\n",
 398 |        "var BokehMethods = {\n",
 399 |        "\tupdate_cache : function(){\n",
 400 |        "\t\t$.each(this.frames, $.proxy(function(index, frame) {\n",
 401 |        "\t\t\tthis.frames[index] = JSON.parse(frame);\n",
 402 |        "\t\t}, this));\n",
 403 |        "\t},\n",
 404 |        "\tupdate : function(current){\n",
 405 |        "\t\tif (current === undefined) {\n",
 406 |        "\t\t\tvar data = undefined;\n",
 407 |        "\t\t} else {\n",
 408 |        "\t\t\tvar data = this.frames[current];\n",
 409 |        "\t\t}\n",
 410 |        "\t\tif (data !== undefined) {\n",
 411 |        "\t\t\tvar doc = Bokeh.index[data.root].model.document;\n",
 412 |        "\t\t\tdoc.apply_json_patch(data.patch);\n",
 413 |        "\t\t}\n",
 414 |        "\t},\n",
 415 |        "\tinit_comms : function() {\n",
 416 |        "\t}\n",
 417 |        "}\n",
 418 |        "\n",
 419 |        "// Extend Bokeh widgets with backend specific methods\n",
 420 |        "extend(BokehSelectionWidget.prototype, BokehMethods);\n",
 421 |        "extend(BokehScrubberWidget.prototype, BokehMethods);\n",
 422 |        "\n",
 423 |        "// Define MPL specific subclasses\n",
 424 |        "function MPLSelectionWidget() {\n",
 425 |        "    SelectionWidget.apply(this, arguments);\n",
 426 |        "}\n",
 427 |        "\n",
 428 |        "function MPLScrubberWidget() {\n",
 429 |        "    ScrubberWidget.apply(this, arguments);\n",
 430 |        "}\n",
 431 |        "\n",
 432 |        "// Let them inherit from the baseclasses\n",
 433 |        "MPLSelectionWidget.prototype = Object.create(SelectionWidget.prototype);\n",
 434 |        "MPLScrubberWidget.prototype = Object.create(ScrubberWidget.prototype);\n",
 435 |        "\n",
 436 |        "// Define methods to override on widgets\n",
 437 |        "var MPLMethods = {\n",
 438 |        "    init_slider : function(init_val){\n",
 439 |        "        if(this.load_json) {\n",
 440 |        "            this.from_json()\n",
 441 |        "        } else {\n",
 442 |        "            this.update_cache();\n",
 443 |        "        }\n",
 444 |        "        this.update(0);\n",
 445 |        "        if(this.mode == 'nbagg') {\n",
 446 |        "            this.set_frame(init_val, 0);\n",
 447 |        "        }\n",
 448 |        "    },\n",
 449 |        "    populate_cache : function(idx){\n",
 450 |        "        var cache_id = \"_anim_img\"+this.id+\"_\"+idx;\n",
 451 |        "        this.cache[idx].html(this.frames[idx]);\n",
 452 |        "        if (this.embed) {\n",
 453 |        "            delete this.frames[idx];\n",
 454 |        "        }\n",
 455 |        "    },\n",
 456 |        "    process_msg : function(msg) {\n",
 457 |        "        if (!(this.mode == 'nbagg')) {\n",
 458 |        "            var data = msg.content.data;\n",
 459 |        "            this.frames[this.current] = data;\n",
 460 |        "            this.update_cache(true);\n",
 461 |        "            this.update(this.current);\n",
 462 |        "        }\n",
 463 |        "    }\n",
 464 |        "}\n",
 465 |        "// Extend MPL widgets with backend specific methods\n",
 466 |        "extend(MPLSelectionWidget.prototype, MPLMethods);\n",
 467 |        "extend(MPLScrubberWidget.prototype, MPLMethods);\n",
 468 |        "</script>\n",
 469 |        "\n",
 470 |        "\n",
 471 |        "<link rel=\"stylesheet\" href=\"https://code.jquery.com/ui/1.10.4/themes/smoothness/jquery-ui.css\">\n",
 472 |        "<style>div.bk-hbox {\n",
 473 |        "    display: flex;\n",
 474 |        "    justify-content: center;\n",
 475 |        "}\n",
 476 |        "\n",
 477 |        "div.bk-hbox div.bk-plot {\n",
 478 |        "    padding: 8px;\n",
 479 |        "}\n",
 480 |        "\n",
 481 |        "div.bk-hbox div.bk-data-table {\n",
 482 |        "    padding: 20px;\n",
 483 |        "}\n",
 484 |        "\n",
 485 |        "div.hololayout {\n",
 486 |        "    display: flex;\n",
 487 |        "    align-items: center;\n",
 488 |        "    margin: 0;\n",
 489 |        "}\n",
 490 |        "\n",
 491 |        "div.holoframe {\n",
 492 |        "\twidth: 75%;\n",
 493 |        "}\n",
 494 |        "\n",
 495 |        "div.holowell {\n",
 496 |        "    display: flex;\n",
 497 |        "    align-items: center;\n",
 498 |        "    margin: 0;\n",
 499 |        "}\n",
 500 |        "\n",
 501 |        "form.holoform {\n",
 502 |        "    background-color: #fafafa;\n",
 503 |        "    border-radius: 5px;\n",
 504 |        "    overflow: hidden;\n",
 505 |        "\tpadding-left: 0.8em;\n",
 506 |        "    padding-right: 0.8em;\n",
 507 |        "    padding-top: 0.4em;\n",
 508 |        "    padding-bottom: 0.4em;\n",
 509 |        "}\n",
 510 |        "\n",
 511 |        "div.holowidgets {\n",
 512 |        "    padding-right: 0;\n",
 513 |        "\twidth: 25%;\n",
 514 |        "}\n",
 515 |        "\n",
 516 |        "div.holoslider {\n",
 517 |        "    min-height: 0 !important;\n",
 518 |        "    height: 0.8em;\n",
 519 |        "    width: 60%;\n",
 520 |        "}\n",
 521 |        "\n",
 522 |        "div.holoformgroup {\n",
 523 |        "    padding-top: 0.5em;\n",
 524 |        "    margin-bottom: 0.5em;\n",
 525 |        "}\n",
 526 |        "\n",
 527 |        "div.hologroup {\n",
 528 |        "    padding-left: 0;\n",
 529 |        "    padding-right: 0.8em;\n",
 530 |        "    width: 50%;\n",
 531 |        "}\n",
 532 |        "\n",
 533 |        ".holoselect {\n",
 534 |        "    width: 92%;\n",
 535 |        "    margin-left: 0;\n",
 536 |        "    margin-right: 0;\n",
 537 |        "}\n",
 538 |        "\n",
 539 |        ".holotext {\n",
 540 |        "    width: 100%;\n",
 541 |        "    padding-left:  0.5em;\n",
 542 |        "    padding-right: 0;\n",
 543 |        "}\n",
 544 |        "\n",
 545 |        ".holowidgets .ui-resizable-se {\n",
 546 |        "\tvisibility: hidden\n",
 547 |        "}\n",
 548 |        "\n",
 549 |        ".holoframe > .ui-resizable-se {\n",
 550 |        "\tvisibility: hidden\n",
 551 |        "}\n",
 552 |        "\n",
 553 |        ".holowidgets .ui-resizable-s {\n",
 554 |        "\tvisibility: hidden\n",
 555 |        "}\n",
 556 |        "</style>\n",
 557 |        "\n",
 558 |        "\n",
 559 |        "<div>\n",
 560 |        "<img src='data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAEAAAABACAYAAACqaXHeAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\n",
 561 |        "AAAB+wAAAfsBxc2miwAAABl0RVh0U29mdHdhcmUAd3d3Lmlua3NjYXBlLm9yZ5vuPBoAAA6zSURB\n",
 562 |        "VHic7ZtpeFRVmsf/5966taWqUlUJ2UioBBJiIBAwCZtog9IOgjqACsogKtqirT2ttt069nQ/zDzt\n",
 563 |        "tI4+CrJIREFaFgWhBXpUNhHZQoKBkIUASchWla1S+3ar7r1nPkDaCAnZKoQP/D7mnPOe9/xy76n3\n",
 564 |        "nFSAW9ziFoPFNED2LLK5wcyBDObkb8ZkxuaoSYlI6ZcOKq1eWFdedqNzGHQBk9RMEwFAASkk0Xw3\n",
 565 |        "ETacDNi2vtvc7L0ROdw0AjoSotQVkKSvHQz/wRO1lScGModBFbDMaNRN1A4tUBCS3lk7BWhQkgpD\n",
 566 |        "lG4852/+7DWr1R3uHAZVQDsbh6ZPN7CyxUrCzJMRouusj0ipRwD2uKm0Zn5d2dFwzX1TCGhnmdGo\n",
 567 |        "G62Nna+isiUqhkzuKrkQaJlPEv5mFl2fvGg2t/VnzkEV8F5ioioOEWkLG86fvbpthynjdhXYZziQ\n",
 568 |        "x1hC9J2NFyi8vCTt91Fh04KGip0AaG9zuCk2wQCVyoNU3Hjezee9bq92duzzTmxsRJoy+jEZZZYo\n",
 569 |        "GTKJ6SJngdJqAfRzpze0+jHreUtPc7gpBLQnIYK6BYp/uGhw9YK688eu7v95ysgshcg9qSLMo3JC\n",
 570 |        "4jqLKQFBgdKDPoQ+Pltb8dUyQLpeDjeVgI6EgLIQFT5tEl3rn2losHVsexbZ3EyT9wE1uGdkIPcy\n",
 571 |        "BGxn8QUq1QrA5nqW5i2tLqvrrM9NK6AdkVIvL9E9bZL/oyfMVd/jqvc8LylzRBKDJSzIExwhQzuL\n",
 572 |        "QYGQj4rHfFTc8mUdu3E7yoLtbTe9gI4EqVgVkug2i5+uXGo919ixbRog+3fTbQ8qJe4ZOYNfMoTI\n",
 573 |        "OoshUNosgO60AisX15aeI2PSIp5KiFLI9ubb1vV3Qb2ltwLakUCDAkWX7/nHKRmmGIl9VgYsUhJm\n",
 574 |        "2NXjKYADtM1ygne9QQDIXlk49FBstMKx66D1v4+XuQr7vqTe0VcBHQlRWiOCbmmSYe2SqtL6q5rJ\n",
 575 |        "zsTb7lKx3FKOYC4DoqyS/B5bvLPxvD9Qtf6saxYLQGJErmDOdOMr/zo96km1nElr8bmPOBwI9COv\n",
 576 |        "HnFPRIwmkSOv9kcAS4heRsidOkpeWBgZM+UBrTFAXNYL5Vf2ii9c1trNzpYdaoVil3WIc+wdk+gQ\n",
 577 |        "noie3ecCcxt9ITcLAPWt/laGEO/9U6PmzZkenTtsSMQ8uYywJVW+grCstAvCIaAdArAsIWkRDDs/\n",
 578 |        "KzLm2YcjY1Lv0UdW73HabE9n6V66cxSzfEmuJssTpKGVp+0vHq73FwL46eOjpMpbRAnNmJFrGJNu\n",
 579 |        "Ukf9Yrz+3rghiumCKNXXWPhLYcjxGsIpoCMsIRoFITkW8AuyM8jC1+/QLx4bozCEJIq38+1rtpR6\n",
 580 |        "V/yzb8eBlRb3fo5l783N0CWolAzJHaVNzkrTzlEp2bQ2q3TC5gn6wpnoQAmwSiGh2GitnTmVMc5O\n",
 581 |        "UyfKWUKCIsU7+fZDKwqdT6DDpvkzAX4/+AMFjk0tDp5GRXLpQ2MUmhgDp5gxQT8+Y7hyPsMi8uxF\n",
 582 |        "71H0oebujHALECjFKaW9Lm68n18wXp2kVzIcABytD5iXFzg+WVXkegpAsOOYziqo0OkK76GyquC3\n",
 583 |        "ltZAzMhhqlSNmmWTE5T6e3IN05ITFLM4GdN0vtZ3ob8Jh1NAKXFbm5PtLU/eqTSlGjkNAJjdgn/N\n",
 584 |        "aedXa0tdi7+t9G0FIF49rtMSEgAs1kDLkTPO7ebm4IUWeyh1bKomXqlgMG6kJmHcSM0clYLJ8XtR\n",
 585 |        "1GTnbV3F6I5wCGikAb402npp1h1s7LQUZZSMIfALFOuL3UUrfnS8+rez7v9qcold5tilgHbO1fjK\n",
 586 |        "9ubb17u9oshxzMiUBKXWqJNxd+fqb0tLVs4lILFnK71H0Ind7uiPgACVcFJlrb0tV6DzxqqTIhUM\n",
 587 |        "CwDf1/rrVhTa33/3pGPxJYdQ2l2cbgVcQSosdx8uqnDtbGjh9SlDVSMNWhlnilfqZk42Th2ZpLpf\n",
 588 |        "xrHec5e815zrr0dfBZSwzkZfqsv+1FS1KUknUwPARVvItfKUY+cn57yP7qv07UE3p8B2uhUwLk09\n",
 589 |        "e0SCOrK+hbdYHYLjRIl71wWzv9jpEoeOHhGRrJAzyEyNiJuUqX0g2sBN5kGK6y2Blp5M3lsB9Qh4\n",
 590 |        "y2Ja6x6+i0ucmKgwMATwhSjdUu49tKrQ/pvN5d53ml2CGwCmJipmKjgmyuaXzNeL2a0AkQ01Th5j\n",
 591 |        "2DktO3Jyk8f9vcOBQHV94OK+fPumJmvQHxJoWkaKWq9Vs+yUsbq0zGT1I4RgeH2b5wef7+c7bl8F\n",
 592 |        "eKgoHVVZa8ZPEORzR6sT1BzDUAD/d9F78e2Tzv99v8D+fLVTqAKAsbGamKey1Mt9Ann4eH3gTXTz\n",
 593 |        "idWtAJ8PQWOk7NzSeQn/OTHDuEikVF1R4z8BQCy+6D1aWRfY0tTGG2OM8rRoPaeIj5ZHzJxszElN\n",
 594 |        "VM8K8JS5WOfv8mzRnQAKoEhmt8gyPM4lU9SmBK1MCQBnW4KONT86v1hZ1PbwSXPw4JWussVjtH9Y\n",
 595 |        "NCoiL9UoH/6PSu8jFrfY2t36erQHXLIEakMi1SydmzB31h3GGXFDFNPaK8Rme9B79Ixrd0WN+1ij\n",
 596 |        "NRQ/doRmuFLBkHSTOm5GruG+pFjFdAmorG4IXH1Qua6ASniclfFtDYt+oUjKipPrCQB7QBQ2lrgP\n",
 597 |        "fFzm+9XWUtcqJ3/5vDLDpJ79XHZk3u8nGZ42qlj1+ydtbxysCezrydp6ugmipNJ7WBPB5tydY0jP\n",
 598 |        "HaVNzs3QzeE4ZpTbI+ZbnSFPbVOw9vsfnVvqWnirPyCNGD08IlqtYkh2hjZ5dErEQzoNm+6ykyOt\n",
 599 |        "Lt5/PQEuSRRKo22VkydK+vvS1XEKlhCJAnsqvcVvH7f/ZU2R67eXbMEGAMiIV5oWZWiWvz5Fv2xG\n",
 600 |        "sjqNJQRvn3Rs2lji/lNP19VjAQDgD7FHhujZB9OGqYxRkZxixgRDVlqS6uEOFaJUVu0rPFzctrnF\n",
 601 |        "JqijImVp8dEKVWyUXDk92zAuMZ6bFwpBU1HrOw6AdhQgUooChb0+ItMbWJitSo5Ws3IAOGEOtL53\n",
 602 |        "0vHZih9sC4vtofZ7Qu6523V/fmGcds1TY3V36pUsBwAbSlxnVh2xLfAD/IAIMDf7XYIkNmXfpp2l\n",
 603 |        "18rkAJAy9HKFaIr/qULkeQQKy9zf1JgDB2uaeFNGijo5QsUyacNUUTOnGO42xSnv4oOwpDi1zYkc\n",
 604 |        "efUc3I5Gk6PhyTuVKaOGyLUAYPGIoY9Pu/atL/L92+4q9wbflRJ2Trpm/jPjdBtfnqB/dIThcl8A\n",
 605 |        "KG7hbRuKnb8qsQsVvVlTrwQAQMUlf3kwJI24Z4JhPMtcfng5GcH49GsrxJpGvvHIaeem2ma+KSjQ\n",
 606 |        "lIwUdYyCY8j4dE1KzijNnIP2llF2wcXNnsoapw9XxsgYAl6k+KzUXbi2yP3KR2ecf6z3BFsBICdW\n",
 607 |        "nvnIaG3eHybqX7vbpEqUMT+9OL4Qpe8VON7dXuFd39v19FoAABRVePbGGuXTszO0P7tu6lghUonE\n",
 608 |        "llRdrhArLvmKdh9u29jcFiRRkfLUxBiFNiqSU9icoZQHo5mYBI1MBgBH6wMNb+U7Pnw337H4gi1Y\n",
 609 |        "ciWs+uks3Z9fztUvfzxTm9Ne8XXkvQLHNytOOZeiD4e0PgkAIAYCYknKUNUDSXEKzdWNpnil7r4p\n",
 610 |        "xqkjTarZMtk/K8TQ6Qve78qqvXurGwIJqcOUKfUWHsm8KGvxSP68YudXq4pcj39X49uOK2X142O0\n",
 611 |        "Tz5/u/7TVybqH0rSya6ZBwD21/gubbrgWdDgEOx9WUhfBaC2ibcEBYm7a7x+ukrBMNcEZggyR0TE\n",
 612 |        "T8zUPjikQ4VosQZbTpS4vqizBKvqmvjsqnpfzaZyx9JPiz1/bfGKdgD45XB1zoIMzYbfTdS/NClB\n",
 613 |        "Gct0USiY3YL/g0LHy/uq/Ef6uo5+n0R/vyhp17Klpge763f8rMu6YU/zrn2nml+2WtH+Z+5IAAFc\n",
 614 |        "2bUTdTDOSNa9+cQY7YLsOIXhevEkCvzph7a8laecz/Un/z4/Ae04XeL3UQb57IwU9ZDr9UuKVajv\n",
 615 |        "nxp1+1UVIo/LjztZkKH59fO3G/JemqCfmaCRqbqbd90ZZ8FfjtkfAyD0J/9+C2h1hDwsSxvGjNDc\n",
 616 |        "b4zk5NfrSwiQblLHzZhg+Jf4aPlUwpDqkQqa9nimbt1/TDH8OitGMaQnj+RJS6B1fbF7SY1TqO5v\n",
 617 |        "/v0WAADl1f7zokgS7s7VT2DZ7pegUjBM7mjtiDZbcN4j0YrHH0rXpCtY0qPX0cVL0rv5jv/ZXend\n",
 618 |        "0u/EESYBAFBU4T4Qa5TflZOhTe7pmKpaP8kCVUVw1+yhXfJWvn1P3hnXi33JsTN6PnP3hHZ8Z3/h\n",
 619 |        "aLHzmkNPuPj7Bc/F/Q38CwjTpSwQXgE4Vmwry9tpfq/ZFgqFMy4AVDtCvi8rvMvOmv0N4YwbVgEA\n",
 620 |        "sPM72/KVnzfspmH7HQGCRLG2yL1+z8XwvPcdCbsAANh+xPzstgMtxeGKt+6MK3/tacfvwhWvIwMi\n",
 621 |        "oKEBtm0H7W+UVfkc/Y1V0BhoPlDr/w1w/eu1vjIgAgDg22OtX6/eYfnEz/focrZTHAFR+PSs56/7\n",
 622 |        "q32nwpjazxgwAQCwcU/T62t3WL7r6/jVRa6/byp1rei+Z98ZUAEAhEPHPc8fKnTU9nbgtnOe8h0l\n",
 623 |        "9hcGIqmODLQAHCy2Xti6v/XNRivf43f4fFvIteu854+VHnR7q9tfBlwAAGz+pnndB9vM26UebAe8\n",
 624 |        "SLHujPOTPVW+rwY+sxskAAC2HrA8t2Vvc7ffP1r9o+vwR2dcr92InIAbKKC1FZ5tB1tf+/G8p8sv\n",
 625 |        "N/9Q5zd/XR34LYCwV5JdccMEAMDBk45DH243r/X4xGvqxFa/GNpS7n6rwOwNWwHVE26oAADYurf1\n",
 626 |        "zx/utOzt+DMKYM0p17YtZZ5VNzqfsB2HewG1WXE8PoZ7gOclbTIvynZf9JV+fqZtfgs/8F/Nu5rB\n",
 627 |        "EIBmJ+8QRMmpU7EzGRsf2FzuePqYRbzh/zE26EwdrT10f6r6o8HOYzCJB9Dpff8tbnGLG8L/A/WE\n",
 628 |        "roTBs2RqAAAAAElFTkSuQmCC'\n",
 629 |        "     style='height:25px; border-radius:12px; display: inline-block; float: left; vertical-align: middle'></img>\n",
 630 |        "\n",
 631 |        "\n",
 632 |        "\n",
 633 |        "\n",
 634 |        "  <img src='data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\n",
 635 |        "AAAFMAAABTABZarKtgAAABl0RVh0U29mdHdhcmUAd3d3Lmlua3NjYXBlLm9yZ5vuPBoAAArNSURB\n",
 636 |        "VFiFnVd5VFNXGv/ee0kgGyQhbFoXIKCFYEXEDVErTucMoKUOWA/VLsNSLPQgFTOdyrHPiIp1lFIQ\n",
 637 |        "OlaPShEG3EpPcQmISCuV1bQ1CLKIULeQhJA9JO+9+UMT0x5aPfOdc895373f/e7v/t537/ddBF5Q\n",
 638 |        "JBIJl81mJwCACEVRQBCEQhAEAQCgnghCURRCkmS7Wq2+WlJSYn0Rv8jzDHAcD0EQJIVGo5mFQuGF\n",
 639 |        "jIyMu39kq1KpkOrq6gU6nS6aIAiGzWY7VVBQ0P9/AcjNzWXy+fxcOp2uiY+Przm0d6+n8dblv/Fo\n",
 640 |        "kzM4SzYfPlRePvFnjnt6ehh1dXVv2mw2nlar/byoqMj8wgBwHBchCJIZEhJSeu1yHVi7vtu02t8+\n",
 641 |        "NykQ7BMWoOUMhXQsXLv5IQAwSJJEEASxcDicoeTk5DtCoZBy9XX69Gnv3t7ebJIky3EcH3guAKlU\n",
 642 |        "GoGiaOKWLVsOvhs7/9XXPMde3/IyIFbMnaPDuD5AUdQuOf2XlD0npTExMWYAgNbWVpZcLg8xGAzB\n",
 643 |        "JEnSvby82tPT052LaTQatLy8fBtJkt/s3Lnz5h8CwHFcRKPRNu/YsePAjh072KTs0IGCxRg8RgUB\n",
 644 |        "TGpSx6cmHgMAfNqN6Xa1GvJ/D35gYAAViURkcXHxUrPZHDRv3rxv4uLiDI7xPXv2bLdYLBUFBQWD\n",
 645 |        "jj7M8ZGbm8tkMpmSrKysQiaTScXGxtpqL7dManT6tcu5mgEWWJyOhicozpk+c3NsbKzNFcBbWWEf\n",
 646 |        "1Td9/upA30i3ZJv0h8bGxiSFQmFcuHDhOACAWCy+0d3dvX3lypUtzc3N9t8AiIuLk4SEhByLiooy\n",
 647 |        "AgAcO3ZsNlPgH3Cttb35JZo+bCYXIQAA9MDiUW7sWS1KN687w6Mera2twa2trfMvXboUOS28Pyb1\n",
 648 |        "U08McRtf/sXBSmt5cc35pqamVQqFwhoZGallMpnU/fv3e7RaberVq1d/AABAn1IfQqfTNRs3blQB\n",
 649 |        "AFy+fJk7Nja2XCKRnD3dNSorusPq6NfTPR+gPiEEoLRFXO1tS2+zavv27ReftjNttyr0S1/j0rUP\n",
 650 |        "PEJQwNwQYGgAACQSyXmNRhMtk8lYAAApKSlKDMP0+fn5QU4ACIKkxMfH1zjYuHnz5uspKSlOfdX7\n",
 651 |        "u68fvOePcCzKQR4YVCgATGfa/F3pnzaHWOAXSDyaMCqH2+r8VXErP3D+snXr1tV2dXW94dATExOr\n",
 652 |        "6XT6JgAAVCKRcDEMM4WHh9sAAHJyUqNu//wDymKx7AAAVVVVPiaTKXxByrYMvBsxEMSTwPXhuL+8\n",
 653 |        "e/fu9fv371+flvbemogYNz+TnsBOFEwMFO8/KzEYDKFVVVX+AAChoaGT7u7ud48ePRro0DEMs+bl\n",
 654 |        "5bFRNpud4O3tfdGBzq5uy/5wTUPM/q2zC9atmbVqeHg4Pi0t7WxGRoZFH5rw76I7LI8HqHfwPL7d\n",
 655 |        "rfVagzw1NfW81t4ePUfsP/OrnWZ6fPSuUqFQSEkkkrOjo6OvuQR5q0ajiXLoPj4+lzgcTjwKACLH\n",
 656 |        "9SqXy2kzhBO8haGo+UA2wZW+p880DxeveGt9aHx9fT09ctlq3sC0NT9e6xsbjuZblSxl7wKtVotM\n",
 657 |        "m6PnXvlmZJBtX91CEMQsxyJsNlteXl4udugIghAajQYFAEhPTx9AEGQOimGY8y4oLt63KlJkdB4t\n",
 658 |        "P282Z/c/dPrDH04ktJ9P2tfWXP3+2o1vHzunEp6Xq0lsGt08KzUrcSGTQ3n3XeefLCs5UqnT6Rap\n",
 659 |        "VCoEACA7O/snvV4f5gJooLa2NsihoygKKEVRzquTND2OCpttGXdG1tOxwOlgzdvE9v30rV+m3W5I\n",
 660 |        "2jfJNQmLH85QUUzPNTwvkAx0+vVGhq2/VV9fT+dyuZ01NTXOXQOA3fGxevXq2waDYY5r8KIoij5b\n",
 661 |        "jzB5Cz2oKdOo0erOm+1tVuVtBMZXElNMRJR1fvvjx9iPLQ/RjpuB0Xu/Vp7YmH1864YNG3oNBkPw\n",
 662 |        "VD7mzp1rJUnSzZUBmqsBggAgGFC/n6jVA+3WoN3tu1Gg39cg2tEx1Cg3CIJHsclxnl2HRorMN8Z0\n",
 663 |        "fRW+vr7GJ36Q56Z5h9BIknzGAMJWtvdQYs0EZe3/FSwqk5tpXEMb1JoYD+n8xRdQJl/fMPEgzKhS\n",
 664 |        "L40KCD7lGzg92qIyovpb3y/msT2un2psvFpWVvYyl8vtc1nDSXFXV5c7iqLOtEyS5LNBAADfWeKm\n",
 665 |        "Ly4uuvR1++sfv51/P5sfnHm2/Iy+mBmwsaHJbpt+Q0jHSS7TZ/PSNVkNJ/973OxtemD1s91CPb12\n",
 666 |        "h9MfvZsk5meo1eqo5ORkxTNWn7HR1tY2l8PhOAsUiqIolCRJcETtv/61qzNySYK5trZ2TCgUUiwW\n",
 667 |        "S1FSUhLR+bA/kAzwXcAbHa/cFhrTXrJ/v+7IkSPu3Je4Xm5eboJv2wba5QbO5fQwxhsP679Y+nFO\n",
 668 |        "jgAAoKSkJILFYjnBGI1G0YYNGwYBnqRoiqIQlKKojurq6gUAAAKBgKQoiuGYkJWVpTCZTOKmI1Xd\n",
 669 |        "HwnDcm+cOnOMw+H0FxYWbqpvqv/r9EV+bky+O+/QoUPiqJRt9JphTLFHbKBCR87tWL9EPN9oNIZn\n",
 670 |        "ZWUpXHaMCQQCEgCgsrIyEgBuoGq1+qpOp4t2GPH5/BvFxcVLHXpgYGDD8ePH/56Xl2cCAMjMzOxP\n",
 671 |        "S0s7pWfow4RCbz/fAF9RT0+P9yeffHJySSqev+9nxLD1FaAlTR8vlJ8vxxzsFhUVLRMIBB0OvwaD\n",
 672 |        "YRlFUdfQkpISK0EQ9J6eHgYAQEZGxl2z2Rw0MjJCBwBITk5+xOVyfzpw4ECSw5lQKKQIbxtJm4EN\n",
 673 |        "8eZ7jPz0oNv+dK5FG/jq54eH+IFr/S1KabBy0UerAvI+++wzD4vFEpCWljYEACCTyVh2ux3FcXwS\n",
 674 |        "BQCw2WxVdXV1bzrQRURE1FVVVTn1zMzM/pkzZ35/9OjRd0pLS19RqVQIy4/tCwDgOcPTQvFQEQBA\n",
 675 |        "aWnpK0ERK2LbyVllN341GUJ4YDu8zD5bKyur7O+85tx9Z2fnO1ar9QjA04KkpaVFs2LFir8olcq7\n",
 676 |        "YWFhJpFINNnX16drbGyMjY6Ovg0AIBaLjcuXL5d3d3d7XbhwIW704b3F479MeD1qVfJ5Og/bvb4R\n",
 677 |        "LwaDMZabm9uwflNa/z/3HOIv5NsDEK7XS7FeevXPvYNLvm5S/GglCK5KpZorlUobXE8g5ObmMqVS\n",
 678 |        "6UG1Wu1BURSHoijOiRMnwgoLC7coFAqBo+9Fm0KhEKStmvvto3TeucFN7pVJYbytarXaQyqVHsRx\n",
 679 |        "3N15TF1BuBaljr4rV66wOzo63mAymXdzcnKuwwtIUVHRMqvVGkgQxMV7NXvyJijGvcNXB/7z5Zdf\n",
 680 |        "bicI4gSO40NTAgD4bVnuODIAT2pElUq1FEEQO4fD6QsPD++fqixHEATj8/ntjoCrqKhwS0hIsJWV\n",
 681 |        "leURBHEOx3G563pT3tn5+flBDAbjg6CgoMMpKSlK17GhoSFMJpMFPk04DJIkEQzDzCwW6+5UD5Oa\n",
 682 |        "mhrfO3fufECS5GHXnf8pAAAAHMfdURTdimGYPjExsTo0NHTyj2ynEplMxurs7HyHIAiKJMlSHMct\n",
 683 |        "U9k9N2vl5+cH0en0TRiGWX18fC65vnh+LxqNBq2oqFhgMpmi7XY7arVaj+zdu/fxn/l/4bSZl5fH\n",
 684 |        "5nK5CQAQMtXznCRJePpEbwOAZhzHX4ix/wHzzC/tu64gcwAAAABJRU5ErkJggg=='\n",
 685 |        "       style='height:15px; border-radius:12px; display: inline-block; float: left'></img>\n",
 686 |        "  \n",
 687 |        "\n",
 688 |        "\n",
 689 |        "</div>\n"
 690 |       ],
 691 |       "text/plain": [
 692 |        "<IPython.core.display.HTML object>"
 693 |       ]
 694 |      },
 695 |      "metadata": {},
 696 |      "output_type": "display_data"
 697 |     }
 698 |    ],
 699 |    "source": [
 700 |     "import warnings\n",
 701 |     "warnings.filterwarnings('ignore')\n",
 702 |     "%matplotlib inline\n",
 703 |     "import matplotlib.pyplot as plt\n",
 704 |     "import matplotlib\n",
 705 |     "matplotlib.style.use('ggplot')\n",
 706 |     "matplotlib.rcParams['figure.figsize'] = (12, 15)\n",
 707 |     "from pyspark.sql import SparkSession\n",
 708 |     "import holoviews as hv\n",
 709 |     "import geoviews as gv\n",
 710 |     "import geoviews.feature as gf\n",
 711 |     "from cartopy import crs\n",
 712 |     "\n",
 713 |     "hv.notebook_extension()\n"
 714 |    ]
 715 |   },
 716 |   {
 717 |    "cell_type": "code",
 718 |    "execution_count": 128,
 719 |    "metadata": {},
 720 |    "outputs": [],
 721 |    "source": [
 722 |     "\"\"\" Interface for Data Ingestion.\n",
 723 |     "\"\"\"\n",
 724 |     "# Licensed to the Apache Software Foundation (ASF) under one or more\n",
 725 |     "# contributor license agreements.  See the NOTICE file distributed with\n",
 726 |     "# this work for additional information regarding copyright ownership.\n",
 727 |     "# The ASF licenses this file to You under the Apache License, Version 2.0\n",
 728 |     "# (the \"License\"); you may not use this file except in compliance with\n",
 729 |     "# the License.  You may obtain a copy of the License at\n",
 730 |     "#\n",
 731 |     "#    http://www.apache.org/licenses/LICENSE-2.0\n",
 732 |     "#\n",
 733 |     "# Unless required by applicable law or agreed to in writing, software\n",
 734 |     "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
 735 |     "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
 736 |     "# See the License for the specific language governing permissions and\n",
 737 |     "# limitations under the License.\n",
 738 |     "\n",
 739 |     "\n",
 740 |     "from __future__ import print_function\n",
 741 |     "from __future__ import absolute_import\n",
 742 |     "import os\n",
 743 |     "import numpy as np\n",
 744 |     "import pandas as pd \n",
 745 |     "import xarray as xr\n",
 746 |     "import itertools\n",
 747 |     "from glob import glob\n",
 748 |     "# from pyspark.sql import SparkSession # Removing this line simply makes the library compatible with Spark 1.6.3 !\n",
 749 |     "\n",
 750 |     "def ncread(sc, paths, mode='single', **kwargs):\n",
 751 |     "    \"\"\"Calls sparkxarray netcdf read function based on the mode parameter.\n",
 752 |     "\n",
 753 |     "    ============ ==============================\n",
 754 |     "    Mode          Reading Function\n",
 755 |     "    ------------ ------------------------------\n",
 756 |     "    single       : read_nc_single\n",
 757 |     "    multi        : read_nc_multi\n",
 758 |     "    Anything else: Throw an exception\n",
 759 |     "    ============= ==============================\n",
 760 |     "\n",
 761 |     "    Parameters\n",
 762 |     "    ----------\n",
 763 |     "\n",
 764 |     "    sc       :  sparkContext object\n",
 765 |     "\n",
 766 |     "    paths    :  str or sequence\n",
 767 |     "                Either a string glob in the form \"path/to/my/files/*.nc\" or an explicit\n",
 768 |     "                list of files to open\n",
 769 |     "\n",
 770 |     "    mode     : str\n",
 771 |     "               'single' for a single file\n",
 772 |     "               'multi' for multiple files\n",
 773 |     "\n",
 774 |     "    **kwargs : dict\n",
 775 |     "               partitioning options to be passed on to the actual read function.\n",
 776 |     "            \n",
 777 |     "    \n",
 778 |     "    \"\"\"\n",
 779 |     "\n",
 780 |     "    if 'partitions' not in kwargs:\n",
 781 |     "        kwargs['partitions'] = None\n",
 782 |     "\n",
 783 |     "    if 'partition_on' not in kwargs:\n",
 784 |     "        kwargs['partition_on'] = ['time']\n",
 785 |     "    \n",
 786 |     "    if 'decode_times' not in kwargs:\n",
 787 |     "        kwargs['decode_times'] = True\n",
 788 |     "\n",
 789 |     "    error_msg = (\"You specified a mode that is not implemented.\")\n",
 790 |     "\n",
 791 |     "    if (mode == 'single'):\n",
 792 |     "        return _read_nc_single(sc, paths, **kwargs)\n",
 793 |     "\n",
 794 |     "    elif (mode == 'multi'):\n",
 795 |     "        return _read_nc_multi(sc, paths, **kwargs)\n",
 796 |     "    else:\n",
 797 |     "        raise NotImplementedError(error_msg)\n",
 798 |     "\n",
 799 |     "        \n",
 800 |     "def _read_nc_single(sc, paths, **kwargs):\n",
 801 |     "    \"\"\" Read a single netCDF file\n",
 802 |     "\n",
 803 |     "    Parameters\n",
 804 |     "    -----------\n",
 805 |     "    sc       :  sparkContext object\n",
 806 |     "\n",
 807 |     "    paths    :  str\n",
 808 |     "                an explicit filename to open\n",
 809 |     "    \n",
 810 |     "\n",
 811 |     "    **kwargs : dict\n",
 812 |     "               Additional arguments for partitioning \n",
 813 |     "\n",
 814 |     "    \"\"\"\n",
 815 |     "    partition_on = kwargs.get('partition_on')\n",
 816 |     "    partitions = kwargs.get('partitions')\n",
 817 |     "    decode_times=kwargs.get('decode_times')\n",
 818 |     "\n",
 819 |     "    dset = xr.open_dataset(paths, autoclose=True, decode_times=decode_times)\n",
 820 |     "\n",
 821 |     "    # D = {'dim_1': dim_1_size, 'dim_2': dim_2_size, ...}\n",
 822 |     "    D = {dset[dimension].name:dset[dimension].size for dimension in partition_on}\n",
 823 |     "    \n",
 824 |     "    # dim_sizes = [range(dim_1_size), range(dim_2_size), range(...)]\n",
 825 |     "    dim_ranges = [range(dim_size) for dim_size in D.values()]\n",
 826 |     "    \n",
 827 |     "\n",
 828 |     "    dim_cartesian_product_indices = [element for element in itertools.product(*dim_ranges)]\n",
 829 |     "\n",
 830 |     "    # create a list of dictionaries for  positional indexing\n",
 831 |     "    positional_indices = [dict(zip(partition_on, ij)) for ij in dim_cartesian_product_indices]\n",
 832 |     "\n",
 833 |     "    if not partitions:\n",
 834 |     "        partitions = len(dim_cartesian_product_indices)\n",
 835 |     "\n",
 836 |     "    if partitions > len(dim_cartesian_product_indices):\n",
 837 |     "        partitions = len(dim_cartesian_product_indices)\n",
 838 |     "\n",
 839 |     "    \n",
 840 |     "    # Create an RDD\n",
 841 |     "    rdd = sc.parallelize(positional_indices, partitions).map(lambda x: _readone_slice(dset, x))\n",
 842 |     "\n",
 843 |     "    return rdd\n",
 844 |     "\n",
 845 |     "\n",
 846 |     "def _readone_slice(dset, positional_indices):\n",
 847 |     "    \"\"\"Read a slice from an xarray.Dataset.\n",
 848 |     "\n",
 849 |     "    Parameters\n",
 850 |     "    ----------\n",
 851 |     "\n",
 852 |     "    dset                : file_object\n",
 853 |     "                         xarray.Dataset object\n",
 854 |     "    positional_indices  : dict\n",
 855 |     "                          dict containing positional indices for each dimension\n",
 856 |     "                          e.g. {'lat': 0, 'lon': 0}\n",
 857 |     "\n",
 858 |     "    Returns\n",
 859 |     "    ---------\n",
 860 |     "    chunk               : xarray.Dataset\n",
 861 |     "                         a subset of the Xarray Dataset\n",
 862 |     "\n",
 863 |     "    \"\"\"\n",
 864 |     "\n",
 865 |     "    # Change the positional indices into slice objects\n",
 866 |     "    # e.g {'lat': 0, 'lon': 0} ---> {'lat': slice(0, 1, None),  'lon': slice(0, 1, None)}\n",
 867 |     "    positional_slices = {dim: slice(positional_indices[dim], positional_indices[dim]+1) \n",
 868 |     "                                                         for dim in positional_indices}\n",
 869 |     "\n",
 870 |     "    # Read a slice for the given positional_slices\n",
 871 |     "    chunk = dset[positional_slices]\n",
 872 |     "    return chunk\n",
 873 |     "\n",
 874 |     "\n",
 875 |     "def _read_nc_multi(sc, paths, **kwargs):\n",
 876 |     "    \"\"\" Read multiple netCDF files\n",
 877 |     "\n",
 878 |     "    Parameters\n",
 879 |     "    -----------\n",
 880 |     "    sc       :  sparkContext object\n",
 881 |     "\n",
 882 |     "    paths    :  str or sequence\n",
 883 |     "                Either a string glob in the form \"path/to/my/files/*.nc\" or an explicit\n",
 884 |     "                list of files to open\n",
 885 |     "\n",
 886 |     "    **kwargs : dict\n",
 887 |     "               Additional arguments for partitioning \n",
 888 |     "\n",
 889 |     "    \"\"\"\n",
 890 |     "\n",
 891 |     "    partition_on = kwargs.get('partition_on')\n",
 892 |     "    partitions = kwargs.get('partitions')\n",
 893 |     "\n",
 894 |     "    dset = xr.open_mfdataset(paths, autoclose=True)\n",
 895 |     "\n",
 896 |     "    # D = {'dim_1': dim_1_size, 'dim_2': dim_2_size, ...}\n",
 897 |     "    D ={dset[dimension].name:dset[dimension].size for dimension in partition_on}\n",
 898 |     "    \n",
 899 |     "    # dim_sizes = [range(dim_1_size), range(dim_2_size), range(...)]\n",
 900 |     "    dim_ranges = [range(dim_size) for dim_size in D.values()]\n",
 901 |     "\n",
 902 |     "    dim_cartesian_product_indices = [element for element in itertools.product(*dim_ranges)]\n",
 903 |     "\n",
 904 |     "    # create a list of dictionaries for positional indexing\n",
 905 |     "    positional_indices = [dict(zip(partition_on, ij)) for ij in dim_cartesian_product_indices]\n",
 906 |     "\n",
 907 |     "    if not partitions:\n",
 908 |     "        partitions = len(dim_cartesian_product_indices) / 50\n",
 909 |     "\n",
 910 |     "    if partitions > len(dim_cartesian_product_indices):\n",
 911 |     "        partitions = len(dim_cartesian_product_indices)\n",
 912 |     "\n",
 913 |     "    \n",
 914 |     "    # Create an RDD\n",
 915 |     "    rdd = sc.parallelize(positional_indices, partitions).map(lambda x: readone_slice(dset, x))\n",
 916 |     "\n",
 917 |     "    return rdd\n",
 918 |     "\n"
 919 |    ]
 920 |   },
 921 |   {
 922 |    "cell_type": "code",
 923 |    "execution_count": 3,
 924 |    "metadata": {},
 925 |    "outputs": [],
 926 |    "source": [
 927 |     "# Create sparksession\n",
 928 |     "spark = SparkSession.builder.appName(\"bias\").getOrCreate()\n",
 929 |     "sc = spark.sparkContext"
 930 |    ]
 931 |   },
 932 |   {
 933 |    "cell_type": "code",
 934 |    "execution_count": 167,
 935 |    "metadata": {},
 936 |    "outputs": [],
 937 |    "source": [
 938 |     "FILE_1 = \"/home/abanihi/Documents/Github/spark-xarray/datasets/AFRICA_KNMI-RACMO2.2b_CTL_ERAINT_MM_50km_1989-2008_tasmax.nc\"\n",
 939 |     "FILE_2 = \"/home/abanihi/Documents/Github/spark-xarray/datasets/AFRICA_UC-WRF311_CTL_ERAINT_MM_50km-rg_1989-2008_tasmax.nc\""
 940 |    ]
 941 |   },
 942 |   {
 943 |    "cell_type": "code",
 944 |    "execution_count": 168,
 945 |    "metadata": {},
 946 |    "outputs": [
 947 |     {
 948 |      "data": {
 949 |       "text/plain": [
 950 |        "<xarray.Dataset>\n",
 951 |        "Dimensions:       (bnds: 2, height: 1, rlat: 201, rlon: 194, time: 240)\n",
 952 |        "Coordinates:\n",
 953 |        "  * time          (time) float64 468.0 469.0 470.0 471.0 472.0 473.0 474.0 ...\n",
 954 |        "  * rlon          (rlon) float32 -24.64 -24.2 -23.76 -23.32 -22.88 -22.44 ...\n",
 955 |        "    lon           (rlat, rlon) float32 -24.64 -24.2 -23.76 -23.32 -22.88 ...\n",
 956 |        "  * rlat          (rlat) float32 -45.76 -45.32 -44.88 -44.44 -44.0 -43.56 ...\n",
 957 |        "    lat           (rlat, rlon) float32 -45.76 -45.76 -45.76 -45.76 -45.76 ...\n",
 958 |        "  * height        (height) float32 2.0\n",
 959 |        "Dimensions without coordinates: bnds\n",
 960 |        "Data variables:\n",
 961 |        "    rotated_pole  |S1 b''\n",
 962 |        "    time_bnds     (time, bnds) float64 468.0 469.0 469.0 470.0 470.0 471.0 ...\n",
 963 |        "    tasmax        (time, height, rlat, rlon) float64 283.4 283.4 283.4 283.4 ...\n",
 964 |        "Attributes:\n",
 965 |        "    institution:     KNMI\n",
 966 |        "    Conventions:     CF-1.0\n",
 967 |        "    conventionsURL:  http://www.cgd.ucar.edu/cms/eaton/cf-metadata/index.html\n",
 968 |        "    source:          RACMO2.2b\n",
 969 |        "    project_id:      ENSEMBLES\n",
 970 |        "    experiment_id:   ERAIN CORDEX-Africa-50km\n",
 971 |        "    realization:     1\n",
 972 |        "    comments:        beta-version RACMO2.2 with default physics from ECMWF CY...\n",
 973 |        "    creation_date:   2010-04-09 13:53:22"
 974 |       ]
 975 |      },
 976 |      "execution_count": 168,
 977 |      "metadata": {},
 978 |      "output_type": "execute_result"
 979 |     }
 980 |    ],
 981 |    "source": [
 982 |     "knmi = xr.open_dataset(FILE_1, decode_times=False)\n",
 983 |     "knmi"
 984 |    ]
 985 |   },
 986 |   {
 987 |    "cell_type": "code",
 988 |    "execution_count": 169,
 989 |    "metadata": {},
 990 |    "outputs": [
 991 |     {
 992 |      "data": {
 993 |       "text/plain": [
 994 |        "<xarray.Dataset>\n",
 995 |        "Dimensions:       (bnds: 2, height: 1, rlat: 201, rlon: 194, time: 240)\n",
 996 |        "Coordinates:\n",
 997 |        "    lon           (rlat, rlon) float64 -24.64 -24.2 -23.76 -23.32 -22.88 ...\n",
 998 |        "    lat           (rlat, rlon) float64 -45.76 -45.76 -45.76 -45.76 -45.76 ...\n",
 999 |        "  * height        (height) float32 2.0\n",
1000 |        "  * time          (time) float64 1.426e+04 1.429e+04 1.432e+04 1.435e+04 ...\n",
1001 |        "  * rlat          (rlat) float64 -45.76 -45.32 -44.88 -44.44 -44.0 -43.56 ...\n",
1002 |        "  * rlon          (rlon) float64 -24.64 -24.2 -23.76 -23.32 -22.88 -22.44 ...\n",
1003 |        "Dimensions without coordinates: bnds\n",
1004 |        "Data variables:\n",
1005 |        "    tasmax        (time, height, rlat, rlon) float64 283.4 283.4 283.5 283.5 ...\n",
1006 |        "    rotated_pole  |S1 b''\n",
1007 |        "    time_bnds     (time, bnds) float64 1.424e+04 1.428e+04 1.428e+04 ...\n",
1008 |        "Attributes:\n",
1009 |        "    Conventions:               CF-1.4\n",
1010 |        "    institution:               Universidad de Cantabria (Spain)\n",
1011 |        "    title:                     CORDEX Africa Sensitivity Run\n",
1012 |        "    comment:                   The simulation was forced with ERA-Interim 2x2...\n",
1013 |        "    nco_openmp_thread_number:  1"
1014 |       ]
1015 |      },
1016 |      "execution_count": 169,
1017 |      "metadata": {},
1018 |      "output_type": "execute_result"
1019 |     }
1020 |    ],
1021 |    "source": [
1022 |     "wrf = xr.open_dataset(FILE_2, decode_times=False)\n",
1023 |     "wrf"
1024 |    ]
1025 |   },
1026 |   {
1027 |    "cell_type": "code",
1028 |    "execution_count": 170,
1029 |    "metadata": {},
1030 |    "outputs": [],
1031 |    "source": [
1032 |     "import ocw.data_source.local as local\n",
1033 |     "import ocw.dataset_processor as dsp\n",
1034 |     "import ocw.evaluation as evaluation\n",
1035 |     "import ocw.metrics as metrics\n",
1036 |     "import ocw.plotter as plotter"
1037 |    ]
1038 |   },
1039 |   {
1040 |    "cell_type": "code",
1041 |    "execution_count": 171,
1042 |    "metadata": {},
1043 |    "outputs": [
1044 |     {
1045 |      "name": "stdout",
1046 |      "output_type": "stream",
1047 |      "text": [
1048 |       "Loading /home/abanihi/Documents/Github/spark-xarray/datasets/AFRICA_KNMI-RACMO2.2b_CTL_ERAINT_MM_50km_1989-2008_tasmax.nc into an OCW Dataset Object\n",
1049 |       "KNMI_Dataset.values shape: (times, lats, lons) - (240, 201, 194) \n",
1050 |       "\n",
1051 |       "Loading /home/abanihi/Documents/Github/spark-xarray/datasets/AFRICA_UC-WRF311_CTL_ERAINT_MM_50km-rg_1989-2008_tasmax.nc into an OCW Dataset Object\n",
1052 |       "WRF_Dataset.values shape: (times, lats, lons) - (240, 201, 194) \n",
1053 |       "\n"
1054 |      ]
1055 |     }
1056 |    ],
1057 |    "source": [
1058 |     "\"\"\" Step 1: Load Local NetCDF Files into OCW Dataset Objects \"\"\"\n",
1059 |     "print(\"Loading %s into an OCW Dataset Object\" % (FILE_1,))\n",
1060 |     "knmi_dataset = local.load_file(FILE_1, \"tasmax\")\n",
1061 |     "print(\"KNMI_Dataset.values shape: (times, lats, lons) - %s \\n\" %\n",
1062 |     "      (knmi_dataset.values.shape,))\n",
1063 |     "\n",
1064 |     "print(\"Loading %s into an OCW Dataset Object\" % (FILE_2,))\n",
1065 |     "wrf_dataset = local.load_file(FILE_2, \"tasmax\")\n",
1066 |     "print(\"WRF_Dataset.values shape: (times, lats, lons) - %s \\n\" %\n",
1067 |     "      (wrf_dataset.values.shape,))"
1068 |    ]
1069 |   },
1070 |   {
1071 |    "cell_type": "code",
1072 |    "execution_count": 172,
1073 |    "metadata": {},
1074 |    "outputs": [
1075 |     {
1076 |      "name": "stdout",
1077 |      "output_type": "stream",
1078 |      "text": [
1079 |       "Temporally Rebinning the Datasets to an Annual Timestep\n",
1080 |       "KNMI_Dataset.values shape: (20, 201, 194)\n",
1081 |       "WRF_Dataset.values shape: (20, 201, 194) \n",
1082 |       "\n",
1083 |       "\n"
1084 |      ]
1085 |     }
1086 |    ],
1087 |    "source": [
1088 |     "\"\"\" Step 2: Temporally Rebin the Data into an Annual Timestep \"\"\"\n",
1089 |     "print(\"Temporally Rebinning the Datasets to an Annual Timestep\")\n",
1090 |     "knmi_dataset = dsp.temporal_rebin(knmi_dataset, temporal_resolution='annual')\n",
1091 |     "wrf_dataset = dsp.temporal_rebin(wrf_dataset, temporal_resolution='annual')\n",
1092 |     "print(\"KNMI_Dataset.values shape: %s\" % (knmi_dataset.values.shape,))\n",
1093 |     "print(\"WRF_Dataset.values shape: %s \\n\\n\" % (wrf_dataset.values.shape,))"
1094 |    ]
1095 |   },
1096 |   {
1097 |    "cell_type": "code",
1098 |    "execution_count": 175,
1099 |    "metadata": {},
1100 |    "outputs": [
1101 |     {
1102 |      "name": "stdout",
1103 |      "output_type": "stream",
1104 |      "text": [
1105 |       "The KNMI_Dataset spatial bounds (min_lat, max_lat, min_lon, max_lon) are: \n",
1106 |       "(-45.7599983215332, 42.2400016784668, -24.639999389648438, 60.279998779296875)\n",
1107 |       "\n",
1108 |       "The KNMI_Dataset spatial resolution (lat_resolution, lon_resolution) is: \n",
1109 |       "(0.43999863, 0.44000053)\n",
1110 |       "\n",
1111 |       "\n"
1112 |      ]
1113 |     }
1114 |    ],
1115 |    "source": [
1116 |     "\"\"\" Step 3: Spatially Regrid the Dataset Objects to a 1 degree grid \"\"\"\n",
1117 |     "#  The spatial_boundaries() function returns the spatial extent of the dataset\n",
1118 |     "print(\"The KNMI_Dataset spatial bounds (min_lat, max_lat, min_lon, max_lon) are: \\n\"\n",
1119 |     "      \"%s\\n\" % (knmi_dataset.spatial_boundaries(), ))\n",
1120 |     "print(\"The KNMI_Dataset spatial resolution (lat_resolution, lon_resolution) is: \\n\"\n",
1121 |     "      \"%s\\n\\n\" % (knmi_dataset.spatial_resolution(), ))"
1122 |    ]
1123 |   },
1124 |   {
1125 |    "cell_type": "code",
1126 |    "execution_count": 176,
1127 |    "metadata": {},
1128 |    "outputs": [
1129 |     {
1130 |      "name": "stdout",
1131 |      "output_type": "stream",
1132 |      "text": [
1133 |       "Spatially Regridding the KNMI_Dataset...\n",
1134 |       "Final shape of the KNMI_Dataset: \n",
1135 |       "(20, 88, 85)\n",
1136 |       "\n"
1137 |      ]
1138 |     }
1139 |    ],
1140 |    "source": [
1141 |     "min_lat, max_lat, min_lon, max_lon = knmi_dataset.spatial_boundaries()\n",
1142 |     "\n",
1143 |     "# Using the bounds we will create a new set of lats and lons on 1 degree step\n",
1144 |     "new_lons = np.arange(min_lon, max_lon, 1)\n",
1145 |     "new_lats = np.arange(min_lat, max_lat, 1)\n",
1146 |     "\n",
1147 |     "# Spatially regrid datasets using the new_lats, new_lons numpy arrays\n",
1148 |     "print(\"Spatially Regridding the KNMI_Dataset...\")\n",
1149 |     "knmi_dataset = dsp.spatial_regrid(knmi_dataset, new_lats, new_lons)\n",
1150 |     "print(\"Final shape of the KNMI_Dataset: \\n\"\n",
1151 |     "      \"%s\\n\" % (knmi_dataset.values.shape, ))"
1152 |    ]
1153 |   },
1154 |   {
1155 |    "cell_type": "code",
1156 |    "execution_count": 177,
1157 |    "metadata": {},
1158 |    "outputs": [
1159 |     {
1160 |      "name": "stdout",
1161 |      "output_type": "stream",
1162 |      "text": [
1163 |       "Final shape of the WRF_Dataset: \n",
1164 |       "(20, 88, 85)\n",
1165 |       "\n"
1166 |      ]
1167 |     }
1168 |    ],
1169 |    "source": [
1170 |     "wrf_dataset = dsp.spatial_regrid(wrf_dataset, new_lats, new_lons)\n",
1171 |     "print(\"Final shape of the WRF_Dataset: \\n\"\n",
1172 |     "      \"%s\\n\" % (wrf_dataset.values.shape, ))"
1173 |    ]
1174 |   },
1175 |   {
1176 |    "cell_type": "code",
1177 |    "execution_count": 178,
1178 |    "metadata": {},
1179 |    "outputs": [
1180 |     {
1181 |      "name": "stdout",
1182 |      "output_type": "stream",
1183 |      "text": [
1184 |       "Setting up a Bias metric to use for evaluation\n"
1185 |      ]
1186 |     }
1187 |    ],
1188 |    "source": [
1189 |     "\"\"\" Step 4:  Build a Metric to use for Evaluation - Bias for this example \"\"\"\n",
1190 |     "# You can build your own metrics, but OCW also ships with some common metrics\n",
1191 |     "print(\"Setting up a Bias metric to use for evaluation\")\n",
1192 |     "bias = metrics.Bias()"
1193 |    ]
1194 |   },
1195 |   {
1196 |    "cell_type": "code",
1197 |    "execution_count": 179,
1198 |    "metadata": {},
1199 |    "outputs": [
1200 |     {
1201 |      "name": "stdout",
1202 |      "output_type": "stream",
1203 |      "text": [
1204 |       "Making the Evaluation definition\n",
1205 |       "Executing the Evaluation using the object's run() method\n"
1206 |      ]
1207 |     }
1208 |    ],
1209 |    "source": [
1210 |     "\"\"\" Step 5: Create an Evaluation Object using Datasets and our Metric \"\"\"\n",
1211 |     "# The Evaluation Class Signature is:\n",
1212 |     "# Evaluation(reference, targets, metrics, subregions=None)\n",
1213 |     "# Evaluation can take in multiple targets and metrics, so we need to convert\n",
1214 |     "# our examples into Python lists.  Evaluation will iterate over the lists\n",
1215 |     "print(\"Making the Evaluation definition\")\n",
1216 |     "bias_evaluation = evaluation.Evaluation(knmi_dataset, [wrf_dataset], [bias])\n",
1217 |     "print(\"Executing the Evaluation using the object's run() method\")\n",
1218 |     "bias_evaluation.run()"
1219 |    ]
1220 |   },
1221 |   {
1222 |    "cell_type": "code",
1223 |    "execution_count": 180,
1224 |    "metadata": {},
1225 |    "outputs": [
1226 |     {
1227 |      "name": "stdout",
1228 |      "output_type": "stream",
1229 |      "text": [
1230 |       "Accessing the Results of the Evaluation run\n",
1231 |       "The results are of type: <class 'numpy.ma.core.MaskedArray'>\n"
1232 |      ]
1233 |     }
1234 |    ],
1235 |    "source": [
1236 |     "\"\"\" Step 6: Make a Plot from the Evaluation.results \"\"\"\n",
1237 |     "# The Evaluation.results are a set of nested lists to support many different\n",
1238 |     "# possible Evaluation scenarios.\n",
1239 |     "#\n",
1240 |     "# The Evaluation results docs say:\n",
1241 |     "# The shape of results is (num_metrics, num_target_datasets) if no subregion\n",
1242 |     "# Accessing the actual results when we have used 1 metric and 1 dataset is\n",
1243 |     "# done this way:\n",
1244 |     "print(\"Accessing the Results of the Evaluation run\")\n",
1245 |     "results = bias_evaluation.results[0][0]\n",
1246 |     "print(\"The results are of type: %s\" % type(results))\n"
1247 |    ]
1248 |   },
1249 |   {
1250 |    "cell_type": "code",
1251 |    "execution_count": 181,
1252 |    "metadata": {},
1253 |    "outputs": [],
1254 |    "source": [
1255 |     "OUTPUT_PLOT = \"wrf_bias_compared_to_knmi\""
1256 |    ]
1257 |   },
1258 |   {
1259 |    "cell_type": "code",
1260 |    "execution_count": 182,
1261 |    "metadata": {},
1262 |    "outputs": [
1263 |     {
1264 |      "name": "stdout",
1265 |      "output_type": "stream",
1266 |      "text": [
1267 |       "Generating a contour map using ocw.plotter.draw_contour_map()\n"
1268 |      ]
1269 |     },
1270 |     {
1271 |      "data": {
1272 |       "text/plain": [
1273 |        "<matplotlib.figure.Figure at 0x7f887b0fb2e8>"
1274 |       ]
1275 |      },
1276 |      "metadata": {},
1277 |      "output_type": "display_data"
1278 |     }
1279 |    ],
1280 |    "source": [
1281 |     "print(\"Generating a contour map using ocw.plotter.draw_contour_map()\")\n",
1282 |     "\n",
1283 |     "lats = new_lats\n",
1284 |     "lons = new_lons\n",
1285 |     "fname = OUTPUT_PLOT\n",
1286 |     "gridshape = (4, 5)  # 20 Years worth of plots. 20 rows in 1 column\n",
1287 |     "plot_title = \"TASMAX Bias of WRF Compared to KNMI (1989 - 2008)\"\n",
1288 |     "sub_titles = range(1989, 2009, 1)\n",
1289 |     "\n",
1290 |     "plotter.draw_contour_map(results, lats, lons, fname,\n",
1291 |     "                         gridshape=gridshape, ptitle=plot_title,\n",
1292 |     "                         subtitles=sub_titles)\n",
1293 |     "plt.show()"
1294 |    ]
1295 |   },
1296 |   {
1297 |    "cell_type": "code",
1298 |    "execution_count": 183,
1299 |    "metadata": {},
1300 |    "outputs": [
1301 |     {
1302 |      "name": "stdout",
1303 |      "output_type": "stream",
1304 |      "text": [
1305 |       "Making the Evaluation definition\n",
1306 |       "Executing the Evaluation using the object's run() method\n",
1307 |       "Accessing the Results of the Evaluation run\n",
1308 |       "The results are of type: <class 'numpy.ma.core.MaskedArray'>\n"
1309 |      ]
1310 |     }
1311 |    ],
1312 |    "source": [
1313 |     "print(\"Making the Evaluation definition\")\n",
1314 |     "bias_evaluation = evaluation.Evaluation(wrf_dataset, [knmi_dataset], [bias])\n",
1315 |     "print(\"Executing the Evaluation using the object's run() method\")\n",
1316 |     "bias_evaluation.run()\n",
1317 |     "print(\"Accessing the Results of the Evaluation run\")\n",
1318 |     "results = bias_evaluation.results[0][0]\n",
1319 |     "print(\"The results are of type: %s\" % type(results))\n"
1320 |    ]
1321 |   },
1322 |   {
1323 |    "cell_type": "code",
1324 |    "execution_count": 184,
1325 |    "metadata": {},
1326 |    "outputs": [],
1327 |    "source": [
1328 |     "OUTPUT_PLOT = \"knmi_bias_compared_to_wrf\""
1329 |    ]
1330 |   },
1331 |   {
1332 |    "cell_type": "code",
1333 |    "execution_count": null,
1334 |    "metadata": {},
1335 |    "outputs": [],
1336 |    "source": [
1337 |     "print(\"Generating a contour map using ocw.plotter.draw_contour_map()\")\n",
1338 |     "\n",
1339 |     "lats = new_lats\n",
1340 |     "lons = new_lons\n",
1341 |     "fname = OUTPUT_PLOT\n",
1342 |     "gridshape = (4, 5)  # 20 Years worth of plots. 20 rows in 1 column\n",
1343 |     "plot_title = \"TASMAX Bias of KNMI Compared to WRF (1989 - 2008)\"\n",
1344 |     "sub_titles = range(1989, 2009, 1)\n",
1345 |     "\n",
1346 |     "plotter.draw_contour_map(results, lats, lons, fname,\n",
1347 |     "                         gridshape=gridshape, ptitle=plot_title,\n",
1348 |     "                         subtitles=sub_titles)\n",
1349 |     "plt.show()"
1350 |    ]
1351 |   }
1352 |  ],
1353 |  "metadata": {
1354 |   "kernelspec": {
1355 |    "display_name": "Python 3",
1356 |    "language": "python",
1357 |    "name": "python3"
1358 |   },
1359 |   "language_info": {
1360 |    "codemirror_mode": {
1361 |     "name": "ipython",
1362 |     "version": 3
1363 |    },
1364 |    "file_extension": ".py",
1365 |    "mimetype": "text/x-python",
1366 |    "name": "python",
1367 |    "nbconvert_exporter": "python",
1368 |    "pygments_lexer": "ipython3",
1369 |    "version": "3.6.3"
1370 |   }
1371 |  },
1372 |  "nbformat": 4,
1373 |  "nbformat_minor": 2
1374 | }
1375 | 


--------------------------------------------------------------------------------