├── tests
│   ├── __init__.py
│   ├── test_db.py
│   ├── test_extract.py
│   └── test_utils.py
├── .gitignore
├── docs
│   ├── figs
│   │   ├── drawing.jpg
│   │   ├── ndmi_2sites.png
│   │   ├── ndvi_uxmal.png
│   │   └── multispectral_uxmal.png
│   ├── data
│   │   └── cdmx_parks.gpkg
│   ├── api
│   │   └── index.rst
│   ├── index.rst
│   ├── cli.rst
│   ├── introduction.rst
│   ├── processing.rst
│   ├── install.rst
│   ├── conf.py
│   ├── Makefile
│   └── examples.rst
├── travis_setup.py
├── .travis.yml
├── setup.py
├── geextract
│   ├── scripts
│   │   ├── gee_extract.py
│   │   └── gee_extract_batch.py
│   └── __init__.py
└── README.rst
/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | geextract.egg-info/* 2 | docs/_build/* 3 | *.swp 4 | *.pyc -------------------------------------------------------------------------------- /docs/figs/drawing.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loicdtx/landsat-extract-gee/HEAD/docs/figs/drawing.jpg -------------------------------------------------------------------------------- /docs/data/cdmx_parks.gpkg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loicdtx/landsat-extract-gee/HEAD/docs/data/cdmx_parks.gpkg -------------------------------------------------------------------------------- /docs/figs/ndmi_2sites.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loicdtx/landsat-extract-gee/HEAD/docs/figs/ndmi_2sites.png -------------------------------------------------------------------------------- /docs/figs/ndvi_uxmal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loicdtx/landsat-extract-gee/HEAD/docs/figs/ndvi_uxmal.png -------------------------------------------------------------------------------- /docs/figs/multispectral_uxmal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loicdtx/landsat-extract-gee/HEAD/docs/figs/multispectral_uxmal.png -------------------------------------------------------------------------------- /docs/api/index.rst: -------------------------------------------------------------------------------- 1 | geextract API 2 | ------------- 3 | 4 | .. 
automodule:: geextract 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: -------------------------------------------------------------------------------- /travis_setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This script is used to write the GEE API key (available as the GEE_API_KEY environment variable 4 | # on the travis machine via the travis variable encryption mechanism) to a file 5 | import os 6 | 7 | # Get key 8 | key = os.environ['GEE_API_KEY'] 9 | 10 | # Build line 11 | line = '{"refresh_token": "%s"}' % key 12 | 13 | # Create directory 14 | os.makedirs(os.path.expanduser('~/.config/earthengine/')) 15 | 16 | # Write line to file 17 | with open(os.path.expanduser('~/.config/earthengine/credentials'), 'w') as dst: 18 | dst.write(line) 19 | 20 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | geextract (Landsat SR time-series extraction tool) 2 | ================================================== 3 | 4 | This is the documentation of the ``geextract`` python package. The source code is available in a 5 | git repository hosted at https://github.com/loicdtx/landsat-extract-gee 6 | 7 | 8 | .. toctree:: 9 | :maxdepth: 2 10 | 11 | introduction 12 | 13 | 14 | .. toctree:: 15 | :maxdepth: 2 16 | :caption: User guide 17 | 18 | install 19 | processing 20 | api/index 21 | cli 22 | 23 | .. toctree:: 24 | :maxdepth: 2 25 | :caption: Examples 26 | 27 | examples 28 | 29 | 30 | 31 | 32 | Indices and tables 33 | ================== 34 | 35 | * :ref:`genindex` 36 | * :ref:`modindex` 37 | * :ref:`search` -------------------------------------------------------------------------------- /docs/cli.rst: -------------------------------------------------------------------------------- 1 | Command line interface 2 | ---------------------- 3 | 4 | ``geextract`` comes with two command line interfaces for convenience. Both CLIs expose the functionality of the `geextract.ts_extract()` function, to extract a time-series from a single pixel or a circular buffer. The `gee_extract_batch` command takes a text file as input, in which the coordinates and names of multiple locations can be written, allowing batch ordering of data. Both CLIs write the extracted data to a sqlite database. 5 | 6 | 7 | Simple CLI for ordering a single time-series and writing it to a sqlite database 8 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 9 | 10 | .. program-output:: gee_extract.py --help 11 | 12 | 13 | Batch ordering CLI 14 | ^^^^^^^^^^^^^^^^^^ 15 | 16 | .. program-output:: gee_extract_batch.py --help -------------------------------------------------------------------------------- /docs/introduction.rst: -------------------------------------------------------------------------------- 1 | Introduction 2 | ------------ 3 | 4 | 5 | ``geextract`` is a python library (API + command lines) to extract Landsat time-series from the Google Earth Engine platform. It can query single pixels or spatially aggregated values over polygons. When used via the command line, extracted time-series are written to a sqlite database.
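The core of the API is a single function, ``ts_extract``. A minimal sketch of the two point-based query modes (the coordinates below are arbitrary; see the user guide for the full list of arguments):

.. code-block:: python

    from datetime import datetime
    from geextract import ts_extract

    # Time-series for a single pixel...
    point_ts = ts_extract(lon=-89.81, lat=20.41, sensor='LC8',
                          start=datetime(2015, 1, 1))
    # ... or spatially aggregated (mean) over a 300 m circular buffer
    buffer_ts = ts_extract(lon=-89.81, lat=20.41, sensor='LC8',
                           start=datetime(2015, 1, 1), radius=300,
                           stats='mean')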
6 | 7 | The idea is to provide quick access to Landsat time-series for exploratory analysis or algorithm testing. Instead of downloading the whole stack of Landsat scenes, preparing the data locally and extracting the time-series of interest, which may take several days, ``geextract`` lets you retrieve time-series in a few seconds. 8 | 9 | Compatible with python 2.7 and 3. 10 | 11 | This online documentation includes an installation guide, API and command line documentation, and some usage examples. -------------------------------------------------------------------------------- /tests/test_db.py: -------------------------------------------------------------------------------- 1 | from geextract import dictlist2sqlite 2 | import unittest 3 | from datetime import datetime 4 | import sqlite3 5 | import tempfile 6 | import os 7 | 8 | tmp_dir = tempfile.gettempdir() 9 | db_name = os.path.join(tmp_dir, 'gee_test.sqlite') 10 | if os.path.isfile(db_name): 11 | os.remove(db_name) 12 | dict_list = [{'id': "LT50200461986040", 'B1': 12, 'B2': 23}, 13 | {'id': "LT50200461986072", 'B1': 45, 'B2': 54}, 14 | {'id': "LT50200461986104", 'B1': None, 'B2': 54}, 15 | {'id': "LT50200461986232", 'B1': None, 'B2': None}, 16 | {'id': "LT50200461986296", 'B1': 45, 'B2': 54}, 17 | ] 18 | 19 | class TestDatabase(unittest.TestCase): 20 | def test_insert(self): 21 | dictlist2sqlite(dict_list, 'testSite', 'LT5', db_name, 'unittesting') 22 | conn = sqlite3.connect(db_name) 23 | cur = conn.cursor() 24 | cur.execute("SELECT * FROM unittesting") 25 | rows = cur.fetchall() 26 | self.assertEqual(len(rows), 3) 27 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | cache: pip 4 | 5 | dist: trusty 6 | 7 | python: 8 | - "2.7" 9 | - "3.6" 10 | 11 | before_install: 12 | - pip install -U pip 13 | - ./travis_setup.py 14 | 15 | install: 16 | - pip install coveralls 17 | - pip install oauth2client 18 | - pip install -e . 19 | 20 | script: 21 | - python setup.py test 22 | - coverage run --source=geextract/ setup.py test 23 | 24 | after_success: 25 | - coveralls 26 | 27 | deploy: 28 | provider: pypi 29 | user: "loicdtx" 30 | password: 31 | secure: "lhFeqgk0yI1c9YJwrKV7Z1wIuzCv6yVezsi80w5NYL/R4g99KPPwtVNIHPzKwo2Sh+FrKYMdN5lSCEEjZoqLOjoPQn2aQYUs6pSgwGcpzGIoijS6FkRL+gnQF56pPqFOBhxKOTlIihEqDTqr7lqiXT8zfDZyT4zdrxCsGg7JmgpKtqYl4aC8KYv3EBnCP6hs781ihV9VHpD6vOoUcPiiQmi4/qkI8wiqEJi5vZV8AGVBC1tV7EGYHD9jzlalsRrXYIWvXTFS/ercR1OkrOKzaEQlrHgfwZhQw6uqKHMOTIq7Q583MC01VluCpqCuL10c3TqxbV/4kl6MHX1z2dAaI55/hWHgfqaT5ZPWIZivA0+/UorzPV1vZikven7GLuimwSnJLLM6S6I+ecjG2+Zc45PKc6BYlSteHHWuaWbsY2M4cQU0qSZzlgZG412c3//2NUULpZcN3MojHB+ayFkikmnQv3BjiX53feGCBs7GQvYEn0f87gmzM/W6AxekzgfgE9NrvpEJG6zCXzVL6jNh46pnX+fy4ZyqIXJ0evc+vQ/HXRtnr4bXCH/O1PlEUeW8yfcqu3rxU80c11y52juiubkyZtlevctTV9b+Fuu1o/giyGbMh9Xwgn/TtL+04T7lMRV8R1AtydRazVbFUXRbFGQ1R/bi/W4LOGb3KOmcQw8=" 32 | on: 33 | tags: true 34 | 35 | warnings_are_errors: true 36 | 37 | notifications: 38 | email: false 39 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import codecs 5 | from setuptools import setup, find_packages 6 | import itertools 7 | 8 | # Parse the version from the geextract module. 
9 | with open('geextract/__init__.py') as f: 10 | for line in f: 11 | if line.find("__version__") >= 0: 12 | version = line.split("=")[1].strip() 13 | version = version.strip('"') 14 | version = version.strip("'") 15 | continue 16 | 17 | # 18 | extra_reqs = {'docs': ['sphinx', 19 | 'sphinx-rtd-theme', 20 | 'sphinxcontrib-programoutput', 21 | 'fiona', 22 | 'seaborn', 23 | 'matplotlib']} 24 | extra_reqs['all'] = list(set(itertools.chain(*extra_reqs.values()))) 25 | 26 | with codecs.open('README.rst', encoding='utf-8') as f: 27 | readme = f.read() 28 | 29 | setup(name='geextract', 30 | version=version, 31 | description=u"Extract Landsat surface reflectance time-series at a given location from Google Earth Engine", 32 | long_description=readme, 33 | classifiers=[], 34 | keywords='Landsat, surface reflectance, google, gee, time-series', 35 | author=u"Loïc Dutrieux", 36 | author_email='loic.dutrieux@gmail.com', 37 | url='https://github.com/loicdtx/landsat-extract-gee.git', 38 | license='GPLv3', 39 | packages=find_packages(), 40 | install_requires=[ 41 | 'pandas', 42 | 'earthengine-api'], 43 | scripts=['geextract/scripts/gee_extract.py', 44 | 'geextract/scripts/gee_extract_batch.py'], 45 | test_suite="tests", 46 | extras_require=extra_reqs) 47 | -------------------------------------------------------------------------------- /docs/processing.rst: -------------------------------------------------------------------------------- 1 | Processing 2 | ---------- 3 | 4 | Data sources 5 | ^^^^^^^^^^^^ 6 | 7 | For now ``geextract`` only supports the extraction of Landsat surface reflectance data. Two Landsat surface reflectance collections are available on the platform: pre-collection and collection 1. It is preferable to use collection 1, as it is supposedly of better quality (more recent versions of the LEDAPS and cloud masking algorithms) and more up to date (pre-collection data are no longer ingested into the Google Earth Engine platform, so recent acquisitions won't be available). 8 | Reference documents (product guides) for Landsat surface reflectance data are available `here `_ for Landsat 4, 5 and 7, and `here `_ for Landsat 8. 9 | 10 | Pre-processing 11 | ^^^^^^^^^^^^^^ 12 | 13 | A rather standard pre-processing, consisting of cloud and cloud shadow masking and removal of saturated pixels, is applied to the data prior to extraction. 14 | 15 | The cfmask quality data (available in the cfmask band for pre-collection and pixel_qa for collection 1) are used for masking pixels contaminated by clouds or cloud shadows, or flagged as water, ice, etc. This is done by keeping only pixels labeled as "clear land pixel". 16 | 17 | Saturated pixels are filtered out by excluding values outside the valid range. 18 | 19 | Spatial aggregation methods 20 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ 21 | 22 | The user can either order time-series corresponding to a single pixel or a polygon. When a polygon is used, one of the four spatial aggregation functions (mean, median, max, min) can be chosen. Masked pixels are automatically excluded from the spatial aggregation.
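As an illustration, the sketch below requests median-aggregated time-series, first over a 500 m circular buffer and then over a polygon (the coordinates are arbitrary, and the ``feature`` argument expects a GeoJSON-like feature mapping such as the ones produced by ``fiona``; see the examples section for a real use case):

.. code-block:: python

    from datetime import datetime
    from geextract import ts_extract

    # Median of the unmasked pixels within a 500 m circular buffer
    buffer_ts = ts_extract(lon=-89.81, lat=20.41, sensor='LC8',
                           start=datetime(2015, 1, 1), radius=500,
                           stats='median')

    # Median of the unmasked pixels within a polygon
    feature = {'type': 'Feature', 'properties': {},
               'geometry': {'type': 'Polygon',
                            'coordinates': [[[-89.82, 20.41], [-89.80, 20.41],
                                             [-89.80, 20.42], [-89.82, 20.42],
                                             [-89.82, 20.41]]]}}
    polygon_ts = ts_extract(sensor='LC8', start=datetime(2015, 1, 1),
                            feature=feature, stats='median')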
-------------------------------------------------------------------------------- /docs/install.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ------------ 3 | 4 | 5 | You must have a Google Earth Engine account. If you don't yet have an account, you can request one `here `_ 6 | 7 | Once you have an account, the package can be installed using ``pip``, preferably within a virtual environment. If you're new to python and/or virtual environments, read the :ref:`scratch` section. 8 | 9 | 10 | Quick install 11 | ^^^^^^^^^^^^^ 12 | 13 | .. code-block:: bash 14 | 15 | pip install geextract 16 | 17 | If you're using the gee API for the first time on your machine, you'll have to run: 18 | 19 | .. code-block:: bash 20 | 21 | earthengine authenticate 22 | 23 | which will open a google authentication page in your browser, and will give you an authentication token to paste back in the terminal. 24 | 25 | You can check that the authentication process was successful by running: 26 | 27 | .. code-block:: bash 28 | 29 | python -c "import ee; ee.Initialize()" 30 | 31 | 32 | If nothing happens, it means that things are working... You can go ahead and use the ``geextract`` API and command line. 33 | 34 | 35 | .. _scratch: 36 | 37 | Install from scratch 38 | ^^^^^^^^^^^^^^^^^^^^ 39 | 40 | 41 | This section details step by step installation and setup from scratch. It includes installing and setting up `virtualenv `_ and `virtualenvwrapper `_ on ubuntu/debian. For windows and mac, refer to the `gee API installation instructions `_. 42 | 43 | Install dependencies 44 | """""""""""""""""""" 45 | 46 | 47 | .. code-block:: bash 48 | 49 | # Install pip (a package manager for python) 50 | sudo apt-get install python-pip 51 | 52 | # Install virtualenv (virtual environments for python projects) 53 | sudo pip install virtualenv 54 | 55 | # Install virtualenvwrapper (Makes working with virtualenv easier) 56 | sudo pip install virtualenvwrapper 57 | 58 | # Finish setting up virtualenvwrapper (of course if you use a different shell, export to the right config file) 59 | echo 'source /usr/local/bin/virtualenvwrapper.sh' >> ~/.bashrc 60 | source ~/.bashrc 61 | 62 | # Create a virtual environment 63 | mkvirtualenv geextract 64 | 65 | # You are now in the virtual environment 66 | # You can exit it by running 'deactivate' 67 | # And get back to it with 'workon geextract' 68 | 69 | Install the package 70 | """"""""""""""""""" 71 | 72 | To install the ``geextract`` package, run the following line in your terminal from within a virtual environment. 73 | 74 | .. code-block:: bash 75 | 76 | # Install 77 | pip install geextract 78 | 79 | You then need to authenticate for the package to be able to interact with the Google Earth Engine platform. 80 | 81 | .. code-block:: bash 82 | 83 | earthengine authenticate 84 | 85 | which will open a google authentication page in your browser, and will give you an authentication token to paste back in the terminal. 86 | 87 | You can check that the authentication process was successful by running: 88 | 89 | .. code-block:: bash 90 | 91 | python -c "import ee; ee.Initialize()" 92 | 93 | 94 | If nothing happens, it means that things are working... You can go ahead and use the ``geextract`` API and command line. 
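As a last sanity check, you can run a small extraction end to end (a minimal sketch with arbitrary coordinates; it should print a positive number of records after a few seconds):

.. code-block:: python

    from datetime import datetime
    from geextract import ts_extract

    # One year of Landsat 8 surface reflectance for a single pixel
    ts = ts_extract(lon=-89.81, lat=20.41, sensor='LC8',
                    start=datetime(2017, 1, 1), end=datetime(2018, 1, 1))
    print('Extracted %d records' % len(ts))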
-------------------------------------------------------------------------------- /tests/test_extract.py: -------------------------------------------------------------------------------- 1 | from geextract import ts_extract 2 | import unittest 3 | from datetime import datetime 4 | 5 | class TestTsExtraction(unittest.TestCase): 6 | def test_point(self): 7 | a = ts_extract(lon=4.722111, lat=44.770928, sensor='LC8', start=datetime(2015,1,1), 8 | end=datetime(2016, 6, 1), radius = None, feature = None, bands = None, 9 | stats = 'mean') 10 | self.assertTrue(len(a) > 2) 11 | self.assertTrue(isinstance(a[0], dict)) 12 | self.assertEqual(set(a[0].keys()), 13 | set(['id', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7'])) 14 | 15 | def test_point_radius(self): 16 | a = ts_extract(lon=-3, lat=44.7, sensor='LC8', start=datetime(2015,1,1), 17 | end=datetime(2016, 6, 1), radius = 300, feature = None, bands = None, 18 | stats = 'mean') 19 | self.assertTrue(len(a) > 2) 20 | self.assertTrue(isinstance(a[0], dict)) 21 | self.assertEqual(set(a[0].keys()), 22 | set(['id', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7'])) 23 | 24 | def test_point_radius_tm(self): 25 | a = ts_extract(lon=-3, lat=44.7, sensor='LT5', start=datetime(1999,1,1), 26 | end=datetime(2005, 6, 1), radius = 300, feature = None, bands = None, 27 | stats = 'median') 28 | self.assertTrue(len(a) > 2) 29 | self.assertTrue(isinstance(a[0], dict)) 30 | self.assertEqual(set(a[0].keys()), 31 | set(['id', 'B1', 'B2', 'B3', 'B4', 'B5', 'B7'])) 32 | 33 | def test_point_radius_tiers(self): 34 | a = ts_extract(lon=-3, lat=44.7, sensor='LC8', start=datetime(2015,1,1), 35 | end=datetime(2016, 6, 1), radius = 300, feature = None, bands = None, 36 | stats = 'mean', tiers=['T1']) 37 | self.assertTrue(len(a) > 2) 38 | self.assertTrue(isinstance(a[0], dict)) 39 | self.assertEqual(set(a[0].keys()), 40 | set(['id', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7'])) 41 | 42 | def test_point_radius_tm_tiers(self): 43 | a = ts_extract(lon=-3, lat=44.7, sensor='LT5', start=datetime(1999,1,1), 44 | end=datetime(2010, 6, 1), radius = 300, feature = None, bands = None, 45 | stats = 'median', tiers=['T1']) 46 | self.assertTrue(len(a) > 2) 47 | self.assertTrue(isinstance(a[0], dict)) 48 | self.assertEqual(set(a[0].keys()), 49 | set(['id', 'B1', 'B2', 'B3', 'B4', 'B5', 'B7'])) 50 | 51 | def test_exceptions(self): 52 | # sensor does not exist 53 | kwargs_1 = {'lon': -3, 54 | 'lat': 44.7, 55 | 'sensor': 'LT8', 56 | 'start': datetime(1999, 1, 1), 57 | 'end': datetime(2000, 6, 1), 58 | 'radius': 300, 59 | 'stats': 'max'} 60 | self.assertRaises(ValueError, ts_extract, **kwargs_1) 61 | # aggregation method does not exist 62 | kwargs_2 = {'lon': -3, 63 | 'lat': 44.7, 64 | 'sensor': 'LT5', 65 | 'start': datetime(1999, 1, 1), 66 | 'end': datetime(2000, 6, 1), 67 | 'radius': 300, 68 | 'stats': 'mode'} 69 | self.assertRaises(ValueError, ts_extract, **kwargs_2) 70 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | from geextract import get_date, date_append, relabel 2 | import unittest 3 | from datetime import date 4 | 5 | 6 | dict_list = [{'id': "LT50200461986040", 'B1': 12, 'B2': 23}, 7 | {'id': "LT50200461986072", 'B1': 45, 'B2': 54}, 8 | {'id': "LT50200461986104", 'B1': None, 'B2': 54}, 9 | {'id': "LT50200461986232", 'B1': None, 'B2': None}, 10 | {'id': "LT50200461986296", 'B1': 45, 'B2': 54}, 11 | ] 12 | 13 | dict_list_with_time = [{'B1': 12, 14 | 'B2': 23, 15 | 'id': 
'LT50200461986040', 16 | 'time': date(1986, 2, 9)}, 17 | {'B1': 45, 18 | 'B2': 54, 19 | 'id': 'LT50200461986072', 20 | 'time': date(1986, 3, 13)}, 21 | {'B1': None, 22 | 'B2': 54, 23 | 'id': 'LT50200461986104', 24 | 'time': date(1986, 4, 14)}, 25 | {'B1': None, 26 | 'B2': None, 27 | 'id': 'LT50200461986232', 28 | 'time': date(1986, 8, 20)}, 29 | {'B1': 45, 30 | 'B2': 54, 31 | 'id': 'LT50200461986296', 32 | 'time': date(1986, 10, 23)}] 33 | LE7_dict = [{'B1': 791.0712281921998, 34 | 'B2': 882.609940300239, 35 | 'B3': 989.8438010780741, 36 | 'B4': 2346.717295542807, 37 | 'B5': 2925.630661334263, 38 | 'B7': 1843.7754404295472, 39 | 'id': 'LE07_020046_20170427'}, 40 | {'B1': 421.51089404078505, 41 | 'B2': 624.8624105377779, 42 | 'B3': 551.8283911071643, 43 | 'B4': 3165.8649582950115, 44 | 'B5': 1943.1104732908364, 45 | 'B7': 1008.9000877443359, 46 | 'id': 'LE07_020046_20170630'}, 47 | {'B1': 323.23065242403203, 48 | 'B2': 585.4544074102201, 49 | 'B3': 487.89957540751027, 50 | 'B4': 3455.480794052203, 51 | 'B5': 2173.9796873083665, 52 | 'B7': 1043.4691601011557, 53 | 'id': 'LE07_020046_20170716'}] 54 | 55 | LE7_dict_color = [{'blue': 791.0712281921998, 56 | 'green': 882.609940300239, 57 | 'red': 989.8438010780741, 58 | 'nir': 2346.717295542807, 59 | 'swir1': 2925.630661334263, 60 | 'swir2': 1843.7754404295472, 61 | 'id': 'LE07_020046_20170427'}, 62 | {'blue': 421.51089404078505, 63 | 'green': 624.8624105377779, 64 | 'red': 551.8283911071643, 65 | 'nir': 3165.8649582950115, 66 | 'swir1': 1943.1104732908364, 67 | 'swir2': 1008.9000877443359, 68 | 'id': 'LE07_020046_20170630'}, 69 | {'blue': 323.23065242403203, 70 | 'green': 585.4544074102201, 71 | 'red': 487.89957540751027, 72 | 'nir': 3455.480794052203, 73 | 'swir1': 2173.9796873083665, 74 | 'swir2': 1043.4691601011557, 75 | 'id': 'LE07_020046_20170716'}] 76 | 77 | class TestFilenameParsing(unittest.TestCase): 78 | def test_date_extraction(self): 79 | f1 = 'LC81970292013106' 80 | d1 = date(2013, 4, 16) 81 | f2 = 'LANDSAT/LE07/C01/T1_SR/LE07_023039_20000604' 82 | d2 = date(2000, 6, 4) 83 | f3 = 'LE07_023039_20000604' 84 | d3 = date(2000, 6, 4) 85 | f4 = 'S2A_MSIL1C_20170105T013442_N0204_R031_T53NMJ_20170105T013443' 86 | 87 | self.assertEqual(get_date(f1), d1) 88 | self.assertEqual(get_date(f2), d2) 89 | self.assertEqual(get_date(f3), d3) 90 | self.assertRaises(ValueError, get_date, *[f4]) 91 | 92 | 93 | class TestUtils(unittest.TestCase): 94 | def test_date_append(self): 95 | self.assertEqual(date_append(dict_list), dict_list_with_time) 96 | 97 | def test_relabel(self): 98 | self.assertEqual(relabel(LE7_dict, 'LE7'), LE7_dict_color) 99 | 100 | -------------------------------------------------------------------------------- /geextract/scripts/gee_extract.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import print_function 3 | 4 | import argparse 5 | from datetime import datetime 6 | from geextract import ts_extract, relabel, date_append, dictlist2sqlite 7 | 8 | def main(lon, lat, sensor, begin, end, radius, stats, db, table, site, 9 | tiers): 10 | # Parse time strings (yyyy-mm-dd) into datetime objects 11 | begin = datetime.strptime(begin, '%Y-%m-%d') 12 | end = datetime.strptime(end, '%Y-%m-%d') 13 | # Extract data 14 | dict_list_0 = ts_extract(lon=lon, lat=lat, sensor=sensor, start=begin, end=end, 15 | radius=radius, stats=stats, tiers=tiers) 16 | print('Extracted %d records from Google Earth Engine' % len(dict_list_0)) 17 | # Prepare list of dictionaries 18 | dict_list_1 = 
relabel(dict_list_0, sensor) 19 | dict_list_2 = date_append(dict_list_1) 20 | # Write to db 21 | dictlist2sqlite(dict_list_2, site=site, sensor=sensor, db_src=db, table=table) 22 | 23 | if __name__ == '__main__': 24 | epilog = """ 25 | Command line utility to extract Landsat surface reflectance data from the Google Earth 26 | Engine platform and write the output to a local sqlite database. Queries can be done for 27 | a single pixel, or for a circular region, in which case data are spatially aggregated 28 | for each time step using a user-defined spatial aggregation function. 29 | 30 | sqlite tables get appended to if new data are queried (i.e. for the same location but a different sensor). 31 | 32 | -------------------------- 33 | Example usage 34 | -------------------------- 35 | # Extract all the LT5 bands for a location in Yucatan for the entire Landsat period, with a 500m radius 36 | gee_extract.py -s LT5 -b 1980-01-01 -lon -89.8107 -lat 20.4159 -r 500 -db /tmp/gee_db.sqlite -site uxmal -table col_1 37 | gee_extract.py -s LE7 -b 1980-01-01 -lon -89.8107 -lat 20.4159 -r 500 -db /tmp/gee_db.sqlite -site uxmal -table col_1 38 | gee_extract.py -s LC8 -b 1980-01-01 -lon -89.8107 -lat 20.4159 -r 500 -db /tmp/gee_db.sqlite -site uxmal -table col_1 39 | 40 | # Extract only tier 1 data for LC8 41 | gee_extract.py -s LC8 -b 1980-01-01 -lon -89.8107 -lat 20.4159 -r 500 -db /tmp/gee_db.sqlite -site uxmal -table col_1 --tiers T1 42 | """ 43 | 44 | 45 | parser = argparse.ArgumentParser(epilog=epilog, formatter_class=argparse.RawDescriptionHelpFormatter) 46 | 47 | parser.add_argument('-lat', '--lat', 48 | required=True, 49 | type=float, 50 | help='center latitude in Decimal Degrees') 51 | 52 | parser.add_argument('-lon', '--lon', 53 | required=True, 54 | type=float, 55 | help='center longitude in Decimal Degrees') 56 | 57 | parser.add_argument('-b', '--begin', 58 | required = True, 59 | help = 'Anterior time-range boundary in yyyy-mm-dd') 60 | 61 | parser.add_argument('-e', '--end', 62 | required = False, 63 | help = 'Posterior time-range boundary in yyyy-mm-dd') 64 | parser.set_defaults(end=datetime.today().date().strftime('%Y-%m-%d')) 65 | 66 | parser.add_argument('-db', '--db', required=True, 67 | help='Path to sqlite database. Will be created if it does not exist') 68 | 69 | parser.add_argument('-site', '--site', required=True, 70 | help='Label associated with that location (e.g. Site name)') 71 | 72 | parser.add_argument('-table', '--table', required=True, 73 | help='Database table name to write data. Existing tables will be appended') 74 | 75 | parser.add_argument('-r', '--radius', type=float, required=False, 76 | help='Optional circular radius in meters around center point') 77 | parser.set_defaults(radius=None) 78 | 79 | parser.add_argument('-s', '--sensor', required=True, 80 | help='Landsat sensor to query; one of LT4, LT5, LE7, LC8') 81 | 82 | parser.add_argument('-stats', '--stats', required=False, 83 | help='Spatial aggregation function, one of mean (default), median, max or min. 
Only relevant if a radius value is provided') 84 | parser.set_defaults(stats='mean') 85 | 86 | parser.add_argument('-t', '--tiers', required=False, 87 | nargs='*', 88 | type=str, 89 | default=['T1', 'T2'], 90 | help='Tiers to order (T1: highest quality, defaults to T1 and T2)') 91 | 92 | parsed_args = parser.parse_args() 93 | 94 | main(**vars(parsed_args)) 95 | -------------------------------------------------------------------------------- /geextract/scripts/gee_extract_batch.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import print_function 3 | 4 | import argparse 5 | from datetime import datetime 6 | import csv 7 | from geextract import ts_extract, dictlist2sqlite, relabel, date_append 8 | 9 | def main(file, sensor, begin, end, radius, stats, db, table, tiers): 10 | # Parse time strings (yyyy-mm-dd) into datetime objects 11 | begin = datetime.strptime(begin, '%Y-%m-%d') 12 | end = datetime.strptime(end, '%Y-%m-%d') 13 | # Read coordinates and site names from text file 14 | with open(file) as src: 15 | reader = csv.reader(src) 16 | for line in reader: 17 | try: 18 | lon = float(line[0]) 19 | lat = float(line[1]) 20 | site = line[2] 21 | # Extract data 22 | dict_list_0 = ts_extract(lon=lon, lat=lat, sensor=sensor, 23 | start=begin, end=end, radius=radius, 24 | stats=stats, tiers=tiers) 25 | print('Extracted %d records from Google Earth Engine' % len(dict_list_0)) 26 | # Prepare list of dictionaries 27 | dict_list_1 = relabel(dict_list_0, sensor) 28 | dict_list_2 = date_append(dict_list_1) 29 | # Write to db 30 | dictlist2sqlite(dict_list_2, site=site, sensor=sensor, db_src=db, table=table) 31 | except Exception as e: 32 | print('An error occurred while extracting a site. %s' % e) 33 | 34 | if __name__ == '__main__': 35 | epilog = """ 36 | Command line utility to batch extract Landsat surface reflectance data from the Google Earth 37 | Engine platform and write the output to a local sqlite database. Queries can be done for 38 | a single pixel, or for a circular region, in which case data are spatially aggregated 39 | for each time step using a user-defined spatial aggregation function. 40 | 41 | Input locations must be provided in a text file, with one line per location in the form lon,lat,site_name. 42 | The site_name provided for each site in the text file is used (together with sensor) as a grouping variable 43 | in the sqlite table. 
44 | 45 | -------------------------- 46 | Example usage 47 | -------------------------- 48 | # Extract all the LT5, LE7 and LC8 bands within a 500 m radius for two locations 49 | echo "4.7174,44.7814,rompon\\n-149.4260,-17.6509,tahiti" > site_list.txt 50 | gee_extract_batch.py site_list.txt -b 1984-01-01 -s LT5 -r 500 -db /tmp/gee_db.sqlite -table landsat_ts 51 | gee_extract_batch.py site_list.txt -b 1984-01-01 -s LE7 -r 500 -db /tmp/gee_db.sqlite -table landsat_ts 52 | gee_extract_batch.py site_list.txt -b 1984-01-01 -s LC8 -r 500 -db /tmp/gee_db.sqlite -table landsat_ts 53 | 54 | # Only Tier 1 for LC8 55 | gee_extract_batch.py site_list.txt -b 1984-01-01 -s LC8 -r 500 -db /tmp/gee_db.sqlite -table landsat_ts --tiers T1 56 | """ 57 | 58 | 59 | parser = argparse.ArgumentParser(epilog=epilog, formatter_class=argparse.RawDescriptionHelpFormatter) 60 | 61 | parser.add_argument('file', type=str, 62 | help='Input text file with comma separated site coordinates in DD and site name on each line.') 63 | 64 | parser.add_argument('-b', '--begin', 65 | required = True, 66 | help = 'Anterior time-range boundary in yyyy-mm-dd') 67 | 68 | parser.add_argument('-e', '--end', 69 | required = False, 70 | help = 'Posterior time-range boundary in yyyy-mm-dd') 71 | parser.set_defaults(end=datetime.today().date().strftime('%Y-%m-%d')) 72 | 73 | parser.add_argument('-db', '--db', required=True, 74 | help='Path to sqlite database. Will be created if it does not exist') 75 | 76 | parser.add_argument('-r', '--radius', type=float, required=False, 77 | help='Optional circular radius in meters around center point') 78 | parser.set_defaults(radius=None) 79 | 80 | parser.add_argument('-s', '--sensor', required=True, 81 | help='Landsat sensor to query; one of LT4, LT5, LE7, LC8') 82 | 83 | parser.add_argument('-table', '--table', required=True, 84 | help='Database table name to write data. Existing tables will be appended') 85 | 86 | parser.add_argument('-stats', '--stats', required=False, 87 | help='Spatial aggregation function, one of mean (default), median, max or min. Only relevant if a radius value is provided') 88 | parser.set_defaults(stats='mean') 89 | 90 | parser.add_argument('-t', '--tiers', required=False, 91 | nargs='*', 92 | type=str, 93 | default=['T1', 'T2'], 94 | help='Tiers to order (T1: highest quality, defaults to T1 and T2)') 95 | 96 | parsed_args = parser.parse_args() 97 | 98 | main(**vars(parsed_args)) 99 | 100 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | geextract 2 | ========= 3 | 4 | *Google Earth Engine data extraction tool. Quickly obtain Landsat multispectral time-series for exploratory analysis and algorithm testing* 5 | 6 | Online documentation available at https://loicdtx.github.io/landsat-extract-gee 7 | 8 | .. image:: https://coveralls.io/repos/github/loicdtx/landsat-extract-gee/badge.svg?branch=master 9 | :target: https://coveralls.io/github/loicdtx/landsat-extract-gee?branch=master 10 | 11 | .. image:: https://travis-ci.org/loicdtx/landsat-extract-gee.svg?branch=master 12 | :target: https://travis-ci.org/loicdtx/landsat-extract-gee 13 | 14 | .. image:: https://badge.fury.io/py/geextract.svg 15 | :target: https://badge.fury.io/py/geextract 16 | 17 | 18 | 19 | Introduction 20 | ------------ 21 | 22 | 23 | A python library (API + command lines) to extract Landsat time-series from the Google Earth Engine platform. It can query single pixels or spatially aggregated values over polygons. When used via the command line, extracted time-series are written to a sqlite database. 24 | 25 | The idea is to provide quick access to Landsat time-series for exploratory analysis or algorithm testing. Instead of downloading the whole stack of Landsat scenes, preparing the data locally and extracting the time-series of interest, which may take several days, ``geextract`` lets you retrieve time-series in a few seconds. 26 | 27 | Compatible with python 2.7 and 3. 28 | 29 | Usage 30 | ----- 31 | 32 | API 33 | ^^^ 34 | 35 | The principal function of the API is ``ts_extract`` 36 | 37 | .. code-block:: python 38 | 39 | from geextract import ts_extract 40 | from datetime import datetime 41 | 42 | # Extract a Landsat 7 time-series for a 500m radius circular buffer around 43 | # a location in Yucatan 44 | lon = -89.8107197 45 | lat = 20.4159611 46 | LE7_dict_list = ts_extract(lon=lon, lat=lat, sensor='LE7', 47 | start=datetime(1999, 1, 1), radius=500) 48 | 49 |
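``ts_extract`` returns a list of dictionaries, one per Landsat scene. If you want a tidy table rather than a database, a sketch like the following (using the package's own ``relabel`` and ``date_append`` helpers) turns the result into a pandas DataFrame:

.. code-block:: python

    import pandas as pd
    from geextract import relabel, date_append

    # Map band names (B1, B2, ...) to colors (blue, green, ...) and add a date column
    df = pd.DataFrame(date_append(relabel(LE7_dict_list, 'LE7')))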
50 | Command line 51 | ^^^^^^^^^^^^ 52 | 53 | ``geextract`` comes with two command line tools for extracting Landsat time-series directly from the terminal. 54 | 55 | - ``gee_extract.py``: Extract a Landsat multispectral time-series for a single site. Extracted data are automatically added to a sqlite database. 56 | - ``gee_extract_batch.py``: Batch order Landsat multispectral time-series for multiple locations. 57 | 58 | .. code-block:: bash 59 | 60 | gee_extract.py --help 61 | 62 | # Extract all the LT5 bands for a location in Yucatan for the entire Landsat period, with a 500m radius 63 | gee_extract.py -s LT5 -b 1980-01-01 -lon -89.8107 -lat 20.4159 -r 500 -db /tmp/gee_db.sqlite -site uxmal -table col_1 64 | gee_extract.py -s LE7 -b 1980-01-01 -lon -89.8107 -lat 20.4159 -r 500 -db /tmp/gee_db.sqlite -site uxmal -table col_1 65 | gee_extract.py -s LC8 -b 1980-01-01 -lon -89.8107 -lat 20.4159 -r 500 -db /tmp/gee_db.sqlite -site uxmal -table col_1 66 | 67 | .. code-block:: bash 68 | 69 | gee_extract_batch.py --help 70 | 71 | # Extract all the LT5, LE7 and LC8 bands within a 500 m radius for two locations 72 | echo "4.7174,44.7814,rompon\n-149.4260,-17.6509,tahiti" > site_list.txt 73 | gee_extract_batch.py site_list.txt -b 1984-01-01 -s LT5 -r 500 -db /tmp/gee_db.sqlite -table landsat_ts 74 | gee_extract_batch.py site_list.txt -b 1984-01-01 -s LE7 -r 500 -db /tmp/gee_db.sqlite -table landsat_ts 75 | gee_extract_batch.py site_list.txt -b 1984-01-01 -s LC8 -r 500 -db /tmp/gee_db.sqlite -table landsat_ts 76 | 77 | 78 | .. image:: https://github.com/loicdtx/landsat-extract-gee/raw/master/docs/figs/multispectral_uxmal.png 79 | 80 | 81 | 82 | 83 | Installation 84 | ------------ 85 | 86 | You must have a `Google Earth Engine `__ account to use the package. 87 | 88 | Then, in a virtual environment run: 89 | 90 | .. code-block:: bash 91 | 92 | pip install geextract 93 | earthengine authenticate 94 | 95 | 96 | This will open a google authentication page in your browser, and will give you an authentication token to paste back in the terminal. 97 | 98 | You can check that the authentication process was successful by running: 99 | 100 | .. code-block:: bash 101 | 102 | python -c "import ee; ee.Initialize()" 103 | 104 | 105 | If nothing happens... it's working. 106 | 107 | 108 | Benchmark 109 | --------- 110 | 111 | A quick benchmark of the extraction speed, using a 500 m buffer. 112 | 113 | .. 
code-block:: python 114 | 115 | import time 116 | from datetime import datetime 117 | from pprint import pprint 118 | import geextract 119 | 120 | lon = -89.8107197 121 | lat = 20.4159611 122 | 123 | for sensor in ['LT5', 'LE7', 'LT4', 'LC8']: 124 | start = time.time() 125 | out = geextract.ts_extract(lon=lon, lat=lat, sensor=sensor, start=datetime(1980, 1, 1, 0, 0), 126 | end=datetime.today(), radius=500) 127 | end = time.time() 128 | 129 | pprint('%s. Extracted %d records in %.1f seconds' % (sensor, len(out), end - start)) 130 | 131 | .. code-block:: pycon 132 | 133 | # 'LT5. Extracted 142 records in 1.9 seconds' 134 | # 'LE7. Extracted 249 records in 5.8 seconds' 135 | # 'LT4. Extracted 7 records in 1.0 seconds' 136 | # 'LC8. Extracted 72 records in 2.4 seconds' 137 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # satmo documentation build configuration file, created by 4 | # sphinx-quickstart on Wed Apr 5 12:37:42 2017. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | # If extensions (or modules to document with autodoc) are in another directory, 16 | # add these directories to sys.path here. If the directory is relative to the 17 | # documentation root, use os.path.abspath to make it absolute, like shown here. 18 | # 19 | # import os 20 | # import sys 21 | # sys.path.insert(0, u'/home/ldutrieux/git/satmo/src/satmo') 22 | 23 | 24 | # -- General configuration ------------------------------------------------ 25 | 26 | # If your documentation needs a minimal Sphinx version, state it here. 27 | # 28 | # needs_sphinx = '1.0' 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 33 | extensions = ['sphinx.ext.autodoc', 34 | 'sphinx.ext.todo', 35 | 'sphinx.ext.viewcode', 36 | 'sphinx.ext.napoleon', 37 | 'sphinxcontrib.programoutput', 38 | 'matplotlib.sphinxext.only_directives', 39 | 'matplotlib.sphinxext.plot_directive'] 40 | 41 | # Add any paths that contain templates here, relative to this directory. 42 | templates_path = ['_templates'] 43 | 44 | # The suffix(es) of source filenames. 45 | # You can specify multiple suffix as a list of string: 46 | # 47 | # source_suffix = ['.rst', '.md'] 48 | source_suffix = ['.rst', '.md'] 49 | 50 | # The master toctree document. 51 | master_doc = 'index' 52 | 53 | # General information about the project. 54 | project = u'geextract' 55 | copyright = u'2017, Loïc Dutrieux' 56 | author = u'Loïc Dutrieux' 57 | 58 | # The version info for the project you're documenting, acts as replacement for 59 | # |version| and |release|, also used in various other places throughout the 60 | # built documents. 61 | # 62 | # Parse version from module 63 | with open('../geextract/__init__.py') as f: 64 | for line in f: 65 | if line.find("__version__") >= 0: 66 | version = line.split("=")[1].strip() 67 | version = version.strip('"') 68 | version = version.strip("'") 69 | continue 70 | release = version 71 | # The language for content autogenerated by Sphinx. 
Refer to documentation 72 | # for a list of supported languages. 73 | # 74 | # This is also used if you do content translation via gettext catalogs. 75 | # Usually you set "language" from the command line for these cases. 76 | language = 'en' 77 | 78 | # List of patterns, relative to source directory, that match files and 79 | # directories to ignore when looking for source files. 80 | # This patterns also effect to html_static_path and html_extra_path 81 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 82 | 83 | # The name of the Pygments (syntax highlighting) style to use. 84 | pygments_style = 'sphinx' 85 | 86 | # If true, `todo` and `todoList` produce output, else they produce nothing. 87 | todo_include_todos = True 88 | 89 | 90 | # -- Options for HTML output ---------------------------------------------- 91 | 92 | # The theme to use for HTML and HTML Help pages. See the documentation for 93 | # a list of builtin themes. 94 | # 95 | html_theme = 'sphinx_rtd_theme' 96 | 97 | # Theme options are theme-specific and customize the look and feel of a theme 98 | # further. For a list of options available for each theme, see the 99 | # documentation. 100 | # 101 | # html_theme_options = {} 102 | 103 | # Add any paths that contain custom static files (such as style sheets) here, 104 | # relative to this directory. They are copied after the builtin static files, 105 | # so a file named "default.css" will overwrite the builtin "default.css". 106 | # html_static_path = ['_static'] 107 | 108 | 109 | # -- Options for HTMLHelp output ------------------------------------------ 110 | 111 | # Output file base name for HTML help builder. 112 | htmlhelp_basename = 'geedoc' 113 | 114 | 115 | # -- Options for LaTeX output --------------------------------------------- 116 | 117 | latex_elements = { 118 | # The paper size ('letterpaper' or 'a4paper'). 119 | # 120 | # 'papersize': 'letterpaper', 121 | 122 | # The font size ('10pt', '11pt' or '12pt'). 123 | # 124 | # 'pointsize': '10pt', 125 | 126 | # Additional stuff for the LaTeX preamble. 127 | # 128 | # 'preamble': '', 129 | 130 | # Latex figure (float) alignment 131 | # 132 | # 'figure_align': 'htbp', 133 | } 134 | 135 | # Grouping the document tree into LaTeX files. List of tuples 136 | # (source start file, target name, title, 137 | # author, documentclass [howto, manual, or own class]). 138 | latex_documents = [ 139 | (master_doc, 'geextract.tex', u'geextract Documentation', 140 | u'Loic Dutrieux', 'manual'), 141 | ] 142 | 143 | 144 | # -- Options for manual page output --------------------------------------- 145 | 146 | # One entry per manual page. List of tuples 147 | # (source start file, name, description, authors, manual section). 148 | man_pages = [ 149 | (master_doc, 'geextract', u'geextract Documentation', 150 | [author], 1) 151 | ] 152 | 153 | 154 | # -- Options for Texinfo output ------------------------------------------- 155 | 156 | # Grouping the document tree into Texinfo files. List of tuples 157 | # (source start file, target name, title, author, 158 | # dir menu entry, description, category) 159 | texinfo_documents = [ 160 | (master_doc, 'geextract', u'geextract Documentation', 161 | author, 'geextract', 'One line description of project.', 162 | 'Miscellaneous'), 163 | ] 164 | 165 | 166 | 167 | # -- Options for Epub output ---------------------------------------------- 168 | 169 | # Bibliographic Dublin Core info. 
170 | epub_title = project 171 | epub_author = author 172 | epub_publisher = author 173 | epub_copyright = copyright 174 | 175 | # The unique identifier of the text. This can be a ISBN number 176 | # or the project homepage. 177 | # 178 | # epub_identifier = '' 179 | 180 | # A unique identification for the text. 181 | # 182 | # epub_uid = '' 183 | 184 | # A list of files that should not be packed into the epub file. 185 | epub_exclude_files = ['search.html'] 186 | 187 | 188 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_elements.papersize=a4 12 | PAPEROPT_letter = -D latex_elements.papersize=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 16 | 17 | .PHONY: help 18 | help: 19 | @echo "Please use \`make ' where is one of" 20 | @echo " html to make standalone HTML files" 21 | @echo " dirhtml to make HTML files named index.html in directories" 22 | @echo " singlehtml to make a single large HTML file" 23 | @echo " pickle to make pickle files" 24 | @echo " json to make JSON files" 25 | @echo " htmlhelp to make HTML files and an HTML help project" 26 | @echo " qthelp to make HTML files and a qthelp project" 27 | @echo " applehelp to make an Apple Help Book" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " epub3 to make an epub3" 31 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 32 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 33 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 34 | @echo " lualatexpdf to make LaTeX files and run them through lualatex" 35 | @echo " xelatexpdf to make LaTeX files and run them through xelatex" 36 | @echo " text to make text files" 37 | @echo " man to make manual pages" 38 | @echo " texinfo to make Texinfo files" 39 | @echo " info to make Texinfo files and run them through makeinfo" 40 | @echo " gettext to make PO message catalogs" 41 | @echo " changes to make an overview of all changed/added/deprecated items" 42 | @echo " xml to make Docutils-native XML files" 43 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 44 | @echo " linkcheck to check all external links for integrity" 45 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 46 | @echo " coverage to run coverage check of the documentation (if enabled)" 47 | @echo " dummy to check syntax errors of document sources" 48 | 49 | .PHONY: clean 50 | clean: 51 | rm -rf $(BUILDDIR)/* 52 | 53 | .PHONY: html 54 | html: 55 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 56 | @echo 57 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 58 | 59 | .PHONY: dirhtml 60 | dirhtml: 61 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 62 | @echo 63 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 
64 | 65 | .PHONY: singlehtml 66 | singlehtml: 67 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 68 | @echo 69 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 70 | 71 | .PHONY: pickle 72 | pickle: 73 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 74 | @echo 75 | @echo "Build finished; now you can process the pickle files." 76 | 77 | .PHONY: json 78 | json: 79 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 80 | @echo 81 | @echo "Build finished; now you can process the JSON files." 82 | 83 | .PHONY: htmlhelp 84 | htmlhelp: 85 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 86 | @echo 87 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 88 | ".hhp project file in $(BUILDDIR)/htmlhelp." 89 | 90 | .PHONY: qthelp 91 | qthelp: 92 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 93 | @echo 94 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 95 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 96 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/geextract.qhcp" 97 | @echo "To view the help file:" 98 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/geextract.qhc" 99 | 100 | .PHONY: applehelp 101 | applehelp: 102 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 103 | @echo 104 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 105 | @echo "N.B. You won't be able to view it unless you put it in" \ 106 | "~/Library/Documentation/Help or install it in your application" \ 107 | "bundle." 108 | 109 | .PHONY: devhelp 110 | devhelp: 111 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 112 | @echo 113 | @echo "Build finished." 114 | @echo "To view the help file:" 115 | @echo "# mkdir -p $$HOME/.local/share/devhelp/geextract" 116 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/geextract" 117 | @echo "# devhelp" 118 | 119 | .PHONY: epub 120 | epub: 121 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 122 | @echo 123 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 124 | 125 | .PHONY: epub3 126 | epub3: 127 | $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 128 | @echo 129 | @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." 130 | 131 | .PHONY: latex 132 | latex: 133 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 134 | @echo 135 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 136 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 137 | "(use \`make latexpdf' here to do that automatically)." 138 | 139 | .PHONY: latexpdf 140 | latexpdf: 141 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 142 | @echo "Running LaTeX files through pdflatex..." 143 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 144 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 145 | 146 | .PHONY: latexpdfja 147 | latexpdfja: 148 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 149 | @echo "Running LaTeX files through platex and dvipdfmx..." 150 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 151 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 152 | 153 | .PHONY: lualatexpdf 154 | lualatexpdf: 155 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 156 | @echo "Running LaTeX files through lualatex..." 157 | $(MAKE) PDFLATEX=lualatex -C $(BUILDDIR)/latex all-pdf 158 | @echo "lualatex finished; the PDF files are in $(BUILDDIR)/latex." 
159 | 160 | .PHONY: xelatexpdf 161 | xelatexpdf: 162 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 163 | @echo "Running LaTeX files through xelatex..." 164 | $(MAKE) PDFLATEX=xelatex -C $(BUILDDIR)/latex all-pdf 165 | @echo "xelatex finished; the PDF files are in $(BUILDDIR)/latex." 166 | 167 | .PHONY: text 168 | text: 169 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 170 | @echo 171 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 172 | 173 | .PHONY: man 174 | man: 175 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 176 | @echo 177 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 178 | 179 | .PHONY: texinfo 180 | texinfo: 181 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 182 | @echo 183 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 184 | @echo "Run \`make' in that directory to run these through makeinfo" \ 185 | "(use \`make info' here to do that automatically)." 186 | 187 | .PHONY: info 188 | info: 189 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 190 | @echo "Running Texinfo files through makeinfo..." 191 | make -C $(BUILDDIR)/texinfo info 192 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 193 | 194 | .PHONY: gettext 195 | gettext: 196 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 197 | @echo 198 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 199 | 200 | .PHONY: changes 201 | changes: 202 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 203 | @echo 204 | @echo "The overview file is in $(BUILDDIR)/changes." 205 | 206 | .PHONY: linkcheck 207 | linkcheck: 208 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 209 | @echo 210 | @echo "Link check complete; look for any errors in the above output " \ 211 | "or in $(BUILDDIR)/linkcheck/output.txt." 212 | 213 | .PHONY: doctest 214 | doctest: 215 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 216 | @echo "Testing of doctests in the sources finished, look at the " \ 217 | "results in $(BUILDDIR)/doctest/output.txt." 218 | 219 | .PHONY: coverage 220 | coverage: 221 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 222 | @echo "Testing of coverage in the sources finished, look at the " \ 223 | "results in $(BUILDDIR)/coverage/python.txt." 224 | 225 | .PHONY: xml 226 | xml: 227 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 228 | @echo 229 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 230 | 231 | .PHONY: pseudoxml 232 | pseudoxml: 233 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 234 | @echo 235 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 236 | 237 | .PHONY: dummy 238 | dummy: 239 | $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy 240 | @echo 241 | @echo "Build finished. Dummy builder generates no files." 242 | -------------------------------------------------------------------------------- /docs/examples.rst: -------------------------------------------------------------------------------- 1 | Examples 2 | -------- 3 | 4 | First example: extract a time-series using the API and plot it with matplotlib 5 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 6 | 7 | .. 
plot:: 8 | :include-source: 9 | 10 | from geextract import ts_extract, get_date 11 | from datetime import datetime 12 | import numpy as np 13 | import matplotlib.pyplot as plt 14 | plt.figure(figsize=(10,5)) 15 | 16 | # Extract a Landsat 7 time-series for a 500m radius circular buffer around 17 | # a location in Yucatan 18 | lon = -89.8107197 19 | lat = 20.4159611 20 | raw_dict = ts_extract(lon=lon, lat=lat, sensor='LE7', 21 | start=datetime(1999, 1, 1), radius=500) 22 | 23 | # Function to compute ndvi from a dictionary of the list of dictionaries returned 24 | # by ts_extract; returns None when a band is missing or masked 25 | def ndvi(x): 26 | try: 27 | return (x['B4'] - x['B3']) / (x['B4'] + x['B3']) 28 | except (TypeError, KeyError): 29 | return None 30 | 31 | # Build x and y arrays and remove missing values 32 | x = np.array([get_date(d['id']) for d in raw_dict]) 33 | y = np.array([ndvi(d) for d in raw_dict], dtype=np.float) 34 | x = x[~np.isnan(y)] 35 | y = y[~np.isnan(y)] 36 | 37 | # Make plot 38 | plt.plot_date(x, y, "--") 39 | plt.plot_date(x, y) 40 | plt.title("Landsat 7 NDVI time-series Uxmal") 41 | plt.ylabel("NDVI (-)") 42 | plt.grid(True) 43 | plt.show() 44 | 45 | 46 | 47 | 48 | Second example: extract a time-series using the command line and read it in R 49 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 50 | 51 | .. code-block:: bash 52 | 53 | gee_extract.py -s LT5 -b 1980-01-01 -lon -89.8107 -lat 20.4159 -r 500 -db /tmp/gee_db.sqlite -site uxmal -table col_1 54 | gee_extract.py -s LE7 -b 1980-01-01 -lon -89.8107 -lat 20.4159 -r 500 -db /tmp/gee_db.sqlite -site uxmal -table col_1 55 | gee_extract.py -s LC8 -b 1980-01-01 -lon -89.8107 -lat 20.4159 -r 500 -db /tmp/gee_db.sqlite -site uxmal -table col_1 56 | 57 | 58 | Running the three above commands gives you the following terminal output. Records refer to individual time steps at which Landsat observations were extracted. Note that some records may be "empty" due to the absence of useful data after the cloud masking step. The amount of useful data in the entire time-series is therefore likely to be less than the number of extracted records reported. 59 | 60 | .. code-block:: console 61 | 62 | Extracted 148 records from Google Earth Engine 63 | Extracted 231 records from Google Earth Engine 64 | Extracted 82 records from Google Earth Engine 65 |
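Before moving on to R, you can inspect the freshly written database directly from python with the standard library ``sqlite3`` module (a minimal sketch; ``col_1`` is the table name passed via ``-table`` in the commands above, and rows without any valid observation are dropped at writing time, so the counts may be lower than the extraction figures above):

.. code-block:: python

    import sqlite3

    conn = sqlite3.connect('/tmp/gee_db.sqlite')
    cur = conn.cursor()
    # Number of rows written for each sensor
    cur.execute("SELECT sensor, COUNT(*) FROM col_1 GROUP BY sensor")
    print(cur.fetchall())
    conn.close()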
66 | A sqlite database has been created at ``/tmp/gee_db.sqlite``; it contains a table named ``col_1`` (for "collection 1") that can be read as an R dataframe using tools like ``dplyr``. 67 | 68 | .. code-block:: r 69 | 70 | library(dplyr) 71 | library(DBI) 72 | 73 | # Open database connection (requires dbplyr and RSQLite packages, DBI installed via dbplyr) 74 | con <- dbConnect(RSQLite::SQLite(), dbname = "/tmp/gee_db.sqlite") 75 | dbListTables(con) 76 | df <- tbl(con, 'col_1') %>% collect() 77 | df 78 | 79 | 80 | In that case the table contains only one time-series (uxmal site); we are therefore loading the whole table in memory using ``collect()`` without an additional filtering query. 81 | 82 | .. code-block:: rout 83 | 84 | [1] "col_1" 85 | 86 | # A tibble: 390 x 11 87 | index blue green id nir red swir1 swir2 time sensor site 88 | 89 | 1 0 434 635 LT05_020046_19850206 2077 675 2267 1281 1985-02-06 LT5 uxmal 90 | 2 1 370 664 LT05_020046_19850427 2883 588 2136 1128 1985-04-27 LT5 uxmal 91 | 3 2 385 592 LT05_020046_19860108 2732 553 2010 953 1986-01-08 LT5 uxmal 92 | 4 3 555 748 LT05_020046_19860313 1971 823 2497 1479 1986-03-13 LT5 uxmal 93 | 5 4 574 804 LT05_020046_19860414 2216 919 2751 1701 1986-04-14 LT5 uxmal 94 | 6 5 790 1084 LT05_020046_19860703 3852 955 2205 1121 1986-07-03 LT5 uxmal 95 | 7 6 546 858 LT05_020046_19860820 3876 730 1968 896 1986-08-20 LT5 uxmal 96 | 8 7 334 560 LT05_020046_19861007 2694 532 2088 1072 1986-10-07 LT5 uxmal 97 | 9 8 321 539 LT05_020046_19861023 2550 524 2064 1082 1986-10-23 LT5 uxmal 98 | 10 9 590 832 LT05_020046_19870417 2390 891 2752 1660 1987-04-17 LT5 uxmal 99 | # ... with 380 more rows 100 | 101 | This dataframe (or tibble) can now be used as the base for all kinds of data analysis in R. Here we'll make some simple plots using the ``ggplot2`` package. 102 | 103 | .. code-block:: r 104 | 105 | library(ggplot2) 106 | library(tidyr) 107 | 108 | df %>% mutate(ndvi = (nir - red) / (nir + red)) %>% 109 | ggplot(aes(time, ndvi)) + 110 | geom_line() + 111 | geom_point(aes(col = sensor)) + 112 | theme_bw() 113 | 114 | .. image:: figs/ndvi_uxmal.png 115 | 116 | 117 | 118 | .. code-block:: r 119 | 120 | df %>% gather(key, value, -c(time, index, sensor, site, id)) %>% 121 | ggplot(aes(time, value)) + 122 | geom_line() + 123 | geom_point(aes(col = sensor)) + 124 | facet_grid(key ~ ., scales = 'free') + 125 | theme_bw() 126 | 127 | .. image:: figs/multispectral_uxmal.png 128 | 129 | 130 | The idea when working with multiple sites is to append them all to the same database table and use SQL (raw or via ``dplyr``) to filter the desired data. Ordering time-series for multiple sites can be done in batch with the ``gee_extract_batch.py`` command (run ``gee_extract_batch.py --help`` to see the detailed usage instructions). Here we will simply append another site to the sqlite table by re-running the ``gee_extract.py`` commands with different coordinates. 131 | 132 | .. code-block:: bash 133 | 134 | gee_extract.py -s LT5 -b 1980-01-01 -lon 4.7174 -lat 44.7814 -r 500 -db /tmp/gee_db.sqlite -site rompon -table col_1 135 | gee_extract.py -s LE7 -b 1980-01-01 -lon 4.7174 -lat 44.7814 -r 500 -db /tmp/gee_db.sqlite -site rompon -table col_1 136 | gee_extract.py -s LC8 -b 1980-01-01 -lon 4.7174 -lat 44.7814 -r 500 -db /tmp/gee_db.sqlite -site rompon -table col_1 137 | 138 | .. code-block:: console 139 | 140 | Extracted 104 records from Google Earth Engine 141 | Extracted 494 records from Google Earth Engine 142 | Extracted 193 records from Google Earth Engine 143 | 144 | Now the ``col_1`` sqlite table contains time-series for two different sites (uxmal and rompon). Loading the time-series of a single site can be done using the ``filter()`` dplyr verb. 145 | 146 | .. code-block:: r 147 | 148 | df <- tbl(con, 'col_1') %>% 149 | filter(site == 'rompon') %>% 150 | collect() %>% 151 | mutate(time = as.Date(time)) 152 | df 153 |
With ``dplyr``:

.. code-block:: r

    df <- tbl(con, 'col_1') %>%
        filter(site == 'rompon') %>%
        collect() %>%
        mutate(time = as.Date(time))
    df

.. code-block:: rout

    # A tibble: 513 x 11
       index  blue green id                     nir   red swir1 swir2 time       sensor site
       <int> <int> <int> <chr>                <int> <int> <int> <int> <date>     <chr>  <chr>
     1     0  1023  1179 LT05_196029_19840409  2438  1193  2096  1329 1984-04-09 LT5    rompon
     2     1   822  1035 LT05_196029_19840425  2561   987  2025  1125 1984-04-25 LT5    rompon
     3     4   451   715 LT05_196029_19840612  3481   582  1893   870 1984-06-12 LT5    rompon
     4     6   481   691 LT05_196029_19840815  2935   624  1799   866 1984-08-15 LT5    rompon
     5     7   370   590 LT05_196029_19840831  2880   534  1736   818 1984-08-31 LT5    rompon
     6     8   358   580 LT05_196029_19841002  2833   510  1560   708 1984-10-02 LT5    rompon
     7    10   408   642 LT05_196029_19841119  2491   656  1693   817 1984-11-19 LT5    rompon
     8    13   744   991 LT05_196029_19850327  2284  1033  2046  1239 1985-03-27 LT5    rompon
     9    16   579   845 LT05_196029_19850530  3678   697  2114   999 1985-05-30 LT5    rompon
    10    17   546   800 LT05_196029_19850615  3697   654  1928   933 1985-06-15 LT5    rompon
    # ... with 503 more rows

It is also possible to load the entire table, for instance to plot the two time-series side by side.

.. code-block:: r

    df <- tbl(con, 'col_1') %>%
        collect() %>%
        mutate(time = as.Date(time))

    df %>% mutate(ndmi = (nir - swir1) / (nir + swir1)) %>%
        ggplot(aes(time, ndmi)) +
        geom_line() +
        geom_point(aes(col = sensor)) +
        facet_grid(site ~ ., scales = 'free') +
        theme_bw()

.. image:: figs/ndmi_2sites.png
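Finally, as announced above, multiple sites can be ordered in one go with ``gee_extract_batch.py``, which reads all locations from a plain-text file. The exact expected layout is documented in ``gee_extract_batch.py --help``; a plausible input file for the two sites used here, with one location per line (longitude, latitude and site name), could look as follows.

.. code-block:: text

    -89.8107 20.4159 uxmal
    4.7174 44.7814 rompon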
Third example: Extract time-series for each feature of a polygon feature collection
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

The file ``cdmx_parks.gpkg`` contains the contours of some of the parks of Mexico City. The example below extracts the spatially aggregated time-series for each of these parks, writes the results to a sqlite database and reads the data back into Python for plotting.

.. plot::
    :include-source:

    from geextract import ts_extract, relabel, date_append, dictlist2sqlite
    import fiona
    import pandas as pd
    import seaborn as sns
    import matplotlib.pyplot as plt

    import sqlite3
    from datetime import datetime
    import os

    # Start from a clean database
    try:
        os.remove('/tmp/landsat_cdmx.sqlite')
    except OSError:
        pass

    # Open feature collection generator
    with fiona.open('data/cdmx_parks.gpkg', layer='parks') as src:
        # Iterate over feature collection
        for feature in src:
            # Extract time-series
            ts_0 = ts_extract(sensor='LC8', start=datetime(2012, 1, 1),
                              feature=feature)
            ts_1 = relabel(ts_0, 'LC8')
            ts_2 = date_append(ts_1)
            # Write dictionary list to sqlite database table
            dictlist2sqlite(ts_2, site=feature['properties']['name'],
                            sensor='LC8', db_src='/tmp/landsat_cdmx.sqlite',
                            table='cdmx')

    # Read the data back into a pandas dataframe
    conn = sqlite3.connect("/tmp/landsat_cdmx.sqlite")
    df = pd.read_sql_query('select * from cdmx', conn)
    df['date'] = pd.to_datetime(df['time'], format='%Y-%m-%d')
    df['ndvi'] = (df.nir - df.red) / (df.nir + df.red)
    # Make facetgrid plot
    g = sns.FacetGrid(df, row='site', aspect=4, size=2)
    def dateplot(x, y, **kwargs):
        ax = plt.gca()
        data = kwargs.pop("data")
        data.plot(x=x, y=y, ax=ax, grid=False, **kwargs)
    g = g.map_dataframe(dateplot, "date", "ndvi")
    plt.show()
--------------------------------------------------------------------------------
/geextract/__init__.py:
--------------------------------------------------------------------------------
"""geextract"""

__version__ = "0.5.0"

import ee
import sqlite3
import pandas as pd
import re
from datetime import datetime
import warnings

# Silence pandas warning
warnings.simplefilter(action='ignore')

ee.Initialize()


BANDS_TO_COLORS = {'LT4': {'B1': 'blue',
                           'B2': 'green',
                           'B3': 'red',
                           'B4': 'nir',
                           'B5': 'swir1',
                           'B7': 'swir2',
                           'id': 'id'},
                   'LC8': {'B2': 'blue',
                           'B3': 'green',
                           'B4': 'red',
                           'B5': 'nir',
                           'B6': 'swir1',
                           'B7': 'swir2',
                           'id': 'id'}}

BANDS_TO_COLORS['LT5'] = BANDS_TO_COLORS['LT4']
BANDS_TO_COLORS['LE7'] = BANDS_TO_COLORS['LT4']
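
# Illustration (hypothetical record, for documentation only): with the 'LC8'
# mapping above, relabel() defined further down turns
# {'B4': 675, 'B5': 2077, 'id': 'LC80200462013106'} into
# {'red': 675, 'nir': 2077, 'id': 'LC80200462013106'}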


def get_date(filename):
    """Retrieve date information from typical Landsat filenames

    Args:
        filename (str): Landsat file name

    Returns:
        datetime.date : The corresponding date of the filename.

    Examples:
        >>> import geextract
        >>> geextract.get_date('LC81970292013106')
        datetime.date(2013, 4, 16)
    """
    p0 = re.compile(r'(?P<sensor>LC8|LE7|LT5|LT4)(?P<pathrow>\d{6})(?P<date>\d{7})')
    p1 = re.compile(r'(?P<sensor>LC08|LE07|LT04|LT05)_(?P<pathrow>\d{6})_(?P<date>\d{8})')
    if p0.search(filename):
        m = p0.search(filename)
        d = datetime.strptime(m.group('date'), '%Y%j').date()
    elif p1.search(filename):
        m = p1.search(filename)
        d = datetime.strptime(m.group('date'), '%Y%m%d').date()
    else:
        raise ValueError('Unknown pattern')
    return d
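
# Note: the second pattern accepts collection 1 style scene IDs as well, e.g.
# get_date('LC08_197029_20130416') -> datetime.date(2013, 4, 16)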


def ts_extract(sensor, start, tiers=['T1', 'T2'], lon=None, lat=None,
               end=datetime.today(), radius=None, feature=None,
               bands=None, stats='mean'):
    """Perform a spatio-temporal query to extract Landsat surface reflectance data
    from gee

    Args:
        lon (float): Center longitude in decimal degrees
        lat (float): Center latitude in decimal degrees
        sensor (str): Landsat sensor to query data from. Must be one of 'LT4',
            'LT5', 'LE7', 'LC8'
        tiers (list): List of tiers to order. ``'T1'`` corresponds to Tier 1.
            Defaults to ``['T1', 'T2']``
        start (datetime.datetime): Start date
        end (datetime.datetime): Optional end date; automatically set to today if unset
        radius (float): Optional radius around the center point in meters. If unset,
            the time-series of a single pixel is queried. Otherwise a reducer is used
            to spatially aggregate the pixels intersecting the circular feature
            built.
        feature (dict): Optional dictionary representation of a polygon feature
            in longlat CRS. If unset, the time-series of a single pixel is queried.
            Otherwise a reducer is used to spatially aggregate the pixels
            intersecting the given feature.
        bands (list): List of Landsat band names. Optional, defaults to
            ['B2', 'B3', 'B4', 'B5', 'B6', 'B7'] in the case of the LC8 sensor and to
            ['B1', 'B2', 'B3', 'B4', 'B5', 'B7'] otherwise.
        stats (str): Spatial aggregation function to use. Only relevant
            if ``radius`` or ``feature`` is set.

    Returns:
        list: A list of dictionaries (one per scene) holding the data returned
        by the gee platform.

    Example:
        >>> import geextract
        >>> from pprint import pprint
        >>> from datetime import datetime

        >>> lon = -89.8107197
        >>> lat = 20.4159611

        >>> out = geextract.ts_extract(lon=lon, lat=lat, sensor='LE7',
        ...                            start=datetime(1980, 1, 1, 0, 0),
        ...                            radius=500)
        >>> pprint(out)

    """
    # Define some internal functions to be mapped over imageCollections
    def _mask_clouds(image):
        """Cloud masking function"""
        # Collection 1 cloud masking example:
        # https://code.earthengine.google.com/52e39cc00de3471c905509e374c52284

        # Pre-collection masking example (there, 'collection' must be a variable
        # of the parent environment):
        # https://code.earthengine.google.com/37ffd688d1b2d2c977fa5c536a023356
        clear = image.select('pixel_qa').bitwiseAnd(0x2).neq(0)
        valid_range_mask = image.gte(0).And(image.lte(10000))
        return image.updateMask(clear).updateMask(valid_range_mask)

    # Check inputs
    if sensor not in ['LT4', 'LT5', 'LC8', 'LE7']:
        raise ValueError('Unknown sensor (Must be one of LT4, LT5, LE7, LC8)')
    if bands is None:
        if sensor in ['LT4', 'LT5', 'LE7']:
            bands = ['B1', 'B2', 'B3', 'B4', 'B5', 'B7']
        else:
            bands = ['B2', 'B3', 'B4', 'B5', 'B6', 'B7']
    sensor = re.sub(r'(LC|LT|LE)(\d{1})', r'\g<1>0\g<2>', sensor)
    collection_name_template = 'LANDSAT/%s/C01/%%s_SR' % sensor
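    # Illustration: for sensor='LC8' the substitution above yields 'LC08', so the
    # default tiers resolve to the collections 'LANDSAT/LC08/C01/T1_SR' and
    # 'LANDSAT/LC08/C01/T2_SR'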
    # Iterate over tiers to load and merge all corresponding image collections
    landsat_ic = ee.ImageCollection(collection_name_template % tiers[0])
    for tier in tiers[1:]:
        tier_ic = ee.ImageCollection(collection_name_template % tier)
        landsat_ic = ee.ImageCollection(landsat_ic.merge(tier_ic))
    # Prepare image collection
    landsat = landsat_ic.\
        filterDate(start=start, opt_end=end)\
        .map(_mask_clouds)\
        .select(bands)
    if radius is not None or feature is not None:
        # Define spatial aggregation function
        if stats == 'mean':
            fun = ee.Reducer.mean()
        elif stats == 'median':
            fun = ee.Reducer.median()
        elif stats == 'max':
            fun = ee.Reducer.max()
        elif stats == 'min':
            fun = ee.Reducer.min()
        else:
            raise ValueError('Unknown spatial aggregation function. '
                             'Must be one of mean, median, max, or min')

        if feature is not None:
            geometry = ee.Geometry.Polygon(feature['geometry']['coordinates'])
        else:  # Geometry defined by point and radius
            geometry = ee.Geometry.Point(lon, lat).buffer(radius)
        # Define function to map over imageCollection to perform spatial aggregation
        def _reduce_region(image):
            """Spatial aggregation function for a single image and a polygon feature"""
            stat_dict = image.reduceRegion(fun, geometry, 30)
            # Feature needs to be rebuilt because the backend does not accept
            # mapping functions that return dictionaries
            return ee.Feature(None, stat_dict)
        fc = landsat.filterBounds(geometry).map(_reduce_region).getInfo()
        out = simplify(fc)
    else:
        # Extraction for a single point, without spatial aggregation
        geometry = ee.Geometry.Point(lon, lat)
        l = landsat.filterBounds(geometry).getRegion(geometry, 30).getInfo()
        out = dictify(l)
    # Pop longitude, latitude and time keys from the dict collection so that
    # band aliases can later be replaced by their color names
    [d.pop('longitude', None) for d in out]
    [d.pop('latitude', None) for d in out]
    [d.pop('time', None) for d in out]
    return out


def simplify(fc):
    """Take a feature collection, as returned by mapping a reducer over an
    ImageCollection, and reshape it into a simpler list of dictionaries

    Args:
        fc (dict): Dictionary representation of a feature collection, as returned
            by mapping a reducer over an ImageCollection

    Returns:
        list: A list of dictionaries.

    Examples:
        >>> fc = {u'columns': {},
        ...       u'features': [{u'geometry': None,
        ...                      u'id': u'LC81970292013106',
        ...                      u'properties': {u'B1': 651.8054424353023,
        ...                                      u'B2': 676.6018246419446},
        ...                      u'type': u'Feature'},
        ...                     {u'geometry': None,
        ...                      u'id': u'LC81970292013122',
        ...                      u'properties': {u'B1': 176.99323997958842,
        ...                                      u'B2': 235.83196553144882},
        ...                      u'type': u'Feature'}]}
        >>> simplify(fc)
    """
    def feature2dict(f):
        id = f['id']
        out = f['properties']
        out.update(id=id)
        return out
    out = [feature2dict(x) for x in fc['features']]
    return out
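
# Illustration: for the feature collection in the docstring above, simplify(fc)
# returns
# [{'B1': 651.8054424353023, 'B2': 676.6018246419446, 'id': 'LC81970292013106'},
#  {'B1': 176.99323997958842, 'B2': 235.83196553144882, 'id': 'LC81970292013122'}]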


def dictify(x):
    """Build a list of dictionaries from a list of lists as returned by running
    getRegion on an ImageCollection

    Args:
        x (list): A list of lists. The first element contains the keys while the
            following elements contain the values.

    Returns:
        list: A list of dictionaries

    Examples:
        >>> l = [[u'id', u'B1', u'B2', u'B3', u'B7'],
        ...      [u'LC81970292013106', 649, 683, 910, 1365],
        ...      [u'LC81970292013122', 140, 191, 521, 965]]
        >>> dictify(l)
    """
    out = [dict(zip(x[0], values)) for values in x[1:]]
    return out


def relabel(dl, sensor):
    """Rename the keys of each element of a list of dictionaries

    Args:
        dl (list): List of dictionaries
        sensor (str): Landsat sensor the data belong to. Must be one of
            'LT4', 'LT5', 'LE7' or 'LC8'

    Returns:
        list: A list of dictionaries
    """
    def change_keys(d, dr):
        return dict((dr[key], value) for (key, value) in d.items())
    dl_out = [change_keys(d, BANDS_TO_COLORS[sensor]) for d in dl]
    return dl_out


def date_append(dl):
    """Add a time key to each element of a list of dictionaries

    Args:
        dl (list): List of dictionaries; each dictionary should at least contain
            the key 'id', holding a classic Landsat scene ID parsable by get_date

    Returns:
        list: A list of dictionaries
    """
    # Add time key to each dict of dl
    for item in dl:
        item.update(time=get_date(item['id']))
    return dl


def dictlist2sqlite(dl, site, sensor, db_src, table):
    """Write a list of dictionaries to a sqlite database

    Args:
        dl (list): List of dictionaries
        site (str): Site name, written to the 'site' column of the table
        sensor (str): Landsat sensor the data belong to, written to the
            'sensor' column of the table
        db_src (str): Path to a sqlite database (created in case it does not exist)
        table (str): Name of the database table to write the data to

    Returns:
        None: This function is used for its side effect of writing data to a
        database; it does not return anything
    """
    df = pd.DataFrame(dl)
    # Drop any row that contains no-data
    # TODO: Filter only rows for which all bands are NaN
    df2 = df.dropna(how='any').copy()
    df2['sensor'] = sensor
    df2['site'] = site
    con = sqlite3.connect(db_src)
    df2.to_sql(name=table, con=con, if_exists='append')
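

# Typical pipeline combining the functions above (illustrative names and paths,
# mirroring the third example of the documentation):
#     dl = ts_extract(sensor='LC8', start=datetime(2012, 1, 1), feature=feature)
#     dl = relabel(dl, 'LC8')
#     dl = date_append(dl)
#     dictlist2sqlite(dl, site='some_site', sensor='LC8',
#                     db_src='/tmp/landsat.sqlite', table='my_table')
--------------------------------------------------------------------------------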