├── .gitignore ├── .travis.yml ├── HISTORY.rst ├── LICENSE ├── MANIFEST.in ├── README.rst ├── bld.bat ├── build.sh ├── examples ├── FY2015-summary-stats.ipynb ├── fetch_media │ ├── README.md │ ├── fetch_media.py │ ├── mediarecord_uuids_list.txt │ ├── query.txt │ ├── query_geo.txt │ ├── query_geo_bounding.txt │ └── record_uuids_list.txt └── recordset_indexed_modified_date_and_archive_link_status.py ├── idigbio ├── __init__.py ├── json_client.py ├── pandas_client.py └── util.py ├── meta.yaml ├── setup.cfg ├── setup.py ├── test-requirements.txt └── tests ├── __init__.py ├── test_json_client.py ├── test_json_client_map.py └── test_pandas_client.py /.gitignore: -------------------------------------------------------------------------------- 1 | # emacs temp files 2 | *~ 3 | \#*\# 4 | 5 | 6 | 7 | # Created by https://www.gitignore.io/api/python 8 | 9 | ### Python ### 10 | # Byte-compiled / optimized / DLL files 11 | __pycache__/ 12 | *.py[cod] 13 | *$py.class 14 | 15 | # C extensions 16 | *.so 17 | 18 | # Distribution / packaging 19 | .Python 20 | env/ 21 | build/ 22 | develop-eggs/ 23 | dist/ 24 | downloads/ 25 | eggs/ 26 | .eggs/ 27 | lib/ 28 | lib64/ 29 | parts/ 30 | sdist/ 31 | var/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | 36 | # PyInstaller 37 | # Usually these files are written by a python script from a template 38 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
39 | *.manifest 40 | *.spec 41 | 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | 46 | # Unit test / coverage reports 47 | htmlcov/ 48 | .tox/ 49 | .coverage 50 | .coverage.* 51 | .cache 52 | nosetests.xml 53 | coverage.xml 54 | *,cover 55 | .hypothesis/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | 65 | # Flask instance folder 66 | instance/ 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # IPython Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # Pythoscope 81 | .pythoscope/ 82 | 83 | /.dir-locals.el 84 | /README.html 85 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - '2.7' 4 | - '3.4' 5 | - '3.5' 6 | install: 7 | - pip install -r test-requirements.txt 8 | - pip install . 9 | script: 10 | - python -m unittest tests 11 | - cat README.rst HISTORY.rst | rst2html.py --strict - /dev/null 12 | deploy: 13 | provider: pypi 14 | user: idigbio 15 | password: 16 | secure: e/ltrIGHu9wZXmi+8XmIaFmyiYz2540UrL129+ezzWRKRTM5FhPguvRO+6epBNGL4+NYZ+BvBiOumC2SCTGzq3ASncCkaCmjAGh8NCPXXblDOkvMGA9qls9sjoJxBCCBhmcY6/QnPOQ59CiSPLIBxru5u2nf4R7peOrzZHJ5o88= 17 | on: 18 | tags: true 19 | distributions: sdist bdist_wheel 20 | repo: iDigBio/idigbio-python-client 21 | notifications: 22 | slack: 23 | secure: XIUC86fDwnlQjT44v+BXwnB2e+Xa+bY7n9mgo+YM7SnS/xdWMId/0aIRsiQAsbDRLPfnunEu/E8kpQFvRphK3jWv5t2fHnR+HJ/tHfPSralDTCtvAJX019KY0Bkz0xLgFSjtEvX2BeEHY+QRuA8LCRvWFlY2Mnfx7fFkqbWDPNY= 24 | -------------------------------------------------------------------------------- /HISTORY.rst: -------------------------------------------------------------------------------- 1 | .. 
:changelog: 2 | 3 | Release History 4 | --------------- 5 | 6 | 0.8.6 (TBD) 7 | ++++++++++++++++++ 8 | 9 | **Changes** 10 | 11 | - Drop support for Python 3.3 (following the lead of Pandas library) 12 | 13 | 0.8.5 (2018-03-16) 14 | ++++++++++++++++++ 15 | 16 | **New** 17 | 18 | - add debug command-line option 19 | 20 | 0.8.4 (2017-06-07) 21 | ++++++++++++++++++ 22 | 23 | **New** 24 | 25 | - add full-featured example script fetch_media.py to download media from iDigBio 26 | - add documentation for fetch_media 27 | 28 | **Changes** 29 | 30 | - remove fetch_media_based_on_query.py which is superceded by fetch_media.py 31 | 32 | 0.8.3.3 (2017-05-17) 33 | ++++++++++++++++++++ 34 | 35 | **New** 36 | 37 | - add an example to examples directory to download media based on search query 38 | 39 | **Changes** 40 | 41 | - minor changes to documentation, unit tests 42 | - remove hard-coded path to tmp directory 43 | 44 | 0.8.2 (2017-05-10) 45 | ++++++++++++++++++ 46 | 47 | **New** 48 | 49 | - count_recordsets() function returns number of recordsets in iDigBio 50 | 51 | 52 | 0.8.1 (2016-08-29) 53 | ++++++++++++++++++ 54 | 55 | - Send etag with file on upload to verify correctness 56 | 57 | 0.6.1 (2016-04-08) 58 | ++++++++++++++++++ 59 | 60 | **Changes** 61 | 62 | - Add media_type to upload functionality. 63 | 64 | 0.6.0 (2016-03-30) 65 | ++++++++++++++++++ 66 | 67 | **Changes** 68 | 69 | - Make pandas an extra requirements, update docs 70 | 71 | **New** 72 | 73 | - Specify auth for api backend 74 | - Upload image capability (requires auth) 75 | 76 | 77 | 78 | 0.5.0 (2016-02-24) 79 | ++++++++++++++++++ 80 | 81 | **Changes** 82 | 83 | - Don't exclude ``data.*`` fields if requested specifically 84 | - Fix ``stats`` and ``datehist`` api calls to respect parameters; 85 | param names changed to use python style and match server params. 86 | 87 | 88 | 0.4.3 (2016-02-23) 89 | ++++++++++++++++++ 90 | 91 | **Bugfixes** 92 | 93 | - no results no longer errs in the pandas client. 
94 | - limit correctly limits to specified record, not next larger batch size 95 | 96 | **Miscellaneous** 97 | 98 | - Clarify targetted python versions 99 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 iDigBio 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.rst LICENSE 2 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | idigbio-python-client 2 | ===================== 3 | 4 | .. 
image:: https://img.shields.io/pypi/v/idigbio.svg 5 | :target: https://pypi.python.org/pypi/idigbio 6 | 7 | .. image:: https://img.shields.io/travis/iDigBio/idigbio-python-client.svg 8 | :target: https://travis-ci.com/iDigBio/idigbio-python-client 9 | 10 | A python client for the `iDigBio `_ iDigBio v2 API. 11 | 12 | Installation 13 | ------------ 14 | 15 | .. code-block:: 16 | 17 | pip install idigbio 18 | 19 | If you want to use the Pandas Data Frame interface you need to install 20 | pandas as well. 21 | 22 | .. code-block:: 23 | 24 | pip install idigbio pandas 25 | 26 | If you see InsecurePlatformWarning or have other SSL certificate verification issues, you may 27 | wish to install urllib3 with the secure extra. 28 | 29 | .. code-block:: 30 | 31 | pip install urllib3[secure] 32 | 33 | 34 | Basic Usage 35 | ----------- 36 | 37 | Returning JSON from the API. 38 | 39 | .. code-block:: python 40 | 41 | import idigbio 42 | api = idigbio.json() 43 | json_output = api.search_records() 44 | 45 | Returning a Pandas Data Frame. 46 | 47 | .. code-block:: python 48 | 49 | import idigbio 50 | api = idigbio.pandas() 51 | pandas_output = api.search_records() 52 | 53 | See the `Search API docs 54 | `_ for info about 55 | the endpoint parameters. 56 | 57 | 58 | Examples 59 | ++++++++ 60 | 61 | View a Record By UUID 62 | 63 | .. code-block:: python 64 | 65 | import idigbio 66 | api = idigbio.json() 67 | record = api.view("records","1db58713-1c7f-4838-802d-be784e444c4a") 68 | 69 | Search for a Record by scientific name 70 | 71 | .. code-block:: python 72 | 73 | import idigbio 74 | api = idigbio.json() 75 | record_list = api.search_records(rq={"scientificname": "puma concolor"}) 76 | 77 | Search for Records that have images 78 | 79 | .. code-block:: python 80 | 81 | import idigbio 82 | api = idigbio.json() 83 | record_list = api.search_records(rq={"scientificname": "puma concolor", "hasImage": True}) 84 | 85 | Search for a MediaRecords by record property 86 | 87 | .. 
code-block:: python 88 | 89 | import idigbio 90 | api = idigbio.json() 91 | mediarecord_list = api.search_media(rq={"scientificname": "puma concolor", "hasImage": True}) 92 | 93 | Create a heat map for a genus 94 | 95 | .. code-block:: python 96 | 97 | import idigbio 98 | api = idigbio.json() 99 | m = api.create_map(rq={"genus": "acer"}, t="geohash") 100 | m.save_map_image("acer_map_geohash", 2) 101 | 102 | Create a point map for a genus 103 | 104 | .. code-block:: python 105 | 106 | import idigbio 107 | api = idigbio.json() 108 | m = api.create_map(rq={"genus": "acer"}, t="points") 109 | m.save_map_image("acer_map_points", 2) 110 | 111 | Create a zoomed in point map for a bounding box 112 | 113 | .. code-block:: python 114 | 115 | import idigbio 116 | api = idigbio.json() 117 | bbox = {"type": "geo_bounding_box", "bottom_right": {"lat": 29.642979999999998, "lon": -82.00}, "top_left": {"lat": 29.66298, "lon": -82.35315800000001}} 118 | m = api.create_map( 119 | rq={"geopoint": bbox} 120 | ) 121 | m.save_map_image("test.png", None, bbox=bbox) 122 | 123 | 124 | Create a summary of kingdom and phylum data 125 | 126 | .. code-block:: python 127 | 128 | import idigbio 129 | api = idigbio.json() 130 | summary_data = api.top_records(fields=["kingdom", "phylum"]) 131 | 132 | Get the number of Records for a search by scientific name 133 | 134 | .. code-block:: python 135 | 136 | import idigbio 137 | api = idigbio.json() 138 | count = api.count_records(rq={"scientificname": "puma concolor"}) 139 | 140 | Get the number of MediaRecords for a search by scientific name 141 | 142 | .. code-block:: python 143 | 144 | import idigbio 145 | api = idigbio.json() 146 | count = api.count_media(rq={"scientificname": "puma concolor"}) 147 | 148 | Get the histogram of Collection Dates for a search by record property, for the last 10 years 149 | 150 | .. 
code-block:: python 151 | 152 | import idigbio 153 | api = idigbio.json() 154 | histogram_data = api.datehist( 155 | rq={"scientificname": "puma concolor"}, 156 | top_fields=["institutioncode"], min_date="2005-01-01") 157 | 158 | Development 159 | +++++++++++ 160 | 161 | To contribute code to this project, please submit a pull request to the repo on github: 162 | 163 | https://github.com/idigbio/idigbio-python-client/ 164 | 165 | To set up a development environment, run the following from inside a python virtual environment 166 | in your local repo directory: 167 | 168 | .. code-block:: 169 | 170 | pip install -e . 171 | 172 | -------------------------------------------------------------------------------- /bld.bat: -------------------------------------------------------------------------------- 1 | "%PYTHON%" setup.py install 2 | if errorlevel 1 exit 1 3 | 4 | :: Add more build steps here, if they are necessary. 5 | 6 | :: See 7 | :: http://docs.continuum.io/conda/build.html 8 | :: for a list of environment variables that are set during the build process. 9 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | $PYTHON setup.py install 4 | 5 | # Add more build steps here, if they are necessary. 6 | 7 | # See 8 | # http://docs.continuum.io/conda/build.html 9 | # for a list of environment variables that are set during the build process. 10 | -------------------------------------------------------------------------------- /examples/fetch_media/README.md: -------------------------------------------------------------------------------- 1 | # fetch_media.py 2 | 3 | An example script that leverages the iDigBio search API to download media that match a query. 
4 | 5 | ## Usage 6 | 7 | ``` 8 | $ python fetch_media.py --help 9 | usage: fetch_media.py [-h] [-m MAX] [-s {thumbnail,webview,fullsize}] 10 | [-o OUTPUT_DIR] 11 | (-q QUERY | --query-file QUERY_FILE | --records-uuids-file RECORDS_UUIDS_FILE | --mediarecords-uuids-file MEDIARECORDS_UUIDS_FILE) 12 | 13 | This script will download media that are associated with the specimens 14 | returned by an iDigBio specimen record search query. 15 | 16 | The iDigBio Query Format is documented at 17 | 18 | https://github.com/idigbio/idigbio-search-api/wiki/Query-Format 19 | 20 | Notes on the --output-dir / -o parameter: 21 | 22 | If the specified output directory does not exist, it will be created. 23 | Omitting this parameter will cause a new directory to be created 24 | under the current directory, named in a timestamp-like style. 25 | 26 | ### Sample ### 27 | 28 | $ python fetch_media.py -o /tmp/idigbio_media_downloads -m 5 -q '{"genus": "acer"}' 29 | 30 | DOWNLOADING FINISHED with 5 successes and 0 failures 31 | 32 | Media downloads are in output directory: '/tmp/idigbio_media_downloads' 33 | 34 | $ ls -l /tmp/idigbio_media_downloads 35 | total 604 36 | -rw-rw-r-- 1 dstoner dstoner 93767 Jun 6 09:19 0c9b4669-edaa-467d-b240-f3311c764c04_webview.jpg 37 | -rw-rw-r-- 1 dstoner dstoner 114132 Jun 6 09:19 1f2dbb2b-75ba-48cb-b34c-1ca003b4a38d_webview.jpg 38 | -rw-rw-r-- 1 dstoner dstoner 147900 Jun 6 09:19 56f84bfe-5095-4fbb-b9e0-08cef3fdb448_webview.jpg 39 | -rw-rw-r-- 1 dstoner dstoner 117882 Jun 6 09:19 6a0d0c92-d2be-4ae5-9fef-60453778b0f0_webview.jpg 40 | -rw-rw-r-- 1 dstoner dstoner 136202 Jun 6 09:19 b98b9704-5ac5-4b53-b74d-d2d4d7d46ddd_webview.jpg 41 | ### 42 | 43 | The media record for the first download above would be viewable in the iDigBio portal at 44 | https://www.idigbio.org/portal/mediarecords/0c9b4669-edaa-467d-b240-f3311c764c04 45 | 46 | optional arguments: 47 | -h, --help show this help message and exit 48 | -m MAX, --max MAX Maximum number of records to be returned 
from search 49 | query. Default: 100, Maximum allowed value: 100000 50 | -s {thumbnail,webview,fullsize}, --size {thumbnail,webview,fullsize} 51 | Size of derivative to download. Default: 'webview' 52 | -o OUTPUT_DIR, --output-dir OUTPUT_DIR 53 | Directory path for downloaded media files. Default: a 54 | new directory will be created under current directory 55 | -q QUERY, --query QUERY 56 | query in iDigBio Query Format. 57 | --query-file QUERY_FILE 58 | file path containing query string in iDigBio Query 59 | Format 60 | --records-uuids-file RECORDS_UUIDS_FILE 61 | file path containing list of iDigBio record uuids, one 62 | per line 63 | --mediarecords-uuids-file MEDIARECORDS_UUIDS_FILE 64 | file path containing list of iDigBio mediarecord 65 | uuids, one per line 66 | 67 | ``` 68 | 69 | ## Examples 70 | 71 | Some of these example queries are taken directly from the iDigBio Query Format portion of the Search API documentation: 72 | 73 | https://github.com/idigbio/idigbio-search-api/wiki/Query-Format 74 | 75 | 76 | ### Specify a query on the command-line 77 | 78 | Different operating systems display different behaviors when dealing with quotes and curly braces. 79 | 80 | On Unix-like operating systems is best to wrap the query string in single quotes to protect the contents from shell interpretation. 81 | 82 | On Windows, the query string cannot be wrapped in single quotes and double-quotes must be escaped with a backslash. 83 | 84 | In many cases it may be best to specify the query in a text file and use the --query-file option instead of -q / --query. 85 | 86 | ``` 87 | $ python fetch_media.py -q '{"scientificname":"isotelus maximus"}' 88 | 89 | Using query: 90 | 91 | {"scientificname":"isotelus maximus"} 92 | 93 | OPERATING PARAMETERS... 
94 | 95 | Maximum number of media to fetch: 100 96 | Media derivative size: webview 97 | Output directory: /home/dstoner/git/idigbio-python-client/examples/fetch_media/20170607094735.99 98 | Query Type: rq 99 | 100 | EXECUTING SEARCH QUERY... 101 | 102 | 103 | Search query produced 10 results. 104 | 105 | 106 | BEGINNING DOWNLOADS NOW... 107 | 108 | Downloading: 'https://api.idigbio.org/v2/media/267e2624-641f-4e34-9fdc-df59b14a5571?size=webview' 109 | Downloading: 'https://api.idigbio.org/v2/media/597f3eba-e40a-411f-af3e-5e6bb2d77c5c?size=webview' 110 | Downloading: 'https://api.idigbio.org/v2/media/f610a25a-ea1f-4f8b-9905-68523ff9e876?size=webview' 111 | Downloading: 'https://api.idigbio.org/v2/media/9ceb0644-03ea-47cb-9b58-bbb8ffd22a5b?size=webview' 112 | Downloading: 'https://api.idigbio.org/v2/media/c4aa9d24-8284-4207-8df1-294cbd80f634?size=webview' 113 | Downloading: 'https://api.idigbio.org/v2/media/1db5c01d-a54f-4049-b4cd-ffceda60a920?size=webview' 114 | Downloading: 'https://api.idigbio.org/v2/media/3e20720a-2f1b-4891-9d00-80028a3222b4?size=webview' 115 | Downloading: 'https://api.idigbio.org/v2/media/efd6753c-f276-4836-a05a-8771bd934ee5?size=webview' 116 | Downloading: 'https://api.idigbio.org/v2/media/c1bedf6b-2ddc-418f-aa9f-a4e69ec811fc?size=webview' 117 | Downloading: 'https://api.idigbio.org/v2/media/d29d364e-72f2-407c-9c81-428e14c7a2c3?size=webview' 118 | 119 | DOWNLOADING FINISHED with 10 successes and 0 failures 120 | 121 | Media downloads are in output directory: '/home/dstoner/git/idigbio-python-client/examples/fetch_media/20170607094735.99' 122 | 123 | ``` 124 | 125 | ### Use a query specified in a file 126 | 127 | ``` 128 | $ cat query.txt 129 | { 130 | "scientificname": "Anastrepha pallens Coquillett, 1904" 131 | } 132 | 133 | 134 | $ python fetch_media.py --query-file query.txt 135 | 136 | Using query: 137 | 138 | { 139 | "scientificname": "Anastrepha pallens Coquillett, 1904" 140 | } 141 | 142 | 143 | 144 | OPERATING PARAMETERS... 
145 | 146 | Maximum number of media to fetch: 100 147 | Media derivative size: webview 148 | Output directory: /home/dstoner/git/idigbio-python-client/examples/fetch_media/20170607095607.19 149 | Query Type: rq 150 | 151 | EXECUTING SEARCH QUERY... 152 | 153 | 154 | Search query produced 7 results. 155 | 156 | 157 | BEGINNING DOWNLOADS NOW... 158 | 159 | Downloading: 'https://api.idigbio.org/v2/media/5cf7837c-7535-4263-a9a9-cfcf3a45b251?size=webview' 160 | Downloading: 'https://api.idigbio.org/v2/media/cd4fa6ce-95d3-4445-8733-75a6908944d8?size=webview' 161 | Downloading: 'https://api.idigbio.org/v2/media/ba7322f1-6468-4739-be87-a98ef8eb8bfc?size=webview' 162 | Downloading: 'https://api.idigbio.org/v2/media/0d0e07fa-9e86-4b71-8abf-140d163f9c16?size=webview' 163 | Downloading: 'https://api.idigbio.org/v2/media/8a0229f9-0b58-4017-af6e-f55121c28cab?size=webview' 164 | Downloading: 'https://api.idigbio.org/v2/media/500ba0ee-2e70-46ea-b80f-9e5a29753923?size=webview' 165 | Downloading: 'https://api.idigbio.org/v2/media/fbc36e25-16db-4828-a4c9-98049f0663fc?size=webview' 166 | 167 | DOWNLOADING FINISHED with 7 successes and 0 failures 168 | 169 | Media downloads are in output directory: '/home/dstoner/git/idigbio-python-client/examples/fetch_media/20170607095607.19' 170 | 171 | ``` 172 | 173 | 174 | ### Searching within a radius around a geopoint 175 | 176 | In addition to specifying a query file, the following command limits the number of media to 5 and specifies an output directory. 177 | 178 | ``` 179 | $ python fetch_media.py -m 5 --query-file query_geo.txt -o /tmp/idigbio_media_downloads 180 | 181 | Using query: 182 | 183 | { 184 | "geopoint": { 185 | "type": "geo_distance", 186 | "distance": "100km", 187 | "lat": -41.1119, 188 | "lon": 145.323 189 | } 190 | } 191 | 192 | 193 | OPERATING PARAMETERS... 
194 | 195 | Maximum number of media to fetch: 5 196 | Media derivative size: webview 197 | Output directory: /tmp/idigbio_media_downloads 198 | Query Type: rq 199 | 200 | EXECUTING SEARCH QUERY... 201 | 202 | 203 | Search query produced 588 results. 204 | 205 | *** WARNING: search query produced more results than the designated maximum number of media to fetch. 206 | *** Use the -m or --max parameter to increase the maximum number of media to fetch. 207 | 208 | BEGINNING DOWNLOADS NOW... 209 | 210 | Downloading: 'https://api.idigbio.org/v2/media/63d218ad-4788-45ef-a11d-6d5ae75e9c19?size=webview' 211 | Downloading: 'https://api.idigbio.org/v2/media/81769eba-4c23-4dd7-8d1f-40a57d0cee94?size=webview' 212 | Downloading: 'https://api.idigbio.org/v2/media/90979c0b-1807-42c8-9180-cbc95a696d0a?size=webview' 213 | Downloading: 'https://api.idigbio.org/v2/media/9d6efc2a-ffec-4866-b3fc-2f0c7d3340d1?size=webview' 214 | Downloading: 'https://api.idigbio.org/v2/media/a01348b0-bed7-447d-982d-2e946db7ac5b?size=webview' 215 | 216 | DOWNLOADING FINISHED with 5 successes and 0 failures 217 | 218 | Media downloads are in output directory: '/tmp/idigbio_media_downloads' 219 | 220 | ``` 221 | 222 | 223 | ### Specify a geo bounding box 224 | 225 | ``` 226 | $ cat query_geo_bounding.txt 227 | { 228 | "geopoint": { 229 | "type": "geo_bounding_box", 230 | "top_left": { 231 | "lat": 19.23, 232 | "lon": -130 233 | }, 234 | "bottom_right": { 235 | "lat": -45.1119, 236 | "lon": 179.99999 237 | } 238 | } 239 | } 240 | ``` 241 | 242 | In addition to specifying a query file, the following command limits the number of media to 5 and specifies an output directory. 
243 | 244 | 245 | ``` 246 | $ python fetch_media.py -m 5 --query-file query_geo_bounding.txt -o /tmp/idigbio_media_downloads 247 | 248 | Using query: 249 | 250 | { 251 | "geopoint": { 252 | "type": "geo_bounding_box", 253 | "top_left": { 254 | "lat": 19.23, 255 | "lon": -130 256 | }, 257 | "bottom_right": { 258 | "lat": -45.1119, 259 | "lon": 179.99999 260 | } 261 | } 262 | } 263 | 264 | 265 | OPERATING PARAMETERS... 266 | 267 | Maximum number of media to fetch: 5 268 | Media derivative size: webview 269 | Output directory: /tmp/idigbio_media_downloads 270 | Query Type: rq 271 | 272 | EXECUTING SEARCH QUERY... 273 | 274 | 275 | Search query produced 1260449 results. 276 | 277 | *** WARNING: search query produced more results than the designated maximum number of media to fetch. 278 | *** Use the -m or --max parameter to increase the maximum number of media to fetch. 279 | 280 | BEGINNING DOWNLOADS NOW... 281 | 282 | Downloading: 'https://api.idigbio.org/v2/media/3a12b56f-70fd-4f14-aa9f-feead4aa4a9d?size=webview' 283 | Downloading: 'https://api.idigbio.org/v2/media/3b94d07c-31d9-42bb-b31c-708c20ff56f0?size=webview' 284 | Downloading: 'https://api.idigbio.org/v2/media/3bb22506-bcd9-4a56-bcd2-94d4b3cdfd46?size=webview' 285 | Downloading: 'https://api.idigbio.org/v2/media/3c1ae3e3-3df0-43cb-9864-c0b34f41e491?size=webview' 286 | Downloading: 'https://api.idigbio.org/v2/media/3c663c42-2f7e-435d-89eb-39e35961f0ed?size=webview' 287 | 288 | DOWNLOADING FINISHED with 5 successes and 0 failures 289 | 290 | Media downloads are in output directory: '/tmp/idigbio_media_downloads' 291 | 292 | ``` 293 | 294 | ### Specify a query based on a list of uuids 295 | 296 | If you have already processed a list of downloaded iDigBio records and have a list of record 297 | or mediarecord uuids, those uuids can be placed in a text file, one per line, and fetch_media 298 | can download the associated media. 
299 | 300 | #### iDigBio record uuids 301 | 302 | Note that in this case the records have more than one media associated with them so we end up with more than 3 images after specifying only 3 record uuids. 303 | 304 | ``` 305 | $ cat record_uuids_list.txt 306 | a494a2a6-b64b-4f99-b26c-53bfdcd54876 307 | ddc56589-7009-4fe6-81d8-d9c9219a503f 308 | 9f7f4ba7-0def-4b01-b806-9089dcb7382c 309 | 310 | $ python fetch_media.py --records-uuids-file record_uuids_list.txt -o /tmp/idigbio_media_downloads 311 | 312 | Using query: 313 | 314 | {"uuid":["a494a2a6-b64b-4f99-b26c-53bfdcd54876", "ddc56589-7009-4fe6-81d8-d9c9219a503f", "9f7f4ba7-0def-4b01-b806-9089dcb7382c"]} 315 | 316 | OPERATING PARAMETERS... 317 | 318 | Maximum number of media to fetch: 100 319 | Media derivative size: webview 320 | Output directory: /tmp/idigbio_media_downloads 321 | Query Type: rq 322 | 323 | EXECUTING SEARCH QUERY... 324 | 325 | 326 | Search query produced 6 results. 327 | 328 | 329 | BEGINNING DOWNLOADS NOW... 330 | 331 | Downloading: 'https://api.idigbio.org/v2/media/24be5c10-9b1d-418f-85d4-f13b52e9644e?size=webview' 332 | Downloading: 'https://api.idigbio.org/v2/media/6976b7a3-1547-49a7-8601-febfb90d5e44?size=webview' 333 | Downloading: 'https://api.idigbio.org/v2/media/8fc71122-9fc9-4d5c-8bb2-17a315847f9c?size=webview' 334 | Downloading: 'https://api.idigbio.org/v2/media/88e21956-702d-4e1a-ba71-0d695159b9a9?size=webview' 335 | Downloading: 'https://api.idigbio.org/v2/media/b7cf0d3b-be0f-4d47-9361-0ef0521df28f?size=webview' 336 | Downloading: 'https://api.idigbio.org/v2/media/fbc3237a-1816-4cee-8025-c364d37280d4?size=webview' 337 | 338 | DOWNLOADING FINISHED with 6 successes and 0 failures 339 | 340 | Media downloads are in output directory: '/tmp/idigbio_media_downloads' 341 | 342 | ``` 343 | 344 | #### iDigBio mediarecord uuids 345 | 346 | ``` 347 | $ python fetch_media.py --mediarecords-uuids-file mediarecord_uuids_list.txt -o /tmp/idigbio_media_downloads 348 | 349 | Using query: 350 | 351 | 
{"uuid":["787d60f7-3fb7-4b82-8846-b5b4123761c1", "9c84908f-170f-44eb-ad6d-6d3fec5032a6", "845f80e8-02d7-49dd-aef7-fc58cec36c89"]} 352 | 353 | OPERATING PARAMETERS... 354 | 355 | Maximum number of media to fetch: 100 356 | Media derivative size: webview 357 | Output directory: /tmp/idigbio_media_downloads 358 | Query Type: mq 359 | 360 | EXECUTING SEARCH QUERY... 361 | 362 | 363 | Search query produced 3 results. 364 | 365 | 366 | BEGINNING DOWNLOADS NOW... 367 | 368 | Downloading: 'https://api.idigbio.org/v2/media/845f80e8-02d7-49dd-aef7-fc58cec36c89?size=webview' 369 | Downloading: 'https://api.idigbio.org/v2/media/9c84908f-170f-44eb-ad6d-6d3fec5032a6?size=webview' 370 | Downloading: 'https://api.idigbio.org/v2/media/787d60f7-3fb7-4b82-8846-b5b4123761c1?size=webview' 371 | 372 | DOWNLOADING FINISHED with 3 successes and 0 failures 373 | 374 | Media downloads are in output directory: '/tmp/idigbio_media_downloads' 375 | 376 | 377 | ``` -------------------------------------------------------------------------------- /examples/fetch_media/fetch_media.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | try: 3 | from idigbio.json_client import iDbApiJson 4 | import requests 5 | import shutil 6 | import os 7 | import sys 8 | import time 9 | import argparse 10 | import json 11 | except ImportError as e: 12 | print ("IMPORT ERROR (This exception is likely caused by a missing module): '{0}'".format(e)) 13 | raise SystemExit 14 | 15 | help_blob = """ 16 | 17 | This script will download media that are associated with the specimens 18 | returned by an iDigBio specimen record search query. 19 | 20 | The iDigBio Query Format is documented at 21 | 22 | https://github.com/idigbio/idigbio-search-api/wiki/Query-Format 23 | 24 | Notes on the --output-dir / -o parameter: 25 | 26 | If the specified output directory does not exist, it will be created. 
27 | Omitting this parameter will cause a new directory to be created 28 | under the current directory, named in a timestamp-like style. 29 | 30 | ### Sample ### 31 | 32 | $ python fetch_media.py -o /tmp/idigbio_media_downloads -m 5 -q '{"genus": "acer"}' 33 | 34 | DOWNLOADING FINISHED with 5 successes and 0 failures 35 | 36 | Media downloads are in output directory: '/tmp/idigbio_media_downloads' 37 | 38 | $ ls -l /tmp/idigbio_media_downloads 39 | total 604 40 | -rw-rw-r-- 1 dstoner dstoner 93767 Jun 6 09:19 0c9b4669-edaa-467d-b240-f3311c764c04_webview.jpg 41 | -rw-rw-r-- 1 dstoner dstoner 114132 Jun 6 09:19 1f2dbb2b-75ba-48cb-b34c-1ca003b4a38d_webview.jpg 42 | -rw-rw-r-- 1 dstoner dstoner 147900 Jun 6 09:19 56f84bfe-5095-4fbb-b9e0-08cef3fdb448_webview.jpg 43 | -rw-rw-r-- 1 dstoner dstoner 117882 Jun 6 09:19 6a0d0c92-d2be-4ae5-9fef-60453778b0f0_webview.jpg 44 | -rw-rw-r-- 1 dstoner dstoner 136202 Jun 6 09:19 b98b9704-5ac5-4b53-b74d-d2d4d7d46ddd_webview.jpg 45 | ### 46 | 47 | The media record for the first download above would be viewable in the iDigBio portal at 48 | https://www.idigbio.org/portal/mediarecords/0c9b4669-edaa-467d-b240-f3311c764c04 49 | 50 | """ 51 | 52 | # MAX_MAX_COUNT is a safety limit to keep an erroneous query from downloading all of iDigBio's media. 53 | # Change this value if you are legitimately trying to download more than 100k media. 54 | # Also, please consider letting us know that you are doing this because we are interested 55 | # in these kinds of use cases. idigbio@acis.ufl.edu 56 | MAX_MAX_COUNT = 100000 57 | 58 | DEFAULT_MAX_COUNT = 100 59 | SIZES = ["thumbnail", "webview", "fullsize"] 60 | DEFAULT_SIZE = "webview" 61 | DEFAULT_OUTPUT_DIR = None 62 | 63 | argparser = argparse.ArgumentParser(description=help_blob, formatter_class=argparse.RawDescriptionHelpFormatter) 64 | argparser.add_argument("-m", "--max", type=int, default=DEFAULT_MAX_COUNT, 65 | help="Maximum number of records to be returned from search query. 
Default: {0}, Maximum allowed value: {1}".format(DEFAULT_MAX_COUNT,MAX_MAX_COUNT)) 66 | argparser.add_argument("-s", "--size", choices=SIZES, default=DEFAULT_SIZE, 67 | help="Size of derivative to download. Default: '{0}'".format(DEFAULT_SIZE)) 68 | argparser.add_argument("-o", "--output-dir", default=DEFAULT_OUTPUT_DIR, 69 | help="Directory path for downloaded media files. Default: a new directory will be created under current directory") 70 | argparser.add_argument("-d", "--debug", default=False, action='store_true', 71 | help="enable debugging output") 72 | arg_group = argparser.add_mutually_exclusive_group(required=True) 73 | arg_group.add_argument("-q", "--query", 74 | help="query in iDigBio Query Format.") 75 | arg_group.add_argument("--query-file", 76 | help="file path containing query string in iDigBio Query Format") 77 | arg_group.add_argument("--records-uuids-file", 78 | help="file path containing list of iDigBio record uuids, one per line") 79 | arg_group.add_argument("--mediarecords-uuids-file", 80 | help="file path containing list of iDigBio mediarecord uuids, one per line") 81 | args = argparser.parse_args() 82 | 83 | MAX_RESULTS = max(0,(min(args.max, MAX_MAX_COUNT))) 84 | SIZE = args.size 85 | 86 | output_directory = args.output_dir 87 | 88 | QUERY_TYPE = 'rq' 89 | 90 | debug_flag = args.debug 91 | if debug_flag: 92 | print () 93 | print ("** DEBUGGING ENABLED **") 94 | print () 95 | print () 96 | modulenames = set(sys.modules)&set(globals()) 97 | allmodules = [sys.modules[name] for name in modulenames] 98 | print ("Loaded modules...") 99 | for each_mod in allmodules: 100 | print (each_mod) 101 | print () 102 | 103 | def read_query_file(query_filename): 104 | if os.path.isfile(query_filename): 105 | with open(query_filename, 'r') as queryfile: 106 | q = queryfile.read() 107 | return q 108 | else: 109 | print ("*** Error: query file could not be read or does not exist.") 110 | raise SystemExit 111 | 112 | def 
def get_query_from_uuids_list_file(uuids_file):
    """Build an iDigBio query string matching the uuids listed in *uuids_file*.

    The file is expected to contain one uuid per line.  Surrounding
    whitespace is stripped and blank lines are skipped so that a
    trailing newline does not inject an empty uuid into the query.

    Returns a JSON string of the form '{"uuid": [...]}'.
    """
    uuids_from_file = []
    with open(uuids_file) as uf:
        for line in uf:
            uuid = line.strip()
            if uuid:  # ignore blank lines
                uuids_from_file.append(uuid)

    # Serialize the whole query dict in one step instead of splicing
    # JSON fragments together by hand.
    return json.dumps({"uuid": uuids_from_file})
# The following should work whether one has specified an existing directory
# name, created a new directory by name, or left the output_directory
# unspecified.
if output_directory is None:
    # Default name: timestamp plus the fractional seconds,
    # e.g. "20240101120000.123456", to make collisions unlikely.
    output_directory = "{0}.{1}".format(
        time.strftime("%Y%m%d%H%M%S"),
        str(time.time()).rsplit('.', 1)[-1])

if not os.path.exists(output_directory):
    try:
        os.makedirs(output_directory)
    except OSError:
        # typo fix: "directroy" -> "directory"; narrowed from bare except
        print ("*** ERROR! Could not create directory for output: '{0}'".format(
            os.path.abspath(output_directory)))
        raise SystemExit


def get_media_with_naming (output_dir, media_url, uuid, size):
    """
    Download a media file to a directory and name it based on the input parameters.

    'output_dir' controls where the download is placed.

    'media_url' is the url / link to the media that will be downloaded.

    'uuid' is used to uniquely identify the output filename.

    'size' is the class of image derivative, useful in the output filename.

    Returns True on success, False on any failure.
    """
    try:
        response = requests.get(media_url, stream=True)
        response.raise_for_status()
    except (requests.exceptions.HTTPError, requests.exceptions.ConnectionError) as e:
        print('*** HTTP ERROR: {0}'.format(e))
        return False

    ### iDigBio returns 200 OK and displays an SVG status image when a derivative
    ### is not present. Check for "Content-Type: image/svg+xml" header to notice
    ### this condition.  .get() + startswith() avoids a KeyError when the header
    ### is missing and tolerates a "; charset=..." suffix.
    if response.headers.get('Content-Type', '').startswith('image/svg+xml'):
        print("*** WARNING - No media at '{0}'".format(media_url))
        return False

    # Output filenames will be of the form: {mediarecord_uuid}_{size}.jpg
    # Bug fix: the original read the module-level SIZE here, silently
    # ignoring this function's 'size' parameter.
    local_filepath = os.path.join(output_dir, uuid + '_' + size + '.jpg')

    try:
        with open(local_filepath, 'wb') as out_file:
            shutil.copyfileobj(response.raw, out_file)
        return True
    except (IOError, OSError):
        # Narrowed from a bare except: disk/permission problems only.
        return False
Exiting.") 242 | raise SystemExit 243 | if results['itemCount'] > MAX_RESULTS: 244 | print ("*** WARNING: search query produced more results than the designated maximum number of media to fetch.") 245 | print ("*** Use the -m or --max parameter to increase the maximum number of media to fetch.") 246 | print () 247 | print("BEGINNING DOWNLOADS NOW...") 248 | print () 249 | 250 | successes = 0 251 | failures = 0 252 | 253 | for each in results['items']: 254 | media_record_uuid = each['indexTerms']['uuid'] 255 | media_url = 'https://api.idigbio.org/v2/media/' + media_record_uuid + '?size=' + SIZE 256 | print ("Downloading: '{0}'".format(media_url)) 257 | if get_media_with_naming(output_directory, media_url, media_record_uuid, SIZE): 258 | successes += 1 259 | else: 260 | failures += 1 261 | 262 | print () 263 | print ("DOWNLOADING FINISHED with {0:d} successes and {1:d} failures".format(successes, failures)) 264 | print () 265 | print ("Media downloads are in output directory: '{0}'".format(os.path.abspath(output_directory))) 266 | -------------------------------------------------------------------------------- /examples/fetch_media/mediarecord_uuids_list.txt: -------------------------------------------------------------------------------- 1 | 787d60f7-3fb7-4b82-8846-b5b4123761c1 2 | 9c84908f-170f-44eb-ad6d-6d3fec5032a6 3 | 845f80e8-02d7-49dd-aef7-fc58cec36c89 4 | -------------------------------------------------------------------------------- /examples/fetch_media/query.txt: -------------------------------------------------------------------------------- 1 | { 2 | "scientificname": "Anastrepha pallens Coquillett, 1904" 3 | } 4 | 5 | -------------------------------------------------------------------------------- /examples/fetch_media/query_geo.txt: -------------------------------------------------------------------------------- 1 | { 2 | "geopoint": { 3 | "type": "geo_distance", 4 | "distance": "100km", 5 | "lat": -41.1119, 6 | "lon": 145.323 7 | } 8 | } 9 | 
-------------------------------------------------------------------------------- /examples/fetch_media/query_geo_bounding.txt: -------------------------------------------------------------------------------- 1 | { 2 | "geopoint": { 3 | "type": "geo_bounding_box", 4 | "top_left": { 5 | "lat": 19.23, 6 | "lon": -130 7 | }, 8 | "bottom_right": { 9 | "lat": -45.1119, 10 | "lon": 179.99999 11 | } 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /examples/fetch_media/record_uuids_list.txt: -------------------------------------------------------------------------------- 1 | a494a2a6-b64b-4f99-b26c-53bfdcd54876 2 | ddc56589-7009-4fe6-81d8-d9c9219a503f 3 | 9f7f4ba7-0def-4b01-b806-9089dcb7382c 4 | -------------------------------------------------------------------------------- /examples/recordset_indexed_modified_date_and_archive_link_status.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | try: 3 | from idigbio.json_client import iDbApiJson 4 | import requests 5 | import argparse 6 | except ImportError as e: 7 | print ("IMPORT ERROR (This exception is likely caused by a missing module): '{0}'".format(e)) 8 | raise SystemExit 9 | 10 | help_blob = """ 11 | 12 | This script will print information about recordsets and their indexed pubdate, contacts. 13 | 14 | Input list of recordset uuids is specified by putting them in a file and using --uuids-file option. 
def check_archive_status(url):
    """HEAD *url* and return the HTTP reason phrase (e.g. "OK").

    Returns the string "NO_ARCHIVE_AVAILABLE" when the link is
    unreachable, times out, or answers with an error status.
    """
    try:
        r = requests.head(url, timeout=5)
        r.raise_for_status()
        return r.reason
    except requests.exceptions.RequestException:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt and
        # SystemExit are no longer swallowed; RequestException covers
        # connection errors, timeouts, and raise_for_status failures.
        return "NO_ARCHIVE_AVAILABLE"
def json(*args, **kwargs):
    """Return an iDbApiJson client (plain dicts in and out).

    The import is deferred to call time so that ``import idigbio``
    stays cheap and submodules load only when actually used.
    """
    from .json_client import iDbApiJson
    return iDbApiJson(*args, **kwargs)


def pandas(*args, **kwargs):
    """Return an iDbApiPandas client (results as pandas DataFrames).

    Deferred import: pandas is an optional extra (see setup.py
    ``extras_require``), required only when this factory is called.
    """
    from .pandas_client import iDbApiPandas
    return iDbApiPandas(*args, **kwargs)
def level_dic():
    '''
    Preset tile widths (degrees of longitude) for OpenStreetMap zoom
    levels 0-19.  See http://wiki.openstreetmap.org/wiki/Zoom_levels
    '''
    return {0: 360.0, 1: 180.0, 2: 90.0, 3: 45.0,
            4: 22.5, 5: 11.25, 6: 5.625, 7: 2.813,
            8: 1.406, 9: 0.703, 10: 0.352, 11: 0.176,
            12: 0.088, 13: 0.044, 14: 0.022, 15: 0.011,
            16: 0.005, 17: 0.003, 18: 0.001, 19: 0.0005}


def getzoom(min_lon, max_lon, min_lat, max_lat):
    """Pick the OSM zoom level whose tile width best fits the bounding box."""
    presets = level_dic()
    precision = 4
    # Largest extent of the box in degrees (its "north-east" span).
    extent = max(round(max_lat - min_lat, precision),
                 round(max_lon - min_lon, precision))
    # Rank the extent among the preset widths, widest first; its
    # position is the zoom level to use.
    candidates = [round(width, precision) for width in presets.values()]
    candidates.append(extent)
    candidates.sort(reverse=True)
    return candidates.index(extent)


def deg2num(lat_deg, lon_deg, zoom):
    """Convert a lat/lon pair in degrees to OSM (xtile, ytile) at *zoom*."""
    lat_rad = math.radians(lat_deg)
    tiles_per_axis = 2.0 ** zoom
    xtile = int((lon_deg + 180.0) / 360.0 * tiles_per_axis)
    ytile = int((1.0 - math.log(math.tan(lat_rad) + (1 / math.cos(lat_rad)))
                 / math.pi) / 2.0 * tiles_per_axis)
    return (xtile, ytile)
def make_session(user=None, password=None):
    """Create a requests.Session for talking to the iDigBio APIs.

    The session carries this client's User-Agent header; when both
    *user* and *password* are supplied it also uses HTTP basic auth
    (auth is not necessary for searching).
    """
    # Imported here, not at module top, to pick up idigbio.__version__
    # for the User-Agent string.
    import idigbio
    s = requests.Session()
    if user and password:
        s.auth = (user, password)
    s.headers["User-Agent"] = "idigbio-python-client/" + idigbio.__version__
    return s


class iDigBioMap(object):
    """Handle for a server-side map created through the /v2/mapping API.

    Wraps the short code returned by the mapping endpoint and exposes
    tile and point accessors for the generated map.
    """

    def __init__(self, api, rq={}, style=None, t="auto", disable_images=False):
        # NOTE(review): ``rq={}`` is a mutable default argument; it is
        # only passed through here, never mutated, so harmless in practice.
        self.__api = api
        # PNG tiles need PIL; global_disable_images is set at import
        # time when PIL is unavailable.
        self._disable_images = disable_images or global_disable_images
        self._map_def = self.__api._api_post(
            "/v2/mapping", rq=rq, style=style, type=t)
        if self._map_def is None:
            raise MapNotCreatedException()
        self._short_code = self._map_def["shortCode"]
        self._tiles = self._map_def["tiles"]

    def definition(self):
        """Fetch the server's current definition of this map."""
        return self.__api._api_get("/v2/mapping/{0}".format(self._short_code))

    def json_tile(self, z, x, y):
        """Fetch the JSON tile at (z, x, y)."""
        return self.__api._api_get(
            "/v2/mapping/{0}/{1}/{2}/{3}.json".format(
                self._short_code, z, x, y))

    def utf8grid_tile(self, z, x, y):
        """Fetch the UTFGrid tile at (z, x, y)."""
        return self.__api._api_get(
            "/v2/mapping/{0}/{1}/{2}/{3}.grid.json".format(
                self._short_code, z, x, y))

    def png_tile(self, z, x, y):
        """Fetch the PNG tile at (z, x, y) as a PIL Image, or None on error."""
        if self._disable_images:
            raise ImagesDisabledException()
        tile = self.__api._api_get(
            "/v2/mapping/{0}/{1}/{2}/{3}.png".format(
                self._short_code, z, x, y), raw=True)
        if tile is None:
            return None
        else:
            return Image.open(io_ify(tile))

    def points(self, lat, lon, zoom, sort=None, limit=100, offset=None):
        """List occurrence points of this map near (lat, lon) at *zoom*."""
        return self.__api._api_get(
            "/v2/mapping/{0}/points".format(self._short_code),
            lat=lat, lon=lon, zoom=zoom, sort=sort, limit=limit, offset=offset)

    def save_map_image(self, filename, zoom, bbox=None):
        """Render this map over OSM base tiles and save as <filename>.png.

        *zoom* may be None when *bbox* is given, in which case a zoom
        level is derived from the bounding box; without a bbox, every
        tile of the world at *zoom* is rendered.
        """
        x_tiles = None
        y_tiles = None

        if zoom is None and bbox is not None:
            # NOTE(review): getzoom's signature is
            # (min_lon, max_lon, min_lat, max_lat), yet latitudes are
            # passed in the first two slots and longitudes in the last
            # two -- confirm whether this argument order is intended.
            zoom = getzoom(
                bbox["bottom_right"]["lat"],
                bbox["top_left"]["lat"],
                bbox["top_left"]["lon"],
                bbox["bottom_right"]["lon"]
            )

        if bbox is not None:
            top_left_tile = deg2num(
                bbox["top_left"]["lat"],
                bbox["top_left"]["lon"],
                zoom
            )

            bottom_right_tile = deg2num(
                bbox["bottom_right"]["lat"],
                bbox["bottom_right"]["lon"],
                zoom
            )

            x_tiles = range(top_left_tile[0], bottom_right_tile[0]+1)
            y_tiles = range(top_left_tile[1], bottom_right_tile[1]+1)

        # No bbox: cover the full tile grid at this zoom.
        if x_tiles is None:
            x_tiles = range(0, 2**zoom)
        if y_tiles is None:
            y_tiles = range(0, 2**zoom)

        s = make_session()
        if self._disable_images:
            raise ImagesDisabledException()
        # Paste each OSM base tile, then overlay our tile at the same
        # position using the tile itself as the transparency mask.
        im = Image.new("RGB", (len(x_tiles) * 256, len(y_tiles) * 256))
        x_tile_count = 0
        for x in x_tiles:
            y_tile_count = 0
            for y in y_tiles:
                # NOTE(review): plain-HTTP tile URL; openstreetmap.org
                # tile servers have moved to HTTPS -- confirm this
                # endpoint still responds.
                r = s.get(
                    "http://b.tile.openstreetmap.org/{z}/{x}/{y}.png".format(
                        z=zoom, x=x, y=y))
                r.raise_for_status()
                bim = Image.open(io_ify(r.content))
                tim = self.png_tile(zoom, x, y)
                im.paste(bim, (x_tile_count * 256, y_tile_count * 256))
                im.paste(tim, (x_tile_count * 256, y_tile_count * 256), tim)
                y_tile_count += 1
            x_tile_count += 1
        im.save("{0}.png".format(filename), "PNG")
        s.close()
207 | """ 208 | self.retries = retries 209 | 210 | if env == "prod": 211 | self._api_urls = { 212 | "base": "https://search.idigbio.org", 213 | "/v2/media": "https://api.idigbio.org", 214 | "/v2/download": "https://api.idigbio.org" 215 | } 216 | elif env == "beta": 217 | self._api_urls = { 218 | "base": "https://beta-search.idigbio.org", 219 | "/v2/media": "https://beta-api.idigbio.org", 220 | "/v2/download": "https://beta-api.idigbio.org" 221 | } 222 | elif env == "dev": 223 | self._api_urls = { 224 | "base": "https://localhost:19196", 225 | "/v2/media": "http://localhost:19197", 226 | "/v2/download": "http://localhost:19197" 227 | } 228 | else: 229 | raise BadEnvException() 230 | 231 | self.s = make_session(user=user, password=password) 232 | 233 | def __del__(self): 234 | self.s.close() 235 | 236 | def _api_get(self, slug, **kwargs): 237 | retries = self.retries 238 | raw = kwargs.pop('raw', False) 239 | 240 | api_url = self._api_urls.get(slug, self._api_urls.get("base")) 241 | 242 | for arg in list(kwargs): 243 | if isinstance(kwargs[arg], (dict, list)): 244 | kwargs[arg] = json.dumps(kwargs[arg]) 245 | elif kwargs[arg] is None: 246 | del kwargs[arg] 247 | qs = urlencode(kwargs) 248 | while retries > 0: 249 | try: 250 | log.debug("Querying: %r", api_url + slug + "?" + qs) 251 | r = self.s.get(api_url + slug + "?" 
+ qs) 252 | r.raise_for_status() 253 | if raw: 254 | return r.content 255 | else: 256 | return r.json() 257 | except: 258 | log.debug(traceback.print_exc()) 259 | retries -= 1 260 | return None 261 | 262 | def _api_post(self, slug, **kwargs): 263 | retries = self.retries 264 | raw = kwargs.pop('raw', False) 265 | files = kwargs.pop('files', None) 266 | params = kwargs.pop('params', None) 267 | 268 | api_url = self._api_urls.get(slug, self._api_urls.get("base")) 269 | 270 | for arg in list(kwargs): 271 | if kwargs[arg] is None: 272 | del kwargs[arg] 273 | 274 | while retries > 0: 275 | try: 276 | body = json.dumps(kwargs) 277 | if files is None: 278 | log.debug("POSTing: %r\n%s", slug, body) 279 | r = self.s.post( 280 | api_url + slug, 281 | data=json.dumps(kwargs), 282 | params=params, 283 | headers={"Content-Type": "application/json"} 284 | ) 285 | else: 286 | # you must seek the file before sending, 287 | # especially on the retry loop 288 | for k in files: 289 | files[k].seek(0) 290 | log.debug("POSTing + Files: %r\n%s", slug, body) 291 | r = self.s.post( 292 | api_url + slug, 293 | data=kwargs, 294 | files=files, 295 | params=params 296 | ) 297 | 298 | r.raise_for_status() 299 | if raw: 300 | return r.content 301 | else: 302 | return r.json() 303 | except: 304 | log.exception("Error posting: %r %r", slug, params) 305 | retries -= 1 306 | return None 307 | 308 | def view(self, t, uuid): 309 | """ 310 | t: the type to view. Supported types: records, media (mediarecords), recordsets, publishers 311 | uuid: the uuid to view. 
    def search_records(self, rq={}, limit=100, offset=0, sort=None,
                       fields=None, fields_exclude=FIELDS_EXCLUDE_DEFAULT):
        """
        rq              Search Query in iDigBio Query Format, using Record Query Fields
        sort            field to sort on, pick from Record Query Fields
        fields          a list of fields to return, specified using the fieldName parameter from Fields with type records
        fields_exclude  a list of fields to exclude, specified using the fieldName parameter from Fields with type records
        limit           max results
        offset          skip results

        Returns idigbio record format (legacy api), plus additional top level keys with parsed index terms. Returns None on error.
        """
        # ``is`` (identity, not equality) on purpose: it distinguishes
        # "caller left fields_exclude at its default sentinel" from
        # "caller passed an equal list".  When explicit fields are
        # requested, the default exclusion is dropped so the two
        # parameters do not conflict.
        if fields is not None and fields_exclude is FIELDS_EXCLUDE_DEFAULT:
            fields_exclude = None

        return self._api_post("/v2/search/records",
                              rq=rq, limit=limit, offset=offset, sort=sort,
                              fields=fields, fields_exclude=fields_exclude)

    def search_media(self, mq={}, rq={}, limit=100, offset=0, sort=None,
                     fields=None, fields_exclude=FIELDS_EXCLUDE_DEFAULT):
        """
        mq              Search Query in iDigBio Query Format, using Media Query Fields
        rq              Search Query in iDigBio Query Format, using Record Query Fields
        sort            field to sort on, pick from Media Query Fields
        fields          a list of fields to return, specified using the fieldName parameter from Fields with type mediarecords
        fields_exclude  a list of fields to exclude, specified using the fieldName parameter from Fields with type records
        limit           max results
        offset          skip results

        Returns idigbio record format (legacy api), plus additional top level keys with parsed index terms. Returns None on error.
        """
        # Same default-sentinel identity check as search_records above.
        if fields is not None and fields_exclude is FIELDS_EXCLUDE_DEFAULT:
            fields_exclude = None

        return self._api_post("/v2/search/media",
                              rq=rq, mq=mq, limit=limit, offset=offset, sort=sort,
                              fields=fields, fields_exclude=fields_exclude)
| date_interval=date_interval) 400 | 401 | def upload(self, filereference, localfile, media_type=None, etag=None): 402 | if not self.s.auth: 403 | raise Exception("Unauthorized") 404 | if not localfile: 405 | raise ValueError("Must have local copy of file to upload") 406 | fd = open(localfile, 'rb') 407 | if etag is None: 408 | etag = util.calcFileHash(fd, op=False) 409 | log.debug("Calculate etag for %r as %s", localfile, etag) 410 | files = {'file': fd} 411 | p = { 412 | "filereference": filereference, 413 | "media_type": media_type, 414 | "etag": etag 415 | } 416 | return self._api_post("/v2/media", files=files, params=p) 417 | 418 | def addreference(self, filereference, localfile): 419 | if not self.s.auth: 420 | raise Exception("Unauthorized") 421 | if not localfile: 422 | raise ValueError("Must have local copy of file to upload") 423 | etag = util.calcFileHash(localfile) 424 | p = {'filereference': filereference, 425 | 'etag': etag} 426 | return self._api_post("/v2/media", params=p) 427 | 428 | def addurl(self, filereference, media_type=None, mime_type=None): 429 | if not self.s.auth: 430 | raise Exception("Unauthorized") 431 | p = { 432 | "filereference": filereference, 433 | "media_type": media_type, 434 | "mime": mime_type 435 | } 436 | return self._api_post("/v2/media", **p) 437 | -------------------------------------------------------------------------------- /idigbio/pandas_client.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import pandas 3 | from .json_client import iDbApiJson 4 | from itertools import chain 5 | try: 6 | from future_builtins import map 7 | except ImportError: 8 | pass 9 | 10 | MAX_BATCH_SIZE = 5000 11 | 12 | log = logging.getLogger(__name__) 13 | 14 | 15 | class iDbApiPandas(object): 16 | def __init__(self, env="prod", user=None, password=None): 17 | """ 18 | env: Which environment to use. Defaults to prod." 
    def __search_base(self, apifn, **kwargs):
        """Run *apifn* (a json-client search method) and assemble the
        results' index terms into a pandas DataFrame indexed by uuid.

        Requests larger than MAX_BATCH_SIZE are transparently split
        into successive offset/limit pages.  Returns None when the
        search matches nothing.
        """
        def yd(data):
            # Yield the parsed index terms of every item in one API page.
            for r in data["items"]:
                yield r["indexTerms"]

        if "limit" in kwargs and kwargs["limit"] > MAX_BATCH_SIZE:
            def one(offset, total_limit):
                # Generator of API pages from ``offset`` up to ``total_limit``.
                while offset < total_limit:
                    batch = min(MAX_BATCH_SIZE, total_limit - offset)
                    log.debug("Querying at offset %s", offset)
                    data = apifn(offset=offset, limit=batch, **kwargs)
                    yield data
                    if len(data["items"]) < batch:
                        # Short page: the server has no more records.
                        log.debug("Exiting early, no more records on server")
                        break
                    offset += batch
            # pop() removes offset/limit so they are not passed twice
            # through **kwargs inside one().
            datagen = one(kwargs.pop("offset", 0), kwargs.pop("limit"))
            # Pull the first page eagerly to detect an empty result set.
            data = next(datagen)
            if data and len(data["items"]) > 0:
                records = chain(
                    yd(data),
                    chain.from_iterable(map(yd, datagen)))
                return pandas.DataFrame.from_records(records, index="uuid")
        else:
            data = apifn(**kwargs)
            if data["itemCount"] > 0:
                return pandas.DataFrame.from_records(yd(data), index="uuid")
        return None
61 | """ 62 | 63 | return self.__search_base(apifn=self.__api.search_records, **kwargs) 64 | 65 | def search_media(self,**kwargs): 66 | """ 67 | mq Search Query in iDigBio Query Format, using Media Query Fields 68 | rq Search Query in iDigBio Query Format, using Record Query Fields 69 | sort field to sort on, pick from Media Query Fields 70 | fields a list of fields to return, specified using the fieldName parameter from Fields with type mediarecords 71 | fields_exclude a list of fields to exclude, specified using the fieldName parameter from Fields with type records 72 | limit max results 73 | offset skip results 74 | 75 | Returns idigbio record format (legacy api), plus additional top level keys with parsed index terms. Returns None on error. 76 | """ 77 | return self.__search_base(apifn=self.__api.search_media, **kwargs) 78 | 79 | def __top_recuse(self, top_fields, top_records): 80 | if len(top_fields) == 0: 81 | yield [top_records["itemCount"]] 82 | else: 83 | for k in top_records[top_fields[0]]: 84 | for v in self.__top_recuse(top_fields[1:], top_records[top_fields[0]][k]): 85 | yield [k] + v 86 | 87 | def top_records(self, top_fields=["scientificname"], **kwargs): 88 | r = self.__api.top_records(top_fields=top_fields, **kwargs) 89 | return pandas.DataFrame.from_records( 90 | self.__top_recuse(top_fields, r), columns=top_fields + ["count"]) 91 | 92 | def top_media(self, top_fields=["flags"], **kwargs): 93 | r = self.__api.top_media(top_fields=top_fields, **kwargs) 94 | return pandas.DataFrame.from_records( 95 | self.__top_recuse(top_fields, r), columns=top_fields + ["count"]) 96 | 97 | def count_records(self, **kwargs): 98 | return self.__api.count_records(**kwargs) 99 | 100 | def count_media(self, **kwargs): 101 | return self.__api.count_media(**kwargs) 102 | 103 | def count_recordsets(self, **kwargs): 104 | return self.__api.count_recordsets(**kwargs) 105 | 106 | 107 | # TODO 108 | # def datehist(self,**kwargs): 109 | # return self._api.datehist(**kwargs) 
def calcFileHash(f, op=True, return_size=False):
    """Compute the md5 hex digest of a file.

    f            a filesystem path when ``op`` is True (default),
                 otherwise an already-open binary file object.
    op           whether *f* should be opened by this function.
    return_size  when True, return ``(hexdigest, byte_count)`` instead
                 of just the digest string.
    """
    md5 = hashlib.md5()
    size = 0

    def _consume(stream):
        # Hash the stream incrementally.  64 KiB chunks instead of the
        # original 128 bytes cut the Python-level loop overhead on
        # large files; the resulting digest is identical.
        total = 0
        buf = stream.read(65536)
        while len(buf) > 0:
            total += len(buf)
            md5.update(buf)
            buf = stream.read(65536)
        return total

    if op:
        with open(f, "rb") as fd:
            size = _consume(fd)
    else:
        size = _consume(f)

    if return_size:
        return (md5.hexdigest(), size)
    else:
        return md5.hexdigest()
def read(*paths):
    """Join *paths* into one file path and return its contents,
    decoded as UTF-8 via the codecs.open imported above."""
    target = os.path.join(*paths)
    with open(target, 'r', 'utf-8') as handle:
        contents = handle.read()
    return contents
import unittest
import os
import sys

from idigbio.json_client import iDbApiJson, ImagesDisabledException

# Locate a usable mock implementation.  Prefer the standard library's
# ``unittest.mock`` (Python 3.3+) over the third-party ``mock`` backport,
# so an installed backport no longer shadows the maintained stdlib module
# on modern interpreters; fall back to the backport for Python 2.7.
# Tests that need mocking skip themselves when neither is available.
try:
    import unittest.mock as mock_module
except ImportError:
    try:
        import mock as mock_module
    except ImportError:
        mock_module = None

if mock_module:
    Mock = mock_module.Mock
    MagicMock = mock_module.MagicMock
    patch = mock_module.patch
TestIDbApiJson(unittest.TestCase): 21 | def test___init__(self): 22 | api = iDbApiJson() 23 | self.assertIsNotNone(api.s) 24 | 25 | def test___del__(self): 26 | api = iDbApiJson() 27 | del api 28 | 29 | def test_create_map(self): 30 | api = iDbApiJson() 31 | m = api.create_map() 32 | self.assertIsNotNone(m) 33 | 34 | def test_search_media(self): 35 | api = iDbApiJson() 36 | r = api.search_media() 37 | self.assertIsNotNone(r) 38 | 39 | def test_search_records(self): 40 | api = iDbApiJson() 41 | r = api.search_records() 42 | self.assertIsNotNone(r) 43 | 44 | def test_view(self): 45 | api = iDbApiJson() 46 | r = api.view("records","56c351b5-30c0-4529-a57f-60c451cc5876") 47 | self.assertIsNotNone(r) 48 | 49 | def test_count_media(self): 50 | api = iDbApiJson() 51 | r = api.count_media() 52 | self.assertIsNotNone(r) 53 | self.assertIsInstance(r,int) 54 | self.assertNotEqual(r,0) 55 | 56 | def test_count_media_null(self): 57 | api = iDbApiJson() 58 | r = api.count_media(mq={"version": -1}) 59 | self.assertIsNotNone(r) 60 | self.assertIsInstance(r,int) 61 | self.assertEqual(r,0) 62 | 63 | def test_count_records(self): 64 | api = iDbApiJson() 65 | r = api.count_records() 66 | self.assertIsNotNone(r) 67 | self.assertIsInstance(r,int) 68 | self.assertNotEqual(r,0) 69 | 70 | def test_count_records_null(self): 71 | api = iDbApiJson() 72 | r = api.count_records(rq={"version": -1}) 73 | self.assertIsNotNone(r) 74 | self.assertIsInstance(r,int) 75 | self.assertEqual(r,0) 76 | 77 | def test_count_recordsets(self): 78 | api = iDbApiJson() 79 | r = api.count_recordsets() 80 | self.assertIsNotNone(r) 81 | self.assertIsInstance(r,int) 82 | self.assertNotEqual(r,0) 83 | 84 | def test_datehist(self): 85 | api = iDbApiJson() 86 | r = api.datehist( 87 | rq={"scientificname": "puma concolor"}, 88 | top_fields=["institutioncode"], 89 | min_date="2005-01-01") 90 | self.assertIsNotNone(r) 91 | 92 | def test_stats_api(self): 93 | api = iDbApiJson() 94 | r = api.stats("api", 
min_date="2005-01-01") 95 | self.assertIsNotNone(r) 96 | 97 | def test_stats_digest(self): 98 | api = iDbApiJson() 99 | r = api.stats("digest", min_date="2005-01-01") 100 | self.assertIsNotNone(r) 101 | 102 | def test_stats_search(self): 103 | api = iDbApiJson() 104 | r = api.stats("search", min_date="2005-01-01") 105 | self.assertIsNotNone(r) 106 | 107 | def test_top_media(self): 108 | api = iDbApiJson() 109 | r = api.top_media() 110 | self.assertIsNotNone(r) 111 | 112 | def test_top_records(self): 113 | api = iDbApiJson() 114 | r = api.top_records() 115 | self.assertIsNotNone(r) 116 | 117 | def test_upload(self): 118 | if mock_module is None: 119 | self.skipTest('mock library not installed') 120 | api = iDbApiJson(user="foo", password="bar") 121 | api._api_post = Mock() 122 | api.upload('testreference', __file__) 123 | args, kwargs = api._api_post.call_args 124 | self.assertIn("/v2/media", args) 125 | self.assertIn('files', kwargs) 126 | self.assertIn('params', kwargs) 127 | self.assertIn('etag', kwargs['params']) 128 | self.assertIsNotNone(kwargs['params']['etag']) 129 | 130 | if __name__ == '__main__': 131 | unittest.main() 132 | -------------------------------------------------------------------------------- /tests/test_json_client_map.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import os 3 | import sys 4 | 5 | from idigbio.json_client import iDbApiJson, ImagesDisabledException 6 | 7 | try: 8 | import mock as mock_module 9 | except ImportError: 10 | try: 11 | import unittest.mock as mock_module 12 | except ImportError: 13 | mock_module = None 14 | 15 | if mock_module: 16 | Mock = mock_module.Mock 17 | MagicMock = mock_module.MagicMock 18 | patch = mock_module.patch 19 | 20 | 21 | class TestIDigBioMap(unittest.TestCase): 22 | def test___init__(self): 23 | api = iDbApiJson() 24 | m = api.create_map() 25 | self.assertIsNotNone(m) 26 | self.assertIsNotNone(m._short_code) 27 | 28 | def 
test_definition(self): 29 | api = iDbApiJson() 30 | m = api.create_map() 31 | self.assertIsNotNone(m) 32 | self.assertIsNotNone(m.definition()) 33 | 34 | def test_json_tile(self): 35 | api = iDbApiJson() 36 | m = api.create_map() 37 | self.assertIsNotNone(m) 38 | self.assertIsNotNone(m.json_tile(1,0,0)) 39 | 40 | def test_png_tile(self): 41 | api = iDbApiJson() 42 | m = api.create_map() 43 | self.assertIsNotNone(m) 44 | self.assertIsNotNone(m.png_tile(1,0,0)) 45 | 46 | def test_png_tile_disabled(self): 47 | api = iDbApiJson() 48 | m = api.create_map(disable_images=True) 49 | self.assertIsNotNone(m) 50 | with self.assertRaises(ImagesDisabledException): 51 | m.png_tile(1,0,0) 52 | 53 | def test_points(self): 54 | api = iDbApiJson() 55 | m = api.create_map() 56 | self.assertIsNotNone(m) 57 | self.assertIsNotNone(m.points(0,0,1)) 58 | 59 | def test_save_map_image(self): 60 | api = iDbApiJson() 61 | m = api.create_map() 62 | self.assertIsNotNone(m) 63 | m.save_map_image("test_map",1) 64 | self.assertTrue(os.path.exists("test_map.png")) 65 | os.unlink("test_map.png") 66 | 67 | def test_save_map_image_disabled(self): 68 | api = iDbApiJson() 69 | m = api.create_map(disable_images=True) 70 | self.assertIsNotNone(m) 71 | with self.assertRaises(ImagesDisabledException): 72 | m.save_map_image("test_map",1) 73 | self.assertFalse(os.path.exists("test_map.png")) 74 | 75 | def test_utf8grid_tile(self): 76 | api = iDbApiJson() 77 | m = api.create_map() 78 | self.assertIsNotNone(m) 79 | self.assertIsNotNone(m.utf8grid_tile(1,0,0)) 80 | 81 | def test_save_map_from_bounding_box(self): 82 | api = iDbApiJson() 83 | # rectangular bounding box around Gainesville, FL 84 | bbox = {"type": "geo_bounding_box", 85 | "bottom_right": {"lat": 29.642979999999998, "lon": -82.00}, 86 | "top_left": {"lat": 29.66298, "lon": -82.35315800000001}} 87 | m = api.create_map(rq={"geopoint": bbox}) 88 | m.save_map_image("test_bounded_map", None, bbox=bbox) 89 | 
self.assertTrue(os.path.exists("test_bounded_map.png")) 90 | os.unlink("test_bounded_map.png") 91 | 92 | 93 | if __name__ == '__main__': 94 | unittest.main() 95 | -------------------------------------------------------------------------------- /tests/test_pandas_client.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import pandas 3 | 4 | from idigbio.pandas_client import iDbApiPandas 5 | 6 | class TestIDbApiPandas(unittest.TestCase): 7 | def test___init__(self): 8 | api = iDbApiPandas() 9 | self.assertIsNotNone(api) 10 | 11 | def test_search_media(self): 12 | api = iDbApiPandas() 13 | self.assertIsNotNone(api) 14 | df = api.search_media() 15 | self.assertIsInstance(df,pandas.DataFrame) 16 | 17 | def test_search_records(self): 18 | api = iDbApiPandas() 19 | self.assertIsNotNone(api) 20 | df = api.search_records() 21 | self.assertIsInstance(df,pandas.DataFrame) 22 | 23 | def test_search_records_limit_10007(self): 24 | api = iDbApiPandas() 25 | self.assertIsNotNone(api) 26 | df = api.search_records(limit=10007) 27 | self.assertIsInstance(df,pandas.DataFrame) 28 | self.assertEqual(len(df),10007) 29 | 30 | def test_search_no_results(self): 31 | api = iDbApiPandas() 32 | self.assertIsNotNone(api) 33 | df = api.search_records(rq={"scientificname": "7f2caf7a-e84a3c972752"}) 34 | self.assertIsNone(df) 35 | df = api.search_records( 36 | rq={"scientificname": "7f2caf7a-e84a3c972752"}, limit=10000) 37 | self.assertIsNone(df) 38 | df = api.search_media(rq={"scientificname": "7f2caf7a-e84a3c972752"}) 39 | self.assertIsNone(df) 40 | df = api.search_media( 41 | rq={"scientificname": "7f2caf7a-e84a3c972752"}, limit=10000) 42 | self.assertIsNone(df) 43 | 44 | def test_count_media(self): 45 | api = iDbApiPandas() 46 | self.assertIsNotNone(api) 47 | df = api.count_media() 48 | self.assertIsInstance(df,int) 49 | 50 | def test_count_records(self): 51 | api = iDbApiPandas() 52 | self.assertIsNotNone(api) 53 | df = 
api.count_records() 54 | self.assertIsInstance(df,int) 55 | 56 | def test_count_recordsets(self): 57 | api = iDbApiPandas() 58 | r = api.count_recordsets() 59 | self.assertIsNotNone(r) 60 | self.assertIsInstance(r,int) 61 | self.assertNotEqual(r,0) 62 | 63 | 64 | # TODO 65 | # def test_datehist(self): 66 | # # i_db_api_pandas = iDbApiPandas(env, debug) 67 | # # self.assertEqual(expected, i_db_api_pandas.datehist(**kwargs)) 68 | # assert False # TODO: implement your test here 69 | 70 | # def test_stats(self): 71 | # # i_db_api_pandas = iDbApiPandas(env, debug) 72 | # # self.assertEqual(expected, i_db_api_pandas.stats(t, **kwags)) 73 | # assert False # TODO: implement your test here 74 | 75 | def test_top_media(self): 76 | api = iDbApiPandas() 77 | self.assertIsNotNone(api) 78 | df = api.search_media() 79 | self.assertIsInstance(df,pandas.DataFrame) 80 | 81 | def test_top_records(self): 82 | api = iDbApiPandas() 83 | self.assertIsNotNone(api) 84 | df = api.search_media() 85 | self.assertIsInstance(df,pandas.DataFrame) 86 | 87 | def test_auth_on_session(self): 88 | p = iDbApiPandas(user="foo", password="bar") 89 | japi = getattr(p, '_iDbApiPandas__api') 90 | self.assertIsNotNone(japi.s) 91 | self.assertEqual(japi.s.auth, ("foo", "bar")) 92 | 93 | if __name__ == '__main__': 94 | unittest.main() 95 | --------------------------------------------------------------------------------