├── .gitignore ├── .travis.yml ├── HISTORY.rst ├── LICENSE ├── MANIFEST.in ├── README.rst ├── bld.bat ├── build.sh ├── examples ├── FY2015-summary-stats.ipynb ├── fetch_media │ ├── README.md │ ├── fetch_media.py │ ├── mediarecord_uuids_list.txt │ ├── query.txt │ ├── query_geo.txt │ ├── query_geo_bounding.txt │ └── record_uuids_list.txt └── recordset_indexed_modified_date_and_archive_link_status.py ├── idigbio ├── __init__.py ├── json_client.py ├── pandas_client.py └── util.py ├── meta.yaml ├── setup.cfg ├── setup.py ├── test-requirements.txt └── tests ├── __init__.py ├── test_json_client.py ├── test_json_client_map.py └── test_pandas_client.py /.gitignore: -------------------------------------------------------------------------------- 1 | # emacs temp files 2 | *~ 3 | \#*\# 4 | 5 | 6 | 7 | # Created by https://www.gitignore.io/api/python 8 | 9 | ### Python ### 10 | # Byte-compiled / optimized / DLL files 11 | __pycache__/ 12 | *.py[cod] 13 | *$py.class 14 | 15 | # C extensions 16 | *.so 17 | 18 | # Distribution / packaging 19 | .Python 20 | env/ 21 | build/ 22 | develop-eggs/ 23 | dist/ 24 | downloads/ 25 | eggs/ 26 | .eggs/ 27 | lib/ 28 | lib64/ 29 | parts/ 30 | sdist/ 31 | var/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | 36 | # PyInstaller 37 | # Usually these files are written by a python script from a template 38 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
39 | *.manifest 40 | *.spec 41 | 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | 46 | # Unit test / coverage reports 47 | htmlcov/ 48 | .tox/ 49 | .coverage 50 | .coverage.* 51 | .cache 52 | nosetests.xml 53 | coverage.xml 54 | *,cover 55 | .hypothesis/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | 65 | # Flask instance folder 66 | instance/ 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # IPython Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # Pythoscope 81 | .pythoscope/ 82 | 83 | /.dir-locals.el 84 | /README.html 85 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - '2.7' 4 | - '3.4' 5 | - '3.5' 6 | install: 7 | - pip install -r test-requirements.txt 8 | - pip install . 9 | script: 10 | - python -m unittest tests 11 | - cat README.rst HISTORY.rst | rst2html.py --strict - /dev/null 12 | deploy: 13 | provider: pypi 14 | user: idigbio 15 | password: 16 | secure: e/ltrIGHu9wZXmi+8XmIaFmyiYz2540UrL129+ezzWRKRTM5FhPguvRO+6epBNGL4+NYZ+BvBiOumC2SCTGzq3ASncCkaCmjAGh8NCPXXblDOkvMGA9qls9sjoJxBCCBhmcY6/QnPOQ59CiSPLIBxru5u2nf4R7peOrzZHJ5o88= 17 | on: 18 | tags: true 19 | distributions: sdist bdist_wheel 20 | repo: iDigBio/idigbio-python-client 21 | notifications: 22 | slack: 23 | secure: XIUC86fDwnlQjT44v+BXwnB2e+Xa+bY7n9mgo+YM7SnS/xdWMId/0aIRsiQAsbDRLPfnunEu/E8kpQFvRphK3jWv5t2fHnR+HJ/tHfPSralDTCtvAJX019KY0Bkz0xLgFSjtEvX2BeEHY+QRuA8LCRvWFlY2Mnfx7fFkqbWDPNY= 24 | -------------------------------------------------------------------------------- /HISTORY.rst: -------------------------------------------------------------------------------- 1 | .. 
:changelog: 2 | 3 | Release History 4 | --------------- 5 | 6 | 0.8.6 (TBD) 7 | ++++++++++++++++++ 8 | 9 | **Changes** 10 | 11 | - Drop support for Python 3.3 (following the lead of Pandas library) 12 | 13 | 0.8.5 (2018-03-16) 14 | ++++++++++++++++++ 15 | 16 | **New** 17 | 18 | - add debug command-line option 19 | 20 | 0.8.4 (2017-06-07) 21 | ++++++++++++++++++ 22 | 23 | **New** 24 | 25 | - add full-featured example script fetch_media.py to download media from iDigBio 26 | - add documentation for fetch_media 27 | 28 | **Changes** 29 | 30 | - remove fetch_media_based_on_query.py which is superceded by fetch_media.py 31 | 32 | 0.8.3.3 (2017-05-17) 33 | ++++++++++++++++++++ 34 | 35 | **New** 36 | 37 | - add an example to examples directory to download media based on search query 38 | 39 | **Changes** 40 | 41 | - minor changes to documentation, unit tests 42 | - remove hard-coded path to tmp directory 43 | 44 | 0.8.2 (2017-05-10) 45 | ++++++++++++++++++ 46 | 47 | **New** 48 | 49 | - count_recordsets() function returns number of recordsets in iDigBio 50 | 51 | 52 | 0.8.1 (2016-08-29) 53 | ++++++++++++++++++ 54 | 55 | - Send etag with file on upload to verify correctness 56 | 57 | 0.6.1 (2016-04-08) 58 | ++++++++++++++++++ 59 | 60 | **Changes** 61 | 62 | - Add media_type to upload functionality. 63 | 64 | 0.6.0 (2016-03-30) 65 | ++++++++++++++++++ 66 | 67 | **Changes** 68 | 69 | - Make pandas an extra requirements, update docs 70 | 71 | **New** 72 | 73 | - Specify auth for api backend 74 | - Upload image capability (requires auth) 75 | 76 | 77 | 78 | 0.5.0 (2016-02-24) 79 | ++++++++++++++++++ 80 | 81 | **Changes** 82 | 83 | - Don't exclude ``data.*`` fields if requested specifically 84 | - Fix ``stats`` and ``datehist`` api calls to respect parameters; 85 | param names changed to use python style and match server params. 86 | 87 | 88 | 0.4.3 (2016-02-23) 89 | ++++++++++++++++++ 90 | 91 | **Bugfixes** 92 | 93 | - no results no longer errs in the pandas client. 
94 | - limit correctly limits to specified record, not next larger batch size 95 | 96 | **Miscellaneous** 97 | 98 | - Clarify targetted python versions 99 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 iDigBio 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.rst LICENSE 2 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | idigbio-python-client 2 | ===================== 3 | 4 | .. 
image:: https://img.shields.io/pypi/v/idigbio.svg 5 | :target: https://pypi.python.org/pypi/idigbio 6 | 7 | .. image:: https://img.shields.io/travis/iDigBio/idigbio-python-client.svg 8 | :target: https://travis-ci.com/iDigBio/idigbio-python-client 9 | 10 | A python client for the `iDigBio `_ iDigBio v2 API. 11 | 12 | Installation 13 | ------------ 14 | 15 | .. code-block:: 16 | 17 | pip install idigbio 18 | 19 | If you want to use the Pandas Data Frame interface you need to install 20 | pandas as well. 21 | 22 | .. code-block:: 23 | 24 | pip install idigbio pandas 25 | 26 | If you see InsecurePlatformWarning or have other SSL certificate verification issues, you may 27 | wish to install urllib3 with the secure extra. 28 | 29 | .. code-block:: 30 | 31 | pip install urllib3[secure] 32 | 33 | 34 | Basic Usage 35 | ----------- 36 | 37 | Returning JSON from the API. 38 | 39 | .. code-block:: python 40 | 41 | import idigbio 42 | api = idigbio.json() 43 | json_output = api.search_records() 44 | 45 | Returning a Pandas Data Frame. 46 | 47 | .. code-block:: python 48 | 49 | import idigbio 50 | api = idigbio.pandas() 51 | pandas_output = api.search_records() 52 | 53 | See the `Search API docs 54 | `_ for info about 55 | the endpoint parameters. 56 | 57 | 58 | Examples 59 | ++++++++ 60 | 61 | View a Record By UUID 62 | 63 | .. code-block:: python 64 | 65 | import idigbio 66 | api = idigbio.json() 67 | record = api.view("records","1db58713-1c7f-4838-802d-be784e444c4a") 68 | 69 | Search for a Record by scientific name 70 | 71 | .. code-block:: python 72 | 73 | import idigbio 74 | api = idigbio.json() 75 | record_list = api.search_records(rq={"scientificname": "puma concolor"}) 76 | 77 | Search for Records that have images 78 | 79 | .. code-block:: python 80 | 81 | import idigbio 82 | api = idigbio.json() 83 | record_list = api.search_records(rq={"scientificname": "puma concolor", "hasImage": True}) 84 | 85 | Search for a MediaRecords by record property 86 | 87 | .. 
code-block:: python 88 | 89 | import idigbio 90 | api = idigbio.json() 91 | mediarecord_list = api.search_media(rq={"scientificname": "puma concolor", "hasImage": True}) 92 | 93 | Create a heat map for a genus 94 | 95 | .. code-block:: python 96 | 97 | import idigbio 98 | api = idigbio.json() 99 | m = api.create_map(rq={"genus": "acer"}, t="geohash") 100 | m.save_map_image("acer_map_geohash", 2) 101 | 102 | Create a point map for a genus 103 | 104 | .. code-block:: python 105 | 106 | import idigbio 107 | api = idigbio.json() 108 | m = api.create_map(rq={"genus": "acer"}, t="points") 109 | m.save_map_image("acer_map_points", 2) 110 | 111 | Create a zoomed in point map for a bounding box 112 | 113 | .. code-block:: python 114 | 115 | import idigbio 116 | api = idigbio.json() 117 | bbox = {"type": "geo_bounding_box", "bottom_right": {"lat": 29.642979999999998, "lon": -82.00}, "top_left": {"lat": 29.66298, "lon": -82.35315800000001}} 118 | m = api.create_map( 119 | rq={"geopoint": bbox} 120 | ) 121 | m.save_map_image("test.png", None, bbox=bbox) 122 | 123 | 124 | Create a summary of kingdom and phylum data 125 | 126 | .. code-block:: python 127 | 128 | import idigbio 129 | api = idigbio.json() 130 | summary_data = api.top_records(fields=["kingdom", "phylum"]) 131 | 132 | Get the number of Records for a search by scientific name 133 | 134 | .. code-block:: python 135 | 136 | import idigbio 137 | api = idigbio.json() 138 | count = api.count_records(rq={"scientificname": "puma concolor"}) 139 | 140 | Get the number of MediaRecords for a search by scientific name 141 | 142 | .. code-block:: python 143 | 144 | import idigbio 145 | api = idigbio.json() 146 | count = api.count_media(rq={"scientificname": "puma concolor"}) 147 | 148 | Get the histogram of Collection Dates for a search by record property, for the last 10 years 149 | 150 | .. 
code-block:: python 151 | 152 | import idigbio 153 | api = idigbio.json() 154 | histogram_data = api.datehist( 155 | rq={"scientificname": "puma concolor"}, 156 | top_fields=["institutioncode"], min_date="2005-01-01") 157 | 158 | Development 159 | +++++++++++ 160 | 161 | To contribute code to this project, please submit a pull request to the repo on github: 162 | 163 | https://github.com/idigbio/idigbio-python-client/ 164 | 165 | To set up a development environment, run the following from inside a python virtual environment 166 | in your local repo directory: 167 | 168 | .. code-block:: 169 | 170 | pip install -e . 171 | 172 | -------------------------------------------------------------------------------- /bld.bat: -------------------------------------------------------------------------------- 1 | "%PYTHON%" setup.py install 2 | if errorlevel 1 exit 1 3 | 4 | :: Add more build steps here, if they are necessary. 5 | 6 | :: See 7 | :: http://docs.continuum.io/conda/build.html 8 | :: for a list of environment variables that are set during the build process. 9 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | $PYTHON setup.py install 4 | 5 | # Add more build steps here, if they are necessary. 6 | 7 | # See 8 | # http://docs.continuum.io/conda/build.html 9 | # for a list of environment variables that are set during the build process. 10 | -------------------------------------------------------------------------------- /examples/fetch_media/README.md: -------------------------------------------------------------------------------- 1 | # fetch_media.py 2 | 3 | An example script that leverages the iDigBio search API to download media that match a query. 
4 | 5 | ## Usage 6 | 7 | ``` 8 | $ python fetch_media.py --help 9 | usage: fetch_media.py [-h] [-m MAX] [-s {thumbnail,webview,fullsize}] 10 | [-o OUTPUT_DIR] 11 | (-q QUERY | --query-file QUERY_FILE | --records-uuids-file RECORDS_UUIDS_FILE | --mediarecords-uuids-file MEDIARECORDS_UUIDS_FILE) 12 | 13 | This script will download media that are associated with the specimens 14 | returned by an iDigBio specimen record search query. 15 | 16 | The iDigBio Query Format is documented at 17 | 18 | https://github.com/idigbio/idigbio-search-api/wiki/Query-Format 19 | 20 | Notes on the --output-dir / -o parameter: 21 | 22 | If the specified output directory does not exist, it will be created. 23 | Omitting this parameter will cause a new directory to be created 24 | under the current directory, named in a timestamp-like style. 25 | 26 | ### Sample ### 27 | 28 | $ python fetch_media.py -o /tmp/idigbio_media_downloads -m 5 -q '{"genus": "acer"}' 29 | 30 | DOWNLOADING FINISHED with 5 successes and 0 failures 31 | 32 | Media downloads are in output directory: '/tmp/idigbio_media_downloads' 33 | 34 | $ ls -l /tmp/idigbio_media_downloads 35 | total 604 36 | -rw-rw-r-- 1 dstoner dstoner 93767 Jun 6 09:19 0c9b4669-edaa-467d-b240-f3311c764c04_webview.jpg 37 | -rw-rw-r-- 1 dstoner dstoner 114132 Jun 6 09:19 1f2dbb2b-75ba-48cb-b34c-1ca003b4a38d_webview.jpg 38 | -rw-rw-r-- 1 dstoner dstoner 147900 Jun 6 09:19 56f84bfe-5095-4fbb-b9e0-08cef3fdb448_webview.jpg 39 | -rw-rw-r-- 1 dstoner dstoner 117882 Jun 6 09:19 6a0d0c92-d2be-4ae5-9fef-60453778b0f0_webview.jpg 40 | -rw-rw-r-- 1 dstoner dstoner 136202 Jun 6 09:19 b98b9704-5ac5-4b53-b74d-d2d4d7d46ddd_webview.jpg 41 | ### 42 | 43 | The media record for the first download above would be viewable in the iDigBio portal at 44 | https://www.idigbio.org/portal/mediarecords/0c9b4669-edaa-467d-b240-f3311c764c04 45 | 46 | optional arguments: 47 | -h, --help show this help message and exit 48 | -m MAX, --max MAX Maximum number of records to be returned 
from search 49 | query. Default: 100, Maximum allowed value: 100000 50 | -s {thumbnail,webview,fullsize}, --size {thumbnail,webview,fullsize} 51 | Size of derivative to download. Default: 'webview' 52 | -o OUTPUT_DIR, --output-dir OUTPUT_DIR 53 | Directory path for downloaded media files. Default: a 54 | new directory will be created under current directory 55 | -q QUERY, --query QUERY 56 | query in iDigBio Query Format. 57 | --query-file QUERY_FILE 58 | file path containing query string in iDigBio Query 59 | Format 60 | --records-uuids-file RECORDS_UUIDS_FILE 61 | file path containing list of iDigBio record uuids, one 62 | per line 63 | --mediarecords-uuids-file MEDIARECORDS_UUIDS_FILE 64 | file path containing list of iDigBio mediarecord 65 | uuids, one per line 66 | 67 | ``` 68 | 69 | ## Examples 70 | 71 | Some of these example queries are taken directly from the iDigBio Query Format portion of the Search API documentation: 72 | 73 | https://github.com/idigbio/idigbio-search-api/wiki/Query-Format 74 | 75 | 76 | ### Specify a query on the command-line 77 | 78 | Different operating systems display different behaviors when dealing with quotes and curly braces. 79 | 80 | On Unix-like operating systems is best to wrap the query string in single quotes to protect the contents from shell interpretation. 81 | 82 | On Windows, the query string cannot be wrapped in single quotes and double-quotes must be escaped with a backslash. 83 | 84 | In many cases it may be best to specify the query in a text file and use the --query-file option instead of -q / --query. 85 | 86 | ``` 87 | $ python fetch_media.py -q '{"scientificname":"isotelus maximus"}' 88 | 89 | Using query: 90 | 91 | {"scientificname":"isotelus maximus"} 92 | 93 | OPERATING PARAMETERS... 
94 | 95 | Maximum number of media to fetch: 100 96 | Media derivative size: webview 97 | Output directory: /home/dstoner/git/idigbio-python-client/examples/fetch_media/20170607094735.99 98 | Query Type: rq 99 | 100 | EXECUTING SEARCH QUERY... 101 | 102 | 103 | Search query produced 10 results. 104 | 105 | 106 | BEGINNING DOWNLOADS NOW... 107 | 108 | Downloading: 'https://api.idigbio.org/v2/media/267e2624-641f-4e34-9fdc-df59b14a5571?size=webview' 109 | Downloading: 'https://api.idigbio.org/v2/media/597f3eba-e40a-411f-af3e-5e6bb2d77c5c?size=webview' 110 | Downloading: 'https://api.idigbio.org/v2/media/f610a25a-ea1f-4f8b-9905-68523ff9e876?size=webview' 111 | Downloading: 'https://api.idigbio.org/v2/media/9ceb0644-03ea-47cb-9b58-bbb8ffd22a5b?size=webview' 112 | Downloading: 'https://api.idigbio.org/v2/media/c4aa9d24-8284-4207-8df1-294cbd80f634?size=webview' 113 | Downloading: 'https://api.idigbio.org/v2/media/1db5c01d-a54f-4049-b4cd-ffceda60a920?size=webview' 114 | Downloading: 'https://api.idigbio.org/v2/media/3e20720a-2f1b-4891-9d00-80028a3222b4?size=webview' 115 | Downloading: 'https://api.idigbio.org/v2/media/efd6753c-f276-4836-a05a-8771bd934ee5?size=webview' 116 | Downloading: 'https://api.idigbio.org/v2/media/c1bedf6b-2ddc-418f-aa9f-a4e69ec811fc?size=webview' 117 | Downloading: 'https://api.idigbio.org/v2/media/d29d364e-72f2-407c-9c81-428e14c7a2c3?size=webview' 118 | 119 | DOWNLOADING FINISHED with 10 successes and 0 failures 120 | 121 | Media downloads are in output directory: '/home/dstoner/git/idigbio-python-client/examples/fetch_media/20170607094735.99' 122 | 123 | ``` 124 | 125 | ### Use a query specified in a file 126 | 127 | ``` 128 | $ cat query.txt 129 | { 130 | "scientificname": "Anastrepha pallens Coquillett, 1904" 131 | } 132 | 133 | 134 | $ python fetch_media.py --query-file query.txt 135 | 136 | Using query: 137 | 138 | { 139 | "scientificname": "Anastrepha pallens Coquillett, 1904" 140 | } 141 | 142 | 143 | 144 | OPERATING PARAMETERS... 
145 | 146 | Maximum number of media to fetch: 100 147 | Media derivative size: webview 148 | Output directory: /home/dstoner/git/idigbio-python-client/examples/fetch_media/20170607095607.19 149 | Query Type: rq 150 | 151 | EXECUTING SEARCH QUERY... 152 | 153 | 154 | Search query produced 7 results. 155 | 156 | 157 | BEGINNING DOWNLOADS NOW... 158 | 159 | Downloading: 'https://api.idigbio.org/v2/media/5cf7837c-7535-4263-a9a9-cfcf3a45b251?size=webview' 160 | Downloading: 'https://api.idigbio.org/v2/media/cd4fa6ce-95d3-4445-8733-75a6908944d8?size=webview' 161 | Downloading: 'https://api.idigbio.org/v2/media/ba7322f1-6468-4739-be87-a98ef8eb8bfc?size=webview' 162 | Downloading: 'https://api.idigbio.org/v2/media/0d0e07fa-9e86-4b71-8abf-140d163f9c16?size=webview' 163 | Downloading: 'https://api.idigbio.org/v2/media/8a0229f9-0b58-4017-af6e-f55121c28cab?size=webview' 164 | Downloading: 'https://api.idigbio.org/v2/media/500ba0ee-2e70-46ea-b80f-9e5a29753923?size=webview' 165 | Downloading: 'https://api.idigbio.org/v2/media/fbc36e25-16db-4828-a4c9-98049f0663fc?size=webview' 166 | 167 | DOWNLOADING FINISHED with 7 successes and 0 failures 168 | 169 | Media downloads are in output directory: '/home/dstoner/git/idigbio-python-client/examples/fetch_media/20170607095607.19' 170 | 171 | ``` 172 | 173 | 174 | ### Searching within a radius around a geopoint 175 | 176 | In addition to specifying a query file, the following command limits the number of media to 5 and specifies an output directory. 177 | 178 | ``` 179 | $ python fetch_media.py -m 5 --query-file query_geo.txt -o /tmp/idigbio_media_downloads 180 | 181 | Using query: 182 | 183 | { 184 | "geopoint": { 185 | "type": "geo_distance", 186 | "distance": "100km", 187 | "lat": -41.1119, 188 | "lon": 145.323 189 | } 190 | } 191 | 192 | 193 | OPERATING PARAMETERS... 
194 | 195 | Maximum number of media to fetch: 5 196 | Media derivative size: webview 197 | Output directory: /tmp/idigbio_media_downloads 198 | Query Type: rq 199 | 200 | EXECUTING SEARCH QUERY... 201 | 202 | 203 | Search query produced 588 results. 204 | 205 | *** WARNING: search query produced more results than the designated maximum number of media to fetch. 206 | *** Use the -m or --max parameter to increase the maximum number of media to fetch. 207 | 208 | BEGINNING DOWNLOADS NOW... 209 | 210 | Downloading: 'https://api.idigbio.org/v2/media/63d218ad-4788-45ef-a11d-6d5ae75e9c19?size=webview' 211 | Downloading: 'https://api.idigbio.org/v2/media/81769eba-4c23-4dd7-8d1f-40a57d0cee94?size=webview' 212 | Downloading: 'https://api.idigbio.org/v2/media/90979c0b-1807-42c8-9180-cbc95a696d0a?size=webview' 213 | Downloading: 'https://api.idigbio.org/v2/media/9d6efc2a-ffec-4866-b3fc-2f0c7d3340d1?size=webview' 214 | Downloading: 'https://api.idigbio.org/v2/media/a01348b0-bed7-447d-982d-2e946db7ac5b?size=webview' 215 | 216 | DOWNLOADING FINISHED with 5 successes and 0 failures 217 | 218 | Media downloads are in output directory: '/tmp/idigbio_media_downloads' 219 | 220 | ``` 221 | 222 | 223 | ### Specify a geo bounding box 224 | 225 | ``` 226 | $ cat query_geo_bounding.txt 227 | { 228 | "geopoint": { 229 | "type": "geo_bounding_box", 230 | "top_left": { 231 | "lat": 19.23, 232 | "lon": -130 233 | }, 234 | "bottom_right": { 235 | "lat": -45.1119, 236 | "lon": 179.99999 237 | } 238 | } 239 | } 240 | ``` 241 | 242 | In addition to specifying a query file, the following command limits the number of media to 5 and specifies an output directory. 
243 | 244 | 245 | ``` 246 | $ python fetch_media.py -m 5 --query-file query_geo_bounding.txt -o /tmp/idigbio_media_downloads 247 | 248 | Using query: 249 | 250 | { 251 | "geopoint": { 252 | "type": "geo_bounding_box", 253 | "top_left": { 254 | "lat": 19.23, 255 | "lon": -130 256 | }, 257 | "bottom_right": { 258 | "lat": -45.1119, 259 | "lon": 179.99999 260 | } 261 | } 262 | } 263 | 264 | 265 | OPERATING PARAMETERS... 266 | 267 | Maximum number of media to fetch: 5 268 | Media derivative size: webview 269 | Output directory: /tmp/idigbio_media_downloads 270 | Query Type: rq 271 | 272 | EXECUTING SEARCH QUERY... 273 | 274 | 275 | Search query produced 1260449 results. 276 | 277 | *** WARNING: search query produced more results than the designated maximum number of media to fetch. 278 | *** Use the -m or --max parameter to increase the maximum number of media to fetch. 279 | 280 | BEGINNING DOWNLOADS NOW... 281 | 282 | Downloading: 'https://api.idigbio.org/v2/media/3a12b56f-70fd-4f14-aa9f-feead4aa4a9d?size=webview' 283 | Downloading: 'https://api.idigbio.org/v2/media/3b94d07c-31d9-42bb-b31c-708c20ff56f0?size=webview' 284 | Downloading: 'https://api.idigbio.org/v2/media/3bb22506-bcd9-4a56-bcd2-94d4b3cdfd46?size=webview' 285 | Downloading: 'https://api.idigbio.org/v2/media/3c1ae3e3-3df0-43cb-9864-c0b34f41e491?size=webview' 286 | Downloading: 'https://api.idigbio.org/v2/media/3c663c42-2f7e-435d-89eb-39e35961f0ed?size=webview' 287 | 288 | DOWNLOADING FINISHED with 5 successes and 0 failures 289 | 290 | Media downloads are in output directory: '/tmp/idigbio_media_downloads' 291 | 292 | ``` 293 | 294 | ### Specify a query based on a list of uuids 295 | 296 | If you have already processed a list of downloaded iDigBio records and have a list of record 297 | or mediarecord uuids, those uuids can be placed in a text file, one per line, and fetch_media 298 | can download the associated media. 
299 | 300 | #### iDigBio record uuids 301 | 302 | Note that in this case the records have more than one media associated with them so we end up with more than 3 images after specifying only 3 record uuids. 303 | 304 | ``` 305 | $ cat record_uuids_list.txt 306 | a494a2a6-b64b-4f99-b26c-53bfdcd54876 307 | ddc56589-7009-4fe6-81d8-d9c9219a503f 308 | 9f7f4ba7-0def-4b01-b806-9089dcb7382c 309 | 310 | $ python fetch_media.py --records-uuids-file record_uuids_list.txt -o /tmp/idigbio_media_downloads 311 | 312 | Using query: 313 | 314 | {"uuid":["a494a2a6-b64b-4f99-b26c-53bfdcd54876", "ddc56589-7009-4fe6-81d8-d9c9219a503f", "9f7f4ba7-0def-4b01-b806-9089dcb7382c"]} 315 | 316 | OPERATING PARAMETERS... 317 | 318 | Maximum number of media to fetch: 100 319 | Media derivative size: webview 320 | Output directory: /tmp/idigbio_media_downloads 321 | Query Type: rq 322 | 323 | EXECUTING SEARCH QUERY... 324 | 325 | 326 | Search query produced 6 results. 327 | 328 | 329 | BEGINNING DOWNLOADS NOW... 330 | 331 | Downloading: 'https://api.idigbio.org/v2/media/24be5c10-9b1d-418f-85d4-f13b52e9644e?size=webview' 332 | Downloading: 'https://api.idigbio.org/v2/media/6976b7a3-1547-49a7-8601-febfb90d5e44?size=webview' 333 | Downloading: 'https://api.idigbio.org/v2/media/8fc71122-9fc9-4d5c-8bb2-17a315847f9c?size=webview' 334 | Downloading: 'https://api.idigbio.org/v2/media/88e21956-702d-4e1a-ba71-0d695159b9a9?size=webview' 335 | Downloading: 'https://api.idigbio.org/v2/media/b7cf0d3b-be0f-4d47-9361-0ef0521df28f?size=webview' 336 | Downloading: 'https://api.idigbio.org/v2/media/fbc3237a-1816-4cee-8025-c364d37280d4?size=webview' 337 | 338 | DOWNLOADING FINISHED with 6 successes and 0 failures 339 | 340 | Media downloads are in output directory: '/tmp/idigbio_media_downloads' 341 | 342 | ``` 343 | 344 | #### iDigBio mediarecord uuids 345 | 346 | ``` 347 | $ python fetch_media.py --mediarecords-uuids-file mediarecord_uuids_list.txt -o /tmp/idigbio_media_downloads 348 | 349 | Using query: 350 | 351 | 
{"uuid":["787d60f7-3fb7-4b82-8846-b5b4123761c1", "9c84908f-170f-44eb-ad6d-6d3fec5032a6", "845f80e8-02d7-49dd-aef7-fc58cec36c89"]} 352 | 353 | OPERATING PARAMETERS... 354 | 355 | Maximum number of media to fetch: 100 356 | Media derivative size: webview 357 | Output directory: /tmp/idigbio_media_downloads 358 | Query Type: mq 359 | 360 | EXECUTING SEARCH QUERY... 361 | 362 | 363 | Search query produced 3 results. 364 | 365 | 366 | BEGINNING DOWNLOADS NOW... 367 | 368 | Downloading: 'https://api.idigbio.org/v2/media/845f80e8-02d7-49dd-aef7-fc58cec36c89?size=webview' 369 | Downloading: 'https://api.idigbio.org/v2/media/9c84908f-170f-44eb-ad6d-6d3fec5032a6?size=webview' 370 | Downloading: 'https://api.idigbio.org/v2/media/787d60f7-3fb7-4b82-8846-b5b4123761c1?size=webview' 371 | 372 | DOWNLOADING FINISHED with 3 successes and 0 failures 373 | 374 | Media downloads are in output directory: '/tmp/idigbio_media_downloads' 375 | 376 | 377 | ``` -------------------------------------------------------------------------------- /examples/fetch_media/fetch_media.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | try: 3 | from idigbio.json_client import iDbApiJson 4 | import requests 5 | import shutil 6 | import os 7 | import sys 8 | import time 9 | import argparse 10 | import json 11 | except ImportError as e: 12 | print ("IMPORT ERROR (This exception is likely caused by a missing module): '{0}'".format(e)) 13 | raise SystemExit 14 | 15 | help_blob = """ 16 | 17 | This script will download media that are associated with the specimens 18 | returned by an iDigBio specimen record search query. 19 | 20 | The iDigBio Query Format is documented at 21 | 22 | https://github.com/idigbio/idigbio-search-api/wiki/Query-Format 23 | 24 | Notes on the --output-dir / -o parameter: 25 | 26 | If the specified output directory does not exist, it will be created. 
27 | Omitting this parameter will cause a new directory to be created 28 | under the current directory, named in a timestamp-like style. 29 | 30 | ### Sample ### 31 | 32 | $ python fetch_media.py -o /tmp/idigbio_media_downloads -m 5 -q '{"genus": "acer"}' 33 | 34 | DOWNLOADING FINISHED with 5 successes and 0 failures 35 | 36 | Media downloads are in output directory: '/tmp/idigbio_media_downloads' 37 | 38 | $ ls -l /tmp/idigbio_media_downloads 39 | total 604 40 | -rw-rw-r-- 1 dstoner dstoner 93767 Jun 6 09:19 0c9b4669-edaa-467d-b240-f3311c764c04_webview.jpg 41 | -rw-rw-r-- 1 dstoner dstoner 114132 Jun 6 09:19 1f2dbb2b-75ba-48cb-b34c-1ca003b4a38d_webview.jpg 42 | -rw-rw-r-- 1 dstoner dstoner 147900 Jun 6 09:19 56f84bfe-5095-4fbb-b9e0-08cef3fdb448_webview.jpg 43 | -rw-rw-r-- 1 dstoner dstoner 117882 Jun 6 09:19 6a0d0c92-d2be-4ae5-9fef-60453778b0f0_webview.jpg 44 | -rw-rw-r-- 1 dstoner dstoner 136202 Jun 6 09:19 b98b9704-5ac5-4b53-b74d-d2d4d7d46ddd_webview.jpg 45 | ### 46 | 47 | The media record for the first download above would be viewable in the iDigBio portal at 48 | https://www.idigbio.org/portal/mediarecords/0c9b4669-edaa-467d-b240-f3311c764c04 49 | 50 | """ 51 | 52 | # MAX_MAX_COUNT is a safety limit to keep an erroneous query from downloading all of iDigBio's media. 53 | # Change this value if you are legitimately trying to download more than 100k media. 54 | # Also, please consider letting us know that you are doing this because we are interested 55 | # in these kinds of use cases. idigbio@acis.ufl.edu 56 | MAX_MAX_COUNT = 100000 57 | 58 | DEFAULT_MAX_COUNT = 100 59 | SIZES = ["thumbnail", "webview", "fullsize"] 60 | DEFAULT_SIZE = "webview" 61 | DEFAULT_OUTPUT_DIR = None 62 | 63 | argparser = argparse.ArgumentParser(description=help_blob, formatter_class=argparse.RawDescriptionHelpFormatter) 64 | argparser.add_argument("-m", "--max", type=int, default=DEFAULT_MAX_COUNT, 65 | help="Maximum number of records to be returned from search query. 
Default: {0}, Maximum allowed value: {1}".format(DEFAULT_MAX_COUNT,MAX_MAX_COUNT)) 66 | argparser.add_argument("-s", "--size", choices=SIZES, default=DEFAULT_SIZE, 67 | help="Size of derivative to download. Default: '{0}'".format(DEFAULT_SIZE)) 68 | argparser.add_argument("-o", "--output-dir", default=DEFAULT_OUTPUT_DIR, 69 | help="Directory path for downloaded media files. Default: a new directory will be created under current directory") 70 | argparser.add_argument("-d", "--debug", default=False, action='store_true', 71 | help="enable debugging output") 72 | arg_group = argparser.add_mutually_exclusive_group(required=True) 73 | arg_group.add_argument("-q", "--query", 74 | help="query in iDigBio Query Format.") 75 | arg_group.add_argument("--query-file", 76 | help="file path containing query string in iDigBio Query Format") 77 | arg_group.add_argument("--records-uuids-file", 78 | help="file path containing list of iDigBio record uuids, one per line") 79 | arg_group.add_argument("--mediarecords-uuids-file", 80 | help="file path containing list of iDigBio mediarecord uuids, one per line") 81 | args = argparser.parse_args() 82 | 83 | MAX_RESULTS = max(0,(min(args.max, MAX_MAX_COUNT))) 84 | SIZE = args.size 85 | 86 | output_directory = args.output_dir 87 | 88 | QUERY_TYPE = 'rq' 89 | 90 | debug_flag = args.debug 91 | if debug_flag: 92 | print () 93 | print ("** DEBUGGING ENABLED **") 94 | print () 95 | print () 96 | modulenames = set(sys.modules)&set(globals()) 97 | allmodules = [sys.modules[name] for name in modulenames] 98 | print ("Loaded modules...") 99 | for each_mod in allmodules: 100 | print (each_mod) 101 | print () 102 | 103 | def read_query_file(query_filename): 104 | if os.path.isfile(query_filename): 105 | with open(query_filename, 'r') as queryfile: 106 | q = queryfile.read() 107 | return q 108 | else: 109 | print ("*** Error: query file could not be read or does not exist.") 110 | raise SystemExit 111 | 112 | def 
def get_query_from_uuids_list_file(uuids_file):
    """Build an iDigBio query string matching the uuids listed in *uuids_file*.

    The file is expected to contain one uuid per line.  Surrounding
    whitespace is stripped and blank lines are skipped so that a
    trailing newline does not inject an empty uuid into the query.

    Returns a JSON string of the form '{"uuid": [...]}'.
    """
    uuids_from_file = []
    with open(uuids_file) as uf:
        for line in uf:
            uuid = line.strip()
            if uuid:  # ignore blank lines
                uuids_from_file.append(uuid)

    # Serialize the whole query dict in one step instead of splicing
    # JSON fragments together by hand.
    return json.dumps({"uuid": uuids_from_file})
# The following should work whether one has specified an existing directory
# name, created a new directory by name, or left the output_directory
# unspecified.
if output_directory is None:
    # Default name: timestamp plus the fractional seconds,
    # e.g. "20240101120000.123456", to make collisions unlikely.
    output_directory = "{0}.{1}".format(
        time.strftime("%Y%m%d%H%M%S"),
        str(time.time()).rsplit('.', 1)[-1])

if not os.path.exists(output_directory):
    try:
        os.makedirs(output_directory)
    except OSError:
        # typo fix: "directroy" -> "directory"; narrowed from bare except
        print ("*** ERROR! Could not create directory for output: '{0}'".format(
            os.path.abspath(output_directory)))
        raise SystemExit


def get_media_with_naming (output_dir, media_url, uuid, size):
    """
    Download a media file to a directory and name it based on the input parameters.

    'output_dir' controls where the download is placed.

    'media_url' is the url / link to the media that will be downloaded.

    'uuid' is used to uniquely identify the output filename.

    'size' is the class of image derivative, useful in the output filename.

    Returns True on success, False on any failure.
    """
    try:
        response = requests.get(media_url, stream=True)
        response.raise_for_status()
    except (requests.exceptions.HTTPError, requests.exceptions.ConnectionError) as e:
        print('*** HTTP ERROR: {0}'.format(e))
        return False

    ### iDigBio returns 200 OK and displays an SVG status image when a derivative
    ### is not present. Check for "Content-Type: image/svg+xml" header to notice
    ### this condition.  .get() + startswith() avoids a KeyError when the header
    ### is missing and tolerates a "; charset=..." suffix.
    if response.headers.get('Content-Type', '').startswith('image/svg+xml'):
        print("*** WARNING - No media at '{0}'".format(media_url))
        return False

    # Output filenames will be of the form: {mediarecord_uuid}_{size}.jpg
    # Bug fix: the original read the module-level SIZE here, silently
    # ignoring this function's 'size' parameter.
    local_filepath = os.path.join(output_dir, uuid + '_' + size + '.jpg')

    try:
        with open(local_filepath, 'wb') as out_file:
            shutil.copyfileobj(response.raw, out_file)
        return True
    except (IOError, OSError):
        # Narrowed from a bare except: disk/permission problems only.
        return False
Exiting.") 242 | raise SystemExit 243 | if results['itemCount'] > MAX_RESULTS: 244 | print ("*** WARNING: search query produced more results than the designated maximum number of media to fetch.") 245 | print ("*** Use the -m or --max parameter to increase the maximum number of media to fetch.") 246 | print () 247 | print("BEGINNING DOWNLOADS NOW...") 248 | print () 249 | 250 | successes = 0 251 | failures = 0 252 | 253 | for each in results['items']: 254 | media_record_uuid = each['indexTerms']['uuid'] 255 | media_url = 'https://api.idigbio.org/v2/media/' + media_record_uuid + '?size=' + SIZE 256 | print ("Downloading: '{0}'".format(media_url)) 257 | if get_media_with_naming(output_directory, media_url, media_record_uuid, SIZE): 258 | successes += 1 259 | else: 260 | failures += 1 261 | 262 | print () 263 | print ("DOWNLOADING FINISHED with {0:d} successes and {1:d} failures".format(successes, failures)) 264 | print () 265 | print ("Media downloads are in output directory: '{0}'".format(os.path.abspath(output_directory))) 266 | -------------------------------------------------------------------------------- /examples/fetch_media/mediarecord_uuids_list.txt: -------------------------------------------------------------------------------- 1 | 787d60f7-3fb7-4b82-8846-b5b4123761c1 2 | 9c84908f-170f-44eb-ad6d-6d3fec5032a6 3 | 845f80e8-02d7-49dd-aef7-fc58cec36c89 4 | -------------------------------------------------------------------------------- /examples/fetch_media/query.txt: -------------------------------------------------------------------------------- 1 | { 2 | "scientificname": "Anastrepha pallens Coquillett, 1904" 3 | } 4 | 5 | -------------------------------------------------------------------------------- /examples/fetch_media/query_geo.txt: -------------------------------------------------------------------------------- 1 | { 2 | "geopoint": { 3 | "type": "geo_distance", 4 | "distance": "100km", 5 | "lat": -41.1119, 6 | "lon": 145.323 7 | } 8 | } 9 | 
-------------------------------------------------------------------------------- /examples/fetch_media/query_geo_bounding.txt: -------------------------------------------------------------------------------- 1 | { 2 | "geopoint": { 3 | "type": "geo_bounding_box", 4 | "top_left": { 5 | "lat": 19.23, 6 | "lon": -130 7 | }, 8 | "bottom_right": { 9 | "lat": -45.1119, 10 | "lon": 179.99999 11 | } 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /examples/fetch_media/record_uuids_list.txt: -------------------------------------------------------------------------------- 1 | a494a2a6-b64b-4f99-b26c-53bfdcd54876 2 | ddc56589-7009-4fe6-81d8-d9c9219a503f 3 | 9f7f4ba7-0def-4b01-b806-9089dcb7382c 4 | -------------------------------------------------------------------------------- /examples/recordset_indexed_modified_date_and_archive_link_status.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | try: 3 | from idigbio.json_client import iDbApiJson 4 | import requests 5 | import argparse 6 | except ImportError as e: 7 | print ("IMPORT ERROR (This exception is likely caused by a missing module): '{0}'".format(e)) 8 | raise SystemExit 9 | 10 | help_blob = """ 11 | 12 | This script will print information about recordsets and their indexed pubdate, contacts. 13 | 14 | Input list of recordset uuids is specified by putting them in a file and using --uuids-file option. 
def check_archive_status(url):
    """HEAD *url* and return the HTTP reason phrase (e.g. "OK").

    Returns the string "NO_ARCHIVE_AVAILABLE" when the link is
    unreachable, times out, or answers with an error status.
    """
    try:
        r = requests.head(url, timeout=5)
        r.raise_for_status()
        return r.reason
    except requests.exceptions.RequestException:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt and
        # SystemExit are no longer swallowed; RequestException covers
        # connection errors, timeouts, and raise_for_status failures.
        return "NO_ARCHIVE_AVAILABLE"
def json(*args, **kwargs):
    """Return an iDbApiJson client (plain dicts in and out).

    The import is deferred to call time so that ``import idigbio``
    stays cheap and submodules load only when actually used.
    """
    from .json_client import iDbApiJson
    return iDbApiJson(*args, **kwargs)


def pandas(*args, **kwargs):
    """Return an iDbApiPandas client (results as pandas DataFrames).

    Deferred import: pandas is an optional extra (see setup.py
    ``extras_require``), required only when this factory is called.
    """
    from .pandas_client import iDbApiPandas
    return iDbApiPandas(*args, **kwargs)
def level_dic():
    '''
    Preset tile widths (degrees of longitude) for OpenStreetMap zoom
    levels 0-19.  See http://wiki.openstreetmap.org/wiki/Zoom_levels
    '''
    return {0: 360.0, 1: 180.0, 2: 90.0, 3: 45.0,
            4: 22.5, 5: 11.25, 6: 5.625, 7: 2.813,
            8: 1.406, 9: 0.703, 10: 0.352, 11: 0.176,
            12: 0.088, 13: 0.044, 14: 0.022, 15: 0.011,
            16: 0.005, 17: 0.003, 18: 0.001, 19: 0.0005}


def getzoom(min_lon, max_lon, min_lat, max_lat):
    """Pick the OSM zoom level whose tile width best fits the bounding box."""
    presets = level_dic()
    precision = 4
    # Largest extent of the box in degrees (its "north-east" span).
    extent = max(round(max_lat - min_lat, precision),
                 round(max_lon - min_lon, precision))
    # Rank the extent among the preset widths, widest first; its
    # position is the zoom level to use.
    candidates = [round(width, precision) for width in presets.values()]
    candidates.append(extent)
    candidates.sort(reverse=True)
    return candidates.index(extent)


def deg2num(lat_deg, lon_deg, zoom):
    """Convert a lat/lon pair in degrees to OSM (xtile, ytile) at *zoom*."""
    lat_rad = math.radians(lat_deg)
    tiles_per_axis = 2.0 ** zoom
    xtile = int((lon_deg + 180.0) / 360.0 * tiles_per_axis)
    ytile = int((1.0 - math.log(math.tan(lat_rad) + (1 / math.cos(lat_rad)))
                 / math.pi) / 2.0 * tiles_per_axis)
    return (xtile, ytile)
def make_session(user=None, password=None):
    """Create a requests.Session for talking to the iDigBio APIs.

    The session carries this client's User-Agent header; when both
    *user* and *password* are supplied it also uses HTTP basic auth
    (auth is not necessary for searching).
    """
    # Imported here, not at module top, to pick up idigbio.__version__
    # for the User-Agent string.
    import idigbio
    s = requests.Session()
    if user and password:
        s.auth = (user, password)
    s.headers["User-Agent"] = "idigbio-python-client/" + idigbio.__version__
    return s


class iDigBioMap(object):
    """Handle for a server-side map created through the /v2/mapping API.

    Wraps the short code returned by the mapping endpoint and exposes
    tile and point accessors for the generated map.
    """

    def __init__(self, api, rq={}, style=None, t="auto", disable_images=False):
        # NOTE(review): ``rq={}`` is a mutable default argument; it is
        # only passed through here, never mutated, so harmless in practice.
        self.__api = api
        # PNG tiles need PIL; global_disable_images is set at import
        # time when PIL is unavailable.
        self._disable_images = disable_images or global_disable_images
        self._map_def = self.__api._api_post(
            "/v2/mapping", rq=rq, style=style, type=t)
        if self._map_def is None:
            raise MapNotCreatedException()
        self._short_code = self._map_def["shortCode"]
        self._tiles = self._map_def["tiles"]

    def definition(self):
        """Fetch the server's current definition of this map."""
        return self.__api._api_get("/v2/mapping/{0}".format(self._short_code))

    def json_tile(self, z, x, y):
        """Fetch the JSON tile at (z, x, y)."""
        return self.__api._api_get(
            "/v2/mapping/{0}/{1}/{2}/{3}.json".format(
                self._short_code, z, x, y))

    def utf8grid_tile(self, z, x, y):
        """Fetch the UTFGrid tile at (z, x, y)."""
        return self.__api._api_get(
            "/v2/mapping/{0}/{1}/{2}/{3}.grid.json".format(
                self._short_code, z, x, y))

    def png_tile(self, z, x, y):
        """Fetch the PNG tile at (z, x, y) as a PIL Image, or None on error."""
        if self._disable_images:
            raise ImagesDisabledException()
        tile = self.__api._api_get(
            "/v2/mapping/{0}/{1}/{2}/{3}.png".format(
                self._short_code, z, x, y), raw=True)
        if tile is None:
            return None
        else:
            return Image.open(io_ify(tile))

    def points(self, lat, lon, zoom, sort=None, limit=100, offset=None):
        """List occurrence points of this map near (lat, lon) at *zoom*."""
        return self.__api._api_get(
            "/v2/mapping/{0}/points".format(self._short_code),
            lat=lat, lon=lon, zoom=zoom, sort=sort, limit=limit, offset=offset)

    def save_map_image(self, filename, zoom, bbox=None):
        """Render this map over OSM base tiles and save as <filename>.png.

        *zoom* may be None when *bbox* is given, in which case a zoom
        level is derived from the bounding box; without a bbox, every
        tile of the world at *zoom* is rendered.
        """
        x_tiles = None
        y_tiles = None

        if zoom is None and bbox is not None:
            # NOTE(review): getzoom's signature is
            # (min_lon, max_lon, min_lat, max_lat), yet latitudes are
            # passed in the first two slots and longitudes in the last
            # two -- confirm whether this argument order is intended.
            zoom = getzoom(
                bbox["bottom_right"]["lat"],
                bbox["top_left"]["lat"],
                bbox["top_left"]["lon"],
                bbox["bottom_right"]["lon"]
            )

        if bbox is not None:
            top_left_tile = deg2num(
                bbox["top_left"]["lat"],
                bbox["top_left"]["lon"],
                zoom
            )

            bottom_right_tile = deg2num(
                bbox["bottom_right"]["lat"],
                bbox["bottom_right"]["lon"],
                zoom
            )

            x_tiles = range(top_left_tile[0], bottom_right_tile[0]+1)
            y_tiles = range(top_left_tile[1], bottom_right_tile[1]+1)

        # No bbox: cover the full tile grid at this zoom.
        if x_tiles is None:
            x_tiles = range(0, 2**zoom)
        if y_tiles is None:
            y_tiles = range(0, 2**zoom)

        s = make_session()
        if self._disable_images:
            raise ImagesDisabledException()
        # Paste each OSM base tile, then overlay our tile at the same
        # position using the tile itself as the transparency mask.
        im = Image.new("RGB", (len(x_tiles) * 256, len(y_tiles) * 256))
        x_tile_count = 0
        for x in x_tiles:
            y_tile_count = 0
            for y in y_tiles:
                # NOTE(review): plain-HTTP tile URL; openstreetmap.org
                # tile servers have moved to HTTPS -- confirm this
                # endpoint still responds.
                r = s.get(
                    "http://b.tile.openstreetmap.org/{z}/{x}/{y}.png".format(
                        z=zoom, x=x, y=y))
                r.raise_for_status()
                bim = Image.open(io_ify(r.content))
                tim = self.png_tile(zoom, x, y)
                im.paste(bim, (x_tile_count * 256, y_tile_count * 256))
                im.paste(tim, (x_tile_count * 256, y_tile_count * 256), tim)
                y_tile_count += 1
            x_tile_count += 1
        im.save("{0}.png".format(filename), "PNG")
        s.close()
207 | """ 208 | self.retries = retries 209 | 210 | if env == "prod": 211 | self._api_urls = { 212 | "base": "https://search.idigbio.org", 213 | "/v2/media": "https://api.idigbio.org", 214 | "/v2/download": "https://api.idigbio.org" 215 | } 216 | elif env == "beta": 217 | self._api_urls = { 218 | "base": "https://beta-search.idigbio.org", 219 | "/v2/media": "https://beta-api.idigbio.org", 220 | "/v2/download": "https://beta-api.idigbio.org" 221 | } 222 | elif env == "dev": 223 | self._api_urls = { 224 | "base": "https://localhost:19196", 225 | "/v2/media": "http://localhost:19197", 226 | "/v2/download": "http://localhost:19197" 227 | } 228 | else: 229 | raise BadEnvException() 230 | 231 | self.s = make_session(user=user, password=password) 232 | 233 | def __del__(self): 234 | self.s.close() 235 | 236 | def _api_get(self, slug, **kwargs): 237 | retries = self.retries 238 | raw = kwargs.pop('raw', False) 239 | 240 | api_url = self._api_urls.get(slug, self._api_urls.get("base")) 241 | 242 | for arg in list(kwargs): 243 | if isinstance(kwargs[arg], (dict, list)): 244 | kwargs[arg] = json.dumps(kwargs[arg]) 245 | elif kwargs[arg] is None: 246 | del kwargs[arg] 247 | qs = urlencode(kwargs) 248 | while retries > 0: 249 | try: 250 | log.debug("Querying: %r", api_url + slug + "?" + qs) 251 | r = self.s.get(api_url + slug + "?" 
+ qs) 252 | r.raise_for_status() 253 | if raw: 254 | return r.content 255 | else: 256 | return r.json() 257 | except: 258 | log.debug(traceback.print_exc()) 259 | retries -= 1 260 | return None 261 | 262 | def _api_post(self, slug, **kwargs): 263 | retries = self.retries 264 | raw = kwargs.pop('raw', False) 265 | files = kwargs.pop('files', None) 266 | params = kwargs.pop('params', None) 267 | 268 | api_url = self._api_urls.get(slug, self._api_urls.get("base")) 269 | 270 | for arg in list(kwargs): 271 | if kwargs[arg] is None: 272 | del kwargs[arg] 273 | 274 | while retries > 0: 275 | try: 276 | body = json.dumps(kwargs) 277 | if files is None: 278 | log.debug("POSTing: %r\n%s", slug, body) 279 | r = self.s.post( 280 | api_url + slug, 281 | data=json.dumps(kwargs), 282 | params=params, 283 | headers={"Content-Type": "application/json"} 284 | ) 285 | else: 286 | # you must seek the file before sending, 287 | # especially on the retry loop 288 | for k in files: 289 | files[k].seek(0) 290 | log.debug("POSTing + Files: %r\n%s", slug, body) 291 | r = self.s.post( 292 | api_url + slug, 293 | data=kwargs, 294 | files=files, 295 | params=params 296 | ) 297 | 298 | r.raise_for_status() 299 | if raw: 300 | return r.content 301 | else: 302 | return r.json() 303 | except: 304 | log.exception("Error posting: %r %r", slug, params) 305 | retries -= 1 306 | return None 307 | 308 | def view(self, t, uuid): 309 | """ 310 | t: the type to view. Supported types: records, media (mediarecords), recordsets, publishers 311 | uuid: the uuid to view. 
    def search_records(self, rq={}, limit=100, offset=0, sort=None,
                       fields=None, fields_exclude=FIELDS_EXCLUDE_DEFAULT):
        """
        rq              Search Query in iDigBio Query Format, using Record Query Fields
        sort            field to sort on, pick from Record Query Fields
        fields          a list of fields to return, specified using the fieldName parameter from Fields with type records
        fields_exclude  a list of fields to exclude, specified using the fieldName parameter from Fields with type records
        limit           max results
        offset          skip results

        Returns idigbio record format (legacy api), plus additional top level keys with parsed index terms. Returns None on error.
        """
        # ``is`` (identity, not equality) on purpose: it distinguishes
        # "caller left fields_exclude at its default sentinel" from
        # "caller passed an equal list".  When explicit fields are
        # requested, the default exclusion is dropped so the two
        # parameters do not conflict.
        if fields is not None and fields_exclude is FIELDS_EXCLUDE_DEFAULT:
            fields_exclude = None

        return self._api_post("/v2/search/records",
                              rq=rq, limit=limit, offset=offset, sort=sort,
                              fields=fields, fields_exclude=fields_exclude)

    def search_media(self, mq={}, rq={}, limit=100, offset=0, sort=None,
                     fields=None, fields_exclude=FIELDS_EXCLUDE_DEFAULT):
        """
        mq              Search Query in iDigBio Query Format, using Media Query Fields
        rq              Search Query in iDigBio Query Format, using Record Query Fields
        sort            field to sort on, pick from Media Query Fields
        fields          a list of fields to return, specified using the fieldName parameter from Fields with type mediarecords
        fields_exclude  a list of fields to exclude, specified using the fieldName parameter from Fields with type records
        limit           max results
        offset          skip results

        Returns idigbio record format (legacy api), plus additional top level keys with parsed index terms. Returns None on error.
        """
        # Same default-sentinel identity check as search_records above.
        if fields is not None and fields_exclude is FIELDS_EXCLUDE_DEFAULT:
            fields_exclude = None

        return self._api_post("/v2/search/media",
                              rq=rq, mq=mq, limit=limit, offset=offset, sort=sort,
                              fields=fields, fields_exclude=fields_exclude)
| date_interval=date_interval) 400 | 401 | def upload(self, filereference, localfile, media_type=None, etag=None): 402 | if not self.s.auth: 403 | raise Exception("Unauthorized") 404 | if not localfile: 405 | raise ValueError("Must have local copy of file to upload") 406 | fd = open(localfile, 'rb') 407 | if etag is None: 408 | etag = util.calcFileHash(fd, op=False) 409 | log.debug("Calculate etag for %r as %s", localfile, etag) 410 | files = {'file': fd} 411 | p = { 412 | "filereference": filereference, 413 | "media_type": media_type, 414 | "etag": etag 415 | } 416 | return self._api_post("/v2/media", files=files, params=p) 417 | 418 | def addreference(self, filereference, localfile): 419 | if not self.s.auth: 420 | raise Exception("Unauthorized") 421 | if not localfile: 422 | raise ValueError("Must have local copy of file to upload") 423 | etag = util.calcFileHash(localfile) 424 | p = {'filereference': filereference, 425 | 'etag': etag} 426 | return self._api_post("/v2/media", params=p) 427 | 428 | def addurl(self, filereference, media_type=None, mime_type=None): 429 | if not self.s.auth: 430 | raise Exception("Unauthorized") 431 | p = { 432 | "filereference": filereference, 433 | "media_type": media_type, 434 | "mime": mime_type 435 | } 436 | return self._api_post("/v2/media", **p) 437 | -------------------------------------------------------------------------------- /idigbio/pandas_client.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import pandas 3 | from .json_client import iDbApiJson 4 | from itertools import chain 5 | try: 6 | from future_builtins import map 7 | except ImportError: 8 | pass 9 | 10 | MAX_BATCH_SIZE = 5000 11 | 12 | log = logging.getLogger(__name__) 13 | 14 | 15 | class iDbApiPandas(object): 16 | def __init__(self, env="prod", user=None, password=None): 17 | """ 18 | env: Which environment to use. Defaults to prod." 
    def __search_base(self, apifn, **kwargs):
        """Run *apifn* (a json-client search method) and assemble the
        results' index terms into a pandas DataFrame indexed by uuid.

        Requests larger than MAX_BATCH_SIZE are transparently split
        into successive offset/limit pages.  Returns None when the
        search matches nothing.
        """
        def yd(data):
            # Yield the parsed index terms of every item in one API page.
            for r in data["items"]:
                yield r["indexTerms"]

        if "limit" in kwargs and kwargs["limit"] > MAX_BATCH_SIZE:
            def one(offset, total_limit):
                # Generator of API pages from ``offset`` up to ``total_limit``.
                while offset < total_limit:
                    batch = min(MAX_BATCH_SIZE, total_limit - offset)
                    log.debug("Querying at offset %s", offset)
                    data = apifn(offset=offset, limit=batch, **kwargs)
                    yield data
                    if len(data["items"]) < batch:
                        # Short page: the server has no more records.
                        log.debug("Exiting early, no more records on server")
                        break
                    offset += batch
            # pop() removes offset/limit so they are not passed twice
            # through **kwargs inside one().
            datagen = one(kwargs.pop("offset", 0), kwargs.pop("limit"))
            # Pull the first page eagerly to detect an empty result set.
            data = next(datagen)
            if data and len(data["items"]) > 0:
                records = chain(
                    yd(data),
                    chain.from_iterable(map(yd, datagen)))
                return pandas.DataFrame.from_records(records, index="uuid")
        else:
            data = apifn(**kwargs)
            if data["itemCount"] > 0:
                return pandas.DataFrame.from_records(yd(data), index="uuid")
        return None
61 | """ 62 | 63 | return self.__search_base(apifn=self.__api.search_records, **kwargs) 64 | 65 | def search_media(self,**kwargs): 66 | """ 67 | mq Search Query in iDigBio Query Format, using Media Query Fields 68 | rq Search Query in iDigBio Query Format, using Record Query Fields 69 | sort field to sort on, pick from Media Query Fields 70 | fields a list of fields to return, specified using the fieldName parameter from Fields with type mediarecords 71 | fields_exclude a list of fields to exclude, specified using the fieldName parameter from Fields with type records 72 | limit max results 73 | offset skip results 74 | 75 | Returns idigbio record format (legacy api), plus additional top level keys with parsed index terms. Returns None on error. 76 | """ 77 | return self.__search_base(apifn=self.__api.search_media, **kwargs) 78 | 79 | def __top_recuse(self, top_fields, top_records): 80 | if len(top_fields) == 0: 81 | yield [top_records["itemCount"]] 82 | else: 83 | for k in top_records[top_fields[0]]: 84 | for v in self.__top_recuse(top_fields[1:], top_records[top_fields[0]][k]): 85 | yield [k] + v 86 | 87 | def top_records(self, top_fields=["scientificname"], **kwargs): 88 | r = self.__api.top_records(top_fields=top_fields, **kwargs) 89 | return pandas.DataFrame.from_records( 90 | self.__top_recuse(top_fields, r), columns=top_fields + ["count"]) 91 | 92 | def top_media(self, top_fields=["flags"], **kwargs): 93 | r = self.__api.top_media(top_fields=top_fields, **kwargs) 94 | return pandas.DataFrame.from_records( 95 | self.__top_recuse(top_fields, r), columns=top_fields + ["count"]) 96 | 97 | def count_records(self, **kwargs): 98 | return self.__api.count_records(**kwargs) 99 | 100 | def count_media(self, **kwargs): 101 | return self.__api.count_media(**kwargs) 102 | 103 | def count_recordsets(self, **kwargs): 104 | return self.__api.count_recordsets(**kwargs) 105 | 106 | 107 | # TODO 108 | # def datehist(self,**kwargs): 109 | # return self._api.datehist(**kwargs) 
def calcFileHash(f, op=True, return_size=False):
    """Compute the md5 hex digest of a file.

    f            a filesystem path when ``op`` is True (default),
                 otherwise an already-open binary file object.
    op           whether *f* should be opened by this function.
    return_size  when True, return ``(hexdigest, byte_count)`` instead
                 of just the digest string.
    """
    md5 = hashlib.md5()
    size = 0

    def _consume(stream):
        # Hash the stream incrementally.  64 KiB chunks instead of the
        # original 128 bytes cut the Python-level loop overhead on
        # large files; the resulting digest is identical.
        total = 0
        buf = stream.read(65536)
        while len(buf) > 0:
            total += len(buf)
            md5.update(buf)
            buf = stream.read(65536)
        return total

    if op:
        with open(f, "rb") as fd:
            size = _consume(fd)
    else:
        size = _consume(f)

    if return_size:
        return (md5.hexdigest(), size)
    else:
        return md5.hexdigest()
def read(*paths):
    """Join *paths* into one file path and return its contents,
    decoded as UTF-8 via the codecs.open imported above."""
    target = os.path.join(*paths)
    with open(target, 'r', 'utf-8') as handle:
        contents = handle.read()
    return contents
import unittest
import os
import sys

from idigbio.json_client import iDbApiJson, ImagesDisabledException

# Locate a usable mock implementation.  Prefer the standard library's
# ``unittest.mock`` (Python 3.3+) over the third-party ``mock`` backport,
# so an installed backport no longer shadows the maintained stdlib module
# on modern interpreters; fall back to the backport for Python 2.7.
# Tests that need mocking skip themselves when neither is available.
try:
    import unittest.mock as mock_module
except ImportError:
    try:
        import mock as mock_module
    except ImportError:
        mock_module = None

if mock_module:
    Mock = mock_module.Mock
    MagicMock = mock_module.MagicMock
    patch = mock_module.patch
TestIDbApiJson(unittest.TestCase): 21 | def test___init__(self): 22 | api = iDbApiJson() 23 | self.assertIsNotNone(api.s) 24 | 25 | def test___del__(self): 26 | api = iDbApiJson() 27 | del api 28 | 29 | def test_create_map(self): 30 | api = iDbApiJson() 31 | m = api.create_map() 32 | self.assertIsNotNone(m) 33 | 34 | def test_search_media(self): 35 | api = iDbApiJson() 36 | r = api.search_media() 37 | self.assertIsNotNone(r) 38 | 39 | def test_search_records(self): 40 | api = iDbApiJson() 41 | r = api.search_records() 42 | self.assertIsNotNone(r) 43 | 44 | def test_view(self): 45 | api = iDbApiJson() 46 | r = api.view("records","56c351b5-30c0-4529-a57f-60c451cc5876") 47 | self.assertIsNotNone(r) 48 | 49 | def test_count_media(self): 50 | api = iDbApiJson() 51 | r = api.count_media() 52 | self.assertIsNotNone(r) 53 | self.assertIsInstance(r,int) 54 | self.assertNotEqual(r,0) 55 | 56 | def test_count_media_null(self): 57 | api = iDbApiJson() 58 | r = api.count_media(mq={"version": -1}) 59 | self.assertIsNotNone(r) 60 | self.assertIsInstance(r,int) 61 | self.assertEqual(r,0) 62 | 63 | def test_count_records(self): 64 | api = iDbApiJson() 65 | r = api.count_records() 66 | self.assertIsNotNone(r) 67 | self.assertIsInstance(r,int) 68 | self.assertNotEqual(r,0) 69 | 70 | def test_count_records_null(self): 71 | api = iDbApiJson() 72 | r = api.count_records(rq={"version": -1}) 73 | self.assertIsNotNone(r) 74 | self.assertIsInstance(r,int) 75 | self.assertEqual(r,0) 76 | 77 | def test_count_recordsets(self): 78 | api = iDbApiJson() 79 | r = api.count_recordsets() 80 | self.assertIsNotNone(r) 81 | self.assertIsInstance(r,int) 82 | self.assertNotEqual(r,0) 83 | 84 | def test_datehist(self): 85 | api = iDbApiJson() 86 | r = api.datehist( 87 | rq={"scientificname": "puma concolor"}, 88 | top_fields=["institutioncode"], 89 | min_date="2005-01-01") 90 | self.assertIsNotNone(r) 91 | 92 | def test_stats_api(self): 93 | api = iDbApiJson() 94 | r = api.stats("api", 
min_date="2005-01-01") 95 | self.assertIsNotNone(r) 96 | 97 | def test_stats_digest(self): 98 | api = iDbApiJson() 99 | r = api.stats("digest", min_date="2005-01-01") 100 | self.assertIsNotNone(r) 101 | 102 | def test_stats_search(self): 103 | api = iDbApiJson() 104 | r = api.stats("search", min_date="2005-01-01") 105 | self.assertIsNotNone(r) 106 | 107 | def test_top_media(self): 108 | api = iDbApiJson() 109 | r = api.top_media() 110 | self.assertIsNotNone(r) 111 | 112 | def test_top_records(self): 113 | api = iDbApiJson() 114 | r = api.top_records() 115 | self.assertIsNotNone(r) 116 | 117 | def test_upload(self): 118 | if mock_module is None: 119 | self.skipTest('mock library not installed') 120 | api = iDbApiJson(user="foo", password="bar") 121 | api._api_post = Mock() 122 | api.upload('testreference', __file__) 123 | args, kwargs = api._api_post.call_args 124 | self.assertIn("/v2/media", args) 125 | self.assertIn('files', kwargs) 126 | self.assertIn('params', kwargs) 127 | self.assertIn('etag', kwargs['params']) 128 | self.assertIsNotNone(kwargs['params']['etag']) 129 | 130 | if __name__ == '__main__': 131 | unittest.main() 132 | -------------------------------------------------------------------------------- /tests/test_json_client_map.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import os 3 | import sys 4 | 5 | from idigbio.json_client import iDbApiJson, ImagesDisabledException 6 | 7 | try: 8 | import mock as mock_module 9 | except ImportError: 10 | try: 11 | import unittest.mock as mock_module 12 | except ImportError: 13 | mock_module = None 14 | 15 | if mock_module: 16 | Mock = mock_module.Mock 17 | MagicMock = mock_module.MagicMock 18 | patch = mock_module.patch 19 | 20 | 21 | class TestIDigBioMap(unittest.TestCase): 22 | def test___init__(self): 23 | api = iDbApiJson() 24 | m = api.create_map() 25 | self.assertIsNotNone(m) 26 | self.assertIsNotNone(m._short_code) 27 | 28 | def 
test_definition(self): 29 | api = iDbApiJson() 30 | m = api.create_map() 31 | self.assertIsNotNone(m) 32 | self.assertIsNotNone(m.definition()) 33 | 34 | def test_json_tile(self): 35 | api = iDbApiJson() 36 | m = api.create_map() 37 | self.assertIsNotNone(m) 38 | self.assertIsNotNone(m.json_tile(1,0,0)) 39 | 40 | def test_png_tile(self): 41 | api = iDbApiJson() 42 | m = api.create_map() 43 | self.assertIsNotNone(m) 44 | self.assertIsNotNone(m.png_tile(1,0,0)) 45 | 46 | def test_png_tile_disabled(self): 47 | api = iDbApiJson() 48 | m = api.create_map(disable_images=True) 49 | self.assertIsNotNone(m) 50 | with self.assertRaises(ImagesDisabledException): 51 | m.png_tile(1,0,0) 52 | 53 | def test_points(self): 54 | api = iDbApiJson() 55 | m = api.create_map() 56 | self.assertIsNotNone(m) 57 | self.assertIsNotNone(m.points(0,0,1)) 58 | 59 | def test_save_map_image(self): 60 | api = iDbApiJson() 61 | m = api.create_map() 62 | self.assertIsNotNone(m) 63 | m.save_map_image("test_map",1) 64 | self.assertTrue(os.path.exists("test_map.png")) 65 | os.unlink("test_map.png") 66 | 67 | def test_save_map_image_disabled(self): 68 | api = iDbApiJson() 69 | m = api.create_map(disable_images=True) 70 | self.assertIsNotNone(m) 71 | with self.assertRaises(ImagesDisabledException): 72 | m.save_map_image("test_map",1) 73 | self.assertFalse(os.path.exists("test_map.png")) 74 | 75 | def test_utf8grid_tile(self): 76 | api = iDbApiJson() 77 | m = api.create_map() 78 | self.assertIsNotNone(m) 79 | self.assertIsNotNone(m.utf8grid_tile(1,0,0)) 80 | 81 | def test_save_map_from_bounding_box(self): 82 | api = iDbApiJson() 83 | # rectangular bounding box around Gainesville, FL 84 | bbox = {"type": "geo_bounding_box", 85 | "bottom_right": {"lat": 29.642979999999998, "lon": -82.00}, 86 | "top_left": {"lat": 29.66298, "lon": -82.35315800000001}} 87 | m = api.create_map(rq={"geopoint": bbox}) 88 | m.save_map_image("test_bounded_map", None, bbox=bbox) 89 | 
self.assertTrue(os.path.exists("test_bounded_map.png")) 90 | os.unlink("test_bounded_map.png") 91 | 92 | 93 | if __name__ == '__main__': 94 | unittest.main() 95 | -------------------------------------------------------------------------------- /tests/test_pandas_client.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import pandas 3 | 4 | from idigbio.pandas_client import iDbApiPandas 5 | 6 | class TestIDbApiPandas(unittest.TestCase): 7 | def test___init__(self): 8 | api = iDbApiPandas() 9 | self.assertIsNotNone(api) 10 | 11 | def test_search_media(self): 12 | api = iDbApiPandas() 13 | self.assertIsNotNone(api) 14 | df = api.search_media() 15 | self.assertIsInstance(df,pandas.DataFrame) 16 | 17 | def test_search_records(self): 18 | api = iDbApiPandas() 19 | self.assertIsNotNone(api) 20 | df = api.search_records() 21 | self.assertIsInstance(df,pandas.DataFrame) 22 | 23 | def test_search_records_limit_10007(self): 24 | api = iDbApiPandas() 25 | self.assertIsNotNone(api) 26 | df = api.search_records(limit=10007) 27 | self.assertIsInstance(df,pandas.DataFrame) 28 | self.assertEqual(len(df),10007) 29 | 30 | def test_search_no_results(self): 31 | api = iDbApiPandas() 32 | self.assertIsNotNone(api) 33 | df = api.search_records(rq={"scientificname": "7f2caf7a-e84a3c972752"}) 34 | self.assertIsNone(df) 35 | df = api.search_records( 36 | rq={"scientificname": "7f2caf7a-e84a3c972752"}, limit=10000) 37 | self.assertIsNone(df) 38 | df = api.search_media(rq={"scientificname": "7f2caf7a-e84a3c972752"}) 39 | self.assertIsNone(df) 40 | df = api.search_media( 41 | rq={"scientificname": "7f2caf7a-e84a3c972752"}, limit=10000) 42 | self.assertIsNone(df) 43 | 44 | def test_count_media(self): 45 | api = iDbApiPandas() 46 | self.assertIsNotNone(api) 47 | df = api.count_media() 48 | self.assertIsInstance(df,int) 49 | 50 | def test_count_records(self): 51 | api = iDbApiPandas() 52 | self.assertIsNotNone(api) 53 | df = 
api.count_records() 54 | self.assertIsInstance(df,int) 55 | 56 | def test_count_recordsets(self): 57 | api = iDbApiPandas() 58 | r = api.count_recordsets() 59 | self.assertIsNotNone(r) 60 | self.assertIsInstance(r,int) 61 | self.assertNotEqual(r,0) 62 | 63 | 64 | # TODO 65 | # def test_datehist(self): 66 | # # i_db_api_pandas = iDbApiPandas(env, debug) 67 | # # self.assertEqual(expected, i_db_api_pandas.datehist(**kwargs)) 68 | # assert False # TODO: implement your test here 69 | 70 | # def test_stats(self): 71 | # # i_db_api_pandas = iDbApiPandas(env, debug) 72 | # # self.assertEqual(expected, i_db_api_pandas.stats(t, **kwags)) 73 | # assert False # TODO: implement your test here 74 | 75 | def test_top_media(self): 76 | api = iDbApiPandas() 77 | self.assertIsNotNone(api) 78 | df = api.search_media() 79 | self.assertIsInstance(df,pandas.DataFrame) 80 | 81 | def test_top_records(self): 82 | api = iDbApiPandas() 83 | self.assertIsNotNone(api) 84 | df = api.search_media() 85 | self.assertIsInstance(df,pandas.DataFrame) 86 | 87 | def test_auth_on_session(self): 88 | p = iDbApiPandas(user="foo", password="bar") 89 | japi = getattr(p, '_iDbApiPandas__api') 90 | self.assertIsNotNone(japi.s) 91 | self.assertEqual(japi.s.auth, ("foo", "bar")) 92 | 93 | if __name__ == '__main__': 94 | unittest.main() 95 | --------------------------------------------------------------------------------