├── GBIF_API_litterature └── get_litterature_from_API.ipynb ├── README.md ├── count_number_of_organization_per_country.ipynb ├── create_github_issues_grscicoll_inst.ipynb ├── datasets_containing_preserved_specimens ├── dataset_containing_preserved_specimens.ipynb └── find_preserved_specimen_collection_and_datasets.py ├── downlaad_citizen_science_data └── download_citizen_science_data.ipynb ├── examples_download_grscicoll.md ├── find_citizen_science_datasets ├── citizen_science_datasets_26nove2018_not_reviewed.tsv ├── raw_descriptions.tsv ├── raw_occurrence_dataset_descriptions_and_titles.tsv ├── scripts │ └── citizen_science_gather_dataset_descriptions.ipynb ├── some_manually_annotated_datasets.tsv ├── test_model_subsample.tsv └── wd_replace.txt ├── get_distribution_of_records_cited_for_a_dataset ├── get_number_of_records_contributing_to_citations.ipynb └── nb_records_contributing_to_GBIF_citations_from_INSDC_dataset.csv ├── list_datasets_with_occurrence_count.ipynb ├── map_occ_to_grscicoll.ipynb ├── query_species_list ├── functions_query_from_species_list.py └── query_from_species_list.ipynb ├── species-lookup └── species-matching-gbif-api.ipynb ├── species_per_continent ├── ipbes-regions-countries.xlsx ├── species_in_country_list.py └── species_per_continent.ipynb ├── update_registry ├── change_registry_using_API.py └── create_dataset_example.ipynb └── words_associated_with_dataset_type_naive_baysian_model └── gather_dataset_descriptions_markdown.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # Small scripts using GBIF API 2 | 3 | This repository contains a few scripts I wrote to use the [GBIF API](https://www.gbif.org/developer/summary) with python. 4 | 5 | ## Counts the number of species occurring in a list of countries 6 | 7 | You can use [this function](https://github.com/ManonGros/Small-scripts-using-GBIF-API/blob/master/species_per_continent/species_in_country_list.py). 
8 | 9 | For usage, see for example: [How many butterfly species do we have per continent?](https://github.com/ManonGros/Small-scripts-using-GBIF-API/blob/master/species_per_continent/species_per_continent.ipynb) 10 | 11 | ## Counts the number of unique dataset keys and collection codes associated with preserved specimens 12 | 13 | See example [here](https://github.com/ManonGros/Small-scripts-using-GBIF-API/blob/master/datasets_containing_preserved_specimens/dataset_containing_preserved_specimens.ipynb) 14 | 15 | ## Look for words in the dataset description which could be associated with a dataset type 16 | 17 | The model is not good but the script could be reused for something else. 18 | See the notebook [here](https://github.com/ManonGros/Small-scripts-using-GBIF-API/blob/master/words_associated_with_dataset_type_naive_baysian_model/gather_dataset_descriptions_markdown.ipynb) 19 | -------------------------------------------------------------------------------- /count_number_of_organization_per_country.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import requests" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "api = \"http://api.gbif.org/v1/\"" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 6, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "# Get all organisations\n", 29 | "information = [\"endorsingNodeKey\", \"endorsementApproved\", \"country\", \"numPublishedDatasets\"]\n", 30 | "organizations = pd.DataFrame(columns=information)\n", 31 | "\n", 32 | "step = 300\n", 33 | "offset = 0\n", 34 | "end_of_records = False\n", 35 | "while not end_of_records:\n", 36 | " param = {\n", 37 | " \"offset\": offset,\n", 38 | " \"limit\": 
step\n", 39 | " }\n", 40 | " response = requests.get(api+\"organization\", param)\n", 41 | " response = response.json()\n", 42 | " for org in response[\"results\"]:\n", 43 | " uuid = org[\"key\"]\n", 44 | " for info in information:\n", 45 | " if info in org:\n", 46 | " organizations.at[uuid, info] = org[info]\n", 47 | " offset += step\n", 48 | " end_of_records = response[\"endOfRecords\"]" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 10, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "count_organizations = organizations[(organizations[\"endorsementApproved\"] == True)&(organizations[\"numPublishedDatasets\"] > 0)][[\"endorsementApproved\", \"country\"]].groupby(\"country\").count()" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 11, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "count_organizations.to_csv(\"Number_of_endsorsed_organization_publishing_datasets_per_country_20210408.tsv\", sep=\"\\t\")" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [] 75 | } 76 | ], 77 | "metadata": { 78 | "kernelspec": { 79 | "display_name": "Python 3", 80 | "language": "python", 81 | "name": "python3" 82 | }, 83 | "language_info": { 84 | "codemirror_mode": { 85 | "name": "ipython", 86 | "version": 3 87 | }, 88 | "file_extension": ".py", 89 | "mimetype": "text/x-python", 90 | "name": "python", 91 | "nbconvert_exporter": "python", 92 | "pygments_lexer": "ipython3", 93 | "version": "3.7.4" 94 | } 95 | }, 96 | "nbformat": 4, 97 | "nbformat_minor": 2 98 | } 99 | -------------------------------------------------------------------------------- /create_github_issues_grscicoll_inst.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 11, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as 
pd\n", 10 | "import json\n", 11 | "import requests\n", 12 | "import time\n", 13 | "import sys\n", 14 | "# import psycopg2\n", 15 | "import re" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 12, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "grscicoll_api = 'http://api.gbif.org/v1/grscicoll/'\n", 25 | "occurrence_api = 'https://api.gbif.org/v1/occurrence/search'\n", 26 | "organization_api = 'https://api.gbif.org/v1/organization'\n", 27 | "dataset_prefix = \"https://www.gbif.org/dataset/\"\n", 28 | "# GitHub\n", 29 | "gh_username = \"\"\n", 30 | "github_header = {'Accept': \"application/vnd.github.v3+json\"}\n", 31 | "gh_token = \"\"\n", 32 | "owner = \"gbif\"\n", 33 | "repo = \"collection-mobilization\"\n", 34 | "gh_api = \"https://api.github.com/repos/\"+owner+\"/\"+repo+\"/issues\"\n", 35 | "link_readme = \"https://github.com/\"+owner+\"/\"+repo+\"#readme\"\n", 36 | "\n", 37 | "# Scope\n", 38 | "region = \"ASIA\"" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 13, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "# test_json = {\n", 48 | "# \"title\": \"title\"\n", 49 | "# }\n", 50 | "\n", 51 | "# post = requests.post(gh_api,\n", 52 | "# data=json.dumps(test_json),\n", 53 | "# auth=(gh_username, gh_token),\n", 54 | "# headers=github_header)\n", 55 | "# print(gh_api)\n", 56 | "# post" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 14, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "def create_body_issue(inst, country, steps_to_check,\n", 66 | " gbif_prefix='https://www.gbif.org/',\n", 67 | " grscicoll_prefix='https://www.gbif.org/grscicoll/'):\n", 68 | " \"\"\"\n", 69 | " Use institution information and find related GBIF records then format all of it in a GH issue\n", 70 | " \"\"\"\n", 71 | " markdown_body = \"## \"+inst[\"name\"]+\"\\n\"\n", 72 | " markdown_body += \"**GRSciColl URL**: 
\"+grscicoll_prefix+\"institution/\"+inst[\"key\"]+\"\\n\"\n", 73 | " if inst[\"masterSource\"] != \"GRSCICOLL\":\n", 74 | " markdown_body += \"**Synchronized with** `\"+inst[\"masterSource\"]+\"`\\n\"\n", 75 | " if \"code\" in inst:\n", 76 | " markdown_body += \"**Code**: `\"+inst[\"code\"]+\"`\\n\"\n", 77 | " if \"homepage\" in inst:\n", 78 | " markdown_body += \"**Homepage**: \"+ inst[\"homepage\"]+\"\\n\"\n", 79 | " \n", 80 | " markdown_body += \"**Fuzzy name search in GBIF publisher**:\\n\"\n", 81 | " param_org = {\n", 82 | " \"q\": inst[\"name\"],\n", 83 | " \"limit\": 30,\n", 84 | " \"country\": country\n", 85 | " }\n", 86 | " inst_name_search = requests.get(organization_api, param_org)\n", 87 | " if inst_name_search.ok:\n", 88 | " organization_match = inst_name_search.json()\n", 89 | " organization_names = []\n", 90 | " organization_UUIDs = []\n", 91 | " if organization_match[\"results\"] == []:\n", 92 | " markdown_body += \"`None`\\n\"\n", 93 | " else:\n", 94 | " for org in organization_match[\"results\"]:\n", 95 | " markdown_body += \"* [\"+org[\"title\"]+\"](\"+gbif_prefix+\"publisher/\"+org[\"key\"]+\")\\n\"\n", 96 | " \n", 97 | " markdown_body += \"\\n\\n\"\n", 98 | " \n", 99 | " linked_occurrences = 0\n", 100 | " param = {\n", 101 | " \"institution_key\": inst[\"key\"],\n", 102 | " \"limit\": 0,\n", 103 | " \"facet\": \"publishingOrg\"\n", 104 | " }\n", 105 | " inst_occ = requests.get(occurrence_api, param)\n", 106 | " if inst_occ.ok:\n", 107 | " inst_occ_dict = inst_occ.json()\n", 108 | " linked_occurrences = inst_occ_dict[\"count\"]\n", 109 | " markdown_body += \"**Number of linked occurrences**: `\"+str(linked_occurrences)+\"` (\"+gbif_prefix+\"occurrence/search?advanced=1&institution_key=\"+inst[\"key\"]+\")\\n\"\n", 110 | " if linked_occurrences != 0:\n", 111 | " markdown_body += \"**Publishing organizations for linked occurrences**:\\n\"\n", 112 | " for publisher in inst_occ_dict[\"facets\"][0][\"counts\"]:\n", 113 | " markdown_body += \"* 
\"+gbif_prefix+\"publisher/\"+publisher[\"name\"]+\"\\n\"\n", 114 | " \n", 115 | " markdown_body += steps_to_check\n", 116 | " return markdown_body" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 15, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "def post_issue(data, gh_api, gh_username, gh_token,github_header):\n", 126 | " post = requests.post(gh_api,\n", 127 | " data=json.dumps(data),\n", 128 | " auth=(gh_username, gh_token),\n", 129 | " headers=github_header)\n", 130 | " if not post.ok:\n", 131 | " print(\"couldn't create issue for \", data)\n", 132 | " print(post)" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 16, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "def create_issue_for_institution_based_on_json(inst, country, gh_api, gh_username, gh_token, github_header, steps_to_check):\n", 142 | " \"\"\"\n", 143 | " Create a GitHub issue for an institution entry:\n", 144 | " * inst: is an institution JSON as returned by the GRSciColl API.\n", 145 | " * country: country of the institution\n", 146 | " \"\"\"\n", 147 | " issue = {}\n", 148 | " issue[\"title\"] = inst[\"name\"]\n", 149 | " issue[\"body\"] = create_body_issue(inst, country, steps_to_check)\n", 150 | " issue[\"labels\"] = [country]\n", 151 | " post_issue(issue, gh_api, gh_username, gh_token,github_header)" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 17, 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "def create_issue_for_institution_per_country(country, already_issued, grscicoll_api, gh_api, gh_username, gh_token, github_header, steps_to_check, step=500):\n", 161 | " \"\"\"\n", 162 | " For a given country, create one GitHub issue per GRSciColl institution entry. 
If an issue already exists, don't create a new issue\n", 163 | " \"\"\"\n", 164 | " query = {\n", 165 | " \"country\": country,\n", 166 | " \"limit\": step,\n", 167 | " \"offset\" : 0\n", 168 | " }\n", 169 | " endOfRecords = False\n", 170 | " while not endOfRecords:\n", 171 | " institutions = requests.get(grscicoll_api + 'institution/', query)\n", 172 | "\n", 173 | " if institutions.ok:\n", 174 | " institution_page = institutions.json()\n", 175 | " endOfRecords = institution_page[\"endOfRecords\"]\n", 176 | " query[\"offset\"] += step\n", 177 | "\n", 178 | " for inst in institution_page[\"results\"]:\n", 179 | " if inst[\"key\"] not in already_issued:\n", 180 | " create_issue_for_institution_based_on_json(inst, country, gh_api, gh_username, gh_token, github_header, steps_to_check)\n", 181 | " time.sleep(3)\n", 182 | " else:\n", 183 | " print(institutions)\n", 184 | " endOfRecords = True" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 28, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "def create_issue_for_institution(key, country, gh_api, gh_username, gh_token, github_header, steps_to_check):\n", 194 | " \"\"\"\n", 195 | " Create a GitHub issue for an institution entry:\n", 196 | " * key: is an institution key in GRSciColl\n", 197 | " * country: country of the institution\n", 198 | " \"\"\"\n", 199 | " institution = requests.get(grscicoll_api + 'institution/'+ key)\n", 200 | " if institution.ok:\n", 201 | " create_issue_for_institution_based_on_json(institution.json(), country, gh_api, gh_username, gh_token, github_header, steps_to_check)" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 19, 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [ 210 | "steps_to_follow = \"\"\"\\n\\n---\n", 211 | "1. **Find out if the information is complete and up to date**:\n", 212 | "\n", 213 | "- [ ] Check the homepage if available or google the institution name in the country. 
Check if all collections for that institution are represented in GRSciColl.\n", 214 | " - Check whether it is an independent entry. If duplicated merge with the selected and entry and close this issue.\n", 215 | "- [ ] Add missing collections/information to the institution on GRSciColl directly in the registry. If some collections are digitized, put the information in the GRSciColl `Notes` field.\n", 216 | "\n", 217 | "2. **Check if the data is also in GBIF**:\n", 218 | "\n", 219 | "- [ ] If there are GBIF occurrence records linked, check from which dataset/publisher they come. Is the institution a registered publisher? Or do the records come from a third party publisher? Are all the collections in GRSciColl also in GBIF? (add comments to the issue)\n", 220 | "- [ ] If no record is linked to GRSciColl, look for the institution name on the GBIF list of publishers. Is there any corresponding publisher? Have they published any data? (add comments to the issue)\n", 221 | "- [ ] If data has been published on GBIF but isn’t linked to GRSciColl, notify Marie (tag ManonGros), she can link the data.\n", 222 | "- [ ] Translate outcome of your checks into labels. 
See guidelines here: \"\"\" + link_readme" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": 20, 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [ 231 | "## If you want to query all the countries for a region\n", 232 | "# countries = []\n", 233 | "# all_countries = requests.get(\"https://api.gbif.org/v1/enumeration/country\")\n", 234 | "# if all_countries.ok:\n", 235 | "# all_countries = all_countries.json()\n", 236 | "# for country in all_countries:\n", 237 | "# if country[\"gbifRegion\"] == region:\n", 238 | "# countries.append(country[\"iso2\"])" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": 21, 244 | "metadata": {}, 245 | "outputs": [], 246 | "source": [ 247 | "countries = [\n", 248 | " \"VE\"\n", 249 | "]" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": 23, 255 | "metadata": {}, 256 | "outputs": [], 257 | "source": [ 258 | "## IF you want to have a list of all the issues already created\n", 259 | "# page = 1\n", 260 | "# end = False\n", 261 | "# already_issued = []\n", 262 | "\n", 263 | "# while not end:\n", 264 | "# issues = requests.get(gh_api, {\"page\":page}, auth=(gh_username, gh_token)).json()\n", 265 | "# page += 1\n", 266 | "# if len(issues) < 30:\n", 267 | "# end = True\n", 268 | " \n", 269 | "# for issue in issues:\n", 270 | "# search_res = re.search('https://www.gbif.org/grscicoll/institution/.+\\n', issue[\"body\"])\n", 271 | "# if search_res is not None:\n", 272 | "# already_issued.append(search_res.group(0).replace(\"\\n\",\"\").replace(\"\\r\", \"\").replace(\"https://www.gbif.org/grscicoll/institution/\",\"\"))\n", 273 | "# else:\n", 274 | "# print(issue[\"body\"])" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": 24, 280 | "metadata": {}, 281 | "outputs": [ 282 | { 283 | "name": "stdout", 284 | "output_type": "stream", 285 | "text": [ 286 | "VE\n" 287 | ] 288 | } 289 | ], 290 | "source": [ 291 | "for country in 
countries:\n", 292 | " print(country)\n", 293 | " create_issue_for_institution_per_country(country, already_issued, grscicoll_api, gh_api, gh_username, gh_token, github_header, steps_to_follow)" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": 29, 299 | "metadata": {}, 300 | "outputs": [], 301 | "source": [ 302 | "## For just one institution\n", 303 | "create_issue_for_institution(\"a50c1ed5-76c3-478c-be0b-e781e7cb04eb\", \"LT\", gh_api, gh_username, gh_token, github_header, steps_to_follow)" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": null, 309 | "metadata": {}, 310 | "outputs": [], 311 | "source": [] 312 | } 313 | ], 314 | "metadata": { 315 | "kernelspec": { 316 | "display_name": "Python 3 (ipykernel)", 317 | "language": "python", 318 | "name": "python3" 319 | }, 320 | "language_info": { 321 | "codemirror_mode": { 322 | "name": "ipython", 323 | "version": 3 324 | }, 325 | "file_extension": ".py", 326 | "mimetype": "text/x-python", 327 | "name": "python", 328 | "nbconvert_exporter": "python", 329 | "pygments_lexer": "ipython3", 330 | "version": "3.10.9" 331 | } 332 | }, 333 | "nbformat": 4, 334 | "nbformat_minor": 2 335 | } 336 | -------------------------------------------------------------------------------- /datasets_containing_preserved_specimens/dataset_containing_preserved_specimens.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import requests\n", 11 | "import json\n", 12 | "from find_preserved_specimen_collection_and_datasets import *" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [ 20 | { 21 | "name": "stdout", 22 | "output_type": "stream", 23 | "text": [ 24 | "ERROR 
http://api.gbif.org/v1/occurrence/search?limit=0&facet=collectionCode&basis_of_record=PRESERVED_SPECIMEN&facetLimit=1000&facetOffset=5000\n" 25 | ] 26 | }, 27 | { 28 | "data": { 29 | "text/plain": [ 30 | "5000" 31 | ] 32 | }, 33 | "execution_count": 2, 34 | "metadata": {}, 35 | "output_type": "execute_result" 36 | } 37 | ], 38 | "source": [ 39 | "number_of_collection_with_preserved_specimen()" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 3, 45 | "metadata": {}, 46 | "outputs": [ 47 | { 48 | "data": { 49 | "text/plain": [ 50 | "8901" 51 | ] 52 | }, 53 | "execution_count": 3, 54 | "metadata": {}, 55 | "output_type": "execute_result" 56 | } 57 | ], 58 | "source": [ 59 | "number_of_dataset_with_preserved_specimen()" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [] 68 | } 69 | ], 70 | "metadata": { 71 | "kernelspec": { 72 | "display_name": "Python 3", 73 | "language": "python", 74 | "name": "python3" 75 | }, 76 | "language_info": { 77 | "codemirror_mode": { 78 | "name": "ipython", 79 | "version": 3 80 | }, 81 | "file_extension": ".py", 82 | "mimetype": "text/x-python", 83 | "name": "python", 84 | "nbconvert_exporter": "python", 85 | "pygments_lexer": "ipython3", 86 | "version": "3.6.4" 87 | } 88 | }, 89 | "nbformat": 4, 90 | "nbformat_minor": 2 91 | } 92 | -------------------------------------------------------------------------------- /datasets_containing_preserved_specimens/find_preserved_specimen_collection_and_datasets.py: -------------------------------------------------------------------------------- 1 | import json 2 | import requests 3 | 4 | 5 | def number_of_collection_with_preserved_specimen(step=1000): 6 | """ 7 | Count the number of collections for which GBIF has occurrences of preserved specimens 8 | """ 9 | return number_of_facet_with_preserved_specimen("collectionCode", step) 10 | 11 | 12 | def 
number_of_dataset_with_preserved_specimen(step=1000): 13 | """ 14 | Count the number of datasets for which GBIF has occurrences of preserved specimens 15 | """ 16 | return number_of_facet_with_preserved_specimen("datasetKey", step) 17 | 18 | 19 | def number_of_facet_with_preserved_specimen(facet, step=1000): 20 | """ 21 | Count the number of datasets or collections for which GBIF has occurrences of preserved specimens 22 | """ 23 | offset = 0 24 | end_of_records = False 25 | nb_facet = 0 26 | base_request = "http://api.gbif.org/v1/occurrence/search?" 27 | base_request += "limit=0&facet=" + facet 28 | base_request += "&basis_of_record=PRESERVED_SPECIMEN" 29 | base_request += "&facetLimit=" + str(step) 30 | while not end_of_records: 31 | response = requests.get(base_request + "&facetOffset=" + str(offset)) 32 | if response.ok: 33 | response = response.json() 34 | nb_facet_in_page = len(response["facets"][0]["counts"]) 35 | nb_facet += nb_facet_in_page 36 | # Increment page 37 | offset += step 38 | end_of_records = (nb_facet_in_page < step) 39 | else: 40 | print("ERROR", base_request + "&facetOffset=" + str(offset)) 41 | end_of_records = True 42 | return nb_facet 43 | -------------------------------------------------------------------------------- /downlaad_citizen_science_data/download_citizen_science_data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 59, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import json\n", 10 | "import requests" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 60, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "def get_citsci_dataset_list(api=\"http://api.gbif.org/v1/\",\n", 20 | " namespace=\"citizenScience.mgrosjean.gbif.org\"):\n", 21 | " '''\n", 22 | " Gets the dataset UUIDs for all the datasets currently tagged\n", 23 | " as citizen science datasets.\n", 24 | " '''\n", 
25 | " citizen_science_datasets = []\n", 26 | " offset = 0\n", 27 | " step = 900\n", 28 | " end_of_records = False\n", 29 | " while not end_of_records:\n", 30 | " param = {\n", 31 | " \"offset\": offset,\n", 32 | " \"limit\": step,\n", 33 | " \"machineTagNamespace\": namespace\n", 34 | " }\n", 35 | " # Query API\n", 36 | " response = requests.get(api + \"dataset\", param)\n", 37 | " if response.ok:\n", 38 | " citsci_dataset = response.json()\n", 39 | " for dataset in citsci_dataset[\"results\"]:\n", 40 | " citizen_science_datasets.append(dataset[\"key\"])\n", 41 | " offset += step\n", 42 | " end_of_records = citsci_dataset[\"endOfRecords\"]\n", 43 | " else:\n", 44 | " print(\"ERROR\")\n", 45 | " end_of_records = True\n", 46 | " return citizen_science_datasets" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 61, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "def create_download_given_query(login,\n", 56 | " password,\n", 57 | " download_query,\n", 58 | " api=\"http://api.gbif.org/v1/\"):\n", 59 | " '''\n", 60 | " Query the download API\n", 61 | " '''\n", 62 | " headers = {'Content-Type': 'application/json'}\n", 63 | " download_request = requests.post(api + \"occurrence/download/request\",\n", 64 | " data=json.dumps(download_query),\n", 65 | " auth=(login, password),\n", 66 | " headers=headers)\n", 67 | " if download_request.ok:\n", 68 | " print(\"ok\")\n", 69 | " else:\n", 70 | " print(download_request)\n", 71 | " return download_request" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 63, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "# Build Download query\n", 81 | "\n", 82 | "download_query = {}\n", 83 | "download_query[\"creator\"] = \"\"\n", 84 | "download_query[\"notificationAddresses\"] = [\"\"]\n", 85 | "download_query[\"sendNotification\"] = True\n", 86 | "download_query[\"format\"] = \"SIMPLE_CSV\"\n", 87 | "\n", 88 | "# Specify predicates\n", 89 | 
"download_query[\"predicate\"] = {\n", 90 | " \"type\": \"and\",\n", 91 | " \"predicates\": []\n", 92 | "}\n", 93 | "predicate_dataset = {\n", 94 | " \"type\": \"in\",\n", 95 | " \"key\": \"DATASET_KEY\",\n", 96 | " \"values\": get_citsci_dataset_list()\n", 97 | "}\n", 98 | "# I want only the insects from scitizen science dataset:\n", 99 | "predicate_taxon = {\n", 100 | " \"type\": \"equals\",\n", 101 | " \"key\": \"TAXON_KEY\",\n", 102 | " \"value\": 216 # insect\n", 103 | "}\n", 104 | "download_query[\"predicate\"][\"predicates\"].append(predicate_dataset)\n", 105 | "download_query[\"predicate\"][\"predicates\"].append(predicate_taxon)" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 64, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "login = \"\"\n", 115 | "password = \"\"" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "create_download_given_query(login, password, download_query)" 125 | ] 126 | } 127 | ], 128 | "metadata": { 129 | "kernelspec": { 130 | "display_name": "Python 3", 131 | "language": "python", 132 | "name": "python3" 133 | }, 134 | "language_info": { 135 | "codemirror_mode": { 136 | "name": "ipython", 137 | "version": 3 138 | }, 139 | "file_extension": ".py", 140 | "mimetype": "text/x-python", 141 | "name": "python", 142 | "nbconvert_exporter": "python", 143 | "pygments_lexer": "ipython3", 144 | "version": "3.6.4" 145 | } 146 | }, 147 | "nbformat": 4, 148 | "nbformat_minor": 2 149 | } 150 | -------------------------------------------------------------------------------- /examples_download_grscicoll.md: -------------------------------------------------------------------------------- 1 | ## Examples of use of GRSciColl export function 2 | 3 | * All GRSciColl institutions: http://api.gbif.org/v1/grscicoll/institution/export 4 | * US institutions: 
http://api.gbif.org/v1/grscicoll/institution/export?country=US 5 | * Argentinian institutions: http://api.gbif.org/v1/grscicoll/institution/export?country=AR 6 | * US Federal institutions: http://api.gbif.org/v1/grscicoll/institution/export?institutionalGovernance=FEDERAL&country=US 7 | * All GRSciColl collections: http://api.gbif.org/v1/grscicoll/collection/export 8 | * US collections: http://api.gbif.org/v1/grscicoll/collection/export?country=US 9 | * Collections containing exoskeletons: http://api.gbif.org/v1/grscicoll/collection/export?preservationType=BIOLOGICAL_EXOSKELETONS 10 | 11 | ## List of searchable parameters 12 | 13 | Link to API documentation: https://www.gbif.org/developer/registry#collections 14 | 15 | ### For institutions: 16 | 17 | * `q` 18 | * `contact` 19 | * `code` 20 | * `name` 21 | * `alternativeCode` 22 | * `identifier` 23 | * `identifierType` 24 | * `machineTagNamespace` 25 | * `machineTagName` 26 | * `machineTagValue` 27 | * `country` 28 | * `active` 29 | * `type` (possible values: https://api.gbif.org/v1/enumeration/basic/InstitutionType) 30 | * `institutionalGovernance` (possible values: https://api.gbif.org/v1/enumeration/basic/InstitutionGovernance) 31 | * `discipline` (can be specified more than once, possible values: https://api.gbif.org/v1/enumeration/basic/Discipline) 32 | 33 | ### For collections: 34 | 35 | * `q` 36 | * `institution` 37 | * `contact` 38 | * `code` 39 | * `name` 40 | * `alternativeCode` 41 | * `identifier` 42 | * `identifierType` 43 | * `machineTagNamespace` 44 | * `machineTagName` 45 | * `machineTagValue` 46 | * `country` 47 | * `active` 48 | * `contentType` (can be specified more than once, possible values: https://api.gbif.org/v1/enumeration/basic/CollectionContentType) 49 | * `preservationType` (can be specified more than once, possible values: https://api.gbif.org/v1/enumeration/basic/PreservationType) 50 | * `accessionStatus` (possible values: https://api.gbif.org/v1/enumeration/basic/AccessionStatus) 51 | * 
`personalCollection` 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /find_citizen_science_datasets/citizen_science_datasets_26nove2018_not_reviewed.tsv: -------------------------------------------------------------------------------- 1 | UUID CS 2 | 02131297-7f17-4eb7-aeb6-9692ac6d5ea1 T 3 | 02abb9d1-7d81-42b3-ac9a-3b3d0c7a5280 T 4 | 04e5adc3-28c6-4589-b48a-36b4c8609b40 T 5 | 07609307-26f9-490f-89ea-4e74497e3c0d T 6 | 11314af8-aad0-4414-a961-e91b88fd4abb T 7 | 115f45ed-4334-4144-b5c5-669027e9b191 T 8 | 13b70480-bd69-11dd-b15f-b8a03c50a862 T 9 | 169fa761-2fb9-4022-93bd-e22b7a062efd T 10 | 1eb5d9c0-2e4c-11de-909a-b8a03c50a862 T 11 | 1fef1ead-3d02-495e-8ff1-6aeb01123408 T 12 | 202af80e-3bd1-4856-9f67-833588872230 T 13 | 233ba471-91a2-47a4-9946-b7e844f2124c T 14 | 2b3a8ad6-c040-4105-9eae-cfdf0003cf88 T 15 | 2deeb3ad-0390-4d2c-9623-c5abc6610b08 T 16 | 2e4f4ed1-77d0-4ee2-a895-6f894d3fcc13 T 17 | 2f59780e-3d77-403a-9437-9a560ea5f764 T 18 | 2fce3ac5-5bd1-4c11-82c8-68bfb937399a T 19 | 30ac77b7-9434-4eec-84c3-52b507ee9b8b T 20 | 333e2382-51d2-4c9d-93cf-ff46288ad1b9 T 21 | 3697fec6-b46c-4976-a7c2-417188574dcf T 22 | 37867d84-1143-4378-ac6f-95e5e19650e9 T 23 | 38b4c89f-584c-41bb-bd8f-cd1def33e92f T 24 | 38f97143-922d-4ec0-a6e5-75e51a77890a T 25 | 3b74ac9f-b90b-4113-969f-a489a5aa11fc T 26 | 3b823447-b6b3-4424-80ae-643e877a1c8d T 27 | 3cf84bf6-5a7b-4abe-89ef-b16eaef2fa3c T 28 | 3eb94dd5-c5d2-4a55-947c-7a4d3535ced8 T 29 | 3f0bfe27-b8d7-459e-8b83-1abc57aa669c T 30 | 42319b8f-9b9d-448d-969f-656792a69176 T 31 | 432b3389-4758-4b14-bae1-617e4463cd73 T 32 | 4764db32-05c6-4ac3-a648-670db5de9d37 T 33 | 4ea0d3ad-eab4-4c2c-87b4-8247eae6cec5 T 34 | 4eb871a4-6dc4-4db6-abf2-3ab553a41008 T 35 | 4fa7b334-ce0d-4e88-aaae-2e0c138d049e T 36 | 4fac6de9-a3ae-4334-809c-89d7a56e0d06 T 37 | 5024b47d-c401-4543-9780-8890ae6d1e13 T 38 | 50c9509d-22c7-4a22-a47d-8c48425ef4a7 T 39 | 516bb920-2e4c-11de-909a-b8a03c50a862 T 40 | 
51e25910-c7b5-11de-b279-e8b0507c4765 T 41 | 52f2051b-c47e-403a-8e32-04b2f2273c20 T 42 | 555dfae5-6a38-4db5-ac80-58d9b5626e29 T 43 | 576c0061-40a6-46fa-8f95-a425eef87893 T 44 | 5a1b62b2-4a1f-4404-b481-87836564d4a5 T 45 | 5ba69a5d-a3f6-4e4e-bbf6-52ebf7dc47b3 T 46 | 5ee22017-b870-4d04-95c6-13e335b7f032 T 47 | 5f83f1a9-05dd-41df-96fd-743d6f50c111 T 48 | 626fca6f-8590-478b-a662-b9c82afad2b5 T 49 | 64ec1ba4-08d4-46ef-b40a-68965728e182 T 50 | 669ecb52-bfbe-4220-99c7-bafc643df32e T 51 | 66ca4356-bdef-4de4-b5d0-c1b6d69b7fff T 52 | 66f6192f-6cc0-45fd-a2d1-e76f5ae3eab2 T 53 | 67e2335c-4303-4ee3-9bfe-ecfe65662629 T 54 | 68705ca0-5e04-4518-aec3-d6829d003570 T 55 | 69fe6f86-b594-4521-8ae1-4b1d4b7ed620 T 56 | 6ac3f774-d9fb-4796-b3e9-92bf6c81c084 T 57 | 6d5fced1-6e85-4d9e-88f4-e1459772d2fd T 58 | 6df4cdef-c55d-4a03-9055-3cbadcdd05aa T 59 | 6e09f9dc-0bf2-4c20-8ff6-bd898bbc4831 T 60 | 6ec63232-a552-4c8d-9932-5caecefdace4 T 61 | 70f6c067-ea99-4acc-9fd0-ae4c53dffcc0 T 62 | 711d6639-7d82-4282-aac5-c901ac280d75 T 63 | 723c4647-5916-4575-a9bb-a0ea15852f8d T 64 | 73c32d08-e30f-476b-8d65-e661a9df0c3c T 65 | 73d4365a-8868-4d84-a6fd-51098b968170 T 66 | 76cc7230-76b6-4763-9caf-22626b29c0a6 T 67 | 78ba4493-432d-4458-8696-7901cbe5e113 T 68 | 78d49398-9309-4c93-8ebf-9e16e4ffbd5b T 69 | 7b9521ef-988f-45eb-9c89-b8528a7debc1 T 70 | 7bc261a0-c7b8-11de-b279-bb42b9ac878e T 71 | 7c44411b-0296-4634-9538-0ae43b10a38a T 72 | 7f513bfc-f762-11e1-a439-00145eb45e9a T 73 | 7ff7a20f-f3b8-4a25-9057-069ae3417c49 T 74 | 80576d53-992a-41f3-a3e3-dfd0fa0242ee T 75 | 806df471-8b78-4166-a186-ba07c7c957ec T 76 | 820177a0-2e4c-11de-909a-b8a03c50a862 T 77 | 827a08ae-f762-11e1-a439-00145eb45e9a T 78 | 82ab0954-f762-11e1-a439-00145eb45e9a T 79 | 82cb293c-f762-11e1-a439-00145eb45e9a T 80 | 8325e0d4-f762-11e1-a439-00145eb45e9a T 81 | 8442816f-cf5f-4c21-9d3d-5c368b4006d8 T 82 | 84606c08-f762-11e1-a439-00145eb45e9a T 83 | 846296d6-f762-11e1-a439-00145eb45e9a T 84 | 84a649ce-ff81-420d-9c41-aa1de59e3766 T 85 | 
84cca7ba-f762-11e1-a439-00145eb45e9a T 86 | 84d26682-f762-11e1-a439-00145eb45e9a T 87 | 85e79b82-f762-11e1-a439-00145eb45e9a T 88 | 85eb6fc8-8e95-4621-9fe0-661d483ac614 T 89 | 85f86f01-b572-467e-98c4-6ca2027be5f0 T 90 | 864eb98e-f762-11e1-a439-00145eb45e9a T 91 | 867b3fb8-f762-11e1-a439-00145eb45e9a T 92 | 87b6acb8-4db2-4769-93ca-e086040dfe14 T 93 | 884cb99c-95a0-4dac-94dd-d57dc509c560 T 94 | 8a309be4-939e-4874-a6ba-6ebee4ebeaac T 95 | 8b708d1a-3021-4cfd-aa17-f9696317292b T 96 | 93f5be1b-c517-4a76-8cbe-6e81a5dd4f85 T 97 | 94d8f41f-7204-4a89-82ae-e337710dd543 T 98 | 95db4db8-f762-11e1-a439-00145eb45e9a T 99 | 95f572da-a3a9-4706-bccb-03521079a520 T 100 | 96c6e82c-f762-11e1-a439-00145eb45e9a T 101 | 98e0629a-678b-4bec-98ef-fb097262bf18 T 102 | 9cbcf2b3-448f-48ce-9510-39e25c6234a8 T 103 | 9cc95cb1-d97a-4c35-9d0e-5de8a6e2f134 T 104 | 9db7cf84-85b0-4fe9-b3a3-2bf622ad4fcc T 105 | 9dbb55ce-0b38-4468-9172-40c8481edd2a T 106 | 9f1c80ed-aa0f-4c7c-8679-6a31467d8fe4 T 107 | a1bd8c5d-cfb3-4d55-943a-cebcd6c32be6 T 108 | a3d5f6ba-034e-4167-bd8a-1fb80018ae61 T 109 | a416dc00-2e4b-11de-9099-b8a03c50a862 T 110 | a8702089-c388-42ea-aea8-8c0c1ba19193 T 111 | a8742f30-c7b8-11de-b279-ebdd43e85d4a T 112 | a8d08280-1def-11de-be11-b8a03c50a862 T 113 | aaebba56-769a-4564-9b66-a7f4b566e25b T 114 | ac1a6843-ac40-4073-bfb1-8ffaa4b73b66 T 115 | ad2e51d0-1e04-11de-be11-b8a03c50a862 T 116 | b0dacaa0-6511-4d21-9d55-129bc400831d T 117 | b124e1e0-4755-430f-9eab-894f25a9b59c T 118 | b4e21aec-30ff-4867-8503-2bf8de3a4fe7 T 119 | b5345830-c7b8-11de-b279-99d156ba6477 T 120 | b70121ef-b7ea-4316-a05b-abdf30f5ca09 T 121 | bc1bc7d8-8f63-46ae-bdae-f34d8f3d3c75 T 122 | bc94931c-4197-42bd-9cac-32c09e3959f3 T 123 | bd92e852-0fec-4679-82c2-b7e4ba66c48e T 124 | be2af664-2990-4153-99b5-d92bbd8cdb0e T 125 | be7ebcf3-6053-4e00-9c07-989cf2d56b2d T 126 | c10dfc42-db10-49e9-9b32-2fcaabbecb92 T 127 | c2e643bb-cf05-4089-b6fe-b3389b5d1c44 T 128 | c35d6000-c7b8-11de-b279-9fe72ba677dc T 129 | 
c5645d41-0978-4f99-89f5-317351335fef T 130 | c585e6fb-fd76-426e-ae01-a32dc9de5689 T 131 | c6bbb6ef-ad16-4f3c-99e2-f693760173e0 T 132 | c779b049-28f3-4daf-bbf4-0a40830819b6 T 133 | c84310f0-a629-47ff-96b5-51373acdfdcc T 134 | ca435754-9190-4a87-9b2c-c21563898294 T 135 | cb6e66f1-3056-404d-a341-bb856762c57c T 136 | cbc9dc02-6315-4daf-add2-d132ace4662c T 137 | ce2583cc-da9e-440f-8811-44a1f8230484 T 138 | cfbc0f8c-e6c6-4634-b012-9bf20c22e50d T 139 | d3484430-8876-11dd-986c-b8a03c50a862 T 140 | d435e038-1f0a-4007-9429-4e55e71428e7 T 141 | d8144310-c7b8-11de-b279-97e3db606fab T 142 | db09684b-0fd1-431e-b5fa-4c1532fbdb14 T 143 | db0a80b1-f4cc-4a48-a079-f293acb5a043 T 144 | de8221ff-6110-46e3-aec4-b61133f5b38c T 145 | dee8edc4-b19a-11e2-886d-00145eb45e9a T 146 | e2929e2d-e162-4767-b53e-13d839d36d1a T 147 | e6c97f6e-e952-11e2-961f-00145eb45e9a T 148 | ebdc9eed-2673-43fc-a4e9-f80de9a68bcb T 149 | ed923320-2e4b-11de-9099-b8a03c50a862 T 150 | f0f8e840-1df9-11de-be11-b8a03c50a862 T 151 | f648085b-8e1c-4a79-bc8d-f45d36296564 T 152 | f794b231-42de-4008-ba8e-809e01ee7785 T 153 | faf313a1-9ae4-43f4-bfc9-974281feac0e T 154 | fb7d56ee-11aa-454f-8542-89e2ce0fa976 T 155 | fd1a7c99-f7b8-4887-b36c-5bdbd13eae3b T 156 | fdf097eb-1576-47dd-9b79-1943be6e75ed T 157 | fe95dd88-c8dc-4e09-804e-c36dd22de761 T 158 | feb41318-374b-4ed6-b61e-0369993abedc T 159 | ffc8f6a2-0bb4-45c5-a978-9064a52c5e02 T 160 | 6378806e-388e-4293-b7f4-775548da77b3 T 161 | f305217e-aed5-4b03-b0a1-7d69ff760d35 T 162 | f240817c-23f3-4a8f-b32e-97833663fd4d T 163 | 5c5e17e7-fbc1-4452-ac55-b8876e10e289 T 164 | 196e6026-9979-46fa-a260-95a91336d095 T 165 | ce861efd-402f-4c10-a9fb-30ad0426bd15 T 166 | 54facf62-bf0c-43e7-9972-4fdc407fa6c5 T 167 | ee74ecf1-7acf-4c5d-8dcb-43e09fae474c T 168 | a542665f-1b92-406e-8d92-072fcba576b2 T 169 | bd8ee748-4ae8-4956-98d9-c1b8e8b2b9c9 T 170 | 77c3a178-a140-4a5d-87af-1f29e29f8330 T 171 | d91b392e-bb9d-4fb2-99d8-6053e1ade94f T 172 | 6dc35c87-ff97-4708-9e45-6f764ddc1ef3 T 173 | 
f145c27d-b1c3-4136-85b1-c21674d2d1c3 T 174 | cf7d6c01-309b-4545-8319-3d53b1e8bfd0 T 175 | cca13f2c-0d2c-4c2f-93b9-4446c0cc1629 T 176 | f1a4ce9a-97cd-4d35-bf5e-561f2f1c6d91 T 177 | 37e094f3-dcf2-469f-93a2-c4b9b5fa7275 T 178 | e58dbe2f-cfc0-40c2-a04d-e45a7e876980 T 179 | 57b39cb1-45e9-4444-9eb3-9c489e94d63e T 180 | 0c8b3534-7666-44bb-9dc0-913834eb04f0 T 181 | 3b8c5ed8-b6c2-4264-ac52-a9d772d69e9f T 182 | 6a948a1c-7e23-4d99-b1c1-ec578d0d3159 T 183 | 7d28fe9c-190a-43ab-bdf8-a7235490bbf4 T 184 | c671d43f-7fb9-4b5a-a964-630bfbf47dd2 T 185 | 5e6b1784-7ade-4542-a61f-8adf9bfb0b7b T 186 | 228491dd-bd53-4cde-a91e-378092cf90cf T 187 | 958b1d2f-2d11-4e94-a828-c8e2d2c013ca T 188 | 11a24c97-653b-4e4c-a524-a4c06ffab25f T 189 | f14f3ec9-c54a-477d-82c8-f7ba6925534a T 190 | 8a6ca920-216c-4731-a892-f5132570d9ad T 191 | 8ae09016-b819-450e-b8f6-c1f249110502 T 192 | 19671673-994b-4100-89ed-dc4cc18e0adc T 193 | 557146f2-4f90-43ad-a2eb-c4f9ace7a2df T 194 | 82e3cee7-c06b-40ae-ab30-b3c911a2b713 T 195 | d9870517-1d50-45f0-831c-a783922c22d3 T 196 | 6de6a7d6-95ce-40b8-a569-43b127a57bc5 T 197 | d89c884f-0661-4ceb-a531-ceae2374b915 T 198 | dc2d2c19-92b8-485d-802d-f7c8f1d2055f T 199 | 99ce2b9e-816a-47a7-ad68-b2915763a576 T 200 | c709bf36-4964-4771-90f0-c6ba4b351620 T 201 | 67f85ce5-4cfb-45ac-b05f-cac763d0b838 T 202 | d85e9305-e0e5-4e95-b7b0-e87a0488db85 T 203 | 869b574f-23eb-4575-a771-6ffd480dcdab T 204 | eabe9306-fd68-4140-83fc-554da92e6268 T 205 | d8c540f5-bb74-4748-9634-034ccd8ec52b T 206 | 31cc8b55-0455-4faa-aa50-e135d1dcacf0 T 207 | f0c74a2c-4bd8-49d0-837a-92bb835fd2f3 T 208 | ef7e0d45-c59f-4f48-afa3-53d9f545de56 T 209 | 241f9bd4-613e-4050-9fe0-2ff44b526c84 T 210 | 5fe8fc90-ed14-46c3-8bac-9dbd2d4b92d7 T 211 | 84149905-2fc7-4cc6-a9c8-f4186e0a62a8 T 212 | c01ddb0f-b7e4-4723-b630-7e0c2217230e T 213 | eda0e170-a39f-44fc-9c03-cce4f7417df7 T 214 | 594e1181-c38a-4f44-b78c-ceab50351cca T 215 | ab47c4a9-4b6a-4d59-b846-899d9420f7d3 T 216 | baa86fb2-7346-4507-a34f-44e4c1bd0d57 T 217 | 
2d680d46-d783-4ea7-94b1-2556cd653e36 T 218 | cbd241aa-a115-4856-af66-fac5cb90f2cc T 219 | 2aaf8ea9-0460-41d2-a651-3583479947c6 T 220 | 172149d2-2dc0-43fb-a7b1-57e3e4ec34a2 T 221 | 6b76dc19-eb87-4ca2-b738-3727286f1818 T 222 | 465e33dd-59ec-4e55-b2c0-1edde45d757e T 223 | a2e308bf-e9ec-4651-906e-956c963df0ca T 224 | 990e845f-836b-4467-bc5f-46c8495259ec T 225 | b480871a-6c2f-4c35-ab94-6de203c4ff28 T 226 | ce1faba4-d8d9-46ac-ba26-cffe03eadc99 T 227 | 120b7285-b1e6-4653-ab30-8d4231ccc9bc T 228 | 2ae1fe04-6f12-4766-bdee-50e5b2dc1148 T 229 | 258c9ce5-1bda-4001-a192-347c9e7fb186 T 230 | 6f0597b0-54d4-4084-8d4b-339d5b806181 T 231 | acd3e0ac-cfb0-41c1-827b-f831c4b5b57d T 232 | 1270b3c8-ff55-4b63-a407-dcd7e08cd23f T 233 | 2de58bfe-1bf1-4318-97a3-d97efc269a4f T 234 | 37a1816f-d4fd-4d30-bf74-72ca5bf0eb90 T 235 | 94c74198-9eaf-4b43-a349-0b566dcd648a T 236 | 80df9ab6-fb28-422c-8b86-e92cf9bfdbf7 T 237 | 84df82e3-99a3-4150-aea2-e2d2e159c749 T 238 | 2488cd13-4f80-479c-ae54-257de312054e T 239 | f530f789-58de-4b43-9f42-1292db1945cf T 240 | 66f39e15-e74d-491c-b5d2-397cd6bad884 T 241 | 906e6978-e292-4a8b-9c39-adf6bb0f3323 T 242 | 2ce65268-cf51-4ecb-bb36-ce615564c255 T 243 | 4fa0d1fc-ad2f-4c0d-b716-278693f91afc T 244 | fcd8414a-9d49-492c-ab63-17b55aa05b95 T 245 | 9d2d0ec1-bc13-40df-a6ff-6df727cc42b5 T 246 | 3184ffaf-a7d3-4498-a8f9-3f1e0e45e1e9 T 247 | 52422715-c1db-40a0-ba16-b84a295d248a T 248 | 928304f1-d313-47cd-b17b-0232bc27b727 T 249 | 82d23348-0fee-4e2d-9152-48136ef54b2d T 250 | c45c8530-fd7d-4f30-a9e4-c6d243a4b404 T 251 | a899709f-9a73-4cac-84d8-a7dcc7b70280 T 252 | 05a46222-b9ff-4162-a0e1-08bfcc5c6584 T 253 | 0284b82d-cdd1-4d67-83bd-c8e42e988adb T 254 | febe3d7b-59a7-45c1-abbd-bd610e85823b T 255 | c3c56ab1-706c-4e7f-85f8-f6f860e203f2 T 256 | 6d2c56f6-9643-429c-b4e6-c752f6a2602a T 257 | 3a47d44d-de57-4332-adb2-999e70e28960 T 258 | 3cc1bf3e-e503-4d2a-9b23-a54b9269ebbe T 259 | 2912f89b-4104-4dd7-a5fc-bbe45884cdbc T 260 | 1a83ba26-a32f-4474-9e86-4d3cfd6c7e0e T 261 | 
35752720-9dc9-40ce-aea5-63915e8aeb0e T 262 | 4b75c0ac-f7c4-4883-942d-d1dfe6939754 T 263 | 116f7655-c1cc-475e-a567-2f5f2b358f73 T 264 | bde5b63d-8664-4632-bc8b-d06653b09b22 T 265 | ad005057-c4b5-4eb1-8b3f-8cdc7aaf994f T 266 | f03ae1bb-de27-45ea-bc58-4911e17d308f T 267 | 1c12eb6c-84b6-4db2-a8a8-8c297685e2eb T 268 | 4d4e7415-5800-4d8b-8d4d-6de938e8d38c T 269 | c4a764e4-8455-425b-b553-253f9f9dfbac T 270 | 547f2b71-ba25-4254-aaf1-4823600e7784 T 271 | dd946360-820d-4959-a752-77ed01cdc661 T 272 | bba60198-3eb5-4101-954d-3e2c87df4b73 T 273 | 9566b8fc-3f1d-472b-b037-7ceb5e4e94f9 T 274 | 13558000-99a0-45de-8d3b-db87a578ace7 T 275 | 82091a91-03bb-4af0-8fc3-e141ad1a6efc T 276 | e6d9a4a4-dbf0-4a3e-87c2-91b6af1f9771 T 277 | 68fa8801-0b3e-400e-afa6-5ad08e0f1e4c T 278 | 91a7ba5f-1090-45f6-9e71-48b0bce5ab63 T 279 | 4273f8ff-73c6-4791-89dc-b64f47b7416a T 280 | 6867ae37-74b8-43e7-805d-5dac0a6d447e T 281 | f3278405-0943-41a0-9f04-3a7733ec344d T 282 | 1864778a-0d5a-4b59-9396-33fdf6539b65 T 283 | 313f597d-32cd-4389-9f27-5966e3444d53 T 284 | 58aa0748-bdac-458f-99f6-cf39f2e15bc5 T 285 | 04a041d6-bfa9-4efd-97f4-3a016e973f37 T 286 | 744505cc-d9a7-4b00-a3cb-77826a46614b T 287 | 2b91dc29-e06a-4e2e-a85b-e7781b4f5d0d T 288 | b660c51c-17cc-44e7-973a-bec13da75166 T 289 | ba84601e-4470-4de5-ae07-02431ac322ab T 290 | e0eb19aa-f73c-4e45-917e-d2d3c157ed92 T 291 | be8970d8-8e56-4ba7-b09c-70e78457f465 T 292 | e351951e-32da-46d6-b61c-e9816d39e0d4 T 293 | 216952bd-4392-4f48-bd5f-322486e2b35c T 294 | f49dd893-1e91-4ce6-9aa7-e1047ffee557 T 295 | 9a0b66df-7535-4f28-9f4e-5bc11b8b096c T 296 | 7f5e4129-0717-428e-876a-464fbd5d9a47 T 297 | b3d40286-603b-47ee-bcaa-e829c383675d T 298 | 824c7e2e-d2fb-466f-bf1b-ee975f1f6956 T 299 | 4ac5f415-6f44-43d0-a19b-2d91e5dcbe2b T 300 | 83e20573-f7dd-4852-9159-21566e1e691e T 301 | 9cf65d8e-f5ee-4e52-a88a-ab16c4bb78de T 302 | 912ac7b3-f473-435b-97d1-20b6ed0a6208 T 303 | e3803a54-b4fd-46d7-ab67-ad8f6871fae5 T 304 | 683957d7-7bb8-44da-bdc1-8d61a41d2b7b T 305 | 
2f3751bd-f348-4dd0-bec3-61053f43d211 T 306 | 44c73c0b-662e-41e7-8eee-4754a8592a27 T 307 | 1f968e89-ca96-4065-91a5-4858e736b5aa T 308 | 04234973-a68a-4c56-a786-c6c7da0efe21 T 309 | 138cc86a-61be-4e89-86a7-737086398cbf T 310 | cb429b64-d789-47bb-a7d8-379657c5e407 T 311 | 56b1a75f-6bf7-47ff-bcdb-a4d402172b1d T 312 | 2fd17f41-80ef-4c15-ac48-63ca89f4bb16 T 313 | f351577b-9f46-4130-accf-41d55c3ed508 T 314 | c4e54359-1ee7-4fe4-a768-e17da314784e T 315 | 19016860-06e5-4bd9-ae82-4eed1b39007e T 316 | a7e947a1-15d4-43ab-a244-b4e7511e5423 T 317 | 4797da32-5693-4281-a8f1-bf99bae66ddb T 318 | b8866a8d-adc7-4a3a-887b-e59f830bb988 T 319 | e04b3c0a-79a8-4df6-91f9-8ca5fb52d41c T 320 | 09efcc43-c674-4a70-b326-cd83f7463d1d T 321 | 045d64f1-29e5-4b8e-a0e7-8d7bd6b4d1db T 322 | 6d66d064-0f02-4441-95fc-a83840c7f8ff T 323 | 6b3877d5-4869-43aa-af12-92c703fd5b6d T 324 | 8e2961c9-5996-474b-9bfc-90370e04448b T 325 | b66c629e-b878-4b26-960d-7c0103b025c7 T 326 | a3ad1858-32f3-4cbe-9b9d-df772e2d18b1 T 327 | d18e2338-0455-48b3-a85a-bd5e8de66b75 T 328 | efb3f1a0-43d5-4ae6-a58f-4fd61043756a T 329 | 8a0e70b1-dd09-4c74-85f1-68554108ba7b T 330 | 6c7ea09f-a712-4dc5-95c6-59c4687e810d T 331 | 7f9eb622-c036-44c6-8be9-5793eaa1fa1e T 332 | fc8c35b2-cf16-48c0-98cb-472c02452d4c T 333 | 5d784d06-fa1d-4f00-8cdc-663d04d26061 T 334 | 90eae6b8-57d2-4d24-8eb3-1dbde00f1669 T 335 | 9ebeccc1-895b-4751-9c56-90f646b8b303 T 336 | c19a8f6e-c368-494d-942e-10108d8867a7 T 337 | 3fab912e-e927-4f1c-a97c-eb446cd609e0 T 338 | 271c444f-f8d8-4986-b748-e7367755c0c1 T 339 | d1a8a72f-658d-4996-8ae5-43ce336d9eb4 T 340 | b6a04f9b-d449-43a2-9707-36260bdb25de T 341 | 07f6c820-16ba-49a8-81af-72f7028b6d04 T 342 | 1897eef5-1e10-4842-97f7-cad98925b59d T 343 | d4368fa0-ffbe-4cb8-8e32-7bb902e3656d T 344 | b5ae9525-5de3-438c-b4c0-3d3257f4bea2 T 345 | dce533af-6a50-4d6b-900b-7c60fb709559 T 346 | f8f4c490-8ab3-4e27-b871-944fb807bc1b T 347 | dad6253c-be06-4626-9b95-d48823137ced T 348 | 3d6c8883-c29b-48f6-9fbf-9a5ad6ecbee4 T 349 | 
2b2bf993-fc91-4d29-ae0b-9940b97e3232 T 350 | 417f4d21-959b-4773-90a2-c38d1822d873 T 351 | e2bc2f00-62f3-4fd4-b9f3-89c030bca07a T 352 | 7aa24e43-257b-44b6-ab4d-695372f2e3f3 T 353 | f519367d-6b9d-411c-b319-99424741e7de T 354 | 95ed1fa5-2923-4459-836b-11ad8cc4bf42 T 355 | 4896f82a-ff72-4d14-bf2a-780b86ab03f7 T 356 | 9cee82bd-8963-4118-a38d-109e2a613ce2 T 357 | b6da6358-dd77-41fe-848e-1383f14a9ffb T 358 | 30d18e81-2b7a-40ed-828a-72a1c7f585c4 T 359 | 6aebb29a-4dcd-41cb-be99-e08865e422d8 T 360 | cce01da5-fba8-4fd9-ad28-ec7a82e14f31 T 361 | 8124cd73-ac84-43d2-ab39-1d80dc346525 T 362 | 95f5b4eb-e696-48a9-8f00-7d1b0b4ed516 T 363 | e0b8cb67-6667-423d-ab71-08021b6485f3 T 364 | c04a8bc5-eccd-432c-b9b5-e86552e2a7f5 T 365 | 08360455-914e-4d02-809a-13e3cd8a8d21 T 366 | 23db4323-6471-4e16-b8d5-aeb8c59cbfac T 367 | 73846baa-cba6-48e0-94e0-9308dae5c6bb T 368 | 4645a9cc-e357-4630-8bd2-696bf6475850 T 369 | 491648cb-7e3a-42f4-8b86-fca1bcc82173 T 370 | bfc6fe18-77c7-4ede-a555-9207d60d1d86 T 371 | a8c8144a-3693-4d92-8bc4-49e00dd8defc T 372 | 3f9cd7e5-6d7b-40a8-8062-a18d2f2ca599 T 373 | ad07ac5e-718d-40cc-8093-b61169b745ef T 374 | 5d4911e4-8c8e-41cb-97ff-d80b2f11a4cc T 375 | f170f056-3f8a-4ef3-ac9f-4503cc854ce0 T 376 | 2c12acd9-1600-487d-a809-2c2743381112 T 377 | 60807e2a-422e-48a4-b79d-eeb35820a8bb T 378 | b2d0f29e-4614-4001-93c8-f651878a86d2 T 379 | f6637736-f04e-4361-87a0-541e08a8a8d3 T 380 | be212283-072e-4e50-b938-689e3c82f5c6 T 381 | 1a20c977-7365-49d2-8100-5978d4e52234 T 382 | cbde2a70-7a85-4370-b240-09e0738f76f9 T 383 | 821765c9-59dc-4310-9970-d82c2eb9eff7 T 384 | 616efc67-5aba-49f7-914d-8c2c8e217858 T 385 | f76041b1-53f6-448d-a2e1-4302bf80c93c T 386 | 5b580625-29f0-4911-be29-cf5b6496e538 T 387 | 0a7c5a4e-db9b-4349-a793-ef4b3ef9e025 T 388 | 7bb5d1e3-ee8d-4a96-97cd-bb8f56dc6375 T 389 | 8bb6b7ef-553f-40a8-872b-c6a54ecf2f68 T 390 | 80455677-4c00-4c3d-8290-446e459a9e9c T 391 | 039ece6c-1e6d-4441-8392-43ab8bb29c47 T 392 | 7efaf3c5-bd1a-42b9-852e-fd39fbc6c5b8 T 393 | 
86ade71a-3894-4f17-bcde-f5894bc5e0f7 T 394 | e13faa2d-3cd9-4716-aeb1-1a489d9b69b4 T 395 | 710f72d0-2b22-452d-89ac-462164a9a369 T 396 | cbd94c94-02ae-4c40-b5b5-ec7221f0482b T 397 | fe6d1c27-5284-402f-a57a-55727ee3f981 T 398 | 96c4aa76-f762-11e1-a439-00145eb45e9a T 399 | cb0ad417-bdca-4d5e-a624-811b50f02860 T 400 | 81c5a091-6e94-40db-a2a4-48f4de42d410 T 401 | c0e410ad-0d01-49f3-a837-71982c0e0c32 T 402 | 688e9ab9-d010-4691-96e0-80ca29323a57 T 403 | 8539cd0e-f762-11e1-a439-00145eb45e9a T 404 | 9df997ea-5549-45a9-b42f-ff8a308ccbf6 T 405 | e8ce41f4-a58a-4ce9-90b7-93694071afcd T 406 | 8b0588e2-6852-4553-951d-90c01eaaf90b T 407 | b22bebf8-8b6b-4a45-949a-e0cc70d4ccd7 T 408 | 5accf920-492e-4641-9ba2-11481c116419 T 409 | 68724de6-4ce3-405e-b76f-f5814712cfb9 T -------------------------------------------------------------------------------- /find_citizen_science_datasets/scripts/citizen_science_gather_dataset_descriptions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import json\n", 11 | "import requests\n", 12 | "import random\n", 13 | "import textblob\n", 14 | "from textblob.classifiers import NaiveBayesClassifier" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "# Can we predict if a dataset is part of a citizen science project?\n", 22 | "\n", 23 | "With the help of the [GBIF API](https://www.gbif.org/developer/summary) and textblob.\n", 24 | "\n", 25 | "See the documentation I used to write this script:\n", 26 | "* https://www.analyticsvidhya.com/blog/2018/02/the-different-methods-deal-text-data-predictive-python/\n", 27 | "* https://stevenloria.com/simple-text-classification/\n", 28 | "\n", 29 | "## What do I label as citizen science in the training set?\n", 30 | "\n", 31 | "### What I label as \"citizen science\":\n", 
32 | "* Metadata explicitly includes the words \"citizen\" or \"citizen science\" (or for the French version \"science\" or \"enquête\" participative)\n", 33 | "* The metadata mentions that the dataset is partly or entirely made by volunteers\n", 34 | "* Bioblitz datasets\n", 35 | "\n", 36 | "### What I don't label as \"citizen science\":\n", 37 | "* What seems like compulsory student work\n", 38 | "* Personal collections or notebooks (unless the description includes a clue from above)\n", 39 | "\n", 40 | "## I - get the dataset's features to analyse from GBIF" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 2, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "def extract_set_of_words(dataset):\n", 50 | " '''\n", 51 | " Puts together titles, descriptions, methods and keywords\n", 52 | " '''\n", 53 | " set_of_words = dataset[\"title\"] + \" \"\n", 54 | " if \"description\" in dataset:\n", 55 | " set_of_words += dataset[\"description\"]+ \" \"\n", 56 | " # Get keywords\n", 57 | " if \"keywordCollections\" in dataset:\n", 58 | " for kwcollection in dataset[\"keywordCollections\"]:\n", 59 | " if \"keywords\" in kwcollection:\n", 60 | " for kw in kwcollection[\"keywords\"]:\n", 61 | " set_of_words += str(kw)+ \" \"\n", 62 | " # Get Methods\n", 63 | " if \"samplingDescription\" in dataset:\n", 64 | " for key in dataset[\"samplingDescription\"]:\n", 65 | " if key != \"methodSteps\":\n", 66 | " set_of_words += dataset[\"samplingDescription\"][key]+ \" \"\n", 67 | " else:\n", 68 | " for methodStep in dataset[\"samplingDescription\"][key]:\n", 69 | " set_of_words += str(methodStep)+ \" \"\n", 70 | " return set_of_words" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 3, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "res = pd.DataFrame(columns=[\"Description\", \"Language\"])\n", 80 | "\n", 81 | "# We are excluding datasets from organizations that generated the metadata automatically\n", 82 | "# PLAZI 
and PANGEA\n", 83 | "exclude = ['7ce8aef0-9e92-11dc-8738-b8a03c50a862',\n", 84 | " 'd5778510-eb28-11da-8629-b8a03c50a862']\n", 85 | "offset = 0\n", 86 | "step = 900\n", 87 | "end_of_records = False\n", 88 | "while not end_of_records:\n", 89 | " param = {\n", 90 | " \"offset\": offset,\n", 91 | " \"limit\": step\n", 92 | " }\n", 93 | " # Query API\n", 94 | " response = requests.get(\"http://api.gbif.org/v1/dataset\", param)\n", 95 | " response = response.json()\n", 96 | " offset += step\n", 97 | " end_of_records = response[\"endOfRecords\"]\n", 98 | " for dataset in response[\"results\"]:\n", 99 | " # exclude dataset from PLAZI and PANGEA\n", 100 | " if dataset[\"publishingOrganizationKey\"] not in exclude and dataset[\"type\"] != \"CHECKLIST\":\n", 101 | " res.at[dataset[\"key\"], \"Language\"] = dataset[\"language\"]\n", 102 | " # Get title, description, keywords, methods of the dataset\n", 103 | " res.at[dataset[\"key\"], \"Description\"] = extract_set_of_words(dataset)" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "## Save data to avoid rerunning everything every time I debug my model" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 4, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "# res.to_csv(\"../raw_descriptions.tsv\", index=True, sep=\"\\t\")\n", 120 | "\n", 121 | "res = pd.read_table(\"../raw_descriptions.tsv\")\n", 122 | "res = res.set_index(\"UUID\")" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "## Load the training + testing sets: partially manually annotated data" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 5, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "training_set = pd.read_table(\"../some_manually_annotated_datasets.tsv\")\n", 139 | "training_set = training_set.set_index(\"UUID\")" 140 | ] 141 | }, 142 | { 143 | "cell_type": 
"markdown", 144 | "metadata": {}, 145 | "source": [ 146 | "## II - Set the parameters for data cleaning and analysis" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 6, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "feature = \"Description\" # we want to find words in the description\n", 156 | "frequency_threshold = 15 # how many most frequent word do we remove\n", 157 | "rare_threshold_word_number = 2 # how many time a word should appear to be kept\n", 158 | "parameter_selection_training_set = 800 # Size of training set + selection of parameters\n", 159 | "crossValidation = 4\n", 160 | "triming_threshold_for_model = 15 # number of words we want to keep from model" 161 | ] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "metadata": {}, 166 | "source": [ 167 | "## III - Clean up" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 7, 173 | "metadata": {}, 174 | "outputs": [], 175 | "source": [ 176 | "# Remove underscore\n", 177 | "res[feature] = res[feature].str.replace(\"_\", \" \")\n", 178 | "res[feature] = res[feature].str.replace(\"-\", \" \")\n", 179 | "\n", 180 | "# Set everything to lower case\n", 181 | "res[feature] = res[feature].apply(lambda x: \" \".join(x.lower() for x in x.split()))" 182 | ] 183 | }, 184 | { 185 | "cell_type": "markdown", 186 | "metadata": {}, 187 | "source": [ 188 | "Unfortunately, the translation stopped working after an hour of using it. Apparently it is a common issue with libraries using the Google API." 
189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 8, 194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [ 197 | "# # Translate sentence if needed\n", 198 | "# for dataset in res.index.tolist():\n", 199 | "# if res.loc[dataset, \"Language\"] != \"eng\":\n", 200 | "# print(dataset)\n", 201 | "# res.at[dataset, feature] = str(textblob.TextBlob(res.loc[dataset, feature]).translate(to='en'))" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 9, 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [ 210 | "# Remove punctuation\n", 211 | "res[feature] = res[feature].str.replace('[^\\w\\s]', '')\n", 212 | "res[feature] = res[feature].str.replace('[\\d]', '')" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": 10, 218 | "metadata": {}, 219 | "outputs": [], 220 | "source": [ 221 | "# Find the most frequent words by language\n", 222 | "for language in list(set(res.Language.tolist())):\n", 223 | " freq = pd.Series(' '.join(res[res.Language == language][feature]).split()).value_counts()[:frequency_threshold]\n", 224 | " freq = list(freq.index)\n", 225 | " # Remove the most frequent words\n", 226 | " res[feature] = res[feature].apply(lambda x: \" \".join([c for c in x.split() if c not in freq]))" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": 11, 232 | "metadata": {}, 233 | "outputs": [], 234 | "source": [ 235 | "# Replace some key words\n", 236 | "words_to_replace = pd.read_table(\"../wd_replace.txt\")\n", 237 | "words_to_replace = words_to_replace.set_index(\"word\")\n", 238 | "for word in words_to_replace.index.tolist():\n", 239 | " res[feature] = res[feature].str.replace(word, words_to_replace.loc[word, \"replacement\"])" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": 12, 245 | "metadata": {}, 246 | "outputs": [], 247 | "source": [ 248 | "# # Find the rare words # OPTIONAL because it takes too much time\n", 249 | "# freq = 
pd.Series(' '.join(res[feature]).split()).value_counts()\n", 250 | "# freq = freq[freq < rare_threshold_word_number]\n", 251 | "# freq = list(freq.index)\n", 252 | "# # Remove rare words\n", 253 | "# res[feature] = res[feature].apply(lambda x: \" \".join([c for c in x.split() if c not in freq]))" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": 13, 259 | "metadata": {}, 260 | "outputs": [], 261 | "source": [ 262 | "# Correct spelling - not done here because it takes too much time\n", 263 | "# res[feature] = res[feature].apply(lambda x: str(textblob.TextBlob(x).correct()))" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": 14, 269 | "metadata": {}, 270 | "outputs": [], 271 | "source": [ 272 | "# Lemmatization (remove some letters in words to make it more \"universal\")\n", 273 | "res[feature] = res[feature].apply(lambda x: \" \".join([textblob.Word(word).lemmatize() for word in x.split()]))\n", 274 | "\n", 275 | "# Remove as many NAs as possible\n", 276 | "res[feature] = res[feature].str.replace(\" na \", \" \")" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": 15, 282 | "metadata": {}, 283 | "outputs": [], 284 | "source": [ 285 | "# Training set\n", 286 | "data_training_set = pd.concat([res, training_set], join=\"inner\", axis=1)\n", 287 | "\n", 288 | "# Reformat dataset\n", 289 | "index_list = data_training_set.index.tolist()\n", 290 | "random.shuffle(index_list)\n", 291 | "index_test_set = index_list[parameter_selection_training_set:len(index_list)]\n", 292 | "index_training_set = index_list[0:parameter_selection_training_set]\n", 293 | "\n", 294 | "data_training_set = data_training_set.loc[index_training_set]\n", 295 | "data_training_set = list(data_training_set[[feature, 'CS']].itertuples(index=False))" 296 | ] 297 | }, 298 | { 299 | "cell_type": "markdown", 300 | "metadata": {}, 301 | "source": [ 302 | "## IV - Train and test Classifier - Naive Bayesian" 303 | ] 304 | }, 305 | { 
306 | "cell_type": "code", 307 | "execution_count": 16, 308 | "metadata": {}, 309 | "outputs": [], 310 | "source": [ 311 | "# Train classifier\n", 312 | "word_for_model = []\n", 313 | "\n", 314 | "for fold in range(0, crossValidation):\n", 315 | " cl = NaiveBayesClassifier(data_training_set[fold:(fold+1)*int(len(data_training_set)/crossValidation)])\n", 316 | " informative_feature = cl.informative_features(triming_threshold_for_model)\n", 317 | " for word in informative_feature:\n", 318 | " word_for_model.append(word[0].replace(\"contains(\",\"\").replace(\")\",\"\"))" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": 17, 324 | "metadata": {}, 325 | "outputs": [ 326 | { 327 | "data": { 328 | "text/plain": [ 329 | "F 788\n", 330 | "T 226\n", 331 | "Name: CS, dtype: int64" 332 | ] 333 | }, 334 | "execution_count": 17, 335 | "metadata": {}, 336 | "output_type": "execute_result" 337 | } 338 | ], 339 | "source": [ 340 | "# Put back the performance in the context \n", 341 | "training_set[\"CS\"].value_counts()" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": 18, 347 | "metadata": {}, 348 | "outputs": [ 349 | { 350 | "data": { 351 | "text/plain": [ 352 | "eng 6591\n", 353 | "spa 1757\n", 354 | "fra 1114\n", 355 | "por 223\n", 356 | "nor 6\n", 357 | "zho 6\n", 358 | "rus 2\n", 359 | "glg 1\n", 360 | "cat 1\n", 361 | "Name: Language, dtype: int64" 362 | ] 363 | }, 364 | "execution_count": 18, 365 | "metadata": {}, 366 | "output_type": "execute_result" 367 | } 368 | ], 369 | "source": [ 370 | "# Languages\n", 371 | "res[\"Language\"].value_counts()" 372 | ] 373 | }, 374 | { 375 | "cell_type": "markdown", 376 | "metadata": {}, 377 | "source": [ 378 | "## V - What word is associated with citizen science" 379 | ] 380 | }, 381 | { 382 | "cell_type": "code", 383 | "execution_count": 19, 384 | "metadata": {}, 385 | "outputs": [ 386 | { 387 | "data": { 388 | "text/plain": [ 389 | "na 3\n", 390 | "select 3\n", 391 | 
"professional 3\n", 392 | "occurence 3\n", 393 | "school 3\n", 394 | "validated 3\n", 395 | "organised 2\n", 396 | "amateur 2\n", 397 | "dedicated 2\n", 398 | "spot 2\n", 399 | "you 2\n", 400 | "citizen 2\n", 401 | "hosted 2\n", 402 | "volunteer 2\n", 403 | "connaissance 1\n", 404 | "validation 1\n", 405 | "since 1\n", 406 | "object 1\n", 407 | "contribution 1\n", 408 | "order 1\n", 409 | "asked 1\n", 410 | "camp 1\n", 411 | "participant 1\n", 412 | "absence 1\n", 413 | "deposited 1\n", 414 | "search 1\n", 415 | "help 1\n", 416 | "submit 1\n", 417 | "monitor 1\n", 418 | "ensure 1\n", 419 | "imported 1\n", 420 | "late 1\n", 421 | "collated 1\n", 422 | "of 1\n", 423 | "watcher 1\n", 424 | "conduct 1\n", 425 | "thousand 1\n", 426 | "surroundings 1\n", 427 | "occurring 1\n", 428 | "bioblitz 1\n", 429 | "dtype: int64" 430 | ] 431 | }, 432 | "execution_count": 19, 433 | "metadata": {}, 434 | "output_type": "execute_result" 435 | } 436 | ], 437 | "source": [ 438 | "# Find the most informative words\n", 439 | "keywords = pd.Series(word_for_model).value_counts()\n", 440 | "keywords" 441 | ] 442 | }, 443 | { 444 | "cell_type": "markdown", 445 | "metadata": {}, 446 | "source": [ 447 | "## VI - Train and test model on reduced set of words" 448 | ] 449 | }, 450 | { 451 | "cell_type": "code", 452 | "execution_count": 20, 453 | "metadata": {}, 454 | "outputs": [], 455 | "source": [ 456 | "reduced_set_of_words = keywords[keywords > 1].index.tolist()\n", 457 | "reduced_set_of_words += [\"herbarium\", \"museum\", \"inaturalist\"]\n", 458 | "reduced_set_of_words = set(reduced_set_of_words)" 459 | ] 460 | }, 461 | { 462 | "cell_type": "code", 463 | "execution_count": 21, 464 | "metadata": {}, 465 | "outputs": [], 466 | "source": [ 467 | "res[feature] = res[feature].apply(lambda x: \" \".join([c for c in x.split() if c in reduced_set_of_words]))\n", 468 | "res[feature] = res[feature].str.replace(\" na \", \" \")" 469 | ] 470 | }, 471 | { 472 | "cell_type": "code", 473 | 
"execution_count": 22, 474 | "metadata": {}, 475 | "outputs": [], 476 | "source": [ 477 | "# Reduced training set\n", 478 | "reduced_set = pd.concat([res, training_set], join=\"inner\", axis=1)\n", 479 | "\n", 480 | "# Reformat dataset\n", 481 | "reduced_training_set = reduced_set.loc[index_training_set]\n", 482 | "reduced_training_set = list(reduced_training_set[[feature, 'CS']].itertuples(index=False))\n", 483 | "\n", 484 | "test_set = reduced_set.loc[index_test_set]\n", 485 | "test_set = list(test_set[[feature, 'CS']].itertuples(index=False))\n", 486 | "\n", 487 | "# train dataset\n", 488 | "# cl = NaiveBayesClassifier(reduced_training_set)\n", 489 | "cl = textblob.classifiers.DecisionTreeClassifier(reduced_training_set)" 490 | ] 491 | }, 492 | { 493 | "cell_type": "code", 494 | "execution_count": 23, 495 | "metadata": {}, 496 | "outputs": [ 497 | { 498 | "name": "stdout", 499 | "output_type": "stream", 500 | "text": [ 501 | "if contains(citizen) == False: \n", 502 | " if contains(volunteer) == False: \n", 503 | " if contains(organised) == False: \n", 504 | " if contains(amateur) == False: return 'F'\n", 505 | " if contains(amateur) == True: return 'T'\n", 506 | " if contains(organised) == True: \n", 507 | " if contains(na) == False: return 'T'\n", 508 | " if contains(na) == True: return 'T'\n", 509 | " if contains(volunteer) == True: return 'T'\n", 510 | "if contains(citizen) == True: return 'T'\n", 511 | "\n" 512 | ] 513 | } 514 | ], 515 | "source": [ 516 | "print(cl.pseudocode())" 517 | ] 518 | }, 519 | { 520 | "cell_type": "code", 521 | "execution_count": 24, 522 | "metadata": {}, 523 | "outputs": [], 524 | "source": [ 525 | "truePositive = 0\n", 526 | "trueNegative = 0\n", 527 | "falsePositive = 0\n", 528 | "falseNegative = 0\n", 529 | "for test in test_set:\n", 530 | " if test[1] == \"T\":\n", 531 | " if cl.classify(test[0]) == \"T\":\n", 532 | " truePositive += 1\n", 533 | " else:\n", 534 | " falsePositive += 1\n", 535 | " else:\n", 536 | " if 
cl.classify(test[0]) == \"T\":\n", 537 | " falseNegative += 1\n", 538 | " else:\n", 539 | " trueNegative += 1" 540 | ] 541 | }, 542 | { 543 | "cell_type": "code", 544 | "execution_count": 25, 545 | "metadata": {}, 546 | "outputs": [ 547 | { 548 | "name": "stdout", 549 | "output_type": "stream", 550 | "text": [ 551 | "Performance on testing set\n", 552 | "\n", 553 | "True positive:\t 9.859154929577464\n", 554 | "True negative:\t 85.21126760563381\n", 555 | "False positive:\t 4.929577464788732\n", 556 | "False negative:\t 0.0\n" 557 | ] 558 | } 559 | ], 560 | "source": [ 561 | "print(\"Performance on testing set\\n\")\n", 562 | "print(\"True positive:\\t\", truePositive*100/len(test_set))\n", 563 | "print(\"True negative:\\t\", trueNegative*100/len(test_set))\n", 564 | "print(\"False positive:\\t\", falsePositive*100/len(test_set))\n", 565 | "print(\"False negative:\\t\", falseNegative*100/len(test_set))" 566 | ] 567 | }, 568 | { 569 | "cell_type": "code", 570 | "execution_count": 26, 571 | "metadata": {}, 572 | "outputs": [ 573 | { 574 | "name": "stdout", 575 | "output_type": "stream", 576 | "text": [ 577 | "Accuracy\n" 578 | ] 579 | }, 580 | { 581 | "data": { 582 | "text/plain": [ 583 | "0.9507042253521126" 584 | ] 585 | }, 586 | "execution_count": 26, 587 | "metadata": {}, 588 | "output_type": "execute_result" 589 | } 590 | ], 591 | "source": [ 592 | "print(\"Accuracy\")\n", 593 | "cl.accuracy(test_set)" 594 | ] 595 | }, 596 | { 597 | "cell_type": "code", 598 | "execution_count": 27, 599 | "metadata": {}, 600 | "outputs": [ 601 | { 602 | "data": { 603 | "text/plain": [ 604 | "F 121\n", 605 | "T 21\n", 606 | "Name: CS, dtype: int64" 607 | ] 608 | }, 609 | "execution_count": 27, 610 | "metadata": {}, 611 | "output_type": "execute_result" 612 | } 613 | ], 614 | "source": [ 615 | "# Put back the performance in the context \n", 616 | "reduced_set.loc[index_test_set][\"CS\"].value_counts()" 617 | ] 618 | }, 619 | { 620 | "cell_type": "markdown", 621 | "metadata": {}, 
622 | "source": [ 623 | "## Predict the rest of the datasets" 624 | ] 625 | }, 626 | { 627 | "cell_type": "code", 628 | "execution_count": 28, 629 | "metadata": {}, 630 | "outputs": [], 631 | "source": [ 632 | "to_check = pd.DataFrame(columns=[\"CS\"])  # empty frame; rows are keyed by dataset UUID via .at below\n", 633 | "for test in res.index.tolist():\n", 634 | " if test not in training_set.index:  # Index membership is a hash lookup, no per-iteration list copy\n", 635 | " to_check.at[test, \"CS\"] = cl.classify(res.at[test, feature])" 636 | ] 637 | }, 638 | { 639 | "cell_type": "code", 640 | "execution_count": 29, 641 | "metadata": {}, 642 | "outputs": [], 643 | "source": [ 644 | "title_and_description = pd.read_table(\"../raw_occurrence_dataset_descriptions_and_titles.tsv\")\n", 645 | "title_and_description = title_and_description.set_index(\"UUID\")\n", 646 | "to_check_w_title = pd.concat([title_and_description, to_check], join = \"inner\", axis=1)\n", 647 | "to_check_w_title.sort_values([\"CS\"], ascending=False).to_csv(\"../test_model_subsample.tsv\", sep = \"\\t\")" 648 | ] 649 | }, 650 | { 651 | "cell_type": "code", 652 | "execution_count": null, 653 | "metadata": {}, 654 | "outputs": [], 655 | "source": [] 656 | } 657 | ], 658 | "metadata": { 659 | "kernelspec": { 660 | "display_name": "Python 3", 661 | "language": "python", 662 | "name": "python3" 663 | }, 664 | "language_info": { 665 | "codemirror_mode": { 666 | "name": "ipython", 667 | "version": 3 668 | }, 669 | "file_extension": ".py", 670 | "mimetype": "text/x-python", 671 | "name": "python", 672 | "nbconvert_exporter": "python", 673 | "pygments_lexer": "ipython3", 674 | "version": "3.6.4" 675 | } 676 | }, 677 | "nbformat": 4, 678 | "nbformat_minor": 2 679 | } 680 | -------------------------------------------------------------------------------- /find_citizen_science_datasets/some_manually_annotated_datasets.tsv: -------------------------------------------------------------------------------- 1 | UUID CS 2 | 01a6caea-b201-4495-b511-1adf1384e790 F 3 | 02131297-7f17-4eb7-aeb6-9692ac6d5ea1 T 4 | 
02abb9d1-7d81-42b3-ac9a-3b3d0c7a5280 T 5 | 02b3bdaa-07dc-45b3-a761-7580d4bdcdb4 F 6 | 04992c9c-0ead-428d-917e-698cd5f38d01 F 7 | 04c4143f-1b12-44c1-a81d-1ef5fb3ea175 F 8 | 04e5adc3-28c6-4589-b48a-36b4c8609b40 T 9 | 053cc2c4-c0c9-4024-8cdb-9f45b93700bf F 10 | 07609307-26f9-490f-89ea-4e74497e3c0d T 11 | 077cd59e-b198-43ed-97a3-2317aa68e671 F 12 | 086a644d-6cbe-43b7-b7c7-d33d36028d7f F 13 | 08ccad1b-e79f-4481-94e5-7f47533129ff F 14 | 098045a1-9b72-42e9-91bd-3dbd2d90ede3 F 15 | 09e90dfb-5b1b-4dd9-a796-e2fba53d26f0 F 16 | 09fa2625-92c0-4720-9cbb-d43ced49d11f F 17 | 0abad0df-c5b2-44e9-9cef-5d2c5c04ea69 F 18 | 0c70b4de-b7af-4cda-b9de-b59d4be45879 F 19 | 0d2e2847-e125-4a3b-b4a7-a2a199919b44 F 20 | 0e240ab8-9620-4be8-b489-7c4adc78f8a4 F 21 | 0e6ca8ff-526a-4d2f-ba9b-4238f45d281b F 22 | 100caf86-ad3e-4c13-be7b-4c838d221dfa F 23 | 1084c169-c39a-4749-94aa-32159777d828 F 24 | 11314af8-aad0-4414-a961-e91b88fd4abb T 25 | 115f45ed-4334-4144-b5c5-669027e9b191 T 26 | 116fd540-36a8-4c36-8274-67d90815fd5f F 27 | 1228306b-f4c8-45ec-83cf-c006b8236021 F 28 | 13456803-0361-461e-b957-ce325bc14e62 F 29 | 13b70480-bd69-11dd-b15f-b8a03c50a862 T 30 | 13d7cb4c-3dc0-4276-b3cf-c29d10e92beb F 31 | 149e44e7-fb0b-4db0-a40a-86e64eb8baf3 F 32 | 14f889f3-6ab2-4431-bbed-58cbd1afbc6f F 33 | 169fa761-2fb9-4022-93bd-e22b7a062efd T 34 | 17a5a17a-36a3-4c74-b693-fe8241bd643d F 35 | 17b214d1-f2ff-43ae-a731-4e52bae687fe F 36 | 17ea10b4-1ef2-42ba-9cb1-b38937dd15b9 F 37 | 1aaa6e74-593c-4a7c-a83e-ca8cbb4c6511 F 38 | 1be624ad-2628-4a57-a68f-0fea6e3fba31 F 39 | 1c8388f5-5c12-46ba-a486-07d1b4f8b78c F 40 | 1d0f6e54-c1a9-48c4-bdb8-381fe2220b44 F 41 | 1d1f4137-7653-4eac-ab67-7487dae51739 F 42 | 1eb5d9c0-2e4c-11de-909a-b8a03c50a862 T 43 | 1ed27efd-2a72-4d9b-a2c3-311ce3da5937 F 44 | 1f61b310-0d25-498d-902a-97c28c5a110a F 45 | 1fef1ead-3d02-495e-8ff1-6aeb01123408 T 46 | 202af80e-3bd1-4856-9f67-833588872230 T 47 | 20d8ad6d-4291-48f1-9eeb-f2ef1283330b F 48 | 231acf5a-0727-4315-b97d-957746e5f197 F 49 | 
233ba471-91a2-47a4-9946-b7e844f2124c T 50 | 23fa1c7a-1f3e-45c7-9997-cb718f8975b6 F 51 | 24baf0da-60a9-4a13-99d7-a8c860c3cb30 F 52 | 25cf9b9b-4bf8-46c4-b85c-7a08933b1577 F 53 | 25d73670-8b5c-4813-aadb-37b65022602b F 54 | 2743e1fb-5a9c-4224-800c-f86bab2ca52b F 55 | 275a15fd-1a00-43c4-92c6-f2524ef3ca72 F 56 | 2790adc5-c709-427c-8d04-804590d98272 F 57 | 2838b79f-bd50-46cd-ac42-2bef5771a646 F 58 | 28b20161-d9ef-4e62-b13c-19564af2005c F 59 | 2a2bf4d1-5747-4c03-ae7b-328a7768345b F 60 | 2b151ccd-eb45-4f73-8baa-24c24ce87165 F 61 | 2b3a8ad6-c040-4105-9eae-cfdf0003cf88 T 62 | 2d0a94b6-fca2-4630-9683-e37756440de0 F 63 | 2d21ceb2-e459-4025-a62f-6b65c494de47 F 64 | 2d4d4edb-4a78-4855-909e-56dd4c6dfe38 F 65 | 2daac733-54d3-41b9-8f4d-f2fd45a68441 F 66 | 2dda31ff-f776-452f-9a4b-d5229f6e3494 F 67 | 2deeb3ad-0390-4d2c-9623-c5abc6610b08 T 68 | 2e4f4ed1-77d0-4ee2-a895-6f894d3fcc13 T 69 | 2eb32bde-d5a9-433d-8c68-40fe0813938e F 70 | 2f51827d-8e69-4ecc-9318-d93bdba79986 F 71 | 2f59780e-3d77-403a-9437-9a560ea5f764 T 72 | 2fce3ac5-5bd1-4c11-82c8-68bfb937399a T 73 | 2fdace5f-efaa-4f5c-8680-acb8f7797319 F 74 | 3006c2fd-b596-4368-b3b0-fd06116251da F 75 | 30595b72-0016-42cf-a1b5-afa6a2474cba F 76 | 30ac77b7-9434-4eec-84c3-52b507ee9b8b T 77 | 3194e21c-447a-410d-bb09-31398482de1f F 78 | 324221f2-eb92-419b-88d5-a44e85c9898c F 79 | 329c9f4f-13eb-4a69-9c56-09923e394dcf F 80 | 3316aeea-f5a5-45de-880f-055b4f91cbec F 81 | 333e2382-51d2-4c9d-93cf-ff46288ad1b9 T 82 | 33e67b5a-f6e5-4f31-a60f-3b1d259551b7 F 83 | 340984fa-507b-4aa6-b59c-cce1dcee339f F 84 | 3419e2ce-de18-4727-868a-b239c379b3bb F 85 | 34585f24-1ffe-4744-8734-7d563c918d18 F 86 | 34877684-bbd1-45f2-a205-8f1cc9ad58d7 F 87 | 34ca940f-facb-4039-9ac0-111dad7fd9cc F 88 | 34fbcf59-d9bb-47e7-9672-99e13ea8c736 F 89 | 358fa28d-3344-4372-826b-2416f47f1118 F 90 | 3629a5cd-06f8-45ad-94ca-a8991d8784da F 91 | 3697fec6-b46c-4976-a7c2-417188574dcf T 92 | 36b77d42-895c-4192-8872-8f234d58071a F 93 | 37867d84-1143-4378-ac6f-95e5e19650e9 T 94 | 
38012fb6-c779-4ef8-8b02-3f458709b5a0 F 95 | 38b4c89f-584c-41bb-bd8f-cd1def33e92f T 96 | 38f97143-922d-4ec0-a6e5-75e51a77890a T 97 | 3924362e-fa83-4a62-a5e2-6f764c872555 F 98 | 3945e97d-a88a-4e91-ab4e-931b53f91792 F 99 | 397b0ae4-460e-4680-9625-73ab82f99ac1 F 100 | 3b74ac9f-b90b-4113-969f-a489a5aa11fc T 101 | 3b823447-b6b3-4424-80ae-643e877a1c8d T 102 | 3cf84bf6-5a7b-4abe-89ef-b16eaef2fa3c T 103 | 3d777576-eec6-44a0-ae80-012cf5f84e82 F 104 | 3dd1a517-73a9-40db-ae9e-b0e8ad342a22 F 105 | 3eb94dd5-c5d2-4a55-947c-7a4d3535ced8 T 106 | 3f0bfe27-b8d7-459e-8b83-1abc57aa669c T 107 | 3fb7ddd8-07c0-490d-a0f1-32b5dec8d583 F 108 | 4002a223-2daf-43e4-ab43-10b2ccfa5988 F 109 | 40126b48-3511-4fbb-aa79-7d039779c387 F 110 | 4085b735-e486-400f-baaa-4dfcbec0aab3 F 111 | 41754ccf-f293-4183-b0fe-5efbdffe597c F 112 | 42319b8f-9b9d-448d-969f-656792a69176 T 113 | 424c2bbf-e40b-46e6-a19e-69235d7d29b3 F 114 | 432b3389-4758-4b14-bae1-617e4463cd73 T 115 | 43460666-2eb6-4b2f-9819-0259534a32c3 F 116 | 4392dba3-3e44-42aa-8878-19d7b9819bdf F 117 | 4496b1b1-1ea9-4eef-bcca-90adc41038df F 118 | 44fb5823-c1f5-4ac7-8e06-795a09a138f2 F 119 | 458acc40-a815-4fa3-859b-66c84e324907 F 120 | 474ee54a-9eae-4b67-b79b-d8fc2d9ec884 F 121 | 4764db32-05c6-4ac3-a648-670db5de9d37 T 122 | 4792ce42-563a-40a5-b961-fcd7efa64ba0 F 123 | 480d9ab0-c682-4cd6-ba78-38cf9ef91087 F 124 | 4813e9b0-8d4b-41d1-b101-05ece4eb4776 F 125 | 48249f10-4b89-4b56-a1d8-83a6297bd0c7 F 126 | 48d5602a-fbea-417a-b5d3-46073a08efda F 127 | 48f1c160-e87d-4c0c-89ca-1fde7a758378 F 128 | 48f54e74-d525-4fad-b482-a32316d79ccb F 129 | 4a3421d3-e83f-4d30-b0b9-6658c9f047b6 F 130 | 4a53a180-f0c8-4dd8-a1fb-18768668edc9 F 131 | 4b3ec18d-2b2e-490b-a92d-1a4088065712 F 132 | 4bce5a53-c1d9-4e06-a773-cfd18ccd198f F 133 | 4bf1cca8-832c-4891-9e17-7e7a65b7cc81 F 134 | 4c39bca8-5d52-4efa-9bb0-72cf38456bfc F 135 | 4e19e2b8-9934-4b5a-a51f-62df5a9802e7 F 136 | 4e302fbd-7437-4f2a-ae19-2525f090a4e7 F 137 | 4ea0d3ad-eab4-4c2c-87b4-8247eae6cec5 T 138 | 
4eb871a4-6dc4-4db6-abf2-3ab553a41008 T 139 | 4ec9e37d-1af5-4dc2-9c2f-97fcf5f22867 F 140 | 4fa7b334-ce0d-4e88-aaae-2e0c138d049e T 141 | 4fac6de9-a3ae-4334-809c-89d7a56e0d06 T 142 | 5024b47d-c401-4543-9780-8890ae6d1e13 T 143 | 50320846-bf5b-4e62-a5ff-17687f448089 F 144 | 50c9509d-22c7-4a22-a47d-8c48425ef4a7 T 145 | 514d5f6a-d6ce-4e4a-9097-c014d06f11c9 F 146 | 516bb920-2e4c-11de-909a-b8a03c50a862 T 147 | 51e25910-c7b5-11de-b279-e8b0507c4765 T 148 | 51ffcd3a-f71f-491e-b75a-505657c3770e F 149 | 52855660-ce87-4312-9538-fefde37134b6 F 150 | 52f2051b-c47e-403a-8e32-04b2f2273c20 T 151 | 53334055-d64e-48cd-8c83-2295d3891782 F 152 | 550f8727-56d8-4feb-ae98-d8db0ead73bd F 153 | 555dfae5-6a38-4db5-ac80-58d9b5626e29 T 154 | 560539d5-7f71-4fd8-998b-ba5b16a55568 F 155 | 576c0061-40a6-46fa-8f95-a425eef87893 T 156 | 57fa062c-ac30-4f3f-a8ec-9d210e913415 F 157 | 58f732a6-b36e-4d86-bb67-22fe034ba196 F 158 | 59825263-9f89-43a9-8f22-c22a6a53b8fc F 159 | 59997fb4-d940-4305-90b4-6dee190260bd F 160 | 5a1b62b2-4a1f-4404-b481-87836564d4a5 T 161 | 5b7e2ecf-162e-4458-a92c-04d706ace8e9 F 162 | 5ba69a5d-a3f6-4e4e-bbf6-52ebf7dc47b3 T 163 | 5c88c333-373d-46bb-8e22-5a2f50913ed0 F 164 | 5e610094-df1e-4073-85cb-260091509e1e F 165 | 5e848873-567f-42dd-8a29-88056b9b33a4 F 166 | 5ee22017-b870-4d04-95c6-13e335b7f032 T 167 | 5ee9e28d-9bf8-480a-8d5f-14e0e5653cf8 F 168 | 5f83f1a9-05dd-41df-96fd-743d6f50c111 T 169 | 5fb59b97-5ae7-473a-bb66-c4da8703700f F 170 | 5ff00d49-e775-4545-b641-12eb24251201 F 171 | 6026ed0c-3113-4d31-9c85-a6468f09f699 F 172 | 6043b710-f1a6-48bb-a473-8b32b1e43408 F 173 | 606a9bcd-3896-4ff2-9c46-ed24fea6777a F 174 | 609f3aaf-8aaa-4cdc-9f44-369702e1bc01 F 175 | 6183f1f7-0513-4587-bee6-cad11e23002d F 176 | 61fff8d5-8888-48cc-a497-fde1363cf718 F 177 | 6242ca69-c65e-4723-b629-9aeb05b51c88 F 178 | 626621b4-bf3f-4568-b1ce-e2e5f619bdbb F 179 | 626fca6f-8590-478b-a662-b9c82afad2b5 T 180 | 63d72e90-c7b8-11de-b279-ef4016664769 F 181 | 64ac1450-3abe-4a48-a49c-07f1347740c1 F 182 | 
64ec1ba4-08d4-46ef-b40a-68965728e182 T 183 | 6555005d-4594-4a3e-be33-c70e587b63d7 F 184 | 65c84b10-5074-4bf9-a759-0d524dd449fe F 185 | 669ecb52-bfbe-4220-99c7-bafc643df32e T 186 | 66ca4356-bdef-4de4-b5d0-c1b6d69b7fff T 187 | 66f6192f-6cc0-45fd-a2d1-e76f5ae3eab2 T 188 | 6770737f-ae99-4b5c-8adf-71b0706daf73 F 189 | 67e2335c-4303-4ee3-9bfe-ecfe65662629 T 190 | 68705ca0-5e04-4518-aec3-d6829d003570 T 191 | 6899818d-a6f5-4a18-81d2-047d84ee28b8 F 192 | 68998c38-2d26-48b4-af53-d64dc4f7dee2 F 193 | 6943fd00-8381-4dd8-a39a-3ba9851e92a2 F 194 | 6944b893-6194-4bb6-b2bf-7dc3600cd3ef F 195 | 6965d9f1-b8ae-46c9-8859-3189fbc1dd14 F 196 | 69fe6f86-b594-4521-8ae1-4b1d4b7ed620 T 197 | 6ac3f774-d9fb-4796-b3e9-92bf6c81c084 T 198 | 6b58525c-da6e-4d0a-a3fd-457efa65984b F 199 | 6d5fced1-6e85-4d9e-88f4-e1459772d2fd T 200 | 6df4cdef-c55d-4a03-9055-3cbadcdd05aa T 201 | 6e09f9dc-0bf2-4c20-8ff6-bd898bbc4831 T 202 | 6ec0db76-23c5-44b5-80f8-ad7e2aa17773 F 203 | 6ec63232-a552-4c8d-9932-5caecefdace4 T 204 | 6ef46ac2-8d5d-4a5b-a4f8-ea055d0e745d F 205 | 6f40c8ec-2b95-47d5-9064-eda6a8ddf3fb F 206 | 6fea96d6-e47f-4cd5-abcd-08235a8e26d7 F 207 | 70f6c067-ea99-4acc-9fd0-ae4c53dffcc0 T 208 | 711d6639-7d82-4282-aac5-c901ac280d75 T 209 | 7162b803-bde2-4196-9383-263ba6dc9ad3 F 210 | 723c4647-5916-4575-a9bb-a0ea15852f8d T 211 | 72414bd9-69dc-461c-8034-b7d895eccf63 F 212 | 7351fca8-d39b-4af5-b23f-fdcd8da0120e F 213 | 73c32d08-e30f-476b-8d65-e661a9df0c3c T 214 | 73d4365a-8868-4d84-a6fd-51098b968170 T 215 | 7445a910-faa2-4f43-b650-d2f055bfb0d7 F 216 | 76525e81-9210-401d-a90d-5509f771c0e7 F 217 | 76cc7230-76b6-4763-9caf-22626b29c0a6 T 218 | 784c0a48-dbf9-41b6-a5ed-06f69cc12491 F 219 | 78ba4493-432d-4458-8696-7901cbe5e113 T 220 | 78d49398-9309-4c93-8ebf-9e16e4ffbd5b T 221 | 7998a4ff-a9f3-46b0-a291-55fe832b6522 F 222 | 7a01619e-0e49-49fa-afcf-5a95428f0458 F 223 | 7a2e6fb2-8e8c-436e-a1fb-520fcf20deab F 224 | 7b384b30-7ced-4784-9fc1-1a8e21fa0a01 F 225 | 7b9521ef-988f-45eb-9c89-b8528a7debc1 T 226 | 
7ba26ff7-d8cb-4d95-912c-38cd8223c983 F 227 | 7bc261a0-c7b8-11de-b279-bb42b9ac878e T 228 | 7c44411b-0296-4634-9538-0ae43b10a38a T 229 | 7d0795c4-a685-4ab1-9fd6-71737a87fe8c F 230 | 7dc0d8a3-57da-4090-a959-69a0aad10285 F 231 | 7dcd2156-75af-4772-bf1b-db606d534977 F 232 | 7e3daff4-428b-4951-8313-d7fb63831df9 F 233 | 7ec7c087-c3d7-48b7-98e7-8020ca203577 F 234 | 7f1dc982-02f3-4ae7-8fe8-d2cf6d079ccd F 235 | 7f513bfc-f762-11e1-a439-00145eb45e9a T 236 | 7fc558b6-19e3-4952-abba-cb8f5f422376 F 237 | 7fe3eb5c-42bd-49d7-a30b-82c353ef6575 F 238 | 7ff7a20f-f3b8-4a25-9057-069ae3417c49 T 239 | 80493a5a-d8d8-4ddc-a38a-d32c5bbabc5f F 240 | 80576d53-992a-41f3-a3e3-dfd0fa0242ee T 241 | 806df471-8b78-4166-a186-ba07c7c957ec T 242 | 80739819-f715-4bc6-9075-d3d9574561e9 F 243 | 820177a0-2e4c-11de-909a-b8a03c50a862 T 244 | 827a08ae-f762-11e1-a439-00145eb45e9a T 245 | 82ab0954-f762-11e1-a439-00145eb45e9a T 246 | 82cb293c-f762-11e1-a439-00145eb45e9a T 247 | 82d21102-f762-11e1-a439-00145eb45e9a F 248 | 82d32d8a-f762-11e1-a439-00145eb45e9a F 249 | 8325e0d4-f762-11e1-a439-00145eb45e9a T 250 | 8442816f-cf5f-4c21-9d3d-5c368b4006d8 T 251 | 84606c08-f762-11e1-a439-00145eb45e9a T 252 | 846296d6-f762-11e1-a439-00145eb45e9a T 253 | 84a649ce-ff81-420d-9c41-aa1de59e3766 T 254 | 84cca7ba-f762-11e1-a439-00145eb45e9a T 255 | 84d26682-f762-11e1-a439-00145eb45e9a T 256 | 852fd378-2356-4b27-be1b-e7c05aa3ed36 F 257 | 85e79b82-f762-11e1-a439-00145eb45e9a T 258 | 85eb6fc8-8e95-4621-9fe0-661d483ac614 T 259 | 85f86f01-b572-467e-98c4-6ca2027be5f0 T 260 | 8600a36d-b72d-4612-bfa0-b32aa08e424d F 261 | 864eb98e-f762-11e1-a439-00145eb45e9a T 262 | 867b3fb8-f762-11e1-a439-00145eb45e9a T 263 | 8795ab31-79ae-4a9c-b285-0b76e5c09b9e F 264 | 87b6acb8-4db2-4769-93ca-e086040dfe14 T 265 | 8806aad1-215d-47bb-868b-02ea8fbffde4 F 266 | 884cb99c-95a0-4dac-94dd-d57dc509c560 T 267 | 885a0b69-3f54-4d79-aabf-cc68df89533d F 268 | 88cc32da-a6c3-4a54-9a8d-ffbc3f749706 F 269 | 892362cc-9d86-492d-9fc8-c06541bd8714 F 270 | 
8a309be4-939e-4874-a6ba-6ebee4ebeaac T 271 | 8a397061-e613-46a8-bcbd-52195a7695b4 F 272 | 8ac24ec8-1e7b-40a9-a5b4-c8e3037a3431 F 273 | 8afb033d-f4dc-4a7a-86ad-953cf62d2870 F 274 | 8b708d1a-3021-4cfd-aa17-f9696317292b T 275 | 8b97e0ae-c803-4d24-8098-74874113c344 F 276 | 8bad411c-c00d-415f-a11c-3e004fd15656 F 277 | 8c3e7db0-5c1a-49df-af56-9cb84d8511ff F 278 | 8e4e5830-2f87-47d7-a265-3411f172a640 F 279 | 8ea4250e-0ff0-44f8-812e-bffc3b9ba2a4 F 280 | 8ec254cc-7da8-4055-b3f0-ded781a43522 F 281 | 8fe7fc31-775f-444b-b49e-2d59f62400e3 F 282 | 8feb9acd-cd5e-4ea8-9e60-da1a585a7d53 F 283 | 9223ae40-6d6f-458f-83f8-8d58300f5ebf F 284 | 93f5be1b-c517-4a76-8cbe-6e81a5dd4f85 T 285 | 94d8f41f-7204-4a89-82ae-e337710dd543 T 286 | 95db4db8-f762-11e1-a439-00145eb45e9a T 287 | 95f572da-a3a9-4706-bccb-03521079a520 T 288 | 96c6e82c-f762-11e1-a439-00145eb45e9a T 289 | 96d36be8-f65b-4bce-ac40-05ad8cadc9a1 F 290 | 97e0aad8-f746-4925-be9f-c9961ec8d3f2 F 291 | 97fdda72-823d-4109-b9d0-1e3d62e567c2 F 292 | 981e18cc-84bd-40e1-8f92-33b7330b3260 F 293 | 98e0629a-678b-4bec-98ef-fb097262bf18 T 294 | 9a0983a1-55dc-4492-bd6c-4085a36dcddc F 295 | 9a10be01-3d0c-4f6e-9811-0627bd7a210d F 296 | 9ac8503c-5163-4910-a4e4-c17887995c59 F 297 | 9af2981b-cc16-4c0c-86ae-ce04c279dbc6 F 298 | 9b5720a2-99f7-4e29-86c8-c028e67a86f2 F 299 | 9cbcf2b3-448f-48ce-9510-39e25c6234a8 T 300 | 9cc95cb1-d97a-4c35-9d0e-5de8a6e2f134 T 301 | 9ce80b03-2e8d-4c69-b842-2702ef5aae59 F 302 | 9db7cf84-85b0-4fe9-b3a3-2bf622ad4fcc T 303 | 9dbb55ce-0b38-4468-9172-40c8481edd2a T 304 | 9e77576d-8547-44ea-932d-ff4eeda93c60 F 305 | 9f113fae-1666-4bdc-99af-4d0c94beb936 F 306 | 9f1c80ed-aa0f-4c7c-8679-6a31467d8fe4 T 307 | a0998d3b-4a7f-4add-8044-299092d9c63f F 308 | a17b720f-8e07-47a8-903f-361d4a348199 F 309 | a1bd8c5d-cfb3-4d55-943a-cebcd6c32be6 T 310 | a1d63dd1-5456-450c-afc8-25a62ffec50e F 311 | a1e87da3-fe4b-44a3-a1d4-23923aab526d F 312 | a3d5f6ba-034e-4167-bd8a-1fb80018ae61 T 313 | a416dc00-2e4b-11de-9099-b8a03c50a862 T 314 | 
a451e2eb-f70f-49c4-b1ca-fe78e65bacbe F 315 | a48c2e76-4984-4540-9bf6-f4d579ed7d99 F 316 | a4c3fc76-dafa-4348-9a0f-2efd8b33c2f5 F 317 | a5e2a5e4-c8c7-47f5-9f8a-1b7716dc3cd2 F 318 | a621b3ba-8415-41f2-a4af-7ec9511ae868 F 319 | a72ac6ef-6f55-4b17-a1d1-b213d5bb8977 F 320 | a7c1e188-5a04-47fb-8fe1-6a3bcbb1b98c F 321 | a814f323-ccc0-44b7-b188-4c155867deb7 F 322 | a8702089-c388-42ea-aea8-8c0c1ba19193 T 323 | a8742f30-c7b8-11de-b279-ebdd43e85d4a T 324 | a8d08280-1def-11de-be11-b8a03c50a862 T 325 | aaebba56-769a-4564-9b66-a7f4b566e25b T 326 | aafc612c-53f0-4b74-ac06-5c5b96fcb342 F 327 | ab2684cf-62e5-4f87-b57b-6cd68b048606 F 328 | abd43557-5afd-448a-86cb-8502b368d51b F 329 | abf09c39-7e71-4d6c-9787-052c2d3e2842 F 330 | ac1a6843-ac40-4073-bfb1-8ffaa4b73b66 T 331 | ac4b4e0a-3ed5-4162-a040-1315e173f7e7 F 332 | ac9ccbe6-263f-41a8-a865-7979712bb2b4 F 333 | acb08fed-85f6-482e-a8a5-f04b253ddfb8 F 334 | ad2e51d0-1e04-11de-be11-b8a03c50a862 T 335 | ae1de9ab-5578-47ef-93ae-f9df824930d3 F 336 | ae8278d8-b34c-43ad-80cd-127997c3dd68 F 337 | aea541ad-d8fa-42df-b5ed-ac0f26c977a7 F 338 | afa36eb1-34d3-4acf-bf5b-ace515ea0f74 F 339 | b01bffc2-67b0-4441-806d-65c2cff690b5 F 340 | b0b35c6e-54c2-4440-a3f3-5800edaf6458 F 341 | b0dacaa0-6511-4d21-9d55-129bc400831d T 342 | b10b4c57-e186-4699-8e1c-0350c4993972 F 343 | b124e1e0-4755-430f-9eab-894f25a9b59c T 344 | b2334c74-b73f-4cfb-b088-683119fa3c6e F 345 | b4e21aec-30ff-4867-8503-2bf8de3a4fe7 T 346 | b52cb0c3-c024-46c4-b7b2-d635400ff6d1 F 347 | b5345830-c7b8-11de-b279-99d156ba6477 T 348 | b6242377-797c-4f9b-b60a-25626bceb089 F 349 | b6ca9b0b-b6eb-46c5-b623-cd479a71e566 F 350 | b70121ef-b7ea-4316-a05b-abdf30f5ca09 T 351 | b76c7793-e18b-4825-9232-a20eff625cc2 F 352 | b8bf1fd9-e2d3-444a-a0b3-bc02794d961d F 353 | baf21ac6-5381-47ca-822e-22cbecf600dc F 354 | bb72c976-72e6-4870-8fd1-0b2305e2b7c0 F 355 | bbcdfbfc-d1d5-4ba1-a56f-bc4c96150a2f F 356 | bc1bc7d8-8f63-46ae-bdae-f34d8f3d3c75 T 357 | bc3ddea6-79c3-4c77-a962-62e9cbeed61d F 358 | 
bc94931c-4197-42bd-9cac-32c09e3959f3 T 359 | bd7a358b-f7af-416a-be49-c1e55a9fbf46 F 360 | bd92e852-0fec-4679-82c2-b7e4ba66c48e T 361 | be2af664-2990-4153-99b5-d92bbd8cdb0e T 362 | be7ebcf3-6053-4e00-9c07-989cf2d56b2d T 363 | bf1feaf4-b23d-4529-9c47-ac6176c70afe F 364 | bf7d3b45-f000-499d-8dc6-e0d8f4c0d5e2 F 365 | c0a19d95-471c-44d6-956c-df9b1db6995f F 366 | c10dfc42-db10-49e9-9b32-2fcaabbecb92 T 367 | c1ae1c90-d21e-49d9-b5a8-6605c07c8364 F 368 | c28743cc-fcaa-4e80-8c42-ece6a20dea70 F 369 | c2e643bb-cf05-4089-b6fe-b3389b5d1c44 T 370 | c35d6000-c7b8-11de-b279-9fe72ba677dc T 371 | c4a243a3-cc76-4e84-9f28-8a3c919bf0f4 F 372 | c4d742af-8f18-4f71-90ea-91f1645b9b28 F 373 | c5645d41-0978-4f99-89f5-317351335fef T 374 | c585e6fb-fd76-426e-ae01-a32dc9de5689 T 375 | c67decad-0b17-4383-9f12-1a87f6a6a6ca F 376 | c6bbb6ef-ad16-4f3c-99e2-f693760173e0 T 377 | c779b049-28f3-4daf-bbf4-0a40830819b6 T 378 | c8378949-f3f6-48b8-99ec-b646807deb59 F 379 | c84310f0-a629-47ff-96b5-51373acdfdcc T 380 | c847fb48-d604-4a6e-91c9-4df147f43e14 F 381 | c8a26d4e-b5a5-4dd9-9fff-7cd31779be93 F 382 | c93574ce-39ec-4d73-bd9b-61ab85ae6b7c F 383 | ca435754-9190-4a87-9b2c-c21563898294 T 384 | cb1a521c-29f7-4751-8661-898d16d0caa6 F 385 | cb6e66f1-3056-404d-a341-bb856762c57c T 386 | cb7c2913-e387-4fbe-9e57-a2f24ed3d88c F 387 | cbc9dc02-6315-4daf-add2-d132ace4662c T 388 | cbe4cdda-2045-415c-8968-039e389b7fc7 F 389 | cc488b41-4e98-4dff-ae42-b44fa31bd442 F 390 | cd309a69-421b-4317-a61e-483333be3cb9 F 391 | cd67c18a-98e2-4b8b-ad07-358b02b7812c F 392 | ce2583cc-da9e-440f-8811-44a1f8230484 T 393 | ce58b7bb-fed6-46b9-9424-135499c7afdf F 394 | ce6c8d6e-f929-4afc-b860-57f8e3ceb7d1 F 395 | ceac3273-94ef-4793-9789-fb8154d43436 F 396 | cfbc0f8c-e6c6-4634-b012-9bf20c22e50d T 397 | cff41e2c-4f25-4d30-93cc-bb61133e0937 F 398 | d056f586-b5a0-494c-990b-854ddfe18f77 F 399 | d0df772d-78f4-4602-acf2-7d768798f632 F 400 | d114f791-b86a-4f26-9356-17b0645f41b3 F 401 | d303d758-eb02-4d96-9a74-bbed85bf4a73 F 402 | 
d3484430-8876-11dd-986c-b8a03c50a862 T 403 | d435e038-1f0a-4007-9429-4e55e71428e7 T 404 | d4b3cce1-4594-47bb-b1e2-1ace65458a11 F 405 | d5314de0-8871-4edd-9853-55d77eb09c79 F 406 | d546af34-1ce3-4ceb-a535-f88692357f2f F 407 | d59a4e92-9528-49a9-9b9e-ae776543339b F 408 | d59ae05c-bf84-44aa-9753-d4acf40a9bb1 F 409 | d6cc1310-7aba-4c80-90ec-45cb395592cf F 410 | d7688dd5-9cd0-4de7-8873-e76b709ee082 F 411 | d8144310-c7b8-11de-b279-97e3db606fab T 412 | d882e391-10f9-43f6-b0f9-29636396ba9e F 413 | d8d0c8e6-acc5-43cc-a694-1d69b610bc5b F 414 | d922b606-6c94-4d51-9277-36c9b03872a7 F 415 | d986d84a-d0cb-4739-bb48-021457ef7317 F 416 | da39fadf-94c9-4d9d-ba65-64750b58cfe7 F 417 | db09684b-0fd1-431e-b5fa-4c1532fbdb14 T 418 | db0a80b1-f4cc-4a48-a079-f293acb5a043 T 419 | db3c013f-00d0-4146-be65-86e89cbf1436 F 420 | db9efd51-4f8b-4e58-b2a8-875a151ee00e F 421 | dbf97ebc-f931-4b99-8b3b-6fb1375b21bb F 422 | dc9b4f81-f75e-44e8-bb19-845e665ccb81 F 423 | dce0d7ca-50b5-4d61-9a0f-dad4b55fb94d F 424 | dd223a19-c7f6-4847-9422-614c6e8bc623 F 425 | de8221ff-6110-46e3-aec4-b61133f5b38c T 426 | dedb8f2b-4641-44f2-bafb-057f9fe70ce0 F 427 | dee8edc4-b19a-11e2-886d-00145eb45e9a T 428 | def80289-b974-48d3-8ff1-7e91c82b5bfd F 429 | dfb05b7b-e477-4d0f-861b-043df5e2fd7f F 430 | e1966d34-2144-4a07-a6d4-b246d6f047aa F 431 | e1e134b1-491d-457c-9ae1-319ca933574c F 432 | e21fcd92-b7a4-42fb-9541-af2e85d490dc F 433 | e27843f7-0ba7-4730-a70e-293b9bb84e17 F 434 | e2929e2d-e162-4767-b53e-13d839d36d1a T 435 | e36d0997-2f51-4718-b684-16ec092ecd82 F 436 | e381b970-9b62-4664-a249-6023b6fa8ef9 F 437 | e4ed7202-d694-4912-ba7f-93a206a249c7 F 438 | e5386a70-12e7-4dfe-a307-dbd547e167fb F 439 | e5464dc8-3361-476f-b415-d4a3a7dda8e7 F 440 | e58a3de7-a22c-4fd6-92d9-119343547474 F 441 | e61f1cc7-d7cd-4884-a4b8-b41de46d101c F 442 | e6c97f6e-e952-11e2-961f-00145eb45e9a T 443 | e728ed76-9424-48e9-8955-82b23ed91e46 F 444 | e89f4cfd-5f73-4179-aefb-e33313d89212 F 445 | e99c1b37-f7b7-4316-9749-fc9d4f3dfc0b F 446 | 
e9b412fa-98fc-4ec3-a0d4-c837eaf683ee F 447 | ebdc9eed-2673-43fc-a4e9-f80de9a68bcb T 448 | ec4038fb-cb53-4ddb-96a3-f1c395f97584 F 449 | ed80a247-379f-4286-a348-63da32c9c047 F 450 | ed923320-2e4b-11de-9099-b8a03c50a862 T 451 | edf03db8-68cf-48ac-a545-15f89c88e113 F 452 | ee3e4241-ae07-4e5d-9966-d432ec0cda79 F 453 | eea3ca22-7cf1-42b8-9381-20defc2f109e F 454 | eeb765d6-8e53-40d7-abe0-bd6c1df83943 F 455 | f0268e26-de5f-4ef9-b62f-1851e6a4370d F 456 | f090e47d-7e22-4b83-abae-293786ee320d F 457 | f0f8e840-1df9-11de-be11-b8a03c50a862 T 458 | f187d1b1-12ba-4a6f-87cc-89537d3f768d F 459 | f1e99a55-3278-4279-bf54-68e8b84dfc1c F 460 | f273f6f6-5b88-4110-bd13-6996014e1e34 F 461 | f2823a82-923b-43c6-ae62-dbfef95652c5 F 462 | f38d87cb-8acc-491a-9a98-b7346eb331b0 F 463 | f4c99a6b-4993-4bfb-acbc-952315842215 F 464 | f4ecc84b-b746-48b2-81d7-119c35fa7cda F 465 | f5ff3e05-fdd3-4ef8-8c0b-490965019feb F 466 | f648085b-8e1c-4a79-bc8d-f45d36296564 T 467 | f67d4560-51fa-4443-9ce3-acced5515eec F 468 | f74dfed8-d09b-4b18-8443-c20754410e16 F 469 | f7598557-adb5-4827-9c28-8025da802dc4 F 470 | f794b231-42de-4008-ba8e-809e01ee7785 T 471 | f7c30fac-cf80-471f-8343-4ec5d8594661 F 472 | f885c8c7-8312-4e4d-a9f4-7e5b30a0a2d1 F 473 | f885d3e2-0310-43f5-a902-50dda62e6a68 F 474 | f8ceb4e6-82ff-4325-afc8-eb5e64b5f842 F 475 | f8f683a8-ab6c-4c4b-a6ec-53b23d5404d0 F 476 | f933bc72-4600-4201-8454-c85cf0c48545 F 477 | f99a4d39-ac4b-4c79-baa8-1f29c6e6916a F 478 | fade3e7a-82c5-4652-b482-03dbd1510b18 F 479 | faf313a1-9ae4-43f4-bfc9-974281feac0e T 480 | fb7d56ee-11aa-454f-8542-89e2ce0fa976 T 481 | fc5d68a6-b511-4ccf-bdeb-84c209a4c824 F 482 | fd1a7c99-f7b8-4887-b36c-5bdbd13eae3b T 483 | fdf097eb-1576-47dd-9b79-1943be6e75ed T 484 | fdf17dd6-44fa-409c-be7f-8a6a86c3cb76 F 485 | fe0fc5ec-e771-4afa-ac08-2211e4f9ff12 F 486 | fe3265d6-8723-4753-800b-ca1850d66d3c F 487 | fe95dd88-c8dc-4e09-804e-c36dd22de761 T 488 | feb41318-374b-4ed6-b61e-0369993abedc T 489 | fed2ec12-dce0-4f72-8b25-c6c2fa64a0ef F 490 | 
ffc8f6a2-0bb4-45c5-a978-9064a52c5e02 T 491 | 2d1a33c3-f278-40d2-a662-e0d3369658e2 F 492 | df9dcbc9-f3d1-40e2-9011-02babb0d98f6 F 493 | 052596c5-d27d-4c4f-a211-64c0f54d58a1 F 494 | 46d4f1b8-52fd-478e-aad0-43a933994b61 F 495 | beefc0e5-520f-446c-8d7a-2e1c35394144 F 496 | 8918ab8e-2534-4a3f-8469-67407dd166c8 F 497 | ed5fc1c1-5a36-4ae0-a208-b7b89c8aacf7 F 498 | 3187332f-77d0-4d34-9840-f26bcdb98a82 F 499 | 2f0f6f6b-b7dc-4cf0-9ede-107627044722 F 500 | 7afcfc1d-b370-4b00-823c-e9308f1fb5ca F 501 | 34f447ee-6521-4e05-b7d5-f18540a9648d F 502 | 0b394791-e0c5-4f94-bc8e-bbfe5c32920d F 503 | 148cdfe1-c323-4108-9c2c-547c11279a2f F 504 | e5c9ca65-d152-478e-8593-b626a6802629 F 505 | d3bb4a63-7277-482c-b1e3-413301720130 F 506 | bf41b935-afd9-4ef5-8301-447841169eb2 F 507 | 4c13483d-a2ac-4c61-9087-c4e1a3c7b91d F 508 | efc3aee4-0153-4a6a-9815-4e1bb51f786a F 509 | 8a4c7a23-7732-4ba2-b51d-c2cc885ec44c F 510 | c1442cb5-7a32-415e-b459-f6719a01e733 F 511 | f73b742f-69a5-4bb4-9423-53a172d95073 F 512 | 3484c814-c0f1-453c-8501-50d4846c4ba6 F 513 | eeba8c4c-f059-4c3a-8f5c-38f1e8622bbb F 514 | 1570db97-7b8d-4f12-9c3b-10c731460ca6 F 515 | dbebf070-6954-404f-af12-d61be85a99c0 F 516 | 1c1c6cff-9943-4e9c-bb7b-0cbf5be39c21 F 517 | 1f285f71-26db-452a-a499-fb6657c4ed0e F 518 | c6cc2da8-8cae-4540-b6ad-5fb2373a537c F 519 | 6378806e-388e-4293-b7f4-775548da77b3 T 520 | f305217e-aed5-4b03-b0a1-7d69ff760d35 T 521 | 31f97742-c165-49b4-92ab-9a46b81e0af2 F 522 | f240817c-23f3-4a8f-b32e-97833663fd4d T 523 | 5c5e17e7-fbc1-4452-ac55-b8876e10e289 T 524 | 2ceb7556-fbaf-4a3e-97b2-34696a48a0c3 F 525 | 196e6026-9979-46fa-a260-95a91336d095 T 526 | ce861efd-402f-4c10-a9fb-30ad0426bd15 T 527 | 54facf62-bf0c-43e7-9972-4fdc407fa6c5 T 528 | 147834a9-fbff-4357-b8b8-e755c62df2ce F 529 | ee74ecf1-7acf-4c5d-8dcb-43e09fae474c T 530 | a542665f-1b92-406e-8d92-072fcba576b2 T 531 | bd8ee748-4ae8-4956-98d9-c1b8e8b2b9c9 T 532 | d7cc19ba-7785-4131-ab63-3328a99ecfab F 533 | 6de8fe45-fecc-4957-a2c1-9cd23879d215 F 534 | 
a7910107-b902-401c-989a-3b4e70c9070d F 535 | 77c3a178-a140-4a5d-87af-1f29e29f8330 T 536 | d91b392e-bb9d-4fb2-99d8-6053e1ade94f T 537 | cd6bb071-b4c6-4e7a-b328-da53662128b1 F 538 | 658ab0a2-69f3-4094-a2fe-3834bee063c8 F 539 | dbca12a3-a1d0-42bf-8535-c30aa349faa7 F 540 | 6dc35c87-ff97-4708-9e45-6f764ddc1ef3 T 541 | f145c27d-b1c3-4136-85b1-c21674d2d1c3 T 542 | ac8e5183-e233-4168-96a5-67541049aa67 F 543 | 9efc2821-3026-48be-8018-5a0f75f25706 F 544 | cf7d6c01-309b-4545-8319-3d53b1e8bfd0 T 545 | cca13f2c-0d2c-4c2f-93b9-4446c0cc1629 T 546 | f1a4ce9a-97cd-4d35-bf5e-561f2f1c6d91 T 547 | 37e094f3-dcf2-469f-93a2-c4b9b5fa7275 T 548 | f83746ee-4cf2-4e60-a720-dd508b559794 F 549 | c791b255-a184-4600-b828-ef9d4092a212 F 550 | 7e1d9b3b-a73c-465a-aa55-a70bfa3138f3 F 551 | 30eca5bd-6a4f-46de-9e6f-d6ede90850dc F 552 | 6baf2807-2cb4-48cd-aede-da0503b13003 F 553 | dc55fd8a-47af-47fe-a59e-a2890c72fed1 F 554 | 6e1af587-c4f3-4526-bedf-9ee5130755ee F 555 | c3ef6404-277e-4b8b-93a0-588e9eab643a F 556 | afac4a15-ae13-42b0-911a-d9ec445e2849 F 557 | ea53b031-75b8-47c7-8db9-4f59c58b000c F 558 | bc7c19ec-eefd-403b-931a-16a2ea1e71fc F 559 | 263bbfe7-f643-43bb-b448-f0705d1c0e18 F 560 | e58dbe2f-cfc0-40c2-a04d-e45a7e876980 T 561 | 7aff155c-1c15-44d8-adf3-4d2ecb842bb9 F 562 | b2fd674b-5eb0-41e9-8112-cd0f2c207812 F 563 | 22767367-9a4d-4eaf-90d2-baaa2d9befab F 564 | 4a781c12-de3c-46ee-81f4-c048e078b09b F 565 | ec3800df-fdcf-4925-a350-d7de3f1f9597 F 566 | 87a311d8-b1d4-4791-b791-adc7a84af774 F 567 | 8e45fbf0-d9c5-4527-9980-fa3188387549 F 568 | 98c27c36-6190-4196-8250-df29faaf1728 F 569 | 4e287c72-c3f3-492c-ba43-d0fe8a97bba1 F 570 | e82f2a08-095b-432c-980d-b10e9cd9370f F 571 | f3b40fa3-d59c-4149-96bd-68f9d1b8390c F 572 | f77c791c-51f8-49f1-ab01-294b7301fadd F 573 | c33ef967-201f-4bda-8ac7-8debe58fa656 F 574 | 5411820d-a4e0-4a7a-be0b-d73dde0db121 F 575 | 6297bb5f-e3f0-4fda-90a0-3be740029c5b F 576 | 29498dc2-56fa-4b9c-b216-a5b5297511b5 F 577 | 1f1c8776-ab46-40a2-85d7-97fb4d6391aa F 578 | 
3c442521-4f04-479b-8bf8-443a751807ab F 579 | 0ac88789-10f8-4a09-9f08-675043501d50 F 580 | 65dc71be-6965-4c4f-85a9-92bd3bb52196 F 581 | a58ff47c-b609-49c7-982a-7f0194b13472 F 582 | c3f2d009-a95b-40dd-8f9b-bad1bc54e8e7 F 583 | 5299d81c-113a-4c24-8f56-8deccb1fa4e1 F 584 | 196d4689-8cfc-40e3-adf8-0e9b35c513c1 F 585 | a4f3eba8-d023-4762-88f8-fcc6e81ce74d F 586 | 15525510-6eeb-4e98-b07f-7dce28a90940 F 587 | 5a455e8a-51b2-4ecd-9247-cfd89f9cb768 F 588 | 4487d0be-178f-412f-b2fc-bb4aabcd867a F 589 | 7adf9745-c840-484d-b4b6-fdf097d96f77 F 590 | b185d466-1a87-41d0-8e75-55a1bbba190e F 591 | 02396af3-59b1-43ad-8a54-5029557825e5 F 592 | 90fda018-0a42-4bf1-b3fc-a37c5b3e4e80 F 593 | 05841fe7-d8f6-418f-ba1b-3091128d8ab6 F 594 | 33faa86f-70af-4d16-ab3d-838c0da61266 F 595 | aebd3dab-0a7c-4bd7-b08c-397a462cbd67 F 596 | 44481e05-53f7-41e6-b002-b70933dfa921 F 597 | 2087881f-6373-475e-81f8-100e32176c90 F 598 | be906189-e80e-4fbc-b675-af1834558503 F 599 | f5fd4e7a-c977-41b1-b77e-875ad588e5dd F 600 | bab19599-89cd-41ce-bbcb-01663cebd277 F 601 | f8b2c677-b40d-4c0b-9a5b-15c362d9e063 F 602 | f17d98e6-85ca-4f4c-8920-50c68166fcdb F 603 | f307cee9-2f0b-4b3d-8774-f86cc17e60c8 F 604 | b1c50b31-0ede-4a35-9030-1aae35a0c8d5 F 605 | e5e89b4d-bb7c-4dae-b1b4-924177e95c2b F 606 | e9c27954-85d1-4a87-bce4-1715bd606268 F 607 | 8335054b-97cf-4b0d-ab9f-e5fda37cc64d F 608 | 21a0cb45-f0ec-4901-a868-cd45abe74601 F 609 | 11cfbde1-bb23-4937-bc98-7aa83d61efe0 F 610 | 1ea32a69-06ec-48d4-9b00-6e954ceb7828 F 611 | e6a7eff1-f9fa-444e-b3f5-d6f3c4bfe1b2 F 612 | 233e1bdb-3141-4367-b364-1f88db6b65a1 F 613 | eb7681c5-5c9c-4e28-954f-f328991c7004 F 614 | 94386f28-e94a-4e67-8012-b656c6007f2b F 615 | 2d38cdfd-8d55-4154-8670-85c1bf9fa39e F 616 | 390dcccc-4ee1-4ee1-8607-3e4f584bcccb F 617 | 52d51b66-5ed6-4afe-a7b5-d272ca11c3c8 F 618 | c93e0fd0-46a4-48e7-bf68-fc52e2b5f2e9 F 619 | c1eb73a0-cc4b-4ffa-9a95-9a2332e48805 F 620 | 98faf566-a0db-4f30-a814-9f9c1125c594 F 621 | 20392dd8-d632-4e63-a189-59bbb270a153 F 622 | 
7086bdef-e377-4ac5-872f-751b1d614ed2 F 623 | b50dfef4-3f34-4829-9d6e-edd5fcdb2b29 F 624 | 1e61b812-b2ec-43d0-bdbb-8534a761f74c F 625 | e0fa07ae-74d9-44b1-8e05-5785482ccad2 F 626 | 87f42a91-8dfd-44f1-aabb-8b6ef14aba82 F 627 | 0b94e07b-898e-47fc-b95f-31a5538f326b F 628 | 00ed18ea-4993-4809-a656-62363baa4808 F 629 | 848586a4-a07b-4974-9f12-e1bbe0736a21 F 630 | 74f92761-3a24-4d85-9bfb-00b1fee0119d F 631 | f86a681d-7db8-483b-819a-248def18b70a F 632 | dc3bfd08-0141-45ae-9603-91ac50cdc33b F 633 | 74f3bc4b-7b4a-47bb-b64e-71de754c3dac F 634 | 00b3dec8-a7f4-4aa2-b8d5-feb936368e64 F 635 | 552f282a-92a6-4f41-95ef-c6537026fbeb F 636 | 35fd702c-c615-4fb2-a7ed-5ab0e1f0946c F 637 | ea05dcbd-0d05-4785-9313-0ec68cb90426 F 638 | d0c7a825-fbcc-4ca3-8afd-178f4c458ebf F 639 | 038b6cf8-4c53-4bf3-961e-9fe6c57b813e F 640 | 1b9eaa10-8441-481c-b50a-391ba168a86f F 641 | 2abba580-96a7-4a7c-bef9-84764f755408 F 642 | 5e015b88-67d3-474b-8e96-a537c9ef0210 F 643 | 06a00852-f764-4fb8-80d4-ca51f0918459 F 644 | 52da118d-e74e-420a-9b6b-1a6d7bc3028d F 645 | cbe0a4a8-817a-4edd-840f-a46e35121e69 F 646 | c7525c7a-0815-4510-b0db-4150d271cda0 F 647 | fac1936d-3df3-4558-9abc-aef194482d82 F 648 | bad17466-7611-4b68-a80b-9b70e555add0 F 649 | 9ca7b5d0-d59d-412b-bf6d-6af74efc03a8 F 650 | 2747a081-2994-409d-963f-a746af097c3c F 651 | 0375fd40-83ee-439a-8ad9-62375d769ac9 F 652 | 11d35c65-edea-49c9-9be0-64cb1c4d384c F 653 | 99fd5638-cff4-460d-9436-f2cd658a2cde F 654 | 9baf254d-aac5-48a2-b1ad-a0672f4f5eea F 655 | 168d155d-681e-4a44-a4c0-47ef151ed434 F 656 | 30ceac7b-3452-4ab1-a2fd-c6460ac7d1d3 F 657 | c64d7750-ae5b-44ad-a02a-7d2f34c4f792 F 658 | 0ffe7e6a-f76c-4f25-858e-9f7ba49b1833 F 659 | 4c6575de-cac0-4bce-a71f-e7f73984717a F 660 | 930a3876-8c30-44c6-b9cb-90e554872234 F 661 | d1df9105-e008-4f51-8506-3ade0f3f9a28 F 662 | dbeb51aa-7b79-429e-920f-edfabd3f64d0 F 663 | 561a0884-2a43-421c-bf74-15f7563842dd F 664 | aff9a2e1-24a0-4e32-9a90-d61c98be0ef3 F 665 | 68de352d-2c3f-4822-8734-b74d67721aa6 F 666 | 
8a694351-76df-4a9d-b52a-b02d12237f3a F 667 | d0185a04-e83e-47b6-8f91-eb36b86dac1d F 668 | 042f54ba-58de-454d-a9f0-ee548e2fe438 F 669 | b99d0a5a-c490-4840-a786-aa27b05cc49b F 670 | 4a70d544-30a9-4eb7-9461-f31a84ccb32c F 671 | 8b640f68-7f07-4bf6-82f1-99d9f82e67fa F 672 | c02e6c8a-192f-4673-8db6-2f861e22005b F 673 | 574d0235-1dfb-4f76-aa1c-26198d9d6e25 F 674 | c6282a20-b4a3-4ecb-ba98-0ab59ebeb509 F 675 | b33ce172-d55a-449f-ae8c-63bd94ebee63 F 676 | e1ae2cbd-3eb0-48a1-aef6-014d677e0829 F 677 | 43325c51-e94d-4ccf-9a17-f61f2fcdee6b F 678 | 90a9d9f3-e252-4753-80ab-334c98f0c485 F 679 | 566f6340-eb57-4b67-9f61-3f296ee1c2c1 F 680 | ed524224-4cd7-4310-8f46-e434ae8d3b32 F 681 | 2e82f997-f629-4316-a2b6-c4e5d238146c F 682 | ba395ddc-c1fe-43e6-988f-f841cbcda0eb F 683 | 02ab589f-d822-44c5-9dc4-cca38e8e9823 F 684 | 038cb34c-a08e-48aa-b09f-e8b386127ec1 F 685 | ca415041-8434-45ce-b6d3-c6b0683a40b5 F 686 | 2e42b3dc-267f-444e-a917-080c254fec5d F 687 | 531c20a7-cd40-4a32-9d54-880a1769862f F 688 | 220f93b6-7807-4641-aaaf-12cf27f908d5 F 689 | 5841a257-8f7d-433f-8882-5a10e10fd7e3 F 690 | 3b7c6871-7f02-4990-a684-c43800ce373d F 691 | 36714b50-dc56-455a-9ba7-9560e082d653 F 692 | 0bd97889-05e6-4391-8e28-ae718fecb68e F 693 | 8bcb10e8-12f6-4fd7-8884-90690ea1b9a0 F 694 | 5e881b29-46a1-4787-8c79-b0591bf2ea0f F 695 | a5857806-ee41-417e-b7dc-2152d8e61c7b F 696 | 9219fcf2-33bd-486b-adaf-86ecad701309 F 697 | ca8c4575-a763-4271-9376-c962981d402a F 698 | eae096ac-c585-4234-99cb-b32e7c517604 F 699 | 5f8cf94e-ba98-463a-b94e-dc9df01f02c0 F 700 | 98ba9f24-4c60-47ea-9ce2-22402252f10c F 701 | b5b2612e-4ab1-4797-a827-158b56d8c5c8 F 702 | c9147f19-8032-48df-8dfc-ee9571956634 F 703 | 4d3b4683-a3b0-4145-9315-78a99af7ec5f F 704 | 32c59955-2e73-4b5c-817a-70ce525f9b7c F 705 | 6ba74bc9-526c-4039-a21a-f5ac80dead8f F 706 | 57251ff4-716e-4457-b50b-63e90c6ce708 F 707 | 0001480b-76ca-4f30-86bc-f4292481554b F 708 | 2eff1933-b05e-4bef-ba69-7630ae2b4f1a F 709 | 78fbe592-7ce6-4778-b5d6-572687de652a F 710 | 
a1c11083-604f-4f66-b20f-e1a6fd967e10 F 711 | e1016685-f922-457e-99ef-778e5c68d4a5 F 712 | cb7b533c-104f-4cd9-af7e-2007f7d5ece8 F 713 | e2620494-d46d-4b0a-8fc2-4fcf860e622d F 714 | 20900294-85ce-4e0b-aa56-d9a9494bf189 F 715 | d18c9825-fe6d-4f9c-a3e6-996b13a34807 F 716 | ee57ec5e-1fcc-437a-9783-fd58a2ee73d7 F 717 | 5a266987-02a1-4444-97bb-a63dea1a0081 F 718 | 59ffe691-64ad-4472-a46f-6b31ae34d412 F 719 | 1199003d-8b75-410d-a843-7ff64b21e330 F 720 | ae7a982f-a42b-4524-8c82-7c4f3bb35a46 F 721 | 442d5fde-1001-41bc-bc4d-3559266cd01a F 722 | 0d6fa0fb-a1ab-422a-bdad-dd880651f40e F 723 | 93554762-c2cf-4ee4-a475-7d1d4dc01372 F 724 | a05ca6ce-701e-4518-8412-a85427fdf5f6 F 725 | 157e7669-3074-4446-bc67-8829d483d25b F 726 | cbe7e37b-3905-4479-9c89-de4eb6ece0bf F 727 | 05f109ca-e39a-44e8-8774-85ba6d0caa8a F 728 | e67c686b-c880-4a82-887c-d8d85f94d6c8 F 729 | e030b8bc-bd4d-440e-80d8-12b764e85db5 F 730 | 26b38a0b-adb7-4b2b-a55b-ad632b5a7777 F 731 | a40a3f1d-8248-491f-a7c4-757d8a47ec9d F 732 | 88cd034f-34a5-4a52-990d-78e085832ece F 733 | 11a962e1-9109-4f7a-a56f-6783de1dc8cc F 734 | 827843c0-e60b-4283-b79f-6fa88482736d F 735 | 8a2ba058-6ca8-4f8a-a54a-b79bb225be66 F 736 | 25852bb3-3f94-48ee-9be5-f86544133813 F 737 | 100e2658-58f3-49d7-b15e-0da98d2b6bd9 F 738 | c151307b-d8f4-49e3-878d-ea3ab8f3bd65 F 739 | bd702991-e1e7-4770-b8ba-aecd8bdf9e63 F 740 | 30a29818-d6d2-4116-b59b-1facb9276526 F 741 | 396f6ea3-e250-40b9-9810-cdf2ab2f0f79 F 742 | d271ae38-a8eb-46d5-875a-2a851818785d F 743 | 351c9d13-5c04-4cd0-99ed-18e98327b955 F 744 | 2e0e2d1e-4903-40f8-9815-16a03de3955f F 745 | e86233df-a477-4776-8e30-6c541fb41e5f F 746 | 6461be2b-49e4-4a4e-a0de-a90eae602e78 F 747 | 6b03c5e7-05fe-497b-8156-6d564ecdbdaf F 748 | 9c30b6b3-177b-49f4-bfeb-91e1da24ecd4 F 749 | cf8c1737-4702-4a16-93c9-e3da9ccece7a F 750 | 57b39cb1-45e9-4444-9eb3-9c489e94d63e T 751 | 2dd51d10-85bb-4e3e-82b4-1dd0e8869960 F 752 | f1eda48e-f4b2-45ea-828e-eca8e3997220 F 753 | b33b7703-f922-494f-bc34-167ba41bf5eb F 754 | 
5dac2d5c-90fd-4c24-85e1-ee7543174ea9 F 755 | 0c8b3534-7666-44bb-9dc0-913834eb04f0 T 756 | 90b6055a-c12b-4c91-951a-7efbe184bca1 F 757 | c5126b60-6dc3-428b-b479-15a921199da1 F 758 | 11c00ed9-1c1b-47d1-9eb4-66a491c7d7b3 F 759 | 966b3e29-f1e4-4190-badd-100bc989788f F 760 | 3294d36a-987c-4dcb-8ecf-bd2082796f08 F 761 | 38fdd8b7-4e48-452c-8c05-673e6f6829af F 762 | acd76923-54da-4799-b4b0-cfe585c2c0b8 F 763 | d88ff1b1-8ae9-4211-93ee-5c5931f164f2 F 764 | d7bb3e9a-bf7a-44b1-81ca-72896a6d3b9c F 765 | 3805d974-905b-428e-be7b-a12aa1165f41 F 766 | 4b6d4f6c-e5d7-47bc-acaf-f507fb8a4077 F 767 | 01d9098d-7dec-4235-b783-c537478cd4c2 F 768 | 29ce88ff-5cf1-47fd-af44-8477c52c910c F 769 | 2c7c72b2-ac29-4be3-a95f-6b09e8f4b932 F 770 | 29e78377-34c3-4c91-8062-550069a92b70 F 771 | 3a229d67-06bd-4313-883c-0ca1094efab4 F 772 | 4b99d5d8-5d62-4f17-89cc-b1bd3bc242ec F 773 | aba8cc4c-fba5-44f1-be2a-60c3592dc105 F 774 | 1f7fa6e8-2e65-45ba-bc13-9ab6c707d13b F 775 | a65e824b-9189-491e-8e7d-8473570d1905 F 776 | 54853adf-7939-4b1d-8507-893c76cabe2e F 777 | a9aec242-8efb-4d5b-ad26-4dd4db12b754 F 778 | 9f5db154-686c-46d0-8b94-4ad4b528b741 F 779 | c1aaedc7-b5ba-4baf-9112-ec70637e242e F 780 | 165af79d-00c9-434c-8882-387d232924cf F 781 | 84e57e73-164d-4ec2-94c5-db2c0e481804 F 782 | dbe7a6d3-b852-4d0d-a91a-caf495556d3a F 783 | 1ee2f194-4c11-4212-a77a-28a4cfcd5c50 F 784 | 56a5cb72-db94-42f5-9f71-de81e56588b7 F 785 | 439e017b-c952-490b-9b5a-1e190e55bb15 F 786 | b87b2913-b9ad-4ec5-8272-0978994acce4 F 787 | 437fa56c-e7bb-4534-93c5-8af0bebd5d72 F 788 | 5108c281-71ad-4ad5-bbd8-160272dcd784 F 789 | a3d3fb48-90f0-442a-bd1a-d0876fc7972d F 790 | c293d351-7a9b-4963-b81b-b8dfa96fae1b F 791 | fe17fe38-29d7-48ec-a4f6-bda16a39679f F 792 | 8073bd4c-cf6e-42ad-979c-dc8b3ba1d422 F 793 | 6cb824fd-7c2e-491b-85c9-156f1c4f9b29 F 794 | 83c5db8e-ade3-4269-9f33-00f85d90355a F 795 | 3a6ab9d2-a0e2-4c16-8040-2e1ad912547c F 796 | f3a952cc-b6f4-4e2b-91f7-e2261377c0e8 F 797 | c497c84a-ba5e-439d-8932-b23df520e529 F 798 | 
f8d03ba0-f81b-482c-a7d0-bd3254e9e7e7 F 799 | c053c6ad-e590-4c69-a141-5fd9c8ebbe3c F 800 | 2a97ee62-de89-4c4e-8151-0cb2ec98ae3c F 801 | 076bbcbe-6c76-4c7c-8835-6dd346ef387a F 802 | dfde3cb2-99a6-40a3-a333-900ddcb17807 F 803 | 2d633622-d522-4c0d-93ae-a3940df3bbe7 F 804 | b9d90a50-3138-4837-b4b1-775f3526889c F 805 | 0c9234fb-f87c-4608-8ec8-0cc8a1f93079 F 806 | 04257f52-91ed-4851-905d-d5887465d363 F 807 | b3614f42-8f96-49b5-9659-4c421a983f02 F 808 | e62db8c8-4ae7-4bbb-9401-97b279cbf126 F 809 | c67df85a-9a3d-4b93-9625-d1f4b1e2d6f2 F 810 | 5c02bff2-9680-4df7-b543-3682b60e6b76 F 811 | fa90f202-2921-4dea-a16c-ba9f602e8ffd F 812 | 2f1f9ef5-07bf-4ddb-931a-930f251de793 F 813 | f31212ae-87f5-45e3-8c03-87a0cc5cb86f F 814 | 7c1373e6-36e2-486f-9e09-ef36cd12c6ed F 815 | ccddb599-33cb-4206-bbb9-d8dc641c70cd F 816 | 20e8d7fb-9b3b-43c6-a81d-8ebcd6c0ab11 F 817 | 3063fd20-83ba-4716-9a5e-07fcea584f06 F 818 | e6712257-e922-4bc3-8b5e-e567a0458ad2 F 819 | f05e070e-f907-46af-9660-d8fc0a5aa910 F 820 | 4a65752a-525b-4fb3-ae0f-5348aaa1e6d8 F 821 | 8dc279ac-38a5-4ae2-bdc2-d759a8b15203 F 822 | 22bd6f42-72a2-4a4c-9d14-a97f7efd556c F 823 | e3913909-2457-468a-ab6f-79ea2cac3283 F 824 | a49c6fa2-7e0d-4350-945c-cec9635f3920 F 825 | a799fd82-0b29-44d9-92e0-9fd71abd9d2a F 826 | 24168265-33c2-4342-a6d5-fad89ecf877b F 827 | 19c94964-189f-45f2-905a-d23747ec9652 F 828 | b63a7193-0c48-4648-a728-086157f26373 F 829 | 6c4f18a9-1bb7-401b-902c-037d0ab6bb2a F 830 | 6018941d-c361-456d-a615-c3d34a033f6f F 831 | b08b5ce4-b157-4fa5-b904-afa1ac982e1a F 832 | 8812c25a-753a-4616-9676-8dd1cf04bae1 F 833 | 6ffe6db1-ffe7-4d89-ab02-230fee59b3be F 834 | 4560023d-c8cb-42d2-bfa5-2fa893f9175c F 835 | c81d08e2-e648-4ffc-aec9-35ca10e570bb F 836 | 3150a0db-309a-4d10-82bc-f54b99a57050 F 837 | becc070e-edd1-4415-b819-6ad037bbb5a4 F 838 | 0991f9af-8ff9-4e34-a8b9-990bb522dc0c F 839 | 011f5525-bc47-4418-8377-2edcd7eb883e F 840 | aa4f7405-273d-4a27-bac2-611416f1902b F 841 | 7dff2b43-64f8-41f4-b022-8c371a6aef3f F 842 | 
fa48fff5-f29b-4e2f-8b71-d3d4a5a03826 F 843 | 4965c6a0-338c-4b27-8cf0-523abe237fac F 844 | 2a6eaed1-cc05-49aa-9c43-12c491747cbb F 845 | 87530d66-1254-4141-8a6f-0fc282037470 F 846 | 4c41cd76-942e-481f-9999-42744ea7b7c8 F 847 | 44e8b95a-1850-4507-8fa0-a3a07f5dd936 F 848 | 2e37d484-5ab6-4ae2-9e48-104af0e7123e F 849 | b9f1eaa8-03d3-458c-bf3a-ae883cd06ca1 F 850 | 6c827f0d-9371-447b-b42b-199ad19bdb64 F 851 | fc33cdf1-dd24-4593-af7d-b9df93268f18 F 852 | e314f3fc-ffd0-4a43-baa0-73d7a271e5f9 F 853 | a1b562d2-1f3f-4d2b-aa2f-11e5e49d4898 F 854 | 27acb6cf-a30b-4cc9-8953-95435c7dc727 F 855 | e9ef15ac-1078-44c1-bc86-f1ecc4115a68 F 856 | 675442df-956e-4714-8e78-903a1060f15b F 857 | 0e65f3e6-a632-435f-b188-0001c2a4eeed F 858 | 69acd8f2-7a20-4dbe-bf84-8b607304529d F 859 | 667455a8-428b-408c-8398-1f21522ff890 F 860 | c81d43ba-3cf6-4016-9c74-321e07074f6c F 861 | 9622906c-39e1-4470-8e55-388d3fe76c21 F 862 | 985e6485-14c9-4555-9c63-bb1b79c29da7 F 863 | 956c26d2-d114-421f-bf02-e9088a805a74 F 864 | b0023a4f-de9c-4a46-a021-222ddcc93d21 F 865 | 0f8ef13d-dc6f-4f56-9815-313818910e9c F 866 | 7d4ae541-a54e-45a5-8dc2-faa8a7c1613b F 867 | 73c0bec5-417f-4d9c-8084-e80b027a1d61 F 868 | da27a3b0-5b28-4e4a-9742-dfc77a1e539f F 869 | e03c0e51-f68e-4467-9f20-e6437c422c17 F 870 | e8e2aa50-c651-4370-a0b7-46532ec48a5e F 871 | ad66f5d0-f485-4d27-905e-3328a0f951fe F 872 | 01981814-35ca-4637-bba8-90ab22f2b68e F 873 | f47b0035-fc20-4ac1-905a-e0b79a1383d7 F 874 | 10a9ffed-8062-454b-bd7e-9c36b4006728 F 875 | dbaba076-caa1-46b4-86b7-e7ae1dcb74b7 F 876 | c5da77b7-7423-49a4-a379-f423ea0f07b8 F 877 | c68d6fdc-f921-47f3-95c6-ca9766280ad4 F 878 | 69f0f5c2-08b9-11e3-ab1f-00145eb45e9a F 879 | 6a122346-08b9-11e3-ab1f-00145eb45e9a F 880 | 80ebe7f9-00ee-4fe3-80f6-303729cf5bac F 881 | 9fbac577-9e5f-41b2-9edb-c2ed1d2c7020 F 882 | 2f46fe6f-b9fd-4dba-8ca2-2cb1368ceed8 F 883 | 423d9318-4dd4-4d31-81cb-27778c44a3bc F 884 | f3e4b261-00c5-4f3a-a5b7-d66075b7f3e1 F 885 | 62ad511d-d298-4fd7-80e7-f5d5bd32299e F 886 | 
0daed095-478a-4af6-abf5-18acb790fbb2 F 887 | f86f5466-8f18-4b83-84d4-d0edab4c0e90 F 888 | 1c403d2c-5c37-4237-9746-8ed539b9ba15 F 889 | 8c6aacda-2fd5-48a6-9b27-c673163ce2dd F 890 | e0ebf2a1-3656-468a-b0b6-1aa93ff43fef F 891 | 4b6f0569-9925-4a42-84a0-7d4ff5d13593 F 892 | b568aebb-b2c8-4dd2-8a59-ab74d50f8640 F 893 | da6a07ed-9eee-460d-9448-910f542c1a7b F 894 | 0214a6a7-898f-4ee8-b888-0be60ecde81f F 895 | ba0c03ab-fa61-4a3c-8db7-35c8c3454168 F 896 | e08f0546-0224-4b0d-886d-4ab05f0c87e6 F 897 | 06c7cc28-7705-4881-ae8c-37d373b988d2 F 898 | 990649d4-fc41-4823-8d4e-8e0a7939b591 F 899 | 995b2ca4-9b4d-4609-acd5-24d8297f0a4b F 900 | 646858f7-8620-4124-9405-279539aec76c F 901 | a837f452-43bc-4c6f-bb2d-db47b931df6d F 902 | def8d066-e920-42d6-bb67-dfca6980c7f3 F 903 | e553568e-2317-4592-8638-92e5ac93b40c F 904 | cb9beff3-a185-486f-975a-732251444158 F 905 | a578b33b-72f6-4833-a3eb-45d61f79f091 F 906 | b4a37621-7556-4705-9349-ede261feebe4 F 907 | 81275e16-f762-11e1-a439-00145eb45e9a F 908 | 81264d96-f762-11e1-a439-00145eb45e9a F 909 | 81251eee-f762-11e1-a439-00145eb45e9a F 910 | 8123fd5c-f762-11e1-a439-00145eb45e9a F 911 | 8122cb6c-f762-11e1-a439-00145eb45e9a F 912 | 8121aa20-f762-11e1-a439-00145eb45e9a F 913 | 81209a18-f762-11e1-a439-00145eb45e9a F 914 | 811f7d86-f762-11e1-a439-00145eb45e9a F 915 | 811e481c-f762-11e1-a439-00145eb45e9a F 916 | 811d24dc-f762-11e1-a439-00145eb45e9a F 917 | 811bed92-f762-11e1-a439-00145eb45e9a F 918 | 3b8c5ed8-b6c2-4264-ac52-a9d772d69e9f T 919 | 6a948a1c-7e23-4d99-b1c1-ec578d0d3159 T 920 | 7d28fe9c-190a-43ab-bdf8-a7235490bbf4 T 921 | c671d43f-7fb9-4b5a-a964-630bfbf47dd2 T 922 | 5e6b1784-7ade-4542-a61f-8adf9bfb0b7b T 923 | b2371835-ea93-4475-b0f9-e5274147863b F 924 | 3e0b024b-ea38-4e78-a97a-9c7544beeea3 F 925 | 0c7bd9e3-ded7-4de4-99ec-d5145361ff48 F 926 | c47f13c1-7427-45a0-9f12-237aad351040 F 927 | 39bd4817-9530-4341-a918-4f16b8db1419 F 928 | bcbfd319-8813-4b6d-b529-07dc5a6ccf56 F 929 | 8c3e2fd5-2c7a-4126-a5c0-b6f67c9911db F 930 | 
a968d7e9-5391-482d-8927-4b34b9863d56 F 931 | 85a3c886-3312-45c9-b040-4d7634653246 F 932 | f27b41cb-2f1c-4d1a-9b6a-ff64904356b9 F 933 | dfddad59-5bc5-4e35-8b35-334eed43bba9 F 934 | 6fa4f45d-c4e6-48d4-847b-afe42817bd84 F 935 | 6c9729f5-0d66-426d-ace0-5348ae2ce711 F 936 | 71efa00f-97b8-4dcd-a4b1-712b812d6c7d F 937 | 9770446a-f836-428d-990d-7f46c4d14df8 F 938 | 218522cf-b0ed-4b0e-8dd9-e543e3cd4951 F 939 | 7dd1dc3b-d3db-4a8f-8252-5f93c1b64231 F 940 | bd13138d-0173-4994-9893-2acbaa4683e7 F 941 | 04004ee7-4c21-47f7-b55d-267e6b8a1f76 F 942 | be41dbd4-e195-45d7-8d60-985f7eca716b F 943 | 3e8ead6f-7701-4afa-812b-000cb23481cd F 944 | eb1aa29c-65b1-432b-b66c-22e5fdcb52d4 F 945 | a20a695a-df93-4bd8-98dd-33e1a37e2f7f F 946 | f66a607a-6658-4855-aaa1-291bd31c6d82 F 947 | 265fb13a-515a-47f6-84da-88b294e88798 F 948 | f2c7dd7a-9199-498f-a0c1-7fb711598b51 F 949 | 9b12d595-11ea-4128-88ea-ed378eb9ea9a F 950 | 228491dd-bd53-4cde-a91e-378092cf90cf T 951 | 958b1d2f-2d11-4e94-a828-c8e2d2c013ca T 952 | 11a24c97-653b-4e4c-a524-a4c06ffab25f T 953 | f14f3ec9-c54a-477d-82c8-f7ba6925534a T 954 | 8a6ca920-216c-4731-a892-f5132570d9ad T 955 | 8ae09016-b819-450e-b8f6-c1f249110502 T 956 | 19671673-994b-4100-89ed-dc4cc18e0adc T 957 | 557146f2-4f90-43ad-a2eb-c4f9ace7a2df T 958 | 82e3cee7-c06b-40ae-ab30-b3c911a2b713 T 959 | d9870517-1d50-45f0-831c-a783922c22d3 T 960 | 6de6a7d6-95ce-40b8-a569-43b127a57bc5 T 961 | d89c884f-0661-4ceb-a531-ceae2374b915 T 962 | dc2d2c19-92b8-485d-802d-f7c8f1d2055f T 963 | 99ce2b9e-816a-47a7-ad68-b2915763a576 T 964 | c709bf36-4964-4771-90f0-c6ba4b351620 T 965 | 67f85ce5-4cfb-45ac-b05f-cac763d0b838 T 966 | d85e9305-e0e5-4e95-b7b0-e87a0488db85 T 967 | 869b574f-23eb-4575-a771-6ffd480dcdab T 968 | eabe9306-fd68-4140-83fc-554da92e6268 T 969 | d8c540f5-bb74-4748-9634-034ccd8ec52b T 970 | 8918109e-0d4a-4cc9-af7b-8e49d31df942 F 971 | d90f2b88-71d9-4cc4-bb15-880c4cac8be9 F 972 | 56c48715-9f8b-445a-98b7-cada12902027 F 973 | a6058a01-5880-41f5-9dbf-3eef8591f714 F 974 | 
bb30e03a-b746-49e4-bab9-decbf27abdf1 F 975 | d8fd5432-0cb3-4354-8be8-874f2c047f8d F 976 | 543c7a69-38bc-4506-9d20-aa10e4ae2876 F 977 | 0f670eab-9d21-4caf-91aa-d2cea1c22e69 F 978 | c13def4c-bc1d-4506-a654-8b4aadbfbd0b F 979 | 623cb501-8772-46a4-8035-ae41c1e6f934 F 980 | 2237b8cd-84b1-45d1-9941-bccdb277933b F 981 | 31cc8b55-0455-4faa-aa50-e135d1dcacf0 T 982 | 5d17a4fd-7249-46c0-a56b-1924d477b29e F 983 | a38c7d49-5a5d-4aa6-a64e-421178bd06d7 F 984 | b4a929e5-35bd-46d2-9ce7-e6ce5206eaec F 985 | f0c74a2c-4bd8-49d0-837a-92bb835fd2f3 T 986 | 983c4ccb-377d-412f-9eb8-6d96d0c620fa F 987 | db0fb078-4134-4dbd-893a-cf7749d33929 F 988 | 79a4574f-a80a-4bd7-9f61-a17c16ef606e F 989 | f04bedf8-9c2b-4816-a1fe-c68a1d607e06 F 990 | a4d333cb-3de4-495b-9156-41fe32c0d892 F 991 | ef7e0d45-c59f-4f48-afa3-53d9f545de56 T 992 | 241f9bd4-613e-4050-9fe0-2ff44b526c84 T 993 | 5fe8fc90-ed14-46c3-8bac-9dbd2d4b92d7 T 994 | 84149905-2fc7-4cc6-a9c8-f4186e0a62a8 T 995 | c01ddb0f-b7e4-4723-b630-7e0c2217230e T 996 | eda0e170-a39f-44fc-9c03-cce4f7417df7 T 997 | 594e1181-c38a-4f44-b78c-ceab50351cca T 998 | 212a4207-f330-49e7-8c84-51dccf3f1a8b F 999 | 7c67da7f-490e-4e8d-8848-7dc152dd4734 F 1000 | 71d46a30-f762-11e1-a439-00145eb45e9a F 1001 | ab47c4a9-4b6a-4d59-b846-899d9420f7d3 T 1002 | baa86fb2-7346-4507-a34f-44e4c1bd0d57 T 1003 | 2d680d46-d783-4ea7-94b1-2556cd653e36 T 1004 | cbd241aa-a115-4856-af66-fac5cb90f2cc T 1005 | 2aaf8ea9-0460-41d2-a651-3583479947c6 T 1006 | 172149d2-2dc0-43fb-a7b1-57e3e4ec34a2 T 1007 | 6b76dc19-eb87-4ca2-b738-3727286f1818 T 1008 | ed7d4c25-b00a-4b7f-b84b-1f5286bfba8f F 1009 | 465e33dd-59ec-4e55-b2c0-1edde45d757e T 1010 | a2e308bf-e9ec-4651-906e-956c963df0ca T 1011 | 990e845f-836b-4467-bc5f-46c8495259ec T 1012 | b480871a-6c2f-4c35-ab94-6de203c4ff28 T 1013 | ce1faba4-d8d9-46ac-ba26-cffe03eadc99 T 1014 | 120b7285-b1e6-4653-ab30-8d4231ccc9bc T 1015 | 4b79a668-7e87-4154-ba53-51ee79a76599 F -------------------------------------------------------------------------------- 
/find_citizen_science_datasets/wd_replace.txt: -------------------------------------------------------------------------------- 1 | word replacement 2 | de 3 | del 4 | con 5 | para 6 | que 7 | este 8 | el 9 | yearly 10 | e 11 | o 12 | netherlands 13 | ireland 14 | por 15 | como 16 | nl 17 | familia family 18 | clase class 19 | información information 20 | territoire territory 21 | brussels 22 | género genus 23 | validé validated 24 | m 25 | diferentes different 26 | différents different 27 | flemish 28 | south 29 | flanders 30 | organiz organis 31 | britain 32 | registros regist 33 | registro regist 34 | registre regist 35 | registration regist 36 | register regist 37 | registry regist 38 | permet allow 39 | réseau network 40 | más 41 | région region 42 | especies species 43 | especie species 44 | cada each 45 | una 46 | colombia 47 | los 48 | además 49 | grey 50 | dobservateur of observer 51 | projet project 52 | biodiversité biodiversity 53 | mouse 54 | mice 55 | etou 56 | flies fly 57 | butterfly 58 | dutch 59 | moth 60 | régionale regional 61 | bee 62 | dragonfly 63 | portugal 64 | mist 65 | pellet 66 | terme term 67 | lévolution the evolution 68 | développement development 69 | mycologists 70 | mycologist 71 | donc 72 | fait 73 | groupe group 74 | bénévole volunteer 75 | lobservatoire the observatory 76 | œuvre 77 | cf 78 | na 79 | nas 80 | coleção collection 81 | museu museum 82 | museo museum 83 | herbário herbarium 84 | herbier herbarium 85 | slug 86 | den 87 | muséum museum 88 | espécie species 89 | depositado deposited 90 | km 91 | humidity 92 | michael 93 | mieux better 94 | scientifique scientific 95 | transmise transmitted 96 | dinformation 97 | monitored monitor 98 | ligne line 99 | thousand 100 | notre our 101 | larva 102 | larvae 103 | école school 104 | escuela school 105 | voluntario volunteer 106 | voluntaria volunteer 107 | orthoptera 108 | aficionado amateur 109 | privado private 110 | privada private 111 | privé private 112 | privée private 
113 | citoyen citizen 114 | citoyenne citizen 115 | ciudadano citizen 116 | ciudadana citizen 117 | shrew 118 | temporel temporal 119 | twice 120 | wintering 121 | pdf 122 | observador observer 123 | observadores observers 124 | trabajo de campo fieldwork 125 | bi 126 | herbario herbarium 127 | denmark 128 | danish 129 | spanish 130 | httpcreativecommonsorgpublicdomainzero 131 | localité locality 132 | parfois sometimes 133 | creative 134 | données data 135 | marine 136 | dacquisition of acquisition 137 | plusieurs several 138 | récoltées collected 139 | sagit 140 | jeu de donnée dataset 141 | donnée data 142 | espèce species 143 | publique public 144 | étudiant student 145 | calais 146 | débuté started 147 | atlas 148 | paticipative citizen 149 | live 150 | oiseaux birds 151 | investigación research 152 | universidad university 153 | université university 154 | winter 155 | http 156 | bat 157 | cadre -------------------------------------------------------------------------------- /get_distribution_of_records_cited_for_a_dataset/get_number_of_records_contributing_to_citations.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import json\n", 10 | "import requests\n", 11 | "import pandas as pd\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "%matplotlib inline\n", 14 | "import seaborn as sns" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "dataset_key = \"ad43e954-dd79-4986-ae34-9ccdbd8bf568\" # Geographically tagged INSDC sequences dataset" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 3, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "def get_all_downloads_involved_citations(dataset_key,\n", 33 | " paging_step = 200,\n", 34 | " 
api_call=\"https://www.gbif.org/api/resource/search\"):\n", 35 | " '''\n", 36 | " Uses the API to get a list of all the download keys that were cited\n", 37 | " '''\n", 38 | " list_download_keys = []\n", 39 | " endOfRecords = False\n", 40 | " offset = 0\n", 41 | " while not endOfRecords:\n", 42 | " params = {\n", 43 | " \"contentType\": \"literature\",\n", 44 | " \"gbifDatasetKey\": dataset_key,\n", 45 | " \"offset\": offset,\n", 46 | " \"limit\": paging_step\n", 47 | " }\n", 48 | " response = requests.get(api_call, params)\n", 49 | " if response.ok:\n", 50 | " citation_list = response.json()\n", 51 | " for citation in citation_list[\"results\"]:\n", 52 | " list_download_keys += citation[\"gbifDownloadKey\"]\n", 53 | " endOfRecords = citation_list[\"endOfRecords\"]\n", 54 | " offset += paging_step\n", 55 | " else:\n", 56 | " print(\"issue\", params)\n", 57 | " endOfRecords = True\n", 58 | " return list_download_keys" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 4, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "def get_dataset_download_numbers(dataset_key,\n", 68 | " download_key,\n", 69 | " paging_step = 200,\n", 70 | " api_call=\"https://api.gbif.org/v1/occurrence/download/\"):\n", 71 | " '''\n", 72 | " Uses the API to get number of records from a given dataset in a download, total records in that download\n", 73 | " and total number of dataset\n", 74 | " '''\n", 75 | " downloads = {\n", 76 | " \"downloadKey\": download_key,\n", 77 | " \"numberRecords\": 0,\n", 78 | " \"totalRecords\": 0,\n", 79 | " \"numberDatasets\": 0\n", 80 | " }\n", 81 | " endOfRecords = False\n", 82 | " offset = 0\n", 83 | " \n", 84 | " while not endOfRecords:\n", 85 | " params = {\n", 86 | " \"offset\": offset,\n", 87 | " \"limit\": paging_step\n", 88 | " }\n", 89 | " response = requests.get(api_call+download_key+\"/datasets\", params)\n", 90 | " if response.ok:\n", 91 | " downloaded_datasets = response.json()\n", 92 | " endOfRecords = 
downloaded_datasets[\"endOfRecords\"]\n", 93 | " downloads[\"numberDatasets\"] = downloaded_datasets[\"count\"]\n", 94 | " \n", 95 | " for dataset in downloaded_datasets[\"results\"]:\n", 96 | " downloads[\"totalRecords\"] += dataset[\"numberRecords\"]\n", 97 | " if dataset[\"datasetKey\"] == dataset_key:\n", 98 | " downloads[\"numberRecords\"] = dataset[\"numberRecords\"]\n", 99 | " \n", 100 | " offset += paging_step\n", 101 | " else:\n", 102 | " print(\"issue\", params)\n", 103 | " endOfRecords = True\n", 104 | " return downloads" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 5, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "cited_download = get_all_downloads_involved_citations(dataset_key)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 8, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "cited_download_stats = []\n", 123 | "for download in cited_download:\n", 124 | " cited_download_stats.append(get_dataset_download_numbers(dataset_key,download, 500))" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 9, 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "pd_cited_download_stats = pd.DataFrame(cited_download_stats).set_index(\"downloadKey\")" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 47, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "pd_cited_download_stats_no_zero = pd_cited_download_stats[pd_cited_download_stats[\"numberRecords\"]>0]" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 51, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "pd_cited_download_stats_no_zero.to_csv(\"nb_records_contributing_to_GBIF_citations_from_INSDC_dataset.csv\", sep=\"\\t\")" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 48, 157 | "metadata": {}, 158 | "outputs": [ 159 | { 160 | "data": { 161 | "text/plain": 
[ 162 | "array([[]],\n", 163 | " dtype=object)" 164 | ] 165 | }, 166 | "execution_count": 48, 167 | "metadata": {}, 168 | "output_type": "execute_result" 169 | }, 170 | { 171 | "data": { 172 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAEICAYAAACzliQjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAZyklEQVR4nO3df7RV5X3n8fcnoEjUCRj1FoEpJKFpsaZob5SMWbNuNAE0nWLXSmbhckU0tqQdnJV0mI6QzlSjpWNmxZilMUZSiZghIjUxMJaUEuJpl21FISUgKuGqVK4gxAHR6681mO/8sZ9rttdzzj333J89z+e11lln7+9+9t7Pfth8zrn77HuuIgIzM8vDu0a6A2ZmNnwc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHoWxYk7ZX08ZHux1CT1CGpa6T7YaOXQ9+sCelF5DVJ3ZKel3SXpJNGul9mfXHom/WDpLGl2f8QEScBs4CzgWUj2Bezhjj0bUSld8z/VdIOSUcl3SvpBElXSHqoV9uQ9IE0fZekb0j6YXq3/Q+SfkXS1yQdkfSkpLN77e7Dkh5Py78t6YTStn9H0nZJL0r6R0kf6tXHayTtAF7pHbYR8TywkSL8e9YZJ+krkp6VdFDSNyWNLy2fn/b3kqSnJM1L9TMkrZd0WFKnpD8orXOdpPsk/W9JLwFXSBqfxuKIpMeBD/cas2skPSfpZUm7JV3Y338jay0OfRsN/iMwD5gOfAi4oh/r/XfgVOAN4J+An6T5+4Cv9mp/GTAXeD/wa2ldJJ0DrAQ+B7wXuANYL2lcad1LgU8CEyLiWHmjkqYAFwGdpfKX0z5mAR8AJgN/ltqfC9wN/AkwAfj3wN603j1AF3AG8CngL3oF9fx0bBOA1cC16Xjen45tYalfHwSuBj4cESen5XuxrDn0bTS4JSL2R8Rh4P9Qesfch/sjYltEvA7cD7weEXdHxJvAvRSXXMq+HhH70n6WUwQ5wB8Ad0TEloh4MyJWUbyIzO7Vx30R8Vqp9gNJLwP7gEMUAYwkpW3+cUQcjoiXgb8AFqT1rgJWRsSmiPhFRDwXEU9Kmgp8FLgmIl6PiO3AXwKfKe3znyLiB2m91yhe+Jan/ewDbim1fRMYB8yUdFxE7I2IpxocW2tRDn0bDZ4vTb8KNPqB6MHS9GtV5ntvZ19p+l8o3k0D/CqwJF3aeVHSi8DU0vLe6/a4JL2D7gB+neInDIDTgHcD20rb+5tUJ227WvieAfS8SJT7OblOP86oclwAREQn8AXgOuCQpDWSzsCy5tC30eoViuAEQNKvDMI2p5am/y2wP03vo3i3PKH0eHdE3FNqX/PraCPi74C7gK+k0gsULzpnlrb3nvShb8/+3l9lU/uBUySd3Kufz9Xpx4Eqx1Xu23cj4qMUL2xBcdnJMubQt9Hqp8CZkmalD1yvG4RtLpY0RdIpwBcpLgEBfAv4Q0nnqXCipE/2Ct++fA34hKRZEfGLtM2bJZ0OIGmypLmp7Z3AlZIulPSutOzX0+WZfwT+Z/ow+0MUl4JW19nvWmCZpInps4X/3LNA0gclXZA+m3id4oXozX4ck7Ugh76NShHxM+B64EfAHuCh+ms05LvA3wJPp8efp31tpbgG/3XgCMUHslf0s78/p/hw9n+k0jVpOw+nO21+BHwwtX0EuBK4GTgK/B3FO3EoPmeYRvGu/37g2ojYVGfXX6K4pPNMOrbvlJaNA26k+MnjeeB0ihc7y5j8R1TMzPLhd/pmZhlx6JuZZcShb
2aWEYe+mVlGRvUXNp166qkxbdq0ptd/5ZVXOPHEEwevQy3EY1Ofx6c2j019o2F8tm3b9kJEnFZt2agO/WnTprF169am169UKnR0dAxeh1qIx6Y+j09tHpv6RsP4SPqXWst8ecfMLCMOfTOzjPQZ+unXwR+R9FNJuyR9KdXvkvRM+k7w7ZJmpbok3ZK+C3xH+tranm0tlLQnPRbW2qeZmQ2NRq7pvwFcEBHdko4DHpL0w7TsTyLivl7tLwJmpMd5wO3Aeen7Tq4F2im++GmbpPURcWQwDsTMzPrW5zv9KHSn2ePSo953N8wH7k7rPQxMkDSJ4g84bErf+30E2ETxhzPMzGyYNHT3jqQxwDaKvwB0W0RskfRHwHJJfwZsBpZGxBsU3/1d/n7vrlSrVe+9r0XAIoC2tjYqlUp/j+kt3d3dA1q/lXls6vP41OaxqW+0j09DoZ/+EtEsSROA+yX9JsUfgX4eOB5YQfGtgtcDqraJOvXe+1qRtkd7e3sM5Nan0XDr1GjlsanP41Obx6a+0T4+/bp7JyJeBCrAvIg4kC7hvAF8Gzg3Nevi7X/UYQrF18TWqpuZ2TBp5O6d09I7fCSNBz4OPJmu0/f8PdBLgMfSKuuBy9NdPLOBoxFxANgIzEl/7GEiMCfVzMxsmDRyeWcSsCpd138XsDYiHpD0Y0mnUVy22Q78YWq/AbiY4g9IvErxxyKIiMOSbgAeTe2uT3+gesjsfO4oVyz9awD23vjJodyVmdm/Cn2GfkTsAM6uUr+gRvsAFtdYthJY2c8+mpnZIPFv5JqZZcShb2aWEYe+mVlGHPpmZhlx6JuZZcShb2aWEYe+mVlGHPpmZhlx6JuZZcShb2aWEYe+mVlGHPpmZhlx6JuZZcShb2aWEYe+mVlGHPpmZhlx6JuZZcShb2aWEYe+mVlGHPpmZhlx6JuZZaTP0Jd0gqRHJP1U0i5JX0r16ZK2SNoj6V5Jx6f6uDTfmZZPK21rWarvljR3qA7KzMyqa+Sd/hvABRHxW8AsYJ6k2cCXgZsjYgZwBLgqtb8KOBIRHwBuTu2QNBNYAJwJzAO+IWnMYB6MmZnV12foR6E7zR6XHgFcANyX6quAS9L0/DRPWn6hJKX6moh4IyKeATqBcwflKMzMrCFjG2mU3pFvAz4A3AY8BbwYEcdSky5gcpqeDOwDiIhjko4C7031h0ubLa9T3tciYBFAW1sblUqlf0dU0jYelpxVdHEg22lF3d3dHpM6PD61eWzqG+3j01DoR8SbwCxJE4D7gd+o1iw9q8ayWvXe+1oBrABob2+Pjo6ORrpY1a2r13HTzuIQ917W/HZaUaVSYSBj2+o8PrV5bOob7ePTr7t3IuJFoALMBiZI6nnRmALsT9NdwFSAtPw9wOFyvco6ZmY2DBq5e+e09A4fSeOBjwNPAA8Cn0rNFgLr0vT6NE9a/uOIiFRfkO7umQ7MAB4ZrAMxM7O+NXJ5ZxKwKl3XfxewNiIekPQ4sEbSnwP/DNyZ2t8JfEdSJ8U7/AUAEbFL0lrgceAYsDhdNjIzs2HSZ+hHxA7g7Cr1p6ly901EvA58usa2lgPL+99NMzMbDP6NXDOzjDj0zcwy4tA3M8uIQ9/MLCMOfTOzjDj0zcwy4tA3M8uIQ9/MLCMOfTOzjDj0zcwy4tA3M8uIQ9/MLCMOfTOzjDj0zcwy4tA3M8uIQ9/MLCMOfTOzjDj0zcwy4tA3M8uIQ9/MLCMOfTOzjPQZ+pKmSnpQ0hOSdkn6fKpfJ+k5SdvT4+LSOsskdUraLWluqT4v1TolLR2aQzIzs1rGNtDmGLAkIn4i6WRgm6RNadnNEfGVcmNJM4EFwJnAGcCPJP1aWnwb8AmgC3hU0vqIeHwwDsTMzPrWZ+hHxAHgQJp+WdITwOQ6q8wH1kTEG8AzkjqBc9Oyzoh4GkDSmtTWoW9mNkwaeaf/FknTgLOBLcD5wNWSLge2Uvw0cITiBeHh0mpd/PJFYl+v+nlV9rEIWATQ1
tZGpVLpTxffpm08LDnrGMCAttOKuru7PSZ1eHxq89jUN9rHp+HQl3QS8D3gCxHxkqTbgRuASM83AZ8FVGX1oPrnB/GOQsQKYAVAe3t7dHR0NNrFd7h19Tpu2lkc4t7Lmt9OK6pUKgxkbFudx6c2j019o318Ggp9ScdRBP7qiPg+QEQcLC3/FvBAmu0CppZWnwLsT9O16mZmNgwauXtHwJ3AExHx1VJ9UqnZ7wGPpen1wAJJ4yRNB2YAjwCPAjMkTZd0PMWHvesH5zDMzKwRjbzTPx/4DLBT0vZU+yJwqaRZFJdo9gKfA4iIXZLWUnxAewxYHBFvAki6GtgIjAFWRsSuQTwWMzPrQyN37zxE9ev0G+qssxxYXqW+od56ZmY2tPwbuWZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGekz9CVNlfSgpCck7ZL0+VQ/RdImSXvS88RUl6RbJHVK2iHpnNK2Fqb2eyQtHLrDMjOzahp5p38MWBIRvwHMBhZLmgksBTZHxAxgc5oHuAiYkR6LgNuheJEArgXOA84Fru15oTAzs+HRZ+hHxIGI+Emafhl4ApgMzAdWpWargEvS9Hzg7ig8DEyQNAmYC2yKiMMRcQTYBMwb1KMxM7O6xvansaRpwNnAFqAtIg5A8cIg6fTUbDKwr7RaV6rVqvfexyKKnxBoa2ujUqn0p4tv0zYelpx1DGBA22lF3d3dHpM6PD61eWzqG+3j03DoSzoJ+B7whYh4SVLNplVqUaf+9kLECmAFQHt7e3R0dDTaxXe4dfU6btpZHOLey5rfTiuqVCoMZGxbncenNo9NfaN9fBq6e0fScRSBvzoivp/KB9NlG9LzoVTvAqaWVp8C7K9TNzOzYdLI3TsC7gSeiIivlhatB3ruwFkIrCvVL0938cwGjqbLQBuBOZImpg9w56SamZkNk0Yu75wPfAbYKWl7qn0RuBFYK+kq4Fng02nZBuBioBN4FbgSICIOS7oBeDS1uz4iDg/KUZiZWUP6DP2IeIjq1+MBLqzSPoDFNba1EljZnw6amdng8W/kmpllxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llpM/Ql7RS0iFJj5Vq10l6TtL29Li4tGyZpE5JuyXNLdXnpVqnpKWDfyhmZtaXRt7p3wXMq1K/OSJmpccGAEkzgQXAmWmdb0gaI2kMcBtwETATuDS1NTOzYTS2rwYR8feSpjW4vfnAmoh4A3hGUidwblrWGRFPA0hak9o+3u8em5lZ0/oM/TqulnQ5sBVYEhFHgMnAw6U2XakGsK9X/bxqG5W0CFgE0NbWRqVSabqDbeNhyVnHAAa0nVbU3d3tManD41Obx6a+0T4+zYb+7cANQKTnm4DPAqrSNqh+GSmqbTgiVgArANrb26Ojo6PJLsKtq9dx087iEPde1vx2WlGlUmEgY9vqPD61eWzqG+3j01ToR8TBnmlJ3wIeSLNdwNRS0ynA/jRdq25mZsOkqVs2JU0qzf4e0HNnz3pggaRxkqYDM4BHgEeBGZKmSzqe4sPe9c1328zMmtHnO31J9wAdwKmSuoBrgQ5Jsygu0ewFPgcQEbskraX4gPYYsDgi3kzbuRrYCIwBVkbErkE/GjMzq6uRu3curVK+s0775cDyKvUNwIZ+9c7MzAaVfyPXzCwjDn0zs4w49M3MMuLQNzPLiEPfzCwjDn0zs4w49M3MMuLQNzPLiEPfzCwjDn0zs4w49M3MMuLQNzPLiEPfzCwjDn0zs4w49M3MMuLQNzPLiEPfzCwjDn0zs
4w49M3MMuLQNzPLiEPfzCwjfYa+pJWSDkl6rFQ7RdImSXvS88RUl6RbJHVK2iHpnNI6C1P7PZIWDs3hmJlZPY28078LmNerthTYHBEzgM1pHuAiYEZ6LAJuh+JFArgWOA84F7i254XCzMyGT5+hHxF/DxzuVZ4PrErTq4BLSvW7o/AwMEHSJGAusCkiDkfEEWAT73whMTOzIdbsNf22iDgAkJ5PT/XJwL5Su65Uq1U3M7NhNHaQt6cqtahTf+cGpEUUl4Zoa2ujUqk03Zm28bDkrGMAA9pOK+ru7vaY1OHxqc1jU99oH59mQ/+gpEkRcSBdvjmU6l3A1FK7KcD+VO/oVa9U23BErABWALS3t0dHR0e1Zg25dfU6btpZHOLey5rfTiuqVCoMZGxbncenNo9NfaN9fJq9vLMe6LkDZyGwrlS/PN3FMxs4mi7/bATmSJqYPsCdk2pmZjaM+nynL+keinfpp0rqorgL50ZgraSrgGeBT6fmG4CLgU7gVeBKgIg4LOkG4NHU7vqI6P3hsJmZDbE+Qz8iLq2x6MIqbQNYXGM7K4GV/eqdmZkNKv9GrplZRhz6ZmYZceibmWXEoW9mlhGHvplZRhz6ZmYZceibmWXEoW9mlhGHvplZRhz6ZmYZceibmWXEoW9mlhGHvplZRhz6ZmYZceibmWXEoW9mlhGHvplZRhz6ZmYZceibmWXEoW9mlhGHvplZRhz6ZmYZGVDoS9oraaek7ZK2ptopkjZJ2pOeJ6a6JN0iqVPSDknnDMYBmJlZ4wbjnf7HImJWRLSn+aXA5oiYAWxO8wAXATPSYxFw+yDs28zM+mEoLu/MB1al6VXAJaX63VF4GJggadIQ7N/MzGpQRDS/svQMcAQI4I6IWCHpxYiYUGpzJCImSnoAuDEiHkr1zcA1EbG11zYXUfwkQFtb22+vWbOm6f4dOnyUg68V02dNfk/T22lF3d3dnHTSSSPdjVHL41Obx6a+0TA+H/vYx7aVrr68zdgBbvv8iNgv6XRgk6Qn67RVldo7XnEiYgWwAqC9vT06Ojqa7tytq9dx087iEPde1vx2WlGlUmEgY9vqPD61eWzqG+3jM6DLOxGxPz0fAu4HzgUO9ly2Sc+HUvMuYGpp9SnA/oHs38zM+qfp0Jd0oqSTe6aBOcBjwHpgYWq2EFiXptcDl6e7eGYDRyPiQNM9NzOzfhvI5Z024H5JPdv5bkT8jaRHgbWSrgKeBT6d2m8ALgY6gVeBKwewbzMza0LToR8RTwO/VaX+f4ELq9QDWNzs/szMbOD8G7lmZhlx6JuZZcShb2aWEYe+mVlGHPpmZhlx6JuZZcShb2aWEYe+mVlGHPpmZhlx6JuZZcShb2aWEYe+mVlGHPpmZhlx6JuZZcShb2aWEYe+mVlGHPpmZhlx6JuZZcShb2aWEYe+mVlGHPpmZhkZ9tCXNE/SbkmdkpYO9/7NzHI2rKEvaQxwG3ARMBO4VNLM4eyDmVnOxg7z/s4FOiPiaQBJa4D5wONDveNpS/+6zzZ7b/xkn+vWajMQA+mbmVl/DHfoTwb2lea7gPPKDSQtAhal2W5Juwewv1OBFxptrC8PTpuhMAT77dfYZMjjU5vHpr7RMD6/WmvBcIe+qtTibTMRK4AVg7IzaWtEtA/GtlqNx6Y+j09tHpv6Rvv4DPcHuV3A1NL8FGD/MPfBzCxbwx36jwIzJE2XdDywAFg/zH0wM8vWsF7eiYhjkq4GNgJjgJURsWsIdzkol4lalMemPo9PbR6b+kb1+Cgi+m5lZmYtwb+Ra2aWEYe+mVlGWjL0W+2rHiRNlfSgpCck7ZL0+VQ/RdImSXvS88RUl6Rb0vHvkHROaVsLU/s9khaW6r8taWda5xZJanYfI0HSGEn/LOmBND9d0pbU73vTjQNIGpfmO9PyaaVtLEv13ZLmlupVz6dm9jESJE2QdJ+kJ9M59BGfO2/1/Y/T/6nHJN0j6YSWP3cioqUeFB8QPwW8Dzge+Ckwc
6T7NcBjmgSck6ZPBn5G8TUW/wtYmupLgS+n6YuBH1L8XsRsYEuqnwI8nZ4npumJadkjwEfSOj8ELkr1fu1jBMfovwDfBR5I82uBBWn6m8Afpen/BHwzTS8A7k3TM9O5Mg6Yns6hMfXOp/7uYwTHZhXw+2n6eGCCz52A4pdFnwHGl/49r2j1c2fETsQh/If8CLCxNL8MWDbS/RrkY1wHfALYDUxKtUnA7jR9B3Bpqf3utPxS4I5S/Y5UmwQ8Waq/1a6/+xih8ZgCbAYuAB5IYfICMLb3OUFx59hH0vTY1E69z5OedrXOp2b2MUJj829SsKlXPftzh19+Q8Ap6d/pAWBuq587rXh5p9pXPUweob4MuvTj3tnAFqAtIg4ApOfTU7NaY1Cv3lWlThP7GAlfA/4b8Is0/17gxYg4lubLfXur32n50dS+v2PWzD5GwvuAnwPfTpe//lLSifjcISKeA74CPAscoPh32kaLnzutGPp9ftXDv1aSTgK+B3whIl6q17RKLZqo1+1OE+sMOkm/AxyKiG3lcpWm0ceywRyzUTE2yVjgHOD2iDgbeIXiUkstOZ07Eym+8HE6cAZwIsU3APfWUudOK4Z+S37Vg6TjKAJ/dUR8P5UPSpqUlk8CDqV6rTGoV59Spd7MPobb+cDvStoLrKG4xPM1YIKknl8+LPftrX6n5e8BDtP/MXuhiX2MhC6gKyK2pPn7KF4EfO7Ax4FnIuLnEfH/gO8D/44WP3daMfRb7qse0t0QdwJPRMRXS4vWAz13USykuNbfU7883SUxGziafrzeCMyRNDG9y5lDcS3xAPCypNlpX5f32lZ/9jGsImJZREyJiGkU/9Y/jojLgAeBT9Xod8/xfCq1j1RfkO6emA7MoPiAsur5lNbp7z6GXUQ8D+yT9MFUupDiq8yzP3coLuvMlvTu1PeesWntc2e4PzwZjgfF3QE/o/jk/E9Huj+DcDwfpfgRbwewPT0uprjWtxnYk55PSe1F8cdqngJ2Au2lbX0W6EyPK0v1duCxtM7X+eVva/d7HyM4Th388u6d91H8x+sE/goYl+onpPnOtPx9pfX/NB3PbtIdKPXOp2b2MULjMgvYms6fH1DcfeNzp+jLl4AnU/+/Q3EHTkufO/4aBjOzjLTi5R0zM6vBoW9mlhGHvplZRhz6ZmYZceibmWXEoW9mlhGHvplZRv4/4737RwCDqd4AAAAASUVORK5CYII=\n", 173 | "text/plain": [ 174 | "
" 175 | ] 176 | }, 177 | "metadata": { 178 | "needs_background": "light" 179 | }, 180 | "output_type": "display_data" 181 | } 182 | ], 183 | "source": [ 184 | "pd_cited_download_stats_no_zero.hist(column=[\"numberRecords\"],bins=100)" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 49, 190 | "metadata": {}, 191 | "outputs": [ 192 | { 193 | "data": { 194 | "text/plain": [ 195 | "(0, 10000)" 196 | ] 197 | }, 198 | "execution_count": 49, 199 | "metadata": {}, 200 | "output_type": "execute_result" 201 | }, 202 | { 203 | "data": { 204 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY0AAAEICAYAAACj2qi6AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAYPElEQVR4nO3de5BmdZ3f8fdHRlDROOAAC8MQQGbdxeACUcRobXVEubkJbpWmoCgZkN0xCaR0YxLBTcDLssGUqKG8jsKKRkGiohMWlx1Ze7fMLlcXubM0OOuMIGhAdLyVg9/88fzafmx6Zn5z6ae7mfer6qnnPN/zO+f8zm/O9KfPpbtTVUiS1ONpc90BSdLCYWhIkroZGpKkboaGJKmboSFJ6mZoSJK6GRraKSRZm+RVc92P2ZZkLMn6ue6HnroMDWkbtBD6aZINSb6b5JNJnj3X/ZJmm6EhbYUki4Y+/quqejZwOHAEcO4c9kUaCUNDc6p9x/6fktyW5PEkn0vyjCSnJ/n6tLaV5JA2/ckkH07ylfbd/v9N8htJPpDksST3JDli2uZekuSuNv/PkjxjaN2/l+TWJD9I8rdJXjStj29Lchvw4+lfrKvqu8C1DMJjcpndkrw3ybeTPJzko0meOTT/pLa9Hya5P8nxrb5fktVJHk0ykeQPh5Z5R5LPJ/lfSX4InJ7kmW0sHktyF/CSaWP2tiTfSfKjJPcmOWZr/42kYYaG5oN/AxwPHAS8CDh9K5b7r8AS4OfA3wHfaJ8/D7xvWvtTgeOA5wO/2ZYlyZHApcCbgOcBHwNWJ9ltaNlTgNcAi6tq4/BKk+wPnABMDJXf07ZxOHAIsBQ4r7U/CvgU8J+BxcDvAmvbcpcD64H9gNcBfzrtC/1Jbd8WA58Bzm/78/y2byuG+vUC4GzgJVX1nDZ/LdJ2MDQ0H1xcVQ9W1aPA/2HoO/YtuKqqbqmqnwFXAT+rqk9V1RPA5xhcMhr2wapa17ZzAYMgAPhD4GNVdUNVPVFVlzEIoaOn9XFdVf10qPalJD8C1gGPMPgCTpK0df5RVT1aVT8C/hQ4uS13JnBpVa2pql9W1Xeq6p4ky4BXAG+rqp9V1a3AJ4A3DG3z76rqS225nzIIzgvadtYBFw+1fQLYDTg0ydOram1V3d85ttKMDA3NB98dmv4J0HtD+eGh6Z/O8Hn6etYNTf8jg+/mAf4p8NZ2aeoHSX4ALBuaP33ZSa9t38GPAb/F4AwHYC/gWcAtQ+v7i1anrXumL977AZMhM9zPpZvpx34z7BcAVTUBvAV4B/BIkiuS7Ie0HQwNzVc/ZvCFF4Akv7ED1rlsaPoA4ME2vY7Bd+uLh17PqqrLh9pv8tdBV9VfA58E3ttK32cQWi8cWt9z203zye09f4ZVPQjsmeQ
50/r5nc3046EZ9mu4b5+tqlcwCMZicNlM2maGhuarbwIvTHJ4u2H9jh2wzrOS7J9kT+DtDC5hAXwc+LdJXpqB3ZO8ZtoX7y35APDqJIdX1S/bOt+fZG+AJEuTHNfaXgKckeSYJE9r836rXV76W+C/t4cBXsTgUtZnNrPdK4Fzk+zR7q38h8kZSV6Q5JXt3szPGATZE1uxT9KTGBqal6rqH4B3AV8F7gO+vvklunwW+Evggfb6k7atmxncg/gg8BiDG9qnb2V/v8fg5vZ/a6W3tfVc3550+irwgtb2RuAM4P3A48BfMzgTgMF9lgMZnHVcBZxfVWs2s+l3Mrgk9a22b58emrcbcCGDM5/vAnszCEtpm8U/wiRJ6uWZhiSpm6EhSepmaEiSuhkakqRu8/oXni1evLgOOeSQue7GvPDjH/+Y3Xfffa67MS84FlMciymOxZRbbrnl+1W115Zbbr15HRr77LMPN99881x3Y14YHx9nbGxsrrsxLzgWUxyLKY7FlCT/uOVW28bLU5KkboaGJKmboSFJ6mZoSJK6GRqSpG6GhiSpm6EhSepmaEiSuhkakqRu8/onwn/6iyc48Jw/H+k21174mpFuT5IWEs80JEndDA1JUjdDQ5LUzdCQJHUzNCRJ3QwNSVI3Q0OS1M3QkCR1MzQkSd0MDUlSN0NDktTN0JAkdTM0JEndDA1JUrcthkaSZyS5Mck3k9yZ5J2tflCSG5Lcl+RzSXZt9d3a54k2/8ChdZ3b6vcmOW62dkqSNDt6zjR+Dryyqn4HOBw4PsnRwHuA91fVcuAx4MzW/kzgsao6BHh/a0eSQ4GTgRcCxwMfTrLLjtwZSdLs2mJo1MCG9vHp7VXAK4HPt/plwGvb9EntM23+MUnS6ldU1c+r6lvABHDUDtkLSdJIdN3TSLJLkluBR4A1wP3AD6pqY2uyHljappcC6wDa/MeB5w3XZ1hGkrQAdP2516p6Ajg8yWLgKuC3Z2rW3rOJeZuq/5okK4GVAEuW7MV5h2180kKzaXx8fKTb67Vhw4Z527dRcyymOBZTHIvR2Kq/EV5VP0gyDhwNLE6yqJ1N7A882JqtB5YB65MsAp4LPDpUnzS8zPA2VgGrAA44+JC66PbR/hnztaeOjXR7vcbHxxkbG5vrbswLjsUUx2KKYzEaPU9P7dXOMEjyTOBVwN3A14DXtWYrgC+36dXtM23+X1VVtfrJ7emqg4DlwI07akckSbOv59v4fYHL2pNOTwOurKqrk9wFXJHkT4C/By5p7S8BPp1kgsEZxskAVXVnkiuBu4CNwFntspckaYHYYmhU1W3AETPUH2CGp5+q6mfA6zexrguAC7a+m5Kk+cCfCJckdTM0JEndDA1JUjdDQ5LUzdCQJHUzNCRJ3QwNSVI3Q0OS1M3QkCR1MzQkSd0MDUlSN0NDktTN0JAkdTM0JEndDA1JUjdDQ5LUzdCQJHUzNCRJ3QwNSVI3Q0OS1M3QkCR1MzQkSd0MDUlSty2GRpJlSb6W5O4kdyZ5c6u/I8l3ktzaXicOLXNukokk9yY5bqh+fKtNJDlndnZJkjRbFnW02Qi8taq+keQ5wC1J1rR576+q9w43TnIocDLwQmA/4KtJfrPN/hDwamA9cFOS1VV1147YEUnS7NtiaFTVQ8BDbfpHSe4Glm5mkZOAK6rq58C3kkwAR7V5E1X1AECSK1pbQ0OSFoieM41fSXIgcARwA/By4OwkpwE3MzgbeYxBoFw/tNh6pkJm3bT6S2fYxkpgJcCSJXtx3mEbt6aL2218fHyk2+u1YcOGedu3UXMspjgWUxyL0egOjSTPBr4AvKWqfpjkI8C7gWrvFwFvBDLD4sXM90/qSYWqVcAqgAMOPqQuun2rcm27rT11bKTb6zU+Ps7Y2Nhcd2NecCymOBZTHIvR6PqKnOTpDALjM1X1RYCqenho/seBq9vH9cCyocX3Bx5s05uqS5IWgJ6npwJcAtxdVe8bqu871Oz3gTva9Gr
g5CS7JTkIWA7cCNwELE9yUJJdGdwsX71jdkOSNAo9ZxovB94A3J7k1lZ7O3BKksMZXGJaC7wJoKruTHIlgxvcG4GzquoJgCRnA9cCuwCXVtWdO3BfJEmzrOfpqa8z832KazazzAXABTPUr9nccpKk+c2fCJckdTM0JEndDA1JUjdDQ5LUzdCQJHUzNCRJ3QwNSVI3Q0OS1M3QkCR1MzQkSd0MDUlSN0NDktTN0JAkdTM0JEndDA1JUjdDQ5LUzdCQJHUzNCRJ3QwNSVI3Q0OS1M3QkCR1MzQkSd22GBpJliX5WpK7k9yZ5M2tvmeSNUnua+97tHqSXJxkIsltSY4cWteK1v6+JCtmb7ckSbOh50xjI/DWqvpt4GjgrCSHAucA11XVcuC69hngBGB5e60EPgKDkAHOB14KHAWcPxk0kqSFYYuhUVUPVdU32vSPgLuBpcBJwGWt2WXAa9v0ScCnauB6YHGSfYHjgDVV9WhVPQasAY7foXsjSZpVi7amcZIDgSOAG4B9quohGARLkr1bs6XAuqHF1rfapurTt7GSwRkKS5bsxXmHbdyaLm638fHxkW6v14YNG+Zt30bNsZjiWExxLEajOzSSPBv4AvCWqvphkk02naFWm6n/eqFqFbAK4ICDD6mLbt+qXNtua08dG+n2eo2PjzM2NjbX3ZgXHIspjsUUx2I0up6eSvJ0BoHxmar6Yis/3C470d4fafX1wLKhxfcHHtxMXZK0QPQ8PRXgEuDuqnrf0KzVwOQTUCuALw/VT2tPUR0NPN4uY10LHJtkj3YD/NhWkyQtED3Xfl4OvAG4PcmtrfZ24ELgyiRnAt8GXt/mXQOcCEwAPwHOAKiqR5O8G7iptXtXVT26Q/ZCkjQSWwyNqvo6M9+PADhmhvYFnLWJdV0KXLo1HZQkzR/+RLgkqZuhIUnqZmhIkroZGpKkboaGJKmboSFJ6mZoSJK6GRqSpG6GhiSpm6EhSepmaEiSuhkakqRuhoYkqZuhIUnqZmhIkroZGpKkboaGJKmboSFJ6mZoSJK6GRqSpG6GhiSpm6EhSepmaEiSum0xNJJcmuSRJHcM1d6R5DtJbm2vE4fmnZtkIsm9SY4bqh/fahNJztnxuyJJmm09ZxqfBI6fof7+qjq8va4BSHIocDLwwrbMh5PskmQX4EPACcChwCmtrSRpAVm0pQZV9TdJDuxc30nAFVX1c+BbSSaAo9q8iap6ACDJFa3tXVvdY0nSnNliaGzG2UlOA24G3lpVjwFLgeuH2qxvNYB10+ovnWmlSVYCKwGWLNmL8w7buB1d3Hrj4+Mj3V6vDRs2zNu+jZpjMcWxmOJYjMa2hsZHgHcD1d4vAt4IZIa2xcyXwWqmFVfVKmAVwAEHH1IX3b49ubb11p46NtLt9RofH2dsbGyuuzEvOBZTHIspjsVobNNX5Kp6eHI6yceBq9vH9cCyoab7Aw+26U3VJUkLxDY9cptk36GPvw9MPlm1Gjg5yW5JDgKWAzcCNwHLkxyUZFcGN8tXb3u3JUlzYYtnGkkuB8aAJUnWA+cDY0kOZ3CJaS3wJoCqujPJlQxucG8EzqqqJ9p6zgauBXYBLq2qO3f43kiSZlXP01OnzFC+ZDPtLwAumKF+DXDNVvVOkjSv+BPhkqRuhoYkqZuhIUnqZmhIkroZGpKkboaGJKmboSFJ6mZoSJK6GRqSpG6GhiSpm6EhSepmaEiSuhkakqRuhoYkqZuhIUnqZmhIkroZGpKkboaGJKmboSFJ6mZoSJK6GRqSpG6GhiSp2xZDI8mlSR5JcsdQbc8ka5Lc1973aPUkuTjJRJLbkhw5tMyK1v6+JCtmZ3ckSbOp50zjk8Dx02rnANdV1XLguvYZ4ARgeXutBD4Cg5ABzgdeChwFnD8ZNJKkhWOLoVFVfwM8Oq18EnBZm74MeO1Q/VM1cD2wOMm+wHHAmqp6tKoeA9bw5CCSJM1zi7ZxuX2q6iGAqnooyd6tvhRYN9Rufattqv4
kSVYyOEthyZK9OO+wjdvYxW0zPj4+0u312rBhw7zt26g5FlMciymOxWhsa2hsSmao1WbqTy5WrQJWARxw8CF10e07uoubt/bUsZFur9f4+DhjY2Nz3Y15wbGY4lhMcSxGY1ufnnq4XXaivT/S6uuBZUPt9gce3ExdkrSAbGtorAYmn4BaAXx5qH5ae4rqaODxdhnrWuDYJHu0G+DHtpokaQHZ4rWfJJcDY8CSJOsZPAV1IXBlkjOBbwOvb82vAU4EJoCfAGcAVNWjSd4N3NTavauqpt9clyTNc1sMjao6ZROzjpmhbQFnbWI9lwKXblXvJEnzij8RLknqZmhIkroZGpKkboaGJKmboSFJ6mZoSJK6GRqSpG6GhiSpm6EhSepmaEiSuhkakqRuhoYkqZuhIUnqZmhIkroZGpKkboaGJKmboSFJ6mZoSJK6GRqSpG6GhiSpm6EhSepmaEiSum1XaCRZm+T2JLcmubnV9kyyJsl97X2PVk+Si5NMJLktyZE7YgckSaOzI840/mVVHV5VL26fzwGuq6rlwHXtM8AJwPL2Wgl8ZAdsW5I0QrNxeeok4LI2fRnw2qH6p2rgemBxkn1nYfuSpFmyaDuXL+AvkxTwsapaBexTVQ8BVNVDSfZubZcC64aWXd9qDw2vMMlKBmciLFmyF+cdtnE7u7h1xsfHR7q9Xhs2bJi3fRs1x2KKYzHFsRiN7Q2Nl1fVgy0Y1iS5ZzNtM0OtnlQYBM8qgAMOPqQuun17u7h11p46NtLt9RofH2dsbGyuuzEvOBZTHIspjsVobNflqap6sL0/AlwFHAU8PHnZqb0/0pqvB5YNLb4/8OD2bF+SNFrbHBpJdk/ynMlp4FjgDmA1sKI1WwF8uU2vBk5rT1EdDTw+eRlLkrQwbM+1n32Aq5JMruezVfUXSW4CrkxyJvBt4PWt/TXAicAE8BPgjO3YtiRpDmxzaFTVA8DvzFD/f8AxM9QLOGtbtydJmnv+RLgkqZuhIUnqZmhIkroZGpKkboaGJKmboSFJ6mZoSJK6GRqSpG6GhiSpm6EhSepmaEiSuhkakqRuhoYkqZuhIUnqZmhIkroZGpKkboaGJKmboSFJ6mZoSJK6GRqSpG6GhiSpm6EhSepmaEiSui0a9QaTHA/8T2AX4BNVdeGo+7A5B57z5yPd3toLXzPS7UnS9hjpmUaSXYAPAScAhwKnJDl0lH2QJG27UZ9pHAVMVNUDAEmuAE4C7hpxP+aN3jObtx62kdNHfBY0X/WMxVycwY36LBWe+seFZ+LzT6pqdBtLXgccX1V/0D6/AXhpVZ091GYlsLJ9/GfAHSPr4Py2BPj+XHdinnAspjgWUxyLKS+oqufMxopHfaaRGWq/llpVtQpYBZDk5qp68Sg6Nt85FlMciymOxRTHYkqSm2dr3aN+emo9sGzo8/7AgyPugyRpG406NG4Clic5KMmuwMnA6hH3QZK0jUZ6eaqqNiY5G7iWwSO3l1bVnZtZZNVoerYgOBZTHIspjsUUx2LKrI3FSG+ES5IWNn8iXJLUzdCQJHWbt6GR5Pgk9yaZSHLOXPdnNiRZluRrSe5OcmeSN7f6nknWJLmvve/R6klycRuT25IcObSuFa39fUlWzNU+bY8kuyT5+yRXt88HJbmh7dPn2sMTJNmtfZ5o8w8cWse5rX5vkuPmZk+2X5LFST6f5J52fLxsZzwukvxR+79xR5LLkzxjZzouklya5JEkdwzVdthxkOSfJ7m9LXNxkpl+LOLXVdW8ezG4SX4/cDCwK/BN4NC57tcs7Oe+wJFt+jnAPzD49Sr/Azin1c8B3tOmTwS+wuDnXY4Gbmj1PYEH2vsebXqPud6/bRiP/wh8Fri6fb4SOLlNfxT4d2363wMfbdMnA59r04e2Y2U34KB2DO0y1/u1jWNxGfAHbXpXYPHOdlwAS4FvAc8cOh5O35mOC+B3gSOBO4ZqO+w4AG4EXtaW+Qpwwhb7NNeDsomBehlw7dDnc4F
z57pfI9jvLwOvBu4F9m21fYF72/THgFOG2t/b5p8CfGyo/mvtFsKLwc/sXAe8Eri6HcTfBxZNPyYYPH33sja9qLXL9ONkuN1CegH/pH2xzLT6TnVctNBY177YLWrHxXE723EBHDgtNHbIcdDm3TNU/7V2m3rN18tTkwfLpPWt9pTVTqWPAG4A9qmqhwDa+96t2abG5akwXh8A/gvwy/b5ecAPqmpj+zy8T7/a3zb/8db+qTAOMDjD/h7wZ+1y3SeS7M5OdlxU1XeA9wLfBh5i8O98CzvvcTFpRx0HS9v09PpmzdfQ2OKvG3kqSfJs4AvAW6rqh5trOkOtNlNfEJL8HvBIVd0yXJ6haW1h3oIehyGLGFyS+EhVHQH8mMFliE15So5Hu1Z/EoNLSvsBuzP4DdnT7SzHxZZs7f5v07jM19DYaX7dSJKnMwiMz1TVF1v54ST7tvn7Ao+0+qbGZaGP18uBf51kLXAFg0tUHwAWJ5n8AdThffrV/rb5zwUeZeGPw6T1wPqquqF9/jyDENnZjotXAd+qqu9V1S+ALwL/gp33uJi0o46D9W16en2z5mto7BS/bqQ9qXAJcHdVvW9o1mpg8gmHFQzudUzWT2tPSRwNPN5OT68Fjk2yR/vu7NhWWxCq6tyq2r+qDmTwb/1XVXUq8DXgda3Z9HGYHJ/XtfbV6ie3p2gOApYzuNG3oFTVd4F1SV7QSscw+PMBO9VxweCy1NFJntX+r0yOw055XAzZIcdBm/ejJEe38T1taF2bNtc3eTZz8+dEBk8T3Q/88Vz3Z5b28RUMTgdvA25trxMZXIe9Drivve/Z2ofBH7G6H7gdePHQut4ITLTXGXO9b9sxJmNMPT11MIP/3BPA/wZ2a/VntM8Tbf7BQ8v/cRufe+l4EmS+voDDgZvbsfElBk+97HTHBfBO4B4GfyLh0wyegNppjgvgcgb3c37B4MzgzB15HAAvbmN7P/BBpj18MdPLXyMiSeo2Xy9PSZLmIUNDktTN0JAkdTM0JEndDA1JUjdDQ5LUzdCQJHX7/xfFMzVeT+itAAAAAElFTkSuQmCC\n", 205 | "text/plain": [ 206 | "
" 207 | ] 208 | }, 209 | "metadata": { 210 | "needs_background": "light" 211 | }, 212 | "output_type": "display_data" 213 | } 214 | ], 215 | "source": [ 216 | "pd_cited_download_stats_no_zero.hist(column=[\"numberRecords\"],bins=10000)\n", 217 | "plt.xlim(0,10000)" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": 53, 223 | "metadata": {}, 224 | "outputs": [ 225 | { 226 | "data": { 227 | "text/plain": [ 228 | "numberRecords 7.0\n", 229 | "totalRecords 20867.5\n", 230 | "numberDatasets 113.0\n", 231 | "dtype: float64" 232 | ] 233 | }, 234 | "execution_count": 53, 235 | "metadata": {}, 236 | "output_type": "execute_result" 237 | } 238 | ], 239 | "source": [ 240 | "pd_cited_download_stats_no_zero.median()" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 54, 246 | "metadata": {}, 247 | "outputs": [ 248 | { 249 | "data": { 250 | "text/plain": [ 251 | "numberRecords 3.236751e+04\n", 252 | "totalRecords 5.677073e+06\n", 253 | "numberDatasets 2.918200e+02\n", 254 | "dtype: float64" 255 | ] 256 | }, 257 | "execution_count": 54, 258 | "metadata": {}, 259 | "output_type": "execute_result" 260 | } 261 | ], 262 | "source": [ 263 | "pd_cited_download_stats_no_zero.mean()" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": null, 269 | "metadata": {}, 270 | "outputs": [], 271 | "source": [] 272 | } 273 | ], 274 | "metadata": { 275 | "kernelspec": { 276 | "display_name": "Python 3", 277 | "language": "python", 278 | "name": "python3" 279 | }, 280 | "language_info": { 281 | "codemirror_mode": { 282 | "name": "ipython", 283 | "version": 3 284 | }, 285 | "file_extension": ".py", 286 | "mimetype": "text/x-python", 287 | "name": "python", 288 | "nbconvert_exporter": "python", 289 | "pygments_lexer": "ipython3", 290 | "version": "3.7.4" 291 | } 292 | }, 293 | "nbformat": 4, 294 | "nbformat_minor": 2 295 | } 296 | -------------------------------------------------------------------------------- 
/list_datasets_with_occurrence_count.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import requests\n", 11 | "import json" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "## Gets a list of all the occurrence datasets published in GBIF\n", 19 | "\n", 20 | "By using the registry and the occurrence API, this script gets a list of all the occurrence datasets published on GBIF and gets the number of occurrences they contain.\n", 21 | "Running the script might take a while.\n", 22 | "\n", 23 | "Alternatively, if you would like to get all the datasets with occurrences (which also include sampling-events and some checklists), you can simply use the following API call:\n", 24 | "\n", 25 | "```\n", 26 | "https://api.gbif.org/v1/occurrence/search?facet=datasetKey&limit=0&facetLimit=50000\n", 27 | "```\n", 28 | "(this is much faster)" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 3, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "GBIF_API = \"http://api.gbif.org/v1/\"\n", 38 | "root_URL = \"https://www.gbif.org/dataset/\"\n", 39 | "step = 500" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 4, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "datasets_with_count = pd.DataFrame()\n", 49 | "\n", 50 | "endOfRecords = False\n", 51 | "offset = 0\n", 52 | "while not endOfRecords:\n", 53 | " param = {\n", 54 | " \"offset\": offset,\n", 55 | " \"limit\": step,\n", 56 | " \"type\": \"OCCURRENCE\"\n", 57 | " }\n", 58 | " # Get dataset\n", 59 | " response = requests.get(GBIF_API + \"dataset\", param)\n", 60 | " if response.ok:\n", 61 | " dataset_list = response.json()\n", 62 | " for dataset in dataset_list[\"results\"]:\n", 63 | "
datasets_with_count.at[dataset[\"key\"], \"URL\"] = root_URL + dataset[\"key\"]\n", 64 | " datasets_with_count.at[dataset[\"key\"], \"title\"] = dataset[\"title\"]\n", 65 | " if \"description\" in dataset:\n", 66 | " datasets_with_count.at[dataset[\"key\"], \"description\"] = dataset[\"description\"]\n", 67 | " \n", 68 | " # Get occurrence count\n", 69 | " response_occ_count = requests.get(GBIF_API + \"occurrence/count\", {\"datasetKey\": dataset[\"key\"]})\n", 70 | " if response_occ_count.ok:\n", 71 | " datasets_with_count.at[dataset[\"key\"], \"count_occ\"] = response_occ_count.json()\n", 72 | " else:\n", 73 | " print(response_occ_count)\n", 74 | " print(dataset[\"key\"])\n", 75 | " offset += step\n", 76 | " endOfRecords = dataset_list[\"endOfRecords\"]\n", 77 | " else:\n", 78 | " print(response)\n", 79 | " endOfRecords = True" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 5, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "datasets_with_count.to_csv('occurrence_datasets_with_count.txt', sep = '\\t', index=False)" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 6, 94 | "metadata": {}, 95 | "outputs": [ 96 | { 97 | "data": { 98 | "text/html": [ 99 | "
\n", 100 | "\n", 113 | "\n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | "
URLtitledescriptioncount_occ
0d7c6a1a-0aab-47dc-8256-f23fefac69cdhttps://www.gbif.org/dataset/0d7c6a1a-0aab-47d...Wild bees of BelgiumMultidiciplinary assessment of BELgian wild BE...236585.0
a0a4d131-f53f-43b2-a1ba-254473b8a006https://www.gbif.org/dataset/a0a4d131-f53f-43b...Marine Invertebrate voucher specimens at the F...This dataset is generated from the records for...99609.0
73416d1c-e675-43b3-bbe4-672d5a9f9e8bhttps://www.gbif.org/dataset/73416d1c-e675-43b...Monitoreo de Flora y Fauna de los Acuerdos de ...Estos datos dan cuenta de los inventarios de b...2413.0
36cd5465-6625-4bf5-929d-e51a57583c9chttps://www.gbif.org/dataset/36cd5465-6625-4bf...Caracterización de la avifauna asociada a la c...El Atrato, es considerado como uno de los río...313.0
4bef41c6-daf5-48b0-9868-0548976dead0https://www.gbif.org/dataset/4bef41c6-daf5-48b...Caracterización de la fauna reptiliana asociad...El Atrato, es considerado como uno de los río...60.0
...............
85727f1e-f762-11e1-a439-00145eb45e9ahttps://www.gbif.org/dataset/85727f1e-f762-11e...Herbarium Willing at Herbarium Berolinense, Be...The “Herbarium Willing” is a private Phaneroga...111899.0
85739778-f762-11e1-a439-00145eb45e9ahttps://www.gbif.org/dataset/85739778-f762-11e...Lichens at Herbarium Berolinense, Berlin (B)This database contains label information for a...129129.0
8575f23e-f762-11e1-a439-00145eb45e9ahttps://www.gbif.org/dataset/8575f23e-f762-11e...PonTaurus collectionPlant specimens gathered in the Toroslar mount...1534.0
85771146-f762-11e1-a439-00145eb45e9ahttps://www.gbif.org/dataset/85771146-f762-11e...Staatliches Museum für Naturkunde Stuttgart, H...NaN27351.0
857846ba-f762-11e1-a439-00145eb45e9ahttps://www.gbif.org/dataset/857846ba-f762-11e...Desmidiaceae RostockCulture collection of Desmidiaceae contained i...6.0
\n", 203 | "

19129 rows × 4 columns

\n", 204 | "
" 205 | ], 206 | "text/plain": [ 207 | " URL \\\n", 208 | "0d7c6a1a-0aab-47dc-8256-f23fefac69cd https://www.gbif.org/dataset/0d7c6a1a-0aab-47d... \n", 209 | "a0a4d131-f53f-43b2-a1ba-254473b8a006 https://www.gbif.org/dataset/a0a4d131-f53f-43b... \n", 210 | "73416d1c-e675-43b3-bbe4-672d5a9f9e8b https://www.gbif.org/dataset/73416d1c-e675-43b... \n", 211 | "36cd5465-6625-4bf5-929d-e51a57583c9c https://www.gbif.org/dataset/36cd5465-6625-4bf... \n", 212 | "4bef41c6-daf5-48b0-9868-0548976dead0 https://www.gbif.org/dataset/4bef41c6-daf5-48b... \n", 213 | "... ... \n", 214 | "85727f1e-f762-11e1-a439-00145eb45e9a https://www.gbif.org/dataset/85727f1e-f762-11e... \n", 215 | "85739778-f762-11e1-a439-00145eb45e9a https://www.gbif.org/dataset/85739778-f762-11e... \n", 216 | "8575f23e-f762-11e1-a439-00145eb45e9a https://www.gbif.org/dataset/8575f23e-f762-11e... \n", 217 | "85771146-f762-11e1-a439-00145eb45e9a https://www.gbif.org/dataset/85771146-f762-11e... \n", 218 | "857846ba-f762-11e1-a439-00145eb45e9a https://www.gbif.org/dataset/857846ba-f762-11e... \n", 219 | "\n", 220 | " title \\\n", 221 | "0d7c6a1a-0aab-47dc-8256-f23fefac69cd Wild bees of Belgium \n", 222 | "a0a4d131-f53f-43b2-a1ba-254473b8a006 Marine Invertebrate voucher specimens at the F... \n", 223 | "73416d1c-e675-43b3-bbe4-672d5a9f9e8b Monitoreo de Flora y Fauna de los Acuerdos de ... \n", 224 | "36cd5465-6625-4bf5-929d-e51a57583c9c Caracterización de la avifauna asociada a la c... \n", 225 | "4bef41c6-daf5-48b0-9868-0548976dead0 Caracterización de la fauna reptiliana asociad... \n", 226 | "... ... \n", 227 | "85727f1e-f762-11e1-a439-00145eb45e9a Herbarium Willing at Herbarium Berolinense, Be... \n", 228 | "85739778-f762-11e1-a439-00145eb45e9a Lichens at Herbarium Berolinense, Berlin (B) \n", 229 | "8575f23e-f762-11e1-a439-00145eb45e9a PonTaurus collection \n", 230 | "85771146-f762-11e1-a439-00145eb45e9a Staatliches Museum für Naturkunde Stuttgart, H... 
\n", 231 | "857846ba-f762-11e1-a439-00145eb45e9a Desmidiaceae Rostock \n", 232 | "\n", 233 | " description \\\n", 234 | "0d7c6a1a-0aab-47dc-8256-f23fefac69cd Multidiciplinary assessment of BELgian wild BE... \n", 235 | "a0a4d131-f53f-43b2-a1ba-254473b8a006 This dataset is generated from the records for... \n", 236 | "73416d1c-e675-43b3-bbe4-672d5a9f9e8b Estos datos dan cuenta de los inventarios de b... \n", 237 | "36cd5465-6625-4bf5-929d-e51a57583c9c El Atrato, es considerado como uno de los río... \n", 238 | "4bef41c6-daf5-48b0-9868-0548976dead0 El Atrato, es considerado como uno de los río... \n", 239 | "... ... \n", 240 | "85727f1e-f762-11e1-a439-00145eb45e9a The “Herbarium Willing” is a private Phaneroga... \n", 241 | "85739778-f762-11e1-a439-00145eb45e9a This database contains label information for a... \n", 242 | "8575f23e-f762-11e1-a439-00145eb45e9a Plant specimens gathered in the Toroslar mount... \n", 243 | "85771146-f762-11e1-a439-00145eb45e9a NaN \n", 244 | "857846ba-f762-11e1-a439-00145eb45e9a Culture collection of Desmidiaceae contained i... \n", 245 | "\n", 246 | " count_occ \n", 247 | "0d7c6a1a-0aab-47dc-8256-f23fefac69cd 236585.0 \n", 248 | "a0a4d131-f53f-43b2-a1ba-254473b8a006 99609.0 \n", 249 | "73416d1c-e675-43b3-bbe4-672d5a9f9e8b 2413.0 \n", 250 | "36cd5465-6625-4bf5-929d-e51a57583c9c 313.0 \n", 251 | "4bef41c6-daf5-48b0-9868-0548976dead0 60.0 \n", 252 | "... ... 
\n", 253 | "85727f1e-f762-11e1-a439-00145eb45e9a 111899.0 \n", 254 | "85739778-f762-11e1-a439-00145eb45e9a 129129.0 \n", 255 | "8575f23e-f762-11e1-a439-00145eb45e9a 1534.0 \n", 256 | "85771146-f762-11e1-a439-00145eb45e9a 27351.0 \n", 257 | "857846ba-f762-11e1-a439-00145eb45e9a 6.0 \n", 258 | "\n", 259 | "[19129 rows x 4 columns]" 260 | ] 261 | }, 262 | "execution_count": 6, 263 | "metadata": {}, 264 | "output_type": "execute_result" 265 | } 266 | ], 267 | "source": [ 268 | "datasets_with_count" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": null, 274 | "metadata": {}, 275 | "outputs": [], 276 | "source": [] 277 | } 278 | ], 279 | "metadata": { 280 | "kernelspec": { 281 | "display_name": "Python 3", 282 | "language": "python", 283 | "name": "python3" 284 | }, 285 | "language_info": { 286 | "codemirror_mode": { 287 | "name": "ipython", 288 | "version": 3 289 | }, 290 | "file_extension": ".py", 291 | "mimetype": "text/x-python", 292 | "name": "python", 293 | "nbconvert_exporter": "python", 294 | "pygments_lexer": "ipython3", 295 | "version": "3.7.4" 296 | } 297 | }, 298 | "nbformat": 4, 299 | "nbformat_minor": 2 300 | } 301 | -------------------------------------------------------------------------------- /map_occ_to_grscicoll.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import requests\n", 10 | "import json" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "grscicoll_api = 'http://api.gbif.org/v1/grscicoll/'\n", 20 | "headers = {'Content-Type': 'application/json'}\n", 21 | "login = \"\"\n", 22 | "password = \"\"" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 16, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "entity_key = 
\"7f10ed32-b003-4297-ab21-f85257e237d9\"\n", 32 | "dataset_key = \"e08de352-9383-48b0-88ae-be15cce386c5\"\n", 33 | "entity_type = \"collection\" # The type of entity to which the mapping will be applied, here \"collection\"\n", 34 | "# entity_type = \"institution\" # The type of entity to which the mapping will be applied, here \"institution\"\n" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 17, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "mapping = {\n", 44 | " \"datasetKey\" : dataset_key,\n", 45 | " \"code\": \"ALG\" # Code of the collection as provided in the dataset\n", 46 | " # \"parentCode\": \"ALG\" # Code of the parent institution as provided in the dataset\n", 47 | "}" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 18, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "add_mapping = requests.post(grscicoll_api + entity_type +'/' + entity_key + \"/occurrenceMapping\",\n", 57 | " data=json.dumps(mapping),\n", 58 | " auth=(login, password),\n", 59 | " headers=headers)" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 19, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "data": { 69 | "text/plain": [ 70 | "98" 71 | ] 72 | }, 73 | "execution_count": 19, 74 | "metadata": {}, 75 | "output_type": "execute_result" 76 | } 77 | ], 78 | "source": [ 79 | "add_mapping.json()" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 20, 85 | "metadata": {}, 86 | "outputs": [ 87 | { 88 | "data": { 89 | "text/plain": [ 90 | "" 91 | ] 92 | }, 93 | "execution_count": 20, 94 | "metadata": {}, 95 | "output_type": "execute_result" 96 | } 97 | ], 98 | "source": [ 99 | "## If you want to delete a mapping\n", 100 | "# requests.delete(\"https://api.gbif.org/v1/grscicoll/collection/7f10ed32-b003-4297-ab21-f85257e237d9/occurrenceMapping/95\",\n", 101 | "# auth=(login, password),\n", 102 | "# headers=headers)" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 
# coding: utf-8
import json
from urllib.parse import quote

import pandas as pd

# Columns of the species-match response that hold GBIF taxon keys.
_TAXON_KEY_COLUMNS = [
    'acceptedUsageKey', 'usageKey', 'kingdomKey', 'phylumKey', 'classKey',
    'orderKey', 'familyKey', 'genusKey', 'speciesKey',
]


def _fetch_match(species_api, name):
    """Look up one name; return the decoded JSON, or None on an HTTP error."""
    # Imported lazily so the module can be loaded (and empty inputs handled)
    # without the HTTP client being installed.
    import requests

    # Percent-encode the whole name, not only the spaces: characters such as
    # "&", "#" or "+" would otherwise be read as part of the query string.
    response = requests.get(species_api + quote(name))
    if not response.ok:
        return None
    return response.json()


def _flatten_match(match_result, input_name):
    """Turn one match response into rows: alternatives first, then the match."""
    rows = []
    match_result["inputName"] = input_name
    if "alternatives" in match_result:
        match_result["has_alternatives"] = True
        # The nested list is removed from the main row; each alternative
        # becomes a row of its own, flagged with `is_alternative`.
        for alt in match_result.pop("alternatives"):
            alt["inputName"] = input_name
            alt["is_alternative"] = True
            rows.append(alt)
    rows.append(match_result)
    return rows


def match_species(species_list,  # pandas table
                  nameCol="scientificName",
                  species_api="http://api.gbif.org/v1/species/match?verbose=true&name="):
    """
    Match a list of scientific names against the GBIF taxonomy.

    This function aims to emulate the GBIF species matching tool
    (https://www.gbif.org/tools/species-lookup). It takes a table
    containing scientific names and looks up a match for each of them
    with the GBIF Species API (https://www.gbif.org/developer/species):
    ```
    http://api.gbif.org/v1/species/match?
    ```

    Parameters
    ----------
    species_list : pandas.DataFrame
        Table with one name per row.
    nameCol : str
        Column of `species_list` holding the names
        (default `scientificName`).
    species_api : str
        URL prefix to which the encoded name is appended.

    Returns
    -------
    pandas.DataFrame
        One row per match. Alternative matches are included as extra rows
        tagged with `is_alternative`; the main match of a name that has
        alternatives is tagged with `has_alternatives`. The submitted name
        is stored in `inputName`. Taxon key columns that are present are
        stored as integers (0 when missing) and every remaining missing
        value is replaced by the string "NULL". Rows with a missing name
        and names whose request fails are skipped. The table is empty when
        nothing could be matched.
    """
    matched_species = []
    # Iterate over the values rather than the index labels so that a table
    # with duplicated index labels is handled row by row.
    for input_name in species_list[nameCol]:
        if pd.isna(input_name):
            continue  # nothing to look up for an empty cell
        match_result = _fetch_match(species_api, str(input_name))
        if match_result is not None:
            matched_species.extend(_flatten_match(match_result, input_name))
    result = pd.DataFrame(matched_species)
    # Store taxon keys as integers. Only convert the columns the API actually
    # returned: e.g. `acceptedUsageKey` is absent when no name is a synonym,
    # and no column exists at all when nothing was matched.
    present = [key for key in _TAXON_KEY_COLUMNS if key in result.columns]
    if present:
        result[present] = result[present].fillna(0).astype(int)
    # Fill the remaining NAs with NULL
    return result.fillna("NULL")


def create_download_given_query(login,
                                password,
                                download_query,
                                api="http://api.gbif.org/v1/"):
    """
    Submit a download request to the GBIF occurrence download API.

    `download_query` is serialised to JSON and POSTed to
    `<api>occurrence/download/request` with `login` and `password` passed
    as the request credentials. Prints "ok" when the request is accepted,
    otherwise prints the response object. The `requests.Response` is
    returned in both cases so the caller can inspect it.
    """
    # Imported lazily, see `_fetch_match`.
    import requests

    headers = {'Content-Type': 'application/json'}
    download_request = requests.post(api + "occurrence/download/request",
                                     data=json.dumps(download_query),
                                     auth=(login, password),
                                     headers=headers)
    if download_request.ok:
        print("ok")
    else:
        print(download_request)
    return download_request
/query_species_list/query_from_species_list.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import requests\n", 11 | "import json\n", 12 | "import io\n", 13 | "from functions_query_from_species_list import *" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 2, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "login = \"\"\n", 23 | "password = \"\"\n", 24 | "URL_species_file = \"https://data-blog.gbif.org/post/2019-07-11-downloading-long-species-lists-on-gbif_files/global_tree_search_trees_1_3.csv\"" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 3, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "# Get Taxon Keys\n", 34 | "species_list = pd.read_csv(URL_species_file, encoding='latin-1')\n", 35 | "taxon_keys = match_species(species_list, \"Taxon name\")" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 7, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "# filter keys how you see fit\n", 45 | "key_list = taxon_keys.loc[(taxon_keys[\"matchType\"]==\"EXACT\") & (taxon_keys[\"status\"]==\"ACCEPTED\")].usageKey.tolist()" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 9, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "# Make download query\n", 55 | "download_query = {}\n", 56 | "download_query[\"creator\"] = \"\"\n", 57 | "download_query[\"notificationAddresses\"] = [\"\"]\n", 58 | "download_query[\"sendNotification\"] = True\n", 59 | "download_query[\"format\"] = \"SIMPLE_CSV\"\n", 60 | "download_query[\"predicate\"] = {\n", 61 | " \"type\": \"in\",\n", 62 | " \"key\": \"TAXON_KEY\",\n", 63 | " \"values\": key_list\n", 64 | "}" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 |
"metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "create_download_given_query(login, password, download_query)" 74 | ] 75 | } 76 | ], 77 | "metadata": { 78 | "kernelspec": { 79 | "display_name": "Python 3", 80 | "language": "python", 81 | "name": "python3" 82 | }, 83 | "language_info": { 84 | "codemirror_mode": { 85 | "name": "ipython", 86 | "version": 3 87 | }, 88 | "file_extension": ".py", 89 | "mimetype": "text/x-python", 90 | "name": "python", 91 | "nbconvert_exporter": "python", 92 | "pygments_lexer": "ipython3", 93 | "version": "3.6.4" 94 | } 95 | }, 96 | "nbformat": 4, 97 | "nbformat_minor": 2 98 | } 99 | -------------------------------------------------------------------------------- /species-lookup/species-matching-gbif-api.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import requests\n", 11 | "import json" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "# Species matching API\n", 19 | "\n", 20 | "This script aims to emulate the GBIF species matching tool on GBIF (https://www.gbif.org/tools/species-lookup).\n", 21 | "It takes an input file containing scientific names (under the column `scientificName`) and looks up a match in the GBIF taxonomy.\n", 22 | "\n", 23 | "The alternative matches are also included in the output file (they are tagged in the `is_alternative` column).\n", 24 | "\n", 25 | "The script uses the GBIF Species API (https://www.gbif.org/developer/species):\n", 26 | "```\n", 27 | "http://api.gbif.org/v1/species/match?\n", 28 | "```" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "input_file = \"../../../Desktop/FS.csv\"\n", 38 | "ouptut_file = \"../../../Desktop/res_match_FS.txt\"" 39 | ] 40 |
}, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 3, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "species_list = pd.read_csv(input_file) # make sure that file is UTF-8" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 4, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "species_api = \"http://api.gbif.org/v1/species/match?verbose=true&name=\"" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 5, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "matched_species = []\n", 66 | "# For each name\n", 67 | "for species in species_list.index:\n", 68 | " \n", 69 | " # Replace space by %20 for API request in names\n", 70 | " name = species_list.loc[species, \"scientificName\"].replace(\" \", \"%20\")\n", 71 | " \n", 72 | " # Find a match for the name with the API\n", 73 | " match = requests.get(species_api+name)\n", 74 | " \n", 75 | " # If the response is ok\n", 76 | " if match.ok:\n", 77 | " \n", 78 | " # Process the response\n", 79 | " match_result = match.json()\n", 80 | " match_result[\"inputName\"] = species_list.loc[species, \"scientificName\"]\n", 81 | " \n", 82 | " # If the response contains alternative matches, make one line per match\n", 83 | " if \"alternatives\" in match_result:\n", 84 | " match_result[\"has_alternatives\"] = True\n", 85 | " for alt in match_result[\"alternatives\"]:\n", 86 | " alt[\"inputName\"] = species_list.loc[species, \"scientificName\"]\n", 87 | " alt[\"is_alternative\"] = True\n", 88 | " matched_species.append(alt) # add alternative\n", 89 | " match_result.pop('alternatives')\n", 90 | " \n", 91 | " # Strore the result\n", 92 | " matched_species.append(match_result)" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 6, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "result = pd.DataFrame(matched_species)\n", 102 | "\n", 103 | "# Store taxon keys as integers\n", 104 | "taxon_keys = 
def number_of_species_in_country_list(scientificName, countryList, step=1000, session=None):
    """
    Count the number of species for which GBIF has occurrences in a given list of countries
    You must use the ISO country code
    Example: butterflies in Latvia and Canada
    number_of_species_in_country_list("Lepidoptera", ["LV", "CA"])
    # In 2018/10/01 returns
    5278

    The occurrence search is queried with a `speciesKey` facet and the facet
    values are paged through `step` at a time; the result is the total number
    of facet values seen.

    Parameters:
        scientificName: name sent as the `scientificName` filter.
        countryList: iterable of country codes, each sent as a `country` filter.
        step: facet page size (`facetLimit`); must be >= 1.
        session: optional object exposing `get(url, params=...)`, e.g. a
            `requests.Session`, reused for every page. Defaults to the
            `requests` module itself.

    Returns:
        The number of species counted. If a page request fails, "ERROR" and
        the requested URL are printed and the count gathered so far is
        returned (best effort, possibly partial).

    Raises:
        ValueError: if `step` is smaller than 1 (paging could never finish).
    """
    if step < 1:
        raise ValueError("step must be a positive integer")

    http = requests if session is None else session
    base_request = "http://api.gbif.org/v1/occurrence/search"
    # Passing the query as (key, value) pairs lets the HTTP client URL-encode
    # names containing spaces or other reserved characters, and allows the
    # `country` key to be repeated once per country.
    query = [
        ("limit", 0),
        ("facet", "speciesKey"),
        ("scientificName", scientificName),
        ("facetLimit", step),
    ]
    query.extend(("country", country_code) for country_code in countryList)

    nb_species = 0
    offset = 0
    while True:
        response = http.get(base_request, params=query + [("facetOffset", offset)])
        if not response.ok:
            print("ERROR", response.url)
            break
        facets = response.json().get("facets") or []
        # A response without any facet block counts as an empty page.
        nb_species_in_page = len(facets[0]["counts"]) if facets else 0
        nb_species += nb_species_in_page
        # A page shorter than `step` is the last one.
        if nb_species_in_page < step:
            break
        offset += step
    return nb_species
You are welcome to re-use this script and add the quality control filters you need" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 2, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "countries = pd.read_excel(\"ipbes-regions-countries.xlsx\", \"IPBES-country-classification\").fillna(\"NA\")" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 3, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "butterfly = \"Lepidoptera\"" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 4, 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "name": "stdout", 51 | "output_type": "stream", 52 | "text": [ 53 | "Europe: 10263\n", 54 | "Asia: 15170\n", 55 | "Latin America: 23732\n", 56 | "Africa: 10257\n", 57 | "Antarctica: 18\n", 58 | "Oceania: 7364\n", 59 | "North America: 13204\n" 60 | ] 61 | } 62 | ], 63 | "source": [ 64 | "for region in countries[\"GBIF.region\"].unique():\n", 65 | " country_list = countries.loc[countries[\"GBIF.region\"] == region].ISO2.tolist()\n", 66 | " print(region+\":\", number_of_species_in_country_list(butterfly, country_list))" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [] 75 | } 76 | ], 77 | "metadata": { 78 | "kernelspec": { 79 | "display_name": "Python 3", 80 | "language": "python", 81 | "name": "python3" 82 | }, 83 | "language_info": { 84 | "codemirror_mode": { 85 | "name": "ipython", 86 | "version": 3 87 | }, 88 | "file_extension": ".py", 89 | "mimetype": "text/x-python", 90 | "name": "python", 91 | "nbconvert_exporter": "python", 92 | "pygments_lexer": "ipython3", 93 | "version": "3.6.4" 94 | } 95 | }, 96 | "nbformat": 4, 97 | "nbformat_minor": 2 98 | } 99 | -------------------------------------------------------------------------------- /update_registry/change_registry_using_API.py: -------------------------------------------------------------------------------- 1 
def change_publisher(uuid_dataset, uuid_new_publisher, header_comment, login, password, api="http://api.gbif-uat.org/v1/dataset/"):
    '''
    Change publisher for a given dataset and add comments about it

    The dataset is fetched from `api`, a comment recording the old and the new
    publisher key is posted, and only if that succeeds is the dataset PUT back
    with `publishingOrganizationKey` set to `uuid_new_publisher`.

    uuid_dataset: key of the dataset to modify
    uuid_new_publisher: key of the organization that becomes the publisher
    header_comment: text prepended to the generated comment
    login, password: credentials sent as HTTP basic auth on write requests
    api: base URL of the dataset API (defaults to the gbif-uat.org endpoint)

    Outcomes are reported with print(); nothing is returned.
    '''
    headers = {'Content-Type': 'application/json'}
    response = requests.get(api + uuid_dataset)
    if not response.ok:
        # A failed lookup used to end the function without any message.
        print("Dataset NOT found")
        return

    dataset = response.json()
    my_comment = {'content': (header_comment + 'Changed publisher from ' +
                              dataset["publishingOrganizationKey"] + ' to ' +
                              uuid_new_publisher + ' after request from publishers')}
    # Add comment
    add_comments = requests.post(api + uuid_dataset + "/comment",
                                 data=json.dumps(my_comment),
                                 auth=(login, password),
                                 headers=headers)
    if not add_comments.ok:
        print("Comment could NOT be added")
        return
    print("comment added")

    # Send the full dataset record back with only the publisher key changed.
    dataset["publishingOrganizationKey"] = uuid_new_publisher
    update_dataset = requests.put(api + uuid_dataset,
                                  data=json.dumps(dataset),
                                  auth=(login, password),
                                  headers=headers)
    if update_dataset.ok:
        print("dataset updated")
    else:
        print("dataset NOT updated")


def add_endpoint(uuid_dataset, new_URL, endpoint_type, header_comment, login, password, api="http://api.gbif-uat.org/v1/dataset/"):
    '''
    Add new endpoint for a given dataset and add comments about it

    The endpoints currently attached to the dataset are listed first. After a
    comment has been posted, the new endpoint is created and, only if that
    creation succeeded, every previously listed endpoint is deleted. If the
    new endpoint cannot be created the existing ones are left untouched.

    uuid_dataset: key of the dataset to modify
    new_URL: URL of the endpoint to add
    endpoint_type: value sent as the endpoint "type"
    header_comment: text prepended to the generated comment
    login, password: credentials sent as HTTP basic auth on write requests
    api: base URL of the dataset API (defaults to the gbif-uat.org endpoint)

    Outcomes are reported with print(); nothing is returned.
    '''
    headers = {'Content-Type': 'application/json'}
    response = requests.get(api + uuid_dataset + "/endpoint")
    if not response.ok:
        # A failed lookup used to end the function without any message.
        print("Dataset NOT found")
        return
    previous_endpoints = response.json()

    my_comment = {
        'content': (header_comment + 'Replaced endpoint after request from hosting installation')
    }
    # Add comment
    add_comments = requests.post(api + uuid_dataset + "/comment",
                                 data=json.dumps(my_comment),
                                 auth=(login, password),
                                 headers=headers)
    if not add_comments.ok:
        print("Comment could NOT be added")
        return
    print("comment added")

    my_endpoint = {
        "url": new_URL,
        "type": endpoint_type
    }
    update_dataset = requests.post(api + uuid_dataset + "/endpoint",
                                   data=json.dumps(my_endpoint),
                                   auth=(login, password),
                                   headers=headers)
    if not update_dataset.ok:
        # Keep the old endpoints: removing them now would leave the dataset
        # without any endpoint at all.
        print("dataset NOT updated")
        return

    # Track every deletion (not only the last one); an empty list of previous
    # endpoints is a success.
    all_deleted = True
    for endpoint in previous_endpoints:
        delete_endpoint = requests.delete(api + uuid_dataset + "/endpoint/" + str(endpoint["key"]),
                                          auth=(login, password),
                                          headers=headers)
        all_deleted = delete_endpoint.ok and all_deleted
    if all_deleted:
        print("dataset updated")
    else:
        print("dataset NOT updated")


def delete_dataset(uuid_dataset, header_comment, requesting_organization, login, password, api="http://api.gbif-uat.org/v1/dataset/"):
    '''
    Delete a dataset and add a comment

    A comment naming `requesting_organization` is posted first; the DELETE
    request is only sent when that comment was accepted.

    uuid_dataset: key of the dataset to delete
    header_comment: text prepended to the generated comment
    requesting_organization: name appended to the comment text
    login, password: credentials sent as HTTP basic auth
    api: base URL of the dataset API (defaults to the gbif-uat.org endpoint)

    Outcomes are reported with print(); nothing is returned.
    '''
    headers = {'Content-Type': 'application/json'}
    my_comment = {
        'content': (header_comment + 'Deleted after request from ' + requesting_organization)
    }
    # Add comment
    add_comments = requests.post(api + uuid_dataset + "/comment",
                                 data=json.dumps(my_comment),
                                 auth=(login, password),
                                 headers=headers)
    if not add_comments.ok:
        print("Comment could NOT be added")
        return
    print("comment added")

    deletedDS = requests.delete(api + uuid_dataset,
                                auth=(login, password),
                                headers=headers)
    if deletedDS.ok:
        print("dataset deleted")
    else:
        print("dataset NOT deleted")


def add_machineTag(uuid_dataset, namespace, name, value, login, password, api="http://api.gbif-uat.org/v1/dataset/"):
    '''
    Add new machineTag for a given dataset

    uuid_dataset: key of the dataset to tag
    namespace, name, value: the three fields of the machineTag to create
    login, password: credentials sent as HTTP basic auth
    api: base URL of the dataset API (defaults to the gbif-uat.org endpoint)

    The outcome is reported with print(); nothing is returned.
    '''
    headers = {'Content-Type': 'application/json'}
    machineTag = {
        "namespace": namespace,
        "name": name,
        "value": value
    }
    update_dataset = requests.post(api + uuid_dataset + "/machineTag",
                                   data=json.dumps(machineTag),
                                   auth=(login, password),
                                   headers=headers)
    if update_dataset.ok:
        print("machineTag added")
    else:
        print("machineTag NOT added")
def _delete_matching_items(uuid_dataset, resource, field, wanted, login, password, api):
    '''
    Delete every `resource` item of a dataset whose `field` equals `wanted`

    `resource` is the sub-path listed and deleted under the dataset
    ("machineTag" or "comment"); it is also used as the label in the printed
    messages.
    '''
    response = requests.get(api + uuid_dataset + "/" + resource)
    if not response.ok:
        print("Dataset NOT found")
        return

    headers = {'Content-Type': 'application/json'}
    matching_keys = [item["key"] for item in response.json() if item[field] == wanted]
    if not matching_keys:
        # Nothing matched: previously this path crashed on an unbound variable.
        print(resource + " NOT found")
        return

    # Attempt every deletion and remember whether all of them succeeded,
    # instead of reporting only the status of the last request.
    all_deleted = True
    for key in matching_keys:
        deletion = requests.delete(api + uuid_dataset + "/" + resource + "/" + str(key),
                                   auth=(login, password),
                                   headers=headers)
        all_deleted = deletion.ok and all_deleted

    if all_deleted:
        print(resource + " deleted")
    else:
        print(resource + " NOT deleted")


def delete_machineTag_from_namespace(uuid_dataset, namespace, login, password, api="http://api.gbif-uat.org/v1/dataset/"):
    '''
    Delete machineTag for a given dataset from a given namespace

    All machineTags of the dataset whose "namespace" equals `namespace` are
    deleted.

    uuid_dataset: key of the dataset
    namespace: namespace whose machineTags are removed
    login, password: credentials sent as HTTP basic auth on delete requests
    api: base URL of the dataset API (defaults to the gbif-uat.org endpoint)

    Prints "machineTag deleted" when every matching tag was removed,
    "machineTag NOT deleted" when at least one deletion failed,
    "machineTag NOT found" when no tag matches and "Dataset NOT found" when
    the tags cannot be listed. Nothing is returned.
    '''
    _delete_matching_items(uuid_dataset, "machineTag", "namespace", namespace,
                           login, password, api)


def delete_comment(uuid_dataset, comment_content, login, password, api="http://api.gbif-uat.org/v1/dataset/"):
    '''
    Delete a specific comment for a given dataset

    All comments of the dataset whose "content" equals `comment_content`
    exactly are deleted.

    uuid_dataset: key of the dataset
    comment_content: exact text of the comment(s) to remove
    login, password: credentials sent as HTTP basic auth on delete requests
    api: base URL of the dataset API (defaults to the gbif-uat.org endpoint)

    Prints "comment deleted" when every matching comment was removed,
    "comment NOT deleted" when at least one deletion failed,
    "comment NOT found" when no comment matches and "Dataset NOT found" when
    the comments cannot be listed. Nothing is returned.
    '''
    _delete_matching_items(uuid_dataset, "comment", "content", comment_content,
                           login, password, api)
[], 18 | "source": [ 19 | "# API settings\n", 20 | "api = \"http://api.gbif-uat.org/v1/dataset\"\n", 21 | "headers = {'Content-Type': 'application/json'}\n", 22 | "\n", 23 | "username = \"\"\n", 24 | "password = \"\"\n", 25 | "\n", 26 | "# Dataset Example\n", 27 | "title = \"Specify - TEST - MARIE 1\"\n", 28 | "organizaion = \"0a16da09-7719-40de-8d4f-56a15ed52fb6\" # Test organization\n", 29 | "installation = \"92d76df5-3de1-4c89-be03-7a17abad962a\" # Test HTTP installation\n", 30 | "endpoint_example = \"https://sandbox.zenodo.org/record/373582/files/exporterDwc.zip\"\n", 31 | "data_type = \"OCCURRENCE\"" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 10, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "dataset_example = {\n", 41 | " \"installationKey\": installation,\n", 42 | " \"publishingOrganizationKey\": organizaion,\n", 43 | " \"type\": data_type,\n", 44 | " \"title\": title\n", 45 | "}" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 11, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "create_dataset = requests.post(api,\n", 55 | " data=json.dumps(dataset_example),\n", 56 | " auth=(username, password),\n", 57 | " headers=headers)" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 12, 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "name": "stdout", 67 | "output_type": "stream", 68 | "text": [ 69 | "88cb6239-85aa-47c4-99e5-fab8cb2f6fa2\n", 70 | "Endpoint added\n" 71 | ] 72 | } 73 | ], 74 | "source": [ 75 | "if create_dataset.ok:\n", 76 | " # Print UUID\n", 77 | " dataset_response = create_dataset.json()\n", 78 | " print(dataset_response)\n", 79 | " \n", 80 | " # Add endpoint\n", 81 | " my_endpoint = {\n", 82 | " \"url\": endpoint_example,\n", 83 | " \"type\": \"DWC_ARCHIVE\"\n", 84 | " }\n", 85 | " update_dataset = requests.post(api + \"/\" + dataset_response + \"/endpoint\",\n", 86 | " data=json.dumps(my_endpoint),\n", 87 | " auth=(username, password),\n", 
88 | " headers=headers)\n", 89 | " if update_dataset.ok:\n", 90 | " print(\"Endpoint added\")" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 13, 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "data": { 100 | "text/plain": [ 101 | "'88cb6239-85aa-47c4-99e5-fab8cb2f6fa2'" 102 | ] 103 | }, 104 | "execution_count": 13, 105 | "metadata": {}, 106 | "output_type": "execute_result" 107 | } 108 | ], 109 | "source": [ 110 | "create_dataset.json()" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [] 119 | } 120 | ], 121 | "metadata": { 122 | "kernelspec": { 123 | "display_name": "Python 3", 124 | "language": "python", 125 | "name": "python3" 126 | }, 127 | "language_info": { 128 | "codemirror_mode": { 129 | "name": "ipython", 130 | "version": 3 131 | }, 132 | "file_extension": ".py", 133 | "mimetype": "text/x-python", 134 | "name": "python", 135 | "nbconvert_exporter": "python", 136 | "pygments_lexer": "ipython3", 137 | "version": "3.7.4" 138 | } 139 | }, 140 | "nbformat": 4, 141 | "nbformat_minor": 2 142 | } 143 | --------------------------------------------------------------------------------