├── .github └── workflows │ └── main.yml ├── .gitignore ├── LICENSE ├── LICENSE_selected_R_tutorials ├── README.md ├── _config.yml ├── _toc.yml ├── images ├── UALIB_favicon.png └── UALIB_logo.png ├── requirements.txt └── src ├── about ├── contributing.rst ├── introduction.rst ├── license-reuse.rst └── tech-details.rst ├── overview ├── arxiv.rst ├── bea.rst ├── bls.rst ├── casc.rst ├── chronam.rst ├── college-scorecard.rst ├── congress.rst ├── crossref.rst ├── fdc.rst ├── geonames.rst ├── nasa-images.rst ├── nps.rst ├── nws.rst ├── openalex.rst ├── osf.rst ├── osm.rst ├── pubchem-periodic-table.rst ├── pubchem.rst ├── pubmed.rst ├── ror.rst ├── scopus.rst ├── sdirect.rst ├── sec-edgar.rst ├── speedrun.rst ├── springer.rst ├── stack-exchange.rst ├── us-census-geocoding.rst ├── us-census.rst ├── us-treasury.rst ├── usa-spending.rst ├── usgs-national-map.rst ├── wiley-tdm.rst ├── world-bank.rst ├── wos.rst └── z3950.rst ├── python ├── bea.ipynb ├── bls.ipynb ├── casc.ipynb ├── chronam.ipynb ├── college-scorecard.ipynb ├── congress.ipynb ├── crossref.ipynb ├── fdc.ipynb ├── geonames.ipynb ├── imgs │ ├── APOD_Image.png │ ├── Earth_Image1_recent.png │ └── Earth_Image_Stitched.png ├── nasa-images.ipynb ├── nps.ipynb ├── nws.ipynb ├── openalex.ipynb ├── osf.ipynb ├── osm.ipynb ├── pubchem-periodic-table.ipynb ├── pubchem.ipynb ├── pubmed.ipynb ├── ror.ipynb ├── scopus.ipynb ├── sdirect.ipynb ├── sec-edgar.ipynb ├── speedrun.ipynb ├── springer.ipynb ├── stack-exchange.ipynb ├── us-census-geocoding.ipynb ├── us-census.ipynb ├── us-treasury.ipynb ├── usa-spending.ipynb ├── usgs-national-map.ipynb ├── wiley-tdm.ipynb ├── world-bank.ipynb └── wos.ipynb ├── r-gpl3 ├── PubMedAPItut_files │ └── figure-html │ │ └── visual-1.png ├── R_Pubchem_Markdown_Adam_Edit_files │ └── figure-html │ │ ├── unnamed-chunk-10-1.png │ │ ├── unnamed-chunk-10-2.png │ │ ├── unnamed-chunk-10-3.png │ │ ├── unnamed-chunk-10-4.png │ │ ├── unnamed-chunk-10-5.png │ │ ├── unnamed-chunk-18-1.png │ │ ├── unnamed-chunk-18-2.png │ │ ├── unnamed-chunk-18-3.png │ │ ├── unnamed-chunk-18-4.png │ │ └── unnamed-chunk-18-5.png ├── US_Census_Data_in_R_files │ └── figure-html │ │ └── plot-popchg-1.png ├── pubchem.md ├── pubmed.md └── us-census.md ├── r ├── CASCommonChemR_files │ └── figure-html │ │ ├── Display-1.png │ │ ├── disp-list-1.png │ │ ├── disp-list-2.png │ │ ├── disp-list-3.png │ │ ├── disp-list-4.png │ │ ├── disp-list-5.png │ │ ├── hist-1.png │ │ └── kernel-1.png ├── Chronam_in_R_Adam_Vers_files │ └── figure-html │ │ ├── unnamed-chunk-12-1.png │ │ └── unnamed-chunk-21-1.png ├── College_Scorecard_R_files │ └── figure-html │ │ └── program-percentage-distribution.png ├── R_WorldBank_Markdown_files │ └── figure-html │ │ ├── unnamed-chunk-16-1.png │ │ ├── unnamed-chunk-17-1.png │ │ └── unnamed-chunk-18-1.png ├── SEC_EDGAR_API_R_files │ └── figure-html │ │ ├── unnamed-chunk-10-1.png │ │ └── unnamed-chunk-5-1.png ├── USA_Spending_R_files │ └── figure-html │ │ ├── unnamed-chunk-4-1.png │ │ ├── unnamed-chunk-6-1.png │ │ └── unnamed-chunk-9-1.png ├── arXiv_API_in_R_files │ └── figure-html │ │ ├── unnamed-chunk-3-1.png │ │ └── unnamed-chunk-6-1.png ├── arxiv.md ├── casc.md ├── chronam.md ├── college-scorecard.md ├── crossref.md ├── figure │ ├── Display-1.png │ ├── disp list-1.png │ ├── disp list-2.png │ ├── disp list-3.png │ ├── disp list-4.png │ ├── disp list-5.png │ ├── hist-1.png │ ├── kernel-1.png │ ├── plot popchg-1.png │ ├── unnamed-chunk-12-1.png │ ├── unnamed-chunk-16-1.png │ ├── unnamed-chunk-17-1.png │ ├── unnamed-chunk-18-1.png │ └── 
visual-1.png ├── output.json ├── sdirect.md ├── sec-edgar.md ├── usa-spending.md ├── wiley-tdm.md └── world-bank.md └── shell └── z3950.rst /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: Build and Deploy Jupyter Book 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | workflow_dispatch: 9 | 10 | permissions: 11 | contents: read 12 | pages: write 13 | id-token: write 14 | 15 | concurrency: 16 | group: "pages" 17 | cancel-in-progress: false 18 | 19 | jobs: 20 | deploy: 21 | environment: 22 | name: github-pages 23 | url: ${{ steps.deployment.outputs.page_url }} 24 | runs-on: ubuntu-latest 25 | steps: 26 | - name: Checkout Repository 27 | uses: actions/checkout@v2 28 | 29 | - name: Setup Python 30 | uses: actions/setup-python@v2 31 | with: 32 | python-version: '3.x' 33 | 34 | - name: Install Dependencies 35 | run: | 36 | python -m pip install --upgrade pip 37 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 38 | pip install jupyter-book 39 | 40 | - name: Build Jupyter Book HTML 41 | run: | 42 | jupyter-book build . 43 | 44 | - name: Setup Pages 45 | uses: actions/configure-pages@v3 46 | 47 | - name: Upload artifact 48 | uses: actions/upload-pages-artifact@v3 49 | with: 50 | path: './_build/html' 51 | 52 | - name: Deploy to GitHub Pages 53 | id: deployment 54 | uses: actions/deploy-pages@v4 55 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | _build/ 2 | .ipynb_checkpoints/ 3 | .env 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 The University of Alabama Libraries 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # University of Alabama Libraries Scholarly API Cookbook 2 | 3 | > [!IMPORTANT] 4 | > Please check the individual scholarly API documentation for current information on API usage and policies. 
5 | > 6 | > March 2025 - We have decided to no longer maintain the Matlab, Mathematica, Bash (except the Z39.50 Bash recipe) and C recipes and have removed them from the Scholarly API Cookbook. These archived recipes are in the [UA Libraries Scholarly API Cookbook Archive](https://github.com/UA-Libraries-Research-Data-Services/Scholarly_API_Cookbook_Archive). 7 | > 8 | > November 2024 - Some R code tutorials that were originally MIT Licensed, are now licensed under the GPL-3 License to comply with the licensing terms of dependent R libraries. 9 | 10 | The University of Alabama Libraries Scholarly API Cookbook is an open online book containing short scholarly API code examples (i.e., “recipes”) that demonstrate how to work with various scholarly web service APIs. It is part of the University of Alabama Libraries efforts to support Research Data Services. Read the book [here](https://ua-libraries-research-data-services.github.io/UALIB_ScholarlyAPI_Cookbook). 11 | 12 | ## License and Reuse 13 | 14 | Most of the code in this repository is licensed under the [MIT License](https://github.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/blob/main/LICENSE). 15 | 16 | The Python scripts in this repository are licensed under the MIT License. However, these scripts may rely on external libraries such as matplotlib, pandas, and others. These libraries are licensed under their own respective terms, and will need to be installed separately. Refer to the documentation of each library for installation instructions and licensing details. 17 | 18 | The Bash tutorials are licensed under the MIT License (Bash itself is licensed under the GNU General Public License). Some of the included scripts may rely on external tools such as curl, jq, YAZ, and gnuplot, each of which is licensed under its own terms. Users must obtain and install these tools separately. Refer to the documentation of each tool for installation instructions and licensing details. 19 | 20 | Lastly, most of the R tutorial scripts are MIT licensed, but some are licensed under the [GPL-3 License](https://github.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/blob/main/LICENSE_selected_R_tutorials) because they depend on GPL-licensed R libraries (refer to the documentation of each R library for installation instructions and licensing details). The R tutorials with GPL-3 licenses are indicated at the top of the respective files and organized separately in the folder `src/r-gpl3/`. 21 | 22 | We have endeavored to follow the appropriate terms and usage policies of each scholarly API, web service, and Z39.50 server. We have linked to the terms and policies where possible. Some database APIs may require a valid library subscription, institutional access, or individual account to use their services. Please be responsible when reusing these scripts and respect the API terms and usage policies (e.g., query limits, record downloads, data sharing restrictions). Data output snippets shown in this book are for demonstration purposes and are credited to the individual API or database service. The output generated from APIs or services remains subject to the terms and conditions of the respective provider. Some outputs (e.g., U.S. Government works) may be in the public domain, while others may require attribution or adherence to other conditions. 23 | 24 | If you reuse the code, attribution would be appreciated. 
Please link to the Cookbook and cite our manuscript: 25 | 26 | Link to Cookbook: https://ua-libraries-research-data-services.github.io/UALIB_ScholarlyAPI_Cookbook 27 | 28 | Citation: Scalfani, V. F.; Walker, K. W.; Simpson, L.; Fernandez, A. M.; Patel, V. D.; Ramig, A.; Gomes, C.; Moen, M. T.; Nguyen, A. M. Creating a Scholarly API Cookbook: Supporting Library Users with Programmatic Access to Information. Issues in Science and Technology Librarianship, 2023, No. 104. https://doi.org/10.29173/istl2766. 29 | 30 | ```bibtex 31 | @article{scalfani_creating_2023, 32 | title = {Creating a {Scholarly} {API} {Cookbook}: {Supporting} {Library} {Users} with {Programmatic} {Access} to {Information}}, 33 | issn = {1092-1206}, 34 | shorttitle = {Creating a {Scholarly} {API} {Cookbook}}, 35 | url = {https://journals.library.ualberta.ca/istl/index.php/istl/article/view/2766}, 36 | doi = {10.29173/istl2766}, 37 | abstract = {Scholarly web-based application programming interfaces (APIs) allow users to interact with information and data programmatically. Interacting with information programmatically allows users to create advanced information query workflows and quickly access machine-readable data for downstream computations. With the growing availability of scholarly APIs from open and commercial library databases, supporting access to information via an API has become a key support area for research data services in libraries. This article describes our efforts with supporting API access through the development of an online Scholarly API Cookbook. The Cookbook contains code recipes (i.e., tutorials) for getting started with 10 different scholarly APIs, including for example, Scopus, World Bank, and PubMed. API tutorials are available in Python, Bash, Matlab, and Mathematica. A tutorial for interacting with library catalog data programmatically via Z39.50 is also included, as traditional library catalog metadata is rarely available via an API. In addition to describing the Scholarly API Cookbook content, we discuss our experiences building a student research data services programming team, challenges we encountered, and ideas to improve the Cookbook. The University of Alabama Libraries Scholarly API Cookbook is freely available and hosted on GitHub. All code within the API Cookbook is licensed with the permissive MIT license, and as a result, users are free to reuse and adapt the code in their teaching and research.}, 38 | number = {104}, 39 | urldate = {2023-10-13}, 40 | journal = {Issues in Science and Technology Librarianship}, 41 | author = {Scalfani, Vincent F. and Walker, Kevin W. and Simpson, Lance and Fernandez, Avery M. and Patel, Vishank D. and Ramig, Anastasia and Gomes, Cyrus and Moen, Michael T. and Nguyen, Adam M.}, 42 | month = oct, 43 | year = {2023}, 44 | } 45 | ``` 46 | 47 | ## Archived Recipes 48 | 49 | We have decided to no longer maintain the Matlab, Mathematica, Bash (except the Z39.50 Bash recipe), and C recipes and have removed them from the Scholarly API Cookbook. These archived recipes are still in the [UA Libraries Scholarly API Cookbook Archive](https://github.com/UA-Libraries-Research-Data-Services/Scholarly_API_Cookbook_Archive). 50 | 51 | ### Archive License Information 52 | 53 | The code in the UA Libraries Scholarly API Cookbook Archive is licensed under the [MIT License](https://github.com/UA-Libraries-Research-Data-Services/Scholarly_API_Cookbook_Archive/blob/main/LICENSE). This includes code written to be used with Wolfram Mathematica and MathWorks MATLAB. 
However, these proprietary software packages themselves are not covered under the MIT License, and users must have valid licenses for Mathematica and MATLAB to run the associated code. 54 | 55 | The Python scripts in this repository are licensed under the MIT License. However, these scripts may rely on external libraries such as matplotlib, pandas, and others. These libraries are licensed under their own respective terms, and will need to be installed separately. Refer to the documentation of each library for installation instructions and licensing details. 56 | 57 | The Bash tutorials are licensed under the MIT License (Bash itself is licensed under the GNU General Public License). Some of the included scripts may rely on external tools such as curl, jq, and gnuplot, each of which is licensed under its own terms. Users must obtain and install these tools separately. Refer to the documentation of each tool for installation instructions and licensing details. 58 | 59 | The C code in the archive is licensed under the MIT License. This repository provides only the source code, and users will need to compile the C programs to run them. Some of the C code depends on external libraries such as curl, jq, and YAZ, which are licensed under their own respective terms. These libraries will need to be obtained and installed separately by the user. 60 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | # Book settings 2 | 3 | title: Scholarly API Cookbook 4 | author: The University of Alabama Libraries 5 | copyright: "2025" 6 | logo: images/UALIB_logo.png 7 | exclude_patterns: [readme.md, old, conversion_scripts] 8 | # Force re-execution of notebooks on each build. 
9 | # See https://jupyterbook.org/content/execute.html 10 | execute: 11 | execute_notebooks: off 12 | 13 | # Define the name of the latex output file for PDF builds 14 | latex: 15 | latex_documents: 16 | targetname: book.tex 17 | 18 | # Add a bibtex file so that we can create citations 19 | #bibtex_bibfiles: 20 | # - references.bib 21 | 22 | # Information about where the book exists on the web 23 | repository: 24 | url: https://github.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook # Online location of your book 25 | # path_to_book: # Optional path to your book, relative to the repository root 26 | branch: main # Which branch of the repository should be used when creating links (optional) 27 | 28 | # Add GitHub buttons to your book 29 | # See https://jupyterbook.org/customize/config.html#add-a-link-to-your-repository 30 | html: 31 | favicon: images/UALIB_favicon.png 32 | use_issues_button: true 33 | use_repository_button: true 34 | 35 | launch_buttons: 36 | colab_url: "" 37 | binderhub_url: "" 38 | jupyterhub_url: "" 39 | -------------------------------------------------------------------------------- /_toc.yml: -------------------------------------------------------------------------------- 1 | # Table of contents 2 | format: jb-book 3 | root: src/about/introduction 4 | parts: 5 | 6 | - caption: ARTICLE FULL-TEXT 7 | chapters: 8 | 9 | # Open Science Framework 10 | - file: src/overview/osf 11 | sections: 12 | - file: src/python/osf 13 | title: "...in Python" 14 | 15 | # ScienceDirect 16 | - file: src/overview/sdirect 17 | sections: 18 | - file: src/python/sdirect 19 | title: "...in Python" 20 | - file: src/r/sdirect 21 | title: "...in R" 22 | 23 | # Springer 24 | - file: src/overview/springer 25 | sections: 26 | - file: src/python/springer 27 | title: "...in Python" 28 | 29 | # Wiley TDM 30 | - file: src/overview/wiley-tdm 31 | sections: 32 | - file: src/python/wiley-tdm 33 | title: "...in Python" 34 | - file: src/r/wiley-tdm 35 | title: "...in R" 36 | 37 | 38 | - caption: BIBLIOGRAPHIC 39 | chapters: 40 | 41 | # arXiv 42 | - file: src/overview/arxiv 43 | sections: 44 | - file: src/r/arxiv 45 | title: "...in R" 46 | 47 | # Crossref 48 | - file: src/overview/crossref 49 | sections: 50 | - file: src/python/crossref 51 | title: "...in Python" 52 | - file: src/r/crossref 53 | title: "...in R" 54 | 55 | # OpenAlex 56 | - file: src/overview/openalex 57 | sections: 58 | - file: src/python/openalex 59 | title: "...in Python" 60 | 61 | # Research Organization Registry 62 | - file: src/overview/ror 63 | sections: 64 | - file: src/python/ror 65 | title: "...in Python" 66 | 67 | # Scopus 68 | - file: src/overview/scopus 69 | sections: 70 | - file: src/python/scopus 71 | title: "...in Python" 72 | 73 | # Web of Science 74 | - file: src/overview/wos 75 | sections: 76 | - file: src/python/wos 77 | title: "...in Python" 78 | 79 | 80 | - caption: BUSINESS 81 | chapters: 82 | 83 | # U.S. Bureau of Economic Analysis 84 | - file: src/overview/bea 85 | sections: 86 | - file: src/python/bea 87 | title: "...in Python" 88 | 89 | # U.S. Bureau of Labor Statistics 90 | - file: src/overview/bls 91 | sections: 92 | - file: src/python/bls 93 | title: "...in Python" 94 | 95 | # U.S. Securities and Exchange 96 | - file: src/overview/sec-edgar 97 | sections: 98 | - file: src/python/sec-edgar 99 | title: "...in Python" 100 | - file: src/r/sec-edgar 101 | title: "...in R" 102 | 103 | # U.S. 
Treasury 104 | - file: src/overview/us-treasury 105 | sections: 106 | - file: src/python/us-treasury 107 | title: "...in Python" 108 | 109 | # World Bank 110 | - file: src/overview/world-bank 111 | sections: 112 | - file: src/python/world-bank 113 | title: "...in Python" 114 | - file: src/r/world-bank 115 | title: "...in R" 116 | 117 | 118 | - caption: GENERAL 119 | chapters: 120 | 121 | # National Park Service 122 | - file: src/overview/nps 123 | sections: 124 | - file: src/python/nps 125 | title: "...in Python" 126 | 127 | # Speedrun.com 128 | - file: src/overview/speedrun 129 | sections: 130 | - file: src/python/speedrun 131 | title: "...in Python" 132 | 133 | # Stack Exchange 134 | - file: src/overview/stack-exchange 135 | sections: 136 | - file: src/python/stack-exchange 137 | title: "...in Python" 138 | 139 | # Z39.50 140 | - file: src/overview/z3950 141 | sections: 142 | - file: src/shell/z3950 143 | title: "...in Bash" 144 | 145 | 146 | - caption: GIS 147 | chapters: 148 | 149 | # GeoNames 150 | - file: src/overview/geonames 151 | sections: 152 | - file: src/python/geonames 153 | title: "...in Python" 154 | 155 | # OpenStreetMap 156 | - file: src/overview/osm 157 | sections: 158 | - file: src/python/osm 159 | title: "...in Python" 160 | 161 | # U.S. Census Geocoding 162 | - file: src/overview/us-census-geocoding 163 | sections: 164 | - file: src/python/us-census-geocoding 165 | title: "...in Python" 166 | 167 | # USGS National Map 168 | - file: src/overview/usgs-national-map 169 | sections: 170 | - file: src/python/usgs-national-map 171 | title: "...in Python" 172 | 173 | 174 | - caption: HUMANITIES 175 | chapters: 176 | 177 | # Chronicling America 178 | - file: src/overview/chronam 179 | sections: 180 | - file: src/python/chronam 181 | title: "...in Python" 182 | - file: src/r/chronam 183 | title: "...in R" 184 | 185 | 186 | - caption: SCIENTIFIC 187 | chapters: 188 | 189 | # CAS Common Chemistry 190 | - file: src/overview/casc 191 | sections: 192 | - file: src/python/casc 193 | title: "...in Python" 194 | - file: src/r/casc 195 | title: "...in R" 196 | 197 | # FoodData Central 198 | - file: src/overview/fdc 199 | sections: 200 | - file: src/python/fdc 201 | title: "...in Python" 202 | 203 | # NASA Images 204 | - file: src/overview/nasa-images 205 | sections: 206 | - file: src/python/nasa-images 207 | title: "...in Python" 208 | 209 | # National Weather Service 210 | - file: src/overview/nws 211 | sections: 212 | - file: src/python/nws 213 | title: "...in Python" 214 | 215 | # PubChem 216 | - file: src/overview/pubchem 217 | sections: 218 | - file: src/python/pubchem 219 | title: "...in Python" 220 | - file: src/r-gpl3/pubchem 221 | title: "...in R" 222 | 223 | # PubChem Periodic Table 224 | - file: src/overview/pubchem-periodic-table 225 | sections: 226 | - file: src/python/pubchem-periodic-table 227 | title: "...in Python" 228 | 229 | # PubMed 230 | - file: src/overview/pubmed 231 | sections: 232 | - file: src/python/pubmed 233 | title: "...in Python" 234 | - file: src/r-gpl3/pubmed 235 | title: "...in R" 236 | 237 | 238 | - caption: SOCIAL SCIENCES 239 | chapters: 240 | 241 | # College Scorecard 242 | - file: src/overview/college-scorecard 243 | sections: 244 | - file: src/python/college-scorecard 245 | title: "...in Python" 246 | - file: src/r/college-scorecard 247 | title: "...in R" 248 | 249 | # Congress.gov 250 | - file: src/overview/congress 251 | sections: 252 | - file: src/python/congress 253 | title: "...in Python" 254 | 255 | # U.S. 
Census Data 256 | - file: src/overview/us-census 257 | sections: 258 | - file: src/python/us-census 259 | title: "...in Python" 260 | - file: src/r-gpl3/us-census 261 | title: "...in R" 262 | 263 | # USA Spending 264 | - file: src/overview/usa-spending 265 | sections: 266 | - file: src/python/usa-spending 267 | title: "...in Python" 268 | - file: src/r/usa-spending 269 | title: "...in R" 270 | 271 | - caption: ABOUT 272 | chapters: 273 | - file: src/about/contributing 274 | - file: src/about/license-reuse 275 | - file: src/about/tech-details 276 | -------------------------------------------------------------------------------- /images/UALIB_favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/images/UALIB_favicon.png -------------------------------------------------------------------------------- /images/UALIB_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/images/UALIB_logo.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | jupyter-book 2 | matplotlib 3 | numpy 4 | -------------------------------------------------------------------------------- /src/about/contributing.rst: -------------------------------------------------------------------------------- 1 | Contributing 2 | %%%%%%%%%%%%%% 3 | 4 | How to Contribute 5 | ****************** 6 | 7 | This is an open source resource. Any contributions are welcome. If you find a 8 | mistake or have an idea, please let us know via the GitHub Issues Tracker. 9 | 10 | In addition, any general feedback is always welcome! 11 | -------------------------------------------------------------------------------- /src/about/introduction.rst: -------------------------------------------------------------------------------- 1 | Introduction 2 | %%%%%%%%%%%%%% 3 | 4 | What is this? 5 | ************* 6 | 7 | This is an open online book containing short scholarly API code examples (i.e., "recipes") 8 | that demonstrate how to work with various scholarly web service APIs. It is part of the University of 9 | Alabama Libraries efforts to support `Research Data Services`_. 10 | 11 | .. _Research Data Services: https://guides.lib.ua.edu/ResearchDataServices 12 | 13 | What should I be aware of before getting started? 14 | ************************************************* 15 | 16 | Before interacting with any scholarly APIs (or similar web service), it is very important to review the 17 | usage policies, which generally includes information such as query limits and data reuse policies. 18 | We have endeavored to follow all appropriate API usage policies in our examples and have linked 19 | to the specific API policies where possible. While some APIs are openly accessible and do 20 | not require special authentication, other scholarly APIs require that you are affiliated with a 21 | subscribing institution, and have registered for an API key to use for authentication in API queries. 22 | We have added instructions about any necessary authentication within the relevant code recipes. 23 | 24 | 25 | .. 
important:: 26 | 27 | In general, scholarly APIs are designed for the collection of small to medium 28 | sized datasets; that is, in the range of 100s or maybe a few thousand queries at most 29 | (varies with the API). If you need large bulk datasets, an API is likely not the method to use, and 30 | there may be bulk data downloads available from the database instead. 31 | 32 | If you decide that your use-case is appropriate for a scholarly API (or similar web service), here are a few good general practices 33 | to follow when working with any web API: 34 | 35 | 1. Read the API documentation and usage guidelines before starting. 36 | 2. Start with testing the behavior of the API using a single programmatic API request (i.e., not in a loop). 37 | 3. Add a 1 second delay between API requests when using a loop. 38 | 4. When using a loop to repeat API requests, start out with a small list, perhaps 3-5. 39 | 5. Cache the API returned data when testing. For example, if you are trying to parse the returned API data in a scripting workflow, save the returned data in a variable or to a file so that you do not need to repeat the API request unnecessarily for the downstream parsing or analysis. 40 | 41 | What kind of content is included? 42 | ********************************* 43 | 44 | The scope of this book is to provide short code examples related to the retrieval of data and information 45 | from scholarly APIs using several different programming languages. 46 | 47 | While there may be some introductory programming content in this book, the 48 | content is not meant to be a general introduction to programming. 49 | Instead, our aim with the Scholarly API Cookbook is to provide 50 | some short scripting based workflows for working with scholarly data and information APIs. 51 | For more general introductions to programming, we recommend searching the 52 | UA Libraries Scout database for programming books (e.g., `TI python`). 53 | 54 | .. seealso:: 55 | 56 | UA Libraries Workshop lessons and references therein for more general 57 | programming content [#ua_work]_. 58 | 59 | 60 | Which Programming Languages are Covered? 61 | **************************************** 62 | 63 | Currently, we have scholarly API code examples in Python and R (and a Z39.50 tutorial in Bash). 64 | For good luck, let's add ``Hello World!`` in each programming language: 65 | 66 | .. tab-set:: 67 | 68 | .. tab-item:: Python 69 | 70 | .. code-block:: python 71 | 72 | >>> print("Hello World!") 73 | 74 | .. tab-item:: R 75 | 76 | .. code-block:: r 77 | 78 | > print("Hello World!") 79 | 80 | Who is Creating the Content? 81 | **************************** 82 | 83 | The Scholarly API Cookbook content is authored by University of Alabama 84 | Libraries faculty and student assistants. Specific authors are noted on each 85 | tutorial or document page. 86 | 87 | .. rubric:: References 88 | 89 | .. [#ua_work] ``_ 90 | 91 | 92 | -------------------------------------------------------------------------------- /src/about/license-reuse.rst: -------------------------------------------------------------------------------- 1 | License and Reuse 2 | %%%%%%%%%%%%%%%%%% 3 | 4 | License and Reuse 5 | ****************** 6 | 7 | Most of the code in this Scholarly API Cookbook is licensed under the `MIT License `_. 8 | 9 | The Python scripts in this Scholarly API Cookbook are licensed under the MIT License. However, these scripts may rely on external libraries such as matplotlib, pandas, and others. 
These libraries are licensed under their own respective terms, and will need to be installed separately. Refer to the documentation of each library for installation instructions and licensing details. 10 | 11 | The Bash scripts are licensed under the MIT License (Bash itself is licensed under the GNU General Public License). Some of the included scripts may rely on external tools such as curl, jq, YAZ, and gnuplot, each of which is licensed under its own terms. Users must obtain and install these tools separately. Refer to the documentation of each tool for installation instructions and licensing details. 12 | 13 | Lastly, most of the R tutorial scripts are MIT licensed, but some are licensed under the `GPL-3 License `_ because they depend on GPL-licensed R libraries (refer to the documentation of each R library for installation instructions and licensing details). The R tutorials with GPL-3 licenses are indicated at the top of the respective files. 14 | 15 | .. important:: 16 | 17 | We have endeavored to follow the appropriate terms and usage policies of each scholarly API, web service, and Z39.50 server. We have linked to the terms and policies where possible. Some database APIs may require a valid library subscription, institutional access, or individual account to use their services. Please be responsible when reusing these scripts and respect the API terms and usage policies (e.g., query limits, record downloads, data sharing restrictions). Data output snippets shown in this book are for demonstration purposes and are credited to the individual API or database service. The output generated from APIs or services remains subject to the terms and conditions of the respective provider. Some outputs (e.g., U.S. Government works) may be in the public domain, while others may require attribution or adherence to other conditions. 18 | 19 | If you reuse the code, attribution would be appreciated. Please link to the Cookbook and cite our manuscript: 20 | 21 | Link to Cookbook: ``_ 22 | 23 | Citation: Scalfani, V. F.; Walker, K. W.; Simpson, L.; Fernandez, A. M.; Patel, V. D.; Ramig, A.; Gomes, C.; Moen, M. T.; Nguyen, A. M. Creating a Scholarly API Cookbook: Supporting Library Users with Programmatic Access to Information. *Issues in Science and Technology Librarianship*, **2023**, No. 104. ``_. 24 | 25 | .. code-block:: bibtex 26 | 27 | @article{scalfani_creating_2023, 28 | title = {Creating a {Scholarly} {API} {Cookbook}: {Supporting} {Library} {Users} with {Programmatic} {Access} to {Information}}, 29 | issn = {1092-1206}, 30 | shorttitle = {Creating a {Scholarly} {API} {Cookbook}}, 31 | url = {https://journals.library.ualberta.ca/istl/index.php/istl/article/view/2766}, 32 | doi = {10.29173/istl2766}, 33 | abstract = {Scholarly web-based application programming interfaces (APIs) allow users to interact with information and data programmatically. Interacting with information programmatically allows users to create advanced information query workflows and quickly access machine-readable data for downstream computations. With the growing availability of scholarly APIs from open and commercial library databases, supporting access to information via an API has become a key support area for research data services in libraries. This article describes our efforts with supporting API access through the development of an online Scholarly API Cookbook. 
The Cookbook contains code recipes (i.e., tutorials) for getting started with 10 different scholarly APIs, including for example, Scopus, World Bank, and PubMed. API tutorials are available in Python, Bash, Matlab, and Mathematica. A tutorial for interacting with library catalog data programmatically via Z39.50 is also included, as traditional library catalog metadata is rarely available via an API. In addition to describing the Scholarly API Cookbook content, we discuss our experiences building a student research data services programming team, challenges we encountered, and ideas to improve the Cookbook. The University of Alabama Libraries Scholarly API Cookbook is freely available and hosted on GitHub. All code within the API Cookbook is licensed with the permissive MIT license, and as a result, users are free to reuse and adapt the code in their teaching and research.}, 34 | number = {104}, 35 | urldate = {2023-10-13}, 36 | journal = {Issues in Science and Technology Librarianship}, 37 | author = {Scalfani, Vincent F. and Walker, Kevin W. and Simpson, Lance and Fernandez, Avery M. and Patel, Vishank D. and Ramig, Anastasia and Gomes, Cyrus and Moen, Michael T. and Nguyen, Adam M.}, 38 | month = oct, 39 | year = {2023}, 40 | } 41 | 42 | Scholarly API Cookbook Archive 43 | ****************************** 44 | 45 | We have decided to no longer maintain the Matlab, Mathematica, Bash (except the Z39.50 Bash recipe), and C recipes and have removed them from the Scholarly API Cookbook. These archived recipes are still in the `UA Libraries Scholarly API Cookbook Archive `_. 46 | 47 | The code in the UA Libraries Scholarly API Cookbook Archive is licensed under the `MIT License `_. This includes code written to be used with Wolfram Mathematica and MathWorks MATLAB. However, these proprietary software packages themselves are not covered under the MIT License, and users must have valid licenses for Mathematica and MATLAB to run the associated code. 48 | 49 | The Python scripts in this Scholarly API Cookbook are licensed under the MIT License. However, these scripts may rely on external libraries such as matplotlib, pandas, and others. These libraries are licensed under their own respective terms, and will need to be installed separately. Refer to the documentation of each library for installation instructions and licensing details. 50 | 51 | The Bash scripts are licensed under the MIT License (Bash itself is licensed under the GNU General Public License). Some of the included scripts may rely on external tools such as curl, jq, YAZ, and gnuplot, each of which is licensed under its own terms. Users must obtain and install these tools separately. Refer to the documentation of each tool for installation instructions and licensing details. 52 | 53 | The C code in the archive is licensed under the MIT License. This repository provides only the source code, and users will need to compile the C programs to run them. Some of the C code depends on external libraries such as curl, jq, and YAZ, which are licensed under their own respective terms. These libraries will need to be obtained and installed separately by the user. 54 | -------------------------------------------------------------------------------- /src/about/tech-details.rst: -------------------------------------------------------------------------------- 1 | Technical Details 2 | %%%%%%%%%%%%%%%%%%%%% 3 | 4 | Technology and Software Used 5 | ***************************** 6 | 7 | 1. Python content is written in `Jupyter Notebooks`_. 8 | 2. 
R content is written in RMarkdown and exported to Markdown. 9 | 3. All other content is written in `reStructuredText`_. 10 | 4. Code testing is done locally. 11 | 5. `Jupyter Book`_ is used to compile and create the HTML files via an automated GitHub Workflow. This workflow builds the book and hosts the HTML content with `GitHub Actions`_. 12 | 13 | .. _Jupyter Notebooks: https://jupyter.org/ 14 | .. _reStructuredText: https://www.sphinx-doc.org/en/master/usage/restructuredtext/index.html 15 | .. _Jupyter Book: https://jupyterbook.org/intro.html 16 | .. _GitHub Actions: https://docs.github.com/en/actions 17 | -------------------------------------------------------------------------------- /src/overview/arxiv.rst: -------------------------------------------------------------------------------- 1 | arXiv 2 | %%%%%%%%%%%%%%%%% 3 | 4 | Brief Overview 5 | ***************** 6 | 7 | The arXiv API provides a programmatically accessible interface to their extensive database of scientific papers on the arXiv website. It uses a RESTful interface and allows scholars to query and retrieve papers based on a variety of parameters. The API is free to use and does not require an API key. However, the API does have a rate limit of one request per three seconds [#arxiv1]_. 8 | 9 | .. rubric:: References 10 | 11 | .. [#arxiv1] ``_ -------------------------------------------------------------------------------- /src/overview/bea.rst: -------------------------------------------------------------------------------- 1 | U.S. Bureau of Economic Analysis 2 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 3 | 4 | Brief Overview 5 | **************** 6 | 7 | The U.S. Bureau of Economic Analysis (BEA) API provides programmatic access to economic data published by the BEA. A UserID is required for this API, and a rate limit of 100 requests, 100 MB, and 30 errors per minute is enforced. 8 | 9 | See the BEA API documentation [#bea1]_ and BEA API user guide [#bea2]_ for more information on accessing the API. Please check the terms of use [#bea3]_ for more information on the usage of this API. 10 | 11 | *This product uses the Bureau of Economic Analysis (BEA) Data API but is not endorsed or certified by BEA.* 12 | 13 | .. rubric:: References 14 | 15 | .. [#bea1] ``_ 16 | 17 | .. [#bea2] ``_ 18 | 19 | .. [#bea3] ``_ 20 | 21 | -------------------------------------------------------------------------------- /src/overview/bls.rst: -------------------------------------------------------------------------------- 1 | U.S. Bureau of Labor Statistics 2 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 3 | 4 | Brief Overview 5 | **************** 6 | 7 | The Bureau of Labor Statistics Public Data API provides access to the data published by the BLS, which primarily concerns prices, employment, compensation, and productivity in the United States. Registration or API keys are not required for the v1.0 API, but registration is required to access the v2.0 API [#us_bls1]_. Additionally, users must follow their rate limits [#us_bls2]_. 8 | 9 | See the BLS terms of service for more information on how the API can be used [#us_bls3]_. 10 | 11 | .. rubric:: References 12 | 13 | .. [#us_bls1] ``_ 14 | 15 | .. [#us_bls2] ``_ 16 | 17 | .. 
[#us_bls3] ``_ 18 | 19 | -------------------------------------------------------------------------------- /src/overview/casc.rst: -------------------------------------------------------------------------------- 1 | CAS Common Chemistry 2 | %%%%%%%%%%%%%%%%%%%% 3 | 4 | Brief Overview 5 | **************** 6 | 7 | The CAS Common Chemistry API provides access to information on ~500,000 chemical substances from the CAS REGISTRY. Example API queries include the ability to search via chemical name, SMILES, and InChI [#casc1]_. Registration is required for access [#casc2]_. CAS Common Chemistry content is CC-BY-NC 4.0 licensed; for specific reuse questions, contact CAS [#casc3]_. 8 | 9 | .. rubric:: References 10 | 11 | .. [#casc1] ``_ 12 | 13 | .. [#casc2] ``_ 14 | 15 | .. [#casc3] ``_ 16 | -------------------------------------------------------------------------------- /src/overview/chronam.rst: -------------------------------------------------------------------------------- 1 | Chronicling America 2 | %%%%%%%%%%%%%%%%%%%%%% 3 | 4 | Brief Overview 5 | **************** 6 | 7 | The Library of Congress Chronicling America API provides programmatic access to historic newspaper text and images. Registration is not required. See the Chronicling America API documentation for information about API specification, API policies, and example use-cases [#chronam1]_. 8 | 9 | .. rubric:: References 10 | 11 | .. [#chronam1] ``_ 12 | 13 | -------------------------------------------------------------------------------- /src/overview/college-scorecard.rst: -------------------------------------------------------------------------------- 1 | College Scorecard 2 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 3 | 4 | Brief Overview 5 | **************** 6 | 7 | The College Scorecard API provides programmatic access to data on institutions of higher education published by the U.S. Department of Education. Registration for the API is required [#cs1]_. More information about the data available through the API can be found in the documentation [#cs2]_. 8 | 9 | See the Department of Education's website for more information on how the data from the API can be used [#cs3]_. 10 | 11 | .. rubric:: References 12 | 13 | .. [#cs1] ``_ 14 | 15 | .. [#cs2] ``_ 16 | 17 | .. [#cs3] ``_ 18 | -------------------------------------------------------------------------------- /src/overview/congress.rst: -------------------------------------------------------------------------------- 1 | Congress.gov 2 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 3 | 4 | Brief Overview 5 | **************** 6 | 7 | Congress.gov [#con1]_ is the official website for U.S. federal legislative information. 8 | 9 | The Congress API provides users access to a variety of information about the U.S. Congress. 10 | 11 | See the API documentation [#con2]_ for more information on using the API and their legal page [#con3]_ for more information on licensing. 12 | 13 | .. rubric:: References 14 | 15 | .. [#con1] ``_ 16 | 17 | .. [#con2] ``_ 18 | 19 | .. [#con3] ``_ -------------------------------------------------------------------------------- /src/overview/crossref.rst: -------------------------------------------------------------------------------- 1 | Crossref 2 | %%%%%%%%%%%%%%%%% 3 | 4 | Brief Overview 5 | ***************** 6 | 7 | The Crossref API provides programmatic access to bibliographic document information and related metadata [#crossref1]_. Registration is not required. See the Crossref API documentation for examples, specific API policies, and data reuse information [#crossref2]_. 
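Because the Crossref REST API is open and returns JSON, a single-record lookup is a convenient first test before building a larger workflow. The short Python sketch below is one possible starting point: it assumes the ``requests`` library, uses the Cookbook manuscript's DOI as a sample query, and passes a ``mailto`` address (replace the placeholder with your own email), which Crossref recommends for its "polite" pool.

.. code-block:: python

    import requests

    # Sample lookup: metadata for a single DOI (the Cookbook manuscript's DOI is used here)
    doi = "10.29173/istl2766"
    url = f"https://api.crossref.org/works/{doi}"
    # Including a contact email is recommended etiquette for the Crossref API
    response = requests.get(url, params={"mailto": "your_email@example.com"}, timeout=30)
    response.raise_for_status()
    record = response.json()["message"]
    print(record.get("title"), record.get("container-title"))

The same pattern (build the request URL, check the status, then parse the returned JSON) carries over to the search endpoints used in the Python and R recipes.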
8 | 9 | .. rubric:: References 10 | 11 | .. [#crossref1] ``_ 12 | 13 | .. [#crossref2] ``_ 14 | -------------------------------------------------------------------------------- /src/overview/fdc.rst: -------------------------------------------------------------------------------- 1 | FoodData Central 2 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 3 | 4 | Brief Overview 5 | **************** 6 | 7 | The FoodData Central API provides users access to a variety of information about food products and their nutritional content. 8 | 9 | See the API documentation for more information on using the API [#fdc1]_ . 10 | 11 | "U.S. Department of Agriculture, Agricultural Research Service. FoodData Central, 2019. fdc.nal.usda.gov.". 12 | 13 | "USDA FoodData Central data are in the public domain and they are not copyrighted. They are published under CC0 1.0 Universal (CC0 1.0)" [#fdc2]_ . 14 | 15 | .. rubric:: References 16 | 17 | .. [#fdc1] ``_ 18 | 19 | .. [#fdc2] ``_ -------------------------------------------------------------------------------- /src/overview/geonames.rst: -------------------------------------------------------------------------------- 1 | GeoNames 2 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 3 | 4 | Brief Overview 5 | **************** 6 | 7 | The GeoNames API allows users to programmatically access the GeoNames database. Registration is required to access this API [#gn1]_ . 8 | 9 | See the API documentation [#gn2]_ for more information on accessing the API. The GeoNames API is licensed under the CC BY 4.0 Deed license, allowing users to share and adapt its data with attribution [#gn3]_ . 10 | 11 | .. rubric:: References 12 | 13 | .. [#gn1] ``_ 14 | 15 | .. [#gn2] ``_ 16 | 17 | .. [#gn3] ``_ 18 | -------------------------------------------------------------------------------- /src/overview/nasa-images.rst: -------------------------------------------------------------------------------- 1 | NASA Images 2 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 3 | 4 | Brief Overview 5 | **************** 6 | 7 | The NASA Earth Polychromatic Imaging Camera (EPIC) API [#nasa1]_ provides the most recent images of Earth taken by the EPIC. 8 | 9 | The NASA Astronomy Picture of the Day (APOD) API [#nasa2]_ provides images of the universe taken by telescopes and other instruments. 10 | 11 | You can find more information about NASA APIs on their website [#nasa3]_ and information regarding the use of these images on their Images and Media page [#nasa4]_ . 12 | 13 | .. rubric:: References 14 | 15 | .. [#nasa1] ``_ 16 | 17 | .. [#nasa2] ``_ 18 | 19 | .. [#nasa3] ``_ 20 | 21 | .. [#nasa4] ``_ -------------------------------------------------------------------------------- /src/overview/nps.rst: -------------------------------------------------------------------------------- 1 | National Park Service 2 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 3 | 4 | Brief Overview 5 | **************** 6 | 7 | The National Park Service (NPS) API contains pertinent information about national parks, monuments, and other sites managed by the NPS. An API key is required for this API, and registration can be found on the NPS website [#nps1]_ . Users are required to follow the rate limits of 1000 requests per hour [#nps2]_ . 8 | 9 | See the NPS API documentation [#nps3]_ for more information on accessing the API. Note that the data in the API "is generally considered in the public domain," according to the API's disclaimer [#nps4]_ . 10 | 11 | .. rubric:: References 12 | 13 | .. [#nps1] ``_ 14 | 15 | .. [#nps2] ``_ 16 | 17 | .. [#nps3] ``_ 18 | 19 | .. 
[#nps4] ``_ 20 | -------------------------------------------------------------------------------- /src/overview/nws.rst: -------------------------------------------------------------------------------- 1 | National Weather Service 2 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 3 | 4 | Brief Overview 5 | **************** 6 | 7 | The National Weather Service (NWS) API provides programmatic access to forecasts and alerts published by the NWS. An API key is not required to access this API, but users are required to include a User Agent with all API requests and adhere to an unpublished rate limit (we recommend sending a maximum of 1 request per second). 8 | 9 | See the NWS API documentation [#nws1]_ for more information on accessing the API. Note that the documentation states, "All of the information presented via the API is intended to be open data, free to use for any purpose." 10 | 11 | .. rubric:: References 12 | 13 | .. [#nws1] ``_ 14 | -------------------------------------------------------------------------------- /src/overview/openalex.rst: -------------------------------------------------------------------------------- 1 | OpenAlex 2 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 3 | 4 | Brief Overview 5 | **************** 6 | 7 | The OpenAlex API is an open catalog of the global research system created by the nonprofit OurResearch [#oa1]_ . An API key is not required for this API, but providing your email in requests will provide faster and more consistent response times. Users are required to follow the rate limits of 100000 requests per user per day and 10 requests per second. 8 | 9 | See the OpenAlex API documentation [#oa2]_ for more information on accessing the API. Note that the data in the API is licensed under the Creative Commons CC0 license [#oa3]_ , designating it as part of the public domain. See the OpenAlex Terms of Service [#oa4]_ for more information on how you can use this API. 10 | 11 | .. rubric:: References 12 | 13 | .. [#oa1] ``_ 14 | 15 | .. [#oa2] ``_ 16 | 17 | .. [#oa3] ``_ 18 | 19 | .. [#oa4] ``_ 20 | -------------------------------------------------------------------------------- /src/overview/osf.rst: -------------------------------------------------------------------------------- 1 | Open Science Framework 2 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 3 | 4 | Brief Overview 5 | **************** 6 | 7 | Open Science Framework (OSF) is an open-source service maintained by the Center for Open Science [#osf1]_ . The OSF API allows programmatic access to OSF data and files. 8 | 9 | A token is optional for using this API, but signup can be found on the OSF website [#osf2]_ . Unauthenticated users are limited to 100 requests per hour, but authenticated users are allowed 10,000 requests per day. 10 | 11 | See the OSF API documentation [#osf3]_ for more information on accessing the API. See the OSF API terms of use [#osf4]_ for more information on how you can use this API. 12 | 13 | .. rubric:: References 14 | 15 | .. [#osf1] ``_ 16 | 17 | .. [#osf2] ``_ 18 | 19 | .. [#osf3] ``_ 20 | 21 | .. [#osf4] ``_ 22 | -------------------------------------------------------------------------------- /src/overview/osm.rst: -------------------------------------------------------------------------------- 1 | OpenStreetMap 2 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 3 | 4 | Brief Overview 5 | **************** 6 | 7 | OpenStreetMap (OSM) [#osm1]_ is a worldwide open data mapping service supported by the OpenStreetMap Foundation (OSMF). 8 | 9 | The OSM Overpass API allows users to programmatically read data from OSM. 
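As a rough illustration of what an Overpass request looks like, the Python sketch below posts a small Overpass QL query to the public Overpass endpoint. The bounding box is an approximate area around Tuscaloosa, AL chosen only for demonstration, and the ``requests`` library is assumed; adjust the tag filter and coordinates for your own use case.

.. code-block:: python

    import requests

    # Overpass QL: find library nodes inside an approximate Tuscaloosa, AL bounding box
    query = """
    [out:json][timeout:25];
    node["amenity"="library"](33.15,-87.60,33.25,-87.45);
    out;
    """
    response = requests.post(
        "https://overpass-api.de/api/interpreter", data={"data": query}, timeout=60
    )
    response.raise_for_status()
    for element in response.json().get("elements", []):
        print(element["tags"].get("name", "Unnamed"), element["lat"], element["lon"])

Keep queries small and infrequent; the public Overpass instances are shared community resources.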
10 | 11 | See the API documentation [#osm2]_ for more information on accessing the API and the OSM Copyright and License Page [#osm3]_ for more information on the data's license. 12 | 13 | .. rubric:: References 14 | 15 | .. [#osm1] ``_ 16 | 17 | .. [#osm2] ``_ 18 | 19 | .. [#osm3] ``_ 20 | -------------------------------------------------------------------------------- /src/overview/pubchem-periodic-table.rst: -------------------------------------------------------------------------------- 1 | PubChem Periodic Table 2 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 3 | 4 | Brief Overview 5 | **************** 6 | 7 | PubChem is a database of chemical molecules and their properties. It is maintained by the National Center for Biotechnology Information (NCBI), a division of the National Library of Medicine (NLM) at the National Institutes of Health (NIH). PubChem is a key chemical information resource for scientists, students, and the general public. 8 | 9 | The PubChem Periodic Table API provides programmatic access to a machine-readable periodic table. An API key is not required for this API, but a rate limit of 5 requests per second is enforced. 10 | 11 | See the PubChem Periodic Table [#ppt1]_ and PubChem APIs documentation [#ppt2]_ for more information on accessing the API. Please check the terms of use [#ppt3]_ for more information on the usage of this API. 12 | 13 | .. rubric:: References 14 | 15 | .. [#ppt1] ``_ 16 | 17 | .. [#ppt2] ``_ 18 | 19 | .. [#ppt3] ``_ 20 | 21 | -------------------------------------------------------------------------------- /src/overview/pubchem.rst: -------------------------------------------------------------------------------- 1 | PubChem 2 | %%%%%%%%%%%%%%%%%%%% 3 | 4 | Brief Overview 5 | **************** 6 | 7 | PubChem APIs allow programmatic access to search and retrieve small molecule and related data. Registration is not required. See the PubChem Docs for information about API specification, API policies, and example use-cases [#pubchem1]_. 8 | 9 | .. rubric:: References 10 | 11 | .. [#pubchem1] ``_ 12 | 13 | -------------------------------------------------------------------------------- /src/overview/pubmed.rst: -------------------------------------------------------------------------------- 1 | PubMed 2 | %%%%%%%%%%%%%%%%%%% 3 | 4 | Brief Overview 5 | **************** 6 | 7 | PubMed bibliographic and related NCBI information is programmatically accessible through the Entrez Programming Utilities API. Registration is not required, though registering may offer additional API features [#pubmed1]_. See the NCBI API Usage Guidelines [#pubmed2]_ and Data Usage Policy [#pubmed3]_. 8 | 9 | 10 | .. rubric:: References 11 | 12 | .. [#pubmed1] ``_ 13 | 14 | .. [#pubmed2] ``_ 15 | 16 | .. [#pubmed3] ``_ 17 | -------------------------------------------------------------------------------- /src/overview/ror.rst: -------------------------------------------------------------------------------- 1 | Research Organization Registry 2 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 3 | 4 | Brief Overview 5 | **************** 6 | 7 | The Research Organization Registry (ROR) API provides programmatic access to the registry's persistent identifiers for research organizations. An API key is not required for this API, but they do require you to follow their rate limit of 2000 requests per five-minute period. 8 | 9 | See the ROR API documentation [#ror1]_ for more information on accessing the API. 
Note that the data in the API is licensed under the Creative Commons CC0 license [#ror2]_ , designating it as part of the public domain. 10 | 11 | .. rubric:: References 12 | 13 | .. [#ror1] ``_ 14 | 15 | .. [#ror2] ``_ 16 | 17 | -------------------------------------------------------------------------------- /src/overview/scopus.rst: -------------------------------------------------------------------------------- 1 | Scopus 2 | %%%%%%%%%%%%%%%%%%%%% 3 | 4 | Brief Overview 5 | **************** 6 | 7 | The Elsevier Scopus API allows programmatic access to search and retrieve Scopus record information including, for example, author information, bibliographic metadata, and citations. Registration is required. See the Elsevier Developer Portal for information about the Scopus API specification, policies, and allowed use-cases [#scopus1]_. 8 | 9 | .. rubric:: References 10 | 11 | .. [#scopus1] ``_ 12 | 13 | -------------------------------------------------------------------------------- /src/overview/sdirect.rst: -------------------------------------------------------------------------------- 1 | ScienceDirect 2 | %%%%%%%%%%%%%%%%%%%%% 3 | 4 | Brief Overview 5 | **************** 6 | 7 | The Elsevier ScienceDirect APIs allow programmatic access to search and retrieve ScienceDirect 8 | metadata and article full-text. Registration is required. See the Elsevier Developer Portal for 9 | information about the ScienceDirect API specification, policies, and allowed use-cases [#SDirect1]_. 10 | Note that Elsevier has detailed policies regarding text and data mining [#SDirect2]_. 11 | Please check with your institution for their Text and Data Mining Agreement with Elsevier. 12 | 13 | .. rubric:: References 14 | 15 | .. [#SDirect1] ``_ 16 | .. [#SDirect2] ``_ 17 | -------------------------------------------------------------------------------- /src/overview/sec-edgar.rst: -------------------------------------------------------------------------------- 1 | U.S. Securities and Exchange 2 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 3 | 4 | Brief Overview 5 | **************** 6 | 7 | The U.S. Securities and Exchange Commission EDGAR API provides access to public company filing data. Registration or API keys are not required, but they do require you to add a user agent in the requests [#us_sec1]_ and follow their rate limits [#us_sec2]_. 8 | 9 | See the U.S. SEC Developer Resources [#us_sec3]_ and the SEC Web Site Privacy and Security Policy for information about data reuse [#us_sec4]_. 10 | 11 | .. rubric:: References 12 | 13 | .. [#us_sec1] ``_ 14 | 15 | .. [#us_sec2] ``_ 16 | 17 | .. [#us_sec3] ``_ 18 | 19 | .. [#us_sec4] ``_ 20 | 21 | -------------------------------------------------------------------------------- /src/overview/speedrun.rst: -------------------------------------------------------------------------------- 1 | Speedrun.com 2 | %%%%%%%%%%%%%%%%% 3 | 4 | Brief Overview 5 | ***************** 6 | 7 | The Speedrun.com API provides programmatic access to the video game speedrunning data hosted and compiled by Speedrun.com [#speedrun1]_ . Registration is not required, but including a user-agent in your API requests is recommended. See the Speedrun.com API documentation [#speedrun2]_ for more information about this API. 8 | 9 | The Speedrun.com API is licensed under the CC BY-NC 4.0 license [#speedrun3]_ . See the Speedrun.com terms of use [#speedrun4]_ for more information. 10 | 11 | .. rubric:: References 12 | 13 | .. [#speedrun1] ``_ 14 | 15 | .. [#speedrun2] ``_ 16 | 17 | .. [#speedrun3] ``_ 18 | 19 | .. 
[#speedrun4] ``_ 20 | -------------------------------------------------------------------------------- /src/overview/springer.rst: -------------------------------------------------------------------------------- 1 | Springer Nature 2 | %%%%%%%%%%%%%%%%%%%%% 3 | 4 | Brief Overview 5 | **************** 6 | 7 | The Springer Nature API offers programmatic access to a vast array of metadata and full-text content from the Springer Nature publishing database. 8 | There are three different APIs, including an Open Access API, a Metadata API, and a Text and Data Mining API [#springer1]_. 9 | Access to some data might require a subscription or purchase. For detailed guidelines, usage policies, and access to the API's 10 | full capabilities, refer to the official Springer Nature documentation [#springer2]_. 11 | Review their general terms of service [#springer3]_, API terms [#springer4]_, and TDM reservation policies [#springer5]_. 12 | Please check with your institution for their Text and Data Mining Agreement with Springer Nature. 13 | 14 | .. rubric:: References 15 | 16 | .. [#springer1] ``_ 17 | 18 | .. [#springer2] ``_ 19 | 20 | .. [#springer3] ``_ 21 | 22 | .. [#springer4] ``_ 23 | 24 | .. [#springer5] ``_ 25 | -------------------------------------------------------------------------------- /src/overview/stack-exchange.rst: -------------------------------------------------------------------------------- 1 | Stack Exchange 2 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 3 | 4 | Brief Overview 5 | **************** 6 | 7 | Stack Exchange is a network of question-and-answer websites, each covering a specific topic, where questions, answers, and users are subject to a reputation award process. 8 | 9 | This API provides users with access to a variety of information about the Stack Exchange network. 10 | 11 | See the API documentation for more information on using the API [#stack1]_ . 12 | 13 | See the Stack Exchange API Terms of Use [#stack2]_ and the Terms of Service for more information on licensing [#stack3]_ . 14 | 15 | .. rubric:: References 16 | 17 | .. [#stack1] ``_ 18 | 19 | .. [#stack2] ``_ 20 | 21 | .. [#stack3] ``_ -------------------------------------------------------------------------------- /src/overview/us-census-geocoding.rst: -------------------------------------------------------------------------------- 1 | U.S. Census Geocoding 2 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 3 | 4 | Brief Overview 5 | **************** 6 | 7 | The U.S. Census Geocoding Services API allows users to obtain geographic information for U.S. addresses. An API key is not required to access this API. 8 | 9 | See the API documentation [#uscg1]_ for more information on accessing the API. Please see the U.S. Census Bureau APIs terms of service [#uscg2]_ for specific information about API policies, data reuse, and allowed use-cases. 10 | 11 | .. rubric:: References 12 | 13 | .. [#uscg1] ``_ 14 | 15 | .. [#uscg2] ``_ 16 | -------------------------------------------------------------------------------- /src/overview/us-census.rst: -------------------------------------------------------------------------------- 1 | U.S. Census Data 2 | %%%%%%%%%%%%%%%%%%%%%% 3 | 4 | Brief Overview 5 | **************** 6 | 7 | The U.S. Census Bureau offers several APIs for accessing census data and related information. Registration is not required, but registering allows more queries and features [#uscensus1]_. See the U.S. 
Census Data API User Guide [#uscensus2]_ and Terms of Service [#uscensus3]_ for specific information about API policies, data reuse, and allowed use-cases. 8 | 9 | .. rubric:: References 10 | 11 | .. [#uscensus1] ``_ 12 | 13 | .. [#uscensus2] ``_ 14 | 15 | .. [#uscensus3] ``_ 16 | 17 | -------------------------------------------------------------------------------- /src/overview/us-treasury.rst: -------------------------------------------------------------------------------- 1 | U.S. Treasury 2 | %%%%%%%%%%%%%%%%%%%%% 3 | 4 | Brief Overview 5 | **************** 6 | 7 | The U.S. Department of the Treasury API provides information about federal finances [#ustreasury1]_. See the Dataset Search [#ustreasury2]_ and terms of use [#ustreasury3]_. 8 | 9 | .. rubric:: References 10 | 11 | .. [#ustreasury1] ``_ 12 | 13 | .. [#ustreasury2] ``_ 14 | 15 | .. [#ustreasury3] ``_ 16 | -------------------------------------------------------------------------------- /src/overview/usa-spending.rst: -------------------------------------------------------------------------------- 1 | USA Spending 2 | %%%%%%%%%%%%%%%%%%%%%% 3 | 4 | Brief Overview 5 | **************** 6 | 7 | The USA Spending API provides programmatic access to comprehensive U.S. government spending data, including spending on awards (e.g., federal contracts, grants, geographic breakdowns, agency breakdowns) and account-level, non-award spending such as federal employee compensation. The API is currently in V2, with V1 endpoints being deprecated. No registration is required to use the API. Visit the USA Spending API documentation for information on getting started, available endpoints, and background information [#usas1]_ . 8 | 9 | .. rubric:: References 10 | 11 | .. [#usas1] ``_ 12 | 13 | -------------------------------------------------------------------------------- /src/overview/usgs-national-map.rst: -------------------------------------------------------------------------------- 1 | USGS National Map 2 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 3 | 4 | Brief Overview 5 | **************** 6 | 7 | The United States Geological Survey (USGS) [#usgs1]_ is a U.S. agency whose work spans the disciplines of biology, geography, geology, and hydrology. 8 | 9 | The USGS National Map [#usgs2]_ is a collaborative effort among the USGS and other federal, state, and local partners to provide a variety of topographic information. 10 | 11 | You can view or download data using the USGS National Map Viewer [#usgs3]_ or the USGS National Map Downloader [#usgs4]_ , respectively. 12 | 13 | We will use their API [#usgs5]_ to query and download data. 14 | 15 | See the USGS Copyrights and Credits [#usgs6]_ and their FAQ page [#usgs7]_ for more information. 16 | 17 | .. rubric:: References 18 | 19 | .. [#usgs1] ``_ 20 | 21 | .. [#usgs2] ``_ 22 | 23 | .. [#usgs3] ``_ 24 | 25 | .. [#usgs4] ``_ 26 | 27 | .. [#usgs5] ``_ 28 | 29 | .. [#usgs6] ``_ 30 | 31 | .. [#usgs7] ``_ -------------------------------------------------------------------------------- /src/overview/wiley-tdm.rst: -------------------------------------------------------------------------------- 1 | Wiley 2 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 3 | 4 | Brief Overview 5 | **************** 6 | 7 | The Wiley Text and Data Mining (TDM) API allows users to retrieve full-text articles of Wiley content in PDF form. 8 | A token is required for access [#wtdm1]_ , and users are limited to 3 API requests per second. 9 | 10 | See the Wiley TDM documentation [#wtdm2]_ for more information on accessing the API. A minimal request sketch, mirroring the Python tutorial later in this cookbook, is shown below. 
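The endpoint URL and request header in the sketch match those used in the Wiley TDM Python tutorial elsewhere in this cookbook; the token value and DOI below are placeholders that must be replaced with your own.

.. code-block:: python

   import requests

   # Minimal sketch: download one full-text PDF from the Wiley TDM API.
   # 'YOUR_WILEY_TDM_TOKEN' and the DOI are placeholders, not real values.
   token = "YOUR_WILEY_TDM_TOKEN"
   doi = "10.xxxx/xxxxxx"
   url = f"https://api.wiley.com/onlinelibrary/tdm/v1/articles/{doi}"
   headers = {"Wiley-TDM-Client-Token": token}

   response = requests.get(url, headers=headers)
   if response.status_code == 200:
       with open("article.pdf", "wb") as f:
           f.write(response.content)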
11 | Please check with your institution for their Text and Data Mining Agreement with Wiley. 12 | 13 | .. rubric:: References 14 | 15 | .. [#wtdm1] ``_ 16 | 17 | .. [#wtdm2] ``_ 18 | 19 | -------------------------------------------------------------------------------- /src/overview/world-bank.rst: -------------------------------------------------------------------------------- 1 | World Bank 2 | %%%%%%%%%%%%%%%%%%%%% 3 | 4 | Brief Overview 5 | **************** 6 | 7 | The World Bank Indicators API provides access to numerous time series of economic and related data, such as population, income, energy, and education information [#worldbank1]_. Registration is not required. See the World Bank Indicators API documentation [#worldbank2]_ and Terms of Use for specific information about data reuse policies [#worldbank3]_. 8 | 9 | .. rubric:: References 10 | 11 | .. [#worldbank1] ``_ 12 | 13 | .. [#worldbank2] ``_ 14 | 15 | .. [#worldbank3] ``_ 16 | -------------------------------------------------------------------------------- /src/overview/wos.rst: -------------------------------------------------------------------------------- 1 | Web of Science 2 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 3 | 4 | Brief Overview 5 | **************** 6 | 7 | Web of Science (WOS) is a service maintained by Clarivate [#wos1]_ . The WOS APIs allow programmatic access to WOS data. 8 | 9 | The WOS Starter API allows access to some of the data in the WOS database. See the WOS Starter documentation [#wos2]_ for more information on accessing the API. 10 | 11 | The WOS Expanded API allows access to more data from the WOS database. See the WOS Expanded documentation [#wos3]_ for more information on accessing the API. 12 | 13 | Please check with your institution on WOS API access. Registration is required to access the API. 14 | 15 | Please see the WOS API release notes [#wos4]_ to stay up-to-date on the APIs. See the WOS Terms of Use [#wos5]_ and Clarivate Product / Service Terms [#wos6]_ for more information on usage of this data. 16 | 17 | .. rubric:: References 18 | 19 | .. [#wos1] ``_ 20 | 21 | .. [#wos2] ``_ 22 | 23 | .. [#wos3] ``_ 24 | 25 | .. [#wos4] ``_ 26 | 27 | .. [#wos5] ``_ 28 | 29 | .. [#wos6] ``_ 30 | -------------------------------------------------------------------------------- /src/overview/z3950.rst: -------------------------------------------------------------------------------- 1 | Z39.50 2 | %%%%%%%%%%%%%%%%%%%%% 3 | 4 | Brief Overview 5 | **************** 6 | 7 | Z39.50 is a protocol and query language often used for interacting with library catalogs [#z3950_1]_. 8 | 9 | .. rubric:: References 10 | 11 | .. [#z3950_1] ``_ 12 | 13 | -------------------------------------------------------------------------------- /src/python/geonames.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# GeoNames API in Python\n", 8 | "\n", 9 | "by Michael T. 
Moen\n", 10 | "\n", 11 | "Please see the following resources for more information on API usage:\n", 12 | "\n", 13 | "- Documentation\n", 14 | " - GeoNames\n", 15 | " - GeoNames API Documentation\n", 16 | "- Terms of Use\n", 17 | " - GeoNames API Terms of Use\n", 18 | "- Data Reuse\n", 19 | " - The GeoNames API Data is licensed under the Creative Commons' [CC 4.0 license](https://creativecommons.org/licenses/by/4.0/), allowing users to share and adapt the API's data for any purpose, as long as appropriate attribution is given.\n", 20 | "\n", 21 | "*These recipe examples were tested on March 7, 2025.*\n", 22 | "\n", 23 | "**_NOTE:_** The GeoNames API limits users to a maximum of 10000 credits per day and 1000 credits per hour. See [here](https://www.geonames.org/export/credits.html) for a list of how many credits a request to each endpoint uses." 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "## Setup\n", 31 | "\n", 32 | "### Import Libraries\n", 33 | "\n", 34 | "The following external libraries need to be installed into your enviornment to run the code examples in this tutorial:\n", 35 | "\n", 36 | "- ipykernel\n", 37 | "- requests\n", 38 | "- python-dotenv" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 1, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "import requests\n", 48 | "import os\n", 49 | "from dotenv import load_dotenv" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "### Import Username\n", 57 | "\n", 58 | "Users must register with GeoNames before accessing the GeoNames API. Sign up can be found here.\n", 59 | "\n", 60 | "We keep our username in a `.env` file and use the `dotenv` library to access it. If you would like to use this method, create a file named `.env` in the same directory as this notebook and add the following line to it:\n", 61 | "\n", 62 | "```text\n", 63 | "GEONAMES_API_USERNAME=PUT_YOUR_USERNAME_HERE\n", 64 | "```" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 5, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "load_dotenv()\n", 74 | "try:\n", 75 | " USERNAME = os.environ[\"GEONAMES_API_USERNAME\"]\n", 76 | "except KeyError:\n", 77 | " print(\"API key not found. Please set 'GEONAMES_API_USERNAME' in your .env file.\")" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "## 1. Searching with a ZIP Code\n", 85 | "\n", 86 | "This example uses the `postalCodeSearchJSON` endpoint to find the coordinates of the the ZIP code 35401." 
87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 10, 92 | "metadata": {}, 93 | "outputs": [ 94 | { 95 | "data": { 96 | "text/plain": [ 97 | "200" 98 | ] 99 | }, 100 | "execution_count": 10, 101 | "metadata": {}, 102 | "output_type": "execute_result" 103 | } 104 | ], 105 | "source": [ 106 | "BASE_URL = f'https://secure.geonames.org/'\n", 107 | "\n", 108 | "endpoint = 'postalCodeSearchJSON'\n", 109 | "params = {\n", 110 | " 'postalcode': 35401, # Postal code to search\n", 111 | " 'countryBias': 'US', # Moves US results to the top of the results list\n", 112 | " 'username': USERNAME # Must include GeoNames username in all API calls\n", 113 | "}\n", 114 | "\n", 115 | "response = requests.get(f\"{BASE_URL}{endpoint}\", params=params)\n", 116 | "\n", 117 | "# Status code 200 indicates success\n", 118 | "response.status_code" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 11, 124 | "metadata": {}, 125 | "outputs": [ 126 | { 127 | "data": { 128 | "text/plain": [ 129 | "{'adminCode2': '125',\n", 130 | " 'adminCode1': 'AL',\n", 131 | " 'adminName2': 'Tuscaloosa',\n", 132 | " 'lng': -87.562666,\n", 133 | " 'countryCode': 'US',\n", 134 | " 'postalCode': '35401',\n", 135 | " 'adminName1': 'Alabama',\n", 136 | " 'ISO3166-2': 'AL',\n", 137 | " 'placeName': 'Tuscaloosa',\n", 138 | " 'lat': 33.196891}" 139 | ] 140 | }, 141 | "execution_count": 11, 142 | "metadata": {}, 143 | "output_type": "execute_result" 144 | } 145 | ], 146 | "source": [ 147 | "top_result = response.json()['postalCodes'][0]\n", 148 | "top_result" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 12, 154 | "metadata": {}, 155 | "outputs": [ 156 | { 157 | "data": { 158 | "text/plain": [ 159 | "(33.196891, -87.562666)" 160 | ] 161 | }, 162 | "execution_count": 12, 163 | "metadata": {}, 164 | "output_type": "execute_result" 165 | } 166 | ], 167 | "source": [ 168 | "latitude = top_result['lat']\n", 169 | "longitude = top_result['lng']\n", 170 | "latitude, longitude" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "metadata": {}, 176 | "source": [ 177 | "## 2. 
Searching with Queries\n", 178 | "\n", 179 | "Queries allow users to search for location at several different levels.\n", 180 | "\n", 181 | "### Searching for a City\n", 182 | "\n", 183 | "In this example, we search for a location using the query \"Tuscaloosa.\"" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 13, 189 | "metadata": {}, 190 | "outputs": [ 191 | { 192 | "data": { 193 | "text/plain": [ 194 | "200" 195 | ] 196 | }, 197 | "execution_count": 13, 198 | "metadata": {}, 199 | "output_type": "execute_result" 200 | } 201 | ], 202 | "source": [ 203 | "endpoint = 'searchJSON'\n", 204 | "params = {\n", 205 | " 'q': 'Tuscaloosa', # Search query\n", 206 | " 'countryBias': 'US', # Moves US results to the top of the results list\n", 207 | " 'maxRows': 10, # Limit results to top 10\n", 208 | " 'username': USERNAME # Must include GeoNames username in all API calls\n", 209 | "}\n", 210 | "\n", 211 | "response = requests.get(f\"{BASE_URL}{endpoint}\", params=params)\n", 212 | "\n", 213 | "# Status code 200 indicates success\n", 214 | "response.status_code" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": 14, 220 | "metadata": {}, 221 | "outputs": [ 222 | { 223 | "data": { 224 | "text/plain": [ 225 | "{'adminCode1': 'AL',\n", 226 | " 'lng': '-87.56917',\n", 227 | " 'geonameId': 4094455,\n", 228 | " 'toponymName': 'Tuscaloosa',\n", 229 | " 'countryId': '6252001',\n", 230 | " 'fcl': 'P',\n", 231 | " 'population': 98332,\n", 232 | " 'countryCode': 'US',\n", 233 | " 'name': 'Tuscaloosa',\n", 234 | " 'fclName': 'city, village,...',\n", 235 | " 'adminCodes1': {'ISO3166_2': 'AL'},\n", 236 | " 'countryName': 'United States',\n", 237 | " 'fcodeName': 'seat of a second-order administrative division',\n", 238 | " 'adminName1': 'Alabama',\n", 239 | " 'lat': '33.20984',\n", 240 | " 'fcode': 'PPLA2'}" 241 | ] 242 | }, 243 | "execution_count": 14, 244 | "metadata": {}, 245 | "output_type": "execute_result" 246 | } 247 | ], 248 | "source": [ 249 | "# Display top result\n", 250 | "response.json()['geonames'][0]" 251 | ] 252 | }, 253 | { 254 | "cell_type": "markdown", 255 | "metadata": {}, 256 | "source": [ 257 | "### Seaching for a Building\n", 258 | "\n", 259 | "In this example, we search for a location using the query \"Bruno Business Library.\"" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": 15, 265 | "metadata": {}, 266 | "outputs": [ 267 | { 268 | "data": { 269 | "text/plain": [ 270 | "200" 271 | ] 272 | }, 273 | "execution_count": 15, 274 | "metadata": {}, 275 | "output_type": "execute_result" 276 | } 277 | ], 278 | "source": [ 279 | "endpoint = 'searchJSON'\n", 280 | "params = {\n", 281 | " 'q': 'Bruno Business Library', # Search query\n", 282 | " 'countryBias': 'US', # Moves US results to the top of the results list\n", 283 | " 'maxRows': 10, # Limit results to top 10\n", 284 | " 'username': USERNAME # Must include GeoNames username in all API calls\n", 285 | "}\n", 286 | "\n", 287 | "response = requests.get(f'{BASE_URL}{endpoint}', params=params)\n", 288 | "\n", 289 | "# Status code 200 indicates success\n", 290 | "response.status_code" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": 16, 296 | "metadata": {}, 297 | "outputs": [ 298 | { 299 | "data": { 300 | "text/plain": [ 301 | "{'adminCode1': 'AL',\n", 302 | " 'lng': '-87.54925',\n", 303 | " 'geonameId': 11524498,\n", 304 | " 'toponymName': 'Angelo Bruno Business Library',\n", 305 | " 'countryId': '6252001',\n", 306 | " 'fcl': 'S',\n", 
307 | " 'population': 0,\n", 308 | " 'countryCode': 'US',\n", 309 | " 'name': 'Angelo Bruno Business Library',\n", 310 | " 'fclName': 'spot, building, farm',\n", 311 | " 'adminCodes1': {'ISO3166_2': 'AL'},\n", 312 | " 'countryName': 'United States',\n", 313 | " 'fcodeName': 'library',\n", 314 | " 'adminName1': 'Alabama',\n", 315 | " 'lat': '33.2111',\n", 316 | " 'fcode': 'LIBR'}" 317 | ] 318 | }, 319 | "execution_count": 16, 320 | "metadata": {}, 321 | "output_type": "execute_result" 322 | } 323 | ], 324 | "source": [ 325 | "# Display top result\n", 326 | "response.json()['geonames'][0]" 327 | ] 328 | }, 329 | { 330 | "cell_type": "markdown", 331 | "metadata": {}, 332 | "source": [ 333 | "### Searching for an Island\n", 334 | "\n", 335 | "In this example, we use the query \"Martha's Vineyard.\"" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": 20, 341 | "metadata": {}, 342 | "outputs": [ 343 | { 344 | "data": { 345 | "text/plain": [ 346 | "200" 347 | ] 348 | }, 349 | "execution_count": 20, 350 | "metadata": {}, 351 | "output_type": "execute_result" 352 | } 353 | ], 354 | "source": [ 355 | "endpoint = 'searchJSON'\n", 356 | "params = {\n", 357 | " 'q': \"Martha's Vineyard\", # Search query\n", 358 | " 'countryBias': 'US', # Moves US results to the top of the results list\n", 359 | " 'maxRows': 10, # Limit results to top 10\n", 360 | " 'username': USERNAME # Must include GeoNames username in all API calls\n", 361 | "}\n", 362 | "\n", 363 | "response = requests.get(f'{BASE_URL}{endpoint}', params=params)\n", 364 | "\n", 365 | "# Status code 200 indicates success\n", 366 | "response.status_code" 367 | ] 368 | }, 369 | { 370 | "cell_type": "code", 371 | "execution_count": 21, 372 | "metadata": {}, 373 | "outputs": [ 374 | { 375 | "data": { 376 | "text/plain": [ 377 | "{'adminCode1': 'MA',\n", 378 | " 'lng': '-70.61265',\n", 379 | " 'geonameId': 4943237,\n", 380 | " 'toponymName': \"Martha's Vineyard Airport\",\n", 381 | " 'countryId': '6252001',\n", 382 | " 'fcl': 'S',\n", 383 | " 'population': 0,\n", 384 | " 'countryCode': 'US',\n", 385 | " 'name': \"Martha's Vineyard Airport\",\n", 386 | " 'fclName': 'spot, building, farm',\n", 387 | " 'adminCodes1': {'ISO3166_2': 'MA'},\n", 388 | " 'countryName': 'United States',\n", 389 | " 'fcodeName': 'airport',\n", 390 | " 'adminName1': 'Massachusetts',\n", 391 | " 'lat': '41.39016',\n", 392 | " 'fcode': 'AIRP'}" 393 | ] 394 | }, 395 | "execution_count": 21, 396 | "metadata": {}, 397 | "output_type": "execute_result" 398 | } 399 | ], 400 | "source": [ 401 | "# Display top result\n", 402 | "response.json()['geonames'][0]" 403 | ] 404 | }, 405 | { 406 | "cell_type": "markdown", 407 | "metadata": {}, 408 | "source": [ 409 | "Note that the result above is the data for Matha's Vineyard Airport. 
If we wish to find the data associated with the island, we can look at the `fcodeName` of the locations in the response:" 410 | ] 411 | }, 412 | { 413 | "cell_type": "code", 414 | "execution_count": 22, 415 | "metadata": {}, 416 | "outputs": [ 417 | { 418 | "name": "stdout", 419 | "output_type": "stream", 420 | "text": [ 421 | "Martha's Vineyard Airport airport\n", 422 | "Martha's Vineyard Island island\n", 423 | "Vineyard Haven populated place\n", 424 | "Martha's Vineyard Hospital hospital\n", 425 | "Martha's Vineyard Regional High School school\n", 426 | "Marthas Vineyard Campground camp(s)\n", 427 | "Martha's Vineyard Aero Light \n", 428 | "Martha's Vineyard State Forest forest(s)\n", 429 | "Martha's Vineyard Agricultural Society vineyard\n", 430 | "Martha's Vineyard State Forest forest(s)\n" 431 | ] 432 | } 433 | ], 434 | "source": [ 435 | "for location in response.json()['geonames']:\n", 436 | " print(f'{location['toponymName']:<40}{location['fcodeName']}')" 437 | ] 438 | }, 439 | { 440 | "cell_type": "markdown", 441 | "metadata": {}, 442 | "source": [ 443 | "## 3. Reverse Geocoding\n", 444 | "\n", 445 | "The `findNearbyPostalCodesJSON` endpoint can be used to find the ZIP code of a pair of coordinates." 446 | ] 447 | }, 448 | { 449 | "cell_type": "code", 450 | "execution_count": 25, 451 | "metadata": {}, 452 | "outputs": [ 453 | { 454 | "data": { 455 | "text/plain": [ 456 | "200" 457 | ] 458 | }, 459 | "execution_count": 25, 460 | "metadata": {}, 461 | "output_type": "execute_result" 462 | } 463 | ], 464 | "source": [ 465 | "endpoint = 'findNearbyPostalCodesJSON'\n", 466 | "params = {\n", 467 | " 'lat': 38.625189, # Search latitude\n", 468 | " 'lng': -90.187330, # Search longitude\n", 469 | " 'maxRows': 10, # Limit results to top 10\n", 470 | " 'username': USERNAME # Must include GeoNames username in all API calls\n", 471 | "}\n", 472 | "\n", 473 | "response = requests.get(f'{BASE_URL}{endpoint}', params=params)\n", 474 | "\n", 475 | "# Status code 200 indicates success\n", 476 | "response.status_code" 477 | ] 478 | }, 479 | { 480 | "cell_type": "code", 481 | "execution_count": 26, 482 | "metadata": {}, 483 | "outputs": [ 484 | { 485 | "name": "stdout", 486 | "output_type": "stream", 487 | "text": [ 488 | "ZIP | Distance (km)\n", 489 | "63102 | 0\n", 490 | "63188 | 0.94603\n", 491 | "63197 | 0.94603\n", 492 | "63180 | 0.94603\n", 493 | "63155 | 0.94603\n", 494 | "63169 | 0.94603\n", 495 | "63182 | 0.94603\n", 496 | "63150 | 0.94603\n", 497 | "63101 | 1.1038\n", 498 | "62202 | 2.64737\n" 499 | ] 500 | } 501 | ], 502 | "source": [ 503 | "# Print 10 nearest ZIP codes\n", 504 | "print('ZIP | Distance (km)')\n", 505 | "for zip in response.json()['postalCodes']:\n", 506 | " print(f'{zip['postalCode']} | {zip['distance']}')" 507 | ] 508 | } 509 | ], 510 | "metadata": { 511 | "kernelspec": { 512 | "display_name": "api_env", 513 | "language": "python", 514 | "name": "python3" 515 | }, 516 | "language_info": { 517 | "codemirror_mode": { 518 | "name": "ipython", 519 | "version": 3 520 | }, 521 | "file_extension": ".py", 522 | "mimetype": "text/x-python", 523 | "name": "python", 524 | "nbconvert_exporter": "python", 525 | "pygments_lexer": "ipython3", 526 | "version": "3.13.1" 527 | }, 528 | "orig_nbformat": 4 529 | }, 530 | "nbformat": 4, 531 | "nbformat_minor": 2 532 | } 533 | -------------------------------------------------------------------------------- /src/python/imgs/APOD_Image.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/python/imgs/APOD_Image.png -------------------------------------------------------------------------------- /src/python/imgs/Earth_Image1_recent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/python/imgs/Earth_Image1_recent.png -------------------------------------------------------------------------------- /src/python/imgs/Earth_Image_Stitched.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/python/imgs/Earth_Image_Stitched.png -------------------------------------------------------------------------------- /src/python/sdirect.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "088987c6-311d-4677-9cc9-19ceeeb245b9", 6 | "metadata": {}, 7 | "source": [ 8 | "# ScienceDirect API in Python\n", 9 | "\n", 10 | "by Vincent F. Scalfani\n", 11 | "\n", 12 | "**ScienceDirect**: https://www.sciencedirect.com/\n", 13 | "\n", 14 | "**Elsevier Developer Portal:** https://dev.elsevier.com/\n", 15 | "\n", 16 | "**ScienceDirect APIs Specification:** https://dev.elsevier.com/sd_api_spec.html\n", 17 | "\n", 18 | "**Elsevier How to Guide: Text Mining:** https://dev.elsevier.com/tecdoc_text_mining.html\n", 19 | "\n", 20 | "Please check with your institution for their Text and Data Mining Agreement with Elsevier.\n", 21 | "\n", 22 | "These recipe examples use the Elsevier ScienceDirect Article (Full-Text) API. This tutorial content is intended to help facillitate academic research. Before continuing or reusing any of this code, please be aware of Elsevier’s API policies and appropiate use-cases, as for example, Elsevier has detailed policies regarding [text and data mining of Elsevier full-text content](https://dev.elsevier.com/text_mining.html). If you have copyright or other related text and data mining questions, please contact The University of Alabama Libraries.\n", 23 | "\n", 24 | "*These recipe examples were tested on February 12, 2025.*" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "id": "f257ddd2-982a-4179-99c0-0b8d572ac57d", 30 | "metadata": {}, 31 | "source": [ 32 | "## Setup\n", 33 | "\n", 34 | "### Import Libraries" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 1, 40 | "id": "9eeb06c9-31ed-463d-a39d-d0207e68a336", 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "import requests\n", 45 | "from time import sleep" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "id": "61baba0a-06e0-4a83-bf96-da483ca02742", 51 | "metadata": {}, 52 | "source": [ 53 | "### Import API key\n", 54 | "\n", 55 | "As a good practice, do not display your API key in your computational notebook (to prevent accidental sharing). Save your API key to a separate python file, then import your key." 
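For example, the imported file can be as small as a single assignment. The file name (`api_key.py`) and variable name (`myAPIKey`) below simply mirror the `from api_key import myAPIKey` statement used in the next cell; the key string itself is a placeholder.

```python
# api_key.py -- keep this file out of version control (e.g., list it in .gitignore)
myAPIKey = "YOUR_ELSEVIER_API_KEY"  # placeholder; substitute your own Elsevier API key
```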
56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 2, 61 | "id": "1c7646f1-a700-4e00-a323-f2eea0e25768", 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "from api_key import myAPIKey" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "id": "997bb204-db57-4730-addd-47abd59b67ef", 71 | "metadata": {}, 72 | "source": [ 73 | "### Identifier Note\n", 74 | "\n", 75 | "We will use DOIs as the article identifiers. See our Crossref and Scopus API tutorials for workflows on how to create lists of DOIs and identfiers for specific searches and journals. The Elsevier ScienceDirect Article (Full-Text) API also accepts other identifiers like Scopus IDs and PubMed IDs (see API specification documents linked above)." 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "id": "5554fdf8-98cd-4bf5-bb44-f16e30b859c8", 81 | "metadata": {}, 82 | "source": [ 83 | "## 1. Retrieve full-text XML of an article" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 4, 89 | "id": "c5f36bf5-50bb-4617-b75f-ccc1c0ea1964", 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "# For XML download\n", 94 | "elsevier_url = \"https://api.elsevier.com/content/article/doi/\"\n", 95 | "doi1 = '10.1016/j.tetlet.2017.07.080' # Example Tetrahedron Letters article\n", 96 | "fulltext1 = requests.get(elsevier_url + doi1 + \"?APIKey=\" + myAPIKey + \"&httpAccept=text/xml\")\n", 97 | "\n", 98 | "# Save to file\n", 99 | "with open('fulltext1.xml', 'w', encoding='utf-8') as outfile:\n", 100 | " outfile.write(fulltext1.text)" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "id": "56c376b1-411c-4f1b-b117-dd006fd74181", 106 | "metadata": {}, 107 | "source": [ 108 | "## 2. Retrieve plain text of an article" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 6, 114 | "id": "e3dac0b9-41f5-4045-98d4-1c217c1ecd38", 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "# For simplified text download\n", 119 | "elsevier_url = \"https://api.elsevier.com/content/article/doi/\"\n", 120 | "doi2 = '10.1016/j.tetlet.2022.153680' # example Tetrahedron Letters article\n", 121 | "fulltext2 = requests.get(elsevier_url + doi2 + \"?APIKey=\" + myAPIKey + \"&httpAccept=text/plain\")\n", 122 | "\n", 123 | "# Save to file\n", 124 | "with open('fulltext2.txt', 'w', encoding='utf-8') as outfile:\n", 125 | " outfile.write(fulltext2.text)" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "id": "9bd40795-646f-4962-b11d-ca967c06e9cf", 131 | "metadata": {}, 132 | "source": [ 133 | "## 3. 
Retrieve full-text in a loop" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 7, 139 | "id": "2f21c309-903e-4e40-bd95-d206928c91e5", 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "# Make a list of 5 DOIs for testing\n", 144 | "dois = ['10.1016/j.tetlet.2018.10.031',\n", 145 | " '10.1016/j.tetlet.2018.10.033',\n", 146 | " '10.1016/j.tetlet.2018.10.034',\n", 147 | " '10.1016/j.tetlet.2018.10.038',\n", 148 | " '10.1016/j.tetlet.2018.10.041']" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 8, 154 | "id": "4be0c3e8-1931-450e-995e-06984b2218c1", 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "# Retrieve article full text for each DOI in a loop and save each article to a separate file.\n", 159 | "# Example shown for plain text, XML also works (replace 'plain' with 'xml')\n", 160 | "\n", 161 | "elsevier_url = \"https://api.elsevier.com/content/article/doi/\"\n", 162 | "for doi in dois:\n", 163 | " article = requests.get(elsevier_url + doi + \"?APIKey=\" + myAPIKey + \"&httpAccept=text/plain\") \n", 164 | " doi_name = doi.replace('/','_') # Can't save files with a '/' character on Linux\n", 165 | " with open(doi_name + '_plain_text.txt', 'w', encoding='utf-8') as outfile:\n", 166 | " outfile.write(article.text)\n", 167 | " sleep(1) # Pause for 1 second between API calls" 168 | ] 169 | } 170 | ], 171 | "metadata": { 172 | "kernelspec": { 173 | "display_name": "Python 3", 174 | "language": "python", 175 | "name": "python3" 176 | }, 177 | "language_info": { 178 | "codemirror_mode": { 179 | "name": "ipython", 180 | "version": 3 181 | }, 182 | "file_extension": ".py", 183 | "mimetype": "text/x-python", 184 | "name": "python", 185 | "nbconvert_exporter": "python", 186 | "pygments_lexer": "ipython3", 187 | "version": "3.11.9" 188 | } 189 | }, 190 | "nbformat": 4, 191 | "nbformat_minor": 5 192 | } 193 | -------------------------------------------------------------------------------- /src/python/springer.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "collapsed": false 7 | }, 8 | "source": [ 9 | "# Springer Nature API in Python\n", 10 | "\n", 11 | "by Avery Fernandez and Vincent F. Scalfani\n", 12 | "\n", 13 | "These recipe examples use the Springer Nature Open Access API to retrieve metadata and full-text content. About 1.5 million full-text are available: https://dev.springernature.com/docs/api-endpoints/open-access/\n", 14 | "\n", 15 | "An API key is required to access the Springer Nature API, sign up can be found at https://dev.springernature.com/\n", 16 | "\n", 17 | "Code was tested on October 13, 2023. This tutorial content is intended to help facillitate academic research. Please check with your institution for their Text and Data Mining Agreement with Springer Nature. 
Before continuing or reusing any of this code, be aware of the Springer Nature Text and Data Mining Policies, Terms and Conditions, Terms for API Users, and TDM reservation policies:\n", 18 | "\n", 19 | "* https://www.springernature.com/gp/researchers/text-and-data-mining\n", 20 | "* https://www.springernature.com/gp/legal/general-terms-of-use/15067848\n", 21 | "* https://dev.springernature.com/terms-conditions\n", 22 | "* https://dev.springernature.com/tdm-reservation-policy/\n" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "## Setup\n", 30 | "\n", 31 | "### Import Libraries" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 16, 37 | "metadata": { 38 | "collapsed": false 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "import requests\n", 43 | "from time import sleep\n", 44 | "from pprint import pprint" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": { 50 | "collapsed": false 51 | }, 52 | "source": [ 53 | "### Import API Key\n", 54 | "\n", 55 | "We store our API key in a separate file for easy access and security." 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 17, 61 | "metadata": { 62 | "collapsed": false 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "from key import api_key" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": { 72 | "collapsed": false 73 | }, 74 | "source": [ 75 | "## 1. Retrieve full-text JATS XML of an article" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": { 81 | "collapsed": false 82 | }, 83 | "source": [ 84 | "Before we can query, we must establish a few things:\n", 85 | "* **base_url**: The base url for the Springer API, more specifically the open access API with JATS format: https://jats.nlm.nih.gov/archiving/tag-library/1.1/index.html\n", 86 | "* **?q=doi:**: The query parameter, in this case we are searching for a DOI\n", 87 | "* **doi**: The DOI of the article\n", 88 | "* **openaccess:true**: This requests content through the openaccess API\n", 89 | "* **&api_key=**: This the text for the api key\n", 90 | "\n", 91 | "You can read more about the API parameters at https://dev.springernature.com/restfuloperations" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 18, 97 | "metadata": { 98 | "collapsed": false 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "base_url = 'https://api.springernature.com/openaccess/jats'\n", 103 | "\n", 104 | "# example DOI from SpringerOpen Brain Informatics\n", 105 | "doi = '\"10.1007/s40708-014-0001-z\"' # doi must be wrapped in double quotes" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 20, 111 | "metadata": { 112 | "collapsed": false 113 | }, 114 | "outputs": [ 115 | { 116 | "name": "stdout", 117 | "output_type": "stream", 118 | "text": [ 119 | "\n" 120 | ] 121 | } 122 | ], 123 | "source": [ 124 | "data = requests.get(f\"{base_url}?q=doi:{doi} openaccess:true&api_key={api_key}\")\n", 125 | "pprint(data) # Response 200 means that the response was successful" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 21, 131 | "metadata": { 132 | "collapsed": false 133 | }, 134 | "outputs": [], 135 | "source": [ 136 | "# Save to a file\n", 137 | "with open('fulltext.jats', 'w') as outfile:\n", 138 | " outfile.write(data.text)" 139 | ] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "metadata": {}, 144 | "source": [ 145 | "## 2. 
Retrieve full-text in a loop" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 22, 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [ 154 | "# Examples from SprigerOpen Brain Informatics\n", 155 | "\n", 156 | "dois = [\n", 157 | " '\"10.1007/s40708-014-0001-z\"',\n", 158 | " '\"10.1007/s40708-014-0002-y\"',\n", 159 | " '\"10.1007/s40708-014-0003-x\"',\n", 160 | " '\"10.1007/s40708-014-0004-9\"',\n", 161 | " '\"10.1007/s40708-014-0005-8\"',\n", 162 | "]" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 23, 168 | "metadata": { 169 | "collapsed": false 170 | }, 171 | "outputs": [], 172 | "source": [ 173 | "base_url = 'https://api.springernature.com/openaccess/jats'\n", 174 | "for doi in dois:\n", 175 | " data = requests.get(f\"{base_url}?q=doi:{doi} openaccess:true&api_key={api_key}\")\n", 176 | " sleep(1) # add a delay.\n", 177 | " doi_name = doi.replace('/', '_').replace('\"', '') # remove / and \" from doi\n", 178 | " with open(f'{doi_name}_jats_text.jats', 'w') as outfile:\n", 179 | " outfile.write(data.text)" 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": {}, 185 | "source": [ 186 | "## 3. Acquire and Parse Metadata\n", 187 | "\n", 188 | "We can also acquire only the metadata as JSON text." 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 24, 194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [ 197 | "base_url = 'https://api.springernature.com/openaccess/json'\n", 198 | "doi = '\"10.1007/s40708-014-0001-z\"' # doi must be wrapped in double quotes\n", 199 | "data = requests.get(f\"{base_url}?q=doi:{doi} openaccess:true&api_key={api_key}\").json()" 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": {}, 205 | "source": [ 206 | "We can now extract data out of `[\"records\"][0]`, where all the data is stored for the article" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 25, 212 | "metadata": {}, 213 | "outputs": [ 214 | { 215 | "name": "stdout", 216 | "output_type": "stream", 217 | "text": [ 218 | "'This JSON was provided by Springer Nature'\n", 219 | "'doi:\"10.1007/s40708-014-0001-z\" openaccess:true'\n", 220 | "{'h1': 'Abstract',\n", 221 | " 'p': 'Big data is the term for a collection of datasets so huge and complex '\n", 222 | " 'that it becomes difficult to be processed using on-hand theoretical '\n", 223 | " 'models and technique tools. Brain big data is one of the most typical, '\n", 224 | " 'important big data collected using powerful equipments of functional '\n", 225 | " 'magnetic resonance imaging, multichannel electroencephalography, '\n", 226 | " 'magnetoencephalography, Positron emission tomography, near infrared '\n", 227 | " 'spectroscopic imaging, as well as other various devices. Granular '\n", 228 | " 'computing with multiple granular layers, referred to as multi-granular '\n", 229 | " 'computing (MGrC) for short hereafter, is an emerging computing paradigm '\n", 230 | " 'of information processing, which simulates the multi-granular '\n", 231 | " 'intelligent thinking model of human brain. It concerns the processing '\n", 232 | " 'of complex information entities called information granules, which '\n", 233 | " 'arise in the process of data abstraction and derivation of information '\n", 234 | " 'and even knowledge from data. 
This paper analyzes three basic '\n", 235 | " 'mechanisms of MGrC, namely granularity optimization, granularity '\n", 236 | " 'conversion, and multi-granularity joint computation, and discusses the '\n", 237 | " 'potential of introducing MGrC into intelligent processing of brain big '\n", 238 | " 'data.'}\n", 239 | "'10.1007/s40708-014-0001-z'\n", 240 | "'2014-09-06'\n", 241 | "'2015-01-30'\n", 242 | "'Brain Informatics'\n", 243 | "'Granular computing with multiple granular layers for brain big data processing'\n" 244 | ] 245 | } 246 | ], 247 | "source": [ 248 | "# some examples:\n", 249 | "pprint(data[\"apiMessage\"])\n", 250 | "pprint(data[\"query\"])\n", 251 | "pprint(data[\"records\"][0][\"abstract\"])\n", 252 | "pprint(data[\"records\"][0][\"doi\"])\n", 253 | "pprint(data[\"records\"][0][\"onlineDate\"])\n", 254 | "pprint(data[\"records\"][0][\"printDate\"])\n", 255 | "pprint(data[\"records\"][0][\"publicationName\"])\n", 256 | "pprint(data[\"records\"][0][\"title\"])" 257 | ] 258 | } 259 | ], 260 | "metadata": { 261 | "kernelspec": { 262 | "display_name": "Python 3", 263 | "language": "python", 264 | "name": "python3" 265 | }, 266 | "language_info": { 267 | "codemirror_mode": { 268 | "name": "ipython", 269 | "version": 3 270 | }, 271 | "file_extension": ".py", 272 | "mimetype": "text/x-python", 273 | "name": "python", 274 | "nbconvert_exporter": "python", 275 | "pygments_lexer": "ipython3", 276 | "version": "3.11.0" 277 | } 278 | }, 279 | "nbformat": 4, 280 | "nbformat_minor": 0 281 | } 282 | -------------------------------------------------------------------------------- /src/python/us-census-geocoding.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# U.S. Census Geocoding API in Python\n", 8 | "\n", 9 | "by Michael T. Moen\n", 10 | "\n", 11 | "*This product uses the Census Bureau Data API but is not endorsed or certified by the Census Bureau.*\n", 12 | "\n", 13 | "Please see the following resources for more information on API usage:\n", 14 | "\n", 15 | "- Documentation\n", 16 | " - U.S. Census Geocoding API Documentation\n", 17 | "- Terms of Use\n", 18 | " - U.S. Census Geocoding API Terms of Service\n", 19 | "\n", 20 | "*These recipe examples were tested on March 7, 2025.*" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "## Setup\n", 28 | "\n", 29 | "The following external libraries need to be installed into your enviornment to run the code examples in this tutorial:\n", 30 | "\n", 31 | "- ipykernel\n", 32 | "- requests" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 1, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "import requests\n", 42 | "import csv\n", 43 | "from pprint import pprint" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "## 1. Address Lookup\n", 51 | "\n", 52 | "One of the main use cases of this API is finding the latitude and longitude of an address. In this example, we find the latitude and longitude of the Bruno Business Library at the University of Alabama.\n", 53 | "\n", 54 | "The API allows searching through two methods: `address` and `onelineaddress`. 
These methods are nearly identical, with the only difference being the format of the parameters passed to API.\n", 55 | "\n", 56 | "### Using `address` Search" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 3, 62 | "metadata": {}, 63 | "outputs": [ 64 | { 65 | "data": { 66 | "text/plain": [ 67 | "200" 68 | ] 69 | }, 70 | "execution_count": 3, 71 | "metadata": {}, 72 | "output_type": "execute_result" 73 | } 74 | ], 75 | "source": [ 76 | "BASE_URL = 'https://geocoding.geo.census.gov/geocoder/'\n", 77 | "return_type = 'locations'\n", 78 | "search_type = 'address'\n", 79 | "\n", 80 | "params = {\n", 81 | " # Specify the address to lookup with the following parameters\n", 82 | " 'street': '425 Stadium Dr',\n", 83 | " 'city': 'Tuscaloosa',\n", 84 | " 'state': 'AL',\n", 85 | " 'zip': 35401,\n", 86 | " # Specify the version of the locator to be searched\n", 87 | " 'benchmark': 'Public_AR_Current',\n", 88 | " # Specify that data should be returned in JSON format\n", 89 | " 'format': 'json'\n", 90 | "}\n", 91 | "\n", 92 | "response = requests.get(f'{BASE_URL}{return_type}/{search_type}', params=params)\n", 93 | "\n", 94 | "# Status code of 200 indicates success\n", 95 | "response.status_code" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 4, 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "data": { 105 | "text/plain": [ 106 | "{'result': {'input': {'address': {'zip': '35401',\n", 107 | " 'city': 'Tuscaloosa',\n", 108 | " 'street': '425 Stadium Dr',\n", 109 | " 'state': 'AL'},\n", 110 | " 'benchmark': {'isDefault': True,\n", 111 | " 'benchmarkDescription': 'Public Address Ranges - Current Benchmark',\n", 112 | " 'id': '4',\n", 113 | " 'benchmarkName': 'Public_AR_Current'}},\n", 114 | " 'addressMatches': [{'tigerLine': {'side': 'L', 'tigerLineId': '636109874'},\n", 115 | " 'coordinates': {'x': -87.549700416257, 'y': 33.21105403378},\n", 116 | " 'addressComponents': {'zip': '35401',\n", 117 | " 'streetName': 'STADIUM',\n", 118 | " 'preType': '',\n", 119 | " 'city': 'TUSCALOOSA',\n", 120 | " 'preDirection': '',\n", 121 | " 'suffixDirection': '',\n", 122 | " 'fromAddress': '401',\n", 123 | " 'state': 'AL',\n", 124 | " 'suffixType': 'DR',\n", 125 | " 'toAddress': '499',\n", 126 | " 'suffixQualifier': '',\n", 127 | " 'preQualifier': ''},\n", 128 | " 'matchedAddress': '425 STADIUM DR, TUSCALOOSA, AL, 35401'}]}}" 129 | ] 130 | }, 131 | "execution_count": 4, 132 | "metadata": {}, 133 | "output_type": "execute_result" 134 | } 135 | ], 136 | "source": [ 137 | "response.json()" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 5, 143 | "metadata": {}, 144 | "outputs": [ 145 | { 146 | "data": { 147 | "text/plain": [ 148 | "(33.21105403378, -87.549700416257)" 149 | ] 150 | }, 151 | "execution_count": 5, 152 | "metadata": {}, 153 | "output_type": "execute_result" 154 | } 155 | ], 156 | "source": [ 157 | "latitude = response.json()['result']['addressMatches'][0]['coordinates']['y']\n", 158 | "longitude = response.json()['result']['addressMatches'][0]['coordinates']['x']\n", 159 | "\n", 160 | "# Display coordinates\n", 161 | "latitude, longitude" 162 | ] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": {}, 167 | "source": [ 168 | "### Using `onelineaddress` Search" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 6, 174 | "metadata": {}, 175 | "outputs": [ 176 | { 177 | "data": { 178 | "text/plain": [ 179 | "200" 180 | ] 181 | }, 182 | "execution_count": 6, 183 | "metadata": {}, 184 | 
"output_type": "execute_result" 185 | } 186 | ], 187 | "source": [ 188 | "return_type = 'locations'\n", 189 | "search_type = 'onelineaddress'\n", 190 | "\n", 191 | "params = {\n", 192 | " # Specify the address to lookup with the parameters\n", 193 | " # Note that 'street' is required, and the other parameters are optional\n", 194 | " 'address': '425 Stadium Dr, Tuscaloosa, AL 35401',\n", 195 | " # Specify the version of the locator to be searched\n", 196 | " 'benchmark': 'Public_AR_Current',\n", 197 | " # Specify that data should be returned in JSON format\n", 198 | " 'format': 'json'\n", 199 | "}\n", 200 | "\n", 201 | "response = requests.get(f'{BASE_URL}{return_type}/{search_type}', params=params)\n", 202 | "\n", 203 | "# Status code of 200 indicates success\n", 204 | "response.status_code" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": 7, 210 | "metadata": {}, 211 | "outputs": [ 212 | { 213 | "data": { 214 | "text/plain": [ 215 | "(33.21105403378, -87.549700416257)" 216 | ] 217 | }, 218 | "execution_count": 7, 219 | "metadata": {}, 220 | "output_type": "execute_result" 221 | } 222 | ], 223 | "source": [ 224 | "latitude = response.json()['result']['addressMatches'][0]['coordinates']['y']\n", 225 | "longitude = response.json()['result']['addressMatches'][0]['coordinates']['x']\n", 226 | "\n", 227 | "# Display coordinates\n", 228 | "latitude, longitude" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": {}, 234 | "source": [ 235 | "## 2. Batch Address Lookup\n", 236 | "\n", 237 | "The U.S. Census Geocoding API also allows for batch geocoding with the submission of a CSV, TXT, DAT, XLS, or XLSX file. These files must be formatted with one record per line, where each record must be formatted as followed: Unique ID, Street address, City, State, ZIP. Users are limited to 10,000 records per batch file.\n", 238 | "\n", 239 | "This example uses the CSV file created below:" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": 8, 245 | "metadata": {}, 246 | "outputs": [], 247 | "source": [ 248 | "# Create list of addresses for the batch lookup\n", 249 | "# Note that each record must begin with a unique ID\n", 250 | "addresses = [\n", 251 | " ['1', '425 Stadium Dr', 'Tuscaloosa', 'AL', '35401'],\n", 252 | " ['2', '1600 Pennsylvania Avenue NW', 'Washington', 'DC', '20500'],\n", 253 | " ['3', '350 Fifth Avenue', 'New York', 'NY', '10118'],\n", 254 | " ['4', '660 Cannery Row', 'Monterey', 'CA', '93940'],\n", 255 | " ['5', '700 Clark Ave', 'St. 
Louis', 'MO', '63102']\n", 256 | "]\n", 257 | "\n", 258 | "# Export addresses to a CSV file\n", 259 | "input_filename = 'batch_addresses.csv'\n", 260 | "with open(input_filename, 'w', newline='') as f:\n", 261 | " csv_writer = csv.writer(f)\n", 262 | " csv_writer.writerows(addresses)" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 9, 268 | "metadata": {}, 269 | "outputs": [ 270 | { 271 | "data": { 272 | "text/plain": [ 273 | "200" 274 | ] 275 | }, 276 | "execution_count": 9, 277 | "metadata": {}, 278 | "output_type": "execute_result" 279 | } 280 | ], 281 | "source": [ 282 | "# Format parameters needed for POST request\n", 283 | "return_type = 'locations'\n", 284 | "params = {\n", 285 | " 'benchmark' : 'Public_AR_Current'\n", 286 | "}\n", 287 | "files = {\n", 288 | " 'addressFile': open(input_filename, \"rb\")\n", 289 | "}\n", 290 | "\n", 291 | "url = f'https://geocoding.geo.census.gov/geocoder/{return_type}/addressbatch'\n", 292 | "response = requests.post(url, data=params, files=files)\n", 293 | "\n", 294 | "# Status code of 200 indicates success\n", 295 | "response.status_code" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": 10, 301 | "metadata": {}, 302 | "outputs": [ 303 | { 304 | "name": "stdout", 305 | "output_type": "stream", 306 | "text": [ 307 | "['1', '425 Stadium Dr, Tuscaloosa, AL, 35401', 'Match', 'Exact', '425 STADIUM DR, TUSCALOOSA, AL, 35401', '-87.549700416257,33.211054033781', '636109874', 'L']\n", 308 | "['2', '1600 Pennsylvania Avenue NW, Washington, DC, 20500', 'Match', 'Exact', '1600 PENNSYLVANIA AVE NW, WASHINGTON, DC, 20500', '-77.036543957308,38.898690918656', '76225813', 'L']\n", 309 | "['3', '350 Fifth Avenue, New York, NY, 10118', 'Match', 'Exact', '350 5TH AVE, NEW YORK, NY, 10118', '-73.985077152891,40.747848600317', '59653473', 'L']\n", 310 | "['4', '660 Cannery Row, Monterey, CA, 93940', 'Match', 'Exact', '660 CANNERY ROW, MONTEREY, CA, 93940', '-121.901280304574,36.617235842516', '647390330', 'R']\n", 311 | "['5', '700 Clark Ave, St. Louis, MO, 63102', 'Match', 'Non_Exact', '700 CLARK AVE, SAINT LOUIS, MO, 63119', '-90.340369438036,38.602422417149', '100141071', 'R']\n" 312 | ] 313 | } 314 | ], 315 | "source": [ 316 | "# Save content of response to a new CSV\n", 317 | "output_filename = 'geocoded_addresses.csv'\n", 318 | "with open(output_filename, 'wb') as f:\n", 319 | " f.write(response.content)\n", 320 | "\n", 321 | "# Printing contents of CSV for demonstation purposes\n", 322 | "with open(output_filename, newline='') as f:\n", 323 | " csv_reader = csv.reader(f)\n", 324 | " for row in csv_reader:\n", 325 | " print(row)" 326 | ] 327 | }, 328 | { 329 | "cell_type": "markdown", 330 | "metadata": {}, 331 | "source": [ 332 | "Note that the last two columns of the above data are the TIGER/Line ID and TIGER/Line Side. For more information on these values, please see the U.S. Census TIGER/Line Geodatabase Documentation. However, this tutorial does not utilize any TIGER/Line data." 333 | ] 334 | }, 335 | { 336 | "cell_type": "markdown", 337 | "metadata": {}, 338 | "source": [ 339 | "## 3. Retrieving Additional Geographic Data\n", 340 | "\n", 341 | "The `geographies` return type allows for the retrieval of additional data associated for a given address or set of coordinates. 
The example below retrieves this data using the address of the Bruno Business Library at the University of Alabama.\n", 342 | "\n", 343 | "Note that the `geographies` return type requires the `vintage` parameter to be specified.\n", 344 | "\n", 345 | "Users may additionally include the `layers` parameter, which determines the types of geography data returned. For a list of all layers, see here." 346 | ] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": 15, 351 | "metadata": {}, 352 | "outputs": [ 353 | { 354 | "data": { 355 | "text/plain": [ 356 | "200" 357 | ] 358 | }, 359 | "execution_count": 15, 360 | "metadata": {}, 361 | "output_type": "execute_result" 362 | } 363 | ], 364 | "source": [ 365 | "return_type = 'geographies'\n", 366 | "search_type = 'address'\n", 367 | "\n", 368 | "params = {\n", 369 | " # Specify the address to lookup with the following parameters\n", 370 | " 'street': '425 Stadium Dr',\n", 371 | " 'city': 'Tuscaloosa',\n", 372 | " 'state': 'AL',\n", 373 | " 'zip': 35401,\n", 374 | " # Specify the version of the locator to be searched\n", 375 | " 'benchmark': 'Public_AR_Current',\n", 376 | " # Specify the vintage\n", 377 | " 'vintage': 'Current_Current',\n", 378 | " # Specify what categories of geographic data to retrieve\n", 379 | " 'layers': 'all',\n", 380 | " # Specify that data should be returned in JSON format\n", 381 | " 'format': 'json'\n", 382 | "}\n", 383 | "\n", 384 | "response = requests.get(f'{BASE_URL}{return_type}/{search_type}', params=params)\n", 385 | "\n", 386 | "# Status code of 200 indicates success\n", 387 | "response.status_code" 388 | ] 389 | }, 390 | { 391 | "cell_type": "markdown", 392 | "metadata": {}, 393 | "source": [ 394 | "Note that the `geographies` return type returns all of the data that the `locations` return type does in addition to the geographies data." 
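For instance, once the geographies data has been returned, individual identifiers can be read directly from the nested dictionaries. The sketch below assumes the `response` object from the request above and that the 'Census Tracts' entries carry a `GEOID` field with the same layout as the 'Counties' entries shown further below.

```python
# Minimal sketch: pull county and census tract GEOIDs from the geographies data.
# Assumes 'response' is the geographies-return-type response retrieved above;
# the 'GEOID' field for 'Census Tracts' is assumed to mirror the 'Counties' entries.
match = response.json()['result']['addressMatches'][0]
county_geoid = match['geographies']['Counties'][0]['GEOID']
tract_geoid = match['geographies']['Census Tracts'][0]['GEOID']
print(county_geoid, tract_geoid)
```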
395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "execution_count": 16, 400 | "metadata": {}, 401 | "outputs": [ 402 | { 403 | "name": "stdout", 404 | "output_type": "stream", 405 | "text": [ 406 | "{'addressComponents': {...},\n", 407 | " 'coordinates': {...},\n", 408 | " 'geographies': {...},\n", 409 | " 'matchedAddress': '425 STADIUM DR, TUSCALOOSA, AL, 35401',\n", 410 | " 'tigerLine': {...}}\n" 411 | ] 412 | } 413 | ], 414 | "source": [ 415 | "pprint(response.json()['result']['addressMatches'][0], depth=1)" 416 | ] 417 | }, 418 | { 419 | "cell_type": "markdown", 420 | "metadata": {}, 421 | "source": [ 422 | "The geographies data contains the following categories:" 423 | ] 424 | }, 425 | { 426 | "cell_type": "code", 427 | "execution_count": 17, 428 | "metadata": {}, 429 | "outputs": [ 430 | { 431 | "name": "stdout", 432 | "output_type": "stream", 433 | "text": [ 434 | "{'119th Congressional Districts': [...],\n", 435 | " '2020 Census Blocks': [...],\n", 436 | " '2020 Census Public Use Microdata Areas': [...],\n", 437 | " '2020 Census ZIP Code Tabulation Areas': [...],\n", 438 | " '2024 State Legislative Districts - Lower': [...],\n", 439 | " '2024 State Legislative Districts - Upper': [...],\n", 440 | " 'Census Block Groups': [...],\n", 441 | " 'Census Divisions': [...],\n", 442 | " 'Census Regions': [...],\n", 443 | " 'Census Tracts': [...],\n", 444 | " 'Counties': [...],\n", 445 | " 'County Subdivisions': [...],\n", 446 | " 'Incorporated Places': [...],\n", 447 | " 'Metropolitan Statistical Areas': [...],\n", 448 | " 'States': [...],\n", 449 | " 'Unified School Districts': [...],\n", 450 | " 'Urban Areas': [...]}\n" 451 | ] 452 | } 453 | ], 454 | "source": [ 455 | "pprint(response.json()['result']['addressMatches'][0]['geographies'], depth=1)" 456 | ] 457 | }, 458 | { 459 | "cell_type": "markdown", 460 | "metadata": {}, 461 | "source": [ 462 | "As an example, this is how the Counties data is formatted." 
463 | ] 464 | }, 465 | { 466 | "cell_type": "code", 467 | "execution_count": 18, 468 | "metadata": { 469 | "tags": [] 470 | }, 471 | "outputs": [ 472 | { 473 | "data": { 474 | "text/plain": [ 475 | "[{'GEOID': '01125',\n", 476 | " 'CENTLAT': '+33.2894031',\n", 477 | " 'AREAWATER': '78666216',\n", 478 | " 'STATE': '01',\n", 479 | " 'BASENAME': 'Tuscaloosa',\n", 480 | " 'OID': '2759075608325',\n", 481 | " 'LSADC': '06',\n", 482 | " 'FUNCSTAT': 'A',\n", 483 | " 'INTPTLAT': '+33.2902197',\n", 484 | " 'NAME': 'Tuscaloosa County',\n", 485 | " 'OBJECTID': 3113,\n", 486 | " 'CENTLON': '-087.5250366',\n", 487 | " 'COUNTYCC': 'H1',\n", 488 | " 'COUNTYNS': '00161588',\n", 489 | " 'AREALAND': '3421017287',\n", 490 | " 'INTPTLON': '-087.5227834',\n", 491 | " 'MTFCC': 'G4020',\n", 492 | " 'COUNTY': '125'}]" 493 | ] 494 | }, 495 | "execution_count": 18, 496 | "metadata": {}, 497 | "output_type": "execute_result" 498 | } 499 | ], 500 | "source": [ 501 | "response.json()['result']['addressMatches'][0]['geographies']['Counties']" 502 | ] 503 | } 504 | ], 505 | "metadata": { 506 | "kernelspec": { 507 | "display_name": "api_env", 508 | "language": "python", 509 | "name": "python3" 510 | }, 511 | "language_info": { 512 | "codemirror_mode": { 513 | "name": "ipython", 514 | "version": 3 515 | }, 516 | "file_extension": ".py", 517 | "mimetype": "text/x-python", 518 | "name": "python", 519 | "nbconvert_exporter": "python", 520 | "pygments_lexer": "ipython3", 521 | "version": "3.13.1" 522 | }, 523 | "orig_nbformat": 4 524 | }, 525 | "nbformat": 4, 526 | "nbformat_minor": 2 527 | } 528 | -------------------------------------------------------------------------------- /src/python/wiley-tdm.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Wiley Text and Data Mining (TDM) in Python\n", 8 | "\n", 9 | "by Michael T. Moen\n", 10 | "\n", 11 | "The Wiley Text and Data Mining (TDM) API allows users to retrieve the full-text articles of subscribed Wiley content in PDF form. TDM use is for non-commercial scholarly research, see terms and restrictions in below links.\n", 12 | "\n", 13 | "*This tutorial content is intended to help facilitate academic research. Please check your institution for their Text and Data Mining or related License Agreement with Wiley.*\n", 14 | "\n", 15 | "Please see the following resources for more information on API usage:\n", 16 | "\n", 17 | "- Documentation\n", 18 | " - Wiley Text and Data Mining\n", 19 | "- Terms\n", 20 | " - Wiley Text and Data Mining Agreement\n", 21 | "- Data Reuse\n", 22 | " - Wiley TDM Data Reuse (see sections 4 and 5 of Text and Data Mining Agreement)\n", 23 | "\n", 24 | "*These recipe examples were tested on April 4, 2025.*\n", 25 | "\n", 26 | "**_NOTE:_** The Wiley TDM API limits requests to a maximum of 3 requests per second." 
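One simple way to respect this limit when making many requests is to pause briefly after each call. The helper below is a minimal sketch of that idea (the examples later in this tutorial simply call `sleep(1)` between requests); the 0.4-second delay is a conservative choice, not a value taken from the Wiley documentation.

```python
import requests
from time import sleep

# Minimal sketch: pause ~0.4 s after each request so calls stay comfortably
# under the 3-requests-per-second limit. The delay value is an assumption.
def polite_get(url, headers=None):
    response = requests.get(url, headers=headers)
    sleep(0.4)
    return response
```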
27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "## Setup\n", 34 | "\n", 35 | "### Import Libraries\n", 36 | "\n", 37 | "The following external libraries need to be installed into your enviornment to run the code examples in this tutorial:\n", 38 | "\n", 39 | "- ipykernel\n", 40 | "- requests\n", 41 | "- python-dotenv\n", 42 | "\n", 43 | "We import the libraries used in this tutorial below:" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 1, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "import os\n", 53 | "import requests\n", 54 | "from time import sleep\n", 55 | "from dotenv import load_dotenv" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "### Import Text and Data Mining Token\n", 63 | "\n", 64 | "An token is required for text and data mining with Wiley. You can sign up for one here.\n", 65 | "\n", 66 | "We keep our token in a `.env` file and use the `dotenv` library to access it. If you would like to use this method, create a `.env` file and add the following line to it:\n", 67 | "\n", 68 | "```text\n", 69 | "WILEY_TDM_TOKEN=PUT_YOUR_TOKEN_HERE\n", 70 | "```" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "load_dotenv()\n", 80 | "try:\n", 81 | " WILEY_TDM_TOKEN = os.environ[\"WILEY_TDM_TOKEN\"]\n", 82 | "except KeyError:\n", 83 | " print(\"Token not found. Please set 'WILEY_TDM_TOKEN' in your .env file.\")" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "## 1. Retrieve Full-Text of an Article\n", 91 | "\n", 92 | "The Wiley TDM API returns the full-text of an article as a PDF when given the article's DOI.\n", 93 | "\n", 94 | "In the first example, we download the full-text of the article with the DOI \"10.1002/net.22207\". This article was found on the Wiley Online Library." 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | "outputs": [ 102 | { 103 | "name": "stdout", 104 | "output_type": "stream", 105 | "text": [ 106 | "10.1002_net.22207.pdf downloaded successfully\n" 107 | ] 108 | } 109 | ], 110 | "source": [ 111 | "# DOI of article to download\n", 112 | "doi = '10.1002/net.22207'\n", 113 | "url = f'https://api.wiley.com/onlinelibrary/tdm/v1/articles/{doi}'\n", 114 | "headers = {\n", 115 | " \"Wiley-TDM-Client-Token\": WILEY_TDM_TOKEN\n", 116 | "}\n", 117 | "response = requests.get(url, headers=headers)\n", 118 | "\n", 119 | "# Download PDF if status code indicates success\n", 120 | "if response.status_code == 200:\n", 121 | " filename = f'{doi.replace('/', '_')}.pdf'\n", 122 | " with open(filename, 'wb') as file:\n", 123 | " file.write(response.content)\n", 124 | " print(f'{filename} downloaded successfully')\n", 125 | "else:\n", 126 | " print(f'Failed to download PDF. Status code: {response.status_code}')" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "## 2. 
Retrieve Full-Text of Multiple Articles\n", 134 | "\n", 135 | "In this example, we download 5 articles found in the Wiley Online Library:" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": {}, 142 | "outputs": [ 143 | { 144 | "name": "stdout", 145 | "output_type": "stream", 146 | "text": [ 147 | "10.1111_j.1467-8624.2010.01564.x.pdf downloaded successfully\n", 148 | "10.1111_1467-8624.00164.pdf downloaded successfully\n", 149 | "10.1111_cdev.12864.pdf downloaded successfully\n", 150 | "10.1111_j.1467-8624.2007.00995.x.pdf downloaded successfully\n", 151 | "10.1111_j.1467-8624.2010.01499.x.pdf downloaded successfully\n", 152 | "Failed to download PDF for 10.1111/j.1467-8624.2010.0149.x. Status code: 404\n" 153 | ] 154 | } 155 | ], 156 | "source": [ 157 | "# DOIs of articles to download\n", 158 | "dois = [\n", 159 | " '10.1111/j.1467-8624.2010.01564.x',\n", 160 | " '10.1111/1467-8624.00164',\n", 161 | " '10.1111/cdev.12864',\n", 162 | " '10.1111/j.1467-8624.2007.00995.x',\n", 163 | " '10.1111/j.1467-8624.2010.01499.x',\n", 164 | " '10.1111/j.1467-8624.2010.0149.x' # Invalid DOI, will throw error\n", 165 | "]\n", 166 | "\n", 167 | "# Send an HTTP request for each DOI\n", 168 | "for doi in dois:\n", 169 | " url = f'https://api.wiley.com/onlinelibrary/tdm/v1/articles/{doi}'\n", 170 | " response = requests.get(url, headers=headers)\n", 171 | "\n", 172 | " # Download PDF if status code indicates success\n", 173 | " if response.status_code == 200:\n", 174 | " filename = f'{doi.replace('/', '_')}.pdf'\n", 175 | " with open(filename, 'wb') as file:\n", 176 | " file.write(response.content)\n", 177 | " print(f'{filename} downloaded successfully')\n", 178 | " else:\n", 179 | " print(f'Failed to download PDF for {doi.replace('%2f', '/')}.')\n", 180 | " print(f'Status code: {response.status_code}')\n", 181 | " sleep(1) # Wait 1 second to be nice on Wiley's servers" 182 | ] 183 | } 184 | ], 185 | "metadata": { 186 | "kernelspec": { 187 | "display_name": "api_env", 188 | "language": "python", 189 | "name": "python3" 190 | }, 191 | "language_info": { 192 | "codemirror_mode": { 193 | "name": "ipython", 194 | "version": 3 195 | }, 196 | "file_extension": ".py", 197 | "mimetype": "text/x-python", 198 | "name": "python", 199 | "nbconvert_exporter": "python", 200 | "pygments_lexer": "ipython3", 201 | "version": "3.13.2" 202 | }, 203 | "orig_nbformat": 4 204 | }, 205 | "nbformat": 4, 206 | "nbformat_minor": 2 207 | } 208 | -------------------------------------------------------------------------------- /src/r-gpl3/PubMedAPItut_files/figure-html/visual-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r-gpl3/PubMedAPItut_files/figure-html/visual-1.png -------------------------------------------------------------------------------- /src/r-gpl3/R_Pubchem_Markdown_Adam_Edit_files/figure-html/unnamed-chunk-10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r-gpl3/R_Pubchem_Markdown_Adam_Edit_files/figure-html/unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /src/r-gpl3/R_Pubchem_Markdown_Adam_Edit_files/figure-html/unnamed-chunk-10-2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r-gpl3/R_Pubchem_Markdown_Adam_Edit_files/figure-html/unnamed-chunk-10-2.png -------------------------------------------------------------------------------- /src/r-gpl3/R_Pubchem_Markdown_Adam_Edit_files/figure-html/unnamed-chunk-10-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r-gpl3/R_Pubchem_Markdown_Adam_Edit_files/figure-html/unnamed-chunk-10-3.png -------------------------------------------------------------------------------- /src/r-gpl3/R_Pubchem_Markdown_Adam_Edit_files/figure-html/unnamed-chunk-10-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r-gpl3/R_Pubchem_Markdown_Adam_Edit_files/figure-html/unnamed-chunk-10-4.png -------------------------------------------------------------------------------- /src/r-gpl3/R_Pubchem_Markdown_Adam_Edit_files/figure-html/unnamed-chunk-10-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r-gpl3/R_Pubchem_Markdown_Adam_Edit_files/figure-html/unnamed-chunk-10-5.png -------------------------------------------------------------------------------- /src/r-gpl3/R_Pubchem_Markdown_Adam_Edit_files/figure-html/unnamed-chunk-18-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r-gpl3/R_Pubchem_Markdown_Adam_Edit_files/figure-html/unnamed-chunk-18-1.png -------------------------------------------------------------------------------- /src/r-gpl3/R_Pubchem_Markdown_Adam_Edit_files/figure-html/unnamed-chunk-18-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r-gpl3/R_Pubchem_Markdown_Adam_Edit_files/figure-html/unnamed-chunk-18-2.png -------------------------------------------------------------------------------- /src/r-gpl3/R_Pubchem_Markdown_Adam_Edit_files/figure-html/unnamed-chunk-18-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r-gpl3/R_Pubchem_Markdown_Adam_Edit_files/figure-html/unnamed-chunk-18-3.png -------------------------------------------------------------------------------- /src/r-gpl3/R_Pubchem_Markdown_Adam_Edit_files/figure-html/unnamed-chunk-18-4.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r-gpl3/R_Pubchem_Markdown_Adam_Edit_files/figure-html/unnamed-chunk-18-4.png -------------------------------------------------------------------------------- /src/r-gpl3/R_Pubchem_Markdown_Adam_Edit_files/figure-html/unnamed-chunk-18-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r-gpl3/R_Pubchem_Markdown_Adam_Edit_files/figure-html/unnamed-chunk-18-5.png -------------------------------------------------------------------------------- /src/r-gpl3/US_Census_Data_in_R_files/figure-html/plot-popchg-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r-gpl3/US_Census_Data_in_R_files/figure-html/plot-popchg-1.png -------------------------------------------------------------------------------- /src/r-gpl3/pubmed.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: \...in R 3 | output: 4 | html_document: 5 | keep_md: TRUE 6 | --- 7 | 8 | # PubMed API in R 9 | 10 | by Adam M. Nguyen 11 | 12 | The recipe examples were tested on Mar 24, 2023 13 | 14 | The easyPubMed R package provides programmatic access to PubMed data, enabling researchers to search, retrieve, and analyze biomedical literature efficiently. 15 | 16 | ### API Resources 17 | 18 | - **Documentation** 19 | - [Getting Started with easyPubMed Article](https://cran.r-project.org/web/packages/easyPubMed/vignettes/getting_started_with_easyPubMed.html) 20 | - [easyPubMed PDF Documentation](https://cran.r-project.org/web/packages/easyPubMed/easyPubMed.pdf) 21 | 22 | - **Tutorial License** 23 | - This tutorial uses the **easyPubMed** R library which is licensed as **GPL-3**. 24 | - As a result, this tutorial code is also licensed as **GPL-3**: 25 | [License Details](https://github.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/blob/main/LICENSE_selected_R_tutorials) 26 | 27 | 28 | ## Setup 29 | 30 | First let's install the easyPubMed package as well as load the library. 31 | If you do not already have the package installed, run the following command in your console: "install.packages("easyPubMed", repos = "http://cran.us.r-project.org")". 32 | 33 | ```r 34 | # easyPubMed library for accessing PubMed API 35 | library(easyPubMed) 36 | ``` 37 | 38 | ## 1. Querying PubMed API 39 | 40 | Below is an example query utilizing some valuable functions provided by the easyPubMed library as well as some important information for the API, if unfamiliar. 41 | 42 | These functions include: 43 | 44 | * get_pubmed_ids() 45 | 46 | * fetch_pubmed_data() 47 | 48 | **Note:** PubMed employs field tags to specify the nature of the associated string, for a comprehensive list of field tags visit: "https://pubmed.ncbi.nlm.nih.gov/help/#using-search-field-tags". Additionally, using PubMed tags will limit your search to the specified terms only. While querying PubMed, using the "get_pubmed_ids", it is allowable to provide no tags and the function will translate it for the user. 49 | 50 | Let's try querying Pubmed! Check comments for additional, step-by-step detail. 
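Before running the tagged query below, the following optional sketch illustrates the note above: a query without field tags is also accepted, and PubMed translates it into tagged form on the server. The query string here is an arbitrary example, and the 'Count' and 'QueryTranslation' elements come from the Entrez eSearch response that 'get_pubmed_ids()' parses; if 'QueryTranslation' is not present in your easyPubMed version, that line simply prints NULL.

```r
# Optional sketch (assumes library(easyPubMed) is loaded as above):
# an untagged query also works because PubMed translates it for you.
untagged_query <- 'aspirin cardiovascular'   # arbitrary example, no field tags
untagged_id <- get_pubmed_ids(untagged_query)

untagged_id$Count            # number of matching records reported by PubMed
untagged_id$QueryTranslation # PubMed's tagged translation of the query (if returned)
```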
51 | 52 | 53 | ```r 54 | example_query <- 'Ancestral population genomics using coalescence hidden Markov models and heuristic optimisation algorithms.[Title]' #State query in the format 'query[query tag]', can include AND and OR statements and a query tag is not required 55 | example_id <- get_pubmed_ids(example_query) #Stores a list of PMIDs(PubMed Identifications) satisfying the query 56 | 57 | example_id$IdList$Id 58 | ``` 59 | 60 | ``` 61 | ## [1] "25819138" 62 | ``` 63 | 64 | ```r 65 | example_xml <-fetch_pubmed_data(example_id, format = "xml") # Create xml output 66 | ``` 67 | Now we have successfully queried and stored the data of 'Ancestral population genomics using coalescence hidden Markov models and heuristic optimisation algorithms.[Title]' into the xml output. We will find that working with the xml output is advantageous due to its hierarchical structure. 68 | 69 | Next we will show an example of how we can obtain a list of authors from this query using simple R functions and the 'custom_grep()' function from easyPubMed. 70 | 71 | * custom_grep() retrieves data between the tags given 72 | 73 | 74 | ```r 75 | last_name_authors <- custom_grep(example_xml, "LastName", "/LastName") # retrieve last name 76 | forename_authors <- custom_grep(example_xml, "ForeName", "/ForeName") # retrieve forename 77 | 78 | example_authors <- rbind('Last Name'=last_name_authors, 'Forename'=forename_authors) # output example_authors dataframe for PMID 27933103 79 | example_authors 80 | ``` 81 | 82 | ``` 83 | ## [,1] [,2] 84 | ## Last Name "Cheng" "Mailund" 85 | ## Forename "Jade Yu" "Thomas" 86 | ``` 87 | 88 | 89 | ## 2. Querying for Multiple Sources 90 | 91 | Another convenience of using easyPubMed is whether requesting data from one article or multiple, it is the same process. The only change that must be made is changing the query, whether that be multiple PMIDs or an Author's name, as seen in the example below. 92 | 93 | 94 | ```r 95 | multi_example_query <- 'Vincent Scalfani[AU]' # All we need to change here is simply making more general query requests to PubMed. 96 | multi_example_id <- get_pubmed_ids(multi_example_query) #Stores a list of PMIDs satisfying the query 97 | 98 | multi_example_xml <-fetch_pubmed_data(multi_example_id, format = "xml") # XML format 99 | 100 | # To understand the structure of the XML output, try running the following line without the pound sign, i.e. uncomment 101 | # multi_example_xml 102 | 103 | # In the XML format we find Journal Titles to be between "Title" and "/Title" 104 | journals <- custom_grep(multi_example_xml, "Title", "/Title") #Retrieve Journal Titles 105 | ``` 106 | 107 | Similar to the previous example, now we have retrieved a list of Journal Titles Dr. Scalfani has published under from the articles available on PubMed. 108 | 109 | 110 | ```r 111 | journals 112 | ``` 113 | 114 | ``` 115 | ## [1] "Journal of cheminformatics" "Journal of cheminformatics" 116 | ## [3] "Science (New York, N.Y.)" "ACS macro letters" 117 | ``` 118 | ## 3. Looping Through a List of PMIDs 119 | 120 | In some use cases, a user may be interested in looping through a list of IDs to query data. Below we will show how one can do this. 121 | 122 | First, create an example list of PubMed IDs: 123 | 124 | ```r 125 | pmids = as.list(c(34813985, 34813932, 34813684, 34813661, 34813372, 34813140, 34813072)) 126 | ``` 127 | 128 | Next, let's begin querying through a for loop. 
Essentially what is happening is similar to in previous examples, but we are using the for command to iterate over each element of our 'pmid' list and then appending the results to our 'Titles' list. 129 | 130 | 131 | ```r 132 | # Creates empty list of titles 133 | Titles <- c() 134 | # Iterate through each listed pmid, retrieve XML formatted info, and retrieve list of Titles 135 | for (i in pmids) { 136 | join <- paste(i, '[pmid]') #join each element with [pmid] to specify 137 | id <- get_pubmed_ids(join) 138 | xml <- fetch_pubmed_data(id, format = "xml") 139 | Titles<-append(Titles,custom_grep(xml, "ArticleTitle", "/ArticleTitle")) 140 | Sys.sleep(1) 141 | } 142 | # Display list of titles 143 | Titles 144 | ``` 145 | 146 | ``` 147 | ## [1] "Mutation in RyR2-FKBP Binding site alters Ca2+ signaling modestly but increases \"arrhythmogenesis\" in human stem cells derived cardiomyocytes." 148 | ## [2] "M-CDC: Magnetic pull-down-assisted colorimetric method based on the CRISPR/Cas12a system." 149 | ## [3] "Naturally occurring UBIAD1 mutations differentially affect menaquinone biosynthesis and vitamin K-dependent carboxylation." 150 | ## [4] "Efficient visual screening of CRISPR/Cas9 genome editing in the nematode Pristionchus pacificus." 151 | ## [5] "Base Editing of Somatic Cells Using CRISPR-Cas9 in Drosophila." 152 | ## [6] "Mammalian Chemical Genomics towards Identifying Targets and Elucidating Modes-of-Action of Bioactive Compounds." 153 | ## [7] "CRISPR-Cas9 Editing of the Synthesis of Biodegradable Polyesters Polyhydroxyalkanaotes (PHA) in Pseudomonas putida KT2440." 154 | ``` 155 | ## 4. PubMed API Metadata Visualization 156 | 157 | In this example we're going to show how a user can create a visualization using the PubMed API, specifically a histogram to visualize publishing frequency of the query 'hydrogel drug AND clinical trial[pt]'. 158 | 159 | 160 | ```r 161 | # Libraries for creating network visual 162 | visual_query <- 'hydrogel drug AND clinical trial[pt]' # Example Query 163 | 164 | visual_id <- get_pubmed_ids(visual_query) #Stores a list of PMIDs satisfying the query 165 | 166 | visual_xml <-fetch_pubmed_data(visual_id, format = "xml") # XML format 167 | 168 | Year<- custom_grep(visual_xml, 'Year','/Year') #Retrieve Publication Years 169 | head(Year, n=10) #Display first 10 instances of the Year list 170 | ``` 171 | 172 | ``` 173 | ## [1] "2022" "2023" "2022" "2022" "2022" "2022" "2022" "2022" "2022" "2022" 174 | ``` 175 | 176 | ```r 177 | hist(as.numeric(Year),main= 'Freq. of Publications from PubMed "hydrogel drug AND clinical trial[pt]"',xlab='Year', breaks=40, col = 'plum') # Use base R function hist() to plot 178 | ``` 179 | 180 | ![](PubMedAPItut_files/figure-html/visual-1.png) 181 | -------------------------------------------------------------------------------- /src/r-gpl3/us-census.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: \...in R 3 | output: 4 | html_document: 5 | keep_md: true 6 | --- 7 | 8 | # U.S. Census Data API in R 9 | 10 | by Adam M. Nguyen 11 | 12 | These recipe examples were tested on March 24, 2023. 13 | 14 | - **Documentation** 15 | - [censusapi Package Documentation (PDF)](https://cran.r-project.org/web/packages/censusapi/censusapi.pdf) 16 | - [U.S. Census API Documentation](https://www.census.gov/data/developers/about.html) 17 | - [U.S. Census Data Discovery Tool](https://api.census.gov/data.html) 18 | 19 | - **Terms** 20 | - See also the [U.S. 
Census API Terms of Service](https://www.census.gov/data/developers/about.html#terms) 21 | 22 | - **Attribution** 23 | - This tutorial uses the Census Bureau Data API but is not endorsed or certified by the Census Bureau. 24 | 25 | - **Tutorial License** 26 | - This tutorial uses the **censusapi** R library which is licensed as **GPL-3**. 27 | - As a result, this tutorial code is also licensed as **GPL-3**: 28 | [License Details](https://github.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/blob/main/LICENSE_selected_R_tutorials) 29 | 30 | ## Setup 31 | 32 | ### API Key Information 33 | 34 | While an API key is not required to use the U.S. Census Data API, you may want to register for one, as the API is limited to 500 calls a day without a key. Sign-up is available here: https://api.census.gov/data/key_signup.html. 35 | 36 | If you are using this code, make sure to supply your own key as shown below. 37 | 38 | Here we use 'Sys.getenv()' to retrieve our API key from the environment variables. You can either do this by creating an [.Renviron file and storing your API Key](https://docs.posit.co/how-to-guides/pre-tasks/api-keys-renv/) or simply replacing "Sys.getenv('USCensusAPIKey')" with your API Key. 39 | 40 | ```r 41 | # Access .Renviron to get the U.S. Census API key 42 | user_key = Sys.getenv('USCensusAPIKey') # use Sys.getenv() to access .Renviron 43 | ``` 44 | 45 | ### Setup censusapi Package 46 | The censusapi package allows users to easily access U.S. Census data and metadata, including datasets such as the Decennial Census, American Community Survey, Small Area Health Insurance Estimates, Small Area Income and Poverty Estimates, Population Estimates and Projections, and more. In this tutorial, we will be using censusapi. 47 | 48 | If you haven't already, run "install.packages('censusapi')" in your R Console to install the US Census API package we will be using for this tutorial. 49 | 50 | First, let us load the required library, "censusapi". 51 | 52 | ```r 53 | library(censusapi) # Access censusapi library 54 | ``` 55 | ## 1. Get Population Estimates of Counties by State 56 | 57 | Our primary means of accessing the U.S. Census API will be the function "getCensus()". In this example, each line of code is commented to clarify what it does. 58 | 59 | The following example uses arguments including 'name' and 'vars'. To browse comprehensive lists of available datasets and variables, see the functions 'listCensusApis()' and 'makeVarlist()' in the censusapi documentation linked at the top of this article, or try the optional sketch below. 
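As mentioned above, 'listCensusApis()' and 'makeVarlist()' can be used to discover dataset names and variable codes before building a query. The short sketch below is one possible way to use them; the argument and column names follow the censusapi documentation, and the search term is just an example, so adjust it to your needs.

```r
# Optional discovery step (assumes library(censusapi) is loaded as above)

# List the available Census APIs and inspect a few identifying columns
apis <- listCensusApis()
head(apis[, c("title", "name", "vintage")])

# Search variable labels in the 2021 ACS 5-year subject tables for "total population"
pop_vars <- makeVarlist(name = "acs/acs5/subject",
                        vintage = 2021,
                        find = "total population",
                        varsearch = "label")
head(pop_vars)
```

With a dataset name and a variable code in hand, the main query below retrieves county-level population estimates with 'getCensus()'.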
60 | 61 | ```r 62 | your_state_code = '01' # Alabama FIPS Code 63 | 64 | # Retrieve county population estimates by state 65 | 66 | pop_estimates <- getCensus(name = "acs/acs5/subject", # The programmatic name of your dataset; see 'listCensusApis()' for options 67 | vars = c("NAME", "S0101_C01_001E"), #list of variables to get 68 | region = "county:*", #geography to get 69 | vintage = "2021",#year 70 | key=user_key#API key 71 | ) 72 | head(pop_estimates,n=10) #Display first entries of 'pop_estimates' 73 | ``` 74 | 75 | ``` 76 | ## state county NAME S0101_C01_001E 77 | ## 1 01 001 Autauga County, Alabama 58239 78 | ## 2 01 003 Baldwin County, Alabama 227131 79 | ## 3 01 005 Barbour County, Alabama 25259 80 | ## 4 01 007 Bibb County, Alabama 22412 81 | ## 5 01 009 Blount County, Alabama 58884 82 | ## 6 01 011 Bullock County, Alabama 10386 83 | ## 7 01 013 Butler County, Alabama 19181 84 | ## 8 01 015 Calhoun County, Alabama 116425 85 | ## 9 01 017 Chambers County, Alabama 34834 86 | ## 10 01 019 Cherokee County, Alabama 24975 87 | ``` 88 | 89 | The previous dataframe 'pop_estimates' includes counties from every state because of the wildcard '*' in the 'region' argument. Now we want to filter the dataset so we are left with only Alabama. Additionally, the US Census API utilizes codes for variables. To search for variables, use the function 'makeVarlist()'; additional usage information can be found in the censusapi package documentation PDF. 90 | 91 | 92 | ```r 93 | # Filter 94 | alabama_counties <- pop_estimates[pop_estimates$state == your_state_code,] 95 | 96 | # Extract population 97 | alabama_counties_populations <- data.frame(County = alabama_counties$NAME, Population = alabama_counties$S0101_C01_001E) 98 | 99 | # Print population 100 | head(alabama_counties_populations,n=10) #Display first entries of 'alabama_counties_populations' 101 | ``` 102 | 103 | ``` 104 | ## County Population 105 | ## 1 Autauga County, Alabama 58239 106 | ## 2 Baldwin County, Alabama 227131 107 | ## 3 Barbour County, Alabama 25259 108 | ## 4 Bibb County, Alabama 22412 109 | ## 5 Blount County, Alabama 58884 110 | ## 6 Bullock County, Alabama 10386 111 | ## 7 Butler County, Alabama 19181 112 | ## 8 Calhoun County, Alabama 116425 113 | ## 9 Chambers County, Alabama 34834 114 | ## 10 Cherokee County, Alabama 24975 115 | ``` 116 | 117 | Now we have successfully used the U.S. Census API to store population estimates from Alabama counties in the variable 'alabama_counties_populations'. 118 | 119 | 120 | ## 2. Get Population Estimates Over a Range of Years 121 | 122 | We can use code similar to the previous example, but we will loop through the population estimate datasets by year. 
123 | 124 | ```r 125 | # Define the range of years 126 | years <- c(2016:2021) 127 | 128 | # Create an empty data frame to store the population estimates 129 | pop_estimates_all <- data.frame() 130 | 131 | # Loop over the years 132 | for (year in years) { 133 | # Retrieve population estimates for Tuscaloosa County 134 | pop_estimates <- getCensus(name = "acs/acs5/subject", 135 | vars = c("NAME", "S0101_C01_001E"), 136 | region = "county:*", 137 | vintage = as.character(year), 138 | key= user_key) 139 | alabama <- pop_estimates[pop_estimates$state == your_state_code,] 140 | 141 | 142 | # Add the population estimate and year to the data frame 143 | pop_estimates_all <- rbind(pop_estimates_all, data.frame(Year = year, Population = alabama$S0101_C01_001E,Name= alabama$NAME)) 144 | } 145 | 146 | # Print the resulting data frame 147 | head(pop_estimates_all,n=10) 148 | ``` 149 | 150 | ``` 151 | ## Year Population Name 152 | ## 1 2016 21975 Monroe County, Alabama 153 | ## 2 2016 33433 Lawrence County, Alabama 154 | ## 3 2016 153947 Lee County, Alabama 155 | ## 4 2016 30239 Marion County, Alabama 156 | ## 5 2016 20042 Pickens County, Alabama 157 | ## 6 2016 13285 Sumter County, Alabama 158 | ## 7 2016 659096 Jefferson County, Alabama 159 | ## 8 2016 13287 Choctaw County, Alabama 160 | ## 9 2016 31573 Franklin County, Alabama 161 | ## 10 2016 20066 Marengo County, Alabama 162 | ``` 163 | 164 | ## 3. Plot Population Change 165 | 166 | We will use the data we retrieved in example 2 and then calculate and graph the percent change in population per county. 167 | 168 | ```r 169 | # Filter for the population in 2016 170 | pop_2016 <- pop_estimates_all[pop_estimates_all$Year == 2016, ] 171 | 172 | # Filter for the population in 2021 173 | pop_2021 <- pop_estimates_all[pop_estimates_all$Year == 2021, ] 174 | 175 | # Calculate the percent change in population 176 | pop_pct_change <- data.frame(County=pop_2021$Name,Pct_Change =round(((as.numeric( pop_2021$Population)-as.numeric(pop_2016$Population))/as.numeric(pop_2016$Population)),4)) # (pop_2021-pop_2016)/pop_2016 rounded to 5 digits 177 | 178 | # Next we're going to remove the 'County, Alabama' because it is repetitive. 179 | pop_pct_change[]<-lapply(pop_pct_change,function(x) (sub(' County, Alabama','',x))) 180 | 181 | head(pop_pct_change,n=10) 182 | ``` 183 | 184 | ``` 185 | ## County Pct_Change 186 | ## 1 Autauga 1.6502 187 | ## 2 Baldwin 5.7936 188 | ## 3 Barbour -0.8359 189 | ## 4 Bibb -0.2588 190 | ## 5 Blount 1.938 191 | ## 6 Bullock -0.2182 192 | ## 7 Butler -0.9709 193 | ## 8 Calhoun 7.7623 194 | ## 9 Chambers 0.1033 195 | ## 10 Cherokee 0.2446 196 | ``` 197 | Next we will create a plot of the percent change in population by county in Alabama from the years 2016 to 2021 using the package ggplot2. 
198 | 199 | ```r 200 | library(ggplot2) #library for creating graphics 201 | options(repr.plot.width = 100, repr.plot.height =2) 202 | ggplot(pop_pct_change, aes(x = reorder(pop_pct_change$County, as.numeric(pop_pct_change$Pct_Change)), y = as.numeric(pop_pct_change$Pct_Change))) + 203 | geom_point(orientation = 'y') + 204 | ylab("Percent Change in Population") + 205 | xlab("AL County") + 206 | theme_bw()+ 207 | theme( 208 | panel.grid.major.y = element_blank(), 209 | panel.grid.minor.y = element_blank(), 210 | panel.grid.major.x = element_line(colour = "grey80", linetype = "dashed"), 211 | axis.text.x = element_text(angle = 90, hjust = 1, vjust=.2, size= 7 ) 212 | )+ 213 | geom_hline(yintercept=0)+ 214 | ggtitle("Percent Change in Population by County from 2016 to 2021") 215 | ``` 216 | 217 | ![](US_Census_Data_in_R_files/figure-html/plot-popchg-1.png) 218 | -------------------------------------------------------------------------------- /src/r/CASCommonChemR_files/figure-html/Display-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r/CASCommonChemR_files/figure-html/Display-1.png -------------------------------------------------------------------------------- /src/r/CASCommonChemR_files/figure-html/disp-list-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r/CASCommonChemR_files/figure-html/disp-list-1.png -------------------------------------------------------------------------------- /src/r/CASCommonChemR_files/figure-html/disp-list-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r/CASCommonChemR_files/figure-html/disp-list-2.png -------------------------------------------------------------------------------- /src/r/CASCommonChemR_files/figure-html/disp-list-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r/CASCommonChemR_files/figure-html/disp-list-3.png -------------------------------------------------------------------------------- /src/r/CASCommonChemR_files/figure-html/disp-list-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r/CASCommonChemR_files/figure-html/disp-list-4.png -------------------------------------------------------------------------------- /src/r/CASCommonChemR_files/figure-html/disp-list-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r/CASCommonChemR_files/figure-html/disp-list-5.png -------------------------------------------------------------------------------- /src/r/CASCommonChemR_files/figure-html/hist-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r/CASCommonChemR_files/figure-html/hist-1.png -------------------------------------------------------------------------------- /src/r/CASCommonChemR_files/figure-html/kernel-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r/CASCommonChemR_files/figure-html/kernel-1.png -------------------------------------------------------------------------------- /src/r/Chronam_in_R_Adam_Vers_files/figure-html/unnamed-chunk-12-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r/Chronam_in_R_Adam_Vers_files/figure-html/unnamed-chunk-12-1.png -------------------------------------------------------------------------------- /src/r/Chronam_in_R_Adam_Vers_files/figure-html/unnamed-chunk-21-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r/Chronam_in_R_Adam_Vers_files/figure-html/unnamed-chunk-21-1.png -------------------------------------------------------------------------------- /src/r/College_Scorecard_R_files/figure-html/program-percentage-distribution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r/College_Scorecard_R_files/figure-html/program-percentage-distribution.png -------------------------------------------------------------------------------- /src/r/R_WorldBank_Markdown_files/figure-html/unnamed-chunk-16-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r/R_WorldBank_Markdown_files/figure-html/unnamed-chunk-16-1.png -------------------------------------------------------------------------------- /src/r/R_WorldBank_Markdown_files/figure-html/unnamed-chunk-17-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r/R_WorldBank_Markdown_files/figure-html/unnamed-chunk-17-1.png -------------------------------------------------------------------------------- /src/r/R_WorldBank_Markdown_files/figure-html/unnamed-chunk-18-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r/R_WorldBank_Markdown_files/figure-html/unnamed-chunk-18-1.png -------------------------------------------------------------------------------- /src/r/SEC_EDGAR_API_R_files/figure-html/unnamed-chunk-10-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r/SEC_EDGAR_API_R_files/figure-html/unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /src/r/SEC_EDGAR_API_R_files/figure-html/unnamed-chunk-5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r/SEC_EDGAR_API_R_files/figure-html/unnamed-chunk-5-1.png -------------------------------------------------------------------------------- /src/r/USA_Spending_R_files/figure-html/unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r/USA_Spending_R_files/figure-html/unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /src/r/USA_Spending_R_files/figure-html/unnamed-chunk-6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r/USA_Spending_R_files/figure-html/unnamed-chunk-6-1.png -------------------------------------------------------------------------------- /src/r/USA_Spending_R_files/figure-html/unnamed-chunk-9-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r/USA_Spending_R_files/figure-html/unnamed-chunk-9-1.png -------------------------------------------------------------------------------- /src/r/arXiv_API_in_R_files/figure-html/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r/arXiv_API_in_R_files/figure-html/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /src/r/arXiv_API_in_R_files/figure-html/unnamed-chunk-6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r/arXiv_API_in_R_files/figure-html/unnamed-chunk-6-1.png -------------------------------------------------------------------------------- /src/r/arxiv.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: \...in R 3 | output: 4 | html_document: 5 | keep_md: true 6 | --- 7 | 8 | # arXiv API in R 9 | 10 | by Adam M. Nguyen 11 | 12 | 13 | The arXiv API provides programmatic access to metadata about scholarly papers hosted on the arXiv preprint server, covering fields such as physics, mathematics, computer science, and more. This allows users to automate the discovery and retrieval of preprint information for research, text mining, or integration into academic workflows. 
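The tutorial below works through the aRxiv R package, which wraps this service. Purely for orientation, here is a minimal sketch of what a raw request to the arXiv API's Atom endpoint looks like; it assumes the httr package (not otherwise used in this tutorial), and the search term is just an example.

```r
# Minimal raw request to the arXiv API Atom endpoint (wrapped by the aRxiv package)
library(httr)

resp <- GET("http://export.arxiv.org/api/query",
            query = list(search_query = "ti:hydrodynamics",
                         start = 0,
                         max_results = 3))

# The response body is an Atom XML feed; preview the first few hundred characters
substr(content(resp, as = "text", encoding = "UTF-8"), 1, 400)
```

The remainder of this tutorial uses aRxiv functions, which handle request construction and response parsing for you.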
14 | 15 | ### API Resources 16 | 17 | - **Documentation** 18 | - [arXiv](https://arxiv.org/) 19 | - [arXiv API Access Information](https://info.arxiv.org/help/api/index.html) 20 | - [aRxiv: R Interface to the arXiv API](https://cran.r-project.org/web/packages/aRxiv/index.html) 21 | 22 | - **Terms** 23 | - [arXiv Terms of Use](https://info.arxiv.org/help/api/tou.html) 24 | 25 | *Acknowledgment: Thank you to arXiv for use of its open access interoperability* 26 | 27 | These recipe examples were tested on December 1, 2023. 28 | 29 | Hosted and maintained by Cornell University, arXiv is an open-access and free distribution service containing nearly 2.5 million scholarly articles in fields including physics, mathematics, computer science, quantitative biology, quantitative finance, statistics, electrical engineering and systems science, and economics at the time of writing. In this tutorial, we will introduce how to use the API with some examples, but for larger bulk downloads of data from arXiv, we recommend [Kaggle's arxiv Dataset](https://www.kaggle.com/datasets/Cornell-University/arxiv/data), which is updated monthly with the full arXiv data set and metadata. 30 | 31 | ## Setup 32 | 33 | ### Load Library 34 | 35 | Run the following lines of code to load the 'aRxiv' library. If you have not already installed the package, first run 'install.packages('aRxiv')'. aRxiv is the package we will use to interface with the arXiv API. 36 | 37 | ```r 38 | # Load necessary libraries 39 | library(aRxiv) 40 | ``` 41 | 42 | ### Retrieving Categories 43 | 44 | Before we get started, a useful dataset provided by the aRxiv package is 'arxiv_cats'. It lists each arXiv subject classification's abbreviation and corresponding description. Categories are especially important in forming queries to the API, so we mention them here first. 45 | 46 | ```r 47 | # Here are the first 10 categories to showcase the dataset 48 | head(arxiv_cats,n=10) 49 | ``` 50 | 51 | ``` 52 | ## abbreviation description 53 | ## 1 stat.AP Statistics - Applications 54 | ## 2 stat.CO Statistics - Computation 55 | ## 3 stat.ML Statistics - Machine Learning 56 | ## 4 stat.ME Statistics - Methodology 57 | ## 5 stat.TH Statistics - Theory 58 | ## 6 q-bio.BM Quantitative Biology - Biomolecules 59 | ## 7 q-bio.CB Quantitative Biology - Cell Behavior 60 | ## 8 q-bio.GN Quantitative Biology - Genomics 61 | ## 9 q-bio.MN Quantitative Biology - Molecular Networks 62 | ## 10 q-bio.NC Quantitative Biology - Neurons and Cognition 63 | ``` 64 | 65 | ## 1. Basic Search 66 | 67 | The most broadly useful function in the package is likely 'arxiv_search()'. It programmatically searches the arXiv repository and returns 15 columns of information, including 'id', 'title', 'summary', and more. We will showcase this function by searching for papers with the term 'Hydrodynamics' in the title, then extracting the authors to see who has the most publications. 
68 | 69 | ```r 70 | # Search for Hydrodynamics papers 71 | hydrodynamic_search <- arxiv_search('ti:Hydrodynamics', batchsize =410, limit=10000, force = TRUE) 72 | ``` 73 | 74 | ``` 75 | ## retrieved batch 1 76 | ``` 77 | 78 | ``` 79 | ## retrieved batch 2 80 | ``` 81 | 82 | ``` 83 | ## retrieved batch 3 84 | ``` 85 | 86 | ``` 87 | ## retrieved batch 4 88 | ``` 89 | 90 | ``` 91 | ## retrieved batch 5 92 | ``` 93 | 94 | ``` 95 | ## retrieved batch 6 96 | ``` 97 | 98 | ``` 99 | ## retrieved batch 7 100 | ``` 101 | 102 | ``` 103 | ## retrieved batch 8 104 | ``` 105 | 106 | ``` 107 | ## retrieved batch 9 108 | ``` 109 | 110 | ``` 111 | ## retrieved batch 10 112 | ``` 113 | 114 | ```r 115 | # Extract out the authors 116 | authors <- hydrodynamic_search[, c('title', 'authors')] 117 | 118 | # Show first few entries 119 | head(authors) 120 | ``` 121 | 122 | ``` 123 | ## title 124 | ## 1 A finite model of two-dimensional ideal hydrodynamics 125 | ## 2 Hydrodynamic Stability Analysis of Burning Bubbles in Electroweak Theory\n and in QCD 126 | ## 3 Hydrodynamics of Relativistic Fireballs 127 | ## 4 Comparison of Spectral Method and Lattice Boltzmann Simulations of\n Two-Dimensional Hydrodynamics 128 | ## 5 Classical differential geometry and integrability of systems of\n hydrodynamic type 129 | ## 6 Hydrodynamic Spinodal Decomposition: Growth Kinetics and Scaling\n Functions 130 | ## authors 131 | ## 1 J. S. Dowker|A. Wolski 132 | ## 2 P. Huet|K. Kajantie|R. G. Leigh|B. -H. Liu|L. McLerran 133 | ## 3 Tsvi Piran|Amotz Shemi|Ramesh Narayan 134 | ## 4 D. O. Martinez|W. H. Matthaeus|S. Chen|D. C. Montgomery 135 | ## 5 S. P. Tsarev 136 | ## 6 F. J. Alexander|S. Chen|D. W. Grunau 137 | ``` 138 | 139 | ```r 140 | # Split the 'authors' column in a list of individuals 141 | author_lists <- strsplit(authors[,'authors'], split = "|", fixed = TRUE) 142 | 143 | # List Frequency of Author Occurrences 144 | co_freq <- table(unlist(author_lists)) 145 | 146 | # Order and Format as Data frame 147 | ordered_cofreq <- as.data.frame(co_freq[order(co_freq, decreasing = TRUE)]) 148 | 149 | # Here are the first highest publishers in Hydrodynamics as available by the arXiv repository 150 | head(ordered_cofreq) 151 | ``` 152 | 153 | ``` 154 | ## Var1 Freq 155 | ## 1 Radoslaw Ryblewski 31 156 | ## 2 Tetsufumi Hirano 31 157 | ## 3 Wojciech Florkowski 30 158 | ## 4 Volker Springel 29 159 | ## 5 Michael Strickland 28 160 | ## 6 T. Kodama 28 161 | ``` 162 | 163 | ### Visualization 164 | 165 | Additionally, we can create a visualization using the 'ggplot2' library. See the following code to see how to do so and what is produced. 166 | 167 | 168 | ```r 169 | library(ggplot2) 170 | # Visualize the top 20 highest publishers 171 | ggplot(head(ordered_cofreq,n=20), aes(x = Var1, y = Freq)) + 172 | geom_bar(stat = "identity", fill = "#D16103") + 173 | labs(x = "Author", y = "Number of Publications", title = "Top 20 Most Published Authors in Hydrodynamics in arXiv") + 174 | theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5)) # Rotate x-axis labels for readability 175 | ``` 176 | 177 | ![](arXiv_API_in_R_files/figure-html/unnamed-chunk-3-1.png) 178 | 179 | ## 2. Retrieving Number of Query Results 180 | 181 | Using the aRxiv package you can also retrieve counts of papers given some query. For example, we can see how many papers our previous 'Hydrodynamics' query returns. 182 | 183 | 184 | ```r 185 | # How many papers titles contain hydroynamics? 
186 | 187 | arxiv_count('ti:"hydrodynamics"') 188 | ``` 189 | 190 | ``` 191 | ## [1] 6385 192 | ``` 193 | We can also see how many HEP-th papers there are. 194 | 195 | 196 | ```r 197 | # How many papers fall under the HEP-th category? 198 | 199 | arxiv_count("cat: HEP-th") 200 | ``` 201 | 202 | ``` 203 | ## [1] 162439 204 | ``` 205 | And finally, we can see how many HEP-th papers have been published throughout the years. 206 | 207 | 208 | ```r 209 | # Create a vector of years we are interested in, 1991:2023 210 | years <- 1991:2023 211 | 212 | # Create empty vector to append annual counts to 213 | arxiv_counts <- c() 214 | 215 | # Loop through years to create list of counts per year 216 | for(year in years){ 217 | arxiv_counts <- c(arxiv_counts, arxiv_count(paste0('cat:HEP-th AND submittedDate:[',year,' TO ',year+1,']'))) 218 | } 219 | arxiv_counts_df <- as.data.frame(cbind(1991:2023,arxiv_counts)) 220 | # Simple base R plot of the data 221 | plot(arxiv_counts_df, main = 'Theoretical High Energy Physics Papers Published per Year', xlab = 'Year', ylab='Number of Papers') 222 | ``` 223 | 224 | ![](arXiv_API_in_R_files/figure-html/unnamed-chunk-6-1.png) 225 | 226 | ## 3. Proportion of Preprints in Hydrodynamics Papers 227 | 228 | arXiv's repository contains both electronic preprints and links to the published postprint (e.g., a version of record DOI). We will explore the proportion of preprints in the previous 'Hydrodynamics' query. This is possible because the 'doi' column returned by the query is empty for articles that do not have a DOI, i.e., preprints. 229 | 230 | ```r 231 | # Count the number of preprints by looking for empty 'doi' values 232 | hydrodynamic_preprint_count <- sum(hydrodynamic_search$doi == "") 233 | 234 | # Calculate a percentage of preprints 235 | percentage_preprints <- (hydrodynamic_preprint_count / nrow(hydrodynamic_search)) * 100 236 | 237 | paste0('The percentage of preprints is ',round(percentage_preprints, digits = 2),'%.') 238 | ``` 239 | 240 | ``` 241 | ## [1] "The percentage of preprints is 23.98%." 
242 | ``` 243 | -------------------------------------------------------------------------------- /src/r/figure/Display-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r/figure/Display-1.png -------------------------------------------------------------------------------- /src/r/figure/disp list-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r/figure/disp list-1.png -------------------------------------------------------------------------------- /src/r/figure/disp list-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r/figure/disp list-2.png -------------------------------------------------------------------------------- /src/r/figure/disp list-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r/figure/disp list-3.png -------------------------------------------------------------------------------- /src/r/figure/disp list-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r/figure/disp list-4.png -------------------------------------------------------------------------------- /src/r/figure/disp list-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r/figure/disp list-5.png -------------------------------------------------------------------------------- /src/r/figure/hist-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r/figure/hist-1.png -------------------------------------------------------------------------------- /src/r/figure/kernel-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r/figure/kernel-1.png -------------------------------------------------------------------------------- /src/r/figure/plot popchg-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r/figure/plot popchg-1.png -------------------------------------------------------------------------------- /src/r/figure/unnamed-chunk-12-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r/figure/unnamed-chunk-12-1.png -------------------------------------------------------------------------------- /src/r/figure/unnamed-chunk-16-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r/figure/unnamed-chunk-16-1.png -------------------------------------------------------------------------------- /src/r/figure/unnamed-chunk-17-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r/figure/unnamed-chunk-17-1.png -------------------------------------------------------------------------------- /src/r/figure/unnamed-chunk-18-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r/figure/unnamed-chunk-18-1.png -------------------------------------------------------------------------------- /src/r/figure/visual-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UA-Libraries-Research-Data-Services/UALIB_ScholarlyAPI_Cookbook/8bcdf5b57804d36745d113b3e586a47513622920/src/r/figure/visual-1.png -------------------------------------------------------------------------------- /src/r/output.json: -------------------------------------------------------------------------------- 1 | {"status":["ok"],"message-type":["work"],"message-version":["1.0.0"],"message":{"indexed":{"date-parts":[[2023,4,5]],"date-time":["2023-04-05T06:53:08Z"],"timestamp":[1680677588699]},"reference-count":[16],"publisher":["Springer Science and Business Media LLC"],"issue":["1"],"license":[{"content-version":"tdm","delay-in-days":0,"URL":"http://creativecommons.org/licenses/by/2.0","start.date-parts":[[2012,7,6]],"start.date-time":"2012-07-06T00:00:00Z","start.timestamp":1341532800000}],"content-domain":{"domain":[],"crossmark-restriction":[false]},"short-container-title":["J Cheminform"],"published-print":{"date-parts":[[2012,12]]},"DOI":["10.1186/1758-2946-4-12"],"type":["journal-article"],"created":{"date-parts":[[2012,7,6]],"date-time":["2012-07-06T12:14:34Z"],"timestamp":[1341576874000]},"source":["Crossref"],"is-referenced-by-count":[38],"title":["The Molecule Cloud - compact visualization of large collections of molecules"],"prefix":["10.1186"],"volume":["4"],"author":[{"given":"Peter","family":"Ertl","sequence":"first","affiliation":[]},{"given":"Bernhard","family":"Rohde","sequence":"additional","affiliation":[]}],"member":["297"],"published-online":{"date-parts":[[2012,7,6]]},"reference":[{"key":"336_CR1","doi-asserted-by":"publisher","first-page":"77","DOI":"10.1007/s10822-011-9487-0","volume":"26","author":"E Martin","year":"2011","unstructured":"Martin E, Ertl P, Hunt P, Duca J, Lewis R: Gazing into the crystal ball; the future of computer-aided drug design. J Comp-Aided Mol Des. 
2011, 26: 77-79.","journal-title":"J Comp-Aided Mol Des"},{"key":"336_CR2","doi-asserted-by":"publisher","first-page":"2174","DOI":"10.1021/ci2001428","volume":"26","author":"SR Langdon","year":"2011","unstructured":"Langdon SR, Brown N, Blagg J: Scaffold diversity of exemplified medicinal chemistry space. J Chem Inf Model. 2011, 26: 2174-2185.","journal-title":"J Chem Inf Model"},{"key":"336_CR3","doi-asserted-by":"publisher","first-page":"8732","DOI":"10.1021/ja902302h","volume":"131","author":"LC Blum","year":"2009","unstructured":"Blum LC, Reymond J-C: 970 Million druglike small molecules for virtual screening in the chemical universe database GDB-13. J Am Chem Soc. 2009, 131: 8732-8733. 10.1021/ja902302h.","journal-title":"J Am Chem Soc"},{"key":"336_CR4","doi-asserted-by":"publisher","first-page":"156","DOI":"10.2174/157340908785747410","volume":"4","author":"J Dubois","year":"2008","unstructured":"Dubois J, Bourg S, Vrain C, Morin-Allory L: Collections of compounds - how to deal with them?. Cur Comp-Aided Drug Des. 2008, 4: 156-168. 10.2174/157340908785747410.","journal-title":"Cur Comp-Aided Drug Des"},{"key":"336_CR5","doi-asserted-by":"publisher","first-page":"322","DOI":"10.2174/157340908786786010","volume":"4","author":"JL Medina-Franco","year":"2008","unstructured":"Medina-Franco JL, Martinez-Mayorga K, Giulianotti MA, Houghten RA, Pinilla C: Visualization of the chemical space in drug discovery. Cur Comp-Aided Drug Des. 2008, 4: 322-333. 10.2174/157340908786786010.","journal-title":"Cur Comp-Aided Drug Des"},{"key":"336_CR6","doi-asserted-by":"publisher","first-page":"47","DOI":"10.1021/ci600338x","volume":"47","author":"A Schuffenhauer","year":"2007","unstructured":"Schuffenhauer A, Ertl P, Roggo S, Wetzel S, Koch MA, Waldmann H: The Scaffold Tree - visualization of the scaffold universe by hierarchical scaffold classification. J Chem Inf Model. 2007, 47: 47-58. 10.1021/ci600338x.","journal-title":"J Chem Inf Model"},{"key":"336_CR7","doi-asserted-by":"publisher","first-page":"366","DOI":"10.1002/minf.201000019","volume":"29","author":"S Langdon","year":"2010","unstructured":"Langdon S, Ertl P, Brown N: Bioisosteric replacement and scaffold hopping in lead generation and optimization. Mol Inf. 2010, 29: 366-385. 10.1002/minf.201000019.","journal-title":"Mol Inf"},{"key":"336_CR8","doi-asserted-by":"publisher","first-page":"4443","DOI":"10.1021/jo8001276","volume":"73","author":"AH Lipkus","year":"2008","unstructured":"Lipkus AH, Yuan Q, Lucas KA, Funk SA, Bartelt WF, Schenck RJ, Trippe AJ: Structural diversity of organic chemistry. A scaffold analysis of the CAS Registry. J Org Chem. 2008, 73: 4443-4451. 10.1021/jo8001276.","journal-title":"J Org Chem"},{"key":"336_CR9","unstructured":"mib 2010.10, Molinspiration Cheminformatics: \n http://www.molinspiration.com\n \n ,"},{"key":"336_CR10","unstructured":"Bernhard R: Avalon Cheminformatics Toolkit. \n http://sourceforge.net/projects/avalontoolkit/\n \n ,"},{"key":"336_CR11","doi-asserted-by":"publisher","first-page":"D255","DOI":"10.1093/nar/gkp965","volume":"38","author":"Y Wang","year":"2009","unstructured":"Wang Y, Bolton E, Dracheva S, Karapetyan K, Shoemaker BA, Suzek TO, Wang J, Xiao J, Zhang J, Bryant SH: An overview of the PubChem BioAssay resource. Nucleic Acids Res. 
2009, 38: D255-D266.","journal-title":"Nucleic Acids Res"},{"key":"336_CR12","doi-asserted-by":"publisher","first-page":"177","DOI":"10.1021/ci049714+","volume":"45","author":"JJ Irwin","year":"2004","unstructured":"Irwin JJ, Shoichet BK: ZINC − a free database of commercially available compounds for virtual screening. J Chem Inf Model. 2004, 45: 177-182.","journal-title":"J Chem Inf Model"},{"key":"336_CR13","doi-asserted-by":"publisher","first-page":"D1100","DOI":"10.1093/nar/gkr777","volume":"40","author":"A Gaulton","year":"2012","unstructured":"Gaulton A, Bellis LJ, Bento AP, Chambers J, Davies M, Hersey A, Light Y, McGlinchey S, Michalovich D, Al-Lazikani B, Overington JP: ChEMBL: a large-scale bioactivity database for drug discovery. Nucleic Acids Res. 2012, 40: D1100-D1107. 10.1093/nar/gkr777.","journal-title":"Nucleic Acids Res"},{"key":"336_CR14","doi-asserted-by":"publisher","first-page":"347","DOI":"10.1016/j.cbpa.2010.02.018","volume":"14","author":"ME Welsch","year":"2010","unstructured":"Welsch ME, Snyder SA, Stockwell BR: Privileged scaffolds for library design and drug discovery. Curr Opin Chem Biol. 2010, 14: 347-361. 10.1016/j.cbpa.2010.02.018.","journal-title":"Curr Opin Chem Biol"},{"key":"336_CR15","doi-asserted-by":"publisher","first-page":"374","DOI":"10.1021/ci0255782","volume":"43","author":"P Ertl","year":"2003","unstructured":"Ertl P: Cheminformatics analysis of organic substituents: Identification of the most common substituents, calculation of substituent properties, and automatic identification of drug-like bioisosteric groups. J Chem Inf Comp Sci. 2003, 43: 374-380. 10.1021/ci0255782.","journal-title":"J Chem Inf Comp Sci"},{"key":"336_CR16","unstructured":"TagCrowd: \n http://tagcrowd.com"}],"container-title":["Journal of Cheminformatics"],"original-title":[],"language":["en"],"link":[{"URL":"http://link.springer.com/content/pdf/10.1186/1758-2946-4-12.pdf","content-type":"application/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http://link.springer.com/article/10.1186/1758-2946-4-12/fulltext.html","content-type":"text/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http://link.springer.com/content/pdf/10.1186/1758-2946-4-12.pdf","content-type":"application/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,6,24]],"date-time":["2019-06-24T14:22:07Z"],"timestamp":[1561386127000]},"score":[1],"resource":{"primary":{"URL":["https://jcheminf.biomedcentral.com/articles/10.1186/1758-2946-4-12"]}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012,7,6]]},"references-count":[16],"journal-issue":{"issue":["1"],"published-print":{"date-parts":[[2012,12]]}},"alternative-id":["336"],"URL":["http://dx.doi.org/10.1186/1758-2946-4-12"],"relation":{},"ISSN":["1758-2946"],"issn-type":[{"value":"1758-2946","type":"electronic"}],"subject":["Library and Information Sciences","Computer Graphics and Computer-Aided Design","Physical and Theoretical Chemistry","Computer Science Applications"],"published":{"date-parts":[[2012,7,6]]},"article-number":["12"]}} 2 | -------------------------------------------------------------------------------- /src/r/sdirect.md: -------------------------------------------------------------------------------- 1 | # ScienceDirect API in R 2 | 3 | by Michael T. 
Moen 4 | 5 | These recipe examples demonstrate how to use Elsevier’s [ScienceDirect API](https://dev.elsevier.com/) to retrieve full-text articles in various formats (XML, text). 6 | 7 | *This tutorial content is intended to help facilitate academic research. Please check your institution for their Text and Data Mining or related License Agreement with Elsevier.* 8 | 9 | - **Documentation** 10 | - [ScienceDirect API](https://dev.elsevier.com/) 11 | - [ScienceDirect API Documentation](https://dev.elsevier.com/sd_api_spec.html) 12 | 13 | - **Terms** 14 | - [ScienceDirect API Terms of Use](https://dev.elsevier.com/api_key_settings.html) 15 | 16 | - **Data Reuse** 17 | - [Elsevier Text & Data Mining](https://dev.elsevier.com/tecdoc_text_mining.html) 18 | 19 | > **Note:** See your institution's rate limit in the [ScienceDirect API Terms of Use](https://dev.elsevier.com/api_key_settings.html). 20 | 21 | 22 | *These recipe examples were tested on February 7, 2025.* 23 | 24 | ## Setup 25 | 26 | ### Import Libraries 27 | 28 | ```r 29 | library(httr) 30 | ``` 31 | 32 | ### Import API Key 33 | 34 | An API key is required to access the ScienceDirect API. Registration is available on the [Elsevier developer portal](https://dev.elsevier.com/). The key is imported from an environment variable below: 35 | 36 | ```r 37 | myAPIKey <- Sys.getenv("sciencedirect_key") 38 | ``` 39 | 40 | ### Identifier Note 41 | 42 | We will use DOIs as the article identifiers. See our Crossref and Scopus API tutorials for workflows on how to create lists of DOIs and identfiers for specific searches and journals. The Elsevier ScienceDirect Article (Full-Text) API also accepts other identifiers like Scopus IDs and PubMed IDs (see API specification documents linked above). 43 | 44 | ## 1. Retrieve full-text XML of an article 45 | 46 | ```r 47 | # For XML download 48 | elsevier_url <- "https://api.elsevier.com/content/article/doi/" 49 | doi1 <- '10.1016/j.tetlet.2017.07.080' # Example Tetrahedron Letters article 50 | fulltext1 <- GET(paste0(elsevier_url, doi1, "?APIKey=", myAPIKey, "&httpAccept=text/xml")) 51 | 52 | # Save to file 53 | writeLines(content(fulltext1, "text"), "fulltext1.xml") 54 | ``` 55 | 56 | ## 2. Retrieve plain text of an article 57 | 58 | ```r 59 | # For simplified text download 60 | doi2 <- '10.1016/j.tetlet.2022.153680' # Example Tetrahedron Letters article 61 | fulltext2 <- GET(paste0(elsevier_url, doi2, "?APIKey=", myAPIKey, "&httpAccept=text/plain")) 62 | 63 | # Save to file 64 | writeLines(content(fulltext2, "text"), "fulltext2.txt") 65 | ``` 66 | 67 | ## 3. Retrieve full-text in a loop 68 | 69 | ```r 70 | # Make a list of 5 DOIs for testing 71 | dois <- c('10.1016/j.tetlet.2018.10.031', 72 | '10.1016/j.tetlet.2018.10.033', 73 | '10.1016/j.tetlet.2018.10.034', 74 | '10.1016/j.tetlet.2018.10.038', 75 | '10.1016/j.tetlet.2018.10.041') 76 | ``` 77 | 78 | ```r 79 | for (doi in dois) { 80 | article <- GET(paste0(elsevier_url, doi, "?APIKey=", myAPIKey, "&httpAccept=text/plain")) 81 | doi_name <- gsub("/", "_", doi) 82 | writeLines(content(article, "text"), paste0(doi_name, "_plain_text.txt")) 83 | Sys.sleep(1) 84 | } 85 | ``` 86 | -------------------------------------------------------------------------------- /src/r/sec-edgar.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: \...in R 3 | output: 4 | html_document: 5 | keep_md: true 6 | --- 7 | 8 | # SEC EDGAR API in R 9 | 10 | by Adam M. 
Nguyen 11 | 12 | These recipe examples were tested on December 1, 2023. 13 | 14 | The U.S. Securities and Exchange Commission (SEC) allows free public access to documents filed by publicly traded companies in the Electronic Data Gathering, Analysis, and Retrieval (EDGAR) system. 15 | 16 | Please see the following resources for more information on API usage: 17 | 18 | ### Documentation 19 | - [SEC EDGAR](https://www.sec.gov/search-filings) 20 | - [SEC EDGAR API Documentation](https://www.sec.gov/search-filings/edgar-application-programming-interfaces) 21 | - [SEC EDGAR Search Assistance](https://www.sec.gov/search-filings/edgar-search-assistance/accessing-edgar-data) 22 | - [List of All CIKs](https://www.sec.gov/Archives/edgar/cik-lookup-data.txt) 23 | 24 | ### Terms of Use 25 | - [SEC Website Policies](https://www.sec.gov/privacy#security) 26 | 27 | ### Data Reuse 28 | - [SEC Website Dissemination Policy](https://www.sec.gov/about/privacy-information#dissemination) 29 | 30 | ***NOTE:*** Sending more than 10 requests per second will result in a temporary IP ban. 31 | 32 | ## Setup 33 | 34 | ### Load libraries 35 | 36 | Run the following lines of code to load the ‘httr’ and ‘jsonlite’ libraries. If you have not already installed them, first run ‘install.packages(c(‘httr’,’jsonlite’))’. 37 | 38 | ```r 39 | # Load necessary libraries 40 | library(httr) 41 | library(jsonlite) 42 | ``` 43 | 44 | ### User Info 45 | 46 | The SEC EDGAR API requires you to provide your name and email when sending requests. Simply edit the following variables with your information. 47 | 48 | ```r 49 | # Designate your user info 50 | firstName <- "First" 51 | lastName <- "Last" 52 | email <- "Email@email.com" 53 | ``` 54 | 55 | Alternatively, you can designate environment variables ([click here to see how](https://support.posit.co/hc/en-us/articles/360047157094-Managing-R-with-Rprofile-Renviron-Rprofile-site-Renviron-site-rsession-conf-and-repos-conf)) to access your user information. 56 | 57 | ```r 58 | # Here we simply use the 'Sys.getenv()' function to grab the variables, first, last, and email 59 | firstName <- Sys.getenv("first") 60 | lastName <- Sys.getenv("last") 61 | email <- Sys.getenv("email") 62 | ``` 63 | 64 | ### SEC EDGAR Data Installation 65 | 66 | In addition to the publicly available API, SEC EDGAR data can also be accessed via a bulk data download, which is compiled nightly. This approach is advantageous when working with large datasets, since it does not require making many individual API calls. However, it requires about 15 GB of storage to install and is more difficult to keep up to date. 67 | 68 | To access this data, download the companyfacts.zip file under the ‘Bulk data’ heading at the bottom of [this page](https://www.sec.gov/edgar/sec-api-documentation). 69 | 70 | ## 1. Obtaining Marketing Expenses for Amazon 71 | 72 | To access the data from an individual company, we must first obtain its Central Index Key (CIK) value. These values can be obtained by searching for a company [here](https://www.sec.gov/edgar/search/#). Alternatively, you can find a list of all companies and their CIK values [here](https://www.sec.gov/Archives/edgar/cik-lookup-data.txt). 73 | 74 | For this section of the guide, we’ll use Amazon (AMZN) as an example, which has a CIK of 0001018724.
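If you prefer to look the CIK up programmatically rather than by hand, one option is the SEC's `company_tickers.json` ticker-to-CIK mapping file. The sketch below is illustrative only: the file URL, its field names (`cik_str`, `ticker`), and the `lookup_cik()` helper are assumptions based on the publicly posted mapping file and are not part of this tutorial's workflow, so verify them against the current SEC documentation. It reuses the user info variables defined in the Setup section.

```r
# Illustrative sketch: look up a CIK by ticker symbol using the SEC's
# company_tickers.json mapping file (assumed URL and field names)
lookup_cik <- function(ticker) {
  tickers_url <- "https://www.sec.gov/files/company_tickers.json"
  response <- GET(tickers_url,
                  add_headers("User-agent" = paste0(firstName, ",", lastName, ", ", email)))
  tickers <- fromJSON(rawToChar(response$content))

  # Each entry is a small record with 'cik_str', 'ticker', and 'title' fields
  for (entry in tickers) {
    if (toupper(entry$ticker) == toupper(ticker)) {
      # data.sec.gov endpoints expect the CIK zero-padded to 10 digits
      return(sprintf("%010d", as.integer(entry$cik_str)))
    }
  }
  NA_character_
}

# Example (not run): lookup_cik("AMZN") should match the CIK used below, "0001018724"
```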
75 | 76 | With this CIK, we can now build a URL for the /companyfacts/ endpoint: 77 | 78 | 79 | ```r 80 | # Define the Amazon CIK (Central Index Key) for the SEC EDGAR database 81 | cik <- "0001018724" # Amazon.com Inc. 82 | 83 | # Define the URL for the SEC EDGAR API 84 | base_url <- paste0("https://data.sec.gov/api/xbrl/companyfacts/CIK",cik,".json") 85 | 86 | # Query SEC EDGAR API 87 | amzn_data <- fromJSON(rawToChar(GET(url = base_url, add_headers("User-agent" = paste0(firstName,",",lastName,", ",email)))$content)) 88 | 89 | # Let's check the name of the company of the data retrieved 90 | amzn_data$entityName 91 | ``` 92 | 93 | ``` 94 | ## [1] "AMAZON.COM, INC." 95 | ``` 96 | 97 | Now that we've retrieved the Amazon's data, let's examine their marketing expenses. 98 | 99 | 100 | ```r 101 | # Retrieve marketing expenses in USD 102 | marketing_expenses <- amzn_data$facts$`us-gaap`$MarketingExpense$units$USD 103 | 104 | # Filter through marketing expenses to retrieve one cumulative value per Fiscal Year 105 | marketing_expenses_FY <- marketing_expenses[marketing_expenses$fp=='FY',] 106 | marketing_expenses_FY <- marketing_expenses_FY[!is.na(marketing_expenses_FY$frame),] 107 | 108 | # Marketing Expenses per Fiscal Year 109 | marketing_expenses_FY[c('frame', 'val')] 110 | ``` 111 | 112 | ``` 113 | ## frame val 114 | ## 1 CY2007 3.4400e+08 115 | ## 7 CY2008 4.8200e+08 116 | ## 19 CY2009 6.8000e+08 117 | ## 32 CY2010 1.0290e+09 118 | ## 45 CY2011 1.6300e+09 119 | ## 58 CY2012 2.4080e+09 120 | ## 71 CY2013 3.1330e+09 121 | ## 84 CY2014 4.3320e+09 122 | ## 97 CY2015 5.2540e+09 123 | ## 110 CY2016 7.2330e+09 124 | ## 123 CY2017 1.0069e+10 125 | ## 136 CY2018 1.3814e+10 126 | ## 149 CY2019 1.8878e+10 127 | ## 162 CY2020 2.2008e+10 128 | ## 174 CY2021 3.2551e+10 129 | ## 185 CY2022 4.2238e+10 130 | ``` 131 | 132 | One may be interested in the cumulative sum of the expenses over the years. 133 | 134 | 135 | ```r 136 | # Cumulative sum of marketing expenses over the years 137 | total_marketing_expenses <- sum(marketing_expenses_FY$val) 138 | 139 | # Let's take a look 140 | paste0("Amazon's Total Marketing Expenses: ", total_marketing_expenses, ' USD') 141 | ``` 142 | 143 | ``` 144 | ## [1] "Amazon's Total Marketing Expenses: 1.66083e+11 USD" 145 | ``` 146 | 147 | ### Marketing Expenses Visualization 148 | 149 | Rather than calculating the total marketing expenses documented in the API, let's visualize the marketing expenses by fiscal year using a box plot. 150 | 151 | ```r 152 | # Plot marketing expenses by fiscal year 153 | library(ggplot2) 154 | ggplot(data = marketing_expenses_FY, aes(x = as.numeric(substr(marketing_expenses_FY$frame, 3,6)), y = val))+ 155 | geom_bar(stat = "identity", fill = "#bcbddc", color = "black") + 156 | labs(x = "Fiscal Year", y = "Marketing Expenses (USD)", title = "Marketing Expenses by Fiscal Year") 157 | ``` 158 | 159 | ![](SEC_EDGAR_API_R_files/figure-html/unnamed-chunk-5-1.png) 160 | 161 | ## 2. Number of Shares Outstanding for Tesla 162 | 163 | For another use case, let’s look at the number of shares outstanding for Tesla, which the SEC defines as “Number of shares of common stock outstanding. Common stock represent the ownership interest in a corporation.” Much of the process is conveniently similar. 164 | 165 | 166 | ```r 167 | # Define the Tesla CIK 168 | cik <- "0001318605" # Tesla Inc. 
169 | 170 | # Define the URL for the SEC EDGAR API 171 | base_url <- paste0("https://data.sec.gov/api/xbrl/companyfacts/CIK",cik,".json") 172 | 173 | # Query API 174 | tesla_data <- fromJSON(rawToChar(GET(url = base_url, add_headers("User-agent" = paste0(firstName,",",lastName,", ",email)))$content)) 175 | 176 | # Check the name of the company of the data retrieved 177 | tesla_data$entityName 178 | ``` 179 | 180 | ``` 181 | ## [1] "Tesla, Inc." 182 | ``` 183 | 184 | ```r 185 | # Retrieve Shares Outstanding 186 | shares_outstanding <- tesla_data$facts$`us-gaap`$CommonStockSharesOutstanding$units$shares 187 | 188 | # Filter through marketing expenses to retrieve one cumulative value per Fiscal Year 189 | shares_outstanding_FY <- shares_outstanding[shares_outstanding$fp=='FY',] 190 | shares_outstanding_FY <- shares_outstanding_FY[!is.na(shares_outstanding_FY$frame),] 191 | ``` 192 | 193 | Let's see the FY and the corresponding value of shares outstanding 194 | 195 | 196 | ```r 197 | cbind(shares_outstanding_FY$fy,shares_outstanding_FY$val) 198 | ``` 199 | 200 | ``` 201 | ## [,1] [,2] 202 | ## [1,] 2011 94908370 203 | ## [2,] 2012 104530305 204 | ## [3,] 2013 114214274 205 | ## [4,] 2014 123090990 206 | ## [5,] 2015 125688000 207 | ## [6,] 2016 131425000 208 | ## [7,] 2017 161561000 209 | ## [8,] 2018 168797000 210 | ## [9,] 2019 173000000 211 | ## [10,] 2020 905000000 212 | ## [11,] 2021 960000000 213 | ## [12,] 2022 3100000000 214 | ``` 215 | ## 3. Comparing Total Assets of All Filing Companies 216 | 217 | The SEC EDGAR API also has an endpoint called /frames/ that returns the data from all companies for a given category and filing period. In this example, we’ll look at the total assets of all companies reported for Q1 2023. 218 | 219 | 220 | ```r 221 | # Specify query parameters 222 | category <- "Assets/USD" 223 | year <- "2023" 224 | quarter <- "1" 225 | 226 | # Define URL 227 | base_url <- paste0('https://data.sec.gov/api/xbrl/frames/us-gaap/',category,'/CY',year,'Q',quarter,'I.json') 228 | 229 | # Query API 230 | asset_data <- fromJSON(rawToChar(GET(url = base_url, add_headers("User-agent" = paste0(firstName,",",lastName,", ",email)))$content))$data 231 | 232 | # For this usecase we are only interested in the 'entityName' and 'val' columns so let's subset 233 | asset_data <- as.data.frame(cbind(asset_data$entityName, asset_data$val)) 234 | 235 | # Rename columns 236 | colnames(asset_data) <- c('Company', 'totalAssets') 237 | 238 | # Coerce the 'totalAssets' column to numeric 239 | asset_data$totalAssets <- as.numeric(asset_data$totalAssets) 240 | 241 | # Let's see how many entries were retrieved 242 | nrow(asset_data) 243 | ``` 244 | 245 | ``` 246 | ## [1] 6220 247 | ``` 248 | 249 | ```r 250 | # We can also see the structure of the data retrieved using the 'str()' function 251 | str(asset_data) 252 | ``` 253 | 254 | ``` 255 | ## 'data.frame': 6220 obs. of 2 variables: 256 | ## $ Company : chr "AAR CORP" "ABBOTT LABORATORIES" "WORLDS INC." "ACME UNITED CORP" ... 257 | ## $ totalAssets: num 1.67e+09 7.38e+10 8.07e+04 1.57e+08 3.72e+08 ... 258 | ``` 259 | 260 | ```r 261 | # Finally, let's see the first few entries of asset_data 262 | head(asset_data) 263 | ``` 264 | 265 | ``` 266 | ## Company totalAssets 267 | ## 1 AAR CORP 1673300000 268 | ## 2 ABBOTT LABORATORIES 73794000000 269 | ## 3 WORLDS INC. 80675 270 | ## 4 ACME UNITED CORP 157468000 271 | ## 5 ADAMS RESOURCES & ENERGY, INC. 
371563000 272 | ## 6 BK TECHNOLOGIES CORPORATION 50758000 273 | ``` 274 | 275 | ### Export to CSV 276 | 277 | Commonly users may want to export data into a comma seperated file (.csv), this may be achieved as follows: 278 | 279 | 280 | ```r 281 | # Export as a csv 282 | write.csv(asset_data, file = paste0('companies_by_total_assets_q',quarter,'_',year,'.csv')) 283 | ``` 284 | 285 | ### Total Assets of All Companies Histogram 286 | 287 | Since the total assets of all companies is a dataset that ranges from values as low as zero to those as large as 4.3 trillion, these values must be graphed logarithmically. Below, we take the log10 of the 'totalAssets' column, luckily R makes this very easy for us. 288 | 289 | 290 | ```r 291 | # Load the ggplot2 library 292 | library(ggplot2) 293 | 294 | # Plot Histogram of totalAssets with log10 transformation 295 | ggplot(asset_data, aes(x = log10(totalAssets))) + 296 | geom_histogram(bins = (10%%max(asset_data$totalAssets) +3), fill = "#756bb1", color = "black") + 297 | labs(title = "Companies by Total Assets Reported for Q1 2023 (Logarithmic)", 298 | x = "Assets (in 10^n USD)", 299 | y = "Number of Companies") 300 | ``` 301 | 302 | ``` 303 | ## Warning: Removed 30 rows containing non-finite values (`stat_bin()`). 304 | ``` 305 | 306 | ![](SEC_EDGAR_API_R_files/figure-html/unnamed-chunk-10-1.png) 307 | 308 | ## 4. Finding the Top 500 Companies by Revenue 309 | 310 | The Fortune 500 is a ranking of the top 500 companies by revenue, according to the data filed in their 10-K or a comparable form. In this example, we’ll look at only the revenues reported in the 10-K forms to construct a similar ranking of U.S. companies by revenue. 311 | 312 | 313 | ```r 314 | # Define query and parameters 315 | category <- 'Revenues/USD' 316 | year <- '2022' 317 | url <- paste0('https://data.sec.gov/api/xbrl/frames/us-gaap/',category,'/CY',year,'.json') 318 | 319 | # Query API 320 | data_retrieved <- fromJSON(rawToChar(GET(url = url, add_headers("User-agent" = paste0(firstName,",",lastName,", ",email)))$content))$data 321 | 322 | # Display number of results 323 | nrow(data_retrieved) 324 | ``` 325 | 326 | ``` 327 | ## [1] 2433 328 | ``` 329 | 330 | ```r 331 | # Grab only first 500 highest revenues 332 | top500_revenues <- head(data_retrieved[order(-data_retrieved$val), c('entityName', 'val')], n = 500) 333 | 334 | # Let's see the first 10 entries in the top500_revenues 335 | head(top500_revenues, n = 10) 336 | ``` 337 | 338 | ``` 339 | ## entityName val 340 | ## 214 WALMART INC. 6.11289e+11 341 | ## 72 Exxon Mobil Corporation 4.13680e+11 342 | ## 320 UnitedHealth Group Incorporated 3.24162e+11 343 | ## 128 CVS HEALTH CORP 3.22467e+11 344 | ## 776 BERKSHIRE HATHAWAY INC 3.02089e+11 345 | ## 188 Chevron Corp 2.46252e+11 346 | ## 909 CENCORA, INC. 2.38587e+11 347 | ## 562 COSTCO WHOLESALE CORP /NEW 2.26954e+11 348 | ## 294 Cardinal Health, Inc. 1.81364e+11 349 | ## 1992 The Cigna Group 1.80516e+11 350 | ``` 351 | 352 | ### Export to CSV 353 | 354 | 355 | ```r 356 | # Export to csv 357 | write.csv(top500_revenues, file = paste0('top_500_companies_by_revenue_fy',year,'.csv')) 358 | ``` 359 | -------------------------------------------------------------------------------- /src/r/usa-spending.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: \...in R 3 | output: 4 | html_document: 5 | keep_md: true 6 | --- 7 | 8 | # USAspending API in R 9 | 10 | by Adam M. 
Nguyen 11 | 12 | Please see the following resources for more information on API usage: 13 | 14 | ### Documentation 15 | - [USAspending Website](https://www.usaspending.gov/) 16 | - [USAspending Documentation](https://api.usaspending.gov/) 17 | - [USAspending API](https://github.com/fedspendingtransparency/usaspending-api) 18 | 19 | ### Data Reuse 20 | - [USAspending Data Reuse](https://www.usaspending.gov/about#about-licensing) 21 | 22 | 23 | These recipe examples were tested on December 1, 2023. 24 | 25 | 26 | ## Setup 27 | 28 | Run the following lines of code to load the libraries ‘httr’ and ‘jsonlite’. If you have not done so already, additionally, before the ‘library()’ functions, run ‘install.packages(c(‘httr’,’jsonlite’))’. 29 | 30 | ```r 31 | library(httr) 32 | library(jsonlite) 33 | ``` 34 | ## 1. Get Agency Names and Toptier Codes 35 | 36 | To obtain data from the API, it'll be useful to have an object we can reference agency names and their toptier codes, the latter of which will be used to access subagency data. 37 | 38 | ```r 39 | # Set base url for API 40 | base_url <- 'https://api.usaspending.gov' 41 | 42 | # Define URL to obtain agency names and codes 43 | toptier_agencies_url <- paste0(base_url,'/api/v2/references/toptier_agencies/') 44 | 45 | # Query API using prepared URL and grab the results 46 | toptier_data <- fromJSON(rawToChar(GET(toptier_agencies_url)$content))$results 47 | 48 | # Let's check the first entry 49 | head(toptier_data, n=1) 50 | ``` 51 | 52 | ``` 53 | ## agency_id toptier_code abbreviation agency_name 54 | ## 1 1146 310 USAB Access Board 55 | ## congressional_justification_url active_fy active_fq outlay_amount 56 | ## 1 https://www.access-board.gov/cj 2023 4 9232761 57 | ## obligated_amount budget_authority_amount 58 | ## 1 8863661 11366459 59 | ## current_total_budget_authority_amount percentage_of_total_budget_authority 60 | ## 1 1.188986e+13 9.559789e-07 61 | ## agency_slug 62 | ## 1 access-board 63 | ``` 64 | 65 | ```r 66 | # Show total number agencies in data 67 | nrow(toptier_data) 68 | ``` 69 | 70 | ``` 71 | ## [1] 108 72 | ``` 73 | Now we can create a reference for agencies and their toptier codes, we call 'toptier_codes'. 74 | 75 | ```r 76 | toptier_codes <- toptier_data[c("agency_name", "toptier_code")] 77 | # Let's see the first 10 agencies and their toptier codes 78 | head(toptier_codes,n=10) 79 | ``` 80 | 81 | ``` 82 | ## agency_name 83 | ## 1 Access Board 84 | ## 2 Administrative Conference of the U.S. 85 | ## 3 Advisory Council on Historic Preservation 86 | ## 4 African Development Foundation 87 | ## 5 Agency for International Development 88 | ## 6 American Battle Monuments Commission 89 | ## 7 Appalachian Regional Commission 90 | ## 8 Armed Forces Retirement Home 91 | ## 9 Barry Goldwater Scholarship and Excellence In Education Foundation 92 | ## 10 Commission for the Preservation of America's Heritage Abroad 93 | ## toptier_code 94 | ## 1 310 95 | ## 2 302 96 | ## 3 306 97 | ## 4 166 98 | ## 5 072 99 | ## 6 074 100 | ## 7 309 101 | ## 8 084 102 | ## 9 313 103 | ## 10 321 104 | ``` 105 | Finally, let's test the data frame, 'toptier_codes', by obtaining the toptier code of an agency. 106 | 107 | 108 | ```r 109 | # Look up toptier code of specific agency, in this case Department of Transportation 110 | toptier_codes$toptier_code[toptier_codes$agency_name == "Department of Transportation"] 111 | ``` 112 | 113 | ``` 114 | ## [1] "069" 115 | ``` 116 | With these codes we can access subagency data. 117 | 118 | 119 | ## 2. 
Retrieving Data from Subagencies 120 | 121 | The 'toptier_codes' data frame we created contains every agency name in the USA Spending API. For this example we'll look at the total obligations of each subagency of the Department of Defense. 122 | 123 | 124 | ```r 125 | # Designate Desired Agency 126 | desired_agency_name <- 'Department of Defense' 127 | 128 | # Find toptier code 129 | desired_toptier_code <- toptier_codes$toptier_code[toptier_codes$agency_name == desired_agency_name] 130 | 131 | # Create URL to Query 132 | subagency_url <- paste0(base_url, '/api/v2/agency/', desired_toptier_code, '/sub_agency/?fiscal_year=2023') 133 | 134 | # Query API and grab Results 135 | subagency_data <- fromJSON(rawToChar(GET(subagency_url)$content))$results 136 | ``` 137 | ### Visualization: Pie Chart 138 | Let's try making a pie chart to visualize our data. Additionally, we will group the last four sub agencies to relieve clutter. 139 | 140 | 141 | ```r 142 | # Select Categories we'd like to collect into 'Other' 143 | last_four_rows <- tail(subagency_data, 4) 144 | 145 | # R is funny so we create a "better" as numeric function 146 | as_numeric_with_na <- function(x) { 147 | as.numeric(as.character(x)) 148 | } 149 | 150 | # Convert last four rows to numeric 151 | last_four_rows[, -1] <- lapply(last_four_rows[, -1], as_numeric_with_na) 152 | 153 | # Sum last four rows 154 | summed_values <- colSums(last_four_rows[, -1], na.rm = TRUE) 155 | 156 | # Collect summed values into "other_row" 157 | other_row <- c("other", as.character(summed_values)) 158 | 159 | # Remove last four rows 160 | subagency_data_removed <- head(subagency_data, -4) 161 | 162 | # Attach new "other_row" and rename it to 'Other' 163 | subagency_data_other <- rbind(subagency_data_removed,other_row) 164 | subagency_data_other$name[7] <- 'Other' 165 | 166 | # Make more fancy Colors 167 | custom_colors <- rainbow(length(subagency_data_other$total_obligations)) 168 | 169 | # Make new and improved pie chart 170 | pie(as.numeric(subagency_data_other$total_obligations), labels = paste0(subagency_data_other$abbreviation," (",round(100*as.numeric(subagency_data_other$total_obligations)/sum(as.numeric(subagency_data_other$total_obligations)),digits = 3),"%)"), main = "Subagency Obligations of the Department of Defense", col = custom_colors) 171 | 172 | # Make new and improved legend 173 | legend("topright", legend = subagency_data_other$abbreviation, fill = custom_colors) 174 | ``` 175 | 176 | ![](USA_Spending_R_files/figure-html/unnamed-chunk-4-1.png) 177 | 178 | 179 | ## 3. Acessing Fiscal Data Per Year 180 | 181 | Using the USA Spending API, we can also examine the annual budget of an agency 2017 and onward. 
182 | 183 | ```r 184 | # Specify Agency 185 | desired_agency_name <- "Department of Health and Human Services" 186 | 187 | # Store toptier code of specified agency using 'toptier_codes' df 188 | desired_toptier_code <- toptier_codes$toptier_code[toptier_codes$agency_name == desired_agency_name] 189 | 190 | # Create URL for accessing budgetary resources of specified agency 191 | budgetary_resources_url <- paste0(base_url,'/api/v2/agency/',desired_toptier_code,'/budgetary_resources/') 192 | 193 | # Query API 194 | budgetary_resources_data <- fromJSON(rawToChar(GET(budgetary_resources_url)$content))$agency_data_by_year 195 | 196 | # Format Collected data into a dataframe containing the Fiscal Year and Total Obligated 197 | budget_by_year <- as.data.frame(cbind('Year'=tail(budgetary_resources_data, n=6)$fiscal_year,'Total_Obligated'=tail(budgetary_resources_data, n=6)$agency_total_obligated)) # We use the tail function to select only the last 6 years in the dataframe, because 2023 does not contain the entire annual budget as of the time of writing 198 | ``` 199 | 200 | We can now use ggplot2 to create a bar chart for the collected budgetary data. 201 | 202 | 203 | ```r 204 | # Load ggplot2 library 205 | library(ggplot2) 206 | 207 | # Create Barplot of Total Budgetary Resources by Fiscal Year 208 | p <- ggplot(data = budget_by_year, aes(x = Year, y = Total_Obligated)) 209 | p + geom_bar(stat = "identity", fill = "plum") + 210 | labs(title = "Department of Health and Human Services Budgetary Resources", x = "Fiscal Year", y = "Total Budgetary Resources") + 211 | theme_minimal() 212 | ``` 213 | 214 | ![](USA_Spending_R_files/figure-html/unnamed-chunk-6-1.png) 215 | 216 | ## 4. Breaking Down Budget Categories 217 | 218 | The API can also be used to view the spending breakdown of a specific agency 219 | 220 | ```r 221 | # Specify Agency 222 | desired_agency_name <- "Department of the Interior" 223 | 224 | # Store toptier code of specified agency 225 | desired_toptier_code <- toptier_codes$toptier_code[toptier_codes$agency_name == desired_agency_name] 226 | 227 | # Store URL to view agency's spending breakdown 228 | obligations_by_category_url <- paste0(base_url,"/api/v2/agency/",desired_toptier_code, "/obligations_by_award_category/?fiscal_year=2023") 229 | 230 | # Query API 231 | obligations_by_category_data <- fromJSON(rawToChar(GET(obligations_by_category_url)$content)) 232 | 233 | # Select the total aggregated obligations for this particular agency 234 | total_aggregated_amount <- obligations_by_category_data$total_aggregated_amount 235 | 236 | # Store results of query 237 | obligations_by_category_data <- obligations_by_category_data$results 238 | obligations_by_category_data 239 | ``` 240 | 241 | ``` 242 | ## category aggregated_amount 243 | ## 1 contracts 7811857503 244 | ## 2 direct_payments 3311940758 245 | ## 3 grants 7198549492 246 | ## 4 idvs 3580836 247 | ## 5 loans 0 248 | ## 6 other 335594193 249 | ``` 250 | 251 | ```r 252 | # Let's remove the categories where 'aggregated_amount' = 0 253 | budget_breakdown <-obligations_by_category_data[obligations_by_category_data$aggregated_amount>0,] 254 | budget_breakdown 255 | ``` 256 | 257 | ``` 258 | ## category aggregated_amount 259 | ## 1 contracts 7811857503 260 | ## 2 direct_payments 3311940758 261 | ## 3 grants 7198549492 262 | ## 4 idvs 3580836 263 | ## 6 other 335594193 264 | ``` 265 | Similar to the previous example, let's create a bar chart to visualize this data. 
266 | 267 | ```r 268 | # Sort 'budget_breakdown' from greatest to least 'aggregated_amount' 269 | budget_breakdown_sorted <- budget_breakdown[order(-budget_breakdown$aggregated_amount), ] 270 | 271 | # Create bar chart using ggplot2 272 | ggplot(data = budget_breakdown_sorted, aes(x = reorder(category, -aggregated_amount), y = aggregated_amount)) + 273 | geom_bar(stat = "identity", fill = "plum") + 274 | labs(title = "Department of the Interior Budget Breakdown", 275 | x = "Category", 276 | y = "Aggregated Amount (USD)") + 277 | theme_minimal() + 278 | geom_text(aes(label = paste0(round(aggregated_amount / sum(budget_breakdown_sorted$aggregated_amount) * 100, 1), "%"), vjust = -0.5), size = 3) 279 | ``` 280 | 281 | ![](USA_Spending_R_files/figure-html/unnamed-chunk-9-1.png) 282 | -------------------------------------------------------------------------------- /src/r/wiley-tdm.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "wiley-tdm" 3 | output: 4 | html_document: 5 | keep_md: true 6 | --- 7 | 8 | # Wiley Text and Data Mining (TDM) in R 9 | 10 | by Michael T. Moen 11 | 12 | This tutorial is designed to support academic research. Please consult your institution’s library or legal office regarding its Text and Data Mining license agreement with Wiley. 13 | 14 | ### Documentation 15 | - [Wiley Text and Data Mining](https://onlinelibrary.wiley.com/library-info/resources/text-and-datamining) 16 | 17 | ### Terms of Use 18 | - [Wiley Text and Data Mining Agreement](https://onlinelibrary.wiley.com/library-info/resources/text-and-datamining#accordionHeader-3) 19 | 20 | ### Data Reuse 21 | - [Service Name] Data Reuse *(link to be provided by the service)* 22 | 23 | *These recipe examples were tested on February 12, 2025.* 24 | 25 | **_NOTE:_** The Wiley TDM API limits requests to a maximum of 3 requests per second. 26 | 27 | ## Setup 28 | 29 | ### Import Libraries 30 | 31 | This tutorial uses the following libraries: 32 | 33 | 34 | ``` r 35 | library(httr) 36 | ``` 37 | 38 | ### Text and Data Mining Token 39 | 40 | A token is required to access the Wiley TDM API. Sign up can be found [here](https://onlinelibrary.wiley.com/library-info/resources/text-and-datamining#accordionHeader-2). Import your token below: 41 | 42 | 43 | ``` r 44 | wiley_token <- Sys.getenv("wiley_token") 45 | 46 | # The token will be sent as a header in the API calls 47 | headers <- add_headers("Wiley-TDM-Client-Token" = wiley_token) 48 | ``` 49 | 50 | ## 1. Retrieve full-text of an article 51 | 52 | The Wiley TDM API returns the full-text of an article as a PDF when given the article's DOI. 53 | 54 | In the first example, we download the full-text of the article with the DOI "10.1002/net.22207". This article was found on the Wiley Online Library. 55 | 56 | 57 | ``` r 58 | # DOI to download 59 | doi <- "10.1002/net.22207" 60 | url <- paste0("https://api.wiley.com/onlinelibrary/tdm/v1/articles/", doi) 61 | 62 | response <- GET(url, headers) 63 | 64 | if (status_code(response) == 200) { 65 | # Download if status code indicates success 66 | filename <- paste0(gsub("/", "_", doi), ".pdf") 67 | writeBin(content(response, "raw"), filename) 68 | cat(paste0(filename, " downloaded successfully\n")) 69 | 70 | } else { 71 | # Print status code if unsuccessful 72 | cat(paste0("Failed to download PDF. Status code: ", status_code(response), "\n")) 73 | } 74 | ``` 75 | 76 | ``` 77 | ## 10.1002_net.22207.pdf downloaded successfully 78 | ``` 79 | 80 | ## 2. 
Retrieve full-text of multiple articles 81 | 82 | In this example, we download 5 articles found in the Wiley Online Library: 83 | 84 | 85 | ``` r 86 | # DOIs of articles to download 87 | dois <- c( 88 | "10.1111/j.1467-8624.2010.01564.x", 89 | "10.1111/1467-8624.00164", 90 | "10.1111/cdev.12864", 91 | "10.1111/j.1467-8624.2007.00995.x", 92 | "10.1111/j.1467-8624.2010.01499.x", 93 | "10.1111/j.1467-8624.2010.0149.x" # Invalid DOI, will throw error 94 | ) 95 | 96 | # Loop through DOIs and download each article 97 | for (doi in dois) { 98 | url <- paste0("https://api.wiley.com/onlinelibrary/tdm/v1/articles/", doi) 99 | response <- GET(url, headers) 100 | 101 | if (status_code(response) == 200) { 102 | # Download if status code indicates success 103 | filename <- paste0(gsub("/", "_", doi), ".pdf") 104 | writeBin(content(response, "raw"), filename) 105 | cat(paste0(filename, " downloaded successfully\n")) 106 | 107 | } else { 108 | # Print status code if unsuccessful 109 | cat(paste0("Failed to download PDF. Status code: ", status_code(response), "\n")) 110 | } 111 | 112 | # Wait 1 second to be nice to Wiley's servers 113 | Sys.sleep(1) 114 | } 115 | ``` 116 | 117 | ``` 118 | ## 10.1111_j.1467-8624.2010.01564.x.pdf downloaded successfully 119 | ## 10.1111_1467-8624.00164.pdf downloaded successfully 120 | ## 10.1111_cdev.12864.pdf downloaded successfully 121 | ## 10.1111_j.1467-8624.2007.00995.x.pdf downloaded successfully 122 | ## 10.1111_j.1467-8624.2010.01499.x.pdf downloaded successfully 123 | ## Failed to download PDF. Status code: 404 124 | ``` 125 | -------------------------------------------------------------------------------- /src/r/world-bank.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: \...in R 3 | output: 4 | html_document: 5 | keep_md: TRUE 6 | --- 7 | 8 | # World Bank API in R 9 | 10 | by Vishank Patel and Adam M. Nguyen 11 | 12 | The World Bank offers a suite of APIs providing access to a vast array of global development data, including economic indicators, project information, and more. These APIs enable users to programmatically retrieve data for analysis, application development, and research purposes 13 | 14 | ### Documentation 15 | - [World Bank Indicators API Documentation](https://datahelpdesk.worldbank.org/knowledgebase/articles/889392-about-the-indicators-api-documentation) 16 | - [World Bank Data Catalog API Documentation](https://datahelpdesk.worldbank.org/knowledgebase/articles/1886698-data-catalog-api) 17 | - [World Bank Data Catalog](https://data.worldbank.org/) 18 | - [World Bank Development Best Practices](https://datahelpdesk.worldbank.org/knowledgebase/articles/902064-development-best-practices) 19 | 20 | ### Terms of Use 21 | - [World Bank Terms of Use](https://www.worldbank.org/en/about/legal/terms-and-conditions) 22 | - [World Bank Summary Terms of Use](https://data.worldbank.org/summary-terms-of-use) 23 | 24 | ### Data Reuse and Licensing 25 | - [World Bank Data Licensing and Terms of Use](https://www.worldbank.org/en/about/legal/terms-of-use-for-datasets) 26 | 27 | These recipe examples were tested on March 24, 2023. 28 | 29 | ## Setup 30 | 31 | ```r 32 | # Load Packages 33 | library(tidyverse) #ggplot2 34 | library(dplyr) #tibbles 35 | library(purrr) #turning into character 36 | library(httr) #GET() 37 | library(jsonlite) #converting to JSON 38 | 39 | # define root World Bank API 40 | urlRoot <- "https://api.worldbank.org/v2/" 41 | ``` 42 | 43 | ## 1. 
Get list of country iso2Codes and names 44 | 45 | For obtaining data from the World Bank API, it is helpful to first obtain a list of country codes and names. 46 | 47 | 48 | ```r 49 | countryURL <- paste0(urlRoot,"country?format=json&per_page=500") # Create URL we are querying 50 | 51 | raw_country_data <- GET(countryURL) # Use 'GET()' to retrieve info 52 | prelim_country_data <- # Reading Data to R 53 | fromJSON( # Converts JSON data to R objects 54 | rawToChar(raw_country_data$content), flatten = TRUE) # Reads raw 8 bit data to chars 55 | # To view try 'view(prelim_country_data)' 56 | country_data <- prelim_country_data[[2]] # Retrieve only country data frame 57 | country_data[1:10,1:5] # Display first 5 features of first 10 countries from country_data 58 | ``` 59 | 60 | ``` 61 | ## id iso2Code name capitalCity longitude 62 | ## 1 ABW AW Aruba Oranjestad -70.0167 63 | ## 2 AFE ZH Africa Eastern and Southern 64 | ## 3 AFG AF Afghanistan Kabul 69.1761 65 | ## 4 AFR A9 Africa 66 | ## 5 AFW ZI Africa Western and Central 67 | ## 6 AGO AO Angola Luanda 13.242 68 | ## 7 ALB AL Albania Tirane 19.8172 69 | ## 8 AND AD Andorra Andorra la Vella 1.5218 70 | ## 9 ARB 1A Arab World 71 | ## 10 ARE AE United Arab Emirates Abu Dhabi 54.3705 72 | ``` 73 | ### Extract Country Codes 74 | 75 | 76 | ```r 77 | countryIso2Code <- as.list(country_data$iso2Code) # Extract iso2Codes 78 | length(countryIso2Code)# Display Length 79 | ``` 80 | 81 | ``` 82 | ## [1] 299 83 | ``` 84 | 85 | ```r 86 | head(countryIso2Code,n=10) # Display first 10 87 | ``` 88 | 89 | ``` 90 | ## [[1]] 91 | ## [1] "AW" 92 | ## 93 | ## [[2]] 94 | ## [1] "ZH" 95 | ## 96 | ## [[3]] 97 | ## [1] "AF" 98 | ## 99 | ## [[4]] 100 | ## [1] "A9" 101 | ## 102 | ## [[5]] 103 | ## [1] "ZI" 104 | ## 105 | ## [[6]] 106 | ## [1] "AO" 107 | ## 108 | ## [[7]] 109 | ## [1] "AL" 110 | ## 111 | ## [[8]] 112 | ## [1] "AD" 113 | ## 114 | ## [[9]] 115 | ## [1] "1A" 116 | ## 117 | ## [[10]] 118 | ## [1] "AE" 119 | ``` 120 | 121 | ### Extract country names 122 | 123 | 124 | ```r 125 | countryName <- as.list(country_data$name) # Extract Country Names 126 | length(countryName)# Display Length 127 | ``` 128 | 129 | ``` 130 | ## [1] 299 131 | ``` 132 | 133 | ```r 134 | head(countryName,n=10) # Display first 10 Country names 135 | ``` 136 | 137 | ``` 138 | ## [[1]] 139 | ## [1] "Aruba" 140 | ## 141 | ## [[2]] 142 | ## [1] "Africa Eastern and Southern" 143 | ## 144 | ## [[3]] 145 | ## [1] "Afghanistan" 146 | ## 147 | ## [[4]] 148 | ## [1] "Africa" 149 | ## 150 | ## [[5]] 151 | ## [1] "Africa Western and Central" 152 | ## 153 | ## [[6]] 154 | ## [1] "Angola" 155 | ## 156 | ## [[7]] 157 | ## [1] "Albania" 158 | ## 159 | ## [[8]] 160 | ## [1] "Andorra" 161 | ## 162 | ## [[9]] 163 | ## [1] "Arab World" 164 | ## 165 | ## [[10]] 166 | ## [1] "United Arab Emirates" 167 | ``` 168 | 169 | ### Store Country Codes and Names together 170 | 171 | ```r 172 | countryIso2CodeName <- transpose(list(countryIso2Code,countryName)) 173 | length(countryIso2CodeName)# Display Length 174 | ``` 175 | 176 | ``` 177 | ## [1] 299 178 | ``` 179 | 180 | ```r 181 | head(countryIso2CodeName, n=10) 182 | ``` 183 | 184 | ``` 185 | ## [[1]] 186 | ## [[1]][[1]] 187 | ## [1] "AW" 188 | ## 189 | ## [[1]][[2]] 190 | ## [1] "Aruba" 191 | ## 192 | ## 193 | ## [[2]] 194 | ## [[2]][[1]] 195 | ## [1] "ZH" 196 | ## 197 | ## [[2]][[2]] 198 | ## [1] "Africa Eastern and Southern" 199 | ## 200 | ## 201 | ## [[3]] 202 | ## [[3]][[1]] 203 | ## [1] "AF" 204 | ## 205 | ## [[3]][[2]] 206 | ## [1] "Afghanistan" 207 | ## 208 | 
## 209 | ## [[4]] 210 | ## [[4]][[1]] 211 | ## [1] "A9" 212 | ## 213 | ## [[4]][[2]] 214 | ## [1] "Africa" 215 | ## 216 | ## 217 | ## [[5]] 218 | ## [[5]][[1]] 219 | ## [1] "ZI" 220 | ## 221 | ## [[5]][[2]] 222 | ## [1] "Africa Western and Central" 223 | ## 224 | ## 225 | ## [[6]] 226 | ## [[6]][[1]] 227 | ## [1] "AO" 228 | ## 229 | ## [[6]][[2]] 230 | ## [1] "Angola" 231 | ## 232 | ## 233 | ## [[7]] 234 | ## [[7]][[1]] 235 | ## [1] "AL" 236 | ## 237 | ## [[7]][[2]] 238 | ## [1] "Albania" 239 | ## 240 | ## 241 | ## [[8]] 242 | ## [[8]][[1]] 243 | ## [1] "AD" 244 | ## 245 | ## [[8]][[2]] 246 | ## [1] "Andorra" 247 | ## 248 | ## 249 | ## [[9]] 250 | ## [[9]][[1]] 251 | ## [1] "1A" 252 | ## 253 | ## [[9]][[2]] 254 | ## [1] "Arab World" 255 | ## 256 | ## 257 | ## [[10]] 258 | ## [[10]][[1]] 259 | ## [1] "AE" 260 | ## 261 | ## [[10]][[2]] 262 | ## [1] "United Arab Emirates" 263 | ``` 264 | 265 | Now we know the country iso2codes, which we can use to pull specific indicator data for countries. 266 | 267 | ## 2. Compile a Custom Indicator Dataset 268 | 269 | There are many availabe indicators: 270 | 271 | We wll select three indicators for this example: 272 | 273 | 1. Scientific and Technical Journal Article Data = [IP.JRN.ARTC.SC](https://data.worldbank.org/indicator/IP.JRN.ARTC.SC?view=chart) 274 | 275 | 2. Patent Applications, residents = [IP.PAT.RESD](https://data.worldbank.org/indicator/IP.PAT.RESD?view=chart) 276 | 277 | 3. GDP per capita (current US\$) Code = [NY.GDP.PCAP.CD](https://data.worldbank.org/indicator/NY.GDP.PCAP.CD?view=chart) 278 | 279 | Note that these three selected indictaors have a [CC-BY 4.0 license](https://datacatalog.worldbank.org/public-licenses#cc-by). We will compile this indicator data for the United States (US) and United Kingdom (GB) 280 | 281 | 282 | ```r 283 | indicators <- list("IP.JRN.ARTC.SC", "IP.PAT.RESD", "NY.GDP.PCAP.CD") 284 | ``` 285 | 286 | ### United States (US) 287 | 288 | #### Generate the web API URLs we need for U.S.: 289 | 290 | 291 | ```r 292 | # Create an Empty List 293 | us_api_url <- c() 294 | 295 | # Iterate through each indicator, appending to the base URL, creating a list of unique URLs 296 | for (indicator in indicators) { 297 | us_api_url <- append(x = us_api_url, 298 | values = paste(urlRoot,"country/US/indicator/",indicator,"?format=json&per_page=500",sep = "")) 299 | } 300 | # Display URLs 301 | us_api_url 302 | ``` 303 | 304 | ``` 305 | ## [1] "https://api.worldbank.org/v2/country/US/indicator/IP.JRN.ARTC.SC?format=json&per_page=500" 306 | ## [2] "https://api.worldbank.org/v2/country/US/indicator/IP.PAT.RESD?format=json&per_page=500" 307 | ## [3] "https://api.worldbank.org/v2/country/US/indicator/NY.GDP.PCAP.CD?format=json&per_page=500" 308 | ``` 309 | 310 | #### Retrieving Data 311 | 312 | 313 | ```r 314 | # Create an empty list for Indicator Data to be stored 315 | us_indicator_data <- list() 316 | # Iterate through URLs to collect and reformat data into lists 317 | for (url in us_api_url) { 318 | temp_data <- tibble(fromJSON(rawToChar(GET(url)$content), flatten = TRUE)[[2]]) 319 | us_indicator_data <- append(us_indicator_data,list(temp_data)) #making a list of tibbles 320 | } 321 | ``` 322 | #### Extracting Data 323 | 324 | 325 | ```r 326 | us_journal_data <- us_indicator_data[[1]][,c("date","value")] #the first element in us_indicator_data is regarding journal publications 327 | head(us_journal_data,n=10) 328 | ``` 329 | 330 | ``` 331 | ## # A tibble: 10 × 2 332 | ## date value 333 | ## 334 | ## 1 2021 NA 335 | ## 2 2020 NA 336 
| ## 3 2019 NA 337 | ## 4 2018 422808. 338 | ## 5 2017 432216. 339 | ## 6 2016 427265. 340 | ## 7 2015 429989. 341 | ## 8 2014 433192. 342 | ## 9 2013 429570. 343 | ## 10 2012 427997. 344 | ``` 345 | 346 | 347 | ```r 348 | us_patent_data <- us_indicator_data[[2]][,c("date","value")] # Takes all rows but 2nd column 349 | head(us_patent_data,n=10) 350 | ``` 351 | 352 | ``` 353 | ## # A tibble: 10 × 2 354 | ## date value 355 | ## 356 | ## 1 2021 NA 357 | ## 2 2020 269586 358 | ## 3 2019 285113 359 | ## 4 2018 285095 360 | ## 5 2017 293904 361 | ## 6 2016 295327 362 | ## 7 2015 288335 363 | ## 8 2014 285096 364 | ## 9 2013 287831 365 | ## 10 2012 268782 366 | ``` 367 | 368 | 369 | ```r 370 | us_GDP_data <- us_indicator_data[[3]][,c("date","value")] 371 | head(us_GDP_data) 372 | ``` 373 | 374 | ``` 375 | ## # A tibble: 6 × 2 376 | ## date value 377 | ## 378 | ## 1 2021 70249. 379 | ## 2 2020 63531. 380 | ## 3 2019 65120. 381 | ## 4 2018 62823. 382 | ## 5 2017 59908. 383 | ## 6 2016 57867. 384 | ``` 385 | 386 | 387 | ```r 388 | # Create a dataframe using retrieved data 389 | us_data <- as.data.frame(c(us_journal_data,us_patent_data[2],us_GDP_data[2]), 390 | col.names= c("Year","Scientific and Technical Journal Article Data", "Patent Applications, residents","GDP per capita (current US$) Code")) # Set column names 391 | head(us_data) 392 | ``` 393 | 394 | ``` 395 | ## Year Scientific.and.Technical.Journal.Article.Data 396 | ## 1 2021 NA 397 | ## 2 2020 NA 398 | ## 3 2019 NA 399 | ## 4 2018 422807.7 400 | ## 5 2017 432216.5 401 | ## 6 2016 427264.6 402 | ## Patent.Applications..residents GDP.per.capita..current.US...Code 403 | ## 1 NA 70248.63 404 | ## 2 269586 63530.63 405 | ## 3 285113 65120.39 406 | ## 4 285095 62823.31 407 | ## 5 293904 59907.75 408 | ## 6 295327 57866.74 409 | ``` 410 | 411 | ### United Kingdom (GB) 412 | 413 | Now we can repeat the same process to find the relevant information for the United Kingdom indicated by the country code "GB". As you will see, much of the code is the same, so if needed, reference the United States example. 
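If you would rather not repeat the code, the retrieval steps can be wrapped in a small helper function and called once per ISO2 country code. This is only a sketch of that refactor, reusing the `urlRoot` and `indicators` objects defined earlier (the `get_indicator_data()` name is illustrative); the rest of this section keeps the explicit step-by-step version.

```r
# Sketch of a reusable helper: fetch all indicators for one ISO2 country code
get_indicator_data <- function(country_code) {
  lapply(indicators, function(indicator) {
    url <- paste0(urlRoot, "country/", country_code, "/indicator/", indicator,
                  "?format=json&per_page=500")
    # Same parsing pattern as above: the second element holds the indicator data
    tibble(fromJSON(rawToChar(GET(url)$content), flatten = TRUE)[[2]])
  })
}

# Example (not run): uk_indicator_data <- get_indicator_data("GB")
```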
414 | 415 | 416 | 417 | ```r 418 | uk_api_url <- c() 419 | for (indicator in indicators) { 420 | uk_api_url <- append(x = uk_api_url, 421 | values = paste(urlRoot,"country/GB/indicator/",indicator,"?format=json&per_page=500",sep = "")) 422 | } 423 | uk_api_url 424 | ``` 425 | 426 | ``` 427 | ## [1] "https://api.worldbank.org/v2/country/GB/indicator/IP.JRN.ARTC.SC?format=json&per_page=500" 428 | ## [2] "https://api.worldbank.org/v2/country/GB/indicator/IP.PAT.RESD?format=json&per_page=500" 429 | ## [3] "https://api.worldbank.org/v2/country/GB/indicator/NY.GDP.PCAP.CD?format=json&per_page=500" 430 | ``` 431 | 432 | #### Retrieving Data 433 | 434 | 435 | ```r 436 | uk_indicator_data <- list() 437 | for (url in uk_api_url) { 438 | temp_data <- tibble(fromJSON(rawToChar(GET(url)$content), flatten = TRUE)[[2]]) 439 | uk_indicator_data <- append(uk_indicator_data,list(temp_data)) #making a list of tibbles 440 | } 441 | ``` 442 | #### Extracting Data 443 | 444 | ```r 445 | # Extract Data 446 | uk_journal_data <- uk_indicator_data[[1]][,c("date","value")] #takes all rows but only two columns 447 | uk_patent_data <- uk_indicator_data[[2]][,c("date","value")] 448 | uk_GDP_data <- uk_indicator_data[[3]][,c("date","value")] 449 | 450 | # Combine extracted data into a data frame 451 | uk_data <- as.data.frame(c(uk_journal_data,uk_patent_data[2],uk_GDP_data[2]),col.names= c("Year","Scientific and Technical Journal Article Data", "Patent Applications, residents","GDP per capita (current US$) Code")) 452 | head(uk_data) 453 | ``` 454 | 455 | ``` 456 | ## Year Scientific.and.Technical.Journal.Article.Data 457 | ## 1 2021 NA 458 | ## 2 2020 NA 459 | ## 3 2019 NA 460 | ## 4 2018 97680.90 461 | ## 5 2017 99128.72 462 | ## 6 2016 99366.17 463 | ## Patent.Applications..residents GDP.per.capita..current.US...Code 464 | ## 1 NA 46510.28 465 | ## 2 11990 40318.56 466 | ## 3 12061 42747.08 467 | ## 4 12865 43306.31 468 | ## 5 13301 40621.33 469 | ## 6 13876 41146.08 470 | ``` 471 | 472 | ## 3. Plot Indicator Data 473 | 474 | Create line plots of US/UK Number of Scientific and Technical Journal Articles and Patents by year. Upon inspecting the dataset, there are no values before the year 2000 and after 2018. Hence, we will slice our data for visualizations accordingly. 
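Rather than eyeballing the data, you can confirm which years actually have values before choosing the slice. A quick check on the journal data from section 2 (the hard-coded rows `[4:22]` used below correspond to this range):

```r
# Which years have non-missing journal article counts? (uses 'us_journal_data' from above)
available_years <- as.numeric(us_journal_data$date[!is.na(us_journal_data$value)])
range(available_years)
```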
475 | 476 | 477 | ```r 478 | # Plotting the Data 479 | 480 | # Part [4:22] corresponds to years 2000-2018 481 | journal_data <- tibble(dates= c(us_journal_data$date[4:22]), 482 | us_journals=c(us_journal_data$value[4:22]), 483 | uk_journals=c(uk_journal_data$value[4:22])) 484 | 485 | ggplot(journal_data, aes(x = dates))+ 486 | geom_line(aes(y = us_journals, color = "US IP", group=1))+ 487 | geom_point(aes(y = us_journals, color = "US IP"))+ 488 | geom_line(aes(y = uk_journals, color = "UK IP"), group=1)+ 489 | geom_point(aes(y = uk_journals, color = "UK IP"))+ 490 | labs(title = "US vs UK Journal Publications", 491 | x="Year", 492 | y="Publications")+ 493 | theme(axis.text.x = element_text(angle = 45, hjust = 0.5, vjust = 0.5)) 494 | ``` 495 | 496 | ![](R_WorldBank_Markdown_files/figure-html/unnamed-chunk-16-1.png) 497 | 498 | Similarly for the GDP data, 499 | 500 | 501 | ```r 502 | gdp_data <- tibble(dates= c(us_GDP_data$date[4:22]), 503 | us_gdp=c(us_GDP_data$value[4:22]), 504 | uk_gdp=c(uk_GDP_data$value[4:22])) 505 | 506 | ggplot(gdp_data, aes(x = dates))+ 507 | geom_line(aes(y = us_gdp, color = "US GDP", group=1))+ 508 | geom_point(aes(y = us_gdp, color = "US GDP"))+ 509 | geom_line(aes(y = uk_gdp, color = "UK GDP"), group=1)+ 510 | geom_point(aes(y = uk_gdp, color = "UK GDP"))+ 511 | labs(title = "US vs UK GDP", 512 | x="Year", 513 | y="GDP")+ 514 | theme(axis.text.x = element_text(angle = 45, hjust = 0.5, vjust = 0.5)) 515 | ``` 516 | 517 | ![](R_WorldBank_Markdown_files/figure-html/unnamed-chunk-17-1.png) 518 | 519 | Patents: 520 | 521 | 522 | ```r 523 | patent_data <- tibble(dates= as.numeric(c(us_patent_data$date[4:41])), 524 | us_patents=as.numeric(c(us_patent_data$value[4:41])), 525 | uk_patents=as.numeric(c(uk_patent_data$value[4:41]))) 526 | 527 | ggplot(patent_data, aes(x = dates))+ 528 | geom_line(aes(y = us_patents, color = "US Patents", group=1))+ 529 | geom_point(aes(y = us_patents, color = "US Patents"))+ 530 | geom_line(aes(y = uk_patents, color = "UK Patents"), group=1)+ 531 | geom_point(aes(y = uk_patents, color = "UK Patents"))+ 532 | labs(title = "US vs UK Patents", 533 | x="Year", 534 | y="Patents")+ 535 | theme(axis.text.x = element_text(angle = 90, hjust = 0.5, vjust = 0.5)+ 536 | scale_x_continuous(breaks=seq(1980, 2020, by = 5)) 537 | ) 538 | ``` 539 | 540 | ![](R_WorldBank_Markdown_files/figure-html/unnamed-chunk-18-1.png) 541 | --------------------------------------------------------------------------------