├── Google-Scraper-API-1090x275.png
├── main.py
└── README.md

/Google-Scraper-API-1090x275.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oxylabs/how-to-scrape-google-scholar/HEAD/Google-Scraper-API-1090x275.png
--------------------------------------------------------------------------------

/main.py:
--------------------------------------------------------------------------------
import requests
from bs4 import BeautifulSoup


# Oxylabs Web Scraper API credentials.
USERNAME = "USERNAME"
PASSWORD = "PASSWORD"


def get_html_for_page(url):
    """Fetch the HTML of a page through the Oxylabs Web Scraper API."""
    payload = {
        "url": url,
        "source": "google",
    }
    response = requests.post(
        "https://realtime.oxylabs.io/v1/queries",
        auth=(USERNAME, PASSWORD),
        json=payload,
    )
    response.raise_for_status()
    return response.json()["results"][0]["content"]


def get_citations(article_id):
    """Collect the formatted citation strings for a single article."""
    url = f"https://scholar.google.com/scholar?q=info:{article_id}:scholar.google.com&output=cite"
    html = get_html_for_page(url)
    soup = BeautifulSoup(html, "html.parser")
    data = []
    for citation in soup.find_all("tr"):
        title = citation.find("th", {"class": "gs_cith"}).get_text(strip=True)
        content = citation.find("div", {"class": "gs_citr"}).get_text(strip=True)
        data.append({
            "title": title,
            "content": content,
        })
    return data


def parse_data_from_article(article):
    """Extract the title, authors, URL, and citations from one search result."""
    title_elem = article.find("h3", {"class": "gs_rt"})
    title = title_elem.get_text()
    title_anchor_elem = article.select("a")[0]
    url = title_anchor_elem["href"]
    article_id = title_anchor_elem["id"]
    authors = article.find("div", {"class": "gs_a"}).get_text()
    return {
        "title": title,
        "authors": authors,
        "url": url,
        "citations": get_citations(article_id),
    }


def get_url_for_page(url, page_index):
    """Append the pagination offset to the search URL."""
    return url + f"&start={page_index}"


def get_data_from_page(url):
    """Parse every search result container on a single results page."""
    html = get_html_for_page(url)
    soup = BeautifulSoup(html, "html.parser")
    articles = soup.find_all("div", {"class": "gs_ri"})
    return [parse_data_from_article(article) for article in articles]


data = []
url = "https://scholar.google.com/scholar?q=global+warming&hl=en&as_sdt=0,5"

NUM_OF_PAGES = 1
page_index = 0
for _ in range(NUM_OF_PAGES):
    page_url = get_url_for_page(url, page_index)
    entries = get_data_from_page(page_url)
    data.extend(entries)
    page_index += 10  # Google Scholar offsets result pages in steps of 10.

print(data)
--------------------------------------------------------------------------------

/README.md:
--------------------------------------------------------------------------------
# How to Scrape Google Scholar

[![Oxylabs promo code](https://raw.githubusercontent.com/oxylabs/how-to-scrape-google-scholar/refs/heads/main/Google-Scraper-API-1090x275.png)](https://oxylabs.io/products/scraper-api/serp/google?utm_source=877&utm_medium=affiliate&groupid=877&utm_content=how-to-scrape-google-scholar-github&transaction_id=102c8d36f7f0d0e5797b8f26152160)

[![](https://dcbadge.limes.pink/api/server/Pds3gBmKMH?style=for-the-badge&theme=discord)](https://discord.gg/Pds3gBmKMH) [![YouTube](https://img.shields.io/badge/YouTube-Oxylabs-red?style=for-the-badge&logo=youtube&logoColor=white)](https://www.youtube.com/@oxylabs)

This tutorial shows how to get titles, authors, and citations from [Google Scholar](https://scholar.google.com/) using Oxylabs [SERP Scraper API](https://oxylabs.io/products/scraper-api/serp) (a part of Web Scraper API) and Python. You can get a **1-week free trial** by registering on the [dashboard](https://dashboard.oxylabs.io/).

For a detailed walkthrough with explanations and visuals, check our [blog post](https://oxylabs.io/blog/how-to-scrape-google-scholar). Also, do not hesitate to check out this [Best SERP APIs](https://medium.com/@oxylabs.io/the-10-best-serp-apis-in-2025-22bf7f91f8f0) list.

## The complete code

```python
import requests
from bs4 import BeautifulSoup


# Oxylabs Web Scraper API credentials.
USERNAME = "USERNAME"
PASSWORD = "PASSWORD"


def get_html_for_page(url):
    """Fetch the HTML of a page through the Oxylabs Web Scraper API."""
    payload = {
        "url": url,
        "source": "google",
    }
    response = requests.post(
        "https://realtime.oxylabs.io/v1/queries",
        auth=(USERNAME, PASSWORD),
        json=payload,
    )
    response.raise_for_status()
    return response.json()["results"][0]["content"]


def get_citations(article_id):
    """Collect the formatted citation strings for a single article."""
    url = f"https://scholar.google.com/scholar?q=info:{article_id}:scholar.google.com&output=cite"
    html = get_html_for_page(url)
    soup = BeautifulSoup(html, "html.parser")
    data = []
    for citation in soup.find_all("tr"):
        title = citation.find("th", {"class": "gs_cith"}).get_text(strip=True)
        content = citation.find("div", {"class": "gs_citr"}).get_text(strip=True)
        data.append({
            "title": title,
            "content": content,
        })
    return data


def parse_data_from_article(article):
    """Extract the title, authors, URL, and citations from one search result."""
    title_elem = article.find("h3", {"class": "gs_rt"})
    title = title_elem.get_text()
    title_anchor_elem = article.select("a")[0]
    url = title_anchor_elem["href"]
    article_id = title_anchor_elem["id"]
    authors = article.find("div", {"class": "gs_a"}).get_text()
    return {
        "title": title,
        "authors": authors,
        "url": url,
        "citations": get_citations(article_id),
    }


def get_url_for_page(url, page_index):
    """Append the pagination offset to the search URL."""
    return url + f"&start={page_index}"


def get_data_from_page(url):
    """Parse every search result container on a single results page."""
    html = get_html_for_page(url)
    soup = BeautifulSoup(html, "html.parser")
    articles = soup.find_all("div", {"class": "gs_ri"})
    return [parse_data_from_article(article) for article in articles]


data = []
url = "https://scholar.google.com/scholar?q=global+warming&hl=en&as_sdt=0,5"

NUM_OF_PAGES = 1
page_index = 0
for _ in range(NUM_OF_PAGES):
    page_url = get_url_for_page(url, page_index)
    entries = get_data_from_page(page_url)
    data.extend(entries)
    page_index += 10  # Google Scholar offsets result pages in steps of 10.

print(data)
```
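
Running the script prints the scraped articles as a list of Python dictionaries. If you want to keep the results, here is a minimal optional sketch (not part of the original tutorial) that writes the `data` list to a JSON file using only the standard library; it assumes it runs right after the pagination loop above:

```python
import json

# Persist the scraped entries so they can be inspected or reused later.
# Assumes `data` is the list of article dictionaries built by the loop above.
with open("scholar_results.json", "w", encoding="utf-8") as f:
    json.dump(data, f, ensure_ascii=False, indent=2)
```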
## Final word

If you have any questions, feel free to contact us at support@oxylabs.io.

Check our [documentation](https://developers.oxylabs.io/scraper-apis/web-scraper-api/google) for more details on the API parameters and variables used in this tutorial.
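
For instance, the request payload built in `get_html_for_page()` can carry more fields than the `url` and `source` used above. The sketch below is illustrative only; the `geo_location` field and its value are an assumption here, so confirm the exact parameter names and accepted values in the documentation before relying on them:

```python
# Illustrative only: an extended payload for get_html_for_page().
# Verify parameter names and values against the Oxylabs documentation.
payload = {
    "url": "https://scholar.google.com/scholar?q=global+warming&hl=en&as_sdt=0,5",
    "source": "google",
    "geo_location": "United States",  # Assumed: serve results as seen from this location.
}
```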
Read more Google scraping related repositories: [Google Sheets for Basic Web Scraping](https://github.com/oxylabs/web-scraping-google-sheets), [How to Scrape Google Shopping Results](https://github.com/oxylabs/scrape-google-shopping), [Google Play Scraper](https://github.com/oxylabs/google-play-scraper), [How To Scrape Google Jobs](https://github.com/oxylabs/how-to-scrape-google-jobs), [Google News Scraper](https://github.com/oxylabs/google-news-scraper), [How to Scrape Google Flights with Python](https://github.com/oxylabs/how-to-scrape-google-flights), [How To Scrape Google Images](https://github.com/oxylabs/how-to-scrape-google-images), [Scrape Google Search Results](https://github.com/oxylabs/scrape-google-python), [Scrape Google Trends](https://github.com/oxylabs/how-to-scrape-google-trends)
--------------------------------------------------------------------------------