/**
 * GoogleMapScraper.js — GoogleMap web scraping sample.
 *
 * Project source:
 *   https://github.com/pioneeringdev/Node-Puppeteer-googleMap-scrapping
 *
 * Description (from the project's ReadMe.md):
 *   This is a sample of Google Maps scraping. It opens the reviews of one
 *   location, scrolls the infinite-scroll review list until every review is
 *   loaded, and prints the collected reviews.
 *   NOTE(review): the ReadMe also mentions getting "the start of location"
 *   (presumably the star rating); this script only collects each review's
 *   title, subtitle and text — confirm the intended scope.
 *
 * Frameworks: Node.js, puppeteer. Developed for the testing project.
 */
const puppeteer = require("puppeteer");

(() => {
  // CSS selectors for the Google Maps place page. They are tied to the page
  // markup this sample was written against and will need updating if the
  // markup changes.
  const REVIEW_BUTTON_CLASS = "button.section-reviewchart-numreviews";
  // NOTE(review): the count is read from a `div`, while the element that is
  // clicked is a `button` with the same class — kept exactly as in the
  // original; confirm against the live page.
  const REVIEW_COUNT_LABEL = "div.section-reviewchart-numreviews";
  const REVIEW_SCROLL_BOX =
    ".section-listbox.section-scrollbox.scrollable-y.scrollable-show";
  const REVIEW_ITEM = ".section-review-content";
  const REVIEW_TITLE = ".section-review-title";
  const REVIEW_SUBTITLE = ".section-review-subtitle";
  const REVIEW_TEXT = ".section-review-text";
  const SITE_LINK =
    "https://www.google.com/maps/place/T.C.R.+Rooter+and+Plumbing+Repair/@35.2032875,-111.6229485,17z/data=!3m1!4b1!4m5!3m4!1s0x872d86303b8fb69d:0x10a7f62740d8aa7e!8m2!3d35.2032875!4d-111.6207598";

  /**
   * Resolves after `ms` milliseconds. Used instead of the deprecated
   * `page.waitFor(ms)` so the script does not depend on a Puppeteer version.
   *
   * @param {number} ms - Delay in milliseconds.
   * @returns {Promise<void>}
   */
  const sleep = ms => new Promise(resolve => setTimeout(resolve, ms));

  // ---------------------------------------------------------------------
  // The functions below are handed to `page.evaluate`, so they execute in
  // the browser page, not in Node. They cannot see the constants above;
  // every selector is passed in as an argument.
  // ---------------------------------------------------------------------

  /**
   * Reads the total number of reviews from the review-count label.
   *
   * The first whitespace-separated token of the label text is taken as the
   * count. Non-digit characters (e.g. the comma in "1,234") are stripped
   * before parsing; without that the count would be NaN and the scroll loop
   * would be skipped silently.
   *
   * @param {string} selector - Selector of the review-count label.
   * @returns {number} Total review count shown on the page.
   * @throws {Error} If the label is missing or holds no leading number.
   */
  const extractTotalReviewCount = selector => {
    const label = document.querySelector(selector);
    if (!label) {
      throw new Error(`Review count element not found: ${selector}`);
    }
    const firstChild = label.firstChild;
    const rawText =
      firstChild && firstChild.nodeValue !== null
        ? firstChild.nodeValue
        : label.textContent;
    const firstToken = rawText.trim().split(/\s+/)[0];
    const count = Number.parseInt(firstToken.replace(/\D/g, ""), 10);
    if (Number.isNaN(count)) {
      throw new Error(`Cannot parse review count from "${rawText}"`);
    }
    return count;
  };

  /**
   * Counts the review elements currently present in the page.
   *
   * @param {string} selector - Selector matching one review element.
   * @returns {number} Number of matching elements.
   */
  const extractReviewCount = selector =>
    document.querySelectorAll(selector).length;

  /**
   * Returns the current scroll height of the review list container.
   *
   * @param {string} selector - Selector of the scrollable container.
   * @returns {number} The container's `scrollHeight`.
   */
  const getScrollHeight = selector =>
    document.querySelector(selector).scrollHeight;

  /**
   * Scrolls the review list container to its bottom, which makes the page
   * request the next batch of reviews.
   *
   * @param {string} selector - Selector of the scrollable container.
   */
  const scrollToBottom = selector => {
    const box = document.querySelector(selector);
    box.scrollTo(0, box.scrollHeight);
  };

  /**
   * Collects title, subtitle and text of every review present in the page.
   * A field whose element is missing is reported as `null` instead of
   * aborting the whole extraction with a TypeError.
   *
   * @param {string} itemSelector - Selector matching one review element.
   * @param {string} titleSelector - Selector of the title inside a review.
   * @param {string} subtitleSelector - Selector of the subtitle inside a review.
   * @param {string} textSelector - Selector of the text inside a review.
   * @returns {Array<{title: ?string, subtitle: ?string, review: ?string}>}
   */
  const extractReviews = (
    itemSelector,
    titleSelector,
    subtitleSelector,
    textSelector
  ) => {
    const textOf = (root, selector) => {
      const node = root.querySelector(selector);
      return node ? node.textContent : null;
    };
    return Array.from(document.querySelectorAll(itemSelector), review => ({
      title: textOf(review, titleSelector),
      subtitle: textOf(review, subtitleSelector),
      review: textOf(review, textSelector)
    }));
  };

  /**
   * Scrolls the review list until the number of loaded reviews reaches the
   * total shown on the page.
   *
   * After each scroll it waits for the container's scroll height to grow,
   * then pauses `scrollDelay` ms before recounting. If the height does not
   * grow before `waitForFunction` times out, scrolling stops with a warning
   * so the reviews already loaded can still be collected.
   *
   * @param {object} page - Puppeteer page showing the review list.
   * @param {number} [scrollDelay=100] - Pause after each scroll, in ms.
   * @returns {Promise<void>}
   */
  const scrapInfiniteScrollItems = async (page, scrollDelay = 100) => {
    const totalReviewCount = await page.evaluate(
      extractTotalReviewCount,
      REVIEW_COUNT_LABEL
    );
    let loadedReviewCount = await page.evaluate(
      extractReviewCount,
      REVIEW_ITEM
    );

    while (totalReviewCount > loadedReviewCount) {
      const previousHeight = await page.evaluate(
        getScrollHeight,
        REVIEW_SCROLL_BOX
      );
      await page.evaluate(scrollToBottom, REVIEW_SCROLL_BOX);

      try {
        await page.waitForFunction(
          (selector, height) =>
            document.querySelector(selector).scrollHeight > height,
          {},
          REVIEW_SCROLL_BOX,
          previousHeight
        );
      } catch (err) {
        // Only a timeout means "the list stopped growing"; anything else is
        // a real failure and must propagate.
        if (err.name !== "TimeoutError") {
          throw err;
        }
        console.warn(
          `Review list stopped growing at ${loadedReviewCount} of ${totalReviewCount} reviews.`
        );
        break;
      }

      await sleep(scrollDelay);
      loadedReviewCount = await page.evaluate(extractReviewCount, REVIEW_ITEM);
    }
  };

  // Entry point: open the place page, open its reviews, load them all and
  // print them.
  (async () => {
    const browser = await puppeteer.launch();
    try {
      const page = await browser.newPage();
      await page.goto(SITE_LINK);
      await page.waitForSelector(REVIEW_BUTTON_CLASS);
      await page.click(REVIEW_BUTTON_CLASS);
      await page.waitForSelector(REVIEW_TITLE);
      await scrapInfiniteScrollItems(page, 100);
      const titles = await page.evaluate(
        extractReviews,
        REVIEW_ITEM,
        REVIEW_TITLE,
        REVIEW_SUBTITLE,
        REVIEW_TEXT
      );
      console.log(titles);
    } finally {
      // Always release the browser process, even when a step above throws.
      await browser.close();
    }
  })().catch(err => {
    // Report the failure instead of leaving an unhandled promise rejection.
    console.error(err);
    process.exitCode = 1;
  });
})();