├── .DS_Store
├── AmazonScraper.py
├── LICENSE
├── Main.py
├── README.md
├── chromedriver
└── chromedriver.exe


/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mxmnci/AmazonCommentScraper/f97fcd51cad4c0143e7936c54804a1ced79c8185/.DS_Store


--------------------------------------------------------------------------------
/AmazonScraper.py:
--------------------------------------------------------------------------------
  1 | from bs4 import BeautifulSoup
  2 | from selenium import webdriver
  3 | from datetime import datetime  
  4 | from datetime import timedelta 
  5 | from random import randint
  6 | from time import sleep
  7 | from tqdm import tqdm
  8 | import sys
  9 | import csv
 10 | import os
 11 | 
 12 | def descWordCount(str1):
 13 |     total = 1
 14 |     for i in range(len(str1)):
 15 |         if(str1[i] == ' ' or str1 == '\n' or str1 == '\t'):
 16 |             total = total + 1
 17 |     return total
 18 | 
 19 | def containsWord(description, words):
 20 |     containsWord = False
 21 |     for word in words:
 22 |         if word in description.lower():
 23 |             containsWord = True
 24 |     return containsWord
 25 | 
 26 | def scrape(AMAZONURL, PRODUCTHANDLE, RATINGLIMIT, COMMENTLIMIT, EXCLUDE):
 27 |     print(EXCLUDE)
 28 |     try:
 29 |         os.remove("comments.csv")
 30 |     except OSError:
 31 |         pass
 32 |     if getattr(sys, 'frozen', False):
 33 |         if (sys.platform == "darwin"):
 34 |             chromedriver_path = os.path.join(sys._MEIPASS, 'chromedriver')
 35 |         elif (sys.platform == "win32"):
 36 |             chromedriver_path = os.path.join(sys._MEIPASS, 'chromedriver.exe')
 37 |         else:
 38 |             print("Chrome Driver not found...")
 39 |     else:
 40 |         if (sys.platform == "darwin"):
 41 |             chromedriver_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),"chromedriver")
 42 |         elif (sys.platform == "win32"):
 43 |             chromedriver_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),"chromedriver.exe")
 44 |         else:
 45 |             print("Chrome Driver not found...")
 46 |     browser = webdriver.Chrome(chromedriver_path)
 47 |     url = AMAZONURL
 48 |     browser.get(url)
 49 |     print("Please be patient while the comments are gathered. \nDO NOT close the browser window.")
 50 |     try:
 51 |         currentButton = browser.find_element_by_xpath("//*[@id='reviews-medley-footer']/div[2]/a")
 52 |         currentButton.click()
 53 |     except:
 54 |         print("There are no comments to extract, terminating process...")
 55 |         sys.exit()
 56 |     html = browser.page_source
 57 |     soup = BeautifulSoup(html, 'html.parser')
 58 |     REVIEWCOUNT = int(soup.find("span", {"class" : "totalReviewCount"}).get_text().replace(",",""))
 59 |     loop = tqdm(total=REVIEWCOUNT, position=0, leave=False)
 60 |     TOTALCOMMENTCOUNT = 0
 61 |     COMMENTCOUNT = 10
 62 |     sleep(1)
 63 |     with open('DONOTUSE.csv', 'w', encoding='utf-8') as csvfile:
 64 |         filewriter = csv.writer(csvfile, delimiter=',',
 65 |                             quotechar='|', quoting=csv.QUOTE_MINIMAL, lineterminator='\n')
 66 |         filewriter.writerow(['product_handle', 'state', 'rating', 'title','author', 'email', 'body', 'created_at'])
 67 |         productHandle = PRODUCTHANDLE
 68 |         while COMMENTCOUNT == 10 and (TOTALCOMMENTCOUNT < COMMENTLIMIT or COMMENTLIMIT == 0):
 69 |             loop.set_description("Status: ")
 70 |             loop.update(COMMENTCOUNT)
 71 |             COMMENTCOUNT = 0
 72 |             html = browser.page_source
 73 |             soup = BeautifulSoup(html, 'html.parser')
 74 |             for i in soup.find_all("div", {"class" : "a-section celwidget"}):
 75 |                 rating = int(i.find("span", {"class" : "a-icon-alt"}).get_text()[0])
 76 |                 profileName = i.find("span", {"class" : "a-profile-name"}).get_text()
 77 |                 description = i.find("span", {"class" : "review-text-content"}).get_text()[:-1]
 78 |                 title = i.find("a", {"class" : "review-title-content"}).contents[0].get_text()
 79 |                 fakeTime = str(datetime.now().replace(microsecond=0) - timedelta(days=randint(0,100), minutes=randint(0,60), seconds=randint(0,60))) + " -0700"
 80 | 
 81 |                 if (rating >= RATINGLIMIT and profileName != "Amazon Customer" and "\"" not in description and not containsWord(description, EXCLUDE) and not containsWord(title, EXCLUDE)):
 82 |                     filewriter.writerow([productHandle, 'published', "\"" + str(rating) + "\"", "\"" + title + "\"", "\"" + profileName + "\"", 'imported@review.com', "\"" + description + "\"", fakeTime])
 83 | 
 84 |                 COMMENTCOUNT += 1
 85 |                 TOTALCOMMENTCOUNT += 1
 86 |             try:
 87 |                 currentButton = browser.find_element_by_xpath("//*[@id='cm_cr-pagination_bar']/ul/li[2]")
 88 |                 currentButton.click()
 89 |             except: 
 90 |                 print("There are no comments to extract, terminating process...")
 91 |                 sys.exit()
 92 |             sleep(1)
 93 | 
 94 |     loop.close()
 95 | 
 96 |     text = open("DONOTUSE.csv", "r", encoding='utf-8')
 97 |     text = ''.join([i for i in text]).replace("|", "")
 98 |     out = open("comments.csv","w", encoding='utf-8')
 99 |     out.writelines(text)
100 |     out.close()
101 |     os.remove("DONOTUSE.csv")
102 |     s = open("comments.csv", mode='r', encoding='utf-8-sig').read() #CONVERT UTF-8 TO NO BOM - Necessary for shopify import
103 |     open("comments.csv", mode='w', encoding='utf-8').write(s)
104 | 
105 |     with open("comments.csv", 'rb') as open_file: # CONVERT LINE ENDINGS TO UNIX - Necessary for shopify import
106 |         content = open_file.read()
107 |     WINDOWS_LINE_ENDING = b'\r\n'
108 |     UNIX_LINE_ENDING = b'\n'
109 |     content = content.replace(WINDOWS_LINE_ENDING, UNIX_LINE_ENDING)
110 |     with open("comments.csv", 'wb') as open_file:
111 |         open_file.write(content)
112 | 
113 |     print("Comments have been gathered successfully, your file is located at: " + os.path.abspath("comments.csv"))
114 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 maxmonciardini
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Main.py:
--------------------------------------------------------------------------------
1 | import AmazonScraper
2 | 
def parse_exclusions(raw):
    """Split a comma-separated string into a list of exclusion words.

    Surrounding whitespace is removed from each word and blank entries are
    dropped, so an empty answer yields ``[]`` rather than ``['']`` (an empty
    word would match every review).
    """
    return [word.strip() for word in raw.split(',') if word.strip()]


def main():
    """Prompt for the scrape settings and run the scraper."""
    # The prompts are asked in the same order as the scrape() parameters.
    url = input("Please input your Amazon Product URL: ")
    product_handle = input("Please input the EXACT Shopify product name: ")
    rating_limit = int(input("Please input the minimum rating you would like to gather 1-5: "))
    comment_limit = int(input("Please input the amount of comments you would like to search (input 0 to search all): "))
    exclude = parse_exclusions(input("Please input the words you would like to exclude separated by commas (If there are none then leave this blank): \n"))
    AmazonScraper.scrape(url, product_handle, rating_limit, comment_limit, exclude)


if __name__ == "__main__":
    main()
8 | 
9 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # AmazonCommentScraper - Mac OS & Windows!
 2 | A Python-based app that uses the Selenium automation framework to convert comments from any Amazon product page into a CSV file (compatible with Shopify!).
 3 | 
 4 | ## Instructions:
 5 | 1. To install necessary dependencies, run this command:
 6 | ```pip3 install beautifulsoup4 selenium tqdm```
 7 | 2. Open up your Command Prompt or Terminal inside the "AmazonCommentScraper" folder and run this command:<br>
 8 | ```python3 Main.py```<br><br>
 9 | 3. Go to Amazon, locate your desired product, and copy the product link. Example:<br>
10 | ```https://www.amazon.com/Scotch-Brand-Applications-Engineered-810K6/dp/B00006IF67/ref=cm_cr_arp_d_product_top?ie=UTF8```<br><br>
11 | 4. (Optional) If you are using Shopify, make sure the name you enter matches EXACTLY the product name at the end of your Shopify product link! Example:```https://modernbeyond.com/products/magic-scrubber-electric-brush```<br>
12 | Use only the <b>```magic-scrubber-electric-brush```</b> part of your product link!<br><br>
13 | 5. Choose the minimum rating that you would like to gather. Example: choosing 3 stars collects reviews that are 3 stars and above.<br><br>
14 | 6. Choose the number of comments that you would like the program to search through before terminating. Input 0 to search through all of them. <br><br>
15 | 7. Input any words that you would like to exclude from the comments. All reviews containing these words will be ignored.
16 | 
17 | ### That's it, Enjoy :)
18 | 


--------------------------------------------------------------------------------
/chromedriver:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mxmnci/AmazonCommentScraper/f97fcd51cad4c0143e7936c54804a1ced79c8185/chromedriver


--------------------------------------------------------------------------------
/chromedriver.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mxmnci/AmazonCommentScraper/f97fcd51cad4c0143e7936c54804a1ced79c8185/chromedriver.exe


--------------------------------------------------------------------------------