├── .gitignore ├── README.md ├── file ├── Caesars Hotels.csv ├── Cafe Rio.csv ├── California Pizza Kitchen.csv ├── Duffy's Sports Grill.csv └── Edwin Watts Golf.csv ├── company_names100-200.csv └── prove.py /.gitignore: -------------------------------------------------------------------------------- 1 | /venv -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This is a Google Maps scraping project. 2 | -------------------------------------------------------------------------------- /file/Caesars Hotels.csv: -------------------------------------------------------------------------------- 1 | company_name,street,city,state,zip_code 2 | -------------------------------------------------------------------------------- /file/Cafe Rio.csv: -------------------------------------------------------------------------------- 1 | company_name,street,city,state,zip_code 2 | Cafe Rio,821 W State Rd, American Fork,UT,84003 3 | -------------------------------------------------------------------------------- /file/California Pizza Kitchen.csv: -------------------------------------------------------------------------------- 1 | company_name,street,city,state,zip_code 2 | California Pizza Kitchen,1001 Broxton Ave, Los Angeles,CA,90024 3 | California Pizza Kitchen,6801 Hollywood Blvd, Hollywood,CA,90028 4 | California Pizza Kitchen, 53 3rd St, San Francisco,CA,94103 5 | California Pizza Kitchen, 7th St Space 305, Los Angeles,CA,90017 6 | California Pizza Kitchen,1 Garden State Plaza Blvd, Paramus,NJ,07652 7 | California Pizza Kitchen,800 Boylston St, Boston,MA,02199 8 | California Pizza Kitchen,73080 El Paseo, Palm Desert,CA,92260 9 | -------------------------------------------------------------------------------- /file/Duffy's Sports Grill.csv: -------------------------------------------------------------------------------- 1 | 
company_name,street,city,state,zip_code 2 | Duffy's Sports Grill,11588 US-1, North Palm Beach,FL,33408 3 | Duffy's Sports Grill,3969 NE 163rd St, North Miami Beach,FL,33160 4 | Duffy's Sports Grill,8575 SW 124th Ave., Miami,FL,33183 5 | Duffy's Sports Grill,1804 Cordova Rd, Fort Lauderdale,FL,33316 6 | Duffy's Sports Grill,1750 S Federal Hwy, Delray Beach,FL,33483 7 | Duffy's Sports Grill,3001 SW Port St Lucie Blvd, Port St. Lucie,FL,34953 8 | Duffy's Sports Grill,20465 FL-7, Boca Raton,FL,33498 9 | Duffy's Sports Grill,3005 University Pkwy, Sarasota,FL,34243 10 | Duffy's Sports Grill,811 S University Dr, Plantation,FL,33324 11 | Duffy's Sports Grill,6545 Boynton Beach Blvd, Boynton Beach,FL,33437 12 | Duffy's Sports Grill,21212 St Andrews Blvd, Boca Raton,FL,33433 13 | -------------------------------------------------------------------------------- /company_names100-200.csv: -------------------------------------------------------------------------------- 1 | CompanyName 2 | Duffy's Sports Grill 3 | California Pizza Kitchen 4 | California Southern University 5 | California's Great America 6 | Camelback Mountain Resort 7 | Camping World 8 | Campmor 9 | Car Spa 10 | Carhartt 11 | Caribe Royale Orlando 12 | Cariloha 13 | Carl's Jr. 14 | Carnival Cruise Lines 15 | Carowinds 16 | Carrabbas 17 | Cash N Out Liquidators 18 | Casper 19 | Castle Resorts 20 | Ceasars Atlantic City 21 | Ceasars Palace Las Vegas 22 | Ceasars Southern Indiana 23 | Cedar Pointe 24 | Cedar Pointe Shores 25 | Champion 26 | Champs Sports 27 | Charles & Colvard 28 | Chase Bank 29 | Chase Bank 30 | Chevrolet 31 | Chevy Fresh Mex 32 | Chick-Fil-A 33 | Chili's 34 | Chipotle 35 | Choice Hotels 36 | Christopher & Banks 37 | Chrysler 38 | Chubbies Shorts 39 | Chuck E. 
Cheese's 40 | Cici's Pizza 41 | Cigars International 42 | Cinemark 43 | Cinnabon 44 | "Circus Circus Hotel, Casino, & Theme Park" 45 | Circus Circus Reno at the Row 46 | Citizen Watch 47 | Claire's 48 | Clarks 49 | Clean Origin 50 | Cleveland Browns Shop 51 | Cold Stone Creamery 52 | Cole Haan 53 | Coleman Furniture 54 | Columbia 55 | Converse 56 | Costco 57 | Cracker Barrel 58 | Crocs 59 | Cromwell Hotel and Casino 60 | Crooked Pint Ale House 61 | Crumbl Cookies 62 | -------------------------------------------------------------------------------- /file/Edwin Watts Golf.csv: -------------------------------------------------------------------------------- 1 | company_name,street,city,state,zip_code 2 | Edwin Watts Golf,32257 US Hwy 19 N, Palm Harbor,FL,34684 3 | Edwin Watts Golf,8484 NW 36th St Ste. 200, Doral,FL,33166 4 | Edwin Watts Golf,15999 Biscayne Blvd, North Miami Beach,FL,33160 5 | Edwin Watts Golf,189 State Farm Pkwy, Birmingham,AL,35209 6 | Edwin Watts Golf,2436 Veterans Memorial Blvd, Kenner,LA,70062 7 | Edwin Watts Golf,7501 Turkey Lake Rd, Orlando,FL,32819 8 | Edwin Watts Golf,3503 Wedgewood Ln, The Villages,FL,32162 9 | Edwin Watts Golf,20655 Lyons Rd, Boca Raton,FL,33434 10 | Edwin Watts Golf,5395 Westheimer Rd, Houston,TX,77056 11 | Edwin Watts Golf,11120 S Cleveland Ave, Fort Myers,FL,33907 12 | Edwin Watts Golf,2515 McGavock Pk, Nashville,TN,37214 13 | Edwin Watts Golf,5805 Camp Bowie Blvd, Fort Worth,TX,76107 14 | Edwin Watts Golf,435 S Federal Hwy, Pompano Beach,FL,33062 15 | Edwin Watts Golf,36074 Emerald Coast Pkwy, Destin,FL,32541 16 | Edwin Watts Golf,7203 Kingery Hwy, Willowbrook,IL,60527 17 | Edwin Watts Golf,4082 3rd St S, Jacksonville Beach,FL,32250 18 | Edwin Watts Golf,6824 W 119th St, Overland Park,KS,66209 19 | Edwin Watts Golf,3150 Piedmont Rd NE, Atlanta,GA,30305 20 | Edwin Watts Golf,5895 Poplar Ave, Memphis,TN,38119 21 | Edwin Watts Golf,9365 Philips Hwy Ste. 
110, Jacksonville,FL,32256 22 | Edwin Watts Golf,1050 NE Interstate 410 Loop, San Antonio,TX,78209 23 | Edwin Watts Golf,7700 Abercorn St, Savannah,GA,31406 24 | Edwin Watts Golf,3024 Richmond Rd, Lexington,KY,40509 25 | Edwin Watts Golf,2423 Capital Cir NE, Tallahassee,FL,32308 26 | Edwin Watts Golf,1 Buckingham Plantation Dr, Bluffton,SC,29910 27 | Edwin Watts Golf,4526 S Padre Island Dr, Corpus Christi,TX,78411 28 | Edwin Watts Golf,5717 N Davis Hwy, Pensacola,FL,32503 29 | Edwin Watts Golf,946 Orleans Rd, Charleston,SC,29407 30 | Edwin Watts Golf,266 Bobby Jones Expy #21, Martinez,GA,30907 31 | Edwin Watts Golf,2602 S Shackleford Rd G, Little Rock,AR,72205 32 | Edwin Watts Golf,11364 Parkside Dr, Knoxville,TN,37934 33 | Edwin Watts Golf,168 West Interstate 65 Service Rd S, Mobile,AL,36608 34 | Edwin Watts Golf,820 E County Line Rd, Ridgeland,MS,39157 35 | Edwin Watts Golf,410 W SW Loop 323, Tyler,TX,75701 36 | Edwin Watts Golf,5235 S Sherwood Forest Blvd, Baton Rouge,LA,70816 37 | Edwin Watts Golf,1820 E 70th St Ste. 
"""Scrape store addresses for a list of companies from Google Maps.

For every name in ``company_names100-200.csv`` the script searches Google Maps
for "<name> in United States", reads the address of the single place that is
shown (or of every entry in the result list) and appends one row per address
to ``file/<name>.csv``.

NOTE(review): every XPath and CSS class name below was copied verbatim from
the original script; they mirror the Google Maps DOM at the time of writing
and will need updating whenever that markup changes.
"""
import csv
import os
import re
from time import sleep

from bs4 import BeautifulSoup as BS
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager


# --- Output / input locations ------------------------------------------------
OUTPUT_DIR = "file"
COMPANY_LIST_CSV = 'company_names100-200.csv'

# --- Page locators (verbatim from the original script) -----------------------
SEARCH_INPUT_XPATH = "/html/body/div[1]/div[3]/div[8]/div[3]/div[1]/div[1]/div/div[2]/form/input"
SEARCH_BUTTON_XPATH = "/html/body/div[1]/div[3]/div[8]/div[3]/div[1]/div[1]/div/div[2]/div[1]"
SINGLE_PLACE_ADDRESS_XPATH = '/html/body/div[1]/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[9]/div[3]/button/div/div[2]/div[1]'
RESULTS_PANE_XPATH = "/html/body/div[1]/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[1]/div[1]"
# Buttons clicked once after the page loads.
# NOTE(review): what these buttons do is not visible from the code - confirm.
STARTUP_BUTTON_XPATHS = (
    "/html/body/div[1]/div[3]/div[8]/div[9]/div/div/div[1]/div[1]/ul/li[1]/button",
    "/html/body/div[1]/div[3]/div[8]/div[25]/div/div[2]/ul/div[7]/li[1]/button",
)
STARTUP_LINK_XPATH = "/html/body/div[1]/div[3]/div[1]/div/div[2]/div/div[3]/div/div/div/div[2]/div[1]/div[11]/a"
STARTUP_FALLBACK_BUTTON_XPATH = "/html/body/div[1]/div[3]/div[1]/div/div[2]/div/div[2]/button"

# Safety net for the result-list scroll loop: one key press every 2 seconds.
MAX_SCROLL_ATTEMPTS = 1000

# "<letters><digits>[-<digits>]", e.g. "FL33408" or "CA90024-1234".
_STATE_ZIP_RE = re.compile(r"([a-zA-Z]+)(\d+(?:-\d+)?)")


arg1 = "--profile-directory=Person1"
arg2 = "user-data-dir=C:\\Users\\Administrator\\AppData\\Local\\Google\\Chrome\\User Data\\Person1"
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument(arg1)
chrome_options.add_argument(arg2)
chrome_options.binary_location = "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe"
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)


def _csv_path(companyname):
    """Return the path of the per-company output file."""
    return OUTPUT_DIR + "/" + companyname + ".csv"


def write_info(data):
    """Append one row to the CSV file of the company named in ``data[0]``.

    Args:
        data: ``[company_name, street, city, state, zip_code]``.

    Returns:
        True when the row was written, False when the file could not be
        written (the error is printed, not raised).
    """
    try:
        with open(_csv_path(data[0]), mode='a', newline='', encoding='utf-8') as file:
            csv.writer(file).writerow(data)
    except (OSError, csv.Error) as exc:
        print("write error")
        print("Exception:", exc)
        return False
    return True


def write_header(companyname):
    """Create (or truncate) the company's CSV file and write the header row.

    The output directory is created first so that a fresh checkout without a
    ``file/`` folder does not fail every write.

    Returns:
        True on success, False when the file could not be written.
    """
    header = ['company_name', 'street', 'city', 'state', 'zip_code']
    try:
        os.makedirs(OUTPUT_DIR, exist_ok=True)
        with open(_csv_path(companyname), 'w', newline='', encoding='utf-8') as file:
            csv.writer(file).writerow(header)
    except (OSError, csv.Error) as exc:
        print('write error')
        print("Exception:", exc)
        return False
    return True


def Seperate_address(address):
    """Split a comma-separated address into ``[street, city, state, zipcode]``.

    The pieces are read from the END of the address: the last piece is
    ignored, the one before it holds state and ZIP, then come city and
    street. Anything further left is dropped. For 2 to 5 pieces this selects
    the same fields as the original per-length branches; longer or shorter
    inputs no longer raise.

    * ``"821 W State Rd, American Fork, UT 84003, United States"``
      -> ``["821 W State Rd", "American Fork", "UT", "84003"]``
    * ``"American Fork, UT 84003, United States"``
      -> ``["", "American Fork", "UT", "84003"]``
    * ``"Utah, United States"`` -> ``["", "", "Utah", ""]``

    Street and city are stripped of surrounding whitespace. When the
    state/ZIP piece does not look like ``<letters><digits>`` the message
    "error occured" is printed and the whole piece is returned as the state
    with an empty ZIP code instead of raising.

    Args:
        address: Address text as shown on the page.

    Returns:
        A new list ``[street, city, state, zipcode]`` of strings.
    """
    parts = address.split(",")
    count = len(parts)

    street = parts[-4].strip() if count >= 4 else ""
    city = parts[-3].strip() if count >= 3 else ""

    if count <= 2:
        # Only "<state>, <country>" (or a single token): nothing to split.
        return [street, city, parts[0].strip(), ""]

    state_zip = parts[-2]
    # Spaces are removed so that "UT 84003" matches as "UT84003".
    match = _STATE_ZIP_RE.match(state_zip.replace(" ", ""))
    if match:
        state, zipcode = match.groups()
    else:
        print("error occured")
        state, zipcode = state_zip.strip(), ""

    return [street, city, state, zipcode]


def onedata(Companyname):
    """Read the address of the single place currently shown and store it.

    Args:
        Companyname: Name written to the first CSV column; it also selects
            the output file.

    Returns:
        The row ``[Companyname, street, city, state, zipcode]``.

    Raises:
        selenium.common.exceptions.NoSuchElementException: when the address
            element is not on the page.
    """
    print('Onedata')
    address = driver.find_element(By.XPATH, SINGLE_PLACE_ADDRESS_XPATH).text
    data = Seperate_address(address)
    data.insert(0, Companyname)
    print(data)
    write_info(data)

    return data


def _scroll_to_end_of_results():
    """Press DOWN in the result pane until the "HlvSq" element has text.

    NOTE(review): "HlvSq" is presumably the end-of-list marker - confirm.

    Returns:
        True when the marker text appeared, False when MAX_SCROLL_ATTEMPTS
        key presses were not enough.
    """
    for _ in range(MAX_SCROLL_ATTEMPTS):
        try:
            if driver.find_element(By.CLASS_NAME, "HlvSq").text:
                return True
        except WebDriverException:
            pass  # marker not rendered yet - keep scrolling
        driver.find_element(By.XPATH, RESULTS_PANE_XPATH).send_keys(Keys.DOWN)
        sleep(2)
    print("end of result list not reached")
    return False


def moredata(Companyname):
    """Store the address of every entry in the search result list.

    The list is scrolled to its end first, then each "hfpxzc" element is
    clicked and the address shown in the "kR99db" element is parsed and
    written. A result that cannot be clicked or has no address is skipped,
    so one bad entry neither aborts the remaining ones nor re-writes the
    previously opened place.

    Args:
        Companyname: Name written to the first CSV column; it also selects
            the output file.

    Returns:
        The list of rows that were handed to ``write_info``.
    """
    print("one more")
    _scroll_to_end_of_results()

    elements = driver.find_elements(By.CLASS_NAME, "hfpxzc")
    print(len(elements))

    rows = []
    for i, element in enumerate(elements):
        print(i)
        try:
            WebDriverWait(driver, 7).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "hfpxzc"))
            )
            print("Element is clickable!")
            element.click()
        except WebDriverException as e:
            print("Exception:", e)
            continue
        sleep(4)  # give the detail panel time to load

        try:
            address = driver.find_element(By.CLASS_NAME, 'kR99db').text
        except WebDriverException as e:
            print("error")
            print("Exception:", e)
            continue

        data = Seperate_address(address)
        data.insert(0, Companyname)
        print(data)
        sleep(2)
        write_info(data)
        rows.append(data)
    return rows


def _click_when_ready(xpath, timeout=7):
    """Wait until the element at *xpath* is clickable, then click it.

    Returns:
        True when the click happened, False when the wait or the click
        failed (the exception is printed).
    """
    try:
        WebDriverWait(driver, timeout).until(
            EC.element_to_be_clickable((By.XPATH, xpath))
        ).click()
    except WebDriverException as e:
        print("Exception:", e)
        return False
    return True


def _scrape_company(name):
    """Search for one company and store the address(es) found."""
    search_box = driver.find_element(By.XPATH, SEARCH_INPUT_XPATH)
    search_box.clear()

    if not write_header(name):
        return

    search_box.send_keys(name + " in United States")
    sleep(1)
    driver.find_element(By.XPATH, SEARCH_BUTTON_XPATH).click()

    sleep(3)
    elecss = WebDriverWait(driver, 7).until(
        EC.presence_of_element_located((By.CLASS_NAME, "T7HQDc"))
    )
    # Hide the element so it is no longer displayed on the page.
    driver.execute_script("arguments[0].style.display ='none';", elecss)

    # An <h1 class="DUwDvf lfPIob"> selects the single-place path; without
    # it the result-list path is used.
    soup = BS(driver.page_source, 'html.parser')
    if soup.find('h1', {'class': 'DUwDvf lfPIob'}):
        onedata(name)
    else:
        moredata(name)


def start():
    """Open Google Maps, click the start-up buttons and scrape every company.

    Company names are read from the ``CompanyName`` column of
    ``company_names100-200.csv``. Empty and repeated names are skipped. A
    failure while processing one company is printed and the run continues
    with the next one.
    """
    driver.get("https://map.google.com")
    driver.maximize_window()

    # Best-effort clicks: each button may or may not be present.
    for xpath in STARTUP_BUTTON_XPATHS:
        _click_when_ready(xpath)
        sleep(3)
    if not _click_when_ready(STARTUP_LINK_XPATH):
        try:
            driver.find_element(By.XPATH, STARTUP_FALLBACK_BUTTON_XPATH).click()
        except WebDriverException as e:
            print("Exception:", e)
    sleep(3)

    seen = set()
    # No explicit encoding: the platform default is kept as in the original.
    # NOTE(review): the list may not be valid UTF-8 - confirm before changing.
    with open(COMPANY_LIST_CSV, 'r', newline='') as file:
        for row in csv.DictReader(file):
            name = row['CompanyName']
            print(name)
            if not name or name in seen:
                continue
            seen.add(name)
            try:
                _scrape_company(name)
            except Exception as e:  # boundary: never let one company stop the run
                print("find one error")
                print("Exception:", e)


try:
    start()
finally:
    # Close the browser so Chrome and chromedriver do not outlive the script.
    driver.quit()