# Repository dump reconstructed into readable form.
#
# Layout:
#   ├── .gitattributes
#   ├── ecommerce_scraper.py
#   └── tesla_pricing_excel.py
#
# --- .gitattributes ---------------------------------------------------------
# # Auto detect text files and perform LF normalization
# * text=auto

# === ecommerce_scraper.py ===================================================
# Scrapes product search results from catch.com.au via a webQL-driven
# Playwright browser session and saves them to a CSV file.

import csv
import os
import time

import webql
from webql.web import ChromePlaywrightWebDriver as PlaywrightWebDriver
from dotenv import load_dotenv

load_dotenv()

# Target site, search term, and output location for the scraped data.
url = "catch.com.au"  # NOTE(review): no URL scheme — presumably webql normalizes this; confirm
product = "coffee machine"
file_name = "data/catch_products.csv"


# Set up the Playwright web driver and start a new webQL session.
driver = PlaywrightWebDriver(headless=False)
user_session_extras = {"user_data_dir": "tmp/playwright_chrome_user_data"}
session = webql.start_session(url, user_session_extras, web_driver=driver)


def _save_json_as_csv(json_data, file_name):
    """Flatten a webQL product-query response into a CSV file.

    Args:
        json_data: dict shaped like ``{"results": {"products": [...]}}``
            where each product dict may carry product_name, product_price,
            product_rating and number_of_reviews keys.
        file_name: destination CSV path; its parent directory is created
            if missing.
    """
    csv_header = [
        "product_name",
        "product_price",
        "product_rating",
        "number_of_reviews",
    ]
    csv_data = [csv_header]
    # FIX: loop variable renamed from `product` — it shadowed the
    # module-level `product` search-term constant.
    for item in json_data.get("results", {}).get("products", []):
        print(item)
        csv_data.append(
            [
                item.get("product_name", ""),
                item.get("product_price", ""),
                item.get("product_rating", ""),
                item.get("number_of_reviews", ""),
            ]
        )

    # FIX: ensure the output directory (e.g. "data/") exists so open()
    # does not raise FileNotFoundError on a fresh checkout.
    os.makedirs(os.path.dirname(file_name) or ".", exist_ok=True)
    with open(file_name, "w", newline="") as file:
        writer = csv.writer(file)
        writer.writerows(csv_data)


# webQL semantic queries: home-page search controls, then the result grid.
HOME_QUERY = """
{
    search_box
    search_btn
}
"""

SEARCH_QUERY = """
{
    results {
        products[] {
            product_name
            product_price
            product_rating
            number_of_reviews
        }
    }
}
"""

# Run query on home page, submit the search, then scrape the result page.
home_page = session.query(HOME_QUERY)
home_page.search_box.fill(product)
home_page.search_btn.click(force=True)

time.sleep(2)  # crude wait for the results page to render

search_results = session.query(SEARCH_QUERY)
print(search_results)

_save_json_as_csv(search_results.to_data(), file_name)

session.stop()


# === tesla_pricing_excel.py =================================================
# Scrapes Tesla Model S pricing for several regions, saves it as CSV,
# pastes it into a new Google Sheet, and shares the result on Slack.
# pylint: skip-file

import csv
import os
import time

import webql
from webql.web import ChromePlaywrightWebDriver as PlaywrightWebDriver
from dotenv import load_dotenv

load_dotenv()

CSV_FILE = "tmp/tesla_pricing.csv"
CHAT_URL = "https://app.slack.com/client/T06J6D3BYDR/C06HZT327JS"

# Region name -> Model S configurator URL for that locale.
COUNTRIES = {
    "Canada": "https://www.tesla.com/en_CA/models/design#overview",
    "France": "https://www.tesla.com/fr_FR/models/design#overview",
    "Korea": "https://www.tesla.com/ko_KR/models/design#overview",
    "UAE": "https://www.tesla.com/ar_AE/models/design#overview",
}

# NOTE: the field name `mode_s_plaid_price` (sic) is kept as-is — it is the
# semantic key webql resolves and the key every downstream lookup uses.
MODEL_S_QUERY = """
{
    model_s {
        delivery
        model_s_price
        mode_s_plaid_price
    }
}
"""

driver = PlaywrightWebDriver(headless=False)
# NOTE(review): hard-coded absolute path to one developer's Chrome profile —
# should come from an environment variable / .env entry instead.
user_session_extras = {
    "user_data_dir": "/Users/jasonzhou/Library/Application Support/Google/Chrome/agent_profile"
}
session = webql.start_session("", user_session_extras, web_driver=driver)


def _save_json_as_csv(json_data, file_name):
    """Write per-country Model S pricing data to a CSV file.

    Args:
        json_data: mapping of country name -> query result dict (or None
            for countries whose extraction failed; those are skipped).
        file_name: destination CSV path; its parent directory is created
            if missing.
    """
    # FIX: header said "Mode S Plaid Price" — user-facing typo corrected.
    csv_header = ["Country", "Delivery Date", "Model S Price", "Model S Plaid Price"]
    csv_data = [csv_header]
    for country, data in json_data.items():
        if not data:
            continue  # extraction failed for this country — skip the row
        model_s_data = data.get("model_s", {})
        delivery_date = model_s_data.get("delivery", "")
        model_s_price = model_s_data.get("model_s_price", "")
        mode_s_plaid_price = model_s_data.get("mode_s_plaid_price", "")
        csv_data.append([country, delivery_date, model_s_price, mode_s_plaid_price])

    # FIX: ensure "tmp/" exists before writing.
    os.makedirs(os.path.dirname(file_name) or ".", exist_ok=True)
    with open(file_name, "w", newline="") as file:
        writer = csv.writer(file)
        writer.writerows(csv_data)


def _highlight(response):
    """Best-effort visual highlight of the scraped fields in the browser."""
    try:
        driver.prepare_highlight()
        driver.highlight(response.model_s.delivery)
        driver.highlight(response.model_s.model_s_price)
        driver.highlight(response.model_s.mode_s_plaid_price)
        time.sleep(0.5)
    except Exception as e:
        # Highlighting is cosmetic — never let it abort the scrape.
        print(f"Error highlighting: {e}")


def extract_region_data(country: str, region_url: str):
    """Extract region data for a given country as json.

    Returns the query result dict, or None if the page could not be
    scraped (the caller treats None as "skip this country").
    """
    print(f"Extracting region data for {country}")
    try:
        driver.open_url(region_url)
        response = session.query(MODEL_S_QUERY, lazy_load_pages_count=0)
        _highlight(response)
        data = response.to_data()
        print(f"Tesla pricing data for {country}: {data}")
        return data
    except Exception as e:
        # FIX: was `except Exception as _: return None` — errors were
        # swallowed silently; still best-effort, but now visible.
        print(f"Error extracting region data for {country}: {e}")
        return None


def extract_tesla_pricing_data():
    """Extract Tesla pricing data for every country and save as csv."""
    tesla_data = {}
    for country, url in COUNTRIES.items():
        tesla_data[country] = extract_region_data(country, url)

    _save_json_as_csv(tesla_data, CSV_FILE)


def upload_to_google_sheet():
    """Paste the CSV into a fresh Google Sheet.

    Returns the sheet URL on success, or None if any step failed.
    """
    try:
        with open(CSV_FILE) as file:
            csv_data = file.readlines()
        driver.open_url("https://docs.google.com/spreadsheets/u/0/create")
        time.sleep(1)
        driver.paste_via_clipboard("".join(csv_data))
        time.sleep(1)
        session.query("{paste_format_btn}").paste_format_btn.click()
        time.sleep(1)
        session.query(
            "{split_text_to_columns_menuitem}"
        ).split_text_to_columns_menuitem.click()
        time.sleep(1)
        session.query("{rename_input}").rename_input.fill("Tesla Pricing")
        driver.press_key("Enter")
        time.sleep(2)

        return driver.get_current_url()
    except Exception as e:
        print(f"Error: {e}")


def share_on_chat(url, message):
    """Share the url on chat, then best-effort attach the CSV file."""
    driver.open_url(CHAT_URL)
    time.sleep(1)
    session.query("{message_textbox}").message_textbox.fill(message)
    driver.paste_via_clipboard(url)
    time.sleep(1)
    driver.press_key("Enter")
    try:
        session.query("{attach_button}").attach_button.click()
        attach_file = session.query(
            "{upload_from_your_computer}"
        ).upload_from_your_computer
        driver.upload_file(attach_file, CSV_FILE)
        time.sleep(5)
        driver.press_key("Enter")
    except Exception as e:
        print(f"Upload file error: {e}")


if __name__ == "__main__":
    extract_tesla_pricing_data()
    google_sheet_url = upload_to_google_sheet()
    # FIX: upload_to_google_sheet() returns None on failure — do not try
    # to share a nonexistent URL on Slack.
    if google_sheet_url:
        share_on_chat(google_sheet_url, "Here is the Tesla pricing data: ")
    time.sleep(100)  # keep the browser open for manual inspection
    session.stop()