├── scrapes └── .gitkeep ├── .gitignore ├── requirements.txt ├── .idea ├── .gitignore ├── vcs.xml ├── misc.xml ├── inspectionProfiles │ ├── profiles_settings.xml │ └── Project_Default.xml ├── carfax_webscraper.iml ├── csv-plugin.xml └── modules.xml ├── LICENSE ├── README.md └── main.py /scrapes/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | scrapes/* 2 | !scrapes/.gitkeep 3 | cars_list -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pandas==1.0.2 2 | requests==2.22.0 3 | -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /.idea/carfax_webscraper.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 
-------------------------------------------------------------------------------- /.idea/csv-plugin.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 15 | 16 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2021, Graham 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Disclaimer 2 | 3 | * I do not promote, encourage, support or excite any illegal activity or hacking without written permission in general. The repo and the author of the repo are in no way responsible for any misuse of the information. 4 | 5 | * "carfax-scraper" is just a term that represents the name of the repo and is not a repo that provides any illegal information. 6 | 7 | * The software and scripts provided by the repo should be used for **_EDUCATIONAL PURPOSES ONLY_**. The repo or the author cannot be held responsible for the misuse of them by the users. 8 | 9 | * I am not responsible for any direct or indirect damage caused due to the usage of the code provided on this site. All the information provided on this repo is for educational purposes only. 10 | 11 | 12 | 13 | ## Usage Instructions 14 | 15 | (Google Chrome only; only tested on Mac OS) 16 | 17 | 18 | 19 | 1. Copy repo: 20 | 21 | `git clone https://github.com/grsahagian/carfax-scraper` 22 | 23 | 24 | 2. Install dependencies (requirements.txt) 25 | 26 | #### Get Authorization code 27 | 3. Navigate to https://www.carfax.com/cars-for-sale 28 | 3. Search for any car make and model within any valid zip code, then click "Show me results". 29 | 4. 
"""Scrape CARFAX used-car listings for one make/model near a ZIP code.

Set the PARAMS and AUTH constants below, then run ``python main.py`` from the
project folder. The results are written to an Excel file under ``scrapes/``.

Getting an authorization token (Google Chrome), continuing from the README:
  4. Right click anywhere on the search results page and select "Inspect".
  5. Click "Network" at the top of the new window, then click "Search" again.
  6. On the left side under "Name" click the row labelled
     "findVehicles?tpQualityThreshold=150...".
  7. Scroll down on the header tab and look for 'authorization'
     (under "Request Headers").
  8. Copy the entire value after ``authorization:`` and paste it into this
     file as ``AUTH``.
  9. Set the values under ``# PARAMS`` (make, model and zip) as preferred.

Special thanks to Michael (https://github.com/Michael001154) for help
developing the project.
"""
import random
import time
from datetime import date
from pathlib import Path
from urllib.parse import quote

import pandas as pd
import requests

# PARAMS - set according to preference
CAR_MAKE = 'Honda'  # brand
CAR_MODEL = 'Civic'  # specific model
ZIP = '10001'  # input as string

# FOLLOW README TO GET YOUR OWN AUTHORIZATION TOKEN AND SET TO VARIABLE BELOW
AUTH = 'YOUR AUTH TOKEN HERE'

# Consumer id used in the API path and in the ``oneAccountId`` query parameter.
# NOTE(review): this value was captured alongside the original author's token;
# presumably it has to match the account that owns AUTH - confirm and replace.
ACCOUNT_ID = 'auth0%7Coasc%7C454836195'

REQUEST_TIMEOUT = 30  # seconds; stops a stalled connection from hanging the run
MAX_PAGE_DELAY = 0.8  # upper bound (seconds) of the random pause between pages
OUTPUT_DIR = Path('scrapes')

_SEARCH_URL = (
    'https://www.carfax.com/api/v2/consumers/{account}/findVehicles'
    '?tpQualityThreshold=150&tpPositions=1%2C2%2C3&tpValueBadges=GOOD%2CGREAT'
    '&zip={zip_code}&radius=50&sort=BEST&dynamicRadius=false'
    '&make={make}&model={model}&certified=false{page}'
    '&oneAccountId={account}'
)

# Column order of the exported sheet; also keeps the 'link' column present
# when a search returns no listings at all.
_COLUMNS = [
    'year',
    'make',
    'model',
    'list_price',
    'mileage',
    'dealer_address',
    'dealer_city',
    'dealer_state',
    'dealer_name',
    'link',
]


def _search_url(zip_code, car_make, car_model, page=None):
    """Build the findVehicles URL, percent-encoding the user-supplied parts."""
    page_part = '' if page is None else f'&page={page}'
    return _SEARCH_URL.format(
        account=ACCOUNT_ID,
        zip_code=quote(str(zip_code), safe=''),
        make=quote(str(car_make), safe=''),    # e.g. 'Land Rover' -> 'Land%20Rover'
        model=quote(str(car_model), safe=''),
        page=page_part,
    )


def _request_headers():
    """Return the headers sent with every API request.

    This is the header set the page-count request has always used. The
    browser cookie that used to be hard-coded for the per-page requests is no
    longer sent: it embedded a personal session token and e-mail address.
    """
    return {
        'authority': 'www.carfax.com',
        'sec-ch-ua': '"Google Chrome";v="93", " Not;A Brand";v="99", "Chromium";v="93"',
        'authorization': AUTH,
        'accept': 'application/json',
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36',
    }


def _fetch_json(url, headers):
    """GET ``url`` and return the decoded JSON body; raise on an HTTP error."""
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail with the real HTTP status (e.g. 401 for a bad token) rather than a
    # confusing KeyError or JSON decoding error further down.
    response.raise_for_status()
    return response.json()


def _listing_row(car):
    """Flatten one listing from the API response into a spreadsheet row."""
    dealer = car.get("dealer") or {}
    return {
        'year': car.get("year"),
        'make': car.get("make"),
        'model': car.get("model"),
        'list_price': car.get("listPrice"),
        'mileage': car.get("mileage"),
        'dealer_address': dealer.get("address"),
        'dealer_city': dealer.get("city"),
        'dealer_state': dealer.get("state"),
        'dealer_name': dealer.get("name"),
        'link': car.get("vdpUrl"),
    }


def request_carfax(zip_code, car_make, car_model):
    """Scrape every result page for a search and export it to Excel.

    Args:
        zip_code: ZIP code to search around, as a string.
        car_make: Vehicle brand, e.g. ``'Honda'``.
        car_model: Vehicle model, e.g. ``'Civic'``.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.RequestException: On connection problems or a timeout.

    Side effects:
        Prints progress and writes
        ``scrapes/<make>_<model>_<zip>_scrapes_<MM-DD-YYYY>.xlsx``.
    """
    headers = _request_headers()

    # Initial request only to learn how many result pages the search has.
    first_page = _fetch_json(_search_url(zip_code, car_make, car_model), headers)
    pages_in_search = first_page['totalPageCount']
    print(f'There are {pages_in_search} pages of search results', f'for {car_make} {car_model} in Area Code: {zip_code}')

    rows = []
    # NOTE(review): pages are requested as 1..N because the search parameters
    # captured from the site use "page": 1 for the first results page; the
    # previous 0..N-1 range would then skip the last page. Confirm against
    # the live API.
    for page in range(1, pages_in_search + 1):
        print("Scraping page", page, "...")
        page_json = _fetch_json(
            _search_url(zip_code, car_make, car_model, page=page), headers
        )
        rows.extend(_listing_row(car) for car in page_json.get("listings", []))
        # Delay to reduce the risk of an IP ban -> increase for larger scrapes.
        time.sleep(random.uniform(0, MAX_PAGE_DELAY))

    # Explicit columns keep 'link' available even when nothing was scraped.
    scraped_results = pd.DataFrame(rows, columns=_COLUMNS)
    scraped_results = scraped_results.drop_duplicates(subset=['link'])

    print("Successfully scraped", str(len(scraped_results)), car_make, car_model, "listing(s) from Area Code:", zip_code)

    todays_date = date.today().strftime('%m-%d-%Y')  # date for exporting
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    output_path = OUTPUT_DIR / '{}_{}_{}_scrapes_{}.xlsx'.format(car_make, car_model, zip_code, todays_date)
    scraped_results.to_excel(str(output_path))


if __name__ == "__main__":
    request_carfax(ZIP, CAR_MAKE, CAR_MODEL)