├── scrapes
└── .gitkeep
├── .gitignore
├── requirements.txt
├── .idea
├── .gitignore
├── vcs.xml
├── misc.xml
├── inspectionProfiles
│ ├── profiles_settings.xml
│ └── Project_Default.xml
├── carfax_webscraper.iml
├── csv-plugin.xml
└── modules.xml
├── LICENSE
├── README.md
└── main.py
/scrapes/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | scrapes/*
2 | !scrapes/.gitkeep
3 | cars_list
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | pandas==1.0.2
2 | requests==2.22.0
3 |
--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/carfax_webscraper.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/.idea/csv-plugin.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
15 |
16 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/Project_Default.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
13 |
14 |
15 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2021, Graham
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | 1. Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | 3. Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Disclaimer
2 |
3 | * I do not promote, encourage, support, or incite any illegal activity, or any hacking without written permission, in general. The repo and its author are in no way responsible for any misuse of the information.
4 |
5 | * "carfax-scraper" is just a term that represents the name of the repo; it is not a repo that provides any illegal information.
6 |
7 | * The software and scripts provided by the repo should be used for **_EDUCATIONAL PURPOSES ONLY_**. Neither the repo nor the author can be held responsible for their misuse by users.
8 |
9 | * I am not responsible for any direct or indirect damage caused by the use of the code provided in this repo. All the information provided in this repo is for educational purposes only.
10 |
11 |
12 |
13 | ## Usage Instructions
14 |
15 | (Google Chrome only; only tested on Mac OS)
16 |
17 |
18 |
19 | 1. Copy repo:
20 |
21 | `git clone https://github.com/grsahagian/carfax-scraper`
22 |
23 |
24 | 2. Install dependencies (requirements.txt)
25 |
26 | #### Get Authorization code
27 | 3. Navigate to https://www.carfax.com/cars-for-sale
28 | 4. Search for any car make and model within any valid zip code, then click "Show me results".
29 | 5. Right click anywhere on the page and select "Inspect"
30 | 6. Click "Network" at the top of the new window, then click "Search" again
31 | 7. On the left side under "Name" click on the row labelled "findVehicles?tpQualityThreshold=150..."
32 | 8. Scroll down on the Headers tab and look for 'authorization' (under "Request Headers")
33 | 9. Copy the entire value for `authorization:` after the colon and paste it into 'main.py' as `AUTH = `
34 | 10. Set the parameter values (under `# PARAMS`) according to preference (make, model, and zip)
35 | 11. In the command line navigate to the project folder and run `python main.py`
36 |
37 |
38 | Special thanks to Michael (https://github.com/Michael001154) for help developing the project
39 |
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import requests
3 | from datetime import date
4 | import time
5 | import random
6 |
# PARAMS - edit these three values to choose what to search for
CAR_MAKE = "Honda"  # manufacturer / brand name
CAR_MODEL = "Civic"  # model within that make
ZIP = "10001"  # zip code to search around; keep it a string

# Authorization token placed in the request headers.
# Follow the README to capture your own token and paste it here.
AUTH = "YOUR AUTH TOKEN HERE"
14 |
# Search endpoint queried for both the page count and every results page.
# NOTE(review): the consumer/account id in the path (and in `oneAccountId`
# below) is hard-coded; presumably it has to match the account that issued
# AUTH -- confirm before relying on it with another account.
_FIND_VEHICLES_URL = (
    "https://www.carfax.com/api/v2/consumers/"
    "auth0%7Coasc%7C454836195/findVehicles"
)

# Seconds to wait for the server before giving up on a request.
_REQUEST_TIMEOUT = 30

# Column order of the exported sheet; also lets an empty result set still
# produce a frame that has a 'link' column to de-duplicate on.
_LISTING_COLUMNS = [
    'year', 'make', 'model', 'list_price', 'mileage',
    'dealer_address', 'dealer_city', 'dealer_state', 'dealer_name', 'link',
]


def _fetch_search_page(headers, params):
    """GET the search endpoint with *params* and return the decoded JSON body."""
    response = requests.get(
        _FIND_VEHICLES_URL,
        headers=headers,
        params=params,  # lets requests URL-encode make/model/zip values
        timeout=_REQUEST_TIMEOUT,
    )
    # Surface auth/ban/server errors as an HTTPError instead of a confusing
    # JSON or KeyError failure further down.
    response.raise_for_status()
    return response.json()


def _listing_row(car):
    """Flatten one entry of the response's "listings" array into a row dict."""
    dealer = car["dealer"]
    return {
        'year': car["year"],
        'make': car["make"],
        'model': car["model"],
        'list_price': car["listPrice"],
        'mileage': car["mileage"],
        'dealer_address': dealer["address"],
        'dealer_city': dealer["city"],
        'dealer_state': dealer["state"],
        'dealer_name': dealer["name"],
        'link': car["vdpUrl"],
    }


def request_carfax(zip_code, car_make, car_model):
    """Scrape Carfax search results for one make/model and export them to Excel.

    Queries the search endpoint once to learn how many result pages exist,
    then fetches every page, collects one row per listing, drops rows whose
    'link' repeats, and writes the result to
    ``scrapes/<make>_<model>_<zip>_scrapes_<MM-DD-YYYY>.xlsx``.

    Args:
        zip_code: Zip code to search around, as a string.
        car_make: Vehicle make (brand) to search for.
        car_model: Vehicle model to search for.

    Returns:
        The de-duplicated pandas DataFrame that was written to disk
        (empty, but with all columns, when the search has no results).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        KeyError: If a response lacks an expected field such as
            'totalPageCount' or 'listings'.
    """
    # One header set for every request. The per-page request used to replay a
    # captured browser session (a cookie holding an account e-mail and API
    # token, plus one-off header values); the page-count request is sent to
    # this same endpoint with only the headers below, so they are reused and
    # no personal session data is embedded in the script.
    headers = {
        'authority': 'www.carfax.com',
        'sec-ch-ua': '"Google Chrome";v="93", " Not;A Brand";v="99", "Chromium";v="93"',
        'authorization': AUTH,
        'accept': 'application/json',
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36',
    }

    query = {
        'tpQualityThreshold': '150',
        'tpPositions': '1,2,3',
        'tpValueBadges': 'GOOD,GREAT',
        'zip': zip_code,
        'radius': '50',
        'sort': 'BEST',
        'dynamicRadius': 'false',
        'make': car_make,
        'model': car_model,
        'certified': 'false',
        'oneAccountId': 'auth0|oasc|454836195',
    }

    # Initial request only to find the number of pages for this search.
    pages_in_search = _fetch_search_page(headers, query)['totalPageCount']
    print(f'There are {pages_in_search} pages of search results', f'for {car_make} {car_model} in Area Code: {zip_code}')

    scraped_results = []
    # NOTE(review): pages are requested as 1..totalPageCount. Iterating
    # range(totalPageCount) asked for page 0 and never requested the final
    # page; this assumes the API numbers its first page 1 -- confirm against
    # the live endpoint.
    for page in range(1, pages_in_search + 1):
        print("Scraping page", page, "...")
        page_json = _fetch_search_page(headers, dict(query, page=page))
        scraped_results.extend(_listing_row(car) for car in page_json["listings"])
        # Random delay to reduce the risk of an IP ban; increase it for
        # larger scrapes.
        time.sleep(random.uniform(0, 0.8))

    # Computed outside the loop so it exists even when there are zero pages.
    todays_date = date.today().strftime('%m-%d-%Y')

    # Explicit columns keep the 'link' column present for an empty result.
    scraped_results = pd.DataFrame(scraped_results, columns=_LISTING_COLUMNS)
    scraped_results = scraped_results.drop_duplicates(subset=['link'])

    print("Successfully scraped", str(len(scraped_results)), car_make, car_model, "listing(s) from Area Code:", zip_code)
    scraped_results.to_excel('scrapes/{}_{}_{}_scrapes_{}.xlsx'.format(car_make, car_model, zip_code, todays_date))
    return scraped_results
93 |
# Run the scrape only when executed as a script (`python main.py`), so that
# importing this module does not fire network requests as a side effect.
if __name__ == "__main__":
    request_carfax(ZIP, CAR_MAKE, CAR_MODEL)
95 |
96 |
97 |
--------------------------------------------------------------------------------