├── requirements.txt ├── data └── headers.json ├── README.md ├── .gitignore ├── searchByStore.py └── helper.py /requirements.txt: -------------------------------------------------------------------------------- 1 | requests==2.22.0 2 | -------------------------------------------------------------------------------- /data/headers.json: -------------------------------------------------------------------------------- 1 | {"terrafirm":{"pragma": "no-cache","content-type": "application/json","accept": "*/*","cache-control": "no-cache","authority": "www.walmart.com"}} 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # WalmartClearanceFinder 2 | Tool to find unmarked clearance items at Walmart stores across the US. 3 | 4 | ## What does this do? 5 | 6 | This program uses a series of private API endpoints on Walmart.com to find unmarked clearance items at Walmart stores across the United States. 
7 | 8 | ## Installing Dependencies 9 | 10 | ``` {.sourceCode .bash} 11 | $ pip install -r requirements.txt 12 | ``` 13 | 14 | ## How to use 15 | 16 | 17 | -i determines the SKUs to search (leave default for all; takes text files and strings as inputs) 18 | 19 | -s determines which store to search (leave default for all) 20 | 21 | -a determines if it should only find items in stock (set -a 1 for in-stock only, leave default for everything) 22 | 23 | -v determines how verbose the output is 24 | 25 | -o determines the destination of the CSV file 26 | 27 | -t determines how many threads to use 28 | 29 | 30 | ## Examples 31 | 32 | ### Search for every item that is in stock at Walmart store #2265 33 | 34 | ```bash 35 | 36 | python searchByStore.py -s 2265 -o 2265.csv -a 1 -v 2 37 | 38 | ``` 39 | 40 | ### Search for every Chromecast in the United States 41 | 42 | ```bash 43 | 44 | python searchByStore.py -i 435188866 -o chromecast.csv -v 2 45 | 46 | ``` 47 | 48 | ### Pull the full inventory of every Walmart store in the United States (in-stock and out-of-stock) (200,000,000+ items) 49 | 50 | ```bash 51 | 52 | python searchByStore.py -o literallyEverything.csv -v 2 53 | 54 | ``` 55 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | *.pyc -------------------------------------------------------------------------------- /searchByStore.py: -------------------------------------------------------------------------------- 1 | # encoding=utf8 2 | import sys 3 | reload(sys) 4 | sys.setdefaultencoding('utf8') 5 | import argparse 6 | import helper 7 | import threading 8 | import csv 9 | import time 10 | import random 11 | from time import gmtime, strftime 12 | 13 | lock = threading.Lock() 14 | 15 | THREADS = 30 16 | CSV_UPDATE_INTERVAL = 30 17 | # This is the frequency of CSV Updates 18 | ALL_SKUS = "data/MasterList.txt" 19 | # Default thread count 20 | SEARCH_VALS = [] 21 | ALL_ITEMS = [] 22 | CSV_HEADERS = ["skuVal", "title", "price", "quantity", "store", "availability", "primaryProductId", "category", "longSku", "rollback", "productType", "storeName", "storeAddress", "storeCity", 
"strikethrough", "upc", "usItemId", "storePostalCode", "storeStateOrProvinceCode", "reducedPrice", "clearance", "wupc"] 23 | COMPLETED = [] 24 | STATIC_VALS = [] 25 | 26 | def get_current_time(): 27 | return strftime("%Y-%m-%d-%H-%M-%S", gmtime()) 28 | 29 | CSV_FILE = "{}.csv".format(get_current_time()) 30 | 31 | def update_csv(fileName): 32 | with open(fileName, "wb") as f: 33 | toWrite = [CSV_HEADERS] + ALL_ITEMS 34 | writer = csv.writer(f) 35 | writer.writerows(toWrite) 36 | 37 | def search(): 38 | while len(SEARCH_VALS) > 0: 39 | try: 40 | lock.acquire() 41 | searchVal = SEARCH_VALS.pop(0) 42 | lock.release() 43 | skuNumber = searchVal['sku'] 44 | storeNumber = searchVal['store'] 45 | val = helper.local_item_info(storeNumber, skuNumber) 46 | #print val 47 | #print val.keys() 48 | if val != None: 49 | if val['availability'] == "NOT_AVAILABLE" and STATIC_VALS[1] == True: 50 | pass 51 | else: 52 | val['skuVal'] = skuNumber 53 | if helper.VERBOSE > 1: 54 | print "{} | {} | {} | {} | {}/{}\n".format(val['title'][:40], val['price'], skuNumber, len(ALL_ITEMS), len(COMPLETED), STATIC_VALS[0]), 55 | tVal = [] 56 | for key in CSV_HEADERS: 57 | tVal.append(val[key]) 58 | ALL_ITEMS.append(tVal) 59 | except Exception as exp: 60 | if helper.VERBOSE > 3: 61 | print("ERROR: {}".format(exp)) 62 | try: 63 | lock.release() 64 | except: 65 | pass 66 | try: 67 | COMPLETED.append(searchVal) 68 | except: 69 | pass 70 | 71 | if __name__ == '__main__': 72 | parser = argparse.ArgumentParser(description='') 73 | parser.add_argument('-o','--output', help='Specify csv output', required=False, default=CSV_FILE) 74 | parser.add_argument('-t','--threads', help='Specify thread count', required=False, default=THREADS) 75 | parser.add_argument('-s','--store', help='Specify store to search', required=False, default=None) 76 | parser.add_argument('-i','--input', help='Specify SKU list or single sku', required=False, default=ALL_SKUS) 77 | parser.add_argument('-v','--verbose', help='Verbose Mode', 
required=False, default=False) 78 | parser.add_argument('-a','--allstock', help='Show all items regardless of availability', required=False, default="False") 79 | args = vars(parser.parse_args()) 80 | # Contains a dictionary of all arguments 81 | if args['verbose'] != False: 82 | # Sets verbose setting 83 | try: 84 | helper.VERBOSE = int(args['verbose']) 85 | except: 86 | helper.VERBOSE = 5 87 | if '.' not in args['input']: 88 | # This means it's a single sku search 89 | skuList = [args['input']] 90 | # Creates a list with a single item 91 | else: 92 | # This means it's a file input 93 | skuList = [x for x in open(args['input']).read().split("\n") if len(x) > 0] 94 | # Creates a list from the file input 95 | if args['store'] == None: 96 | # This means the user did not specify a store number 97 | storeVals = helper.GrabAllStoreNumbers() 98 | # Creates a list of every store 99 | else: 100 | # The user specified a single store 101 | storeVals = [args['store']] 102 | # Creates a list with a single item 103 | totalVals = 0 104 | for store in storeVals: 105 | # Iterates through all inputted stores 106 | for sku in skuList: 107 | # Iterates through all inputted skus 108 | SEARCH_VALS.append({"sku": sku, "store": store}) 109 | totalVals += 1 110 | # Creates a list of all search terms 111 | random.shuffle(SEARCH_VALS) 112 | STATIC_VALS.append(totalVals) 113 | STATIC_VALS.append((args['allstock'] != "False")) 114 | thread_count = args['threads'] 115 | threads = [threading.Thread(target=search) for _ in range(thread_count)] 116 | for thread in threads: 117 | thread.daemon = True 118 | thread.start() 119 | while len(COMPLETED) != totalVals: 120 | # This allows you to kill the thread 121 | try: 122 | time.sleep(5) 123 | try: 124 | lock.acquire() 125 | # Gets lock to wait on active processes 126 | update_csv(args['output']) 127 | lock.release() 128 | except Exception as exp: 129 | print("CSV ERROR: {}".format(exp)) 130 | try: 131 | lock.relase() 132 | except: 133 | pass 134 | 
except: 135 | print("Program Killed...") 136 | lock.acquire() 137 | # Gets lock to wait on active processes 138 | update_csv(args['output']) 139 | lock.release() 140 | raise Exception("Program Killed...") 141 | -------------------------------------------------------------------------------- /helper.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | import time 4 | import csv 5 | VERBOSE = 0 6 | REQUEST_TIMEOUT = 10 7 | NETWORK_RETRY = 3 8 | PAUSE_BETWEEN_REQUESTS = 2 9 | # This is the short pause between consequetive network requests 10 | 11 | HEADER_TEMPLATE = json.load(open("data/headers.json")) 12 | 13 | TERRAFIRM_URL = "https://www.walmart.com/terra-firma/fetch" 14 | WALMART_SEARCH_URL = "https://www.walmart.com/search/api/preso?prg=mWeb&cat_id=0&facet=retailer%3AWalmart.com&query={0}" 15 | PRESCO_BASE = "https://www.walmart.com/search/api/preso?" 16 | # This is the url that allows you to search on walmart.com 17 | 18 | class Hasher(dict): 19 | def __missing__(self, key): 20 | value = self[key] = type(self)() 21 | return value 22 | 23 | def gen_terrafirm_headers(sku): 24 | # Headers for terrafirm request 25 | # Requires item SKU 26 | header = HEADER_TEMPLATE['terrafirm'] 27 | header['referer'] = "https://www.walmart.com/product/{}/sellers".format(sku) 28 | return header 29 | 30 | def gen_facet(start_price=0, end_price=5000): 31 | facet = "&facet=retailer%3AWalmart.com%7C%7Cprice%3A{}%20-%20%24{}" 32 | 33 | def network_request(url, headers={}, post=False, params=None, timeout=None, data=None, network_retry=None): 34 | # This is the function that makes network requests 35 | if network_retry == None: 36 | # Sets params to default 37 | network_retry = NETWORK_RETRY 38 | if timeout == None: 39 | # Sets params to default 40 | timeout = REQUEST_TIMEOUT 41 | for _ in range(network_retry): 42 | if post: 43 | res = requests.post(url, headers=headers, params=params, data=data, timeout=timeout) 44 | 
else: 45 | res = requests.get(url, headers=headers, params=params, data=data, timeout=timeout) 46 | # Makes the network request 47 | if res != None: 48 | if res.status_code == 200: 49 | # This means it was successful 50 | return res 51 | time.sleep(PAUSE_BETWEEN_REQUESTS) 52 | # Pause to prevent back to back requests 53 | 54 | def gen_all_pages(url, itemCount): 55 | urls = [] 56 | for i in range((itemCount/20)+1): 57 | urls.append("{}&page={}".format(url, i+1)) 58 | return urls 59 | 60 | def get_category_facets(url): 61 | urls = [] 62 | res = network_request(url).json() 63 | for val in res['facets']: 64 | if val['type'] == 'cat_id': 65 | for department in val['values']: 66 | if department['itemCount'] > 1000: 67 | for urlVal in department['values']: 68 | for tempUrl in gen_all_pages(urlVal['url'], urlVal['itemCount']): 69 | urls.append(PRESCO_BASE + tempUrl) 70 | else: 71 | for tempUrl in gen_all_pages(department['url'], department['itemCount']): 72 | urls.append(PRESCO_BASE + tempUrl) 73 | return urls 74 | 75 | def get_all_facets(url): 76 | urls = [] 77 | res = network_request(url).json() 78 | for val in res['facets']: 79 | if val['type'] == 'price': 80 | for priceRange in val['values']: 81 | if priceRange['itemCount'] > 1000: 82 | urls += get_category_facets(PRESCO_BASE + priceRange['url']) 83 | else: 84 | for tempUrl in gen_all_pages(priceRange['url'], priceRange['itemCount']): 85 | urls.append(PRESCO_BASE + tempUrl) 86 | return urls 87 | 88 | 89 | 90 | def gen_search_urls(query, store=None): 91 | itemVals = [] 92 | urlVals = [] 93 | url = WALMART_SEARCH_URL.format(query) 94 | if store != None: 95 | url += "&stores={}".format(store) 96 | res = network_request(url).json() 97 | total_results = res['requestContext']['itemCount']['total'] 98 | print("Total Results: {}".format(total_results)) 99 | if total_results > 1000: 100 | urlVals += get_all_facets(url) 101 | else: 102 | urlVals += gen_all_pages(url, total_results) 103 | return urlVals 104 | 105 | 106 | def 
returnPricing(terrafirmaDoc): 107 | # Extracts pricing information from the terrafirm API response 108 | terrafirmaDoc = Hasher(terrafirmaDoc) 109 | for key, value in terrafirmaDoc['payload']['offers'].items(): 110 | try: 111 | price = terrafirmaDoc['payload']['offers'][key]['pricesInfo']['priceMap']['CURRENT']['price'] 112 | store = terrafirmaDoc['payload']['offers'][key]['fulfillment']['pickupOptions'][0]['storeId'] 113 | quantity = terrafirmaDoc['payload']['offers'][key]['fulfillment']['pickupOptions'][0]["inStoreStockStatus"] 114 | rollback = terrafirmaDoc['payload']['offers'][key]['pricesInfo']['priceDisplayCodes']['rollback'] 115 | strikethrough = terrafirmaDoc['payload']['offers'][key]['pricesInfo']['priceDisplayCodes']['strikethrough'] 116 | reducedPrice = terrafirmaDoc['payload']['offers'][key]['pricesInfo']['priceDisplayCodes']['reducedPrice'] 117 | clearance = terrafirmaDoc['payload']['offers'][key]['pricesInfo']['priceDisplayCodes']['clearance'] 118 | storeCity = terrafirmaDoc['payload']['offers'][key]['fulfillment']['pickupOptions'][0]['storeCity'] 119 | storeName = terrafirmaDoc['payload']['offers'][key]['fulfillment']['pickupOptions'][0]['storeName'] 120 | storeAddress = terrafirmaDoc['payload']['offers'][key]['fulfillment']['pickupOptions'][0]['storeAddress'] 121 | storeStateOrProvinceCode = terrafirmaDoc['payload']['offers'][key]['fulfillment']['pickupOptions'][0]['storeStateOrProvinceCode'] 122 | storePostalCode = terrafirmaDoc['payload']['offers'][key]['fulfillment']['pickupOptions'][0]['storePostalCode'] 123 | availability = terrafirmaDoc['payload']['offers'][key]['fulfillment']['pickupOptions'][0]['availability'] 124 | productInfo = terrafirmaDoc['payload']['products'] 125 | productInfo = productInfo[productInfo.keys()[0]] 126 | productInfo = Hasher(productInfo) 127 | primaryProductId = productInfo['primaryProductId'] 128 | wupc = productInfo['wupc'] 129 | usItemId = productInfo['usItemId'] 130 | upc = productInfo['upc'] 131 | productType = 
productInfo['productType'] 132 | longSku = productInfo['productAttributes']['sku'] 133 | titleVal = productInfo['productAttributes']['productName'] 134 | category = productInfo['productAttributes']['productCategory']['categoryPath'] 135 | information = {"title": titleVal, "price": price, "rollback": rollback, "strikethrough": strikethrough, "reducedPrice": reducedPrice, "clearance": clearance, "store": store, "storeCity": storeCity, "storeName": storeName, "storeAddress": storeAddress, "storeStateOrProvinceCode": storeStateOrProvinceCode, "storePostalCode": storePostalCode, "availability": availability, "quantity": quantity, "primaryProductId": primaryProductId, "wupc": wupc, "usItemId": usItemId, "upc": upc, "productType": productType, "longSku": longSku, "category": category} 136 | for key, val in information.items(): 137 | if "Hasher" in str(type(information[key])): 138 | information[key] = "" 139 | return information 140 | except Exception as exp: 141 | if VERBOSE > 5: 142 | print exp 143 | 144 | def local_item_info(store, sku): 145 | # Returns all store-specific information for a SKU 146 | header = gen_terrafirm_headers(sku) 147 | # Generates the header for this request 148 | params = (('rgs', 'OFFER_PRODUCT,OFFER_INVENTORY,OFFER_PRICE,VARIANT_SUMMARY'),) 149 | # Parameters that specify the data we want to return 150 | data = '{{"itemId":"{}","paginationContext":{{"selected":false}},"storeFrontIds":[{{"usStoreId":{},"preferred":false,"semStore":false}}]}}'.format(sku, store) 151 | response = network_request(TERRAFIRM_URL, post=True, headers=header, params=params, data=data) 152 | # This calls the API endpoint 153 | if VERBOSE > 3: 154 | # This will print the api response for debugging 155 | print response.json() 156 | return returnPricing(response.json()) 157 | 158 | def convertSKUToUPC(sku): 159 | # Converts a walmart SKU number for a UPC 160 | try: 161 | headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like 
Gecko) Chrome/39.0.2171.95 Safari/537.36'} 162 | res = requests.get('https://brickseek.com/walmart-inventory-checker/?sku={}'.format(sku), headers=headers) 163 | page = bs4.BeautifulSoup(res.text, 'lxml') 164 | upc = str(page).partition('upc=')[2].partition('"')[0] 165 | print("Converted SKU: {} to UPC: {}".format(sku, upc)) 166 | return upc 167 | except: 168 | if VERBOSE > 1: 169 | print("Converting to UPC failed.") 170 | return None 171 | 172 | def GrabAllStoreNumbers(): 173 | ListOfStores = [] 174 | with open('data/Walmarts.csv', 'r') as f: 175 | reader = csv.reader(f) 176 | your_list = list(reader) 177 | for line in your_list: 178 | if 'Walmart Supercenter' in str(line[1]): 179 | ListOfStores.append(line[0]) 180 | return ListOfStores 181 | 182 | if __name__ == '__main__': 183 | VERBOSE = 3 184 | print local_item_info('2265', 'adsfadsf435188866') 185 | --------------------------------------------------------------------------------