├── __pycache__ ├── engine_lister.cpython-36.pyc └── engine_scraper.cpython-36.pyc ├── data ├── jewelry_crosschecked.csv ├── jewelry_final.csv └── jewelry_merchandise.csv ├── engine_core.py ├── engine_lister.py └── scrapers ├── __pycache__ └── jewelry_scraper.cpython-36.pyc └── jewelry_scraper.py /__pycache__/engine_lister.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nicksarris/dropshipping/b2f135498d2a3c7cfcff0e45b187456fb51032f3/__pycache__/engine_lister.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/engine_scraper.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nicksarris/dropshipping/b2f135498d2a3c7cfcff0e45b187456fb51032f3/__pycache__/engine_scraper.cpython-36.pyc -------------------------------------------------------------------------------- /data/jewelry_crosschecked.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nicksarris/dropshipping/b2f135498d2a3c7cfcff0e45b187456fb51032f3/data/jewelry_crosschecked.csv -------------------------------------------------------------------------------- /data/jewelry_merchandise.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nicksarris/dropshipping/b2f135498d2a3c7cfcff0e45b187456fb51032f3/data/jewelry_merchandise.csv -------------------------------------------------------------------------------- /engine_core.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Nick Sarris (ngs5st)' 2 | 3 | import engine_lister 4 | from scrapers import jewelry_scraper 5 | 6 | def main(): 7 | 8 | selling_category = "Jewelry" 9 | walmart_key = "" 10 | rescrape_data = False 11 | 12 | if rescrape_data != False: 13 | if 
selling_category == "Jewelry": 14 | jewelry_scraper.walmart_jewelry(walmart_key) 15 | jewelry_scraper.ebay_jewelry() 16 | jewelry_scraper.data_cleanup() 17 | 18 | engine_lister.list_ebay(selling_category) 19 | 20 | if __name__ == '__main__': 21 | main() -------------------------------------------------------------------------------- /engine_lister.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Nick Sarris (ngs5st)' 2 | 3 | import re 4 | import time 5 | import pandas as pd 6 | from ebaysdk.trading import Connection as Trading 7 | from ebaysdk.exception import ConnectionError 8 | 9 | def decide_category(item_name): 10 | 11 | category_dict = { 12 | 13 | "Anklet": 101437, 14 | "Bracelet": {"Diamond": 10976, 15 | "Pearl": 164316, 16 | "Other": 164315}, 17 | "Earrings": {"Diamond": 10986, 18 | "Pearl": 10990, 19 | "Other": 164321}, 20 | "Necklace": {"Diamond": 164331, 21 | "Pearl": 164333, 22 | "Other": 164332}, 23 | "Pendant": {"Diamond": 164331, 24 | "Pearl": 164333, 25 | "Other": 164332}, 26 | "Brooch": {"Pearl": 11013, 27 | "Other": 164337}, 28 | "Ring": {"Diamond": 67726, 29 | "Pearl": 11021, 30 | "Other": 164343}, 31 | "Band": {"Diamond": 67726, 32 | "Pearl": 11021, 33 | "Other": 164343}, 34 | "Other": 505 35 | 36 | } 37 | 38 | category_list_1 = ["Diamond", "Pearl"] 39 | category_list_2 = ["Pearl"] 40 | 41 | for key in category_dict.keys(): 42 | if key in item_name: 43 | if key not in ["Anklet", "Brooch"]: 44 | for cat in category_list_1: 45 | if cat in item_name: 46 | return cat, category_dict[key][cat] 47 | else: 48 | return "Other Gemstone", category_dict[key]["Other"] 49 | else: 50 | if key == "Brooch": 51 | for cat in category_list_2: 52 | if cat in item_name: 53 | return cat, category_dict[key][cat] 54 | else: 55 | return "Other Gemstone", category_dict[key]["Other"] 56 | else: 57 | return "Other Gemstone", category_dict[key] 58 | else: 59 | continue 60 | 61 | return "Other Gemstone", 
category_dict["Other"] 62 | 63 | def list_ebay(selling_category): 64 | 65 | if selling_category == 'Jewelry': 66 | 67 | merchandise = pd.read_csv('data/jewelry_final.csv', encoding='ISO-8859-1') 68 | api = Trading(config_file='data/ebay_auth.yaml') 69 | 70 | counter = 0 71 | for i, row in merchandise.iterrows(): 72 | while True: 73 | 74 | try: 75 | print("Listing Item #{}: {}".format(i, row['name'])) 76 | 77 | item_name = row['name'] 78 | image_urls = row['images'].split(',') 79 | description = row['long_description'] 80 | sale_price = ((int(row['sale_price']) * 1.15) - 0.01) 81 | type, category = decide_category(row['name']) 82 | upc_value = row['upc'] 83 | 84 | replacement_list = ["T.G.W.","T.W.","T.G.W","T.W","Created", 85 | "Princess-Cut","Cross-Over","Three Stone", 86 | "Cultured","Freshwater","Cocktail", 87 | "Three-Stone","Two-Tone"] 88 | 89 | listing_title = re.sub(r' \d+\-\d+\/\d+ Carat', "", item_name) 90 | listing_title = re.sub(r' \d+\/\d+ Carat', "", listing_title) 91 | listing_title = re.sub(r' \d+\/\d+ CT', "", listing_title) 92 | listing_title = re.sub(r' \d+\/\d+', "", listing_title) 93 | listing_title = re.sub(r' \d+ Carat', "", listing_title) 94 | listing_title = re.sub(r' \d+kt', "", listing_title) 95 | listing_title = re.sub(r' \d+\-\d+\.\dmm', "", listing_title) 96 | listing_title = re.sub(r' \d+\.\d+\-\d+\.\dmm', "", listing_title) 97 | listing_title = re.sub(r' \d+\.\d+\-\dmm', "", listing_title) 98 | listing_title = re.sub(r' \d+\-\dmm', "", listing_title) 99 | listing_title = re.sub(r' \dmm+\-\d+\.\dmm', "", listing_title) 100 | listing_title = re.sub(r' \d+\.\dmm+\-\d+\.\dmm', "", listing_title) 101 | listing_title = re.sub(r' \d+\.\dmm+\-\dmm', "", listing_title) 102 | listing_title = re.sub(r' \dmm+\-\dmm', "", listing_title) 103 | 104 | for replacement in replacement_list: 105 | listing_title = listing_title.replace(" " + replacement, "") 106 | 107 | try: 108 | listing_title = listing_title.split(',')[0] 109 | except: 110 | pass 111 | 
112 | if len(listing_title) < 80: 113 | 114 | image_list = [] 115 | for image_url in image_urls: 116 | image_url = image_url.replace('[','').replace(']','') \ 117 | .replace("'",'').split('?')[0].strip() 118 | list.append(image_list, image_url) 119 | 120 | myitem = { 121 | 122 | "Item":{ 123 | 124 | "Title": listing_title, 125 | "Description": ''+ item_name +'

Description

' 131 | '

  • ' 137 | 'Gender: Women
  • Fine Or Fashion: Fine
  • ' 138 | '
  • Brand: Miabella
  • ' 139 | 'Age Group: Adult
  • Gemstone Type: ' + type + '
  • ' 140 | '
  • Manufacturer Name: Miabella
    ' 144 | '

' 145 | '
  • ' + description+ '


      Handling

      We will ship all orders within ' 160 | '' 161 | '3 business days of payment. We take great care packaging every item to ' 162 | 'ensure safe and quality shipping


    • ' 163 | '

      Delivery

    • We will ' 169 | 'ship UPS/USPS/FedEx depending on your location and our discretion. Please make sure ' 170 | 'to provide the correct shipping address when placing your order. Packages are NOT ' 171 | 'sent out on Saturday or Sunday and transit times may vary depending on the carrier. ' 172 | 'Shipping time by eBay are not guaranateed and are subject to change especially during ' 173 | 'peak periods.


    • Feedback

    • We take ' 180 | 'our reputation seriously, we buy and sell online, so we understand the value of trust.' 181 | ' If you are unsatisfied with your order, please contact us and we ' 183 | 'will work with you to resolve it to your satisfaction. Please allow 1-3 days for a ' 184 | 'response to all inquiries.

    ]]>', 185 | 186 | "PrimaryCategory": {"CategoryID": category}, 187 | "StartPrice": sale_price, 188 | "PictureDetails": {"PictureURL": image_list}, 189 | 190 | "Country": "US", 191 | "Currency": "USD", 192 | "ConditionID": "1000", 193 | "CategoryMappingAllowed": "true", 194 | "DispatchTimeMax": "3", 195 | "ListingDuration": "Days_30", 196 | "Quantity": "1", 197 | 198 | "PaymentMethods": "PayPal", 199 | "PayPalEmailAddress": "ngs5st@virginia.edu", 200 | "PostalCode": "23505", 201 | 202 | "ReturnPolicy": { 203 | "ReturnsAcceptedOption": "ReturnsNotAccepted", 204 | }, 205 | 206 | "ProductListingDetails": { 207 | "UPC": str(int(upc_value)), 208 | "Brand": "MiaBella" 209 | }, 210 | 211 | "ShippingDetails": { 212 | "ShippingType": "Flat", 213 | "ShippingServiceOptions": { 214 | "ShippingServicePriority": "1", 215 | "ShippingService": "UPS2ndDay", 216 | "ShippingServiceCost": "0" 217 | } 218 | }, 219 | 220 | "Site": "US" 221 | 222 | } 223 | } 224 | 225 | #r = api.execute("AddFixedPriceItem", myitem) 226 | 227 | else: 228 | pass 229 | 230 | except ConnectionError as e: 231 | 232 | if counter < 5: 233 | 234 | print('') 235 | print("Currently Waiting Before Retrying Listing - ConnectionError: {}/5".format(counter)) 236 | print("Error: ", e) 237 | print('') 238 | 239 | time.sleep(60) 240 | counter += 1 241 | continue 242 | 243 | else: 244 | 245 | print('') 246 | print("Currently Waiting Before Retrying Listing - ConnectionError: {}/5".format(counter)) 247 | print("Error: ", e) 248 | print('') 249 | 250 | time.sleep(60) 251 | 252 | counter = 0 253 | break 254 | 255 | else: 256 | print("Invalid Category") -------------------------------------------------------------------------------- /scrapers/__pycache__/jewelry_scraper.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nicksarris/dropshipping/b2f135498d2a3c7cfcff0e45b187456fb51032f3/scrapers/__pycache__/jewelry_scraper.cpython-36.pyc 
# --------------------------------------------------------------------------------
# /scrapers/jewelry_scraper.py
# --------------------------------------------------------------------------------
"""Scrape Miabella jewelry from Walmart, cross-check prices on eBay, and clean the result.

The three entry points are meant to run in this order:
``walmart_jewelry`` -> ``ebay_jewelry`` -> ``data_cleanup``. Each step reads
the CSV written by the previous one from the ``data/`` directory.
"""

__author__ = 'Nick Sarris (ngs5st)'

import re
import time
import numpy as np
import pandas as pd
from wapy.wapy.api import Wapy
from ebaysdk.finding import Connection as finding
from ebaysdk.trading import Connection as Trading
from ebaysdk.exception import ConnectionError

# Column layout shared by every CSV this module writes. The names double as
# the attribute names read from each Walmart item.
_COLUMNS = ['item_id', 'name', 'sale_price', 'short_description',
            'long_description', 'images', 'stock', 'upc']


def data_cleanup():
    """Filter the cross-checked merchandise and write the final listing CSV.

    Reads ``data/jewelry_crosschecked.csv``, normalises the ``sale_price`` and
    ``name`` columns, keeps only available items priced strictly between 50
    and 300 that have a finite UPC, drops duplicate rows, and writes the
    result to ``data/jewelry_final.csv``.
    """
    merchandise = pd.read_csv('data/jewelry_crosschecked.csv', encoding='ISO-8859-1')

    merchandise['sale_price'] = merchandise['sale_price'].astype(float)
    # Convert to str first so a non-string name (e.g. NaN) cannot raise
    # AttributeError on .strip().
    merchandise['name'] = merchandise['name'].map(
        lambda name: str(name).strip().replace('"', ''))

    keep = ((merchandise['sale_price'] > 50)
            & (merchandise['sale_price'] < 300)
            & (merchandise['stock'] == 'Available')
            & np.isfinite(merchandise['upc']))
    merchandise = merchandise[keep].drop_duplicates()

    # engine_lister.list_ebay reads 'data/jewelry_final.csv'; writing any
    # other name leaves the lister working from stale data.
    merchandise.to_csv('data/jewelry_final.csv', index=False)


def walmart_jewelry(walmart_key):
    """Download Miabella jewelry search results from Walmart into a CSV.

    Args:
        walmart_key: API key passed to ``Wapy``.

    Requests result pages 1 through 39 (25 items per page, category 3891) and
    writes one row per returned item to ``data/jewelry_merchandise.csv``.
    """
    wapy = Wapy(walmart_key)

    rows = []
    for page in range(1, 40):
        print('Scraping from Page: {}'.format(page))
        items = wapy.search('Miabella', categoryId=3891, numItems=25, page=page)
        rows.extend([getattr(item, column) for column in _COLUMNS]
                    for item in items)

    output_df = pd.DataFrame(rows, columns=_COLUMNS)
    output_df.to_csv('data/jewelry_merchandise.csv', index=False)


def ebay_jewelry():
    """Keep only the Walmart items that no matching eBay listing undercuts.

    Reads ``data/jewelry_merchandise.csv``, searches eBay for each item name
    (first page, 25 results, best match), and keeps the item when none of the
    returned listings is priced below the item's ``sale_price``. The kept rows
    are written to ``data/jewelry_crosschecked.csv``.

    An item whose eBay lookup raises ``ConnectionError`` is reported and
    skipped; the remaining items are still processed.
    """
    merchandise = pd.read_csv('data/jewelry_merchandise.csv', encoding='ISO-8859-1')
    api = finding(config_file='data/ebay_auth.yaml')

    kept_rows = []
    for i, item in merchandise.iterrows():

        try:
            response = api.execute(
                'findItemsAdvanced', {
                    'keywords': item['name'],
                    'paginationInput': {
                        'entriesPerPage': '25',
                        'pageNumber': '1'
                    },
                    'sortOrder': 'BestMatch'
                })
        except ConnectionError as e:
            # Report the skipped item instead of dropping it silently.
            print('Skipping item #{} ({}) - eBay lookup failed: {}'.format(
                i, item['name'], e))
            continue

        search_result = response.reply.get('searchResult')
        ebay_prices = []
        if search_result.get('_count') != '0':
            ebay_prices = [listing.get('sellingStatus')
                                  .get('currentPrice')
                                  .get('value')
                           for listing in search_result.get('item')]

        # NOTE(review): nesting was reconstructed from a flattened source.
        # As written, an item with zero eBay results is kept because nothing
        # undercuts it - confirm this matches the original intent.
        undercut = any(float(price) < float(item['sale_price'])
                       for price in ebay_prices)

        if not undercut:
            print(i, item['name'])
            kept_rows.append([item[column] for column in _COLUMNS])

    output_df = pd.DataFrame(kept_rows, columns=_COLUMNS)
    output_df.to_csv('data/jewelry_crosschecked.csv', index=False)

# --------------------------------------------------------------------------------