"""Scrape smart-watch listings from jumia.com.tn and export them to CSV.

Walks every result page of the "smart watches" catalog search, collecting
each article's name, price and product link, then visits every product page
to grab its technical description ("DESCRIPTIF TECHNIQUE"), and finally
writes the four parallel columns to smart_w.csv.
"""

import csv
from itertools import zip_longest

import requests
from bs4 import BeautifulSoup

BASE_URL = "https://www.jumia.com.tn"
RESULTS_PER_PAGE = 40  # Jumia shows 40 articles per catalog page
# Relative path instead of a hard-coded, user-specific Windows path.
OUTPUT_CSV = "smart_w.csv"


def scrape_listing_pages():
    """Return parallel lists (names, prices, links) from every catalog page."""
    names = []
    prices = []
    links = []
    page_num = 1
    while True:
        result = requests.get(
            f"{BASE_URL}/catalog/?q=smart+watches&page={page_num}#catalog-listing"
        )
        soup = BeautifulSoup(result.content, "lxml")

        # The "<total> products found" banner: its first token is the total
        # article count, which bounds how many pages exist.
        res = soup.find("p", {"class": "-gy5 -phs"}).text.split()
        page_lim = int(res[0])
        if page_num > (page_lim // RESULTS_PER_PAGE):
            print("endedpage")
            break

        name = soup.find_all("h3", {"class": "name"})
        price = soup.find_all("div", {"class": "prc"})
        article = soup.find_all("article", {"class": "prd _fb col c-prd"})

        # Iterate the three parallel result lists in lockstep instead of
        # indexing with range(len(...)).
        for art, nm, pr in zip(article, name, price):
            names.append(nm.text)
            prices.append(pr.text)
            links.append(BASE_URL + art.find("a").attrs["href"])

        page_num += 1
        print("page_switched")

    return names, prices, links


def scrape_descriptions(links):
    """Fetch every product page and return its technical-description text."""
    DTs = []
    # enumerate() gives the index in O(1); the original links.index(link)
    # was an O(n) scan per item and reported the wrong index for duplicates.
    for idx, link in enumerate(links):
        result = requests.get(link)
        soup = BeautifulSoup(result.content, "lxml")
        DT = soup.find("ul", {"class": "-pvs -mvxs -phm -lsn"})
        # Linear-time join of the <li> texts instead of quadratic += concat
        # followed by stripping the trailing separator.
        DTs.append("| ".join(li.text for li in DT.find_all("li")))
        print("article num°" + str(idx))
    return DTs


def export_csv(names, prices, DTs, links):
    """Write the collected columns to OUTPUT_CSV, padding ragged columns."""
    exported = zip_longest(names, prices, DTs, links)
    # newline="" prevents the csv module from emitting blank rows on Windows;
    # utf-8 keeps accented French product names from crashing under cp1252.
    with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as myfile:
        wr = csv.writer(myfile)
        wr.writerow(["name", "price", "DESCRIPTIF TECHNIQUE", "links"])
        wr.writerows(exported)


def main():
    """Run the full scrape-then-export pipeline."""
    names, prices, links = scrape_listing_pages()
    DTs = scrape_descriptions(links)
    export_csv(names, prices, DTs, links)


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------