"""Scrape smart-watch listings from jumia.com.tn and export them to CSV.

Walks every result page of the "smart watches" catalog search, collecting
each article's name, price and product link, then visits every product page
to grab its technical description ("DESCRIPTIF TECHNIQUE"), and finally
writes the four parallel columns to smart_w.csv.
"""

import csv
from itertools import zip_longest

import requests
from bs4 import BeautifulSoup

BASE_URL = "https://www.jumia.com.tn"
RESULTS_PER_PAGE = 40  # Jumia shows 40 articles per catalog page
# Relative path instead of a hard-coded, user-specific Windows path.
OUTPUT_CSV = "smart_w.csv"


def scrape_listing_pages():
    """Return parallel lists (names, prices, links) from every catalog page."""
    names = []
    prices = []
    links = []
    page_num = 1
    while True:
        result = requests.get(
            f"{BASE_URL}/catalog/?q=smart+watches&page={page_num}#catalog-listing"
        )
        soup = BeautifulSoup(result.content, "lxml")

        # The "<total> products found" banner: its first token is the total
        # article count, which bounds how many pages exist.
        res = soup.find("p", {"class": "-gy5 -phs"}).text.split()
        page_lim = int(res[0])
        if page_num > (page_lim // RESULTS_PER_PAGE):
            print("endedpage")
            break

        name = soup.find_all("h3", {"class": "name"})
        price = soup.find_all("div", {"class": "prc"})
        article = soup.find_all("article", {"class": "prd _fb col c-prd"})

        # Iterate the three parallel result lists in lockstep instead of
        # indexing with range(len(...)).
        for art, nm, pr in zip(article, name, price):
            names.append(nm.text)
            prices.append(pr.text)
            links.append(BASE_URL + art.find("a").attrs["href"])

        page_num += 1
        print("page_switched")

    return names, prices, links


def scrape_descriptions(links):
    """Fetch every product page and return its technical-description text."""
    DTs = []
    # enumerate() gives the index in O(1); the original links.index(link)
    # was an O(n) scan per item and reported the wrong index for duplicates.
    for idx, link in enumerate(links):
        result = requests.get(link)
        soup = BeautifulSoup(result.content, "lxml")
        DT = soup.find("ul", {"class": "-pvs -mvxs -phm -lsn"})
        # Linear-time join of the <li> texts instead of quadratic += concat
        # followed by stripping the trailing separator.
        DTs.append("| ".join(li.text for li in DT.find_all("li")))
        print("article num°" + str(idx))
    return DTs


def export_csv(names, prices, DTs, links):
    """Write the collected columns to OUTPUT_CSV, padding ragged columns."""
    exported = zip_longest(names, prices, DTs, links)
    # newline="" prevents the csv module from emitting blank rows on Windows;
    # utf-8 keeps accented French product names from crashing under cp1252.
    with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as myfile:
        wr = csv.writer(myfile)
        wr.writerow(["name", "price", "DESCRIPTIF TECHNIQUE", "links"])
        wr.writerows(exported)


def main():
    """Run the full scrape-then-export pipeline."""
    names, prices, links = scrape_listing_pages()
    DTs = scrape_descriptions(links)
    export_csv(names, prices, DTs, links)


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------