├── .gitignore
├── .idea
├── .gitignore
├── advanced-web-scraping.iml
├── inspectionProfiles
│ └── profiles_settings.xml
├── misc.xml
├── modules.xml
└── vcs.xml
├── .ipynb_checkpoints
└── Untitled-checkpoint.ipynb
├── README.md
├── ajio.html
├── ajio.py
├── campusx.py
├── smartprix-smartphones.ipynb
├── smartprix.html
└── smartprix.py
/.gitignore:
--------------------------------------------------------------------------------
1 | venv
--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 |
--------------------------------------------------------------------------------
/.idea/advanced-web-scraping.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.ipynb_checkpoints/Untitled-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "id": "09eea0e5",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "with open(\"smartprix.html\", \"r\", encoding=\"utf-8\") as f:\n",
11 | " html = f.read()"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 2,
17 | "id": "c9b72a4b",
18 | "metadata": {},
19 | "outputs": [],
20 | "source": [
21 | "from bs4 import BeautifulSoup\n",
22 | "import pandas as pd\n",
23 | "import numpy as np"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": 3,
29 | "id": "e0d12dc4",
30 | "metadata": {},
31 | "outputs": [],
32 | "source": [
33 | "soup = BeautifulSoup(html, \"lxml\")"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": 4,
39 | "id": "3faf85c9",
40 | "metadata": {},
41 | "outputs": [],
42 | "source": [
43 | "containers = soup.find_all(\"div\", {\"class\": \"sm-product has-tag has-features has-actions\"})"
44 | ]
45 | },
46 | {
47 | "cell_type": "code",
48 | "execution_count": 25,
49 | "id": "349ef85b",
50 | "metadata": {},
51 | "outputs": [],
52 | "source": [
53 | "name = []\n",
54 | "price = []\n",
55 | "spec_score = []\n",
56 | "sim = []\n",
57 | "processor = []\n",
58 | "ram = []\n",
59 | "battery = []\n",
60 | "display = []\n",
61 | "camera = []\n",
62 | "card = []\n",
63 | "os = []\n",
64 | "\n",
65 | "for i in soup.find_all(\"div\", {\"class\": \"sm-product has-tag has-features has-actions\"}):\n",
66 | " \n",
67 | " try:\n",
68 | " name.append(i.find('h2').text)\n",
69 | " except:\n",
70 | " name.append(np.nan)\n",
71 | " \n",
72 | " try:\n",
73 | " price.append(i.find('span', {'class': 'price'}).text)\n",
74 | " except:\n",
75 | " price.append(np.nan)\n",
76 | " \n",
77 | " try:\n",
78 | " spec_score.append(i.find('div', {'class': 'score rank-2-bg'}).find('b').text)\n",
79 | " except:\n",
80 | " spec_score.append(np.nan)\n",
81 | " \n",
82 | " x = i.find('ul', {\"class\": \"sm-feat specs\"}).find_all('li')\n",
83 | " \n",
84 | " try:\n",
85 | " sim.append(x[0])\n",
86 | " except:\n",
87 | " sim.append(np.nan)\n",
88 | " \n",
89 | " try:\n",
90 | " processor.append(x[1])\n",
91 | " except:\n",
92 | " processor.append(np.nan)\n",
93 | " \n",
94 | " try:\n",
95 | " ram.append(x[2])\n",
96 | " except:\n",
97 | " ram.append(np.nan)\n",
98 | " \n",
99 | " try:\n",
100 | " battery.append(x[3])\n",
101 | " except:\n",
102 | " battery.append(np.nan)\n",
103 | " \n",
104 | " try:\n",
105 | " display.append(x[4])\n",
106 | " except:\n",
107 | " display.append(np.nan)\n",
108 | " \n",
109 | " try:\n",
110 | " camera.append(x[5])\n",
111 | " except:\n",
112 | " camera.append(np.nan)\n",
113 | " \n",
114 | " try:\n",
115 | " card.append(x[6])\n",
116 | " except:\n",
117 | " card.append(np.nan)\n",
118 | " \n",
119 | " try:\n",
120 | " os.append(x[7])\n",
121 | " except:\n",
122 | " os.append(np.nan)"
123 | ]
124 | },
125 | {
126 | "cell_type": "code",
127 | "execution_count": 26,
128 | "id": "778a2325",
129 | "metadata": {},
130 | "outputs": [
131 | {
132 | "data": {
133 | "text/html": [
134 | "
\n",
135 | "\n",
148 | "
\n",
149 | " \n",
150 | " \n",
151 | " | \n",
152 | " model | \n",
153 | " price | \n",
154 | " score | \n",
155 | " sim | \n",
156 | " processor | \n",
157 | " ram | \n",
158 | " battery | \n",
159 | " display | \n",
160 | " camera | \n",
161 | " card | \n",
162 | " os | \n",
163 | "
\n",
164 | " \n",
165 | " \n",
166 | " \n",
167 | " 0 | \n",
168 | " Motorola Edge 40 Neo | \n",
169 | " ₹20,999 | \n",
170 | " 83 | \n",
171 | " [Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC] | \n",
172 | " [Dimensity 7030, Octa Core, 2.5 GHz Processor] | \n",
173 | " [8 GB RAM, 128 GB inbuilt] | \n",
174 | " [5000 mAh Battery with 68W Fast Charging] | \n",
175 | " [6.55 inches, 1080 x 2400 px, 144 Hz Display w... | \n",
176 | " [50 MP + 13 MP Dual Rear & 32 MP Front Camera] | \n",
177 | " [Android v13] | \n",
178 | " [No FM Radio] | \n",
179 | "
\n",
180 | " \n",
181 | " 1 | \n",
182 | " Motorola Edge 40 Neo (12GB RAM + 256GB) | \n",
183 | " ₹22,999 | \n",
184 | " 85 | \n",
185 | " [Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC] | \n",
186 | " [Dimensity 7030, Octa Core, 2.5 GHz Processor] | \n",
187 | " [12 GB RAM, 256 GB inbuilt] | \n",
188 | " [5000 mAh Battery with 68W Fast Charging] | \n",
189 | " [6.55 inches, 1080 x 2400 px, 144 Hz Display w... | \n",
190 | " [50 MP + 13 MP Dual Rear & 32 MP Front Camera] | \n",
191 | " [Android v13] | \n",
192 | " [No FM Radio] | \n",
193 | "
\n",
194 | " \n",
195 | " 2 | \n",
196 | " Motorola Moto G54 5G | \n",
197 | " ₹13,999 | \n",
198 | " 85 | \n",
199 | " [Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC] | \n",
200 | " [Dimensity 7020, Octa Core, 2.2 GHz Processor] | \n",
201 | " [8 GB RAM, 128 GB inbuilt] | \n",
202 | " [6000 mAh Battery with 33W Fast Charging] | \n",
203 | " [6.5 inches, 1080 x 2400 px, 120 Hz Display wi... | \n",
204 | " [50 MP + 8 MP Dual Rear & 16 MP Front Camera] | \n",
205 | " [Memory Card (Hybrid), upto 1 TB] | \n",
206 | " [Android v13] | \n",
207 | "
\n",
208 | " \n",
209 | " 3 | \n",
210 | " Motorola Moto G54 5G (12GB RAM +256GB) | \n",
211 | " ₹15,999 | \n",
212 | " 87 | \n",
213 | " [Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC] | \n",
214 | " [Dimensity 7020, Octa Core, 2.2 GHz Processor] | \n",
215 | " [12 GB RAM, 256 GB inbuilt] | \n",
216 | " [6000 mAh Battery with 33W Fast Charging] | \n",
217 | " [6.5 inches, 1080 x 2400 px, 120 Hz Display wi... | \n",
218 | " [50 MP + 8 MP Dual Rear & 16 MP Front Camera] | \n",
219 | " [Memory Card (Hybrid), upto 1 TB] | \n",
220 | " [Android v13] | \n",
221 | "
\n",
222 | " \n",
223 | " 4 | \n",
224 | " Motorola Edge 40 5G | \n",
225 | " ₹24,999 | \n",
226 | " 87 | \n",
227 | " [Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC] | \n",
228 | " [Dimensity 8020, Octa Core, 2.6 GHz Processor] | \n",
229 | " [8 GB RAM, 256 GB inbuilt] | \n",
230 | " [4400 mAh Battery with 68W Fast Charging] | \n",
231 | " [6.55 inches, 1080 x 2400 px, 144 Hz Display] | \n",
232 | " [50 MP + 13 MP Dual Rear & 32 MP Front Camera] | \n",
233 | " [Android v13] | \n",
234 | " [Bluetooth] | \n",
235 | "
\n",
236 | " \n",
237 | " ... | \n",
238 | " ... | \n",
239 | " ... | \n",
240 | " ... | \n",
241 | " ... | \n",
242 | " ... | \n",
243 | " ... | \n",
244 | " ... | \n",
245 | " ... | \n",
246 | " ... | \n",
247 | " ... | \n",
248 | " ... | \n",
249 | "
\n",
250 | " \n",
251 | " 1015 | \n",
252 | " Snexian Bold 9K | \n",
253 | " ₹1,199 | \n",
254 | " NaN | \n",
255 | " [Dual Sim] | \n",
256 | " [No 3G] | \n",
257 | " [No Wifi] | \n",
258 | " [1 MHz Processor] | \n",
259 | " [32 MB RAM, 32 MB inbuilt] | \n",
260 | " [2500 mAh Battery] | \n",
261 | " [2.4 inches, 240 x 320 px Display] | \n",
262 | " [1.3 MP Rear & No Front Camera] | \n",
263 | "
\n",
264 | " \n",
265 | " 1016 | \n",
266 | " Nokia C31 | \n",
267 | " ₹7,999 | \n",
268 | " 62 | \n",
269 | " [Dual Sim, 3G, 4G, VoLTE, Wi-Fi] | \n",
270 | " [Unisoc SC9863A, Octa Core, 1.6 GHz Processor] | \n",
271 | " [3 GB RAM, 32 GB inbuilt] | \n",
272 | " [5050 mAh Battery with 10W Fast Charging] | \n",
273 | " [6.74 inches, 720 x 1600 px Display with Water... | \n",
274 | " [13 MP + 2 MP + 2 MP Triple Rear & 5 MP Front ... | \n",
275 | " [Memory Card Supported, upto 256 GB] | \n",
276 | " [Android v12] | \n",
277 | "
\n",
278 | " \n",
279 | " 1017 | \n",
280 | " Micromax S115 | \n",
281 | " ₹910 | \n",
282 | " NaN | \n",
283 | " [Dual Sim] | \n",
284 | " [No 3G] | \n",
285 | " [No Wifi] | \n",
286 | " [32 MB RAM, 32 MB inbuilt] | \n",
287 | " [800 mAh Battery] | \n",
288 | " [1.77 inches, 240 x 320 px Display] | \n",
289 | " [No Rear Camera] | \n",
290 | " [Memory Card Supported] | \n",
291 | "
\n",
292 | " \n",
293 | " 1018 | \n",
294 | " Pear P313 | \n",
295 | " ₹699 | \n",
296 | " NaN | \n",
297 | " [Dual Sim] | \n",
298 | " [32 MB RAM, 32 MB inbuilt] | \n",
299 | " [1100 mAh Battery] | \n",
300 | " [1.8 inches, 240 x 360 px Display] | \n",
301 | " [0.3 MP Rear Camera] | \n",
302 | " [Memory Card Supported] | \n",
303 | " [Bluetooth] | \n",
304 | " NaN | \n",
305 | "
\n",
306 | " \n",
307 | " 1019 | \n",
308 | " Pear P100 | \n",
309 | " ₹799 | \n",
310 | " NaN | \n",
311 | " [Dual Sim] | \n",
312 | " [32 MB RAM, 32 MB inbuilt] | \n",
313 | " [3000 mAh Battery] | \n",
314 | " [1.8 inches, 240 x 360 px Display] | \n",
315 | " [0.3 MP Rear Camera] | \n",
316 | " [Memory Card Supported] | \n",
317 | " [Bluetooth] | \n",
318 | " NaN | \n",
319 | "
\n",
320 | " \n",
321 | "
\n",
322 | "
1020 rows × 11 columns
\n",
323 | "
"
324 | ],
325 | "text/plain": [
326 | " model price score \\\n",
327 | "0 Motorola Edge 40 Neo ₹20,999 83 \n",
328 | "1 Motorola Edge 40 Neo (12GB RAM + 256GB) ₹22,999 85 \n",
329 | "2 Motorola Moto G54 5G ₹13,999 85 \n",
330 | "3 Motorola Moto G54 5G (12GB RAM +256GB) ₹15,999 87 \n",
331 | "4 Motorola Edge 40 5G ₹24,999 87 \n",
332 | "... ... ... ... \n",
333 | "1015 Snexian Bold 9K ₹1,199 NaN \n",
334 | "1016 Nokia C31 ₹7,999 62 \n",
335 | "1017 Micromax S115 ₹910 NaN \n",
336 | "1018 Pear P313 ₹699 NaN \n",
337 | "1019 Pear P100 ₹799 NaN \n",
338 | "\n",
339 | " sim \\\n",
340 | "0 [Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC] \n",
341 | "1 [Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC] \n",
342 | "2 [Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC] \n",
343 | "3 [Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC] \n",
344 | "4 [Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC] \n",
345 | "... ... \n",
346 | "1015 [Dual Sim] \n",
347 | "1016 [Dual Sim, 3G, 4G, VoLTE, Wi-Fi] \n",
348 | "1017 [Dual Sim] \n",
349 | "1018 [Dual Sim] \n",
350 | "1019 [Dual Sim] \n",
351 | "\n",
352 | " processor \\\n",
353 | "0 [Dimensity 7030, Octa Core, 2.5 GHz Processor] \n",
354 | "1 [Dimensity 7030, Octa Core, 2.5 GHz Processor] \n",
355 | "2 [Dimensity 7020, Octa Core, 2.2 GHz Processor] \n",
356 | "3 [Dimensity 7020, Octa Core, 2.2 GHz Processor] \n",
357 | "4 [Dimensity 8020, Octa Core, 2.6 GHz Processor] \n",
358 | "... ... \n",
359 | "1015 [No 3G] \n",
360 | "1016 [Unisoc SC9863A, Octa Core, 1.6 GHz Processor] \n",
361 | "1017 [No 3G] \n",
362 | "1018 [32 MB RAM, 32 MB inbuilt] \n",
363 | "1019 [32 MB RAM, 32 MB inbuilt] \n",
364 | "\n",
365 | " ram battery \\\n",
366 | "0 [8 GB RAM, 128 GB inbuilt] [5000 mAh Battery with 68W Fast Charging] \n",
367 | "1 [12 GB RAM, 256 GB inbuilt] [5000 mAh Battery with 68W Fast Charging] \n",
368 | "2 [8 GB RAM, 128 GB inbuilt] [6000 mAh Battery with 33W Fast Charging] \n",
369 | "3 [12 GB RAM, 256 GB inbuilt] [6000 mAh Battery with 33W Fast Charging] \n",
370 | "4 [8 GB RAM, 256 GB inbuilt] [4400 mAh Battery with 68W Fast Charging] \n",
371 | "... ... ... \n",
372 | "1015 [No Wifi] [1 MHz Processor] \n",
373 | "1016 [3 GB RAM, 32 GB inbuilt] [5050 mAh Battery with 10W Fast Charging] \n",
374 | "1017 [No Wifi] [32 MB RAM, 32 MB inbuilt] \n",
375 | "1018 [1100 mAh Battery] [1.8 inches, 240 x 360 px Display] \n",
376 | "1019 [3000 mAh Battery] [1.8 inches, 240 x 360 px Display] \n",
377 | "\n",
378 | " display \\\n",
379 | "0 [6.55 inches, 1080 x 2400 px, 144 Hz Display w... \n",
380 | "1 [6.55 inches, 1080 x 2400 px, 144 Hz Display w... \n",
381 | "2 [6.5 inches, 1080 x 2400 px, 120 Hz Display wi... \n",
382 | "3 [6.5 inches, 1080 x 2400 px, 120 Hz Display wi... \n",
383 | "4 [6.55 inches, 1080 x 2400 px, 144 Hz Display] \n",
384 | "... ... \n",
385 | "1015 [32 MB RAM, 32 MB inbuilt] \n",
386 | "1016 [6.74 inches, 720 x 1600 px Display with Water... \n",
387 | "1017 [800 mAh Battery] \n",
388 | "1018 [0.3 MP Rear Camera] \n",
389 | "1019 [0.3 MP Rear Camera] \n",
390 | "\n",
391 | " camera \\\n",
392 | "0 [50 MP + 13 MP Dual Rear & 32 MP Front Camera] \n",
393 | "1 [50 MP + 13 MP Dual Rear & 32 MP Front Camera] \n",
394 | "2 [50 MP + 8 MP Dual Rear & 16 MP Front Camera] \n",
395 | "3 [50 MP + 8 MP Dual Rear & 16 MP Front Camera] \n",
396 | "4 [50 MP + 13 MP Dual Rear & 32 MP Front Camera] \n",
397 | "... ... \n",
398 | "1015 [2500 mAh Battery] \n",
399 | "1016 [13 MP + 2 MP + 2 MP Triple Rear & 5 MP Front ... \n",
400 | "1017 [1.77 inches, 240 x 320 px Display] \n",
401 | "1018 [Memory Card Supported] \n",
402 | "1019 [Memory Card Supported] \n",
403 | "\n",
404 | " card os \n",
405 | "0 [Android v13] [No FM Radio] \n",
406 | "1 [Android v13] [No FM Radio] \n",
407 | "2 [Memory Card (Hybrid), upto 1 TB] [Android v13] \n",
408 | "3 [Memory Card (Hybrid), upto 1 TB] [Android v13] \n",
409 | "4 [Android v13] [Bluetooth] \n",
410 | "... ... ... \n",
411 | "1015 [2.4 inches, 240 x 320 px Display] [1.3 MP Rear & No Front Camera] \n",
412 | "1016 [Memory Card Supported, upto 256 GB] [Android v12] \n",
413 | "1017 [No Rear Camera] [Memory Card Supported] \n",
414 | "1018 [Bluetooth] NaN \n",
415 | "1019 [Bluetooth] NaN \n",
416 | "\n",
417 | "[1020 rows x 11 columns]"
418 | ]
419 | },
420 | "execution_count": 26,
421 | "metadata": {},
422 | "output_type": "execute_result"
423 | }
424 | ],
425 | "source": [
426 | "df = pd.DataFrame({\n",
427 | " 'model': name,\n",
428 | " 'price': price,\n",
429 | " 'score': spec_score,\n",
430 | " 'sim': sim,\n",
431 | " 'processor': processor,\n",
432 | " 'ram': ram,\n",
433 | " 'battery': battery,\n",
434 | " 'display': display,\n",
435 | " 'camera': camera,\n",
436 | " 'card': card,\n",
437 | " 'os': os\n",
438 | "})\n",
439 | "\n",
440 | "df"
441 | ]
442 | },
443 | {
444 | "cell_type": "code",
445 | "execution_count": 27,
446 | "id": "cc56c55f",
447 | "metadata": {},
448 | "outputs": [
449 | {
450 | "data": {
451 | "text/plain": [
452 | "model 0\n",
453 | "price 0\n",
454 | "score 280\n",
455 | "sim 0\n",
456 | "processor 0\n",
457 | "ram 0\n",
458 | "battery 0\n",
459 | "display 0\n",
460 | "camera 2\n",
461 | "card 27\n",
462 | "os 90\n",
463 | "dtype: int64"
464 | ]
465 | },
466 | "execution_count": 27,
467 | "metadata": {},
468 | "output_type": "execute_result"
469 | }
470 | ],
471 | "source": [
472 | "df.isnull().sum()"
473 | ]
474 | },
475 | {
476 | "cell_type": "code",
477 | "execution_count": 18,
478 | "id": "802f013e",
479 | "metadata": {},
480 | "outputs": [
481 | {
482 | "data": {
483 | "text/plain": [
484 | "1020"
485 | ]
486 | },
487 | "execution_count": 18,
488 | "metadata": {},
489 | "output_type": "execute_result"
490 | }
491 | ],
492 | "source": [
493 | "len(processor)"
494 | ]
495 | },
496 | {
497 | "cell_type": "code",
498 | "execution_count": null,
499 | "id": "eaa15c91",
500 | "metadata": {},
501 | "outputs": [],
502 | "source": []
503 | }
504 | ],
505 | "metadata": {
506 | "kernelspec": {
507 | "display_name": "Python 3 (ipykernel)",
508 | "language": "python",
509 | "name": "python3"
510 | },
511 | "language_info": {
512 | "codemirror_mode": {
513 | "name": "ipython",
514 | "version": 3
515 | },
516 | "file_extension": ".py",
517 | "mimetype": "text/x-python",
518 | "name": "python",
519 | "nbconvert_exporter": "python",
520 | "pygments_lexer": "ipython3",
521 | "version": "3.10.7"
522 | }
523 | },
524 | "nbformat": 4,
525 | "nbformat_minor": 5
526 | }
527 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Advanced Web Scraping
2 |
3 | This repository contains advanced web scraping projects using Selenium, a powerful web automation and scraping tool. Selenium enables you to automate interactions with websites and extract data from web pages, making it an essential tool for advanced web scraping.
4 |
5 | ## Overview
6 |
7 | Web scraping with Selenium is a versatile solution for extracting data from dynamic and interactive websites. This repository hosts projects that demonstrate advanced web scraping techniques using Selenium. Whether you need to scrape data from a JavaScript-heavy website, perform automated testing, or simulate user interactions, these projects will help you master Selenium.
8 |
9 | ## Project List
10 |
11 | 1. **Automated Form Submission**: Automate form submission on a web page using Selenium.
12 |
13 | 2. **Dynamic Page Loading**: Scrape data from web pages with dynamic content loading via Ajax requests.
14 |
15 | 3. **Crawling and Pagination**: Build a web scraper that navigates through paginated web pages.
16 |
17 | 4. **User Interaction Simulation**: Simulate user interactions, such as mouse clicks and keyboard inputs, for web scraping.
18 |
19 | 5. **Headless Browsing**: Perform headless browsing to scrape data without a visible browser window.
20 |
21 | 6. **Testing Automation**: Create automated tests for web applications using Selenium.
22 |
23 | Each project includes detailed documentation and code explanations to aid your understanding.
24 |
25 | ## Getting Started
26 |
27 | To get started with the projects, clone this repository to your local machine:
28 |
29 | ```bash
30 | git clone https://github.com/pxxthik/advanced-web-scraping.git
31 | ```
--------------------------------------------------------------------------------
/ajio.py:
--------------------------------------------------------------------------------
1 | from selenium import webdriver
2 | from selenium.webdriver.chrome.service import Service
3 |
4 | import time
5 |
6 | s = Service("C:/Users/Acer/Desktop/chromedriver.exe")
7 | options = webdriver.ChromeOptions()
8 | options.add_experimental_option("detach", True)
9 |
10 | driver = webdriver.Chrome(options=options, service=s)
11 |
12 | driver.get("https://www.ajio.com/men-backpacks/c/830201001")
13 |
14 | old_height = driver.execute_script("return document.body.scrollHeight")
15 |
16 | counter = 1
17 | while True:
18 |
19 | driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
20 | time.sleep(0.5)
21 | counter += 1
22 |
23 | new_height = driver.execute_script("return document.body.scrollHeight")
24 |
25 | print(counter)
26 | print("old height: ", old_height)
27 | print("new height: ", new_height)
28 | print()
29 |
30 | if new_height == old_height:
31 | break
32 | old_height = new_height
33 |
34 |
35 | html = driver.page_source
36 | with open("ajio.html", "w", encoding='utf-8') as f:
37 | f.write(html)
38 |
--------------------------------------------------------------------------------
/campusx.py:
--------------------------------------------------------------------------------
1 | from selenium import webdriver
2 | from selenium.webdriver.chrome.service import Service
3 | from selenium.webdriver.common.by import By
4 | from selenium.webdriver.common.keys import Keys
5 |
6 | import time
7 |
8 | s = Service("C:/Users/Acer/Desktop/chromedriver.exe")
9 | options = webdriver.ChromeOptions()
10 | options.add_experimental_option("detach", True)
11 |
12 | driver = webdriver.Chrome(options=options, service=s)
13 |
14 | driver.get("http://google.com")
15 |
16 | # fetch the input box using xpath
17 | user_input = driver.find_element(by=By.XPATH, value='//*[@id="APjFqb"]')
18 | user_input.send_keys("Campusx")
19 | time.sleep(1)
20 | user_input.send_keys(Keys.ENTER)
21 | time.sleep(1)
22 |
23 | link = driver.find_element(by=By.XPATH, value='//*[@id="rso"]/div[2]/div/div/div/div[1]/div/div/span/a/h3')
24 | link.click()
25 | time.sleep(1)
26 |
27 | link2 = driver.find_element(by=By.XPATH, value='//*[@id="1668425005116"]/span[2]/a')
28 | link2.click()
29 |
--------------------------------------------------------------------------------
/smartprix-smartphones.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "id": "09eea0e5",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "with open(\"smartprix.html\", \"r\", encoding=\"utf-8\") as f:\n",
11 | " html = f.read()"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 2,
17 | "id": "c9b72a4b",
18 | "metadata": {},
19 | "outputs": [],
20 | "source": [
21 | "from bs4 import BeautifulSoup\n",
22 | "import pandas as pd\n",
23 | "import numpy as np"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": 3,
29 | "id": "e0d12dc4",
30 | "metadata": {},
31 | "outputs": [],
32 | "source": [
33 | "soup = BeautifulSoup(html, \"lxml\")"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": 4,
39 | "id": "3faf85c9",
40 | "metadata": {},
41 | "outputs": [],
42 | "source": [
43 | "containers = soup.find_all(\"div\", {\"class\": \"sm-product has-tag has-features has-actions\"})"
44 | ]
45 | },
46 | {
47 | "cell_type": "code",
48 | "execution_count": 25,
49 | "id": "349ef85b",
50 | "metadata": {},
51 | "outputs": [],
52 | "source": [
53 | "name = []\n",
54 | "price = []\n",
55 | "spec_score = []\n",
56 | "sim = []\n",
57 | "processor = []\n",
58 | "ram = []\n",
59 | "battery = []\n",
60 | "display = []\n",
61 | "camera = []\n",
62 | "card = []\n",
63 | "os = []\n",
64 | "\n",
65 | "for i in soup.find_all(\"div\", {\"class\": \"sm-product has-tag has-features has-actions\"}):\n",
66 | " \n",
67 | " try:\n",
68 | " name.append(i.find('h2').text)\n",
69 | " except:\n",
70 | " name.append(np.nan)\n",
71 | " \n",
72 | " try:\n",
73 | " price.append(i.find('span', {'class': 'price'}).text)\n",
74 | " except:\n",
75 | " price.append(np.nan)\n",
76 | " \n",
77 | " try:\n",
78 | " spec_score.append(i.find('div', {'class': 'score rank-2-bg'}).find('b').text)\n",
79 | " except:\n",
80 | " spec_score.append(np.nan)\n",
81 | " \n",
82 | " x = i.find('ul', {\"class\": \"sm-feat specs\"}).find_all('li')\n",
83 | " \n",
84 | " try:\n",
85 | " sim.append(x[0])\n",
86 | " except:\n",
87 | " sim.append(np.nan)\n",
88 | " \n",
89 | " try:\n",
90 | " processor.append(x[1])\n",
91 | " except:\n",
92 | " processor.append(np.nan)\n",
93 | " \n",
94 | " try:\n",
95 | " ram.append(x[2])\n",
96 | " except:\n",
97 | " ram.append(np.nan)\n",
98 | " \n",
99 | " try:\n",
100 | " battery.append(x[3])\n",
101 | " except:\n",
102 | " battery.append(np.nan)\n",
103 | " \n",
104 | " try:\n",
105 | " display.append(x[4])\n",
106 | " except:\n",
107 | " display.append(np.nan)\n",
108 | " \n",
109 | " try:\n",
110 | " camera.append(x[5])\n",
111 | " except:\n",
112 | " camera.append(np.nan)\n",
113 | " \n",
114 | " try:\n",
115 | " card.append(x[6])\n",
116 | " except:\n",
117 | " card.append(np.nan)\n",
118 | " \n",
119 | " try:\n",
120 | " os.append(x[7])\n",
121 | " except:\n",
122 | " os.append(np.nan)"
123 | ]
124 | },
125 | {
126 | "cell_type": "code",
127 | "execution_count": 26,
128 | "id": "778a2325",
129 | "metadata": {},
130 | "outputs": [
131 | {
132 | "data": {
133 | "text/html": [
134 | "\n",
135 | "\n",
148 | "
\n",
149 | " \n",
150 | " \n",
151 | " | \n",
152 | " model | \n",
153 | " price | \n",
154 | " score | \n",
155 | " sim | \n",
156 | " processor | \n",
157 | " ram | \n",
158 | " battery | \n",
159 | " display | \n",
160 | " camera | \n",
161 | " card | \n",
162 | " os | \n",
163 | "
\n",
164 | " \n",
165 | " \n",
166 | " \n",
167 | " 0 | \n",
168 | " Motorola Edge 40 Neo | \n",
169 | " ₹20,999 | \n",
170 | " 83 | \n",
171 | " [Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC] | \n",
172 | " [Dimensity 7030, Octa Core, 2.5 GHz Processor] | \n",
173 | " [8 GB RAM, 128 GB inbuilt] | \n",
174 | " [5000 mAh Battery with 68W Fast Charging] | \n",
175 | " [6.55 inches, 1080 x 2400 px, 144 Hz Display w... | \n",
176 | " [50 MP + 13 MP Dual Rear & 32 MP Front Camera] | \n",
177 | " [Android v13] | \n",
178 | " [No FM Radio] | \n",
179 | "
\n",
180 | " \n",
181 | " 1 | \n",
182 | " Motorola Edge 40 Neo (12GB RAM + 256GB) | \n",
183 | " ₹22,999 | \n",
184 | " 85 | \n",
185 | " [Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC] | \n",
186 | " [Dimensity 7030, Octa Core, 2.5 GHz Processor] | \n",
187 | " [12 GB RAM, 256 GB inbuilt] | \n",
188 | " [5000 mAh Battery with 68W Fast Charging] | \n",
189 | " [6.55 inches, 1080 x 2400 px, 144 Hz Display w... | \n",
190 | " [50 MP + 13 MP Dual Rear & 32 MP Front Camera] | \n",
191 | " [Android v13] | \n",
192 | " [No FM Radio] | \n",
193 | "
\n",
194 | " \n",
195 | " 2 | \n",
196 | " Motorola Moto G54 5G | \n",
197 | " ₹13,999 | \n",
198 | " 85 | \n",
199 | " [Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC] | \n",
200 | " [Dimensity 7020, Octa Core, 2.2 GHz Processor] | \n",
201 | " [8 GB RAM, 128 GB inbuilt] | \n",
202 | " [6000 mAh Battery with 33W Fast Charging] | \n",
203 | " [6.5 inches, 1080 x 2400 px, 120 Hz Display wi... | \n",
204 | " [50 MP + 8 MP Dual Rear & 16 MP Front Camera] | \n",
205 | " [Memory Card (Hybrid), upto 1 TB] | \n",
206 | " [Android v13] | \n",
207 | "
\n",
208 | " \n",
209 | " 3 | \n",
210 | " Motorola Moto G54 5G (12GB RAM +256GB) | \n",
211 | " ₹15,999 | \n",
212 | " 87 | \n",
213 | " [Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC] | \n",
214 | " [Dimensity 7020, Octa Core, 2.2 GHz Processor] | \n",
215 | " [12 GB RAM, 256 GB inbuilt] | \n",
216 | " [6000 mAh Battery with 33W Fast Charging] | \n",
217 | " [6.5 inches, 1080 x 2400 px, 120 Hz Display wi... | \n",
218 | " [50 MP + 8 MP Dual Rear & 16 MP Front Camera] | \n",
219 | " [Memory Card (Hybrid), upto 1 TB] | \n",
220 | " [Android v13] | \n",
221 | "
\n",
222 | " \n",
223 | " 4 | \n",
224 | " Motorola Edge 40 5G | \n",
225 | " ₹24,999 | \n",
226 | " 87 | \n",
227 | " [Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC] | \n",
228 | " [Dimensity 8020, Octa Core, 2.6 GHz Processor] | \n",
229 | " [8 GB RAM, 256 GB inbuilt] | \n",
230 | " [4400 mAh Battery with 68W Fast Charging] | \n",
231 | " [6.55 inches, 1080 x 2400 px, 144 Hz Display] | \n",
232 | " [50 MP + 13 MP Dual Rear & 32 MP Front Camera] | \n",
233 | " [Android v13] | \n",
234 | " [Bluetooth] | \n",
235 | "
\n",
236 | " \n",
237 | " ... | \n",
238 | " ... | \n",
239 | " ... | \n",
240 | " ... | \n",
241 | " ... | \n",
242 | " ... | \n",
243 | " ... | \n",
244 | " ... | \n",
245 | " ... | \n",
246 | " ... | \n",
247 | " ... | \n",
248 | " ... | \n",
249 | "
\n",
250 | " \n",
251 | " 1015 | \n",
252 | " Snexian Bold 9K | \n",
253 | " ₹1,199 | \n",
254 | " NaN | \n",
255 | " [Dual Sim] | \n",
256 | " [No 3G] | \n",
257 | " [No Wifi] | \n",
258 | " [1 MHz Processor] | \n",
259 | " [32 MB RAM, 32 MB inbuilt] | \n",
260 | " [2500 mAh Battery] | \n",
261 | " [2.4 inches, 240 x 320 px Display] | \n",
262 | " [1.3 MP Rear & No Front Camera] | \n",
263 | "
\n",
264 | " \n",
265 | " 1016 | \n",
266 | " Nokia C31 | \n",
267 | " ₹7,999 | \n",
268 | " 62 | \n",
269 | " [Dual Sim, 3G, 4G, VoLTE, Wi-Fi] | \n",
270 | " [Unisoc SC9863A, Octa Core, 1.6 GHz Processor] | \n",
271 | " [3 GB RAM, 32 GB inbuilt] | \n",
272 | " [5050 mAh Battery with 10W Fast Charging] | \n",
273 | " [6.74 inches, 720 x 1600 px Display with Water... | \n",
274 | " [13 MP + 2 MP + 2 MP Triple Rear & 5 MP Front ... | \n",
275 | " [Memory Card Supported, upto 256 GB] | \n",
276 | " [Android v12] | \n",
277 | "
\n",
278 | " \n",
279 | " 1017 | \n",
280 | " Micromax S115 | \n",
281 | " ₹910 | \n",
282 | " NaN | \n",
283 | " [Dual Sim] | \n",
284 | " [No 3G] | \n",
285 | " [No Wifi] | \n",
286 | " [32 MB RAM, 32 MB inbuilt] | \n",
287 | " [800 mAh Battery] | \n",
288 | " [1.77 inches, 240 x 320 px Display] | \n",
289 | " [No Rear Camera] | \n",
290 | " [Memory Card Supported] | \n",
291 | "
\n",
292 | " \n",
293 | " 1018 | \n",
294 | " Pear P313 | \n",
295 | " ₹699 | \n",
296 | " NaN | \n",
297 | " [Dual Sim] | \n",
298 | " [32 MB RAM, 32 MB inbuilt] | \n",
299 | " [1100 mAh Battery] | \n",
300 | " [1.8 inches, 240 x 360 px Display] | \n",
301 | " [0.3 MP Rear Camera] | \n",
302 | " [Memory Card Supported] | \n",
303 | " [Bluetooth] | \n",
304 | " NaN | \n",
305 | "
\n",
306 | " \n",
307 | " 1019 | \n",
308 | " Pear P100 | \n",
309 | " ₹799 | \n",
310 | " NaN | \n",
311 | " [Dual Sim] | \n",
312 | " [32 MB RAM, 32 MB inbuilt] | \n",
313 | " [3000 mAh Battery] | \n",
314 | " [1.8 inches, 240 x 360 px Display] | \n",
315 | " [0.3 MP Rear Camera] | \n",
316 | " [Memory Card Supported] | \n",
317 | " [Bluetooth] | \n",
318 | " NaN | \n",
319 | "
\n",
320 | " \n",
321 | "
\n",
322 | "
1020 rows × 11 columns
\n",
323 | "
"
324 | ],
325 | "text/plain": [
326 | " model price score \\\n",
327 | "0 Motorola Edge 40 Neo ₹20,999 83 \n",
328 | "1 Motorola Edge 40 Neo (12GB RAM + 256GB) ₹22,999 85 \n",
329 | "2 Motorola Moto G54 5G ₹13,999 85 \n",
330 | "3 Motorola Moto G54 5G (12GB RAM +256GB) ₹15,999 87 \n",
331 | "4 Motorola Edge 40 5G ₹24,999 87 \n",
332 | "... ... ... ... \n",
333 | "1015 Snexian Bold 9K ₹1,199 NaN \n",
334 | "1016 Nokia C31 ₹7,999 62 \n",
335 | "1017 Micromax S115 ₹910 NaN \n",
336 | "1018 Pear P313 ₹699 NaN \n",
337 | "1019 Pear P100 ₹799 NaN \n",
338 | "\n",
339 | " sim \\\n",
340 | "0 [Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC] \n",
341 | "1 [Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC] \n",
342 | "2 [Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC] \n",
343 | "3 [Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC] \n",
344 | "4 [Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC] \n",
345 | "... ... \n",
346 | "1015 [Dual Sim] \n",
347 | "1016 [Dual Sim, 3G, 4G, VoLTE, Wi-Fi] \n",
348 | "1017 [Dual Sim] \n",
349 | "1018 [Dual Sim] \n",
350 | "1019 [Dual Sim] \n",
351 | "\n",
352 | " processor \\\n",
353 | "0 [Dimensity 7030, Octa Core, 2.5 GHz Processor] \n",
354 | "1 [Dimensity 7030, Octa Core, 2.5 GHz Processor] \n",
355 | "2 [Dimensity 7020, Octa Core, 2.2 GHz Processor] \n",
356 | "3 [Dimensity 7020, Octa Core, 2.2 GHz Processor] \n",
357 | "4 [Dimensity 8020, Octa Core, 2.6 GHz Processor] \n",
358 | "... ... \n",
359 | "1015 [No 3G] \n",
360 | "1016 [Unisoc SC9863A, Octa Core, 1.6 GHz Processor] \n",
361 | "1017 [No 3G] \n",
362 | "1018 [32 MB RAM, 32 MB inbuilt] \n",
363 | "1019 [32 MB RAM, 32 MB inbuilt] \n",
364 | "\n",
365 | " ram battery \\\n",
366 | "0 [8 GB RAM, 128 GB inbuilt] [5000 mAh Battery with 68W Fast Charging] \n",
367 | "1 [12 GB RAM, 256 GB inbuilt] [5000 mAh Battery with 68W Fast Charging] \n",
368 | "2 [8 GB RAM, 128 GB inbuilt] [6000 mAh Battery with 33W Fast Charging] \n",
369 | "3 [12 GB RAM, 256 GB inbuilt] [6000 mAh Battery with 33W Fast Charging] \n",
370 | "4 [8 GB RAM, 256 GB inbuilt] [4400 mAh Battery with 68W Fast Charging] \n",
371 | "... ... ... \n",
372 | "1015 [No Wifi] [1 MHz Processor] \n",
373 | "1016 [3 GB RAM, 32 GB inbuilt] [5050 mAh Battery with 10W Fast Charging] \n",
374 | "1017 [No Wifi] [32 MB RAM, 32 MB inbuilt] \n",
375 | "1018 [1100 mAh Battery] [1.8 inches, 240 x 360 px Display] \n",
376 | "1019 [3000 mAh Battery] [1.8 inches, 240 x 360 px Display] \n",
377 | "\n",
378 | " display \\\n",
379 | "0 [6.55 inches, 1080 x 2400 px, 144 Hz Display w... \n",
380 | "1 [6.55 inches, 1080 x 2400 px, 144 Hz Display w... \n",
381 | "2 [6.5 inches, 1080 x 2400 px, 120 Hz Display wi... \n",
382 | "3 [6.5 inches, 1080 x 2400 px, 120 Hz Display wi... \n",
383 | "4 [6.55 inches, 1080 x 2400 px, 144 Hz Display] \n",
384 | "... ... \n",
385 | "1015 [32 MB RAM, 32 MB inbuilt] \n",
386 | "1016 [6.74 inches, 720 x 1600 px Display with Water... \n",
387 | "1017 [800 mAh Battery] \n",
388 | "1018 [0.3 MP Rear Camera] \n",
389 | "1019 [0.3 MP Rear Camera] \n",
390 | "\n",
391 | " camera \\\n",
392 | "0 [50 MP + 13 MP Dual Rear & 32 MP Front Camera] \n",
393 | "1 [50 MP + 13 MP Dual Rear & 32 MP Front Camera] \n",
394 | "2 [50 MP + 8 MP Dual Rear & 16 MP Front Camera] \n",
395 | "3 [50 MP + 8 MP Dual Rear & 16 MP Front Camera] \n",
396 | "4 [50 MP + 13 MP Dual Rear & 32 MP Front Camera] \n",
397 | "... ... \n",
398 | "1015 [2500 mAh Battery] \n",
399 | "1016 [13 MP + 2 MP + 2 MP Triple Rear & 5 MP Front ... \n",
400 | "1017 [1.77 inches, 240 x 320 px Display] \n",
401 | "1018 [Memory Card Supported] \n",
402 | "1019 [Memory Card Supported] \n",
403 | "\n",
404 | " card os \n",
405 | "0 [Android v13] [No FM Radio] \n",
406 | "1 [Android v13] [No FM Radio] \n",
407 | "2 [Memory Card (Hybrid), upto 1 TB] [Android v13] \n",
408 | "3 [Memory Card (Hybrid), upto 1 TB] [Android v13] \n",
409 | "4 [Android v13] [Bluetooth] \n",
410 | "... ... ... \n",
411 | "1015 [2.4 inches, 240 x 320 px Display] [1.3 MP Rear & No Front Camera] \n",
412 | "1016 [Memory Card Supported, upto 256 GB] [Android v12] \n",
413 | "1017 [No Rear Camera] [Memory Card Supported] \n",
414 | "1018 [Bluetooth] NaN \n",
415 | "1019 [Bluetooth] NaN \n",
416 | "\n",
417 | "[1020 rows x 11 columns]"
418 | ]
419 | },
420 | "execution_count": 26,
421 | "metadata": {},
422 | "output_type": "execute_result"
423 | }
424 | ],
425 | "source": [
426 | "df = pd.DataFrame({\n",
427 | " 'model': name,\n",
428 | " 'price': price,\n",
429 | " 'score': spec_score,\n",
430 | " 'sim': sim,\n",
431 | " 'processor': processor,\n",
432 | " 'ram': ram,\n",
433 | " 'battery': battery,\n",
434 | " 'display': display,\n",
435 | " 'camera': camera,\n",
436 | " 'card': card,\n",
437 | " 'os': os\n",
438 | "})\n",
439 | "\n",
440 | "df"
441 | ]
442 | },
443 | {
444 | "cell_type": "code",
445 | "execution_count": 27,
446 | "id": "cc56c55f",
447 | "metadata": {},
448 | "outputs": [
449 | {
450 | "data": {
451 | "text/plain": [
452 | "model 0\n",
453 | "price 0\n",
454 | "score 280\n",
455 | "sim 0\n",
456 | "processor 0\n",
457 | "ram 0\n",
458 | "battery 0\n",
459 | "display 0\n",
460 | "camera 2\n",
461 | "card 27\n",
462 | "os 90\n",
463 | "dtype: int64"
464 | ]
465 | },
466 | "execution_count": 27,
467 | "metadata": {},
468 | "output_type": "execute_result"
469 | }
470 | ],
471 | "source": [
472 | "df.isnull().sum()"
473 | ]
474 | },
475 | {
476 | "cell_type": "code",
477 | "execution_count": 18,
478 | "id": "802f013e",
479 | "metadata": {},
480 | "outputs": [
481 | {
482 | "data": {
483 | "text/plain": [
484 | "1020"
485 | ]
486 | },
487 | "execution_count": 18,
488 | "metadata": {},
489 | "output_type": "execute_result"
490 | }
491 | ],
492 | "source": [
493 | "len(processor)"
494 | ]
495 | },
496 | {
497 | "cell_type": "code",
498 | "execution_count": null,
499 | "id": "eaa15c91",
500 | "metadata": {},
501 | "outputs": [],
502 | "source": []
503 | }
504 | ],
505 | "metadata": {
506 | "kernelspec": {
507 | "display_name": "Python 3 (ipykernel)",
508 | "language": "python",
509 | "name": "python3"
510 | },
511 | "language_info": {
512 | "codemirror_mode": {
513 | "name": "ipython",
514 | "version": 3
515 | },
516 | "file_extension": ".py",
517 | "mimetype": "text/x-python",
518 | "name": "python",
519 | "nbconvert_exporter": "python",
520 | "pygments_lexer": "ipython3",
521 | "version": "3.10.7"
522 | }
523 | },
524 | "nbformat": 4,
525 | "nbformat_minor": 5
526 | }
527 |
--------------------------------------------------------------------------------
/smartprix.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 | from selenium import webdriver
4 | from selenium.webdriver.chrome.service import Service
5 | from selenium.webdriver.common.by import By
6 |
# Scrape script: opens the smartprix mobiles listing in Chrome, ticks two
# sidebar checkboxes, keeps clicking the element that loads more results until
# the page stops growing, then saves the rendered HTML to "smartprix.html".

# Positional XPaths into the page's current layout; they will need updating
# if the site's markup changes.
# NOTE(review): which filters these two checkboxes represent is not visible
# from this script -- confirm against the live page.
FILTER_XPATHS = (
    '//*[@id="app"]/main/aside/div/div[5]/div[2]/label[1]/input',
    '//*[@id="app"]/main/aside/div/div[5]/div[2]/label[2]/input',
)
# Element clicked repeatedly to pull in the next batch of results
# (presumably the "load more" control -- TODO confirm).
LOAD_MORE_XPATH = '//*[@id="app"]/main/div[1]/div[2]/div[3]'

# After each click, sample the page height every POLL_INTERVAL seconds and
# give the page up to GROWTH_TIMEOUT seconds to grow before concluding that
# nothing more will load.
POLL_INTERVAL = 1
GROWTH_TIMEOUT = 10

s = Service("C:/Users/Acer/Desktop/chromedriver.exe")
options = webdriver.ChromeOptions()
# "detach" leaves the browser window open once the script has finished.
options.add_experimental_option("detach", True)

driver = webdriver.Chrome(options=options, service=s)

driver.get("https://www.smartprix.com/mobiles")
time.sleep(1)

# Tick both filter checkboxes back to back, then give the listing time to
# refresh before measuring its height.
for filter_xpath in FILTER_XPATHS:
    driver.find_element(by=By.XPATH, value=filter_xpath).click()
time.sleep(2)

old_height = driver.execute_script("return document.body.scrollHeight")

counter = 1
while True:
    # find_elements returns an empty list instead of raising, so a control
    # that is no longer in the DOM ends the loop cleanly and the page that
    # has been loaded so far is still written to disk below.
    load_more = driver.find_elements(by=By.XPATH, value=LOAD_MORE_XPATH)
    if not load_more:
        break

    load_more[0].click()
    counter += 1

    # A single fixed sleep cannot tell "no more results" apart from "the
    # response is slow", which would truncate the saved page. Keep sampling
    # the height until it changes or the timeout expires.
    deadline = time.monotonic() + GROWTH_TIMEOUT
    while True:
        time.sleep(POLL_INTERVAL)
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height != old_height or time.monotonic() >= deadline:
            break

    print(counter)
    print("old height: ", old_height)
    print("new height: ", new_height)

    # Height unchanged for the whole timeout window: treat as fully loaded.
    if new_height == old_height:
        break

    old_height = new_height


# Persist the fully expanded DOM; UTF-8 is set explicitly so the output does
# not depend on the platform's default encoding.
html = driver.page_source
with open("smartprix.html", "w", encoding='utf-8') as f:
    f.write(html)
45 |
--------------------------------------------------------------------------------