├── .gitignore ├── .idea ├── .gitignore ├── advanced-web-scraping.iml ├── inspectionProfiles │ └── profiles_settings.xml ├── misc.xml ├── modules.xml └── vcs.xml ├── .ipynb_checkpoints └── Untitled-checkpoint.ipynb ├── README.md ├── ajio.html ├── ajio.py ├── campusx.py ├── smartprix-smartphones.ipynb ├── smartprix.html └── smartprix.py /.gitignore: -------------------------------------------------------------------------------- 1 | venv -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | -------------------------------------------------------------------------------- /.idea/advanced-web-scraping.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/Untitled-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | 
"cell_type": "code", 5 | "execution_count": 1, 6 | "id": "09eea0e5", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "with open(\"smartprix.html\", \"r\", encoding=\"utf-8\") as f:\n", 11 | " html = f.read()" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "id": "c9b72a4b", 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "from bs4 import BeautifulSoup\n", 22 | "import pandas as pd\n", 23 | "import numpy as np" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 3, 29 | "id": "e0d12dc4", 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "soup = BeautifulSoup(html, \"lxml\")" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 4, 39 | "id": "3faf85c9", 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "containers = soup.find_all(\"div\", {\"class\": \"sm-product has-tag has-features has-actions\"})" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 25, 49 | "id": "349ef85b", 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "name = []\n", 54 | "price = []\n", 55 | "spec_score = []\n", 56 | "sim = []\n", 57 | "processor = []\n", 58 | "ram = []\n", 59 | "battery = []\n", 60 | "display = []\n", 61 | "camera = []\n", 62 | "card = []\n", 63 | "os = []\n", 64 | "\n", 65 | "for i in soup.find_all(\"div\", {\"class\": \"sm-product has-tag has-features has-actions\"}):\n", 66 | " \n", 67 | " try:\n", 68 | " name.append(i.find('h2').text)\n", 69 | " except:\n", 70 | " name.append(np.nan)\n", 71 | " \n", 72 | " try:\n", 73 | " price.append(i.find('span', {'class': 'price'}).text)\n", 74 | " except:\n", 75 | " price.append(np.nan)\n", 76 | " \n", 77 | " try:\n", 78 | " spec_score.append(i.find('div', {'class': 'score rank-2-bg'}).find('b').text)\n", 79 | " except:\n", 80 | " spec_score.append(np.nan)\n", 81 | " \n", 82 | " x = i.find('ul', {\"class\": \"sm-feat specs\"}).find_all('li')\n", 83 | " \n", 84 | " try:\n", 85 | 
" sim.append(x[0])\n", 86 | " except:\n", 87 | " sim.append(np.nan)\n", 88 | " \n", 89 | " try:\n", 90 | " processor.append(x[1])\n", 91 | " except:\n", 92 | " processor.append(np.nan)\n", 93 | " \n", 94 | " try:\n", 95 | " ram.append(x[2])\n", 96 | " except:\n", 97 | " ram.append(np.nan)\n", 98 | " \n", 99 | " try:\n", 100 | " battery.append(x[3])\n", 101 | " except:\n", 102 | " battery.append(np.nan)\n", 103 | " \n", 104 | " try:\n", 105 | " display.append(x[4])\n", 106 | " except:\n", 107 | " display.append(np.nan)\n", 108 | " \n", 109 | " try:\n", 110 | " camera.append(x[5])\n", 111 | " except:\n", 112 | " camera.append(np.nan)\n", 113 | " \n", 114 | " try:\n", 115 | " card.append(x[6])\n", 116 | " except:\n", 117 | " card.append(np.nan)\n", 118 | " \n", 119 | " try:\n", 120 | " os.append(x[7])\n", 121 | " except:\n", 122 | " os.append(np.nan)" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 26, 128 | "id": "778a2325", 129 | "metadata": {}, 130 | "outputs": [ 131 | { 132 | "data": { 133 | "text/html": [ 134 | "
\n", 135 | "\n", 148 | "\n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " 
\n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | "
modelpricescoresimprocessorrambatterydisplaycameracardos
0Motorola Edge 40 Neo₹20,99983[Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC][Dimensity 7030, Octa Core, 2.5 GHz Processor][8 GB RAM, 128 GB inbuilt][5000 mAh Battery with 68W Fast Charging][6.55 inches, 1080 x 2400 px, 144 Hz Display w...[50 MP + 13 MP Dual Rear & 32 MP Front Camera][Android v13][No FM Radio]
1Motorola Edge 40 Neo (12GB RAM + 256GB)₹22,99985[Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC][Dimensity 7030, Octa Core, 2.5 GHz Processor][12 GB RAM, 256 GB inbuilt][5000 mAh Battery with 68W Fast Charging][6.55 inches, 1080 x 2400 px, 144 Hz Display w...[50 MP + 13 MP Dual Rear & 32 MP Front Camera][Android v13][No FM Radio]
2Motorola Moto G54 5G₹13,99985[Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC][Dimensity 7020, Octa Core, 2.2 GHz Processor][8 GB RAM, 128 GB inbuilt][6000 mAh Battery with 33W Fast Charging][6.5 inches, 1080 x 2400 px, 120 Hz Display wi...[50 MP + 8 MP Dual Rear & 16 MP Front Camera][Memory Card (Hybrid), upto 1 TB][Android v13]
3Motorola Moto G54 5G (12GB RAM +256GB)₹15,99987[Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC][Dimensity 7020, Octa Core, 2.2 GHz Processor][12 GB RAM, 256 GB inbuilt][6000 mAh Battery with 33W Fast Charging][6.5 inches, 1080 x 2400 px, 120 Hz Display wi...[50 MP + 8 MP Dual Rear & 16 MP Front Camera][Memory Card (Hybrid), upto 1 TB][Android v13]
4Motorola Edge 40 5G₹24,99987[Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC][Dimensity 8020, Octa Core, 2.6 GHz Processor][8 GB RAM, 256 GB inbuilt][4400 mAh Battery with 68W Fast Charging][6.55 inches, 1080 x 2400 px, 144 Hz Display][50 MP + 13 MP Dual Rear & 32 MP Front Camera][Android v13][Bluetooth]
....................................
1015Snexian Bold 9K₹1,199NaN[Dual Sim][No 3G][No Wifi][1 MHz Processor][32 MB RAM, 32 MB inbuilt][2500 mAh Battery][2.4 inches, 240 x 320 px Display][1.3 MP Rear & No Front Camera]
1016Nokia C31₹7,99962[Dual Sim, 3G, 4G, VoLTE, Wi-Fi][Unisoc SC9863A, Octa Core, 1.6 GHz Processor][3 GB RAM, 32 GB inbuilt][5050 mAh Battery with 10W Fast Charging][6.74 inches, 720 x 1600 px Display with Water...[13 MP + 2 MP + 2 MP Triple Rear & 5 MP Front ...[Memory Card Supported, upto 256 GB][Android v12]
1017Micromax S115₹910NaN[Dual Sim][No 3G][No Wifi][32 MB RAM, 32 MB inbuilt][800 mAh Battery][1.77 inches, 240 x 320 px Display][No Rear Camera][Memory Card Supported]
1018Pear P313₹699NaN[Dual Sim][32 MB RAM, 32 MB inbuilt][1100 mAh Battery][1.8 inches, 240 x 360 px Display][0.3 MP Rear Camera][Memory Card Supported][Bluetooth]NaN
1019Pear P100₹799NaN[Dual Sim][32 MB RAM, 32 MB inbuilt][3000 mAh Battery][1.8 inches, 240 x 360 px Display][0.3 MP Rear Camera][Memory Card Supported][Bluetooth]NaN
\n", 322 | "

1020 rows × 11 columns

\n", 323 | "
" 324 | ], 325 | "text/plain": [ 326 | " model price score \\\n", 327 | "0 Motorola Edge 40 Neo ₹20,999 83 \n", 328 | "1 Motorola Edge 40 Neo (12GB RAM + 256GB) ₹22,999 85 \n", 329 | "2 Motorola Moto G54 5G ₹13,999 85 \n", 330 | "3 Motorola Moto G54 5G (12GB RAM +256GB) ₹15,999 87 \n", 331 | "4 Motorola Edge 40 5G ₹24,999 87 \n", 332 | "... ... ... ... \n", 333 | "1015 Snexian Bold 9K ₹1,199 NaN \n", 334 | "1016 Nokia C31 ₹7,999 62 \n", 335 | "1017 Micromax S115 ₹910 NaN \n", 336 | "1018 Pear P313 ₹699 NaN \n", 337 | "1019 Pear P100 ₹799 NaN \n", 338 | "\n", 339 | " sim \\\n", 340 | "0 [Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC] \n", 341 | "1 [Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC] \n", 342 | "2 [Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC] \n", 343 | "3 [Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC] \n", 344 | "4 [Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC] \n", 345 | "... ... \n", 346 | "1015 [Dual Sim] \n", 347 | "1016 [Dual Sim, 3G, 4G, VoLTE, Wi-Fi] \n", 348 | "1017 [Dual Sim] \n", 349 | "1018 [Dual Sim] \n", 350 | "1019 [Dual Sim] \n", 351 | "\n", 352 | " processor \\\n", 353 | "0 [Dimensity 7030, Octa Core, 2.5 GHz Processor] \n", 354 | "1 [Dimensity 7030, Octa Core, 2.5 GHz Processor] \n", 355 | "2 [Dimensity 7020, Octa Core, 2.2 GHz Processor] \n", 356 | "3 [Dimensity 7020, Octa Core, 2.2 GHz Processor] \n", 357 | "4 [Dimensity 8020, Octa Core, 2.6 GHz Processor] \n", 358 | "... ... 
\n", 359 | "1015 [No 3G] \n", 360 | "1016 [Unisoc SC9863A, Octa Core, 1.6 GHz Processor] \n", 361 | "1017 [No 3G] \n", 362 | "1018 [32 MB RAM, 32 MB inbuilt] \n", 363 | "1019 [32 MB RAM, 32 MB inbuilt] \n", 364 | "\n", 365 | " ram battery \\\n", 366 | "0 [8 GB RAM, 128 GB inbuilt] [5000 mAh Battery with 68W Fast Charging] \n", 367 | "1 [12 GB RAM, 256 GB inbuilt] [5000 mAh Battery with 68W Fast Charging] \n", 368 | "2 [8 GB RAM, 128 GB inbuilt] [6000 mAh Battery with 33W Fast Charging] \n", 369 | "3 [12 GB RAM, 256 GB inbuilt] [6000 mAh Battery with 33W Fast Charging] \n", 370 | "4 [8 GB RAM, 256 GB inbuilt] [4400 mAh Battery with 68W Fast Charging] \n", 371 | "... ... ... \n", 372 | "1015 [No Wifi] [1 MHz Processor] \n", 373 | "1016 [3 GB RAM, 32 GB inbuilt] [5050 mAh Battery with 10W Fast Charging] \n", 374 | "1017 [No Wifi] [32 MB RAM, 32 MB inbuilt] \n", 375 | "1018 [1100 mAh Battery] [1.8 inches, 240 x 360 px Display] \n", 376 | "1019 [3000 mAh Battery] [1.8 inches, 240 x 360 px Display] \n", 377 | "\n", 378 | " display \\\n", 379 | "0 [6.55 inches, 1080 x 2400 px, 144 Hz Display w... \n", 380 | "1 [6.55 inches, 1080 x 2400 px, 144 Hz Display w... \n", 381 | "2 [6.5 inches, 1080 x 2400 px, 120 Hz Display wi... \n", 382 | "3 [6.5 inches, 1080 x 2400 px, 120 Hz Display wi... \n", 383 | "4 [6.55 inches, 1080 x 2400 px, 144 Hz Display] \n", 384 | "... ... \n", 385 | "1015 [32 MB RAM, 32 MB inbuilt] \n", 386 | "1016 [6.74 inches, 720 x 1600 px Display with Water... \n", 387 | "1017 [800 mAh Battery] \n", 388 | "1018 [0.3 MP Rear Camera] \n", 389 | "1019 [0.3 MP Rear Camera] \n", 390 | "\n", 391 | " camera \\\n", 392 | "0 [50 MP + 13 MP Dual Rear & 32 MP Front Camera] \n", 393 | "1 [50 MP + 13 MP Dual Rear & 32 MP Front Camera] \n", 394 | "2 [50 MP + 8 MP Dual Rear & 16 MP Front Camera] \n", 395 | "3 [50 MP + 8 MP Dual Rear & 16 MP Front Camera] \n", 396 | "4 [50 MP + 13 MP Dual Rear & 32 MP Front Camera] \n", 397 | "... ... 
\n", 398 | "1015 [2500 mAh Battery] \n", 399 | "1016 [13 MP + 2 MP + 2 MP Triple Rear & 5 MP Front ... \n", 400 | "1017 [1.77 inches, 240 x 320 px Display] \n", 401 | "1018 [Memory Card Supported] \n", 402 | "1019 [Memory Card Supported] \n", 403 | "\n", 404 | " card os \n", 405 | "0 [Android v13] [No FM Radio] \n", 406 | "1 [Android v13] [No FM Radio] \n", 407 | "2 [Memory Card (Hybrid), upto 1 TB] [Android v13] \n", 408 | "3 [Memory Card (Hybrid), upto 1 TB] [Android v13] \n", 409 | "4 [Android v13] [Bluetooth] \n", 410 | "... ... ... \n", 411 | "1015 [2.4 inches, 240 x 320 px Display] [1.3 MP Rear & No Front Camera] \n", 412 | "1016 [Memory Card Supported, upto 256 GB] [Android v12] \n", 413 | "1017 [No Rear Camera] [Memory Card Supported] \n", 414 | "1018 [Bluetooth] NaN \n", 415 | "1019 [Bluetooth] NaN \n", 416 | "\n", 417 | "[1020 rows x 11 columns]" 418 | ] 419 | }, 420 | "execution_count": 26, 421 | "metadata": {}, 422 | "output_type": "execute_result" 423 | } 424 | ], 425 | "source": [ 426 | "df = pd.DataFrame({\n", 427 | " 'model': name,\n", 428 | " 'price': price,\n", 429 | " 'score': spec_score,\n", 430 | " 'sim': sim,\n", 431 | " 'processor': processor,\n", 432 | " 'ram': ram,\n", 433 | " 'battery': battery,\n", 434 | " 'display': display,\n", 435 | " 'camera': camera,\n", 436 | " 'card': card,\n", 437 | " 'os': os\n", 438 | "})\n", 439 | "\n", 440 | "df" 441 | ] 442 | }, 443 | { 444 | "cell_type": "code", 445 | "execution_count": 27, 446 | "id": "cc56c55f", 447 | "metadata": {}, 448 | "outputs": [ 449 | { 450 | "data": { 451 | "text/plain": [ 452 | "model 0\n", 453 | "price 0\n", 454 | "score 280\n", 455 | "sim 0\n", 456 | "processor 0\n", 457 | "ram 0\n", 458 | "battery 0\n", 459 | "display 0\n", 460 | "camera 2\n", 461 | "card 27\n", 462 | "os 90\n", 463 | "dtype: int64" 464 | ] 465 | }, 466 | "execution_count": 27, 467 | "metadata": {}, 468 | "output_type": "execute_result" 469 | } 470 | ], 471 | "source": [ 472 | "df.isnull().sum()" 473 | ] 474 | 
}, 475 | { 476 | "cell_type": "code", 477 | "execution_count": 18, 478 | "id": "802f013e", 479 | "metadata": {}, 480 | "outputs": [ 481 | { 482 | "data": { 483 | "text/plain": [ 484 | "1020" 485 | ] 486 | }, 487 | "execution_count": 18, 488 | "metadata": {}, 489 | "output_type": "execute_result" 490 | } 491 | ], 492 | "source": [ 493 | "len(processor)" 494 | ] 495 | }, 496 | { 497 | "cell_type": "code", 498 | "execution_count": null, 499 | "id": "eaa15c91", 500 | "metadata": {}, 501 | "outputs": [], 502 | "source": [] 503 | } 504 | ], 505 | "metadata": { 506 | "kernelspec": { 507 | "display_name": "Python 3 (ipykernel)", 508 | "language": "python", 509 | "name": "python3" 510 | }, 511 | "language_info": { 512 | "codemirror_mode": { 513 | "name": "ipython", 514 | "version": 3 515 | }, 516 | "file_extension": ".py", 517 | "mimetype": "text/x-python", 518 | "name": "python", 519 | "nbconvert_exporter": "python", 520 | "pygments_lexer": "ipython3", 521 | "version": "3.10.7" 522 | } 523 | }, 524 | "nbformat": 4, 525 | "nbformat_minor": 5 526 | } 527 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Advanced Web Scraping 2 | 3 | This repository contains advanced web scraping projects using Selenium, a powerful web automation and scraping tool. Selenium enables you to automate interactions with websites and extract data from web pages, making it an essential tool for advanced web scraping. 4 | 5 | ## Overview 6 | 7 | Web scraping with Selenium is a versatile solution for extracting data from dynamic and interactive websites. This repository hosts projects that demonstrate advanced web scraping techniques using Selenium. Whether you need to scrape data from a JavaScript-heavy website, perform automated testing, or simulate user interactions, these projects will help you master Selenium. 8 | 9 | ## Project List 10 | 11 | 1. 
**Automated Form Submission**: Automate form submission on a web page using Selenium. 12 | 13 | 2. **Dynamic Page Loading**: Scrape data from web pages with dynamic content loading via Ajax requests. 14 | 15 | 3. **Crawling and Pagination**: Build a web scraper that navigates through paginated web pages. 16 | 17 | 4. **User Interaction Simulation**: Simulate user interactions, such as mouse clicks and keyboard inputs, for web scraping. 18 | 19 | 5. **Headless Browsing**: Perform headless browsing to scrape data without a visible browser window. 20 | 21 | 6. **Testing Automation**: Create automated tests for web applications using Selenium. 22 | 23 | Each project includes detailed documentation and code explanations to aid your understanding. 24 | 25 | ## Getting Started 26 | 27 | To get started with the projects, clone this repository to your local machine: 28 | 29 | ```bash 30 | git clone https://github.com/pxxthik/advanced-web-scraping.git 31 | -------------------------------------------------------------------------------- /ajio.py: -------------------------------------------------------------------------------- 1 | from selenium import webdriver 2 | from selenium.webdriver.chrome.service import Service 3 | 4 | import time 5 | 6 | s = Service("C:/Users/Acer/Desktop/chromedriver.exe") 7 | options = webdriver.ChromeOptions() 8 | options.add_experimental_option("detach", True) 9 | 10 | driver = webdriver.Chrome(options=options, service=s) 11 | 12 | driver.get("https://www.ajio.com/men-backpacks/c/830201001") 13 | 14 | old_height = driver.execute_script("return document.body.scrollHeight") 15 | 16 | counter = 1 17 | while True: 18 | 19 | driver.execute_script("window.scrollTo(0, document.body.scrollHeight)") 20 | time.sleep(0.5) 21 | counter += 1 22 | 23 | new_height = driver.execute_script("return document.body.scrollHeight") 24 | 25 | print(counter) 26 | print("old height: ", old_height) 27 | print("new height: ", new_height) 28 | print() 29 | 30 | if new_height 
== old_height: 31 | break 32 | old_height = new_height 33 | 34 | 35 | html = driver.page_source 36 | with open("ajio.html", "w", encoding='utf-8') as f: 37 | f.write(html) 38 | -------------------------------------------------------------------------------- /campusx.py: -------------------------------------------------------------------------------- 1 | from selenium import webdriver 2 | from selenium.webdriver.chrome.service import Service 3 | from selenium.webdriver.common.by import By 4 | from selenium.webdriver.common.keys import Keys 5 | 6 | import time 7 | 8 | s = Service("C:/Users/Acer/Desktop/chromedriver.exe") 9 | options = webdriver.ChromeOptions() 10 | options.add_experimental_option("detach", True) 11 | 12 | driver = webdriver.Chrome(options=options, service=s) 13 | 14 | driver.get("http://google.com") 15 | 16 | # fetch the input box using xpath 17 | user_input = driver.find_element(by=By.XPATH, value='//*[@id="APjFqb"]') 18 | user_input.send_keys("Campusx") 19 | time.sleep(1) 20 | user_input.send_keys(Keys.ENTER) 21 | time.sleep(1) 22 | 23 | link = driver.find_element(by=By.XPATH, value='//*[@id="rso"]/div[2]/div/div/div/div[1]/div/div/span/a/h3') 24 | link.click() 25 | time.sleep(1) 26 | 27 | link2 = driver.find_element(by=By.XPATH, value='//*[@id="1668425005116"]/span[2]/a') 28 | link2.click() 29 | -------------------------------------------------------------------------------- /smartprix-smartphones.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "09eea0e5", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "with open(\"smartprix.html\", \"r\", encoding=\"utf-8\") as f:\n", 11 | " html = f.read()" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "id": "c9b72a4b", 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "from bs4 import BeautifulSoup\n", 22 | "import pandas as 
pd\n", 23 | "import numpy as np" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 3, 29 | "id": "e0d12dc4", 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "soup = BeautifulSoup(html, \"lxml\")" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 4, 39 | "id": "3faf85c9", 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "containers = soup.find_all(\"div\", {\"class\": \"sm-product has-tag has-features has-actions\"})" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 25, 49 | "id": "349ef85b", 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "name = []\n", 54 | "price = []\n", 55 | "spec_score = []\n", 56 | "sim = []\n", 57 | "processor = []\n", 58 | "ram = []\n", 59 | "battery = []\n", 60 | "display = []\n", 61 | "camera = []\n", 62 | "card = []\n", 63 | "os = []\n", 64 | "\n", 65 | "for i in soup.find_all(\"div\", {\"class\": \"sm-product has-tag has-features has-actions\"}):\n", 66 | " \n", 67 | " try:\n", 68 | " name.append(i.find('h2').text)\n", 69 | " except:\n", 70 | " name.append(np.nan)\n", 71 | " \n", 72 | " try:\n", 73 | " price.append(i.find('span', {'class': 'price'}).text)\n", 74 | " except:\n", 75 | " price.append(np.nan)\n", 76 | " \n", 77 | " try:\n", 78 | " spec_score.append(i.find('div', {'class': 'score rank-2-bg'}).find('b').text)\n", 79 | " except:\n", 80 | " spec_score.append(np.nan)\n", 81 | " \n", 82 | " x = i.find('ul', {\"class\": \"sm-feat specs\"}).find_all('li')\n", 83 | " \n", 84 | " try:\n", 85 | " sim.append(x[0])\n", 86 | " except:\n", 87 | " sim.append(np.nan)\n", 88 | " \n", 89 | " try:\n", 90 | " processor.append(x[1])\n", 91 | " except:\n", 92 | " processor.append(np.nan)\n", 93 | " \n", 94 | " try:\n", 95 | " ram.append(x[2])\n", 96 | " except:\n", 97 | " ram.append(np.nan)\n", 98 | " \n", 99 | " try:\n", 100 | " battery.append(x[3])\n", 101 | " except:\n", 102 | " battery.append(np.nan)\n", 103 | " \n", 104 | " try:\n", 
105 | " display.append(x[4])\n", 106 | " except:\n", 107 | " display.append(np.nan)\n", 108 | " \n", 109 | " try:\n", 110 | " camera.append(x[5])\n", 111 | " except:\n", 112 | " camera.append(np.nan)\n", 113 | " \n", 114 | " try:\n", 115 | " card.append(x[6])\n", 116 | " except:\n", 117 | " card.append(np.nan)\n", 118 | " \n", 119 | " try:\n", 120 | " os.append(x[7])\n", 121 | " except:\n", 122 | " os.append(np.nan)" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 26, 128 | "id": "778a2325", 129 | "metadata": {}, 130 | "outputs": [ 131 | { 132 | "data": { 133 | "text/html": [ 134 | "
\n", 135 | "\n", 148 | "\n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " 
\n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | "
modelpricescoresimprocessorrambatterydisplaycameracardos
0Motorola Edge 40 Neo₹20,99983[Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC][Dimensity 7030, Octa Core, 2.5 GHz Processor][8 GB RAM, 128 GB inbuilt][5000 mAh Battery with 68W Fast Charging][6.55 inches, 1080 x 2400 px, 144 Hz Display w...[50 MP + 13 MP Dual Rear & 32 MP Front Camera][Android v13][No FM Radio]
1Motorola Edge 40 Neo (12GB RAM + 256GB)₹22,99985[Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC][Dimensity 7030, Octa Core, 2.5 GHz Processor][12 GB RAM, 256 GB inbuilt][5000 mAh Battery with 68W Fast Charging][6.55 inches, 1080 x 2400 px, 144 Hz Display w...[50 MP + 13 MP Dual Rear & 32 MP Front Camera][Android v13][No FM Radio]
2Motorola Moto G54 5G₹13,99985[Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC][Dimensity 7020, Octa Core, 2.2 GHz Processor][8 GB RAM, 128 GB inbuilt][6000 mAh Battery with 33W Fast Charging][6.5 inches, 1080 x 2400 px, 120 Hz Display wi...[50 MP + 8 MP Dual Rear & 16 MP Front Camera][Memory Card (Hybrid), upto 1 TB][Android v13]
3Motorola Moto G54 5G (12GB RAM +256GB)₹15,99987[Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC][Dimensity 7020, Octa Core, 2.2 GHz Processor][12 GB RAM, 256 GB inbuilt][6000 mAh Battery with 33W Fast Charging][6.5 inches, 1080 x 2400 px, 120 Hz Display wi...[50 MP + 8 MP Dual Rear & 16 MP Front Camera][Memory Card (Hybrid), upto 1 TB][Android v13]
4Motorola Edge 40 5G₹24,99987[Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC][Dimensity 8020, Octa Core, 2.6 GHz Processor][8 GB RAM, 256 GB inbuilt][4400 mAh Battery with 68W Fast Charging][6.55 inches, 1080 x 2400 px, 144 Hz Display][50 MP + 13 MP Dual Rear & 32 MP Front Camera][Android v13][Bluetooth]
....................................
1015Snexian Bold 9K₹1,199NaN[Dual Sim][No 3G][No Wifi][1 MHz Processor][32 MB RAM, 32 MB inbuilt][2500 mAh Battery][2.4 inches, 240 x 320 px Display][1.3 MP Rear & No Front Camera]
1016Nokia C31₹7,99962[Dual Sim, 3G, 4G, VoLTE, Wi-Fi][Unisoc SC9863A, Octa Core, 1.6 GHz Processor][3 GB RAM, 32 GB inbuilt][5050 mAh Battery with 10W Fast Charging][6.74 inches, 720 x 1600 px Display with Water...[13 MP + 2 MP + 2 MP Triple Rear & 5 MP Front ...[Memory Card Supported, upto 256 GB][Android v12]
1017Micromax S115₹910NaN[Dual Sim][No 3G][No Wifi][32 MB RAM, 32 MB inbuilt][800 mAh Battery][1.77 inches, 240 x 320 px Display][No Rear Camera][Memory Card Supported]
1018Pear P313₹699NaN[Dual Sim][32 MB RAM, 32 MB inbuilt][1100 mAh Battery][1.8 inches, 240 x 360 px Display][0.3 MP Rear Camera][Memory Card Supported][Bluetooth]NaN
1019Pear P100₹799NaN[Dual Sim][32 MB RAM, 32 MB inbuilt][3000 mAh Battery][1.8 inches, 240 x 360 px Display][0.3 MP Rear Camera][Memory Card Supported][Bluetooth]NaN
\n", 322 | "

1020 rows × 11 columns

\n", 323 | "
" 324 | ], 325 | "text/plain": [ 326 | " model price score \\\n", 327 | "0 Motorola Edge 40 Neo ₹20,999 83 \n", 328 | "1 Motorola Edge 40 Neo (12GB RAM + 256GB) ₹22,999 85 \n", 329 | "2 Motorola Moto G54 5G ₹13,999 85 \n", 330 | "3 Motorola Moto G54 5G (12GB RAM +256GB) ₹15,999 87 \n", 331 | "4 Motorola Edge 40 5G ₹24,999 87 \n", 332 | "... ... ... ... \n", 333 | "1015 Snexian Bold 9K ₹1,199 NaN \n", 334 | "1016 Nokia C31 ₹7,999 62 \n", 335 | "1017 Micromax S115 ₹910 NaN \n", 336 | "1018 Pear P313 ₹699 NaN \n", 337 | "1019 Pear P100 ₹799 NaN \n", 338 | "\n", 339 | " sim \\\n", 340 | "0 [Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC] \n", 341 | "1 [Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC] \n", 342 | "2 [Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC] \n", 343 | "3 [Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC] \n", 344 | "4 [Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC] \n", 345 | "... ... \n", 346 | "1015 [Dual Sim] \n", 347 | "1016 [Dual Sim, 3G, 4G, VoLTE, Wi-Fi] \n", 348 | "1017 [Dual Sim] \n", 349 | "1018 [Dual Sim] \n", 350 | "1019 [Dual Sim] \n", 351 | "\n", 352 | " processor \\\n", 353 | "0 [Dimensity 7030, Octa Core, 2.5 GHz Processor] \n", 354 | "1 [Dimensity 7030, Octa Core, 2.5 GHz Processor] \n", 355 | "2 [Dimensity 7020, Octa Core, 2.2 GHz Processor] \n", 356 | "3 [Dimensity 7020, Octa Core, 2.2 GHz Processor] \n", 357 | "4 [Dimensity 8020, Octa Core, 2.6 GHz Processor] \n", 358 | "... ... 
\n", 359 | "1015 [No 3G] \n", 360 | "1016 [Unisoc SC9863A, Octa Core, 1.6 GHz Processor] \n", 361 | "1017 [No 3G] \n", 362 | "1018 [32 MB RAM, 32 MB inbuilt] \n", 363 | "1019 [32 MB RAM, 32 MB inbuilt] \n", 364 | "\n", 365 | " ram battery \\\n", 366 | "0 [8 GB RAM, 128 GB inbuilt] [5000 mAh Battery with 68W Fast Charging] \n", 367 | "1 [12 GB RAM, 256 GB inbuilt] [5000 mAh Battery with 68W Fast Charging] \n", 368 | "2 [8 GB RAM, 128 GB inbuilt] [6000 mAh Battery with 33W Fast Charging] \n", 369 | "3 [12 GB RAM, 256 GB inbuilt] [6000 mAh Battery with 33W Fast Charging] \n", 370 | "4 [8 GB RAM, 256 GB inbuilt] [4400 mAh Battery with 68W Fast Charging] \n", 371 | "... ... ... \n", 372 | "1015 [No Wifi] [1 MHz Processor] \n", 373 | "1016 [3 GB RAM, 32 GB inbuilt] [5050 mAh Battery with 10W Fast Charging] \n", 374 | "1017 [No Wifi] [32 MB RAM, 32 MB inbuilt] \n", 375 | "1018 [1100 mAh Battery] [1.8 inches, 240 x 360 px Display] \n", 376 | "1019 [3000 mAh Battery] [1.8 inches, 240 x 360 px Display] \n", 377 | "\n", 378 | " display \\\n", 379 | "0 [6.55 inches, 1080 x 2400 px, 144 Hz Display w... \n", 380 | "1 [6.55 inches, 1080 x 2400 px, 144 Hz Display w... \n", 381 | "2 [6.5 inches, 1080 x 2400 px, 120 Hz Display wi... \n", 382 | "3 [6.5 inches, 1080 x 2400 px, 120 Hz Display wi... \n", 383 | "4 [6.55 inches, 1080 x 2400 px, 144 Hz Display] \n", 384 | "... ... \n", 385 | "1015 [32 MB RAM, 32 MB inbuilt] \n", 386 | "1016 [6.74 inches, 720 x 1600 px Display with Water... \n", 387 | "1017 [800 mAh Battery] \n", 388 | "1018 [0.3 MP Rear Camera] \n", 389 | "1019 [0.3 MP Rear Camera] \n", 390 | "\n", 391 | " camera \\\n", 392 | "0 [50 MP + 13 MP Dual Rear & 32 MP Front Camera] \n", 393 | "1 [50 MP + 13 MP Dual Rear & 32 MP Front Camera] \n", 394 | "2 [50 MP + 8 MP Dual Rear & 16 MP Front Camera] \n", 395 | "3 [50 MP + 8 MP Dual Rear & 16 MP Front Camera] \n", 396 | "4 [50 MP + 13 MP Dual Rear & 32 MP Front Camera] \n", 397 | "... ... 
\n", 398 | "1015 [2500 mAh Battery] \n", 399 | "1016 [13 MP + 2 MP + 2 MP Triple Rear & 5 MP Front ... \n", 400 | "1017 [1.77 inches, 240 x 320 px Display] \n", 401 | "1018 [Memory Card Supported] \n", 402 | "1019 [Memory Card Supported] \n", 403 | "\n", 404 | " card os \n", 405 | "0 [Android v13] [No FM Radio] \n", 406 | "1 [Android v13] [No FM Radio] \n", 407 | "2 [Memory Card (Hybrid), upto 1 TB] [Android v13] \n", 408 | "3 [Memory Card (Hybrid), upto 1 TB] [Android v13] \n", 409 | "4 [Android v13] [Bluetooth] \n", 410 | "... ... ... \n", 411 | "1015 [2.4 inches, 240 x 320 px Display] [1.3 MP Rear & No Front Camera] \n", 412 | "1016 [Memory Card Supported, upto 256 GB] [Android v12] \n", 413 | "1017 [No Rear Camera] [Memory Card Supported] \n", 414 | "1018 [Bluetooth] NaN \n", 415 | "1019 [Bluetooth] NaN \n", 416 | "\n", 417 | "[1020 rows x 11 columns]" 418 | ] 419 | }, 420 | "execution_count": 26, 421 | "metadata": {}, 422 | "output_type": "execute_result" 423 | } 424 | ], 425 | "source": [ 426 | "df = pd.DataFrame({\n", 427 | " 'model': name,\n", 428 | " 'price': price,\n", 429 | " 'score': spec_score,\n", 430 | " 'sim': sim,\n", 431 | " 'processor': processor,\n", 432 | " 'ram': ram,\n", 433 | " 'battery': battery,\n", 434 | " 'display': display,\n", 435 | " 'camera': camera,\n", 436 | " 'card': card,\n", 437 | " 'os': os\n", 438 | "})\n", 439 | "\n", 440 | "df" 441 | ] 442 | }, 443 | { 444 | "cell_type": "code", 445 | "execution_count": 27, 446 | "id": "cc56c55f", 447 | "metadata": {}, 448 | "outputs": [ 449 | { 450 | "data": { 451 | "text/plain": [ 452 | "model 0\n", 453 | "price 0\n", 454 | "score 280\n", 455 | "sim 0\n", 456 | "processor 0\n", 457 | "ram 0\n", 458 | "battery 0\n", 459 | "display 0\n", 460 | "camera 2\n", 461 | "card 27\n", 462 | "os 90\n", 463 | "dtype: int64" 464 | ] 465 | }, 466 | "execution_count": 27, 467 | "metadata": {}, 468 | "output_type": "execute_result" 469 | } 470 | ], 471 | "source": [ 472 | "df.isnull().sum()" 473 | ] 474 | 
"""Save the fully expanded Smartprix mobile listing to ``smartprix.html``.

The script opens https://www.smartprix.com/mobiles in Chrome, clicks two
inputs in the sidebar (presumably filter checkboxes -- confirm against the
live page), then repeatedly clicks the element below the result list
(presumably a "load more" control) until the page stops growing. The rendered
page source is then written to disk.
"""

import os
import time

from selenium import webdriver
from selenium.common.exceptions import (
    ElementNotInteractableException,
    NoSuchElementException,
    StaleElementReferenceException,
)
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

# Path of the chromedriver binary. The CHROMEDRIVER_PATH environment variable
# overrides the historical default so the script is not tied to one machine.
CHROMEDRIVER_PATH = os.environ.get(
    "CHROMEDRIVER_PATH", "C:/Users/Acer/Desktop/chromedriver.exe"
)

START_URL = "https://www.smartprix.com/mobiles"
OUTPUT_FILE = "smartprix.html"

# Inputs clicked once, in this order, before the listing is expanded.
FILTER_XPATHS = (
    '//*[@id="app"]/main/aside/div/div[5]/div[2]/label[1]/input',
    '//*[@id="app"]/main/aside/div/div[5]/div[2]/label[2]/input',
)

# Element clicked repeatedly to pull more results into the page.
LOAD_MORE_XPATH = '//*[@id="app"]/main/div[1]/div[2]/div[3]'

HEIGHT_SCRIPT = "return document.body.scrollHeight"

POLL_INTERVAL = 1   # seconds between two height measurements
GROWTH_TIMEOUT = 5  # seconds to wait for the page to grow after a click


def _wait_for_growth(driver, old_height):
    """Return the page height once it differs from *old_height*.

    The height is polled every ``POLL_INTERVAL`` seconds. If it has not
    changed after ``GROWTH_TIMEOUT`` seconds, the unchanged height is
    returned so the caller can conclude that nothing more will load.
    """
    deadline = time.monotonic() + GROWTH_TIMEOUT
    while True:
        time.sleep(POLL_INTERVAL)
        new_height = driver.execute_script(HEIGHT_SCRIPT)
        if new_height != old_height or time.monotonic() >= deadline:
            return new_height


def load_all_results(driver):
    """Click the load-more element until the page height stops changing.

    The loop also ends when the element is missing, stale, or not
    interactable, so that the caller still gets to save what was loaded
    instead of crashing on the final click.
    """
    old_height = driver.execute_script(HEIGHT_SCRIPT)

    counter = 1
    while True:
        try:
            driver.find_element(by=By.XPATH, value=LOAD_MORE_XPATH).click()
        except (
            NoSuchElementException,
            ElementNotInteractableException,
            StaleElementReferenceException,
        ):
            # The control is gone or unusable: treat it as "everything loaded".
            break
        counter += 1

        new_height = _wait_for_growth(driver, old_height)

        print(counter)
        print("old height: ", old_height)
        print("new height: ", new_height)

        if new_height == old_height:
            break

        old_height = new_height


def main():
    """Drive the browser and write the expanded listing to ``OUTPUT_FILE``."""
    service = Service(CHROMEDRIVER_PATH)
    options = webdriver.ChromeOptions()
    # "detach" keeps the Chrome window open after the script finishes.
    options.add_experimental_option("detach", True)

    driver = webdriver.Chrome(options=options, service=service)

    driver.get(START_URL)
    time.sleep(1)

    for xpath in FILTER_XPATHS:
        driver.find_element(by=By.XPATH, value=xpath).click()
    time.sleep(2)

    load_all_results(driver)

    html = driver.page_source
    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        f.write(html)


if __name__ == "__main__":
    main()