├── image ├── readme.md ├── Github.pdf ├── libwebscraping.jpg ├── Top-python-web-frameworks.jpg └── web-scraping-using-python.png ├── .DS_Store ├── scrapy ├── .DS_Store ├── BigMac │ ├── 1_7KVe2szj1rjt1_Jlmdznkw.png │ ├── readme.md │ └── IphoneProduct.csv ├── SIX │ ├── buku.py │ └── ReadMe.md ├── readme.md └── SDS │ ├── big_data_books.csv │ └── readme.md ├── selenium ├── .DS_Store ├── group9 │ ├── lazada.png │ ├── selenium_web.jpeg │ ├── Honey Stars Lazada.csv │ ├── Honey Stars Shopee.csv │ └── readme.md ├── readme.md ├── AdMiPeQa │ ├── Lazada_Milo.csv │ ├── readme.md │ └── WebScraping_AdMiPeQa.ipynb └── SamVerse │ ├── readme.md │ ├── PRU15_Results.csv │ └── Selenium_SamVerse.ipynb ├── beautiful-soup ├── .DS_Store ├── Group 10 │ ├── 20 Public Universities in Malaysia.csv │ └── readme.md ├── readme.md ├── QwQ │ ├── readme.md │ └── Malaysia_states.csv └── High Five │ ├── readme.md │ └── courses_data.csv ├── pyscript ├── demo.py ├── exer2.html ├── exer1.html ├── exer3.html ├── exer4.html ├── readme.md ├── exer5.html └── matplotlib.html ├── lxml ├── readme.md └── QUAD │ ├── readme.md │ ├── job_search.csv │ └── QUAD_LXML.ipynb ├── requests ├── puma_sneakers_women_sale.csv └── readme.md └── README.md /image/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drshahizan/python-web/HEAD/.DS_Store -------------------------------------------------------------------------------- /image/Github.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drshahizan/python-web/HEAD/image/Github.pdf -------------------------------------------------------------------------------- /scrapy/.DS_Store: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/drshahizan/python-web/HEAD/scrapy/.DS_Store -------------------------------------------------------------------------------- /selenium/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drshahizan/python-web/HEAD/selenium/.DS_Store -------------------------------------------------------------------------------- /beautiful-soup/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drshahizan/python-web/HEAD/beautiful-soup/.DS_Store -------------------------------------------------------------------------------- /image/libwebscraping.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drshahizan/python-web/HEAD/image/libwebscraping.jpg -------------------------------------------------------------------------------- /selenium/group9/lazada.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drshahizan/python-web/HEAD/selenium/group9/lazada.png -------------------------------------------------------------------------------- /pyscript/demo.py: -------------------------------------------------------------------------------- 1 | import random 2 | lucky = random.randint(1, 12) 3 | print('Hello my lucky number is '+str(lucky)) 4 | -------------------------------------------------------------------------------- /selenium/group9/selenium_web.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drshahizan/python-web/HEAD/selenium/group9/selenium_web.jpeg -------------------------------------------------------------------------------- /image/Top-python-web-frameworks.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/drshahizan/python-web/HEAD/image/Top-python-web-frameworks.jpg -------------------------------------------------------------------------------- /image/web-scraping-using-python.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drshahizan/python-web/HEAD/image/web-scraping-using-python.png -------------------------------------------------------------------------------- /scrapy/BigMac/1_7KVe2szj1rjt1_Jlmdznkw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drshahizan/python-web/HEAD/scrapy/BigMac/1_7KVe2szj1rjt1_Jlmdznkw.png -------------------------------------------------------------------------------- /scrapy/SIX/buku.py: -------------------------------------------------------------------------------- 1 | import scrapy 2 | 3 | 4 | class BukuSpider(scrapy.Spider): 5 | name = 'buku' 6 | allowed_domains = ['bukukita.com'] 7 | start_urls = ['http://bukukita.com/'] 8 | 9 | def parse(self, response): 10 | pass 11 | -------------------------------------------------------------------------------- /pyscript/exer2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | My First Page 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /pyscript/exer1.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | My First Page 6 | 7 | 8 | print ('Hello World') 9 | 10 | 11 | -------------------------------------------------------------------------------- /pyscript/exer3.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | - numpy 9 | - pandas 10 | 11 | 12 | import numpy as np 13 | import pandas as pd 14 | x = 
pd.Series([1,2,3,4,5,6,7,8,9,10]) 15 | print([i**2 for i in x]) 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /pyscript/exer4.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 12 | 13 | 14 | 15 | - numpy 16 | - pandas 17 | 18 |

Interactive shell

19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /scrapy/readme.md: -------------------------------------------------------------------------------- 1 | # Scrapy 2 | 3 | Scrapy is a Python web scraping framework. It provides a pre-defined set of methods and classes for crawling websites and extracting structured data, such as data for items like products, prices, reviews, etc. Scrapy is built on top of the Twisted asynchronous networking library, which means that it can handle large amounts of data and high concurrency without blocking the execution of the program. 4 | 5 | Scrapy has several built-in features like: 6 | 7 | Support for handling cookies and user-agents 8 | Built-in support for handling redirects 9 | Built-in support for handling forms 10 | Built-in support for handling common HTTP status codes 11 | Built-in support for extracting data from HTML and XML 12 | Built-in support for generating CSV, JSON, or XML output 13 | Scrapy is an open-source project, so you can use it for free and make any modifications you need. It is widely used for data mining, data extraction, and web scraping. You can use Scrapy to scrape data from websites, process it and store it in any format you want. 14 | -------------------------------------------------------------------------------- /lxml/readme.md: -------------------------------------------------------------------------------- 1 |
2 |

3 | 4 |

5 |
6 | 7 | 8 | # Lxml 9 | 10 | lxml is a Python library for working with XML and HTML. It provides a number of ways to parse and interact with these formats, including the use of xpath expressions to search for specific elements in a document. lxml also includes a number of other useful features for working with XML and HTML, such as support for parsing and generating these formats, as well as support for working with different encodings and character sets. 11 | 12 | lxml is a powerful and efficient library for parsing and manipulating XML and HTML documents in Python. It is built on top of the C libraries libxml2 and libxslt, which means it can handle very large documents quickly and efficiently. Additionally, lxml provides a simple and consistent API that makes it easy to work with XML and HTML documents in Python. 13 | 14 | > For more information, you can read here in [lxml - XML and HTML with Python](https://lxml.de/) 15 | -------------------------------------------------------------------------------- /selenium/readme.md: -------------------------------------------------------------------------------- 1 |

2 | Selenium 3 |
4 |

5 | Web scraping is the automated gathering of content and data from a website or any other resource available on the internet. Unlike screen scraping, web scraping extracts the HTML code under the webpage. Users can then process the HTML code of the webpage to extract data and carry out data cleaning, manipulation, and analysis. Exhaustive amounts of this data can even be stored in a database for large-scale data analysis projects. The prominence and need for data analysis, along with the amount of raw data which can be generated using web scrapers, has led to the development of tailor-made python packages which make web scraping easy as pie.

6 | 7 | Web Scraping with Selenium allows you to gather all the required data using Selenium Webdriver Browser Automation. Selenium crawls the target URL webpage and gathers data at scale.
8 | 9 | Selenium uses the Webdriver protocol to automate processes on various popular browsers such as Firefox, Chrome, and Safari. This automation can be carried out locally (for purposes such as testing a web page) or remotely (for purposes such as web scraping). 10 | 11 | -------------------------------------------------------------------------------- /pyscript/readme.md: -------------------------------------------------------------------------------- 1 | # PyScript is here! 2 | PyScript is a new JavaScript framework that lets users embed Python code in HTML documents. Furthermore, PyScript supports the Python scientific stack, including libraries like NumPy, pandas and scikit-learn, giving it significant potential for data science applications. The library was recently announced by Anaconda, Inc. and this is making me quite excited! Check the links below for more information, and let me know your thoughts in the comments. 3 | 4 | - PyScript official website: [https://pyscript.net/](https://pyscript.net/) 5 | - Project page: [https://pyscript.net/](https://pyscript.net/) 6 | - GitHub Repo: [https://github.com/pyscript/pyscript](https://github.com/pyscript/pyscript) 7 | - Examples: [https://pyscript.net/examples/](https://pyscript.net/examples/) 8 | - Blogpost: [https://engineering.anaconda.com/2022/04/welcome-pyscript.html](https://engineering.anaconda.com/2022/04/welcome-pyscript.html) 9 | 10 | This will surely make things easier if you are interested in developing web applications with Python instead of JavaScript! 
11 | 12 | ## Lab 13 | 14 | - [Pyscript Tutorial With Simple Code Examples](https://pythonistaplanet.com/pyscript/) 15 | -------------------------------------------------------------------------------- /requests/puma_sneakers_women_sale.csv: -------------------------------------------------------------------------------- 1 | PRODUCT NAME,PRICE NEW,PRICE OLD 2 | Muse Metal Women’s Trainers,RM 216.22,RM 429.00 3 | Muse X-2 Metallic Women's Sneakers,RM 216.22,RM 429.00 4 | Graviton Trainers,RM 236.88,RM 329.00 5 | Smash v2 Trainers,RM 193.68,RM 269.00 6 | Taper Trainers,RM 164.88,RM 229.00 7 | Smash Cat Leather Trainers,RM 160.70,RM 279.00 8 | PUMA x VOGUE Slipstream Sneakers Women,RM 431.28,RM 599.00 9 | PUMA x AMI Slipstream Lo Sneakers,RM 467.28,RM 649.00 10 | PUMA x AMI TRC Blaze Mid Sneakers,RM 625.68,RM 869.00 11 | PUMA x MARKET Slipstream Sneakers,RM 431.28,RM 599.00 12 | PUMA x POKÉMON Rider FV Pikachu Sneakers,RM 467.28,RM 649.00 13 | CA Pro Sneakers,RM 366.48,RM 509.00 14 | PUMA x POKÉMON Rider FV Bulbasaur Sneakers,RM 467.28,RM 649.00 15 | Mayze Stack SQ Sneakers Women,RM 431.28,RM 599.00 16 | PUMA x POKÉMON Slipstream Lo Charmander Sneakers,RM 373.68,RM 519.00 17 | TRC Mira Star Quality Sneakers Women,RM 409.68,RM 569.00 18 | Harrow Women's Shoes,RM 215.28,RM 299.00 19 | Wired Run In Motion Sneakers,RM 258.48,RM 359.00 20 | Court Star Suede Trainers,RM 212.54,RM 369.00 21 | Supertec Zero Trainers,RM 222.48,RM 309.00 22 | Smash Vulcanised V3 Low Trainers,RM 186.48,RM 259.00 23 | Slipstream Lo Trainers,RM 308.88,RM 429.00 24 | TRC Blaze Chance Sneakers,RM 388.08,RM 539.00 25 | -------------------------------------------------------------------------------- /beautiful-soup/Group 10/20 Public Universities in Malaysia.csv: -------------------------------------------------------------------------------- 1 | No.,Name of university,Date established,Location 2 | 1,Universiti Malaya (UM),1-1-1962,Kuala Lumpur 3 | 2,Universiti Sains Malaysia (USM),1969,Penang 4 | 
3,Universiti Kebangsaan Malaysia (UKM),18-5-1970,Selangor 5 | 4,Universiti Putra Malaysia (UPM),4-10-1971,Selangor 6 | 5,Universiti Teknologi Malaysia (UTM),1-4-1975,Johor 7 | 6,Universiti Teknologi MARA (UiTM),26-8-1999,Selangor 8 | 7,Universiti Islam Antarabangsa Malaysia (UIAM),10-5-1983,Selangor 9 | 8,Universiti Utara Malaysia (UUM),16-2-1984,Kedah 10 | 9,Universiti Malaysia Sarawak (UNIMAS),24-12-1992,Sarawak 11 | 10,Universiti Malaysia Sabah (UMS),24-11-1994,Sabah 12 | 11,Universiti Pendidikan Sultan Idris (UPSI),24-2-1997,Perak 13 | 12,Universiti Sains Islam Malaysia (USIM),13-3-1998,Negeri Sembilan 14 | 13,Universiti Malaysia Terengganu (UMT),15-7-1999,Terengganu 15 | 14,Universiti Tun Hussein Onn Malaysia (UTHM),30-9-2000,Johor 16 | 15,Universiti Teknikal Malaysia Melaka (UTeM),1-12-2000,Melaka 17 | 16,Universiti Malaysia Pahang (UMP),16-2-2002,Pahang 18 | 17,Universiti Malaysia Perlis (UniMAP),2-5-2002,Perlis 19 | 18,Sultan Zainal Abidin (UniSZA),1-1-2006,Terengganu 20 | 19,Universiti Malaysia Kelantan (UMK),14-6-2006,Kelantan 21 | 20,"Universiti Pertahanan Nasional Malaysia, (UPNM)",10-11-2006,Kuala Lumpur 22 | -------------------------------------------------------------------------------- /selenium/group9/Honey Stars Lazada.csv: -------------------------------------------------------------------------------- 1 | ,ItemName,Price,Platform 2 | 0,NESTLE HONEY STARS Cereal 150g,7.19,Lazada 3 | 1,Nestle Honey Stars Cereal With Whole Grain 150g,7.34,Lazada 4 | 2,HONEY STAR CEREAL 150G #Okborong #Borong #Breakfast #Cereal #Kids #Nestle #150gm #Bijirin Sarapan Gandum Dan Jagung Bersalut Madu,7.5,Lazada 5 | 3,HONEY STAR CEREAL 150G x 6 BOX #Okborong #Borong #Breakfast #Cereal #Kids #Nestle #150gm #Bijirin Sarapan Gandum Dan Jagung Bersalut Madu,10.9,Lazada 6 | 4,Nestle Honey Stars Cereal 150G,49.8,Lazada 7 | 5,Nestle Honey Stars(150g),8.59,Lazada 8 | 6,Nestle Honey Stars(150g),7.45,Lazada 9 | 7,Nestle Honey Star 150g / 300g,7.45,Lazada 10 | 8,READY STOCK 
MALAYSIA - Nestle Honey Star 150g,7.9,Lazada 11 | 14,Nestle Honey Stars Honey-Coated Wheat and Corn Puffs Breakfast Cereal 150g,19.0,Lazada 12 | 15,Nestle - Honey Stars Honey Coated Wheat and Corn Puffs Breakfast Cereal (150g),5.8,Lazada 13 | 16,Nestle Honey Stars Cereal 150g / 300g,7.44,Lazada 14 | 17,Nestle - Honey Stars Honey Coated Wheat and Corn Puffs Breakfast Cereal (150g),6.99,Lazada 15 | 18,Nestle Honey Stars (150g),8.92,Lazada 16 | 19,Nestle Honey Stars Whole Grain (150G),6.35,Lazada 17 | 20,Nestle Honey Stars (150g),9.57,Lazada 18 | 21,"NESTLE Koko Krunch 330g , Honey Star 150g , Gold Corn Flakes Cereal 150g / 275g (Breakfast Cereal)",6.0,Lazada 19 | 22,Nestle - Honey Stars Cereal 150g,6.9,Lazada 20 | 23,[SALE] Nestle Honey Star Cereal 150g Huat Tiong,9.69,Lazada 21 | 24,NESTLE FITNESSE CEREAL FRUITS 450G + FREE HONEY STAR 150G,8.5,Lazada 22 | 27,Nestle Whole grain 全麦谷粮 Milo 170g/ Honey stars 150g/ Kokokrunch cookie 330g/ Trix 330g,59.39,Lazada 23 | -------------------------------------------------------------------------------- /beautiful-soup/readme.md: -------------------------------------------------------------------------------- 1 |

2 | Beautiful Soup 3 |
4 |

5 | 6 |

7 | 'beautifulsoup4' is a Python library that is used for web scraping and parsing HTML and XML documents. It creates parse trees from the HTML or XML documents that can be used to extract data from the webpages. It allows developers to navigate, search, and modify the parse tree, which can be useful for web scraping and other tasks. 8 | 9 | One of the key features of 'beautifulsoup4' is its ability to search for specific elements within the parse tree using the find() and find_all() methods. These methods can be used to search for elements based on their tag name, class, or id, making it easy to extract specific data from a webpage. 10 | 11 | Another useful feature of beautifulsoup4 is its ability to access the attributes of elements within the parse tree. This allows developers to extract information such as the src attribute of an image tag, the href attribute of a link tag, and other attributes that may be useful for web scraping. 12 | 13 | 'beautifulsoup4' also provides a lot of functionalities and options to handle the different cases of parsing, such as handling missing tags, converting elements from string to int and float, and much more. 14 | 15 | 'beautifulsoup4' is a powerful and easy-to-use library for web scraping, and it is widely used by developers for a wide range of tasks. It is compatible with Python 2 and Python 3 and can be easily installed using pip. 16 | 17 | Overall, 'beautifulsoup4' is a great tool for web scraping, it is fast and efficient and it provides a lot of functionalities that make it easy to extract data from webpages and handle different cases of parsing. 18 |

19 | -------------------------------------------------------------------------------- /scrapy/BigMac/readme.md: -------------------------------------------------------------------------------- 1 | # **Web Scraping Using Scrapy** 2 | 3 | 4 | ## Group Members: 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 |
NameMatric
Muhammad Imran Hakimi Bin Mohd ShukriA20EC0213
Afif Hazmie Arsyad Bin AgusA20EC0176
Rasmin Kaur SandhuA19ET0216
27 | 28 | --- 29 | **Introduction** 30 | --- 31 | Scrapy is a Python web scraping framework. It provides a pre-defined set of methods and classes for crawling websites and extracting structured data, such as data for items like products, prices, reviews, etc. Scrapy is built on top of the Twisted asynchronous networking library, which means that it can handle large amounts of data and high concurrency without blocking the execution of the program. 32 | 33 | In this assignment, we were asked to do some web scraping with the tool that we were assigned. We will try to do web scraping using Scrapy on `CompAsia.com`. The data that we will be scraping are `product's name`, `price`, `discount` and `URL`. After that, the data will be put in a dataframe and exported to a .CSV file. The CSV file can be found under the name [IphoneProduct.csv](https://github.com/drshahizan/python-web/blob/main/scrapy/BigMac/IphoneProduct.csv). 34 | 35 | In conclusion, using Scrapy for web scraping is very beneficial for extracting important information from the web. By doing so, only the necessary details are retrieved and exported to an external file. 
36 | 37 | --- 38 | -------------------------------------------------------------------------------- /selenium/AdMiPeQa/Lazada_Milo.csv: -------------------------------------------------------------------------------- 1 | ItemName,Price,Location,Review 2 | Milo Activ-Go 1kg,18.9,Selangor,1832 3 | Nestle MILO Activ-Go Chocolate Malt Powder (1kg) Exp2023,22.8,Pahang,52 4 | NESTLE MILO ACTIV-GO CHOCOLATE MALT POWDER Softpack 1kg[SHIP WITHIN 24 HOURS],21.5,Perak,13 5 | Milo Active-Go Softpack 1kg,22.8,Wp Kuala Lumpur,21 6 | ( Beng kee) HOT ITEM Milo activ-go 1kg...Nestle MILO Activ-Go Chocolate Malt Powder (1kg)ready stock....,24.0,Perak,39 7 | MILO ACTIV-GO CHOCOLATE MALT POWDER (1KG),22.1,Wp Kuala Lumpur,7 8 | Nestle MILO Activ-Go Chocolate Malt Powder 2kg or1kg /MILO 2kg or 1kg / Nestle Milo Activ-Go Soft Pack 2kg or 1kg/Milo Activ-Go Softpack 2kgp or 1kg,21.99,Johor,46 9 | Nestle Milo Activ-Go Chocolate Malt Powder (1kg) | NE Grocer,22.3,Perak,450 10 | Nestle Milo Active-Go 【1kg】,39.99,Penang,14 11 | NESTLE MILO ACTIV-GO CHOCOLATE MALT POWDER Softpack 1kg,22.5,Selangor,1 12 | NESTLÉ MILO Activ-Go Chocolate Malt Powder 1kg [Halal] [READY STOCK],25.0,Selangor,3 13 | Nestle MILO Activ-Go Chocolate Malt Powder (200G/400G/1kg/2kg/tin 1.5kg) stock baharu 💯 HALAL - ComeToShopp,20.82,Kelantan,7 14 | NESTLE Milo Activ-Go Chocolate Malt Drink SoftPack 1kg,23.99,Melaka,7 15 | Nestle Milo 美禄 1kg Activ-Go Chocolate Malt Powder Softpack,25.99,Perak,12 16 | Nestle Milo Activ-Go Chocolate Malt 1kg/2kg,10.89,Penang,2 17 | [READY STOCK] Nestle MILO Activ-Go Chocolate Malt Powder (200g/400g/1kg),11.9,Penang,4 18 | Nestle Milo Activ-Go Chocolate Malt Powder Softpack (1kg),20.89,Selangor,1 19 | Nestle Milo Activ-Go Chocolate Malt Powder Softpack (1kg),21.99,Wp Kuala Lumpur,11 20 | MILO ACTIV-GO 1kg,29.0,Selangor,1 21 | Nestle Milo Activ-go 1kg,23.5,Selangor,79 22 | NESTLE MILO ACTIV-GO CHOCOLATE MALT POWDER Softpack 1kg,9.9,Perak,1 23 | Nestle Milo Active - Go Softpack 1KG,27.9,Kedah,3 
24 | Nestlé Milo Activ-Go Softpack 1kg,30.9,Johor,2 25 | Nestle Milo Active-Go 1kg,28.0,Penang,12 26 | -------------------------------------------------------------------------------- /pyscript/exer5.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 10 | 12 | 14 | 16 | 18 | Chart Example 19 | Chart Example 20 | 21 | - bokeh 22 | - numpy 23 | - pandas 24 | 25 | 26 | 27 |

Bokeh Chart

28 |
29 | 30 | import json 31 | import pyodide 32 | from js import Bokeh, console, JSON 33 | from bokeh.embed import json_item 34 | from bokeh.plotting import figure 35 | from bokeh.resources import CDN 36 | vegetables = ['Carrot', 'Onion', 'Potato', 'Cabbage', 'garlic'] 37 | values = [15, 23, 34, 14, 12] 38 | pl = figure(x_range=vegetables, height=350, title="Vegetables Counts", 39 | toolbar_location=None, tools="") 40 | pl.vbar(x=vegetables, top=values, width=0.9) 41 | pl.xgrid.grid_line_color = None 42 | pl.y_range.start = 0 43 | p_json = json.dumps(json_item(pl, "chart")) 44 | Bokeh.embed.embed_item(JSON.parse(p_json)) 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /scrapy/SIX/ReadMe.md: -------------------------------------------------------------------------------- 1 | ![Scapy logo](https://scrapy.org/img/scrapylogo.png) 2 | 3 | # BukuKita Web Scraping with Scrapy 4 | 5 | This is a web scraping project using Scrapy to extract data from Bukukita.com. 6 | 7 | ## About Scrapy 8 | 9 | Scrapy is an open-source web scraping framework for Python. 10 | It is used to extract data from websites and can be used to extract structured data such as contact information, product details, or prices. 11 | 12 | Scrapy provides an integrated way to follow links and extract data from multiple pages. 13 | It also provides support for handling common web scraping challenges such as handling cookies and sessions, handling redirects, and handling CAPTCHAs. 14 | 15 | Scrapy is built on top of the Twisted asynchronous networking library and provides an extensible architecture for adding custom functionality. 16 | It also includes a built-in logging system, which can be used to track errors and monitor the progress of your spiders. 17 | 18 | Scrapy can be used to scrape websites in a variety of formats, including HTML, XML, and JSON. 
19 | It can be used to scrape websites built with a variety of technologies, including JavaScript, Flash, and AJAX. 20 | 21 | 22 | ## Spider's functionality 23 | Scrapy will use Python code called a "Spider" to crawl through the website. The spider in this project is called buku and the functions are defined in file buku.py. The spider will start by making an HTTP request to the Bukukita website. Once the spider receives the response it will extract the following data for each book: 24 | 25 | 1. source 26 | 2. Title 27 | 3. ISBN No. 28 | 4. Publisher 29 | 5. Publish Date 30 | 6. No of page 31 | 7. Weight(g) 32 | 8. Cover Type 33 | 9. Dimension 34 | 10. Category 35 | 11. Bonus 36 | 12. Language 37 | 13. Stock Location 38 | 39 | The spider will continue to follow the links on the page and extract the data for each book it finds. 40 | 41 | 42 | ## Group Member 43 | 1. LEE MING QI 44 | 45 | 2. NUR IRDINA ALIAH BINTI ABDUL WAHAB 46 | 47 | 3. SINGTHAI SRISOI 48 | 49 | 4. AMIRAH RAIHANAH BINTI ABDUL RAHIM 50 | 51 | 52 | -------------------------------------------------------------------------------- /selenium/SamVerse/readme.md: -------------------------------------------------------------------------------- 1 | ## Web-Scraping GE-15 by using Selenium 2 | We will demonstrate a different method of web scraping by using Selenium. We have chosen a website that presents the winners of the latest General Election (GE-15). From this website, we would like to acquire the names of the candidates that won along with the seat they were contesting for and the number of votes they won with. 3 |
4 |

Group Members

5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 |
NameMatric
LUQMAN ARIFF BIN NOOR AZHARA20EC0202
AHMAD AIMAN HAFIZI BIN MUHAMMADA20EC0177
LEE CAI XUANA20EC0062
MYZA NAZIFA BINTI NAZRYA20EC0219
26 |

About Selenium

27 |
28 | Selenium is an open-source software suite of browser automation tools for controlling web browsers through programs and performing browser automation. It is widely used for testing web applications, automating repetitive tasks, and scraping data from websites. 29 |
30 |
31 | Python, C#, Java, Perl, Ruby, and JavaScript are just a few of the scripting languages that are supported by the open source automation testing tool Selenium. One can select the script in accordance with the application that needs to be tested. 32 | In addition, it is a Python library and utility for automating a variety of operations in web browsers. Web scraping is one method for obtaining relevant data and information that would not otherwise be available. 33 |
34 |
35 | Overall, Selenium is a powerful and flexible tool that can greatly simplify and streamline the process of testing and automating web applications in Python. 36 |

Results

37 |
38 | We used Selenium to sort all the candidates that won, their seat and the number of votes they won. We also added the column of which coalition they were under and their political party. The results can be seen at Dataset 39 | -------------------------------------------------------------------------------- /requests/readme.md: -------------------------------------------------------------------------------- 1 | ![Requests](https://media.licdn.com/dms/image/C4E12AQFncA0AxujAng/article-cover_image-shrink_600_2000/0/1520086554238?e=2147483647&v=beta&t=ZvpayB6CfpbF7YCWJlynIyYqkBR23iRZpj2kd2XDR5E) 2 | 3 | # **Web Scraping (Requests)** 4 | 5 | **Group 7**: No Name 6 | 7 | **Group Members**: 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 |
NameMatric
Madina Suraya binti ZharinA20EC0203
Nur Izzah Mardhiah binti RashidiA20EC0116
Tan Yong ShengA20EC0157
Chloe Racquelmae KennedyA20EC0026
30 | 31 | ## About Requests 32 | Using the requests library, we can fetch the content from the URL given. The requests library is the best choice if we are just starting with web scraping and have access to an API. The requests library will make a GET request to a web server, which will download the HTML contents of a given web page for us. 33 | 34 | - It is easy to understand and does not require much practice to master. 35 | - Requests also minimizes the need to include query strings in your URLs manually. 36 | - It also supports authentication modules and handles cookies and sessions with excellent stability. 37 | 38 | ## Purpose 39 | 40 | However, using the requests library alone is not enough to do web scraping. Hence, we need libraries that can parse the document. In this notebook, we use the Beautiful Soup library to parse this document, and extract the text from the div tag. 41 | 42 | We chose the [Puma website](https://my.puma.com/my/en/women/shoes/sneakers) to perform web scraping since it is the Chinese New Year season, and they are offering a sale. Therefore, we would like to see if there is any interesting data **(Product Name, Price New, Price Old)** related to their sneakers. 43 | 44 | ## Results 45 | We extracted 36 items. However, some of them are duplicates and contain null values. 46 | 47 | - Product Name 48 | 49 | - Price New = price after CNY sale discount 50 | 51 | - Price Old = the original price without any discount 52 | 53 | We then performed some data cleaning before storing the data in a CSV file, which we also uploaded as [puma_sneakers_women_sale.csv](https://github.com/drshahizan/python-web/blob/main/requests/puma_sneakers_women_sale.csv). 54 | -------------------------------------------------------------------------------- /beautiful-soup/Group 10/readme.md: -------------------------------------------------------------------------------- 1 |

2 | Group 10 - Web Scraping using BeautifulSoup 3 |
4 |

5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 |
NameMatric Number
FARAH IRDINA BINTI AHMAD BAHARUDINA20EC0035
LOW JUNYIA20EC0071
NURFARRAHIN BINTI CHE ALIASA20EC0121
YONG ZHI YANA20EC0172
28 |
29 |
30 | beautiful soup 31 |
32 |
33 |

34 | Beautiful Soup is a Python library that is used for web scraping. It allows you to parse the HTML or XML documents into a readable tree-like format, and then extract data from the tree based on its structure. With Beautiful Soup, you can easily navigate through the document, search for specific tags, and extract the text or attributes of those tags. It is often used in combination with other libraries such as requests to programmatically access web pages and extract data from them. The website that we will be using is from https://www.studymalaysia.com/education/top-stories/list-of-universities-in-malaysia. 35 | 36 | This website is a resource for individuals interested in higher education in Malaysia. It provides a comprehensive list of universities in Malaysia, including both public and private institutions. The website also includes information about the universities' locations, programs offered, and contact information. Additionally, the website provides articles and news related to education and universities in Malaysia, as well as resources for students and parents. The website appears to be operated by StudyMalaysia Group, which is a provider of education and career guidance in Malaysia. 37 | 38 | We plan to obtain data from the website by extracting one of its tables, specifically the list of 20 Public Universities in Malaysia. By analyzing the website's code, we will locate the table and access it using the 'full boxed' class. We will then utilize the pandas library and the BeautifulSoup package to extract the information from the table in html format. Finally, we will convert the obtained data into a CSV file. In summary, we will efficiently retrieve various tables and contents from the website using these tools. 39 |

40 | -------------------------------------------------------------------------------- /selenium/AdMiPeQa/readme.md: -------------------------------------------------------------------------------- 1 |

2 | 3 |

4 |

5 | Web scraping E-commerce sites with Selenium 6 | 7 | 8 |
9 |

10 | In this file, we perform web scraping using the Selenium library. The website we chose is Lazada, a well-known e-commerce site in Malaysia. We aim to collect information such as the product name, price, origin, and number of reviews from the website.

11 | Website : Lazada
12 |

Group Members

13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 |
NameMatric
ADAM WAFII BIN AZUARA20EC0003
HONG PEI GEOKA20EC0044
MIKHEL ADAM BIN MUHAMMAD EZRINA20EC0237
QAISARA BINTI ROHZANA20EC0133
35 |

36 | About Selenium 37 | 38 | 39 |
40 |

41 | Selenium is an open-source automation testing tool that supports a number of scripting languages, such as Python, C#, Java, Perl, Ruby, and JavaScript. Depending on the application to be tested, one can choose the scripting language accordingly.

42 | Other than that, it is available as a Python library and tool for automating web browsers to perform a number of tasks. One such task is web scraping, which extracts useful data and information that may otherwise be unavailable. 43 | 44 |

45 | Results 46 | 47 | 48 |
49 |

50 | 51 | We used Selenium to browse Lazada to look for sellers that sell Milo Activ powder in 1KG packages. We collected the information from the first page of results when searching for "Milo Activ Go 1kg". We collected information such as "ItemName", "Price", "Location", and "Reviews". The results can be seen at the [Lazada_Milo.csv](https://github.com/drshahizan/python-web/blob/main/selenium/AdMiPeQa/Lazada_Milo.csv) file 52 | -------------------------------------------------------------------------------- /scrapy/SDS/big_data_books.csv: -------------------------------------------------------------------------------- 1 | Title,Author,Published date,Foramt,Price 2 | Weapons of Math Destruction,Cathy O'Neil,2017-07-06,Paperback,12.49 3 | Big Data,Viktor Mayer-Schönberger,2014-03-04,Paperback,13.57 4 | Big Data,Viktor Mayer-Schönberger,2017-01-12,Paperback,15.58 5 | Big Data,Brian Clegg,2017-10-10,Paperback,16.11 6 | "Big Data - Using SMART Big Data, Analytics and Metrics To Make Better Decisions and Improve Performance",BB Marr,2015-03-02,Paperback,12.48 7 | Big Data: A Very Short Introduction,Dawn E. Holmes,2018-01-30,Paperback,16.04 8 | Everybody Lies,Seth Stephens-Davidowitz,2017-05-09,Hardback,7.32 9 | Big Data in Practice (use cases) - How 45 Successful Companies Used Big Data Analytics to Deliver Extraordinary Results,BB Marr,2016-05-02,Hardback,12.95 10 | Big Data For Dummies,JS Hurwitz,2013-04-19,Paperback,13.62 11 | Big Data,Timandra Harkness,2017-08-29,Paperback,22.22 12 | "Data Science & Big Data Analytics - Discovering, A nalyzing, Visualizing and Presenting Data",EMC Education S,2015-03-17,Hardback,8.78 13 | The Art of Invisibility,Kevin D. 
Mitnick,2019-09-26,Paperback,12.07 14 | "Big Data, Big Dupe",Stephen Few,2018-02-01,Paperback,19.35 15 | Spark - The Definitive Guide,Bill Chambers,2018-04-03,Paperback,29.28 16 | Big Data Fundamentals,Thomas Erl,2016-01-15,Paperback,27.86 17 | Life After Google,George Gilder,2018-08-02,Hardback,39.95 18 | Big Data Baseball,Travis Sawchik,2016-05-03,Paperback,25.44 19 | Big Data Demystified,David Stephenson,2018-02-22,Paperback,34.99 20 | Loyalty 3.0: How to Revolutionize Customer and Employee Engagement with Big Data and Gamification,Rajat Paharia,2013-07-01,Hardback,13.80 21 | Statistics For Big Data For Dummies,A Anderson,2015-09-08,Paperback,17.17 22 | Weapons of Math Destruction,Cathy O'Neil,2017-09-05,Paperback,64.34 23 | "Privacy, Big Data, and the Public Good",Julia Lane,2018-11-23,Paperback,16.05 24 | The Rise of Big Data Policing,Andrew Guthrie Ferguson,2017-10-03,Hardback,19.18 25 | Reinventing Capitalism in the Age of Big Data,Viktor Mayer-Schönberger,2019-01-24,Paperback,13.94 26 | Smart Cities,Anthony M. Townsend,2014-11-17,Paperback,48.62 27 | "The Big Data-Driven Business - How to Use Big Data to Win Customers, Beat Competitors, and Boost Profits",R Glass,2015-01-20,Hardback,60.58 28 | Big Data and Machine Learning in Quantitative Investment,T Guida,2019-03-25,Hardback,38.76 29 | Big Data MBA - Driving Business Strategies with Data Science,B Schmarzo,2017-10-03,Paperback,40.38 30 | Analytics in a Big Data World - The Essential Guide to Data Science and its Applications,B Baesens,2014-07-01,Hardback,29.75 31 | The Enterprise Big Data Lake,Alex Gorelik,2019-04-02,Paperback,30.29 32 | -------------------------------------------------------------------------------- /beautiful-soup/QwQ/readme.md: -------------------------------------------------------------------------------- 1 | ![course-1212-bs](https://user-images.githubusercontent.com/120595244/213877440-1971c0d7-16e6-4858-b1a8-9da0d876705f.jpg) 2 | 3 |

4 | Group Members 5 |
6 |

7 | 8 |

9 | MUHAMMAD DINIE HAZIM BIN AZALI
10 | RADIN DAFINA BINTI RADIN ZULKAR NAIN
11 | ADRINA ASYIQIN BINTI MD ADHA
12 | KELVIN EE
13 |

14 | 15 |

16 | Web Scraping Malaysia States website using Beautiful Soup 17 |
18 |

19 | 20 |

Introduction

21 | In this file, we scrape the table of states in Malaysia from Wikipedia. To scrape the table, we use the BeautifulSoup library, which allows us to extract specific elements from a webpage. The process involves sending a request to the Wikipedia webpage, parsing the HTML content, and then extracting the desired data. Once we have the data, we store it in a format that is easy to work with, namely CSV. This file can then be used for further analysis or visualization. 22 | 23 | 24 |

What is BeautifulSoup

25 | 26 | Beautiful Soup is a Python library for pulling data out of HTML and XML files. It works with your favorite parser to provide idiomatic ways of navigating, searching, and modifying the parse tree, and it commonly saves programmers hours or days of work. Its few simple methods and Pythonic idioms form a toolkit for dissecting a document and extracting what you need, so it doesn't take much code to write an application. 27 | 28 |

Dependencies

29 | 30 | Install beautifulsoup4 with: 31 | 32 | ```sh 33 | pip install beautifulsoup4 34 | ``` 35 | 36 | Other than that, we also need pandas. 37 | 38 |

Scraping the Website

39 | 40 | As mentioned above, we use the Wikipedia page containing the [Table of States in Malaysia](https://en.wikipedia.org/wiki/States_and_federal_territories_of_Malaysia). We chose Wikipedia because it is a reliable source of information and is easily accessible to anyone with internet access. The table contains important information such as the state name, capital city, and area of each state. This information can be used for a variety of purposes, such as research or data analysis. 41 | 42 |

Conclusion

43 | 44 | Overall, scraping the table of states in Malaysia from Wikipedia is a useful and efficient way to gather important information that can be used for a variety of purposes. 45 | 46 |

References

47 | 48 | 1. [Beautiful Soup Documentation](https://beautiful-soup-4.readthedocs.io/en/latest/) 49 | 2. [Web Scraping with Beautiful Soup](https://realpython.com/beautiful-soup-web-scraper-python/) 50 | -------------------------------------------------------------------------------- /lxml/QUAD/readme.md: -------------------------------------------------------------------------------- 1 |
2 |

3 | 4 |

5 |
6 | 7 | 🚀 Group Members QUAD 8 | > 1. Terence Loorthanathan 9 | > 2. RISHMA FATHIMA BINTI BASHER 10 | > 3. CHONG KAI ZHE 11 | > 4. NUR SYAMALIA FAIQAH BINTI MOHD KAMAL 12 | 13 | 14 | # Lxml 15 | 16 | 17 | In this notebook, we will show you how to scrape a website using lxml. lxml is a Python library for parsing and manipulating XML and HTML documents. It provides a way to navigate, search, and modify the elements and attributes of an XML or HTML document using a simple and consistent API. 18 | 19 | The library is built on top of the libxml2 and libxslt C libraries, which provide fast and efficient parsing and manipulation of XML and HTML documents. lxml provides a Pythonic API that is easy to use and intuitive for Python programmers, while still being very powerful and flexible. lxml also includes a number of other useful features for working with XML and HTML, such as support for parsing and generating these formats, as well as support for working with different encodings and character sets. 20 | 21 | lxml is a powerful and efficient library for parsing and manipulating XML and HTML documents in Python. It is built on top of the C libraries libxml2 and libxslt, which means it can handle very large documents quickly and efficiently. Additionally, lxml provides a simple and consistent API that makes it easy to work with XML and HTML documents in Python. 22 | 23 |
24 | 25 | --- 26 |
27 | Why use lxml?
28 | lxml is considered to be one of the most feature-rich and stable XML and HTML parsing libraries for Python. It is generally much faster than other libraries such as BeautifulSoup, and it is more powerful when it comes to handling complex XPath and XSLT. 29 | 30 |
31 | For more information on lxml please go to this link https://lxml.de/ 32 |

33 | 34 | --- 35 |
36 | 37 |
38 |

39 | 40 |

41 |
42 | 43 | What website are we trying to scrape?
44 | We are going to use Jobstreet, the most widely used online job search website in Malaysia. Jobstreet operates primarily in Southeast Asia, including countries such as Malaysia, Singapore, the Philippines, Indonesia, and Vietnam, and it is headquartered in Malaysia. 45 |

46 | 47 | --- 48 |
49 | What data are we going to scrape?
50 | We are going to retrieve data on job offerings for computer/information technology specialists. We will collect basic information about each job offering, such as the company offering it, the salary, and the job title.
51 | 52 | 53 |

54 | > For more information, you can read here in [lxml - XML and HTML with Python](https://lxml.de/) 55 | 56 |
57 | -------------------------------------------------------------------------------- /selenium/group9/Honey Stars Shopee.csv: -------------------------------------------------------------------------------- 1 | ,ItemName,Price,Platform 2 | 0,['Nestle Honey Stars Cereal (150g)'],6.80,Shopee 3 | 1,['Nestle Honey Stars Cereal (300g/150g)'],6.40,Shopee 4 | 2,Nestle Honey Stars 150g,8.90,Shopee 5 | 3,['Nestle Honey Stars Cereal Econopack (450g x 2 Packs)'],16.55,Shopee 6 | 4,['Nestle Honey Stars Cereal (300g)'],29.35,Shopee 7 | 5,Nestle Honey Stars 150g,10.50,Shopee 8 | 6,Nestle Honey Stars 150g,15.90,Shopee 9 | 7,Nestle Honey Stars 150g,6.80,Shopee 10 | 8,['Nestle Honey Stars 150g'],7.90,Shopee 11 | 9,Nestle Honey Stars 150g,12.90,Shopee 12 | 10,['Nestle Honey Stars (150g)'],8.60,Shopee 13 | 11,Nestle Honey Stars 150g,6.20,Shopee 14 | 12,Nestle Honey Stars 150g,7.65,Shopee 15 | 13,Nestle Honey Stars 150g,7.45,Shopee 16 | 14,['Nestle Honey Stars (150g)'],8.50,Shopee 17 | 15,['NESTLE HONEY STARS-150G'],8.50,Shopee 18 | 16,['NESTLE HONEY STAR MEDIUM PACKS 150G'],8.10,Shopee 19 | 17,['NESTLE HONEY STARS-150G'],6.79,Shopee 20 | 18,['Nestle Honey Stars (150g)'],7.90,Shopee 21 | 19,['Nestle Honey Star (150g)'],6.79,Shopee 22 | 20,['Nestle Honey Stars (150g)'],6.50,Shopee 23 | 21,['Nestle Honey Stars Cereal 150G'],7.45,Shopee 24 | 22,['Nestle Honey Stars (150g)'],6.50,Shopee 25 | 23,['Nestle Honey Stars Cereal 150g'],8.59,Shopee 26 | 24,['Nestle - Honey Stars Cereal 150g'],7.99,Shopee 27 | 25,Nestle Honey Stars 150g,7.91,Shopee 28 | 26,['Nestle Honey Star Breakfast Cereal With Whole Grain 150g'],9.69,Shopee 29 | 27,Nestle Honey Stars 150g,4.90,Shopee 30 | 28,['Nestle Honey Stars (150g)'],9.50,Shopee 31 | 29,['Nestle Honey Stars Whole Grain (150G)'],9.69,Shopee 32 | 30,['Nestle Honey Stars (150g)'],6.50,Shopee 33 | 31,['Nestle Honey Stars (150g)'],8.80,Shopee 34 | 32,['Nestle Honey Stars Cereal With Whole Grain 150g'],6.50,Shopee 35 | 33,Nestle Honey Stars 150g,6.50,Shopee 36 
| 34,Nestle Honey Stars 150g,7.50,Shopee 37 | 35,Nestle Honey Stars 150g,7.20,Shopee 38 | 36,Nestle Honey Stars 150g,7.49,Shopee 39 | 37,Nestle Honey Stars 150g,7.69,Shopee 40 | 38,Nestle Honey Stars 150g,7.21,Shopee 41 | 39,Nestle Honey Stars 150g,7.06,Shopee 42 | 40,['[1.1] Nestle Honey Stars Cereal (450g)'],7.70,Shopee 43 | 41,['Nestle Honey Gold Flakes (220g)'],7.90,Shopee 44 | 42,Nestle Honey Stars 150g,11.90,Shopee 45 | 43,['!!READY STOCK!!Nestle Nestum Grains & More 3 in 1 Honey 28g x 15s'],6.20,Shopee 46 | 44,['Nestle Honey Star Cereal - 300g'],16.55,Shopee 47 | 45,['NESTLE NESTUM All Family Cereal Honey (500g x 2 packs)'],7.59,Shopee 48 | 46,['Nestle MILO Breakfast Cereal (300g)'],11.80,Shopee 49 | 47,['Mi Sedaap Fried Noodle - Original Flavour (5 x 91g)'],20.50,Shopee 50 | 48,['NESTLE NESTUM 3in1 Kurma & Prun (10 x 29g)'],8.90,Shopee 51 | 49,['Kitkat Sharebag (17g x 12)'],14.29,Shopee 52 | 50,['Nestle MILO Breakfast Cereal (330g)'],16.49,Shopee 53 | 51,['NESTLE School Pack Cereal (26g x 6 Boxes x 2 Set)'],10.50,Shopee 54 | 52,['[1.1] Nestle Nestum Grains & More 3 in 1 Original (28g x 15s)'],5.39,Shopee 55 | 53,['NESTLE Koko Krunch Cereal Large (330g/170g) and cookie'],11.11,Shopee 56 | -------------------------------------------------------------------------------- /pyscript/matplotlib.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Ice Cream Picker 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | packages = ["matplotlib", "pandas"] 12 | 13 | 14 | 15 | import pandas as pd 16 | import matplotlib.pyplot as plt 17 | 18 | from pyodide.http import open_url 19 | from pyodide.ffi import create_proxy 20 | 21 | url = ( 22 | "https://raw.githubusercontent.com/Cheukting/pyscript-ice-cream/main/bj-products.csv" 23 | ) 24 | ice_data = pd.read_csv(open_url(url)) 25 | 26 | current_selected = [] 27 | flavour_elements = js.document.getElementsByName("flavour") 28 | 29 | def plot(data): 30 | plt.rcParams["figure.figsize"] = (22,20) 31 | 
fig, ax = plt.subplots() 32 | bars = ax.barh(data["name"], data["rating"], height=0.7) 33 | ax.bar_label(bars) 34 | plt.title("Rating of ice cream flavours of your choice") 35 | display(fig, target="graph-area", append=False) 36 | 37 | def select_flavour(event): 38 | for ele in flavour_elements: 39 | if ele.checked: 40 | current_selected = ele.value 41 | break 42 | if current_selected == "ALL": 43 | plot(ice_data) 44 | else: 45 | filter = ice_data.apply(lambda x: ele.value in x["ingredients"], axis=1) 46 | plot(ice_data[filter]) 47 | 48 | ele_proxy = create_proxy(select_flavour) 49 | 50 | for ele in flavour_elements: 51 | if ele.value == "ALL": 52 | ele.checked = True 53 | current_selected = ele.value 54 | ele.addEventListener("change", ele_proxy) 55 | 56 | plot(ice_data) 57 | 58 | 59 | 60 |
61 | Select your 🍨 flavour:
62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 |
75 | 76 | 77 | ice_data 78 | 79 | 80 |
81 | 82 | 83 | 84 | 85 | -------------------------------------------------------------------------------- /scrapy/SDS/readme.md: -------------------------------------------------------------------------------- 1 | ![Scrapy logo](https://scrapy.org/img/scrapylogo.png) 2 | 3 | # Web Scraping on Book Depository using Scrapy 4 | 5 | We perform web scraping on the Book Depository website using Scrapy to extract book information related to big data. Only the first page of the search results is extracted. Then, the data is cleaned and exported to a CSV file. As a result, 30 rows of big data book information, including title, author, published date, format, and price, are successfully scraped and stored in the output `big_data_books.csv` file. 6 | 7 | **Group Members:** 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 |
NameMatric
ONG HAN WAHA20EC0129
GOO YE JUIA20EC0191
MAIZATUL AFRINA SAFIAH BINTI SAIFUL AZWANA20EC0204
28 | 29 | --- 30 | 31 | # About Scrapy 32 | 33 | Scrapy is a Python web scraping framework. It provides a pre-defined set of methods and classes for crawling websites and extracting structured data, such as data for items like products, prices, reviews, etc. Scrapy is built on top of the Twisted asynchronous networking library, which means that it can handle large amounts of data and high concurrency without blocking the execution of the program. 34 | 35 | Scrapy has several built-in features like: 36 | 37 | Support for handling cookies and user-agents 38 | Built-in support for handling redirects 39 | Built-in support for handling forms 40 | Built-in support for handling common HTTP status codes 41 | Built-in support for extracting data from HTML and XML 42 | Built-in support for generating CSV, JSON, or XML output 43 | Scrapy is an open-source project, so you can use it for free and make any modifications you need. It is widely used for data mining, data extraction, and web scraping. You can use Scrapy to scrape data from websites, process it and store it in any format you want. 44 | 45 | ## Installation guide for scrapy 46 | ### Supported Python versions 47 | Scrapy requires Python 3.7+, either the CPython implementation (default) or the PyPy implementation (see Alternate Implementations). 48 | 49 | ### Installing Scrapy 50 | You can install Scrapy and its dependencies from PyPI with: 51 | 52 | ``` pip install Scrapy ``` 53 | 54 | --- 55 | 56 | # About Dataset 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 |
ColumnsDescription
TitleTitle of the book
AuthorAuthor of the book
Published dateThe date the book was published
FormatFormat of the book
PricePrice of the book
84 | -------------------------------------------------------------------------------- /selenium/group9/readme.md: -------------------------------------------------------------------------------- 1 |

2 | 3 |

4 |

5 | Web Scraping 6 | 7 | vs 8 | 9 | using Selenium 10 |

11 | ________________________________________________________________________________________________________________________________________________________ 12 |
13 |

Web scraping E-commerce sites to compare prices with Python

14 | Shopee and Lazada are two e-commerce platforms that customers are likely to consider when making an online purchase. As the two largest e-commerce sites that are operating in Malaysia, they are both excellent places to start looking for a deal. It is a common belief that, of the two e-commerce platforms in Malaysia, one is typically more affordable and caters to people who are looking for deals, while the other platform serves customers who are less concerned about prices. 15 |

16 | This study will determine whether one of the e-commerce websites is in fact less expensive than the other. Using a Python script with Selenium and the Chrome web driver, we will automate the scraping process and generate our dataset. Here, we will be scraping the product's name and price. Then, we will perform some basic data analysis with Pandas on the scraped dataset. We will conclude the price comparison using Matplotlib and Seaborn to create a simple visual chart.
17 |

Group Members

18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 |
NameMatric
Nurarissa Dayana binti Mohd SukriA20EC0120
Madihah binti Che Zabri A20EC0074
Eddie Wong Chung Pheng A20EC0031
Vincent Boo Ee KhaiA20EC0231
42 |

43 | About Selenium 44 |
45 |

46 | The term "Selenium" refers to a variety of open-source browser automation projects. It offers bindings for all significant programming languages, including Python, which is our preferred language. The WebDriver protocol is used by the Selenium API to control web browsers including Chrome, Firefox, and Safari. Selenium has the ability to control both locally installed and remotely accessible browser instances. 47 | 48 | The ability to handle websites naturally, like any browser will, is the main argument in favour of Selenium. It offers a wide variety of ways to interact with websites. This really excels when used with Single-Page Application sites that use a lot of JavaScript. If you used the conventional HTTP client and HTML parser to scrape such a site, you would mostly have a lot of JavaScript files but not a lot of data to scrape. 49 | 50 |

51 | Reference 52 |
53 |

54 | Web scraping E-commerce sites to compare prices with Python — Part 1 55 | -------------------------------------------------------------------------------- /beautiful-soup/High Five/readme.md: -------------------------------------------------------------------------------- 1 |

2 | Web Scraping with Beautiful Soup [High Five] 3 |
4 |

5 | 6 | Web scraping is the process of automatically extracting information from a website using a software program. It involves making HTTP requests to a website's server, downloading the HTML of the web page, and then parsing that HTML to extract the data you're interested in. The data can then be stored in a file, a database, or a spreadsheet for further analysis and use. Web scraping can be used for a wide variety of purposes, such as data mining, data analysis, price comparison, sentiment analysis, and more. The web scraping process can be done manually or using web scraping tools and libraries such as Beautiful Soup, Scrapy, Selenium, and many more. It is important to note that web scraping can be subject to legal restrictions and the terms of use of the websites. 7 | 8 | 9 | Beautiful Soup is a Python package for parsing XML and HTML files. Beautiful Soup is commonly used to perform web scraping since most websites and web pages use HTML. It provides simple methods and Pythonic idioms for navigating, searching, and modifying the parse tree, and it sits on top of popular Python parsers like lxml and html5lib, allowing users to try out different parsing strategies or trade speed for flexibility. 10 | 11 | In this project, we are required to perform web scraping using Beautiful Soup on any website that is related to Malaysia. Since we were briefed to specifically choose a website related to Malaysia, we chose [this](https://eduspiral.com/about-us-eduspiral-consultant-services/advise-best-course-study-top-private-universities-malaysia/top-guides-choosing-the-best-course/best-courses-study-in-malaysia/top-10-degree-courses-in-malaysia-with-highest-starting-salaries/choosing-a-degree-malaysia-top-most-popular-courses/top-ten-best-degree-diploma-foundtion-pre-university-courses-to-study-in-malaysia-after-spm-uec-olevels-stpm-alevels-cpu-sam-ausmat-mufy/) website to execute the web scraping. 
12 | 13 | The output of this web scraping will be a list of dictionaries, where each dictionary represents one item from the first ordered list (`ol`) element on the web page. The dictionaries will contain the following key-value pairs: 14 | 15 | 'Category': title 16 | 'Course Name': the text of the list item 17 | 'Link': the value of the 'href' attribute of the first anchor (`a`) element found within the list item. 18 | 19 | The 'Category' field represents the general field of study, while the 'Course Name' is a subcategory within that field, and the 'Link' directs the user to a webpage that provides more detailed information about the course. 20 | 21 |

22 | 23 | ![image](https://user-images.githubusercontent.com/99240177/214037671-3a6d291e-358c-4f66-b2bc-204d65dae20e.png) 24 | 25 | Group members: 26 |

27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 |
NameMatric Number
AHMAD MUHAIMIN BIN AHMAD HAMBALIA20EC0006
NAYLI NABIHAH BINTI JASNIA20EC0105
SAKINAH AL’IZZAH BINTI MOHD ASRIA20EC0142
LEE JIA XIANA20EC0200
50 | -------------------------------------------------------------------------------- /scrapy/BigMac/IphoneProduct.csv: -------------------------------------------------------------------------------- 1 | Product Name,Price,Discount,Link 2 | iPhone XS,RM 999,RM 800,https://shop.compasia.com/collections/apple/collections/apple/products/iphone-xs 3 | iPhone XR,"RM 1,029",RM 770,https://shop.compasia.com/collections/apple/collections/apple/products/iphone-xr 4 | iPhone 11 Pro,"RM 1,699",RM 500,https://shop.compasia.com/collections/apple/collections/apple/products/iphone-11-pro 5 | iPhone 12,"RM 1,899","RM 1,300",https://shop.compasia.com/collections/apple/collections/apple/products/iphone-12 6 | iPhone 11 Pro Max,"RM 1,899","RM 1,500",https://shop.compasia.com/collections/apple/collections/apple/products/iphone-11-pro-max 7 | Fast Charging Set,RM 49,RM 16,https://shop.compasia.com/collections/apple/collections/apple/products/30w-fast-charging-set 8 | iPhone 12 Pro,"RM 2,449","RM 1,150",https://shop.compasia.com/collections/apple/collections/apple/products/iphone-12-pro 9 | iPhone 12 Pro Max,"RM 2,849","RM 1,650",https://shop.compasia.com/collections/apple/collections/apple/products/iphone-12-pro-max 10 | iPhone 13,"RM 2,749","RM 1,150",https://shop.compasia.com/collections/apple/collections/apple/products/iphone-13 11 | iPhone 12 Mini,"RM 1,749","RM 1,150",https://shop.compasia.com/collections/apple/collections/apple/products/iphone-12-mini 12 | iPhone 13 Pro,"RM 3,449","RM 1,450",https://shop.compasia.com/collections/apple/collections/apple/products/iphone-13-pro 13 | iPhone 11 128GB Fair,RM 999,RM 720,https://shop.compasia.com/collections/apple/collections/apple/products/iphone-11-128gb-fair 14 | iPhone XS,"RM 1,029",RM 800,https://shop.compasia.com/collections/apple/collections/apple/products/iphone-xs 15 | iPhone XR,"RM 1,699",RM 770,https://shop.compasia.com/collections/apple/collections/apple/products/iphone-xr 16 | iPhone 11 Pro,"RM 1,899",RM 
500,https://shop.compasia.com/collections/apple/collections/apple/products/iphone-11-pro 17 | iPhone 12,"RM 1,199","RM 1,300",https://shop.compasia.com/collections/apple/collections/apple/products/iphone-12 18 | iPhone XS Max,"RM 1,899",RM 900,https://shop.compasia.com/collections/apple/collections/apple/products/iphone-xs-max 19 | iPhone 11 Pro Max,RM 929,"RM 1,500",https://shop.compasia.com/collections/apple/collections/apple/products/iphone-11-pro-max 20 | iPhone X,"RM 2,449",RM 570,https://shop.compasia.com/collections/apple/collections/apple/products/iphone-x 21 | iPhone 12 Pro,"RM 2,849","RM 1,150",https://shop.compasia.com/collections/apple/collections/apple/products/iphone-12-pro 22 | iPhone 12 Pro Max,RM 799,"RM 1,650",https://shop.compasia.com/collections/apple/collections/apple/products/iphone-12-pro-max 23 | iPhone 8 Plus,"RM 2,749",RM 500,https://shop.compasia.com/collections/apple/collections/apple/products/iphone-8-plus 24 | iPhone 13,"RM 1,749","RM 1,150",https://shop.compasia.com/collections/apple/collections/apple/products/iphone-13 25 | iPhone 12 Mini,RM 929,"RM 1,150",https://shop.compasia.com/collections/apple/collections/apple/products/iphone-12-mini 26 | iPhone SE (2020),"RM 3,449",RM 670,https://shop.compasia.com/collections/apple/collections/apple/products/iphone-se-2020 27 | iPhone 11 128GB Excellent,"RM 3,949",RM 550,https://shop.compasia.com/collections/apple/collections/apple/products/iphone-11-128gb-excellent 28 | iPhone 13 Pro,"RM 3,399","RM 1,450",https://shop.compasia.com/collections/apple/collections/apple/products/iphone-13-pro 29 | iPhone 11 128GB Fair,RM 989,RM 720,https://shop.compasia.com/collections/apple/collections/apple/products/iphone-11-128gb-fair 30 | -------------------------------------------------------------------------------- /lxml/QUAD/job_search.csv: -------------------------------------------------------------------------------- 1 | Job Title,Company Name,Job Location,Salary,Benefit 2 | Senior Software Engineer 
/ Software Engineer (.Net),Ideagen Plc.,Subang Jaya,Not Specified,13 Month Salary 3 | Software Developer ( Java ),Wiseview Information Technology,Kuala Lumpur,"MYR 6K - 8,400 monthly",Nothing 4 | Data Analyst,Zempot Malaysia Sdn. Bhd.,Johor Bahru,Not Specified,Training Provided 5 | Software Engineer (Java),Ideagen Plc.,Subang Jaya,Not Specified,13 Month Salary 6 | Internship for Information Technology (IT) Students,Infineon Technologies (Malaysia) Sdn Bhd,Melaka,Not Specified,Nothing 7 | Full Stack Developer,SKIN LOVERS SDN. BHD.,Kuala Lumpur,"MYR 7K - 14,000 monthly",Project Performance Bonus + Support Incentive 8 | SOFTWARE ENGINEER,Company Confidential,Kota Kinabalu,Not Specified,5-Day work week / Office Hour 9 | Team Lead,MARVELCONNECT TECHNOLOGY LIMITED,Kuala Lumpur,"MYR 6K - 10,000 monthly","5 working days (Monday to Friday), EPF, SOCSO, EIS, Company Trip" 10 | Development Team Lead (.Net),Ideagen Plc.,Subang Jaya,Not Specified,13 Month Salary 11 | Software Developer,Tunku Abdul Rahman University of Management and Technology (TAR UMT),Kuala Lumpur,Not Specified,Nothing 12 | "Executive, Security System Support",Cenviro Sdn Bhd,Negeri Sembilan,"MYR 3.5K - 4,300 monthly",Nothing 13 | Software Developer,hiputec web,Seremban,"MYR 5K - 8,000 monthly",Nothing 14 | Software Engineer,Leading Innovative Technologies and Systems,Shah Alam/Subang,Not Specified,Yearly bonuses & salary increment. 15 | Senior Software Engineer - C++ (CPP),Confidential,Kuala Lumpur,"MYR 8K - 15,000 monthly",Career Growth & Development Opportunities 16 | IT Specialist,"Bodhi Meditation, Malaysia",Klang/Port Klang,Not Specified,Nothing 17 | Software Engineer (Senior & Junior),Juris Technologies Sdn Bhd,Kuala Lumpur,Not Specified,Fun-filled work environment with great learning opportunities. 
18 | Programmer,myNEWS HOLDINGS BERHAD,Selangor - Others,"MYR 3K - 6,000 monthly",Professional and Career Development 19 | Application Engineer (Software Developer),Steelcase Office Solutions (M) Sdn Bhd,Kuala Lumpur,Not Specified,Nothing 20 | Technology Intern - Innovation & Cloud Development Center - MY (KL / Penang),Deloitte Consulting SEA,Kuala Lumpur,Not Specified,Nothing 21 | System Analyst,Ann Joo Steel Berhad,Perai,"MYR 6K - 8,500 monthly",Nothing 22 | R&D Software Engineer (Backend Developer),Keysight Technologies Malaysia Sdn. Bhd.,Penang,Not Specified,Nothing 23 | Full-Stack Developer,Sellup Private Limited,Johor,"MYR 4.5K - 6,300 monthly",Remote working 24 | Solution Engineer (with experience),Exclusive Networks Malaysia Sdn Bhd,Petaling Jaya,"MYR 6.5K - 8,000 monthly",Opportunities for on-going technical certifications. 25 | AS400 Developer,Accenture TCM,Kuala Lumpur,Not Specified,Nothing 26 | Senior Software Developer,ABX Express (M) Sdn Bhd,Selangor,Not Specified,Stable and growing EXPRESS company 27 | Operation Maintenance Engineer (LINUX) - Fresh Grad are welcome to apply!,SNSOFT SDN. BHD.,Kuala Lumpur,"MYR 4K - 6,000 monthly",Outstanding career growth & development opportunities 28 | Game Operators / Game Master,Zempot Malaysia Sdn. Bhd.,Johor - Others,"MYR 3K - 3,500 monthly",Training Provided 29 | Software Engineer (Singapore),WPG Holdings,Singapore,"SGD 2K - 3,000 monthly",Fast Growing Company 30 | Senior Information System Engineer,Sanmina-SCI Systems (Malaysia) Sdn. Bhd. 
(Technoplex Penang),Bayan Lepas,"MYR 4.5K - 7,000 monthly",Inclusive workplace 31 | React Native Developer,SANS PAPER,Kuala Lumpur,"MYR 5K - 7,000 monthly","Flexible work, remote-only, WFH" 32 | -------------------------------------------------------------------------------- /beautiful-soup/QwQ/Malaysia_states.csv: -------------------------------------------------------------------------------- 1 | Flag,Emblem,State,Capital,Royal_capital,Population,Area_km2,Licence_plate,Area_code,Abbr,ISO,HDI,Region,Head_of_state,Head_of_government 2 | //upload.wikimedia.org/wikipedia/commons/thumb/5/5a/Flag_of_Johor.svg/50px-Flag_of_Johor.svg.png,//upload.wikimedia.org/wikipedia/commons/thumb/6/60/Coat_of_arms_of_Johor.svg/50px-Coat_of_arms_of_Johor.svg.png,Johor,Johor Bahru,Muar,"3,794,000","19,166",J,"07, 06 (Muar & Tangkak)",JHR,MY-01,0.825,Peninsular Malaysia,Sultan,Menteri Besar 3 | //upload.wikimedia.org/wikipedia/commons/thumb/c/cc/Flag_of_Kedah.svg/50px-Flag_of_Kedah.svg.png,//upload.wikimedia.org/wikipedia/commons/thumb/1/11/Coat_of_arms_of_Kedah.svg/50px-Coat_of_arms_of_Kedah.svg.png,Kedah,Alor Setar,Anak Bukit,"2,194,100","9,492",K,04,KDH,MY-02,0.808,Peninsular Malaysia,Sultan,Menteri Besar 4 | //upload.wikimedia.org/wikipedia/commons/thumb/6/61/Flag_of_Kelantan.svg/50px-Flag_of_Kelantan.svg.png,//upload.wikimedia.org/wikipedia/commons/thumb/e/ea/Coat_of_arms_of_Kelantan.svg/50px-Coat_of_arms_of_Kelantan.svg.png,Kelantan,Kota Bharu,Kubang Kerian,"1,928,800","15,040",D,09,KTN,MY-03,0.779,Peninsular Malaysia,Sultan,Menteri Besar 5 | //upload.wikimedia.org/wikipedia/commons/thumb/0/09/Flag_of_Malacca.svg/50px-Flag_of_Malacca.svg.png,//upload.wikimedia.org/wikipedia/commons/thumb/8/86/Coat_of_arms_of_Malacca_New.svg/50px-Coat_of_arms_of_Malacca_New.svg.png,Malacca,Malacca City,—,"937,500","1,712",M,06,MLK,MY-04,0.835,Peninsular Malaysia,Yang di-Pertua Negeri (Governor),Chief Minister 6 | 
//upload.wikimedia.org/wikipedia/commons/thumb/d/db/Flag_of_Negeri_Sembilan.svg/50px-Flag_of_Negeri_Sembilan.svg.png,//upload.wikimedia.org/wikipedia/commons/thumb/3/34/Coat_of_arms_of_Negeri_Sembilan.svg/50px-Coat_of_arms_of_Negeri_Sembilan.svg.png,Negeri Sembilan,Seremban,Seri Menanti,"1,129,100","6,658",N,06,NSN,MY-05,0.829,Peninsular Malaysia,Yang di-Pertuan Besar(Grand Ruler),Menteri Besar 7 | //upload.wikimedia.org/wikipedia/commons/thumb/a/aa/Flag_of_Pahang.svg/50px-Flag_of_Pahang.svg.png,//upload.wikimedia.org/wikipedia/commons/thumb/0/0f/Coat_of_arms_of_Pahang_%28Sultan%29.svg/50px-Coat_of_arms_of_Pahang_%28Sultan%29.svg.png,Pahang,Kuantan,Pekan,"1,684,600","35,965",C,"09, 03 (Genting Highlands), 05 (Cameron)",PHG,MY-06,0.804,Peninsular Malaysia,Sultan,Menteri Besar 8 | //upload.wikimedia.org/wikipedia/commons/thumb/d/d4/Flag_of_Penang_%28Malaysia%29.svg/50px-Flag_of_Penang_%28Malaysia%29.svg.png,//upload.wikimedia.org/wikipedia/commons/thumb/8/8b/Coat_of_arms_of_Penang.svg/50px-Coat_of_arms_of_Penang.svg.png,Penang,George Town,—,"1,774,400","1,049",P,04,PNG,MY-07,0.845,Peninsular Malaysia,Yang di-Pertua Negeri (Governor),Chief Minister 9 | //upload.wikimedia.org/wikipedia/commons/thumb/8/87/Flag_of_Perak.svg/50px-Flag_of_Perak.svg.png,//upload.wikimedia.org/wikipedia/commons/thumb/a/ab/Coat_of_arms_of_Perak.svg/50px-Coat_of_arms_of_Perak.svg.png,Perak,Ipoh,Kuala Kangsar,"2,508,900","21,146",A,05,PRK,MY-08,0.816,Peninsular Malaysia,Sultan,Menteri Besar 10 | //upload.wikimedia.org/wikipedia/commons/thumb/a/aa/Flag_of_Perlis.svg/50px-Flag_of_Perlis.svg.png,//upload.wikimedia.org/wikipedia/commons/thumb/3/3d/Coat_of_arms_of_Perlis.svg/50px-Coat_of_arms_of_Perlis.svg.png,Perlis,Kangar,Arau,"255,400",819,R,04,PLS,MY-09,0.805,Peninsular Malaysia,Raja,Menteri Besar 11 | 
//upload.wikimedia.org/wikipedia/commons/thumb/b/b5/Flag_of_Sabah.svg/50px-Flag_of_Sabah.svg.png,//upload.wikimedia.org/wikipedia/commons/thumb/6/68/Coat_of_arms_of_Sabah.svg/50px-Coat_of_arms_of_Sabah.svg.png,Sabah,Kota Kinabalu,—,"3,833,000","73,621",S,087–089,SBH,MY-12,0.710,East Malaysia,Yang di-Pertua Negeri (Governor),Chief Minister 12 | //upload.wikimedia.org/wikipedia/commons/thumb/7/7e/Flag_of_Sarawak.svg/50px-Flag_of_Sarawak.svg.png,//upload.wikimedia.org/wikipedia/commons/thumb/7/79/Coat_of_arms_of_Sarawak.svg/50px-Coat_of_arms_of_Sarawak.svg.png,Sarawak,Kuching,—,"2,822,200","124,450",Q,081–086,SWK,MY-13,0.745,East Malaysia,Yang di-Pertua Negeri (Governor),Premier 13 | //upload.wikimedia.org/wikipedia/commons/thumb/0/0c/Flag_of_Selangor.svg/50px-Flag_of_Selangor.svg.png,//upload.wikimedia.org/wikipedia/commons/thumb/9/90/Coat_of_arms_of_Selangor.svg/50px-Coat_of_arms_of_Selangor.svg.png,Selangor,Shah Alam,Klang,"6,555,400","7,951",B,03,SGR,MY-10,0.863,Peninsular Malaysia,Sultan,Menteri Besar 14 | //upload.wikimedia.org/wikipedia/commons/thumb/6/6b/Flag_of_Terengganu.svg/50px-Flag_of_Terengganu.svg.png,//upload.wikimedia.org/wikipedia/commons/thumb/7/72/Coat_of_arms_of_Terengganu.svg/50px-Coat_of_arms_of_Terengganu.svg.png,Terengganu,Kuala Terengganu,Kuala Terengganu,"1,275,100","12,958",T,09,TRG,MY-11,0.800,Peninsular Malaysia,Sultan,Menteri Besar 15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
Stars Badge 2 | Forks Badge 3 | Pull Requests Badge 4 | Issues Badge 5 | GitHub contributors 6 | ![Visitors](https://api.visitorbadge.io/api/visitors?path=https%3A%2F%2Fgithub.com%2Fdrshahizan%2Fpython-web&labelColor=%23d9e3f0&countColor=%23697689&style=flat) 7 | 8 | Don't forget to hit the :star: if you like this repo. 9 | 10 | # About Us 11 | The information in this GitHub repository is part of the materials for the subject High Performance Data Processing (SECP3133). This folder contains general big data information as well as big data case studies using Malaysian datasets. These case studies were created by [Bachelor of Computer Science (Data Engineering)](https://comp.utm.my/bachelor-of-computer-science-data-engineering/) students at Universiti Teknologi Malaysia. 12 | 13 |

14 | 15 |

16 | 17 | # 📚 Course: High Performance Data Processing 18 | - [Python for beginners](https://github.com/drshahizan/python-tutorial) 19 | - [Web scraping and Python web framework](https://github.com/drshahizan/python-web) 20 | - [Exploratory data analysis](https://github.com/drshahizan/Python_EDA) 21 | - [Big data processing](https://github.com/drshahizan/Python-big-data) 22 | 23 | ### Contents: 24 | - [About Us](#about-us) 25 | - [📚 Course: High Performance Data Processing](#-course-high-performance-data-processing) 26 | - [Contents:](#contents) 27 | - [Web Scraping](#web-scraping) 28 | - [Tutorial](#tutorial) 29 | - [Selenium](#selenium) 30 | - [Beautiful Soup](#beautiful-soup) 31 | - [Scrapy](#scrapy) 32 | - [Requests](#requests) 33 | - [Lxml](#lxml) 34 | - [🌟 Case Study: Web Scraping](#-case-study-web-scraping) 35 | - [Contribution 🛠️](#contribution-️) 36 | 37 |

38 | 39 |

40 | 41 | # Web Scraping 42 | - [Grouping: Web Scraping](https://docs.google.com/spreadsheets/d/1fSaFVWygEo6oZvQjXUaEYAWEDmNj7An0qg3ouKmz8a4/edit#gid=1313952741) 43 | - [Web Scraping with EDA and Visualization Using Python](https://medium.com/analytics-vidhya/web-scraping-with-eda-and-visualization-using-python-a496ddf5f98e) 44 | - [Python Web Scraping - Quick Guide](https://www.tutorialspoint.com/python_web_scraping/python_web_scraping_quick_guide.htm) 45 | - [Web Scraping of E-Commerce Website Using BeautifulSoup in Python](https://www.dosm.gov.my/v1/uploads/files/4_Portal%20Content/2_%20Statistics/MyStats/2022/Slaid/Sesi%201a_Web%20Scraping%20of%20E-Commerce%20Website%20Using%20BeautifulSoup%20in%20Python.pdf) 46 | - [Github: malaysia-election-data-scraping](https://github.com/Thibico/malaysia-election-data-scraping) 47 | - [Applications and a Guide to Web Scraping](https://www.fstep.org.my/wp-content/uploads/2020/12/FSTEP-The-application-of-Webscraping.pdf) 48 | - [Github:malaysiastockbiz_scraper]( 49 | https://github.com/atlas-github/malaysiastockbiz_scraper/blob/master/malaysiastock_biz_scraper.ipynb) 50 | - [An automated web scraping tool for Malaysia tourism](http://eprints.utar.edu.my/3493/1/CS-2019-1505499.pdf) 51 | - [7 Python Libraries For Web Scraping To Master Data Extraction](https://www.projectpro.io/article/python-libraries-for-web-scraping/625) 52 | - [Web Scraping Freelancers in Malaysia](https://www.truelancer.com/web-scraping-freelancers-in-malaysia) 53 | - [Web Scraping, Regular Expressions, and Data Visualization: Doing it all in Python](https://towardsdatascience.com/web-scraping-regular-expressions-and-data-visualization-doing-it-all-in-python-37a1aade7924) 54 | - [5 Web Scraping Projects with Python](https://amankharwal.medium.com/5-web-scraping-projects-with-python-4bcc25ff039) 55 | 56 | ## Tutorial 57 | - [Video: Web Scraping untuk laman web berkaitan Bursa Malaysia](https://youtu.be/bmi_-oj2S9c) 58 | - [Video: Web Scraping (The Star 
Malaysia News)](https://youtu.be/6tZAmu17M6g) 59 | - [Video: Scraping YellowPages.my | Name, Website, Email, Phone | Yellow Pages Malaysia](https://youtu.be/fO9701IUSrY) 60 | - [Video: Tutorial Web Scraper - Dengan Chrome Web Scraper](https://youtu.be/FcrscxUeEj0) 61 | - [Video: Cara Scraping Banyak Produk Tokopedia Dengan Tools Gratisan Chrome](https://youtu.be/zGOcGU1BSsc) 62 | - [Python Web Scraping Tutorial: Step-By-Step](https://oxylabs.io/blog/python-web-scraping) 63 | - [Web Scraping Python Tutorial – How to Scrape Data From A Website](https://www.freecodecamp.org/news/web-scraping-python-tutorial-how-to-scrape-data-from-a-website/) 64 |

65 | 66 |

67 | 68 | ## Selenium 69 | - [Web scraping E-commerce sites to compare prices with Python — Part 1](https://medium.com/@zfwong.wilson/web-scraping-e-commerce-sites-to-compare-prices-with-python-part-1-360509ee5c62) 70 | - [Web scraping E-commerce sites to compare prices with Python — Part 2](https://medium.com/@zfwong.wilson/web-scraping-e-commerce-sites-to-compare-prices-with-python-part-2-367140620cb6) 71 | - [Web Scraping Election Results of PRU-15 (GE-15) using Python](https://medium.com/@elvinado/web-scraping-election-results-of-pru-15-ge-15-using-python-e9310129bf9e) 72 | 73 | ## Beautiful Soup 74 | - [Beautiful Soup: Build a Web Scraper With Python](https://realpython.com/beautiful-soup-web-scraper-python/) 75 | - [Python Web Scraping Tutorial](https://www.freecodecamp.org/news/web-scraping-python-tutorial-how-to-scrape-data-from-a-website/) 76 | 77 | ## Scrapy 78 | - [Scrapy.org](https://scrapy.org/) 79 | - [Github: Scrapy](https://github.com/scrapy/scrapy) 80 | - [Python Scrapy Tutorial for Beginners](https://360digitmg.com/blog/python-scrapy-tutorial-for-beginners) 81 | - [Spider Web Crawler: Mudah.my](https://gist.github.com/amirulasyraf88/55b5938dd220c5d883e7) 82 | - [Scrapy: Malaysia House Property](https://github.com/neurotichl/Scrapy-Crawl-IProperty) 83 | 84 | ## Requests 85 | - [Requests - Web Scraping using Requests](https://www.tutorialspoint.com/requests/requests_web_scraping_using_requests.htm) 86 | - [Web Scraping in Python with Beautiful Soup and Requests](https://www.kirenz.com/post/2022-05-02-web-scraping-in-python-with-beautiful-soup-requests-and-pandas/) 87 | 88 | ## Lxml 89 | - [Web Scraping using lxml and Python 2018: Extracting data from Steam](https://youtu.be/5N066ISH8og) 90 | 91 | ## 🌟 Case Study: Web Scraping 92 | 93 | | Team | Library | Website | GitHub | 94 | | ----- | ----- | ------ | ------ | 95 | | Group 10 | Beautiful soup | [StudyMalaysia.com](https://www.studymalaysia.com) | [![Open in 
GitHub](https://img.shields.io/static/v1?label=&message=Open%20in%20GitHub&labelColor=grey&color=blue&logo=github)](https://github.com/drshahizan/python-web/tree/main/beautiful-soup/Group%2010) | 96 | | High Five | Beautiful soup | [EduSpiral Consultant Services](https://eduspiral.com/) | [![Open in GitHub](https://img.shields.io/static/v1?label=&message=Open%20in%20GitHub&labelColor=grey&color=blue&logo=github)](https://github.com/drshahizan/python-web/tree/main/beautiful-soup/High%20Five) | 97 | | QwQ | Beautiful soup | [States and federal territories of Malaysia](https://en.wikipedia.org/wiki/States_and_federal_territories_of_Malaysia) | [![Open in GitHub](https://img.shields.io/static/v1?label=&message=Open%20in%20GitHub&labelColor=grey&color=blue&logo=github)](https://github.com/drshahizan/python-web/tree/main/beautiful-soup/QwQ) | 98 | | SDS | Scrapy | [Book Depository](https://www.bookdepository.com/search?searchTerm=big+data&search=Find+book) | [![Open in GitHub](https://img.shields.io/static/v1?label=&message=Open%20in%20GitHub&labelColor=grey&color=blue&logo=github)](https://github.com/drshahizan/python-web/tree/main/scrapy/SDS) | 99 | | BigMac | Scrapy | [CompAsia.com](https://shop.compasia.com/collections/apple) | [![Open in GitHub](https://img.shields.io/static/v1?label=&message=Open%20in%20GitHub&labelColor=grey&color=blue&logo=github)](https://github.com/drshahizan/python-web/tree/main/scrapy/BigMac) | 100 | |SIX | Scrapy | [bukukita.com](https://www.bukukita.com/) | [![Open in GitHub](https://img.shields.io/static/v1?label=&message=Open%20in%20GitHub&labelColor=grey&color=blue&logo=github)](https://github.com/drshahizan/python-web/tree/main/scrapy/SIX) | 101 | | AdMiPeQa | Selenium | [Lazada](https://www.lazada.com.my) | [![Open in GitHub](https://img.shields.io/static/v1?label=&message=Open%20in%20GitHub&labelColor=grey&color=blue&logo=github)](https://github.com/drshahizan/python-web/tree/main/selenium/AdMiPeQa) | 102 | | SamVerse | Selenium |
[Malaysia General Election (GE-15)](https://www.bharian.com.my/berita/nasional/2022/11/1028360/keputusan-rasmi-pru15) | [![Open in GitHub](https://img.shields.io/static/v1?label=&message=Open%20in%20GitHub&labelColor=grey&color=blue&logo=github)](https://github.com/drshahizan/python-web/tree/main/selenium/SamVerse) | 103 | | Group 9 | Selenium | [Lazada](https://www.lazada.com.my) [Shopee](https://www.shopee.com.my) | [![Open in GitHub](https://img.shields.io/static/v1?label=&message=Open%20in%20GitHub&labelColor=grey&color=blue&logo=github)](https://github.com/drshahizan/python-web/tree/main/selenium/group9) | 104 | | No Name | Requests | [Puma: sneakers](https://my.puma.com/my/en/women/shoes/sneakers) | [![Open in GitHub](https://img.shields.io/static/v1?label=&message=Open%20in%20GitHub&labelColor=grey&color=blue&logo=github)](https://github.com/drshahizan/python-web/tree/main/requests) | 105 | | Quad | Lxml | [Jobstreet.com](https://www.jobstreet.com.my) | [![Open in GitHub](https://img.shields.io/static/v1?label=&message=Open%20in%20GitHub&labelColor=grey&color=blue&logo=github)](https://github.com/drshahizan/python-web/tree/main/lxml/QUAD) | 106 | 107 | ## Contribution 🛠️ 108 | Please create an [Issue](https://github.com/drshahizan/python-web/issues) for any improvements, suggestions or errors in the content. 109 | 110 | You can also contact me using [Linkedin](https://www.linkedin.com/in/drshahizan/) for any other queries or feedback. 
111 | 112 | [![Visitors](https://api.visitorbadge.io/api/visitors?path=https%3A%2F%2Fgithub.com%2Fdrshahizan&labelColor=%23697689&countColor=%23555555&style=plastic)](https://visitorbadge.io/status?path=https%3A%2F%2Fgithub.com%2Fdrshahizan) 113 | ![](https://hit.yhype.me/github/profile?user_id=81284918) 114 | -------------------------------------------------------------------------------- /selenium/SamVerse/PRU15_Results.csv: -------------------------------------------------------------------------------- 1 | pnum,parliment,mp,party,votes,coalition 2 | P1,PADANG BESAR, RUSHDAN RUSMI ,PN-PAS,24267,PN 3 | P2,KANGAR, ZAKRI HASSAN ,PN-BERSATU,24562,PN 4 | P3,ARAU, SHAHIDAN KASSIM ,PN- BERSATU,31458,PN 5 | P4,LANGKAWI, SUHAIMI ABDULLAH ,PN-BERSATU,25463,PN 6 | P5,JERLUN, ABD GHANI AHMAD ,PN-PAS,31685,PN 7 | P6,KUBANG PASU, DATUK KU ABD RAHMAN KU ISMAIL ,PN-BERSATU,47584,PN 8 | P7,PADANG TERAP, NURUL AMIN HAMID ,PN-PAS,28217,PN 9 | P8,POKOK SENA, DATUK AHMAD SAAD @ YAHAYA ,PN-PAS,52275,PN 10 | P9,ALOR SETAR, AFNAN HAMIMI TAIB AZAMUDDEN ,PN-PAS,37486,PN 11 | P10,KUALA KEDAH, AHMAD FAKHRUDDIN FAKHRURAZI ,PN-PAS,56298,PN 12 | P11,PENDANG, AWANG HASHIM ,PN-PAS,49008,PN 13 | P12,JERAI, SABRI AZIT ,PN-PAS,49461,PN 14 | P13,SIK, AHMAD TARMIZI SULAIMAN ,PN-PAS,34606,PN 15 | P14,MERBOK, MOHD NAZRI ABU HASSAN ,PN-BERSATU,52573,PN 16 | P15,SUNGAI PETANI, MOHAMMED TAUFIQ JOHARI ,PH-PKR,50580,PH 17 | P16,BALING, HASSAN SAAD ,PN-PAS,64493,PN 18 | 1.,"DATUK AZMAN NASRUDIN (PN-BERSATU) 51,637 MENANG",None,None,None,OTHER 19 | 2.,"ROSLAN HASHIM (PN-BERSATU) 34,469 MENANG",None,None,None,OTHER 20 | 2.,"DATUK MUMTAZ MD NAWI (PAS) 65,426 MENANG",None,None,None,OTHER 21 | 4.,"DATUK AHMAD MARZUK SHAARY (PAS) 53,933 MENANG",None,None,None,OTHER 22 | 1.,"DATUK SERI TAKIYUDDIN HASSAN (PAS) 41,869 MENANG",None,None,None,OTHER 23 | 2.,"AHMAD FADHLI SHAARY (PAS) 44,444 MENANG",None,None,None,OTHER 24 | 2.,"SITI ZAILAH MOHD YUSOFF (PN-PAS) 37,759 MENANG",None,None,None,OTHER 25 | 4.,"DATUK SERI 
TUAN IBRAHIM TUAN MAN (PAS) 55,654 MENANG",None,None,None,OTHER 26 | 1.,"MOHD SYAHIR CHE SULAIMAN (PN-PAS) 57,130 MENANG",None,None,None,OTHER 27 | 3.,"KHLIR MOHD NOR (PN-BERSATU) 40,542 MENANG",None,None,None,OTHER 28 | 5.,"IKMAL HISHAM ABDUL AZIZ (PN-BERSATU) 54,266 MENANG",None,None,None,OTHER 29 | 4.,"DATUK DR NIK MUHAMMAD ZAWAWI SALLEH (PAS)52,937 MENANG",None,None,None,OTHER 30 | 3.,"WAN AHMAD FAYHSAL WAN AHMAD KAMAL (PN-BERSATU) 35,603 MENANG",None,None,None,OTHER 31 | 2.,"ZAHARI KECIK (PN-BERSATU) 27,072 MENANG",None,None,None,OTHER 32 | 1.," ABDUL LATIFF ABDUL RAHMAN (PN-PAS) 42,740 MENANG",None,None,None,OTHER 33 | 2.,"DATUK MOHD AZIZI ABU NAIM (PAS-Bersatu) 21,826 MENANG",None,None,None,OTHER 34 | 1.," CHE MOHAMAD ZULKEFLY JUSOH (PN-PAS) 49,569 MENANG",None,None,None,OTHER 35 | 2.,"SHAHARIZUKIRNAIN ABD KADIR (PN-PAS) 50,768 MENANG",None,None,None,OTHER 36 | 1.,"DR ALIAS RAZAK (PN-PAS) 56,697 MENANG",None,None,None,OTHER 37 | 4.,"AHMAD AMZAD HASHIM (PN-PAS) 63,016 MENANG",None,None,None,OTHER 38 | 1.,"TAN SRI ABDUL HADI AWANG (PAS) 73,115 MENANG",None,None,None,OTHER 39 | 3.,"ROSOL WAHID (PN-BERSATU) 42,910 MENANG",None,None,None,OTHER 40 | 5.,"WAN HASAN MOHD RAMLI (PN-PAS) 59,720 MENANG",None,None,None,OTHER 41 | 2.,"CHE ALIAS HAMID (PAS) 65,714 MENANG",None,None,None,OTHER 42 | 2.,"DR SITI MASTURA MOHAMAD (PN-PAS): 28,604 MENANG",None,None,None,OTHER 43 | 5.,"WAN SAIFUL WAN JAN (PN-BERSATU): 31,116 MENANG",None,None,None,OTHER 44 | 1.," LIM GUAN ENG (PH-DAP): 55,797 MENANG",None,None,None,OTHER 45 | 2.,"MUHAMMAD FAWWAZ MD JAN (PN-PAS): 37,638 MENANG",None,None,None,OTHER 46 | 1.," SIM CHEE KEONG (PH-DAP): 71,722 MENANG",None,None,None,OTHER 47 | 2.,"CHOW KON YEOW (PH-DAP) 50,744 MENANG",None,None,None,OTHER 48 | 1.,"FADHLINA SIDEK (PH-PKR) 42,188 MENANG",None,None,None,OTHER 49 | 1.,"SYERLEENA ABDUL RASHID (PH-DAP): 49,353 MENANG",None,None,None,OTHER 50 | 1.,"LIM HUI YING (PH-DAP)31,968 MENANG",None,None,None,OTHER 51 | 1.," SANISVARA NETHAJI RAYER 
RAJAJI (PH-DAP): 50,369 MENANG",None,None,None,OTHER 52 | 2.,"RAMKARPAL SINGH (PH-DAP)........71,204 MENANG",None,None,None,OTHER 53 | 5.,"SIM TZE TZIN (PH-PKR): 55,209 MENANG",None,None,None,OTHER 54 | 2.,"MUHAMAD BAKHTIAR WAN CHIK (PH-PKR): 24,564 MENANG",None,None,None,OTHER 55 | 2.,"FATHUL HUZIR AYOB (PN-BERSATU) 15,105 MENANG",None,None,None,OTHER 56 | 1.,"SHAMSUL ANUAR NASARAH (BN-UMNO).12,588 MENANG",None,None,None,OTHER 57 | 3.,"DATUK SERI HAMZAH ZAINUDIN (PN-BERSATU) 28,350 MENANG",None,None,None,OTHER 58 | 2.,"MOHD MISBAHUL MUNIR MASDUKI (PN-PAS) 23,223 MENANG",None,None,None,OTHER 59 | 1.,"IDRIS AHMAD (PN-PAS) 33,753 MENANG",None,None,None,OTHER 60 | 1.,"SYED ABU HUSSIN HAFIZ SYED ABDUL FASAL (PN-BERSATU) 32,625 MENANG",None,None,None,OTHER 61 | 3.,"WONG KAH WOH (PH-DAP)47,098 MENANG",None,None,None,OTHER 62 | 1.,"KAPTEN AZAHARI HASAN (PN-Bersatu)12,931 MENANG",None,None,None,OTHER 63 | 4.,"S. KESAVAN (PH-PKR) 21,637 MENANG",None,None,None,OTHER 64 | 1.,"DATUK SERI ANWAR IBRAHIM (PH-PKR) 49,625 menang",None,None,None,OTHER 65 | 3.,"HOWARD LEE CHUAN HOW (PH-DAP) 57,549 menang",None,None,None,OTHER 66 | 2.,"M. KULASEGARAN (PH-DAP) 63,915 MENANG",None,None,None,OTHER 67 | 2.,"V. 
SIVAKUMAR (PH-DAP) 60,999 MENANG",None,None,None,OTHER 68 | 1.,"DATUK ISKANDAR DZULKARNAIN ABDUL KHALID (PN-BERSATU) 14,380 MENANG",None,None,None,OTHER 69 | 1.,"DATUK NGEH KOO HAM (PH-DAP) 46,710 MENANG",None,None,None,OTHER 70 | 1.,"MUHAMMAD ISMI MAT TAIB (PN-PAS)17,181 MENANG",None,None,None,OTHER 71 | 3.,"CHONG ZHEMIN (PH-DAP)30,467 MENANG",None,None,None,OTHER 72 | 3.,"TAN KAR HING (PH-DAP)55,880 MENANG",None,None,None,OTHER 73 | 4.,"DATUK SERI M SARAVANAN (BN-MIC)18,398 MENANG",None,None,None,OTHER 74 | 3.,"JAMALUDDIN YAHYA (PN-PAS) 24,897 MENANG",None,None,None,OTHER 75 | 3.,"KOMANDER (B) NORDIN AHMAD ISMAIL (PN-BERSATU) 25,212 MENANG",None,None,None,OTHER 76 | 4.,"DATUK SERI DR AHMAD ZAHID HAMIDI (BN-UMNO) 16,578 MENANG",None,None,None,OTHER 77 | 1.,"NGA KOR MING (PH-DAP) 33,133 MENANG",None,None,None,OTHER 78 | 3.,"CHANG LIH KANG (PH-PKR) 25,140 MENANG",None,None,None,OTHER 79 | 1.,"RAMLI MOHD NOR (BN-UMNO)...16,120 MENANG",None,None,None,OTHER 80 | 4.,"DATUK SERI ABDUL RAHMAN MOHAMAD (BN-UMNO) 17,672 MENANG",None,None,None,OTHER 81 | 1.,"CHOW YU HUI (PH-DAP) 21,613 MENANG",None,None,None,OTHER 82 | 3.,KHAIRIL NIZAM KHIRUDIN (PN-PAS)...31.701 MENANG,None,None,None,OTHER 83 | 2.,"SAIFUDDIN ABDULLAH (PN-BERSATU)...41,692 MENANG",None,None,None,OTHER 84 | 3.,"WAN RAZALI WAN NOR (PN-PAS)...25,514 MENANG",None,None,None,OTHER 85 | 4.,"MOHD SHAHAR ABDULLAH (BN-UMNO)...26,899 MENANG",None,None,None,OTHER 86 | 1.,"SH MOHMED PUZI SH ALI (BN-UMNO)..47,418 MENANG",None,None,None,OTHER 87 | 3.,"ISMAIL ABD MUTTALIB (PN-PAS).......19,600 MENANG",None,None,None,OTHER 88 | 2.,"KAMAL ASHAARI (PN-PAS) 22,505 MENANG",None,None,None,OTHER 89 | 2.,"SALAMIAH MOHD NOR (PN-PAS)...30,929 MENANG",None,None,None,OTHER 90 | 2.,"YOUNG SYEFURA OTHMAN (PH-DAP)...25,075 MENANG",None,None,None,OTHER 91 | 2.,"ISMAIL SABRI YAAKOB (BN-UMNO)...31,762 MENANG",None,None,None,OTHER 92 | 3.,"ABDUL KHALIB ABDULLAH (PN-BERSATU)... 
31,589 MENANG",None,None,None,OTHER 93 | 3.,"KALAM SALAN (PN-BERSATU) 17,973 MENANG",None,None,None,OTHER 94 | 3.,"MUSLIMIN YAHAYA (PN-BERSATU) 19,791 MENANG",None,None,None,OTHER 95 | 6.,"HASNIZAN HARUN (PN-BERSATU) 46,823 MENANG",None,None,None,OTHER 96 | 3.,"KAPTEN (B) DATUK DR ZULKAFPERI HANAPI (PN-Bersatu) 18,054 MENANG",None,None,None,OTHER 97 | 3.,"DZULKEFLY AHMAD (PH-AMANAH) 31,033 MENANG",None,None,None,OTHER 98 | 1.," ABDUL RASHID ASARI (PN-BERSATU) 49,154 MENANG", William Leong ,PH-PKR,72773,PH 99 | P98,GOMBAK, AMIRUDIN SHARI ,PH-PKR,72267,PH 100 | P99,AMPANG, RODZIAH ISMAIL ,PH-PKR,56754,PH 101 | P100,PANDAN, Rafizi Ramli ,PH-PKR,74002,PH 102 | P101,HULU LANGAT, MOHD SANY HAMZAN ,PH-AMANAH,58382,PH 103 | P102,BANGI, SYAHREDZAN JOHAN ,PH-DAP,141568,PH 104 | P103,PUCHONG, YEO BEE YIN ,PH-DAP,79425,PH 105 | P104,SUBANG, WONG CHEN ,PH-PKR,138259,PH 106 | P105,PETALING JAYA, LEE CHEAN CHUNG ,PH-PKR,83311,PH 107 | P106,DAMANSARA, GOBIND SINGH DEO ,PH-DAP,142875,PH 108 | P107,SUNGAI BULOH, DATUK R. 
RAMANAN ,PH-PKR,50943,PH 109 | P108,SHAH ALAM, AZLI YUSOF ,PH-AMANAH,61409,PH 110 | P109,KAPAR, HALIMAH ALI ,PN-PAS,65751,PN 111 | P110,KLANG, V GANABATIRAU ,PH-DAP,115539,PH 112 | P111,KOTA RAJA, MOHAMAD SABU ,PH-AMANAH,123306,PH 113 | P112,KUALA LANGAT, AHMAD YUNUS HAIRI ,PN-PAS,52867,PN 114 | P113,SEPANG, RAJ MUNNI @ AIMAN ATHIRAH ,PH-AMANAH,56264,PH 115 | P114,KEPONG, LIM LIP ENG ,PH-DAP,64308,PH 116 | P115,BATU, PRABAKARAN M PARAMESWARAN ,PH-PKR,45716,PH 117 | P116,WANGSA MAJU, ZAHIR HASSAN ,PH-PKR,46031,PH 118 | P117,SEGAMBUT, HANNAH YEOH TSEOW SUAN ,PH-DAP,68438,PH 119 | P118,SETIAWANGSA, NIK NAZMI NIK AHMAD ,PH-PKR,34434,PH 120 | P119,TITIWANGSA, JOHARI ABDUL GHANI ,BN-UMNO,25042,BN 121 | P120,BUKIT BINTANG, FONG KUI LUN ,PH-DAP,43827,PH 122 | P121,LEMBAH PANTAI, AHMAD FAHMI MOHAMED FADZIL ,PH-PKR,35359,PH 123 | P122,SEPUTEH, TERESA KOK SUH SIM ,PH-DAP,73234,PH 124 | P123,CHERAS, TAN KOK WAI ,PH-DAP,60294,PH 125 | P124,BANDAR TUN RAZAK,None,None,None,OTHER 126 | 5.,"RADZI MD JIDIN (PN-BERSATU) - 16,002 MENANG",None,None,None,OTHER 127 | 2.,"DATUK SERI JALALUDDIN ALIAS (BN-UMNO) 21,805 MENANG",None,None,None,OTHER 128 | 3.,"SHAMSHULKAHAR MOHD DELI (BN-UMNO).. 30,138 MENANG",None,None,None,OTHER 129 | 1.,"ANTHONY LOKE SIEW FOOK (PH-DAP)... 63,916 MENANG",None,None,None,OTHER 130 | 5.,"DATUK ADNAN ABU HASSAN (BN-UMNO) 21,423 MENANG",None,None,None,OTHER 131 | 3.,"CHA KEE CHIN (PH-DAP) .. 81,434 MENANG",None,None,None,OTHER 132 | 2.,"MOHAMAD HASAN (BN-UMNO).... 53,075 MENANG",None,None,None,OTHER 133 | 5.,"AMINUDDIN HARUN (PH-PKR)... 
42,013 MENANG",None,None,None,OTHER 134 | 1.,"DATUK MOHD ISAM MOHD ISA (BN-UMNO) 23,283 MENANG",None,None,None,OTHER 135 | 2.,"DATUK MAS ERMIEYATI SAMSUDIN (PN-Bersatu) 25,604 MENANG",None,None,None,OTHER 136 | 2.,"ADLY ZAHARI (PH-AMANAH) 28,178 MENANG",None,None,None,OTHER 137 | 4.,"BAKRI JAMALUDDIN (PN-PAS) 37,406 MENANG",None,None,None,OTHER 138 | 1.,"ADAM ADLI ABD HALIM (PH-PKR) 39,418 MENANG",None,None,None,OTHER 139 | 1.,"KHOO POAY TIONG (PH-DAP) 73,995 MENANG",None,None,None,OTHER 140 | 4.,"ZULKIFLI ISMAIL (PN-PAS) 27,893 MENANG",None,None,None,OTHER 141 | 3.,"YUNESWARAN L RAMARAJ (PH-PKR).... 23,437 MENANG",None,None,None,OTHER 142 | 4.,"ZALIHA MUSTAPHA (PH-PKR).......18,941 MENANG",None,None,None,OTHER 143 | 2.,"PANG HOK LIONG (PH-DAP)16,133 MENANG",None,None,None,OTHER 144 | 2.,"TAN SRI MUHYIDDIN YASSIN (PN-Bersatu) 24,986 MENANG",None,None,None,OTHER 145 | 1.,"SYED IBRAHIM SYED NOH (PH-PKR) 33,650 MENANG",None,None,None,OTHER 146 | 3.,"TAN HONG PIN (PH-DAP) 36,636 MENANG",None,None,None,OTHER 147 | 3.,"SYED SADDIQ ABDUL RAHMAN (PH-MUDA) .....19,961 MENANG",None,None,None,OTHER 148 | 1.,"DATUK SERI DR NORAINI AHMAD (BN-UMNO) 25,740 MENANG",None,None,None,OTHER 149 | 1.,"DATUK SERI DR WEE KA SIONG (BN-MCA) 18,911 MENANG",None,None,None,OTHER 150 | 1.,"AMINOLHUDA HASSAN (PH-AMANAH) ......23,242 MENANG",None,None,None,OTHER 151 | 1.,"ONN ABU BAKAR (PH-PKR) 45,242 MENANG",None,None,None,OTHER 152 | 4.,"DATUK SERI HASNI MOHAMMAD (BN-UMNO): 18,312 MENANG",None,None,None,OTHER 153 | 2.,"WONG SHU QI (PH-DAP) 49,801 MENANG",None,None,None,OTHER 154 | 1.,"DATUK SERI HISHAMMUDDIN HUSSEIN (BN-UMNO) 22,572 MENANG",None,None,None,OTHER 155 | 3.,"MUHAMMAD ISLAHUDDIN ABAS (PN-BERSATU) ....21,066 MENANG",None,None,None,OTHER 156 | 1.,"MANNDZRI NASIB (BN-UMNO) 21,185 MENANG",None,None,None,OTHER 157 | 1.,"DATUK SERI MOHAMED KHALED NORDIN (BN-UMNO)25,410 MENANG",None,None,None,OTHER 158 | 2.,"DATUK SERI AZALINA OTHMAN SAID (BN-UMNO): 21,738",None,None,None,OTHER 159 | 
1.,"JIMMY PUAH WEE TSE (PH-PKR) 83,959 MENANG",None,None,None,OTHER 160 | 2.,"HASSAN KARIM (PH-PKR)..71,233 MENANG",None,None,None,OTHER 161 | 4.,"AKMAL NASRULLAH MOHD NASIR (PH-PKR) 43,252 MENANG",None,None,None,OTHER 162 | 3.,"DATUK SERI SALAHUDDIN AYUB (PH-AMANAH) 64,900 MENANG",None,None,None,OTHER 163 | 3.,"LIEW CHIN TONG (PH-DAP)........................... 96,819 MENANG",None,None,None,OTHER 164 | 2.,"TEO NIE CHING (PH-DAP) ........................65,529 MENANG",None,None,None,OTHER 165 | 2.,"DATUK SERI AHMAD MASLAN (BN-UMNO) 23,201 MENANG",None,None,None,OTHER 166 | 2.,"DATUK SERI DR WEE JECK SENG (BN-MCA) 23,593 MENANG",None,None,None,OTHER 167 | 5.,"SUHAILI ABD RAHMAN (PN-BERSATU) - 8,124 MENANG",None,None,None,OTHER 168 | 4.,"VERDON BAHANDA (BEBAS)....16,323 MENANG",None,None,None,OTHER 169 | 6.,"WETROM BAHANDA (PKDM)............24,318 MENANG",None,None,None,OTHER 170 | 3.,"ISNARAISSAH MUNIRAH MAJILIS (Warisan)25,148 MENANG",None,None,None,OTHER 171 | 1.," MADIUS TANGAU (PH)....24,943 MENANG",None,None,None,OTHER 172 | 4.,"MUSTAPHA SAKMUD (PH-PKR)....27,022 MENANG",None,None,None,OTHER 173 | 4.,"CHAN FOONG HIN (PH-DAP)....31,359 MENANG",None,None,None,OTHER 174 | 2.,"SHAHELMEY YAHYA (BN-GRS)....16,234 MENANG",None,None,None,OTHER 175 | 1.,"DATUK EWON BENEDICK (PH-UPKO)29,066 MENANG",None,None,None,OTHER 176 | 6.,"ARMIZAN MOHD ALI (GRS-BERSATU)....22,620 MENANG",None,None,None,OTHER 177 | 1.,"DATUK MOHAMAD ALAMIN (BN-UMNO) 13,004 MENANG",None,None,None,OTHER 178 | 5.,"DATUK SITI AMINAH ACHING (BN-UMNO): 10,570 MENANG",None,None,None,OTHER 179 | 1.,"DATUK MATBALI MUSAH (GRS-Bersatu) 14,459 MENANG",None,None,None,OTHER 180 | 2.,"JONATHAN YASSIN (GRS-BERSATU)....22,606 MENANG",None,None,None,OTHER 181 | 3.,"JEFFREY KITINGAN (GRS-STAR)....23,155 MENANG",None,None,None,OTHER 182 | 3.,"RIDUAN RUBIN (BEBAS) 10,027 MENANG",None,None,None,OTHER 183 | 5.,"ARTHUR JOSEPH KURUP (BN)....19,623 MENANG",None,None,None,OTHER 184 | 4.,"DATUK SERI DR RONALD KIANDEE 
(PN-Bersatu) 11,303 MENANG",None,None,None,OTHER 185 | 6.,"SUHAIMI NASIR (BN-UMNO)....22,969 MENANG",None,None,None,OTHER 186 | 4.,"MOHD KHAIRUL FIRDAUS AKBARKHAN (GRS-BERSATU)....12,152 MENANG",None,None,None,OTHER 187 | 4.,"VIVIAN WONG SHIR YEE (PH-DAP) 16,673 MENANG",None,None,None,OTHER 188 | 2.,"DATUK SERI BUNG MOKTAR RADIN (BN-UMNO) 16,842 MENANG",None,None,None,OTHER 189 | 2.,"DATUK MOHAMMAD YUSOF APDAL (WARISAN) 27,116 MENANG",None,None,None,OTHER 190 | 2.,"MOHD SHAFIE APDAL (WARISAN)....28,702 MENANG",None,None,None,OTHER 191 | 6.,"LO SU FUI (GRS-PBS) 19,865 MENANG",None,None,None,OTHER 192 | 2.,"ANDI MUHAMMAD SURYADI BANDY (BN-UMNO)....23,855 MENANG",None,None,None,OTHER 193 | 3.,"MORDI BIMOL (DAP). -----17,274 MENANG",None,None,None,OTHER 194 | 1.,"DATUK SERI NANCY SHUKRI (GPS-PBB) 43,739 MENANG",None,None,None,OTHER 195 | 1.," FADILLAH YUSOF (GPS) .----------54,745 MENANG",None,None,None,OTHER 196 | 1.," DR KELVIN YII LEE WUEN (PH-DAP) ------45,353 MENANG",None,None,None,OTHER 197 | 1.," CHONG CHIENG JEN (PH-DAP) ------ 39,310 MENANG",None,None,None,OTHER 198 | 1.,"DATUK RUBIAH WANG (GPS-PBB) 42,278 MENANG",None,None,None,OTHER 199 | 1.,"DATUK WILLIE MONGIN (GPS-PBB) 29,457 MENANG",None,None,None,OTHER 200 | 2.,"RICHARD RIOT JAEM (GPS) 22,876 MENANG",None,None,None,OTHER 201 | 1.,"RODIYAH SAPIEE (GPS-PBB): 18,668 MENANG",None,None,None,OTHER 202 | 1.," MOHAMAD SHAFIZAN KEPLI (GPS) 19,627 MENANG",None,None,None,OTHER 203 | 1.,"DATUK SERI DORIS SOPHIA BRODIE (GPS-PRS) 14,131 MENANG",None,None,None,OTHER 204 | 3.,"ROY ANGAU GINGKOI (GPS) 6,644 MENANG",None,None,None,OTHER 205 | 3.,"DR RICHARD RAPU @ AMAN BEGRI (GPS-PBB): 16,479 MENANG",None,None,None,OTHER 206 | 2.,"ALI BIJU (PN-BERSATU)....19,223 MENANG",None,None,None,OTHER 207 | 1.,"YUSUF ABD WAHAB (GPS-PBB)16,474 MENANG",None,None,None,OTHER 208 | 1.,"AHMAD JOHNIE ZAWAWI (GPS-PBB) 15,824 MENANG",None,None,None,OTHER 209 | 1.,"DATUK SERI HUANG TIONG SII (GPS-SUPP) 20,080 MENANG",None,None,None,OTHER 210 
| 3.,"DATUK LARRY SNG WEI SHIEN (PBM) 9,159 MENANG",None,None,None,OTHER 211 | 3.,"DATUK AARON AGO DAGANG (GPS-PRS) 7,411 MENANG",None,None,None,OTHER 212 | 3.,"ALICE LAU KIONG YIENG (DAP-PH) 30,120 MENANG",None,None,None,OTHER 213 | 1.,"OSCAR LING CHAI YEW (DAP-PH) 31,287 MENANG",None,None,None,OTHER 214 | 1.,"DATUK HANIFAH HAJAR TAIB (GPS-PBB) 21,733 MENANG",None,None,None,OTHER 215 | 2.,"EDWIN BANTA (GPS-PRS)16,078 MENANG",None,None,None,OTHER 216 | 1.,"DATUK SERI ALEXANDER NANTA LINGGI (GPS-PBB) 16,522 MENANG",None,None,None,OTHER 217 | 2.,"DATUK WILSON UGAK KUMBONG (GPS-PRS) 15,456 MENANG",None,None,None,OTHER 218 | 2.,"DATUK SERI TIONG KING SING (GPS-PDP) 43,455 MENANG",None,None,None,OTHER 219 | 1.,"LUKANISMAN AWANG SAUNI (GPS-PBB) 22,150 MENANG",None,None,None,OTHER 220 | 3.,"CHIEW CHOON MAN (PH-PKR) 39,549 MENANG",None,None,None,OTHER 221 | 1.,"DATUK ANYI NGAU (GPS-PDP) 18,399 MENANG", DATUK ANYI NGAU ,GPS-PDP,18399,GPS 222 | P221,LIMBANG, DATUK HASBI HABIBOLLAH ,GPS-PBB,14897,GPS 223 | P222,LAWAS, DATUK HENRY SUM AGONG ,GPS-PBB,11361,GPS 224 | -------------------------------------------------------------------------------- /selenium/SamVerse/Selenium_SamVerse.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "cf69b245", 6 | "metadata": {}, 7 | "source": [ 8 | "# WEB SCRAPING GE-15 WITH SELENIUM\n", 9 | "\n", 10 | "For this assignment, we will be doing web scraping by using a library called Selenium. The website that we have chosen is the official website of Berita Harian regarding the results of PRU15 in Malaysia. 
It contains the name of the party, the parliament and the number of votes.\n", 11 | "\n", 12 | "#### Group Members:\n", 13 | "|Name |Matric|\n", 14 | "|:---:|:---:|\n", 15 | "|LUQMAN ARIFF BIN NOOR AZHAR |A20EC0202|\n", 16 | "|AHMAD AIMAN HAFIZI BIN MUHAMMAD\t| A20EC0177|\n", 17 | "|LEE CAI XUAN\t| A20EC0062|\n", 18 | "|MYZA NAZIFA BINTI NAZRY\t| A20EC0219|" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "id": "b7b589ee", 24 | "metadata": {}, 25 | "source": [ 26 | "First, we will install the Selenium package." 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 1, 32 | "id": "ae1892ef", 33 | "metadata": {}, 34 | "outputs": [ 35 | { 36 | "name": "stdout", 37 | "output_type": "stream", 38 | "text": [ 39 | "Requirement already satisfied: selenium in c:\\users\\hp\\anaconda3\\lib\\site-packages (4.7.2)\n", 40 | "Requirement already satisfied: trio-websocket~=0.9 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from selenium) (0.9.2)\n", 41 | "Requirement already satisfied: urllib3[socks]~=1.26 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from selenium) (1.26.11)\n", 42 | "Requirement already satisfied: trio~=0.17 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from selenium) (0.22.0)\n", 43 | "Requirement already satisfied: certifi>=2021.10.8 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from selenium) (2022.9.14)\n", 44 | "Requirement already satisfied: exceptiongroup>=1.0.0rc9 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from trio~=0.17->selenium) (1.1.0)\n", 45 | "Requirement already satisfied: sniffio in c:\\users\\hp\\anaconda3\\lib\\site-packages (from trio~=0.17->selenium) (1.2.0)\n", 46 | "Requirement already satisfied: idna in c:\\users\\hp\\anaconda3\\lib\\site-packages (from trio~=0.17->selenium) (3.3)\n", 47 | "Requirement already satisfied: sortedcontainers in c:\\users\\hp\\anaconda3\\lib\\site-packages (from trio~=0.17->selenium) (2.4.0)\n", 48 | "Requirement already satisfied: outcome in
c:\\users\\hp\\anaconda3\\lib\\site-packages (from trio~=0.17->selenium) (1.2.0)\n", 49 | "Requirement already satisfied: attrs>=19.2.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from trio~=0.17->selenium) (21.4.0)\n", 50 | "Requirement already satisfied: cffi>=1.14 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from trio~=0.17->selenium) (1.15.1)\n", 51 | "Requirement already satisfied: async-generator>=1.9 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from trio~=0.17->selenium) (1.10)\n", 52 | "Requirement already satisfied: wsproto>=0.14 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from trio-websocket~=0.9->selenium) (1.2.0)\n", 53 | "Requirement already satisfied: PySocks!=1.5.7,<2.0,>=1.5.6 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from urllib3[socks]~=1.26->selenium) (1.7.1)\n", 54 | "Requirement already satisfied: pycparser in c:\\users\\hp\\anaconda3\\lib\\site-packages (from cffi>=1.14->trio~=0.17->selenium) (2.21)\n", 55 | "Requirement already satisfied: h11<1,>=0.9.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from wsproto>=0.14->trio-websocket~=0.9->selenium) (0.14.0)\n" 56 | ] 57 | } 58 | ], 59 | "source": [ 60 | "!pip install selenium" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "id": "8e39d720", 66 | "metadata": {}, 67 | "source": [ 68 | "Then, we will import all the necessary libraries for web-scraping." 
69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 2, 74 | "id": "13af71b3", 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "from selenium import webdriver\n", 79 | "from selenium.webdriver.chrome.options import Options\n", 80 | "from selenium.webdriver.chrome.service import Service\n", 81 | "from selenium.webdriver.common.by import By\n", 82 | "from selenium.webdriver.support.ui import WebDriverWait\n", 83 | "from selenium.webdriver.support import expected_conditions as EC" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "id": "9932258e", 89 | "metadata": {}, 90 | "source": [ 91 | "Once we have imported the libraries, we will then get the data by connecting to the Chrome webdriver." 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 3, 97 | "id": "39e49850", 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "#get the data\n", 102 | "#define the browser\n", 103 | "\n", 104 | "chrome_opts = Options()\n", 105 | "chrome_opts.add_argument(\"--disable-notifications\")\n", 106 | "driver = webdriver.Chrome(service=Service(\"chromedriver.exe\"),options=chrome_opts)\n", 107 | "#open the browser and go to the page\n", 108 | "driver.get(\"https://www.bharian.com.my/berita/nasional/2022/11/1028360/keputusan-rasmi-pru15\")\n", 109 | "#get the section of website with the information\n", 110 | "content = WebDriverWait(driver,20).until(EC.presence_of_element_located((By.CLASS_NAME,'dable-content-wrapper')))\n", 111 | "text = content.text\n", 112 | "#close the browser\n", 113 | "driver.close()" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "id": "c0ce62f6", 119 | "metadata": {}, 120 | "source": [ 121 | "Then, we will do data cleaning such as split by space, extracting matching patterns and others. After we finished with the data cleaning, we will combined the data into a sorted pandas data frame." 
122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 4, 127 | "id": "c65cbf15", 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "import re \n", 132 | "import pandas as pd\n", 133 | "#split the content into lines\n", 134 | "lines = [l for l in text.split('\\n')]\n", 135 | "#only extract all winners (there are many different patterns)\n", 136 | "patt = \"^P*\\d+\\:* .+\" #pattern to get the parliment number and names\n", 137 | "#except P17 PADANG SERAI because not contested\n", 138 | "parliments = [(i,x) for i, x in enumerate(lines)\n", 139 | " if re.match(patt,x)\n", 140 | " and \"P17 PADANG SERAI\" not in x]\n", 141 | "\n", 142 | "# sepecific because no 'MENANG' mentioned in line\n", 143 | "#selayang and bandar tun razak wrong winners stated in the article\n", 144 | "special = [(i,x) for i,x in enumerate(lines)\n", 145 | " if '1. DATUK ANYI NGAU (GPS-PDP)' in x or #baram\n", 146 | " '2. DATUK SERI AZALINA OTHMAN SAID (BN-UMNO)' in x or #pengerang\n", 147 | " '1. Wan Azizah Wan Ismail (PH-PKR)' in x or #bandar tun razak\n", 148 | " '5. William Leong (PH-PKR)' in x] #selayang\n", 149 | "\n", 150 | "#contains menang except bandar tun razak and selayang\n", 151 | "members = [(i,x) for i,x in enumerate(lines)\n", 152 | " if \"menang\" in x.lower() and\n", 153 | " '2. KAMARUDIN JAFFAR (PN-BERSATU)' not in x and #bandar tun razak\n", 154 | " '1. 
ABDUL RASHID ASARI (PN-BERSATU)' not in x] #selayang\n", 155 | "\n", 156 | "#keep data into dataframes\n", 157 | "ps = pd.DataFrame(parliments, columns=['line', 'item'])\n", 158 | "sp = pd.DataFrame(special, columns=['line','item'])\n", 159 | "ms = pd.DataFrame(members, columns=['line','item'])" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 5, 165 | "id": "724e257a", 166 | "metadata": {}, 167 | "outputs": [], 168 | "source": [ 169 | "#put all the result back into one dataframe\n", 170 | "sorted_results = pd.concat([ps,ms,sp]).sort_values('line').reset_index(drop=True)\n", 171 | "#index odd is parliment and index even is winner\n", 172 | "plms = sorted_results[sorted_results.index%2 == 0].reset_index(drop=True)\n", 173 | "mems = sorted_results[sorted_results.index%2 == 1].reset_index(drop=True)\n", 174 | "results = plms.join(mems, lsuffix='_').drop(['line_','line'],axis=1)\n", 175 | "results.columns = ['p','m']" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 6, 181 | "id": "ac0ea482", 182 | "metadata": { 183 | "scrolled": false 184 | }, 185 | "outputs": [ 186 | { 187 | "data": { 188 | "text/html": [ 189 | "
\n", 190 | "\n", 203 | "\n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | "
pm
0P1 PADANG BESAR3. RUSHDAN RUSMI (PN-PAS) 24,267 MENANG
1P2 KANGAR1. ZAKRI HASSAN (PN-BERSATU) 24,562 MENANG
2P3 ARAU2. SHAHIDAN KASSIM (PN- BERSATU) 31,458 MENANG
3P4 LANGKAWI3. SUHAIMI ABDULLAH (PN-BERSATU) 25,463 MENANG
4P5 JERLUN1. ABD GHANI AHMAD (PN-PAS) 31,685 MENANG
\n", 239 | "
" 240 | ], 241 | "text/plain": [ 242 | " p m\n", 243 | "0 P1 PADANG BESAR 3. RUSHDAN RUSMI (PN-PAS) 24,267 MENANG\n", 244 | "1 P2 KANGAR 1. ZAKRI HASSAN (PN-BERSATU) 24,562 MENANG\n", 245 | "2 P3 ARAU 2. SHAHIDAN KASSIM (PN- BERSATU) 31,458 MENANG\n", 246 | "3 P4 LANGKAWI 3. SUHAIMI ABDULLAH (PN-BERSATU) 25,463 MENANG\n", 247 | "4 P5 JERLUN 1. ABD GHANI AHMAD (PN-PAS) 31,685 MENANG" 248 | ] 249 | }, 250 | "execution_count": 6, 251 | "metadata": {}, 252 | "output_type": "execute_result" 253 | } 254 | ], 255 | "source": [ 256 | "results.head()" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": 7, 262 | "id": "bb57eb43", 263 | "metadata": {}, 264 | "outputs": [], 265 | "source": [ 266 | "#extract parliment numbers and parliment names\n", 267 | "pnum = results['p'].apply(lambda x: x.split(\" \",1)[0])\n", 268 | "pname = results['p'].apply(lambda x: x.split(\" \",1)[1])\n", 269 | "# extract member's names, party, and their received votes\n", 270 | "mname = results['m'].apply(lambda x: re.match('^\\d*.(.+)\\((.+)\\)\\s?(.+)',x).groups()[0] if re.match('^\\d*.(.+)\\((.+)\\)\\s?(.+)',x) else None).astype(str)\n", 271 | "mparty = results['m'].apply(lambda x: re.match('^\\d*.(.+)\\((.+)\\)\\s?(.+)',x).groups()[1] if re.match('^\\d*.(.+)\\((.+)\\)\\s?(.+)',x) else None).astype(str)\n", 272 | "mvotes = results['m'].apply(lambda x: re.match('^\\d*.(.+)\\((.+)\\)\\s?(.+)',x).groups()[2] if re.match('^\\d*.(.+)\\((.+)\\)\\s?(.+)',x) else None).astype(str)\n", 273 | "# clean the text votes and turn to integer\n", 274 | "mvotes = (\n", 275 | " mvotes.str.replace('.','',regex=False)\n", 276 | " .str.replace(',','', regex=False)\n", 277 | " .str.replace('MENANG','',regex=False)\n", 278 | " .str.replace('menang','',regex=False)\n", 279 | " .str.replace('-','',regex=False)\n", 280 | " .str.replace(':','',regex=False)\n", 281 | " .str.replace(' ','',regex=False)\n", 282 | ")" 283 | ] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "id": "39bab463", 288 | 
"metadata": {}, 289 | "source": [ 290 | "The newly extracted columns will now be combined into a single dataframe with appropriate column names." 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": 8, 296 | "id": "ae14a76d", 297 | "metadata": {}, 298 | "outputs": [], 299 | "source": [ 300 | "#combine all the extracted data back into a dataframe\n", 301 | "df = pd.concat([pnum,pname,mname,mparty,mvotes],axis=1)\n", 302 | "df.columns = ['pnum','parliment','mp','party','votes']" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": 9, 308 | "id": "80d6eda8", 309 | "metadata": {}, 310 | "outputs": [], 311 | "source": [ 312 | "#name the coalition\n", 313 | "def coalition(party):\n", 314 | " for x in ['PN','BN','PH', 'GPS','GRS','BEBAS']:\n", 315 | " if x in party:\n", 316 | " return x\n", 317 | " for x in ['Warisan', 'WARISAN']:\n", 318 | " if x in party:\n", 319 | " return 'Warisan'\n", 320 | " for x in ['PAS', 'BERSATU']:\n", 321 | " if x in party:\n", 322 | " return 'PN'\n", 323 | " if 'DAP' in party:\n", 324 | " return 'PH'\n", 325 | " return 'OTHER'\n", 326 | "\n", 327 | "df['coalition'] = df['party'].apply(coalition)" 328 | ] 329 | }, 330 | { 331 | "cell_type": "markdown", 332 | "id": "17838c93", 333 | "metadata": {}, 334 | "source": [ 335 | "The newly cleaned data will look like below:" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": 10, 341 | "id": "8c2d3251", 342 | "metadata": {}, 343 | "outputs": [ 344 | { 345 | "data": { 346 | "text/html": [ 347 | "
\n", 348 | "\n", 361 | "\n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | "
pnumparlimentmppartyvotescoalition
0P1PADANG BESARRUSHDAN RUSMIPN-PAS24267PN
1P2KANGARZAKRI HASSANPN-BERSATU24562PN
2P3ARAUSHAHIDAN KASSIMPN- BERSATU31458PN
3P4LANGKAWISUHAIMI ABDULLAHPN-BERSATU25463PN
4P5JERLUNABD GHANI AHMADPN-PAS31685PN
\n", 421 | "
" 422 | ], 423 | "text/plain": [ 424 | " pnum parliment mp party votes coalition\n", 425 | "0 P1 PADANG BESAR RUSHDAN RUSMI PN-PAS 24267 PN\n", 426 | "1 P2 KANGAR ZAKRI HASSAN PN-BERSATU 24562 PN\n", 427 | "2 P3 ARAU SHAHIDAN KASSIM PN- BERSATU 31458 PN\n", 428 | "3 P4 LANGKAWI SUHAIMI ABDULLAH PN-BERSATU 25463 PN\n", 429 | "4 P5 JERLUN ABD GHANI AHMAD PN-PAS 31685 PN" 430 | ] 431 | }, 432 | "execution_count": 10, 433 | "metadata": {}, 434 | "output_type": "execute_result" 435 | } 436 | ], 437 | "source": [ 438 | "df.head()" 439 | ] 440 | }, 441 | { 442 | "cell_type": "markdown", 443 | "id": "296cf4e9", 444 | "metadata": {}, 445 | "source": [ 446 | "Lastly, export the dataframe into a CSV file" 447 | ] 448 | }, 449 | { 450 | "cell_type": "code", 451 | "execution_count": 11, 452 | "id": "3742682b", 453 | "metadata": {}, 454 | "outputs": [], 455 | "source": [ 456 | "df.to_csv('PRU15_Results.csv', index=False)" 457 | ] 458 | } 459 | ], 460 | "metadata": { 461 | "kernelspec": { 462 | "display_name": "Python 3 (ipykernel)", 463 | "language": "python", 464 | "name": "python3" 465 | }, 466 | "language_info": { 467 | "codemirror_mode": { 468 | "name": "ipython", 469 | "version": 3 470 | }, 471 | "file_extension": ".py", 472 | "mimetype": "text/x-python", 473 | "name": "python", 474 | "nbconvert_exporter": "python", 475 | "pygments_lexer": "ipython3", 476 | "version": "3.9.13" 477 | } 478 | }, 479 | "nbformat": 4, 480 | "nbformat_minor": 5 481 | } 482 | -------------------------------------------------------------------------------- /lxml/QUAD/QUAD_LXML.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "authorship_tag": "ABX9TyMeLqJzqtUB9f6IYkOxydJM", 8 | "include_colab_link": true 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "language_info": { 15 | "name": "python" 16 | } 17 | 
}, 18 | "cells": [ 19 | { 20 | "cell_type": "markdown", 21 | "metadata": { 22 | "id": "view-in-github", 23 | "colab_type": "text" 24 | }, 25 | "source": [ 26 | "\"Open" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "source": [ 32 | "## Web Scraping using LXML\n", 33 | "\n", 34 | "
\n", 35 | "

\n", 36 | " \n", 37 | "

\n", 38 | "
\n", 39 | "\n", 40 | "🚀 Group Members QUAD\n", 41 | "\n", 42 | "> 1. CHONG KAI ZHE\n", 43 | "> 2. TERENCE A/L LOORTHANATHAN\n", 44 | "> 3. RISHMA FATHIMA BINTI BASHER\n", 45 | "> 4. NUR SYAMALIA FAIQAH BINTI MOHD KAMAL" 46 | ], 47 | "metadata": { 48 | "id": "y5vCLNMhj4ae" 49 | } 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "source": [ 54 | "In this notebook, we will show you how to scrape a website using lxml. lxml is a Python library for parsing and manipulating XML and HTML documents. It provides a way to navigate, search, and modify the elements and attributes of an XML or HTML document using a simple and consistent API.\n", 55 | "\n", 56 | "The library is built on top of the libxml2 and libxslt C libraries, which provide fast and efficient parsing and manipulation of XML and HTML documents. lxml provides a Pythonic API that is easy to use and intuitive for Python programmers, while still being very powerful and flexible.\n", 57 | "\n", 58 | "
\n", 59 | "\n", 60 | "---\n", 61 | "
\n", 62 | "Why use lxml?
\n", 63 | "lxml is considered to be one of the most feature-rich and stable XML and HTML parsing libraries for Python. It's considered to be much faster than other libraries like BeautifulSoup, and it's more powerful when it comes to handling complex xpath and xslt.\n", 64 | "\n", 65 | "
\n", 66 | "For more information on lxml please go to this link https://lxml.de/ \n", 67 | "

\n", 68 | "\n", 69 | "---\n", 70 | "
\n", 71 | "\n", 72 | "
\n", 73 | "

\n", 74 | " \n", 75 | "

\n", 76 | "
\n", 77 | "\n", 78 | "What website are we trying to scrape?
\n", 79 | "We are going to use the most widely used online job search website in Malaysia, Jobstreet. Jobstreet operates primarily in Southeast Asia, including countries such as Malaysia, Singapore, the Philippines, Indonesia, and Vietnam. However, it has established its HQ in Malaysia.\n", 80 | "

\n", 81 | "\n", 82 | "---\n", 83 | "
\n", 84 | "What data are we going to scrape?
\n", 85 | "We are going to retrieve data on job offerings for Computer/Information Technology specialists. We will get basic information about each job offering, such as which company is offering it, what the salary is, and what the job title is.
\n" 86 | ], 87 | "metadata": { 88 | "id": "F2qu0IH6kE6o" 89 | } 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "source": [ 94 | "**First step** - Install all the required libraries.\n", 95 | "
\n", 96 | "Since lxml is not pre-installed, we have to install it manually with `!pip install lxml`. But why do we need `!pip install requests`? We need the requests library to retrieve the HTML content of the website we are trying to scrape.\n", 97 | "\n" 98 | ], 99 | "metadata": { 100 | "id": "nj8BNPHYn3m8" 101 | } 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": { 107 | "colab": { 108 | "base_uri": "https://localhost:8080/" 109 | }, 110 | "id": "BEO6dzbHzZDR", 111 | "outputId": "33aa1b45-ac11-4061-e375-4cee6e1679ce" 112 | }, 113 | "outputs": [ 114 | { 115 | "output_type": "stream", 116 | "name": "stdout", 117 | "text": [ 118 | "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", 119 | "Requirement already satisfied: requests in /usr/local/lib/python3.8/dist-packages (2.25.1)\n", 120 | "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests) (1.24.3)\n", 121 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.8/dist-packages (from requests) (2022.12.7)\n", 122 | "Requirement already satisfied: chardet<5,>=3.0.2 in /usr/local/lib/python3.8/dist-packages (from requests) (4.0.0)\n", 123 | "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.8/dist-packages (from requests) (2.10)\n", 124 | "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", 125 | "Requirement already satisfied: lxml in /usr/local/lib/python3.8/dist-packages (4.9.2)\n" 126 | ] 127 | } 128 | ], 129 | "source": [ 130 | "!pip install requests\n", 131 | "!pip install lxml" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "source": [ 137 | "**Second step** - Import the required libraries that are going to be used\n", 138 | "
\n", 139 | "As explained we need requests library to to retrieve the HTML content of the website. We need lxml to parse the HTML and locate elements using the specified xpath. We also need dataframe functionality from pandas." 140 | ], 141 | "metadata": { 142 | "id": "2G-eIMYJq54f" 143 | } 144 | }, 145 | { 146 | "cell_type": "code", 147 | "source": [ 148 | "import requests\n", 149 | "from lxml import html\n", 150 | "import pandas as pd" 151 | ], 152 | "metadata": { 153 | "id": "io59fvug8dtZ" 154 | }, 155 | "execution_count": null, 156 | "outputs": [] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "source": [ 161 | "**Third step** - Use the requests package to retrieve the HTML source for the first page of 30 job offerings" 162 | ], 163 | "metadata": { 164 | "id": "HTcVq3RDrW0-" 165 | } 166 | }, 167 | { 168 | "cell_type": "code", 169 | "source": [ 170 | "url = 'https://www.jobstreet.com.my/en/job-search/job-vacancy.php?specialization=191%2C192%2C193'\n", 171 | "\n", 172 | "response = requests.get(url)\n", 173 | "\n", 174 | "tree = html.fromstring(response.content)" 175 | ], 176 | "metadata": { 177 | "id": "wATQKytO8gu4" 178 | }, 179 | "execution_count": null, 180 | "outputs": [] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "source": [ 185 | "**Fourth step** - create a variable and store the value retrieved from xpath `'//div[@class=\"sx2jih0 zcydq876 zcydq866 zcydq896 zcydq886 zcydq8n zcydq856 zcydq8f6 zcydq8eu\"]'` which finds all div elements with a class attribute whose value is `\"sx2jih0 zcydq876 zcydq866 zcydq896 zcydq886 zcydq8n zcydq856 zcydq8f6 zcydq8eu\"`. We then can use the variable to iterate and find information of each job offering." 
186 | ], 187 | "metadata": { 188 | "id": "5HzZwOTisLb_" 189 | } 190 | }, 191 | { 192 | "cell_type": "code", 193 | "source": [ 194 | "elements = tree.xpath('//div[@class=\"sx2jih0 zcydq876 zcydq866 zcydq896 zcydq886 zcydq8n zcydq856 zcydq8f6 zcydq8eu\"]')" 195 | ], 196 | "metadata": { 197 | "id": "AOOXjiQ0ra2c" 198 | }, 199 | "execution_count": null, 200 | "outputs": [] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "source": [ 205 | "**Fifth step** - Now we have to access specific sub-elements using the xpath from before

\n", 206 | "\n", 207 | "We can use a `for loop` to iterate through each element in the elements list, and for each element use the xpath() method with the specified xpath to locate the specific sub-elements. The xpath() method will then return a list of elements that match the specified xpath, so we then need to use indexing to access the first element in the list. \n", 208 | "\n", 209 | "> While looking through the job offerings, we figured that some companies prefer to not give full information of the job offering. This could be because of confidentiality issues, or the recruiter just simply forgot. Because of this we have to make sure that if the list is empty (no element matched the xpath) it assigns an empty string.\n", 210 | "\n", 211 | "The extracted data has to be appended to the data list, which will be used to create a Pandas dataframe." 212 | ], 213 | "metadata": { 214 | "id": "t6cXp7fis_5C" 215 | } 216 | }, 217 | { 218 | "cell_type": "code", 219 | "source": [ 220 | "data = []\n", 221 | "\n", 222 | "for element in elements:\n", 223 | " \n", 224 | " #Get company name it is in span tag with class attribute value of sx2jih0\n", 225 | " company_name = element.xpath('.//span[@class=\"sx2jih0\"]/text()')\n", 226 | " company_name = company_name[0] if company_name else '' #If value is empty then make sure it is represented\n", 227 | "\n", 228 | " #Get Job Title being offered it is in a tag with data-automation attribute value of jobCardCompanyLink\n", 229 | " job_title = element.xpath('.//a[@data-automation=\"jobCardCompanyLink\"]/text()')\n", 230 | " job_title = job_title[0] if job_title else 'Company Confidential' #If value is empty then company prefers to be confidentiality\n", 231 | " \n", 232 | " #Get Job Location being offered it is in a tag with data-automation attribute value of jobCardLocationLink\n", 233 | " job_loc = element.xpath('.//a[@data-automation=\"jobCardLocationLink\"]/text()')\n", 234 | " job_loc = job_loc[0] if job_loc else '' #If value is 
empty then make sure it is represented\n", 235 | "\n", 236 | " #Get Salary being offered it is in span tag with class attribute value of sx2jih0 zcydq84u es8sxo0 es8sxo3 es8sxo21 es8sxoh \n", 237 | " salary = element.xpath('.//span[@class=\"sx2jih0 zcydq84u es8sxo0 es8sxo3 es8sxo21 es8sxoh\"]/text()')\n", 238 | " salary = salary[0] if salary else 'Not Specified' #If value is empty then company did not specify salary\n", 239 | "\n", 240 | " #Get the first Benefit being offered it is in span tag with class attribute value of sx2jih0 zcydq84u es8sxo0 es8sxo1 es8sxo21 _1d0g9qk4 es8sxo7 \n", 241 | " benefit = element.xpath('.//span[@class=\"sx2jih0 zcydq84u es8sxo0 es8sxo1 es8sxo21 _1d0g9qk4 es8sxo7\"]/text()')\n", 242 | " benefit = benefit[0] if benefit else 'Nothing' #If value is empty then company did not specify benefit\n", 243 | "\n", 244 | " #Append value into list\n", 245 | " data.append([company_name, job_title, job_loc, salary, benefit])" 246 | ], 247 | "metadata": { 248 | "id": "fRQLv7O-rlBG" 249 | }, 250 | "execution_count": null, 251 | "outputs": [] 252 | }, 253 | { 254 | "cell_type": "markdown", 255 | "source": [ 256 | "**Sixth step** - Convert the list to a dataframe, rename the columns as well" 257 | ], 258 | "metadata": { 259 | "id": "DehWo8GfuZNb" 260 | } 261 | }, 262 | { 263 | "cell_type": "code", 264 | "source": [ 265 | "df = pd.DataFrame(data, columns=['Job Title', 'Company Name', 'Job Location', 'Salary', 'Benefit'])\n", 266 | "df.head()" 267 | ], 268 | "metadata": { 269 | "colab": { 270 | "base_uri": "https://localhost:8080/", 271 | "height": 337 272 | }, 273 | "id": "j6YgtN3-rnNC", 274 | "outputId": "3fa1db2d-1945-4010-ddd4-a32663f2ad33" 275 | }, 276 | "execution_count": null, 277 | "outputs": [ 278 | { 279 | "output_type": "execute_result", 280 | "data": { 281 | "text/plain": [ 282 | " Job Title \\\n", 283 | "0 Senior Software Engineer / Software Engineer (... 
\n", 284 | "1 Software Developer ( Java ) \n", 285 | "2 Data Analyst \n", 286 | "3 Software Engineer (Java) \n", 287 | "4 Internship for Information Technology (IT) Stu... \n", 288 | "\n", 289 | " Company Name Job Location \\\n", 290 | "0 Ideagen Plc. Subang Jaya \n", 291 | "1 Wiseview Information Technology Kuala Lumpur \n", 292 | "2 Zempot Malaysia Sdn. Bhd. Johor Bahru \n", 293 | "3 Ideagen Plc. Subang Jaya \n", 294 | "4 Infineon Technologies (Malaysia) Sdn Bhd Melaka \n", 295 | "\n", 296 | " Salary Benefit \n", 297 | "0 Not Specified 13 Month Salary \n", 298 | "1 MYR 6K - 8,400 monthly Nothing \n", 299 | "2 Not Specified Training Provided \n", 300 | "3 Not Specified 13 Month Salary \n", 301 | "4 Not Specified Nothing " 302 | ], 303 | "text/html": [ 304 | "\n", 305 | "
\n", 306 | "
\n", 307 | "
\n", 308 | "\n", 321 | "\n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | "
Job TitleCompany NameJob LocationSalaryBenefit
0Senior Software Engineer / Software Engineer (...Ideagen Plc.Subang JayaNot Specified13 Month Salary
1Software Developer ( Java )Wiseview Information TechnologyKuala LumpurMYR 6K - 8,400 monthlyNothing
2Data AnalystZempot Malaysia Sdn. Bhd.Johor BahruNot SpecifiedTraining Provided
3Software Engineer (Java)Ideagen Plc.Subang JayaNot Specified13 Month Salary
4Internship for Information Technology (IT) Stu...Infineon Technologies (Malaysia) Sdn BhdMelakaNot SpecifiedNothing
\n", 375 | "
\n", 376 | " \n", 386 | " \n", 387 | " \n", 424 | "\n", 425 | " \n", 449 | "
\n", 450 | "
\n", 451 | " " 452 | ] 453 | }, 454 | "metadata": {}, 455 | "execution_count": 91 456 | } 457 | ] 458 | }, 459 | { 460 | "cell_type": "markdown", 461 | "source": [ 462 | "**Seventh step** - Make sure there are no null values\n", 463 | "

\n", 464 | "use `.isnull()` to check if there are null values" 465 | ], 466 | "metadata": { 467 | "id": "bMtSzPZXumH7" 468 | } 469 | }, 470 | { 471 | "cell_type": "code", 472 | "source": [ 473 | "df.isnull().sum()" 474 | ], 475 | "metadata": { 476 | "colab": { 477 | "base_uri": "https://localhost:8080/" 478 | }, 479 | "id": "tWlwIxPzVnwH", 480 | "outputId": "4d45dfc2-389c-4852-ca7a-74489ee3dbb4" 481 | }, 482 | "execution_count": null, 483 | "outputs": [ 484 | { 485 | "output_type": "execute_result", 486 | "data": { 487 | "text/plain": [ 488 | "Job Title 0\n", 489 | "Company Name 0\n", 490 | "Job Location 0\n", 491 | "Salary 0\n", 492 | "Benefit 0\n", 493 | "dtype: int64" 494 | ] 495 | }, 496 | "metadata": {}, 497 | "execution_count": 92 498 | } 499 | ] 500 | }, 501 | { 502 | "cell_type": "markdown", 503 | "source": [ 504 | "There is no null values. Therefore we do not have to take any action in terms of null values." 505 | ], 506 | "metadata": { 507 | "id": "gi7hKHqZuzbW" 508 | } 509 | }, 510 | { 511 | "cell_type": "markdown", 512 | "source": [ 513 | "**Last step** - Convert the result dataframe into a csv file" 514 | ], 515 | "metadata": { 516 | "id": "6_hgniXmu8mf" 517 | } 518 | }, 519 | { 520 | "cell_type": "code", 521 | "source": [ 522 | "# Convert the dataframe into a csv file\n", 523 | "df.to_csv('job_search.csv', index=False)" 524 | ], 525 | "metadata": { 526 | "id": "gP7dig9kayNW" 527 | }, 528 | "execution_count": null, 529 | "outputs": [] 530 | } 531 | ] 532 | } -------------------------------------------------------------------------------- /selenium/AdMiPeQa/WebScraping_AdMiPeQa.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "2bf9904d", 6 | "metadata": {}, 7 | "source": [ 8 | "# **Title: Web scraping E-commerce sites with Selenium**" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "4f0da3c2", 14 | "metadata": {}, 15 | "source": [ 16 | 
"In this file, we are going to do some web scraping by using selenium library. Website that we choose is Lazada which is the famous e-commerce site of Malaysia." 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "id": "aefb558b", 22 | "metadata": {}, 23 | "source": [ 24 | "**Group Members:**\n", 25 | "\n", 26 | "\n", 27 | " \n", 28 | " \n", 29 | " \n", 30 | " \n", 31 | " \n", 32 | " \n", 33 | " \n", 34 | " \n", 35 | " \n", 36 | " \n", 37 | " \n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | "
NameMatric
ADAM WAFII BIN AZUARA20EC0003
HONG PEI GEOKA20EC0044
MIKHEL ADAM BIN MUHAMMAD EZRINA20EC0237
QAISARA BINTI ROHZANA20EC0133
" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "id": "439c63f0", 53 | "metadata": {}, 54 | "source": [ 55 | "First, install the necessary packages." 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 1, 61 | "id": "56349b11", 62 | "metadata": {}, 63 | "outputs": [ 64 | { 65 | "name": "stdout", 66 | "output_type": "stream", 67 | "text": [ 68 | "Requirement already satisfied: webdriver-manager in c:\\users\\acer\\anaconda3\\lib\\site-packages (3.8.5)\n", 69 | "Requirement already satisfied: python-dotenv in c:\\users\\acer\\anaconda3\\lib\\site-packages (from webdriver-manager) (0.21.0)\n", 70 | "Requirement already satisfied: requests in c:\\users\\acer\\anaconda3\\lib\\site-packages (from webdriver-manager) (2.28.1)\n", 71 | "Requirement already satisfied: packaging in c:\\users\\acer\\anaconda3\\lib\\site-packages (from webdriver-manager) (21.3)\n", 72 | "Requirement already satisfied: tqdm in c:\\users\\acer\\anaconda3\\lib\\site-packages (from webdriver-manager) (4.64.1)\n", 73 | "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in c:\\users\\acer\\anaconda3\\lib\\site-packages (from packaging->webdriver-manager) (3.0.9)\n", 74 | "Requirement already satisfied: charset-normalizer<3,>=2 in c:\\users\\acer\\anaconda3\\lib\\site-packages (from requests->webdriver-manager) (2.0.4)\n", 75 | "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\acer\\anaconda3\\lib\\site-packages (from requests->webdriver-manager) (2022.9.14)\n", 76 | "Requirement already satisfied: idna<4,>=2.5 in c:\\users\\acer\\anaconda3\\lib\\site-packages (from requests->webdriver-manager) (3.3)\n", 77 | "Requirement already satisfied: urllib3<1.27,>=1.21.1 in c:\\users\\acer\\anaconda3\\lib\\site-packages (from requests->webdriver-manager) (1.26.11)\n", 78 | "Requirement already satisfied: colorama in c:\\users\\acer\\anaconda3\\lib\\site-packages (from tqdm->webdriver-manager) (0.4.5)\n" 79 | ] 80 | } 81 | ], 82 | "source": [ 83 | "!pip3 
install webdriver-manager" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "id": "747dadb1", 89 | "metadata": {}, 90 | "source": [ 91 | "Then, import the required libraries." 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 2, 97 | "id": "912c0e2c", 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "from selenium import webdriver\n", 102 | "from selenium.webdriver.chrome.service import Service\n", 103 | "from webdriver_manager.chrome import ChromeDriverManager\n", 104 | "from selenium.webdriver.common.by import By\n", 105 | "from selenium.common.exceptions import *\n", 106 | "import pandas as pd" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "id": "97d46e55", 112 | "metadata": {}, 113 | "source": [ 114 | "The website link and key word of search item were declared. " 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 3, 120 | "id": "3eb13612", 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "Lazada_url = 'https://www.lazada.com.my'\n", 125 | "search_item = 'Milo Activ Go 1kg' " 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "id": "c75369c8", 131 | "metadata": {}, 132 | "source": [ 133 | "Connect to the Chrome webdriver and search the url of Lazada." 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 4, 139 | "id": "ce957c73", 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))\n", 144 | "driver.get(Lazada_url)" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "id": "69a803e0", 150 | "metadata": {}, 151 | "source": [ 152 | "Find the product." 
153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 5, 158 | "id": "cc597dfa", 159 | "metadata": { 160 | "scrolled": false 161 | }, 162 | "outputs": [], 163 | "source": [ 164 | "p = driver.find_element(By.ID, 'q')\n", 165 | "p.send_keys(search_item)\n", 166 | "p.submit()" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "id": "1b4fdabb", 172 | "metadata": {}, 173 | "source": [ 174 | "Get the product title, price, location and review elements by their class names." 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 24, 180 | "id": "cd12cc76", 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "item_titles = driver.find_elements(By.CLASS_NAME, 'RfADt')\n", 185 | "item_prices = driver.find_elements(By.CLASS_NAME, 'ooOxS')\n", 186 | "item_locations = driver.find_elements(By.CLASS_NAME, 'oa6ri')\n", 187 | "item_reviews = driver.find_elements(By.CLASS_NAME, 'qzqFw')" 188 | ] 189 | }, 190 | { 191 | "cell_type": "markdown", 192 | "id": "34fc7b02", 193 | "metadata": {}, 194 | "source": [ 195 | "Next, we extract the text of the item_titles, item_prices, item_locations and item_reviews elements into lists:" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": 25, 201 | "id": "843f4922", 202 | "metadata": {}, 203 | "outputs": [], 204 | "source": [ 205 | "# Initialize empty lists\n", 206 | "titles_list = []\n", 207 | "prices_list = []\n", 208 | "locations_list = []\n", 209 | "reviews_list = []\n", 210 | "\n", 211 | "# Loop over each group of elements and collect its text\n", 212 | "for title in item_titles:\n", 213 | " titles_list.append(title.text)\n", 214 | "for price in item_prices:\n", 215 | " prices_list.append(price.text)\n", 216 | "for location in item_locations:\n", 217 | " locations_list.append(location.text)\n", 218 | "for review in item_reviews:\n", 219 | " reviews_list.append(review.text)" 220 | ] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "id": "33508781", 225 | "metadata": {}, 226 |
"source": [ 227 | "We convert the four lists into a dataframe:" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": 35, 233 | "id": "f80c5bc9", 234 | "metadata": {}, 235 | "outputs": [ 236 | { 237 | "data": { 238 | "text/html": [ 239 | "
\n", 240 | "\n", 253 | "\n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " 
\n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | "
ItemNamePriceLocationReview
0Milo Activ-Go 1kgRM18.90Selangor(1832)
1Nestle MILO Activ-Go Chocolate Malt Powder (1k...RM22.80Pahang(52)
2NESTLE MILO ACTIV-GO CHOCOLATE MALT POWDER Sof...RM21.50Perak(13)
3Milo Active-Go Softpack 1kgRM22.80Wp Kuala Lumpur(21)
4( Beng kee) HOT ITEM Milo activ-go 1kg...Nestl...RM24.00Perak(39)
5MILO ACTIV-GO CHOCOLATE MALT POWDER (1KG)RM22.10Wp Kuala Lumpur(7)
6Nestle MILO Activ-Go Chocolate Malt Powder 2kg...RM21.99Johor(46)
7Nestle Milo Activ-Go Chocolate Malt Powder (1k...RM22.30Perak(450)
8Nestle Milo Active-Go 【1kg】RM39.99Penang(14)
9NESTLE MILO ACTIV-GO CHOCOLATE MALT POWDER Sof...RM22.50Selangor(1)
10NESTLÉ MILO Activ-Go Chocolate Malt Powder 1kg...RM25.00Selangor(3)
11Nestle MILO Activ-Go Chocolate Malt Powder (20...RM20.82Kelantan(7)
12NESTLE Milo Activ-Go Chocolate Malt Drink Soft...RM23.99Melaka(7)
13Nestle Milo 美禄 1kg Activ-Go Chocolate Malt Pow...RM25.99Perak(12)
14Nestle Milo Activ-Go Chocolate Malt 1kg/2kgRM10.89Penang(2)
15[READY STOCK] Nestle MILO Activ-Go Chocolate M...RM11.90Penang(4)
16Nestle Milo Activ-Go Chocolate Malt Powder Sof...RM20.89Selangor(1)
17Nestle Milo Activ-Go Chocolate Malt Powder Sof...RM21.99Wp Kuala Lumpur(11)
18MILO ACTIV-GO 1kgRM29.00Selangor(1)
19Nestle Milo Activ-go 1kgRM23.50Selangor(79)
20NESTLE MILO ACTIV-GO CHOCOLATE MALT POWDER Sof...RM9.90Perak(1)
21Nestle Milo Active - Go Softpack 1KGRM27.90Kedah(3)
22Nestlé Milo Activ-Go Softpack 1kgRM30.90Johor(2)
23Nestle Milo Active-Go 1kgRM28.00Penang(12)
\n", 434 | "
" 435 | ], 436 | "text/plain": [ 437 | " ItemName Price \\\n", 438 | "0 Milo Activ-Go 1kg RM18.90 \n", 439 | "1 Nestle MILO Activ-Go Chocolate Malt Powder (1k... RM22.80 \n", 440 | "2 NESTLE MILO ACTIV-GO CHOCOLATE MALT POWDER Sof... RM21.50 \n", 441 | "3 Milo Active-Go Softpack 1kg RM22.80 \n", 442 | "4 ( Beng kee) HOT ITEM Milo activ-go 1kg...Nestl... RM24.00 \n", 443 | "5 MILO ACTIV-GO CHOCOLATE MALT POWDER (1KG) RM22.10 \n", 444 | "6 Nestle MILO Activ-Go Chocolate Malt Powder 2kg... RM21.99 \n", 445 | "7 Nestle Milo Activ-Go Chocolate Malt Powder (1k... RM22.30 \n", 446 | "8 Nestle Milo Active-Go 【1kg】 RM39.99 \n", 447 | "9 NESTLE MILO ACTIV-GO CHOCOLATE MALT POWDER Sof... RM22.50 \n", 448 | "10 NESTLÉ MILO Activ-Go Chocolate Malt Powder 1kg... RM25.00 \n", 449 | "11 Nestle MILO Activ-Go Chocolate Malt Powder (20... RM20.82 \n", 450 | "12 NESTLE Milo Activ-Go Chocolate Malt Drink Soft... RM23.99 \n", 451 | "13 Nestle Milo 美禄 1kg Activ-Go Chocolate Malt Pow... RM25.99 \n", 452 | "14 Nestle Milo Activ-Go Chocolate Malt 1kg/2kg RM10.89 \n", 453 | "15 [READY STOCK] Nestle MILO Activ-Go Chocolate M... RM11.90 \n", 454 | "16 Nestle Milo Activ-Go Chocolate Malt Powder Sof... RM20.89 \n", 455 | "17 Nestle Milo Activ-Go Chocolate Malt Powder Sof... RM21.99 \n", 456 | "18 MILO ACTIV-GO 1kg RM29.00 \n", 457 | "19 Nestle Milo Activ-go 1kg RM23.50 \n", 458 | "20 NESTLE MILO ACTIV-GO CHOCOLATE MALT POWDER Sof... 
RM9.90 \n", 459 | "21 Nestle Milo Active - Go Softpack 1KG RM27.90 \n", 460 | "22 Nestlé Milo Activ-Go Softpack 1kg RM30.90 \n", 461 | "23 Nestle Milo Active-Go 1kg RM28.00 \n", 462 | "\n", 463 | " Location Review \n", 464 | "0 Selangor (1832) \n", 465 | "1 Pahang (52) \n", 466 | "2 Perak (13) \n", 467 | "3 Wp Kuala Lumpur (21) \n", 468 | "4 Perak (39) \n", 469 | "5 Wp Kuala Lumpur (7) \n", 470 | "6 Johor (46) \n", 471 | "7 Perak (450) \n", 472 | "8 Penang (14) \n", 473 | "9 Selangor (1) \n", 474 | "10 Selangor (3) \n", 475 | "11 Kelantan (7) \n", 476 | "12 Melaka (7) \n", 477 | "13 Perak (12) \n", 478 | "14 Penang (2) \n", 479 | "15 Penang (4) \n", 480 | "16 Selangor (1) \n", 481 | "17 Wp Kuala Lumpur (11) \n", 482 | "18 Selangor (1) \n", 483 | "19 Selangor (79) \n", 484 | "20 Perak (1) \n", 485 | "21 Kedah (3) \n", 486 | "22 Johor (2) \n", 487 | "23 Penang (12) " 488 | ] 489 | }, 490 | "execution_count": 35, 491 | "metadata": {}, 492 | "output_type": "execute_result" 493 | } 494 | ], 495 | "source": [ 496 | "df = pd.DataFrame(zip(titles_list, prices_list, locations_list, reviews_list), columns=['ItemName', 'Price', 'Location', 'Review'])\n", 497 | "df" 498 | ] 499 | }, 500 | { 501 | "cell_type": "markdown", 502 | "id": "5e4aa608", 503 | "metadata": {}, 504 | "source": [ 505 | "Clean the data." 506 | ] 507 | }, 508 | { 509 | "cell_type": "code", 510 | "execution_count": 36, 511 | "id": "ca6e6e06", 512 | "metadata": {}, 513 | "outputs": [ 514 | { 515 | "name": "stderr", 516 | "output_type": "stream", 517 | "text": [ 518 | "C:\\Users\\ACER\\AppData\\Local\\Temp\\ipykernel_34632\\2512760009.py:2: FutureWarning: The default value of regex will change from True to False in a future version. 
In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", 519 | " df['Review'] = df['Review'].str.replace('(', '')\n", 520 | "C:\\Users\\ACER\\AppData\\Local\\Temp\\ipykernel_34632\\2512760009.py:3: FutureWarning: The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.\n", 521 | " df['Review'] = df['Review'].str.replace(')', '')\n" 522 | ] 523 | }, 524 | { 525 | "data": { 526 | "text/html": [ 527 | "
\n", 528 | "\n", 541 | "\n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " 
\n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | "
ItemNamePriceLocationReview
0Milo Activ-Go 1kg18.90Selangor1832
1Nestle MILO Activ-Go Chocolate Malt Powder (1k...22.80Pahang52
2NESTLE MILO ACTIV-GO CHOCOLATE MALT POWDER Sof...21.50Perak13
3Milo Active-Go Softpack 1kg22.80Wp Kuala Lumpur21
4( Beng kee) HOT ITEM Milo activ-go 1kg...Nestl...24.00Perak39
5MILO ACTIV-GO CHOCOLATE MALT POWDER (1KG)22.10Wp Kuala Lumpur7
6Nestle MILO Activ-Go Chocolate Malt Powder 2kg...21.99Johor46
7Nestle Milo Activ-Go Chocolate Malt Powder (1k...22.30Perak450
8Nestle Milo Active-Go 【1kg】39.99Penang14
9NESTLE MILO ACTIV-GO CHOCOLATE MALT POWDER Sof...22.50Selangor1
10NESTLÉ MILO Activ-Go Chocolate Malt Powder 1kg...25.00Selangor3
11Nestle MILO Activ-Go Chocolate Malt Powder (20...20.82Kelantan7
12NESTLE Milo Activ-Go Chocolate Malt Drink Soft...23.99Melaka7
13Nestle Milo 美禄 1kg Activ-Go Chocolate Malt Pow...25.99Perak12
14Nestle Milo Activ-Go Chocolate Malt 1kg/2kg10.89Penang2
15[READY STOCK] Nestle MILO Activ-Go Chocolate M...11.90Penang4
16Nestle Milo Activ-Go Chocolate Malt Powder Sof...20.89Selangor1
17Nestle Milo Activ-Go Chocolate Malt Powder Sof...21.99Wp Kuala Lumpur11
18MILO ACTIV-GO 1kg29.00Selangor1
19Nestle Milo Activ-go 1kg23.50Selangor79
20NESTLE MILO ACTIV-GO CHOCOLATE MALT POWDER Sof...9.90Perak1
21Nestle Milo Active - Go Softpack 1KG27.90Kedah3
22Nestlé Milo Activ-Go Softpack 1kg30.90Johor2
23Nestle Milo Active-Go 1kg28.00Penang12
\n", 722 | "
" 723 | ], 724 | "text/plain": [ 725 | " ItemName Price Location \\\n", 726 | "0 Milo Activ-Go 1kg 18.90 Selangor \n", 727 | "1 Nestle MILO Activ-Go Chocolate Malt Powder (1k... 22.80 Pahang \n", 728 | "2 NESTLE MILO ACTIV-GO CHOCOLATE MALT POWDER Sof... 21.50 Perak \n", 729 | "3 Milo Active-Go Softpack 1kg 22.80 Wp Kuala Lumpur \n", 730 | "4 ( Beng kee) HOT ITEM Milo activ-go 1kg...Nestl... 24.00 Perak \n", 731 | "5 MILO ACTIV-GO CHOCOLATE MALT POWDER (1KG) 22.10 Wp Kuala Lumpur \n", 732 | "6 Nestle MILO Activ-Go Chocolate Malt Powder 2kg... 21.99 Johor \n", 733 | "7 Nestle Milo Activ-Go Chocolate Malt Powder (1k... 22.30 Perak \n", 734 | "8 Nestle Milo Active-Go 【1kg】 39.99 Penang \n", 735 | "9 NESTLE MILO ACTIV-GO CHOCOLATE MALT POWDER Sof... 22.50 Selangor \n", 736 | "10 NESTLÉ MILO Activ-Go Chocolate Malt Powder 1kg... 25.00 Selangor \n", 737 | "11 Nestle MILO Activ-Go Chocolate Malt Powder (20... 20.82 Kelantan \n", 738 | "12 NESTLE Milo Activ-Go Chocolate Malt Drink Soft... 23.99 Melaka \n", 739 | "13 Nestle Milo 美禄 1kg Activ-Go Chocolate Malt Pow... 25.99 Perak \n", 740 | "14 Nestle Milo Activ-Go Chocolate Malt 1kg/2kg 10.89 Penang \n", 741 | "15 [READY STOCK] Nestle MILO Activ-Go Chocolate M... 11.90 Penang \n", 742 | "16 Nestle Milo Activ-Go Chocolate Malt Powder Sof... 20.89 Selangor \n", 743 | "17 Nestle Milo Activ-Go Chocolate Malt Powder Sof... 21.99 Wp Kuala Lumpur \n", 744 | "18 MILO ACTIV-GO 1kg 29.00 Selangor \n", 745 | "19 Nestle Milo Activ-go 1kg 23.50 Selangor \n", 746 | "20 NESTLE MILO ACTIV-GO CHOCOLATE MALT POWDER Sof... 
9.90 Perak \n", 747 | "21 Nestle Milo Active - Go Softpack 1KG 27.90 Kedah \n", 748 | "22 Nestlé Milo Activ-Go Softpack 1kg 30.90 Johor \n", 749 | "23 Nestle Milo Active-Go 1kg 28.00 Penang \n", 750 | "\n", 751 | " Review \n", 752 | "0 1832 \n", 753 | "1 52 \n", 754 | "2 13 \n", 755 | "3 21 \n", 756 | "4 39 \n", 757 | "5 7 \n", 758 | "6 46 \n", 759 | "7 450 \n", 760 | "8 14 \n", 761 | "9 1 \n", 762 | "10 3 \n", 763 | "11 7 \n", 764 | "12 7 \n", 765 | "13 12 \n", 766 | "14 2 \n", 767 | "15 4 \n", 768 | "16 1 \n", 769 | "17 11 \n", 770 | "18 1 \n", 771 | "19 79 \n", 772 | "20 1 \n", 773 | "21 3 \n", 774 | "22 2 \n", 775 | "23 12 " 776 | ] 777 | }, 778 | "execution_count": 36, 779 | "metadata": {}, 780 | "output_type": "execute_result" 781 | } 782 | ], 783 | "source": [ 784 | "df['Price'] = df['Price'].str.replace('RM', '').astype(float)\n", 785 | "df['Review'] = df['Review'].str.replace('(', '')\n", 786 | "df['Review'] = df['Review'].str.replace(')', '')\n", 787 | "df" 788 | ] 789 | }, 790 | { 791 | "cell_type": "markdown", 792 | "id": "f5bbe1e7", 793 | "metadata": {}, 794 | "source": [ 795 | "Finally, we export the data to a CSV file." 796 | ] 797 | }, 798 | { 799 | "cell_type": "code", 800 | "execution_count": 37, 801 | "id": "b3b34126", 802 | "metadata": {}, 803 | "outputs": [], 804 | "source": [ 805 | "# Export the dataframe to a CSV file\n", 806 | "df.to_csv('Lazada_Milo.csv', index=False)" 807 | ] 808 | }, 809 | { 810 | "cell_type": "code", 811 | "execution_count": null, 812 | "id": "0ac47b67", 813 | "metadata": {}, 814 | "outputs": [], 815 | "source": [] 816 | } 817 | ], 818 | "metadata": { 819 | "kernelspec": { 820 | "display_name": "Python 3 (ipykernel)", 821 | "language": "python", 822 | "name": "python3" 823 | }, 824 | "language_info": { 825 | "codemirror_mode": { 826 | "name": "ipython", 827 | "version": 3 828 | }, 829 | "file_extension": ".py", 830 | "mimetype": "text/x-python", 831 | "name": "python", 832 | "nbconvert_exporter": "python", 833 |
"pygments_lexer": "ipython3", 834 | "version": "3.9.13" 835 | } 836 | }, 837 | "nbformat": 4, 838 | "nbformat_minor": 5 839 | } 840 | -------------------------------------------------------------------------------- /beautiful-soup/High Five/courses_data.csv: -------------------------------------------------------------------------------- 1 | Category,Course Name,Link 2 | "Best Computer Science, Computing & Information Technology (IT) ","Best Computer Science, Computing & Information Technology (IT) Courses to Study in Malaysia 3 | I was confused about what to study & at which university. Talking to EduSpiral helped clear my doubts.Zen Yi, Graduated from Software Engineering at Asia Pacific University (APU) 4 | 5 | Computing 6 | Computer Science 7 | Software Engineering or Computer Programming 8 | Information Technology (IT) or Information Communications Technology 9 | Artificial Intelligence (Ai) 10 | Blockchain 11 | Business Information Systems (BIS) 12 | Cloud Computing 13 | Cyber Security 14 | Data Science or Data Analytics or Big Data 15 | Forensic Computing 16 | Game Design or Computer Games Development 17 | Intelligent Systems 18 | Internet of Things (IoT) 19 | Mobile Computing 20 | Network Computing 21 | Systems Security 22 | 23 | ",https://eduspiral.com/best-university-top-colleges-ranking-in-malaysia-offering-foundation-pre-university-degree-diploma-courses/23456242_1246066915539040_2377339565484969943_o/ 24 | "Best Computer Science, Computing & Information Technology (IT) ",Computing,https://eduspiral.com/about-us-eduspiral-consultant-services/advise-best-course-study-top-private-universities-malaysia/choosing-your-course/best-computing-courses-top-universities-malaysia/top-ten-information-technology-it-computer-science-courses-in-malaysia/top-3-computing-courses-in-malaysia-information-technology-it-computer-science-software-engineering/ 25 | "Best Computer Science, Computing & Information Technology (IT) ",Computer 
Science,https://eduspiral.com/top-courses-in-malaysia-high-job-in-demand/best-universities-in-malaysia-top-computer-science-diploma-degree-courses-in-malaysia/ 26 | "Best Computer Science, Computing & Information Technology (IT) ",Software Engineering or Computer Programming,https://eduspiral.com/best-university-top-colleges-ranking-in-malaysia-offering-foundation-pre-university-degree-diploma-courses/top-ranking-list-private-university-colleges-malaysia/top-ten-computer-engineering-software-programming-study-in-malaysia/ 27 | "Best Computer Science, Computing & Information Technology (IT) ",Information Technology (IT) or Information Communications Technology,https://eduspiral.com/2019/03/30/top-10-private-universities-in-malaysia-information-technology-it/ 28 | "Best Computer Science, Computing & Information Technology (IT) ",Artificial Intelligence (Ai),https://eduspiral.com/best-university-top-colleges-ranking-in-malaysia-offering-foundation-pre-university-degree-diploma-courses/top-ranking-list-private-university-colleges-malaysia/best-computer-science-at-top-universities-in-malaysia/top-10-universities-for-best-computer-science-in-malaysia/best-artificial-intelligence-ai-intelligent-systems-courses-top-universities-malaysia/ 29 | "Best Computer Science, Computing & Information Technology (IT) ",Blockchain,https://eduspiral.com/2019/12/12/top-university-malaysia-blockchain/ 30 | "Best Computer Science, Computing & Information Technology (IT) ",Business Information Systems (BIS),https://eduspiral.com/best-universities-malaysia-business-information-systems-bis/ 31 | "Best Computer Science, Computing & Information Technology (IT) ",Cloud Computing,https://eduspiral.com/about-us-eduspiral-consultant-services/advise-best-course-study-top-private-universities-malaysia/choosing-your-course/best-computing-courses-top-universities-malaysia/best-cloud-computing-courses-top-universities-malaysia/top-degree-cloud-computing-university-malaysia/ 32 | "Best Computer Science, 
Computing & Information Technology (IT) ",Cyber Security,https://eduspiral.com/top-universities-in-malaysia-best-cyber-security-computer-security-technology-degree-course/ 33 | "Best Computer Science, Computing & Information Technology (IT) ",Data Science or Data Analytics or Big Data,https://eduspiral.com/2017/12/02/top-universities-in-malaysia-best-for-data-science-data-analytics-course/ 34 | "Best Computer Science, Computing & Information Technology (IT) ",Forensic Computing,https://eduspiral.com/2019/12/04/best-university-study-digital-forensics-course-malaysia/ 35 | "Best Computer Science, Computing & Information Technology (IT) ",Game Design or Computer Games Development,https://eduspiral.com/about-us-eduspiral-consultant-services/advise-best-course-study-top-private-universities-malaysia/choosing-your-course/best-computing-courses-top-universities-malaysia/best-computer-games-design-courses-top-universities-malaysia/malaysia-best-universities-for-top-computer-games-development-or-game-design-technology-degree/ 36 | "Best Computer Science, Computing & Information Technology (IT) ",Intelligent Systems,https://eduspiral.com/about-us-eduspiral-consultant-services/advise-best-course-study-top-private-universities-malaysia/choosing-your-course/best-computing-courses-top-universities-malaysia/best-artificial-intelligence-ai-intelligent-systems-courses-top-universities-malaysia/top-private-universities-malaysia-artificial-intelligence-ai-degree-course/ 37 | "Best Computer Science, Computing & Information Technology (IT) ",Internet of Things (IoT),https://eduspiral.com/best-university-top-colleges-ranking-in-malaysia-offering-foundation-pre-university-degree-diploma-courses/top-ranking-list-private-university-colleges-malaysia/top-universities-malaysia-information-technology-it-course/best-internet-of-things-iot-course-at-top-university-in-malaysia-high-job-demand/ 38 | "Best Computer Science, Computing & Information Technology (IT) ",Mobile 
Computing,http://eduspiral.com/mobile-computing-degree-best-computing-university-malaysia/ 39 | "Best Computer Science, Computing & Information Technology (IT) ",Network Computing,https://eduspiral.com/best-university-top-colleges-ranking-in-malaysia-offering-foundation-pre-university-degree-diploma-courses/top-ranking-list-private-university-colleges-malaysia/top-universities-malaysia-information-technology-it-course/top-universities-in-malaysia-best-for-computer-network-computing/ 40 | "Best Computer Science, Computing & Information Technology (IT) ",Systems Security,http://eduspiral.com/best-university-to-study-information-systems-security-in-malaysia/ 41 | "Best Accounting, Finance & Quantitative Studies ","Best Accounting, Finance & Quantitative Studies Courses to Study in Malaysia 42 | 43 | Accounting 44 | Accounting & Finance 45 | Actuarial Science 46 | Finance 47 | Finance & Investment 48 | Financial Engineering 49 | Financial Technology (Fintech) 50 | Banking & Finance 51 | Islamic Banking & Finance 52 | Economics 53 | 54 | ",http://eduspiral.com/2015/12/07/best-pathway-be-professional-chartered-accountant-in-malaysia-accounting-degree-acca-exemptions-at-top-universities-after-spm-stpm-uec-olevels-alevels/ 55 | "Best Accounting, Finance & Quantitative Studies ",Accounting,http://eduspiral.com/2015/12/07/best-pathway-be-professional-chartered-accountant-in-malaysia-accounting-degree-acca-exemptions-at-top-universities-after-spm-stpm-uec-olevels-alevels/ 56 | "Best Accounting, Finance & Quantitative Studies ",Accounting & Finance,http://eduspiral.com/the-best-universities-in-malaysia-for-accounting-finance-degree-programs/ 57 | "Best Accounting, Finance & Quantitative Studies ",Actuarial Science,http://eduspiral.com/2013/09/25/all-you-need-to-know-about-study-actuarial-science-in-malaysia-and-the-pathway-to-become-a-professional-actuaries/ 58 | "Best Accounting, Finance & Quantitative Studies 
",Finance,https://eduspiral.com/best-university-top-colleges-ranking-in-malaysia-offering-foundation-pre-university-degree-diploma-courses/top-ranking-list-private-university-colleges-malaysia/best-universities-in-malaysia-top-bachelor-of-business-administration-honours-degree/banking-and-finance-degree-malaysia-uk/malaysia-top-universities-best-banking-investment-finance-courses/ 59 | "Best Accounting, Finance & Quantitative Studies ",Finance & Investment,https://eduspiral.com/best-university-top-colleges-ranking-in-malaysia-offering-foundation-pre-university-degree-diploma-courses/top-ranking-list-private-university-colleges-malaysia/best-universities-in-malaysia-top-bachelor-of-business-administration-honours-degree/banking-and-finance-degree-malaysia-uk/top-universities-in-malaysia-for-banking-finance-investment-degree-courses/ 60 | "Best Accounting, Finance & Quantitative Studies ",Financial Engineering,https://eduspiral.com/best-financial-engineering-degree-in-malaysia-at-top-ranked-multimedia-university-mmu-cyberjaya/ 61 | "Best Accounting, Finance & Quantitative Studies ",Financial Technology (Fintech),https://eduspiral.com/about-us-eduspiral-consultant-services/advise-best-course-study-top-private-universities-malaysia/top-guides-choosing-the-best-course/top-job-demand-high-salary-reports-malaysia/job-demand-reports-and-salary-guides-in-malaysia-by-the-courses-that-you-want-to-study/top-list-best-information-technology-it-computing-computer-science-jobs-in-demand-in-malaysia/a-fantastic-career-opportunity-with-financial-technology-fintech-as-it-has-high-job-demand-salary-in-malaysia/ 62 | "Best Accounting, Finance & Quantitative Studies ",Banking & Finance,https://eduspiral.com/top-courses-in-malaysia-high-job-in-demand/best-banking-finance-degree-programmes-at-top-private-universities-in-malaysia/ 63 | "Best Accounting, Finance & Quantitative Studies ",Islamic Banking & 
Finance,http://eduspiral.com/best-universities-for-top-bachelor-of-arts-honours-islamic-banking-finance-degree-course-in-malaysia/ 64 | "Best Accounting, Finance & Quantitative Studies ",Economics,http://eduspiral.com/best-top-universities-in-malaysia-to-study-bachelor-of-economics-hons-degree/ 65 | Best Business Management ,"Best Business Management Courses to Study in Malaysia 66 | 67 | Business Studies 68 | Business Management 69 | Business Administration 70 | Business 71 | Business & Knowledge Management 72 | Digital Marketing 73 | eBusiness or e-Commerce 74 | Entrepreneurship 75 | Human Resource Management (HRM) 76 | International Business Management (IBM) 77 | Logistics Management 78 | Marketing Management 79 | Sales & Marketing 80 | Supply Chain Management 81 | 82 | ",https://eduspiral.com/top-best-business-administration-degree-course-in-malaysia-at-the-best-university-college-in-malaysia/ 83 | Best Business Management ,Business Studies,https://eduspiral.com/top-best-business-administration-degree-course-in-malaysia-at-the-best-university-college-in-malaysia/ 84 | Best Business Management ,Business Management,https://eduspiral.com/about-us-eduspiral-consultant-services/advise-best-course-study-top-private-universities-malaysia/choosing-your-course/business-courses/best-business-administration-courses-top-universities-malaysia/top-best-business-administration-degree-course-in-malaysia-at-the-best-university-college-in-malaysia/best-business-administration-degree-at-top-private-universities-in-malaysia/ 85 | Best Business Management ,Business Administration,http://eduspiral.com/best-universities-to-study-top-business-administration-degree-in-malaysia/ 86 | Best Business Management ,Business,https://eduspiral.com/best-university-top-colleges-ranking-in-malaysia-offering-foundation-pre-university-degree-diploma-courses/top-ranking-list-private-university-colleges-malaysia/best-universities-in-malaysia-top-bachelor-of-business-administration-honours-degree/ 87 | 
Best Business Management ,Business & Knowledge Management,https://eduspiral.com/best-business-knowledge-management-degree-sap-certificate-in-malaysia-at-top-ranked-multimedia-university-mmu-melaka/ 88 | Best Business Management ,Digital Marketing,https://eduspiral.com/2020/05/06/digital-marketing-degree-course-malaysia/ 89 | Best Business Management ,eBusiness or e-Commerce,https://eduspiral.com/2021/04/22/best-courses-to-study-in-malaysia-if-you-want-to-work-in-ecommerce/ 90 | Best Business Management ,Entrepreneurship,https://eduspiral.com/about-us-eduspiral-consultant-services/advise-best-course-study-top-private-universities-malaysia/choosing-your-course/business-courses/best-business-administration-courses-top-universities-malaysia/top-best-business-administration-degree-course-in-malaysia-at-the-best-university-college-in-malaysia/best-business-administration-degree-at-top-private-universities-in-malaysia/best-university-malaysia-top-entrepreneurship-business-degree-help-university/ 91 | Best Business Management ,Human Resource Management (HRM),https://eduspiral.com/best-university-top-colleges-ranking-in-malaysia-offering-foundation-pre-university-degree-diploma-courses/top-ranking-list-private-university-colleges-malaysia/best-universities-in-malaysia-top-bachelor-of-business-administration-honours-degree/top-university-in-malaysia-for-best-human-resource-management-hrm-degree/ 92 | Best Business Management ,International Business Management (IBM),https://eduspiral.com/top-universities-in-malaysia-best-for-international-business-management/ 93 | Best Business Management ,Logistics Management,http://eduspiral.com/best-university-logistics-management-diploma-degree-course-malaysia/ 94 | Best Business Management ,Marketing 
Management,https://eduspiral.com/best-university-top-colleges-ranking-in-malaysia-offering-foundation-pre-university-degree-diploma-courses/top-ranking-list-private-university-colleges-malaysia/best-universities-in-malaysia-top-bachelor-of-business-administration-honours-degree/top-universities-in-malaysia-best-marketing-degree-course/ 95 | Best Business Management ,Sales & Marketing,https://eduspiral.com/best-university-top-colleges-ranking-in-malaysia-offering-foundation-pre-university-degree-diploma-courses/top-ranking-list-private-university-colleges-malaysia/best-universities-in-malaysia-top-bachelor-of-business-administration-honours-degree/top-universities-in-malaysia-best-marketing-degree-course/best-marketing-courses-top-universities-malaysia/ 96 | Best Business Management ,Supply Chain Management,https://eduspiral.com/about-us-eduspiral-consultant-services/advise-best-course-study-top-private-universities-malaysia/choosing-your-course/business-courses/best-logistics-supply-chain-management-courses-top-universities-malaysia/logistics-supply-chain-management-top-private-universities-in-malaysia/ 97 | Best Engineering & Built Environment Courses to Study in Malaysia,"Best Engineering & Built Environment Courses to Study in Malaysia 98 | 99 | Engineering 100 | Chemical Engineering 101 | Civil Engineering 102 | Electrical & Electronic Engineering 103 | 104 | Nanotechnology 105 | Optical Engineering 106 | Telecommunication Engineering 107 | 108 | 109 | Mechanical Engineering 110 | Mechatronic Engineering 111 | 112 | Robotics & Automation Engineering 113 | 114 | 115 | Petroleum Engineering 116 | Architecture 117 | Interior Architecture 118 | Quantity Surveying 119 | Construction Project Management 120 | Aircraft Maintenance 121 | 122 | ",https://eduspiral.com/top-courses-in-malaysia-high-job-in-demand/study-engineering-course-malaysia/top-engineering-courses-study-malaysia/ 123 | Best Engineering & Built Environment Courses to Study in 
Malaysia,Engineering,https://eduspiral.com/top-courses-in-malaysia-high-job-in-demand/study-engineering-course-malaysia/top-engineering-courses-study-malaysia/ 124 | Best Engineering & Built Environment Courses to Study in Malaysia,Chemical Engineering,https://eduspiral.com/about-us-eduspiral-consultant-services/advise-best-course-study-top-private-universities-malaysia/choosing-your-course/engineering-courses-universities-malaysia/best-chemical-engineering-courses-accredited-top-universities-malaysia/top-chemical-engineering-universities-in-malaysia-best-course/ 125 | Best Engineering & Built Environment Courses to Study in Malaysia,Civil Engineering,http://eduspiral.com/2014/11/18/study-civil-engineering-degree-course-in-malaysia-at-top-private-universities/ 126 | Best Engineering & Built Environment Courses to Study in Malaysia,"Electrical & Electronic Engineering 127 | 128 | Nanotechnology 129 | Optical Engineering 130 | Telecommunication Engineering 131 | 132 | ",http://eduspiral.com/2017/02/13/top-university-in-malaysia-for-best-electrical-electronic-engineering-accredited-by-board-of-engineers-malaysia-bem/ 133 | Best Engineering & Built Environment Courses to Study in Malaysia,Nanotechnology,http://eduspiral.com/2017/11/24/best-nanotechnology-engineering-in-malaysia-top-world-rank-electronics-engineering-at-multimedia-university-mmu-cyberjaya/ 134 | Best Engineering & Built Environment Courses to Study in Malaysia,Optical Engineering,http://eduspiral.com/2017/11/24/best-optical-engineering-in-malaysia-top-world-rank-electronics-engineering-at-multimedia-university-mmu-cyberjaya/ 135 | Best Engineering & Built Environment Courses to Study in Malaysia,Telecommunication 
Engineering,https://eduspiral.com/about-us-eduspiral-consultant-services/advise-best-course-study-top-private-universities-malaysia/choosing-your-course/engineering-courses-universities-malaysia/best-telecommunication-engineering-courses-accredited-top-universities-malaysia/top-best-university-in-malaysia-to-study-telecommunications-engineering/ 136 | Best Engineering & Built Environment Courses to Study in Malaysia,Mechanical Engineering,http://eduspiral.com/2014/10/14/the-best-universities-in-malaysia-for-mechanical-engineering/ 137 | Best Engineering & Built Environment Courses to Study in Malaysia,"Mechatronic Engineering 138 | 139 | Robotics & Automation Engineering 140 | 141 | ",https://eduspiral.com/about-us-eduspiral-consultant-services/advise-best-course-study-top-private-universities-malaysia/choosing-your-course/engineering-courses-universities-malaysia/best-mechatronic-engineering-courses-accredited-top-universities-malaysia/top-mechatronic-engineering-university-malaysia/ 142 | Best Engineering & Built Environment Courses to Study in Malaysia,Robotics & Automation Engineering,http://eduspiral.com/2017/11/22/best-engineering-electronics-robotics-automation-automotive-mechatronics-in-malaysia-top-rank-multimedia-university-mmu-melaka-accredited-by-board-of-engineers-malaysia-bem/ 143 | Best Engineering & Built Environment Courses to Study in Malaysia,Petroleum Engineering,https://eduspiral.com/about-us-eduspiral-consultant-services/advise-best-course-study-top-private-universities-malaysia/choosing-your-course/engineering-courses-universities-malaysia/petroleum-engineering/malaysia-top-universities-best-petroleum-engineering-degree-course/ 144 | Best Engineering & Built Environment Courses to Study in 
Malaysia,Architecture,https://eduspiral.com/about-us-eduspiral-consultant-services/advise-best-course-study-top-private-universities-malaysia/choosing-your-course/best-architecture-built-environment-courses-top-universities-malaysia/top-private-universities-in-malaysia-for-best-architecture-degree-accredited-by-lembaga-arkitek-malaysia-lam/top-university-in-malaysia-for-best-cheap-bachelor-science-honours-architecture-degree-accredited-by-lembaga-arkitek-malaysia-lam/architecture-degree-pathway-best-university/best-architecture-degree-in-malaysia-accredited-by-board-of-architects-malaysia-lam-part-1-at-top-private-universities/ 145 | Best Engineering & Built Environment Courses to Study in Malaysia,Interior Architecture,http://eduspiral.com/2016/04/28/best-colleges-top-private-university-in-malaysia-for-diploma-interior-design-or-interior-architecture-degree-cheapest-study-after-spm-olevels-uec/ 146 | Best Engineering & Built Environment Courses to Study in Malaysia,Quantity Surveying,http://eduspiral.com/best-accredited-quantity-survey-degree-in-malaysia-at-top-universities/ 147 | Best Engineering & Built Environment Courses to Study in Malaysia,Construction Project Management,http://eduspiral.com/top-construction-project-management-degree-at-the-best-university-in-malaysia-top-ranked-heriot-watt-university-malaysia/ 148 | Best Engineering & Built Environment Courses to Study in Malaysia,Aircraft Maintenance,http://eduspiral.com/2013/06/07/diploma-in-aircraft-maintenance-engineering/ 149 | Best Mass Communication ,"Best Mass Communication Courses to Study in Malaysia 150 | 151 | Mass Communication 152 | Digital Marketing 153 | Branding & Advertising 154 | Broadcasting 155 | Media Studies 156 | Public Relations 157 | 158 | Public Relations & Marketing 159 | Public Relations & Event Management 160 | 161 | 162 | Marketing Communications 163 | Events Management 164 | 165 | 
",https://eduspiral.com/top-courses-in-malaysia-high-job-in-demand/best-mass-communication-course-at-top-universities-in-malaysia/ 166 | Best Mass Communication ,Mass Communication,https://eduspiral.com/top-courses-in-malaysia-high-job-in-demand/best-mass-communication-course-at-top-universities-in-malaysia/ 167 | Best Mass Communication ,Digital Marketing,https://eduspiral.com/about-us-eduspiral-consultant-services/advise-best-course-study-top-private-universities-malaysia/choosing-your-course/business-courses/best-marketing-courses-top-universities-malaysia/top-private-universities-malaysia-digital-marketing/ 168 | Best Mass Communication ,Branding & Advertising,https://eduspiral.com/best-private-universities-colleges-in-malaysia-to-study-advertising-brand-management/ 169 | Best Mass Communication ,Broadcasting,https://eduspiral.com/best-private-universities-top-colleges-in-malaysia-to-study-film-broadcasting/ 170 | Best Mass Communication ,Media Studies,https://eduspiral.com/about-us-eduspiral-consultant-services/advise-best-course-study-top-private-universities-malaysia/choosing-your-course/communication/media-studies/best-media-communication-studies-at-top-universities-colleges-in-malaysia/ 171 | Best Mass Communication ,"Public Relations 172 | 173 | Public Relations & Marketing 174 | Public Relations & Event Management 175 | 176 | ",https://eduspiral.com/list-of-the-best-private-universities-in-malaysia-to-study-top-public-relations-pr-degree-courses/ 177 | Best Mass Communication ,Public Relations & Marketing,https://eduspiral.com/best-university-top-colleges-ranking-in-malaysia-offering-foundation-pre-university-degree-diploma-courses/top-ranking-list-private-university-colleges-malaysia/study-mass-communication-at-the-best-universities-colleges-in-malaysia/best-mass-communication-in-malaysia-top-uk-dual-award-degree-programmes-at-taylors-university-lakeside-campus/best-public-relations-top-mass-communication-marketing-taylors-university-malaysia/ 178 | 
Best Mass Communication ,Public Relations & Event Management,https://eduspiral.com/best-university-top-colleges-ranking-in-malaysia-offering-foundation-pre-university-degree-diploma-courses/top-ranking-list-private-university-colleges-malaysia/study-mass-communication-at-the-best-universities-colleges-in-malaysia/best-public-relations-pr-event-management-degree-malaysia-top-ranked-taylors-university/ 179 | Best Mass Communication ,Marketing Communications,https://eduspiral.com/2014/03/13/study-at-the-best-university-in-malaysia-for-public-relations/ 180 | Best Mass Communication ,Events Management,http://eduspiral.com/best-events-management-degree-in-malaysia-at-top-private-universities/ 181 | "Best Hospitality, Tourism & Culinary Arts ","Best Hospitality, Tourism & Culinary Arts Courses to Study in Malaysia 182 | 183 | Hotel Management 184 | Hospitality Management 185 | Culinary Arts 186 | Patisserie 187 | Baking 188 | Tourism Management 189 | Events Management 190 | 191 | ",https://eduspiral.com/best-university-top-colleges-ranking-in-malaysia-offering-foundation-pre-university-degree-diploma-courses/top-ranking-list-private-university-colleges-malaysia/top-colleges-universities-in-malaysia-best-hotel-management-top-hospitality-management-course/ 192 | "Best Hospitality, Tourism & Culinary Arts ",Hotel Management,https://eduspiral.com/best-university-top-colleges-ranking-in-malaysia-offering-foundation-pre-university-degree-diploma-courses/top-ranking-list-private-university-colleges-malaysia/top-colleges-universities-in-malaysia-best-hotel-management-top-hospitality-management-course/ 193 | "Best Hospitality, Tourism & Culinary Arts ",Hospitality Management,https://eduspiral.com/best-hotel-hospitality-tourism-management-diploma-degree-course-malaysia/ 194 | "Best Hospitality, Tourism & Culinary Arts ",Culinary 
Arts,https://eduspiral.com/best-university-top-colleges-ranking-in-malaysia-offering-foundation-pre-university-degree-diploma-courses/top-ranking-list-private-university-colleges-malaysia/top-universities-colleges-malaysia-culinary-arts-baking/top-ten-best-culinary-arts-university-college-schools-in-malaysia/ 195 | "Best Hospitality, Tourism & Culinary Arts ",Patisserie,https://eduspiral.com/best-university-colleges-malaysia-diploma-patisserie-baking-degree/ 196 | "Best Hospitality, Tourism & Culinary Arts ",Baking,https://eduspiral.com/best-university-top-colleges-ranking-in-malaysia-offering-foundation-pre-university-degree-diploma-courses/top-ranking-list-private-university-colleges-malaysia/top-universities-colleges-malaysia-culinary-arts-baking/ 197 | "Best Hospitality, Tourism & Culinary Arts ",Tourism Management,https://eduspiral.com/top-hospitality-university-best-hotel-management-universities-malaysia-tourism-management-colleges-in-malaysia/ 198 | "Best Hospitality, Tourism & Culinary Arts ",Events Management,https://eduspiral.com/best-university-top-colleges-ranking-in-malaysia-offering-foundation-pre-university-degree-diploma-courses/top-ranking-list-private-university-colleges-malaysia/top-colleges-universities-in-malaysia-best-hotel-management-top-hospitality-management-course/best-university-in-malaysia-top-events-management/ 199 | Best Art and Design ,"Best Art and Design Courses to Study in Malaysia 200 | 201 | Design 202 | Graphic Design 203 | Multimedia Design 204 | Fashion Design 205 | Product Design 206 | Interior Design 207 | 3D Animation 208 | Entertainment Arts 209 | Film, Broadcasting & Cinematic Arts 210 | Visual Effects 211 | Virtual Reality (VR) / Augmented Reality (AR) 212 | 213 | ",http://eduspiral.com/best-design-courses-at-top-colleges-and-universities-in-malaysia/ 214 | Best Art and Design ,Design,http://eduspiral.com/best-design-courses-at-top-colleges-and-universities-in-malaysia/ 215 | Best Art and Design ,Graphic 
Design,http://eduspiral.com/top-best-colleges-in-malaysia-for-graphic-design-diploma-degree/ 216 | Best Art and Design ,Multimedia Design,http://eduspiral.com/best-universities-colleges-for-multimedia-design-courses-in-malaysia/ 217 | Best Art and Design ,Fashion Design,https://eduspiral.com/best-university-top-colleges-ranking-in-malaysia-offering-foundation-pre-university-degree-diploma-courses/top-ranking-list-private-university-colleges-malaysia/top-10-best-design-colleges-universities-in-malaysia/best-design-courses-at-top-colleges-and-universities-in-malaysia/top-colleges-universities-in-malaysia-best-for-diploma-in-fashion-design/ 218 | Best Art and Design ,Product Design,https://eduspiral.com/about-us-eduspiral-consultant-services/advise-best-course-study-top-private-universities-malaysia/choosing-your-course/best-design-courses-top-universities-malaysia/interior-design-architecture/kbu-international-college-best-in-design/best-university-in-malaysia-for-top-furniture-product-design-at-first-city-university-college/ 219 | Best Art and Design ,Interior Design,https://eduspiral.com/about-us-eduspiral-consultant-services/advise-best-course-study-top-private-universities-malaysia/choosing-your-course/best-design-courses-top-universities-malaysia/interior-design-architecture/top-university-malaysia-interior-design-interior-architecture/ 220 | Best Art and Design ,3D Animation,https://eduspiral.com/about-us-eduspiral-consultant-services/advise-best-course-study-top-private-universities-malaysia/choosing-your-course/best-design-courses-top-universities-malaysia/animation/top-universities-study-3d-animation-design-malaysia/ 221 | Best Art and Design ,Entertainment Arts,http://eduspiral.com/study-at-the-best-university-in-malaysia-for-entertainment-arts/ 222 | Best Art and Design ,"Film, Broadcasting & Cinematic Arts",https://eduspiral.com/best-private-universities-top-colleges-in-malaysia-to-study-film-broadcasting/ 223 | Best Art and Design ,Visual 
Effects,https://eduspiral.com/best-university-top-colleges-ranking-in-malaysia-offering-foundation-pre-university-degree-diploma-courses/top-ranking-list-private-university-colleges-malaysia/top-10-best-design-colleges-universities-in-malaysia/top-universities-colleges-malaysia-3d-animation-design/best-animation-visual-effects-degree-course-in-malaysia-top-multimedia-university-mmu-cyberjaya/ 224 | Best Art and Design ,Virtual Reality (VR) / Augmented Reality (AR),https://eduspiral.com/about-us-eduspiral-consultant-services/advise-best-course-study-top-private-universities-malaysia/choosing-your-course/best-design-courses-top-universities-malaysia/animation/top-universities-virtual-reality-augmented-reality-vr-ar-courses-malaysia/ 225 | Best Health Science ,"Best Health Science Courses to Study in Malaysia 226 | 227 | Medicine 228 | Dentistry 229 | Nursing 230 | Physiotherapy 231 | Medical Imaging 232 | Medical Lab Technology 233 | Pharmacy 234 | 235 | ",http://eduspiral.com/affordable-medical-programme-accredited-by-the-malaysian-medical-council-that-best-fits-your-needs/ 236 | Best Health Science ,Medicine,http://eduspiral.com/affordable-medical-programme-accredited-by-the-malaysian-medical-council-that-best-fits-your-needs/ 237 | Best Health Science ,Dentistry,http://eduspiral.com/2017/06/19/top-affordable-dentistry-degree-recognised-by-dental-council-malaysia-at-mahsa-university-best-for-health-sciences/ 238 | Best Health Science ,Nursing,http://eduspiral.com/2016/06/10/best-university-study-top-diploma-in-nursing-bachelor-nursing-in-malaysia/ 239 | Best Health Science ,Physiotherapy,http://eduspiral.com/2017/06/24/best-physiotherapy-degree-in-malaysia-top-mahsa-university/ 240 | Best Health Science ,Medical Imaging,http://eduspiral.com/2017/06/26/best-medical-imaging-radiography-degree-in-malaysia-top-mahsa-university/ 241 | Best Health Science ,Medical Lab 
Technology,http://eduspiral.com/2017/07/08/best-university-in-malaysia-diploma-in-medical-laboratory-technology-top-mahsa-university/ 242 | Best Health Science ,Pharmacy,http://eduspiral.com/best-universities-in-malaysia-for-top-pharmacy-degree/ 243 | Best Applied Science ,"Best Applied Science Courses to Study in Malaysia 244 | 245 | Biotechnology 246 | Culinology® 247 | Food Science 248 | Biomedical Science 249 | Bioinformatics 250 | 251 | ",https://eduspiral.com/top-universities-in-malaysia-best-for-biotechnology-degree/ 252 | Best Applied Science ,Biotechnology,https://eduspiral.com/top-universities-in-malaysia-best-for-biotechnology-degree/ 253 | Best Applied Science ,Culinology®,http://eduspiral.com/2017/03/07/best-culinology-university-malaysia-top-food-science-taylors-university 254 | Best Applied Science ,Food Science,http://eduspiral.com/study-at-the-best-university-in-malaysia-for-food-science-and-nutrition/ 255 | Best Applied Science ,Biomedical Science,https://eduspiral.com/about-us-eduspiral-consultant-services/advise-best-course-study-top-private-universities-malaysia/choosing-your-course/best-health-science-courses-top-universities-malaysia/top-private-universities-malaysia-biomedical-science/ 256 | Best Applied Science ,Bioinformatics,https://eduspiral.com/best-bioinformatics-degree-in-malaysia-at-top-multimedia-university-mmu-melaka/ 257 | Best Humanities & Social Science ,"Best Humanities & Social Science Courses to Study in Malaysia 258 | 259 | Law 260 | Teaching English as a Second Language (TESL) 261 | Psychology 262 | 263 | ",https://eduspiral.com/best-university-top-colleges-ranking-in-malaysia-offering-foundation-pre-university-degree-diploma-courses/top-ranking-list-private-university-colleges-malaysia/best-top-university-in-malaysia-for-recognised-law-degree/ 264 | Best Humanities & Social Science 
,Law,https://eduspiral.com/best-university-top-colleges-ranking-in-malaysia-offering-foundation-pre-university-degree-diploma-courses/top-ranking-list-private-university-colleges-malaysia/best-top-university-in-malaysia-for-recognised-law-degree/ 265 | Best Humanities & Social Science ,Teaching English as a Second Language (TESL),https://eduspiral.com/best-university-top-colleges-ranking-in-malaysia-offering-foundation-pre-university-degree-diploma-courses/top-ranking-list-private-university-colleges-malaysia/top-universities-in-malaysia-best-for-teaching-english-as-a-second-language-tesl/ 266 | Best Humanities & Social Science ,Psychology,https://eduspiral.com/about-us-eduspiral-consultant-services/advise-best-course-study-top-private-universities-malaysia/choosing-your-course/psychology/psychology-diploma-program-degree-course/top-private-universities-in-malaysia-to-study-psychology-degree/ 267 | --------------------------------------------------------------------------------