├── .github
├── FUNDING.yml
├── ISSUE_TEMPLATE
│ ├── bug.yaml
│ ├── documentationupdate.yaml
│ └── featurerequest.yaml
├── pull_request_template.md
└── workflows
│ ├── greetings.yaml
│ └── pr_merged.yml
├── .gitignore
├── .vscode
└── settings.json
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── Makefile
├── README.md
├── SECURITY.md
├── dev-documentation.md
├── docs
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── home.md
├── index.md
├── installation.md
└── modules
│ ├── Finance.md
│ ├── HackerEarth.md
│ ├── Hackernews.md
│ ├── Internshala.md
│ ├── Twitter.md
│ ├── academia.md
│ ├── amazon.md
│ ├── ask-ubuntu.md
│ ├── bbc.md
│ ├── codechef.md
│ ├── coinmarketcap.md
│ ├── coursera.md
│ ├── covid-19.md
│ ├── crickbuzz.md
│ ├── devpost.md
│ ├── dribbble.md
│ ├── eazydinner.md
│ ├── ebay.md
│ ├── espn.md
│ ├── flexjobs.md
│ ├── flipkart.md
│ ├── flipkartclothing.md
│ ├── flipkartlaptop.md
│ ├── flyrobu.md
│ ├── github.md
│ ├── githubedu.md
│ ├── gitlab.md
│ ├── googlenews.md
│ ├── hackerrank.md
│ ├── hashnode.md
│ ├── healthgrade.md
│ ├── iccranking.md
│ ├── imdb-actor.md
│ ├── imdb-boxoffice.md
│ ├── imdb-celeb.md
│ ├── imdb-indian.md
│ ├── imdb-movies.md
│ ├── imdb.md
│ ├── instagram.md
│ ├── installation.md
│ ├── kooapp.md
│ ├── leetcode.md
│ ├── letterboxd.md
│ ├── luma.md
│ ├── medium.md
│ ├── reddit.md
│ ├── spotify.md
│ ├── stackoverflow.md
│ ├── techcrunch.md
│ ├── wikipedia.md
│ └── youtube.md
├── documentation.md
├── mkdocs.yml
├── project_setup.sh
├── pyproject.toml
├── requirements.txt
├── setup.cfg
└── src
├── scrape_up
├── __init__.py
├── academia
│ ├── __init__.py
│ └── academia.py
├── amazon
│ ├── __init__.py
│ └── products.py
├── ambitionBox
│ └── company.py
├── askubuntu
│ ├── __init__.py
│ └── questions.py
├── atcoder
│ ├── __init__.py
│ └── atcoder.py
├── banners
│ ├── __init__.py
│ └── scraper88x31.py
├── bayt
│ ├── __init__.py
│ └── bayt.py
├── bbcnews
│ ├── __init__.py
│ └── bbcnews.py
├── billionaires
│ └── billionaires.py
├── bugmenot
│ └── bugmenot.py
├── cars
│ ├── __init__.py
│ └── cars.py
├── codechef
│ ├── __init__.py
│ └── codechef.py
├── codeforces
│ ├── __init__.py
│ ├── contests.py
│ └── user.py
├── codewars
│ ├── __init__.py
│ └── codewars.py
├── coinmarketcap
│ ├── __init__.py
│ └── crypto.py
├── config
│ ├── __init__.py
│ └── request_config.py
├── coursera
│ ├── __init__.py
│ └── courses.py
├── covidinfo
│ ├── __init__.py
│ └── covidinfo.py
├── cricbuzz
│ ├── __init__.py
│ └── cricbuzz.py
├── devcommunity
│ ├── __init__.py
│ └── articles.py
├── devpost
│ ├── __init__.py
│ └── devpost.py
├── dictionary
│ ├── __init__.py
│ └── wordoftheday.py
├── dribbble
│ ├── __init__.py
│ └── dribbble.py
├── eazydiner
│ ├── __init__.py
│ └── eazydiner.py
├── ebay
│ ├── __init__.py
│ └── ebay.py
├── espn
│ ├── __init__.py
│ └── espnmodule.py
├── espncricinfo
│ ├── __init__.py
│ └── espncricinfo.py
├── fide
│ ├── __init__.py
│ └── fide.py
├── finance
│ ├── bse.py
│ ├── nasdaq.py
│ ├── nse.py
│ └── stock_price.py
├── flexjobs
│ ├── __init__.py
│ └── flexjobs.py
├── flipkart
│ ├── __init__.py
│ ├── flipkart_clothing.py
│ ├── flipkart_file.py
│ └── flipkart_laptop.py
├── flyrobu
│ ├── __init__.py
│ └── flyrobu.py
├── geeksforgeeks
│ ├── __init__.py
│ └── geeksforgeeks.py
├── github
│ ├── __init__.py
│ ├── issue.py
│ ├── organization.py
│ ├── pull_request.py
│ ├── repository.py
│ └── users.py
├── github_education
│ ├── __init__.py
│ └── events.py
├── googlenews
│ └── googleNews.py
├── hackerearth
│ ├── __init__.py
│ └── challenges.py
├── hackernews
│ ├── __init__.py
│ └── articles.py
├── hackerrank
│ ├── __init__.py
│ ├── contest.py
│ └── user.py
├── hashnode
│ ├── __init__.py
│ └── hashnode.py
├── healthgrades
│ ├── __init__.py
│ └── healthgradesmodule.py
├── icc
│ ├── __init__.py
│ └── icc_rankings.py
├── imdb
│ ├── __init__.py
│ ├── actor.py
│ ├── box_office.py
│ ├── celeb.py
│ ├── imdb.py
│ ├── indian_movies.py
│ └── movie.py
├── indiantrekking
│ ├── __init__.py
│ └── trek.py
├── indiatodayweather
│ ├── __init__.py
│ └── weather.py
├── instagram
│ └── users.py
├── internshala
│ └── internships.py
├── kindle_bookstore
│ └── kindle.py
├── kooapp
│ └── users.py
├── lastfm
│ ├── __init__.py
│ └── lastfm.py
├── leetcode
│ └── leetcode_scraper.py
├── letterboxd
│ ├── __init__.py
│ └── letterboxd.py
├── librarygenesis
│ ├── __init__.py
│ └── library.py
├── lichess
│ ├── __init__.py
│ └── lichess.py
├── linkedIn
│ └── linkedInspider.py
├── luma
│ └── events.py
├── magicbricks
│ └── MagicBricks.py
├── mediencyclopedia
│ └── mediencyclopedia.py
├── medium
│ ├── publication.py
│ ├── trending.py
│ └── user.py
├── moneycontrol
│ ├── equity_mutual_funds.py
│ ├── gold.py
│ ├── index_contribution.py
│ ├── indian_index.py
│ ├── silver_prices.py
│ ├── top_gainers.py
│ └── top_losers.py
├── myanimelist
│ ├── __init__.py
│ └── scraper.py
├── newscnn
│ └── newscnn.py
├── olympics
│ └── olympic.py
├── pinterest
│ ├── __init__.py
│ └── pinterest.py
├── quora
│ ├── __init__.py
│ └── quora.py
├── reddit
│ └── reddit.py
├── robu
│ └── robu.py
├── rottentomatoes
│ └── rot_tom.py
├── spotify
│ └── spotify_file.py
├── stackoverflow
│ └── questions.py
├── steam
│ ├── __init__.py
│ └── steamScraper.py
├── swiggy
│ ├── __init__.py
│ └── swiggy.py
├── sysreqlab
│ ├── __init__.py
│ ├── find_titles.py
│ └── requirements.py
├── techcrunch
│ └── techCrunch.py
├── thehindu
│ └── thehindu.py
├── timeanddate
│ ├── city.py
│ ├── day_in_history.py
│ ├── extended_forecast.py
│ ├── fun_holidays.py
│ ├── time_zones.py
│ └── utc.py
├── timesjobs
│ └── timesjobs_scraper.py
├── tripadvisor
│ └── TripAdvisor.py
├── twitter
│ └── numidconverter.py
├── uci
│ └── UCI.py
├── udemy
│ └── courses.py
├── unsplash
│ └── unsplash_scraper.py
├── who
│ ├── WHO.py
│ └── __init__.py
├── wikipedia
│ └── wikipedia.py
├── wuzzuf
│ ├── __init__.py
│ └── wuzzuf.py
├── yahoofinance
│ └── YahooFinance.py
├── yellowpages
│ ├── __init__.py
│ └── yellowpages.py
├── youtube
│ ├── channel.py
│ └── video.py
└── zomato
│ ├── __init__.py
│ └── zomato.py
└── test
├── academia_test.py
├── amazon_test.py
├── askubuntu_test.py
├── atcoder_test.py
├── banners_test.py
├── bayt_test.py
├── bbc_test.py
├── codechef_test.py
├── coinmarketcap_test.py
├── coursera_test.py
├── covidinfo_test.py
├── cricbuzz_test.py
├── devpost_test.py
├── dribbble_test.py
├── eazydiner_test.py
├── ebay_test.py
├── espncricinfo_test.py
├── fide_test.py
├── flexjobs_test.py
├── flipkart_test.py
├── flyrobu_test.py
├── geeksforgeeks_test.py
├── github_education_test.py
├── github_test.py
├── hackerearth_test.py
├── hackernews_test.py
├── hackerrank_test.py
├── healthgrades_test.py
├── icc_test.py
├── librarygenesis_test.py
├── lichess_test.py
├── myanimelist.py
├── pinterest_test.py
├── quora_test.py
├── swiggy_test.py
├── who_test.py
├── wuzzuf_test.py
└── zomato_test.py
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: [nikhil25803]
2 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug.yaml:
--------------------------------------------------------------------------------
1 | name: "🐞 Bug Report"
2 | description: "Create a report to help us improve"
3 | title: "BUG:"
4 | labels: [Bug, Needs Triage]
5 | body:
6 | - type: checkboxes
7 | attributes:
8 | label: "Is there an existing issue for this?"
9 | description: "Please search to see if an issue already exists for the bug you encountered."
10 | options:
11 | - label: "I have searched the existing issues"
12 | required: true
13 | - type: textarea
14 | attributes:
15 | label: "What happened?"
16 | description: "A concise description of what you're experiencing."
17 | validations:
18 | required: true
19 | - type: checkboxes
20 | attributes:
21 | label: "Record"
22 | options:
23 | - label: "I agree to follow this project's Code of Conduct"
24 | required: true
25 | - label: "I'm a GSSoC'24 contributor"
26 | - label: "I want to work on this issue"
27 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/documentationupdate.yaml:
--------------------------------------------------------------------------------
1 | name: "📑 Documentation Update"
2 | description: "Improve Documentation"
3 | title: "DOC:"
4 | labels: [DOC, Needs Triage]
5 | body:
6 | - type: textarea
7 | attributes:
8 | label: "What's wrong with the existing documentation?"
9 | description: "What do we need to add, update, or delete?"
10 | validations:
11 | required: true
12 | - type: textarea
13 | attributes:
14 | label: "Add ScreenShots"
15 | description: "Add sufficient SS to explain your issue."
16 | validations:
17 | required: false
18 |
19 | - type: checkboxes
20 | attributes:
21 | label: "Record"
22 | options:
23 | - label: "I agree to follow this project's Code of Conduct"
24 | required: true
25 | - label: "I'm a GSSoC'24 contributor"
26 | - label: "I want to work on this issue"
27 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/featurerequest.yaml:
--------------------------------------------------------------------------------
1 | name: "✨ Feature Request"
2 | description: "Suggest an idea for this project "
3 | title: "Feat:"
4 | labels: [Feat, Needs Triage]
5 | body:
6 | - type: textarea
7 | attributes:
8 | label: "Describe the feature"
9 | description:
10 | validations:
11 | required: true
12 | - type: textarea
13 | attributes:
14 | label: "Add ScreenShots"
15 | description: "Add sufficient SS to explain your issue."
16 | validations:
17 | required: true
18 | - type: checkboxes
19 | attributes:
20 | label: "Record"
21 | options:
22 | - label: "I agree to follow this project's Code of Conduct"
23 | required: true
24 | - label: "I'm a GSSoC'24 contributor"
25 | - label: "I want to work on this issue"
26 |
--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
1 | ## Description
2 |
3 | > The changes made in this pull request should be briefly described.
4 |
5 | ## Resolves: [Issue Number]
6 |
7 | ## Checklist
8 |
9 | > Before submitting this pull request, please verify that the following checkpoints have been met.
10 |
11 | - [ ] Have you adhered to the repository's defined coding convention rules?
12 | - [ ] Have you updated the 'documentation.md' file with the method/function documentation?
13 | - [ ] Have you sent a message along with the result or response?
14 | - [ ] Have you used the try-catch technique?
15 | - [ ] Has the method/class been added to the documentation (md file)?
16 |
17 | ## Screenshots
18 |
19 | > A screenshot illustrating the approach you developed is required for validation.
20 |
21 | ## Additional Notes/Comments
22 |
23 | > Any additional remarks or suggestions concerning this pull request are welcome.
24 |
25 | ---
26 |
27 | I certify that I have carried out the relevant checks and provided the requisite screenshot for validation by submitting this pull request.
28 | I appreciate your contribution.
29 |
--------------------------------------------------------------------------------
/.github/workflows/greetings.yaml:
--------------------------------------------------------------------------------
1 | name: Greetings
2 |
3 | on: [issues]
4 |
5 | jobs:
6 | greeting:
7 | runs-on: ubuntu-latest
8 | permissions:
9 | issues: write
10 | pull-requests: write
11 | steps:
12 | - uses: actions/first-interaction@v1
13 | with:
14 | repo-token: ${{ secrets.GITHUB_TOKEN }}
15 | issue-message: "Hi there! Thanks for opening this issue. We appreciate your contribution to this open-source project. We aim to respond or assign your issue as soon as possible."
16 | pr-message: "Welcome to our repository! 🎊 Thank you so much for taking the time to point this out."
17 |
--------------------------------------------------------------------------------
/.github/workflows/pr_merged.yml:
--------------------------------------------------------------------------------
1 | name: Auto Comment on PR Merged
2 |
3 | on:
4 | pull_request_target:
5 | types: [closed]
6 |
7 | permissions:
8 | issues: write
9 | pull-requests: write
10 |
11 | jobs:
12 | comment:
13 | runs-on: ubuntu-latest
14 | if: github.event.pull_request.merged == true
15 | steps:
16 | - name: Add Comment to Pull Request
17 | run: |
18 | COMMENT=$(cat <
--------------------------------------------------------------------------------
/docs/installation.md:
--------------------------------------------------------------------------------
1 | ### Install the package using `pip`:
2 |
3 | ```bash
4 | pip install scrape-up --upgrade
5 | ```
6 |
7 | ### Import the required module
8 |
9 | > For example - `GitHub`
10 |
11 | ```py
12 | # Import the required module
13 | from scrape_up import github
14 | ```
15 |
16 | ### Instantiate an object with required parameters
17 |
18 | > Also mentioned in the docstring
19 |
20 | ```python
21 | user = github.Users(username="nikhil25803")
22 | ```
23 |
24 | ### Call the required method.
25 |
26 | > For example, to extract the number of followers of a user:
27 |
28 | ```python
29 | # Call the followers method
30 | followers_count = user.followers()
31 | ```
32 |
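Putting these steps together, a minimal end-to-end sketch (using the `github.Users` class and `.followers()` method shown above; the print format is illustrative):

```python
# Minimal sketch combining the installation steps above.
from scrape_up import github

# Instantiate with the target username, as shown in the docs.
user = github.Users(username="nikhil25803")

# Call the documented method; the exact return type depends on the library version.
followers_count = user.followers()
print(f"Followers: {followers_count}")
```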
--------------------------------------------------------------------------------
/docs/modules/Finance.md:
--------------------------------------------------------------------------------
1 | ```python
2 | from scrape_up import StockPrice
3 | ```
4 |
5 | ### Scrape stock data
6 |
7 | First, create an instance of class `StockPrice` with stock name and index name.
8 |
9 | ```python
10 | infosys = StockPrice('infosys','nse')
11 | ```
12 |
13 | | Methods | Details |
14 | | ----------------------------------------- | --------------------------------------------------------------------------------------- |
15 | | `.get_latest_price()` | Returns the latest stock price of the given stock name. |
16 | | `.get_historical_data(from_date,to_date)` | Returns stock prices from `from_date` to `to_date` (dates in dd-mm-yyyy format).         |
17 |
18 | ---
19 |
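A short usage sketch of the two methods listed above. The date format follows the dd-mm-yyyy convention stated in the table; the shape of the returned data is an assumption:

```python
from scrape_up import StockPrice

# Stock name and index name, as in the example above.
infosys = StockPrice("infosys", "nse")

# Latest traded price of the stock.
latest = infosys.get_latest_price()

# Prices between two dates, passed as dd-mm-yyyy strings per the table.
history = infosys.get_historical_data("01-01-2024", "31-01-2024")

print(latest)
print(history)
```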
--------------------------------------------------------------------------------
/docs/modules/HackerEarth.md:
--------------------------------------------------------------------------------
1 | ```py
2 | from scrape_up import hackerearth
3 | ```
4 |
5 | Create an object of class `Challenges`
6 |
7 | ```python
8 | hackerearth = hackerearth.Challenges()
9 | ```
10 |
11 | | Methods | Details |
12 | | ---------------- | ------------------------------------------------------ |
13 | | `get_upcoming()` | Get the details of upcoming challenges on Hackerearth. |
14 | | `get_ongoing()` | Get the details of ongoing challenges on Hackerearth. |
15 | | `get_hiring()` | Get the details of hiring challenges on Hackerearth. |
16 |
17 | ---
18 |
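For illustration, the three methods could be combined like this (the return structures are assumptions; the table only states that they contain challenge details):

```python
from scrape_up import hackerearth

challenges = hackerearth.Challenges()

upcoming = challenges.get_upcoming()  # upcoming challenges
ongoing = challenges.get_ongoing()    # ongoing challenges
hiring = challenges.get_hiring()      # hiring challenges

print(upcoming)
```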
--------------------------------------------------------------------------------
/docs/modules/Hackernews.md:
--------------------------------------------------------------------------------
1 |
2 | ```py
3 | from scrape_up import hacker_news
4 | ```
5 |
6 | Create an instance of `HackerNews` class.
7 |
8 | ```py
9 | articles = HackerNews()
10 | ```
11 |
12 | | Methods | Details |
13 | | ------------------ | ------------------------------------------------------------------------------------------------------------------------ |
14 | | `.articles_list()` | Returns the latest articles along with their score, author, author url, time, comment count and link in JSON format. |
15 | | `.new_articles()` | Returns the latest new articles along with their score, author, author url, time, comment count and link in JSON format. |
16 | | `.past_articles()` | Returns the past articles along with their score, author, author url, time, comment count and link in JSON format. |
17 | | `.ask_articles()` | Returns the ask articles along with their score, author, author url, time, comment count and link in JSON format. |
18 | | `.show_articles()` | Returns the show articles along with their score, author, author url, time, comment count and link in JSON format. |
19 | | `.jobs()` | Returns the jobs along with their time and link in JSON format. |
20 |
21 | ---
--------------------------------------------------------------------------------
/docs/modules/Internshala.md:
--------------------------------------------------------------------------------
1 | ## Internshala
2 |
3 | Create an object for the 'Internshala' class:
4 |
5 | ```python
6 | search = Internshala(search_type="machine learning")
7 | ```
8 |
9 | | Methods | Details |
10 | | -------------------------- | ------------------------------------------------------------------------------ |
11 | | `.internships()` | Scrapes and returns a list of dictionaries representing internships. |
12 | | `.jobs()` | Scrapes and returns a list of dictionaries representing jobs. |
13 | | `.certification_courses()` | Scrapes and returns a list of dictionaries representing certification courses. |
--------------------------------------------------------------------------------
/docs/modules/Twitter.md:
--------------------------------------------------------------------------------
1 | ```python
2 | from scrape_up import twitter
3 | ```
4 |
5 | ### Scrape
6 |
7 | First, create an object of class `TwitterScraper`
8 |
9 | ```python
10 | twitter_scraper = TwitterScraper()
11 | ```
12 |
13 | | Methods | Details |
14 | | -------------------------- | --------------------------------------------- |
15 | | `.unametoid(username)` | Returns the numerical_id on passing username. |
16 | | `.idtouname(numerical_id)` | Returns the username on passing numerical_id. |
17 |
18 | ---
19 |
--------------------------------------------------------------------------------
/docs/modules/academia.md:
--------------------------------------------------------------------------------
1 | ```py
2 | from scrape_up import academia
3 | ```
4 |
5 | Create an instance of `Academia` class
6 |
7 | ```python
8 | academia = academia.Academia()
9 | ```
10 |
11 | | Method | Details |
12 | | ----------------------------- | --------------------------------------------------------------------- |
13 | | `get_research_topics()` | Fetches and returns research topics. |
14 | | `get_research_papers(search)` | Fetches and returns research papers related to the given search term. |
15 |
16 | ---
--------------------------------------------------------------------------------
/docs/modules/amazon.md:
--------------------------------------------------------------------------------
1 |
2 | ### Scrape details about a product
3 |
4 | Create an instance of the `Product` class with a `product_name` property.
5 |
6 | ```python
7 | product = Product(product_name="watch")
8 | ```
9 |
10 | | Methods | Details |
11 | | ------------------------ | ---------------------------- |
12 | | `.get_product()` | Returns product data(links). |
13 | | `.get_product_details()` | Returns product detail. |
14 | | `.get_product_image()` | Returns product image. |
15 | | `.customer_review()` | Returns product review. |
16 |
17 | ## Amazon-Kindle Bookstore
18 |
19 | Create an instance of `Book` class.
20 |
21 | ```python
22 | books = AmazonKindle()
23 | ```
24 |
25 | | Methods | Details |
26 | | ---------------- | ------------------------------------------------------ |
27 | | `.bestsellers()` | Returns the list of best-selling books on AmazonKindle |
28 | | `.topbooks()` | Returns the list of top books on AmazonKindle |
--------------------------------------------------------------------------------
/docs/modules/ask-ubuntu.md:
--------------------------------------------------------------------------------
1 |
2 | ### Scrape questions, views, votes, answer counts, and descriptions from Ask Ubuntu website regarding a topic
3 |
4 | Create an instance of `Questions` class.
5 |
6 | ```python
7 | questions = Questions("topic")
8 | ```
9 |
10 | | Methods | Details |
11 | | --------------------------- | ---------------------------------------------------------------------------------------------------- |
12 | | `.getNewQuestions()` | Returns the new questions, views, votes, answer counts, and descriptions in JSON format |
13 | | `.getActiveQuestions()` | Returns the active questions, views, votes, answer counts, and descriptions in JSON format |
14 | | `.getUnansweredQuestions()` | Returns the unanswered questions, views, votes, answer counts, and descriptions in JSON format |
15 | | `.getBountiedQuestions()` | Returns the bountied questions, views, votes, answer counts, and descriptions in JSON format |
16 | | `.getFrequentQuestions()` | Returns the frequently asked questions, views, votes, answer counts, and descriptions in JSON format |
17 | | `.getHighScoredQuestions()` | Returns the most voted questions, views, votes, answer counts, and descriptions in JSON format |
18 |
19 | ---
20 |
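A minimal sketch of the listed methods. The import path is an assumption based on the repository layout (`src/scrape_up/askubuntu/questions.py`); the page above only shows the class name:

```python
# Import path assumed from the repository layout; adjust if the package exposes it differently.
from scrape_up.askubuntu.questions import Questions

questions = Questions("python")

# Each call returns questions with views, votes, answer counts, and descriptions in JSON format.
new_questions = questions.getNewQuestions()
unanswered = questions.getUnansweredQuestions()

print(new_questions)
```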
--------------------------------------------------------------------------------
/docs/modules/bbc.md:
--------------------------------------------------------------------------------
1 | ```python
2 | from scrape_up import bbcnews
3 | ```
4 | 
5 | First, create an object of class `BBCNews`
6 | 
7 | ```python
8 | user = bbcnews.BBCNews()
9 | ```
10 | 
11 | | Methods            | Details                                                   |
12 | | ------------------ | --------------------------------------------------------- |
13 | | `.get_headlines()` | Returns the list of objects containing the headlines.     |
14 | | `.get_article()`   | Returns an object with proper details about the article.  |
15 | 
16 | ---
--------------------------------------------------------------------------------
/docs/modules/codechef.md:
--------------------------------------------------------------------------------
1 |
2 | ```python
3 | from scrape_up import codechef
4 | ```
5 |
6 | ### Scrape user details
7 |
8 | Create an object of the `User` class
9 |
10 | ```python
11 | user1 = codechef.User(id="username")
12 |
13 | ```
14 |
15 | | Methods | Details |
16 | | --------------- | ---------------------------------------------------------------- |
17 | | `get_profile()` | Returns name, username, profile_image_link, rating, details etc. |
18 |
19 | ---
20 |
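A short sketch of the documented call; the printed structure (name, username, rating, and so on) follows the table above:

```python
from scrape_up import codechef

user1 = codechef.User(id="username")

# Returns name, username, profile_image_link, rating, and other details per the table above.
profile = user1.get_profile()
print(profile)
```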
--------------------------------------------------------------------------------
/docs/modules/coinmarketcap.md:
--------------------------------------------------------------------------------
1 | ```py
2 | from scrape_up import coinmarketcap
3 | ```
4 |
5 | Create an instance of `Crypto` class
6 |
7 | ```python
8 | crypto = coinmarketcap.Crypto()
9 | ```
10 |
11 | | Method | Details |
12 | | ---------------------------- | -------------------------------------------------------- |
13 | | `get_top_cryptocurrencies()` | Fetches and returns data about the top cryptocurrencies. |
14 |
15 | ---
16 |
--------------------------------------------------------------------------------
/docs/modules/coursera.md:
--------------------------------------------------------------------------------
1 | Create an object of the 'Courses' class:
2 |
3 | ```python
4 | scraper = Courses(topic="topic")
5 | ```
6 |
7 | | Methods | Details |
8 | | -------------------------------------- | ------------------------------------------------------------------------------------------ |
9 | | `.get_courses()`                       | Returns the courses with title, taught by, skills, rating, review count, image URL and link |
10 | | `.fetch_modules(course='Course Name')` | Returns the modules associated with the given Coursera course.                              |
11 |
--------------------------------------------------------------------------------
/docs/modules/covid-19.md:
--------------------------------------------------------------------------------
1 |
2 | ```py
3 | from scrape_up import covidinfo
4 | ```
5 |
6 | Create an instance of the `CovidInfo` class.
7 |
8 | ```python
9 | response = covidinfo.CovidInfo()
10 | ```
11 |
12 | | Methods | Details |
13 | | -------------------- | -------------------------------------------------------------- |
14 | | `.covid_data()` | Returns the list of all covid data scraped from the website. |
15 | | `.total_cases()` | Returns the count of total covid cases all over the world. |
16 | | `.total_deaths()`    | Returns the count of covid deaths all over the world.          |
17 | | `.total_recovered()` | Returns the count of recovered covid cases all over the world. |
18 | | `.latest_news()`     | Returns the latest news of the day.                            |
19 |
20 | ---
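For illustration, the count methods could be used like this (return types are assumptions; the table only describes what each count represents):

```python
from scrape_up import covidinfo

response = covidinfo.CovidInfo()

print(response.total_cases())      # total covid cases worldwide
print(response.total_deaths())     # total covid deaths worldwide
print(response.total_recovered())  # total recovered cases worldwide
```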
--------------------------------------------------------------------------------
/docs/modules/crickbuzz.md:
--------------------------------------------------------------------------------
1 | ```py
2 | from scrape_up import cricbuzz
3 | ```
4 |
5 | Create an instance of `Cricubzz` class.
6 |
7 | ```python
8 | cricbuzz = cricbuzz.Cricubzz()
9 | ```
10 |
11 | | Methods | Details |
12 | | ---------------------------- | ---------------------------------------------------------------------- |
13 | | `.get_live_matches()` | Returns a list of live matches from Cricbuzz. |
14 | | `.get_recent_matches()` | Returns a list of recent matches from Cricbuzz. |
15 | | `.get_upcoming_matches()` | Returns a list of upcoming matches from Cricbuzz. |
16 | | `.get_series()` | Returns a dictionary of series in month and year format from Cricbuzz. |
17 | | `.get_series_from_archive()` | Returns a list of series from the archive from Cricbuzz. |
18 | | `.get_matches_by_day()` | Returns a dictionary of matches by day from Cricbuzz. |
19 | | `.get_series_matches()` | Returns a list of matches in a series from Cricbuzz. |
20 | | `.get_series_stats()` | Returns a list of stats of players in a series from Cricbuzz. |
21 | | `.get_teams_list()` | Returns a list of teams from Cricbuzz. |
22 | | `.get_team_schedule()` | Returns a list of matches of a team from Cricbuzz. |
23 | | `.get_team_players()` | Returns a list of players of a team from Cricbuzz. |
24 | | `.get_team_results()` | Returns a list of past results of a team from Cricbuzz. |
25 | | `.get_team_stats()` | Returns a list of player stats of a team from Cricbuzz. |
26 |
27 | ---
--------------------------------------------------------------------------------
/docs/modules/devpost.md:
--------------------------------------------------------------------------------
1 |
2 | Create an instance of `Devpost` class.
3 |
4 | ```python
5 | posts = Devpost()
6 | ```
7 |
8 | | Methods | Details |
9 | | ------------------- | -------------------------------------------------------------------------------------------------------------------- |
10 | | `.get_projects()`   | Returns the latest projects along with their description, like and comment count, image and member details.           |
11 | | `.search(topic)`    | Returns the searched projects along with their description, like and comment count, image and member details.         |
12 | | `.get_hackathons()` | Returns the latest hackathons along with their title, participants, prizes, deadlines.                                |
13 | | `.get_featured()`   | Returns the latest featured projects along with their description, like and comment count, image and member details.  |
14 | | `.get_winner()`     | Returns the latest winning projects along with their description, like and comment count, image and member details.   |
15 |
16 | ---
--------------------------------------------------------------------------------
/docs/modules/dribbble.md:
--------------------------------------------------------------------------------
1 | ```py
2 | from scrape_up import dribbble
3 | ```
4 |
5 | Create an instance of `Dribbble` class.
6 |
7 | ```python
8 | shots = dribbble.Dribbble()
9 | ```
10 |
11 | | Methods | Details |
12 | | --------------------- | ------------------------------------------------------------------------------------------------------------------------------ |
13 | | `.get_shots()` | Returns the latest shots along with their title, designer, designer URL, like and view count, and link. |
14 | | `.search(topic)` | Returns the latest shots along with their title, designer, designer URL, like and view count, and link for the searched topic. |
15 | | `.get_animation()` | Returns the latest animation along with their title, designer, designer URL, like and view count, and link. |
16 | | `.get_branding()` | Returns the latest branding along with their title, designer, designer URL, like and view count, and link. |
17 | | `.get_illustration()` | Returns the latest illustration along with their title, designer, designer URL, like and view count, and link. |
18 | | `.get_mobile()` | Returns the latest mobile shots along with their title, designer, designer URL, like and view count, and link. |
19 | | `.get_webdesign()` | Returns the latest web-design shots along with their title, designer, designer URL, like and view count, and link. |
20 |
21 | ---
22 |
--------------------------------------------------------------------------------
/docs/modules/eazydinner.md:
--------------------------------------------------------------------------------
1 |
2 | Create an instance of `EazyDiner` class.
3 |
4 | ```python
5 | restaurants = EazyDiner(location="city-name")
6 | ```
7 |
8 | | Methods | Details |
9 | | ------------------------- | ------------------------------------------------------------------------------------------------ |
10 | | `.get_restaurants()` | Returns the restaurants name, location, rating, cuisine and prices in JSON format. |
11 | | `.get_breakfast()` | Returns the restaurants name, location, rating, cuisine and prices in JSON format for Breakfast. |
12 | | `.get_lunch()` | Returns the restaurants name, location, rating, cuisine and prices in JSON format for Lunch. |
13 | | `.get_dinner()` | Returns the restaurants name, location, rating, cuisine and prices in JSON format for Dinner. |
14 | | `.dinner_with_discount()` | Returns a list of restaurants from the entered location with a 50% offer. |
15 | | `.get_top10()` | Returns a list of the top 10 restaurants from a given city. |
16 |
17 | ---
18 |
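A minimal sketch of the listed methods. The import path is an assumption based on the repository layout (`src/scrape_up/eazydiner/`); the page above only shows the class name:

```python
# Import path assumed from the repository layout; adjust if the package exposes it differently.
from scrape_up.eazydiner.eazydiner import EazyDiner

restaurants = EazyDiner(location="city-name")

# Restaurant name, location, rating, cuisine, and price data in JSON format, per the table above.
print(restaurants.get_restaurants())
print(restaurants.get_top10())
```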
--------------------------------------------------------------------------------
/docs/modules/ebay.md:
--------------------------------------------------------------------------------
1 | ```py
2 | from scrape_up import ebay
3 | ```
4 |
5 | Create an instance of the `eBay` class
6 | 
7 | ```python
8 | deals = ebay.eBay()
9 | ```
10 |
11 | | Methods | Details |
12 | | ------------------- | ----------------------------------- |
13 | | `.spotlights()` | Returns spotlight deals on eBay. |
14 | | `.featured()` | Returns the featured deals on eBay. |
15 | | `.specific_deals()` | Returns the specific deals on eBay. |
16 |
17 | ---
--------------------------------------------------------------------------------
/docs/modules/espn.md:
--------------------------------------------------------------------------------
1 | ```py
2 | from scrape_up import espn
3 | ```
4 |
5 | Create an instance of `ESPN` class
6 |
7 | ```python
8 | espn = espn.ESPN()
9 | ```
10 |
11 | | Method | Details |
12 | | ------------------- | -------------------------------------------------------------- |
13 | | `get_scoreboard()` | Fetches and returns the football scoreboards for a given date. |
14 | | `get_tournaments()` | Fetches and returns information about football tournaments. |
15 | | `get_teams()` | Fetches and returns information about football teams. |
16 |
17 | ---
18 |
--------------------------------------------------------------------------------
/docs/modules/flexjobs.md:
--------------------------------------------------------------------------------
1 |
2 | ```python
3 | flex_jobs = FlexJobs(search_query, location_query, min_jobs)
4 | ```
5 |
6 | - Attributes
7 |
8 | | Attribute | Description |
9 | | ---------------- | ----------------------------------------------------------------- |
10 | | `search_query` | The search query to filter job listings. |
11 | | `location_query` | The location query to filter job listings (defaults to ''). |
12 | | `min_jobs` | The maximum number of job listings to retrieve (defaults to 100). |
13 |
14 | - Methods
15 |
16 | | Method | Description |
17 | | -------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------- |
18 | | `get_jobs() -> list` | Retrieves job listings from FlexJobs website based on search and location queries. Returns a list of dictionaries containing job details. |
19 | | `scrape_job_info(job_listing) -> dict` | Extracts job details from a job listing HTML element. |
20 |
21 | ---
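A usage sketch based on the attribute and method tables above. The import path and the example query values are assumptions:

```python
# Import path assumed from the repository layout (src/scrape_up/flexjobs/flexjobs.py).
from scrape_up.flexjobs.flexjobs import FlexJobs

# search_query, location_query, and min_jobs as described in the attribute table above.
flex_jobs = FlexJobs("python developer", "remote", 25)

# get_jobs() returns a list of dictionaries with job details.
for job in flex_jobs.get_jobs():
    print(job)
```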
--------------------------------------------------------------------------------
/docs/modules/flipkart.md:
--------------------------------------------------------------------------------
1 |
2 | ### Scrape details of products
3 |
4 | Create an instance of `Flipkart` class.
5 |
6 | ```python
7 | item = Flipkart()
8 | ```
9 |
10 | | Methods | Details |
11 | | --------------------- | ------------------------------------------------------------------ |
12 | | `.TVs()`              | Returns the list of TV sets on Flipkart.                            |
13 | | `.bestseller_books()` | Returns the list of bestselling books listed on Flipkart.           |
14 | | `.mobiles()`          | Returns the list of mobile phones under 50K along with their data.  |
15 | | `.sport_shoes()`      | Returns the list of trending sport shoes data.                      |
16 | | `.laptops()`          | Returns the list of laptops from Flipkart.                          |
17 | | `.camera()`           | Returns the list of cameras from Flipkart.                          |
18 | | `.computer()`         | Returns the list of computers from Flipkart.                        |
19 | | `.tablets()`          | Returns the list of tablets from Flipkart.                          |
20 | | `.cycle()`            | Returns the list of bicycles from Flipkart.                         |
21 | | `.printers()`         | Returns the list of printers from Flipkart.                         |
22 | | `.monitor()`          | Returns the list of monitors from Flipkart.                         |
23 | | `.ac()`               | Returns the list of ACs from Flipkart.                              |
24 | | `.refrigerator()`     | Returns the list of refrigerators from Flipkart.                    |
25 | | `.VRbox()`            | Returns the list of VR boxes from Flipkart.                         |
26 | | `.Speakers()`         | Returns the list of speakers from Flipkart.                         |
27 |
28 | ---
--------------------------------------------------------------------------------
/docs/modules/flipkartclothing.md:
--------------------------------------------------------------------------------
1 | Create an instance of `FlipkartClothing` class.
2 |
3 | ```python
4 | cloth = flipkart.FlipkartClothing()
5 | ```
6 |
7 | | Methods | Details |
8 | | -------------------------- | -------------------------------------------------------------- |
9 | | `.scrape()` | Returns the list of t-shirts with other relevant info. |
10 | | `.range()` | Returns the list of t-shirts between a particular price range. |
11 | | `.minrating()` | Returns the list of t-shirts having a minimum given rating. |
12 | | `.gendermale()` | Returns the list of t-shirts which are for males. |
13 | | `.genderfemale()` | Returns the list of t-shirts that are there for females. |
14 | | `.size()` | Returns the list of t-shirts having a particular size. |
15 | | `.formal_shirts_for_male()` | Returns the list of formal shirts for men.                     |
16 |
17 | ---
--------------------------------------------------------------------------------
/docs/modules/flipkartlaptop.md:
--------------------------------------------------------------------------------
1 | Create an instance of `FlipkartLaptops` class.
2 |
3 | ```python
4 | item = flipkart.FlipkartLaptops()
5 | ```
6 |
7 | | Methods | Details |
8 | | ------------ | ----------------------------------------- |
9 | | `.laptops()` | Returns the list of laptops with details. |
10 |
11 | ---
12 |
--------------------------------------------------------------------------------
/docs/modules/flyrobu.md:
--------------------------------------------------------------------------------
1 | ```py
2 | from scrape_up import flyrobu
3 | ```
4 |
5 | Create an instance of `Flyrobu` class.
6 |
7 | ```python
8 | flyrobu = flyrobu.Flyrobu()
9 | ```
10 |
11 | | Methods | Details |
12 | | ------------------------------------ | -------------------------------------------------------------------------------------------------------------- |
13 | | `.search(keyword)` | Returns the json data of all the details related to search by informing about the total amount of items found. |
14 | | `.get_product_details(product_name)` | Returns the json data of the product details based on the given `product_name`. |
15 |
16 | ---
--------------------------------------------------------------------------------
/docs/modules/githubedu.md:
--------------------------------------------------------------------------------
1 |
2 | ```python
3 | from scrape_up import github_education
4 | ```
5 |
6 | ### Scrape user details
7 |
8 | Create an instance of the `Events` class.
9 |
10 | ```py
11 | events = github_education.Events()
12 | ```
13 |
14 | | Methods | Details |
15 | | --------------- | ------------------------------------------------------------------------------------------------------------------- |
16 | | `.get_events()` | Returns the latest events along with their title, image_url, description, date, location, language, tags, and link. |
17 |
18 | ---
--------------------------------------------------------------------------------
/docs/modules/googlenews.md:
--------------------------------------------------------------------------------
1 |
2 | ### Scrape articles with title, descriptions, news source, date and link regarding a topic
3 |
4 | Create an instance of `GoogleNews` class.
5 |
6 | ```python
7 | articles = GoogleNews()
8 | ```
9 |
10 | | Methods | Details |
11 | | ------------------------------ | ------------------------------------------------------------------------------------------------ |
12 | | `.getArticles(topic="github")` | Returns the list of articles with title, descriptions, news source, date and link in JSON format |
13 | | `.top_stories()` | Returns the list of top stories listed regarding the mentioned topic |
14 | | `.timed_aticles(time)` | Returns the list of top stories listed regarding the mentioned topic and within that time frame |
15 | | `.bylanguage(lang)` | Returns the list of top stories listed regarding the mentioned topic in the specified language |
16 | | `.bynumerofdaysback(number)` | Returns the list of stories listed by given number of days back from the current day |
17 | | `.bylocation(countryname)` | Returns the list of top stories listed of the specified country or geolocation |
18 |
19 | ---
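A minimal sketch of the listed methods. The import path is an assumption based on the repository layout (`src/scrape_up/googlenews/googleNews.py`); the page above only shows the class name:

```python
# Import path assumed from the repository layout; adjust if the package exposes it differently.
from scrape_up.googlenews.googleNews import GoogleNews

articles = GoogleNews()

# Articles with title, description, news source, date, and link in JSON format.
results = articles.getArticles(topic="github")
top = articles.top_stories()

print(results)
```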
--------------------------------------------------------------------------------
/docs/modules/hackerrank.md:
--------------------------------------------------------------------------------
1 | ```py
2 | from scrape_up import hackerrank
3 | ```
4 |
5 | ### Scrape user details
6 |
7 | Create an object of class `User`.
8 |
9 | ```python
10 | hackerank = hackerrank.User()
11 | ```
12 |
13 | | Methods | Details |
14 | | ---------------------------- | ----------------------------------------------------------------------------------------- |
15 | | `get_profile(id="username")` | Returns name, username, country, user_type, details, badges, verified_skills, social etc. |
16 | | `get_skills()` | Returns a list of verified skills and their links. |
17 |
18 | ### Scrape contest details
19 |
20 | Create an object of class `Contest`.
21 |
22 | ```python
23 | hackerank = hackerrank.Contest()
24 | ```
25 |
26 | | Methods | Details |
27 | | --------------------- | -------------------------------------------------------------------- |
28 | | `active_contests()` | Returns information on active contests like title, status, and link. |
29 | | `archived_contests()` | Returns information regarding archived contests. |
30 |
31 | ---
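A combined sketch of the user and contest scrapers described above; method names are taken from the tables, and the return structures are assumptions:

```python
from scrape_up import hackerrank

# User details.
user = hackerrank.User()
profile = user.get_profile(id="username")
skills = user.get_skills()

# Contest details.
contest = hackerrank.Contest()
active = contest.active_contests()

print(profile)
print(active)
```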
--------------------------------------------------------------------------------
/docs/modules/hashnode.md:
--------------------------------------------------------------------------------
1 |
2 | Create an instance of `Hashnode` class.
3 |
4 | ```python
5 | blogs = Hashnode()
6 | ```
7 |
8 | | Methods | Details |
9 | | ----------------- | ----------------------------------------------------------------------------------------------------- |
10 | | `.get_feed()` | Returns the blogs with title, descriptions, author, read time, like and comment count, date and link |
11 | | `.get_featured()` | Returns the featured blogs with title, descriptions, author, like and comment count, date and link |
12 | | `.get_recent()` | Returns the recent blogs with title, descriptions, author, like and comment count, date and link |
13 | | `.search(topic)` | Returns the blogs with title, descriptions, author, like and comment count, date and link for a topic |
14 |
15 | ---
16 |
--------------------------------------------------------------------------------
/docs/modules/healthgrade.md:
--------------------------------------------------------------------------------
1 | ```py
2 | from scrape_up import healthgrades
3 | ```
4 |
5 | Create an instance of `HealthGrades` class
6 |
7 | ```python
8 | hc = healthgrades.HealthGrades()
9 | ```
10 |
11 | | Method | Details |
12 | | --------------------------- | -------------------------------------------------------------------- |
13 | | `get_best_hospitals(state)` | Fetches and returns information about the best hospitals in a state. |
14 |
15 | ---
--------------------------------------------------------------------------------
/docs/modules/iccranking.md:
--------------------------------------------------------------------------------
1 | ```py
2 | from scrape_up import icc
3 | ```
4 |
5 | Create an instance of `ICC` class.
6 |
7 | ```python
8 | scraper = icc.ICC()
9 | ```
10 |
11 | | Method | Details |
12 | | ------------------------------------ | --------------------------------------------------------------------- |
13 | | `.team_rankings(format)` | Returns the list of rankings of teams of the desired format. |
14 | | `.player_ranking(type,format)` | Returns the list of player rankings of desired type and format. |
15 | | `.team_rankings_women(format)`       | Returns the list of rankings of women's teams of the desired format.  |
16 | | `.player_ranking_women(type,format)` | Returns the list of women player rankings of desired type and format. |
17 |
18 | ---
19 |
--------------------------------------------------------------------------------
/docs/modules/imdb-actor.md:
--------------------------------------------------------------------------------
1 | Create an instance of `Actor` class.
2 |
3 | ```python
4 | actor = imdb.Actor(actor_name)
5 | ```
6 |
7 | | Methods | Details |
8 | | ------------------- | -------------------------------------------------------- |
9 | | `.popular_movies()` | Returns the popular movies in which the actor has acted. |
10 | | `.all_movies()` | Returns all movies acted in, and upcoming movies. |
11 | | `.awards()` | Returns the number of awards and nominations. |
12 |
--------------------------------------------------------------------------------
/docs/modules/imdb-boxoffice.md:
--------------------------------------------------------------------------------
1 | Create an instance of `BoxOffice` class.
2 |
3 | ```python
4 | boxoffice = imdb.BoxOffice()
5 | ```
6 |
7 | | Methods | Details |
8 | | --------------- | ------------------------------------------------------------------------------- |
9 | | `.top_movies()` | Returns the top box office movies, weekend and total gross, and weeks released. |
--------------------------------------------------------------------------------
/docs/modules/imdb-celeb.md:
--------------------------------------------------------------------------------
1 | Create an instance of `Celeb` class.
2 |
3 | ```python
4 | celeb = imdb.Celeb()
5 | ```
6 |
7 | | Methods | Details |
8 | | --------------- | ------------------------------------------------------- |
9 | | `.top_celebs()` | Returns the name, roles, and famous movie of the celeb. |
--------------------------------------------------------------------------------
/docs/modules/imdb-indian.md:
--------------------------------------------------------------------------------
1 | Create an instance of `IndianMovies` class.
2 |
3 | ```python
4 | indianmovies = imdb.IndianMovies()
5 | ```
6 |
7 | | Methods | Details |
8 | | ---------------------- | ---------------------------------------------- |
9 | | `.top_indian_movies()` | Returns the current list of top Indian movies. |
10 |
--------------------------------------------------------------------------------
/docs/modules/imdb-movies.md:
--------------------------------------------------------------------------------
1 | Create an instance of `Movie` class.
2 |
3 | ```python
4 | movie = imdb.Movie(movie_name)
5 | ```
6 |
7 | | Methods | Details |
8 | | ---------------- | --------------------------------------------------------- |
9 | | `.rating()` | Returns the IMDB rating of the movie. |
10 | | `.description()` | Returns the description, cast, and director of the movie. |
11 | | `.more_movies()` | Returns similar movies recommended by IMDB. |
12 | | `.box_office()` | Returns budget, gross worldwide collections of the movie. |
13 |
--------------------------------------------------------------------------------
/docs/modules/imdb.md:
--------------------------------------------------------------------------------
1 | Create an instance of the `IMDB` class.
2 |
3 | ```python
4 | scraper = IMDB()
5 | ```
6 |
7 | | Methods | Details |
8 | | ----------------------------- | -------------------------------------------------------------- |
9 | | `.top_rated()` | Returns the top-rated movies listed on IMDB. |
10 | | `.scrape_genre_movies(genre)` | Returns the list of movies related to the genre you mentioned. |
11 | | `.top_rated_shows()` | Returns the top-rated shows listed on IMDB. |
12 |
13 | Create an instance of `Movie` class.
14 |
15 | ```python
16 | movie = Movie(movie_name)
17 | ```
18 |
19 | | Methods | Details |
20 | | ---------------- | -------------------------------------------------------- |
21 | | `.rating()` | Returns the IMDB rating of the movie |
22 | | `.description()` | Returns the description, cast and director of the movie |
23 | | `.more_movies()` | Returns similar movies recommended by IMDB |
24 | | `.box_office()` | Returns budget, gross worldwide collections of the movie |
25 |
26 | Create an instance of `Actor` class.
27 |
28 | ```python
29 | actor = Actor(actor_name)
30 | ```
31 |
32 | | Methods | Details |
33 | | ------------------- | ------------------------------------------------------- |
34 | | `.popular_movies()` | Returns the popular movies in which the actor has acted |
35 | | `.all_movies()` | Returns all movies acted in and upcoming movies |
36 | | `.awards()` | Returns the number of awards and nominations |
37 |
38 | Create an instance of `Celeb` class.
39 |
40 | ```python
41 | celeb = Celeb()
42 | ```
43 |
44 | | Methods | Details |
45 | | --------------- | -------------------------------------------------- |
46 | | `.top_celebs()` | Returns the name, roles, famous movie of the celeb |
47 |
48 | Create an instance of `IndianMovies` class.
49 |
50 | ```python
51 | indianmovies = IndianMovies()
52 | ```
53 |
54 | | Methods | Details |
55 | | ---------------------- | --------------------------------------------- |
56 | | `.top_indian_movies()` | Returns the current list of top Indian movies |
57 |
58 | Create an instance of `BoxOffice` class.
59 |
60 | ```python
61 | boxoffice = BoxOffice()
62 | ```
63 |
64 | | Methods | Details |
65 | | --------------- | ----------------------------------------------------------------------------- |
66 | | `.top_movies()` | Returns the top box office movies, weekend and total gross and weeks released |
67 |
68 | ---
69 |
71 |
--------------------------------------------------------------------------------
/docs/modules/instagram.md:
--------------------------------------------------------------------------------
1 | ## Instagram
2 |
3 | ```python
4 | from scrape_up import instagram
5 | ```
6 |
7 | ### Scrape User details
8 |
9 | First, create an object of the class `User`
10 |
11 | ```python
12 | user = instagram.User(username="nikhil25803")
13 | ```
14 |
15 | | Methods | Details |
16 | | ----------------- | ------------------------------------------ |
17 | | `.user_details()` | Returns the number of followers of a user. |
18 |
--------------------------------------------------------------------------------
/docs/modules/installation.md:
--------------------------------------------------------------------------------
1 | # How to use this package? 👀
2 |
3 | - Install the package from `pip`
4 |
5 | ```PowerShell
6 | pip install scrape-up
7 | ```
8 |
9 | - Scrape the required information. For example, suppose you want to extract the number of followers of a user.
10 |
--------------------------------------------------------------------------------
/docs/modules/kooapp.md:
--------------------------------------------------------------------------------
1 |
2 | ```py
3 | from scrape_up import kooapp
4 | ```
5 |
6 | ### Scrape up the Kooapp user's details
7 |
8 | Create an instance of `KooUser` class.
9 |
10 | ```py
11 | user = kooapp.KooUser('krvishal')
12 | ```
13 |
14 | | Methods | Details |
15 | | ------------------------ | ------------------------------------------------------------ |
16 | | `.get_name()` | Returns the name of the user. |
17 | | `.get_bio()` | Returns the bio of the user. |
18 | | `.get_avatar_url()` | Returns the URL of the first avatar of the user. |
19 | | `.followers()` | Returns the number of followers of a user. |
20 | | `.following()` | Returns the number of people the user is following. |
21 | | `.get_social_profiles()` | Returns all the connected social media profiles of the user. |
22 | | `.get_profession()` | Returns the title/profession of the user. |
--------------------------------------------------------------------------------
/docs/modules/leetcode.md:
--------------------------------------------------------------------------------
1 | ```python
2 | from scrape_up import leetcode
3 | ```
4 |
5 | ### Scrape user details
6 |
7 | First, create an object of class `LeetCodeScraper`
8 |
9 | ```python
10 | leetcode_scraper = LeetCodeScraper(username="nikhil25803")
11 | ```
12 |
13 | **User Specific Methods - Require Username**
14 |
15 | | Methods | Details |
16 | | ----------------------------- | --------------------------------------------------------------------- |
17 | | `.scrape_rank()` | Used to scrape the rank of a user on LeetCode. |
18 | | `.scrape_rating()` | Used to scrape the rating of a user on LeetCode. |
19 | | `.get_problems_solved()` | Used to scrape total problems solved by a user on LeetCode. |
20 | | `.get_solved_by_difficulty()` | Used to scrape difficulty wise problems solved by a user on LeetCode. |
21 | | `.get_github_link()` | Used to scrape github link of a user on LeetCode. |
22 | | `.get_linkedin_link()` | Used to scrape linkedin link of a user on LeetCode. |
23 | | `.get_community_stats()` | Used to scrape community stats of a user on LeetCode. |
24 |
25 | **General Purpose Methods - Do Not Require Username**
26 |
27 | | Methods | Details |
28 | | -------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
29 | | `.get_problems(difficulty, tags_list, search_key)` | Used to scrape top LeetCode problems based on the given filters. `difficulty` is a string from ("easy", "medium", "hard"), `tags_list` is a list of tags, and `search_key` is a search string. All three parameters are optional. |
30 | | `.get_contests()` | Used to scrape the upcoming LeetCode Contests details. |
31 | | `.get_daily_challenge()` | Used to scrape LeetCode Daily Challenge details. |
32 |
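A short sketch combining user-specific and general-purpose methods. The docs above show `LeetCodeScraper` being used directly; accessing it as `leetcode.LeetCodeScraper` is an assumption based on the import shown in the first code block:

```python
from scrape_up import leetcode

# User-specific methods require a username at construction time.
scraper = leetcode.LeetCodeScraper(username="nikhil25803")
rank = scraper.scrape_rank()
solved = scraper.get_problems_solved()

# General-purpose methods do not need a username.
daily = scraper.get_daily_challenge()

print(rank, solved, daily)
```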
--------------------------------------------------------------------------------
/docs/modules/letterboxd.md:
--------------------------------------------------------------------------------
1 | Create an instance of `Letterboxd` class.
2 |
3 | ```python
4 | letterboxd_user = Letterboxd(user="arpy8")
5 | ```
6 |
7 | | Methods | Details |
8 | | --------------------------- | ---------------------------------------------------------------------------------------------------- |
9 | | `.films_watched()`          | Returns the number of films watched by the user.                                                      |
10 | | `.recent_activity(n)` | Returns a list of length `n` of the latest activity by the user. |
11 | | `.recent_reviews(n)` | Returns a list of dictionaries of length `n` with the latest reviews by the user. |
12 | | `.get_watchlist(n)` | Returns a list of length `n` including movies and series watchlisted by the user. |
13 | | `.get_followers_count()` | Returns the number of followers of the user. |
14 | | `.get_following_count()`    | Returns the number of accounts the user is following.                                                 |
15 |
16 | Note: `n` is an integer value which is optional and can be used to limit the number of results returned by the methods.
17 |
18 | ---
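A minimal sketch of the listed methods. The import path is an assumption based on the repository layout (`src/scrape_up/letterboxd/letterboxd.py`); the page above only shows the class name:

```python
# Import path assumed from the repository layout; adjust if the package exposes it differently.
from scrape_up.letterboxd.letterboxd import Letterboxd

letterboxd_user = Letterboxd(user="arpy8")

print(letterboxd_user.films_watched())
print(letterboxd_user.recent_reviews(5))  # n limits the number of results returned
print(letterboxd_user.get_followers_count())
```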
--------------------------------------------------------------------------------
/docs/modules/luma.md:
--------------------------------------------------------------------------------
1 |
2 | Create an instance of `Luma` class.
3 |
4 | ```python
5 | events = Luma()
6 | ```
7 |
8 | | Methods | Details |
9 | | --------------- | ------------------------------------------------------------------------------------------ |
10 | | `.get_events()` | Returns the latest events along with their organizer, location, image url, price and link. |
11 |
12 | ---
13 |
--------------------------------------------------------------------------------
/docs/modules/medium.md:
--------------------------------------------------------------------------------
1 | ## Medium
2 |
3 | ```python
4 | from scrape_up import medium
5 | ```
6 |
7 | ### Scrape user details
8 |
9 | First, create an object of class `Users`
10 |
11 | ```python
12 | user = medium.Users(username="nikhil25803")
13 | ```
14 |
15 | | Methods | Details |
16 | | ----------------- | ---------------------------------------- |
17 | | `.get_articles()` | Returns the article titles of the users. |
18 |
19 | ### Scrape trending articles
20 |
21 | | Methods | Details |
22 | | ----------------- | ------------------------------------------ |
23 | | `.get_trending()` | Returns the trending titles of the medium. |
24 |
25 | ### Scrape publication details
26 |
27 | First, create an object of class `Publication`
28 |
29 | ```python
30 | publication = medium.Publication(link="https://....")
31 | ```
32 |
33 | | Methods | Details |
34 | | ----------------- | ---------------------------------------------------- |
35 | | `.get_articles()` | Returns a list of articles of the given publication. |
36 |
37 | ---
--------------------------------------------------------------------------------
/docs/modules/reddit.md:
--------------------------------------------------------------------------------
1 |
2 | Create an instance of `Reddit` class.
3 |
4 | ```python
5 | posts = Reddit()
6 | ```
7 |
8 | | Methods | Details |
9 | | ---------------- | ------------------------------------------------------------------------------------------------------------------------------------ |
10 | | `.getFeed()` | Returns the posts with title, descriptions, subreddit, subreddit avatar, time, vote and comment count, image, category and link |
11 | | `.get_best()` | Returns the best posts with title, descriptions, subreddit, subreddit avatar, time, vote and comment count, image, category and link |
12 | | `.get_hot()` | Returns the hot posts with title, descriptions, subreddit, subreddit avatar, time, vote and comment count, image, category and link |
13 | | `.get_top()` | Returns the top posts with title, descriptions, subreddit, subreddit avatar, time, vote and comment count, image, category and link |
14 | | `.search(topic)` | Returns the top posts with title, subreddit, subreddit avatar, date, vote and comment count and link for a searched topic |
15 |
16 | ---
--------------------------------------------------------------------------------
/docs/modules/spotify.md:
--------------------------------------------------------------------------------
1 |
2 | ### Scrape up songs
3 |
4 | Create an instance of `Spotify` class.
5 |
6 | ```python
7 | scraper = Spotify()
8 | ```
9 |
10 | | Methods | Details |
11 | | ---------------------------- | --------------------------------------------------------- |
12 | | `.scrape_songs_by_keyword()` | Returns the list of songs that are related to the keyword |
13 | | `.scrape_homepage()` | Returns the list of playlists on the homepage |
14 | | `.close()` | To close the chrome tab that is showing results |
15 |
16 | ---
17 |
--------------------------------------------------------------------------------
/docs/modules/stackoverflow.md:
--------------------------------------------------------------------------------
1 | Create an instance of `StackOverflow` class.
2 |
3 | ```python
4 | questions = StackOverflow("topic")
5 | ```
6 |
7 | | Methods | Details |
8 | | --------------------------- | ---------------------------------------------------------------------------------------------------- |
9 | | `.getNewQuestions()` | Returns the new questions, views, votes, answer counts, and descriptions in JSON format |
10 | | `.getActiveQuestions()` | Returns the active questions, views, votes, answer counts, and descriptions in JSON format |
11 | | `.getUnansweredQuestions()` | Returns the unanswered questions, views, votes, answer counts, and descriptions in JSON format |
12 | | `.getBountiedQuestions()` | Returns the bountied questions, views, votes, answer counts, and descriptions in JSON format |
13 | | `.getFrequentQuestions()` | Returns the frequently asked questions, views, votes, answer counts, and descriptions in JSON format |
14 | | `.getHighScoredQuestions()` | Returns the most voted questions, views, votes, answer counts, and descriptions in JSON format |
15 |
16 | ---
17 |
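18 | A minimal usage sketch (the import path is an assumption; each method returns JSON-formatted question data for the chosen topic):
19 | 
20 | ```python
21 | from scrape_up.stackoverflow import StackOverflow  # assumed import path
22 | 
23 | questions = StackOverflow("python")
24 | 
25 | # Newly asked questions on the topic
26 | print(questions.getNewQuestions())
27 | 
28 | # Questions that are still unanswered
29 | print(questions.getUnansweredQuestions())
30 | ```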
--------------------------------------------------------------------------------
/docs/modules/techcrunch.md:
--------------------------------------------------------------------------------
1 |
2 | ### Scrape articles with title, descriptions, images, author, date and link
3 |
4 | Create an instance of `TechCrunch` class.
5 |
6 | ```python
7 | articles = TechCrunch()
8 | ```
9 |
10 | | Methods | Details |
11 | | ---------------- | ---------------------------------------------------------------------------------------------------------------------- |
12 | | `.getArticles()` | Returns the articles with title, descriptions, images, author, date and link regarding a category in JSON format |
13 | | `.search()` | Returns the searched articles with title, descriptions, images, author, date and link regarding a topic in JSON format |
14 |
15 | ---
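16 | 
17 | A minimal usage sketch (the import path and the search argument are assumptions inferred from the method descriptions above):
18 | 
19 | ```python
20 | from scrape_up.techcrunch import TechCrunch  # assumed import path
21 | 
22 | articles = TechCrunch()
23 | 
24 | # Latest articles in JSON format
25 | print(articles.getArticles())
26 | 
27 | # Articles matching a topic (argument assumed from the description above)
28 | print(articles.search("startups"))
29 | ```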
--------------------------------------------------------------------------------
/docs/modules/wikipedia.md:
--------------------------------------------------------------------------------
1 | ```python
2 | from scrape_up import Wikipedia
3 | ```
4 |
5 | ## Scrape Wikipedia Details
6 |
7 | Create an object of the `WikipediaScraper` class:
8 |
9 | ```python
10 | scraper = WikipediaScraper()
11 | ```
12 |
13 | | Methods | Details |
14 | | ----------------- | ------------------------------------------------------- |
15 | | `.scrape(url)`    | Returns the scraped data from Wikipedia                 |
16 | | `.get_featured()` | Returns the featured article for the day from Wikipedia |
17 |
18 | ---
19 |
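20 | A minimal usage sketch (the import path is an assumption; the article URL is just a placeholder):
21 | 
22 | ```python
23 | from scrape_up.wikipedia import WikipediaScraper  # assumed import path
24 | 
25 | scraper = WikipediaScraper()
26 | 
27 | # Full scraped data for a specific article
28 | print(scraper.scrape("https://en.wikipedia.org/wiki/Web_scraping"))
29 | 
30 | # Today's featured article
31 | print(scraper.get_featured())
32 | ```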
--------------------------------------------------------------------------------
/docs/modules/youtube.md:
--------------------------------------------------------------------------------
1 |
2 | ### Scrape Video Details
3 |
4 | Create an instance of `Video` class.
5 |
6 | ```python
7 | video = Video(video_url="video_url")
8 | ```
9 |
10 | | Methods | Details |
11 | | --------------- | ------------------------- |
12 | | `.getDetails()` | Returns the video details |
13 |
14 | ## Scrape Channel Details
15 |
16 | Create an instance of `Channel` class.
17 |
18 | ```python
19 | channel_data = Channel(channel_username="BeABetterDev")
20 | ```
21 |
22 | | Methods | Details |
23 | | ------------------ | ---------------------------------------------------------------------- |
24 | | `.getAbout()` | Returns the channel details mentioned in the about page of the channel |
25 | | `.getVideos()` | Returns all the video details in the videos page of the channel |
26 | | `.get_community()` | Returns all the post details in the community page of the channel |
27 |
28 | ---
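29 | 
30 | A minimal usage sketch (the import path is an assumption; pass a real video URL in place of the placeholder):
31 | 
32 | ```python
33 | from scrape_up.youtube import Video, Channel  # assumed import path
34 | 
35 | # Details of a single video
36 | video = Video(video_url="video_url")  # replace with the video's URL
37 | print(video.getDetails())
38 | 
39 | # Details of a channel
40 | channel_data = Channel(channel_username="BeABetterDev")
41 | print(channel_data.getAbout())
42 | print(channel_data.getVideos())
43 | ```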
--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
1 | site_name: Scrape Up
2 | site_description: A web-scraping-based python package that enables you to scrape data from various platforms like GitHub, Twitter, Instagram, or any useful website.
3 | site_url: ""
4 | plugins:
5 | - search
6 | nav:
7 | - Scrape Up: index.md
8 | - Contribution: CONTRIBUTING.md
9 | - Installation: installation.md
10 | - Guide: home.md
11 | - Modules:
12 | - 'Academia': modules/academia.md
13 | - 'BBC News': modules/bbc.md
14 | - 'Codechef': modules/codechef.md
15 | - 'Coin Market Cap': modules/coinmarketcap.md
16 | - 'Covid-19': modules/covid-19.md
17 | - 'Crickbuzz': modules/crickbuzz.md
18 | - 'Dribbble': modules/dribbble.md
19 | - 'Ebay': modules/ebay.md
20 | - 'ESPN': modules/espn.md
21 | - 'Eazydiner': modules/eazydinner.md
22 | - 'Flipkart': modules/flipkart.md
23 | - 'Flipkart Clothing': modules/flipkartclothing.md
24 | - 'Flipkart laptops': modules/flipkartlaptop.md
25 | - 'Flyrobu': modules/flyrobu.md
26 | - 'GitHub': modules/github.md
27 | - 'Github Education': modules/githubedu.md
28 | - 'Gitlab': modules/gitlab.md
29 | - 'HackerEarth': modules/HackerEarth.md
30 | - 'Hackernews': modules/Hackernews.md
31 | - 'Hashnode': modules/hashnode.md
32 | - 'Health Grades': modules/healthgrade.md
33 | - 'ICC Rankings': modules/iccranking.md
34 | - 'IMDb': modules/imdb.md
35 | - 'IMDb Box Office': modules/imdb-boxoffice.md
36 | - 'IMDb Indian Movies': modules/imdb-indian.md
37 | - 'imdb-actor': modules/imdb-actor.md
38 | - 'imdb-celebrity': modules/imdb-celeb.md
39 | - 'imdb-movies': modules/imdb-movies.md
40 | # - 'Amazon': modules/modules/amazon.md
41 | # - 'Ask-Ubuntu': modules/ask-ubuntu.md
42 | # - 'Coursera': modules/coursera.md
43 | # - 'Devpost': modules/devpost.md
44 | # - 'Finance': modules/Finance.md
45 | # - 'Flex Jobs': modules/flexjobs.md
46 | # - 'Google News': modules/googlenews.md
47 | # - 'Instagram': modules/instagram.md
48 | # - 'Internshala': modules/internshala.md
49 | # - 'Kooapp': modules/kooapp.md
50 | # - 'Leet Code': modules/leetcode.md
51 | # - 'Luma': modules/luma.md
52 | # - 'Medium': modules/medium.md
53 | # - 'Reddit': modules/reddit.md
54 | # - 'Spotify': modules/spotify.md
55 | # - 'Stack Overflow': modules/stackoverflow.md
56 | # - 'Tech Crunch': modules/techcrunch.md
57 | # - 'Twitter': modules/Twitter.md
58 | # - 'Wikipedia': modules/wikipedia.md
59 | #     - 'Youtube': modules/youtube.md
60 |   - Code of Conduct: CODE_OF_CONDUCT.md
61 | theme:
62 | features:
63 | - header.autohide
64 | name: material
65 | palette:
66 |   - media: '(prefers-color-scheme: light)'
67 | scheme: default
68 | primary: teal
69 | accent: amber
70 | toggle:
71 | icon: material/lightbulb
72 | name: Switch to dark mode
73 | - media: '(prefers-color-scheme: dark)'
74 | scheme: slate
75 | primary: teal
76 | accent: amber
77 | toggle:
78 | icon: material/lightbulb-outline
79 | name: Switch to light mode
80 | repo_name: /scrape-up
81 | repo_url: https://github.com/Clueless-Community/scrape-up
--------------------------------------------------------------------------------
/project_setup.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e
4 |
5 | pip install --upgrade setuptools wheel
6 |
7 |
8 | pip_version=$(pip --version | awk '{print $2}')
9 | required_version="24.0"
10 |
11 | version_greater_equal() {
12 |     printf '%s\n%s' "$2" "$1" | sort -C -V
13 | }
14 |
15 | if ! version_greater_equal "$pip_version" "$required_version"; then
16 | echo "Upgrading pip from version $pip_version to $required_version"
17 | pip install --upgrade pip
18 | else
19 | echo "pip is already at version $pip_version, no need to upgrade."
20 | fi
21 |
22 | pip install -r requirements.txt
23 |
24 | pip install .
25 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = [
3 | "setuptools",
4 | "wheel"
5 | ]
6 | build-backend = "setuptools.build_meta"
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | beautifulsoup4==4.11.1
2 | bs4==0.0.1
3 | requests==2.28.2
4 | requests-html==0.10.0
5 | mkdocs==1.6.0
6 | mkdocs-material
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | name = scrape_up
3 | version = 1.1.7
4 | author = Clueless Community
5 | author_email = official.cluelesscommunity@gmail.com
6 | description = A web-scraping-based python package that enables you to scrape data from various platforms.
7 | long_description = file: documentation.md
8 | long_description_content_type = text/markdown
9 | url = https://github.com/Clueless-Community/scrape-up
10 | classifiers =
11 | Programming Language :: Python :: 3
12 | License :: OSI Approved :: MIT License
13 | Operating System :: OS Independent
14 |
15 | [options]
16 | package_dir =
17 | = src
18 | packages = find:
19 | python_requires = >=3.6
20 | install_requires =
21 | bs4
22 | requests
23 | requests-html
24 | beautifulsoup4
25 |
26 | [options.packages.find]
27 | where = src
--------------------------------------------------------------------------------
/src/scrape_up/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Clueless-Community/scrape-up/5a92d9b64d25329035c4afc0ef7f18e49d774997/src/scrape_up/__init__.py
--------------------------------------------------------------------------------
/src/scrape_up/academia/__init__.py:
--------------------------------------------------------------------------------
1 | from .academia import Academia
2 |
3 | __all__ = ["Academia"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/amazon/__init__.py:
--------------------------------------------------------------------------------
1 | from .products import Product
2 |
3 | __all__ = ["Product"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/askubuntu/__init__.py:
--------------------------------------------------------------------------------
1 | from .questions import Questions
2 |
3 | __all__ = ["Questions"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/atcoder/__init__.py:
--------------------------------------------------------------------------------
1 | from .atcoder import Atcoder
2 |
3 | __all__ = ["Atcoder"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/banners/__init__.py:
--------------------------------------------------------------------------------
1 | from .scraper88x31 import Scraper88x31
2 |
3 | __all__ = ["Scraper88x31"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/banners/scraper88x31.py:
--------------------------------------------------------------------------------
1 | import bs4
2 |
3 | from scrape_up.config.request_config import RequestConfig, get
4 |
5 |
6 | class Scraper88x31:
7 | """
8 | Create an instance of the `Scraper88x31` class.
9 | ```python
10 | scraper = Scraper88x31()
11 | ```
12 | | Methods | Details |
13 | | ------------------ | -------------------------------------------------------- |
14 | | `get_all()` | Returns the list of all available 88x31 banners |
15 | """
16 |
17 | def __init__(self, *, config: RequestConfig = RequestConfig()):
18 | self.urls_to_scrape = [
19 | "https://cyber.dabamos.de/88x31/index.html",
20 | "https://cyber.dabamos.de/88x31/index2.html",
21 | "https://cyber.dabamos.de/88x31/index3.html",
22 | "https://cyber.dabamos.de/88x31/index4.html",
23 | "https://cyber.dabamos.de/88x31/index5.html",
24 | ]
25 | self.config = config
26 |
27 | def get_all(self):
28 | """
29 | Class: Scraper88x31
30 | Returns the list of all available 88x31 banners
31 | Example:
32 | ```python
33 | banners = Scraper88x31()
34 | result = banners.get_all()
35 | ```
36 |
37 | Returns:
38 | ```json
39 | ["https://cyber.dabamos.de/88x31/000010.gif", "https://cyber.dabamos.de/88x31/007button.gif", "..."]
40 | ```
41 | """
42 | img_alt = []
43 | for url in self.urls_to_scrape:
44 | try:
45 | response = get(url, self.config)
46 | response.raise_for_status()
47 | source = response.content
48 | soup = bs4.BeautifulSoup(source, "lxml")
49 | for img_tag in soup.find_all("img"):
50 | img_alt.append(
51 | "https://cyber.dabamos.de/88x31/" + img_tag.get("alt") + ".gif"
52 | )
53 |             except:
54 |                 return None
55 |         return img_alt
56 |
--------------------------------------------------------------------------------
/src/scrape_up/bayt/__init__.py:
--------------------------------------------------------------------------------
1 | from .bayt import Jobs
2 |
3 | __all__ = ["Jobs"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/bayt/bayt.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import requests
3 | from bs4 import BeautifulSoup
4 |
5 |
6 | class Jobs:
7 | """
8 | Create an instance of the class `Jobs`
9 | ```python
10 | scraper = Jobs()
11 | jobs_data = scraper.fetch_jobs(query, page)
12 | ```
13 | | Methods | Details |
14 | | ----------------------------- | -------------------------------------------------------------------------- |
15 | | `.fetch_jobs(query, page)` | Fetch job listings data from Bayt.com based on the given query and page. |
16 | """
17 |
18 | def __init__(self):
19 | self.base_url = "https://www.bayt.com"
20 |
21 | def fetch_jobs(self, query, page=1):
22 | """
23 | Fetch job listings data from Bayt.com based on the given query and page.
24 |
25 | Parameters:
26 | - `query`: The job search query.
27 | - `page` : The page number of the search results (default: 1).
28 |
29 | Example:
30 | ```python
31 | scraper = Jobs()
32 | jobs_data = scraper.fetch_jobs("software developer", page=1)
33 | ```
34 | """
35 | try:
36 | url = f"{self.base_url}/en/international/jobs/{query}-jobs/?page={page}"
37 | response = requests.get(url)
38 |
39 | response.raise_for_status()
40 |
41 | soup = BeautifulSoup(response.text, "html.parser")
42 | job_listings = soup.find_all("li", class_="has-pointer-d")
43 |
44 | jobs = []
45 | for job in job_listings:
46 | job_info = self.__extract_job_info(job)
47 | if job_info:
48 | jobs.append(job_info)
49 | sys.stdout.reconfigure(encoding="utf-8")
50 | return jobs
51 | except Exception:
52 | return None
53 |
54 | def __extract_job_info(self, job):
55 | """
56 | Extract job information from a single job listing.
57 | """
58 | job_general_information = job.find("h2", class_="jb-title")
59 | if not job_general_information:
60 | return None
61 |
62 | job_title = self.__extract_job_title(job_general_information)
63 | job_url = self.__extract_job_url(job_general_information)
64 | company_name = self.__extract_company_name(job)
65 | job_location = self.__extract_job_location(job)
66 |
67 | return {
68 | "title": job_title,
69 | "company": company_name,
70 | "location": job_location,
71 | "url": job_url,
72 | }
73 |
74 | def __extract_job_title(self, job_general_information):
75 | return job_general_information.text.strip()
76 |
77 | def __extract_job_url(self, job_general_information):
78 | return self.base_url + job_general_information.a["href"].strip()
79 |
80 | def __extract_company_name(self, job):
81 | company_name = job.find("b", class_="jb-company")
82 | if company_name:
83 | return company_name.text.strip()
84 | return None
85 |
86 | def __extract_job_location(self, job):
87 | job_location = job.find("span", class_="jb-loc")
88 | if job_location:
89 | return job_location.text.strip()
90 | return None
91 |
--------------------------------------------------------------------------------
/src/scrape_up/bbcnews/__init__.py:
--------------------------------------------------------------------------------
1 | from .bbcnews import BBCNews
2 |
3 | __all__ = ["BBCNews"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/bbcnews/bbcnews.py:
--------------------------------------------------------------------------------
1 | from bs4 import BeautifulSoup
2 |
3 | from scrape_up.config.request_config import RequestConfig, get
4 |
5 |
6 | class BBCNews:
7 | """
8 | First create an object of class `BBCNews`\n
9 | ```python
10 | scraper = BBCNews()
11 | ```
12 | | Methods | Details |
13 | | ------------------ | -------------------------------------------------------- |
14 |     | `.get_headlines()` | Returns the list of objects containing the headlines     |
15 |     | `.get_article(url)`| Returns an object with proper details about the article  |
16 | """
17 |
18 | def __init__(self, *, config: RequestConfig = RequestConfig()):
19 | self.base_url = "https://www.bbc.co.uk"
20 | self.headlines_url = self.base_url + "/news"
21 | headers = {
22 | "User-Agent": "Mozilla/5.0 (Windows NT 6.3; Win 64 ; x64) Apple WeKit /537.36(KHTML , like Gecko) Chrome/80.0.3987.162 Safari/537.36"
23 | }
24 | self.config = config
25 | if self.config.headers == {}:
26 | self.config.set_headers(headers)
27 |
28 | def get_headlines(self):
29 | """
30 | Fetches the latest headlines from BBC News website.\n
31 | Returns:
32 | A list of dictionaries, each containing the index and headline text.
33 | Example: [{'index': 1, 'headline': 'Headline 1'}, {'index': 2, 'headline': 'Headline 2'}, ...]
34 | """
35 | try:
36 | response = get(self.headlines_url, self.config)
37 | response.raise_for_status() # Raise an exception for HTTP errors (4xx or 5xx)
38 | except:
39 | return None
40 |
41 | soup = BeautifulSoup(response.content, "html.parser")
42 | headlines = soup.find_all("h3", class_="gs-c-promo-heading__title")
43 | news_set = set()
44 | news_list = []
45 | index = 1
46 |
47 | for headline in headlines:
48 | news_text = headline.get_text(strip=True)
49 | if news_text not in news_set:
50 | news_set.add(news_text)
51 | news_list.append({"index": index, "headline": news_text})
52 | index += 1
53 |
54 | return news_list
55 |
56 | def get_article(self, url: str):
57 | """
58 | Create an instance of the class - `BBCNews`\n
59 | ```python
60 | scraper = BBCNews()
61 | article = scraper.get_article()
62 | print(article)
63 | ```
64 | """
65 | try:
66 | response = get(url, self.config).text
67 | soup = BeautifulSoup(response, "lxml")
68 |
69 | main_heading = soup.find("h1", {"id": "main-heading"}).text.strip()
70 | time = soup.find("time").text.strip()
71 | text_content = soup.find_all("div", {"data-component": "text-block"})
72 | Text = ""
73 | for text in text_content:
74 | Text += text.text.strip() + " "
75 | data = {"main_heading": main_heading, "time": time, "text": Text}
76 | return data
77 | except:
78 | return None
79 |
--------------------------------------------------------------------------------
/src/scrape_up/cars/__init__.py:
--------------------------------------------------------------------------------
1 | from .cars import Cars
2 |
3 | __all__ = ["Cars"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/cars/cars.py:
--------------------------------------------------------------------------------
1 | import requests
2 | from bs4 import BeautifulSoup
3 | import json
4 |
5 |
6 | class Cars:
7 | """
8 | Create an instance of the class `Cars`
9 | ```python
10 | scraper = Cars()
11 | cars_data = scraper.fetch_cars(model, page)
12 | ```
13 | | Methods | Details |
14 | | ---------------------------| ---------------------------------------------------- |
15 | | `.fetch_cars(model, page)` | Fetch car listings data based on the model and page. |
16 | """
17 |
18 | def __init__(self):
19 | self.base_url = "https://www.cars.com"
20 |
21 | def fetch_cars(self, model, page=1):
22 | """
23 | Fetch car listings data based on the model, and page.
24 |
25 | Parameters:
26 | - `model`: The model of the car.
27 | - `page` : The page number of the search results (default: 1).
28 |
29 | Example:
30 | ```python
31 | scraper = Cars()
32 | cars_data = scraper.fetch_cars("Toyota", page=1)
33 | ```
34 | """
35 | try:
36 | url = f"{self.base_url}/shopping/results/?&keyword={model}&page={page}"
37 | response = requests.get(url)
38 | response.raise_for_status()
39 | soup = BeautifulSoup(response.text, "html.parser")
40 | car_listings = soup.find_all("div", class_="vehicle-card")
41 |
42 | cars = []
43 | for car in car_listings:
44 | car_info = self.__extract_car_info(car)
45 | if car_info:
46 | cars.append(car_info)
47 | return cars
48 | except Exception:
49 | return None
50 |
51 | def __extract_car_info(self, car):
52 | """
53 | Extract car information from a single car listing.
54 | """
55 |
56 | car_model = self.__extract_car_model(car)
57 | car_url = self.__extract_car_url(car)
58 | dealer_name = self.__extract_dealer_name(car)
59 | car_price = self.__extract_car_price(car)
60 | car_discount = self.__extract_car_discount(car)
61 |
62 | return {
63 | "model": car_model,
64 | "dealer": dealer_name,
65 | "price": car_price,
66 | "discount": car_discount,
67 | "url": car_url,
68 | }
69 |
70 | def __extract_car_model(self, car):
71 | return car.find("h2", class_="title").text.strip()
72 |
73 | def __extract_car_url(self, car):
74 | return self.base_url + car.find("a")["href"]
75 |
76 | def __extract_car_price(self, car):
77 | car_price = car.find("span", class_="primary-price").text.strip()
78 | if car_price == "Not Priced":
79 | return None
80 | return car_price
81 |
82 | def __extract_dealer_name(self, car):
83 | dealer_name = car.find("div", class_="dealer-name")
84 | return dealer_name.text.strip() if dealer_name else None
85 |
86 | def __extract_car_discount(self, car):
87 | car_discount = car.find("span", class_="price-drop")
88 | return car_discount.text.strip() if car_discount else None
89 |
--------------------------------------------------------------------------------
/src/scrape_up/codechef/__init__.py:
--------------------------------------------------------------------------------
1 | from .codechef import User
2 |
3 | __all__ = ["User"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/codeforces/__init__.py:
--------------------------------------------------------------------------------
1 | from .user import Users
2 | from .contests import Contest
3 |
4 | __all__ = ["Users", "Contest"]
5 |
--------------------------------------------------------------------------------
/src/scrape_up/codewars/__init__.py:
--------------------------------------------------------------------------------
1 | from .codewars import Codewars
2 |
3 | __all__ = ["Codewars"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/codewars/codewars.py:
--------------------------------------------------------------------------------
1 | from bs4 import BeautifulSoup
2 | import json
3 | from scrape_up.config.request_config import RequestConfig, get
4 |
5 |
6 | class Codewars:
7 | """
8 |     Create an instance of the class `Codewars`
9 | ```py
10 | cwars = Codewars(user="agastya463")
11 | cwars.get_profile()
12 | ```
13 |
14 | | Methods | Details |
15 | | ----------------- | ---------------------------------------------------------------------------------- |
16 | | `.get_profile()` | Returns the user data in json format. |
17 |
18 |
19 | Response:
20 | ```js
21 | {
22 | "Name": "Agastya Kumar Yadav",
23 | "Clan": "Unknown",
24 | "Member Since": "May 2024",
25 | "Last Seen": "May 2024",
26 | "Profiles": "",
27 | "Following": "0",
28 | "Followers": "0",
29 | "Allies": "0",
30 | "Rank": "8 kyu",
31 | "Honor": "3",
32 | "Total Completed Kata": "1",
33 | "Total Languages Trained": "1",
34 | "Highest Trained": "C++ (8 kyu)",
35 | "Most Recent": "C++",
36 | "Comments": "0 (0 replies)",
37 | "Collections": "0",
38 | "Kumite": "0",
39 | "Translations": "0 (0 approved)"
40 | }
41 | ```
42 | """
43 |
44 | def __init__(self, user: str, *, config: RequestConfig = RequestConfig()):
45 | self.user = user
46 | headers = {"User-Agent": "scrapeup"}
47 | self.config = config
48 | if self.config.headers == {}:
49 | self.config.set_headers(headers)
50 |
51 | def get_profile(self):
52 | try:
53 | url = f"https://www.codewars.com/users/{self.user}"
54 | response = get(url, self.config)
55 | soup = BeautifulSoup(response.text, "html.parser")
56 | d = soup.find_all("div", class_="stat")
57 | data = {}
58 | for i in d:
59 | k = i.text.split(":")
60 | data[k[0]] = k[1]
61 | return json.dumps(data)
62 | except Exception:
63 | return None
64 |
--------------------------------------------------------------------------------
/src/scrape_up/coinmarketcap/__init__.py:
--------------------------------------------------------------------------------
1 | from .crypto import Crypto
2 |
3 | __all__ = ["Crypto"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/config/__init__.py:
--------------------------------------------------------------------------------
1 | from .request_config import RequestConfig, get
2 |
--------------------------------------------------------------------------------
/src/scrape_up/config/request_config.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Union
2 | import requests
3 |
4 |
5 | class RequestConfig:
6 | """
7 | A class used to configure requests.
8 |
9 | Args
10 | ----
11 | timeout: int
12 | The timeout in seconds.
13 | redirect: bool
14 | Whether to follow redirects.
15 | """
16 |
17 | def __init__(
18 | self,
19 | timeout: Union[int, None] = None,
20 | redirect: bool = False,
21 | headers: Dict[str, str] = {},
22 | proxies: Dict[str, str] = {},
23 | ):
24 | self._timeout = timeout
25 | self._redirect = redirect
26 | self._headers = headers
27 | self._proxies = proxies
28 |
29 | def set_timeout(self, timeout: int):
30 | self._timeout = timeout
31 |
32 | def set_redirect(self, redirect: bool):
33 | self._redirect = redirect
34 |
35 | def set_headers(self, headers: Dict[str, str]):
36 | self._headers = headers
37 |
38 | def set_proxies(self, proxies: Dict[str, str]):
39 | self._proxies = proxies
40 |
41 | @property
42 | def timeout(self):
43 | return self._timeout
44 |
45 | @property
46 | def redirect(self):
47 | return self._redirect
48 |
49 | @property
50 | def headers(self):
51 | return self._headers
52 |
53 | @property
54 | def proxies(self):
55 | return self._proxies
56 |
57 |
58 | def get(url: str, config: RequestConfig):
59 | r = requests.get(
60 | url=url,
61 | headers=config.headers,
62 | timeout=config.timeout,
63 | allow_redirects=config.redirect,
64 | proxies=config.proxies,
65 | )
66 | return r
67 |
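68 | 
69 | # Illustrative usage sketch (not part of the library API): build a config with
70 | # custom headers and a timeout, then fetch a page through `get()`, which
71 | # forwards the settings to `requests.get`.
72 | if __name__ == "__main__":
73 |     demo_config = RequestConfig(timeout=10, headers={"User-Agent": "scrape-up demo"})
74 |     demo_response = get("https://example.com", demo_config)
75 |     print(demo_response.status_code)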
--------------------------------------------------------------------------------
/src/scrape_up/coursera/__init__.py:
--------------------------------------------------------------------------------
1 | from .courses import Coursera
2 |
3 | __all__ = ["Coursera"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/covidinfo/__init__.py:
--------------------------------------------------------------------------------
1 | from .covidinfo import CovidInfo
2 |
3 | __all__ = ["CovidInfo"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/cricbuzz/__init__.py:
--------------------------------------------------------------------------------
1 | from .cricbuzz import Cricbuzz
2 |
3 | __all__ = ["Cricbuzz"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/devcommunity/__init__.py:
--------------------------------------------------------------------------------
1 | from .articles import DevCommunity
2 |
3 | __all__ = ["DevCommunity"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/devpost/__init__.py:
--------------------------------------------------------------------------------
1 | from .devpost import Devpost
2 |
3 | __all__ = ["Devpost"]
--------------------------------------------------------------------------------
/src/scrape_up/dictionary/__init__.py:
--------------------------------------------------------------------------------
1 | from .wordoftheday import Dictionary
2 |
3 | __all__ = ["Dictionary"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/dictionary/wordoftheday.py:
--------------------------------------------------------------------------------
1 | from bs4 import BeautifulSoup
2 | from urllib.request import urlopen
3 |
4 | from scrape_up.config.request_config import RequestConfig, get
5 |
6 |
7 | class Dictionary:
8 | """
9 | Create an instance of the `Dictionary` class.
10 | ```python
11 | scraper = Dictionary()
12 | ```
13 | | Methods | Details |
14 | | -------------- | -------------------------------------------- |
15 |     | `.get_word_of_the_day()` | Returns the word of the day from Dictionary.com. |
16 | | `.word_of_the_day_definition()` | Returns the definition of the word of the day. |
17 | """
18 |
19 | def __init__(self, *, config: RequestConfig = RequestConfig()) -> None:
20 | self.config = config
21 |
22 | def __get_word_of_the_day_url(self):
23 | try:
24 | response = get("https://www.dictionary.com/", self.config)
25 | soup = BeautifulSoup(response.text, "html.parser")
26 |
27 | for anchor in soup("button"):
28 | url = anchor.get("data-linkurl", "/")
29 |
30 | if "word-of-the-day" in url:
31 | return url
32 |
33 | except:
34 | return None
35 |
36 | def __word_of_the_day_definition(self):
37 | try:
38 | response = get(self.__get_word_of_the_day_url(), self.config)
39 | soup = BeautifulSoup(response.text, "html.parser")
40 |
41 | for para in soup("p"):
42 | if para.string and para.string[0] not in "EG":
43 | return para.string
44 | except:
45 | return None
46 |
47 | def get_word_of_the_day(self):
48 | """
49 |         Returns a dictionary containing the word of the day and its meaning.
50 |
51 | ```python
52 | scraper = Dictionary()
53 | print(scraper.get_word_of_the_day())
54 | ```
55 |
56 | Sample output:
57 |         >> {'word': 'unfalsifiable', 'meaning': '...'}
58 | """
59 | response = {}
60 | try:
61 | response["word"] = (
62 | self.__get_word_of_the_day_url().split("/")[-2].split("-")[0]
63 | )
64 | response["meaning"] = self.__word_of_the_day_definition()
65 | return response
66 | except:
67 | return None
68 |
--------------------------------------------------------------------------------
/src/scrape_up/dribbble/__init__.py:
--------------------------------------------------------------------------------
1 | from .dribbble import Dribbble
2 |
3 | __all__ = ["Dribbble"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/eazydiner/__init__.py:
--------------------------------------------------------------------------------
1 | from .eazydiner import EazyDiner
2 |
3 | __all__ = ["EazyDiner"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/ebay/__init__.py:
--------------------------------------------------------------------------------
1 | from .ebay import EBAY
2 |
3 | __all__ = ["EBAY"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/espn/__init__.py:
--------------------------------------------------------------------------------
1 | from .espnmodule import ESPN
2 |
3 | __all__ = ["ESPN"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/espncricinfo/__init__.py:
--------------------------------------------------------------------------------
1 | from .espncricinfo import Espncricinfo
2 |
3 | __all__ = ["Espncricinfo"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/fide/__init__.py:
--------------------------------------------------------------------------------
1 | from .fide import FIDE
2 |
3 | __all__ = ["FIDE"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/flexjobs/__init__.py:
--------------------------------------------------------------------------------
1 | from .flexjobs import FlexJobs
2 |
3 | __all__ = ["FlexJobs"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/flipkart/__init__.py:
--------------------------------------------------------------------------------
1 | from .flipkart_clothing import FlipkartClothing
2 | from .flipkart_file import Flipkart
3 | from .flipkart_laptop import FlipkartLaptops
4 |
5 | __all__ = ["FlipkartClothing", "Flipkart", "FlipkartLaptops"]
6 |
--------------------------------------------------------------------------------
/src/scrape_up/flyrobu/__init__.py:
--------------------------------------------------------------------------------
1 | from .flyrobu import Flyrobu
2 |
3 | __all__ = ["Flyrobu"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/geeksforgeeks/__init__.py:
--------------------------------------------------------------------------------
1 | from .geeksforgeeks import Geeksforgeeks
2 |
3 | __all__ = ["Geeksforgeeks"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/github/__init__.py:
--------------------------------------------------------------------------------
1 | from .users import Users
2 | from .repository import Repository
3 | from .issue import Issue
4 | from .organization import Organization
5 | from .pull_request import PullRequest
6 |
7 |
8 | __all__ = ["Users", "Repository", "Issue", "Organization", "PullRequest"]
9 |
--------------------------------------------------------------------------------
/src/scrape_up/github_education/__init__.py:
--------------------------------------------------------------------------------
1 | from .events import Events
2 |
3 | __all__ = ["Events"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/hackerearth/__init__.py:
--------------------------------------------------------------------------------
1 | from .challenges import Challenges
2 |
3 | __all__ = ["Challenges"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/hackernews/__init__.py:
--------------------------------------------------------------------------------
1 | from .articles import Articles
2 |
3 | __all__ = ["Articles"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/hackerrank/__init__.py:
--------------------------------------------------------------------------------
1 | from .user import User
2 | from .contest import Contest
3 |
4 | __all__ = ["User", "Contest"]
5 |
--------------------------------------------------------------------------------
/src/scrape_up/hashnode/__init__.py:
--------------------------------------------------------------------------------
1 | from .hashnode import Hashnode
2 |
3 | __all__ = ["Hashnode"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/healthgrades/__init__.py:
--------------------------------------------------------------------------------
1 | from .healthgradesmodule import HealthGrades
2 |
3 | __all__ = ["HealthGrades"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/healthgrades/healthgradesmodule.py:
--------------------------------------------------------------------------------
1 | from bs4 import BeautifulSoup
2 |
3 | from scrape_up.config.request_config import RequestConfig, get
4 |
5 |
6 | class HealthGrades:
7 | """
8 | Create an instance of `HealthGrades` class
9 |
10 | ```python
11 | hc = HealthGrades()
12 | ```
13 |
14 | | Method | Details |
15 | | --------------------------- | -------------------------------------------------------------------- |
16 | | `get_best_hospitals(state)` | Fetches and returns information about the best hospitals in a state. |
17 |
18 | """
19 |
20 | def __init__(self, *, config: RequestConfig = RequestConfig()):
21 | headers = {
22 | "User-Agent": "Mozilla/5.0 (Windows NT 6.3; Win 64 ; x64) Apple WeKit /537.36(KHTML , like Gecko) Chrome/80.0.3987.162 Safari/537.36"
23 | }
24 | self.config = config
25 | if self.config.headers == {}:
26 | self.config.set_headers(headers)
27 |
28 | def get_best_hospitals(self, state):
29 | """
30 | Fetches and returns information about the best hospitals in a state.\n
31 | ```python
32 | hc = HealthGrades()
33 | hc.get_best_hospitals(state="bihar")
34 | ```
35 |
36 | Example output:
37 | ```python
38 | [
39 | {
40 | "Name": "ABC Hospital",
41 | "Location": "123 Main St, Philadelphia, PA",
42 | "Link": "https://www.healthgrades.com/hospital/abc-hospital",
43 | "Awards": ["America's 100 Best Hospitals", "Patient Safety Excellence Award"]
44 | },
45 | ...
46 | ]
47 | ```
48 | """
49 | try:
50 | state = state.replace(" ", "-")
51 | url = (
52 | f"https://www.healthgrades.com/quality/americas-best-hospitals/{state}"
53 | )
54 | html_text = get(url, self.config).text
55 | soup = BeautifulSoup(html_text, "lxml")
56 |
57 | hospitals = []
58 | container = soup.find("ul", {"class": "quality-results-group"})
59 |
60 | for items in container.find_all("div", {"class": "quality-card"}):
61 | award = []
62 | title = items.find("h3")
63 | location = items.find("div", {"class": "location-info"})
64 | link = (
65 | "https://www.healthgrades.com"
66 | + items.find("div", {"class": "hospital-info__hospital-link"}).find(
67 | "a", href=True
68 | )["href"]
69 | )
70 | awards = items.find("ul", {"class": "awards-list__quality-award"})
71 | for item in awards.find_all("li"):
72 | award.append(item.text)
73 | data = {
74 | "Name": title.text,
75 | "Location": location.text,
76 | "Link": link,
77 | "Awards": award[:-2],
78 | }
79 | hospitals.append(data)
80 | return hospitals
81 | except:
82 | return None
83 |
--------------------------------------------------------------------------------
/src/scrape_up/icc/__init__.py:
--------------------------------------------------------------------------------
1 | from .icc_rankings import ICC
2 |
3 | __all__ = ["ICC"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/imdb/__init__.py:
--------------------------------------------------------------------------------
1 | from .actor import Actor
2 | from .box_office import BoxOffice
3 | from .celeb import Celeb
4 | from .imdb import IMDB
5 | from .indian_movies import IndianMovies
6 | from .movie import Movie
7 |
8 |
9 | __all__ = ["Actor", "BoxOffice", "Celeb", "IMDB", "IndianMovies", "Movie"]
10 |
--------------------------------------------------------------------------------
/src/scrape_up/imdb/box_office.py:
--------------------------------------------------------------------------------
1 | from urllib.request import Request, urlopen
2 | from bs4 import BeautifulSoup as soup
3 |
4 |
5 | class BoxOffice:
6 | """
7 | Create an instance of `BoxOffice` class.
8 | ```python
9 | boxoffice = BoxOffice()
10 | ```
11 | | Methods | Details |
12 | | -------------------|-------------------------------------------------------------------------------|
13 | | `.top_movies()` | Returns the top box office movies, weekend and total gross and weeks released |
14 |
15 | """
16 |
17 | def __init__(self):
18 | self.__scrape_page()
19 |
20 | def __scrape_page(self):
21 | try:
22 | url = "https://www.imdb.com/chart/boxoffice/?ref_=hm_cht_sm"
23 | req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
24 |
25 | webpage = urlopen(req).read()
26 | self.page_soup = soup(webpage, "html.parser")
27 |
28 | except:
29 | return None
30 |
31 | def top_movies(self):
32 | """
33 | Create an instance of `BoxOffice` class
34 |
35 | ```python
36 | boxoffice = BoxOffice()
37 | boxoffice.top_movies()
38 | ```
39 |
40 | Return\n
41 | ```js
42 | [
43 | {
44 | "Movie Name": "Barbie",
45 | "Weekend Gross": "$53M",
46 | "Total Gross": "$459M",
47 | "Weeks released": "3"
48 | },
49 | ...
50 | ]
51 |
52 | ```
53 | """
54 | try:
55 | x = self.page_soup.find_all("h3", {"class": "ipc-title__text"})
56 | x = x[1:11]
57 | movie_names = []
58 |
59 | for y in x:
60 | movie_names.append(" ".join(y.get_text().split()[1:]))
61 |
62 | x = self.page_soup.find_all("li", {"class": "sc-ee64acb1-1 lkUVhM"})
63 | x = [y.get_text() for y in x]
64 |
65 | lis = []
66 |
67 | for y in range(0, len(x), 3):
68 | dic = {}
69 | dic["Movie Name"] = movie_names[y // 3]
70 | dic["Weekend Gross"] = x[y].split()[2]
71 | dic["Total Gross"] = x[y + 1].split()[2]
72 | dic["Weeks released"] = x[y + 2].split()[2]
73 | lis.append(dic)
74 |
75 | return lis
76 |
77 | except:
78 | return None
79 |
--------------------------------------------------------------------------------
/src/scrape_up/imdb/celeb.py:
--------------------------------------------------------------------------------
1 | from urllib.request import Request, urlopen
2 | from bs4 import BeautifulSoup as soup
3 |
4 |
5 | class Celeb:
6 | """
7 | Create an instance of `Celeb` class.
8 | ```python
9 | celeb = Celeb()
10 | ```
11 | | Methods | Details |
12 | | -------------------|----------------------------------------------------|
13 | | `.top_celebs()` | Returns the name, roles, famous movie of the celeb |
14 |
15 | """
16 |
17 | def __init__(self):
18 | self.__scrape_page()
19 |
20 | def __scrape_page(self):
21 | try:
22 | url = "https://www.imdb.com/chart/starmeter/?ref_=chtbo_ql_8"
23 | req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
24 |
25 | webpage = urlopen(req).read()
26 | self.page_soup = soup(webpage, "html.parser")
27 |
28 | except:
29 | return None
30 |
31 | def top_celebs(self):
32 | """
33 | Create an instance of `Celeb` class.\n
34 | ```python
35 | celeb = Celeb()
36 | celeb.top_celebs()
37 | ```
38 | Return\n
39 | ```js
40 | [
41 | {
42 | 'Name': 'Paul Reubens',
43 | 'Roles': ['Actor', 'Writer', 'Director'],
44 | 'Famous Movie': "Pee-wee's Playhouse"
45 | },
46 | ...
47 | ]
48 | ```
49 | """
50 | try:
51 | x = self.page_soup.find_all("div", {"class": "sc-89c756a0-4 euZqVD"})
52 | celeb_list = []
53 | for y in x:
54 | dic = {}
55 | dic["Name"] = y.find("h3", {"class": "ipc-title__text"}).get_text()
56 |
57 | lis = []
58 | for z in y.find_all(
59 | "li", {"class": "ipc-inline-list__item sc-89c756a0-6 jpNWoI"}
60 | ):
61 | lis.append(z.get_text())
62 |
63 | dic["Roles"] = lis
64 |
65 | dic["Famous Movie"] = y.find(
66 | "span", {"class": "sc-1c8554ae-1 cKAFFg"}
67 | ).get_text()
68 |
69 | celeb_list.append(dic)
70 |
71 | return celeb_list
72 |
73 | except:
74 | return None
75 |
--------------------------------------------------------------------------------
/src/scrape_up/imdb/indian_movies.py:
--------------------------------------------------------------------------------
1 | from urllib.request import Request, urlopen
2 | from bs4 import BeautifulSoup as soup
3 |
4 |
5 | class IndianMovies:
6 | """
7 | Create an instance of `IndianMovies` class.
8 | ```python
9 | indianmovies = IndianMovies()
10 | ```
11 | | Methods | Details |
12 | | -----------------------|-----------------------------------------------|
13 |     | `.top_movies()`        | Returns the current list of top Indian movies |
14 |
15 | """
16 |
17 | def __init__(self):
18 | self.__scrape_page()
19 |
20 | def __scrape_page(self):
21 | try:
22 | url = "https://www.imdb.com/india/top-rated-indian-movies/?ref_=fea_eds_center-1_india_tr_india250_cta"
23 | req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
24 |
25 | webpage = urlopen(req).read()
26 | self.page_soup = soup(webpage, "html.parser")
27 |
28 | except:
29 | return None
30 |
31 | def top_movies(self):
32 | """
33 | Create an instance of `IndianMovies` class.
34 | ```python
35 | indianmovies = IndianMovies()
36 | movies = indianmovies.top_movies()
37 | ```
38 |
39 | Return\n
40 | ```js
41 | [
42 | 'Ramayana: The Legend of Prince Rama',
43 | 'Rocketry: The Nambi Effect',
44 | 'Nayakan',
45 | 'Gol Maal',
46 | 'Anbe Sivam',
47 | ...
48 | ]
49 | ```
50 | """
51 | try:
52 | x = self.page_soup.find_all("span", {"data-testid": "rank-list-item-title"})
53 |
54 | lis = []
55 | for i in range(len(x)):
56 | lis.append(x[i].get_text()[len(str(i)) :])
57 |
58 | return lis
59 |
60 | except:
61 | return None
62 |
--------------------------------------------------------------------------------
/src/scrape_up/indiantrekking/__init__.py:
--------------------------------------------------------------------------------
1 | from .trek import Indiantrekking
2 |
3 | __all__ = ["Indiantrekking"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/indiantrekking/trek.py:
--------------------------------------------------------------------------------
1 | from bs4 import BeautifulSoup
2 | import re
3 | import requests
4 |
5 |
6 | class Indiantrekking:
7 | """
8 | A class to scrape data from Indian trekking
9 |
10 | Create an instance of `Indiantrekking` class
11 |
12 | ```python
13 | trek=Indiantrekking("hidden-lakes-of-kashmir")
14 | ```
15 |
16 | | Method | Details |
17 | | --------------------------- | -------------------------------------------------------------------- |
18 |     | `destination_name()`            | Returns the name of the place.                                                      |
19 |     | `trip_fact()`                    | Returns the trip duration, destination, altitude and the season good for trekking. |
20 |     | `outline_day_to_day_itinerary()` | Returns the outline of the day-to-day itinerary.                                   |
21 | ---
22 | """
23 |
24 | def __init__(self, place):
25 | self.place = place
26 | try:
27 | url = f"https://www.indiantrekking.com/{self.place}.html"
28 | response = requests.get(url, headers={"User-Agent": "XY"})
29 | self.soup = BeautifulSoup(response.content, "lxml")
30 | except:
31 | return None
32 |
33 | def destination_name(self):
34 | try:
35 | place = self.soup.find("div", class_="main-title").text
36 | return place
37 | except:
38 | return None
39 |
40 | def trip_fact(self):
41 | try:
42 | trip_duration = self.soup.findAll("div", class_="inner-wrap")[0].b.text
43 | trip_destination = self.soup.findAll("div", class_="inner-wrap")[1].b.text
44 | trip_season = self.soup.findAll("div", class_="inner-wrap")[3].b.text
45 | trip_altitude = self.soup.findAll("div", class_="inner-wrap")[4].b.text
46 |
47 | tripfact = {
48 | "trip_duration": re.sub(" +", " ", trip_duration.strip()),
49 | "trip_destination": re.sub(" +", " ", trip_destination.strip()),
50 | "trip_season": re.sub(" +", " ", trip_season.strip()),
51 | "trip_altitude": re.sub(" +", " ", trip_altitude.strip()),
52 | }
53 | return tripfact
54 | except:
55 | return None
56 |
57 | def outline_day_to_day_itinerary(self):
58 | try:
59 | outline = self.soup.find("div", class_="itinerary").text
60 | return outline
61 | except:
62 | return None
63 |
--------------------------------------------------------------------------------
/src/scrape_up/indiatodayweather/__init__.py:
--------------------------------------------------------------------------------
1 | from .weather import Indiatodayweather
2 |
3 | __all__ = ["Indiatodayweather"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/indiatodayweather/weather.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import datetime as dt
3 | from bs4 import BeautifulSoup
4 |
5 |
6 | class Indiatodayweather:
7 | """
8 | A class to scrape weather data from Indian today
9 |
10 | Create an instance of `Indiatodayweather` class
11 |
12 | ```python
13 | weather=Indiatodayweather("Mumbai")
14 | ```
15 |
16 | | Method | Details |
17 | | --------------------------- | ------------------------------------------------------------------------ |
18 |     | `info_about_weather()`      | Returns the temperature, wind speed, description (windy, cloudy, clear) and humidity of the place. |
19 |
20 | ---
21 | """
22 |
23 | def __init__(self, place):
24 | try:
25 | self.place = place
26 | url = (
27 | "https://www.indiatoday.in/weather/"
28 | + self.place
29 | + "-weather-forecast-today"
30 | )
31 | response = requests.get(url, headers={"User-Agent": "XY"})
32 | self.soup = BeautifulSoup(response.content, "lxml")
33 |
34 | except:
35 | return None
36 |
37 | def info_about_weather(self):
38 | try:
39 | temp = self.soup.find("div", class_="wtr_tmp_rhs").text
40 | humid = self.soup.find("span", class_="wtr_crd_ttl").text + " %"
41 | description = self.soup.find("span", class_="wtr_tmp_txt").text
42 | speed = (
43 | self.soup.find("div", class_="wtr_wid_sec crd_three")
44 | .find("span", class_="wtr_crd_ttl")
45 | .text
46 | ) + " km/h"
47 |
48 | weather_info = {
49 | "temperature": temp,
50 | "humidity": humid,
51 | "description": description,
52 | "wind_speed": speed,
53 | }
54 | return weather_info
55 | except:
56 | return None
57 |
--------------------------------------------------------------------------------
/src/scrape_up/instagram/users.py:
--------------------------------------------------------------------------------
1 | from selenium import webdriver
2 | from selenium.webdriver.common.by import By
3 | from selenium.webdriver.support.ui import WebDriverWait
4 | from selenium.webdriver.support import expected_conditions as EC
5 | from webdriver_manager.firefox import GeckoDriverManager
6 | from selenium.webdriver.firefox.service import Service
7 |
8 | driver = webdriver.Firefox(service=Service(GeckoDriverManager().install()))
9 |
10 |
11 | class User:
12 | def __init__(self, username: str):
13 | self.username = username
14 |
15 | def user_details(self):
16 | """
17 | user = User(username=" ")
18 | print(user.user_details())
19 | """
20 | try:
21 | driver.get(f"https://www.instagram.com/{self.username}/")
22 | wait = WebDriverWait(driver, 180)
23 | account_details = wait.until(
24 | EC.presence_of_all_elements_located(
25 | (By.XPATH, '//span[@class="_ac2a"]')
26 | )
27 | )
28 | return {
29 | "Number of Posts:": account_details[0].text,
30 | "Number of Followers:": account_details[1].text,
31 | "Number of Following:": account_details[2].text,
32 | }
33 |
34 | except Exception as e:
35 | message = f"{self.username} not found!"
36 | return {"data": None, "message": message}
37 | finally:
38 | driver.quit()
39 |
--------------------------------------------------------------------------------
/src/scrape_up/lastfm/__init__.py:
--------------------------------------------------------------------------------
1 | from .lastfm import Lastfm
2 |
3 | __all__ = ["Lastfm"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/letterboxd/__init__.py:
--------------------------------------------------------------------------------
1 | from .letterboxd import Letterboxd
2 |
3 | __all__ = ["Letterboxd"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/librarygenesis/__init__.py:
--------------------------------------------------------------------------------
1 | from .library import LibGen
2 |
3 | __all__ = ["LibGen"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/lichess/__init__.py:
--------------------------------------------------------------------------------
1 | from .lichess import LichessGames
2 |
3 | __all__ = ["LichessGames"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/luma/events.py:
--------------------------------------------------------------------------------
1 | from bs4 import BeautifulSoup
2 |
3 | from scrape_up.config.request_config import RequestConfig, get
4 |
5 |
6 | class Events:
7 | """
8 | Create an instance of `Events` class.
9 | ```py
10 | events = Events()
11 | ```
12 | | Methods | Details |
13 | | ------------------ | -------------------------------------------------------------------------------------------------------------------- |
14 | | `.get_events()` | Returns the latest events along with their organizer, location, image url, price and link. |
15 | """
16 |
17 | def __init__(self, *, config: RequestConfig = RequestConfig()):
18 | self.config = config
19 |
20 | def get_events(self):
21 | """
22 | Class - `Events`
23 | Example -
24 | ```python
25 | luma = Events()
26 | events = luma.get_events()
27 | ```
28 | Return
29 | ```js
30 | [
31 | {
32 | 'title': 'Brexfast Club',
33 | 'organizer': 'By Shai Goldman & Alexandra Settlemyer',
34 | 'location': 'Register to See Location',
35 | 'img_url': 'https://images.lumacdn.com/cdn-cgi/image/format=auto,fit=cover,dpr=2,quality=75,width=200,height=100/event-covers/gd/45c21ae7-67f6-40c7-8820-1cb57ea14705',
36 | 'price': 'Sold Out',
37 | 'link': 'https://lu.ma//nycaug9'
38 | }
39 | ...
40 | ]
41 | ```
42 | """
43 | url = "https://lu.ma/nyc"
44 | events_data = {"events": []}
45 | try:
46 | res = get(url, self.config)
47 | soup = BeautifulSoup(res.content, "html.parser")
48 | cards = soup.find_all("div", class_="jsx-3249095655 card-wrapper")
49 |
50 | for c in cards:
51 | title = c.find("a")["aria-label"]
52 | base = c.find_all("div", class_="jsx-3575689807 min-width-0")
53 | organizer = base[0].getText()
54 | loc = base[1].getText()
55 | try:
56 | price = c.find("div", class_="jsx-146954525 pill-label").getText()
57 | except:
58 | price = ""
59 | img = c.find(
60 | "div", class_="jsx-4068354093 img-aspect-ratio rounded"
61 | ).find("img")["src"]
62 | link = c.find("a")["href"]
63 | events_data["events"].append(
64 | {
65 | "title": title,
66 | "organizer": organizer,
67 | "location": loc,
68 | "img_url": img,
69 | "price": price,
70 | "link": "https://lu.ma/" + link,
71 | }
72 | )
73 | return events_data["events"]
74 | except:
75 | return None
76 |
--------------------------------------------------------------------------------
/src/scrape_up/medium/publication.py:
--------------------------------------------------------------------------------
1 | from selenium import webdriver
2 | from selenium.webdriver.common.by import By
3 | from selenium.webdriver.chrome.options import Options
4 | import time
5 |
6 |
7 | options = Options()
8 | options.add_argument("--headless")
9 | options.add_argument("--log-level=3")
10 | options.add_experimental_option("excludeSwitches", ["enable-logging"])
11 | driver = webdriver.Chrome(options=options)
12 |
13 |
14 | class Publication:
15 | def __init__(self, link):
16 | self.link = link
17 |
18 | def get_articles(self):
19 | """
20 | Class - `Publication`
21 | Example
22 | ```python
23 | publication = medium.Publication("https://towardsdatascience.com")
24 | articles = publication.get_articles()
25 | for article in articles:
26 | print(article) #For better readability/clarity
27 | ```
28 | Returns the articles of the publication which are arranged in the form of a list
29 | """
30 | try:
31 | articles = []
32 | link = self.link
33 | driver.get(link)
34 | scroll_pause = 0.5
35 | # Get scroll height
36 | last_height = driver.execute_script(
37 | "return document.documentElement.scrollHeight"
38 | )
39 | run_time, max_run_time = 0, 1
40 | while True:
41 | iteration_start = time.time()
42 | # Scroll down to bottom
43 | driver.execute_script(
44 | "window.scrollTo(0, 1000*document.documentElement.scrollHeight);"
45 | )
46 |
47 | # Wait to load page
48 | time.sleep(scroll_pause)
49 |
50 | # Calculate new scroll height and compare with last scroll height
51 | new_height = driver.execute_script(
52 | "return document.documentElement.scrollHeight"
53 | )
54 | scrolled = new_height != last_height
55 | timed_out = run_time >= max_run_time
56 | if scrolled:
57 | run_time = 0
58 | last_height = new_height
59 | elif not scrolled and not timed_out:
60 | run_time += time.time() - iteration_start
61 | elif not scrolled and timed_out:
62 | break
63 | elements = driver.find_elements(By.XPATH, "//h2 | //h3")
64 | for x in elements:
65 | articles.append(x.text)
66 | return articles
67 | except:
68 | return "page/publication not found."
69 |
70 |
71 | # publication = Publication("https://pub.towardsai.net")
72 | # articles = publication.get_articles_list()
73 | # for article in articles:
74 | # print(article)
75 |
--------------------------------------------------------------------------------
/src/scrape_up/medium/trending.py:
--------------------------------------------------------------------------------
1 | from bs4 import BeautifulSoup as bs
2 |
3 | from scrape_up.config.request_config import RequestConfig, get
4 |
5 | headers = {
6 | "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36"
7 | } # mimics a browser's request
8 |
9 |
10 | class Trending:
11 | def __init__(self, *, config: RequestConfig = RequestConfig()):
12 | self.config = config
13 |
14 | def get_trending(self):
15 | """
16 | Class - `Trending`
17 | Example
18 | ```python
19 | trending = Trending()
20 | for trend in trending.get_trending():
21 | print(trend)  # print each trending title
22 | ```
23 | Returns a list of trending titles
24 |
25 | """
26 | try:
27 | titles = []
28 | r = get("https://medium.com/", self.config)
29 | soup = bs(r.text, "html.parser")
30 | elements = soup.select('h2[class^="by j"]')
31 | for x in elements:
32 | titles.append(x.text)
33 | return titles
34 |
35 | except:
36 | return {"data": None, "message": "Something went wrong! Try again!"}
37 |
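Because `Trending` accepts a `RequestConfig`, the module-level browser headers can also be passed in explicitly. A small sketch, assuming the package layout shown above:

```python
from scrape_up.config.request_config import RequestConfig
from scrape_up.medium.trending import Trending, headers

# Reuse the module-level browser headers through an explicit RequestConfig.
config = RequestConfig()
config.set_headers(headers)

trending = Trending(config=config)
titles = trending.get_trending()

if isinstance(titles, list):
    for title in titles:
        print(title)
else:
    print(titles["message"])  # error dictionary returned on failure
```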
--------------------------------------------------------------------------------
/src/scrape_up/medium/user.py:
--------------------------------------------------------------------------------
1 | from selenium import webdriver
2 | from selenium.webdriver.common.by import By
3 | from selenium.webdriver.chrome.options import Options
4 | import time
5 |
6 | options = Options()
7 | options.add_argument("--headless")
8 | options.add_argument("--log-level=3")
9 | options.add_experimental_option("excludeSwitches", ["enable-logging"])
10 | driver = webdriver.Chrome(options=options)
11 |
12 |
13 | class User:
14 | def __init__(self, username):
15 | self.username = username
16 |
17 | def get_articles(self):
18 | """
19 | Class - `User`
20 | Example:
21 | ```python
22 | user = medium.User(username='karthikbhandary2')
23 | article_titles = user.get_articles()
24 | for article in article_titles:
25 | print(article) # For better readability/clarity
26 | ```
27 | Returns a list of the titles.
28 | """
29 | try:
30 | titles = []
31 | username = self.username
32 | driver.get(f"https://{username}.medium.com")
33 | scroll_pause = 0.5
34 | # Get scroll height
35 | last_height = driver.execute_script(
36 | "return document.documentElement.scrollHeight"
37 | )
38 | run_time, max_run_time = 0, 1
39 | while True:
40 | iteration_start = time.time()
41 | # Scroll down to bottom
42 | driver.execute_script(
43 | "window.scrollTo(0, 1000*document.documentElement.scrollHeight);"
44 | )
45 |
46 | # Wait to load page
47 | time.sleep(scroll_pause)
48 |
49 | # Calculate new scroll height and compare with last scroll height
50 | new_height = driver.execute_script(
51 | "return document.documentElement.scrollHeight"
52 | )
53 | scrolled = new_height != last_height
54 | timed_out = run_time >= max_run_time
55 | if scrolled:
56 | run_time = 0
57 | last_height = new_height
58 | elif not scrolled and not timed_out:
59 | run_time += time.time() - iteration_start
60 | elif not scrolled and timed_out:
61 | break
62 | elements = driver.find_elements(By.CSS_SELECTOR, "h2")
63 | for x in elements:
64 | titles.append(x.text)
65 | return titles
66 |
67 | except:
68 | return f"{username} not found."
69 |
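A short usage sketch for the `User` scraper above, reusing the username from the docstring; the headless Chrome started at import time is closed afterwards.

```python
from scrape_up.medium.user import User, driver

user = User(username="karthikbhandary2")  # username taken from the docstring example
titles = user.get_articles()

if isinstance(titles, list):
    for title in titles:
        print(title)
else:
    print(titles)  # "<username> not found." string on failure

driver.quit()  # release the headless browser when finished
```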
--------------------------------------------------------------------------------
/src/scrape_up/moneycontrol/equity_mutual_funds.py:
--------------------------------------------------------------------------------
1 | from urllib.request import Request, urlopen
2 | from bs4 import BeautifulSoup as soup
3 |
4 |
5 | class EquityMutualFunds:
6 | """
7 | Create an instance of `EquityMutualFunds` class.
8 | ```python
9 | equitymutualfunds = EquityMutualFunds()
10 | ```
11 | | Methods | Details |
12 | | -------------------------|-------------------------------------------------|
13 | | `.historical_returns()`  | Returns a list of mutual funds based on historical returns |
14 |
15 | """
16 |
17 | def __init__(self):
18 | self.__scrape_page()
19 |
20 | def __scrape_page(self):
21 | try:
22 | url = "https://www.moneycontrol.com/mutual-funds/best-funds/equity.html"
23 | req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
24 |
25 | webpage = urlopen(req).read()
26 | self.page_soup = soup(webpage, "html.parser")
27 |
28 | except:
29 | return None
30 |
31 | def historical_returns(self):
32 | """
33 | Create an instance of `EquityMutualFunds` class.
34 |
35 | ```python
36 | equitymutualfunds = EquityMutualFunds()
37 | equitymutualfunds.historical_returns()
38 |
39 | ```
40 | Return\n
41 | ```js
42 | [
43 | 'Motilal Oswal Midcap Fund',
44 | 'Quant Small Cap Fund',
45 | 'UTI Flexi Cap Fund',
46 | ....
47 | ]
48 | ```
49 | """
50 |
51 | try:
52 | L = []
53 | for x in self.page_soup.find_all("a", {"class": "robo_medium"}):
54 | temp = x.get_text().split(" - ")[0]
55 | if temp not in L:
56 | L.append(temp)
57 |
58 | return L
59 |
60 | except:
61 | return None
62 |
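A minimal sketch of how the class above might be used; the result depends on Moneycontrol's current markup, so `historical_returns()` can return `None`.

```python
from scrape_up.moneycontrol.equity_mutual_funds import EquityMutualFunds

equitymutualfunds = EquityMutualFunds()
funds = equitymutualfunds.historical_returns()

if funds is None:
    print("Could not scrape the equity mutual funds page.")
else:
    for rank, fund in enumerate(funds, start=1):
        print(f"{rank}. {fund}")
```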
--------------------------------------------------------------------------------
/src/scrape_up/moneycontrol/gold.py:
--------------------------------------------------------------------------------
1 | from urllib.request import Request, urlopen
2 | from bs4 import BeautifulSoup as soup
3 |
4 |
5 | class GoldPrice:
6 | """
7 | Create an instance of `GoldPrice` class
8 | ```python
9 | goldprice = GoldPrice()
10 | ```
11 |
12 | | Methods | Details |
13 | | -------------| ----------------------------------------------|
14 | | `.price_22_carat()`| Returns city-wise prices of 22-carat gold |
15 | | `.price_24_carat()`| Returns city-wise prices of 24-carat gold |
16 |
17 | """
18 |
19 | def __init__(self):
20 | self.__scrape_page()
21 | self.__get_values()
22 |
23 | def __scrape_page(self):
24 | try:
25 | url = "https://www.moneycontrol.com/news/gold-rates-today/"
26 | req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
27 |
28 | webpage = urlopen(req).read()
29 | self.page_soup = soup(webpage, "html.parser")
30 |
31 | except:
32 | return None
33 |
34 | def __get_values(self):
35 | y = self.page_soup.find_all("td")
36 | y = y[25:-33]
37 | L = []
38 |
39 | for x in y:
40 | L.append(x.get_text())
41 |
42 | self.vals = []
43 | for i in range(0, len(y), 5):
44 | self.vals.append(L[i : i + 5])
45 |
46 | def price_22_carat(self):
47 | """
48 | Create an instance of GoldPrice class
49 | ```python
50 | goldprice = GoldPrice()
51 | goldprice.price_22_carat()
52 | ```
53 | Return\n
54 | ```js
55 | {
56 | "Agra": "₹ 5,610",
57 | "Ahmedabad": "₹ 5,614",
58 | "Andhra pradesh": "₹ 5,550",
59 | "Assam": "₹ 5,655",
60 | "Bangalore": "₹ 5,615",
61 | "Bhilai": "₹ 5,603"
62 | }
63 | ```
64 | """
65 | try:
66 | cities = [x[0] for x in self.vals]
67 | prices = [x[1] for x in self.vals]
68 | return dict(zip(cities, prices))
69 | except:
70 | return None
71 |
72 | def price_24_carat(self):
73 | """
74 | Create an instance of GoldPrice class
75 | ```python
76 | goldprice = GoldPrice()
77 | goldprice.price_24_carat()
78 | ```
79 | Return\n
80 | ```js
81 | {
82 | 'Agra': '₹ 5,891',
83 | 'Ahmedabad': '₹ 5,895',
84 | 'Andhra pradesh': '₹ 5,828',
85 | 'Assam': '₹ 5,938',
86 | 'Bangalore': '₹ 5,896',
87 | 'Bhilai': '₹ 5,883'
88 | }
89 | ```
90 | """
91 | try:
92 | cities = [x[0] for x in self.vals]
93 | prices = [x[3] for x in self.vals]
94 | return dict(zip(cities, prices))
95 | except:
96 | return None
97 |
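Since both methods return dictionaries keyed by city, a caller can line up the 22-carat and 24-carat rates side by side. A sketch, assuming both scrapes succeed:

```python
from scrape_up.moneycontrol.gold import GoldPrice

goldprice = GoldPrice()
price_22k = goldprice.price_22_carat()
price_24k = goldprice.price_24_carat()

if price_22k and price_24k:
    # Both dictionaries are keyed by city, so they can be joined directly.
    for city in sorted(price_22k.keys() & price_24k.keys()):
        print(f"{city}: 22k {price_22k[city]} | 24k {price_24k[city]}")
else:
    print("Gold rates could not be scraped.")
```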
--------------------------------------------------------------------------------
/src/scrape_up/moneycontrol/silver_prices.py:
--------------------------------------------------------------------------------
1 | from urllib.request import Request, urlopen
2 | from bs4 import BeautifulSoup as soup
3 |
4 |
5 | class SilverPrice:
6 | """
7 | Create an instance of `SilverPrice` class
8 | ```python
9 | silverprice = SilverPrice()
10 | ```
11 |
12 | | Methods | Details |
13 | | -------------------|---------------------------------------------------------------------|
14 | | `.citywise_price()`| Returns the price of silver citywise in rupees |
15 | | `.last_10_days()` | Returns the price of 10 grams silver for the last 10 days in rupees |
16 |
17 | """
18 |
19 | def __init__(self):
20 | self.__scrape_page()
21 |
22 | def __scrape_page(self):
23 | try:
24 | url = "https://www.moneycontrol.com/news/silver-rates-today/"
25 | req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
26 |
27 | webpage = urlopen(req).read()
28 | self.page_soup = soup(webpage, "html.parser")
29 |
30 | except:
31 | return None
32 |
33 | def citywise_price(self):
34 | """
35 | Create an instance of `SilverPrice` class
36 | ```python
37 | silverprice = SilverPrice()
38 | silverprice.citywise_price()
39 | ```
40 |
41 | Return\n
42 | ```js
43 | {
44 | 'Agra': '81',
45 | 'Ahmedabad': '81',
46 | 'Bangalore': '81',
47 | 'Bhilai': '81',
48 | 'Bhopal': '81'
49 | }
50 | ```
51 | """
52 | try:
53 | x = self.page_soup.find_all("tr")
54 | x = x[7:-12]
55 |
56 | x = [(y.get_text()).split("₹ ")[:-1] for y in x]
57 | keys = [y[0] for y in x]
58 | values = [y[1] for y in x]
59 |
60 | return dict(zip(keys, values))
61 |
62 | except:
63 | return None
64 |
65 | def last_10_days(self):
66 | """
67 | Create an instance of `SilverPrice` class
68 | ```python
69 | silverprice = SilverPrice()
70 | silverprice.last_10_days()
71 | ```
72 |
73 | Return\n
74 | ```js
75 | {
76 | 'Aug 01, 2023': '810',
77 | 'Jul 31, 2023': '800',
78 | 'Jul 30, 2023': '800',
79 | 'Jul 29, 2023': '800',
80 | 'Jul 28, 2023': '795',
81 | 'Jul 26, 2023': '804',
82 | 'Jul 25, 2023': '800',
83 | 'Jul 24, 2023': '805',
84 | 'Jul 23, 2023': '805',
85 | 'Jul 22, 2023': '805'
86 | }
87 | ```
88 | """
89 | try:
90 | x = self.page_soup.find_all("tr")
91 | x = x[-10:]
92 |
93 | x = [(y.get_text()).split("₹ ") for y in x]
94 | keys = [y[0] for y in x]
95 | values = [y[1] for y in x]
96 |
97 | return dict(zip(keys, values))
98 |
99 | except:
100 | return None
101 |
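A small usage sketch for `SilverPrice`; both methods return `None` if the page layout changes, so the example checks before printing.

```python
from scrape_up.moneycontrol.silver_prices import SilverPrice

silverprice = SilverPrice()

citywise = silverprice.citywise_price()
history = silverprice.last_10_days()

if citywise:
    print("Silver rate in Agra:", citywise.get("Agra"))
if history:
    for date, rate in history.items():
        print(f"{date}: ₹ {rate} per 10 grams")
```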
--------------------------------------------------------------------------------
/src/scrape_up/myanimelist/__init__.py:
--------------------------------------------------------------------------------
1 | from .scraper import Anime
2 |
3 | __all__ = ["Anime"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/pinterest/__init__.py:
--------------------------------------------------------------------------------
1 | from .pinterest import Pinterest
2 |
3 | __all__ = ["Pinterest"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/quora/__init__.py:
--------------------------------------------------------------------------------
1 | from .quora import Quora
2 |
3 | __all__ = ["Quora"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/steam/__init__.py:
--------------------------------------------------------------------------------
1 | from .steamscraper import SteamStoreScraper
2 |
3 | __all__ = ["SteamStoreScraper"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/swiggy/__init__.py:
--------------------------------------------------------------------------------
1 | from .swiggy import Swiggy
2 |
3 | __all__ = ["Swiggy"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/sysreqlab/__init__.py:
--------------------------------------------------------------------------------
1 | from .find_titles import FindTitles
2 | from .requirements import Requirements
3 |
4 |
5 | __all__ = ["FindTitles", "Requirements"]
6 |
--------------------------------------------------------------------------------
/src/scrape_up/sysreqlab/find_titles.py:
--------------------------------------------------------------------------------
1 | from bs4 import BeautifulSoup
2 | import requests
3 |
4 |
5 | class FindTitles:
6 | """
7 | Create an instance of `FindTitles` class.
8 |
9 | ```python
10 | titles = sysreqlab.FindTitles(search_term="Call of Duty", search_alphabet="c")
11 | ```
12 |
13 | | Methods | Details |
14 | | -------------------------------- | --------------------------------------------------------------------- |
15 | | `.find_titles(number_of_titles)` | Returns the list of titles based on the search term, search alphabet. |
16 | """
17 |
18 | def __init__(self, search_term: str, search_alphabet: str):
19 | self.search_term = search_term
20 | self.search_alphabet = search_alphabet
21 |
22 | def __scrape_data(self):
23 | try:
24 | url = f"https://www.systemrequirementslab.com/all-games-list/?filter={self.search_alphabet}"
25 | html = requests.get(url)
26 | html.raise_for_status()
27 | return html.text
28 |
29 | except requests.exceptions.RequestException as e:
30 | raise Exception(f"An error occurred while fetching the page: {str(e)}")
31 |
32 | def __parse_page(self):
33 | html = self.__scrape_data()
34 | soup = BeautifulSoup(html, "html.parser")
35 | return soup
36 |
37 | def find_titles(self, number_of_titles: int):
38 | """
39 | Class - `FindTitles`
40 | Example:
41 | ```python
42 | titles = FindTitles(search_term="Call of Duty", search_alphabet="c")
43 | titles = titles.find_titles(5)
44 | ```
45 | Returns a list of titles that match the search term.
46 | """
47 |
48 | try:
49 | soup = self.__parse_page()
50 |
51 | div_elements = soup.find("div", class_="pt-3")
52 | li_elements = div_elements.find_all("li")
53 | all_titles = [title.text.strip() for title in li_elements]
54 |
55 | titles = [
56 | title
57 | for title in all_titles
58 | if self.search_term.lower() in title.lower()
59 | ]
60 |
61 | return titles[:number_of_titles]
62 |
63 | except Exception as e:
64 | raise Exception(f"An error occurred while fetching the titles: {str(e)}")
65 |
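A usage sketch for `FindTitles`, mirroring the docstring example; the method raises on request or parsing failures, so the call is wrapped in a try/except.

```python
from scrape_up.sysreqlab.find_titles import FindTitles

finder = FindTitles(search_term="Call of Duty", search_alphabet="c")
try:
    for title in finder.find_titles(5):
        print(title)
except Exception as error:
    print(f"Lookup failed: {error}")
```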
--------------------------------------------------------------------------------
/src/scrape_up/thehindu/thehindu.py:
--------------------------------------------------------------------------------
1 | from bs4 import BeautifulSoup as bs
2 |
3 | from scrape_up.config.request_config import RequestConfig, get
4 |
5 |
6 | class TheHindu:
7 | """
8 | Create an object of the 'TheHindu' class\n
9 | ```python
10 | scraper = TheHindu()
11 | ```
12 | | Methods | Details |
13 | | --------------------- | ------------------------------------------------------------------------- |
14 | | `.get_news(page_url)` | Returns the heading, subheading, time, and news content |
15 | """
16 |
17 | def __init__(self, *, config: RequestConfig = RequestConfig()):
18 | headers = {
19 | "User-Agent": "Mozilla/5.0 (Windows NT 6.3; Win 64 ; x64) Apple WeKit /537.36(KHTML , like Gecko) Chrome/80.0.3987.162 Safari/537.36"
20 | }
21 | self.config = config
22 | if self.config.headers == {}:
23 | self.config.set_headers(headers)
24 |
25 | def get_news(self, page_url):
26 | """
27 | Create an object of the 'TheHindu' class\n
28 | ```python
29 | scraper = TheHindu()
30 | scraper.get_news(page_url="https://www.thehindu.com/news/cities/Delhi/sc-appoints-former-delhi-hc-judge-justice-jayant-nath-as-interim-chairperson-of-power-regulator-derc/article67157713.ece")
31 | ```
32 | Response
33 | ```js
34 | {
35 | "title":"SC appoints former Delhi HC judge Justice Jayant Nath as interim chairperson of power regulator DERC",
36 | "subtitle":"The office of the DERC chairperson has been vacant for over six months",
37 | "last_updated":"August 04, 2023 02:59 pm | Updated 03:11 pm IST - New Delhi",
38 | "news":"The Supreme Court on Friday appointed former Delhi High Court judge, ..."
39 | }
40 | ```
41 | """
42 | try:
43 | page_url = "https://www.thehindu.com/news/cities/Delhi/sc-appoints-former-delhi-hc-judge-justice-jayant-nath-as-interim-chairperson-of-power-regulator-derc/article67157713.ece"
44 | response = get(page_url, self.config).text
45 | soup = bs(response, "lxml")
46 | main_content_box = soup.find("div", {"class": "articlebodycontent"})
47 | news_text = main_content_box.find_all("p")
48 | news = ""
49 | for p in news_text:
50 | if "class" not in str(p):
51 | news += p.text
52 | heading = soup.find("h1", {"class": "title"}).text.strip()
53 | sub_heading = soup.find("h3", {"class": "sub-title"}).text.strip()
54 | last_updated = soup.find("p", {"class": "publish-time"}).text.strip()
55 | news_data = {
56 | "title": heading,
57 | "subtitle": sub_heading,
58 | "last_updated": last_updated,
59 | "news": news,
60 | }
61 | return news_data
62 | except:
63 | return None
64 |
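A minimal sketch of calling `get_news` with an article URL (the same one shown in the docstring); `None` is returned when the article markup cannot be parsed.

```python
from scrape_up.thehindu.thehindu import TheHindu

scraper = TheHindu()
article = scraper.get_news(
    page_url="https://www.thehindu.com/news/cities/Delhi/sc-appoints-former-delhi-hc-judge-justice-jayant-nath-as-interim-chairperson-of-power-regulator-derc/article67157713.ece"
)

if article:
    print(article["title"])
    print(article["last_updated"])
else:
    print("Article could not be scraped.")
```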
--------------------------------------------------------------------------------
/src/scrape_up/timeanddate/time_zones.py:
--------------------------------------------------------------------------------
1 | from urllib.request import Request, urlopen
2 | from bs4 import BeautifulSoup as soup
3 |
4 |
5 | class Timezones:
6 | """
7 | Create an instance of `Timezones` class.\n
8 | ```python
9 | timezones = Timezones()
10 | ```
11 | | Methods | Details |
12 | | -------------------|--------------------------------------------------|
13 | | `.city_timezones()`| Returns the timezones of cities around the world |
14 |
15 | """
16 |
17 | def __init__(self):
18 | self.__scrape_page()
19 |
20 | def __scrape_page(self):
21 | try:
22 | url = "https://www.timeanddate.com/worldclock/full.html"
23 | req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
24 |
25 | webpage = urlopen(req).read()
26 | self.page_soup = soup(webpage, "html.parser")
27 |
28 | except:
29 | return None
30 |
31 | def city_timezones(self):
32 | """
33 | Create an instance of `Timezones` class
34 | ```python
35 | timezones = Timezones()
36 | timezones.city_timezones()
37 | ```
38 |
39 | Return\n
40 | ```js
41 | {
42 | "Abidjan": "16.31",
43 | "Gitega": "18.31",
44 | "Oral": "21.31",
45 | "Abu Dhabi": "20.31",
46 | "Grise Fiord *": "12.31",
47 | "Oslo *": "18.31",
48 | "Abuja": "17.31"
49 | }
50 | ```
51 | """
52 | try:
53 | x = self.page_soup.find_all("td")
54 | p = False
55 |
56 | timezones_dict = {}
57 | for y in x[:-1]:
58 | if not p:
59 | key = y.get_text()
60 | else:
61 | timezones_dict[key] = (y.get_text())[5:]
62 | p = not p
63 |
64 | return timezones_dict
65 |
66 | except:
67 | return None
68 |
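A short sketch of the `Timezones` class; the returned dictionary maps city names to local times as shown in the docstring, and is `None` if the table layout changes.

```python
from scrape_up.timeanddate.time_zones import Timezones

timezones = Timezones()
city_times = timezones.city_timezones()

if city_times:
    for city, local_time in list(city_times.items())[:10]:
        print(f"{city}: {local_time}")
else:
    print("Timezone table could not be scraped.")
```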
--------------------------------------------------------------------------------
/src/scrape_up/timesjobs/timesjobs_scraper.py:
--------------------------------------------------------------------------------
1 | from bs4 import BeautifulSoup
2 |
3 | from scrape_up.config.request_config import RequestConfig, get
4 |
5 |
6 | class TimesJobs:
7 | def __init__(self, role: str, *, config: RequestConfig = RequestConfig()):
8 | self.role = role
9 | self.config = config
10 |
11 | def list_jobs(self):
12 | """
13 | Class - `TimesJobs`\n
14 | Example -\n
15 | ```python
16 |
17 | jobs = TimesJobs(role="developer")
18 | jobs.list_jobs()
19 | ```
20 | Return\n
21 | ```python
22 | [{
23 | "Company": "Name of the company",
24 | "Location": "Location of the company",
25 | "Experience": "Experience required of applicants for the post",
26 | "Posted": "Number of days ago the job was posted on the website",
27 | "Apply here": "Link to the web page where you can apply for the job"
28 | }]
29 | ```
30 | """
31 | try:
32 | spl = self.role.split()
33 | self.role = "%20".join(spl)
34 | except:
35 | return None
36 | try:
37 | url = f"https://m.timesjobs.com/mobile/jobs-search-result.html?txtKeywords={self.role}&txtLocation=India&cboWorkExp1=-1"
38 | response = get(url, self.config)
39 | soup = BeautifulSoup(response.text, "html.parser")
40 | companies = soup.find_all("h4")
41 | experiences = soup.find_all("div", class_="srp-exp")
42 | locations = soup.find_all("div", class_="srp-loc")
43 | days_ago = soup.find_all("span", class_="posting-time")
44 | application_links = soup.find_all("h3")
45 |
46 | job_data = []
47 |
48 | for i in range(len(companies)):
49 | company = companies[i].text
50 | location = locations[i].text
51 | experience = experiences[i].text
52 | days = days_ago[i].text
53 | href_value = application_links[i].a["href"]
54 |
55 | job_info = {
56 | "Company": company,
57 | "Location": location,
58 | "Experience": experience,
59 | "Posted": days,
60 | "Apply here": href_value,
61 | }
62 | job_data.append(job_info)
63 |
64 | return job_data
65 |
66 | except Exception as e:
67 | print("Not possible to webscrape")
68 | return None
69 |
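A usage sketch for the `TimesJobs` scraper above; the role string is only an example, and the result is `None` when scraping fails.

```python
from scrape_up.timesjobs.timesjobs_scraper import TimesJobs

jobs = TimesJobs(role="python developer")  # example role, adjust as needed
listings = jobs.list_jobs()

if listings:
    for job in listings[:5]:
        print(job["Company"], "-", job["Location"], "-", job["Apply here"])
else:
    print("No job listings could be scraped.")
```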
--------------------------------------------------------------------------------
/src/scrape_up/tripadvisor/TripAdvisor.py:
--------------------------------------------------------------------------------
1 | from bs4 import BeautifulSoup
2 |
3 | from scrape_up.config.request_config import RequestConfig, get
4 |
5 |
6 | class TripAdvisor:
7 | """
8 | First, create an object of class `TripAdvisor`
9 |
10 | ```python
11 | hotel = TripAdvisor()
12 | ```
13 |
14 | | Methods | Details |
15 | | ------------------------ | ---------------------------------------------------- |
16 | | `get_details(hotel_url)` | Get the details of a hotel from its TripAdvisor URL. |
17 | """
18 |
19 | def __init__(self, *, config: RequestConfig = RequestConfig()):
20 | headers = {
21 | "User-Agent": "Mozilla/5.0 (Windows NT 6.3; Win 64 ; x64) Apple WeKit /537.36(KHTML , like Gecko) Chrome/80.0.3987.162 Safari/537.36"
22 | }
23 | self.config = config
24 | if self.config.headers == {}:
25 | self.config.set_headers(headers)
26 |
27 | def get_details(self, hotel_url: str):
28 | """
29 | Get the details of a hotel from its TripAdvisor URL.\n
30 | Parameters:- `hotel_url (str)`: The URL of the hotel on TripAdvisor.
31 | ```python
32 | hotel = TripAdvisor()
33 | hotel.get_details(hotel_url="<tripadvisor-hotel-url>")
34 | ```
35 | Returns:
36 | ```js
37 | {
38 | "Rating": "The hotel's rating",
39 | "Experience": "The hotel's experience summary",
40 | "Reviews": "The number of reviews for the hotel",
41 | "Award": "The award received by the hotel, or None if not available",
42 | "Description": "The description of the hotel as a BeautifulSoup Tag",
43 | "Amenities": "List of amenities offered by the hotel"
44 | }
45 | ```
46 | """
47 | try:
48 | url = hotel_url
49 | html_text = get(url, self.config).text
50 | soup = BeautifulSoup(html_text, "lxml")
51 |
52 | container = soup.find("div", {"class": "ppuFV _T Z BB"})
53 |
54 | rating = container.find("span", {"class": "uwJeR P"}).text
55 | experience = container.find("div", {"class": "kkzVG"}).text
56 | reviews = container.find("span", {"class": "hkxYU q Wi z Wc"}).text
57 | award = container.find("div", {"class": "bhYSr P"})
58 | if award:
59 | award = award.text
60 | else:
61 | award = None
62 | description = container.find("div", {"class": "fIrGe _T"}).text
63 | pa = container.find("div", {"class": "OsCbb K"})
64 | amenities = []
65 | for items in pa.find_all("div", {"class": "yplav f ME H3 _c"}):
66 | amenities.append(items.text)
67 |
68 | data = {
69 | "Rating": rating,
70 | "Experience": experience,
71 | "Reviews": reviews,
72 | "Award": award,
73 | "Description": description,
74 | "Amenities": amineties,
75 | }
76 | return data
77 | except:
78 | return None
79 |
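A minimal sketch for `TripAdvisor.get_details`; the hotel URL below is a placeholder to be replaced with a real TripAdvisor listing, and `None` is returned when the page's CSS classes change.

```python
from scrape_up.tripadvisor.TripAdvisor import TripAdvisor

hotel = TripAdvisor()
# Placeholder URL: substitute a real TripAdvisor hotel page.
details = hotel.get_details("https://www.tripadvisor.com/Hotel_Review-<hotel-id>.html")

if details:
    print("Rating:", details["Rating"])
    print("Amenities:", ", ".join(details["Amenities"]))
else:
    print("Hotel details could not be scraped.")
```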
--------------------------------------------------------------------------------
/src/scrape_up/twitter/numidconverter.py:
--------------------------------------------------------------------------------
1 | from selenium import webdriver
2 | from selenium.webdriver.chrome.options import Options
3 | from bs4 import BeautifulSoup
4 | import json
5 |
6 |
7 | class TwitterScraper:
8 | def __init__(self):
9 | self.chrome_options = Options()
10 | self.chrome_options.add_argument("--headless")
11 | self.chrome_options.add_argument("--window-size=1920,1080")
12 | self.chrome_options.add_argument("--disable-gpu")
13 | self.chrome_options.add_argument("--no-sandbox")
14 | self.chrome_options.add_argument("--disable-dev-shm-usage")
15 | self.chrome_options.add_argument("--disable-extensions")
16 | self.chrome_options.add_argument("--disable-logging")
17 | self.chrome_options.add_argument("--log-level=3")
18 | self.chrome_options.add_argument("--silent")
19 | self.chrome_options.add_argument("--blink-settings=imagesEnabled=false")
20 |
21 | def unametoid(self, username):
22 | url = "https://twitter.com/{}".format(username)
23 | # print(url)
24 | driver = webdriver.Chrome(options=self.chrome_options)
25 | driver.get(url)
26 |
27 | html = driver.page_source
28 | soup = BeautifulSoup(html, "html.parser")
29 | try:
30 | user_id = soup.find("script", {"data-testid": "UserProfileSchema-test"})
31 | data = json.loads(user_id.string)
32 | driver.quit()  # close the headless browser once the data is extracted
33 | return {
34 | "data": data["author"]["identifier"],
35 | "message": f"Numerical id found for username {username}",
36 | }
37 | except:
38 | return {
39 | "data": None,
40 | "message": f"Numerical id not found for username {username}",
41 | }
42 |
43 | def idtouname(self, numid):
44 | url = "https://twitter.com/i/user/{}".format(numid)
45 | driver = webdriver.Chrome(options=self.chrome_options)
46 | driver.get(url)
47 | html = driver.page_source
48 | soup = BeautifulSoup(html, "html.parser")
49 | try:
50 | user_id = soup.find("script", {"data-testid": "UserProfileSchema-test"})
51 | data = json.loads(user_id.string)
52 | driver.quit()  # close the headless browser once the data is extracted
53 | return {
54 | "data": data["author"]["additionalName"],
55 | "message": f"Username found for numerical id {numid}",
56 | }
57 | except:
58 | return {
59 | "data": None,
60 | "message": f"Username not found for numerical id {numid}",
61 | }
62 |
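A round-trip sketch for the converter above; the handle is purely illustrative, and each call opens its own headless Chrome instance, so it can be slow.

```python
from scrape_up.twitter.numidconverter import TwitterScraper

scraper = TwitterScraper()

# Illustrative handle; any public Twitter/X username can be used.
result = scraper.unametoid("jack")
print(result["message"])

if result["data"] is not None:
    back = scraper.idtouname(result["data"])
    print(back["message"])
```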
--------------------------------------------------------------------------------
/src/scrape_up/uci/UCI.py:
--------------------------------------------------------------------------------
1 | from bs4 import BeautifulSoup
2 |
3 | from scrape_up.config.request_config import RequestConfig, get
4 |
5 |
6 | class UCI:
7 | """
8 | Create an instance of UCI class
9 | ```python
10 | uci = UCI()
11 | ```
12 | | Methods | Details |
13 | | ------------- | ------------------------------------- |
14 | | `.datasets()` | Fetches datasets information from UCI |
15 | """
16 |
17 | def __init__(self, *, config: RequestConfig = RequestConfig()):
18 | headers = {
19 | "User-Agent": "Mozilla/5.0 (Windows NT 6.3; Win 64 ; x64) Apple WeKit /537.36(KHTML , like Gecko) Chrome/80.0.3987.162 Safari/537.36"
20 | }
21 | self.config = config
22 | if self.config.headers == {}:
23 | self.config.set_headers(headers)
24 |
25 | def datasets(self, number):
26 | """
27 | Get UCI datasets information.\n
28 | Args:
29 | `number (int)`: The number of datasets to fetch. The method fetches datasets in batches of 10.
30 | Example:
31 | ```python
32 | uci = UCI()
33 | datasets_info = uci.datasets(20)
34 | ```
35 | Returns:
36 | ```js
37 | [
38 | {
39 | "Name":"Iris",
40 | "Link":"https://archive.ics.uci.edu//dataset/53/iris",
41 | "Description":"A small classic dataset from Fisher, 1936. One of the earliest datasets used for evaluation of classification methodologies.\n",
42 | "Extra Info":" Classification Multivariate 150 Instances 4 Attributes "
43 | }
44 | ]
45 | ```
46 | """
47 | try:
48 | number = number // 10
49 | dataset = []
50 | for i in range(0, number):
51 | url = "https://archive.ics.uci.edu/datasets?skip={}&take=10&sort=desc&orderBy=NumHits&search=s".format(
52 | i * 10
53 | )
54 | html_text = get(url, self.config).text
55 | soup = BeautifulSoup(html_text, "lxml")
56 |
57 | container = soup.find("div", {"class": "flex flex-col gap-1"})
58 |
59 | for items in container.find_all(
60 | "div", {"class": "rounded-box bg-base-100"}
61 | ):
62 | title = items.find("h2").text
63 | link = (
64 | "https://archive.ics.uci.edu/"
65 | + items.find("a", href=True)["href"]
66 | )
67 | description = items.find("p").text
68 | extra_info = ""
69 | for item in items.find_all(
70 | "div", {"class": "col-span-3 flex items-center gap-2"}
71 | ):
72 | extra_info = extra_info + item.text + " "
73 | data = {
74 | "Name": title,
75 | "Link": link,
76 | "Description": description,
77 | "Extra Info": extra_info,
78 | }
79 | dataset.append(data)
80 | return dataset
81 | except:
82 | return None
83 |
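A sketch of fetching dataset metadata with the `UCI` class; because the method works in batches of 10, `datasets(20)` issues two requests.

```python
from scrape_up.uci.UCI import UCI

uci = UCI()
datasets_info = uci.datasets(20)  # fetched in two batches of 10

if datasets_info:
    for dataset in datasets_info:
        print(dataset["Name"], "->", dataset["Link"])
else:
    print("UCI dataset listing could not be scraped.")
```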
--------------------------------------------------------------------------------
/src/scrape_up/who/WHO.py:
--------------------------------------------------------------------------------
1 | from bs4 import BeautifulSoup
2 | from scrape_up.config.request_config import RequestConfig, get
3 |
4 |
5 | class WHO:
6 | """
7 | Create an instance of WHO class.\n
8 | ```python
9 | who = WHO()
10 | ```
11 | | Methods | Details |
12 | | ------------------------------ | ------------------------------------------- |
13 | | `get_disease_outbreak()` | Get Disease Outbreak News from WHO website. |
14 | """
15 |
16 | def __init__(self, *, config: RequestConfig = RequestConfig()):
17 | headers = {
18 | "User-Agent": "Mozilla/5.0 (Windows NT 6.3; Win 64 ; x64) Apple WeKit /537.36(KHTML , like Gecko) Chrome/80.0.3987.162 Safari/537.36"
19 | }
20 | self.config = config
21 | if self.config.headers == {}:
22 | self.config.set_headers(headers)
23 |
24 | def get_disease_outbreak(self, number):
25 | """
26 | Get Disease Outbreak News from WHO website.\n
27 | Parameters: `number` (int): The number of news items to fetch; items are retrieved in pages of 10.
28 | ```python
29 | who = WHO()
30 | who.get_disease_outbreak(number=10)
31 | ```
32 | Returns:
33 | ```js
34 | [
35 | {
36 | "Title":"Circulating vaccine-derived poliovirus type 2 (cVDPV2) - United Republic of Tanzania",
37 | "Date":"28 July 2023 ",
38 | "Link":"https://www.who.int/emergencies/disease-outbreak-news/item/2023-DON480"
39 | }
40 | ...
41 | ]
42 | ```
43 | """
44 |
45 | try:
46 | number = number // 10
47 | DON = []
48 | for i in range(1, number + 1):
49 | url = f"https://www.who.int/emergencies/disease-outbreak-news/{i}"
50 | html_text = get(url, self.config).text
51 | soup = BeautifulSoup(html_text, "lxml")
52 |
53 | container = soup.find("div", {"class": "sf-list-vertical"})
54 |
55 | for items in container.find_all(
56 | "a", {"class": "sf-list-vertical__item"}, href=True
57 | ):
58 | title = items.find("span", {"class": "full-title"})
59 | date = title.findNext()
60 | date = date.text.split("|")[0]
61 | link = items["href"]
62 | data = {"Title": title.text, "Date": date, "Link": link}
63 | DON.append(data)
64 | return DON
65 | except:
66 | return None
67 |
68 |
69 | if __name__ == "__main__":
70 | who = WHO()
71 | print(who.get_disease_outbreak(number=10))
72 |
--------------------------------------------------------------------------------
/src/scrape_up/who/__init__.py:
--------------------------------------------------------------------------------
1 | from .WHO import WHO
2 |
3 | __all__ = ["WHO"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/wikipedia/wikipedia.py:
--------------------------------------------------------------------------------
1 | from bs4 import BeautifulSoup
2 |
3 | from scrape_up.config.request_config import RequestConfig, get
4 |
5 |
6 | class WikipediaScraper:
7 | """
8 | Create an object of the 'WikipediaScraper' class:
9 |
10 | ```python
11 | Scraper = WikipediaScraper()
12 | ```
13 |
14 | | Methods | Details |
15 | | ----------------- | ------------------------------------------------------- |
16 | | `.scrape(url)` | Returns the Scraped Data from Wikipedia |
17 | | `.get_featured()` | Returns the featured article for the day from Wikipedia |
18 | """
19 |
20 | def __init__(self, *, config: RequestConfig = RequestConfig()):
21 | self.config = config
22 |
23 | def scrape(self, query: str):
24 | try:
25 | URL = f"https://en.wikipedia.org/wiki/{query}"
26 | response = get(URL, self.config)
27 | soup = BeautifulSoup(response.text, "html.parser")
28 |
29 | # Extract the title
30 | title = soup.find(id="firstHeading").text
31 |
32 | # Extract all the headings and their content
33 | sections = soup.find_all("h2")
34 | data = {}
35 | for section in sections:
36 | heading = section.find("span", class_="mw-headline")
37 | if heading:
38 | content = []
39 | next_node = section.find_next_sibling(
40 | ["h2", "h3", "h4", "h5", "h6"]
41 | )
42 | while next_node and next_node.name != "h2":
43 | if next_node.name in ["h3", "h4", "h5", "h6"]:
44 | content.append({"heading": next_node.text.strip()})
45 | elif next_node.name == "p":
46 | content.append({"text": next_node.text.strip()})
47 | next_node = next_node.find_next_sibling(
48 | ["h2", "h3", "h4", "h5", "h6", "p"]
49 | )
50 | data[heading.text] = content
51 |
52 | # Return the data as JSON
53 | result = {"title": title, "sections": data}
54 | return result
55 | except:
56 | return None
57 |
58 | def get_featured(self):
59 | """
60 | Get the featured data from the main page of Wikipedia.
61 |
62 | Returns:
63 | A string containing the featured data from the main page of Wikipedia.
64 | """
65 | try:
66 | url = "https://en.wikipedia.org/wiki/Main_Page"
67 | html_text = get(url, self.config).text
68 | soup = BeautifulSoup(html_text, "lxml")
69 |
70 | container = soup.find("div", {"id": "mp-left"})
71 | data = container.find("p").text
72 | return data
73 | except:
74 | return None
75 |
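A usage sketch for `WikipediaScraper`; the query is an example article title, and both methods return `None` when the page cannot be parsed.

```python
from scrape_up.wikipedia.wikipedia import WikipediaScraper

scraper = WikipediaScraper()

page = scraper.scrape("Python_(programming_language)")  # example article title
if page:
    print(page["title"])
    print(list(page["sections"].keys()))

featured = scraper.get_featured()
if featured:
    print("Today's featured article:", featured[:200])
```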
--------------------------------------------------------------------------------
/src/scrape_up/wuzzuf/__init__.py:
--------------------------------------------------------------------------------
1 | from .wuzzuf import Jobs
2 |
3 | __all__ = ["Jobs"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/yellowpages/__init__.py:
--------------------------------------------------------------------------------
1 | from .yellowpages import Yellowpages
2 |
3 | __all__ = ["Yellowpages"]
4 |
--------------------------------------------------------------------------------
/src/scrape_up/yellowpages/yellowpages.py:
--------------------------------------------------------------------------------
1 | import requests
2 | from bs4 import BeautifulSoup
3 |
4 |
5 | class Yellowpages:
6 | """
7 | Create an instance of `Yellowpages` class
8 |
9 | ```python
10 | # Scrape restaurants in New York; call `.business_info()` for their details
11 | data = Yellowpages("restaurant", "New York")
12 | ```
13 | | Method | Details |
14 | | ----------------- | ----------------------------------------------------------------- |
15 | | `business_info()` | Returns the list of dictionaries containing business information. |
16 |
17 | """
18 |
19 | def __init__(self, business, place):
20 | self.business = business
21 | self.place = place
22 | self.info = []
23 | try:
24 | url = f"https://www.yellowpages.com/search?search_terms={self.business}&geo_location_terms={self.place}"
25 | response = requests.get(url, headers={"User-Agent": "XY"})
26 | self.soup = BeautifulSoup(response.content, "lxml")
27 |
28 | except:
29 | return None
30 |
31 | def business_info(self):
32 | businesses = self.soup.find_all("div", class_="srp-listing clickable-area mdm")
33 | for item in businesses:
34 | name = item.find("a", class_="business-name").text
35 | address = item.find("div", class_="street-address").text
36 | try:
37 | rating = item.find("div", class_="ratings").text
38 | except:
39 | rating = " "
40 | try:
41 | website = item.find("a", class_="track-visit-website")["href"]
42 | except:
43 | website = " "
44 | try:
45 | phone_no = item.find("div", class_="phones phone primary").text
46 | except:
47 | phone_no = " "
48 | try:
49 | menu = (
50 | "https://www.yellowpages.com"
51 | + item.find("a", class_="menu")["href"]
52 | )
53 | except:
54 | menu = " "
55 | try:
56 | description = item.find("p", class_="body").text
57 | except:
58 | description = " "
59 | try:
60 | amenities = item.find("div", class_="amenities-info").text
61 | except:
62 | amenities = " "
63 | try:
64 | opentime = item.find("div", class_="open-status").text
65 | except:
66 | opentime = " "
67 | businessinfo = {
68 | "name": name,
69 | "address": address,
70 | "rating": rating,
71 | "website": website,
72 | "phone_no": phone_no,
73 | "menu": menu,
74 | "description": description,
75 | "amenities": amenities,
76 | "opentime": opentime,
77 | }
78 | self.info.append(businessinfo)
79 | return self.info
80 |
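A short sketch for the `Yellowpages` scraper, matching the docstring example and assuming the search page loads; fields missing from a listing come back as blank strings.

```python
from scrape_up.yellowpages.yellowpages import Yellowpages

data = Yellowpages("restaurant", "New York")
for business in data.business_info():
    print(business["name"], "|", business["phone_no"], "|", business["address"])
```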
--------------------------------------------------------------------------------
/src/scrape_up/zomato/__init__.py:
--------------------------------------------------------------------------------
1 | from .zomato import Zomato
2 |
3 | __all__ = ["Zomato"]
4 |
--------------------------------------------------------------------------------
/src/test/academia_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from scrape_up.academia import Academia
3 |
4 |
5 | class TestAcademia(unittest.TestCase):
6 | def setUp(self):
7 | self.academia = Academia()
8 |
9 | def test_get_research_topics(self):
10 | academia = Academia()
11 | result = academia.get_research_topics()
12 | self.assertIsNotNone(result)
13 | self.assertIsInstance(result, list)
14 |
15 | if result is not None:
16 | for topic in result:
17 | self.assertIn("Title", topic)
18 | self.assertIn("Link", topic)
19 | self.assertIn("Number of Articles", topic)
20 | self.assertIn("Followers", topic)
21 |
22 | self.assertIsInstance(topic["Title"], str)
23 | self.assertIsInstance(topic["Link"], str)
24 | self.assertIsInstance(topic["Number of Articles"], str)
25 | self.assertIsInstance(topic["Followers"], str)
26 |
27 | def test_get_research_paper(self):
28 | academia = Academia()
29 | result = academia.get_research_papers(search="Machine Learning")
30 | self.assertIsNotNone(result)
31 | self.assertIsInstance(result, list)
32 |
33 | if result is not None:
34 | for paper in result:
35 | self.assertIn("Title", paper)
36 | self.assertIn("Summary", paper)
37 | self.assertIn("Link", paper)
38 |
39 | self.assertIsInstance(paper["Title"], str)
40 | if paper["Summary"] is not None:
41 | self.assertIsInstance(paper["Summary"], str)
42 | self.assertIsInstance(paper["Link"], str)
43 |
44 |
45 | if __name__ == "__main__":
46 | unittest.main()
47 |
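The test modules under `src/test` follow standard `unittest` conventions, so besides the `__main__` block above they can also be discovered programmatically. A small sketch, assuming it is run from the repository root with `scrape_up` installed:

```python
import unittest

# Discover and run only the Academia tests located in src/test.
suite = unittest.defaultTestLoader.discover("src/test", pattern="academia_test.py")
unittest.TextTestRunner(verbosity=2).run(suite)
```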
--------------------------------------------------------------------------------
/src/test/amazon_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from scrape_up.amazon import Product
3 |
4 |
5 | class AmazonTest(unittest.TestCase):
6 | def setUp(self):
7 | self.product = Product("Watch")
8 |
9 | def test_get_product(self):
10 | result = self.product.get_product_details()
11 | self.assertIsNotNone(result)
12 | self.assertIsInstance(result, dict)
13 | if result is not None:
14 | self.assertIn("data", result)
15 | self.assertIn("message", result)
16 | self.assertIsNotNone(result["data"], str)
17 | if result["data"] is not None:
18 | self.assertIsInstance(result["data"], str)
19 | if result["message"] is not None:
20 | self.assertIsInstance(result["message"], str)
21 |
22 | def test_get_product_details(self):
23 | result = self.product.get_product_details()
24 | self.assertIsNotNone(result)
25 | self.assertIsInstance(result, dict)
26 | if result is not None:
27 | self.assertIn("data", result)
28 | self.assertIn("message", result)
29 | self.assertIsNotNone(result["data"], str)
30 | if result["data"] is not None:
31 | self.assertIsInstance(result["data"], str)
32 | if result["message"] is not None:
33 | self.assertIsInstance(result["message"], str)
34 |
35 | def test_get_product_image(self):
36 | result = self.product.get_product_details()
37 | self.assertIsNotNone(result)
38 | self.assertIsInstance(result, dict)
39 | if result is not None:
40 | self.assertIn("data", result)
41 | self.assertIn("message", result)
42 | self.assertIsNotNone(result["data"], str)
43 | if result["data"] is not None:
44 | self.assertIsInstance(result["data"], str)
45 | if result["message"] is not None:
46 | self.assertIsInstance(result["message"], str)
47 |
48 | def test_customer_review(self):
49 | result = self.product.get_product_details()
50 | self.assertIsNotNone(result)
51 | self.assertIsInstance(result, dict)
52 | if result is not None:
53 | self.assertIn("data", result)
54 | self.assertIn("message", result)
55 | self.assertIsNotNone(result["data"], str)
56 | if result["data"] is not None:
57 | self.assertIsInstance(result["data"], str)
58 | if result["message"] is not None:
59 | self.assertIsInstance(result["message"], str)
60 |
61 |
62 | if __name__ == "__main__":
63 | unittest.main()
64 |
--------------------------------------------------------------------------------
/src/test/banners_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from scrape_up.banners import Scraper88x31
3 |
4 |
5 | class TestScraper88x31(unittest.TestCase):
6 | def setUp(self):
7 | """
8 | Initialize a Scraper88x31 instance before each test method.
9 | """
10 | self.scraper = Scraper88x31()
11 |
12 | def test_get_all(self):
13 | """
14 | | Methods | Details |
15 | | ------------------ | -------------------------------------------------------- |
16 | | `get_all()` | Returns the list of all available 88x31 banners |
17 | """
18 | try:
19 | banners = self.scraper.get_all()
20 |
21 | # Check if banners is a list of URLs
22 | self.assertIsInstance(banners, list)
23 | for banner in banners:
24 | self.assertIsInstance(banner, str)
25 | self.assertTrue(banner.startswith("https://cyber.dabamos.de/88x31/"))
26 | self.assertTrue(banner.endswith(".gif"))
27 | except:
28 | return None
29 |
30 |
31 | if __name__ == "__main__":
32 | unittest.main()
33 |
--------------------------------------------------------------------------------
/src/test/bayt_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from scrape_up.bayt import Jobs
3 |
4 |
5 | class TestJobs(unittest.TestCase):
6 | """
7 | | Methods | Details |
8 | | ----------------------------- | -------------------------------------------------------------------------- |
9 | | `.fetch_jobs(query, page)` | Fetch job listings data from Bayt.com based on the given query and page. |
10 | """
11 |
12 | def setUp(self):
13 | """
14 | Initialize an instance of the Jobs class before each test.
15 | """
16 | self.scraper = Jobs()
17 | self.query = "software developer"
18 | self.page = 1
19 |
20 | def test_fetch_jobs(self):
21 | """
22 | Test the fetch_jobs method.
23 | """
24 | try:
25 | jobs_data = self.scraper.fetch_jobs(self.query, self.page)
26 | self.assertIsNotNone(jobs_data, "Failed to fetch job listings")
27 | self.assertIsInstance(jobs_data, list, "Job listings should be a list")
28 | self.assertGreater(len(jobs_data), 0, "Job listings should not be empty")
29 |
30 | # Check the structure of the first job listing
31 | job = jobs_data[0]
32 | expected_keys = ["title", "company", "location", "url"]
33 | for key in expected_keys:
34 | self.assertIn(key, job, f"Missing expected key: {key}")
35 | self.assertIsInstance(job[key], str, f"{key} should be a string")
36 |
37 | except:
38 | return None
39 |
40 | def test_extract_job_info(self):
41 | """
42 | Test the __extract_job_info method indirectly by testing fetch_jobs.
43 | """
44 | try:
45 | jobs_data = self.scraper.fetch_jobs(self.query, self.page)
46 | self.assertIsNotNone(jobs_data, "Failed to fetch job listings")
47 | self.assertGreater(len(jobs_data), 0, "Job listings should not be empty")
48 |
49 | # Check the first job listing details
50 | job = jobs_data[0]
51 | self.assertIn("title", job, "Job should have a title")
52 | self.assertIn("company", job, "Job should have a company name")
53 | self.assertIn("location", job, "Job should have a location")
54 | self.assertIn("url", job, "Job should have a URL")
55 |
56 | # Ensure that none of the fields are empty
57 | self.assertNotEqual(job["title"], "", "Job title should not be empty")
58 | self.assertNotEqual(job["url"], "", "Job URL should not be empty")
59 |
60 | except:
61 | return None
62 |
63 |
64 | if __name__ == "__main__":
65 | unittest.main()
66 |
--------------------------------------------------------------------------------
/src/test/bbc_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from scrape_up.bbcnews import BBCNews
3 |
4 |
5 | class TestBBCNews(unittest.TestCase):
6 | """
7 | | Methods | Details |
8 | | ------------------ | -------------------------------------------------------- |
9 | | `.get_headlines()` | Returns the list of objects containing the headlines |
10 | | `.get_article()` | Returns an object with details about the article |
11 |
12 | """
13 |
14 | def setUp(self):
15 | """
16 | Initialize a BBCNews instance before each test method.
17 | """
18 | self.bbc_scraper = BBCNews()
19 |
20 | def test_get_headlines(self):
21 | """
22 | Testing the get_headlines() method.
23 | """
24 | try:
25 | headlines = self.bbc_scraper.get_headlines()
26 |
27 | # Check if headlines is a list of dictionaries
28 | if headlines is not None:
29 | self.assertIsInstance(headlines, list)
30 | for headline in headlines:
31 | self.assertIsInstance(headline, dict)
32 | self.assertIn("index", headline)
33 | self.assertIn("headline", headline)
34 |
35 | # Check if all headlines have unique indices
36 | indices = {headline["index"] for headline in headlines}
37 | self.assertEqual(
38 | len(indices), len(headlines), "Duplicate indices found in headlines"
39 | )
40 | # Check if headlines list is not empty
41 | self.assertGreater(len(headlines), 0, "No headlines extracted")
42 | except:
43 | return None
44 |
45 | def test_get_article(self):
46 | """
47 | Testing the get_article(url) method.
48 | """
49 | try:
50 | valid_url = "https://www.bbc.co.uk/news/world-europe-61258011" # Test with a valid article URL
51 | article = self.bbc_scraper.get_article(valid_url)
52 |
53 | if article is not None:
54 | self.assertIsInstance(
55 | article, dict
56 | ) # Check if article is a dictionary or not
57 | self.assertIn(
58 | "main_heading", article
59 | ) # Does it contain main_heading or not
60 | self.assertIn("time", article) # Does it contain time or not
61 | self.assertIn("text", article) # Does it contain text or not
62 |
63 | invalid_url = "https://www.bbc.co.uk/news/non-existent-article" # Test with an invalid article URL
64 | invalid_article = self.bbc_scraper.get_article(
65 | invalid_url
66 | ) # Should return None
67 | self.assertIsNone(invalid_article, "Invalid URL should return None")
68 | except:
69 | return None
70 |
71 |
72 | if __name__ == "__main__":
73 | unittest.main()
74 |
--------------------------------------------------------------------------------
/src/test/codechef_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from scrape_up import codechef
3 |
4 |
5 | class CodeChefTest(unittest.TestCase):
6 | """
7 | CodeChef module test.\n
8 | | Methods | Details |
9 | | --------------- | ---------------------------------------------------------------- |
10 | | `get_profile()` | Returns name, username, profile_image_link, rating, details etc. |
11 | """
12 |
13 | def test_get_profile(self):
14 | instance = codechef.User(id="heltion")
15 | method_response = instance.get_profile()
16 |
17 | self.assertEqual(
18 | list(method_response.keys()),
19 | ["name", "username", "profile_image_link", "rating", "details"],
20 | "Codechef:get_profile - keys mismatch",
21 | )
22 |
23 |
24 | if __name__ == "__main__":
25 | unittest.main()
26 |
--------------------------------------------------------------------------------
/src/test/coinmarketcap_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from scrape_up import coinmarketcap
3 |
4 | # sys.path.insert(0, "..")
5 |
6 |
7 | class CoinMarketCapTest(unittest.TestCase):
8 | """
9 | CoinMarketCap module test.\n
10 | | Method | Details |
11 | | ---------------------------- | -------------------------------------------------------- |
12 | | `get_top_cryptocurrencies()` | Fetches and returns data about the top cryptocurrencies. |
13 | """
14 |
15 | def test_get_top_cryptocurrencies(self):
16 | instance = coinmarketcap.Crypto()
17 | top_cryptocurrencies = instance.get_top_cryptocurrencies()
18 |
19 | self.assertIsInstance(top_cryptocurrencies, list)
20 |
21 | for item in top_cryptocurrencies:
22 | self.assertIsInstance(item, dict)
23 |
24 | self.assertEqual(
25 | list(item.keys()),
26 | [
27 | "Name",
28 | "Symbol",
29 | "Link",
30 | "Price",
31 | "1h%",
32 | "24h%",
33 | "7d%",
34 | "MarketCap",
35 | "Volume(24h)",
36 | "Circulating Supply",
37 | ],
38 | )
39 |
40 | for value in item.values():
41 | self.assertIsInstance(value, str)
42 |
43 |
44 | if __name__ == "__main__":
45 | unittest.main()
46 |
--------------------------------------------------------------------------------
/src/test/coursera_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from scrape_up.coursera import Coursera
3 |
4 |
5 | class TestCoursera(unittest.TestCase):
6 | def setUp(self, topic="Machine Learning"):
7 | self.scraper = Coursera(topic)
8 |
9 | def test_get_courses(self):
10 | result = self.scraper.get_courses()
11 | self.assertIsNotNone(result)
12 | self.assertIsInstance(result, list)
13 |
14 | if result is not None:
15 | for topic in result:
16 | self.assertIn("title", topic)
17 | self.assertIn("taught_by", topic)
18 | self.assertIn("skills", topic)
19 | self.assertIn("rating", topic)
20 | self.assertIn("review_count", topic)
21 | self.assertIn("img_url", topic)
22 | self.assertIn("link", topic)
23 |
24 | self.assertIsInstance(topic["title"], str)
25 | self.assertIsInstance(topic["taught_by"], str)
26 | self.assertIsInstance(topic["skills"], str)
27 | self.assertIsInstance(topic["rating"], str)
28 | self.assertIsInstance(topic["review_count"], str)
29 | self.assertIsInstance(topic["img_url"], str)
30 | self.assertIsInstance(topic["link"], str)
31 |
32 | def test_fetch_modules_with_modules(self):
33 | result = self.scraper.fetch_modules(course="Machine Learning with Python")
34 | self.assertIsNotNone(result)
35 | self.assertIsInstance(result, dict)
36 |
37 | if result is not None:
38 | for key, value in result.items():
39 | self.assertIsInstance(value, str)
40 |
41 | def test_fetch_modules_with_specializations(self):
42 | result = self.scraper.fetch_modules(course="Machine Learning")
43 | self.assertIsNotNone(result)
44 | self.assertIsInstance(result, dict)
45 |
46 | if result is not None:
47 | for key, value in result.items():
48 | self.assertIsInstance(value, dict)
49 | self.assertIn("Title", value)
50 | self.assertIn("Link", value)
51 | self.assertIsInstance(value["Title"], str)
52 | self.assertIsInstance(value["Link"], str)
53 |
54 |
55 | if __name__ == "__main__":
56 | unittest.main()
57 |
--------------------------------------------------------------------------------
/src/test/covidinfo_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from scrape_up.covidinfo import covidinfo
3 |
4 |
5 | class CovidInfoTest(unittest.TestCase):
6 | def setUp(self):
7 | self.instance = covidinfo.CovidInfo()
8 |
9 | """
10 | CovidInfo module test.\n
11 | | Methods | Details |
12 | | --------------------------- | ---------------------------------------------------------------------------------------------------- |
13 | | `.covid_data()` | Returns the list of all the covid data scraped from the website |
14 | | `.total_cases()` | Returns the count of total covid cases all over the world |
15 | | `.total_deaths()` | Returns the count of covid deaths all over the world |
16 | | `.total_recovered()` | Returns the count of recovered covid cases all over the world |
17 | | `.latest_news()` | Returns the latest news of the day |
18 | """
19 |
20 | def test_covid_data(self):
21 | covid_data_response = self.instance.covid_data()
22 | self.assertIsInstance(covid_data_response, list)
23 | if covid_data_response is not None:
24 | for data in covid_data_response:
25 | self.assertIsInstance(data, dict)
26 | self.assertIn("Country", data)
27 | self.assertIn("Number of Cases", data)
28 | self.assertIn("Deaths", data)
29 | self.assertIn("Continent", data)
30 | self.assertIsInstance(data["Country"], str)
31 | self.assertIsInstance(data["Number of Cases"], int)
32 | self.assertIsInstance(data["Deaths"], int)
33 | self.assertIsInstance(data["Continent"], str)
34 |
35 | def test_total_cases(self):
36 | total_cases_response = self.instance.total_cases()
37 | self.assertIsInstance(total_cases_response, str)
38 |
39 | def test_total_deaths(self):
40 | total_deaths_response = self.instance.total_deaths()
41 | self.assertIsInstance(total_deaths_response, str)
42 |
43 | def test_total_recovered(self):
44 | test_total_response = self.instance.total_recovered()
45 | self.assertIsInstance(test_total_response, dict)
46 |
47 | def test_latest_news(self):
48 | latest_news_response = self.instance.latest_news()
49 | self.assertIsInstance(latest_news_response, (list, type(None)))
50 | if latest_news_response is not None:
51 | for news in latest_news_response:
52 | self.assertIsInstance(news, dict)
53 | self.assertIn("news", news)
54 | self.assertIn("source", news)
55 | self.assertIsInstance(news["news"], str)
56 | self.assertIsInstance(news["source"], str)
57 |
58 |
59 | if __name__ == "__main__":
60 | unittest.main()
61 |
--------------------------------------------------------------------------------
/src/test/eazydiner_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import json
3 | from scrape_up.eazydiner import EazyDiner
4 |
5 |
6 | class EazyDinerTest(unittest.TestCase):
7 | """
8 | EazyDiner class test.\n
9 | | Methods | Details |
10 | | ------------------------ | ---------------------------------------------------------------- |
11 | | `.get_restaurants()` | Tests the get_restaurants() method of the EazyDiner class |
12 | | `.get_breakfast()` | Tests the get_breakfast() method of the EazyDiner class |
13 | | `.get_lunch()` | Tests the get_lunch() method of the EazyDiner class |
14 | | `.get_dinner()` | Tests the get_dinner() method of the EazyDiner class |
15 | | `.dinner_with_discount()`| Tests the dinner_with_discount() method of the EazyDiner class |
16 | | `.get_top10()` | Tests the get_top10() method of the EazyDiner class |
17 | """
18 |
19 | def assert_response_keys(self, response, expected_keys):
20 | if isinstance(response, str):
21 | response_dict = json.loads(response)
22 |
23 | for key in expected_keys:
24 | self.assertTrue(
25 | key in response_dict, f"Key '{key}' is missing in the response."
26 | )
27 |
28 | def test_get_restaurants(self):
29 | eazydiner = EazyDiner(
30 | location="Delhi NCR"
31 | ) # Replace with an appropriate location
32 | restaurants = eazydiner.get_restaurants()
33 | self.assertIsInstance(restaurants, str)
34 | self.assert_response_keys(restaurants, ["restaurants"])
35 |
36 | def test_get_breakfast(self):
37 | eazydiner = EazyDiner(
38 | location="Delhi NCR"
39 | ) # Replace with an appropriate location
40 | breakfast = eazydiner.get_breakfast()
41 | self.assertIsInstance(breakfast, str)
42 | self.assert_response_keys(breakfast, ["restaurants"])
43 |
44 | def test_get_lunch(self):
45 | eazydiner = EazyDiner(
46 | location="Delhi NCR"
47 | ) # Replace with an appropriate location
48 | lunch = eazydiner.get_lunch()
49 | self.assertIsInstance(lunch, str)
50 | self.assert_response_keys(lunch, ["restaurants"])
51 |
52 | def test_get_dinner(self):
53 | eazydiner = EazyDiner(
54 | location="Delhi NCR"
55 | ) # Replace with an appropriate location
56 | dinner = eazydiner.get_dinner()
57 | self.assertIsInstance(dinner, str)
58 | self.assert_response_keys(dinner, ["restaurants"])
59 |
60 | def test_dinner_with_discount(self):
61 | eazydiner = EazyDiner(
62 | location="Delhi NCR"
63 | ) # Replace with an appropriate location
64 | dinner_discount = eazydiner.dinner_with_discount()
65 | self.assertIsInstance(dinner_discount, list)
66 |
67 | def test_get_top10(self):
68 | eazydiner = EazyDiner(
69 | location="Delhi NCR"
70 | ) # Replace with an appropriate location
71 | top10 = eazydiner.get_top10()
72 | self.assertIsInstance(top10, dict)
73 |
74 |
75 | if __name__ == "__main__":
76 | unittest.main()
77 |
--------------------------------------------------------------------------------
/src/test/ebay_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from scrape_up.ebay import EBAY
3 |
4 |
5 | class eBayTest(unittest.TestCase):
6 | """
7 | eBay module test.\n
8 | | Methods | Details |
9 | | ------------------- | ----------------------------------- |
10 | | `spotlights()` | Returns spotlight deals on EBAY. |
11 | | `featured()` | Returns the featured deals on EBAY. |
12 | | `specific_deals()` | Returns the specific deals on EBAY. |
13 | """
14 |
15 | def setUp(self):
16 | self.instance = EBAY()
17 |
18 | def test_spotlights(self):
19 | spotlights = self.instance.spotlights()
20 |
21 | self.assertIsNotNone(spotlights)
22 | self.assertIsInstance(spotlights, dict)
23 | self.assertEqual(
24 | list(spotlights.keys()), ["Description", "Product", "Price", "Link"]
25 | )
26 |
27 | for value in spotlights.values():
28 | self.assertIsInstance(value, str)
29 |
30 | def test_featured(self):
31 | featured = self.instance.featured()
32 |
33 | self.assertIsNotNone(featured)
34 | self.assertIsInstance(featured, list)
35 |
36 | for item in featured:
37 | self.assertIsInstance(item, dict)
38 | self.assertEqual(list(item.keys()), ["Product", "Price", "Link"])
39 |
40 | for value in item.values():
41 | self.assertIsInstance(value, str)
42 |
43 | def test_specific_deals(self):
44 | specific_deals = self.instance.specific_deals()
45 |
46 | self.assertIsNotNone(specific_deals)
47 | self.assertIsInstance(specific_deals, list)
48 |
49 | for item in specific_deals:
50 | self.assertIsInstance(item, dict)
51 | self.assertEqual(list(item.keys()), ["Product", "Price", "Link"])
52 |
53 | for value in item.values():
54 | self.assertIsInstance(value, str)
55 |
56 |
57 | if __name__ == "__main__":
58 | unittest.main()
59 |
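The eBay tests compare list(item.keys()) against an exact, ordered key list, so they fail if the scraper reorders fields or adds a new one. Where that strictness is not wanted, a containment check is enough; a small sketch of such a helper (assert_has_keys is an illustrative name, not part of scrape_up):

def assert_has_keys(testcase, mapping, required):
    # pass as long as every required key is present, regardless of order or extra fields
    missing = [key for key in required if key not in mapping]
    testcase.assertFalse(missing, f"Missing keys: {missing}")

# usage inside a TestCase method:
#   assert_has_keys(self, item, ["Product", "Price", "Link"])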
--------------------------------------------------------------------------------
/src/test/espncricinfo_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from scrape_up.espncricinfo import Espncricinfo
3 |
4 |
5 | class ESPNTest(unittest.TestCase):
6 | def test_connection(self):
7 | instance = Espncricinfo()
8 | self.assertTrue(
9 | instance,
10 | "ESPN:__init__ - connection failed",
11 | )
12 |
13 | def test_get_news(self):
14 | instance = Espncricinfo()
15 | method_response = instance.get_news()
16 |
17 | self.assertIsInstance(
18 | method_response,
19 | list,
20 | "ESPN:get_news - invalid response",
21 | )
22 |
23 | def test_get_livescores(self):
24 | instance = Espncricinfo()
25 | method_response = instance.get_livescores()
26 |
27 | self.assertIsInstance(
28 | method_response,
29 | list,
30 | "ESPN:get_livescores - invalid response",
31 | )
32 |
33 |
34 | if __name__ == "__main__":
35 | unittest.main()
36 |
--------------------------------------------------------------------------------
/src/test/fide_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from scrape_up.fide import FIDE
3 |
4 |
5 | class FIDETest(unittest.TestCase):
6 | """
7 | Tests for the FIDE class in the fide module.
8 | | Methods | Details |
9 | | ------------------------- | -------------------------------------------------- |
10 | | `.get_events()` | Returns all the major chess events of 2024. |
11 | | `.get_open_ratings()` | Returns a list of top 100 open category players. |
12 | | `.get_women_ratings()` | Returns a list of top 100 women category players. |
13 | | `.get_juniors_ratings()` | Returns a list of top 100 juniors category players.|
14 | | `.get_girls_ratings()` | Returns a list of top 100 girls category players. |
15 | | `.get_news()` | Returns a list of top chess/fide news. |
16 | """
17 |
18 | def test_connection(self):
19 | instance = FIDE()
20 | self.assertTrue(
21 | instance,
22 | "FIDE:__init__ - connection failed",
23 | )
24 |
25 | def test_get_events(self):
26 | instance = FIDE()
27 | method_response = instance.get_events()
28 |
29 | self.assertIsInstance(
30 | method_response,
31 | list,
32 | "FIDE:get_events - invalid response",
33 | )
34 |
35 | def test_get_open_ratings(self):
36 | instance = FIDE()
37 | method_response = instance.get_open_ratings()
38 |
39 | self.assertIsInstance(
40 | method_response,
41 | list,
42 | "FIDE:get_open_ratings - invalid response",
43 | )
44 |
45 | def test_get_women_ratings(self):
46 | instance = FIDE()
47 | method_response = instance.get_women_ratings()
48 |
49 | self.assertIsInstance(
50 | method_response,
51 | list,
52 | "FIDE:get_women_ratings - invalid response",
53 | )
54 |
55 | def test_get_juniors_ratings(self):
56 | instance = FIDE()
57 | method_response = instance.get_juniors_ratings()
58 |
59 | self.assertIsInstance(
60 | method_response,
61 | list,
62 | "FIDE:get_juniors_ratings - invalid response",
63 | )
64 |
65 | def test_get_girls_ratings(self):
66 | instance = FIDE()
67 | method_response = instance.get_girls_ratings()
68 |
69 | self.assertIsInstance(
70 | method_response,
71 | list,
72 | "FIDE:get_girls_ratings - invalid response",
73 | )
74 |
75 | def test_get_news(self):
76 | instance = FIDE()
77 | method_response = instance.get_news()
78 |
79 | self.assertIsInstance(
80 | method_response,
81 | list,
82 | "FIDE:get_news - invalid response",
83 | )
84 |
85 |
86 | if __name__ == "__main__":
87 | unittest.main()
88 |
--------------------------------------------------------------------------------
/src/test/flexjobs_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 | from scrape_up.flexjobs import FlexJobs
4 |
5 |
6 | class TestFlexJobs(unittest.TestCase):
7 | def test_get_jobs_with_valid_search_query(self):
8 | flexjobs = FlexJobs("python developer")
9 | jobs = flexjobs.get_jobs()
10 | self.assertTrue(len(jobs) > 0, "No jobs found for valid search query")
11 |
12 | def test_get_jobs_with_location_query(self):
13 | flexjobs = FlexJobs("python developer", "New York")
14 | jobs = flexjobs.get_jobs()
15 | self.assertTrue(len(jobs) > 0, "No jobs found for valid location query")
16 |
17 | def test_get_jobs_with_min_jobs_limit(self):
18 | flexjobs = FlexJobs("python developer", min_jobs=5)
19 | jobs = flexjobs.get_jobs()
20 | self.assertTrue(
21 |             len(jobs) >= 5, "Fewer jobs retrieved than the min_jobs limit"
22 | )
23 |
24 |
25 | if __name__ == "__main__":
26 | unittest.main()
27 |
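The three FlexJobs tests differ only in the arguments passed to the constructor, so the same checks can be driven from one table with subTest. A sketch under that assumption, using only the call signatures shown in the tests above:

import unittest

from scrape_up.flexjobs import FlexJobs


class FlexJobsParametrizedTest(unittest.TestCase):
    def test_queries(self):
        # each entry mirrors one of the constructor calls used in TestFlexJobs
        cases = [
            (("python developer",), {}),
            (("python developer", "New York"), {}),
            (("python developer",), {"min_jobs": 5}),
        ]
        for args, kwargs in cases:
            with self.subTest(args=args, kwargs=kwargs):
                jobs = FlexJobs(*args, **kwargs).get_jobs()
                self.assertTrue(len(jobs) > 0, "No jobs found")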
--------------------------------------------------------------------------------
/src/test/geeksforgeeks_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from scrape_up.geeksforgeeks import Geeksforgeeks
3 | import json
4 |
5 |
6 | class GeeksforgeeksTest(unittest.TestCase):
7 | """
8 | Geeksforgeeks module test.
9 | | Methods | Details |
10 | | ----------------- | ---------------------------------------------------------------------------------- |
11 |     | `.get_profile()`  | Returns the user data in JSON format. |
12 | """
13 |
14 | def test_get_profile(self):
15 | instance = Geeksforgeeks(user="nikhil25803")
16 | method_response = instance.get_profile()
17 |
18 | if isinstance(method_response, str):
19 | try:
20 | method_response = json.loads(method_response)
21 | except json.JSONDecodeError:
22 | self.fail("get_profile should return a dictionary or a JSON string")
23 |
24 | expected_keys = [
25 | "username",
26 | "collage_name",
27 | "collage_rank",
28 | "overall_coding_score",
29 | "monthly_coding_score",
30 | "languages_used",
31 | "current_potd_streak",
32 | "total_problem_solved",
33 | "campus_ambassader",
34 | ]
35 |
36 | self.assertEqual(
37 | list(method_response.keys()),
38 | expected_keys,
39 | "Geeksforgeeks:get_profile - keys mismatch",
40 | )
41 |
42 |
43 | if __name__ == "__main__":
44 | unittest.main()
45 |
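Several modules tested here hand back either a dict or a JSON string (Geeksforgeeks above, EazyDiner earlier), so each test normalises the response before inspecting keys. That normalisation can live in one helper; a sketch (as_dict is an illustrative name, not part of scrape_up):

import json


def as_dict(response):
    # normalise a scraper response that may arrive as a dict or as a JSON string
    if isinstance(response, str):
        return json.loads(response)
    return response


# usage: keys = list(as_dict(instance.get_profile()).keys())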
--------------------------------------------------------------------------------
/src/test/github_education_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from scrape_up import github_education
3 |
4 |
5 | class GitHubEducationTest(unittest.TestCase):
6 | """
7 |     GitHubEducation module test.\n
8 | | Methods | Details |
9 | | -------------- | ------------------------------------------------------------------------------------------------------------------- |
10 | | `get_events()` | Returns the latest events along with their title, image_url, description, date, location, language, tags, and link. |
11 | """
12 |
13 | def test_get_events(self):
14 | instance = github_education.Events()
15 | method_response = instance.get_events()
16 |
17 | self.assertIsInstance(
18 | method_response, list, "GitHubEducation:get_events - return type mismatch"
19 | )
20 | self.assertTrue(all(isinstance(event, dict) for event in method_response))
21 |
22 | for event in method_response:
23 | self.assertEqual(
24 | list(event.keys()),
25 | [
26 | "title",
27 | "image_url",
28 | "description",
29 | "date",
30 | "location",
31 | "language",
32 | "tags",
33 | "link",
34 | ],
35 | "GitHubEducation:get_events - keys mismatch",
36 | )
37 |
38 |
39 | if __name__ == "__main__":
40 | unittest.main()
41 |
--------------------------------------------------------------------------------
/src/test/hackerearth_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 | from scrape_up import hackerearth
4 |
5 |
6 | class HackerEarthTest(unittest.TestCase):
7 | """
8 | HackerEarth module test.\n
9 | | Methods | Details |
10 | | --------------- | ---------------------------------------------------------------- |
11 | | `get_ongoing()` | Returns the ongoing challenges. |
12 | | `get_upcoming()`| Returns the upcoming challenges. |
13 | | `get_hiring()` | Returns information about ongoing hiring challenges. |
14 | """
15 |
16 | def setUp(self):
17 | self.instance = hackerearth.challenges.Challenges()
18 |
19 | def test_get_ongoing(self):
20 | ongoing_challenges = self.instance.get_ongoing()
21 | self.assertIsInstance(ongoing_challenges, list)
22 |
23 | if len(ongoing_challenges) > 0:
24 | first_challenge = ongoing_challenges[0]
25 | self.assertIsInstance(first_challenge, dict)
26 | self.assertEqual(
27 | list(first_challenge.keys()),
28 | ["Title", "No of Registrations", "Link"],
29 | "HackerEarth-Challenges:get_ongoing - keys mismatch",
30 | )
31 |
32 | def test_get_upcoming(self):
33 | upcoming_challenges = self.instance.get_upcoming()
34 | self.assertIsInstance(upcoming_challenges, list)
35 |
36 | if len(upcoming_challenges) > 0:
37 | first_challenge = upcoming_challenges[0]
38 | self.assertIsInstance(first_challenge, dict)
39 | self.assertEqual(
40 | list(first_challenge.keys()),
41 | ["Title", "No of Registrations", "Link"],
42 | "HackerEarth-Challenges:get_upcoming - keys mismatch",
43 | )
44 |
45 | def test_get_hiring(self):
46 | hiring_challenges = self.instance.get_hiring()
47 | self.assertIsInstance(hiring_challenges, list)
48 |
49 | if len(hiring_challenges) > 0:
50 | first_challenge = hiring_challenges[0]
51 | self.assertIsInstance(first_challenge, dict)
52 | self.assertEqual(
53 | list(first_challenge.keys()),
54 | ["Title", "Description", "Link"],
55 | "HackerEarth-Challenges:get_hiring - keys mismatch",
56 | )
57 |
58 |
59 | if __name__ == "__main__":
60 | unittest.main()
61 |
--------------------------------------------------------------------------------
/src/test/hackerrank_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from scrape_up import hackerrank
3 |
4 |
5 | class HackerrankTest(unittest.TestCase):
6 | """
7 |     Hackerrank module test.\n
8 | | Methods | Details |
9 | | ----------------------------- | ---------------------------------------------------------------- |
10 | | `get_profile(id="username")` | Returns name, username, country, user_type, details, badges, verified_skills, social etc. |
11 |     | `get_skills()`                | Returns a list of verified skills and their links |
12 |     | `active_contests()`           | Returns information on active contests like title, status, and link |
13 | | `archived_contests()` | Returns information regarding archived contests |
14 | """
15 |
16 | def test_get_profile(self):
17 | instance = hackerrank.User()
18 | method_response = instance.get_profile(id="inclinedadarsh")
19 |
20 | self.assertEqual(
21 | list(method_response.keys()),
22 | [
23 | "name",
24 | "username",
25 | "country",
26 | "user_type",
27 | "details",
28 | "badges",
29 | "verified_skills",
30 | "social",
31 | ],
32 | "Hackerrank:get_profile - keys mismatch",
33 | )
34 |
35 | def test_get_skills(self):
36 | instance = hackerrank.User()
37 | method_response = instance.get_skills()
38 |
39 | self.assertIsInstance(
40 | method_response, list, "Hackerrank:get_skills - return type mismatch"
41 | )
42 | self.assertTrue(
43 | all(isinstance(skill, dict) for skill in method_response),
44 | "Hackerrank:get_skills - return type mismatch",
45 | )
46 |
47 | for skill in method_response:
48 | self.assertIn("Name", skill)
49 | self.assertIn("Link", skill)
50 |
51 | def test_active_contests(self):
52 | instance = hackerrank.Contest()
53 | method_response = instance.active_contests()
54 |
55 | self.assertIsInstance(
56 | method_response, list, "Hackerrank:active_contests - return type mismatch"
57 | )
58 | self.assertTrue(
59 | all(isinstance(contest, dict) for contest in method_response),
60 | "Hackerrank:active_contests - return type mismatch",
61 | )
62 | for contest in method_response:
63 | self.assertIn("Title", contest)
64 | self.assertIn("Status", contest)
65 | self.assertIn("Link", contest)
66 |
67 | def test_archived_contests(self):
68 | instance = hackerrank.Contest()
69 | method_response = instance.archived_contests()
70 |
71 | self.assertIsInstance(
72 | method_response, list, "Hackerrank:archived_contests - return type mismatch"
73 | )
74 |
75 | for contest in method_response:
76 | self.assertIn("title", contest)
77 |
78 |
79 | if __name__ == "__main__":
80 | unittest.main()
81 |
--------------------------------------------------------------------------------
/src/test/healthgrades_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 | from scrape_up.healthgrades import HealthGrades
4 |
5 |
6 | class HealthGradesTest(unittest.TestCase):
7 | """
8 | HealthGrades module test.\n
9 | | Methods | Details |
10 | | --------------- | ---------------------------------------------------------------- |
11 | | `get_best_hospitals()` | Returns Name, Location, Link, Awards etc. |
12 | """
13 |
14 | def setUp(self):
15 | """
16 | setup instance for HealthGrades class
17 | """
18 | self.instance = HealthGrades()
19 |
20 | def test_get_best_hospitals(self):
21 | """
22 | Test get_best_hospitals for state 'bihar'
23 | """
24 | best_hospitals = self.instance.get_best_hospitals("bihar")
25 | first_hospital = best_hospitals[0]
26 |
27 | # assert statements
28 | self.assertIsInstance(best_hospitals, list)
29 | self.assertIsInstance(first_hospital, dict)
30 | self.assertEqual(
31 | list(first_hospital.keys()),
32 | ["Name", "Location", "Link", "Awards"],
33 | "Healthgrades:get_best_hospitals - keys mismatch",
34 | )
35 |
36 |
37 | if __name__ == "__main__":
38 | unittest.main()
39 |
--------------------------------------------------------------------------------
/src/test/icc_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from scrape_up import icc
3 |
4 |
5 | class ICCTest(unittest.TestCase):
6 | """
7 | ICC module test.\n
8 | | Method | Details |
9 | | ---------------------------- | ------------------------------------------------------------------- |
10 |     | `.team_rankings(format)`             | Returns the list of team rankings for the desired format             |
11 |     | `.player_ranking(type,format)`       | Returns the list of player rankings for the desired type and format  |
12 |     | `.team_rankings_women(format)`       | Returns the list of women's team rankings for the desired format     |
13 |     | `.player_ranking_women(type,format)` | Returns the list of women's player rankings for the desired type and format |
14 | """
15 |
16 | def test_team_rankings(self):
17 | instance = icc.ICC()
18 | response = instance.team_rankings("ODI")
19 | self.assertGreater(len(response), 0, "Team rankings is empty")
20 | self.assertTrue(isinstance(response, list), "Team rankings is not a list")
21 | self.assertTrue(
22 | all(
23 | isinstance(team, dict) and "rank" in team and "team" in team
24 | for team in response
25 | ),
26 | "Incorrect format for team rankings",
27 | )
28 |
29 | def test_player_ranking(self):
30 | instance = icc.ICC()
31 | response = instance.player_ranking("batting", "TEST")
32 | self.assertGreater(len(response), 0, "Player ranking is empty")
33 | self.assertTrue(isinstance(response, list), "Player ranking is not a list")
34 | self.assertTrue(
35 | all(
36 | isinstance(player, dict) and "rank" in player and "name" in player
37 | for player in response
38 | ),
39 | "Incorrect format for player rankings",
40 | )
41 |
42 | def test_team_rankings_women(self):
43 | instance = icc.ICC()
44 | response = instance.team_rankings_women("T20")
45 | self.assertGreater(len(response), 0, "Team rankings for women is empty")
46 | self.assertTrue(
47 | isinstance(response, list), "Team rankings for women is not a list"
48 | )
49 | self.assertTrue(
50 | all(
51 | isinstance(team, dict) and "rank" in team and "team" in team
52 | for team in response
53 | ),
54 | "Incorrect format for team rankings for women",
55 | )
56 |
57 | def test_player_ranking_women(self):
58 | instance = icc.ICC()
59 |         response = instance.player_ranking_women("bowling", "ODI")
60 | self.assertGreater(len(response), 0, "Player ranking for women is empty")
61 | self.assertTrue(
62 | isinstance(response, list), "Player ranking for women is not a list"
63 | )
64 | self.assertTrue(
65 | all(
66 | isinstance(player, dict) and "rank" in player and "name" in player
67 | for player in response
68 | ),
69 | "Incorrect format for player rankings for women",
70 | )
71 |
72 |
73 | if __name__ == "__main__":
74 | unittest.main()
75 |
--------------------------------------------------------------------------------
/src/test/librarygenesis_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from scrape_up.librarygenesis import LibGen
3 |
4 |
5 | class TestLibGen(unittest.TestCase):
6 | """
7 | | Methods | Details |
8 | | --------------| ----------------------------- |
9 | | `.getBooks(book_name=" ")` | Returns the books with name, author, size, format, book link, book cover link, language |
10 | """
11 |
12 | def setUp(self):
13 | """
14 | Initialize a LibGen instance before each test method.
15 | """
16 | self.libgen = LibGen()
17 |
18 | def test_getBooks_empty_name(self):
19 | """
20 | Test the getBooks() method with an empty book name.
21 | """
22 | try:
23 | result = self.libgen.getBooks("")
24 | self.assertEqual(
25 | result,
26 | "Error: enter name",
27 | "Expected error message for empty book name",
28 | )
29 | except:
30 | return None
31 |
32 | def test_getBooks_short_name(self):
33 | """
34 | Test the getBooks() method with a short book name.
35 | """
36 | try:
37 | result = self.libgen.getBooks("AI")
38 | self.assertEqual(
39 | result,
40 | "Error: Title Too Short",
41 | "Expected error message for short book name",
42 | )
43 | except:
44 | return None
45 |
46 | def test_getBooks_valid_name(self):
47 | """
48 | Test the getBooks() method with a valid book name.
49 | """
50 | try:
51 | result = self.libgen.getBooks("Python")
52 | self.assertIsInstance(result, list, "Expected a list of books")
53 | if result: # Check if there are books returned
54 | book = result[0]
55 | self.assertIn("name", book, "Book should have a 'name' field")
56 | self.assertIn("author", book, "Book should have an 'author' field")
57 | self.assertIn("size", book, "Book should have a 'size' field")
58 | self.assertIn("format", book, "Book should have a 'format' field")
59 | self.assertIn("link", book, "Book should have a 'link' field")
60 | self.assertIn("language", book, "Book should have a 'language' field")
61 | except:
62 | return None
63 |
64 | def test_getBooks_no_results(self):
65 | """
66 | Test the getBooks() method with a book name that yields no results.
67 | """
68 | try:
69 | result = self.libgen.getBooks("somebookthatdoesnotexist")
70 | self.assertEqual(
71 | result,
72 | "Error: no results found",
73 | "Expected error message for no results found",
74 | )
75 | except:
76 | return None
77 |
78 |
79 | if __name__ == "__main__":
80 | unittest.main()
81 |
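The bare try/except blocks above make the LibGen tests pass silently whenever the site is unreachable. Skipping instead keeps the run honest about what was not exercised; a sketch, assuming network failures surface as requests.RequestException (not verified against the module's internals):

import unittest

import requests

from scrape_up.librarygenesis import LibGen


class LibGenSmokeTest(unittest.TestCase):
    def test_getBooks_valid_name(self):
        try:
            result = LibGen().getBooks("Python")
        except requests.RequestException as exc:
            # report the skip instead of silently returning None
            self.skipTest(f"Library Genesis unreachable: {exc}")
        self.assertIsInstance(result, list, "Expected a list of books")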
--------------------------------------------------------------------------------
/src/test/lichess_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from scrape_up.lichess import LichessGames
3 |
4 |
5 | class TestLichessGames(unittest.TestCase):
6 | """
7 | | Methods | Details |
8 | | ----------------------------- | -------------------------------------------------------------------------- |
9 | | `.fetch_games()` | Fetch all the games data for the specified username. |
10 | """
11 |
12 | def setUp(self):
13 | """
14 | Initialize a LichessGames instance before each test method.
15 | """
16 | self.username = "chess_player" # Example username
17 | self.lichess_scraper = LichessGames(username=self.username)
18 |
19 | def test_fetch_games(self):
20 | """
21 | Test the fetch_games() method.
22 | """
23 | try:
24 | games = self.lichess_scraper.fetch_games()
25 |
26 | # Check if games is a list of dictionaries
27 | self.assertIsInstance(games, list)
28 | for game in games:
29 | self.assertIsInstance(game, dict)
30 | self.assertIn("white_player", game)
31 | self.assertIn("black_player", game)
32 | self.assertIn("pgn", game)
33 |
34 | white_player = game["white_player"]
35 | black_player = game["black_player"]
36 |
37 | self.assertIn("username", white_player)
38 | self.assertIn("before_game_score", white_player)
39 | self.assertIn("score_change", white_player)
40 |
41 | self.assertIn("username", black_player)
42 | self.assertIn("before_game_score", black_player)
43 | self.assertIn("score_change", black_player)
44 | except:
45 | return None
46 |
47 | def test_fetch_games_empty(self):
48 | """
49 | Test fetch_games() method with a username that has no games.
50 | """
51 | try:
52 | self.lichess_scraper = LichessGames(username="non_existent_user")
53 | games = self.lichess_scraper.fetch_games()
54 | self.assertEqual(
55 | games, [], "Expected an empty list for a non-existent user"
56 | )
57 | except:
58 | return None
59 |
60 |
61 | if __name__ == "__main__":
62 | unittest.main()
63 |
--------------------------------------------------------------------------------
/src/test/pinterest_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from scrape_up.pinterest import Pinterest
3 |
4 |
5 | class TestPinterest(unittest.TestCase):
6 | def setUp(self):
7 | self.pinterest = Pinterest()
8 |
9 | def test_get_today(self):
10 | today_topics = self.pinterest.get_today()
11 | self.assertIsInstance(today_topics, list, "Expected get_today to return a list")
12 | if today_topics:
13 | for topic in today_topics:
14 | self.assertIn("link", topic)
15 | self.assertIn("title", topic)
16 | self.assertIn("subtitle", topic)
17 | self.assertIn("image", topic)
18 |
19 | def test_get_photo(self):
20 | url = "https://pin.it/1ZhgQA5AG"
21 | photo = self.pinterest.get_photo(url)
22 | if photo:
23 | self.assertIn("alt", photo)
24 | self.assertIn("image", photo)
25 |
26 | def test_search_pins(self):
27 | keyword = "nature"
28 | pins = self.pinterest.search_pins(keyword=keyword)
29 | self.assertIsInstance(pins, list, "Expected search_pins to return a list")
30 | if pins:
31 | for pin in pins:
32 | self.assertIn("link", pin)
33 | self.assertIn("image", pin)
34 |
35 | def test_get_pin_details(self):
36 | pin_url = "https://pin.it/1ZhgQA5AG"
37 | details = self.pinterest.get_pin_details(pin_url)
38 | if details:
39 | self.assertIn("title", details)
40 | self.assertIn("description", details)
41 | self.assertIn("saves", details)
42 | self.assertIn("comments", details)
43 |
44 |
45 | if __name__ == "__main__":
46 | unittest.main()
47 |
--------------------------------------------------------------------------------
/src/test/quora_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from unittest.mock import patch
3 | from scrape_up.quora import Quora
4 |
5 |
6 | class TestQuora(unittest.TestCase):
7 | def setUp(self):
8 | self.scrapper = Quora()
9 |
10 | def test_fetch_answers(self):
11 | try:
12 | expected_answers = ["Accepted answer 1", "Suggested answer 1"]
13 |
14 | self.assertEqual(
15 | self.scrapper.fetch_answers("https://www.quora.com/question"),
16 | expected_answers,
17 | )
18 | except:
19 | return None
20 |
21 | def test_get_by_query(self):
22 | try:
23 | expected_answer = "Suggested answer 1"
24 |
25 | self.assertEqual(
26 | self.scrapper.get_by_query("How-should-I-start-learning-Python-1"),
27 | expected_answer,
28 | )
29 | except:
30 | return None
31 |
32 | def test_profile_details(self):
33 | try:
34 | expected_profile = {
35 | "name": "Nikhil Raj",
36 | "url": "https://www.quora.com/profile/Nikhil-Raj",
37 | }
38 |
39 | self.assertEqual(
40 | self.scrapper.profile_details("Nikhil Raj"), expected_profile
41 | )
42 | except:
43 | return None
44 |
45 |
46 | if __name__ == "__main__":
47 | unittest.main()
48 |
--------------------------------------------------------------------------------
/src/test/swiggy_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import requests
3 | from unittest.mock import patch
4 | from scrape_up.swiggy import Swiggy
5 |
6 |
7 | class TestSwiggy(unittest.TestCase):
8 | """
9 | Swiggy module test.
10 | | Methods | Details |
11 | | ------------------------- | ------------------------------------------------------------------------- |
12 | | `get_restraunt_details()` | Returns the restaurant data with name, cuisine, area, rating, offers, etc |
13 | | `get_restaurants()` | Returns the restaurant names as per given city |
14 | """
15 |
16 | def setUp(self):
17 | self.scrapper = Swiggy()
18 |
19 | def test_get_restraunt_details(self):
20 | try:
21 | expected_data = {
22 | "name": "Pizza Hut",
23 | "cuisine": "Pizzas",
24 | "area": "Karol Bagh",
25 | "rating": "3.7",
26 | "rating_count": "1K+ ratings",
27 | "cost_per_person": "₹350 for two",
28 | "offers": [{"15% OFF UPTO ₹300": "USE CITIFOODIE | ABOVE ₹1200"}],
29 | }
30 |
31 | self.assertEqual(
32 | self.scrapper.get_restraunt_details("https://www.swiggy.com/pizza-hut"),
33 | expected_data,
34 | )
35 | except:
36 | return None
37 |
38 | def test_get_restaurants(self):
39 | try:
40 | expected_restaurants = [
41 | {
42 | "Name": "Domino's Pizza",
43 | "Rating": "4.2",
44 | "Cusine": "Pizzas, Italian, Pastas, Desserts",
45 | "Location": "Punjabi Bagh",
46 | "Link": "/restaurant1",
47 | }
48 | ]
49 |
50 | self.assertEqual(
51 | self.scrapper.get_restaurants("Delhi"), expected_restaurants
52 | )
53 | except:
54 | return None
55 |
56 |
57 | if __name__ == "__main__":
58 | unittest.main()
59 |
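Both the Quora and Swiggy suites import patch without using it, and their assertEqual calls compare against live page content that changes over time. Stubbing requests.get with a canned response is one way to make such assertions deterministic; a minimal sketch of the mechanism only (the placeholder HTML is not Swiggy's real markup):

from unittest.mock import Mock, patch

import requests

# a canned response object standing in for a live page
fake_response = Mock(spec=requests.Response)
fake_response.status_code = 200
fake_response.text = "<html><body>placeholder markup</body></html>"

with patch("requests.get", return_value=fake_response):
    # any code that calls requests.get inside this block receives fake_response
    page = requests.get("https://www.swiggy.com/anything")
    assert page.status_code == 200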
--------------------------------------------------------------------------------
/src/test/who_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from scrape_up.who import WHO
3 |
4 |
5 | class TestWHO(unittest.TestCase):
6 | def setUp(self):
7 | """
8 | Initialize a WHO instance before each test method.
9 | """
10 | self.who_scraper = WHO()
11 |
12 | def test_get_disease_outbreak(self):
13 | """
14 | | Methods | Details |
15 | | ------------------------------ | ------------------------------------------- |
16 | | `get_disease_outbreak()` | Get Disease Outbreak News from WHO website. |
17 | """
18 | try:
19 | # Test with a valid number of items (assuming each page contains 10 items)
20 | number_of_items = 10
21 | disease_outbreaks = self.who_scraper.get_disease_outbreak(number_of_items)
22 |
23 | # Check if disease_outbreaks is a list
24 | self.assertIsNotNone(disease_outbreaks, "Failed to fetch disease outbreaks")
25 | self.assertIsInstance(
26 | disease_outbreaks, list, "Disease outbreaks data should be a list"
27 | )
28 |
29 | if disease_outbreaks:
30 | # Check if each item in the list is a dictionary with the required keys
31 | for outbreak in disease_outbreaks:
32 | self.assertIsInstance(
33 | outbreak, dict, "Each outbreak should be a dictionary"
34 | )
35 | self.assertIn("Title", outbreak, "Missing expected key: 'Title'")
36 | self.assertIn("Date", outbreak, "Missing expected key: 'Date'")
37 | self.assertIn("Link", outbreak, "Missing expected key: 'Link'")
38 |
39 | # Check if the values are of the correct type
40 | self.assertIsInstance(
41 | outbreak["Title"], str, "'Title' should be a string"
42 | )
43 | self.assertIsInstance(
44 | outbreak["Date"], str, "'Date' should be a string"
45 | )
46 | self.assertIsInstance(
47 | outbreak["Link"], str, "'Link' should be a string"
48 | )
49 |
50 | except:
51 | return None
52 |
53 | def test_invalid_number(self):
54 | """
55 | Test the get_disease_outbreak() method with an invalid number.
56 | """
57 | try:
58 | invalid_number = -10
59 | disease_outbreaks = self.who_scraper.get_disease_outbreak(invalid_number)
60 |
61 | # Check if the function handles invalid numbers gracefully
62 | self.assertIsNone(
63 | disease_outbreaks, "Function should return None for invalid input"
64 | )
65 | except:
66 | return None
67 |
68 |
69 | if __name__ == "__main__":
70 | unittest.main()
71 |
--------------------------------------------------------------------------------
/src/test/wuzzuf_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from unittest.mock import patch
3 | from scrape_up.wuzzuf import Jobs
4 | import requests
5 |
6 |
7 | class JobsTest(unittest.TestCase):
8 | """
9 | Jobs module test.
10 | | Methods | Details |
11 | | ------------------- | -------------------------------------------------------------------------------------------- |
12 | | `filter_job()` | Apply filters to the job search using parameters like title, country, city, min/max years of experience. |
13 | | `fetch_jobs()` | Fetch job listings based on the applied filters, with an optional maximum number of pages to scrape. |
14 | """
15 |
16 | def setUp(self):
17 | self.scraper = Jobs()
18 |
19 | def test_filter_job(self):
20 | self.scraper.filter_job(
21 | title="software engineer",
22 | country="Egypt",
23 | city="Cairo",
24 | min_years_of_experience=2,
25 | max_years_of_experience=5,
26 | )
27 | expected_url = "https://wuzzuf.net/search/jobs/?q=software+engineer&filters[country][0]=Egypt&filters[city][0]=Cairo&filters[years_of_experience_min][0]=2&filters[years_of_experience_max][0]=5"
28 | self.assertEqual(self.scraper.url, expected_url)
29 |
30 | @patch("requests.get")
31 | def test_fetch_jobs(self, mock_get):
32 | # Mock the get response
33 | mock_response = requests.Response()
34 | mock_response.status_code = 200
35 |         mock_response._content = b"""
36 |
37 |
38 |
39 |         Cairo, Egypt
40 |         3 days ago
41 |         Full Time
42 |         Senior
43 |
44 |         """
45 | mock_get.return_value = mock_response
46 |
47 | jobs = self.scraper.fetch_jobs(max_page_number=1)
48 | expected_job = {
49 | "name": "Software Engineer",
50 | "url": "/job/1",
51 | "company": "Company Name",
52 | "location": "Cairo, Egypt",
53 | "published_time": "3 days ago",
54 | "properties": "Full Time ,Senior",
55 | }
56 |
57 | self.assertEqual(len(jobs), 1)
58 | self.assertEqual(jobs[0], expected_job)
59 |
60 |
61 | if __name__ == "__main__":
62 | unittest.main()
63 |
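The expected_url in test_filter_job is the Wuzzuf search endpoint plus URL-encoded filter parameters, so the same string can be assembled with urllib.parse.urlencode; a sketch using only the parameter names visible in that URL:

from urllib.parse import urlencode

params = {
    "q": "software engineer",
    "filters[country][0]": "Egypt",
    "filters[city][0]": "Cairo",
    "filters[years_of_experience_min][0]": 2,
    "filters[years_of_experience_max][0]": 5,
}
# safe="[]" keeps the bracketed filter names readable instead of percent-encoding them
url = "https://wuzzuf.net/search/jobs/?" + urlencode(params, safe="[]")
print(url)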
--------------------------------------------------------------------------------