├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── bug.yaml │ ├── documentationupdate.yaml │ └── featurerequest.yaml ├── pull_request_template.md └── workflows │ ├── greetings.yaml │ └── pr_merged.yml ├── .gitignore ├── .vscode └── settings.json ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── SECURITY.md ├── dev-documentation.md ├── docs ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── home.md ├── index.md ├── installation.md └── modules │ ├── Finance.md │ ├── HackerEarth.md │ ├── Hackernews.md │ ├── Internshala.md │ ├── Twitter.md │ ├── academia.md │ ├── amazon.md │ ├── ask-ubuntu.md │ ├── bbc.md │ ├── codechef.md │ ├── coinmarketcap.md │ ├── coursera.md │ ├── covid-19.md │ ├── crickbuzz.md │ ├── devpost.md │ ├── dribbble.md │ ├── eazydinner.md │ ├── ebay.md │ ├── espn.md │ ├── flexjobs.md │ ├── flipkart.md │ ├── flipkartclothing.md │ ├── flipkartlaptop.md │ ├── flyrobu.md │ ├── github.md │ ├── githubedu.md │ ├── gitlab.md │ ├── googlenews.md │ ├── hackerrank.md │ ├── hashnode.md │ ├── healthgrade.md │ ├── iccranking.md │ ├── imdb-actor.md │ ├── imdb-boxoffice.md │ ├── imdb-celeb.md │ ├── imdb-indian.md │ ├── imdb-movies.md │ ├── imdb.md │ ├── instagram.md │ ├── installation.md │ ├── kooapp.md │ ├── leetcode.md │ ├── letterboxd.md │ ├── luma.md │ ├── medium.md │ ├── reddit.md │ ├── spotify.md │ ├── stackoverflow.md │ ├── techcrunch.md │ ├── wikipedia.md │ └── youtube.md ├── documentation.md ├── mkdocs.yml ├── project_setup.sh ├── pyproject.toml ├── requirements.txt ├── setup.cfg └── src ├── scrape_up ├── __init__.py ├── academia │ ├── __init__.py │ └── academia.py ├── amazon │ ├── __init__.py │ └── products.py ├── ambitionBox │ └── company.py ├── askubuntu │ ├── __init__.py │ └── questions.py ├── atcoder │ ├── __init__.py │ └── atcoder.py ├── banners │ ├── __init__.py │ └── scraper88x31.py ├── bayt │ ├── __init__.py │ └── bayt.py ├── bbcnews │ ├── __init__.py │ └── bbcnews.py ├── billionaires │ └── billionaires.py ├── bugmenot │ └── bugmenot.py ├── cars │ ├── __init__.py │ └── cars.py ├── codechef │ ├── __init__.py │ └── codechef.py ├── codeforces │ ├── __init__.py │ ├── contests.py │ └── user.py ├── codewars │ ├── __init__.py │ └── codewars.py ├── coinmarketcap │ ├── __init__.py │ └── crypto.py ├── config │ ├── __init__.py │ └── request_config.py ├── coursera │ ├── __init__.py │ └── courses.py ├── covidinfo │ ├── __init__.py │ └── covidinfo.py ├── cricbuzz │ ├── __init__.py │ └── cricbuzz.py ├── devcommunity │ ├── __init__.py │ └── articles.py ├── devpost │ ├── __init__.py │ └── devpost.py ├── dictionary │ ├── __init__.py │ └── wordoftheday.py ├── dribbble │ ├── __init__.py │ └── dribbble.py ├── eazydiner │ ├── __init__.py │ └── eazydiner.py ├── ebay │ ├── __init__.py │ └── ebay.py ├── espn │ ├── __init__.py │ └── espnmodule.py ├── espncricinfo │ ├── __init__.py │ └── espncricinfo.py ├── fide │ ├── __init__.py │ └── fide.py ├── finance │ ├── bse.py │ ├── nasdaq.py │ ├── nse.py │ └── stock_price.py ├── flexjobs │ ├── __init__.py │ └── flexjobs.py ├── flipkart │ ├── __init__.py │ ├── flipkart_clothing.py │ ├── flipkart_file.py │ └── flipkart_laptop.py ├── flyrobu │ ├── __init__.py │ └── flyrobu.py ├── geeksforgeeks │ ├── __init__.py │ └── geeksforgeeks.py ├── github │ ├── __init__.py │ ├── issue.py │ ├── organization.py │ ├── pull_request.py │ ├── repository.py │ └── users.py ├── github_education │ ├── __init__.py │ └── events.py ├── googlenews │ └── googleNews.py ├── hackerearth │ ├── __init__.py │ └── challenges.py ├── hackernews │ ├── __init__.py │ 
└── articles.py ├── hackerrank │ ├── __init__.py │ ├── contest.py │ └── user.py ├── hashnode │ ├── __init__.py │ └── hashnode.py ├── healthgrades │ ├── __init__.py │ └── healthgradesmodule.py ├── icc │ ├── __init__.py │ └── icc_rankings.py ├── imdb │ ├── __init__.py │ ├── actor.py │ ├── box_office.py │ ├── celeb.py │ ├── imdb.py │ ├── indian_movies.py │ └── movie.py ├── indiantrekking │ ├── __init__.py │ └── trek.py ├── indiatodayweather │ ├── __init__.py │ └── weather.py ├── instagram │ └── users.py ├── internshala │ └── internships.py ├── kindle_bookstore │ └── kindle.py ├── kooapp │ └── users.py ├── lastfm │ ├── __init__.py │ └── lastfm.py ├── leetcode │ └── leetcode_scraper.py ├── letterboxd │ ├── __init__.py │ └── letterboxd.py ├── librarygenesis │ ├── __init__.py │ └── library.py ├── lichess │ ├── __init__.py │ └── lichess.py ├── linkedIn │ └── linkedInspider.py ├── luma │ └── events.py ├── magicbricks │ └── MagicBricks.py ├── mediencyclopedia │ └── mediencyclopedia.py ├── medium │ ├── publication.py │ ├── trending.py │ └── user.py ├── moneycontrol │ ├── equity_mutual_funds.py │ ├── gold.py │ ├── index_contribution.py │ ├── indian_index.py │ ├── silver_prices.py │ ├── top_gainers.py │ └── top_losers.py ├── myanimelist │ ├── __init__.py │ └── scraper.py ├── newscnn │ └── newscnn.py ├── olympics │ └── olympic.py ├── pinterest │ ├── __init__.py │ └── pinterest.py ├── quora │ ├── __init__.py │ └── quora.py ├── reddit │ └── reddit.py ├── robu │ └── robu.py ├── rottentomatoes │ └── rot_tom.py ├── spotify │ └── spotify_file.py ├── stackoverflow │ └── questions.py ├── steam │ ├── __init__.py │ └── steamScraper.py ├── swiggy │ ├── __init__.py │ └── swiggy.py ├── sysreqlab │ ├── __init__.py │ ├── find_titles.py │ └── requirements.py ├── techcrunch │ └── techCrunch.py ├── thehindu │ └── thehindu.py ├── timeanddate │ ├── city.py │ ├── day_in_history.py │ ├── extended_forecast.py │ ├── fun_holidays.py │ ├── time_zones.py │ └── utc.py ├── timesjobs │ └── timesjobs_scraper.py ├── tripadvisor │ └── TripAdvisor.py ├── twitter │ └── numidconverter.py ├── uci │ └── UCI.py ├── udemy │ └── courses.py ├── unsplash │ └── unsplash_scraper.py ├── who │ ├── WHO.py │ └── __init__.py ├── wikipedia │ └── wikipedia.py ├── wuzzuf │ ├── __init__.py │ └── wuzzuf.py ├── yahoofinance │ └── YahooFinance.py ├── yellowpages │ ├── __init__.py │ └── yellowpages.py ├── youtube │ ├── channel.py │ └── video.py └── zomato │ ├── __init__.py │ └── zomato.py └── test ├── academia_test.py ├── amazon_test.py ├── askubuntu_test.py ├── atcoder_test.py ├── banners_test.py ├── bayt_test.py ├── bbc_test.py ├── codechef_test.py ├── coinmarketcap_test.py ├── coursera_test.py ├── covidinfo_test.py ├── cricbuzz_test.py ├── devpost_test.py ├── dribbble_test.py ├── eazydiner_test.py ├── ebay_test.py ├── espncricinfo_test.py ├── fide_test.py ├── flexjobs_test.py ├── flipkart_test.py ├── flyrobu_test.py ├── geeksforgeeks_test.py ├── github_education_test.py ├── github_test.py ├── hackerearth_test.py ├── hackernews_test.py ├── hackerrank_test.py ├── healthgrades_test.py ├── icc_test.py ├── librarygenesis_test.py ├── lichess_test.py ├── myanimelist.py ├── pinterest_test.py ├── quora_test.py ├── swiggy_test.py ├── who_test.py ├── wuzzuf_test.py └── zomato_test.py /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: [nikhil25803] 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug.yaml: 
--------------------------------------------------------------------------------
name: "🐞 Bug Report"
description: "Create a report to help us improve"
title: "BUG:"
labels: [Bug, Needs Triage]
body:
  - type: checkboxes
    attributes:
      label: "Is there an existing issue for this?"
      description: "Please search to see if an issue already exists for the bug you encountered."
      options:
        - label: "I have searched the existing issues"
          required: true
  - type: textarea
    attributes:
      label: "What happened?"
      description: "A concise description of what you're experiencing."
    validations:
      required: true
  - type: checkboxes
    attributes:
      label: "Record"
      options:
        - label: "I agree to follow this project's Code of Conduct"
          required: true
        - label: "I'm a GSSoC'24 contributor"
        - label: "I want to work on this issue"
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/documentationupdate.yaml:
--------------------------------------------------------------------------------
name: "📑 Documentation Update"
description: "Improve Documentation"
title: "DOC:"
labels: [DOC, Needs Triage]
body:
  - type: textarea
    attributes:
      label: "What's wrong with the existing documentation?"
      description: "What needs to be added, updated, or removed?"
    validations:
      required: true
  - type: textarea
    attributes:
      label: "Add Screenshots"
      description: "Add sufficient screenshots to explain your issue."
    validations:
      required: false

  - type: checkboxes
    attributes:
      label: "Record"
      options:
        - label: "I agree to follow this project's Code of Conduct"
          required: true
        - label: "I'm a GSSoC'24 contributor"
        - label: "I want to work on this issue"
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/featurerequest.yaml:
--------------------------------------------------------------------------------
name: "✨ Feature Request"
description: "Suggest an idea for this project"
title: "Feat:"
labels: [Feat, Needs Triage]
body:
  - type: textarea
    attributes:
      label: "Describe the feature"
      description: "A clear and concise description of the feature you are proposing."
    validations:
      required: true
  - type: textarea
    attributes:
      label: "Add Screenshots"
      description: "Add sufficient screenshots to explain your issue."
    validations:
      required: true
  - type: checkboxes
    attributes:
      label: "Record"
      options:
        - label: "I agree to follow this project's Code of Conduct"
          required: true
        - label: "I'm a GSSoC'24 contributor"
        - label: "I want to work on this issue"
--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
## Description

> Briefly describe the changes made in this pull request.

## Resolves: [Issue Number]

## Checklist

> Before submitting this pull request, please verify that the following checkpoints have been met.

- [ ] Have you adhered to the repository's defined coding convention rules?
- [ ] Have you updated the `documentation.md` file with the method/function documentation?
- [ ] Have you sent a message along with the result or response?
- [ ] Have you used try-except blocks for error handling?
- [ ] Has the method/class been added to the documentation (md file)?

## Screenshots

> Upload a screenshot illustrating the approach you developed, for validation.

## Additional Notes/Comments

> Any additional remarks or suggestions concerning this pull request are welcome.

---

By submitting this pull request, I certify that I have carried out the relevant checks and provided the requisite screenshot for validation.
Thank you for your contribution.
--------------------------------------------------------------------------------
/.github/workflows/greetings.yaml:
--------------------------------------------------------------------------------
name: Greetings

on: [issues, pull_request_target]

jobs:
  greeting:
    runs-on: ubuntu-latest
    permissions:
      issues: write
      pull-requests: write
    steps:
      - uses: actions/first-interaction@v1
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}
          issue-message: "Hi there! Thanks for opening this issue. We appreciate your contribution to this open-source project. We aim to respond or assign your issue as soon as possible."
          pr-message: "Welcome to our repository! 🎊 Thank you so much for taking the time to contribute."
--------------------------------------------------------------------------------
/.github/workflows/pr_merged.yml:
--------------------------------------------------------------------------------
name: Auto Comment on PR Merged

on:
  pull_request_target:
    types: [closed]

permissions:
  issues: write
  pull-requests: write

jobs:
  comment:
    runs-on: ubuntu-latest
    if: github.event.pull_request.merged == true
    steps:
      - name: Add Comment to Pull Request
        run: |
          COMMENT=$(cat <<EOF
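          # NOTE: the body of this heredoc was truncated in this dump. A plausible
          # completion, shown only as an illustration (assumed wording and commands,
          # not the repository's actual text), would close the heredoc and post the
          # comment with the GitHub CLI:
          #
          #   Hi @${{ github.event.pull_request.user.login }}, thank you for your contribution! Your pull request has been merged. 🎉
          #   EOF
          #   )
          #   gh pr comment "${{ github.event.pull_request.number }}" --body "$COMMENT"
          #
          # For gh to authenticate, the step would also need GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} under env.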
--------------------------------------------------------------------------------
/docs/installation.md:
--------------------------------------------------------------------------------
### Install the package using `pip`:

```bash
pip install scrape-up --upgrade
```

### Import the required module

> For example, `GitHub`

```py
# Import the required module
from scrape_up import github
```

### Instantiate an object with required parameters

> Also mentioned in the docstring

```python
user = github.Users(username="nikhil25803")
```

### Call the required method

> For example, to extract the number of followers of a user:

```python
# Call the followers method
followers_count = user.followers()
```
--------------------------------------------------------------------------------
/docs/modules/Finance.md:
--------------------------------------------------------------------------------
```python
from scrape_up import StockPrice
```

### Scrape stock data

First, create an instance of class `StockPrice` with the stock name and index name.

```python
infosys = StockPrice('infosys', 'nse')
```

| Methods                                   | Details                                                                              |
| ----------------------------------------- | ------------------------------------------------------------------------------------ |
| `.get_latest_price()`                     | Returns the latest stock price of the given stock name.                               |
| `.get_historical_data(from_date,to_date)` | Returns the stock prices from `from_date` to `to_date` (dates in dd-mm-yyyy format).  |

---
--------------------------------------------------------------------------------
/docs/modules/HackerEarth.md:
--------------------------------------------------------------------------------
```py
from scrape_up import hackerearth
```

Create an object of class `Challenges`

```python
hackerearth = hackerearth.Challenges()
```

| Methods          | Details                                                |
| ---------------- | ------------------------------------------------------ |
| `get_upcoming()` | Get the details of upcoming challenges on Hackerearth. |
| `get_ongoing()`  | Get the details of ongoing challenges on Hackerearth.  |
| `get_hiring()`   | Get the details of hiring challenges on Hackerearth.   |

---
--------------------------------------------------------------------------------
/docs/modules/Hackernews.md:
--------------------------------------------------------------------------------

```py
from scrape_up import hacker_news
```

Create an instance of the `HackerNews` class.

```py
articles = hacker_news.HackerNews()
```

| Methods            | Details                                                                                                                   |
| ------------------ | ------------------------------------------------------------------------------------------------------------------------- |
| `.articles_list()` | Returns the latest articles along with their score, author, author url, time, comment count and link in JSON format.       |
| `.new_articles()`  | Returns the latest new articles along with their score, author, author url, time, comment count and link in JSON format.   |
| `.past_articles()` | Returns the past articles along with their score, author, author url, time, comment count and link in JSON format.         |
| `.ask_articles()`  | Returns the ask articles along with their score, author, author url, time, comment count and link in JSON format.          |
| `.show_articles()` | Returns the show articles along with their score, author, author url, time, comment count and link in JSON format.         |
| `.jobs()`          | Returns the jobs along with their time and link in JSON format.                                                            |
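
A quick end-to-end sketch of the flow above (the `hacker_news` import follows the snippet at the top of this page; the exact shape of the returned data is an assumption, not a guarantee):

```py
from scrape_up import hacker_news

# Instantiate the scraper and pull the current front-page articles.
articles = hacker_news.HackerNews()
latest = articles.articles_list()  # JSON-style list of articles (score, author, link, ...)
print(latest)
```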

---
--------------------------------------------------------------------------------
/docs/modules/Internshala.md:
--------------------------------------------------------------------------------
## Internshala

Create an object of the `Internshala` class:

```python
search = Internshala(search_type="machine learning")
```

| Methods                    | Details                                                                        |
| -------------------------- | ------------------------------------------------------------------------------ |
| `.internships()`           | Scrapes and returns a list of dictionaries representing internships.           |
| `.jobs()`                  | Scrapes and returns a list of dictionaries representing jobs.                  |
| `.certification_courses()` | Scrapes and returns a list of dictionaries representing certification courses. |
--------------------------------------------------------------------------------
/docs/modules/Twitter.md:
--------------------------------------------------------------------------------
```python
from scrape_up import twitter
```

### Scrape username/ID mappings

First, create an object of class `TwitterScraper`

```python
twitter_scraper = twitter.TwitterScraper()
```

| Methods                    | Details                                       |
| -------------------------- | --------------------------------------------- |
| `.unametoid(username)`     | Returns the numerical_id on passing username. |
| `.idtouname(numerical_id)` | Returns the username on passing numerical_id. |

---
--------------------------------------------------------------------------------
/docs/modules/academia.md:
--------------------------------------------------------------------------------
```py
from scrape_up import academia
```

Create an instance of `Academia` class

```python
academia = academia.Academia()
```

| Method                        | Details                                                                |
| ----------------------------- | ---------------------------------------------------------------------- |
| `get_research_topics()`       | Fetches and returns research topics.                                   |
| `get_research_papers(search)` | Fetches and returns research papers related to the given search term.  |

---
--------------------------------------------------------------------------------
/docs/modules/amazon.md:
--------------------------------------------------------------------------------

### Scrape details about a product

Create an instance of the `Product` class with a `product_name` property.

```python
product = Product(product_name="watch")
```

| Methods                  | Details                       |
| ------------------------ | ----------------------------- |
| `.get_product()`         | Returns product data (links). |
| `.get_product_details()` | Returns product details.      |
| `.get_product_image()`   | Returns the product image.    |
| `.customer_review()`     | Returns product reviews.      |

## Amazon-Kindle Bookstore

Create an instance of the `AmazonKindle` class.

```python
books = AmazonKindle()
```

| Methods          | Details                                                  |
| ---------------- | -------------------------------------------------------- |
| `.bestsellers()` | Returns the list of best-selling books on Amazon Kindle  |
| `.topbooks()`    | Returns the list of top books on Amazon Kindle           |
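
A combined sketch of the two classes above (this page shows no imports, so the paths below are assumptions based on the repository layout, where `Product` lives in `scrape_up/amazon` and `AmazonKindle` in `scrape_up/kindle_bookstore`):

```python
# Illustrative only: the import locations are assumed, not documented on this page.
from scrape_up.amazon import Product
from scrape_up.kindle_bookstore import AmazonKindle

product = Product(product_name="watch")
print(product.get_product_details())  # details for the searched product

books = AmazonKindle()
print(books.bestsellers())  # current Kindle best-sellers
```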

--------------------------------------------------------------------------------
/docs/modules/ask-ubuntu.md:
--------------------------------------------------------------------------------

### Scrape questions, views, votes, answer counts, and descriptions from the Ask Ubuntu website regarding a topic

Create an instance of `Questions` class.

```python
questions = Questions("topic")
```

| Methods                     | Details                                                                                              |
| --------------------------- | ---------------------------------------------------------------------------------------------------- |
| `.getNewQuestions()`        | Returns the new questions, views, votes, answer counts, and descriptions in JSON format               |
| `.getActiveQuestions()`     | Returns the active questions, views, votes, answer counts, and descriptions in JSON format            |
| `.getUnansweredQuestions()` | Returns the unanswered questions, views, votes, answer counts, and descriptions in JSON format        |
| `.getBountiedQuestions()`   | Returns the bountied questions, views, votes, answer counts, and descriptions in JSON format          |
| `.getFrequentQuestions()`   | Returns the frequently asked questions, views, votes, answer counts, and descriptions in JSON format  |
| `.getHighScoredQuestions()` | Returns the most voted questions, views, votes, answer counts, and descriptions in JSON format        |

---
--------------------------------------------------------------------------------
/docs/modules/bbc.md:
--------------------------------------------------------------------------------
```py
from scrape_up import bbcnews
```

First create an object of class `BBCNews`

```python
user = bbcnews.BBCNews()
```

| Methods            | Details                                                   |
| ------------------ | --------------------------------------------------------- |
| `.get_headlines()` | Returns the list of objects containing the headlines.     |
| `.get_article()`   | Returns an object with proper details about the articles. |

---
--------------------------------------------------------------------------------
/docs/modules/codechef.md:
--------------------------------------------------------------------------------

```python
from scrape_up import codechef
```

### Scrape user details

Create an object of class `Codechef`

```python
user1 = codechef.User(id="username")
```

| Methods         | Details                                                           |
| --------------- | ----------------------------------------------------------------- |
| `get_profile()` | Returns name, username, profile_image_link, rating, details, etc. |
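
A short usage sketch (the username is a placeholder; the profile is assumed to come back as a dictionary-like object):

```python
from scrape_up import codechef

user = codechef.User(id="username")  # replace with a real CodeChef handle
profile = user.get_profile()         # name, rating, profile image link, etc.
print(profile)
```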

---
--------------------------------------------------------------------------------
/docs/modules/coinmarketcap.md:
--------------------------------------------------------------------------------
```py
from scrape_up import coinmarketcap
```

Create an instance of `Crypto` class

```python
crypto = coinmarketcap.Crypto()
```

| Method                       | Details                                                  |
| ---------------------------- | -------------------------------------------------------- |
| `get_top_cryptocurrencies()` | Fetches and returns data about the top cryptocurrencies. |

---
--------------------------------------------------------------------------------
/docs/modules/coursera.md:
--------------------------------------------------------------------------------
Create an object of the `Courses` class:

```python
scraper = Courses(topic="topic")
```

| Methods                                | Details                                                                                   |
| -------------------------------------- | ----------------------------------------------------------------------------------------- |
| `.get_courses()`                       | Returns the courses with title, taught by, skills, rating, review count, img url and link  |
| `.fetch_modules(course='Course Name')` | Returns the modules associated with the given Coursera course.                             |
--------------------------------------------------------------------------------
/docs/modules/covid-19.md:
--------------------------------------------------------------------------------

```py
from scrape_up import covidinfo
```

Create an instance of the `CovidInfo` class.

```python
response = covidinfo.CovidInfo()
```

| Methods              | Details                                                         |
| -------------------- | --------------------------------------------------------------- |
| `.covid_data()`      | Returns the list of all covid data scraped from the website.    |
| `.total_cases()`     | Returns the count of total covid cases all over the world.      |
| `.total_deaths()`    | Returns the count of covid deaths all over the world.           |
| `.total_recovered()` | Returns the count of recovered covid cases all over the world.  |
| `.latest_news()`     | Returns the latest news of the day.                             |

---
--------------------------------------------------------------------------------
/docs/modules/crickbuzz.md:
--------------------------------------------------------------------------------
```py
from scrape_up import cricbuzz
```

Create an instance of `Cricubzz` class.

```python
cricbuzz = cricbuzz.Cricubzz()
```

| Methods                      | Details                                                                |
| ---------------------------- | ---------------------------------------------------------------------- |
| `.get_live_matches()`        | Returns a list of live matches from Cricbuzz.                          |
| `.get_recent_matches()`      | Returns a list of recent matches from Cricbuzz.                        |
| `.get_upcoming_matches()`    | Returns a list of upcoming matches from Cricbuzz.                      |
| `.get_series()`              | Returns a dictionary of series in month and year format from Cricbuzz. |
| `.get_series_from_archive()` | Returns a list of series from the archive from Cricbuzz.               |
| `.get_matches_by_day()`      | Returns a dictionary of matches by day from Cricbuzz.                  |
| `.get_series_matches()`      | Returns a list of matches in a series from Cricbuzz.                   |
| `.get_series_stats()`        | Returns a list of stats of players in a series from Cricbuzz.          |
| `.get_teams_list()`          | Returns a list of teams from Cricbuzz.                                 |
| `.get_team_schedule()`       | Returns a list of matches of a team from Cricbuzz.                     |
| `.get_team_players()`        | Returns a list of players of a team from Cricbuzz.                     |
| `.get_team_results()`        | Returns a list of past results of a team from Cricbuzz.                |
| `.get_team_stats()`          | Returns a list of player stats of a team from Cricbuzz.                |
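
A minimal sketch pulling live and upcoming matches (the import and class name follow the snippets above; note that the class really is spelled `Cricubzz` in this codebase):

```py
from scrape_up import cricbuzz

c = cricbuzz.Cricubzz()
print(c.get_live_matches())      # matches currently in progress
print(c.get_upcoming_matches())  # fixtures yet to start
```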

---
--------------------------------------------------------------------------------
/docs/modules/devpost.md:
--------------------------------------------------------------------------------

Create an instance of `Devpost` class.

```python
posts = Devpost()
```

| Methods             | Details                                                                                                              |
| ------------------- | -------------------------------------------------------------------------------------------------------------------- |
| `.get_projects()`   | Returns the latest projects along with their description, like and comment count, image and member details.           |
| `.search(topic)`    | Returns the searched projects along with their description, like and comment count, image and member details.         |
| `.get_hackathons()` | Returns the latest hackathons along with their title, participants, prizes, and deadlines.                            |
| `.get_featured()`   | Returns the latest featured projects along with their description, like and comment count, image and member details.  |
| `.get_winner()`     | Returns the latest winning projects along with their description, like and comment count, image and member details.   |

---
--------------------------------------------------------------------------------
/docs/modules/dribbble.md:
--------------------------------------------------------------------------------
```py
from scrape_up import dribbble
```

Create an instance of `Dribbble` class.

```python
shots = dribbble.Dribbble()
```

| Methods               | Details                                                                                                                        |
| --------------------- | ------------------------------------------------------------------------------------------------------------------------------ |
| `.get_shots()`        | Returns the latest shots along with their title, designer, designer URL, like and view count, and link.                         |
| `.search(topic)`      | Returns the latest shots along with their title, designer, designer URL, like and view count, and link for the searched topic.  |
| `.get_animation()`    | Returns the latest animation along with their title, designer, designer URL, like and view count, and link.                     |
| `.get_branding()`     | Returns the latest branding along with their title, designer, designer URL, like and view count, and link.                      |
| `.get_illustration()` | Returns the latest illustration along with their title, designer, designer URL, like and view count, and link.                  |
| `.get_mobile()`       | Returns the latest mobile shots along with their title, designer, designer URL, like and view count, and link.                  |
| `.get_webdesign()`    | Returns the latest web-design shots along with their title, designer, designer URL, like and view count, and link.              |

---
--------------------------------------------------------------------------------
/docs/modules/eazydinner.md:
--------------------------------------------------------------------------------

Create an instance of `EazyDiner` class.

```python
restaurants = EazyDiner(location="city-name")
```

| Methods                   | Details                                                                                           |
| ------------------------- | ------------------------------------------------------------------------------------------------- |
| `.get_restaurants()`      | Returns the restaurant names, location, rating, cuisine and prices in JSON format.                 |
| `.get_breakfast()`        | Returns the restaurant names, location, rating, cuisine and prices in JSON format for Breakfast.   |
| `.get_lunch()`            | Returns the restaurant names, location, rating, cuisine and prices in JSON format for Lunch.       |
| `.get_dinner()`           | Returns the restaurant names, location, rating, cuisine and prices in JSON format for Dinner.      |
| `.dinner_with_discount()` | Returns a list of restaurants from the entered location with a 50% offer.                          |
| `.get_top10()`            | Returns a list of the top 10 restaurants from a given city.                                        |

---
--------------------------------------------------------------------------------
/docs/modules/ebay.md:
--------------------------------------------------------------------------------
```py
from scrape_up import ebay
```

Create an instance of the `eBay` class

```python
ebay_scraper = ebay.eBay()
```

| Methods             | Details                             |
| ------------------- | ----------------------------------- |
| `.spotlights()`     | Returns spotlight deals on eBay.    |
| `.featured()`       | Returns the featured deals on eBay. |
| `.specific_deals()` | Returns the specific deals on eBay. |

---
--------------------------------------------------------------------------------
/docs/modules/espn.md:
--------------------------------------------------------------------------------
```py
from scrape_up import espn
```

Create an instance of `ESPN` class

```python
espn = espn.ESPN()
```

| Method              | Details                                                        |
| ------------------- | -------------------------------------------------------------- |
| `get_scoreboard()`  | Fetches and returns the football scoreboards for a given date. |
| `get_tournaments()` | Fetches and returns information about football tournaments.    |
| `get_teams()`       | Fetches and returns information about football teams.          |

---
--------------------------------------------------------------------------------
/docs/modules/flexjobs.md:
--------------------------------------------------------------------------------

```python
flex_jobs = FlexJobs(search_query, location_query, min_jobs)
```

- Attributes

| Attribute        | Description                                                       |
| ---------------- | ----------------------------------------------------------------- |
| `search_query`   | The search query to filter job listings.                          |
| `location_query` | The location query to filter job listings (defaults to '').       |
| `min_jobs`       | The maximum number of job listings to retrieve (defaults to 100). |

- Methods

| Method                                 | Description                                                                                                                                        |
| -------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------- |
| `get_jobs() -> list`                   | Retrieves job listings from the FlexJobs website based on the search and location queries. Returns a list of dictionaries containing job details.   |
| `scrape_job_info(job_listing) -> dict` | Extracts job details from a job listing HTML element.                                                                                               |
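
A hedged sketch of a typical query (this page shows no import, so the module path below is an assumption; the query values are placeholders):

```python
from scrape_up import flexjobs  # assumed import path, mirroring the other modules

flex_jobs = flexjobs.FlexJobs("python developer", "remote", 50)
for job in flex_jobs.get_jobs():  # list of dicts with job details
    print(job)
```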

---
--------------------------------------------------------------------------------
/docs/modules/flipkart.md:
--------------------------------------------------------------------------------

### Scrape details of products

Create an instance of `Flipkart` class.

```python
item = Flipkart()
```

| Methods               | Details                                                            |
| --------------------- | ------------------------------------------------------------------ |
| `.TVs()`              | Returns the list of TV sets on Flipkart.                           |
| `.bestseller_books()` | Returns the list of bestselling books data listed on Flipkart.     |
| `.mobiles()`          | Returns the list of mobile phones under 50K along with their data. |
| `.sport_shoes()`      | Returns the list of trending sport shoes data.                     |
| `.laptops()`          | Returns the list of laptops from Flipkart.                         |
| `.camera()`           | Returns the list of cameras from Flipkart.                         |
| `.computer()`         | Returns the list of computers from Flipkart.                       |
| `.tablets()`          | Returns the list of tablets from Flipkart.                         |
| `.cycle()`            | Returns the list of bicycles from Flipkart.                        |
| `.printers()`         | Returns the list of printers from Flipkart.                        |
| `.monitor()`          | Returns the list of monitors from Flipkart.                        |
| `.ac()`               | Returns the list of ACs from Flipkart.                             |
| `.refrigerator()`     | Returns the list of refrigerators from Flipkart.                   |
| `.VRbox()`            | Returns the list of VR boxes from Flipkart.                        |
| `.Speakers()`         | Returns the list of speakers from Flipkart.                        |

---
--------------------------------------------------------------------------------
/docs/modules/flipkartclothing.md:
--------------------------------------------------------------------------------
Create an instance of `FlipkartClothing` class.

```python
cloth = flipkart.FlipkartClothing()
```

| Methods                    | Details                                                       |
| -------------------------- | ------------------------------------------------------------- |
| `.scrape()`                | Returns the list of t-shirts with other relevant info.        |
| `.range()`                 | Returns the list of t-shirts within a particular price range. |
| `.minrating()`             | Returns the list of t-shirts having a minimum given rating.   |
| `.gendermale()`            | Returns the list of t-shirts which are for males.             |
| `.genderfemale()`          | Returns the list of t-shirts which are for females.           |
| `.size()`                  | Returns the list of t-shirts having a particular size.        |
| `formal_shirts_for_male()` | Returns the list of formal shirts for men.                    |

---
--------------------------------------------------------------------------------
/docs/modules/flipkartlaptop.md:
--------------------------------------------------------------------------------
Create an instance of `FlipkartLaptops` class.

```python
item = flipkart.FlipkartLaptops()
```

| Methods      | Details                                   |
| ------------ | ----------------------------------------- |
| `.laptops()` | Returns the list of laptops with details. |

---
--------------------------------------------------------------------------------
/docs/modules/flyrobu.md:
--------------------------------------------------------------------------------
```py
from scrape_up import flyrobu
```

Create an instance of `Flyrobu` class.

```python
flyrobu = flyrobu.Flyrobu()
```

| Methods                              | Details                                                                                                      |
| ------------------------------------ | ------------------------------------------------------------------------------------------------------------ |
| `.search(keyword)`                   | Returns the JSON data of all the details related to the search, along with the total number of items found.   |
| `.get_product_details(product_name)` | Returns the JSON data of the product details based on the given `product_name`.                               |

---
--------------------------------------------------------------------------------
/docs/modules/githubedu.md:
--------------------------------------------------------------------------------

```python
from scrape_up import github_education
```

### Scrape event details

Create an instance of the `Events` class.

```py
events = github_education.Events()
```

| Methods         | Details                                                                                                              |
| --------------- | --------------------------------------------------------------------------------------------------------------------- |
| `.get_events()` | Returns the latest events along with their title, image_url, description, date, location, language, tags, and link.    |

---
--------------------------------------------------------------------------------
/docs/modules/googlenews.md:
--------------------------------------------------------------------------------

### Scrape articles with title, descriptions, news source, date and link regarding a topic

Create an instance of `GoogleNews` class.

```python
articles = GoogleNews()
```

| Methods                        | Details                                                                                          |
| ------------------------------ | ------------------------------------------------------------------------------------------------ |
| `.getArticles(topic="github")` | Returns the list of articles with title, descriptions, news source, date and link in JSON format  |
| `.top_stories()`               | Returns the list of top stories listed regarding the mentioned topic                              |
| `.timed_aticles(time)`         | Returns the list of top stories listed regarding the mentioned topic and within that time frame   |
| `.bylanguage(lang)`            | Returns the list of top stories listed regarding the mentioned topic in the specified language    |
| `.bynumerofdaysback(number)`   | Returns the list of stories listed from the given number of days back from the current day        |
| `.bylocation(countryname)`     | Returns the list of top stories listed for the specified country or geolocation                   |

---
--------------------------------------------------------------------------------
/docs/modules/hackerrank.md:
--------------------------------------------------------------------------------
```py
from scrape_up import hackerrank
```

### Scrape user details

Create an object of class `User`.

```python
user = hackerrank.User()
```

| Methods                      | Details                                                                                   |
| ---------------------------- | ----------------------------------------------------------------------------------------- |
| `get_profile(id="username")` | Returns name, username, country, user_type, details, badges, verified_skills, social etc. |
| `get_skills()`               | Returns a list of verified skills and their links.                                        |

### Scrape contest details

Create an object of class `Contest`.

```python
contest = hackerrank.Contest()
```

| Methods               | Details                                                               |
| --------------------- | --------------------------------------------------------------------- |
| `active_contests()`   | Returns information on active contests like title, status, and link.  |
| `archived_contests()` | Returns information regarding archived contests.                      |
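
Putting both classes together (the username is illustrative, and the printed structures are assumptions based on the method descriptions):

```py
from scrape_up import hackerrank

user = hackerrank.User()
print(user.get_profile(id="nikhil25803"))  # profile details for the given handle

contest = hackerrank.Contest()
print(contest.active_contests())           # title, status, and link per contest
```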

---
--------------------------------------------------------------------------------
/docs/modules/hashnode.md:
--------------------------------------------------------------------------------

Create an instance of `Hashnode` class.

```python
blogs = Hashnode()
```

| Methods           | Details                                                                                               |
| ----------------- | ------------------------------------------------------------------------------------------------------ |
| `.get_feed()`     | Returns the blogs with title, descriptions, author, read time, like and comment count, date and link    |
| `.get_featured()` | Returns the featured blogs with title, descriptions, author, like and comment count, date and link      |
| `.get_recent()`   | Returns the recent blogs with title, descriptions, author, like and comment count, date and link        |
| `.search(topic)`  | Returns the blogs with title, descriptions, author, like and comment count, date and link for a topic   |

---
--------------------------------------------------------------------------------
/docs/modules/healthgrade.md:
--------------------------------------------------------------------------------
```py
from scrape_up import healthgrades
```

Create an instance of `HealthGrades` class

```python
hc = healthgrades.HealthGrades()
```

| Method                      | Details                                                              |
| --------------------------- | -------------------------------------------------------------------- |
| `get_best_hospitals(state)` | Fetches and returns information about the best hospitals in a state. |

---
--------------------------------------------------------------------------------
/docs/modules/iccranking.md:
--------------------------------------------------------------------------------
```py
from scrape_up import icc
```

Create an instance of `ICC` class.

```python
scraper = icc.ICC()
```

| Method                               | Details                                                                   |
| ------------------------------------ | -------------------------------------------------------------------------- |
| `.team_rankings(format)`             | Returns the list of rankings of teams of the desired format.               |
| `.player_ranking(type,format)`       | Returns the list of player rankings of the desired type and format.        |
| `.team_rankings_women(format)`       | Returns the list of rankings of women's teams of the desired format.       |
| `.player_ranking_women(type,format)` | Returns the list of women player rankings of the desired type and format.  |

---
--------------------------------------------------------------------------------
/docs/modules/imdb-actor.md:
--------------------------------------------------------------------------------
Create an instance of `Actor` class.

```python
actor = imdb.Actor(actor_name)
```

| Methods             | Details                                                  |
| ------------------- | -------------------------------------------------------- |
| `.popular_movies()` | Returns the popular movies in which the actor has acted. |
| `.all_movies()`     | Returns all movies acted in, and upcoming movies.        |
| `.awards()`         | Returns the number of awards and nominations.            |
--------------------------------------------------------------------------------
/docs/modules/imdb-boxoffice.md:
--------------------------------------------------------------------------------
Create an instance of `BoxOffice` class.

```python
boxoffice = imdb.BoxOffice()
```

| Methods         | Details                                                                          |
| --------------- | --------------------------------------------------------------------------------- |
| `.top_movies()` | Returns the top box office movies, weekend and total gross, and weeks released.    |
--------------------------------------------------------------------------------
/docs/modules/imdb-celeb.md:
--------------------------------------------------------------------------------
Create an instance of `Celeb` class.

```python
celeb = imdb.Celeb()
```

| Methods         | Details                                                 |
| --------------- | ------------------------------------------------------- |
| `.top_celebs()` | Returns the name, roles, and famous movie of the celeb. |
--------------------------------------------------------------------------------
/docs/modules/imdb-indian.md:
--------------------------------------------------------------------------------
Create an instance of `IndianMovies` class.

```python
indianmovies = imdb.IndianMovies()
```

| Methods                | Details                                        |
| ---------------------- | ---------------------------------------------- |
| `.top_indian_movies()` | Returns the current list of top Indian movies. |

--------------------------------------------------------------------------------
/docs/modules/imdb-movies.md:
--------------------------------------------------------------------------------
Create an instance of `Movie` class.

```python
movie = imdb.Movie(movie_name)
```

| Methods          | Details                                                           |
| ---------------- | ----------------------------------------------------------------- |
| `.rating()`      | Returns the IMDB rating of the movie.                             |
| `.description()` | Returns the description, cast, and director of the movie.         |
| `.more_movies()` | Returns similar movies recommended by IMDB.                       |
| `.box_office()`  | Returns the budget and gross worldwide collections of the movie.  |

--------------------------------------------------------------------------------
/docs/modules/imdb.md:
--------------------------------------------------------------------------------
Create an instance of the `IMDB` class.

```python
scraper = IMDB()
```

| Methods                       | Details                                                         |
| ----------------------------- | --------------------------------------------------------------- |
| `.top_rated()`                | Returns the top-rated movies listed on IMDB.                    |
| `.scrape_genre_movies(genre)` | Returns the list of movies related to the genre you mentioned.  |
| `.top_rated_shows()`          | Returns the top-rated shows listed on IMDB.                     |

Create an instance of `Movie` class.

```python
movie = Movie(movie_name)
```

| Methods          | Details                                                          |
| ---------------- | ---------------------------------------------------------------- |
| `.rating()`      | Returns the IMDB rating of the movie                             |
| `.description()` | Returns the description, cast and director of the movie          |
| `.more_movies()` | Returns similar movies recommended by IMDB                       |
| `.box_office()`  | Returns the budget and gross worldwide collections of the movie  |

Create an instance of `Actor` class.

```python
actor = Actor(actor_name)
```

| Methods             | Details                                                 |
| ------------------- | ------------------------------------------------------- |
| `.popular_movies()` | Returns the popular movies in which the actor has acted |
| `.all_movies()`     | Returns all movies acted in and upcoming movies         |
| `.awards()`         | Returns the number of awards and nominations            |

Create an instance of `Celeb` class.

```python
celeb = Celeb()
```

| Methods         | Details                                                |
| --------------- | ------------------------------------------------------ |
| `.top_celebs()` | Returns the name, roles, and famous movie of the celeb |

Create an instance of `IndianMovies` class.

```python
indianmovies = IndianMovies()
```

| Methods                | Details                                       |
| ---------------------- | --------------------------------------------- |
| `.top_indian_movies()` | Returns the current list of top Indian movies |

Create an instance of `BoxOffice` class.

```python
boxoffice = BoxOffice()
```

| Methods         | Details                                                                        |
| --------------- | ------------------------------------------------------------------------------ |
| `.top_movies()` | Returns the top box office movies, weekend and total gross, and weeks released  |

---


(Back to top)

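
A combined sketch of the classes documented above (this page shows no imports; accessing them through the `imdb` module below mirrors the per-class IMDb pages and is an assumption):

```python
from scrape_up import imdb  # assumed import path

scraper = imdb.IMDB()
print(scraper.top_rated())       # top-rated movies

movie = imdb.Movie("Inception")  # the movie name is illustrative
print(movie.rating())            # IMDB rating for that title
```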

--------------------------------------------------------------------------------
/docs/modules/instagram.md:
--------------------------------------------------------------------------------
## Instagram

```python
from scrape_up import instagram
```

### Scrape User details

First, create an object of the class `User`

```python
user = instagram.User(username="nikhil25803")
```

| Methods           | Details                                    |
| ----------------- | ------------------------------------------ |
| `.user_details()` | Returns the number of followers of a user. |

--------------------------------------------------------------------------------
/docs/modules/installation.md:
--------------------------------------------------------------------------------
# How to use this package? 👀

- Install the package using `pip`

```bash
pip install scrape-up
```

- Scrape the required information; for example, extract the number of followers of a user.

--------------------------------------------------------------------------------
/docs/modules/kooapp.md:
--------------------------------------------------------------------------------

```py
from scrape_up import kooapp
```

### Scrape a Kooapp user's details

Create an instance of `KooUser` class.

```py
user = kooapp.KooUser('krvishal')
```

| Methods                  | Details                                                      |
| ------------------------ | ------------------------------------------------------------ |
| `.get_name()`            | Returns the name of the user.                                 |
| `.get_bio()`             | Returns the bio of the user.                                  |
| `.get_avatar_url()`      | Returns the URL of the first avatar of the user.              |
| `.followers()`           | Returns the number of followers of a user.                    |
| `.following()`           | Returns the number of people the user is following.           |
| `.get_social_profiles()` | Returns all the connected social media profiles of the user.  |
| `.get_profession()`      | Returns the title/profession of the user.                     |
--------------------------------------------------------------------------------
/docs/modules/leetcode.md:
--------------------------------------------------------------------------------
```python
from scrape_up import leetcode
```

### Scrape user details

First, create an object of class `LeetCodeScraper`

```python
leetcode_scraper = LeetCodeScraper(username="nikhil25803")
```

**User Specific Methods - Require Username**

| Methods                       | Details                                                                |
| ----------------------------- | ---------------------------------------------------------------------- |
| `.scrape_rank()`              | Used to scrape the rank of a user on LeetCode.                         |
| `.scrape_rating()`            | Used to scrape the rating of a user on LeetCode.                       |
| `.get_problems_solved()`      | Used to scrape the total problems solved by a user on LeetCode.        |
| `.get_solved_by_difficulty()` | Used to scrape difficulty-wise problems solved by a user on LeetCode.  |
| `.get_github_link()`          | Used to scrape the GitHub link of a user on LeetCode.                  |
| `.get_linkedin_link()`        | Used to scrape the LinkedIn link of a user on LeetCode.                |
| `.get_community_stats()`      | Used to scrape the community stats of a user on LeetCode.              |
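
As a quick sketch of the user-specific methods above (instantiation follows the snippet at the top of this page; the username is a placeholder):

```python
leetcode_scraper = LeetCodeScraper(username="nikhil25803")
print(leetcode_scraper.scrape_rank())          # global ranking of the user
print(leetcode_scraper.get_problems_solved())  # total problems solved
```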

**General Purpose Methods - Do Not Require Username**

| Methods                                            | Details                                                                                                                                                                                                                            |
| -------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `.get_problems(difficulty, tags_list, search_key)` | Used to scrape the top problems of LeetCode based on filters. `difficulty` is a string from ("easy", "medium", "hard"), `tags_list` is a list of tags, and `search_key` is a string to search for. All the parameters are optional.  |
| `.get_contests()`                                  | Used to scrape the upcoming LeetCode contest details.                                                                                                                                                                                |
| `.get_daily_challenge()`                           | Used to scrape LeetCode Daily Challenge details.                                                                                                                                                                                     |

--------------------------------------------------------------------------------
/docs/modules/letterboxd.md:
--------------------------------------------------------------------------------
Create an instance of `Letterboxd` class.

```python
letterboxd_user = Letterboxd(user="arpy8")
```

| Methods                  | Details                                                                           |
| ------------------------ | ---------------------------------------------------------------------------------- |
| `.films_watched()`       | Returns the number of films watched by the user.                                    |
| `.recent_activity(n)`    | Returns a list of length `n` of the latest activity by the user.                    |
| `.recent_reviews(n)`     | Returns a list of dictionaries of length `n` with the latest reviews by the user.   |
| `.get_watchlist(n)`      | Returns a list of length `n` including movies and series watchlisted by the user.   |
| `.get_followers_count()` | Returns the number of followers of the user.                                        |
| `.get_following_count()` | Returns the number of users the user is following.                                  |

Note: `n` is an optional integer value that can be used to limit the number of results returned by the methods.

---
--------------------------------------------------------------------------------
/docs/modules/luma.md:
--------------------------------------------------------------------------------

Create an instance of `Luma` class.

```python
events = Luma()
```

| Methods         | Details                                                                                    |
| --------------- | ------------------------------------------------------------------------------------------ |
| `.get_events()` | Returns the latest events along with their organizer, location, image url, price and link.  |

---
--------------------------------------------------------------------------------
/docs/modules/medium.md:
--------------------------------------------------------------------------------
## Medium

```python
from scrape_up import medium
```

### Scrape user details

First, create an object of class `Users`

```python
user = medium.Users(username="nikhil25803")
```

| Methods           | Details                                |
| ----------------- | -------------------------------------- |
| `.get_articles()` | Returns the article titles of the user.|

### Scrape trending articles

| Methods           | Details                                        |
| ----------------- | ---------------------------------------------- |
| `.get_trending()` | Returns the trending article titles on Medium. |

### Scrape publication details

First, create an object of class `Publication`

```python
publication = medium.Publication(link="https://....")
```

| Methods           | Details                                             |
| ----------------- | --------------------------------------------------- |
| `.get_articles()` | Returns a list of articles of the given publication.|

---
--------------------------------------------------------------------------------
/docs/modules/reddit.md:
--------------------------------------------------------------------------------

Create an instance of `Reddit` class.

```python
posts = Reddit()
```

| Methods          | Details                                                                                                                               |
| ---------------- | --------------------------------------------------------------------------------------------------------------------------------------- |
| `.getFeed()`     | Returns the posts with title, descriptions, subreddit, subreddit avatar, time, vote and comment count, image, category and link           |
| `.get_best()`    | Returns the best posts with title, descriptions, subreddit, subreddit avatar, time, vote and comment count, image, category and link      |
| `.get_hot()`     | Returns the hot posts with title, descriptions, subreddit, subreddit avatar, time, vote and comment count, image, category and link       |
| `.get_top()`     | Returns the top posts with title, descriptions, subreddit, subreddit avatar, time, vote and comment count, image, category and link       |
| `.search(topic)` | Returns the top posts with title, subreddit, subreddit avatar, date, vote and comment count and link for a searched topic                 |

---
--------------------------------------------------------------------------------
/docs/modules/spotify.md:
--------------------------------------------------------------------------------

### Scrape up songs

Create an instance of `Spotify` class.

```python
scraper = Spotify()
```

| Methods                      | Details                                                   |
| ---------------------------- | --------------------------------------------------------- |
| `.scrape_songs_by_keyword()` | Returns the list of songs that are related to the keyword |
| `.scrape_homepage()`         | Returns the list of playlists on the homepage             |
| `.close()`                   | Closes the Chrome tab that is showing the results         |

---
--------------------------------------------------------------------------------
/docs/modules/stackoverflow.md:
--------------------------------------------------------------------------------
Create an instance of `StackOverflow` class.

```python
questions = StackOverflow("topic")
```

| Methods                     | Details                                                                                              |
| --------------------------- | ---------------------------------------------------------------------------------------------------- |
| `.getNewQuestions()`        | Returns the new questions, views, votes, answer counts, and descriptions in JSON format               |
| `.getActiveQuestions()`     | Returns the active questions, views, votes, answer counts, and descriptions in JSON format            |
| `.getUnansweredQuestions()` | Returns the unanswered questions, views, votes, answer counts, and descriptions in JSON format        |
| `.getBountiedQuestions()`   | Returns the bountied questions, views, votes, answer counts, and descriptions in JSON format          |
| `.getFrequentQuestions()`   | Returns the frequently asked questions, views, votes, answer counts, and descriptions in JSON format  |
| `.getHighScoredQuestions()` | Returns the most voted questions, views, votes, answer counts, and descriptions in JSON format        |
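
A short sketch of the flow above (no import is shown on this page, so the path below is an assumption based on the repository layout; "python" is a placeholder topic):

```python
from scrape_up.stackoverflow.questions import StackOverflow  # assumed location

questions = StackOverflow("python")
print(questions.getNewQuestions())  # newest questions for the topic, in JSON format
```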
2 | 3 | ```python 4 | questions = StackOverflow("topic") 5 | ``` 6 | 7 | | Methods | Details | 8 | | --------------------------- | ---------------------------------------------------------------------------------------------------- | 9 | | `.getNewQuestions()` | Returns the new questions, views, votes, answer counts, and descriptions in JSON format | 10 | | `.getActiveQuestions()` | Returns the active questions, views, votes, answer counts, and descriptions in JSON format | 11 | | `.getUnansweredQuestions()` | Returns the unanswered questions, views, votes, answer counts, and descriptions in JSON format | 12 | | `.getBountiedQuestions()` | Returns the bountied questions, views, votes, answer counts, and descriptions in JSON format | 13 | | `.getFrequentQuestions()` | Returns the frequently asked questions, views, votes, answer counts, and descriptions in JSON format | 14 | | `.getHighScoredQuestions()` | Returns the most voted questions, views, votes, answer counts, and descriptions in JSON format | 15 | 16 | --- 17 | -------------------------------------------------------------------------------- /docs/modules/techcrunch.md: -------------------------------------------------------------------------------- 1 | 2 | ### Scrape articles with title, descriptions, images, author, date and link 3 | 4 | Create an instance of `TechCrunch` class. 5 | 6 | ```python 7 | articles = TechCrunch() 8 | ``` 9 | 10 | | Methods | Details | 11 | | ---------------- | ---------------------------------------------------------------------------------------------------------------------- | 12 | | `.getArticles()` | Returns the articles with title, descriptions, images, author, date and link regarding a category in JSON format | 13 | | `.search()` | Returns the searched articles with title, descriptions, images, author, date and link regarding a topic in JSON format | 14 | 15 | --- -------------------------------------------------------------------------------- /docs/modules/wikipedia.md: -------------------------------------------------------------------------------- 1 | ```python 2 | from scrape_up import Wikipedia 3 | ``` 4 | 5 | ## Scrape Wikipedia Details 6 | 7 | Create an object of the `WikipediaScraper` class: 8 | 9 | ```python 10 | scraper = WikipediaScraper() 11 | ``` 12 | 13 | | Methods | Details | 14 | | ----------------- | ------------------------------------------------------- | 15 | | `.scrape(url)` | Returns the scraped data from Wikipedia | 16 | | `.get_featured()` | Returns the featured article for the day from Wikipedia | 17 | 18 | --- 19 | -------------------------------------------------------------------------------- /docs/modules/youtube.md: -------------------------------------------------------------------------------- 1 | 2 | ### Scrape Video Details 3 | 4 | Create an instance of `Video` class. 5 | 6 | ```python 7 | video = Video(video_url="video_url") 8 | ``` 9 | 10 | | Methods | Details | 11 | | --------------- | ------------------------- | 12 | | `.getDetails()` | Returns the video details | 13 | 14 | ### Scrape Channel Details 15 | 16 | Create an instance of `Channel` class. 
17 | 18 | ```python 19 | channel_data = Channel(channel_username="BeABetterDev") 20 | ``` 21 | 22 | | Methods | Details | 23 | | ------------------ | ---------------------------------------------------------------------- | 24 | | `.getAbout()` | Returns the channel details mentioned in the about page of the channel | 25 | | `.getVideos()` | Returns all the video details in the videos page of the channel | 26 | | `.get_community()` | Returns all the post details in the community page of the channel | 27 | 28 | --- -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: Scrape Up 2 | site_description: A web-scraping-based python package that enables you to scrape data from various platforms like GitHub, Twitter, Instagram, or any useful website. 3 | site_url: "" 4 | plugins: 5 | - search 6 | nav: 7 | - Scrape Up: index.md 8 | - Contribution: CONTRIBUTING.md 9 | - Installation: installation.md 10 | - Guide: home.md 11 | - Modules: 12 | - 'Academia': modules/academia.md 13 | - 'BBC News': modules/bbc.md 14 | - 'Codechef': modules/codechef.md 15 | - 'Coin Market Cap': modules/coinmarketcap.md 16 | - 'Covid-19': modules/covid-19.md 17 | - 'Crickbuzz': modules/crickbuzz.md 18 | - 'Dribbble': modules/dribbble.md 19 | - 'Ebay': modules/ebay.md 20 | - 'ESPN': modules/espn.md 21 | - 'Eazydiner': modules/eazydinner.md 22 | - 'Flipkart': modules/flipkart.md 23 | - 'Flipkart Clothing': modules/flipkartclothing.md 24 | - 'Flipkart laptops': modules/flipkartlaptop.md 25 | - 'Flyrobu': modules/flyrobu.md 26 | - 'GitHub': modules/github.md 27 | - 'Github Education': modules/githubedu.md 28 | - 'Gitlab': modules/gitlab.md 29 | - 'HackerEarth': modules/HackerEarth.md 30 | - 'Hackernews': modules/Hackernews.md 31 | - 'Hashnode': modules/hashnode.md 32 | - 'Health Grades': modules/healthgrade.md 33 | - 'ICC Rankings': modules/iccranking.md 34 | - 'IMDb': modules/imdb.md 35 | - 'IMDb Box Office': modules/imdb-boxoffice.md 36 | - 'IMDb Indian Movies': modules/imdb-indian.md 37 | - 'imdb-actor': modules/imdb-actor.md 38 | - 'imdb-celebrity': modules/imdb-celeb.md 39 | - 'imdb-movies': modules/imdb-movies.md 40 | # - 'Amazon': modules/amazon.md 41 | # - 'Ask-Ubuntu': modules/ask-ubuntu.md 42 | # - 'Coursera': modules/coursera.md 43 | # - 'Devpost': modules/devpost.md 44 | # - 'Finance': modules/Finance.md 45 | # - 'Flex Jobs': modules/flexjobs.md 46 | # - 'Google News': modules/googlenews.md 47 | # - 'Instagram': modules/instagram.md 48 | # - 'Internshala': modules/internshala.md 49 | # - 'Kooapp': modules/kooapp.md 50 | # - 'Leet Code': modules/leetcode.md 51 | # - 'Luma': modules/luma.md 52 | # - 'Medium': modules/medium.md 53 | # - 'Reddit': modules/reddit.md 54 | # - 'Spotify': modules/spotify.md 55 | # - 'Stack Overflow': modules/stackoverflow.md 56 | # - 'Tech Crunch': modules/techcrunch.md 57 | # - 'Twitter': modules/Twitter.md 58 | # - 'Wikipedia': modules/wikipedia.md 59 | # - 'Youtube': modules/youtube.md 60 | - Code of Conduct: CODE_OF_CONDUCT.md 61 | 62 | theme: 63 | features: 64 | - header.autohide 65 | name: material 66 | palette: 67 | - media: '(prefers-color-scheme: light)' 68 | scheme: default 69 | primary: teal 70 | accent: amber 71 | toggle: 72 | icon: material/lightbulb 73 | name: Switch to dark mode 74 | - media: '(prefers-color-scheme: dark)' 75 | scheme: slate 76 | primary: teal 77 | accent: amber 78 | toggle: 79 | icon: material/lightbulb-outline 80 | name: 
Switch to light mode 81 | repo_name: Clueless-Community/scrape-up 82 | repo_url: https://github.com/Clueless-Community/scrape-up -------------------------------------------------------------------------------- /project_setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | pip install --upgrade setuptools wheel 6 | 7 | 8 | pip_version=$(pip --version | awk '{print $2}') 9 | required_version="24.0" 10 | 11 | version_greater_equal() { 12 | printf '%s\n%s\n' "$2" "$1" | sort -C -V  # succeeds when $1 >= $2 13 | } 14 | 15 | if ! version_greater_equal "$pip_version" "$required_version"; then 16 | echo "Upgrading pip (found $pip_version, need at least $required_version)" 17 | pip install --upgrade pip 18 | else 19 | echo "pip is already at version $pip_version, no need to upgrade." 20 | fi 21 | 22 | pip install -r requirements.txt 23 | 24 | pip install . 25 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools", 4 | "wheel" 5 | ] 6 | build-backend = "setuptools.build_meta" -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4==4.11.1 2 | bs4==0.0.1 3 | requests==2.28.2 4 | requests-html==0.10.0 5 | mkdocs==1.6.0 6 | mkdocs-material -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = scrape_up 3 | version = 1.1.7 4 | author = Clueless Community 5 | author_email = official.cluelesscommunity@gmail.com 6 | description = A web-scraping-based python package that enables you to scrape data from various platforms. 
7 | long_description = file: documentation.md 8 | long_description_content_type = text/markdown 9 | url = https://github.com/Clueless-Community/scrape-up 10 | classifiers = 11 | Programming Language :: Python :: 3 12 | License :: OSI Approved :: MIT License 13 | Operating System :: OS Independent 14 | 15 | [options] 16 | package_dir = 17 | = src 18 | packages = find: 19 | python_requires = >=3.6 20 | install_requires = 21 | bs4 22 | requests 23 | requests-html 24 | beautifulsoup4 25 | 26 | [options.packages.find] 27 | where = src -------------------------------------------------------------------------------- /src/scrape_up/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Clueless-Community/scrape-up/5a92d9b64d25329035c4afc0ef7f18e49d774997/src/scrape_up/__init__.py -------------------------------------------------------------------------------- /src/scrape_up/academia/__init__.py: -------------------------------------------------------------------------------- 1 | from .academia import Academia 2 | 3 | __all__ = ["Academia"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/amazon/__init__.py: -------------------------------------------------------------------------------- 1 | from .products import Product 2 | 3 | __all__ = ["Product"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/askubuntu/__init__.py: -------------------------------------------------------------------------------- 1 | from .questions import Questions 2 | 3 | __all__ = ["Questions"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/atcoder/__init__.py: -------------------------------------------------------------------------------- 1 | from .atcoder import Atcoder 2 | 3 | __all__ = ["Atcoder"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/banners/__init__.py: -------------------------------------------------------------------------------- 1 | from .scraper88x31 import Scraper88x31 2 | 3 | __all__ = ["Scraper88x31"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/banners/scraper88x31.py: -------------------------------------------------------------------------------- 1 | import bs4 2 | 3 | from scrape_up.config.request_config import RequestConfig, get 4 | 5 | 6 | class Scraper88x31: 7 | """ 8 | Create an instance of the `Scraper88x31` class. 
9 | ```python 10 | scraper = Scraper88x31() 11 | ``` 12 | | Methods | Details | 13 | | ------------------ | -------------------------------------------------------- | 14 | | `get_all()` | Returns the list of all available 88x31 banners | 15 | """ 16 | 17 | def __init__(self, *, config: RequestConfig = RequestConfig()): 18 | self.urls_to_scrape = [ 19 | "https://cyber.dabamos.de/88x31/index.html", 20 | "https://cyber.dabamos.de/88x31/index2.html", 21 | "https://cyber.dabamos.de/88x31/index3.html", 22 | "https://cyber.dabamos.de/88x31/index4.html", 23 | "https://cyber.dabamos.de/88x31/index5.html", 24 | ] 25 | self.config = config 26 | 27 | def get_all(self): 28 | """ 29 | Class: Scraper88x31 30 | Returns the list of all available 88x31 banners 31 | Example: 32 | ```python 33 | banners = Scraper88x31() 34 | result = banners.get_all() 35 | ``` 36 | 37 | Returns: 38 | ```json 39 | ["https://cyber.dabamos.de/88x31/000010.gif", "https://cyber.dabamos.de/88x31/007button.gif", "..."] 40 | ``` 41 | """ 42 | img_alt = [] 43 | for url in self.urls_to_scrape: 44 | try: 45 | response = get(url, self.config) 46 | response.raise_for_status() 47 | source = response.content 48 | soup = bs4.BeautifulSoup(source, "lxml") 49 | for img_tag in soup.find_all("img"): 50 | img_alt.append( 51 | "https://cyber.dabamos.de/88x31/" + img_tag.get("alt") + ".gif" 52 | ) 53 | except: 54 | return None 55 | return img_alt  # returned after all pages have been scraped 56 | -------------------------------------------------------------------------------- /src/scrape_up/bayt/__init__.py: -------------------------------------------------------------------------------- 1 | from .bayt import Jobs 2 | 3 | __all__ = ["Jobs"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/bayt/bayt.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import requests 3 | from bs4 import BeautifulSoup 4 | 5 | 6 | class Jobs: 7 | """ 8 | Create an instance of the class `Jobs` 9 | ```python 10 | scraper = Jobs() 11 | jobs_data = scraper.fetch_jobs(query, page) 12 | ``` 13 | | Methods | Details | 14 | | ----------------------------- | -------------------------------------------------------------------------- | 15 | | `.fetch_jobs(query, page)` | Fetch job listings data from Bayt.com based on the given query and page. | 16 | """ 17 | 18 | def __init__(self): 19 | self.base_url = "https://www.bayt.com" 20 | 21 | def fetch_jobs(self, query, page=1): 22 | """ 23 | Fetch job listings data from Bayt.com based on the given query and page. 24 | 25 | Parameters: 26 | - `query`: The job search query. 27 | - `page` : The page number of the search results (default: 1). 28 | 29 | Example: 30 | ```python 31 | scraper = Jobs() 32 | jobs_data = scraper.fetch_jobs("software developer", page=1) 33 | ``` 34 | """ 35 | try: 36 | url = f"{self.base_url}/en/international/jobs/{query}-jobs/?page={page}" 37 | response = requests.get(url) 38 | 39 | response.raise_for_status() 40 | 41 | soup = BeautifulSoup(response.text, "html.parser") 42 | job_listings = soup.find_all("li", class_="has-pointer-d") 43 | 44 | jobs = [] 45 | for job in job_listings: 46 | job_info = self.__extract_job_info(job) 47 | if job_info: 48 | jobs.append(job_info) 49 | sys.stdout.reconfigure(encoding="utf-8") 50 | return jobs 51 | except Exception: 52 | return None 53 | 54 | def __extract_job_info(self, job): 55 | """ 56 | Extract job information from a single job listing. 
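Returns a dict with the job title, company, location and URL, or None when the listing has no title element.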
57 | """ 58 | job_general_information = job.find("h2", class_="jb-title") 59 | if not job_general_information: 60 | return None 61 | 62 | job_title = self.__extract_job_title(job_general_information) 63 | job_url = self.__extract_job_url(job_general_information) 64 | company_name = self.__extract_company_name(job) 65 | job_location = self.__extract_job_location(job) 66 | 67 | return { 68 | "title": job_title, 69 | "company": company_name, 70 | "location": job_location, 71 | "url": job_url, 72 | } 73 | 74 | def __extract_job_title(self, job_general_information): 75 | return job_general_information.text.strip() 76 | 77 | def __extract_job_url(self, job_general_information): 78 | return self.base_url + job_general_information.a["href"].strip() 79 | 80 | def __extract_company_name(self, job): 81 | company_name = job.find("b", class_="jb-company") 82 | if company_name: 83 | return company_name.text.strip() 84 | return None 85 | 86 | def __extract_job_location(self, job): 87 | job_location = job.find("span", class_="jb-loc") 88 | if job_location: 89 | return job_location.text.strip() 90 | return None 91 | -------------------------------------------------------------------------------- /src/scrape_up/bbcnews/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbcnews import BBCNews 2 | 3 | __all__ = ["BBCNews"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/bbcnews/bbcnews.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | 3 | from scrape_up.config.request_config import RequestConfig, get 4 | 5 | 6 | class BBCNews: 7 | """ 8 | First create an object of class `BBCNews`\n 9 | ```python 10 | scraper = BBCNews() 11 | ``` 12 | | Methods | Details | 13 | | ------------------ | -------------------------------------------------------- | 14 | | `.get_headlines()` | Returns the list of object containig the headlines | 15 | | `get_article()` | Returns an object with proper details about the articles | 16 | """ 17 | 18 | def __init__(self, *, config: RequestConfig = RequestConfig()): 19 | self.base_url = "https://www.bbc.co.uk" 20 | self.headlines_url = self.base_url + "/news" 21 | headers = { 22 | "User-Agent": "Mozilla/5.0 (Windows NT 6.3; Win 64 ; x64) Apple WeKit /537.36(KHTML , like Gecko) Chrome/80.0.3987.162 Safari/537.36" 23 | } 24 | self.config = config 25 | if self.config.headers == {}: 26 | self.config.set_headers(headers) 27 | 28 | def get_headlines(self): 29 | """ 30 | Fetches the latest headlines from BBC News website.\n 31 | Returns: 32 | A list of dictionaries, each containing the index and headline text. 33 | Example: [{'index': 1, 'headline': 'Headline 1'}, {'index': 2, 'headline': 'Headline 2'}, ...] 
34 | """ 35 | try: 36 | response = get(self.headlines_url, self.config) 37 | response.raise_for_status() # Raise an exception for HTTP errors (4xx or 5xx) 38 | except: 39 | return None 40 | 41 | soup = BeautifulSoup(response.content, "html.parser") 42 | headlines = soup.find_all("h3", class_="gs-c-promo-heading__title") 43 | news_set = set() 44 | news_list = [] 45 | index = 1 46 | 47 | for headline in headlines: 48 | news_text = headline.get_text(strip=True) 49 | if news_text not in news_set: 50 | news_set.add(news_text) 51 | news_list.append({"index": index, "headline": news_text}) 52 | index += 1 53 | 54 | return news_list 55 | 56 | def get_article(self, url: str): 57 | """ 58 | Create an instance of the class - `BBCNews`\n 59 | ```python 60 | scraper = BBCNews() 61 | article = scraper.get_article() 62 | print(article) 63 | ``` 64 | """ 65 | try: 66 | response = get(url, self.config).text 67 | soup = BeautifulSoup(response, "lxml") 68 | 69 | main_heading = soup.find("h1", {"id": "main-heading"}).text.strip() 70 | time = soup.find("time").text.strip() 71 | text_content = soup.find_all("div", {"data-component": "text-block"}) 72 | Text = "" 73 | for text in text_content: 74 | Text += text.text.strip() + " " 75 | data = {"main_heading": main_heading, "time": time, "text": Text} 76 | return data 77 | except: 78 | return None 79 | -------------------------------------------------------------------------------- /src/scrape_up/cars/__init__.py: -------------------------------------------------------------------------------- 1 | from .cars import Cars 2 | 3 | __all__ = ["Cars"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/cars/cars.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from bs4 import BeautifulSoup 3 | import json 4 | 5 | 6 | class Cars: 7 | """ 8 | Create an instance of the class `Cars` 9 | ```python 10 | scraper = Cars() 11 | cars_data = scraper.fetch_cars(model, page) 12 | ``` 13 | | Methods | Details | 14 | | ---------------------------| ---------------------------------------------------- | 15 | | `.fetch_cars(model, page)` | Fetch car listings data based on the model and page. | 16 | """ 17 | 18 | def __init__(self): 19 | self.base_url = "https://www.cars.com" 20 | 21 | def fetch_cars(self, model, page=1): 22 | """ 23 | Fetch car listings data based on the model, and page. 24 | 25 | Parameters: 26 | - `model`: The model of the car. 27 | - `page` : The page number of the search results (default: 1). 28 | 29 | Example: 30 | ```python 31 | scraper = Cars() 32 | cars_data = scraper.fetch_cars("Toyota", page=1) 33 | ``` 34 | """ 35 | try: 36 | url = f"{self.base_url}/shopping/results/?&keyword={model}&page={page}" 37 | response = requests.get(url) 38 | response.raise_for_status() 39 | soup = BeautifulSoup(response.text, "html.parser") 40 | car_listings = soup.find_all("div", class_="vehicle-card") 41 | 42 | cars = [] 43 | for car in car_listings: 44 | car_info = self.__extract_car_info(car) 45 | if car_info: 46 | cars.append(car_info) 47 | return cars 48 | except Exception: 49 | return None 50 | 51 | def __extract_car_info(self, car): 52 | """ 53 | Extract car information from a single car listing. 
54 | """ 55 | 56 | car_model = self.__extract_car_model(car) 57 | car_url = self.__extract_car_url(car) 58 | dealer_name = self.__extract_dealer_name(car) 59 | car_price = self.__extract_car_price(car) 60 | car_discount = self.__extract_car_discount(car) 61 | 62 | return { 63 | "model": car_model, 64 | "dealer": dealer_name, 65 | "price": car_price, 66 | "discount": car_discount, 67 | "url": car_url, 68 | } 69 | 70 | def __extract_car_model(self, car): 71 | return car.find("h2", class_="title").text.strip() 72 | 73 | def __extract_car_url(self, car): 74 | return self.base_url + car.find("a")["href"] 75 | 76 | def __extract_car_price(self, car): 77 | car_price = car.find("span", class_="primary-price").text.strip() 78 | if car_price == "Not Priced": 79 | return None 80 | return car_price 81 | 82 | def __extract_dealer_name(self, car): 83 | dealer_name = car.find("div", class_="dealer-name") 84 | return dealer_name.text.strip() if dealer_name else None 85 | 86 | def __extract_car_discount(self, car): 87 | car_discount = car.find("span", class_="price-drop") 88 | return car_discount.text.strip() if car_discount else None 89 | -------------------------------------------------------------------------------- /src/scrape_up/codechef/__init__.py: -------------------------------------------------------------------------------- 1 | from .codechef import User 2 | 3 | __all__ = ["User"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/codeforces/__init__.py: -------------------------------------------------------------------------------- 1 | from .user import Users 2 | from .contests import Contest 3 | 4 | __all__ = ["Users", "Contest"] 5 | -------------------------------------------------------------------------------- /src/scrape_up/codewars/__init__.py: -------------------------------------------------------------------------------- 1 | from .codewars import Codewars 2 | 3 | __all__ = ["Codewars"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/codewars/codewars.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | import json 3 | from scrape_up.config.request_config import RequestConfig, get 4 | 5 | 6 | class Codewars: 7 | """ 8 | Create an instance of the class `GeeksforGeeks` 9 | ```py 10 | cwars = Codewars(user="agastya463") 11 | cwars.get_profile() 12 | ``` 13 | 14 | | Methods | Details | 15 | | ----------------- | ---------------------------------------------------------------------------------- | 16 | | `.get_profile()` | Returns the user data in json format. 
| 17 | 18 | 19 | Response: 20 | ```js 21 | { 22 | "Name": "Agastya Kumar Yadav", 23 | "Clan": "Unknown", 24 | "Member Since": "May 2024", 25 | "Last Seen": "May 2024", 26 | "Profiles": "", 27 | "Following": "0", 28 | "Followers": "0", 29 | "Allies": "0", 30 | "Rank": "8 kyu", 31 | "Honor": "3", 32 | "Total Completed Kata": "1", 33 | "Total Languages Trained": "1", 34 | "Highest Trained": "C++ (8 kyu)", 35 | "Most Recent": "C++", 36 | "Comments": "0 (0 replies)", 37 | "Collections": "0", 38 | "Kumite": "0", 39 | "Translations": "0 (0 approved)" 40 | } 41 | ``` 42 | """ 43 | 44 | def __init__(self, user: str, *, config: RequestConfig = RequestConfig()): 45 | self.user = user 46 | headers = {"User-Agent": "scrapeup"} 47 | self.config = config 48 | if self.config.headers == {}: 49 | self.config.set_headers(headers) 50 | 51 | def get_profile(self): 52 | try: 53 | url = f"https://www.codewars.com/users/{self.user}" 54 | response = get(url, self.config) 55 | soup = BeautifulSoup(response.text, "html.parser") 56 | d = soup.find_all("div", class_="stat") 57 | data = {} 58 | for i in d: 59 | k = i.text.split(":") 60 | data[k[0]] = k[1] 61 | return json.dumps(data) 62 | except Exception: 63 | return None 64 | -------------------------------------------------------------------------------- /src/scrape_up/coinmarketcap/__init__.py: -------------------------------------------------------------------------------- 1 | from .crypto import Crypto 2 | 3 | __all__ = ["Crypto"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/config/__init__.py: -------------------------------------------------------------------------------- 1 | from .request_config import RequestConfig, get 2 | -------------------------------------------------------------------------------- /src/scrape_up/config/request_config.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Union 2 | import requests 3 | 4 | 5 | class RequestConfig: 6 | """ 7 | A class used to configure requests. 8 | 9 | Args 10 | ---- 11 | timeout: int 12 | The timeout in seconds. 13 | redirect: bool 14 | Whether to follow redirects. 
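    headers: Dict[str, str]
        Headers to send with the request.
    proxies: Dict[str, str]
        Proxies to route the request through.

    A minimal usage sketch (any scraper that accepts a `config` keyword works the same way; `BBCNews` is just one example):
    ```python
    from scrape_up.config.request_config import RequestConfig
    from scrape_up.bbcnews import BBCNews

    # 10-second timeout, follow redirects, custom headers
    config = RequestConfig(timeout=10, redirect=True, headers={"User-Agent": "scrape-up"})
    scraper = BBCNews(config=config)
    ```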
15 | """ 16 | 17 | def __init__( 18 | self, 19 | timeout: Union[int, None] = None, 20 | redirect: bool = False, 21 | headers: Dict[str, str] = {}, 22 | proxies: Dict[str, str] = {}, 23 | ): 24 | self._timeout = timeout 25 | self._redirect = redirect 26 | self._headers = headers 27 | self._proxies = proxies 28 | 29 | def set_timeout(self, timeout: int): 30 | self._timeout = timeout 31 | 32 | def set_redirect(self, redirect: bool): 33 | self._redirect = redirect 34 | 35 | def set_headers(self, headers: Dict[str, str]): 36 | self._headers = headers 37 | 38 | def set_proxies(self, proxies: Dict[str, str]): 39 | self._proxies = proxies 40 | 41 | @property 42 | def timeout(self): 43 | return self._timeout 44 | 45 | @property 46 | def redirect(self): 47 | return self._redirect 48 | 49 | @property 50 | def headers(self): 51 | return self._headers 52 | 53 | @property 54 | def proxies(self): 55 | return self._proxies 56 | 57 | 58 | def get(url: str, config: RequestConfig): 59 | r = requests.get( 60 | url=url, 61 | headers=config.headers, 62 | timeout=config.timeout, 63 | allow_redirects=config.redirect, 64 | proxies=config.proxies, 65 | ) 66 | return r 67 | -------------------------------------------------------------------------------- /src/scrape_up/coursera/__init__.py: -------------------------------------------------------------------------------- 1 | from .courses import Coursera 2 | 3 | __all__ = ["Coursera"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/covidinfo/__init__.py: -------------------------------------------------------------------------------- 1 | from .covidinfo import CovidInfo 2 | 3 | __all__ = {"CovidInfo"} 4 | -------------------------------------------------------------------------------- /src/scrape_up/cricbuzz/__init__.py: -------------------------------------------------------------------------------- 1 | from .cricbuzz import Cricbuzz 2 | 3 | __all__ = ["Cricbuzz"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/devcommunity/__init__.py: -------------------------------------------------------------------------------- 1 | from .articles import DevCommunity 2 | 3 | __all__ = ["DevCommunity"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/devpost/__init__.py: -------------------------------------------------------------------------------- 1 | from .devpost import Devpost 2 | 3 | __all__ = ["Devpost"] -------------------------------------------------------------------------------- /src/scrape_up/dictionary/__init__.py: -------------------------------------------------------------------------------- 1 | from .wordoftheday import Dictionary 2 | 3 | __all__ = ["Dictionary"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/dictionary/wordoftheday.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | from urllib.request import urlopen 3 | 4 | from scrape_up.config.request_config import RequestConfig, get 5 | 6 | 7 | class Dictionary: 8 | """ 9 | Create an instance of the `Dictionary` class. 10 | ```python 11 | scraper = Dictionary() 12 | ``` 13 | | Methods | Details | 14 | | -------------- | -------------------------------------------- | 15 | | `.get_word_of_the_day()` | Returns word of the day from Dictionary.com. | 16 | | `.word_of_the_day_definition()` | Returns the definition of the word of the day. 
| 17 | """ 18 | 19 | def __init__(self, *, config: RequestConfig = RequestConfig()) -> None: 20 | self.config = config 21 | 22 | def __get_word_of_the_day_url(self): 23 | try: 24 | response = get("https://www.dictionary.com/", self.config) 25 | soup = BeautifulSoup(response.text, "html.parser") 26 | 27 | for anchor in soup("button"): 28 | url = anchor.get("data-linkurl", "/") 29 | 30 | if "word-of-the-day" in url: 31 | return url 32 | 33 | except: 34 | return None 35 | 36 | def __word_of_the_day_definition(self): 37 | try: 38 | response = get(self.__get_word_of_the_day_url(), self.config) 39 | soup = BeautifulSoup(response.text, "html.parser") 40 | 41 | for para in soup("p"): 42 | if para.string and para.string[0] not in "EG": 43 | return para.string 44 | except: 45 | return None 46 | 47 | def get_word_of_the_day(self): 48 | """ 49 | Returns a string containing the word of the day. 50 | 51 | ```python 52 | scraper = Dictionary() 53 | print(scraper.get_word_of_the_day()) 54 | ``` 55 | 56 | Sample output: 57 | >> unfalsifiable 58 | """ 59 | response = {} 60 | try: 61 | response["word"] = ( 62 | self.__get_word_of_the_day_url().split("/")[-2].split("-")[0] 63 | ) 64 | response["meaning"] = self.__word_of_the_day_definition() 65 | return response 66 | except: 67 | return None 68 | -------------------------------------------------------------------------------- /src/scrape_up/dribbble/__init__.py: -------------------------------------------------------------------------------- 1 | from .dribbble import Dribbble 2 | 3 | __all__ = ["Dribbble"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/eazydiner/__init__.py: -------------------------------------------------------------------------------- 1 | from .eazydiner import EazyDiner 2 | 3 | __all__ = ["EazyDiner"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/ebay/__init__.py: -------------------------------------------------------------------------------- 1 | from .ebay import EBAY 2 | 3 | __all__ = ["EBAY"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/espn/__init__.py: -------------------------------------------------------------------------------- 1 | from .espnmodule import ESPN 2 | 3 | __all__ = ["ESPN"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/espncricinfo/__init__.py: -------------------------------------------------------------------------------- 1 | from .espncricinfo import Espncricinfo 2 | 3 | __all__ = ["Espncricinfo"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/fide/__init__.py: -------------------------------------------------------------------------------- 1 | from .fide import FIDE 2 | 3 | __all__ = ["FIDE"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/flexjobs/__init__.py: -------------------------------------------------------------------------------- 1 | from .flexjobs import FlexJobs 2 | 3 | __all__ = ["FlexJobs"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/flipkart/__init__.py: -------------------------------------------------------------------------------- 1 | from .flipkart_clothing import FlipkartClothing 2 | from .flipkart_file import Flipkart 3 | from .flipkart_laptop import FlipkartLaptops 4 | 5 | __all__ = ["FlipkartClothing", "Flipkart", 
"FlipkartLaptops"] 6 | -------------------------------------------------------------------------------- /src/scrape_up/flyrobu/__init__.py: -------------------------------------------------------------------------------- 1 | from .flyrobu import Flyrobu 2 | 3 | __all__ = ["Flyrobu"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/geeksforgeeks/__init__.py: -------------------------------------------------------------------------------- 1 | from .geeksforgeeks import Geeksforgeeks 2 | 3 | __all__ = ["Geeksforgeeks"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/github/__init__.py: -------------------------------------------------------------------------------- 1 | from .users import Users 2 | from .repository import Repository 3 | from .issue import Issue 4 | from .organization import Organization 5 | from .pull_request import PullRequest 6 | 7 | 8 | __all__ = ["Users", "Repository", "Issue", "Organization", "PullRequest"] 9 | -------------------------------------------------------------------------------- /src/scrape_up/github_education/__init__.py: -------------------------------------------------------------------------------- 1 | from .events import Events 2 | 3 | __all__ = ["Events"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/hackerearth/__init__.py: -------------------------------------------------------------------------------- 1 | from .challenges import Challenges 2 | 3 | __all__ = ["Challenges"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/hackernews/__init__.py: -------------------------------------------------------------------------------- 1 | from .articles import Articles 2 | 3 | __all__ = ["Articles"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/hackerrank/__init__.py: -------------------------------------------------------------------------------- 1 | from .user import User 2 | from .contest import Contest 3 | 4 | __all__ = ["HackerRank", "Contest"] 5 | -------------------------------------------------------------------------------- /src/scrape_up/hashnode/__init__.py: -------------------------------------------------------------------------------- 1 | from .hashnode import Hashnode 2 | 3 | __all__ = ["Hashnode"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/healthgrades/__init__.py: -------------------------------------------------------------------------------- 1 | from .healthgradesmodule import HealthGrades 2 | 3 | __all__ = ["HealthGrades"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/healthgrades/healthgradesmodule.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | 3 | from scrape_up.config.request_config import RequestConfig, get 4 | 5 | 6 | class HealthGrades: 7 | """ 8 | Create an instance of `HealthGrades` class 9 | 10 | ```python 11 | hc = HealthGrades() 12 | ``` 13 | 14 | | Method | Details | 15 | | --------------------------- | -------------------------------------------------------------------- | 16 | | `get_best_hospitals(state)` | Fetches and returns information about the best hospitals in a state. 
| 17 | 18 | """ 19 | 20 | def __init__(self, *, config: RequestConfig = RequestConfig()): 21 | headers = { 22 | "User-Agent": "Mozilla/5.0 (Windows NT 6.3; Win 64 ; x64) Apple WeKit /537.36(KHTML , like Gecko) Chrome/80.0.3987.162 Safari/537.36" 23 | } 24 | self.config = config 25 | if self.config.headers == {}: 26 | self.config.set_headers(headers) 27 | 28 | def get_best_hospitals(self, state): 29 | """ 30 | Fetches and returns information about the best hospitals in a state.\n 31 | ```python 32 | hc = HealthGrades() 33 | hc.get_best_hospitals(state="bihar") 34 | ``` 35 | 36 | Example output: 37 | ```python 38 | [ 39 | { 40 | "Name": "ABC Hospital", 41 | "Location": "123 Main St, Philadelphia, PA", 42 | "Link": "https://www.healthgrades.com/hospital/abc-hospital", 43 | "Awards": ["America's 100 Best Hospitals", "Patient Safety Excellence Award"] 44 | }, 45 | ... 46 | ] 47 | ``` 48 | """ 49 | try: 50 | state = state.replace(" ", "-") 51 | url = ( 52 | f"https://www.healthgrades.com/quality/americas-best-hospitals/{state}" 53 | ) 54 | html_text = get(url, self.config).text 55 | soup = BeautifulSoup(html_text, "lxml") 56 | 57 | hospitals = [] 58 | container = soup.find("ul", {"class": "quality-results-group"}) 59 | 60 | for items in container.find_all("div", {"class": "quality-card"}): 61 | award = [] 62 | title = items.find("h3") 63 | location = items.find("div", {"class": "location-info"}) 64 | link = ( 65 | "https://www.healthgrades.com" 66 | + items.find("div", {"class": "hospital-info__hospital-link"}).find( 67 | "a", href=True 68 | )["href"] 69 | ) 70 | awards = items.find("ul", {"class": "awards-list__quality-award"}) 71 | for item in awards.find_all("li"): 72 | award.append(item.text) 73 | data = { 74 | "Name": title.text, 75 | "Location": location.text, 76 | "Link": link, 77 | "Awards": award[:-2], 78 | } 79 | hospitals.append(data) 80 | return hospitals 81 | except: 82 | return None 83 | -------------------------------------------------------------------------------- /src/scrape_up/icc/__init__.py: -------------------------------------------------------------------------------- 1 | from .icc_rankings import ICC 2 | 3 | __all__ = ["ICC"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/imdb/__init__.py: -------------------------------------------------------------------------------- 1 | from .actor import Actor 2 | from .box_office import BoxOffice 3 | from .celeb import Celeb 4 | from .imdb import IMDB 5 | from .indian_movies import IndianMovies 6 | from .movie import Movie 7 | 8 | 9 | __all__ = ["Actor", "BoxOffice", "Celeb", "IMDB", "IndianMovies", "Movie"] 10 | -------------------------------------------------------------------------------- /src/scrape_up/imdb/box_office.py: -------------------------------------------------------------------------------- 1 | from urllib.request import Request, urlopen 2 | from bs4 import BeautifulSoup as soup 3 | 4 | 5 | class BoxOffice: 6 | """ 7 | Create an instance of `BoxOffice` class. 
8 | ```python 9 | boxoffice = BoxOffice() 10 | ``` 11 | | Methods | Details | 12 | | -------------------|-------------------------------------------------------------------------------| 13 | | `.top_movies()` | Returns the top box office movies, weekend and total gross and weeks released | 14 | 15 | """ 16 | 17 | def __init__(self): 18 | self.__scrape_page() 19 | 20 | def __scrape_page(self): 21 | try: 22 | url = "https://www.imdb.com/chart/boxoffice/?ref_=hm_cht_sm" 23 | req = Request(url, headers={"User-Agent": "Mozilla/5.0"}) 24 | 25 | webpage = urlopen(req).read() 26 | self.page_soup = soup(webpage, "html.parser") 27 | 28 | except: 29 | return None 30 | 31 | def top_movies(self): 32 | """ 33 | Create an instance of `BoxOffice` class 34 | 35 | ```python 36 | boxoffice = BoxOffice() 37 | boxoffice.top_movies() 38 | ``` 39 | 40 | Return\n 41 | ```js 42 | [ 43 | { 44 | "Movie Name": "Barbie", 45 | "Weekend Gross": "$53M", 46 | "Total Gross": "$459M", 47 | "Weeks released": "3" 48 | }, 49 | ... 50 | ] 51 | 52 | ``` 53 | """ 54 | try: 55 | x = self.page_soup.find_all("h3", {"class": "ipc-title__text"}) 56 | x = x[1:11] 57 | movie_names = [] 58 | 59 | for y in x: 60 | movie_names.append(" ".join(y.get_text().split()[1:])) 61 | 62 | x = self.page_soup.find_all("li", {"class": "sc-ee64acb1-1 lkUVhM"}) 63 | x = [y.get_text() for y in x] 64 | 65 | lis = [] 66 | 67 | for y in range(0, len(x), 3): 68 | dic = {} 69 | dic["Movie Name"] = movie_names[y // 3] 70 | dic["Weekend Gross"] = x[y].split()[2] 71 | dic["Total Gross"] = x[y + 1].split()[2] 72 | dic["Weeks released"] = x[y + 2].split()[2] 73 | lis.append(dic) 74 | 75 | return lis 76 | 77 | except: 78 | return None 79 | -------------------------------------------------------------------------------- /src/scrape_up/imdb/celeb.py: -------------------------------------------------------------------------------- 1 | from urllib.request import Request, urlopen 2 | from bs4 import BeautifulSoup as soup 3 | 4 | 5 | class Celeb: 6 | """ 7 | Create an instance of `Celeb` class. 8 | ```python 9 | celeb = Celeb() 10 | ``` 11 | | Methods | Details | 12 | | -------------------|----------------------------------------------------| 13 | | `.top_celebs()` | Returns the name, roles, famous movie of the celeb | 14 | 15 | """ 16 | 17 | def __init__(self): 18 | self.__scrape_page() 19 | 20 | def __scrape_page(self): 21 | try: 22 | url = "https://www.imdb.com/chart/starmeter/?ref_=chtbo_ql_8" 23 | req = Request(url, headers={"User-Agent": "Mozilla/5.0"}) 24 | 25 | webpage = urlopen(req).read() 26 | self.page_soup = soup(webpage, "html.parser") 27 | 28 | except: 29 | return None 30 | 31 | def top_celebs(self): 32 | """ 33 | Create an instance of `Celeb` class.\n 34 | ```python 35 | celeb = Celeb() 36 | celeb.top_celebs() 37 | ``` 38 | Return\n 39 | ```js 40 | [ 41 | { 42 | 'Name': 'Paul Reubens', 43 | 'Roles': ['Actor', 'Writer', 'Director'], 44 | 'Famous Movie': "Pee-wee's Playhouse" 45 | }, 46 | ... 
47 | ] 48 | ``` 49 | """ 50 | try: 51 | x = self.page_soup.find_all("div", {"class": "sc-89c756a0-4 euZqVD"}) 52 | celeb_list = [] 53 | for y in x: 54 | dic = {} 55 | dic["Name"] = y.find("h3", {"class": "ipc-title__text"}).get_text() 56 | 57 | lis = [] 58 | for z in y.find_all( 59 | "li", {"class": "ipc-inline-list__item sc-89c756a0-6 jpNWoI"} 60 | ): 61 | lis.append(z.get_text()) 62 | 63 | dic["Roles"] = lis 64 | 65 | dic["Famous Movie"] = y.find( 66 | "span", {"class": "sc-1c8554ae-1 cKAFFg"} 67 | ).get_text() 68 | 69 | celeb_list.append(dic) 70 | 71 | return celeb_list 72 | 73 | except: 74 | return None 75 | -------------------------------------------------------------------------------- /src/scrape_up/imdb/indian_movies.py: -------------------------------------------------------------------------------- 1 | from urllib.request import Request, urlopen 2 | from bs4 import BeautifulSoup as soup 3 | 4 | 5 | class IndianMovies: 6 | """ 7 | Create an instance of `IndianMovies` class. 8 | ```python 9 | indianmovies = IndianMovies() 10 | ``` 11 | | Methods | Details | 12 | | -----------------------|-----------------------------------------------| 13 | | `.top_movies()` | Returns the current list of top Indian movies | 14 | 15 | """ 16 | 17 | def __init__(self): 18 | self.__scrape_page() 19 | 20 | def __scrape_page(self): 21 | try: 22 | url = "https://www.imdb.com/india/top-rated-indian-movies/?ref_=fea_eds_center-1_india_tr_india250_cta" 23 | req = Request(url, headers={"User-Agent": "Mozilla/5.0"}) 24 | 25 | webpage = urlopen(req).read() 26 | self.page_soup = soup(webpage, "html.parser") 27 | 28 | except: 29 | return None 30 | 31 | def top_movies(self): 32 | """ 33 | Create an instance of `IndianMovies` class. 34 | ```python 35 | indianmovies = IndianMovies() 36 | movies = indianmovies.top_movies() 37 | ``` 38 | 39 | Return\n 40 | ```js 41 | [ 42 | 'Ramayana: The Legend of Prince Rama', 43 | 'Rocketry: The Nambi Effect', 44 | 'Nayakan', 45 | 'Gol Maal', 46 | 'Anbe Sivam', 47 | ... 48 | ] 49 | ``` 50 | """ 51 | try: 52 | x = self.page_soup.find_all("span", {"data-testid": "rank-list-item-title"}) 53 | 54 | lis = [] 55 | for i in range(len(x)): 56 | lis.append(x[i].get_text()[len(str(i)) :]) 57 | 58 | return lis 59 | 60 | except: 61 | return None 62 | -------------------------------------------------------------------------------- /src/scrape_up/indiantrekking/__init__.py: -------------------------------------------------------------------------------- 1 | from .trek import Indiantrekking 2 | 3 | __all__ = ["Indiantrekking"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/indiantrekking/trek.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | import re 3 | import requests 4 | 5 | 6 | class Indiantrekking: 7 | """ 8 | A class to scrape data from Indian Trekking 9 | 10 | Create an instance of `Indiantrekking` class 11 | 12 | ```python 13 | trek=Indiantrekking("hidden-lakes-of-kashmir") 14 | ``` 15 | 16 | | Method | Details | 17 | | --------------------------- | -------------------------------------------------------------------- | 18 | | `.destination_name()` | Returns the name of the place. 
| 19 | | `.trip_fact()` | Returns the trip duration, destination, altitude and the season good for trekking | 20 | | `.outline_day_to_day_itinerary()` | Returns the outline of the day-to-day itinerary | 21 | --- 22 | """ 23 | 24 | def __init__(self, place): 25 | self.place = place 26 | try: 27 | url = f"https://www.indiantrekking.com/{self.place}.html" 28 | response = requests.get(url, headers={"User-Agent": "XY"}) 29 | self.soup = BeautifulSoup(response.content, "lxml") 30 | except: 31 | return None 32 | 33 | def destination_name(self): 34 | try: 35 | place = self.soup.find("div", class_="main-title").text 36 | return place 37 | except: 38 | return None 39 | 40 | def trip_fact(self): 41 | try: 42 | trip_duration = self.soup.findAll("div", class_="inner-wrap")[0].b.text 43 | trip_destination = self.soup.findAll("div", class_="inner-wrap")[1].b.text 44 | trip_season = self.soup.findAll("div", class_="inner-wrap")[3].b.text 45 | trip_altitude = self.soup.findAll("div", class_="inner-wrap")[4].b.text 46 | 47 | tripfact = { 48 | "trip_duration": re.sub(" +", " ", trip_duration.strip()), 49 | "trip_destination": re.sub(" +", " ", trip_destination.strip()), 50 | "trip_season": re.sub(" +", " ", trip_season.strip()), 51 | "trip_altitude": re.sub(" +", " ", trip_altitude.strip()), 52 | } 53 | return tripfact 54 | except: 55 | return None 56 | 57 | def outline_day_to_day_itinerary(self): 58 | try: 59 | outline = self.soup.find("div", class_="itinerary").text 60 | return outline 61 | except: 62 | return None 63 | -------------------------------------------------------------------------------- /src/scrape_up/indiatodayweather/__init__.py: -------------------------------------------------------------------------------- 1 | from .weather import Indiatodayweather 2 | 3 | __all__ = ["Indiatodayweather"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/indiatodayweather/weather.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import datetime as dt 3 | from bs4 import BeautifulSoup 4 | 5 | 6 | class Indiatodayweather: 7 | """ 8 | A class to scrape weather data from India Today 9 | 10 | Create an instance of `Indiatodayweather` class 11 | 12 | ```python 13 | weather = Indiatodayweather("Mumbai") 14 | ``` 15 | 16 | | Method | Details | 17 | | --------------------------- | ------------------------------------------------------------------------ | 18 | | `.info_about_weather()` | Returns the temperature, wind speed, description (windy, cloudy, clear) and humidity of the place. 
| | 19 | 20 | --- 21 | """ 22 | 23 | def __init__(self, place): 24 | try: 25 | self.place = place 26 | url = ( 27 | "https://www.indiatoday.in/weather/" 28 | + self.place 29 | + "-weather-forecast-today" 30 | ) 31 | response = requests.get(url, headers={"User-Agent": "XY"}) 32 | self.soup = BeautifulSoup(response.content, "lxml") 33 | 34 | except: 35 | return None 36 | 37 | def info_about_weather(self): 38 | try: 39 | temp = self.soup.find("div", class_="wtr_tmp_rhs").text 40 | humid = self.soup.find("span", class_="wtr_crd_ttl").text + " %" 41 | description = self.soup.find("span", class_="wtr_tmp_txt").text 42 | speed = ( 43 | self.soup.find("div", class_="wtr_wid_sec crd_three") 44 | .find("span", class_="wtr_crd_ttl") 45 | .text 46 | ) + " km/h" 47 | 48 | weather_info = { 49 | "temperature": temp, 50 | "humidity": humid, 51 | "description": description, 52 | "wind_speed": speed, 53 | } 54 | return weather_info 55 | except: 56 | return None 57 | -------------------------------------------------------------------------------- /src/scrape_up/instagram/users.py: -------------------------------------------------------------------------------- 1 | from selenium import webdriver 2 | from selenium.webdriver.common.by import By 3 | from selenium.webdriver.support.ui import WebDriverWait 4 | from selenium.webdriver.support import expected_conditions as EC 5 | from webdriver_manager.firefox import GeckoDriverManager 6 | from selenium.webdriver.firefox.service import Service 7 | 8 | driver = webdriver.Firefox(service=Service(GeckoDriverManager().install())) 9 | 10 | 11 | class User: 12 | def __init__(self, username: str): 13 | self.username = username 14 | 15 | def user_details(self): 16 | """ 17 | user = User(username=" ") 18 | print(user.user_details()) 19 | """ 20 | try: 21 | driver.get(f"https://www.instagram.com/{self.username}/") 22 | wait = WebDriverWait(driver, 180) 23 | account_details = wait.until( 24 | EC.presence_of_all_elements_located( 25 | (By.XPATH, '//span[@class="_ac2a"]') 26 | ) 27 | ) 28 | return { 29 | "Number of Posts:": account_details[0].text, 30 | "Number of Followers:": account_details[1].text, 31 | "Number of Following:": account_details[2].text, 32 | } 33 | 34 | except Exception as e: 35 | message = f"{self.username} not found!" 
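            # Any Selenium failure (timeout, missing profile, markup change) lands here; report the username as not found.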
36 | return {"data": None, "message": message} 37 | finally: 38 | driver.quit() 39 | -------------------------------------------------------------------------------- /src/scrape_up/lastfm/__init__.py: -------------------------------------------------------------------------------- 1 | from .lastfm import Lastfm 2 | 3 | __all__ = ["Lastfm"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/letterboxd/__init__.py: -------------------------------------------------------------------------------- 1 | from .letterboxd import Letterboxd 2 | 3 | __all__ = ["Letterboxd"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/librarygenesis/__init__.py: -------------------------------------------------------------------------------- 1 | from .library import LibGen 2 | 3 | __all__ = ["LibGen"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/lichess/__init__.py: -------------------------------------------------------------------------------- 1 | from .lichess import LichessGames 2 | 3 | __all__ = ["LichessGames"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/luma/events.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | 3 | from scrape_up.config.request_config import RequestConfig, get 4 | 5 | 6 | class Events: 7 | """ 8 | Create an instance of `Events` class. 9 | ```py 10 | events = Events() 11 | ``` 12 | | Methods | Details | 13 | | ------------------ | -------------------------------------------------------------------------------------------------------------------- | 14 | | `.get_events()` | Returns the latest events along with their organizer, location, image url, price and link. | 15 | """ 16 | 17 | def __init__(self, *, config: RequestConfig = RequestConfig()): 18 | self.config = config 19 | 20 | def get_events(self): 21 | """ 22 | Class - `Events` 23 | Example - 24 | ```python 25 | luma = Events() 26 | events = luma.get_events() 27 | ``` 28 | Return 29 | ```js 30 | [ 31 | { 32 | 'title': 'Brexfast Club', 33 | 'organizer': 'By Shai Goldman & Alexandra Settlemyer', 34 | 'location': 'Register to See Location', 35 | 'img_url': 'https://images.lumacdn.com/cdn-cgi/image/format=auto,fit=cover,dpr=2,quality=75,width=200,height=100/event-covers/gd/45c21ae7-67f6-40c7-8820-1cb57ea14705', 36 | 'price': 'Sold Out', 37 | 'link': 'https://lu.ma//nycaug9' 38 | } 39 | ... 
40 | ] 41 | ``` 42 | """ 43 | url = "https://lu.ma/nyc" 44 | events_data = {"events": []} 45 | try: 46 | res = get(url, self.config) 47 | soup = BeautifulSoup(res.content, "html.parser") 48 | cards = soup.find_all("div", class_="jsx-3249095655 card-wrapper") 49 | 50 | for c in cards: 51 | title = c.find("a")["aria-label"] 52 | base = c.find_all("div", class_="jsx-3575689807 min-width-0") 53 | organizer = base[0].getText() 54 | loc = base[1].getText() 55 | try: 56 | price = c.find("div", class_="jsx-146954525 pill-label").getText() 57 | except: 58 | price = "" 59 | img = c.find( 60 | "div", class_="jsx-4068354093 img-aspect-ratio rounded" 61 | ).find("img")["src"] 62 | link = c.find("a")["href"] 63 | events_data["events"].append( 64 | { 65 | "title": title, 66 | "organizer": organizer, 67 | "location": loc, 68 | "img_url": img, 69 | "price": price, 70 | "link": "https://lu.ma/" + link, 71 | } 72 | ) 73 | return events_data["events"] 74 | except: 75 | return None 76 | -------------------------------------------------------------------------------- /src/scrape_up/medium/publication.py: -------------------------------------------------------------------------------- 1 | from selenium import webdriver 2 | from selenium.webdriver.common.by import By 3 | from selenium.webdriver.chrome.options import Options 4 | import time 5 | 6 | 7 | options = Options() 8 | options.add_argument("--headless") 9 | options.add_argument("--log-level=3") 10 | options.add_experimental_option("excludeSwitches", ["enable-logging"]) 11 | driver = webdriver.Chrome(options=options) 12 | 13 | 14 | class Publication: 15 | def __init__(self, link): 16 | self.link = link 17 | 18 | def get_articles(self): 19 | """ 20 | Class - `Publication` 21 | Example 22 | ```python 23 | publication = medium.Publication("https://towardsdatascience.com") 24 | articles = publication.get_articles() 25 | for article in articles: 26 | print(article) #For better readability/clarity 27 | ``` 28 | Returns the articles of the publication which are arranged in the form of a list 29 | """ 30 | try: 31 | articles = [] 32 | link = self.link 33 | driver.get(link) 34 | scroll_pause = 0.5 35 | # Get scroll height 36 | last_height = driver.execute_script( 37 | "return document.documentElement.scrollHeight" 38 | ) 39 | run_time, max_run_time = 0, 1 40 | while True: 41 | iteration_start = time.time() 42 | # Scroll down to bottom 43 | driver.execute_script( 44 | "window.scrollTo(0, 1000*document.documentElement.scrollHeight);" 45 | ) 46 | 47 | # Wait to load page 48 | time.sleep(scroll_pause) 49 | 50 | # Calculate new scroll height and compare with last scroll height 51 | new_height = driver.execute_script( 52 | "return document.documentElement.scrollHeight" 53 | ) 54 | scrolled = new_height != last_height 55 | timed_out = run_time >= max_run_time 56 | if scrolled: 57 | run_time = 0 58 | last_height = new_height 59 | elif not scrolled and not timed_out: 60 | run_time += time.time() - iteration_start 61 | elif not scrolled and timed_out: 62 | break 63 | elements = driver.find_elements(By.XPATH, "//h2 | //h3") 64 | for x in elements: 65 | articles.append(x.text) 66 | return articles 67 | except: 68 | return "page/publication not found." 
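# Note: the module-level Chrome driver created above is left open after get_articles() returns; call driver.quit() once you are done scraping to release the browser.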
69 | 70 | 71 | # publication = Publication("https://pub.towardsai.net") 72 | # articles = publication.get_articles() 73 | # for article in articles: 74 | # print(article) 75 | -------------------------------------------------------------------------------- /src/scrape_up/medium/trending.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup as bs 2 | 3 | from scrape_up.config.request_config import RequestConfig, get 4 | 5 | headers = { 6 | "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36" 7 | } # mimics a browser's request 8 | 9 | 10 | class Trending: 11 | def __init__(self, *, config: RequestConfig = RequestConfig()): 12 | self.config = config 13 | 14 | def get_trending(self): 15 | """ 16 | Class - `Trending` 17 | Example 18 | ```python 19 | trending = Trending().get_trending() 20 | for trend in trending: 21 | print(trend) #For better readability/clarity 22 | ``` 23 | Returns a list of trending titles 24 | 25 | """ 26 | try: 27 | titles = [] 28 | r = get("https://medium.com/", self.config) 29 | soup = bs(r.text, "html.parser") 30 | elements = soup.select('h2[class^="by j"]') 31 | for x in elements: 32 | titles.append(x.text) 33 | return titles 34 | 35 | except: 36 | return {"data": None, "message": "Something went wrong! Try again!"} 37 | -------------------------------------------------------------------------------- /src/scrape_up/medium/user.py: -------------------------------------------------------------------------------- 1 | from selenium import webdriver 2 | from selenium.webdriver.common.by import By 3 | from selenium.webdriver.chrome.options import Options 4 | import time 5 | 6 | options = Options() 7 | options.add_argument("--headless") 8 | options.add_argument("--log-level=3") 9 | options.add_experimental_option("excludeSwitches", ["enable-logging"]) 10 | driver = webdriver.Chrome(options=options) 11 | 12 | 13 | class User: 14 | def __init__(self, username): 15 | self.username = username 16 | 17 | def get_articles(self): 18 | """ 19 | Class `Users` 20 | Example: 21 | ```python 22 | user = medium.User(username='karthikbhandary2') 23 | article_titles = user.get_articles() 24 | for article in article_titles: 25 | print(article) # For better readability/clarity 26 | ``` 27 | Returns a list of the titles. 
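Returns the string "<username> not found." if the profile page cannot be scraped.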
28 | """ 29 | try: 30 | titles = [] 31 | username = self.username 32 | driver.get(f"https://{username}.medium.com") 33 | scroll_pause = 0.5 34 | # Get scroll height 35 | last_height = driver.execute_script( 36 | "return document.documentElement.scrollHeight" 37 | ) 38 | run_time, max_run_time = 0, 1 39 | while True: 40 | iteration_start = time.time() 41 | # Scroll down to bottom 42 | driver.execute_script( 43 | "window.scrollTo(0, 1000*document.documentElement.scrollHeight);" 44 | ) 45 | 46 | # Wait to load page 47 | time.sleep(scroll_pause) 48 | 49 | # Calculate new scroll height and compare with last scroll height 50 | new_height = driver.execute_script( 51 | "return document.documentElement.scrollHeight" 52 | ) 53 | scrolled = new_height != last_height 54 | timed_out = run_time >= max_run_time 55 | if scrolled: 56 | run_time = 0 57 | last_height = new_height 58 | elif not scrolled and not timed_out: 59 | run_time += time.time() - iteration_start 60 | elif not scrolled and timed_out: 61 | break 62 | elements = driver.find_elements(By.CSS_SELECTOR, "h2") 63 | for x in elements: 64 | titles.append(x.text) 65 | return titles 66 | 67 | except: 68 | return f"{username} not found." 69 | -------------------------------------------------------------------------------- /src/scrape_up/moneycontrol/equity_mutual_funds.py: -------------------------------------------------------------------------------- 1 | from urllib.request import Request, urlopen 2 | from bs4 import BeautifulSoup as soup 3 | 4 | 5 | class EquityMutualFunds: 6 | """ 7 | Create an instance of `EquityMutualFunds` class. 8 | ```python 9 | equitymutualfunds = EquityMutualFunds() 10 | ``` 11 | | Methods | Details | 12 | | -------------------------|-------------------------------------------------| 13 | | `.historical_returns` | Returns mutual funds based on historic returns | 14 | 15 | """ 16 | 17 | def __init__(self): 18 | self.__scrape_page() 19 | 20 | def __scrape_page(self): 21 | try: 22 | url = "https://www.moneycontrol.com/mutual-funds/best-funds/equity.html" 23 | req = Request(url, headers={"User-Agent": "Mozilla/5.0"}) 24 | 25 | webpage = urlopen(req).read() 26 | self.page_soup = soup(webpage, "html.parser") 27 | 28 | except: 29 | return None 30 | 31 | def historical_returns(self): 32 | """ 33 | Create an instance of `EquityMutualFunds` class. 34 | 35 | ```python 36 | equitymutualfunds = EquityMutualFunds() 37 | equitymutualfunds.historical_returns() 38 | 39 | ``` 40 | Return\n 41 | ```js 42 | [ 43 | 'Motilal Oswal Midcap Fund', 44 | 'Quant Small Cap Fund', 45 | 'UTI Flexi Cap Fund', 46 | .... 
47 | ]
48 | ```
49 | """
50 | 
51 | try:
52 | L = []
53 | for x in self.page_soup.find_all("a", {"class": "robo_medium"}):
54 | temp = x.get_text().split(" - ")[0]  # keep the fund name, drop the plan suffix
55 | if temp not in L:
56 | L.append(temp)
57 | 
58 | return L
59 | 
60 | except:
61 | return None
62 | 
-------------------------------------------------------------------------------- /src/scrape_up/moneycontrol/gold.py: --------------------------------------------------------------------------------
1 | from urllib.request import Request, urlopen
2 | from bs4 import BeautifulSoup as soup
3 | 
4 | 
5 | class GoldPrice:
6 | """
7 | Create an instance of `GoldPrice` class
8 | ```python
9 | goldprice = GoldPrice()
10 | ```
11 | 
12 | | Methods | Details |
13 | | -------------| ----------------------------------------------|
14 | | `.price_22_carat()`| Returns the city-wise price of 22k gold |
15 | | `.price_24_carat()`| Returns the city-wise price of 24k gold |
16 | 
17 | """
18 | 
19 | def __init__(self):
20 | self.__scrape_page()
21 | self.__get_values()
22 | 
23 | def __scrape_page(self):
24 | try:
25 | url = "https://www.moneycontrol.com/news/gold-rates-today/"
26 | req = Request(url, headers={"User-Agent": "Mozilla/5.0"})  # browser-like UA; some sites reject urllib's default
27 | 
28 | webpage = urlopen(req).read()
29 | self.page_soup = soup(webpage, "html.parser")
30 | 
31 | except:
32 | return None
33 | 
34 | def __get_values(self):
35 | y = self.page_soup.find_all("td")
36 | y = y[25:-33]  # slice away header/footer cells, keeping only the city rows
37 | L = []
38 | 
39 | for x in y:
40 | L.append(x.get_text())
41 | 
42 | self.vals = []
43 | for i in range(0, len(y), 5):  # group the flat cell list into rows of five
44 | self.vals.append(L[i : i + 5])
45 | 
46 | def price_22_carat(self):
47 | """
48 | Create an instance of GoldPrice class
49 | ```python
50 | goldprice = GoldPrice()
51 | goldprice.price_22_carat()
52 | ```
53 | Return\n
54 | ```js
55 | {
56 | "Agra": "₹ 5,610",
57 | "Ahmedabad": "₹ 5,614",
58 | "Andhra pradesh": "₹ 5,550",
59 | "Assam": "₹ 5,655",
60 | "Bangalore": "₹ 5,615",
61 | "Bhilai": "₹ 5,603"
62 | }
63 | ```
64 | """
65 | try:
66 | cities = [x[0] for x in self.vals]
67 | prices = [x[1] for x in self.vals]
68 | return dict(zip(cities, prices))
69 | except:
70 | return None
71 | 
72 | def price_24_carat(self):
73 | """
74 | Create an instance of GoldPrice class
75 | ```python
76 | goldprice = GoldPrice()
77 | goldprice.price_24_carat()
78 | ```
79 | Return\n
80 | ```js
81 | {
82 | 'Agra': '₹ 5,891',
83 | 'Ahmedabad': '₹ 5,895',
84 | 'Andhra pradesh': '₹ 5,828',
85 | 'Assam': '₹ 5,938',
86 | 'Bangalore': '₹ 5,896',
87 | 'Bhilai': '₹ 5,883'
88 | }
89 | ```
90 | """
91 | try:
92 | cities = [x[0] for x in self.vals]
93 | prices = [x[3] for x in self.vals]
94 | return dict(zip(cities, prices))
95 | except:
96 | return None
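97 | 
98 | # Each row in `self.vals` appears to hold five cells from the rate table:
99 | # [city, 22k price, 22k daily change, 24k price, 24k daily change]; the column
100 | # meaning is inferred from the indices used above (1 for 22k, 3 for 24k).
101 | # Hypothetical usage:
102 | #   gp = GoldPrice()
103 | #   print(gp.price_22_carat())  # {"Agra": "₹ 5,610", ...} or None on failure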
24 | url = "https://www.moneycontrol.com/news/silver-rates-today/" 25 | req = Request(url, headers={"User-Agent": "Mozilla/5.0"}) 26 | 27 | webpage = urlopen(req).read() 28 | self.page_soup = soup(webpage, "html.parser") 29 | 30 | except: 31 | return None 32 | 33 | def citywise_price(self): 34 | """ 35 | Create an instance of `SilverPrice` class 36 | ```python 37 | silverprice = SilverPrice() 38 | silverprice.citywise_price() 39 | ``` 40 | 41 | Return\n 42 | ```js 43 | { 44 | 'Agra': '81', 45 | 'Ahmedabad': '81', 46 | 'Bangalore': '81', 47 | 'Bhilai': '81', 48 | 'Bhopal': '81' 49 | } 50 | ``` 51 | """ 52 | try: 53 | x = self.page_soup.find_all("tr") 54 | x = x[7:-12] 55 | 56 | x = [(y.get_text()).split("₹ ")[:-1] for y in x] 57 | keys = [y[0] for y in x] 58 | values = [y[1] for y in x] 59 | 60 | return dict(zip(keys, values)) 61 | 62 | except: 63 | return None 64 | 65 | def last_10_days(self): 66 | """ 67 | Create an instance of `SilverPrice` class 68 | ```python 69 | silverprice = SilverPrice() 70 | silverprice.citywise_price() 71 | ``` 72 | 73 | Return\n 74 | ```js 75 | { 76 | 'Aug 01, 2023': '810', 77 | 'Jul 31, 2023': '800', 78 | 'Jul 30, 2023': '800', 79 | 'Jul 29, 2023': '800', 80 | 'Jul 28, 2023': '795', 81 | 'Jul 26, 2023': '804', 82 | 'Jul 25, 2023': '800', 83 | 'Jul 24, 2023': '805', 84 | 'Jul 23, 2023': '805', 85 | 'Jul 22, 2023': '805' 86 | } 87 | ``` 88 | """ 89 | try: 90 | x = self.page_soup.find_all("tr") 91 | x = x[-10:] 92 | 93 | x = [(y.get_text()).split("₹ ") for y in x] 94 | keys = [y[0] for y in x] 95 | values = [y[1] for y in x] 96 | 97 | return dict(zip(keys, values)) 98 | 99 | except: 100 | return None 101 | -------------------------------------------------------------------------------- /src/scrape_up/myanimelist/__init__.py: -------------------------------------------------------------------------------- 1 | from .scraper import Anime 2 | 3 | __all__ = ["Anime"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/pinterest/__init__.py: -------------------------------------------------------------------------------- 1 | from .pinterest import Pinterest 2 | 3 | __all__ = ["Pinterest"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/quora/__init__.py: -------------------------------------------------------------------------------- 1 | from .quora import Quora 2 | 3 | __all__ = ["Quora"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/steam/__init__.py: -------------------------------------------------------------------------------- 1 | from .steamscraper import SteamStoreScraper 2 | 3 | __all__ = ["SteamStoreScraper"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/swiggy/__init__.py: -------------------------------------------------------------------------------- 1 | from .swiggy import Swiggy 2 | 3 | __all__ = ["Swiggy"] 4 | -------------------------------------------------------------------------------- /src/scrape_up/sysreqlab/__init__.py: -------------------------------------------------------------------------------- 1 | from find_titles import FindTitles 2 | from requirements import Requirements 3 | 4 | 5 | __all__ = ["FindTitles", "Requirements"] 6 | -------------------------------------------------------------------------------- /src/scrape_up/sysreqlab/find_titles.py: -------------------------------------------------------------------------------- 1 | from 
bs4 import BeautifulSoup 2 | import requests 3 | 4 | 5 | class FindTitles: 6 | """ 7 | Create an instance of `FindTitles` class. 8 | 9 | ```python 10 | titles = sysreqlab.FindTitles(search_term="Call of Duty", search_alphabet="c") 11 | ``` 12 | 13 | | Methods | Details | 14 | | -------------------------------- | --------------------------------------------------------------------- | 15 | | `.find_titles(number_of_titles)` | Returns the list of titles based on the search term, search alphabet. | 16 | """ 17 | 18 | def __init__(self, search_term: str, search_alphabet: str): 19 | self.search_term = search_term 20 | self.search_alphabet = search_alphabet 21 | 22 | def __scrape_data(self): 23 | try: 24 | url = f"https://www.systemrequirementslab.com/all-games-list/?filter={self.search_alphabet}" 25 | html = requests.get(url) 26 | html.raise_for_status() 27 | return html.text 28 | 29 | except requests.exceptions.RequestException as e: 30 | raise Exception(f"An error occurred while fetching the page: {str(e)}") 31 | 32 | def __parse_page(self): 33 | html = self.__scrape_data() 34 | soup = BeautifulSoup(html, "html.parser") 35 | return soup 36 | 37 | def find_titles(self, number_of_titles: int): 38 | """ 39 | Class - `FindTitles` 40 | Example: 41 | ```python 42 | titles = FindTitles(search_term="Call of Duty", search_alphabet="c") 43 | titles = titles.find_titles(5) 44 | ``` 45 | Returns a list of titles that match the search term. 46 | """ 47 | 48 | try: 49 | soup = self.__parse_page() 50 | 51 | div_elements = soup.find("div", class_="pt-3") 52 | li_elements = div_elements.find_all("li") 53 | all_titles = [title.text.strip() for title in li_elements] 54 | 55 | titles = [ 56 | title 57 | for title in all_titles 58 | if self.search_term.lower() in title.lower() 59 | ] 60 | 61 | return titles[:number_of_titles] 62 | 63 | except Exception as e: 64 | raise Exception(f"An error occurred while fetching the titles: {str(e)}") 65 | -------------------------------------------------------------------------------- /src/scrape_up/thehindu/thehindu.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup as bs 2 | 3 | from scrape_up.config.request_config import RequestConfig, get 4 | 5 | 6 | class TheHindu: 7 | """ 8 | Create an object of the 'TheHindu' class\n 9 | ```python 10 | scraper = TheHindu() 11 | ``` 12 | | Methods | Details | 13 | | --------------------- | ------------------------------------------------------------------------- | 14 | | `.get_news(page_url)` | gets heading, subheading, time, and news content | 15 | """ 16 | 17 | def __init__(self, *, config: RequestConfig = RequestConfig()): 18 | headers = { 19 | "User-Agent": "Mozilla/5.0 (Windows NT 6.3; Win 64 ; x64) Apple WeKit /537.36(KHTML , like Gecko) Chrome/80.0.3987.162 Safari/537.36" 20 | } 21 | self.config = config 22 | if self.config.headers == {}: 23 | self.config.set_headers(headers) 24 | 25 | def get_news(self, page_url): 26 | """ 27 | Create an object of the 'TheHindu' class\n 28 | ```python 29 | scraper = TheHindu() 30 | scraper.get_news(page_url="https://www.thehindu.com/news/cities/Delhi/sc-appoints-former-delhi-hc-judge-justice-jayant-nath-as-interim-chairperson-of-power-regulator-derc/article67157713.ece") 31 | ``` 32 | Response 33 | ```js 34 | { 35 | "title":"SC appoints former Delhi HC judge Justice Jayant Nath as interim chairperson of power regulator DERC", 36 | "subtitle":"The office of the DERC chairperson has been vacant for over six months", 37 | 
"last_updated":"August 04, 2023 02:59 pm | Updated 03:11 pm IST - New Delhi", 38 | "news":"The Supreme Court on Friday appointed former Delhi High Court judge, ..." 39 | } 40 | ``` 41 | """ 42 | try: 43 | page_url = "https://www.thehindu.com/news/cities/Delhi/sc-appoints-former-delhi-hc-judge-justice-jayant-nath-as-interim-chairperson-of-power-regulator-derc/article67157713.ece" 44 | response = get(page_url, self.config).text 45 | soup = bs(response, "lxml") 46 | main_content_box = soup.find("div", {"class": "articlebodycontent"}) 47 | news_text = main_content_box.find_all("p") 48 | news = "" 49 | for p in news_text: 50 | if "class" not in str(p): 51 | news += p.text 52 | heading = soup.find("h1", {"class": "title"}).text.strip() 53 | sub_heading = soup.find("h3", {"class": "sub-title"}).text.strip() 54 | last_updated = soup.find("p", {"class": "publish-time"}).text.strip() 55 | news_data = { 56 | "title": heading, 57 | "subtitle": sub_heading, 58 | "last_updated": last_updated, 59 | "news": news, 60 | } 61 | return news_data 62 | except: 63 | return None 64 | -------------------------------------------------------------------------------- /src/scrape_up/timeanddate/time_zones.py: -------------------------------------------------------------------------------- 1 | from urllib.request import Request, urlopen 2 | from bs4 import BeautifulSoup as soup 3 | 4 | 5 | class Timezones: 6 | """ 7 | Create an instance of `Timezones` class.\n 8 | ```python 9 | timezones = Timezones() 10 | ``` 11 | | Methods | Details | 12 | | -------------------|--------------------------------------------------| 13 | | `.city_timezones()`| Returns the timezones of cites around the world | 14 | 15 | """ 16 | 17 | def __init__(self): 18 | self.__scrape_page() 19 | 20 | def __scrape_page(self): 21 | try: 22 | url = "https://www.timeanddate.com/worldclock/full.html" 23 | req = Request(url, headers={"User-Agent": "Mozilla/5.0"}) 24 | 25 | webpage = urlopen(req).read() 26 | self.page_soup = soup(webpage, "html.parser") 27 | 28 | except: 29 | return None 30 | 31 | def city_timezones(self): 32 | """ 33 | Create an instance of `Timezones` class 34 | ```python 35 | timezones = Timezones() 36 | timezones.city_timezones() 37 | ``` 38 | 39 | Return\n 40 | ```js 41 | { 42 | "Abidjan": "16.31", 43 | "Gitega": "18.31", 44 | "Oral": "21.31", 45 | "Abu Dhabi": "20.31", 46 | "Grise Fiord *": "12.31", 47 | "Oslo *": "18.31", 48 | "Abuja": "17.31" 49 | } 50 | ``` 51 | """ 52 | try: 53 | x = self.page_soup.find_all("td") 54 | p = False 55 | 56 | timezones_dict = {} 57 | for y in x[:-1]: 58 | if p == False: 59 | key = y.get_text() 60 | else: 61 | timezones_dict[key] = (y.get_text())[5:] 62 | p = not (p) 63 | 64 | return timezones_dict 65 | 66 | except: 67 | return None 68 | -------------------------------------------------------------------------------- /src/scrape_up/timesjobs/timesjobs_scraper.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | 3 | from scrape_up.config.request_config import RequestConfig, get 4 | 5 | 6 | class TimesJobs: 7 | def __init__(self, role: str, *, config: RequestConfig = RequestConfig()): 8 | self.role = role 9 | self.config = config 10 | 11 | def list_jobs(self): 12 | """ 13 | Class - `TimesJobs`\n 14 | Example -\n 15 | ```python 16 | 17 | jobs = TimesJobs(role="developer") 18 | jobs.list_jobs() 19 | ``` 20 | Return\n 21 | ```python 22 | return 23 | { 24 | "Company": "Name of the comapny", 25 | "Location": "Location at which the company is 
located", 26 | "Experience": "Experience of the applicants required for that post", 27 | "Posted": "Number of days before which this job was posted on this webiste", 28 | "Apply here": "Link which directly takes you to the Web-page where you can apply for the job" 29 | } 30 | """ 31 | try: 32 | spl = self.role.split() 33 | self.role = "%20".join(spl) 34 | except: 35 | return None 36 | try: 37 | url = f"https://m.timesjobs.com/mobile/jobs-search-result.html?txtKeywords={self.role}&txtLocation=India&cboWorkExp1=-1" 38 | response = get(url, self.config) 39 | soup = BeautifulSoup(response.text, "html.parser") 40 | companies = soup.find_all("h4") 41 | experiences = soup.find_all("div", class_="srp-exp") 42 | locations = soup.find_all("div", class_="srp-loc") 43 | days_ago = soup.find_all("span", class_="posting-time") 44 | application_links = soup.find_all("h3") 45 | 46 | job_data = [] 47 | 48 | for i in range(len(companies)): 49 | company = companies[i].text 50 | location = locations[i].text 51 | experience = experiences[i].text 52 | days = days_ago[i].text 53 | href_value = application_links[i].a["href"] 54 | 55 | job_info = { 56 | "Company": company, 57 | "Location": location, 58 | "Experience": experience, 59 | "Posted": days, 60 | "Apply here": href_value, 61 | } 62 | job_data.append(job_info) 63 | 64 | return job_data 65 | 66 | except Exception as e: 67 | print("Not possible to webscrape") 68 | return None 69 | -------------------------------------------------------------------------------- /src/scrape_up/tripadvisor/TripAdvisor.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | 3 | from scrape_up.config.request_config import RequestConfig, get 4 | 5 | 6 | class TripAdvisor: 7 | """ 8 | First, create an object of class `TripAdvisor` 9 | 10 | ```python 11 | hotel = TripAdvisor() 12 | ``` 13 | 14 | | Methods | Details | 15 | | ------------------------ | ---------------------------------------------------- | 16 | | `get_details(hotel_url)` | Get the details of a hotel from its TripAdvisor URL. | 17 | """ 18 | 19 | def __init__(self, *, config: RequestConfig = RequestConfig()): 20 | headers = { 21 | "User-Agent": "Mozilla/5.0 (Windows NT 6.3; Win 64 ; x64) Apple WeKit /537.36(KHTML , like Gecko) Chrome/80.0.3987.162 Safari/537.36" 22 | } 23 | self.config = config 24 | if self.config.headers == {}: 25 | self.config.set_headers(headers) 26 | 27 | def get_details(self, hotel_url: str): 28 | """ 29 | Get the details of a hotel from its TripAdvisor URL.\n 30 | Parameters:- `hotel_url (str)`: The URL of the hotel on TripAdvisor. 
31 | ```python
32 | hotel = TripAdvisor()
33 | hotel.get_details("<tripadvisor-hotel-url>")
34 | ```
35 | Returns:
36 | ```js
37 | {
38 | "Rating": "The hotel's rating",
39 | "Experience": "The hotel's experience summary",
40 | "Reviews": "The number of reviews for the hotel",
41 | "Award": "The award received by the hotel, or None if not available",
42 | "Description": "The description of the hotel as a BeautifulSoup Tag",
43 | "Amenities": "List of amenities offered by the hotel"
44 | }
45 | ```
46 | """
47 | try:
48 | url = hotel_url
49 | html_text = get(url, self.config).text
50 | soup = BeautifulSoup(html_text, "lxml")
51 | 
52 | container = soup.find("div", {"class": "ppuFV _T Z BB"})  # hashed class names; may change with site updates
53 | 
54 | rating = container.find("span", {"class": "uwJeR P"}).text
55 | experience = container.find("div", {"class": "kkzVG"}).text
56 | reviews = container.find("span", {"class": "hkxYU q Wi z Wc"}).text
57 | award = container.find("div", {"class": "bhYSr P"})
58 | if award:
59 | award = award.text
60 | else:
61 | award = None
62 | description = container.find("div", {"class": "fIrGe _T"}).text
63 | pa = container.find("div", {"class": "OsCbb K"})
64 | amenities = []
65 | for items in pa.find_all("div", {"class": "yplav f ME H3 _c"}):
66 | amenities.append(items.text)
67 | 
68 | data = {
69 | "Rating": rating,
70 | "Experience": experience,
71 | "Reviews": reviews,
72 | "Award": award,
73 | "Description": description,
74 | "Amenities": amenities,
75 | }
76 | return data
77 | except:
78 | return None
79 | 
-------------------------------------------------------------------------------- /src/scrape_up/twitter/numidconverter.py: --------------------------------------------------------------------------------
1 | from selenium import webdriver
2 | from selenium.webdriver.chrome.options import Options
3 | from bs4 import BeautifulSoup
4 | import json
5 | 
6 | 
7 | class TwitterScraper:
8 | def __init__(self):
9 | self.chrome_options = Options()
10 | self.chrome_options.add_argument("--headless")
11 | self.chrome_options.add_argument("--window-size=1920,1080")
12 | self.chrome_options.add_argument("--disable-gpu")
13 | self.chrome_options.add_argument("--no-sandbox")
14 | self.chrome_options.add_argument("--disable-dev-shm-usage")
15 | self.chrome_options.add_argument("--disable-extensions")
16 | self.chrome_options.add_argument("--disable-logging")
17 | self.chrome_options.add_argument("--log-level=3")
18 | self.chrome_options.add_argument("--silent")
19 | self.chrome_options.add_argument("--blink-settings=imagesEnabled=false")
20 | 
21 | def unametoid(self, username):
22 | url = "https://twitter.com/{}".format(username)
23 | driver = webdriver.Chrome(options=self.chrome_options)
24 | driver.get(url)
25 | html = driver.page_source
26 | driver.quit()  # release the headless browser once the page source is captured
27 | soup = BeautifulSoup(html, "html.parser")
28 | try:
29 | user_id = soup.find("script", {"data-testid": "UserProfileSchema-test"})
30 | data = json.loads(user_id.string)
31 | return {
32 | "data": data["author"]["identifier"],
33 | "message": f"Numerical id found for username {username}",
34 | }
35 | except:
36 | return {
37 | "data": None,
38 | "message": f"Numerical id not found for username {username}",
39 | }
40 | 
41 | def idtouname(self, numid):
42 | url = "https://twitter.com/i/user/{}".format(numid)
43 | driver = webdriver.Chrome(options=self.chrome_options)
44 | driver.get(url)
45 | html = driver.page_source
46 | driver.quit()
47 | soup = BeautifulSoup(html, "html.parser")
48 | try:
49 | user_id = soup.find("script", {"data-testid": "UserProfileSchema-test"})
50 | data = json.loads(user_id.string)
51 | return {
52 | "data": data["author"]["additionalName"],
53 | "message": f"Username found for numerical id {numid}",
54 | }
55 | except:
56 | return {
57 | "data": None,
58 | "message": f"Username not found for numerical id {numid}",
59 | }
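60 | 
61 | # Hypothetical usage (requires a local Chrome/chromedriver, and Twitter must
62 | # still serve the "UserProfileSchema-test" script tag to a logged-out browser):
63 | #   scraper = TwitterScraper()
64 | #   print(scraper.unametoid("jack"))  # {"data": "<numeric id>", "message": ...}
65 | #   print(scraper.idtouname("12"))    # {"data": "<username>", "message": ...}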
-------------------------------------------------------------------------------- /src/scrape_up/uci/UCI.py: --------------------------------------------------------------------------------
1 | from bs4 import BeautifulSoup
2 | 
3 | from scrape_up.config.request_config import RequestConfig, get
4 | 
5 | 
6 | class UCI:
7 | """
8 | Create an instance of UCI class
9 | ```python
10 | uci = UCI()
11 | ```
12 | | Methods | Details |
13 | | ------------- | ------------------------------------- |
14 | | `.datasets()` | Fetches datasets information from UCI |
15 | """
16 | 
17 | def __init__(self, *, config: RequestConfig = RequestConfig()):
18 | headers = {
19 | "User-Agent": "Mozilla/5.0 (Windows NT 6.3; Win 64 ; x64) Apple WeKit /537.36(KHTML , like Gecko) Chrome/80.0.3987.162 Safari/537.36"
20 | }
21 | self.config = config
22 | if self.config.headers == {}:
23 | self.config.set_headers(headers)
24 | 
25 | def datasets(self, number):
26 | """
27 | Get UCI datasets information.\n
28 | Args:
29 | `number (int)`: The number of datasets to fetch. The method fetches datasets in batches of 10.
30 | Example:
31 | ```python
32 | uci = UCI()
33 | datasets_info = uci.datasets(20)
34 | ```
35 | Returns:
36 | ```js
37 | [
38 | {
39 | "Name":"Iris",
40 | "Link":"https://archive.ics.uci.edu//dataset/53/iris",
41 | "Description":"A small classic dataset from Fisher, 1936. One of the earliest datasets used for evaluation of classification methodologies.\n",
42 | "Extra Info":" Classification Multivariate 150 Instances 4 Attributes "
43 | }
44 | ]
45 | ```
46 | """
47 | try:
48 | number = number // 10  # convert the requested count into pages of ten
49 | dataset = []
50 | for i in range(0, number):
51 | url = "https://archive.ics.uci.edu/datasets?skip={}&take=10&sort=desc&orderBy=NumHits&search=s".format(
52 | i * 10
53 | )
54 | html_text = get(url, self.config).text
55 | soup = BeautifulSoup(html_text, "lxml")
56 | 
57 | container = soup.find("div", {"class": "flex flex-col gap-1"})
58 | 
59 | for items in container.find_all(
60 | "div", {"class": "rounded-box bg-base-100"}
61 | ):
62 | title = items.find("h2").text
63 | link = (
64 | "https://archive.ics.uci.edu/"
65 | + items.find("a", href=True)["href"]
66 | )
67 | description = items.find("p").text
68 | extra_info = ""
69 | for item in items.find_all(
70 | "div", {"class": "col-span-3 flex items-center gap-2"}
71 | ):
72 | extra_info = extra_info + item.text + " "
73 | data = {
74 | "Name": title,
75 | "Link": link,
76 | "Description": description,
77 | "Extra Info": extra_info,
78 | }
79 | dataset.append(data)
80 | return dataset
81 | except:
82 | return None
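83 | 
84 | # Hypothetical usage (the repository's markup may change; a None return means
85 | # the scrape failed rather than that the repository is empty):
86 | #   uci = UCI()
87 | #   for ds in uci.datasets(20) or []:
88 | #       print(ds["Name"], "->", ds["Link"])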
-------------------------------------------------------------------------------- /src/scrape_up/who/WHO.py: --------------------------------------------------------------------------------
1 | from bs4 import BeautifulSoup
2 | from scrape_up.config.request_config import RequestConfig, get
3 | 
4 | 
5 | class WHO:
6 | """
7 | Create an instance of WHO class.\n
8 | ```python
9 | who = WHO()
10 | ```
11 | | Methods | Details |
12 | | ------------------------------ | ------------------------------------------- |
13 | | `get_disease_outbreak()` | Get Disease Outbreak News from WHO website. |
14 | """
15 | 
16 | def __init__(self, *, config: RequestConfig = RequestConfig()):
17 | headers = {
18 | "User-Agent": "Mozilla/5.0 (Windows NT 6.3; Win 64 ; x64) Apple WeKit /537.36(KHTML , like Gecko) Chrome/80.0.3987.162 Safari/537.36"
19 | }
20 | self.config = config
21 | if self.config.headers == {}:
22 | self.config.set_headers(headers)
23 | 
24 | def get_disease_outbreak(self, number):
25 | """
26 | Get Disease Outbreak News from WHO website.\n
27 | Parameters: `number` (int): The number of news items to fetch (the site serves them in pages of 10).
28 | ```python
29 | who = WHO()
30 | who.get_disease_outbreak(number=10)
31 | ```
32 | Returns:
33 | ```js
34 | [
35 | {
36 | "Title":"Circulating vaccine-derived poliovirus type 2 (cVDPV2) - United Republic of Tanzania",
37 | "Date":"28 July 2023 ",
38 | "Link":"https://www.who.int/emergencies/disease-outbreak-news/item/2023-DON480"
39 | }
40 | ...
41 | ]
42 | ```
43 | """
44 | 
45 | try:
46 | number = number // 10  # convert the requested count into pages of ten
47 | DON = []
48 | for i in range(1, number + 1):
49 | url = f"https://www.who.int/emergencies/disease-outbreak-news/{i}"
50 | html_text = get(url, self.config).text
51 | soup = BeautifulSoup(html_text, "lxml")
52 | 
53 | container = soup.find("div", {"class": "sf-list-vertical"})
54 | 
55 | for items in container.find_all(
56 | "a", {"class": "sf-list-vertical__item"}, href=True
57 | ):
58 | title = items.find("span", {"class": "full-title"})
59 | date = title.findNext()
60 | date = date.text.split("|")[0]
61 | link = items["href"]
62 | data = {"Title": title.text, "Date": date, "Link": link}
63 | DON.append(data)
64 | return DON
65 | except:
66 | return None
67 | 
68 | 
69 | if __name__ == "__main__":
70 | who = WHO()
71 | print(who.get_disease_outbreak(number=10))
72 | 
-------------------------------------------------------------------------------- /src/scrape_up/who/__init__.py: --------------------------------------------------------------------------------
1 | from .WHO import WHO
2 | 
3 | __all__ = ["WHO"]
4 | 
-------------------------------------------------------------------------------- /src/scrape_up/wikipedia/wikipedia.py: --------------------------------------------------------------------------------
1 | from bs4 import BeautifulSoup
2 | 
3 | from scrape_up.config.request_config import RequestConfig, get
4 | 
5 | 
6 | class WikipediaScraper:
7 | """
8 | Create an object of the 'WikipediaScraper' class:
9 | 
10 | ```python
11 | Scraper = WikipediaScraper()
12 | ```
13 | 
14 | | Methods | Details |
15 | | ----------------- | ------------------------------------------------------- |
16 | | `.scrape(query)` | Returns the Scraped Data from Wikipedia |
17 | | `.get_featured()` | Returns the featured article for the day from Wikipedia |
18 | """
19 | 
20 | def __init__(self, *, config: RequestConfig = RequestConfig()):
21 | self.config = config
22 | 
23 | def scrape(self, query: str):
24 | try:
25 | URL = f"https://en.wikipedia.org/wiki/{query}"
26 | response = get(URL, self.config)
27 | soup = BeautifulSoup(response.text, "html.parser")
28 | 
29 | # Extract the title
30 | title = soup.find(id="firstHeading").text
31 | 
32 | # Extract all the headings and their content
33 | sections = soup.find_all("h2")
34 | data = {}
35 | for section in sections:
36 | heading = section.find("span", class_="mw-headline")
37 | if heading:
38 | content = []
39 | next_node = section.find_next_sibling(
40 | ["h2", "h3", "h4", "h5", "h6"]
41 | )
42 | while next_node and next_node.name != "h2":
43 | if next_node.name in ["h3", "h4", "h5", "h6"]:
44 | content.append({"heading": next_node.text.strip()})
45 | elif next_node.name == "p":
46 | content.append({"text": next_node.text.strip()})
47 | next_node = next_node.find_next_sibling(
48 | ["h2", "h3", "h4", "h5", "h6", "p"]
49 | )
50 | data[heading.text] = content
51 | 
52 | # Return the data as JSON
53 | result = {"title": title, "sections": data}
54 | return result
55 | except:
56 | return None
57 | 
58 | def get_featured(self):
59 | """
60 | Get the featured data from the main page of Wikipedia.
61 | 
62 | Returns:
63 | A string containing the featured data from the main page of Wikipedia.
64 | """
65 | try:
66 | url = "https://en.wikipedia.org/wiki/Main_Page"
67 | html_text = get(url, self.config).text
68 | soup = BeautifulSoup(html_text, "lxml")
69 | 
70 | container = soup.find("div", {"id": "mp-left"})
71 | data = container.find("p").text
72 | return data
73 | except:
74 | return None
75 | 
-------------------------------------------------------------------------------- /src/scrape_up/wuzzuf/__init__.py: --------------------------------------------------------------------------------
1 | from .wuzzuf import Jobs
2 | 
3 | __all__ = ["Jobs"]
4 | 
-------------------------------------------------------------------------------- /src/scrape_up/yellowpages/__init__.py: --------------------------------------------------------------------------------
1 | from .yellowpages import Yellowpages
2 | 
3 | __all__ = ["Yellowpages"]
4 | 
-------------------------------------------------------------------------------- /src/scrape_up/yellowpages/yellowpages.py: --------------------------------------------------------------------------------
1 | import requests
2 | from bs4 import BeautifulSoup
3 | 
4 | 
5 | class Yellowpages:
6 | """
7 | Create an instance of `Yellowpages` class
8 | 
9 | ```python
10 | # This will return the list of restaurants in New York and their information
11 | data = Yellowpages("restaurant", "New York")
12 | ```
13 | | Method | Details |
14 | | ----------------- | ----------------------------------------------------------------- |
15 | | `business_info()` | Returns the list of dictionaries containing business information.
| 16 | 17 | """ 18 | 19 | def __init__(self, business, place): 20 | self.business = business 21 | self.place = place 22 | self.info = [] 23 | try: 24 | url = f"https://www.yellowpages.com/search?search_terms={self.business}&geo_location_terms={self.place}" 25 | response = requests.get(url, headers={"User-Agent": "XY"}) 26 | self.soup = BeautifulSoup(response.content, "lxml") 27 | 28 | except: 29 | return None 30 | 31 | def business_info(self): 32 | businesses = self.soup.find_all("div", class_="srp-listing clickable-area mdm") 33 | for item in businesses: 34 | name = item.find("a", class_="business-name").text 35 | address = item.find("div", class_="street-address").text 36 | try: 37 | rating = item.find("div", class_="ratings").text 38 | except: 39 | rating = " " 40 | try: 41 | website = item.find("a", class_="track-visit-website")["href"] 42 | except: 43 | website = " " 44 | try: 45 | phone_no = item.find("div", class_="phones phone primary").text 46 | except: 47 | phone_no = " " 48 | try: 49 | menu = ( 50 | "https://www.yellowpages.com" 51 | + item.find("a", class_="menu")["href"] 52 | ) 53 | except: 54 | menu = " " 55 | try: 56 | description = item.find("p", class_="body").text 57 | except: 58 | description = " " 59 | try: 60 | amenities = item.find("div", class_="amenities-info").text 61 | except: 62 | amenities = " " 63 | try: 64 | opentime = item.find("div", class_="open-status").text 65 | except: 66 | opentime = " " 67 | businessinfo = { 68 | "name": name, 69 | "address": address, 70 | "rating": rating, 71 | "website": website, 72 | "phone_no": phone_no, 73 | "menu": menu, 74 | "description": description, 75 | "amenities": amenities, 76 | "opentime": opentime, 77 | } 78 | self.info.append(businessinfo) 79 | return self.info 80 | -------------------------------------------------------------------------------- /src/scrape_up/zomato/__init__.py: -------------------------------------------------------------------------------- 1 | from .zomato import Zomato 2 | 3 | __all__ = ["Zomato"] 4 | -------------------------------------------------------------------------------- /src/test/academia_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from scrape_up.academia import Academia 3 | 4 | 5 | class TestAcademia(unittest.TestCase): 6 | def setUp(self): 7 | self.academia = Academia() 8 | 9 | def test_get_research_topics(self): 10 | academia = Academia() 11 | result = academia.get_research_topics() 12 | self.assertIsNotNone(result) 13 | self.assertIsInstance(result, list) 14 | 15 | if result is not None: 16 | for topic in result: 17 | self.assertIn("Title", topic) 18 | self.assertIn("Link", topic) 19 | self.assertIn("Number of Articles", topic) 20 | self.assertIn("Followers", topic) 21 | 22 | self.assertIsInstance(topic["Title"], str) 23 | self.assertIsInstance(topic["Link"], str) 24 | self.assertIsInstance(topic["Number of Articles"], str) 25 | self.assertIsInstance(topic["Followers"], str) 26 | 27 | def test_get_research_paper(self): 28 | academia = Academia() 29 | result = academia.get_research_papers(search="Machine Learning") 30 | self.assertIsNotNone(result) 31 | self.assertIsInstance(result, list) 32 | 33 | if result is not None: 34 | for paper in result: 35 | self.assertIn("Title", paper) 36 | self.assertIn("Summary", paper) 37 | self.assertIn("Link", paper) 38 | 39 | self.assertIsInstance(paper["Title"], str) 40 | if paper["Summary"] is not None: 41 | self.assertIsInstance(paper["Summary"], str) 42 | 
self.assertIsInstance(paper["Link"], str) 43 | 44 | 45 | if __name__ == "__main__": 46 | unittest.main() 47 | -------------------------------------------------------------------------------- /src/test/amazon_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from scrape_up.amazon import Product 3 | 4 | 5 | class AmazonTest(unittest.TestCase): 6 | def setUp(self): 7 | self.product = Product("Watch") 8 | 9 | def test_get_product(self): 10 | result = self.product.get_product_details() 11 | self.assertIsNotNone(result) 12 | self.assertIsInstance(result, dict) 13 | if result is not None: 14 | self.assertIn("data", result) 15 | self.assertIn("message", result) 16 | self.assertIsNotNone(result["data"], str) 17 | if result["data"] is not None: 18 | self.assertIsInstance(result["data"], str) 19 | if result["message"] is not None: 20 | self.assertIsInstance(result["message"], str) 21 | 22 | def test_get_product_details(self): 23 | result = self.product.get_product_details() 24 | self.assertIsNotNone(result) 25 | self.assertIsInstance(result, dict) 26 | if result is not None: 27 | self.assertIn("data", result) 28 | self.assertIn("message", result) 29 | self.assertIsNotNone(result["data"], str) 30 | if result["data"] is not None: 31 | self.assertIsInstance(result["data"], str) 32 | if result["message"] is not None: 33 | self.assertIsInstance(result["message"], str) 34 | 35 | def test_get_product_image(self): 36 | result = self.product.get_product_details() 37 | self.assertIsNotNone(result) 38 | self.assertIsInstance(result, dict) 39 | if result is not None: 40 | self.assertIn("data", result) 41 | self.assertIn("message", result) 42 | self.assertIsNotNone(result["data"], str) 43 | if result["data"] is not None: 44 | self.assertIsInstance(result["data"], str) 45 | if result["message"] is not None: 46 | self.assertIsInstance(result["message"], str) 47 | 48 | def test_customer_review(self): 49 | result = self.product.get_product_details() 50 | self.assertIsNotNone(result) 51 | self.assertIsInstance(result, dict) 52 | if result is not None: 53 | self.assertIn("data", result) 54 | self.assertIn("message", result) 55 | self.assertIsNotNone(result["data"], str) 56 | if result["data"] is not None: 57 | self.assertIsInstance(result["data"], str) 58 | if result["message"] is not None: 59 | self.assertIsInstance(result["message"], str) 60 | 61 | 62 | if __name__ == "__main__": 63 | unittest.main() 64 | -------------------------------------------------------------------------------- /src/test/banners_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from src.scrape_up.banners import Scraper88x31 3 | 4 | 5 | class TestScraper88x31(unittest.TestCase): 6 | def setUp(self): 7 | """ 8 | Initialize a Scraper88x31 instance before each test method. 
9 | """ 10 | self.scraper = Scraper88x31() 11 | 12 | def test_get_all(self): 13 | """ 14 | | Methods | Details | 15 | | ------------------ | -------------------------------------------------------- | 16 | | `get_all()` | Returns the list of all available 88x31 banners | 17 | """ 18 | try: 19 | banners = self.scraper.get_all() 20 | 21 | # Check if banners is a list of URLs 22 | self.assertIsInstance(banners, list) 23 | for banner in banners: 24 | self.assertIsInstance(banner, str) 25 | self.assertTrue(banner.startswith("https://cyber.dabamos.de/88x31/")) 26 | self.assertTrue(banner.endswith(".gif")) 27 | except: 28 | return None 29 | 30 | 31 | if __name__ == "__main__": 32 | unittest.main() 33 | -------------------------------------------------------------------------------- /src/test/bayt_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from scrape_up.bayt import Jobs 3 | 4 | 5 | class TestJobs(unittest.TestCase): 6 | """ 7 | | Methods | Details | 8 | | ----------------------------- | -------------------------------------------------------------------------- | 9 | | `.fetch_jobs(query, page)` | Fetch job listings data from Bayt.com based on the given query and page. | 10 | """ 11 | 12 | def setUp(self): 13 | """ 14 | Initialize an instance of the Jobs class before each test. 15 | """ 16 | self.scraper = Jobs() 17 | self.query = "software developer" 18 | self.page = 1 19 | 20 | def test_fetch_jobs(self): 21 | """ 22 | Test the fetch_jobs method. 23 | """ 24 | try: 25 | jobs_data = self.scraper.fetch_jobs(self.query, self.page) 26 | self.assertIsNotNone(jobs_data, "Failed to fetch job listings") 27 | self.assertIsInstance(jobs_data, list, "Job listings should be a list") 28 | self.assertGreater(len(jobs_data), 0, "Job listings should not be empty") 29 | 30 | # Check the structure of the first job listing 31 | job = jobs_data[0] 32 | expected_keys = ["title", "company", "location", "url"] 33 | for key in expected_keys: 34 | self.assertIn(key, job, f"Missing expected key: {key}") 35 | self.assertIsInstance(job[key], str, f"{key} should be a string") 36 | 37 | except: 38 | return None 39 | 40 | def test_extract_job_info(self): 41 | """ 42 | Test the __extract_job_info method indirectly by testing fetch_jobs. 
43 | """ 44 | try: 45 | jobs_data = self.scraper.fetch_jobs(self.query, self.page) 46 | self.assertIsNotNone(jobs_data, "Failed to fetch job listings") 47 | self.assertGreater(len(jobs_data), 0, "Job listings should not be empty") 48 | 49 | # Check the first job listing details 50 | job = jobs_data[0] 51 | self.assertIn("title", job, "Job should have a title") 52 | self.assertIn("company", job, "Job should have a company name") 53 | self.assertIn("location", job, "Job should have a location") 54 | self.assertIn("url", job, "Job should have a URL") 55 | 56 | # Ensure that none of the fields are empty 57 | self.assertNotEqual(job["title"], "", "Job title should not be empty") 58 | self.assertNotEqual(job["url"], "", "Job URL should not be empty") 59 | 60 | except: 61 | return None 62 | 63 | 64 | if __name__ == "__main__": 65 | unittest.main() 66 | -------------------------------------------------------------------------------- /src/test/bbc_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from scrape_up.bbcnews import BBCNews 3 | 4 | 5 | class TestBBCNews(unittest.TestCase): 6 | """ 7 | | Methods | Details | 8 | | ------------------ | -------------------------------------------------------- | 9 | | `.get_headlines()` | Returns the list of object containig the headlines | 10 | | `get_article()` | Returns an object with proper details about the articles | 11 | 12 | """ 13 | 14 | def setUp(self): 15 | """ 16 | Initialize a BBCNews instance before each test method. 17 | """ 18 | self.bbc_scraper = BBCNews() 19 | 20 | def test_get_headlines(self): 21 | """ 22 | Testing the get_headlines() method. 23 | """ 24 | try: 25 | headlines = self.bbc_scraper.get_headlines() 26 | 27 | # Check if headlines is a list of dictionaries 28 | if headlines is not None: 29 | self.assertIsInstance(headlines, list) 30 | for headline in headlines: 31 | self.assertIsInstance(headline, dict) 32 | self.assertIn("index", headline) 33 | self.assertIn("headline", headline) 34 | 35 | # Check if all headlines have unique indices 36 | indices = {headline["index"] for headline in headlines} 37 | self.assertEqual( 38 | len(indices), len(headlines), "Duplicate indices found in headlines" 39 | ) 40 | # Check if headlines list is not empty 41 | self.assertGreater(len(headlines), 0, "No headlines extracted") 42 | except: 43 | return None 44 | 45 | def test_get_article(self): 46 | """ 47 | Testing the get_article(url) method. 
48 | """ 49 | try: 50 | valid_url = "https://www.bbc.co.uk/news/world-europe-61258011" # Test with a valid article URL 51 | article = self.bbc_scraper.get_article(valid_url) 52 | 53 | if article is not None: 54 | self.assertIsInstance( 55 | article, dict 56 | ) # Check if article is a dictionary or not 57 | self.assertIn( 58 | "main_heading", article 59 | ) # Does it contain main_heading or not 60 | self.assertIn("time", article) # Does it contain time or not 61 | self.assertIn("text", article) # Does it contain text or not 62 | 63 | invalid_url = "https://www.bbc.co.uk/news/non-existent-article" # Test with an invalid article URL 64 | invalid_article = self.bbc_scraper.get_article( 65 | invalid_url 66 | ) # Should return None 67 | self.assertIsNone(invalid_article, "Invalid URL should return None") 68 | except: 69 | return None 70 | 71 | 72 | if __name__ == "__main__": 73 | unittest.main() 74 | -------------------------------------------------------------------------------- /src/test/codechef_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from scrape_up import codechef 3 | 4 | 5 | class CodeChefTest(unittest.TestCase): 6 | """ 7 | CodeChef module test.\n 8 | | Methods | Details | 9 | | --------------- | ---------------------------------------------------------------- | 10 | | `get_profile()` | Returns name, username, profile_image_link, rating, details etc. | 11 | """ 12 | 13 | def test_get_profile(self): 14 | instance = codechef.User(id="heltion") 15 | method_response = instance.get_profile() 16 | 17 | self.assertEqual( 18 | list(method_response.keys()), 19 | ["name", "username", "profile_image_link", "rating", "details"], 20 | "Codechef:get_profile - keys mismatch", 21 | ) 22 | 23 | 24 | if __name__ == "__main__": 25 | unittest.main() 26 | -------------------------------------------------------------------------------- /src/test/coinmarketcap_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from scrape_up import coinmarketcap 3 | 4 | # sys.path.insert(0, "..") 5 | 6 | 7 | class CoinMarketCapTest(unittest.TestCase): 8 | """ 9 | CoinMarketCap module test.\n 10 | | Method | Details | 11 | | ---------------------------- | -------------------------------------------------------- | 12 | | `get_top_cryptocurrencies()` | Fetches and returns data about the top cryptocurrencies. 
| 13 | """ 14 | 15 | def test_get_top_cryptocurrencies(self): 16 | instance = coinmarketcap.Crypto() 17 | top_cryptocurrencies = instance.get_top_cryptocurrencies() 18 | 19 | self.assertIsInstance(top_cryptocurrencies, list) 20 | 21 | for item in top_cryptocurrencies: 22 | self.assertIsInstance(item, dict) 23 | 24 | self.assertEqual( 25 | list(item.keys()), 26 | [ 27 | "Name", 28 | "Symbol", 29 | "Link", 30 | "Price", 31 | "1h%", 32 | "24h%", 33 | "7d%", 34 | "MarketCap", 35 | "Volume(24h)", 36 | "Circulating Supply", 37 | ], 38 | ) 39 | 40 | for value in item.values(): 41 | self.assertIsInstance(value, str) 42 | 43 | 44 | if __name__ == "__main__": 45 | unittest.main() 46 | -------------------------------------------------------------------------------- /src/test/coursera_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from scrape_up.coursera import Coursera 3 | 4 | 5 | class TestCoursera(unittest.TestCase): 6 | def setUp(self, topic="Machine Learning"): 7 | self.scraper = Coursera(topic) 8 | 9 | def test_get_courses(self): 10 | result = self.scraper.get_courses() 11 | self.assertIsNotNone(result) 12 | self.assertIsInstance(result, list) 13 | 14 | if result is not None: 15 | for topic in result: 16 | self.assertIn("title", topic) 17 | self.assertIn("taught_by", topic) 18 | self.assertIn("skills", topic) 19 | self.assertIn("rating", topic) 20 | self.assertIn("review_count", topic) 21 | self.assertIn("img_url", topic) 22 | self.assertIn("link", topic) 23 | 24 | self.assertIsInstance(topic["title"], str) 25 | self.assertIsInstance(topic["taught_by"], str) 26 | self.assertIsInstance(topic["skills"], str) 27 | self.assertIsInstance(topic["rating"], str) 28 | self.assertIsInstance(topic["review_count"], str) 29 | self.assertIsInstance(topic["img_url"], str) 30 | self.assertIsInstance(topic["link"], str) 31 | 32 | def test_fetch_modules_with_modules(self): 33 | result = self.scraper.fetch_modules(course="Machine Learning with Python") 34 | self.assertIsNotNone(result) 35 | self.assertIsInstance(result, dict) 36 | 37 | if result is not None: 38 | for key, value in result.items(): 39 | self.assertIsInstance(value, str) 40 | 41 | def test_fetch_modules_with_specializations(self): 42 | result = self.scraper.fetch_modules(course="Machine Learning") 43 | self.assertIsNotNone(result) 44 | self.assertIsInstance(result, dict) 45 | 46 | if result is not None: 47 | for key, value in result.items(): 48 | self.assertIsInstance(value, dict) 49 | self.assertIn("Title", value) 50 | self.assertIn("Link", value) 51 | self.assertIsInstance(value["Title"], str) 52 | self.assertIsInstance(value["Link"], str) 53 | 54 | 55 | if __name__ == "__main__": 56 | unittest.main() 57 | -------------------------------------------------------------------------------- /src/test/covidinfo_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from scrape_up.covidinfo import covidinfo 3 | 4 | 5 | class CovidInfoTest(unittest.TestCase): 6 | def setUp(self): 7 | self.instance = covidinfo.CovidInfo() 8 | 9 | """ 10 | CovidInfo module test.\n 11 | | Methods | Details | 12 | | --------------------------- | ---------------------------------------------------------------------------------------------------- | 13 | | `.covid_data()` | Returns the list of all the covid data scraped from the website | 14 | | `.total_cases()` | Returns the count of total covid cases all over the world | 15 | | `.total_deaths()` | 
Returns the count of covid deaths all over the world |
16 | | `.total_recovered()` | Returns the count of recovered covid cases all over the world |
17 | | `.latest_news()` | Returns the latest news of the day |
18 | """
19 | 
20 | def test_covid_data(self):
21 | covid_data_response = self.instance.covid_data()
22 | self.assertIsInstance(covid_data_response, list)
23 | if covid_data_response is not None:
24 | for data in covid_data_response:
25 | self.assertIsInstance(data, dict)
26 | self.assertIn("Country", data)
27 | self.assertIn("Number of Cases", data)
28 | self.assertIn("Deaths", data)
29 | self.assertIn("Continent", data)
30 | self.assertIsInstance(data["Country"], str)
31 | self.assertIsInstance(data["Number of Cases"], int)
32 | self.assertIsInstance(data["Deaths"], int)
33 | self.assertIsInstance(data["Continent"], str)
34 | 
35 | def test_total_cases(self):
36 | total_cases_response = self.instance.total_cases()
37 | self.assertIsInstance(total_cases_response, str)
38 | 
39 | def test_total_deaths(self):
40 | total_deaths_response = self.instance.total_deaths()
41 | self.assertIsInstance(total_deaths_response, str)
42 | 
43 | def test_total_recovered(self):
44 | test_total_response = self.instance.total_recovered()
45 | self.assertIsInstance(test_total_response, dict)
46 | 
47 | def test_latest_news(self):
48 | latest_news_response = self.instance.latest_news()
49 | self.assertIsInstance(latest_news_response, (list, type(None)))
50 | if latest_news_response is not None:
51 | for news in latest_news_response:
52 | self.assertIsInstance(news, dict)
53 | self.assertIn("news", news)
54 | self.assertIn("source", news)
55 | self.assertIsInstance(news["news"], str)
56 | self.assertIsInstance(news["source"], str)
57 | 
58 | 
59 | if __name__ == "__main__":
60 | unittest.main()
61 | 
-------------------------------------------------------------------------------- /src/test/eazydiner_test.py: --------------------------------------------------------------------------------
1 | import unittest
2 | import json
3 | from scrape_up.eazydiner import EazyDiner
4 | 
5 | 
6 | class EazyDinerTest(unittest.TestCase):
7 | """
8 | EazyDiner class test.\n
9 | | Methods | Details |
10 | | ------------------------ | ---------------------------------------------------------------- |
11 | | `.get_restaurants()` | Tests the get_restaurants() method of the EazyDiner class |
12 | | `.get_breakfast()` | Tests the get_breakfast() method of the EazyDiner class |
13 | | `.get_lunch()` | Tests the get_lunch() method of the EazyDiner class |
14 | | `.get_dinner()` | Tests the get_dinner() method of the EazyDiner class |
15 | | `.dinner_with_discount()`| Tests the dinner_with_discount() method of the EazyDiner class |
16 | | `.get_top10()` | Tests the get_top10() method of the EazyDiner class |
17 | """
18 | 
19 | def assert_response_keys(self, response, expected_keys):
20 | response_dict = (
21 | json.loads(response) if isinstance(response, str) else response
22 | )
23 | for key in expected_keys:
24 | self.assertTrue(
25 | key in response_dict, f"Key '{key}' is missing in the response."
26 | ) 27 | 28 | def test_get_restaurants(self): 29 | eazydiner = EazyDiner( 30 | location="Delhi NCR" 31 | ) # Replace with an appropriate location 32 | restaurants = eazydiner.get_restaurants() 33 | self.assertIsInstance(restaurants, str) 34 | self.assert_response_keys(restaurants, ["restaurants"]) 35 | 36 | def test_get_breakfast(self): 37 | eazydiner = EazyDiner( 38 | location="Delhi NCR" 39 | ) # Replace with an appropriate location 40 | breakfast = eazydiner.get_breakfast() 41 | self.assertIsInstance(breakfast, str) 42 | self.assert_response_keys(breakfast, ["restaurants"]) 43 | 44 | def test_get_lunch(self): 45 | eazydiner = EazyDiner( 46 | location="Delhi NCR" 47 | ) # Replace with an appropriate location 48 | lunch = eazydiner.get_lunch() 49 | self.assertIsInstance(lunch, str) 50 | self.assert_response_keys(lunch, ["restaurants"]) 51 | 52 | def test_get_dinner(self): 53 | eazydiner = EazyDiner( 54 | location="Delhi NCR" 55 | ) # Replace with an appropriate location 56 | dinner = eazydiner.get_dinner() 57 | self.assertIsInstance(dinner, str) 58 | self.assert_response_keys(dinner, ["restaurants"]) 59 | 60 | def test_dinner_with_discount(self): 61 | eazydiner = EazyDiner( 62 | location="Delhi NCR" 63 | ) # Replace with an appropriate location 64 | dinner_discount = eazydiner.dinner_with_discount() 65 | self.assertIsInstance(dinner_discount, list) 66 | 67 | def test_get_top10(self): 68 | eazydiner = EazyDiner( 69 | location="Delhi NCR" 70 | ) # Replace with an appropriate location 71 | top10 = eazydiner.get_top10() 72 | self.assertIsInstance(top10, dict) 73 | 74 | 75 | if __name__ == "__main__": 76 | unittest.main() 77 | -------------------------------------------------------------------------------- /src/test/ebay_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from scrape_up.ebay import EBAY 3 | 4 | 5 | class eBayTest(unittest.TestCase): 6 | """ 7 | eBay module test.\n 8 | | Methods | Details | 9 | | ------------------- | ----------------------------------- | 10 | | `spotlights()` | Returns spotlight deals on EBAY. | 11 | | `featured()` | Returns the featured deals on EBAY. | 12 | | `specific_deals()` | Returns the specific deals on EBAY. 
| 13 | """ 14 | 15 | def setUp(self): 16 | self.instance = EBAY() 17 | 18 | def test_spotlights(self): 19 | spotlights = self.instance.spotlights() 20 | 21 | self.assertIsNotNone(spotlights) 22 | self.assertIsInstance(spotlights, dict) 23 | self.assertEqual( 24 | list(spotlights.keys()), ["Description", "Product", "Price", "Link"] 25 | ) 26 | 27 | for value in spotlights.values(): 28 | self.assertIsInstance(value, str) 29 | 30 | def test_featured(self): 31 | featured = self.instance.featured() 32 | 33 | self.assertIsNotNone(featured) 34 | self.assertIsInstance(featured, list) 35 | 36 | for item in featured: 37 | self.assertIsInstance(item, dict) 38 | self.assertEqual(list(item.keys()), ["Product", "Price", "Link"]) 39 | 40 | for value in item.values(): 41 | self.assertIsInstance(value, str) 42 | 43 | def test_specific_deals(self): 44 | specific_deals = self.instance.specific_deals() 45 | 46 | self.assertIsNotNone(specific_deals) 47 | self.assertIsInstance(specific_deals, list) 48 | 49 | for item in specific_deals: 50 | self.assertIsInstance(item, dict) 51 | self.assertEqual(list(item.keys()), ["Product", "Price", "Link"]) 52 | 53 | for value in item.values(): 54 | self.assertIsInstance(value, str) 55 | 56 | 57 | if __name__ == "__main__": 58 | unittest.main() 59 | -------------------------------------------------------------------------------- /src/test/espncricinfo_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from scrape_up.espncricinfo import Espncricinfo 3 | 4 | 5 | class ESPNTest(unittest.TestCase): 6 | def test_connection(self): 7 | instance = Espncricinfo() 8 | self.assertTrue( 9 | instance, 10 | "ESPN:__init__ - connection failed", 11 | ) 12 | 13 | def test_get_news(self): 14 | instance = Espncricinfo() 15 | method_response = instance.get_news() 16 | 17 | self.assertIsInstance( 18 | method_response, 19 | list, 20 | "ESPN:get_news - invalid response", 21 | ) 22 | 23 | def test_get_livescores(self): 24 | instance = Espncricinfo() 25 | method_response = instance.get_livescores() 26 | 27 | self.assertIsInstance( 28 | method_response, 29 | list, 30 | "ESPN:get_livescores - invalid response", 31 | ) 32 | 33 | 34 | if __name__ == "__main__": 35 | unittest.main() 36 | -------------------------------------------------------------------------------- /src/test/fide_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from scrape_up.fide import FIDE 3 | 4 | 5 | class FIDETest(unittest.TestCase): 6 | """ 7 | Tests for the FIDE class in the fide module. 8 | | Methods | Details | 9 | | ------------------------- | -------------------------------------------------- | 10 | | `.get_events()` | Returns all the major chess events of 2024. | 11 | | `.get_open_ratings()` | Returns a list of top 100 open category players. | 12 | | `.get_women_ratings()` | Returns a list of top 100 women category players. | 13 | | `.get_juniors_ratings()` | Returns a list of top 100 juniors category players.| 14 | | `.get_girls_ratings()` | Returns a list of top 100 girls category players. | 15 | | `.get_news()` | Returns a list of top chess/fide news. 
| 16 | """ 17 | 18 | def test_connection(self): 19 | instance = FIDE() 20 | self.assertTrue( 21 | instance, 22 | "FIDE:__init__ - connection failed", 23 | ) 24 | 25 | def test_get_events(self): 26 | instance = FIDE() 27 | method_response = instance.get_events() 28 | 29 | self.assertIsInstance( 30 | method_response, 31 | list, 32 | "FIDE:get_events - invalid response", 33 | ) 34 | 35 | def test_get_open_ratings(self): 36 | instance = FIDE() 37 | method_response = instance.get_open_ratings() 38 | 39 | self.assertIsInstance( 40 | method_response, 41 | list, 42 | "FIDE:get_open_ratings - invalid response", 43 | ) 44 | 45 | def test_get_women_ratings(self): 46 | instance = FIDE() 47 | method_response = instance.get_women_ratings() 48 | 49 | self.assertIsInstance( 50 | method_response, 51 | list, 52 | "FIDE:get_women_ratings - invalid response", 53 | ) 54 | 55 | def test_get_juniors_ratings(self): 56 | instance = FIDE() 57 | method_response = instance.get_juniors_ratings() 58 | 59 | self.assertIsInstance( 60 | method_response, 61 | list, 62 | "FIDE:get_juniors_ratings - invalid response", 63 | ) 64 | 65 | def test_get_girls_ratings(self): 66 | instance = FIDE() 67 | method_response = instance.get_girls_ratings() 68 | 69 | self.assertIsInstance( 70 | method_response, 71 | list, 72 | "FIDE:get_girls_ratings - invalid response", 73 | ) 74 | 75 | def test_get_news(self): 76 | instance = FIDE() 77 | method_response = instance.get_news() 78 | 79 | self.assertIsInstance( 80 | method_response, 81 | list, 82 | "FIDE:get_news - invalid response", 83 | ) 84 | 85 | 86 | if __name__ == "__main__": 87 | unittest.main() 88 | -------------------------------------------------------------------------------- /src/test/flexjobs_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from scrape_up.flexjobs import FlexJobs 4 | 5 | 6 | class TestFlexJobs(unittest.TestCase): 7 | def test_get_jobs_with_valid_search_query(self): 8 | flexjobs = FlexJobs("python developer") 9 | jobs = flexjobs.get_jobs() 10 | self.assertTrue(len(jobs) > 0, "No jobs found for valid search query") 11 | 12 | def test_get_jobs_with_location_query(self): 13 | flexjobs = FlexJobs("python developer", "New York") 14 | jobs = flexjobs.get_jobs() 15 | self.assertTrue(len(jobs) > 0, "No jobs found for valid location query") 16 | 17 | def test_get_jobs_with_min_jobs_limit(self): 18 | flexjobs = FlexJobs("python developer", min_jobs=5) 19 | jobs = flexjobs.get_jobs() 20 | self.assertTrue( 21 | len(jobs) >= 5, "Number of jobs retrieved exceeds max jobs limit" 22 | ) 23 | 24 | 25 | if __name__ == "__main__": 26 | unittest.main() 27 | -------------------------------------------------------------------------------- /src/test/geeksforgeeks_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from scrape_up.geeksforgeeks import Geeksforgeeks 3 | import json 4 | 5 | 6 | class GeeksforgeeksTest(unittest.TestCase): 7 | """ 8 | Geeksforgeeks module test. 9 | | Methods | Details | 10 | | ----------------- | ---------------------------------------------------------------------------------- | 11 | | `.get_profile()` | Returns the user data in json format. 
| 12 | """ 13 | 14 | def test_get_profile(self): 15 | instance = Geeksforgeeks(user="nikhil25803") 16 | method_response = instance.get_profile() 17 | 18 | if isinstance(method_response, str): 19 | try: 20 | method_response = json.loads(method_response) 21 | except json.JSONDecodeError: 22 | self.fail("get_profile should return a dictionary or a JSON string") 23 | 24 | expected_keys = [ 25 | "username", 26 | "collage_name", 27 | "collage_rank", 28 | "overall_coding_score", 29 | "monthly_coding_score", 30 | "languages_used", 31 | "current_potd_streak", 32 | "total_problem_solved", 33 | "campus_ambassader", 34 | ] 35 | 36 | self.assertEqual( 37 | list(method_response.keys()), 38 | expected_keys, 39 | "Geeksforgeeks:get_profile - keys mismatch", 40 | ) 41 | 42 | 43 | if __name__ == "__main__": 44 | unittest.main() 45 | -------------------------------------------------------------------------------- /src/test/github_education_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from scrape_up import github_education 3 | 4 | 5 | class GitHubEducationTest(unittest.TestCase): 6 | """ 7 | CodeChef module test.\n 8 | | Methods | Details | 9 | | -------------- | ------------------------------------------------------------------------------------------------------------------- | 10 | | `get_events()` | Returns the latest events along with their title, image_url, description, date, location, language, tags, and link. | 11 | """ 12 | 13 | def test_get_events(self): 14 | instance = github_education.Events() 15 | method_response = instance.get_events() 16 | 17 | self.assertIsInstance( 18 | method_response, list, "GitHubEducation:get_events - return type mismatch" 19 | ) 20 | self.assertTrue(all(isinstance(event, dict) for event in method_response)) 21 | 22 | for event in method_response: 23 | self.assertEqual( 24 | list(event.keys()), 25 | [ 26 | "title", 27 | "image_url", 28 | "description", 29 | "date", 30 | "location", 31 | "language", 32 | "tags", 33 | "link", 34 | ], 35 | "GitHubEducation:get_events - keys mismatch", 36 | ) 37 | 38 | 39 | if __name__ == "__main__": 40 | unittest.main() 41 | -------------------------------------------------------------------------------- /src/test/hackerearth_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from scrape_up import hackerearth 4 | 5 | 6 | class HackerEarthTest(unittest.TestCase): 7 | """ 8 | HackerEarth module test.\n 9 | | Methods | Details | 10 | | --------------- | ---------------------------------------------------------------- | 11 | | `get_ongoing()` | Returns the ongoing challenges. | 12 | | `get_upcoming()`| Returns the upcoming challenges. | 13 | | `get_hiring()` | Returns information about ongoing hiring challenges. 
| 14 | """ 15 | 16 | def setUp(self): 17 | self.instance = hackerearth.challenges.Challenges() 18 | 19 | def test_get_ongoing(self): 20 | ongoing_challenges = self.instance.get_ongoing() 21 | self.assertIsInstance(ongoing_challenges, list) 22 | 23 | if len(ongoing_challenges) > 0: 24 | first_challenge = ongoing_challenges[0] 25 | self.assertIsInstance(first_challenge, dict) 26 | self.assertEqual( 27 | list(first_challenge.keys()), 28 | ["Title", "No of Registrations", "Link"], 29 | "HackerEarth-Challenges:get_ongoing - keys mismatch", 30 | ) 31 | 32 | def test_get_upcoming(self): 33 | upcoming_challenges = self.instance.get_upcoming() 34 | self.assertIsInstance(upcoming_challenges, list) 35 | 36 | if len(upcoming_challenges) > 0: 37 | first_challenge = upcoming_challenges[0] 38 | self.assertIsInstance(first_challenge, dict) 39 | self.assertEqual( 40 | list(first_challenge.keys()), 41 | ["Title", "No of Registrations", "Link"], 42 | "HackerEarth-Challenges:get_upcoming - keys mismatch", 43 | ) 44 | 45 | def test_get_hiring(self): 46 | hiring_challenges = self.instance.get_hiring() 47 | self.assertIsInstance(hiring_challenges, list) 48 | 49 | if len(hiring_challenges) > 0: 50 | first_challenge = hiring_challenges[0] 51 | self.assertIsInstance(first_challenge, dict) 52 | self.assertEqual( 53 | list(first_challenge.keys()), 54 | ["Title", "Description", "Link"], 55 | "HackerEarth-Challenges:get_hiring - keys mismatch", 56 | ) 57 | 58 | 59 | if __name__ == "__main__": 60 | unittest.main() 61 | -------------------------------------------------------------------------------- /src/test/hackerrank_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from scrape_up import hackerrank 3 | 4 | 5 | class HackerrankTest(unittest.TestCase): 6 | """ 7 | CodeChef module test.\n 8 | | Methods | Details | 9 | | ----------------------------- | ---------------------------------------------------------------- | 10 | | `get_profile(id="username")` | Returns name, username, country, user_type, details, badges, verified_skills, social etc. 
--------------------------------------------------------------------------------
/src/test/hackerrank_test.py:
--------------------------------------------------------------------------------
import unittest
from scrape_up import hackerrank


class HackerrankTest(unittest.TestCase):
    """
    HackerRank module test.\n
    | Methods                      | Details                                                                                     |
    | ---------------------------- | ------------------------------------------------------------------------------------------- |
    | `get_profile(id="username")` | Returns name, username, country, user_type, details, badges, verified_skills, social, etc.  |
    | `get_skills()`               | Returns a list of verified skills and their links.                                           |
    | `active_contests()`          | Returns information on active contests like title, status, and link.                         |
    | `archived_contests()`        | Returns information regarding archived contests.                                             |
    """

    def test_get_profile(self):
        instance = hackerrank.User()
        method_response = instance.get_profile(id="inclinedadarsh")

        self.assertEqual(
            list(method_response.keys()),
            [
                "name",
                "username",
                "country",
                "user_type",
                "details",
                "badges",
                "verified_skills",
                "social",
            ],
            "Hackerrank:get_profile - keys mismatch",
        )

    def test_get_skills(self):
        instance = hackerrank.User()
        method_response = instance.get_skills()

        self.assertIsInstance(
            method_response, list, "Hackerrank:get_skills - return type mismatch"
        )
        self.assertTrue(
            all(isinstance(skill, dict) for skill in method_response),
            "Hackerrank:get_skills - return type mismatch",
        )

        for skill in method_response:
            self.assertIn("Name", skill)
            self.assertIn("Link", skill)

    def test_active_contests(self):
        instance = hackerrank.Contest()
        method_response = instance.active_contests()

        self.assertIsInstance(
            method_response, list, "Hackerrank:active_contests - return type mismatch"
        )
        self.assertTrue(
            all(isinstance(contest, dict) for contest in method_response),
            "Hackerrank:active_contests - return type mismatch",
        )
        for contest in method_response:
            self.assertIn("Title", contest)
            self.assertIn("Status", contest)
            self.assertIn("Link", contest)

    def test_archived_contests(self):
        instance = hackerrank.Contest()
        method_response = instance.archived_contests()

        self.assertIsInstance(
            method_response, list, "Hackerrank:archived_contests - return type mismatch"
        )

        for contest in method_response:
            self.assertIn("title", contest)


if __name__ == "__main__":
    unittest.main()
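A usage sketch for the HackerRank module, mirroring the profile and contest calls in the tests:

```python
from scrape_up import hackerrank

user = hackerrank.User()
profile = user.get_profile(id="inclinedadarsh")
print(profile["name"], profile["verified_skills"])

contest = hackerrank.Contest()
for c in contest.active_contests():
    print(c["Title"], c["Status"], c["Link"])
```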
| 12 | """ 13 | 14 | def setUp(self): 15 | """ 16 | setup instance for HealthGrades class 17 | """ 18 | self.instance = HealthGrades() 19 | 20 | def test_get_best_hospitals(self): 21 | """ 22 | Test get_best_hospitals for state 'bihar' 23 | """ 24 | best_hospitals = self.instance.get_best_hospitals("bihar") 25 | first_hospital = best_hospitals[0] 26 | 27 | # assert statements 28 | self.assertIsInstance(best_hospitals, list) 29 | self.assertIsInstance(first_hospital, dict) 30 | self.assertEqual( 31 | list(first_hospital.keys()), 32 | ["Name", "Location", "Link", "Awards"], 33 | "Healthgrades:get_best_hospitals - keys mismatch", 34 | ) 35 | 36 | 37 | if __name__ == "__main__": 38 | unittest.main() 39 | -------------------------------------------------------------------------------- /src/test/icc_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from scrape_up import icc 3 | 4 | 5 | class ICCTest(unittest.TestCase): 6 | """ 7 | ICC module test.\n 8 | | Method | Details | 9 | | ---------------------------- | ------------------------------------------------------------------- | 10 | | `.team_rankings(format)` | Returns the list of rankings of teams of desired format | 11 | | `.player_ranking(type,format)` | Returns the list of player ranking of desired type and format | 12 | | `.team_rankings_women(format)` | Returns the list of rankings of teams of desired format | 13 | | `.player_ranking_women(type,format)` | Returns the list of player ranking of desired type and format | 14 | """ 15 | 16 | def test_team_rankings(self): 17 | instance = icc.ICC() 18 | response = instance.team_rankings("ODI") 19 | self.assertGreater(len(response), 0, "Team rankings is empty") 20 | self.assertTrue(isinstance(response, list), "Team rankings is not a list") 21 | self.assertTrue( 22 | all( 23 | isinstance(team, dict) and "rank" in team and "team" in team 24 | for team in response 25 | ), 26 | "Incorrect format for team rankings", 27 | ) 28 | 29 | def test_player_ranking(self): 30 | instance = icc.ICC() 31 | response = instance.player_ranking("batting", "TEST") 32 | self.assertGreater(len(response), 0, "Player ranking is empty") 33 | self.assertTrue(isinstance(response, list), "Player ranking is not a list") 34 | self.assertTrue( 35 | all( 36 | isinstance(player, dict) and "rank" in player and "name" in player 37 | for player in response 38 | ), 39 | "Incorrect format for player rankings", 40 | ) 41 | 42 | def test_team_rankings_women(self): 43 | instance = icc.ICC() 44 | response = instance.team_rankings_women("T20") 45 | self.assertGreater(len(response), 0, "Team rankings for women is empty") 46 | self.assertTrue( 47 | isinstance(response, list), "Team rankings for women is not a list" 48 | ) 49 | self.assertTrue( 50 | all( 51 | isinstance(team, dict) and "rank" in team and "team" in team 52 | for team in response 53 | ), 54 | "Incorrect format for team rankings for women", 55 | ) 56 | 57 | def test_player_ranking_women(self): 58 | instance = icc.ICC() 59 | response = instance.player_ranking("bowling", "ODI") 60 | self.assertGreater(len(response), 0, "Player ranking for women is empty") 61 | self.assertTrue( 62 | isinstance(response, list), "Player ranking for women is not a list" 63 | ) 64 | self.assertTrue( 65 | all( 66 | isinstance(player, dict) and "rank" in player and "name" in player 67 | for player in response 68 | ), 69 | "Incorrect format for player rankings for women", 70 | ) 71 | 72 | 73 | if __name__ == "__main__": 74 | unittest.main() 75 
--------------------------------------------------------------------------------
/src/test/librarygenesis_test.py:
--------------------------------------------------------------------------------
import unittest
from scrape_up.librarygenesis import LibGen


class TestLibGen(unittest.TestCase):
    """
    | Methods                    | Details                                                                                  |
    | -------------------------- | ---------------------------------------------------------------------------------------- |
    | `.getBooks(book_name=" ")` | Returns the books with name, author, size, format, book link, book cover link, language. |
    """

    def setUp(self):
        """
        Initialize a LibGen instance before each test method.
        """
        self.libgen = LibGen()

    def test_getBooks_empty_name(self):
        """
        Test the getBooks() method with an empty book name.
        """
        try:
            result = self.libgen.getBooks("")
            self.assertEqual(
                result,
                "Error: enter name",
                "Expected error message for empty book name",
            )
        except Exception:  # live-site call; tolerate network failures
            return None

    def test_getBooks_short_name(self):
        """
        Test the getBooks() method with a short book name.
        """
        try:
            result = self.libgen.getBooks("AI")
            self.assertEqual(
                result,
                "Error: Title Too Short",
                "Expected error message for short book name",
            )
        except Exception:
            return None

    def test_getBooks_valid_name(self):
        """
        Test the getBooks() method with a valid book name.
        """
        try:
            result = self.libgen.getBooks("Python")
            self.assertIsInstance(result, list, "Expected a list of books")
            if result:  # Check if there are books returned
                book = result[0]
                self.assertIn("name", book, "Book should have a 'name' field")
                self.assertIn("author", book, "Book should have an 'author' field")
                self.assertIn("size", book, "Book should have a 'size' field")
                self.assertIn("format", book, "Book should have a 'format' field")
                self.assertIn("link", book, "Book should have a 'link' field")
                self.assertIn("language", book, "Book should have a 'language' field")
        except Exception:
            return None

    def test_getBooks_no_results(self):
        """
        Test the getBooks() method with a book name that yields no results.
        """
        try:
            result = self.libgen.getBooks("somebookthatdoesnotexist")
            self.assertEqual(
                result,
                "Error: no results found",
                "Expected error message for no results found",
            )
        except Exception:
            return None


if __name__ == "__main__":
    unittest.main()
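A usage sketch for the LibGen module; `getBooks()` returns an error string rather than raising for bad queries, so callers should type-check the result:

```python
from scrape_up.librarygenesis import LibGen

libgen = LibGen()
books = libgen.getBooks("Python")  # short or empty titles yield an error string
if isinstance(books, list):
    for book in books:
        print(book["name"], "-", book["author"], f"({book['format']}, {book['size']})")
else:
    print(books)  # e.g. "Error: no results found"
```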
22 | """ 23 | try: 24 | games = self.lichess_scraper.fetch_games() 25 | 26 | # Check if games is a list of dictionaries 27 | self.assertIsInstance(games, list) 28 | for game in games: 29 | self.assertIsInstance(game, dict) 30 | self.assertIn("white_player", game) 31 | self.assertIn("black_player", game) 32 | self.assertIn("pgn", game) 33 | 34 | white_player = game["white_player"] 35 | black_player = game["black_player"] 36 | 37 | self.assertIn("username", white_player) 38 | self.assertIn("before_game_score", white_player) 39 | self.assertIn("score_change", white_player) 40 | 41 | self.assertIn("username", black_player) 42 | self.assertIn("before_game_score", black_player) 43 | self.assertIn("score_change", black_player) 44 | except: 45 | return None 46 | 47 | def test_fetch_games_empty(self): 48 | """ 49 | Test fetch_games() method with a username that has no games. 50 | """ 51 | try: 52 | self.lichess_scraper = LichessGames(username="non_existent_user") 53 | games = self.lichess_scraper.fetch_games() 54 | self.assertEqual( 55 | games, [], "Expected an empty list for a non-existent user" 56 | ) 57 | except: 58 | return None 59 | 60 | 61 | if __name__ == "__main__": 62 | unittest.main() 63 | -------------------------------------------------------------------------------- /src/test/pinterest_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from scrape_up.pinterest import Pinterest 3 | 4 | 5 | class TestPinterest(unittest.TestCase): 6 | def setUp(self): 7 | self.pinterest = Pinterest() 8 | 9 | def test_get_today(self): 10 | today_topics = self.pinterest.get_today() 11 | self.assertIsInstance(today_topics, list, "Expected get_today to return a list") 12 | if today_topics: 13 | for topic in today_topics: 14 | self.assertIn("link", topic) 15 | self.assertIn("title", topic) 16 | self.assertIn("subtitle", topic) 17 | self.assertIn("image", topic) 18 | 19 | def test_get_photo(self): 20 | url = "https://pin.it/1ZhgQA5AG" 21 | photo = self.pinterest.get_photo(url) 22 | if photo: 23 | self.assertIn("alt", photo) 24 | self.assertIn("image", photo) 25 | 26 | def test_search_pins(self): 27 | keyword = "nature" 28 | pins = self.pinterest.search_pins(keyword=keyword) 29 | self.assertIsInstance(pins, list, "Expected search_pins to return a list") 30 | if pins: 31 | for pin in pins: 32 | self.assertIn("link", pin) 33 | self.assertIn("image", pin) 34 | 35 | def test_get_pin_details(self): 36 | pin_url = "https://pin.it/1ZhgQA5AG" 37 | details = self.pinterest.get_pin_details(pin_url) 38 | if details: 39 | self.assertIn("title", details) 40 | self.assertIn("description", details) 41 | self.assertIn("saves", details) 42 | self.assertIn("comments", details) 43 | 44 | 45 | if __name__ == "__main__": 46 | unittest.main() 47 | -------------------------------------------------------------------------------- /src/test/quora_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from unittest.mock import patch 3 | from scrape_up.quora import Quora 4 | 5 | 6 | class TestQuora(unittest.TestCase): 7 | def setUp(self): 8 | self.scrapper = Quora() 9 | 10 | def test_fetch_answers(self): 11 | try: 12 | expected_answers = ["Accepted answer 1", "Suggested answer 1"] 13 | 14 | self.assertEqual( 15 | self.scrapper.fetch_answers("https://www.quora.com/question"), 16 | expected_answers, 17 | ) 18 | except: 19 | return None 20 | 21 | def test_get_by_query(self): 22 | try: 23 | expected_answer = "Suggested 
answer 1" 24 | 25 | self.assertEqual( 26 | self.scrapper.get_by_query("How-should-I-start-learning-Python-1"), 27 | expected_answer, 28 | ) 29 | except: 30 | return None 31 | 32 | def test_profile_details(self): 33 | try: 34 | expected_profile = { 35 | "name": "Nikhil Raj", 36 | "url": "https://www.quora.com/profile/Nikhil-Raj", 37 | } 38 | 39 | self.assertEqual( 40 | self.scrapper.profile_details("Nikhil Raj"), expected_profile 41 | ) 42 | except: 43 | return None 44 | 45 | 46 | if __name__ == "__main__": 47 | unittest.main() 48 | -------------------------------------------------------------------------------- /src/test/swiggy_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import requests 3 | from unittest.mock import patch 4 | from scrape_up.swiggy import Swiggy 5 | 6 | 7 | class TestSwiggy(unittest.TestCase): 8 | """ 9 | Swiggy module test. 10 | | Methods | Details | 11 | | ------------------------- | ------------------------------------------------------------------------- | 12 | | `get_restraunt_details()` | Returns the restaurant data with name, cuisine, area, rating, offers, etc | 13 | | `get_restaurants()` | Returns the restaurant names as per given city | 14 | """ 15 | 16 | def setUp(self): 17 | self.scrapper = Swiggy() 18 | 19 | def test_get_restraunt_details(self): 20 | try: 21 | expected_data = { 22 | "name": "Pizza Hut", 23 | "cuisine": "Pizzas", 24 | "area": "Karol Bagh", 25 | "rating": "3.7", 26 | "rating_count": "1K+ ratings", 27 | "cost_per_person": "₹350 for two", 28 | "offers": [{"15% OFF UPTO ₹300": "USE CITIFOODIE | ABOVE ₹1200"}], 29 | } 30 | 31 | self.assertEqual( 32 | self.scrapper.get_restraunt_details("https://www.swiggy.com/pizza-hut"), 33 | expected_data, 34 | ) 35 | except: 36 | return None 37 | 38 | def test_get_restaurants(self): 39 | try: 40 | expected_restaurants = [ 41 | { 42 | "Name": "Domino's Pizza", 43 | "Rating": "4.2", 44 | "Cusine": "Pizzas, Italian, Pastas, Desserts", 45 | "Location": "Punjabi Bagh", 46 | "Link": "/restaurant1", 47 | } 48 | ] 49 | 50 | self.assertEqual( 51 | self.scrapper.get_restaurants("Delhi"), expected_restaurants 52 | ) 53 | except: 54 | return None 55 | 56 | 57 | if __name__ == "__main__": 58 | unittest.main() 59 | -------------------------------------------------------------------------------- /src/test/who_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from scrape_up.who import WHO 3 | 4 | 5 | class TestWHO(unittest.TestCase): 6 | def setUp(self): 7 | """ 8 | Initialize a WHO instance before each test method. 9 | """ 10 | self.who_scraper = WHO() 11 | 12 | def test_get_disease_outbreak(self): 13 | """ 14 | | Methods | Details | 15 | | ------------------------------ | ------------------------------------------- | 16 | | `get_disease_outbreak()` | Get Disease Outbreak News from WHO website. 
| 17 | """ 18 | try: 19 | # Test with a valid number of items (assuming each page contains 10 items) 20 | number_of_items = 10 21 | disease_outbreaks = self.who_scraper.get_disease_outbreak(number_of_items) 22 | 23 | # Check if disease_outbreaks is a list 24 | self.assertIsNotNone(disease_outbreaks, "Failed to fetch disease outbreaks") 25 | self.assertIsInstance( 26 | disease_outbreaks, list, "Disease outbreaks data should be a list" 27 | ) 28 | 29 | if disease_outbreaks: 30 | # Check if each item in the list is a dictionary with the required keys 31 | for outbreak in disease_outbreaks: 32 | self.assertIsInstance( 33 | outbreak, dict, "Each outbreak should be a dictionary" 34 | ) 35 | self.assertIn("Title", outbreak, "Missing expected key: 'Title'") 36 | self.assertIn("Date", outbreak, "Missing expected key: 'Date'") 37 | self.assertIn("Link", outbreak, "Missing expected key: 'Link'") 38 | 39 | # Check if the values are of the correct type 40 | self.assertIsInstance( 41 | outbreak["Title"], str, "'Title' should be a string" 42 | ) 43 | self.assertIsInstance( 44 | outbreak["Date"], str, "'Date' should be a string" 45 | ) 46 | self.assertIsInstance( 47 | outbreak["Link"], str, "'Link' should be a string" 48 | ) 49 | 50 | except: 51 | return None 52 | 53 | def test_invalid_number(self): 54 | """ 55 | Test the get_disease_outbreak() method with an invalid number. 56 | """ 57 | try: 58 | invalid_number = -10 59 | disease_outbreaks = self.who_scraper.get_disease_outbreak(invalid_number) 60 | 61 | # Check if the function handles invalid numbers gracefully 62 | self.assertIsNone( 63 | disease_outbreaks, "Function should return None for invalid input" 64 | ) 65 | except: 66 | return None 67 | 68 | 69 | if __name__ == "__main__": 70 | unittest.main() 71 | -------------------------------------------------------------------------------- /src/test/wuzzuf_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from unittest.mock import patch 3 | from scrape_up.wuzzuf import Jobs 4 | import requests 5 | 6 | 7 | class JobsTest(unittest.TestCase): 8 | """ 9 | Jobs module test. 10 | | Methods | Details | 11 | | ------------------- | -------------------------------------------------------------------------------------------- | 12 | | `filter_job()` | Apply filters to the job search using parameters like title, country, city, min/max years of experience. | 13 | | `fetch_jobs()` | Fetch job listings based on the applied filters, with an optional maximum number of pages to scrape. | 14 | """ 15 | 16 | def setUp(self): 17 | self.scraper = Jobs() 18 | 19 | def test_filter_job(self): 20 | self.scraper.filter_job( 21 | title="software engineer", 22 | country="Egypt", 23 | city="Cairo", 24 | min_years_of_experience=2, 25 | max_years_of_experience=5, 26 | ) 27 | expected_url = "https://wuzzuf.net/search/jobs/?q=software+engineer&filters[country][0]=Egypt&filters[city][0]=Cairo&filters[years_of_experience_min][0]=2&filters[years_of_experience_max][0]=5" 28 | self.assertEqual(self.scraper.url, expected_url) 29 | 30 | @patch("requests.get") 31 | def test_fetch_jobs(self, mock_get): 32 | # Mock the get response 33 | mock_response = requests.Response() 34 | mock_response.status_code = 200 35 | mock_response._content = b""" 36 |
--------------------------------------------------------------------------------
/src/test/wuzzuf_test.py:
--------------------------------------------------------------------------------
import unittest
from unittest.mock import patch
from scrape_up.wuzzuf import Jobs
import requests


class JobsTest(unittest.TestCase):
    """
    Jobs module test.
    | Methods        | Details                                                                                                   |
    | -------------- | ---------------------------------------------------------------------------------------------------------- |
    | `filter_job()` | Apply filters to the job search using parameters like title, country, city, min/max years of experience.    |
    | `fetch_jobs()` | Fetch job listings based on the applied filters, with an optional maximum number of pages to scrape.        |
    """

    def setUp(self):
        self.scraper = Jobs()

    def test_filter_job(self):
        self.scraper.filter_job(
            title="software engineer",
            country="Egypt",
            city="Cairo",
            min_years_of_experience=2,
            max_years_of_experience=5,
        )
        expected_url = "https://wuzzuf.net/search/jobs/?q=software+engineer&filters[country][0]=Egypt&filters[city][0]=Cairo&filters[years_of_experience_min][0]=2&filters[years_of_experience_max][0]=5"
        self.assertEqual(self.scraper.url, expected_url)

    @patch("requests.get")
    def test_fetch_jobs(self, mock_get):
        # Mock the GET response. NOTE: the original fixture's markup (including
        # Wuzzuf's CSS class names) was lost in this copy; the tags below are a
        # reconstructed placeholder and must be made to match the selectors
        # that Jobs.fetch_jobs() actually queries.
        mock_response = requests.Response()
        mock_response.status_code = 200
        mock_response._content = b"""
        <div class="job-card">
            <h2><a href="/job/1">Software Engineer</a></h2>
            <a class="company">Company Name</a>
            <span class="location">Cairo, Egypt</span>
            <div class="published-time">3 days ago</div>
            <a class="property">Full Time</a>
            <a class="property">Senior</a>
        </div>
        """
        mock_get.return_value = mock_response

        jobs = self.scraper.fetch_jobs(max_page_number=1)
        expected_job = {
            "name": "Software Engineer",
            "url": "/job/1",
            "company": "Company Name",
            "location": "Cairo, Egypt",
            "published_time": "3 days ago",
            "properties": "Full Time ,Senior",
        }

        self.assertEqual(len(jobs), 1)
        self.assertEqual(jobs[0], expected_job)


if __name__ == "__main__":
    unittest.main()
--------------------------------------------------------------------------------