├── .github
    ├── FUNDING.yml
    ├── ISSUE_TEMPLATE
    │   ├── bug.yaml
    │   ├── documentationupdate.yaml
    │   └── featurerequest.yaml
    ├── pull_request_template.md
    └── workflows
    │   ├── greetings.yaml
    │   └── pr_merged.yml
├── .gitignore
├── .vscode
    └── settings.json
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── Makefile
├── README.md
├── SECURITY.md
├── dev-documentation.md
├── docs
    ├── CODE_OF_CONDUCT.md
    ├── CONTRIBUTING.md
    ├── home.md
    ├── index.md
    ├── installation.md
    └── modules
    │   ├── Finance.md
    │   ├── HackerEarth.md
    │   ├── Hackernews.md
    │   ├── Internshala.md
    │   ├── Twitter.md
    │   ├── academia.md
    │   ├── amazon.md
    │   ├── ask-ubuntu.md
    │   ├── bbc.md
    │   ├── codechef.md
    │   ├── coinmarketcap.md
    │   ├── coursera.md
    │   ├── covid-19.md
    │   ├── crickbuzz.md
    │   ├── devpost.md
    │   ├── dribbble.md
    │   ├── eazydinner.md
    │   ├── ebay.md
    │   ├── espn.md
    │   ├── flexjobs.md
    │   ├── flipkart.md
    │   ├── flipkartclothing.md
    │   ├── flipkartlaptop.md
    │   ├── flyrobu.md
    │   ├── github.md
    │   ├── githubedu.md
    │   ├── gitlab.md
    │   ├── googlenews.md
    │   ├── hackerrank.md
    │   ├── hashnode.md
    │   ├── healthgrade.md
    │   ├── iccranking.md
    │   ├── imdb-actor.md
    │   ├── imdb-boxoffice.md
    │   ├── imdb-celeb.md
    │   ├── imdb-indian.md
    │   ├── imdb-movies.md
    │   ├── imdb.md
    │   ├── instagram.md
    │   ├── installation.md
    │   ├── kooapp.md
    │   ├── leetcode.md
    │   ├── letterboxd.md
    │   ├── luma.md
    │   ├── medium.md
    │   ├── reddit.md
    │   ├── spotify.md
    │   ├── stackoverflow.md
    │   ├── techcrunch.md
    │   ├── wikipedia.md
    │   └── youtube.md
├── documentation.md
├── mkdocs.yml
├── project_setup.sh
├── pyproject.toml
├── requirements.txt
├── setup.cfg
└── src
    ├── scrape_up
        ├── __init__.py
        ├── academia
        │   ├── __init__.py
        │   └── academia.py
        ├── amazon
        │   ├── __init__.py
        │   └── products.py
        ├── ambitionBox
        │   └── company.py
        ├── askubuntu
        │   ├── __init__.py
        │   └── questions.py
        ├── atcoder
        │   ├── __init__.py
        │   └── atcoder.py
        ├── banners
        │   ├── __init__.py
        │   └── scraper88x31.py
        ├── bayt
        │   ├── __init__.py
        │   └── bayt.py
        ├── bbcnews
        │   ├── __init__.py
        │   └── bbcnews.py
        ├── billionaires
        │   └── billionaires.py
        ├── bugmenot
        │   └── bugmenot.py
        ├── cars
        │   ├── __init__.py
        │   └── cars.py
        ├── codechef
        │   ├── __init__.py
        │   └── codechef.py
        ├── codeforces
        │   ├── __init__.py
        │   ├── contests.py
        │   └── user.py
        ├── codewars
        │   ├── __init__.py
        │   └── codewars.py
        ├── coinmarketcap
        │   ├── __init__.py
        │   └── crypto.py
        ├── config
        │   ├── __init__.py
        │   └── request_config.py
        ├── coursera
        │   ├── __init__.py
        │   └── courses.py
        ├── covidinfo
        │   ├── __init__.py
        │   └── covidinfo.py
        ├── cricbuzz
        │   ├── __init__.py
        │   └── cricbuzz.py
        ├── devcommunity
        │   ├── __init__.py
        │   └── articles.py
        ├── devpost
        │   ├── __init__.py
        │   └── devpost.py
        ├── dictionary
        │   ├── __init__.py
        │   └── wordoftheday.py
        ├── dribbble
        │   ├── __init__.py
        │   └── dribbble.py
        ├── eazydiner
        │   ├── __init__.py
        │   └── eazydiner.py
        ├── ebay
        │   ├── __init__.py
        │   └── ebay.py
        ├── espn
        │   ├── __init__.py
        │   └── espnmodule.py
        ├── espncricinfo
        │   ├── __init__.py
        │   └── espncricinfo.py
        ├── fide
        │   ├── __init__.py
        │   └── fide.py
        ├── finance
        │   ├── bse.py
        │   ├── nasdaq.py
        │   ├── nse.py
        │   └── stock_price.py
        ├── flexjobs
        │   ├── __init__.py
        │   └── flexjobs.py
        ├── flipkart
        │   ├── __init__.py
        │   ├── flipkart_clothing.py
        │   ├── flipkart_file.py
        │   └── flipkart_laptop.py
        ├── flyrobu
        │   ├── __init__.py
        │   └── flyrobu.py
        ├── geeksforgeeks
        │   ├── __init__.py
        │   └── geeksforgeeks.py
        ├── github
        │   ├── __init__.py
        │   ├── issue.py
        │   ├── organization.py
        │   ├── pull_request.py
        │   ├── repository.py
        │   └── users.py
        ├── github_education
        │   ├── __init__.py
        │   └── events.py
        ├── googlenews
        │   └── googleNews.py
        ├── hackerearth
        │   ├── __init__.py
        │   └── challenges.py
        ├── hackernews
        │   ├── __init__.py
        │   └── articles.py
        ├── hackerrank
        │   ├── __init__.py
        │   ├── contest.py
        │   └── user.py
        ├── hashnode
        │   ├── __init__.py
        │   └── hashnode.py
        ├── healthgrades
        │   ├── __init__.py
        │   └── healthgradesmodule.py
        ├── icc
        │   ├── __init__.py
        │   └── icc_rankings.py
        ├── imdb
        │   ├── __init__.py
        │   ├── actor.py
        │   ├── box_office.py
        │   ├── celeb.py
        │   ├── imdb.py
        │   ├── indian_movies.py
        │   └── movie.py
        ├── indiantrekking
        │   ├── __init__.py
        │   └── trek.py
        ├── indiatodayweather
        │   ├── __init__.py
        │   └── weather.py
        ├── instagram
        │   └── users.py
        ├── internshala
        │   └── internships.py
        ├── kindle_bookstore
        │   └── kindle.py
        ├── kooapp
        │   └── users.py
        ├── lastfm
        │   ├── __init__.py
        │   └── lastfm.py
        ├── leetcode
        │   └── leetcode_scraper.py
        ├── letterboxd
        │   ├── __init__.py
        │   └── letterboxd.py
        ├── librarygenesis
        │   ├── __init__.py
        │   └── library.py
        ├── lichess
        │   ├── __init__.py
        │   └── lichess.py
        ├── linkedIn
        │   └── linkedInspider.py
        ├── luma
        │   └── events.py
        ├── magicbricks
        │   └── MagicBricks.py
        ├── mediencyclopedia
        │   └── mediencyclopedia.py
        ├── medium
        │   ├── publication.py
        │   ├── trending.py
        │   └── user.py
        ├── moneycontrol
        │   ├── equity_mutual_funds.py
        │   ├── gold.py
        │   ├── index_contribution.py
        │   ├── indian_index.py
        │   ├── silver_prices.py
        │   ├── top_gainers.py
        │   └── top_losers.py
        ├── myanimelist
        │   ├── __init__.py
        │   └── scraper.py
        ├── newscnn
        │   └── newscnn.py
        ├── olympics
        │   └── olympic.py
        ├── pinterest
        │   ├── __init__.py
        │   └── pinterest.py
        ├── quora
        │   ├── __init__.py
        │   └── quora.py
        ├── reddit
        │   └── reddit.py
        ├── robu
        │   └── robu.py
        ├── rottentomatoes
        │   └── rot_tom.py
        ├── spotify
        │   └── spotify_file.py
        ├── stackoverflow
        │   └── questions.py
        ├── steam
        │   ├── __init__.py
        │   └── steamScraper.py
        ├── swiggy
        │   ├── __init__.py
        │   └── swiggy.py
        ├── sysreqlab
        │   ├── __init__.py
        │   ├── find_titles.py
        │   └── requirements.py
        ├── techcrunch
        │   └── techCrunch.py
        ├── thehindu
        │   └── thehindu.py
        ├── timeanddate
        │   ├── city.py
        │   ├── day_in_history.py
        │   ├── extended_forecast.py
        │   ├── fun_holidays.py
        │   ├── time_zones.py
        │   └── utc.py
        ├── timesjobs
        │   └── timesjobs_scraper.py
        ├── tripadvisor
        │   └── TripAdvisor.py
        ├── twitter
        │   └── numidconverter.py
        ├── uci
        │   └── UCI.py
        ├── udemy
        │   └── courses.py
        ├── unsplash
        │   └── unsplash_scraper.py
        ├── who
        │   ├── WHO.py
        │   └── __init__.py
        ├── wikipedia
        │   └── wikipedia.py
        ├── wuzzuf
        │   ├── __init__.py
        │   └── wuzzuf.py
        ├── yahoofinance
        │   └── YahooFinance.py
        ├── yellowpages
        │   ├── __init__.py
        │   └── yellowpages.py
        ├── youtube
        │   ├── channel.py
        │   └── video.py
        └── zomato
        │   ├── __init__.py
        │   └── zomato.py
    └── test
        ├── academia_test.py
        ├── amazon_test.py
        ├── askubuntu_test.py
        ├── atcoder_test.py
        ├── banners_test.py
        ├── bayt_test.py
        ├── bbc_test.py
        ├── codechef_test.py
        ├── coinmarketcap_test.py
        ├── coursera_test.py
        ├── covidinfo_test.py
        ├── cricbuzz_test.py
        ├── devpost_test.py
        ├── dribbble_test.py
        ├── eazydiner_test.py
        ├── ebay_test.py
        ├── espncricinfo_test.py
        ├── fide_test.py
        ├── flexjobs_test.py
        ├── flipkart_test.py
        ├── flyrobu_test.py
        ├── geeksforgeeks_test.py
        ├── github_education_test.py
        ├── github_test.py
        ├── hackerearth_test.py
        ├── hackernews_test.py
        ├── hackerrank_test.py
        ├── healthgrades_test.py
        ├── icc_test.py
        ├── librarygenesis_test.py
        ├── lichess_test.py
        ├── myanimelist.py
        ├── pinterest_test.py
        ├── quora_test.py
        ├── swiggy_test.py
        ├── who_test.py
        ├── wuzzuf_test.py
        └── zomato_test.py


/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: [nikhil25803]
2 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug.yaml:
--------------------------------------------------------------------------------
 1 | name: "🐞 Bug Report"
 2 | description: "Create a report to help us improve"
 3 | title: "BUG:"
 4 | labels: [Bug, Needs Triage]
 5 | body:
 6 |   - type: checkboxes
 7 |     attributes:
 8 |       label: "Is there an existing issue for this?"
 9 |       description: "Please search to see if an issue already exists for the bug you encountered."
10 |       options:
11 |         - label: "I have searched the existing issues"
12 |           required: true
13 |   - type: textarea
14 |     attributes:
15 |       label: "What happened?"
16 |       description: "A concise description of what you're experiencing."
17 |     validations:
18 |       required: true
19 |   - type: checkboxes
20 |     attributes:
21 |       label: "Record"
22 |       options:
23 |         - label: "I agree to follow this project's Code of Conduct"
24 |           required: true
25 |         - label: "I'm a GSSoC'24 contributor"
26 |         - label: "I want to work on this issue"
27 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/documentationupdate.yaml:
--------------------------------------------------------------------------------
 1 | name: "📑 Documentation Update"
 2 | description: "Improve Documentation"
 3 | title: "DOC:"
 4 | labels: [DOC, Needs Triage]
 5 | body:
 6 |   - type: textarea
 7 |     attributes:
 8 |       label: "What's wrong with the existing documentation"
 9 |       description: "Which things do we need to add or delete"
10 |     validations:
11 |       required: true
12 |   - type: textarea
13 |     attributes:
14 |       label: "Add ScreenShots"
15 |       description: "Add sufficient SS to explain your issue."
16 |     validations:
17 |       required: false
18 | 
19 |   - type: checkboxes
20 |     attributes:
21 |       label: "Record"
22 |       options:
23 |         - label: "I agree to follow this project's Code of Conduct"
24 |           required: true
25 |         - label: "I'm a GSSoC'24 contributor"
26 |         - label: "I want to work on this issue"
27 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/featurerequest.yaml:
--------------------------------------------------------------------------------
 1 | name: "✨ Feature Request"
 2 | description: "Suggest an idea for this project "
 3 | title: "Feat:"
 4 | labels: [Feat, Needs Triage]
 5 | body:
 6 |   - type: textarea
 7 |     attributes:
 8 |       label: "Describe the feature"
 9 |       description:
10 |     validations:
11 |       required: true
12 |   - type: textarea
13 |     attributes:
14 |       label: "Add ScreenShots"
15 |       description: "Add sufficient SS to explain your issue."
16 |     validations:
17 |       required: true
18 |   - type: checkboxes
19 |     attributes:
20 |       label: "Record"
21 |       options:
22 |         - label: "I agree to follow this project's Code of Conduct"
23 |           required: true
24 |         - label: "I'm a GSSoC'24 contributor"
25 |         - label: "I want to work on this issue"
26 | 


--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
 1 | ## Description
 2 | 
 3 | > The changes made in this pull request should be briefly described.
 4 | 
 5 | ## Resolves: [Issue Number]
 6 | 
 7 | ## Checklist
 8 | 
 9 | > Before submitting this pull request, kindly verify that the ensuing checkpoints have been reached.
10 | 
11 | - [ ] Have you adhered to the repository's defined coding convention rules?
12 | - [ ] Have you updated the 'documentation.md' file with the method/function documentation?
13 | - [ ] Have you sent a message along with the result or response?
14 | - [ ] Have you used the try-catch technique?
15 | - [ ] Has the method/class been added to the documentation (md file)?
16 | 
17 | ## Screenshots
18 | 
19 | > Uploading a screenshot illustrating the approach you developed for validation is required.
20 | 
21 | ## Additional Notes/Comments
22 | 
23 | > Any additional remarks or suggestions concerning this pull request are welcome.
24 | 
25 | ---
26 | 
27 | I certify that I have carried out the relevant checks and provided the requisite screenshot for validation by submitting this pull request.
28 | I appreciate your contribution.
29 | 


--------------------------------------------------------------------------------
/.github/workflows/greetings.yaml:
--------------------------------------------------------------------------------
 1 | name: Greetings
 2 | 
 3 | on: [issues]
 4 | 
 5 | jobs:
 6 |   greeting:
 7 |     runs-on: ubuntu-latest
 8 |     permissions:
 9 |       issues: write
10 |       pull-requests: write
11 |     steps:
12 |     - uses: actions/first-interaction@v1
13 |       with:
14 |         repo-token: ${{ secrets.GITHUB_TOKEN }}
15 |         issue-message: "Hi there! Thanks for opening this issue. We appreciate your contribution to this open-source project. We aim to respond or assign your issue as soon as possible."
16 |         pr-message: "Welcome to Our repository.🎊 Thank you so much for taking the time to point this out."
17 | 


--------------------------------------------------------------------------------
/.github/workflows/pr_merged.yml:
--------------------------------------------------------------------------------
 1 | name: Auto Comment on PR Merged
 2 | 
 3 | on:
 4 |   pull_request_target:
 5 |     types: [closed]
 6 | 
 7 | permissions:
 8 |   issues: write
 9 |   pull-requests: write
10 | 
11 | jobs:
12 |   comment:
13 |     runs-on: ubuntu-latest
14 |     if: github.event.pull_request.merged == true
15 |     steps:
16 |     - name: Add Comment to Pull Request
17 |       run: |
18 |         COMMENT=$(cat <<EOF
19 |         {
20 |           "body": "Great work @${{ github.event.pull_request.user.login }} !! \nDo not forget to \n+ Share your contribution on socials and tag Clueless 🫂 \n+ Give this repository a star ✨ \n+ Follow Clueless Community on GitHub ✅"
21 |         }
22 |         EOF
23 |         )
24 |         RESPONSE=$(curl -s -o response.json -w "%{http_code}" \
25 |           -X POST \
26 |           -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \
27 |           -H "Accept: application/vnd.github.v3+json" \
28 |           https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.pull_request.number }}/comments \
29 |           -d "$COMMENT")
30 |         cat response.json
31 |         if [ "$RESPONSE" -ne 201 ]; then
32 |           echo "Failed to add comment"
33 |           exit 1
34 |         fi
35 |       env:
36 |         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | local_test.py
  6 | 
  7 | # C extensions
  8 | *.so
  9 | *.ipynb
 10 | # Distribution / packaging
 11 | .Python
 12 | build/
 13 | develop-eggs/
 14 | dist/
 15 | downloads/
 16 | eggs/
 17 | .eggs/
 18 | lib/
 19 | lib64/
 20 | parts/
 21 | sdist/
 22 | var/
 23 | wheels/
 24 | pip-wheel-metadata/
 25 | share/python-wheels/
 26 | *.egg-info/
 27 | .installed.cfg
 28 | *.egg
 29 | MANIFEST
 30 | 
 31 | # PyInstaller
 32 | #  Usually these files are written by a python script from a template
 33 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 34 | *.manifest
 35 | *.spec
 36 | /.vscode
 37 | # Installer logs
 38 | pip-log.txt
 39 | pip-delete-this-directory.txt
 40 | 
 41 | # Unit test / coverage reports
 42 | htmlcov/
 43 | .tox/
 44 | .nox/
 45 | .coverage
 46 | .coverage.*
 47 | .cache
 48 | nosetests.xml
 49 | coverage.xml
 50 | *.cover
 51 | *.py,cover
 52 | .hypothesis/
 53 | .pytest_cache/
 54 | 
 55 | # Translations
 56 | *.mo
 57 | *.pot
 58 | 
 59 | # Django stuff:
 60 | *.log
 61 | local_settings.py
 62 | db.sqlite3
 63 | db.sqlite3-journal
 64 | 
 65 | # Flask stuff:
 66 | instance/
 67 | .webassets-cache
 68 | 
 69 | # Scrapy stuff:
 70 | .scrapy
 71 | 
 72 | # Sphinx documentation
 73 | docs/_build/
 74 | 
 75 | # PyBuilder
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | .python-version
 87 | 
 88 | # pipenv
 89 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 90 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 91 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 92 | #   install all needed dependencies.
 93 | #Pipfile.lock
 94 | 
 95 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 96 | __pypackages__/
 97 | 
 98 | # Celery stuff
 99 | celerybeat-schedule
100 | celerybeat.pid
101 | 
102 | # SageMath parsed files
103 | *.sage.py
104 | 
105 | # Environments
106 | .env
107 | .venv
108 | env/
109 | venv/
110 | ENV/
111 | env.bak/
112 | venv.bak/
113 | 
114 | # Spyder project settings
115 | .spyderproject
116 | .spyproject
117 | 
118 | # Rope project settings
119 | .ropeproject
120 | 
121 | # mkdocs documentation
122 | /site
123 | 
124 | # mypy
125 | .mypy_cache/
126 | .dmypy.json
127 | dmypy.json
128 | 
129 | # Pyre type checker
130 | .pyre/
131 | 
132 | #PyCharm
133 | .idea/
134 | site/
135 | 


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 |   "liveServer.settings.port": 5501,
3 |   "python.formatting.provider": "none",
4 |   "python.testing.unittestArgs": ["-v", "-s", "./src", "-p", "*_test.py"],
5 |   "python.testing.pytestEnabled": false,
6 |   "python.testing.unittestEnabled": true
7 | }
8 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 Clueless
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: project-setup
2 | project-setup:
3 | 	- ./project_setup.sh
4 | 


--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
 1 | # Security Policy
 2 | 
 3 | ## Dependencies
 4 | 
 5 | Use this section to tell people about which versions of your project are
 6 | currently being supported with security updates.
 7 | 
 8 | | Library              | Version     |
 9 | | -------------------- | ----------- |
10 | | `beautifulsoup4`     | 4.11.1      |
11 | | `bs4`                | 0.0.1       |
12 | | `requests`           | 2.28.2      |
13 | | `requests-html`      | 0.10.0      |
14 | | `selenium`           | 4.9.1       |
15 | | `webdriver_manager`  | `latest`    |
16 | 
17 | ## Reporting a Vulnerability
18 | 
19 | Click [here](https://github.com/Clueless-Community/scrape-up/issues/new/choose) to report a new Vulnerability you found in the codebase.
20 | 


--------------------------------------------------------------------------------
/docs/home.md:
--------------------------------------------------------------------------------
 1 | ## Example Usage
 2 | 
 3 | Here's how you can use some of the modules in Scrape Up:
 4 | 
 5 | ```python
 6 | # Import the modules
 7 | from scrape_up import github, githubedu, codechef
 8 | 
 9 | # Scrape data from GitHub
10 | github_data = github.scrape('username')
11 | 
12 | # Scrape data from GitHub Education
13 | githubedu_data = githubedu.scrape('username')
14 | 
15 | # Scrape data from Codechef
16 | codechef_data = codechef.scrape('username')
17 | 
18 | # Print the data
19 | print(github_data)
20 | print(githubedu_data)
21 | print(codechef_data)
22 | ```
23 | 
24 | Replace 'username' with the actual username you want to scrape data for. The scrape function returns the scraped data, which you can then print or use in your application.
25 | 
26 | ## The platforms we cover.
27 | 
28 | We provide modules for a wide range of platforms. Each module allows you to interact with the corresponding platform in a specific way. Click on a platform name to view its module.
29 | 
30 | - [Academia](modules/academia.md)
31 | - [BBC News](modules/bbc.md)
32 | - [Codechef](modules/codechef.md)
33 | - [Coin Market Cap](modules/coinmarketcap.md)
34 | - [Covid Info](modules/covid-19.md)
35 | - [Cricbuzz](modules/crickbuzz.md)
36 | - [GitHub](modules/github.md)
37 | - [GitHub Education](modules/githubedu.md)
38 | - [HackerEarth](modules/HackerEarth.md)
39 | - [Hacker News](modules/Hackernews.md)
40 | - [HackerRank](modules/hackerrank.md)
41 | - [Hashnode](modules/hashnode.md)
42 | - [ICC Rankings](modules/iccranking.md)
43 | 
44 | Remember, you can contribute to the development of these modules or suggest new ones by following our [contribution guidelines](CONTRIBUTING.md).
45 | 
46 | <p align="right">(<a href="#top">Back to top</a>)</p>
47 | 


--------------------------------------------------------------------------------
/docs/installation.md:
--------------------------------------------------------------------------------
 1 | ### Install the package using `pip`:
 2 | 
 3 | ```bash
 4 | pip install scrape-up --upgrade
 5 | ```
 6 | 
 7 | ### Import the required module
 8 | 
 9 | > For example - `GitHub`
10 | 
11 | ```py
12 | # Import the required module
13 | from scrape_up import github
14 | ```
15 | 
16 | ### Instantiate an object with required parameters
17 | 
18 | > Also mentioned in the docstring
19 | 
20 | ```
21 | user = github.Users(username="nikhil25803")
22 | ```
23 | 
24 | ### Call the required method.
25 | 
26 | > For example, to extract the number of followers of a user:
27 | 
28 | ```python
29 | # Call the followers method
30 | followers_count = user.followers()
31 | ```
32 | 


--------------------------------------------------------------------------------
/docs/modules/Finance.md:
--------------------------------------------------------------------------------
 1 | ```python
 2 | from scrape_up import StockPrice
 3 | ```
 4 | 
 5 | ### Scrape stock data
 6 | 
 7 | First, create an instance of class `StockPrice` with stock name and index name.
 8 | 
 9 | ```python
10 | infosys = StockPrice('infosys','nse')
11 | ```
12 | 
13 | | Methods                                   | Details                                                                                 |
14 | | ----------------------------------------- | --------------------------------------------------------------------------------------- |
15 | | `.get_latest_price()`                     | Returns the latest stock price of the given stock name.                                 |
16 | | `.get_historical_data(from_date,to_date)` | Returns stock price from `from_date` to `to_date` in format (date in format dd-mm-yyyy) |
17 | 
18 | ---
19 | 


--------------------------------------------------------------------------------
/docs/modules/HackerEarth.md:
--------------------------------------------------------------------------------
 1 | ```py
 2 | from scrape_up import hackerearth
 3 | ```
 4 | 
 5 | Create an object of class `Challenges`
 6 | 
 7 | ```python
 8 | hackerearth = hackerearth.Challenges()
 9 | ```
10 | 
11 | | Methods          | Details                                                |
12 | | ---------------- | ------------------------------------------------------ |
13 | | `get_upcoming()` | Get the details of upcoming challenges on Hackerearth. |
14 | | `get_ongoing()`  | Get the details of ongoing challenges on Hackerearth.  |
15 | | `get_hiring()`   | Get the details of hiring challenges on Hackerearth.   |
16 | 
17 | ---
18 | 


--------------------------------------------------------------------------------
/docs/modules/Hackernews.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ```py
 3 | from scrape_up import hacker_news
 4 | ```
 5 | 
 6 | Create an instance of `HackerNews` class.
 7 | 
 8 | ```py
 9 | articles = HackerNews()
10 | ```
11 | 
12 | | Methods            | Details                                                                                                                  |
13 | | ------------------ | ------------------------------------------------------------------------------------------------------------------------ |
14 | | `.articles_list()` | Returns the latest articles along with their score, author, author url, time, comment count and link in JSON format.     |
15 | | `.new_articles()`  | Returns the latest new articles along with their score, author, author url, time, comment count and link in JSON format. |
16 | | `.past_articles()` | Returns the past articles along with their score, author, author url, time, comment count and link in JSON format.       |
17 | | `.ask_articles()`  | Returns the ask articles along with their score, author, author url, time, comment count and link in JSON format.        |
18 | | `.show_articles()` | Returns the show articles along with their score, author, author url, time, comment count and link in JSON format.       |
19 | | `.jobs()`          | Returns the jobs along with their time and link in JSON format.                                                          |
20 | 
21 | ---


--------------------------------------------------------------------------------
/docs/modules/Internshala.md:
--------------------------------------------------------------------------------
 1 | ## Internshala
 2 | 
 3 | Create an object for the 'Internshala' class:
 4 | 
 5 | ```python
 6 | search = Internshala(search_type="machine learning")
 7 | ```
 8 | 
 9 | | Methods                    | Details                                                                        |
10 | | -------------------------- | ------------------------------------------------------------------------------ |
11 | | `.internships()`           | Scrapes and returns a list of dictionaries representing internships.           |
12 | | `.jobs()`                  | Scrapes and returns a list of dictionaries representing jobs.                  |
13 | | `.certification_courses()` | Scrapes and returns a list of dictionaries representing certification courses. |


--------------------------------------------------------------------------------
/docs/modules/Twitter.md:
--------------------------------------------------------------------------------
 1 | ```python
 2 | from scrape_up import twitter
 3 | ```
 4 | 
 5 | ### Scrape
 6 | 
 7 | First, create an object of class `TwitterScraper`
 8 | 
 9 | ```python
10 | twitter_scraper = TwitterScraper()
11 | ```
12 | 
13 | | Methods                    | Details                                       |
14 | | -------------------------- | --------------------------------------------- |
15 | | `.unametoid(username)`     | Returns the numerical_id on passing username. |
16 | | `.idtouname(numerical_id)` | Returns the username on passing numerical_id. |
17 | 
18 | ---
19 | 


--------------------------------------------------------------------------------
/docs/modules/academia.md:
--------------------------------------------------------------------------------
 1 | ```py
 2 | from scrape_up import academia
 3 | ```
 4 | 
 5 | Create an instance of `Academia` class
 6 | 
 7 | ```python
 8 | academia = academia.Academia()
 9 | ```
10 | 
11 | | Method                        | Details                                                               |
12 | | ----------------------------- | --------------------------------------------------------------------- |
13 | | `get_research_topics()`       | Fetches and returns research topics.                                  |
14 | | `get_research_papers(search)` | Fetches and returns research papers related to the given search term. |
15 | 
16 | ---


--------------------------------------------------------------------------------
/docs/modules/amazon.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ### Scrape details about a product
 3 | 
 4 | Create an instance of `Product` class with a `product_name` propertiese.
 5 | 
 6 | ```python
 7 | product = Product(product_name="watch")
 8 | ```
 9 | 
10 | | Methods                  | Details                      |
11 | | ------------------------ | ---------------------------- |
12 | | `.get_product()`         | Returns product data(links). |
13 | | `.get_product_details()` | Returns product detail.      |
14 | | `.get_product_image()`   | Returns product image.       |
15 | | `.customer_review()`     | Returns product review.      |
16 | 
17 | ## Amazon-Kindle Bookstore
18 | 
19 | Create an instance of `Book` class.
20 | 
21 | ```python
22 | books = AmazonKindle()
23 | ```
24 | 
25 | | Methods          | Details                                                |
26 | | ---------------- | ------------------------------------------------------ |
27 | | `.bestsellers()` | Returns the list of best-selling books on AmazonKindle |
28 | | `.topbooks()`    | Returns the list of top books on AmazonKindle          |


--------------------------------------------------------------------------------
/docs/modules/ask-ubuntu.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ### Scrape questions, views, votes, answer counts, and descriptions from Ask Ubuntu website regarding a topic
 3 | 
 4 | Create an instance of `Questions` class.
 5 | 
 6 | ```python
 7 | questions = Questions("topic")
 8 | ```
 9 | 
10 | | Methods                     | Details                                                                                              |
11 | | --------------------------- | ---------------------------------------------------------------------------------------------------- |
12 | | `.getNewQuestions()`        | Returns the new questions, views, votes, answer counts, and descriptions in JSON format              |
13 | | `.getActiveQuestions()`     | Returns the active questions, views, votes, answer counts, and descriptions in JSON format           |
14 | | `.getUnansweredQuestions()` | Returns the unanswered questions, views, votes, answer counts, and descriptions in JSON format       |
15 | | `.getBountiedQuestions()`   | Returns the bountied questions, views, votes, answer counts, and descriptions in JSON format         |
16 | | `.getFrequentQuestions()`   | Returns the frequently asked questions, views, votes, answer counts, and descriptions in JSON format |
17 | | `.getHighScoredQuestions()` | Returns the most voted questions, views, votes, answer counts, and descriptions in JSON format       |
18 | 
19 | ---
20 | 


--------------------------------------------------------------------------------
/docs/modules/bbc.md:
--------------------------------------------------------------------------------
 1 | from scrape_up import bbcnews
 2 | ```
 3 | 
 4 | First create an object of class `BBCNews`
 5 | 
 6 | ```python
 7 | user = bbcnews.BBCNews()
 8 | ```
 9 | 
10 | | Methods            | Details                                                   |
11 | | ------------------ | --------------------------------------------------------- |
12 | | `.get_headlines()` | Returns the list of objects containing the headlines.     |
13 | | `get_article()`    | Returns an object with proper details about the articles. |
14 | 
15 | ---


--------------------------------------------------------------------------------
/docs/modules/codechef.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ```python
 3 | from scrape_up import codechef
 4 | ```
 5 | 
 6 | ### Scrape user details
 7 | 
 8 | Create an object of class `Codechef`
 9 | 
10 | ```python
11 | user1 = codechef.User(id="username")
12 | 
13 | ```
14 | 
15 | | Methods         | Details                                                          |
16 | | --------------- | ---------------------------------------------------------------- |
17 | | `get_profile()` | Returns name, username, profile_image_link, rating, details etc. |
18 | 
19 | ---
20 | 


--------------------------------------------------------------------------------
/docs/modules/coinmarketcap.md:
--------------------------------------------------------------------------------
 1 | ```py
 2 | from scrape_up import coinmarketcap
 3 | ```
 4 | 
 5 | Create an instance of `Crypto` class
 6 | 
 7 | ```python
 8 | crypto = coinmarketcap.Crypto()
 9 | ```
10 | 
11 | | Method                       | Details                                                  |
12 | | ---------------------------- | -------------------------------------------------------- |
13 | | `get_top_cryptocurrencies()` | Fetches and returns data about the top cryptocurrencies. |
14 | 
15 | ---
16 | 


--------------------------------------------------------------------------------
/docs/modules/coursera.md:
--------------------------------------------------------------------------------
 1 | Create an object of the 'Courses' class:
 2 | 
 3 | ```python
 4 | scraper = Courses(topic="topic")
 5 | ```
 6 | 
 7 | | Methods                                | Details                                                                                    |
 8 | | -------------------------------------- | ------------------------------------------------------------------------------------------ |
 9 | | `.get_courses()`                       | Returns the courses with title, teached by, skills, rating, review count, img url and link |
10 | | `.fetch_modules(course='Course Name')` | Returns the modules associated with the Coursera.                                          |
11 | 


--------------------------------------------------------------------------------
/docs/modules/covid-19.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ```py
 3 | from scrape_up import covidinfo
 4 | ```
 5 | 
 6 | Create an instance of the `CovidInfo` class.
 7 | 
 8 | ```python
 9 | response = covidinfo.CovidInfo()
10 | ```
11 | 
12 | | Methods              | Details                                                        |
13 | | -------------------- | -------------------------------------------------------------- |
14 | | `.covid_data()`      | Returns the list of all covid data scraped from the website.   |
15 | | `.total_cases()`     | Returns the count of total covid cases all over the world.     |
16 | | `.total_deaths()`    | Returns the count of deaths covid cases all over the world.    |
17 | | `.total_recovered()` | Returns the count of recovered covid cases all over the world. |
18 | | `.latest_news()`     | Return the latest news of the day.                             |
19 | 
20 | ---


--------------------------------------------------------------------------------
/docs/modules/crickbuzz.md:
--------------------------------------------------------------------------------
 1 | ```py
 2 | from scrape_up import cricbuzz
 3 | ```
 4 | 
 5 | Create an instance of `Cricubzz` class.
 6 | 
 7 | ```python
 8 | 	cricbuzz = cricbuzz.Cricubzz()
 9 | ```
10 | 
11 | | Methods                      | Details                                                                |
12 | | ---------------------------- | ---------------------------------------------------------------------- |
13 | | `.get_live_matches()`        | Returns a list of live matches from Cricbuzz.                          |
14 | | `.get_recent_matches()`      | Returns a list of recent matches from Cricbuzz.                        |
15 | | `.get_upcoming_matches()`    | Returns a list of upcoming matches from Cricbuzz.                      |
16 | | `.get_series()`              | Returns a dictionary of series in month and year format from Cricbuzz. |
17 | | `.get_series_from_archive()` | Returns a list of series from the archive from Cricbuzz.               |
18 | | `.get_matches_by_day()`      | Returns a dictionary of matches by day from Cricbuzz.                  |
19 | | `.get_series_matches()`      | Returns a list of matches in a series from Cricbuzz.                   |
20 | | `.get_series_stats()`        | Returns a list of stats of players in a series from Cricbuzz.          |
21 | | `.get_teams_list()`          | Returns a list of teams from Cricbuzz.                                 |
22 | | `.get_team_schedule()`       | Returns a list of matches of a team from Cricbuzz.                     |
23 | | `.get_team_players()`        | Returns a list of players of a team from Cricbuzz.                     |
24 | | `.get_team_results()`        | Returns a list of past results of a team from Cricbuzz.                |
25 | | `.get_team_stats()`          | Returns a list of player stats of a team from Cricbuzz.                |
26 | 
27 | ---


--------------------------------------------------------------------------------
/docs/modules/devpost.md:
--------------------------------------------------------------------------------
 1 | 
 2 | Create an instance of `Devpost` class.
 3 | 
 4 | ```python
 5 | posts = Devpost()
 6 | ```
 7 | 
 8 | | Methods             | Details                                                                                                              |
 9 | | ------------------- | -------------------------------------------------------------------------------------------------------------------- |
10 | | `.get_projects()`   | Returns the latest projects along with their decription, like and commment count, image and member details.          |
11 | | `.search(topic)`    | Returns the searched projects along with their decription, like and commment count, image and member details.        |
12 | | `.get_hackathons()` | Returns the latest hackathons along with their title, participants, prizes, deadlines.                               |
13 | | `.get_featured()`   | Returns the latest featured projects along with their decription, like and commment count, image and member details. |
14 | | `.get_winner()`     | Returns the latest winning projects along with their decription, like and commment count, image and member details.  |
15 | 
16 | ---


--------------------------------------------------------------------------------
/docs/modules/dribbble.md:
--------------------------------------------------------------------------------
 1 | ```py
 2 | from scrape_up import dribbble
 3 | ```
 4 | 
 5 | Create an instance of `Dribbble` class.
 6 | 
 7 | ```python
 8 | shots = dribbble.Dribbble()
 9 | ```
10 | 
11 | | Methods               | Details                                                                                                                        |
12 | | --------------------- | ------------------------------------------------------------------------------------------------------------------------------ |
13 | | `.get_shots()`        | Returns the latest shots along with their title, designer, designer URL, like and view count, and link.                        |
14 | | `.search(topic)`      | Returns the latest shots along with their title, designer, designer URL, like and view count, and link for the searched topic. |
15 | | `.get_animation()`    | Returns the latest animation along with their title, designer, designer URL, like and view count, and link.                    |
16 | | `.get_branding()`     | Returns the latest branding along with their title, designer, designer URL, like and view count, and link.                     |
17 | | `.get_illustration()` | Returns the latest illustration along with their title, designer, designer URL, like and view count, and link.                 |
18 | | `.get_mobile()`       | Returns the latest mobile shots along with their title, designer, designer URL, like and view count, and link.                 |
19 | | `.get_webdesign()`    | Returns the latest web-design shots along with their title, designer, designer URL, like and view count, and link.             |
20 | 
21 | ---
22 | 


--------------------------------------------------------------------------------
/docs/modules/eazydinner.md:
--------------------------------------------------------------------------------
 1 | 
 2 | Create an instance of `EazyDiner` class.
 3 | 
 4 | ```python
 5 | restaurants = EazyDiner(location="city-name")
 6 | ```
 7 | 
 8 | | Methods                   | Details                                                                                          |
 9 | | ------------------------- | ------------------------------------------------------------------------------------------------ |
10 | | `.get_restaurants()`      | Returns the restaurants name, location, rating, cuisine and prices in JSON format.               |
11 | | `.get_breakfast()`        | Returns the restaurants name, location, rating, cuisine and prices in JSON format for Breakfast. |
12 | | `.get_lunch()`            | Returns the restaurants name, location, rating, cuisine and prices in JSON format for Lunch.     |
13 | | `.get_dinner()`           | Returns the restaurants name, location, rating, cuisine and prices in JSON format for Dinner.    |
14 | | `.dinner_with_discount()` | Returns a list of restaurants from the entered location with a 50% offer.                        |
15 | | `.get_top10()`            | Returns a list of the top 10 restaurants from a given city.                                      |
16 | 
17 | ---
18 | 


--------------------------------------------------------------------------------
/docs/modules/ebay.md:
--------------------------------------------------------------------------------
 1 | ```py
 2 | from scrape_up import ebay
 3 | ```
 4 | 
 5 | Create an instance of `EBAY` class
 6 | 
 7 | ```python
 8 | quora = ebay.eBay()
 9 | ```
10 | 
11 | | Methods             | Details                             |
12 | | ------------------- | ----------------------------------- |
13 | | `.spotlights()`     | Returns spotlight deals on eBay.    |
14 | | `.featured()`       | Returns the featured deals on eBay. |
15 | | `.specific_deals()` | Returns the specific deals on eBay. |
16 | 
17 | ---


--------------------------------------------------------------------------------
/docs/modules/espn.md:
--------------------------------------------------------------------------------
 1 | ```py
 2 | from scrape_up import espn
 3 | ```
 4 | 
 5 | Create an instance of `ESPN` class
 6 | 
 7 | ```python
 8 | espn = espn.ESPN()
 9 | ```
10 | 
11 | | Method              | Details                                                        |
12 | | ------------------- | -------------------------------------------------------------- |
13 | | `get_scoreboard()`  | Fetches and returns the football scoreboards for a given date. |
14 | | `get_tournaments()` | Fetches and returns information about football tournaments.    |
15 | | `get_teams()`       | Fetches and returns information about football teams.          |
16 | 
17 | ---
18 | 


--------------------------------------------------------------------------------
/docs/modules/flexjobs.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ```python
 3 |     flex_jobs = FlexJobs(search_query, location_query, min_jobs)
 4 | ```
 5 | 
 6 | - Attributes
 7 | 
 8 | | Attribute        | Description                                                       |
 9 | | ---------------- | ----------------------------------------------------------------- |
10 | | `search_query`   | The search query to filter job listings.                          |
11 | | `location_query` | The location query to filter job listings (defaults to '').       |
12 | | `min_jobs`       | The maximum number of job listings to retrieve (defaults to 100). |
13 | 
14 | - Methods
15 | 
16 | | Method                                 | Description                                                                                                                               |
17 | | -------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------- |
18 | | `get_jobs() -> list`                   | Retrieves job listings from FlexJobs website based on search and location queries. Returns a list of dictionaries containing job details. |
19 | | `scrape_job_info(job_listing) -> dict` | Extracts job details from a job listing HTML element.                                                                                     |
20 | 
21 | ---


--------------------------------------------------------------------------------
/docs/modules/flipkart.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ### Scrape details of products
 3 | 
 4 | Create an instance of `Flipkart` class.
 5 | 
 6 | ```python
 7 | item = Flipkart()
 8 | ```
 9 | 
10 | | Methods               | Details                                                            |
11 | | --------------------- | ------------------------------------------------------------------ |
12 | | `.TVs()`              | Returns the list of TV sets on flipkart                            |
13 | | `.bestseller_books()` | Returns the list of bestselling books data listed on Flipkart.     |
14 | | `.mobiles()`          | Returns the list of mobile phones under 50K along with their data. |
15 | | `.sport_shoes()`      | Returns the list of trendong sport shoes data.                     |
16 | | `.laptops()`          | Returns the list of laptop from flipkart.                          |
17 | | `.camera()`           | Returns the list of camera from flipkart.                          |
18 | | `.computer()`         | Returns the list of computer from flipkart.                        |
19 | | `.tablets()`          | Returns the list of tablets from flipkart.                         |
20 | | `.cycle()`            | Returns the list of bicycles from flipkart.                        |
21 | | `.printers()`         | Returns the list of printers from flipkart.                        |
22 | | `.monitor()`          | Returns the list of monitors from flipkart.                        |
23 | | `.ac()`               | Returns the list of acs from flipkart.                             |
24 | | `.refrigerator()`     | Returns the list of refrigerators from flipkart.                   |
25 | | `.VRbox()`            | Returns the list of VRbox from flipkart.                           |
26 | | `.Speakers()`         | Returns the list of Speakers from flipkart.                        |
27 | 
28 | ---


--------------------------------------------------------------------------------
/docs/modules/flipkartclothing.md:
--------------------------------------------------------------------------------
 1 | Create an instance of `FlipkartClothing` class.
 2 | 
 3 | ```python
 4 | cloth = flipkart.FlipkartClothing()
 5 | ```
 6 | 
 7 | | Methods                    | Details                                                        |
 8 | | -------------------------- | -------------------------------------------------------------- |
 9 | | `.scrape()`                | Returns the list of t-shirts with other relevant info.         |
10 | | `.range()`                 | Returns the list of t-shirts between a particular price range. |
11 | | `.minrating()`             | Returns the list of t-shirts having a minimum given rating.    |
12 | | `.gendermale()`            | Returns the list of t-shirts which are for males.              |
13 | | `.genderfemale()`          | Returns the list of t-shirts that are there for females.       |
14 | | `.size()`                  | Returns the list of t-shirts having a particular size.         |
15 | | `formal_shirts_for_male()` | It returns those t-shirts which are of a particular size.      |
16 | 
17 | ---


--------------------------------------------------------------------------------
/docs/modules/flipkartlaptop.md:
--------------------------------------------------------------------------------
 1 | Create an instance of `FlipkartLaptops` class.
 2 | 
 3 | ```python
 4 | item = flipkart.FlipkartLaptops()
 5 | ```
 6 | 
 7 | | Methods      | Details                                   |
 8 | | ------------ | ----------------------------------------- |
 9 | | `.laptops()` | Returns the list of laptops with details. |
10 | 
11 | ---
12 | 


--------------------------------------------------------------------------------
/docs/modules/flyrobu.md:
--------------------------------------------------------------------------------
 1 | ```py
 2 | from scrape_up import flyrobu
 3 | ```
 4 | 
 5 | Create an instance of `Flyrobu` class.
 6 | 
 7 | ```python
 8 | flyrobu = flyrobu.Flyrobu()
 9 | ```
10 | 
11 | | Methods                              | Details                                                                                                        |
12 | | ------------------------------------ | -------------------------------------------------------------------------------------------------------------- |
13 | | `.search(keyword)`                   | Returns the json data of all the details related to search by informing about the total amount of items found. |
14 | | `.get_product_details(product_name)` | Returns the json data of the product details based on the given `product_name`.                                |
15 | 
16 | ---


--------------------------------------------------------------------------------
/docs/modules/githubedu.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ```python
 3 | from scrape_up import github_education
 4 | ```
 5 | 
 6 | ### Scrape user details
 7 | 
 8 | Create an instance of the `Events` class.
 9 | 
10 | ```py
11 | events = github_education.Events()
12 | ```
13 | 
14 | | Methods         | Details                                                                                                             |
15 | | --------------- | ------------------------------------------------------------------------------------------------------------------- |
16 | | `.get_events()` | Returns the latest events along with their title, image_url, description, date, location, language, tags, and link. |
17 | 
18 | ---


--------------------------------------------------------------------------------
/docs/modules/googlenews.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ### Scrape articles with title, descriptions, news source, date and link regarding a topic
 3 | 
 4 | Create an instance of `GoogleNews` class.
 5 | 
 6 | ```python
 7 | articles = GoogleNews()
 8 | ```
 9 | 
10 | | Methods                        | Details                                                                                          |
11 | | ------------------------------ | ------------------------------------------------------------------------------------------------ |
12 | | `.getArticles(topic="github")` | Returns the list of articles with title, descriptions, news source, date and link in JSON format |
13 | | `.top_stories()`               | Returns the list of top stories listed regarding the mentioned topic                             |
14 | | `.timed_aticles(time)`         | Returns the list of top stories listed regarding the mentioned topic and within that time frame  |
15 | | `.bylanguage(lang)`            | Returns the list of top stories listed regarding the mentioned topic in the specified language   |
16 | | `.bynumerofdaysback(number)`   | Returns the list of stories listed by given number of days back from the current day             |
17 | | `.bylocation(countryname)`     | Returns the list of top stories listed of the specified country or geolocation                   |
18 | 
19 | ---


--------------------------------------------------------------------------------
/docs/modules/hackerrank.md:
--------------------------------------------------------------------------------
 1 | ```py
 2 | from scrape_up import hackerrank
 3 | ```
 4 | 
 5 | ### Scrape user details
 6 | 
 7 | Create an object of class `User`.
 8 | 
 9 | ```python
10 | hackerank = hackerrank.User()
11 | ```
12 | 
13 | | Methods                      | Details                                                                                   |
14 | | ---------------------------- | ----------------------------------------------------------------------------------------- |
15 | | `get_profile(id="username")` | Returns name, username, country, user_type, details, badges, verified_skills, social etc. |
16 | | `get_skills()`               | Returns a list of verified skills and their links.                                        |
17 | 
18 | ### Scrape contest details
19 | 
20 | Create an object of class `Contest`.
21 | 
22 | ```python
23 | hackerank = hackerrank.Contest()
24 | ```
25 | 
26 | | Methods               | Details                                                              |
27 | | --------------------- | -------------------------------------------------------------------- |
28 | | `active_contests()`   | Returns information on active contests like title, status, and link. |
29 | | `archived_contests()` | Returns information regarding archived contests.                     |
30 | 
31 | ---


--------------------------------------------------------------------------------
/docs/modules/hashnode.md:
--------------------------------------------------------------------------------
 1 | 
 2 | Create an instance of `Hashnode` class.
 3 | 
 4 | ```python
 5 | blogs = Hashnode()
 6 | ```
 7 | 
 8 | | Methods           | Details                                                                                               |
 9 | | ----------------- | ----------------------------------------------------------------------------------------------------- |
10 | | `.get_feed()`     | Returns the blogs with title, descriptions, author, read time, like and comment count, date and link  |
11 | | `.get_featured()` | Returns the featured blogs with title, descriptions, author, like and comment count, date and link    |
12 | | `.get_recent()`   | Returns the recent blogs with title, descriptions, author, like and comment count, date and link      |
13 | | `.search(topic)`  | Returns the blogs with title, descriptions, author, like and comment count, date and link for a topic |
14 | 
15 | ---
16 | 


--------------------------------------------------------------------------------
/docs/modules/healthgrade.md:
--------------------------------------------------------------------------------
 1 | ```py
 2 | from scrape_up import healthgrades
 3 | ```
 4 | 
 5 | Create an instance of `HealthGrades` class
 6 | 
 7 | ```python
 8 | hc = healthgrades.HealthGrades()
 9 | ```
10 | 
11 | | Method                      | Details                                                              |
12 | | --------------------------- | -------------------------------------------------------------------- |
13 | | `get_best_hospitals(state)` | Fetches and returns information about the best hospitals in a state. |
14 | 
15 | ---


--------------------------------------------------------------------------------
/docs/modules/iccranking.md:
--------------------------------------------------------------------------------
 1 | ```py
 2 | from scrape_up import icc
 3 | ```
 4 | 
 5 | Create an instance of `ICC` class.
 6 | 
 7 | ```python
 8 | scraper = icc.ICC()
 9 | ```
10 | 
11 | | Method                               | Details                                                               |
12 | | ------------------------------------ | --------------------------------------------------------------------- |
13 | | `.team_rankings(format)`             | Returns the list of rankings of teams of the desired format.          |
14 | | `.player_ranking(type,format)`       | Returns the list of player rankings of desired type and format.       |
15 | | `.team_rankings_women(format)`       | Returns the list of rankings of teams of the desired format.          |
16 | | `.player_ranking_women(type,format)` | Returns the list of women player rankings of desired type and format. |
17 | 
18 | ---
19 | 


--------------------------------------------------------------------------------
/docs/modules/imdb-actor.md:
--------------------------------------------------------------------------------
 1 | Create an instance of `Actor` class.
 2 | 
 3 | ```python
 4 | actor = imdb.Actor(actor_name)
 5 | ```
 6 | 
 7 | | Methods             | Details                                                  |
 8 | | ------------------- | -------------------------------------------------------- |
 9 | | `.popular_movies()` | Returns the popular movies in which the actor has acted. |
10 | | `.all_movies()`     | Returns all movies acted in, and upcoming movies.        |
11 | | `.awards()`         | Returns the number of awards and nominations.            |
12 | 


--------------------------------------------------------------------------------
/docs/modules/imdb-boxoffice.md:
--------------------------------------------------------------------------------
1 | Create an instance of `BoxOffice` class.
2 | 
3 | ```python
4 | boxoffice = imdb.BoxOffice()
5 | ```
6 | 
7 | | Methods         | Details                                                                         |
8 | | --------------- | ------------------------------------------------------------------------------- |
9 | | `.top_movies()` | Returns the top box office movies, weekend and total gross, and weeks released. |


--------------------------------------------------------------------------------
/docs/modules/imdb-celeb.md:
--------------------------------------------------------------------------------
1 | Create an instance of `Celeb` class.
2 | 
3 | ```python
4 | celeb = imdb.Celeb()
5 | ```
6 | 
7 | | Methods         | Details                                                 |
8 | | --------------- | ------------------------------------------------------- |
9 | | `.top_celebs()` | Returns the name, roles, and famous movie of the celeb. |


--------------------------------------------------------------------------------
/docs/modules/imdb-indian.md:
--------------------------------------------------------------------------------
 1 | Create an instance of `IndianMovies` class.
 2 | 
 3 | ```python
 4 | indianmovies = imdb.IndianMovies()
 5 | ```
 6 | 
 7 | | Methods                | Details                                        |
 8 | | ---------------------- | ---------------------------------------------- |
 9 | | `.top_indian_movies()` | Returns the current list of top Indian movies. |
10 | 


--------------------------------------------------------------------------------
/docs/modules/imdb-movies.md:
--------------------------------------------------------------------------------
 1 | Create an instance of `Movie` class.
 2 | 
 3 | ```python
 4 | movie = imdb.Movie(movie_name)
 5 | ```
 6 | 
 7 | | Methods          | Details                                                   |
 8 | | ---------------- | --------------------------------------------------------- |
 9 | | `.rating()`      | Returns the IMDB rating of the movie.                     |
10 | | `.description()` | Returns the description, cast, and director of the movie. |
11 | | `.more_movies()` | Returns similar movies recommended by IMDB.               |
12 | | `.box_office()`  | Returns budget, gross worldwide collections of the movie. |
13 | 


--------------------------------------------------------------------------------
/docs/modules/imdb.md:
--------------------------------------------------------------------------------
 1 | Create an instance of the `IMDB` class.
 2 | 
 3 | ```python
 4 | scraper = IMDB()
 5 | ```
 6 | 
 7 | | Methods                       | Details                                                        |
 8 | | ----------------------------- | -------------------------------------------------------------- |
 9 | | `.top_rated()`                | Returns the top-rated movies listed on IMDB.                   |
10 | | `.scrape_genre_movies(genre)` | Returns the list of movies related to the genre you mentioned. |
11 | | `.top_rated_shows()`          | Returns the top-rated shows listed on IMDB.                    |
12 | 
13 | Create an instance of `Movie` class.
14 | 
15 | ```python
16 | movie = Movie(movie_name)
17 | ```
18 | 
19 | | Methods          | Details                                                  |
20 | | ---------------- | -------------------------------------------------------- |
21 | | `.rating()`      | Returns the IMDB rating of the movie                     |
22 | | `.description()` | Returns the description, cast and director of the movie  |
23 | | `.more_movies()` | Returns similar movies recommended by IMDB               |
24 | | `.box_office()`  | Returns budget, gross worldwide collections of the movie |
25 | 
26 | Create an instance of `Actor` class.
27 | 
28 | ```python
29 | actor = Actor(actor_name)
30 | ```
31 | 
32 | | Methods             | Details                                                 |
33 | | ------------------- | ------------------------------------------------------- |
34 | | `.popular_movies()` | Returns the popular movies in which the actor has acted |
35 | | `.all_movies()`     | Returns all movies acted in and upcoming movies         |
36 | | `.awards()`         | Returns the number of awards and nominations            |
37 | 
38 | Create an instance of `Celeb` class.
39 | 
40 | ```python
41 | celeb = Celeb()
42 | ```
43 | 
44 | | Methods         | Details                                            |
45 | | --------------- | -------------------------------------------------- |
46 | | `.top_celebs()` | Returns the name, roles, famous movie of the celeb |
47 | 
48 | Create an instance of `IndianMovies` class.
49 | 
50 | ```python
51 | indianmovies = IndianMovies()
52 | ```
53 | 
54 | | Methods                | Details                                       |
55 | | ---------------------- | --------------------------------------------- |
56 | | `.top_indian_movies()` | Returns the current list of top Indian movies |
57 | 
58 | Create an instance of `BoxOffice` class.
59 | 
60 | ```python
61 | boxoffice = BoxOffice()
62 | ```
63 | 
64 | | Methods         | Details                                                                       |
65 | | --------------- | ----------------------------------------------------------------------------- |
66 | | `.top_movies()` | Returns the top box office movies, weekend and total gross and weeks released |
67 | 
68 | ---
69 | 
70 | <p align="right">(<a href="#top">Back to top</a>)</p>
71 | 


--------------------------------------------------------------------------------
/docs/modules/instagram.md:
--------------------------------------------------------------------------------
 1 | ## Instagram
 2 | 
 3 | ```python
 4 | from scrape_up import instagram
 5 | ```
 6 | 
 7 | ### Scrape User details
 8 | 
 9 | First, create an object of the class `User`
10 | 
11 | ```python
12 | user = instagram.User(username="nikhil25803")
13 | ```
14 | 
15 | | Methods           | Details                                    |
16 | | ----------------- | ------------------------------------------ |
17 | | `.user_details()` | Returns the number of followers of a user. |
18 | 


--------------------------------------------------------------------------------
/docs/modules/installation.md:
--------------------------------------------------------------------------------
 1 | # How to use this package? 👀
 2 | 
 3 | - Install the package from `pip`
 4 | 
 5 | ```PowerShell
 6 | pip install scrape-up
 7 | ```
 8 | 
 9 | - Scrape the required information, for example, one wants to extract the number of followers of a user.
10 | 


--------------------------------------------------------------------------------
/docs/modules/kooapp.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ```py
 3 | from scrape_up import kooapp
 4 | ```
 5 | 
 6 | ### Scrap up the kooapp user's detail
 7 | 
 8 | Create an instance of `KooUser` class.
 9 | 
10 | ```py
11 | user = kooapp.KooUser('krvishal')
12 | ```
13 | 
14 | | Methods                  | Details                                                      |
15 | | ------------------------ | ------------------------------------------------------------ |
16 | | `.get_name()`            | Returns the name of the user.                                |
17 | | `.get_bio()`             | Returns the bio of the user.                                 |
18 | | `.get_avatar_url()`      | Returns the URL of the first avatar of the user.             |
19 | | `.followers()`           | Returns the number of followers of a user.                   |
20 | | `.following()`           | Returns the number of people the user is following.          |
21 | | `.get_social_profiles()` | Returns all the connected social media profiles of the user. |
22 | | `.get_profession()`      | Returns the title/profession of the user.                    |


--------------------------------------------------------------------------------
/docs/modules/leetcode.md:
--------------------------------------------------------------------------------
 1 | ```python
 2 | from scrape_up import leetcode
 3 | ```
 4 | 
 5 | ### Scrape user details
 6 | 
 7 | First, create an object of class `LeetCodeScraper`
 8 | 
 9 | ```python
10 | leetcode_scraper = LeetCodeScraper(username="nikhil25803")
11 | ```
12 | 
13 | **User Specific Methods - Require Username**
14 | 
15 | | Methods                       | Details                                                               |
16 | | ----------------------------- | --------------------------------------------------------------------- |
17 | | `.scrape_rank()`              | Used to scrape the rank of a user on LeetCode.                        |
18 | | `.scrape_rating()`            | Used to scrape the rating of a user on LeetCode.                      |
19 | | `.get_problems_solved()`      | Used to scrape total problems solved by a user on LeetCode.           |
20 | | `.get_solved_by_difficulty()` | Used to scrape difficulty wise problems solved by a user on LeetCode. |
21 | | `.get_github_link()`          | Used to scrape github link of a user on LeetCode.                     |
22 | | `.get_linkedin_link()`        | Used to scrape linkedin link of a user on LeetCode.                   |
23 | | `.get_community_stats()`      | Used to scrape community stats of a user on LeetCode.                 |
24 | 
25 | **General Purpose Methods - Does not Require Username**
26 | 
27 | | Methods                                            | Details                                                                                                                                                                                                      |
28 | | -------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
29 | | `.get_problems(difficulty, tags_list, search_key)` | Used to scrape top problems of LeetCode based on filters. Difficulty is string from ("easy", "medium", "hard"). Tags_list is list of tags. Search_key is string to search. All ther parameters are optional. |
30 | | `.get_contests()`                                  | Used to scrape the upcoming LeetCode Contests details.                                                                                                                                                       |
31 | | `.get_daily_challenge()`                           | Used to scrape LeetCode Daily Challenge details.                                                                                                                                                             |
32 | 


--------------------------------------------------------------------------------
/docs/modules/letterboxd.md:
--------------------------------------------------------------------------------
 1 | Create an instance of `Letterboxd` class.
 2 | 
 3 | ```python
 4 | letterboxd_user = Letterboxd(user="arpy8")
 5 | ```
 6 | 
 7 | | Methods                     | Details                                                                                              |
 8 | | --------------------------- | ---------------------------------------------------------------------------------------------------- |
 9 | | `.films_watched()`          | Returns the numbers of films watched by the user.                                                    |
10 | | `.recent_activity(n)`       | Returns a list of length `n` of the latest activity by the user.                                     |
11 | | `.recent_reviews(n)`        | Returns a list of dictionaries of length `n` with the latest reviews by the user.                    |
12 | | `.get_watchlist(n)`         | Returns a list of length `n` including movies and series watchlisted by the user.                    |
13 | | `.get_followers_count()`    | Returns the number of followers of the user.                                                         |  
14 | | `.get_following_count()`    | Returns the number of following of the user.                                                         |
15 | 
16 | Note: `n` is an integer value which is optional and can be used to limit the number of results returned by the methods.
17 | 
18 | ---


--------------------------------------------------------------------------------
/docs/modules/luma.md:
--------------------------------------------------------------------------------
 1 | 
 2 | Create an instance of `Luma` class.
 3 | 
 4 | ```python
 5 | events = Luma()
 6 | ```
 7 | 
 8 | | Methods         | Details                                                                                    |
 9 | | --------------- | ------------------------------------------------------------------------------------------ |
10 | | `.get_events()` | Returns the latest events along with their organizer, location, image url, price and link. |
11 | 
12 | ---
13 | 


--------------------------------------------------------------------------------
/docs/modules/medium.md:
--------------------------------------------------------------------------------
 1 | ## Medium
 2 | 
 3 | ```python
 4 | from scrape_up import medium
 5 | ```
 6 | 
 7 | ### Scrape user details
 8 | 
 9 | First, create an object of class `User`
10 | 
11 | ```python
12 | user = medium.Users(username="nikhil25803")
13 | ```
14 | 
15 | | Methods           | Details                                  |
16 | | ----------------- | ---------------------------------------- |
17 | | `.get_articles()` | Returns the article titles of the users. |
18 | 
19 | ### Scrape trending articles
20 | 
21 | | Methods           | Details                                    |
22 | | ----------------- | ------------------------------------------ |
23 | | `.get_trending()` | Returns the trending titles of the medium. |
24 | 
25 | ### Scrape publication details
26 | 
27 | First, create an object of class `Publication`
28 | 
29 | ```python
30 | publication = medium.Publication(link="https://....")
31 | ```
32 | 
33 | | Methods           | Details                                              |
34 | | ----------------- | ---------------------------------------------------- |
35 | | `.get_articles()` | Returns a list of articles of the given publication. |
36 | 
37 | ---


--------------------------------------------------------------------------------
/docs/modules/reddit.md:
--------------------------------------------------------------------------------
 1 | 
 2 | Create an instance of `Reddit` class.
 3 | 
 4 | ```python
 5 | posts = Reddit()
 6 | ```
 7 | 
 8 | | Methods          | Details                                                                                                                              |
 9 | | ---------------- | ------------------------------------------------------------------------------------------------------------------------------------ |
10 | | `.getFeed()`     | Returns the posts with title, descriptions, subreddit, subreddit avatar, time, vote and comment count, image, category and link      |
11 | | `.get_best()`    | Returns the best posts with title, descriptions, subreddit, subreddit avatar, time, vote and comment count, image, category and link |
12 | | `.get_hot()`     | Returns the hot posts with title, descriptions, subreddit, subreddit avatar, time, vote and comment count, image, category and link  |
13 | | `.get_top()`     | Returns the top posts with title, descriptions, subreddit, subreddit avatar, time, vote and comment count, image, category and link  |
14 | | `.search(topic)` | Returns the top posts with title, subreddit, subreddit avatar, date, vote and comment count and link for a searched topic            |
15 | 
16 | ---


--------------------------------------------------------------------------------
/docs/modules/spotify.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ### Scrape up songs
 3 | 
 4 | Create an instance of `Spotify` class.
 5 | 
 6 | ```python
 7 | scraper = Spotify()
 8 | ```
 9 | 
10 | | Methods                      | Details                                                   |
11 | | ---------------------------- | --------------------------------------------------------- |
12 | | `.scrape_songs_by_keyword()` | Returns the list of songs that are related to the keyword |
13 | | `.scrape_homepage()`         | Returns the list of playlists on the homepage             |
14 | | `.close()`                   | To close the chrome tab that is showing results           |
15 | 
16 | ---
17 | 


--------------------------------------------------------------------------------
/docs/modules/stackoverflow.md:
--------------------------------------------------------------------------------
 1 | reate an instance of `StackOverflow` class.
 2 | 
 3 | ```python
 4 | questions = StackOverflow("topic")
 5 | ```
 6 | 
 7 | | Methods                     | Details                                                                                              |
 8 | | --------------------------- | ---------------------------------------------------------------------------------------------------- |
 9 | | `.getNewQuestions()`        | Returns the new questions, views, votes, answer counts, and descriptions in JSON format              |
10 | | `.getActiveQuestions()`     | Returns the active questions, views, votes, answer counts, and descriptions in JSON format           |
11 | | `.getUnansweredQuestions()` | Returns the unanswered questions, views, votes, answer counts, and descriptions in JSON format       |
12 | | `.getBountiedQuestions()`   | Returns the bountied questions, views, votes, answer counts, and descriptions in JSON format         |
13 | | `.getFrequentQuestions()`   | Returns the frequently asked questions, views, votes, answer counts, and descriptions in JSON format |
14 | | `.getHighScoredQuestions()` | Returns the most voted questions, views, votes, answer counts, and descriptions in JSON format       |
15 | 
16 | ---
17 | 


--------------------------------------------------------------------------------
/docs/modules/techcrunch.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ### Scrape articles with title, descriptions, images, author, date and link
 3 | 
 4 | Create an instance of `TechCrunch` class.
 5 | 
 6 | ```python
 7 | articles = TechCrunch()
 8 | ```
 9 | 
10 | | Methods          | Details                                                                                                                |
11 | | ---------------- | ---------------------------------------------------------------------------------------------------------------------- |
12 | | `.getArticles()` | Returns the articles with title, descriptions, images, author, date and link regarding a category in JSON format       |
13 | | `.search()`      | Returns the searched articles with title, descriptions, images, author, date and link regarding a topic in JSON format |
14 | 
15 | ---


--------------------------------------------------------------------------------
/docs/modules/wikipedia.md:
--------------------------------------------------------------------------------
 1 | ```python
 2 | from scrape_up import Wikipedia
 3 | ```
 4 | 
 5 | ## Scrape Wikipedia Details
 6 | 
 7 | Create an object of the 'WikipediaScrapper' class:
 8 | 
 9 | ```python
10 | Scraper = WikipediaScraper()
11 | ```
12 | 
13 | | Methods           | Details                                                 |
14 | | ----------------- | ------------------------------------------------------- |
15 | | `.scrape(url)`    | Returns the Scraped Data from Wikipedia                 |
16 | | `.get_featured()` | Returns the featured article for the day from Wikipedia |
17 | 
18 | ---
19 | 


--------------------------------------------------------------------------------
/docs/modules/youtube.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ### Scrape Video Details
 3 | 
 4 | Create an instance of `Video` class.
 5 | 
 6 | ```python
 7 | video = Video(video_url="video_url")
 8 | ```
 9 | 
10 | | Methods         | Details                   |
11 | | --------------- | ------------------------- |
12 | | `.getDetails()` | Returns the video details |
13 | 
14 | ## Scrape Channel Details
15 | 
16 | Create an instance of `Channel` class.
17 | 
18 | ```python
19 | channel_data = Channel(channel_username="BeABetterDev")
20 | ```
21 | 
22 | | Methods            | Details                                                                |
23 | | ------------------ | ---------------------------------------------------------------------- |
24 | | `.getAbout()`      | Returns the channel details mentioned in the about page of the channel |
25 | | `.getVideos()`     | Returns all the video details in the videos page of the channel        |
26 | | `.get_community()` | Returns all the post details in the community page of the channel      |
27 | 
28 | ---


--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
 1 | site_name: Scrape Up
 2 | site_description: A web-scraping-based python package that enables you to scrape data from various platforms like GitHub, Twitter, Instagram, or any useful website.
 3 | site_url: ""
 4 | plugins:
 5 |   - search
 6 | nav:
 7 | - Scrape Up: index.md
 8 | - Contribution: CONTRIBUTING.md
 9 | - Installation: installation.md
10 | - Guide: home.md
11 | - Modules: 
12 |   - 'Academia': modules/academia.md
13 |   - 'BBC News': modules/bbc.md
14 |   - 'Codechef': modules/codechef.md
15 |   - 'Coin Market Cap': modules/coinmarketcap.md
16 |   - 'Covid-19': modules/covid-19.md
17 |   - 'Crickbuzz': modules/crickbuzz.md
18 |   - 'Dribbble': modules/dribbble.md
19 |   - 'Ebay': modules/ebay.md
20 |   - 'ESPN': modules/espn.md
21 |   - 'Eazydiner': modules/eazydinner.md
22 |   - 'Flipkart': modules/flipkart.md
23 |   - 'Flipkart Clothing': modules/flipkartclothing.md
24 |   - 'Flipkart laptops': modules/flipkartlaptop.md
25 |   - 'Flyrobu': modules/flyrobu.md
26 |   - 'GitHub': modules/github.md
27 |   - 'Github Education': modules/githubedu.md
28 |   - 'Gitlab': modules/gitlab.md
29 |   - 'HackerEarth': modules/HackerEarth.md
30 |   - 'Hackernews': modules/Hackernews.md
31 |   - 'Hashnode': modules/hashnode.md
32 |   - 'Health Grades': modules/healthgrade.md
33 |   - 'ICC Rankings': modules/iccranking.md
34 |   - 'IMDb': modules/imdb.md
35 |   - 'IMDb Box Office': modules/imdb-boxoffice.md
36 |   - 'IMDb Indian Movies': modules/imdb-indian.md
37 |   - 'imdb-actor': modules/imdb-actor.md
38 |   - 'imdb-celebrity': modules/imdb-celeb.md
39 |   - 'imdb-movies': modules/imdb-movies.md
40 |   # - 'Amazon': modules/modules/amazon.md
41 |   # - 'Ask-Ubuntu': modules/ask-ubuntu.md
42 |   # - 'Coursera': modules/coursera.md
43 |   # - 'Devpost': modules/devpost.md
44 |   # - 'Finance': modules/Finance.md
45 |   # - 'Flex Jobs': modules/flexjobs.md
46 |   # - 'Google News': modules/googlenews.md
47 |   # - 'Instagram': modules/instagram.md
48 |   # - 'Internshala': modules/internshala.md
49 |   # - 'Kooapp': modules/kooapp.md
50 |   # - 'Leet Code': modules/leetcode.md
51 |   # - 'Luma': modules/luma.md
52 |   # - 'Medium': modules/medium.md
53 |   # - 'Reddit': modules/reddit.md
54 |   # - 'Spotify': modules/spotify.md
55 |   # - 'Stack Overflow': modules/stackoverflow.md
56 |   # - 'Tech Crunch': modules/techcrunch.md
57 |   # - 'Twitter': modules/Twitter.md
58 |   # - 'Wikipedia': modules/wikipedia.md
59 |   # - 'Youtube': modules/youtube.md- Code of Conduct: CODE_OF_CONDUCT.md
60 | 
61 | theme:
62 |   features:
63 |   - header.autohide
64 |   name: material
65 |   palette:
66 |   - media: '(prefers-color-scheme: dark)'
67 |     scheme: default
68 |     primary: teal
69 |     accent: amber
70 |     toggle:
71 |       icon: material/lightbulb
72 |       name: Switch to dark mode
73 |   - media: '(prefers-color-scheme: dark)'
74 |     scheme: slate
75 |     primary: teal
76 |     accent: amber
77 |     toggle:
78 |       icon: material/lightbulb-outline
79 |       name: Switch to light mode
80 | repo_name: /scrape-up
81 | repo_url: https://github.com/Clueless-Community/scrape-up


--------------------------------------------------------------------------------
/project_setup.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -e
 4 | 
 5 | pip install --upgrade setuptools wheel
 6 | 
 7 | 
 8 | pip_version=$(pip --version | awk '{print $2}')
 9 | required_version="24.0"
10 | 
11 | version_greater_equal() {
12 |     printf '%s\n%s' "$1" "$2" | sort -C -V
13 | }
14 | 
15 | if ! version_greater_equal "$pip_version" "$required_version"; then
16 |     echo "Upgrading pip from version $pip_version to $required_version"
17 |     pip install --upgrade pip
18 | else
19 |     echo "pip is already at version $pip_version, no need to upgrade."
20 | fi
21 | 
22 | pip install -r requirements.txt
23 | 
24 | pip install .
25 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = [
3 |     "setuptools",
4 |     "wheel"
5 | ]
6 | build-backend = "setuptools.build_meta"


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | beautifulsoup4==4.11.1
2 | bs4==0.0.1
3 | requests==2.28.2
4 | requests-html==0.10.0
5 | mkdocs==1.6.0
6 | mkdocs-material


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | [metadata]
 2 | name = scrape_up
 3 | version = 1.1.7
 4 | author = Clueless Community
 5 | author_email = official.cluelesscommunity@gmail.com
 6 | description = A web-scraping-based python package that enables you to scrape data from various platforms.
 7 | long_description = file: documentation.md
 8 | long_description_content_type = text/markdown
 9 | url = https://github.com/Clueless-Community/scrape-up
10 | classifiers =
11 |     Programming Language :: Python :: 3
12 |     License :: OSI Approved :: MIT License
13 |     Operating System :: OS Independent
14 | 
15 | [options]
16 | package_dir =
17 |     = src
18 | packages = find:
19 | python_requires = >=3.6
20 | install_requires =
21 |     bs4
22 |     requests
23 |     requests-html
24 |     beautifulsoup4
25 | 
26 | [options.packages.find]
27 | where = src


--------------------------------------------------------------------------------
/src/scrape_up/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Clueless-Community/scrape-up/5a92d9b64d25329035c4afc0ef7f18e49d774997/src/scrape_up/__init__.py


--------------------------------------------------------------------------------
/src/scrape_up/academia/__init__.py:
--------------------------------------------------------------------------------
1 | from .academia import Academia
2 | 
3 | __all__ = ["Academia"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/amazon/__init__.py:
--------------------------------------------------------------------------------
1 | from .products import Product
2 | 
3 | __all__ = ["Product"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/askubuntu/__init__.py:
--------------------------------------------------------------------------------
1 | from .questions import Questions
2 | 
3 | __all__ = ["Questions"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/atcoder/__init__.py:
--------------------------------------------------------------------------------
1 | from .atcoder import Atcoder
2 | 
3 | __all__ = ["Atcoder"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/banners/__init__.py:
--------------------------------------------------------------------------------
1 | from .scraper88x31 import Scraper88x31
2 | 
3 | __all__ = ["Scraper88x31"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/banners/scraper88x31.py:
--------------------------------------------------------------------------------
 1 | import bs4
 2 | 
 3 | from scrape_up.config.request_config import RequestConfig, get
 4 | 
 5 | 
 6 | class Scraper88x31:
 7 |     """
 8 |     Create an instance of the `Scraper88x31` class.
 9 |     ```python
10 |     scraper = Scraper88x31()
11 |     ```
12 |     | Methods            | Details                                                  |
13 |     | ------------------ | -------------------------------------------------------- |
14 |     | `get_all()`        | Returns the list of all available 88x31 banners          |
15 |     """
16 | 
17 |     def __init__(self, *, config: RequestConfig = RequestConfig()):
18 |         self.urls_to_scrape = [
19 |             "https://cyber.dabamos.de/88x31/index.html",
20 |             "https://cyber.dabamos.de/88x31/index2.html",
21 |             "https://cyber.dabamos.de/88x31/index3.html",
22 |             "https://cyber.dabamos.de/88x31/index4.html",
23 |             "https://cyber.dabamos.de/88x31/index5.html",
24 |         ]
25 |         self.config = config
26 | 
27 |     def get_all(self):
28 |         """
29 |         Class: Scraper88x31
30 |         Returns the list of all available 88x31 banners
31 |         Example:
32 |         ```python
33 |         banners = Scraper88x31()
34 |         result = banners.get_all()
35 |         ```
36 | 
37 |         Returns:
38 |         ```json
39 |         ["https://cyber.dabamos.de/88x31/000010.gif", "https://cyber.dabamos.de/88x31/007button.gif", "..."]
40 |         ```
41 |         """
42 |         img_alt = []
43 |         for url in self.urls_to_scrape:
44 |             try:
45 |                 response = get(url, self.config)
46 |                 response.raise_for_status()
47 |                 source = response.content
48 |                 soup = bs4.BeautifulSoup(source, "lxml")
49 |                 for img_tag in soup.find_all("img"):
50 |                     img_alt.append(
51 |                         "https://cyber.dabamos.de/88x31/" + img_tag.get("alt") + ".gif"
52 |                     )
53 |                 return img_alt
54 |             except:
55 |                 return None
56 | 


--------------------------------------------------------------------------------
/src/scrape_up/bayt/__init__.py:
--------------------------------------------------------------------------------
1 | from .bayt import Jobs
2 | 
3 | __all__ = ["Jobs"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/bayt/bayt.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import requests
 3 | from bs4 import BeautifulSoup
 4 | 
 5 | 
 6 | class Jobs:
 7 |     """
 8 |     Create an instance of the class `Jobs`
 9 |     ```python
10 |     scraper = Jobs()
11 |     jobs_data = scraper.fetch_jobs(query, page)
12 |     ```
13 |     | Methods                       | Details                                                                    |
14 |     | ----------------------------- | -------------------------------------------------------------------------- |
15 |     | `.fetch_jobs(query, page)`    | Fetch job listings data from Bayt.com based on the given query and page.   |
16 |     """
17 | 
18 |     def __init__(self):
19 |         self.base_url = "https://www.bayt.com"
20 | 
21 |     def fetch_jobs(self, query, page=1):
22 |         """
23 |         Fetch job listings data from Bayt.com based on the given query and page.
24 | 
25 |         Parameters:
26 |         - `query`: The job search query.
27 |         - `page` : The page number of the search results (default: 1).
28 | 
29 |         Example:
30 |         ```python
31 |         scraper = Jobs()
32 |         jobs_data = scraper.fetch_jobs("software developer", page=1)
33 |         ```
34 |         """
35 |         try:
36 |             url = f"{self.base_url}/en/international/jobs/{query}-jobs/?page={page}"
37 |             response = requests.get(url)
38 | 
39 |             response.raise_for_status()
40 | 
41 |             soup = BeautifulSoup(response.text, "html.parser")
42 |             job_listings = soup.find_all("li", class_="has-pointer-d")
43 | 
44 |             jobs = []
45 |             for job in job_listings:
46 |                 job_info = self.__extract_job_info(job)
47 |                 if job_info:
48 |                     jobs.append(job_info)
49 |             sys.stdout.reconfigure(encoding="utf-8")
50 |             return jobs
51 |         except Exception:
52 |             return None
53 | 
54 |     def __extract_job_info(self, job):
55 |         """
56 |         Extract job information from a single job listing.
57 |         """
58 |         job_general_information = job.find("h2", class_="jb-title")
59 |         if not job_general_information:
60 |             return None
61 | 
62 |         job_title = self.__extract_job_title(job_general_information)
63 |         job_url = self.__extract_job_url(job_general_information)
64 |         company_name = self.__extract_company_name(job)
65 |         job_location = self.__extract_job_location(job)
66 | 
67 |         return {
68 |             "title": job_title,
69 |             "company": company_name,
70 |             "location": job_location,
71 |             "url": job_url,
72 |         }
73 | 
74 |     def __extract_job_title(self, job_general_information):
75 |         return job_general_information.text.strip()
76 | 
77 |     def __extract_job_url(self, job_general_information):
78 |         return self.base_url + job_general_information.a["href"].strip()
79 | 
80 |     def __extract_company_name(self, job):
81 |         company_name = job.find("b", class_="jb-company")
82 |         if company_name:
83 |             return company_name.text.strip()
84 |         return None
85 | 
86 |     def __extract_job_location(self, job):
87 |         job_location = job.find("span", class_="jb-loc")
88 |         if job_location:
89 |             return job_location.text.strip()
90 |         return None
91 | 


--------------------------------------------------------------------------------
/src/scrape_up/bbcnews/__init__.py:
--------------------------------------------------------------------------------
1 | from .bbcnews import BBCNews
2 | 
3 | __all__ = ["BBCNews"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/bbcnews/bbcnews.py:
--------------------------------------------------------------------------------
 1 | from bs4 import BeautifulSoup
 2 | 
 3 | from scrape_up.config.request_config import RequestConfig, get
 4 | 
 5 | 
 6 | class BBCNews:
 7 |     """
 8 |     First create an object of class `BBCNews`\n
 9 |     ```python
10 |     scraper = BBCNews()
11 |     ```
12 |     | Methods            | Details                                                  |
13 |     | ------------------ | -------------------------------------------------------- |
14 |     | `.get_headlines()` | Returns the list of object containig the headlines       |
15 |     | `get_article()`    | Returns an object with proper details about the articles |
16 |     """
17 | 
18 |     def __init__(self, *, config: RequestConfig = RequestConfig()):
19 |         self.base_url = "https://www.bbc.co.uk"
20 |         self.headlines_url = self.base_url + "/news"
21 |         headers = {
22 |             "User-Agent": "Mozilla/5.0 (Windows NT 6.3; Win 64 ; x64) Apple WeKit /537.36(KHTML , like Gecko) Chrome/80.0.3987.162 Safari/537.36"
23 |         }
24 |         self.config = config
25 |         if self.config.headers == {}:
26 |             self.config.set_headers(headers)
27 | 
28 |     def get_headlines(self):
29 |         """
30 |         Fetches the latest headlines from BBC News website.\n
31 |         Returns:
32 |         A list of dictionaries, each containing the index and headline text.
33 |         Example: [{'index': 1, 'headline': 'Headline 1'}, {'index': 2, 'headline': 'Headline 2'}, ...]
34 |         """
35 |         try:
36 |             response = get(self.headlines_url, self.config)
37 |             response.raise_for_status()  # Raise an exception for HTTP errors (4xx or 5xx)
38 |         except:
39 |             return None
40 | 
41 |         soup = BeautifulSoup(response.content, "html.parser")
42 |         headlines = soup.find_all("h3", class_="gs-c-promo-heading__title")
43 |         news_set = set()
44 |         news_list = []
45 |         index = 1
46 | 
47 |         for headline in headlines:
48 |             news_text = headline.get_text(strip=True)
49 |             if news_text not in news_set:
50 |                 news_set.add(news_text)
51 |                 news_list.append({"index": index, "headline": news_text})
52 |                 index += 1
53 | 
54 |         return news_list
55 | 
56 |     def get_article(self, url: str):
57 |         """
58 |         Create an instance of the class - `BBCNews`\n
59 |         ```python
60 |         scraper = BBCNews()
61 |         article = scraper.get_article()
62 |         print(article)
63 |         ```
64 |         """
65 |         try:
66 |             response = get(url, self.config).text
67 |             soup = BeautifulSoup(response, "lxml")
68 | 
69 |             main_heading = soup.find("h1", {"id": "main-heading"}).text.strip()
70 |             time = soup.find("time").text.strip()
71 |             text_content = soup.find_all("div", {"data-component": "text-block"})
72 |             Text = ""
73 |             for text in text_content:
74 |                 Text += text.text.strip() + " "
75 |             data = {"main_heading": main_heading, "time": time, "text": Text}
76 |             return data
77 |         except:
78 |             return None
79 | 


--------------------------------------------------------------------------------
/src/scrape_up/cars/__init__.py:
--------------------------------------------------------------------------------
1 | from .cars import Cars
2 | 
3 | __all__ = ["Cars"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/cars/cars.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | from bs4 import BeautifulSoup
 3 | import json
 4 | 
 5 | 
 6 | class Cars:
 7 |     """
 8 |     Create an instance of the class `Cars`
 9 |     ```python
10 |     scraper = Cars()
11 |     cars_data = scraper.fetch_cars(model, page)
12 |     ```
13 |     | Methods                    | Details                                              |
14 |     | ---------------------------| ---------------------------------------------------- |
15 |     | `.fetch_cars(model, page)` | Fetch car listings data based on the model and page. |
16 |     """
17 | 
18 |     def __init__(self):
19 |         self.base_url = "https://www.cars.com"
20 | 
21 |     def fetch_cars(self, model, page=1):
22 |         """
23 |         Fetch car listings data based on the model, and page.
24 | 
25 |         Parameters:
26 |         - `model`: The model of the car.
27 |         - `page` : The page number of the search results (default: 1).
28 | 
29 |         Example:
30 |         ```python
31 |         scraper = Cars()
32 |         cars_data = scraper.fetch_cars("Toyota", page=1)
33 |         ```
34 |         """
35 |         try:
36 |             url = f"{self.base_url}/shopping/results/?&keyword={model}&page={page}"
37 |             response = requests.get(url)
38 |             response.raise_for_status()
39 |             soup = BeautifulSoup(response.text, "html.parser")
40 |             car_listings = soup.find_all("div", class_="vehicle-card")
41 | 
42 |             cars = []
43 |             for car in car_listings:
44 |                 car_info = self.__extract_car_info(car)
45 |                 if car_info:
46 |                     cars.append(car_info)
47 |             return cars
48 |         except Exception:
49 |             return None
50 | 
51 |     def __extract_car_info(self, car):
52 |         """
53 |         Extract car information from a single car listing.
54 |         """
55 | 
56 |         car_model = self.__extract_car_model(car)
57 |         car_url = self.__extract_car_url(car)
58 |         dealer_name = self.__extract_dealer_name(car)
59 |         car_price = self.__extract_car_price(car)
60 |         car_discount = self.__extract_car_discount(car)
61 | 
62 |         return {
63 |             "model": car_model,
64 |             "dealer": dealer_name,
65 |             "price": car_price,
66 |             "discount": car_discount,
67 |             "url": car_url,
68 |         }
69 | 
70 |     def __extract_car_model(self, car):
71 |         return car.find("h2", class_="title").text.strip()
72 | 
73 |     def __extract_car_url(self, car):
74 |         return self.base_url + car.find("a")["href"]
75 | 
76 |     def __extract_car_price(self, car):
77 |         car_price = car.find("span", class_="primary-price").text.strip()
78 |         if car_price == "Not Priced":
79 |             return None
80 |         return car_price
81 | 
82 |     def __extract_dealer_name(self, car):
83 |         dealer_name = car.find("div", class_="dealer-name")
84 |         return dealer_name.text.strip() if dealer_name else None
85 | 
86 |     def __extract_car_discount(self, car):
87 |         car_discount = car.find("span", class_="price-drop")
88 |         return car_discount.text.strip() if car_discount else None
89 | 


--------------------------------------------------------------------------------
/src/scrape_up/codechef/__init__.py:
--------------------------------------------------------------------------------
1 | from .codechef import User
2 | 
3 | __all__ = ["User"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/codeforces/__init__.py:
--------------------------------------------------------------------------------
1 | from .user import Users
2 | from .contests import Contest
3 | 
4 | __all__ = ["Users", "Contest"]
5 | 


--------------------------------------------------------------------------------
/src/scrape_up/codewars/__init__.py:
--------------------------------------------------------------------------------
1 | from .codewars import Codewars
2 | 
3 | __all__ = ["Codewars"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/codewars/codewars.py:
--------------------------------------------------------------------------------
 1 | from bs4 import BeautifulSoup
 2 | import json
 3 | from scrape_up.config.request_config import RequestConfig, get
 4 | 
 5 | 
 6 | class Codewars:
 7 |     """
 8 |     Create an instance of the class `GeeksforGeeks`
 9 |     ```py
10 |     cwars = Codewars(user="agastya463")
11 |     cwars.get_profile()
12 |     ```
13 | 
14 |     | Methods           | Details                                                                            |
15 |     | ----------------- | ---------------------------------------------------------------------------------- |
16 |     | `.get_profile()`  | Returns the user data in json format.                                              |
17 | 
18 | 
19 |     Response:
20 |     ```js
21 |     {
22 |       "Name": "Agastya Kumar Yadav",
23 |       "Clan": "Unknown",
24 |       "Member Since": "May 2024",
25 |       "Last Seen": "May 2024",
26 |       "Profiles": "",
27 |       "Following": "0",
28 |       "Followers": "0",
29 |       "Allies": "0",
30 |       "Rank": "8 kyu",
31 |       "Honor": "3",
32 |       "Total Completed Kata": "1",
33 |       "Total Languages Trained": "1",
34 |       "Highest Trained": "C++ (8 kyu)",
35 |       "Most Recent": "C++",
36 |       "Comments": "0 (0 replies)",
37 |       "Collections": "0",
38 |       "Kumite": "0",
39 |       "Translations": "0 (0 approved)"
40 |     }
41 |     ```
42 |     """
43 | 
44 |     def __init__(self, user: str, *, config: RequestConfig = RequestConfig()):
45 |         self.user = user
46 |         headers = {"User-Agent": "scrapeup"}
47 |         self.config = config
48 |         if self.config.headers == {}:
49 |             self.config.set_headers(headers)
50 | 
51 |     def get_profile(self):
52 |         try:
53 |             url = f"https://www.codewars.com/users/{self.user}"
54 |             response = get(url, self.config)
55 |             soup = BeautifulSoup(response.text, "html.parser")
56 |             d = soup.find_all("div", class_="stat")
57 |             data = {}
58 |             for i in d:
59 |                 k = i.text.split(":")
60 |                 data[k[0]] = k[1]
61 |             return json.dumps(data)
62 |         except Exception:
63 |             return None
64 | 


--------------------------------------------------------------------------------
/src/scrape_up/coinmarketcap/__init__.py:
--------------------------------------------------------------------------------
1 | from .crypto import Crypto
2 | 
3 | __all__ = ["Crypto"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/config/__init__.py:
--------------------------------------------------------------------------------
1 | from .request_config import RequestConfig, get
2 | 


--------------------------------------------------------------------------------
/src/scrape_up/config/request_config.py:
--------------------------------------------------------------------------------
 1 | from typing import Dict, Union
 2 | import requests
 3 | 
 4 | 
 5 | class RequestConfig:
 6 |     """
 7 |     A class used to configure requests.
 8 | 
 9 |     Args
10 |     ----
11 |     timeout: int
12 |         The timeout in seconds.
13 |     redirect: bool
14 |         Whether to follow redirects.
15 |     """
16 | 
17 |     def __init__(
18 |         self,
19 |         timeout: Union[int, None] = None,
20 |         redirect: bool = False,
21 |         headers: Dict[str, str] = {},
22 |         proxies: Dict[str, str] = {},
23 |     ):
24 |         self._timeout = timeout
25 |         self._redirect = redirect
26 |         self._headers = headers
27 |         self._proxies = proxies
28 | 
29 |     def set_timeout(self, timeout: int):
30 |         self._timeout = timeout
31 | 
32 |     def set_redirect(self, redirect: bool):
33 |         self._redirect = redirect
34 | 
35 |     def set_headers(self, headers: Dict[str, str]):
36 |         self._headers = headers
37 | 
38 |     def set_proxies(self, proxies: Dict[str, str]):
39 |         self._proxies = proxies
40 | 
41 |     @property
42 |     def timeout(self):
43 |         return self._timeout
44 | 
45 |     @property
46 |     def redirect(self):
47 |         return self._redirect
48 | 
49 |     @property
50 |     def headers(self):
51 |         return self._headers
52 | 
53 |     @property
54 |     def proxies(self):
55 |         return self._proxies
56 | 
57 | 
58 | def get(url: str, config: RequestConfig):
59 |     r = requests.get(
60 |         url=url,
61 |         headers=config.headers,
62 |         timeout=config.timeout,
63 |         allow_redirects=config.redirect,
64 |         proxies=config.proxies,
65 |     )
66 |     return r
67 | 


--------------------------------------------------------------------------------
/src/scrape_up/coursera/__init__.py:
--------------------------------------------------------------------------------
1 | from .courses import Coursera
2 | 
3 | __all__ = ["Coursera"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/covidinfo/__init__.py:
--------------------------------------------------------------------------------
1 | from .covidinfo import CovidInfo
2 | 
3 | __all__ = {"CovidInfo"}
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/cricbuzz/__init__.py:
--------------------------------------------------------------------------------
1 | from .cricbuzz import Cricbuzz
2 | 
3 | __all__ = ["Cricbuzz"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/devcommunity/__init__.py:
--------------------------------------------------------------------------------
1 | from .articles import DevCommunity
2 | 
3 | __all__ = ["DevCommunity"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/devpost/__init__.py:
--------------------------------------------------------------------------------
1 | from .devpost import Devpost
2 | 
3 | __all__ = ["Devpost"]


--------------------------------------------------------------------------------
/src/scrape_up/dictionary/__init__.py:
--------------------------------------------------------------------------------
1 | from .wordoftheday import Dictionary
2 | 
3 | __all__ = ["Dictionary"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/dictionary/wordoftheday.py:
--------------------------------------------------------------------------------
 1 | from bs4 import BeautifulSoup
 2 | from urllib.request import urlopen
 3 | 
 4 | from scrape_up.config.request_config import RequestConfig, get
 5 | 
 6 | 
 7 | class Dictionary:
 8 |     """
 9 |     Create an instance of the `Dictionary` class.
10 |     ```python
11 |     scraper = Dictionary()
12 |     ```
13 |     | Methods        | Details                                      |
14 |     | -------------- | -------------------------------------------- |
15 |     | `.get_word_of_the_day()` | Returns word of the day from Dictionary.com. |
16 |     | `.word_of_the_day_definition()` | Returns the definition of the word of the day. |
17 |     """
18 | 
19 |     def __init__(self, *, config: RequestConfig = RequestConfig()) -> None:
20 |         self.config = config
21 | 
22 |     def __get_word_of_the_day_url(self):
23 |         try:
24 |             response = get("https://www.dictionary.com/", self.config)
25 |             soup = BeautifulSoup(response.text, "html.parser")
26 | 
27 |             for anchor in soup("button"):
28 |                 url = anchor.get("data-linkurl", "/")
29 | 
30 |                 if "word-of-the-day" in url:
31 |                     return url
32 | 
33 |         except:
34 |             return None
35 | 
36 |     def __word_of_the_day_definition(self):
37 |         try:
38 |             response = get(self.__get_word_of_the_day_url(), self.config)
39 |             soup = BeautifulSoup(response.text, "html.parser")
40 | 
41 |             for para in soup("p"):
42 |                 if para.string and para.string[0] not in "EG":
43 |                     return para.string
44 |         except:
45 |             return None
46 | 
47 |     def get_word_of_the_day(self):
48 |         """
49 |         Returns a string containing the word of the day.
50 | 
51 |         ```python
52 |         scraper = Dictionary()
53 |         print(scraper.get_word_of_the_day())
54 |         ```
55 | 
56 |         Sample output:
57 |         >> unfalsifiable
58 |         """
59 |         response = {}
60 |         try:
61 |             response["word"] = (
62 |                 self.__get_word_of_the_day_url().split("/")[-2].split("-")[0]
63 |             )
64 |             response["meaning"] = self.__word_of_the_day_definition()
65 |             return response
66 |         except:
67 |             return None
68 | 


--------------------------------------------------------------------------------
/src/scrape_up/dribbble/__init__.py:
--------------------------------------------------------------------------------
1 | from .dribbble import Dribbble
2 | 
3 | __all__ = ["Dribbble"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/eazydiner/__init__.py:
--------------------------------------------------------------------------------
1 | from .eazydiner import EazyDiner
2 | 
3 | __all__ = ["EazyDiner"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/ebay/__init__.py:
--------------------------------------------------------------------------------
1 | from .ebay import EBAY
2 | 
3 | __all__ = ["EBAY"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/espn/__init__.py:
--------------------------------------------------------------------------------
1 | from .espnmodule import ESPN
2 | 
3 | __all__ = ["ESPN"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/espncricinfo/__init__.py:
--------------------------------------------------------------------------------
1 | from .espncricinfo import Espncricinfo
2 | 
3 | __all__ = ["Espncricinfo"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/fide/__init__.py:
--------------------------------------------------------------------------------
1 | from .fide import FIDE
2 | 
3 | __all__ = ["FIDE"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/flexjobs/__init__.py:
--------------------------------------------------------------------------------
1 | from .flexjobs import FlexJobs
2 | 
3 | __all__ = ["FlexJobs"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/flipkart/__init__.py:
--------------------------------------------------------------------------------
1 | from .flipkart_clothing import FlipkartClothing
2 | from .flipkart_file import Flipkart
3 | from .flipkart_laptop import FlipkartLaptops
4 | 
5 | __all__ = ["FlipkartClothing", "Flipkart", "FlipkartLaptops"]
6 | 


--------------------------------------------------------------------------------
/src/scrape_up/flyrobu/__init__.py:
--------------------------------------------------------------------------------
1 | from .flyrobu import Flyrobu
2 | 
3 | __all__ = ["Flyrobu"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/geeksforgeeks/__init__.py:
--------------------------------------------------------------------------------
1 | from .geeksforgeeks import Geeksforgeeks
2 | 
3 | __all__ = ["Geeksforgeeks"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/github/__init__.py:
--------------------------------------------------------------------------------
1 | from .users import Users
2 | from .repository import Repository
3 | from .issue import Issue
4 | from .organization import Organization
5 | from .pull_request import PullRequest
6 | 
7 | 
8 | __all__ = ["Users", "Repository", "Issue", "Organization", "PullRequest"]
9 | 


--------------------------------------------------------------------------------
/src/scrape_up/github_education/__init__.py:
--------------------------------------------------------------------------------
1 | from .events import Events
2 | 
3 | __all__ = ["Events"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/hackerearth/__init__.py:
--------------------------------------------------------------------------------
1 | from .challenges import Challenges
2 | 
3 | __all__ = ["Challenges"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/hackernews/__init__.py:
--------------------------------------------------------------------------------
1 | from .articles import Articles
2 | 
3 | __all__ = ["Articles"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/hackerrank/__init__.py:
--------------------------------------------------------------------------------
1 | from .user import User
2 | from .contest import Contest
3 | 
4 | __all__ = ["HackerRank", "Contest"]
5 | 


--------------------------------------------------------------------------------
/src/scrape_up/hashnode/__init__.py:
--------------------------------------------------------------------------------
1 | from .hashnode import Hashnode
2 | 
3 | __all__ = ["Hashnode"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/healthgrades/__init__.py:
--------------------------------------------------------------------------------
1 | from .healthgradesmodule import HealthGrades
2 | 
3 | __all__ = ["HealthGrades"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/healthgrades/healthgradesmodule.py:
--------------------------------------------------------------------------------
 1 | from bs4 import BeautifulSoup
 2 | 
 3 | from scrape_up.config.request_config import RequestConfig, get
 4 | 
 5 | 
 6 | class HealthGrades:
 7 |     """
 8 |     Create an instance of `HealthGrades` class
 9 | 
10 |     ```python
11 |     hc = HealthGrades()
12 |     ```
13 | 
14 |     | Method                      | Details                                                              |
15 |     | --------------------------- | -------------------------------------------------------------------- |
16 |     | `get_best_hospitals(state)` | Fetches and returns information about the best hospitals in a state. |
17 | 
18 |     """
19 | 
20 |     def __init__(self, *, config: RequestConfig = RequestConfig()):
21 |         headers = {
22 |             "User-Agent": "Mozilla/5.0 (Windows NT 6.3; Win 64 ; x64) Apple WeKit /537.36(KHTML , like Gecko) Chrome/80.0.3987.162 Safari/537.36"
23 |         }
24 |         self.config = config
25 |         if self.config.headers == {}:
26 |             self.config.set_headers(headers)
27 | 
28 |     def get_best_hospitals(self, state):
29 |         """
30 |         Fetches and returns information about the best hospitals in a state.\n
31 |         ```python
32 |         hc = HealthGrades()
33 |         hc.get_best_hospitals(state="bihar")
34 |         ```
35 | 
36 |         Example output:
37 |         ```python
38 |         [
39 |             {
40 |                 "Name": "ABC Hospital",
41 |                 "Location": "123 Main St, Philadelphia, PA",
42 |                 "Link": "https://www.healthgrades.com/hospital/abc-hospital",
43 |                 "Awards": ["America's 100 Best Hospitals", "Patient Safety Excellence Award"]
44 |             },
45 |             ...
46 |         ]
47 |         ```
48 |         """
49 |         try:
50 |             state = state.replace(" ", "-")
51 |             url = (
52 |                 f"https://www.healthgrades.com/quality/americas-best-hospitals/{state}"
53 |             )
54 |             html_text = get(url, self.config).text
55 |             soup = BeautifulSoup(html_text, "lxml")
56 | 
57 |             hospitals = []
58 |             container = soup.find("ul", {"class": "quality-results-group"})
59 | 
60 |             for items in container.find_all("div", {"class": "quality-card"}):
61 |                 award = []
62 |                 title = items.find("h3")
63 |                 location = items.find("div", {"class": "location-info"})
64 |                 link = (
65 |                     "https://www.healthgrades.com"
66 |                     + items.find("div", {"class": "hospital-info__hospital-link"}).find(
67 |                         "a", href=True
68 |                     )["href"]
69 |                 )
70 |                 awards = items.find("ul", {"class": "awards-list__quality-award"})
71 |                 for item in awards.find_all("li"):
72 |                     award.append(item.text)
73 |                 data = {
74 |                     "Name": title.text,
75 |                     "Location": location.text,
76 |                     "Link": link,
77 |                     "Awards": award[:-2],
78 |                 }
79 |                 hospitals.append(data)
80 |             return hospitals
81 |         except:
82 |             return None
83 | 


--------------------------------------------------------------------------------
/src/scrape_up/icc/__init__.py:
--------------------------------------------------------------------------------
1 | from .icc_rankings import ICC
2 | 
3 | __all__ = ["ICC"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/imdb/__init__.py:
--------------------------------------------------------------------------------
 1 | from .actor import Actor
 2 | from .box_office import BoxOffice
 3 | from .celeb import Celeb
 4 | from .imdb import IMDB
 5 | from .indian_movies import IndianMovies
 6 | from .movie import Movie
 7 | 
 8 | 
 9 | __all__ = ["Actor", "BoxOffice", "Celeb", "IMDB", "IndianMovies", "Movie"]
10 | 


--------------------------------------------------------------------------------
/src/scrape_up/imdb/box_office.py:
--------------------------------------------------------------------------------
 1 | from urllib.request import Request, urlopen
 2 | from bs4 import BeautifulSoup as soup
 3 | 
 4 | 
 5 | class BoxOffice:
 6 |     """
 7 |     Create an instance of `BoxOffice` class.
 8 |     ```python
 9 |     boxoffice = BoxOffice()
10 |     ```
11 |     | Methods            | Details                                                                       |
12 |     | -------------------|-------------------------------------------------------------------------------|
13 |     | `.top_movies()`    | Returns the top box office movies, weekend and total gross and weeks released |
14 | 
15 |     """
16 | 
17 |     def __init__(self):
18 |         self.__scrape_page()
19 | 
20 |     def __scrape_page(self):
21 |         try:
22 |             url = "https://www.imdb.com/chart/boxoffice/?ref_=hm_cht_sm"
23 |             req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
24 | 
25 |             webpage = urlopen(req).read()
26 |             self.page_soup = soup(webpage, "html.parser")
27 | 
28 |         except:
29 |             return None
30 | 
31 |     def top_movies(self):
32 |         """
33 |         Create an instance of `BoxOffice` class
34 | 
35 |         ```python
36 |         boxoffice = BoxOffice()
37 |         boxoffice.top_movies()
38 |         ```
39 | 
40 |         Return\n
41 |         ```js
42 |         [
43 |             {
44 |                 "Movie Name": "Barbie",
45 |                 "Weekend Gross": "$53M",
46 |                 "Total Gross": "$459M",
47 |                 "Weeks released": "3"
48 |             },
49 |             ...
50 |         ]
51 | 
52 |         ```
53 |         """
54 |         try:
55 |             x = self.page_soup.find_all("h3", {"class": "ipc-title__text"})
56 |             x = x[1:11]
57 |             movie_names = []
58 | 
59 |             for y in x:
60 |                 movie_names.append(" ".join(y.get_text().split()[1:]))
61 | 
62 |             x = self.page_soup.find_all("li", {"class": "sc-ee64acb1-1 lkUVhM"})
63 |             x = [y.get_text() for y in x]
64 | 
65 |             lis = []
66 | 
67 |             for y in range(0, len(x), 3):
68 |                 dic = {}
69 |                 dic["Movie Name"] = movie_names[y // 3]
70 |                 dic["Weekend Gross"] = x[y].split()[2]
71 |                 dic["Total Gross"] = x[y + 1].split()[2]
72 |                 dic["Weeks released"] = x[y + 2].split()[2]
73 |                 lis.append(dic)
74 | 
75 |             return lis
76 | 
77 |         except:
78 |             return None
79 | 


--------------------------------------------------------------------------------
/src/scrape_up/imdb/celeb.py:
--------------------------------------------------------------------------------
 1 | from urllib.request import Request, urlopen
 2 | from bs4 import BeautifulSoup as soup
 3 | 
 4 | 
 5 | class Celeb:
 6 |     """
 7 |     Create an instance of `Celeb` class.
 8 |     ```python
 9 |     celeb = Celeb()
10 |     ```
11 |     | Methods            | Details                                            |
12 |     | -------------------|----------------------------------------------------|
13 |     | `.top_celebs()`    | Returns the name, roles, famous movie of the celeb |
14 | 
15 |     """
16 | 
17 |     def __init__(self):
18 |         self.__scrape_page()
19 | 
20 |     def __scrape_page(self):
21 |         try:
22 |             url = "https://www.imdb.com/chart/starmeter/?ref_=chtbo_ql_8"
23 |             req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
24 | 
25 |             webpage = urlopen(req).read()
26 |             self.page_soup = soup(webpage, "html.parser")
27 | 
28 |         except:
29 |             return None
30 | 
31 |     def top_celebs(self):
32 |         """
33 |         Create an instance of `Celeb` class.\n
34 |         ```python
35 |         celeb = Celeb()
36 |         celeb.top_celebs()
37 |         ```
38 |         Return\n
39 |         ```js
40 |         [
41 |             {
42 |                 'Name': 'Paul Reubens',
43 |                 'Roles': ['Actor', 'Writer', 'Director'],
44 |                 'Famous Movie': "Pee-wee's Playhouse"
45 |             },
46 |             ...
47 |         ]
48 |         ```
49 |         """
50 |         try:
51 |             x = self.page_soup.find_all("div", {"class": "sc-89c756a0-4 euZqVD"})
52 |             celeb_list = []
53 |             for y in x:
54 |                 dic = {}
55 |                 dic["Name"] = y.find("h3", {"class": "ipc-title__text"}).get_text()
56 | 
57 |                 lis = []
58 |                 for z in y.find_all(
59 |                     "li", {"class": "ipc-inline-list__item sc-89c756a0-6 jpNWoI"}
60 |                 ):
61 |                     lis.append(z.get_text())
62 | 
63 |                 dic["Roles"] = lis
64 | 
65 |                 dic["Famous Movie"] = y.find(
66 |                     "span", {"class": "sc-1c8554ae-1 cKAFFg"}
67 |                 ).get_text()
68 | 
69 |                 celeb_list.append(dic)
70 | 
71 |             return celeb_list
72 | 
73 |         except:
74 |             return None
75 | 


--------------------------------------------------------------------------------
/src/scrape_up/imdb/indian_movies.py:
--------------------------------------------------------------------------------
 1 | from urllib.request import Request, urlopen
 2 | from bs4 import BeautifulSoup as soup
 3 | 
 4 | 
 5 | class IndianMovies:
 6 |     """
 7 |     Create an instance of `IndianMovies` class.
 8 |     ```python
 9 |     indianmovies = IndianMovies()
10 |     ```
11 |     | Methods                | Details                                       |
12 |     | -----------------------|-----------------------------------------------|
13 |     | `.top_indian_movies()` | Returns the current list of top Indian movies |
14 | 
15 |     """
16 | 
17 |     def __init__(self):
18 |         self.__scrape_page()
19 | 
20 |     def __scrape_page(self):
21 |         try:
22 |             url = "https://www.imdb.com/india/top-rated-indian-movies/?ref_=fea_eds_center-1_india_tr_india250_cta"
23 |             req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
24 | 
25 |             webpage = urlopen(req).read()
26 |             self.page_soup = soup(webpage, "html.parser")
27 | 
28 |         except:
29 |             return None
30 | 
31 |     def top_movies(self):
32 |         """
33 |         Create an instance of `IndianMovies` class.
34 |         ```python
35 |         indianmovies = IndianMovies()
36 |         movies = indianmovies.top_movies()
37 |         ```
38 | 
39 |         Return\n
40 |         ```js
41 |         [
42 |             'Ramayana: The Legend of Prince Rama',
43 |             'Rocketry: The Nambi Effect',
44 |             'Nayakan',
45 |             'Gol Maal',
46 |             'Anbe Sivam',
47 |             ...
48 |         ]
49 |         ```
50 |         """
51 |         try:
52 |             x = self.page_soup.find_all("span", {"data-testid": "rank-list-item-title"})
53 | 
54 |             lis = []
55 |             for i in range(len(x)):
56 |                 lis.append(x[i].get_text()[len(str(i)) :])
57 | 
58 |             return lis
59 | 
60 |         except:
61 |             return None
62 | 


--------------------------------------------------------------------------------
/src/scrape_up/indiantrekking/__init__.py:
--------------------------------------------------------------------------------
1 | from .trek import Indiantrekking
2 | 
3 | __all__ = ["Indiantrekking"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/indiantrekking/trek.py:
--------------------------------------------------------------------------------
 1 | from bs4 import BeautifulSoup
 2 | import re
 3 | import requests
 4 | 
 5 | 
 6 | class Indiantrekking:
 7 |     """
 8 |     A class to scrape data from Indian trekking
 9 | 
10 |     Create an instance of `Indiantrekking` class
11 | 
12 |     ```python
13 |     trek=Indiantrekking("hidden-lakes-of-kashmir")
14 |     ```
15 | 
16 |     | Method                        | Details                                                                              |
17 |     | ---------------------------   | --------------------------------------------------------------------                 |
18 |     |`destination()`                |  return name of the place.                                                           |
19 |     |'trip_fact()'                  |  returns the trip duration, destination, altitude and the season good for trekking   |
20 |     |'outline_day_to_day_itinerary' |  returns the ouline of the day to day itinerary                                      |
21 |     ---
22 |     """
23 | 
24 |     def __init__(self, place):
25 |         self.place = place
26 |         try:
27 |             url = f"https://www.indiantrekking.com/{self.place}.html"
28 |             response = requests.get(url, headers={"User-Agent": "XY"})
29 |             self.soup = BeautifulSoup(response.content, "lxml")
30 |         except:
31 |             return None
32 | 
33 |     def destination_name(self):
34 |         try:
35 |             place = self.soup.find("div", class_="main-title").text
36 |             return place
37 |         except:
38 |             return None
39 | 
40 |     def trip_fact(self):
41 |         try:
42 |             trip_duration = self.soup.findAll("div", class_="inner-wrap")[0].b.text
43 |             trip_destination = self.soup.findAll("div", class_="inner-wrap")[1].b.text
44 |             trip_season = self.soup.findAll("div", class_="inner-wrap")[3].b.text
45 |             trip_altitude = self.soup.findAll("div", class_="inner-wrap")[4].b.text
46 | 
47 |             tripfact = {
48 |                 "trip_duration": re.sub(" +", " ", trip_duration.strip()),
49 |                 "trip_destination": re.sub(" +", " ", trip_destination.strip()),
50 |                 "trip_season": re.sub(" +", " ", trip_season.strip()),
51 |                 "trip_altitude": re.sub(" +", " ", trip_altitude.strip()),
52 |             }
53 |             return tripfact
54 |         except:
55 |             return None
56 | 
57 |     def outline_day_to_day_itinerary(self):
58 |         try:
59 |             outline = self.soup.find("div", class_="itinerary").text
60 |             return outline
61 |         except:
62 |             return None
63 | 


--------------------------------------------------------------------------------
/src/scrape_up/indiatodayweather/__init__.py:
--------------------------------------------------------------------------------
1 | from weather import Indiatodayweather
2 | 
3 | __all__ = ["Indiatodayweather"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/indiatodayweather/weather.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | import datetime as dt
 3 | from bs4 import BeautifulSoup
 4 | 
 5 | 
 6 | class Indiatodayweather:
 7 |     """
 8 |     A class to scrape weather data from Indian today
 9 | 
10 |     Create an instance of `Indiatodayweather` class
11 | 
12 |     ```python
13 |     weather=Indiatodayweather("Mumbai")
14 |     ```
15 | 
16 |     | Method                        | Details                                                                                            |
17 |     | ---------------------------   | ------------------------------------------------------------------------                           |
18 |     |`info_about_weather()`         |  return the temperature, wind speed, description(windy, cloudy, clear) and humidity  of the place. |                                                    |
19 | 
20 |     ---
21 |     """
22 | 
23 |     def __init__(self, place):
24 |         try:
25 |             self.place = place
26 |             url = (
27 |                 "https://www.indiatoday.in/weather/"
28 |                 + self.place
29 |                 + "-weather-forecast-today"
30 |             )
31 |             response = requests.get(url, headers={"User-Agent": "XY"})
32 |             self.soup = BeautifulSoup(response.content, "lxml")
33 | 
34 |         except:
35 |             return None
36 | 
37 |     def info_about_weather(self):
38 |         try:
39 |             temp = self.soup.find("div", class_="wtr_tmp_rhs").text
40 |             humid = self.soup.find("span", class_="wtr_crd_ttl").text + " %"
41 |             description = self.soup.find("span", class_="wtr_tmp_txt").text
42 |             speed = (
43 |                 self.soup.find("div", class_="wtr_wid_sec crd_three")
44 |                 .find("span", class_="wtr_crd_ttl")
45 |                 .text
46 |             ) + " km/h"
47 | 
48 |             weather_info = {
49 |                 "temperature": temp,
50 |                 "humidity": humid,
51 |                 "description": description,
52 |                 "wind_speed": speed,
53 |             }
54 |             return weather_info
55 |         except:
56 |             return None
57 | 


--------------------------------------------------------------------------------
/src/scrape_up/instagram/users.py:
--------------------------------------------------------------------------------
 1 | from selenium import webdriver
 2 | from selenium.webdriver.common.by import By
 3 | from selenium.webdriver.support.ui import WebDriverWait
 4 | from selenium.webdriver.support import expected_conditions as EC
 5 | from webdriver_manager.firefox import GeckoDriverManager
 6 | from selenium.webdriver.firefox.service import Service
 7 | 
 8 | driver = webdriver.Firefox(service=Service(GeckoDriverManager().install()))
 9 | 
10 | 
11 | class User:
12 |     def __init__(self, username: str):
13 |         self.username = username
14 | 
15 |     def user_details(self):
16 |         """
17 |         user = User(username=" ")
18 |         print(user.user_details())
19 |         """
20 |         try:
21 |             driver.get(f"https://www.instagram.com/{self.username}/")
22 |             wait = WebDriverWait(driver, 180)
23 |             account_details = wait.until(
24 |                 EC.presence_of_all_elements_located(
25 |                     (By.XPATH, '//span[@class="_ac2a"]')
26 |                 )
27 |             )
28 |             return {
29 |                 "Number of Posts:": account_details[0].text,
30 |                 "Number of Followers:": account_details[1].text,
31 |                 "Number of Following:": account_details[2].text,
32 |             }
33 | 
34 |         except Exception as e:
35 |             message = f"{self.username} not found!"
36 |             return {"data": None, "message": message}
37 |         finally:
38 |             driver.quit()
39 | 


--------------------------------------------------------------------------------
/src/scrape_up/lastfm/__init__.py:
--------------------------------------------------------------------------------
1 | from .lastfm import Lastfm
2 | 
3 | __all__ = ["Lastfm"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/letterboxd/__init__.py:
--------------------------------------------------------------------------------
1 | from .letterboxd import Letterboxd
2 | 
3 | __all__ = ["Letterboxd"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/librarygenesis/__init__.py:
--------------------------------------------------------------------------------
1 | from .library import LibGen
2 | 
3 | __all__ = ["LibGen"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/lichess/__init__.py:
--------------------------------------------------------------------------------
1 | from .lichess import LichessGames
2 | 
3 | __all__ = ["LichessGames"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/luma/events.py:
--------------------------------------------------------------------------------
 1 | from bs4 import BeautifulSoup
 2 | 
 3 | from scrape_up.config.request_config import RequestConfig, get
 4 | 
 5 | 
 6 | class Events:
 7 |     """
 8 |     Create an instance of `Events` class.
 9 |     ```py
10 |     events = Events()
11 |     ```
12 |     | Methods            | Details                                                                                                              |
13 |     | ------------------ | -------------------------------------------------------------------------------------------------------------------- |
14 |     | `.get_events()` | Returns the latest events along with their organizer, location, image url, price and link. |
15 |     """
16 | 
17 |     def __init__(self, *, config: RequestConfig = RequestConfig()):
18 |         self.config = config
19 | 
20 |     def get_events(self):
21 |         """
22 |         Class - `Events`
23 |         Example -
24 |         ```python
25 |         luma = Events()
26 |         events = luma.get_events()
27 |         ```
28 |         Return
29 |         ```js
30 |         [
31 |             {
32 |                 'title': 'Brexfast Club',
33 |                 'organizer': 'By Shai Goldman & Alexandra Settlemyer',
34 |                 'location': 'Register to See Location',
35 |                 'img_url': 'https://images.lumacdn.com/cdn-cgi/image/format=auto,fit=cover,dpr=2,quality=75,width=200,height=100/event-covers/gd/45c21ae7-67f6-40c7-8820-1cb57ea14705',
36 |                 'price': 'Sold Out',
37 |                 'link': 'https://lu.ma//nycaug9'
38 |             }
39 |             ...
40 |         ]
41 |         ```
42 |         """
43 |         url = "https://lu.ma/nyc"
44 |         events_data = {"events": []}
45 |         try:
46 |             res = get(url, self.config)
47 |             soup = BeautifulSoup(res.content, "html.parser")
48 |             cards = soup.find_all("div", class_="jsx-3249095655 card-wrapper")
49 | 
50 |             for c in cards:
51 |                 title = c.find("a")["aria-label"]
52 |                 base = c.find_all("div", class_="jsx-3575689807 min-width-0")
53 |                 organizer = base[0].getText()
54 |                 loc = base[1].getText()
55 |                 try:
56 |                     price = c.find("div", class_="jsx-146954525 pill-label").getText()
57 |                 except:
58 |                     price = ""
59 |                 img = c.find(
60 |                     "div", class_="jsx-4068354093 img-aspect-ratio rounded"
61 |                 ).find("img")["src"]
62 |                 link = c.find("a")["href"]
63 |                 events_data["events"].append(
64 |                     {
65 |                         "title": title,
66 |                         "organizer": organizer,
67 |                         "location": loc,
68 |                         "img_url": img,
69 |                         "price": price,
70 |                         "link": "https://lu.ma/" + link,
71 |                     }
72 |                 )
73 |             return events_data["events"]
74 |         except:
75 |             return None
76 | 


--------------------------------------------------------------------------------
/src/scrape_up/medium/publication.py:
--------------------------------------------------------------------------------
 1 | from selenium import webdriver
 2 | from selenium.webdriver.common.by import By
 3 | from selenium.webdriver.chrome.options import Options
 4 | import time
 5 | 
 6 | 
 7 | options = Options()
 8 | options.add_argument("--headless")
 9 | options.add_argument("--log-level=3")
10 | options.add_experimental_option("excludeSwitches", ["enable-logging"])
11 | driver = webdriver.Chrome(options=options)
12 | 
13 | 
14 | class Publication:
15 |     def __init__(self, link):
16 |         self.link = link
17 | 
18 |     def get_articles(self):
19 |         """
20 |         Class - `Publication`
21 |         Example
22 |         ```python
23 |         publication = medium.Publication("https://towardsdatascience.com")
24 |         articles = publication.get_articles()
25 |         for article in articles:
26 |             print(article) #For better readability/clarity
27 |         ```
28 |         Returns the articles of the publication which are arranged in the form of a list
29 |         """
30 |         try:
31 |             articles = []
32 |             link = self.link
33 |             driver.get(link)
34 |             scroll_pause = 0.5
35 |             # Get scroll height
36 |             last_height = driver.execute_script(
37 |                 "return document.documentElement.scrollHeight"
38 |             )
39 |             run_time, max_run_time = 0, 1
40 |             while True:
41 |                 iteration_start = time.time()
42 |                 # Scroll down to bottom
43 |                 driver.execute_script(
44 |                     "window.scrollTo(0, 1000*document.documentElement.scrollHeight);"
45 |                 )
46 | 
47 |                 # Wait to load page
48 |                 time.sleep(scroll_pause)
49 | 
50 |                 # Calculate new scroll height and compare with last scroll height
51 |                 new_height = driver.execute_script(
52 |                     "return document.documentElement.scrollHeight"
53 |                 )
54 |                 scrolled = new_height != last_height
55 |                 timed_out = run_time >= max_run_time
56 |                 if scrolled:
57 |                     run_time = 0
58 |                     last_height = new_height
59 |                 elif not scrolled and not timed_out:
60 |                     run_time += time.time() - iteration_start
61 |                 elif not scrolled and timed_out:
62 |                     break
63 |             elements = driver.find_elements(By.XPATH, "//h2 | //h3")
64 |             for x in elements:
65 |                 articles.append(x.text)
66 |             return articles
67 |         except:
68 |             return "page/publication not found."
69 | 
70 | 
71 | # publication = Publication("https://pub.towardsai.net")
72 | # articles = publication.get_articles_list()
73 | # for article in articles:
74 | #     print(article)
75 | 


--------------------------------------------------------------------------------
/src/scrape_up/medium/trending.py:
--------------------------------------------------------------------------------
 1 | from bs4 import BeautifulSoup as bs
 2 | 
 3 | from scrape_up.config.request_config import RequestConfig, get
 4 | 
 5 | headers = {
 6 |     "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36"
 7 | }  # mimics a browser's request
 8 | 
 9 | 
10 | class Trending:
11 |     def __init__(self, *, config: RequestConfig = RequestConfig()):
12 |         self.config = config
13 | 
14 |     def get_trending(self):
15 |         """
16 |         Class - `Trending`
17 |         Example
18 |         ```python
19 |         trending = Trending.get_trending()
20 |         for trend in trending:
21 |             print(trend) #For better readability/clarity
22 |         ```
23 |         Returns a list of trending titles
24 | 
25 |         """
26 |         try:
27 |             titles = []
28 |             r = get("https://medium.com/", self.config)
29 |             soup = bs(r.text, "html.parser")
30 |             elements = soup.select('h2[class^="by j"]')
31 |             for x in elements:
32 |                 titles.append(x.text)
33 |             return titles
34 | 
35 |         except:
36 |             return {"data": None, "message": "Something went wrong! Try again!"}
37 | 


--------------------------------------------------------------------------------
/src/scrape_up/medium/user.py:
--------------------------------------------------------------------------------
 1 | from selenium import webdriver
 2 | from selenium.webdriver.common.by import By
 3 | from selenium.webdriver.chrome.options import Options
 4 | import time
 5 | 
 6 | options = Options()
 7 | options.add_argument("--headless")
 8 | options.add_argument("--log-level=3")
 9 | options.add_experimental_option("excludeSwitches", ["enable-logging"])
10 | driver = webdriver.Chrome(options=options)
11 | 
12 | 
13 | class User:
14 |     def __init__(self, username):
15 |         self.username = username
16 | 
17 |     def get_articles(self):
18 |         """
19 |         Class `Users`
20 |         Example:
21 |         ```python
22 |         user = medium.User(username='karthikbhandary2')
23 |         article_titles = user.get_articles()
24 |         for article in article_titles:
25 |             print(article) # For better readability/clarity
26 |         ```
27 |         Returns a list of the titles.
28 |         """
29 |         try:
30 |             titles = []
31 |             username = self.username
32 |             driver.get(f"https://{username}.medium.com")
33 |             scroll_pause = 0.5
34 |             # Get scroll height
35 |             last_height = driver.execute_script(
36 |                 "return document.documentElement.scrollHeight"
37 |             )
38 |             run_time, max_run_time = 0, 1
39 |             while True:
40 |                 iteration_start = time.time()
41 |                 # Scroll down to bottom
42 |                 driver.execute_script(
43 |                     "window.scrollTo(0, 1000*document.documentElement.scrollHeight);"
44 |                 )
45 | 
46 |                 # Wait to load page
47 |                 time.sleep(scroll_pause)
48 | 
49 |                 # Calculate new scroll height and compare with last scroll height
50 |                 new_height = driver.execute_script(
51 |                     "return document.documentElement.scrollHeight"
52 |                 )
53 |                 scrolled = new_height != last_height
54 |                 timed_out = run_time >= max_run_time
55 |                 if scrolled:
56 |                     run_time = 0
57 |                     last_height = new_height
58 |                 elif not scrolled and not timed_out:
59 |                     run_time += time.time() - iteration_start
60 |                 elif not scrolled and timed_out:
61 |                     break
62 |             elements = driver.find_elements(By.CSS_SELECTOR, "h2")
63 |             for x in elements:
64 |                 titles.append(x.text)
65 |             return titles
66 | 
67 |         except:
68 |             return f"{username} not found."
69 | 


--------------------------------------------------------------------------------
/src/scrape_up/moneycontrol/equity_mutual_funds.py:
--------------------------------------------------------------------------------
 1 | from urllib.request import Request, urlopen
 2 | from bs4 import BeautifulSoup as soup
 3 | 
 4 | 
 5 | class EquityMutualFunds:
 6 |     """
 7 |     Create an instance of `EquityMutualFunds` class.
 8 |     ```python
 9 |     equitymutualfunds = EquityMutualFunds()
10 |     ```
11 |     | Methods                  | Details                                         |
12 |     | -------------------------|-------------------------------------------------|
13 |     | `.historical_returns`    | Returns mutual funds based on historic returns  |
14 | 
15 |     """
16 | 
17 |     def __init__(self):
18 |         self.__scrape_page()
19 | 
20 |     def __scrape_page(self):
21 |         try:
22 |             url = "https://www.moneycontrol.com/mutual-funds/best-funds/equity.html"
23 |             req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
24 | 
25 |             webpage = urlopen(req).read()
26 |             self.page_soup = soup(webpage, "html.parser")
27 | 
28 |         except:
29 |             return None
30 | 
31 |     def historical_returns(self):
32 |         """
33 |         Create an instance of `EquityMutualFunds` class.
34 | 
35 |         ```python
36 |         equitymutualfunds = EquityMutualFunds()
37 |         equitymutualfunds.historical_returns()
38 | 
39 |         ```
40 |         Return\n
41 |         ```js
42 |         [
43 |             'Motilal Oswal Midcap Fund',
44 |             'Quant Small Cap Fund',
45 |             'UTI Flexi Cap Fund',
46 |             ....
47 |         ]
48 |         ```
49 |         """
50 | 
51 |         try:
52 |             L = []
53 |             for x in self.page_soup.find_all("a", {"class": "robo_medium"}):
54 |                 temp = x.get_text().split(" - ")[0]
55 |                 if temp not in L:
56 |                     L.append(temp)
57 | 
58 |             return L
59 | 
60 |         except:
61 |             return None
62 | 


--------------------------------------------------------------------------------
/src/scrape_up/moneycontrol/gold.py:
--------------------------------------------------------------------------------
 1 | from urllib.request import Request, urlopen
 2 | from bs4 import BeautifulSoup as soup
 3 | 
 4 | 
 5 | class GoldPrice:
 6 |     """
 7 |     Create an instance of `GoldPrice` class
 8 |     ```python
 9 |     goldprice = GoldPrice()
10 |     ```
11 | 
12 |     | Methods      | Details                                       |
13 |     | -------------| ----------------------------------------------|
14 |     | `.price_22_carat()`| Returns the price of 22k gold prices citywise |
15 |     | `.price_24_carat()`| Returns the price of 22k gold prices citywise |
16 | 
17 |     """
18 | 
19 |     def __init__(self):
20 |         self.__scrape_page()
21 |         self.__get_values()
22 | 
23 |     def __scrape_page(self):
24 |         try:
25 |             url = "https://www.moneycontrol.com/news/gold-rates-today/"
26 |             req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
27 | 
28 |             webpage = urlopen(req).read()
29 |             self.page_soup = soup(webpage, "html.parser")
30 | 
31 |         except:
32 |             return None
33 | 
34 |     def __get_values(self):
35 |         y = self.page_soup.find_all("td")
36 |         y = y[25:-33]
37 |         L = []
38 | 
39 |         for x in y:
40 |             L.append(x.get_text())
41 | 
42 |         self.vals = []
43 |         for i in range(0, len(y), 5):
44 |             self.vals.append(L[i : i + 5])
45 | 
46 |     def price_22_carat(self):
47 |         """
48 |         Create an instance of GoldPrice class
49 |         ```python
50 |         goldprice = GoldPrice()
51 |         goldprice.price_22_carat()
52 |         ```
53 |         Return\n
54 |         ```js
55 |         {
56 |             "Agra": "₹ 5,610",
57 |             "Ahmedabad": "₹ 5,614",
58 |             "Andhra pradesh": "₹ 5,550",
59 |             "Assam": "₹ 5,655",
60 |             "Bangalore": "₹ 5,615",
61 |             "Bhilai": "₹ 5,603"
62 |         }
63 |         ```
64 |         """
65 |         try:
66 |             cities = [x[0] for x in self.vals]
67 |             prices = [x[1] for x in self.vals]
68 |             return dict(zip(cities, prices))
69 |         except:
70 |             return None
71 | 
72 |     def price_24_carat(self):
73 |         """
74 |         Create an instance of GoldPrice class
75 |         ```python
76 |         goldprice = GoldPrice()
77 |         goldprice.price_24_carat()
78 |         ```
79 |         Return\n
80 |         ```js
81 |         {
82 |             'Agra': '₹ 5,891',
83 |             'Ahmedabad': '₹ 5,895',
84 |             'Andhra pradesh': '₹ 5,828',
85 |             'Assam': '₹ 5,938',
86 |             'Bangalore': '₹ 5,896',
87 |             'Bhilai': '₹ 5,883'
88 |         }
89 |         ```
90 |         """
91 |         try:
92 |             cities = [x[0] for x in self.vals]
93 |             prices = [x[3] for x in self.vals]
94 |             return dict(zip(cities, prices))
95 |         except:
96 |             return None
97 | 


--------------------------------------------------------------------------------
/src/scrape_up/moneycontrol/silver_prices.py:
--------------------------------------------------------------------------------
  1 | from urllib.request import Request, urlopen
  2 | from bs4 import BeautifulSoup as soup
  3 | 
  4 | 
  5 | class SilverPrice:
  6 |     """
  7 |     Create an instance of `SilverPrice` class
  8 |     ```python
  9 |     silverprice = SilverPrice()
 10 |     ```
 11 | 
 12 |     | Methods            | Details                                                             |
 13 |     | -------------------|---------------------------------------------------------------------|
 14 |     | `.citywise_price()`| Returns the price of  silver citywise in rupees                     |
 15 |     | `.last_10_days()`  | Returns the price of 10 grams silver for the last 10 days in rupees |
 16 | 
 17 |     """
 18 | 
 19 |     def __init__(self):
 20 |         self.__scrape_page()
 21 | 
 22 |     def __scrape_page(self):
 23 |         try:
 24 |             url = "https://www.moneycontrol.com/news/silver-rates-today/"
 25 |             req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
 26 | 
 27 |             webpage = urlopen(req).read()
 28 |             self.page_soup = soup(webpage, "html.parser")
 29 | 
 30 |         except:
 31 |             return None
 32 | 
 33 |     def citywise_price(self):
 34 |         """
 35 |         Create an instance of `SilverPrice` class
 36 |         ```python
 37 |         silverprice = SilverPrice()
 38 |         silverprice.citywise_price()
 39 |         ```
 40 | 
 41 |         Return\n
 42 |         ```js
 43 |         {
 44 |             'Agra': '81',
 45 |             'Ahmedabad': '81',
 46 |             'Bangalore': '81',
 47 |             'Bhilai': '81',
 48 |             'Bhopal': '81'
 49 |         }
 50 |         ```
 51 |         """
 52 |         try:
 53 |             x = self.page_soup.find_all("tr")
 54 |             x = x[7:-12]
 55 | 
 56 |             x = [(y.get_text()).split("₹ ")[:-1] for y in x]
 57 |             keys = [y[0] for y in x]
 58 |             values = [y[1] for y in x]
 59 | 
 60 |             return dict(zip(keys, values))
 61 | 
 62 |         except:
 63 |             return None
 64 | 
 65 |     def last_10_days(self):
 66 |         """
 67 |         Create an instance of `SilverPrice` class
 68 |         ```python
 69 |         silverprice = SilverPrice()
 70 |         silverprice.citywise_price()
 71 |         ```
 72 | 
 73 |         Return\n
 74 |         ```js
 75 |         {
 76 |             'Aug 01, 2023': '810',
 77 |             'Jul 31, 2023': '800',
 78 |             'Jul 30, 2023': '800',
 79 |             'Jul 29, 2023': '800',
 80 |             'Jul 28, 2023': '795',
 81 |             'Jul 26, 2023': '804',
 82 |             'Jul 25, 2023': '800',
 83 |             'Jul 24, 2023': '805',
 84 |             'Jul 23, 2023': '805',
 85 |             'Jul 22, 2023': '805'
 86 |         }
 87 |         ```
 88 |         """
 89 |         try:
 90 |             x = self.page_soup.find_all("tr")
 91 |             x = x[-10:]
 92 | 
 93 |             x = [(y.get_text()).split("₹ ") for y in x]
 94 |             keys = [y[0] for y in x]
 95 |             values = [y[1] for y in x]
 96 | 
 97 |             return dict(zip(keys, values))
 98 | 
 99 |         except:
100 |             return None
101 | 


--------------------------------------------------------------------------------
/src/scrape_up/myanimelist/__init__.py:
--------------------------------------------------------------------------------
1 | from .scraper import Anime
2 | 
3 | __all__ = ["Anime"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/pinterest/__init__.py:
--------------------------------------------------------------------------------
1 | from .pinterest import Pinterest
2 | 
3 | __all__ = ["Pinterest"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/quora/__init__.py:
--------------------------------------------------------------------------------
1 | from .quora import Quora
2 | 
3 | __all__ = ["Quora"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/steam/__init__.py:
--------------------------------------------------------------------------------
1 | from .steamscraper import SteamStoreScraper
2 | 
3 | __all__ = ["SteamStoreScraper"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/swiggy/__init__.py:
--------------------------------------------------------------------------------
1 | from .swiggy import Swiggy
2 | 
3 | __all__ = ["Swiggy"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/sysreqlab/__init__.py:
--------------------------------------------------------------------------------
1 | from find_titles import FindTitles
2 | from requirements import Requirements
3 | 
4 | 
5 | __all__ = ["FindTitles", "Requirements"]
6 | 


--------------------------------------------------------------------------------
/src/scrape_up/sysreqlab/find_titles.py:
--------------------------------------------------------------------------------
 1 | from bs4 import BeautifulSoup
 2 | import requests
 3 | 
 4 | 
 5 | class FindTitles:
 6 |     """
 7 |     Create an instance of `FindTitles` class.
 8 | 
 9 |     ```python
10 |     titles = sysreqlab.FindTitles(search_term="Call of Duty", search_alphabet="c")
11 |     ```
12 | 
13 |     | Methods                          | Details                                                               |
14 |     | -------------------------------- | --------------------------------------------------------------------- |
15 |     | `.find_titles(number_of_titles)` | Returns the list of titles based on the search term, search alphabet. |
16 |     """
17 | 
18 |     def __init__(self, search_term: str, search_alphabet: str):
19 |         self.search_term = search_term
20 |         self.search_alphabet = search_alphabet
21 | 
22 |     def __scrape_data(self):
23 |         try:
24 |             url = f"https://www.systemrequirementslab.com/all-games-list/?filter={self.search_alphabet}"
25 |             html = requests.get(url)
26 |             html.raise_for_status()
27 |             return html.text
28 | 
29 |         except requests.exceptions.RequestException as e:
30 |             raise Exception(f"An error occurred while fetching the page: {str(e)}")
31 | 
32 |     def __parse_page(self):
33 |         html = self.__scrape_data()
34 |         soup = BeautifulSoup(html, "html.parser")
35 |         return soup
36 | 
37 |     def find_titles(self, number_of_titles: int):
38 |         """
39 |         Class - `FindTitles`
40 |         Example:
41 |         ```python
42 |         titles = FindTitles(search_term="Call of Duty", search_alphabet="c")
43 |         titles = titles.find_titles(5)
44 |         ```
45 |         Returns a list of titles that match the search term.
46 |         """
47 | 
48 |         try:
49 |             soup = self.__parse_page()
50 | 
51 |             div_elements = soup.find("div", class_="pt-3")
52 |             li_elements = div_elements.find_all("li")
53 |             all_titles = [title.text.strip() for title in li_elements]
54 | 
55 |             titles = [
56 |                 title
57 |                 for title in all_titles
58 |                 if self.search_term.lower() in title.lower()
59 |             ]
60 | 
61 |             return titles[:number_of_titles]
62 | 
63 |         except Exception as e:
64 |             raise Exception(f"An error occurred while fetching the titles: {str(e)}")
65 | 


--------------------------------------------------------------------------------
/src/scrape_up/thehindu/thehindu.py:
--------------------------------------------------------------------------------
 1 | from bs4 import BeautifulSoup as bs
 2 | 
 3 | from scrape_up.config.request_config import RequestConfig, get
 4 | 
 5 | 
 6 | class TheHindu:
 7 |     """
 8 |     Create an object of the 'TheHindu' class\n
 9 |     ```python
10 |     scraper = TheHindu()
11 |     ```
12 |     | Methods               | Details                                                                   |
13 |     | --------------------- | ------------------------------------------------------------------------- |
14 |     | `.get_news(page_url)` |  gets heading, subheading, time, and news content                         |
15 |     """
16 | 
17 |     def __init__(self, *, config: RequestConfig = RequestConfig()):
18 |         headers = {
19 |             "User-Agent": "Mozilla/5.0 (Windows NT 6.3; Win 64 ; x64) Apple WeKit /537.36(KHTML , like Gecko) Chrome/80.0.3987.162 Safari/537.36"
20 |         }
21 |         self.config = config
22 |         if self.config.headers == {}:
23 |             self.config.set_headers(headers)
24 | 
25 |     def get_news(self, page_url):
26 |         """
27 |         Create an object of the 'TheHindu' class\n
28 |         ```python
29 |         scraper = TheHindu()
30 |         scraper.get_news(page_url="https://www.thehindu.com/news/cities/Delhi/sc-appoints-former-delhi-hc-judge-justice-jayant-nath-as-interim-chairperson-of-power-regulator-derc/article67157713.ece")
31 |         ```
32 |         Response
33 |         ```js
34 |         {
35 |             "title":"SC appoints former Delhi HC judge Justice Jayant Nath as interim chairperson of power regulator DERC",
36 |             "subtitle":"The office of the DERC chairperson has been vacant for over six months",
37 |             "last_updated":"August 04, 2023 02:59 pm | Updated 03:11 pm IST - New Delhi",
38 |             "news":"The Supreme Court on Friday appointed former Delhi High Court judge, ..."
39 |         }
40 |         ```
41 |         """
42 |         try:
43 |             page_url = "https://www.thehindu.com/news/cities/Delhi/sc-appoints-former-delhi-hc-judge-justice-jayant-nath-as-interim-chairperson-of-power-regulator-derc/article67157713.ece"
44 |             response = get(page_url, self.config).text
45 |             soup = bs(response, "lxml")
46 |             main_content_box = soup.find("div", {"class": "articlebodycontent"})
47 |             news_text = main_content_box.find_all("p")
48 |             news = ""
49 |             for p in news_text:
50 |                 if "class" not in str(p):
51 |                     news += p.text
52 |             heading = soup.find("h1", {"class": "title"}).text.strip()
53 |             sub_heading = soup.find("h3", {"class": "sub-title"}).text.strip()
54 |             last_updated = soup.find("p", {"class": "publish-time"}).text.strip()
55 |             news_data = {
56 |                 "title": heading,
57 |                 "subtitle": sub_heading,
58 |                 "last_updated": last_updated,
59 |                 "news": news,
60 |             }
61 |             return news_data
62 |         except:
63 |             return None
64 | 


--------------------------------------------------------------------------------
/src/scrape_up/timeanddate/time_zones.py:
--------------------------------------------------------------------------------
 1 | from urllib.request import Request, urlopen
 2 | from bs4 import BeautifulSoup as soup
 3 | 
 4 | 
 5 | class Timezones:
 6 |     """
 7 |     Create an instance of `Timezones` class.\n
 8 |     ```python
 9 |     timezones = Timezones()
10 |     ```
11 |     | Methods            | Details                                          |
12 |     | -------------------|--------------------------------------------------|
13 |     | `.city_timezones()`| Returns the timezones of cites around the world  |
14 | 
15 |     """
16 | 
17 |     def __init__(self):
18 |         self.__scrape_page()
19 | 
20 |     def __scrape_page(self):
21 |         try:
22 |             url = "https://www.timeanddate.com/worldclock/full.html"
23 |             req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
24 | 
25 |             webpage = urlopen(req).read()
26 |             self.page_soup = soup(webpage, "html.parser")
27 | 
28 |         except:
29 |             return None
30 | 
31 |     def city_timezones(self):
32 |         """
33 |         Create an instance of `Timezones` class
34 |         ```python
35 |         timezones = Timezones()
36 |         timezones.city_timezones()
37 |         ```
38 | 
39 |         Return\n
40 |         ```js
41 |         {
42 |             "Abidjan": "16.31",
43 |             "Gitega": "18.31",
44 |             "Oral": "21.31",
45 |             "Abu Dhabi": "20.31",
46 |             "Grise Fiord *": "12.31",
47 |             "Oslo *": "18.31",
48 |             "Abuja": "17.31"
49 |         }
50 |         ```
51 |         """
52 |         try:
53 |             x = self.page_soup.find_all("td")
54 |             p = False
55 | 
56 |             timezones_dict = {}
57 |             for y in x[:-1]:
58 |                 if p == False:
59 |                     key = y.get_text()
60 |                 else:
61 |                     timezones_dict[key] = (y.get_text())[5:]
62 |                 p = not (p)
63 | 
64 |             return timezones_dict
65 | 
66 |         except:
67 |             return None
68 | 


--------------------------------------------------------------------------------
/src/scrape_up/timesjobs/timesjobs_scraper.py:
--------------------------------------------------------------------------------
 1 | from bs4 import BeautifulSoup
 2 | 
 3 | from scrape_up.config.request_config import RequestConfig, get
 4 | 
 5 | 
 6 | class TimesJobs:
 7 |     def __init__(self, role: str, *, config: RequestConfig = RequestConfig()):
 8 |         self.role = role
 9 |         self.config = config
10 | 
11 |     def list_jobs(self):
12 |         """
13 |         Class - `TimesJobs`\n
14 |         Example -\n
15 |         ```python
16 | 
17 |         jobs = TimesJobs(role="developer")
18 |         jobs.list_jobs()
19 |         ```
20 |         Return\n
21 |         ```python
22 |         return
23 |         {
24 |             "Company": "Name of the comapny",
25 |             "Location": "Location at which the company is located",
26 |             "Experience": "Experience of the applicants required for that post",
27 |             "Posted": "Number of days before which this job was posted on this webiste",
28 |             "Apply here": "Link which directly takes you to the Web-page where you can apply for the job"
29 |         }
30 |         """
31 |         try:
32 |             spl = self.role.split()
33 |             self.role = "%20".join(spl)
34 |         except:
35 |             return None
36 |         try:
37 |             url = f"https://m.timesjobs.com/mobile/jobs-search-result.html?txtKeywords={self.role}&txtLocation=India&cboWorkExp1=-1"
38 |             response = get(url, self.config)
39 |             soup = BeautifulSoup(response.text, "html.parser")
40 |             companies = soup.find_all("h4")
41 |             experiences = soup.find_all("div", class_="srp-exp")
42 |             locations = soup.find_all("div", class_="srp-loc")
43 |             days_ago = soup.find_all("span", class_="posting-time")
44 |             application_links = soup.find_all("h3")
45 | 
46 |             job_data = []
47 | 
48 |             for i in range(len(companies)):
49 |                 company = companies[i].text
50 |                 location = locations[i].text
51 |                 experience = experiences[i].text
52 |                 days = days_ago[i].text
53 |                 href_value = application_links[i].a["href"]
54 | 
55 |                 job_info = {
56 |                     "Company": company,
57 |                     "Location": location,
58 |                     "Experience": experience,
59 |                     "Posted": days,
60 |                     "Apply here": href_value,
61 |                 }
62 |                 job_data.append(job_info)
63 | 
64 |             return job_data
65 | 
66 |         except Exception as e:
67 |             print("Not possible to webscrape")
68 |             return None
69 | 


--------------------------------------------------------------------------------
/src/scrape_up/tripadvisor/TripAdvisor.py:
--------------------------------------------------------------------------------
 1 | from bs4 import BeautifulSoup
 2 | 
 3 | from scrape_up.config.request_config import RequestConfig, get
 4 | 
 5 | 
 6 | class TripAdvisor:
 7 |     """
 8 |     First, create an object of class `TripAdvisor`
 9 | 
10 |     ```python
11 |     hotel = TripAdvisor()
12 |     ```
13 | 
14 |     | Methods                  | Details                                              |
15 |     | ------------------------ | ---------------------------------------------------- |
16 |     | `get_details(hotel_url)` | Get the details of a hotel from its TripAdvisor URL. |
17 |     """
18 | 
19 |     def __init__(self, *, config: RequestConfig = RequestConfig()):
20 |         headers = {
21 |             "User-Agent": "Mozilla/5.0 (Windows NT 6.3; Win 64 ; x64) Apple WeKit /537.36(KHTML , like Gecko) Chrome/80.0.3987.162 Safari/537.36"
22 |         }
23 |         self.config = config
24 |         if self.config.headers == {}:
25 |             self.config.set_headers(headers)
26 | 
27 |     def get_details(self, hotel_url: str):
28 |         """
29 |         Get the details of a hotel from its TripAdvisor URL.\n
30 |         Parameters:- `hotel_url (str)`: The URL of the hotel on TripAdvisor.
31 |         ```python
32 |         hotel = TripAdvisor()
33 |         hotel.get_details()
34 |         ```
35 |         Returns:
36 |         ```js
37 |         {
38 |             "Rating": "The hotel's rating",
39 |             "Experience": "The hotel's experience summary",
40 |             "Reviews": "The number of reviews for the hotel",
41 |             "Award": "The award received by the hotel, or None if not available",
42 |             "Description": "The description of the hotel as a BeautifulSoup Tag",
43 |             "Amenities": "List of amenities offered by the hotel"
44 |         }
45 |         ```
46 |         """
47 |         try:
48 |             url = hotel_url
49 |             html_text = get(url, self.config).text
50 |             soup = BeautifulSoup(html_text, "lxml")
51 | 
52 |             container = soup.find("div", {"class": "ppuFV _T Z BB"})
53 | 
54 |             rating = container.find("span", {"class": "uwJeR P"}).text
55 |             experience = container.find("div", {"class": "kkzVG"}).text
56 |             reviews = container.find("span", {"class": "hkxYU q Wi z Wc"}).text
57 |             award = container.find("div", {"class": "bhYSr P"})
58 |             if award:
59 |                 award = award.text
60 |             else:
61 |                 award = None
62 |             description = container.find("div", {"class": "fIrGe _T"}).text
63 |             pa = container.find("div", {"class": "OsCbb K"})
64 |             amineties = []
65 |             for items in pa.find_all("div", {"class": "yplav f ME H3 _c"}):
66 |                 amineties.append(items.text)
67 | 
68 |             data = {
69 |                 "Rating": rating,
70 |                 "Experience": experience,
71 |                 "Reviews": reviews,
72 |                 "Award": award,
73 |                 "Description": description,
74 |                 "Amenities": amineties,
75 |             }
76 |             return data
77 |         except:
78 |             return None
79 | 


--------------------------------------------------------------------------------
/src/scrape_up/twitter/numidconverter.py:
--------------------------------------------------------------------------------
 1 | from selenium import webdriver
 2 | from selenium.webdriver.chrome.options import Options
 3 | from bs4 import BeautifulSoup
 4 | import json
 5 | 
 6 | 
 7 | class TwitterScraper:
 8 |     def __init__(self):
 9 |         self.chrome_options = Options()
10 |         self.chrome_options.add_argument("--headless")
11 |         self.chrome_options.add_argument("--window-size=1920,1080")
12 |         self.chrome_options.add_argument("--disable-gpu")
13 |         self.chrome_options.add_argument("--no-sandbox")
14 |         self.chrome_options.add_argument("--disable-dev-shm-usage")
15 |         self.chrome_options.add_argument("--disable-extensions")
16 |         self.chrome_options.add_argument("--disable-logging")
17 |         self.chrome_options.add_argument("--log-level=3")
18 |         self.chrome_options.add_argument("--silent")
19 |         self.chrome_options.add_argument("--blink-settings=imagesEnabled=false")
20 | 
21 |     def unametoid(self, username):
22 |         url = "https://twitter.com/{}".format(username)
23 |         # print(url)
24 |         driver = webdriver.Chrome(options=self.chrome_options)
25 |         driver.get(url)
26 | 
27 |         html = driver.page_source
28 |         soup = BeautifulSoup(html, "html.parser")
29 |         try:
30 |             user_id = soup.find("script", {"data-testid": "UserProfileSchema-test"})
31 |             data = json.loads(user_id.string)
32 |             # driver.quit()
33 |             return {
34 |                 "data": data["author"]["identifier"],
35 |                 "message": f"Numerical id found for username {username}",
36 |             }
37 |         except:
38 |             return {
39 |                 "data": None,
40 |                 "message": f"Numerical id not found for username {username}",
41 |             }
42 | 
43 |     def idtouname(self, numid):
44 |         url = "https://twitter.com/i/user/{}".format(numid)
45 |         driver = webdriver.Chrome(options=self.chrome_options)
46 |         driver.get(url)
47 |         html = driver.page_source
48 |         soup = BeautifulSoup(html, "html.parser")
49 |         try:
50 |             user_id = soup.find("script", {"data-testid": "UserProfileSchema-test"})
51 |             data = json.loads(user_id.string)
52 |             # driver.quit()
53 |             return {
54 |                 "data": data["author"]["additionalName"],
55 |                 "message": f"Username found for numerical id {numid}",
56 |             }
57 |         except:
58 |             return {
59 |                 "data": None,
60 |                 "message": f"Username not found for numerical id {numid}",
61 |             }
62 | 


--------------------------------------------------------------------------------
/src/scrape_up/uci/UCI.py:
--------------------------------------------------------------------------------
 1 | from bs4 import BeautifulSoup
 2 | 
 3 | from scrape_up.config.request_config import RequestConfig, get
 4 | 
 5 | 
 6 | class UCI:
 7 |     """
 8 |     Create an instance of UCI class
 9 |     ```python
10 |     uci = UCI()
11 |     ```
12 |     | Methods       | Details                               |
13 |     | ------------- | ------------------------------------- |
14 |     | `.datasets()` | Fetches datasets information from UCI |
15 |     """
16 | 
17 |     def __init__(self, *, config: RequestConfig = RequestConfig()):
18 |         headers = {
19 |             "User-Agent": "Mozilla/5.0 (Windows NT 6.3; Win 64 ; x64) Apple WeKit /537.36(KHTML , like Gecko) Chrome/80.0.3987.162 Safari/537.36"
20 |         }
21 |         self.config = config
22 |         if self.config.headers == {}:
23 |             self.config.set_headers(headers)
24 | 
25 |     def datasets(self, number):
26 |         """
27 |         Get UCI datasets information.\n
28 |         Args:
29 |         `number (int)`: The number of datasets to fetch. The method fetches datasets in batches of 10.
30 |         Example:
31 |         ```python
32 |         uci = UCI()
33 |         datasets_info = uci.datasets(20)
34 |         ```
35 |         Returns:
36 |         ```js
37 |         [
38 |             {
39 |                 "Name":"Iris",
40 |                 "Link":"https://archive.ics.uci.edu//dataset/53/iris",
41 |                 "Description":"A small classic dataset from Fisher, 1936. One of the earliest datasets used for evaluation of classification methodologies.\n",
42 |                 "Extra Info":" Classification  Multivariate  150 Instances  4 Attributes "
43 |             }
44 |         ]
45 |         ```
46 |         """
47 |         try:
48 |             number = number // 10
49 |             dataset = []
50 |             for i in range(0, number):
51 |                 url = "https://archive.ics.uci.edu/datasets?skip={}&take=10&sort=desc&orderBy=NumHits&search=s".format(
52 |                     i * 10
53 |                 )
54 |                 html_text = get(url, self.config).text
55 |                 soup = BeautifulSoup(html_text, "lxml")
56 | 
57 |                 container = soup.find("div", {"class": "flex flex-col gap-1"})
58 | 
59 |                 for items in container.find_all(
60 |                     "div", {"class": "rounded-box bg-base-100"}
61 |                 ):
62 |                     title = items.find("h2").text
63 |                     link = (
64 |                         "https://archive.ics.uci.edu/"
65 |                         + items.find("a", href=True)["href"]
66 |                     )
67 |                     description = items.find("p").text
68 |                     extra_info = ""
69 |                     for item in items.find_all(
70 |                         "div", {"class": "col-span-3 flex items-center gap-2"}
71 |                     ):
72 |                         extra_info = extra_info + item.text + " "
73 |                     data = {
74 |                         "Name": title,
75 |                         "Link": link,
76 |                         "Description": description,
77 |                         "Extra Info": extra_info,
78 |                     }
79 |                     dataset.append(data)
80 |             return dataset
81 |         except:
82 |             return None
83 | 


--------------------------------------------------------------------------------
/src/scrape_up/who/WHO.py:
--------------------------------------------------------------------------------
 1 | from bs4 import BeautifulSoup
 2 | from scrape_up.config.request_config import RequestConfig, get
 3 | 
 4 | 
 5 | class WHO:
 6 |     """
 7 |     Create an instance of WHO class.\n
 8 |     ```python
 9 |     who = WHO()
10 |     ```
11 |     | Methods                        | Details                                     |
12 |     | ------------------------------ | ------------------------------------------- |
13 |     | `get_disease_outbreak()` | Get Disease Outbreak News from WHO website. |
14 |     """
15 | 
16 |     def __init__(self, *, config: RequestConfig = RequestConfig()):
17 |         headers = {
18 |             "User-Agent": "Mozilla/5.0 (Windows NT 6.3; Win 64 ; x64) Apple WeKit /537.36(KHTML , like Gecko) Chrome/80.0.3987.162 Safari/537.36"
19 |         }
20 |         self.config = config
21 |         if self.config.headers == {}:
22 |             self.config.set_headers(headers)
23 | 
24 |     def get_disease_outbreak(self, number):
25 |         """
26 |         Get Disease Outbreak News from WHO website.\n
27 |         Parameters: `number` (int): The number of pages (each page contains 10 items).
28 |         ```python
29 |         who = WHO()
30 |         who.get_disease_outbreak()
31 |         ```
32 |         Returns:
33 |         ```js
34 |         [
35 |             {
36 |                 "Title":"Circulating vaccine-derived poliovirus type 2 (cVDPV2) - United Republic of Tanzania",
37 |                 "Date":"28 July 2023 ",
38 |                 "Link":"https://www.who.int/emergencies/disease-outbreak-news/item/2023-DON480"
39 |             }
40 |             ...
41 |         ]
42 |         ```
43 |         """
44 | 
45 |         try:
46 |             number = number // 10
47 |             DON = []
48 |             for i in range(1, number + 1):
49 |                 url = f"https://www.who.int/emergencies/disease-outbreak-news/{i}"
50 |                 html_text = get(url, self.config).text
51 |                 soup = BeautifulSoup(html_text, "lxml")
52 | 
53 |                 container = soup.find("div", {"class": "sf-list-vertical"})
54 | 
55 |                 for items in container.find_all(
56 |                     "a", {"class": "sf-list-vertical__item"}, href=True
57 |                 ):
58 |                     title = items.find("span", {"class": "full-title"})
59 |                     date = title.findNext()
60 |                     date = date.text.split("|")[0]
61 |                     link = items["href"]
62 |                     data = {"Title": title.text, "Date": date, "Link": link}
63 |                     DON.append(data)
64 |             return DON
65 |         except:
66 |             return None
67 | 
68 | 
69 | if __name__ == "__main__":
70 |     who = WHO()
71 |     print(who.get_disease_outbreak(number=10))
72 | 


--------------------------------------------------------------------------------
/src/scrape_up/who/__init__.py:
--------------------------------------------------------------------------------
1 | from .WHO import WHO
2 | 
3 | __all__ = ["WHO"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/wikipedia/wikipedia.py:
--------------------------------------------------------------------------------
 1 | from bs4 import BeautifulSoup
 2 | 
 3 | from scrape_up.config.request_config import RequestConfig, get
 4 | 
 5 | 
 6 | class WikipediaScraper:
 7 |     """
 8 |     Create an object of the 'WikipediaScrapper' class:
 9 | 
10 |     ```python
11 |     Scraper = WikipediaScraper()
12 |     ```
13 | 
14 |     | Methods           | Details                                                 |
15 |     | ----------------- | ------------------------------------------------------- |
16 |     | `.scrape(url)`    | Returns the Scraped Data from Wikipedia                 |
17 |     | `.get_featured()` | Returns the featured article for the day from Wikipedia |
18 |     """
19 | 
20 |     def __init__(self, *, config: RequestConfig = RequestConfig()):
21 |         self.config = config
22 | 
23 |     def scrape(self, query: str):
24 |         try:
25 |             URL = f"https://en.wikipedia.org/wiki/{query}"
26 |             response = get(URL, self.config)
27 |             soup = BeautifulSoup(response.text, "html.parser")
28 | 
29 |             # Extract the title
30 |             title = soup.find(id="firstHeading").text
31 | 
32 |             # Extract all the headings and their content
33 |             sections = soup.find_all("h2")
34 |             data = {}
35 |             for section in sections:
36 |                 heading = section.find("span", class_="mw-headline")
37 |                 if heading:
38 |                     content = []
39 |                     next_node = section.find_next_sibling(
40 |                         ["h2", "h3", "h4", "h5", "h6"]
41 |                     )
42 |                     while next_node and next_node.name != "h2":
43 |                         if next_node.name in ["h3", "h4", "h5", "h6"]:
44 |                             content.append({"heading": next_node.text.strip()})
45 |                         elif next_node.name == "p":
46 |                             content.append({"text": next_node.text.strip()})
47 |                         next_node = next_node.find_next_sibling(
48 |                             ["h2", "h3", "h4", "h5", "h6", "p"]
49 |                         )
50 |                     data[heading.text] = content
51 | 
52 |             # Return the data as JSON
53 |             result = {"title": title, "sections": data}
54 |             return result
55 |         except:
56 |             return None
57 | 
58 |     def get_featured(self):
59 |         """
60 |         Get the featured data from the main page of Wikipedia.
61 | 
62 |         Returns:
63 |         A string containing the featured data from the main page of Wikipedia.
64 |         """
65 |         try:
66 |             url = "https://en.wikipedia.org/wiki/Main_Page"
67 |             html_text = requests.get(url).text
68 |             soup = BeautifulSoup(html_text, "lxml")
69 | 
70 |             container = soup.find("div", {"id": "mp-left"})
71 |             data = container.find("p").text
72 |             return data
73 |         except:
74 |             return None
75 | 


--------------------------------------------------------------------------------
/src/scrape_up/wuzzuf/__init__.py:
--------------------------------------------------------------------------------
1 | from .wuzzuf import Jobs
2 | 
3 | __all__ = ["Jobs"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/yellowpages/__init__.py:
--------------------------------------------------------------------------------
1 | from .yellowpages import yellowpages
2 | 
3 | __all__ = ["yellowpages"]
4 | 


--------------------------------------------------------------------------------
/src/scrape_up/yellowpages/yellowpages.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | from bs4 import BeautifulSoup
 3 | 
 4 | 
 5 | class Yellowpages:
 6 |     """
 7 |     Create an instance of `yellowpages` class
 8 | 
 9 |     ```python
10 |     # This will return the list of restaurtants in New York and their information
11 |     data = yellowpages("restaurtant", "New York")
12 |     ```
13 |     | Method            | Details                                                           |
14 |     | ----------------- | ----------------------------------------------------------------- |
15 |     | `business_info()` | Returns the list of dictionaries containing business information. |
16 | 
17 |     """
18 | 
19 |     def __init__(self, business, place):
20 |         self.business = business
21 |         self.place = place
22 |         self.info = []
23 |         try:
24 |             url = f"https://www.yellowpages.com/search?search_terms={self.business}&geo_location_terms={self.place}"
25 |             response = requests.get(url, headers={"User-Agent": "XY"})
26 |             self.soup = BeautifulSoup(response.content, "lxml")
27 | 
28 |         except:
29 |             return None
30 | 
31 |     def business_info(self):
32 |         businesses = self.soup.find_all("div", class_="srp-listing clickable-area mdm")
33 |         for item in businesses:
34 |             name = item.find("a", class_="business-name").text
35 |             address = item.find("div", class_="street-address").text
36 |             try:
37 |                 rating = item.find("div", class_="ratings").text
38 |             except:
39 |                 rating = " "
40 |             try:
41 |                 website = item.find("a", class_="track-visit-website")["href"]
42 |             except:
43 |                 website = " "
44 |             try:
45 |                 phone_no = item.find("div", class_="phones phone primary").text
46 |             except:
47 |                 phone_no = " "
48 |             try:
49 |                 menu = (
50 |                     "https://www.yellowpages.com"
51 |                     + item.find("a", class_="menu")["href"]
52 |                 )
53 |             except:
54 |                 menu = " "
55 |             try:
56 |                 description = item.find("p", class_="body").text
57 |             except:
58 |                 description = " "
59 |             try:
60 |                 amenities = item.find("div", class_="amenities-info").text
61 |             except:
62 |                 amenities = " "
63 |             try:
64 |                 opentime = item.find("div", class_="open-status").text
65 |             except:
66 |                 opentime = " "
67 |             businessinfo = {
68 |                 "name": name,
69 |                 "address": address,
70 |                 "rating": rating,
71 |                 "website": website,
72 |                 "phone_no": phone_no,
73 |                 "menu": menu,
74 |                 "description": description,
75 |                 "amenities": amenities,
76 |                 "opentime": opentime,
77 |             }
78 |             self.info.append(businessinfo)
79 |         return self.info
80 | 


--------------------------------------------------------------------------------
/src/scrape_up/zomato/__init__.py:
--------------------------------------------------------------------------------
1 | from .zomato import Zomato
2 | 
3 | __all__ = ["Zomato"]
4 | 


--------------------------------------------------------------------------------
/src/test/academia_test.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from scrape_up.academia import Academia
 3 | 
 4 | 
 5 | class TestAcademia(unittest.TestCase):
 6 |     def setUp(self):
 7 |         self.academia = Academia()
 8 | 
 9 |     def test_get_research_topics(self):
10 |         academia = Academia()
11 |         result = academia.get_research_topics()
12 |         self.assertIsNotNone(result)
13 |         self.assertIsInstance(result, list)
14 | 
15 |         if result is not None:
16 |             for topic in result:
17 |                 self.assertIn("Title", topic)
18 |                 self.assertIn("Link", topic)
19 |                 self.assertIn("Number of Articles", topic)
20 |                 self.assertIn("Followers", topic)
21 | 
22 |                 self.assertIsInstance(topic["Title"], str)
23 |                 self.assertIsInstance(topic["Link"], str)
24 |                 self.assertIsInstance(topic["Number of Articles"], str)
25 |                 self.assertIsInstance(topic["Followers"], str)
26 | 
27 |     def test_get_research_paper(self):
28 |         academia = Academia()
29 |         result = academia.get_research_papers(search="Machine Learning")
30 |         self.assertIsNotNone(result)
31 |         self.assertIsInstance(result, list)
32 | 
33 |         if result is not None:
34 |             for paper in result:
35 |                 self.assertIn("Title", paper)
36 |                 self.assertIn("Summary", paper)
37 |                 self.assertIn("Link", paper)
38 | 
39 |                 self.assertIsInstance(paper["Title"], str)
40 |                 if paper["Summary"] is not None:
41 |                     self.assertIsInstance(paper["Summary"], str)
42 |                 self.assertIsInstance(paper["Link"], str)
43 | 
44 | 
45 | if __name__ == "__main__":
46 |     unittest.main()
47 | 


--------------------------------------------------------------------------------
/src/test/amazon_test.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from scrape_up.amazon import Product
 3 | 
 4 | 
 5 | class AmazonTest(unittest.TestCase):
 6 |     def setUp(self):
 7 |         self.product = Product("Watch")
 8 | 
 9 |     def test_get_product(self):
10 |         result = self.product.get_product_details()
11 |         self.assertIsNotNone(result)
12 |         self.assertIsInstance(result, dict)
13 |         if result is not None:
14 |             self.assertIn("data", result)
15 |             self.assertIn("message", result)
16 |         self.assertIsNotNone(result["data"], str)
17 |         if result["data"] is not None:
18 |             self.assertIsInstance(result["data"], str)
19 |         if result["message"] is not None:
20 |             self.assertIsInstance(result["message"], str)
21 | 
22 |     def test_get_product_details(self):
23 |         result = self.product.get_product_details()
24 |         self.assertIsNotNone(result)
25 |         self.assertIsInstance(result, dict)
26 |         if result is not None:
27 |             self.assertIn("data", result)
28 |             self.assertIn("message", result)
29 |         self.assertIsNotNone(result["data"], str)
30 |         if result["data"] is not None:
31 |             self.assertIsInstance(result["data"], str)
32 |         if result["message"] is not None:
33 |             self.assertIsInstance(result["message"], str)
34 | 
35 |     def test_get_product_image(self):
36 |         result = self.product.get_product_details()
37 |         self.assertIsNotNone(result)
38 |         self.assertIsInstance(result, dict)
39 |         if result is not None:
40 |             self.assertIn("data", result)
41 |             self.assertIn("message", result)
42 |         self.assertIsNotNone(result["data"], str)
43 |         if result["data"] is not None:
44 |             self.assertIsInstance(result["data"], str)
45 |         if result["message"] is not None:
46 |             self.assertIsInstance(result["message"], str)
47 | 
48 |     def test_customer_review(self):
49 |         result = self.product.get_product_details()
50 |         self.assertIsNotNone(result)
51 |         self.assertIsInstance(result, dict)
52 |         if result is not None:
53 |             self.assertIn("data", result)
54 |             self.assertIn("message", result)
55 |         self.assertIsNotNone(result["data"], str)
56 |         if result["data"] is not None:
57 |             self.assertIsInstance(result["data"], str)
58 |         if result["message"] is not None:
59 |             self.assertIsInstance(result["message"], str)
60 | 
61 | 
62 | if __name__ == "__main__":
63 |     unittest.main()
64 | 


--------------------------------------------------------------------------------
/src/test/banners_test.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from src.scrape_up.banners import Scraper88x31
 3 | 
 4 | 
 5 | class TestScraper88x31(unittest.TestCase):
 6 |     def setUp(self):
 7 |         """
 8 |         Initialize a Scraper88x31 instance before each test method.
 9 |         """
10 |         self.scraper = Scraper88x31()
11 | 
12 |     def test_get_all(self):
13 |         """
14 |         | Methods            | Details                                                  |
15 |         | ------------------ | -------------------------------------------------------- |
16 |         | `get_all()`        | Returns the list of all available 88x31 banners          |
17 |         """
18 |         try:
19 |             banners = self.scraper.get_all()
20 | 
21 |             # Check if banners is a list of URLs
22 |             self.assertIsInstance(banners, list)
23 |             for banner in banners:
24 |                 self.assertIsInstance(banner, str)
25 |                 self.assertTrue(banner.startswith("https://cyber.dabamos.de/88x31/"))
26 |                 self.assertTrue(banner.endswith(".gif"))
27 |         except:
28 |             return None
29 | 
30 | 
31 | if __name__ == "__main__":
32 |     unittest.main()
33 | 


--------------------------------------------------------------------------------
/src/test/bayt_test.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from scrape_up.bayt import Jobs
 3 | 
 4 | 
 5 | class TestJobs(unittest.TestCase):
 6 |     """
 7 |     | Methods                       | Details                                                                    |
 8 |     | ----------------------------- | -------------------------------------------------------------------------- |
 9 |     | `.fetch_jobs(query, page)`    | Fetch job listings data from Bayt.com based on the given query and page.   |
10 |     """
11 | 
12 |     def setUp(self):
13 |         """
14 |         Initialize an instance of the Jobs class before each test.
15 |         """
16 |         self.scraper = Jobs()
17 |         self.query = "software developer"
18 |         self.page = 1
19 | 
20 |     def test_fetch_jobs(self):
21 |         """
22 |         Test the fetch_jobs method.
23 |         """
24 |         try:
25 |             jobs_data = self.scraper.fetch_jobs(self.query, self.page)
26 |             self.assertIsNotNone(jobs_data, "Failed to fetch job listings")
27 |             self.assertIsInstance(jobs_data, list, "Job listings should be a list")
28 |             self.assertGreater(len(jobs_data), 0, "Job listings should not be empty")
29 | 
30 |             # Check the structure of the first job listing
31 |             job = jobs_data[0]
32 |             expected_keys = ["title", "company", "location", "url"]
33 |             for key in expected_keys:
34 |                 self.assertIn(key, job, f"Missing expected key: {key}")
35 |                 self.assertIsInstance(job[key], str, f"{key} should be a string")
36 | 
37 |         except:
38 |             return None
39 | 
40 |     def test_extract_job_info(self):
41 |         """
42 |         Test the __extract_job_info method indirectly by testing fetch_jobs.
43 |         """
44 |         try:
45 |             jobs_data = self.scraper.fetch_jobs(self.query, self.page)
46 |             self.assertIsNotNone(jobs_data, "Failed to fetch job listings")
47 |             self.assertGreater(len(jobs_data), 0, "Job listings should not be empty")
48 | 
49 |             # Check the first job listing details
50 |             job = jobs_data[0]
51 |             self.assertIn("title", job, "Job should have a title")
52 |             self.assertIn("company", job, "Job should have a company name")
53 |             self.assertIn("location", job, "Job should have a location")
54 |             self.assertIn("url", job, "Job should have a URL")
55 | 
56 |             # Ensure that none of the fields are empty
57 |             self.assertNotEqual(job["title"], "", "Job title should not be empty")
58 |             self.assertNotEqual(job["url"], "", "Job URL should not be empty")
59 | 
60 |         except:
61 |             return None
62 | 
63 | 
64 | if __name__ == "__main__":
65 |     unittest.main()
66 | 


--------------------------------------------------------------------------------
/src/test/bbc_test.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from scrape_up.bbcnews import BBCNews
 3 | 
 4 | 
 5 | class TestBBCNews(unittest.TestCase):
 6 |     """
 7 |     | Methods            | Details                                                  |
 8 |     | ------------------ | -------------------------------------------------------- |
 9 |     | `.get_headlines()` | Returns the list of object containig the headlines       |
10 |     | `get_article()`    | Returns an object with proper details about the articles |
11 | 
12 |     """
13 | 
14 |     def setUp(self):
15 |         """
16 |         Initialize a BBCNews instance before each test method.
17 |         """
18 |         self.bbc_scraper = BBCNews()
19 | 
20 |     def test_get_headlines(self):
21 |         """
22 |         Testing the get_headlines() method.
23 |         """
24 |         try:
25 |             headlines = self.bbc_scraper.get_headlines()
26 | 
27 |             # Check if headlines is a list of dictionaries
28 |             if headlines is not None:
29 |                 self.assertIsInstance(headlines, list)
30 |                 for headline in headlines:
31 |                     self.assertIsInstance(headline, dict)
32 |                     self.assertIn("index", headline)
33 |                     self.assertIn("headline", headline)
34 | 
35 |                 # Check if all headlines have unique indices
36 |                 indices = {headline["index"] for headline in headlines}
37 |                 self.assertEqual(
38 |                     len(indices), len(headlines), "Duplicate indices found in headlines"
39 |                 )
40 |                 # Check if headlines list is not empty
41 |                 self.assertGreater(len(headlines), 0, "No headlines extracted")
42 |         except:
43 |             return None
44 | 
45 |     def test_get_article(self):
46 |         """
47 |         Testing the get_article(url) method.
48 |         """
49 |         try:
50 |             valid_url = "https://www.bbc.co.uk/news/world-europe-61258011"  # Test with a valid article URL
51 |             article = self.bbc_scraper.get_article(valid_url)
52 | 
53 |             if article is not None:
54 |                 self.assertIsInstance(
55 |                     article, dict
56 |                 )  # Check if article is a dictionary or not
57 |                 self.assertIn(
58 |                     "main_heading", article
59 |                 )  # Does it contain main_heading or not
60 |                 self.assertIn("time", article)  # Does it contain time or not
61 |                 self.assertIn("text", article)  # Does it contain text or not
62 | 
63 |             invalid_url = "https://www.bbc.co.uk/news/non-existent-article"  # Test with an invalid article URL
64 |             invalid_article = self.bbc_scraper.get_article(
65 |                 invalid_url
66 |             )  # Should return None
67 |             self.assertIsNone(invalid_article, "Invalid URL should return None")
68 |         except:
69 |             return None
70 | 
71 | 
72 | if __name__ == "__main__":
73 |     unittest.main()
74 | 


--------------------------------------------------------------------------------
/src/test/codechef_test.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from scrape_up import codechef
 3 | 
 4 | 
 5 | class CodeChefTest(unittest.TestCase):
 6 |     """
 7 |     CodeChef module test.\n
 8 |     | Methods         | Details                                                          |
 9 |     | --------------- | ---------------------------------------------------------------- |
10 |     | `get_profile()` | Returns name, username, profile_image_link, rating, details etc. |
11 |     """
12 | 
13 |     def test_get_profile(self):
14 |         instance = codechef.User(id="heltion")
15 |         method_response = instance.get_profile()
16 | 
17 |         self.assertEqual(
18 |             list(method_response.keys()),
19 |             ["name", "username", "profile_image_link", "rating", "details"],
20 |             "Codechef:get_profile - keys mismatch",
21 |         )
22 | 
23 | 
24 | if __name__ == "__main__":
25 |     unittest.main()
26 | 


--------------------------------------------------------------------------------
/src/test/coinmarketcap_test.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from scrape_up import coinmarketcap
 3 | 
 4 | # sys.path.insert(0, "..")
 5 | 
 6 | 
 7 | class CoinMarketCapTest(unittest.TestCase):
 8 |     """
 9 |     CoinMarketCap module test.\n
10 |     | Method                       | Details                                                  |
11 |     | ---------------------------- | -------------------------------------------------------- |
12 |     | `get_top_cryptocurrencies()` | Fetches and returns data about the top cryptocurrencies. |
13 |     """
14 | 
15 |     def test_get_top_cryptocurrencies(self):
16 |         instance = coinmarketcap.Crypto()
17 |         top_cryptocurrencies = instance.get_top_cryptocurrencies()
18 | 
19 |         self.assertIsInstance(top_cryptocurrencies, list)
20 | 
21 |         for item in top_cryptocurrencies:
22 |             self.assertIsInstance(item, dict)
23 | 
24 |             self.assertEqual(
25 |                 list(item.keys()),
26 |                 [
27 |                     "Name",
28 |                     "Symbol",
29 |                     "Link",
30 |                     "Price",
31 |                     "1h%",
32 |                     "24h%",
33 |                     "7d%",
34 |                     "MarketCap",
35 |                     "Volume(24h)",
36 |                     "Circulating Supply",
37 |                 ],
38 |             )
39 | 
40 |             for value in item.values():
41 |                 self.assertIsInstance(value, str)
42 | 
43 | 
44 | if __name__ == "__main__":
45 |     unittest.main()
46 | 


--------------------------------------------------------------------------------
/src/test/coursera_test.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from scrape_up.coursera import Coursera
 3 | 
 4 | 
 5 | class TestCoursera(unittest.TestCase):
 6 |     def setUp(self, topic="Machine Learning"):
 7 |         self.scraper = Coursera(topic)
 8 | 
 9 |     def test_get_courses(self):
10 |         result = self.scraper.get_courses()
11 |         self.assertIsNotNone(result)
12 |         self.assertIsInstance(result, list)
13 | 
14 |         if result is not None:
15 |             for topic in result:
16 |                 self.assertIn("title", topic)
17 |                 self.assertIn("taught_by", topic)
18 |                 self.assertIn("skills", topic)
19 |                 self.assertIn("rating", topic)
20 |                 self.assertIn("review_count", topic)
21 |                 self.assertIn("img_url", topic)
22 |                 self.assertIn("link", topic)
23 | 
24 |                 self.assertIsInstance(topic["title"], str)
25 |                 self.assertIsInstance(topic["taught_by"], str)
26 |                 self.assertIsInstance(topic["skills"], str)
27 |                 self.assertIsInstance(topic["rating"], str)
28 |                 self.assertIsInstance(topic["review_count"], str)
29 |                 self.assertIsInstance(topic["img_url"], str)
30 |                 self.assertIsInstance(topic["link"], str)
31 | 
32 |     def test_fetch_modules_with_modules(self):
33 |         result = self.scraper.fetch_modules(course="Machine Learning with Python")
34 |         self.assertIsNotNone(result)
35 |         self.assertIsInstance(result, dict)
36 | 
37 |         if result is not None:
38 |             for key, value in result.items():
39 |                 self.assertIsInstance(value, str)
40 | 
41 |     def test_fetch_modules_with_specializations(self):
42 |         result = self.scraper.fetch_modules(course="Machine Learning")
43 |         self.assertIsNotNone(result)
44 |         self.assertIsInstance(result, dict)
45 | 
46 |         if result is not None:
47 |             for key, value in result.items():
48 |                 self.assertIsInstance(value, dict)
49 |                 self.assertIn("Title", value)
50 |                 self.assertIn("Link", value)
51 |                 self.assertIsInstance(value["Title"], str)
52 |                 self.assertIsInstance(value["Link"], str)
53 | 
54 | 
55 | if __name__ == "__main__":
56 |     unittest.main()
57 | 


--------------------------------------------------------------------------------
/src/test/covidinfo_test.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from scrape_up.covidinfo import covidinfo
 3 | 
 4 | 
 5 | class CovidInfoTest(unittest.TestCase):
 6 |     def setUp(self):
 7 |         self.instance = covidinfo.CovidInfo()
 8 | 
 9 |     """
10 |     CovidInfo module test.\n
11 |     | Methods | Details |
12 |     | --------------------------- | ---------------------------------------------------------------------------------------------------- |
13 |     | `.covid_data()` | Returns the list of all the covid data scraped from the website |
14 |     | `.total_cases()` | Returns the count of total covid cases all over the world |
15 |     | `.total_deaths()` | Returns the count of deaths covid cases all over the world |
16 |     | `.total_recovered()` | Returns the count of recovered covid cases all over the world |
17 |     | `.latest_news()` | Return the lastest news of the day |
18 |     """
19 | 
20 |     def test_covid_data(self):
21 |         covid_data_response = self.instance.covid_data()
22 |         self.assertIsInstance(covid_data_response, list)
23 |         if covid_data_response is not None:
24 |             for data in covid_data_response:
25 |                 self.assertIsInstance(data, dict)
26 |                 self.assertIn("Country", data)
27 |                 self.assertIn("Number of Cases", data)
28 |                 self.assertIn("Deaths", data)
29 |                 self.assertIn("Continent", data)
30 |                 self.assertIsInstance(data["Country"], str)
31 |                 self.assertIsInstance(data["Number of Cases"], int)
32 |                 self.assertIsInstance(data["Deaths"], int)
33 |                 self.assertIsInstance(data["Continent"], str)
34 | 
35 |     def test_total_cases(self):
36 |         total_cases_response = self.instance.total_cases()
37 |         self.assertIsInstance(total_cases_response, str)
38 | 
39 |     def test_total_deaths(self):
40 |         total_deaths_response = self.instance.total_deaths()
41 |         self.assertIsInstance(total_deaths_response, str)
42 | 
43 |     def test_total_recovered(self):
44 |         test_total_response = self.instance.total_recovered()
45 |         self.assertIsInstance(test_total_response, dict)
46 | 
47 |     def test_latest_news(self):
48 |         latest_news_response = self.instance.latest_news()
49 |         self.assertIsInstance(latest_news_response, (list, type(None)))
50 |         if latest_news_response is not None:
51 |             for news in latest_news_response:
52 |                 self.assertIsInstance(news, dict)
53 |                 self.assertIn("news", news)
54 |                 self.assertIn("source", news)
55 |                 self.assertIsInstance(news["news"], str)
56 |                 self.assertIsInstance(news["source"], str)
57 | 
58 | 
59 | if __name__ == "__main__":
60 |     unittest.main()
61 | 


--------------------------------------------------------------------------------
/src/test/eazydiner_test.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | import json
 3 | from scrape_up.eazydiner import EazyDiner
 4 | 
 5 | 
 6 | class EazyDinerTest(unittest.TestCase):
 7 |     """
 8 |     EazyDiner class test.\n
 9 |     | Methods                  | Details                                                          |
10 |     | ------------------------ | ---------------------------------------------------------------- |
11 |     | `.get_restaurants()`     | Tests the get_restaurants() method of the EazyDiner class        |
12 |     | `.get_breakfast()`       | Tests the get_breakfast() method of the EazyDiner class          |
13 |     | `.get_lunch()`           | Tests the get_lunch() method of the EazyDiner class              |
14 |     | `.get_dinner()`          | Tests the get_dinner() method of the EazyDiner class             |
15 |     | `.dinner_with_discount()`| Tests the dinner_with_discount() method of the EazyDiner class   |
16 |     | `.get_top10()`           | Tests the get_top10() method of the EazyDiner class              |
17 |     """
18 | 
19 |     def assert_response_keys(self, response, expected_keys):
20 |         if isinstance(response, str):
21 |             response_dict = json.loads(response)
22 | 
23 |             for key in expected_keys:
24 |                 self.assertTrue(
25 |                     key in response_dict, f"Key '{key}' is missing in the response."
26 |                 )
27 | 
28 |     def test_get_restaurants(self):
29 |         eazydiner = EazyDiner(
30 |             location="Delhi NCR"
31 |         )  # Replace with an appropriate location
32 |         restaurants = eazydiner.get_restaurants()
33 |         self.assertIsInstance(restaurants, str)
34 |         self.assert_response_keys(restaurants, ["restaurants"])
35 | 
36 |     def test_get_breakfast(self):
37 |         eazydiner = EazyDiner(
38 |             location="Delhi NCR"
39 |         )  # Replace with an appropriate location
40 |         breakfast = eazydiner.get_breakfast()
41 |         self.assertIsInstance(breakfast, str)
42 |         self.assert_response_keys(breakfast, ["restaurants"])
43 | 
44 |     def test_get_lunch(self):
45 |         eazydiner = EazyDiner(
46 |             location="Delhi NCR"
47 |         )  # Replace with an appropriate location
48 |         lunch = eazydiner.get_lunch()
49 |         self.assertIsInstance(lunch, str)
50 |         self.assert_response_keys(lunch, ["restaurants"])
51 | 
52 |     def test_get_dinner(self):
53 |         eazydiner = EazyDiner(
54 |             location="Delhi NCR"
55 |         )  # Replace with an appropriate location
56 |         dinner = eazydiner.get_dinner()
57 |         self.assertIsInstance(dinner, str)
58 |         self.assert_response_keys(dinner, ["restaurants"])
59 | 
60 |     def test_dinner_with_discount(self):
61 |         eazydiner = EazyDiner(
62 |             location="Delhi NCR"
63 |         )  # Replace with an appropriate location
64 |         dinner_discount = eazydiner.dinner_with_discount()
65 |         self.assertIsInstance(dinner_discount, list)
66 | 
67 |     def test_get_top10(self):
68 |         eazydiner = EazyDiner(
69 |             location="Delhi NCR"
70 |         )  # Replace with an appropriate location
71 |         top10 = eazydiner.get_top10()
72 |         self.assertIsInstance(top10, dict)
73 | 
74 | 
75 | if __name__ == "__main__":
76 |     unittest.main()
77 | 


--------------------------------------------------------------------------------
/src/test/ebay_test.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from scrape_up.ebay import EBAY
 3 | 
 4 | 
 5 | class eBayTest(unittest.TestCase):
 6 |     """
 7 |     eBay module test.\n
 8 |     | Methods             | Details                             |
 9 |     | ------------------- | ----------------------------------- |
10 |     | `spotlights()`      | Returns spotlight deals on EBAY.    |
11 |     | `featured()`        | Returns the featured deals on EBAY. |
12 |     | `specific_deals()`  | Returns the specific deals on EBAY. |
13 |     """
14 | 
15 |     def setUp(self):
16 |         self.instance = EBAY()
17 | 
18 |     def test_spotlights(self):
19 |         spotlights = self.instance.spotlights()
20 | 
21 |         self.assertIsNotNone(spotlights)
22 |         self.assertIsInstance(spotlights, dict)
23 |         self.assertEqual(
24 |             list(spotlights.keys()), ["Description", "Product", "Price", "Link"]
25 |         )
26 | 
27 |         for value in spotlights.values():
28 |             self.assertIsInstance(value, str)
29 | 
30 |     def test_featured(self):
31 |         featured = self.instance.featured()
32 | 
33 |         self.assertIsNotNone(featured)
34 |         self.assertIsInstance(featured, list)
35 | 
36 |         for item in featured:
37 |             self.assertIsInstance(item, dict)
38 |             self.assertEqual(list(item.keys()), ["Product", "Price", "Link"])
39 | 
40 |             for value in item.values():
41 |                 self.assertIsInstance(value, str)
42 | 
43 |     def test_specific_deals(self):
44 |         specific_deals = self.instance.specific_deals()
45 | 
46 |         self.assertIsNotNone(specific_deals)
47 |         self.assertIsInstance(specific_deals, list)
48 | 
49 |         for item in specific_deals:
50 |             self.assertIsInstance(item, dict)
51 |             self.assertEqual(list(item.keys()), ["Product", "Price", "Link"])
52 | 
53 |             for value in item.values():
54 |                 self.assertIsInstance(value, str)
55 | 
56 | 
57 | if __name__ == "__main__":
58 |     unittest.main()
59 | 


--------------------------------------------------------------------------------
/src/test/espncricinfo_test.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from scrape_up.espncricinfo import Espncricinfo
 3 | 
 4 | 
 5 | class ESPNTest(unittest.TestCase):
 6 |     def test_connection(self):
 7 |         instance = Espncricinfo()
 8 |         self.assertTrue(
 9 |             instance,
10 |             "ESPN:__init__ - connection failed",
11 |         )
12 | 
13 |     def test_get_news(self):
14 |         instance = Espncricinfo()
15 |         method_response = instance.get_news()
16 | 
17 |         self.assertIsInstance(
18 |             method_response,
19 |             list,
20 |             "ESPN:get_news - invalid response",
21 |         )
22 | 
23 |     def test_get_livescores(self):
24 |         instance = Espncricinfo()
25 |         method_response = instance.get_livescores()
26 | 
27 |         self.assertIsInstance(
28 |             method_response,
29 |             list,
30 |             "ESPN:get_livescores - invalid response",
31 |         )
32 | 
33 | 
34 | if __name__ == "__main__":
35 |     unittest.main()
36 | 


--------------------------------------------------------------------------------
/src/test/fide_test.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from scrape_up.fide import FIDE
 3 | 
 4 | 
 5 | class FIDETest(unittest.TestCase):
 6 |     """
 7 |     Tests for the FIDE class in the fide module.
 8 |     | Methods                   | Details                                            |
 9 |     | ------------------------- | -------------------------------------------------- |
10 |     | `.get_events()`           | Returns all the major chess events of 2024.        |
11 |     | `.get_open_ratings()`     | Returns a list of top 100 open category players.   |
12 |     | `.get_women_ratings()`    | Returns a list of top 100 women category players.  |
13 |     | `.get_juniors_ratings()`  | Returns a list of top 100 juniors category players.|
14 |     | `.get_girls_ratings()`    | Returns a list of top 100 girls category players.  |
15 |     | `.get_news()`             | Returns a list of top chess/fide news.             |
16 |     """
17 | 
18 |     def test_connection(self):
19 |         instance = FIDE()
20 |         self.assertTrue(
21 |             instance,
22 |             "FIDE:__init__ - connection failed",
23 |         )
24 | 
25 |     def test_get_events(self):
26 |         instance = FIDE()
27 |         method_response = instance.get_events()
28 | 
29 |         self.assertIsInstance(
30 |             method_response,
31 |             list,
32 |             "FIDE:get_events - invalid response",
33 |         )
34 | 
35 |     def test_get_open_ratings(self):
36 |         instance = FIDE()
37 |         method_response = instance.get_open_ratings()
38 | 
39 |         self.assertIsInstance(
40 |             method_response,
41 |             list,
42 |             "FIDE:get_open_ratings - invalid response",
43 |         )
44 | 
45 |     def test_get_women_ratings(self):
46 |         instance = FIDE()
47 |         method_response = instance.get_women_ratings()
48 | 
49 |         self.assertIsInstance(
50 |             method_response,
51 |             list,
52 |             "FIDE:get_women_ratings - invalid response",
53 |         )
54 | 
55 |     def test_get_juniors_ratings(self):
56 |         instance = FIDE()
57 |         method_response = instance.get_juniors_ratings()
58 | 
59 |         self.assertIsInstance(
60 |             method_response,
61 |             list,
62 |             "FIDE:get_juniors_ratings - invalid response",
63 |         )
64 | 
65 |     def test_get_girls_ratings(self):
66 |         instance = FIDE()
67 |         method_response = instance.get_girls_ratings()
68 | 
69 |         self.assertIsInstance(
70 |             method_response,
71 |             list,
72 |             "FIDE:get_girls_ratings - invalid response",
73 |         )
74 | 
75 |     def test_get_news(self):
76 |         instance = FIDE()
77 |         method_response = instance.get_news()
78 | 
79 |         self.assertIsInstance(
80 |             method_response,
81 |             list,
82 |             "FIDE:get_news - invalid response",
83 |         )
84 | 
85 | 
86 | if __name__ == "__main__":
87 |     unittest.main()
88 | 


--------------------------------------------------------------------------------
/src/test/flexjobs_test.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | 
 3 | from scrape_up.flexjobs import FlexJobs
 4 | 
 5 | 
 6 | class TestFlexJobs(unittest.TestCase):
 7 |     def test_get_jobs_with_valid_search_query(self):
 8 |         flexjobs = FlexJobs("python developer")
 9 |         jobs = flexjobs.get_jobs()
10 |         self.assertTrue(len(jobs) > 0, "No jobs found for valid search query")
11 | 
12 |     def test_get_jobs_with_location_query(self):
13 |         flexjobs = FlexJobs("python developer", "New York")
14 |         jobs = flexjobs.get_jobs()
15 |         self.assertTrue(len(jobs) > 0, "No jobs found for valid location query")
16 | 
17 |     def test_get_jobs_with_min_jobs_limit(self):
18 |         flexjobs = FlexJobs("python developer", min_jobs=5)
19 |         jobs = flexjobs.get_jobs()
20 |         self.assertTrue(
21 |             len(jobs) >= 5, "Number of jobs retrieved exceeds max jobs limit"
22 |         )
23 | 
24 | 
25 | if __name__ == "__main__":
26 |     unittest.main()
27 | 


--------------------------------------------------------------------------------
/src/test/geeksforgeeks_test.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from scrape_up.geeksforgeeks import Geeksforgeeks
 3 | import json
 4 | 
 5 | 
 6 | class GeeksforgeeksTest(unittest.TestCase):
 7 |     """
 8 |     Geeksforgeeks module test.
 9 |     | Methods           | Details                                                                            |
10 |     | ----------------- | ---------------------------------------------------------------------------------- |
11 |     | `.get_profile()`  | Returns the user data in json format.                                              |
12 |     """
13 | 
14 |     def test_get_profile(self):
15 |         instance = Geeksforgeeks(user="nikhil25803")
16 |         method_response = instance.get_profile()
17 | 
18 |         if isinstance(method_response, str):
19 |             try:
20 |                 method_response = json.loads(method_response)
21 |             except json.JSONDecodeError:
22 |                 self.fail("get_profile should return a dictionary or a JSON string")
23 | 
24 |         expected_keys = [
25 |             "username",
26 |             "collage_name",
27 |             "collage_rank",
28 |             "overall_coding_score",
29 |             "monthly_coding_score",
30 |             "languages_used",
31 |             "current_potd_streak",
32 |             "total_problem_solved",
33 |             "campus_ambassader",
34 |         ]
35 | 
36 |         self.assertEqual(
37 |             list(method_response.keys()),
38 |             expected_keys,
39 |             "Geeksforgeeks:get_profile - keys mismatch",
40 |         )
41 | 
42 | 
43 | if __name__ == "__main__":
44 |     unittest.main()
45 | 


--------------------------------------------------------------------------------
/src/test/github_education_test.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from scrape_up import github_education
 3 | 
 4 | 
 5 | class GitHubEducationTest(unittest.TestCase):
 6 |     """
 7 |     CodeChef module test.\n
 8 |     | Methods        | Details                                                                                                             |
 9 |     | -------------- | ------------------------------------------------------------------------------------------------------------------- |
10 |     | `get_events()` | Returns the latest events along with their title, image_url, description, date, location, language, tags, and link. |
11 |     """
12 | 
13 |     def test_get_events(self):
14 |         instance = github_education.Events()
15 |         method_response = instance.get_events()
16 | 
17 |         self.assertIsInstance(
18 |             method_response, list, "GitHubEducation:get_events - return type mismatch"
19 |         )
20 |         self.assertTrue(all(isinstance(event, dict) for event in method_response))
21 | 
22 |         for event in method_response:
23 |             self.assertEqual(
24 |                 list(event.keys()),
25 |                 [
26 |                     "title",
27 |                     "image_url",
28 |                     "description",
29 |                     "date",
30 |                     "location",
31 |                     "language",
32 |                     "tags",
33 |                     "link",
34 |                 ],
35 |                 "GitHubEducation:get_events - keys mismatch",
36 |             )
37 | 
38 | 
39 | if __name__ == "__main__":
40 |     unittest.main()
41 | 


--------------------------------------------------------------------------------
/src/test/hackerearth_test.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | 
 3 | from scrape_up import hackerearth
 4 | 
 5 | 
 6 | class HackerEarthTest(unittest.TestCase):
 7 |     """
 8 |     HackerEarth module test.\n
 9 |     | Methods         | Details                                                          |
10 |     | --------------- | ---------------------------------------------------------------- |
11 |     | `get_ongoing()` | Returns the ongoing challenges.                                  |
12 |     | `get_upcoming()`| Returns the upcoming challenges.                                 |
13 |     | `get_hiring()`  | Returns information about ongoing hiring challenges.             |
14 |     """
15 | 
16 |     def setUp(self):
17 |         self.instance = hackerearth.challenges.Challenges()
18 | 
19 |     def test_get_ongoing(self):
20 |         ongoing_challenges = self.instance.get_ongoing()
21 |         self.assertIsInstance(ongoing_challenges, list)
22 | 
23 |         if len(ongoing_challenges) > 0:
24 |             first_challenge = ongoing_challenges[0]
25 |             self.assertIsInstance(first_challenge, dict)
26 |             self.assertEqual(
27 |                 list(first_challenge.keys()),
28 |                 ["Title", "No of Registrations", "Link"],
29 |                 "HackerEarth-Challenges:get_ongoing - keys mismatch",
30 |             )
31 | 
32 |     def test_get_upcoming(self):
33 |         upcoming_challenges = self.instance.get_upcoming()
34 |         self.assertIsInstance(upcoming_challenges, list)
35 | 
36 |         if len(upcoming_challenges) > 0:
37 |             first_challenge = upcoming_challenges[0]
38 |             self.assertIsInstance(first_challenge, dict)
39 |             self.assertEqual(
40 |                 list(first_challenge.keys()),
41 |                 ["Title", "No of Registrations", "Link"],
42 |                 "HackerEarth-Challenges:get_upcoming - keys mismatch",
43 |             )
44 | 
45 |     def test_get_hiring(self):
46 |         hiring_challenges = self.instance.get_hiring()
47 |         self.assertIsInstance(hiring_challenges, list)
48 | 
49 |         if len(hiring_challenges) > 0:
50 |             first_challenge = hiring_challenges[0]
51 |             self.assertIsInstance(first_challenge, dict)
52 |             self.assertEqual(
53 |                 list(first_challenge.keys()),
54 |                 ["Title", "Description", "Link"],
55 |                 "HackerEarth-Challenges:get_hiring - keys mismatch",
56 |             )
57 | 
58 | 
59 | if __name__ == "__main__":
60 |     unittest.main()
61 | 


--------------------------------------------------------------------------------
/src/test/hackerrank_test.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from scrape_up import hackerrank
 3 | 
 4 | 
 5 | class HackerrankTest(unittest.TestCase):
 6 |     """
 7 |     CodeChef module test.\n
 8 |     | Methods                       | Details                                                          |
 9 |     | ----------------------------- | ---------------------------------------------------------------- |
10 |     | `get_profile(id="username")`  | Returns name, username, country, user_type, details, badges, verified_skills, social etc. |
11 |     | `get_skills()`                | Returns information on active contests like title, status, and link |
12 |     | `active_contests()`           | Returns a list of verified skills and their links |
13 |     | `archived_contests()`         | Returns information regarding archived contests |
14 |     """
15 | 
16 |     def test_get_profile(self):
17 |         instance = hackerrank.User()
18 |         method_response = instance.get_profile(id="inclinedadarsh")
19 | 
20 |         self.assertEqual(
21 |             list(method_response.keys()),
22 |             [
23 |                 "name",
24 |                 "username",
25 |                 "country",
26 |                 "user_type",
27 |                 "details",
28 |                 "badges",
29 |                 "verified_skills",
30 |                 "social",
31 |             ],
32 |             "Hackerrank:get_profile - keys mismatch",
33 |         )
34 | 
35 |     def test_get_skills(self):
36 |         instance = hackerrank.User()
37 |         method_response = instance.get_skills()
38 | 
39 |         self.assertIsInstance(
40 |             method_response, list, "Hackerrank:get_skills - return type mismatch"
41 |         )
42 |         self.assertTrue(
43 |             all(isinstance(skill, dict) for skill in method_response),
44 |             "Hackerrank:get_skills - return type mismatch",
45 |         )
46 | 
47 |         for skill in method_response:
48 |             self.assertIn("Name", skill)
49 |             self.assertIn("Link", skill)
50 | 
51 |     def test_active_contests(self):
52 |         instance = hackerrank.Contest()
53 |         method_response = instance.active_contests()
54 | 
55 |         self.assertIsInstance(
56 |             method_response, list, "Hackerrank:active_contests - return type mismatch"
57 |         )
58 |         self.assertTrue(
59 |             all(isinstance(contest, dict) for contest in method_response),
60 |             "Hackerrank:active_contests - return type mismatch",
61 |         )
62 |         for contest in method_response:
63 |             self.assertIn("Title", contest)
64 |             self.assertIn("Status", contest)
65 |             self.assertIn("Link", contest)
66 | 
67 |     def test_archived_contests(self):
68 |         instance = hackerrank.Contest()
69 |         method_response = instance.archived_contests()
70 | 
71 |         self.assertIsInstance(
72 |             method_response, list, "Hackerrank:archived_contests - return type mismatch"
73 |         )
74 | 
75 |         for contest in method_response:
76 |             self.assertIn("title", contest)
77 | 
78 | 
79 | if __name__ == "__main__":
80 |     unittest.main()
81 | 


--------------------------------------------------------------------------------
/src/test/healthgrades_test.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | 
 3 | from scrape_up.healthgrades import HealthGrades
 4 | 
 5 | 
 6 | class HealthGradesTest(unittest.TestCase):
 7 |     """
 8 |     HealthGrades module test.\n
 9 |     | Methods         | Details                                                          |
10 |     | --------------- | ---------------------------------------------------------------- |
11 |     | `get_best_hospitals()` | Returns Name, Location, Link, Awards etc.                 |
12 |     """
13 | 
14 |     def setUp(self):
15 |         """
16 |         setup instance for HealthGrades class
17 |         """
18 |         self.instance = HealthGrades()
19 | 
20 |     def test_get_best_hospitals(self):
21 |         """
22 |         Test get_best_hospitals for state 'bihar'
23 |         """
24 |         best_hospitals = self.instance.get_best_hospitals("bihar")
25 |         first_hospital = best_hospitals[0]
26 | 
27 |         # assert statements
28 |         self.assertIsInstance(best_hospitals, list)
29 |         self.assertIsInstance(first_hospital, dict)
30 |         self.assertEqual(
31 |             list(first_hospital.keys()),
32 |             ["Name", "Location", "Link", "Awards"],
33 |             "Healthgrades:get_best_hospitals - keys mismatch",
34 |         )
35 | 
36 | 
37 | if __name__ == "__main__":
38 |     unittest.main()
39 | 


--------------------------------------------------------------------------------
/src/test/icc_test.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from scrape_up import icc
 3 | 
 4 | 
 5 | class ICCTest(unittest.TestCase):
 6 |     """
 7 |     ICC module test.\n
 8 |     | Method                               | Details                                                             |
 9 |     | ----------------------------         | ------------------------------------------------------------------- |
10 |     | `.team_rankings(format)`             | Returns the list of rankings of teams of desired format             |
11 |     | `.player_ranking(type,format)`       | Returns the list of player ranking of desired type and format       |
12 |     | `.team_rankings_women(format)`       | Returns the list of rankings of teams of desired format             |
13 |     | `.player_ranking_women(type,format)` | Returns the list of player ranking of desired type and format       |
14 |     """
15 | 
16 |     def test_team_rankings(self):
17 |         instance = icc.ICC()
18 |         response = instance.team_rankings("ODI")
19 |         self.assertGreater(len(response), 0, "Team rankings is empty")
20 |         self.assertTrue(isinstance(response, list), "Team rankings is not a list")
21 |         self.assertTrue(
22 |             all(
23 |                 isinstance(team, dict) and "rank" in team and "team" in team
24 |                 for team in response
25 |             ),
26 |             "Incorrect format for team rankings",
27 |         )
28 | 
29 |     def test_player_ranking(self):
30 |         instance = icc.ICC()
31 |         response = instance.player_ranking("batting", "TEST")
32 |         self.assertGreater(len(response), 0, "Player ranking is empty")
33 |         self.assertTrue(isinstance(response, list), "Player ranking is not a list")
34 |         self.assertTrue(
35 |             all(
36 |                 isinstance(player, dict) and "rank" in player and "name" in player
37 |                 for player in response
38 |             ),
39 |             "Incorrect format for player rankings",
40 |         )
41 | 
42 |     def test_team_rankings_women(self):
43 |         instance = icc.ICC()
44 |         response = instance.team_rankings_women("T20")
45 |         self.assertGreater(len(response), 0, "Team rankings for women is empty")
46 |         self.assertTrue(
47 |             isinstance(response, list), "Team rankings for women is not a list"
48 |         )
49 |         self.assertTrue(
50 |             all(
51 |                 isinstance(team, dict) and "rank" in team and "team" in team
52 |                 for team in response
53 |             ),
54 |             "Incorrect format for team rankings for women",
55 |         )
56 | 
57 |     def test_player_ranking_women(self):
58 |         instance = icc.ICC()
59 |         response = instance.player_ranking("bowling", "ODI")
60 |         self.assertGreater(len(response), 0, "Player ranking for women is empty")
61 |         self.assertTrue(
62 |             isinstance(response, list), "Player ranking for women is not a list"
63 |         )
64 |         self.assertTrue(
65 |             all(
66 |                 isinstance(player, dict) and "rank" in player and "name" in player
67 |                 for player in response
68 |             ),
69 |             "Incorrect format for player rankings for women",
70 |         )
71 | 
72 | 
73 | if __name__ == "__main__":
74 |     unittest.main()
75 | 


--------------------------------------------------------------------------------
/src/test/librarygenesis_test.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from scrape_up.librarygenesis import LibGen
 3 | 
 4 | 
 5 | class TestLibGen(unittest.TestCase):
 6 |     """
 7 |     | Methods       | Details                       |
 8 |     | --------------| ----------------------------- |
 9 |     | `.getBooks(book_name=" ")` | Returns the books with name, author, size, format, book link, book cover link, language |
10 |     """
11 | 
12 |     def setUp(self):
13 |         """
14 |         Initialize a LibGen instance before each test method.
15 |         """
16 |         self.libgen = LibGen()
17 | 
18 |     def test_getBooks_empty_name(self):
19 |         """
20 |         Test the getBooks() method with an empty book name.
21 |         """
22 |         try:
23 |             result = self.libgen.getBooks("")
24 |             self.assertEqual(
25 |                 result,
26 |                 "Error: enter name",
27 |                 "Expected error message for empty book name",
28 |             )
29 |         except:
30 |             return None
31 | 
32 |     def test_getBooks_short_name(self):
33 |         """
34 |         Test the getBooks() method with a short book name.
35 |         """
36 |         try:
37 |             result = self.libgen.getBooks("AI")
38 |             self.assertEqual(
39 |                 result,
40 |                 "Error: Title Too Short",
41 |                 "Expected error message for short book name",
42 |             )
43 |         except:
44 |             return None
45 | 
46 |     def test_getBooks_valid_name(self):
47 |         """
48 |         Test the getBooks() method with a valid book name.
49 |         """
50 |         try:
51 |             result = self.libgen.getBooks("Python")
52 |             self.assertIsInstance(result, list, "Expected a list of books")
53 |             if result:  # Check if there are books returned
54 |                 book = result[0]
55 |                 self.assertIn("name", book, "Book should have a 'name' field")
56 |                 self.assertIn("author", book, "Book should have an 'author' field")
57 |                 self.assertIn("size", book, "Book should have a 'size' field")
58 |                 self.assertIn("format", book, "Book should have a 'format' field")
59 |                 self.assertIn("link", book, "Book should have a 'link' field")
60 |                 self.assertIn("language", book, "Book should have a 'language' field")
61 |         except:
62 |             return None
63 | 
64 |     def test_getBooks_no_results(self):
65 |         """
66 |         Test the getBooks() method with a book name that yields no results.
67 |         """
68 |         try:
69 |             result = self.libgen.getBooks("somebookthatdoesnotexist")
70 |             self.assertEqual(
71 |                 result,
72 |                 "Error: no results found",
73 |                 "Expected error message for no results found",
74 |             )
75 |         except:
76 |             return None
77 | 
78 | 
79 | if __name__ == "__main__":
80 |     unittest.main()
81 | 


--------------------------------------------------------------------------------
/src/test/lichess_test.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from scrape_up.lichess import LichessGames
 3 | 
 4 | 
 5 | class TestLichessGames(unittest.TestCase):
 6 |     """
 7 |     | Methods                       | Details                                                                    |
 8 |     | ----------------------------- | -------------------------------------------------------------------------- |
 9 |     | `.fetch_games()`              | Fetch all the games data for the specified username.                       |
10 |     """
11 | 
12 |     def setUp(self):
13 |         """
14 |         Initialize a LichessGames instance before each test method.
15 |         """
16 |         self.username = "chess_player"  # Example username
17 |         self.lichess_scraper = LichessGames(username=self.username)
18 | 
19 |     def test_fetch_games(self):
20 |         """
21 |         Test the fetch_games() method.
22 |         """
23 |         try:
24 |             games = self.lichess_scraper.fetch_games()
25 | 
26 |             # Check if games is a list of dictionaries
27 |             self.assertIsInstance(games, list)
28 |             for game in games:
29 |                 self.assertIsInstance(game, dict)
30 |                 self.assertIn("white_player", game)
31 |                 self.assertIn("black_player", game)
32 |                 self.assertIn("pgn", game)
33 | 
34 |                 white_player = game["white_player"]
35 |                 black_player = game["black_player"]
36 | 
37 |                 self.assertIn("username", white_player)
38 |                 self.assertIn("before_game_score", white_player)
39 |                 self.assertIn("score_change", white_player)
40 | 
41 |                 self.assertIn("username", black_player)
42 |                 self.assertIn("before_game_score", black_player)
43 |                 self.assertIn("score_change", black_player)
44 |         except:
45 |             return None
46 | 
47 |     def test_fetch_games_empty(self):
48 |         """
49 |         Test fetch_games() method with a username that has no games.
50 |         """
51 |         try:
52 |             self.lichess_scraper = LichessGames(username="non_existent_user")
53 |             games = self.lichess_scraper.fetch_games()
54 |             self.assertEqual(
55 |                 games, [], "Expected an empty list for a non-existent user"
56 |             )
57 |         except:
58 |             return None
59 | 
60 | 
61 | if __name__ == "__main__":
62 |     unittest.main()
63 | 


--------------------------------------------------------------------------------
/src/test/pinterest_test.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from scrape_up.pinterest import Pinterest
 3 | 
 4 | 
 5 | class TestPinterest(unittest.TestCase):
 6 |     def setUp(self):
 7 |         self.pinterest = Pinterest()
 8 | 
 9 |     def test_get_today(self):
10 |         today_topics = self.pinterest.get_today()
11 |         self.assertIsInstance(today_topics, list, "Expected get_today to return a list")
12 |         if today_topics:
13 |             for topic in today_topics:
14 |                 self.assertIn("link", topic)
15 |                 self.assertIn("title", topic)
16 |                 self.assertIn("subtitle", topic)
17 |                 self.assertIn("image", topic)
18 | 
19 |     def test_get_photo(self):
20 |         url = "https://pin.it/1ZhgQA5AG"
21 |         photo = self.pinterest.get_photo(url)
22 |         if photo:
23 |             self.assertIn("alt", photo)
24 |             self.assertIn("image", photo)
25 | 
26 |     def test_search_pins(self):
27 |         keyword = "nature"
28 |         pins = self.pinterest.search_pins(keyword=keyword)
29 |         self.assertIsInstance(pins, list, "Expected search_pins to return a list")
30 |         if pins:
31 |             for pin in pins:
32 |                 self.assertIn("link", pin)
33 |                 self.assertIn("image", pin)
34 | 
35 |     def test_get_pin_details(self):
36 |         pin_url = "https://pin.it/1ZhgQA5AG"
37 |         details = self.pinterest.get_pin_details(pin_url)
38 |         if details:
39 |             self.assertIn("title", details)
40 |             self.assertIn("description", details)
41 |             self.assertIn("saves", details)
42 |             self.assertIn("comments", details)
43 | 
44 | 
45 | if __name__ == "__main__":
46 |     unittest.main()
47 | 


--------------------------------------------------------------------------------
/src/test/quora_test.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from unittest.mock import patch
 3 | from scrape_up.quora import Quora
 4 | 
 5 | 
 6 | class TestQuora(unittest.TestCase):
 7 |     def setUp(self):
 8 |         self.scrapper = Quora()
 9 | 
10 |     def test_fetch_answers(self):
11 |         try:
12 |             expected_answers = ["Accepted answer 1", "Suggested answer 1"]
13 | 
14 |             self.assertEqual(
15 |                 self.scrapper.fetch_answers("https://www.quora.com/question"),
16 |                 expected_answers,
17 |             )
18 |         except:
19 |             return None
20 | 
21 |     def test_get_by_query(self):
22 |         try:
23 |             expected_answer = "Suggested answer 1"
24 | 
25 |             self.assertEqual(
26 |                 self.scrapper.get_by_query("How-should-I-start-learning-Python-1"),
27 |                 expected_answer,
28 |             )
29 |         except:
30 |             return None
31 | 
32 |     def test_profile_details(self):
33 |         try:
34 |             expected_profile = {
35 |                 "name": "Nikhil Raj",
36 |                 "url": "https://www.quora.com/profile/Nikhil-Raj",
37 |             }
38 | 
39 |             self.assertEqual(
40 |                 self.scrapper.profile_details("Nikhil Raj"), expected_profile
41 |             )
42 |         except:
43 |             return None
44 | 
45 | 
46 | if __name__ == "__main__":
47 |     unittest.main()
48 | 


--------------------------------------------------------------------------------
/src/test/swiggy_test.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | import requests
 3 | from unittest.mock import patch
 4 | from scrape_up.swiggy import Swiggy
 5 | 
 6 | 
 7 | class TestSwiggy(unittest.TestCase):
 8 |     """
 9 |     Swiggy module test.
10 |     | Methods                   | Details                                                                   |
11 |     | ------------------------- | ------------------------------------------------------------------------- |
12 |     | `get_restraunt_details()` | Returns the restaurant data with name, cuisine, area, rating, offers, etc |
13 |     | `get_restaurants()`       | Returns the restaurant names as per given city                            |
14 |     """
15 | 
16 |     def setUp(self):
17 |         self.scrapper = Swiggy()
18 | 
19 |     def test_get_restraunt_details(self):
20 |         try:
21 |             expected_data = {
22 |                 "name": "Pizza Hut",
23 |                 "cuisine": "Pizzas",
24 |                 "area": "Karol Bagh",
25 |                 "rating": "3.7",
26 |                 "rating_count": "1K+ ratings",
27 |                 "cost_per_person": "₹350 for two",
28 |                 "offers": [{"15% OFF UPTO ₹300": "USE CITIFOODIE | ABOVE ₹1200"}],
29 |             }
30 | 
31 |             self.assertEqual(
32 |                 self.scrapper.get_restraunt_details("https://www.swiggy.com/pizza-hut"),
33 |                 expected_data,
34 |             )
35 |         except:
36 |             return None
37 | 
38 |     def test_get_restaurants(self):
39 |         try:
40 |             expected_restaurants = [
41 |                 {
42 |                     "Name": "Domino's Pizza",
43 |                     "Rating": "4.2",
44 |                     "Cusine": "Pizzas, Italian, Pastas, Desserts",
45 |                     "Location": "Punjabi Bagh",
46 |                     "Link": "/restaurant1",
47 |                 }
48 |             ]
49 | 
50 |             self.assertEqual(
51 |                 self.scrapper.get_restaurants("Delhi"), expected_restaurants
52 |             )
53 |         except:
54 |             return None
55 | 
56 | 
57 | if __name__ == "__main__":
58 |     unittest.main()
59 | 


--------------------------------------------------------------------------------
/src/test/who_test.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from scrape_up.who import WHO
 3 | 
 4 | 
 5 | class TestWHO(unittest.TestCase):
 6 |     def setUp(self):
 7 |         """
 8 |         Initialize a WHO instance before each test method.
 9 |         """
10 |         self.who_scraper = WHO()
11 | 
12 |     def test_get_disease_outbreak(self):
13 |         """
14 |         | Methods                        | Details                                     |
15 |         | ------------------------------ | ------------------------------------------- |
16 |         |    `get_disease_outbreak()`    | Get Disease Outbreak News from WHO website. |
17 |         """
18 |         try:
19 |             # Test with a valid number of items (assuming each page contains 10 items)
20 |             number_of_items = 10
21 |             disease_outbreaks = self.who_scraper.get_disease_outbreak(number_of_items)
22 | 
23 |             # Check if disease_outbreaks is a list
24 |             self.assertIsNotNone(disease_outbreaks, "Failed to fetch disease outbreaks")
25 |             self.assertIsInstance(
26 |                 disease_outbreaks, list, "Disease outbreaks data should be a list"
27 |             )
28 | 
29 |             if disease_outbreaks:
30 |                 # Check if each item in the list is a dictionary with the required keys
31 |                 for outbreak in disease_outbreaks:
32 |                     self.assertIsInstance(
33 |                         outbreak, dict, "Each outbreak should be a dictionary"
34 |                     )
35 |                     self.assertIn("Title", outbreak, "Missing expected key: 'Title'")
36 |                     self.assertIn("Date", outbreak, "Missing expected key: 'Date'")
37 |                     self.assertIn("Link", outbreak, "Missing expected key: 'Link'")
38 | 
39 |                     # Check if the values are of the correct type
40 |                     self.assertIsInstance(
41 |                         outbreak["Title"], str, "'Title' should be a string"
42 |                     )
43 |                     self.assertIsInstance(
44 |                         outbreak["Date"], str, "'Date' should be a string"
45 |                     )
46 |                     self.assertIsInstance(
47 |                         outbreak["Link"], str, "'Link' should be a string"
48 |                     )
49 | 
50 |         except:
51 |             return None
52 | 
53 |     def test_invalid_number(self):
54 |         """
55 |         Test the get_disease_outbreak() method with an invalid number.
56 |         """
57 |         try:
58 |             invalid_number = -10
59 |             disease_outbreaks = self.who_scraper.get_disease_outbreak(invalid_number)
60 | 
61 |             # Check if the function handles invalid numbers gracefully
62 |             self.assertIsNone(
63 |                 disease_outbreaks, "Function should return None for invalid input"
64 |             )
65 |         except:
66 |             return None
67 | 
68 | 
69 | if __name__ == "__main__":
70 |     unittest.main()
71 | 


--------------------------------------------------------------------------------
/src/test/wuzzuf_test.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from unittest.mock import patch
 3 | from scrape_up.wuzzuf import Jobs
 4 | import requests
 5 | 
 6 | 
 7 | class JobsTest(unittest.TestCase):
 8 |     """
 9 |     Jobs module test.
10 |     | Methods             | Details                                                                                      |
11 |     | ------------------- | -------------------------------------------------------------------------------------------- |
12 |     | `filter_job()`      | Apply filters to the job search using parameters like title, country, city, min/max years of experience. |
13 |     | `fetch_jobs()`      | Fetch job listings based on the applied filters, with an optional maximum number of pages to scrape.    |
14 |     """
15 | 
16 |     def setUp(self):
17 |         self.scraper = Jobs()
18 | 
19 |     def test_filter_job(self):
20 |         self.scraper.filter_job(
21 |             title="software engineer",
22 |             country="Egypt",
23 |             city="Cairo",
24 |             min_years_of_experience=2,
25 |             max_years_of_experience=5,
26 |         )
27 |         expected_url = "https://wuzzuf.net/search/jobs/?q=software+engineer&filters[country][0]=Egypt&filters[city][0]=Cairo&filters[years_of_experience_min][0]=2&filters[years_of_experience_max][0]=5"
28 |         self.assertEqual(self.scraper.url, expected_url)
29 | 
30 |     @patch("requests.get")
31 |     def test_fetch_jobs(self, mock_get):
32 |         # Mock the get response
33 |         mock_response = requests.Response()
34 |         mock_response.status_code = 200
35 |         mock_response._content = b"""
36 |         <div class="css-1gatmva e1v1l3u10">
37 |             <h2 class="css-m604qf"><a href="/job/1">Software Engineer</a></h2>
38 |             <div class="css-d7j1kk"><a href="#">Company Name</a></div>
39 |             <span class="css-5wys0k">Cairo, Egypt</span>
40 |             <div class="css-4c4ojb">3 days ago</div>
41 |             <span class="eoyjyou0">Full Time</span>
42 |             <span class="eoyjyou0">Senior</span>
43 |         </div>
44 |         """
45 |         mock_get.return_value = mock_response
46 | 
47 |         jobs = self.scraper.fetch_jobs(max_page_number=1)
48 |         expected_job = {
49 |             "name": "Software Engineer",
50 |             "url": "/job/1",
51 |             "company": "Company Name",
52 |             "location": "Cairo, Egypt",
53 |             "published_time": "3 days ago",
54 |             "properties": "Full Time ,Senior",
55 |         }
56 | 
57 |         self.assertEqual(len(jobs), 1)
58 |         self.assertEqual(jobs[0], expected_job)
59 | 
60 | 
61 | if __name__ == "__main__":
62 |     unittest.main()
63 | 


--------------------------------------------------------------------------------