├── .dockerignore ├── .github └── workflows │ ├── python-publish.yml │ ├── static.yml │ └── tests.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── Dockerfile ├── LICENSE.txt ├── README.md ├── docker-compose.yaml ├── gnews ├── __init__.py ├── gnews.py └── utils │ ├── __init__.py │ ├── constants.py │ └── utils.py ├── imgs ├── gnews.gif └── logo.png ├── index.rst ├── main.py ├── requirements.txt ├── setup.py └── tests └── test_gnews.py /.dockerignore: -------------------------------------------------------------------------------- 1 | ### JetBrains template 2 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 3 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 4 | 5 | # User-specific stuff 6 | .idea/**/workspace.xml 7 | .idea/**/tasks.xml 8 | .idea/**/usage.statistics.xml 9 | .idea/**/dictionaries 10 | .idea/**/shelf 11 | 12 | # Generated files 13 | .idea/**/contentModel.xml 14 | 15 | # Sensitive or high-churn files 16 | .idea/**/dataSources/ 17 | .idea/**/dataSources.ids 18 | .idea/**/dataSources.local.xml 19 | .idea/**/sqlDataSources.xml 20 | .idea/**/dynamic.xml 21 | .idea/**/uiDesigner.xml 22 | .idea/**/dbnavigator.xml 23 | 24 | # Gradle 25 | .idea/**/gradle.xml 26 | .idea/**/libraries 27 | 28 | # Gradle and Maven with auto-import 29 | # When using Gradle or Maven with auto-import, you should exclude module files, 30 | # since they will be recreated, and may cause churn. Uncomment if using 31 | # auto-import. 
32 | # .idea/artifacts 33 | # .idea/compiler.xml 34 | # .idea/jarRepositories.xml 35 | # .idea/modules.xml 36 | # .idea/*.iml 37 | # .idea/modules 38 | # *.iml 39 | # *.ipr 40 | 41 | # CMake 42 | cmake-build-*/ 43 | 44 | # Mongo Explorer plugin 45 | .idea/**/mongoSettings.xml 46 | 47 | # File-based project format 48 | *.iws 49 | 50 | # IntelliJ 51 | out/ 52 | 53 | # mpeltonen/sbt-idea plugin 54 | .idea_modules/ 55 | 56 | # JIRA plugin 57 | atlassian-ide-plugin.xml 58 | 59 | # Cursive Clojure plugin 60 | .idea/replstate.xml 61 | 62 | # Crashlytics plugin (for Android Studio and IntelliJ) 63 | com_crashlytics_export_strings.xml 64 | crashlytics.properties 65 | crashlytics-build.properties 66 | fabric.properties 67 | 68 | # Editor-based Rest Client 69 | .idea/httpRequests 70 | 71 | # Android studio 3.1+ serialized cache file 72 | .idea/caches/build_file_checksums.ser 73 | 74 | ### Python template 75 | # Byte-compiled / optimized / DLL files 76 | __pycache__/ 77 | *.py[cod] 78 | *$py.class 79 | 80 | # C extensions 81 | *.so 82 | 83 | # Distribution / packaging 84 | .Python 85 | build/ 86 | develop-eggs/ 87 | dist/ 88 | downloads/ 89 | eggs/ 90 | .eggs/ 91 | lib/ 92 | lib64/ 93 | parts/ 94 | sdist/ 95 | var/ 96 | wheels/ 97 | share/python-wheels/ 98 | *.egg-info/ 99 | .installed.cfg 100 | *.egg 101 | MANIFEST 102 | 103 | # PyInstaller 104 | # Usually these files are written by a python script from a template 105 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
106 | *.manifest 107 | *.spec 108 | 109 | # Installer logs 110 | pip-log.txt 111 | pip-delete-this-directory.txt 112 | 113 | # Unit test / coverage reports 114 | htmlcov/ 115 | .tox/ 116 | .nox/ 117 | .coverage 118 | .coverage.* 119 | .cache 120 | nosetests.xml 121 | coverage.xml 122 | *.cover 123 | *.py,cover 124 | .hypothesis/ 125 | .pytest_cache/ 126 | cover/ 127 | 128 | # Translations 129 | *.mo 130 | *.pot 131 | 132 | # Django stuff: 133 | *.log 134 | local_settings.py 135 | db.sqlite3 136 | db.sqlite3-journal 137 | 138 | # Flask stuff: 139 | instance/ 140 | .webassets-cache 141 | 142 | # Scrapy stuff: 143 | .scrapy 144 | 145 | # Sphinx documentation 146 | docs/_build/ 147 | 148 | # PyBuilder 149 | .pybuilder/ 150 | target/ 151 | 152 | # Jupyter Notebook 153 | .ipynb_checkpoints 154 | 155 | # IPython 156 | profile_default/ 157 | ipython_config.py 158 | 159 | # pyenv 160 | # For a library or package, you might want to ignore these files since the code is 161 | # intended to run in multiple environments; otherwise, check them in: 162 | # .python-version 163 | 164 | # pipenv 165 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 166 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 167 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 168 | # install all needed dependencies. 169 | #Pipfile.lock 170 | 171 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 172 | __pypackages__/ 173 | 174 | # Celery stuff 175 | celerybeat-schedule 176 | celerybeat.pid 177 | 178 | # SageMath parsed files 179 | *.sage.py 180 | 181 | # Environments 182 | .env 183 | .venv 184 | env/ 185 | venv/ 186 | ENV/ 187 | env.bak/ 188 | venv.bak/ 189 | 190 | # Spyder project settings 191 | .spyderproject 192 | .spyproject 193 | 194 | # Rope project settings 195 | .ropeproject 196 | 197 | # mkdocs documentation 198 | /site 199 | 200 | # mypy 201 | .mypy_cache/ 202 | .dmypy.json 203 | dmypy.json 204 | 205 | # Pyre type checker 206 | .pyre/ 207 | 208 | # pytype static type analyzer 209 | .pytype/ 210 | 211 | # Cython debug symbols 212 | cython_debug/ 213 | 214 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | name: Upload Python Package 5 | 6 | on: 7 | push: 8 | branches: 9 | - master 10 | 11 | jobs: 12 | pypi-publish: 13 | name: Publish release to PyPI 14 | runs-on: ubuntu-latest 15 | environment: 16 | name: pypi 17 | url: https://pypi.org/p/gnews 18 | permissions: 19 | id-token: write 20 | steps: 21 | - uses: actions/checkout@v4 22 | - name: Set up Python 23 | uses: actions/setup-python@v4 24 | with: 25 | python-version: "3.x" 26 | - name: Install dependencies 27 | run: | 28 | python -m pip install --upgrade pip 29 | pip install setuptools wheel 30 | - name: Build package 31 | run: | 32 | python setup.py sdist bdist_wheel # Could also be python -m build 33 | - name: Publish package distributions to PyPI 34 | uses: pypa/gh-action-pypi-publish@release/v1 35 | with: 36 | password: ${{ secrets.PYPI_API_TOKEN }} 
-------------------------------------------------------------------------------- /.github/workflows/static.yml: -------------------------------------------------------------------------------- 1 | # Simple workflow for deploying static content to GitHub Pages 2 | name: Deploy static content to Pages 3 | 4 | on: 5 | # Runs on pushes targeting the default branch 6 | push: 7 | branches: ["master"] 8 | 9 | # Allows you to run this workflow manually from the Actions tab 10 | workflow_dispatch: 11 | 12 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages 13 | permissions: 14 | contents: read 15 | pages: write 16 | id-token: write 17 | 18 | # Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. 19 | # However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. 20 | concurrency: 21 | group: "pages" 22 | cancel-in-progress: false 23 | 24 | jobs: 25 | # Single deploy job since we're just deploying 26 | deploy: 27 | environment: 28 | name: github-pages 29 | url: ${{ steps.deployment.outputs.page_url }} 30 | runs-on: ubuntu-latest 31 | steps: 32 | - name: Checkout 33 | uses: actions/checkout@v4 34 | - name: Setup Pages 35 | uses: actions/configure-pages@v5 36 | - name: Upload artifact 37 | uses: actions/upload-pages-artifact@v3 38 | with: 39 | # Upload entire repository 40 | path: '.' 
41 | - name: Deploy to GitHub Pages 42 | id: deployment 43 | uses: actions/deploy-pages@v4 44 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Run Tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | jobs: 8 | test: 9 | name: Run Test Cases 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout code 13 | uses: actions/checkout@v2 14 | 15 | - name: Set up Python 16 | uses: actions/setup-python@v2 17 | with: 18 | python-version: 3.8 19 | 20 | - name: Install dependencies 21 | run: | 22 | python -m pip install --upgrade pip 23 | pip install -r requirements.txt 24 | 25 | - name: Run test cases 26 | run: | 27 | python -m unittest tests/test_gnews.py 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.toptal.com/developers/gitignore/api/pycharm,python 3 | # Edit at https://www.toptal.com/developers/gitignore?templates=pycharm,python 4 | 5 | ### PyCharm ### 6 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 7 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 8 | 9 | # User-specific stuff 10 | .idea/**/workspace.xml 11 | .idea/**/tasks.xml 12 | .idea/**/usage.statistics.xml 13 | .idea/**/dictionaries 14 | .idea/**/shelf 15 | 16 | # Generated files 17 | .idea/**/contentModel.xml 18 | 19 | # Sensitive or high-churn files 20 | .idea/**/dataSources/ 21 | .idea/**/dataSources.ids 22 | .idea/**/dataSources.local.xml 23 | .idea/**/sqlDataSources.xml 24 | .idea/**/dynamic.xml 25 | .idea/**/uiDesigner.xml 26 | .idea/**/dbnavigator.xml 27 | 28 | .idea/ 29 | 30 | # Gradle 31 | .idea/**/gradle.xml 32 | .idea/**/libraries 33 | 34 | .main.py 35 | 36 | # Gradle and 
Maven with auto-import 37 | # When using Gradle or Maven with auto-import, you should exclude module files, 38 | # since they will be recreated, and may cause churn. Uncomment if using 39 | # auto-import. 40 | # .idea/artifacts 41 | # .idea/compiler.xml 42 | # .idea/jarRepositories.xml 43 | # .idea/modules.xml 44 | # .idea/*.iml 45 | # .idea/modules 46 | # *.iml 47 | # *.ipr 48 | 49 | # CMake 50 | cmake-build-*/ 51 | 52 | # Mongo Explorer plugin 53 | .idea/**/mongoSettings.xml 54 | 55 | # File-based project format 56 | *.iws 57 | 58 | # IntelliJ 59 | out/ 60 | 61 | # mpeltonen/sbt-idea plugin 62 | .idea_modules/ 63 | 64 | # JIRA plugin 65 | atlassian-ide-plugin.xml 66 | 67 | # Cursive Clojure plugin 68 | .idea/replstate.xml 69 | 70 | # Crashlytics plugin (for Android Studio and IntelliJ) 71 | com_crashlytics_export_strings.xml 72 | crashlytics.properties 73 | crashlytics-build.properties 74 | fabric.properties 75 | 76 | # Editor-based Rest Client 77 | .idea/httpRequests 78 | 79 | # Android studio 3.1+ serialized cache file 80 | .idea/caches/build_file_checksums.ser 81 | 82 | ### PyCharm Patch ### 83 | # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 84 | 85 | # *.iml 86 | # modules.xml 87 | # .idea/misc.xml 88 | # *.ipr 89 | 90 | # Sonarlint plugin 91 | # https://plugins.jetbrains.com/plugin/7973-sonarlint 92 | .idea/**/sonarlint/ 93 | 94 | # SonarQube Plugin 95 | # https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin 96 | .idea/**/sonarIssues.xml 97 | 98 | # Markdown Navigator plugin 99 | # https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced 100 | .idea/**/markdown-navigator.xml 101 | .idea/**/markdown-navigator-enh.xml 102 | .idea/**/markdown-navigator/ 103 | 104 | # Cache file creation bug 105 | # See https://youtrack.jetbrains.com/issue/JBR-2257 106 | .idea/$CACHE_FILE$ 107 | 108 | # CodeStream plugin 109 | # https://plugins.jetbrains.com/plugin/12206-codestream 110 | 
.idea/codestream.xml 111 | 112 | ### Python ### 113 | # Byte-compiled / optimized / DLL files 114 | __pycache__/ 115 | *.py[cod] 116 | *$py.class 117 | 118 | # C extensions 119 | *.so 120 | 121 | # Distribution / packaging 122 | .Python 123 | build/ 124 | develop-eggs/ 125 | dist/ 126 | downloads/ 127 | eggs/ 128 | .eggs/ 129 | lib/ 130 | lib64/ 131 | parts/ 132 | sdist/ 133 | var/ 134 | wheels/ 135 | pip-wheel-metadata/ 136 | share/python-wheels/ 137 | *.egg-info/ 138 | .installed.cfg 139 | *.egg 140 | MANIFEST 141 | 142 | # PyInstaller 143 | # Usually these files are written by a python script from a template 144 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 145 | *.manifest 146 | *.spec 147 | 148 | # Installer logs 149 | pip-log.txt 150 | pip-delete-this-directory.txt 151 | 152 | # Unit test / coverage reports 153 | htmlcov/ 154 | .tox/ 155 | .nox/ 156 | .coverage 157 | .coverage.* 158 | .cache 159 | nosetests.xml 160 | coverage.xml 161 | *.cover 162 | *.py,cover 163 | .hypothesis/ 164 | .pytest_cache/ 165 | pytestdebug.log 166 | 167 | # Translations 168 | *.mo 169 | *.pot 170 | 171 | # Django stuff: 172 | *.log 173 | local_settings.py 174 | db.sqlite3 175 | db.sqlite3-journal 176 | 177 | # Flask stuff: 178 | instance/ 179 | .webassets-cache 180 | 181 | # Scrapy stuff: 182 | .scrapy 183 | 184 | # Sphinx documentation 185 | docs/_build/ 186 | doc/_build/ 187 | 188 | # PyBuilder 189 | target/ 190 | 191 | # Jupyter Notebook 192 | .ipynb_checkpoints 193 | 194 | # IPython 195 | profile_default/ 196 | ipython_config.py 197 | 198 | # pyenv 199 | .python-version 200 | 201 | # pipenv 202 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 203 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 204 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 205 | # install all needed dependencies. 
206 | #Pipfile.lock 207 | 208 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 209 | __pypackages__/ 210 | 211 | # Celery stuff 212 | celerybeat-schedule 213 | celerybeat.pid 214 | 215 | # SageMath parsed files 216 | *.sage.py 217 | 218 | # Environments 219 | .env 220 | .venv 221 | env/ 222 | venv/ 223 | ENV/ 224 | env.bak/ 225 | venv.bak/ 226 | pythonenv* 227 | 228 | # Spyder project settings 229 | .spyderproject 230 | .spyproject 231 | 232 | # Rope project settings 233 | .ropeproject 234 | 235 | # mkdocs documentation 236 | /site 237 | 238 | # mypy 239 | .mypy_cache/ 240 | .dmypy.json 241 | dmypy.json 242 | 243 | # Pyre type checker 244 | .pyre/ 245 | 246 | # pytype static type analyzer 247 | .pytype/ 248 | 249 | # profiling data 250 | .prof 251 | 252 | # End of https://www.toptal.com/developers/gitignore/api/pycharm,python 253 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 
55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | . 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 
99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at 128 | https://www.contributor-covenant.org/translations. 129 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10.0 2 | 3 | RUN mkdir -p /usr/src/app 4 | 5 | WORKDIR /usr/src/app 6 | 7 | COPY . 
/usr/src/app 8 | 9 | RUN pip install -r requirements.txt 10 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) [year] [fullname] 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Contributors][contributors-shield]][contributors-url] 2 | [![Forks][forks-shield]][forks-url] 3 | [![Stargazers][stars-shield]][stars-url] 4 | [![Issues][issues-shield]][issues-url] 5 | [![MIT License][license-shield]][license-url] 6 | [![Download][download-sheild]][download-url] 7 | [![LinkedIn][linkedin-shield]][linkedin-url] 8 | 9 | 10 | 11 | 12 |
13 |

14 | 15 | GNews 16 | 17 | 18 |

GNews 📰

19 | 20 |

21 | A happy and lightweight Python package that provides an API to search for articles on Google News and returns a usable JSON response! 🚀 22 |
23 | If you like ❤️ GNews or find it useful 🌟, support the project by buying me a coffee ☕. 24 |
25 | Buy Me A Coffee 26 |
27 |
28 | 🚀 View Demo 29 | · 30 | 🐞 Report Bug 31 | · 32 | 🚀 Request Feature 33 |

34 |

35 | 36 | 37 |
38 | Table of Contents 📑 39 |
    40 |
  1. 41 | About 🚩 42 | 45 |
  2. 46 |
  3. 47 | Getting Started 🚀 48 | 51 | 54 |
  4. 55 |
  5. 56 | Usage 🧩 57 | 69 |
  6. 70 |
  7. To Do 📋
  8. 71 |
  9. Roadmap 🛣️
  10. 72 |
  11. Contributing 🤝
  12. 73 |
  13. License ⚖️
  14. 74 |
  15. Contact 📬
  16. 75 |
  17. Acknowledgements 🙏
  18. 76 |
77 |
78 | 79 | 80 | ## About GNews 81 | 82 | 🚩 GNews is a happy and lightweight Python package that searches the Google News RSS feed and returns a usable JSON 83 | response \ 84 | 🚩 You can also fetch the full article (**no need to write scrapers to fetch articles anymore**) 85 | 86 | Google News covers **141+ countries** and **41+ languages**. On the bottom left side of the Google News page you 87 | may find a `Language & region` section where you can find all of the supported combinations. 88 | 89 | ### Demo 90 | 91 | [![GNews Demo][demo-gif]](https://github.com/ranahaani/GNews) 92 | 93 | 94 | 95 | 96 | 97 | ## Getting Started 98 | 99 | This section provides instructions for two different use cases: 100 | 101 | 1. **Installing the GNews package** for immediate use. 102 | 2. **Setting up the GNews project** for local development. 103 | 104 | ### 1. Installing the GNews package 105 | 106 | To install the package and start using it in your own projects, follow these steps: 107 | 108 | ``` shell 109 | pip install gnews 110 | ``` 111 | ### 2. Setting Up GNews for Local Development 112 | 113 | If you want to make modifications locally, follow these steps to set up the development environment. 114 | 115 | #### Option 1: Setup with Docker 116 | 117 | 1. Install [docker and docker-compose](https://docs.docker.com/get-docker/). 118 | 2. Configure the `.env` file by adding your MongoDB credentials. 119 | 3. Run the following command to build and start the Docker containers: 120 | 121 | ``` shell 122 | docker-compose up --build 123 | ``` 124 | 125 | #### Option 2: Install Using Git Clone 126 | 127 | 1. Clone this repository: 128 | ``` shell 129 | git clone https://github.com/ranahaani/GNews.git 130 | ``` 131 | 132 | 2. Set up a virtual environment: 133 | ```shell 134 | virtualenv venv 135 | source venv/bin/activate # MacOS/Linux 136 | .\venv\Scripts\activate # Windows 137 | ``` 138 | 139 | 3. 
Install the required dependencies: 140 | ```shell 141 | pip install -r requirements.txt 142 | ``` 143 | 144 | 145 | 146 | ### Example usage 147 | 148 | ```python 149 | from gnews import GNews 150 | 151 | google_news = GNews() 152 | pakistan_news = google_news.get_news('Pakistan') 153 | print(pakistan_news[0]) 154 | ``` 155 | 156 | ``` 157 | [{ 158 | 'publisher': 'Aljazeera.com', 159 | 'description': 'Pakistan accuses India of stoking conflict in Indian Ocean ' 160 | 'Aljazeera.com', 161 | 'published date': 'Tue, 16 Feb 2021 11:50:43 GMT', 162 | 'title': 'Pakistan accuses India of stoking conflict in Indian Ocean - ' 163 | 'Aljazeera.com', 164 | 'url': 'https://www.aljazeera.com/news/2021/2/16/pakistan-accuses-india-of-nuclearizing-indian-ocean' 165 | }, 166 | ...] 167 | ``` 168 | 169 | ### Get top news 170 | 171 | * `GNews.get_top_news()` 172 | 173 | ### Get news by keyword 174 | 175 | * `GNews.get_news(keyword)` 176 | 177 | ### Get news by major topic 178 | 179 | * `GNews.get_news_by_topic(topic)` 180 | * Available topics: `WORLD, NATION, BUSINESS, TECHNOLOGY, ENTERTAINMENT, SPORTS, SCIENCE, HEALTH, POLITICS, CELEBRITIES, TV, MUSIC, MOVIES, THEATER, SOCCER, CYCLING, MOTOR SPORTS, TENNIS, COMBAT SPORTS, BASKETBALL, BASEBALL, FOOTBALL, SPORTS BETTING, WATER SPORTS, HOCKEY, GOLF, 181 | CRICKET, RUGBY, ECONOMY, PERSONAL FINANCE, FINANCE, DIGITAL CURRENCIES, MOBILE, ENERGY, GAMING, INTERNET SECURITY, GADGETS, VIRTUAL REALITY, ROBOTICS, NUTRITION, PUBLIC HEALTH, MENTAL HEALTH, MEDICINE, SPACE, WILDLIFE, ENVIRONMENT, NEUROSCIENCE, PHYSICS, GEOLOGY, PALEONTOLOGY, SOCIAL SCIENCES, EDUCATION, JOBS, ONLINE EDUCATION, HIGHER EDUCATION, VEHICLES, ARTS-DESIGN, BEAUTY, FOOD, TRAVEL, SHOPPING, HOME, OUTDOORS, FASHION`. 182 | 183 | ### Get news by geo location 184 | 185 | * `GNews.get_news_by_location(location)` 186 | * location can be the name of a city, state, or country 187 | 188 | ### Get news by site 189 | 190 | * `GNews.get_news_by_site(site)` 191 | * site should be in the format of: 
`"cnn.com"` 192 | 193 | ### Results specification 194 | All parameters are optional and can be passed during initialization. Here’s a list of the available parameters: 195 | 196 | - **language**: The language in which to return results (default: 'en'). 197 | - **country**: The country code for the headlines (default: 'US'). 198 | - **period**: The time period for which you want news. 199 | - **start_date**: Date after which results must have been published. 200 | - **end_date**: Date before which results must have been published. 201 | - **max_results**: The maximum number of results to return (default: 100). 202 | - **exclude_websites**: A list of websites to exclude from results. 203 | - **proxy**: A dictionary specifying the proxy settings used to route requests. The dictionary should contain a single key-value pair where the key is the protocol (`http` or `https`) and the value is the proxy address. Example: 204 | ```python 205 | # Example with only HTTP proxy 206 | proxy = { 207 | 'http': 'http://your_proxy_address', 208 | } 209 | 210 | # Example with only HTTPS proxy 211 | proxy = { 212 | 'https': 'http://your_proxy_address', 213 | } 214 | ``` 215 | 216 | #### Example Initialization 217 | ```python 218 | from gnews import GNews 219 | 220 | # Initialize GNews with various parameters, including proxy 221 | google_news = GNews( 222 | language='en', 223 | country='US', 224 | period='7d', 225 | start_date=None, 226 | end_date=None, 227 | max_results=10, 228 | exclude_websites=['yahoo.com', 'cnn.com'], 229 | proxy={ 230 | 'https': 'https://your_proxy_address' 231 | } 232 | ) 233 | ``` 234 | 235 | * Or change it to an existing object 236 | 237 | ```python 238 | google_news.period = '7d' # News from last 7 days 239 | google_news.max_results = 10 # number of responses across a keyword 240 | google_news.country = 'United States' # News from a specific country 241 | google_news.language = 'english' # News in a specific language 242 | google_news.exclude_websites = 
['yahoo.com', 'cnn.com'] # Exclude news from specific website i.e Yahoo.com and CNN.com 243 | google_news.start_date = (2020, 1, 1) # Search from 1st Jan 2020 244 | google_news.end_date = (2020, 3, 1) # Search until 1st March 2020 245 | ``` 246 | 247 | The format of the timeframe is a string comprised of a number, followed by a letter representing the time operator. For 248 | example 1y would signify 1 year. Full list of operators below: 249 | 250 | ``` 251 | - h = hours (eg: 12h) 252 | - d = days (eg: 7d) 253 | - m = months (eg: 6m) 254 | - y = years (eg: 1y) 255 | ``` 256 | 257 | Setting the start and end dates can be done by passing in either a datetime or a tuple in the form (YYYY, MM, DD). 258 | 259 | ### Supported Countries 260 | 261 | ```python 262 | print(google_news.AVAILABLE_COUNTRIES) 263 | 264 | {'Australia': 'AU', 'Botswana': 'BW', 'Canada ': 'CA', 'Ethiopia': 'ET', 'Ghana': 'GH', 'India ': 'IN', 265 | 'Indonesia': 'ID', 'Ireland': 'IE', 'Israel ': 'IL', 'Kenya': 'KE', 'Latvia': 'LV', 'Malaysia': 'MY', 'Namibia': 'NA', 266 | 'New Zealand': 'NZ', 'Nigeria': 'NG', 'Pakistan': 'PK', 'Philippines': 'PH', 'Singapore': 'SG', 'South Africa': 'ZA', 267 | 'Tanzania': 'TZ', 'Uganda': 'UG', 'United Kingdom': 'GB', 'United States': 'US', 'Zimbabwe': 'ZW', 268 | 'Czech Republic': 'CZ', 'Germany': 'DE', 'Austria': 'AT', 'Switzerland': 'CH', 'Argentina': 'AR', 'Chile': 'CL', 269 | 'Colombia': 'CO', 'Cuba': 'CU', 'Mexico': 'MX', 'Peru': 'PE', 'Venezuela': 'VE', 'Belgium ': 'BE', 'France': 'FR', 270 | 'Morocco': 'MA', 'Senegal': 'SN', 'Italy': 'IT', 'Lithuania': 'LT', 'Hungary': 'HU', 'Netherlands': 'NL', 271 | 'Norway': 'NO', 'Poland': 'PL', 'Brazil': 'BR', 'Portugal': 'PT', 'Romania': 'RO', 'Slovakia': 'SK', 'Slovenia': 'SI', 272 | 'Sweden': 'SE', 'Vietnam': 'VN', 'Turkey': 'TR', 'Greece': 'GR', 'Bulgaria': 'BG', 'Russia': 'RU', 'Ukraine ': 'UA', 273 | 'Serbia': 'RS', 'United Arab Emirates': 'AE', 'Saudi Arabia': 'SA', 'Lebanon': 'LB', 'Egypt': 'EG', 274 | 
'Bangladesh': 'BD', 'Thailand': 'TH', 'China': 'CN', 'Taiwan': 'TW', 'Hong Kong': 'HK', 'Japan': 'JP', 275 | 'Republic of Korea': 'KR'} 276 | ``` 277 | 278 | ### Supported Languages 279 | 280 | ```python 281 | print(google_news.AVAILABLE_LANGUAGES) 282 | 283 | {'english': 'en', 'indonesian': 'id', 'czech': 'cs', 'german': 'de', 'spanish': 'es-419', 'french': 'fr', 284 | 'italian': 'it', 'latvian': 'lv', 'lithuanian': 'lt', 'hungarian': 'hu', 'dutch': 'nl', 'norwegian': 'no', 285 | 'polish': 'pl', 'portuguese brasil': 'pt-419', 'portuguese portugal': 'pt-150', 'romanian': 'ro', 'slovak': 'sk', 286 | 'slovenian': 'sl', 'swedish': 'sv', 'vietnamese': 'vi', 'turkish': 'tr', 'greek': 'el', 'bulgarian': 'bg', 287 | 'russian': 'ru', 'serbian': 'sr', 'ukrainian': 'uk', 'hebrew': 'he', 'arabic': 'ar', 'marathi': 'mr', 'hindi': 'hi', 288 | 'bengali': 'bn', 'tamil': 'ta', 'telugu': 'te', 'malyalam': 'ml', 'thai': 'th', 'chinese simplified': 'zh-Hans', 289 | 'chinese traditional': 'zh-Hant', 'japanese': 'ja', 'korean': 'ko'} 290 | ``` 291 | 292 | ### Article Properties 293 | 294 | - `get_news` returns a list of articles, each with the following keys: `title`, `published date`, `description`, `url`, `publisher`. 
295 | 296 | | Properties | Description | Example | 297 | |--------------|------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| 298 | | title | Title of the article | IMF Staff and Pakistan Reach Staff-Level Agreement on the Pending Reviews Under the Extended Fund Facility | 299 | | url | Google news link to article | [Article Link](http://news.google.com/news/url?sa=t&fd=R&ct2=us&usg=AFQjCNGNR4Qg8LGbjszT1yt2s2lMXvvufQ&clid=c3a7d30bb8a4878e06b80cf16b898331&cid=52779522121279&ei=VQU7WYjiFoLEhQHIs4HQCQ&url=https://www.theguardian.com/commentisfree/2017/jun/07/why-dont-unicorns-exist-google) | 300 | | published date | Published date | Wed, 07 Jun 2017 07:01:30 GMT | 301 | | description | Short description of article | IMF Staff and Pakistan Reach Staff-Level Agreement on the Pending Reviews Under the Extended Fund Facility ... | 302 | | publisher | Publisher of article | The Guardian | | 303 | 304 | ## Getting full article 305 | 306 | * To read a full article you can either: 307 | * Navigate to the url directly in your browser, or 308 | * Use `newspaper3k` library to scrape the article 309 | * The article url, needed for both methods, is accessed as `article['url']`. 310 | 311 | #### Using newspaper3k 312 | 313 | 1. Install the library - `pip3 install newspaper3k`. 314 | 2. Use `get_full_article` method from `GNews`, that creates an `newspaper.article.Article` object from the url. 
315 | 316 | ```python 317 | from gnews import GNews 318 | 319 | google_news = GNews() 320 | json_resp = google_news.get_news('Pakistan') 321 | article = google_news.get_full_article( 322 | json_resp[0]['url']) # newspaper3k Article instance; all newspaper3k attributes are available on it 323 | ``` 324 | 325 | This new object exposes attributes such as `title`, `text` (the full article) and `images`. Examples: 326 | 327 | ```python 328 | article.title 329 | ``` 330 | 331 | > IMF Staff and Pakistan Reach Staff-Level Agreement on the Pending Reviews Under the Extended Fund Facility 332 | 333 | ```python 334 | article.text 335 | ``` 336 | 337 | > End-of-Mission press releases include statements of IMF staff teams that convey preliminary findings after a mission. The views expressed are those of the IMF staff and do not necessarily represent the views of the IMF’s Executive Board.\n\nIMF staff and the Pakistani authorities have reached an agreement on a package of measures to complete second to fifth reviews of the authorities’ reform program supported by the IMF Extended Fund Facility (EFF) ..... 
(full article) 338 | 339 | ```python 340 | article.images 341 | ``` 342 | 343 | > `{'https://www.imf.org/~/media/Images/IMF/Live-Page/imf-live-rgb-h.ashx?la=en', 'https://www.imf.org/-/media/Images/IMF/Data/imf-logo-eng-sep2019-update.ashx', 'https://www.imf.org/-/media/Images/IMF/Data/imf-seal-shadow-sep2019-update.ashx', 'https://www.imf.org/-/media/Images/IMF/Social/TW-Thumb/twitter-seal.ashx', 'https://www.imf.org/assets/imf/images/footer/IMF_seal.png'} 344 | ` 345 | 346 | ```python 347 | article.authors 348 | ``` 349 | 350 | > `[]` 351 | 352 | Read the full documentation for `newspaper3k`: 353 | [newspaper3k](https://newspaper.readthedocs.io/en/latest/user_guide/quickstart.html#parsing-an-article) 354 | 355 | 356 | ## Todo 357 | 358 | - Save to MongoDB 359 | - Save to SQLite 360 | - Save to JSON 361 | - Save to .CSV file 362 | - More than 100 articles 363 | 364 | 365 | 366 | ## Roadmap 367 | 368 | See the [open issues](https://github.com/ranahaani/GNews/issues) for a list of proposed features (and known issues). 369 | 370 | 371 | 372 | 373 | 374 | ## Contributing 375 | 376 | Contributions are what make the open source community such an amazing place to learn, inspire, and create. Any 377 | contributions you make are **greatly appreciated**. 378 | 379 | 1. Fork the Project 380 | 2. Create your Feature Branch (`git checkout -b feature/AmazingFeature`) 381 | 3. Commit your Changes (`git commit -m 'Add some AmazingFeature'`) 382 | 4. Push to the Branch (`git push origin feature/AmazingFeature`) 383 | 5. Open a Pull Request 384 | 385 | 386 | 387 | ## License 388 | 389 | Distributed under the MIT License. See `LICENSE.txt` for more information. 
390 | 391 | 392 | 393 | 394 | 395 | ## Contact 396 | 397 | Muhammad Abdullah - [@ranahaani](https://twitter.com/ranahaani) - ranahaani@gmail.com 398 | 399 | Project Link: [https://github.com/ranahaani/GNews](https://github.com/ranahaani/GNews) 400 | 401 | [!["Buy Me A Coffee"](https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png)](https://www.buymeacoffee.com/ranahaani) 402 | 403 | [contributors-shield]: https://img.shields.io/github/contributors/ranahaani/GNews.svg?style=for-the-badge 404 | 405 | [contributors-url]: https://github.com/ranahaani/GNews/graphs/contributors 406 | 407 | [forks-shield]: https://img.shields.io/github/forks/ranahaani/GNews.svg?style=for-the-badge 408 | 409 | [forks-url]: https://github.com/ranahaani/GNews/network/members 410 | 411 | [stars-shield]: https://img.shields.io/github/stars/ranahaani/GNews.svg?style=for-the-badge 412 | 413 | [stars-url]: https://github.com/ranahaani/GNews/stargazers 414 | 415 | [issues-shield]: https://img.shields.io/github/issues/ranahaani/GNews.svg?style=for-the-badge 416 | 417 | [issues-url]: https://github.com/ranahaani/GNews/issues 418 | 419 | [license-shield]: https://img.shields.io/github/license/ranahaani/GNews.svg?style=for-the-badge 420 | 421 | [license-url]: https://github.com/ranahaani/GNews/blob/master/LICENSE.txt 422 | 423 | [download-sheild]: https://img.shields.io/pypi/dm/GNews.svg?style=for-the-badge 424 | 425 | [download-url]: https://pypistats.org/packages/gnews 426 | 427 | [linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555 428 | 429 | [linkedin-url]: https://linkedin.com/in/ranahaani 430 | 431 | [demo-gif]: https://github.com/ranahaani/GNews/raw/master/imgs/gnews.gif 432 | -------------------------------------------------------------------------------- /docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: "3.8" 2 | services: 3 | 4 | mongodb: 5 | image: 
import logging
import urllib.request
import datetime
import inspect
import warnings

import feedparser
from bs4 import BeautifulSoup as Soup

from gnews.utils.constants import AVAILABLE_COUNTRIES, AVAILABLE_LANGUAGES, SECTIONS, TOPICS, BASE_URL, USER_AGENT
from gnews.utils.utils import process_url

logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.INFO,
                    datefmt='%m/%d/%Y %I:%M:%S %p')
logger = logging.getLogger(__name__)


class GNews:
    """
    Client that builds Google News RSS feed URLs (search, top news, topic, location, site) and parses the
    returned feed with ``feedparser`` into a list of plain dictionaries.
    """

    # Callers of ``_get_news`` that query the search endpoint; only these honour start/end dates.
    _DATE_RANGE_CALLERS = ('get_news', '_get_news_more_than_100')

    def __init__(self, language="en", country="US", max_results=100, period=None, start_date=None, end_date=None,
                 exclude_websites=None, proxy=None):
        """
        (optional parameters)
        :param language: The language in which to return results, defaults to en (optional)
        :param country: The country code of the country you want to get headlines for, defaults to US
        :param max_results: The maximum number of results to return, defaults to 100
        :param period: The period of time from which you want the news (a number followed by h, d, m or y)
        :param start_date: Date after which results must have been published; a datetime or a
         (YYYY, MM, DD) tuple
        :param end_date: Date before which results must have been published; a datetime or a
         (YYYY, MM, DD) tuple
        :param exclude_websites: A list of strings that indicate websites to exclude from results
        :param proxy: The proxy address; it is used for both the http and the https protocol
        """
        # NOTE: these used to carry a trailing comma, which wrapped each tuple in another 1-tuple.
        self.countries = tuple(AVAILABLE_COUNTRIES)
        self.languages = tuple(AVAILABLE_LANGUAGES)
        self._max_results = max_results
        self._language = language
        self._country = country
        self._period = period
        self._end_date = None
        self._start_date = None
        # Go through the setters so tuples are converted to datetimes and the range is sanity-checked.
        self.end_date = end_date
        self.start_date = start_date
        self._exclude_websites = exclude_websites if exclude_websites and isinstance(exclude_websites, list) else []
        self._proxy = {'http': proxy, 'https': proxy} if proxy else None

    def _ceid(self):
        """
        Build the tail of the feed URL: an optional time filter followed by the hl/gl/ceid locale parameters.
        Must be called directly from ``_get_news``: the calling public method is identified by stack depth.
        :return: The query-string suffix to append to the feed URL
        """
        time_query = ''
        if self._start_date or self._end_date:
            # frames[0] is this method, frames[1] is _get_news, frames[2] is the method that called _get_news.
            frames = inspect.stack(0)
            caller = frames[2].function if len(frames) > 2 else ''
            if caller not in self._DATE_RANGE_CALLERS:
                warnings.warn(message=("Only searches using the function get_news support date ranges. Review the "
                                       f"documentation for {caller} for a partial workaround. \nStart "
                                       "date and end date will be ignored"), category=UserWarning, stacklevel=4)
                # The dates are ignored for this caller, so fall back to the period when one is set.
                if self._period:
                    time_query += '%20when%3A{}'.format(self._period)
            else:
                if self._period:
                    warnings.warn(message=f'\nPeriod ({self.period}) will be ignored in favour of the start and end dates',
                                  category=UserWarning, stacklevel=4)
                # Reading the date properties also resets the period to None (see their docstrings).
                if self.end_date is not None:
                    time_query += '%20before%3A{}'.format(self.end_date)
                if self.start_date is not None:
                    time_query += '%20after%3A{}'.format(self.start_date)
        elif self._period:
            time_query += '%20when%3A{}'.format(self._period)

        return time_query + '&hl={}&gl={}&ceid={}:{}'.format(self._language,
                                                             self._country,
                                                             self._country,
                                                             self._language,)

    @property
    def language(self):
        """
        :return: The language code currently in use
        """
        return self._language

    @language.setter
    def language(self, language):
        """
        :param language: The language code for the language you want to use; a key of AVAILABLE_LANGUAGES
         is translated to its code, any other value is stored unchanged
        """
        self._language = AVAILABLE_LANGUAGES.get(language, language)

    @property
    def exclude_websites(self):
        """
        :return: The list of websites excluded from results
        """
        return self._exclude_websites

    @exclude_websites.setter
    def exclude_websites(self, exclude_websites):
        """
        The function takes in a list of websites that you want to exclude
        :param exclude_websites: A list of strings that will be used to filter out websites
        """
        self._exclude_websites = exclude_websites

    @property
    def max_results(self):
        """
        :return: The maximum number of results to return
        """
        return self._max_results

    @max_results.setter
    def max_results(self, size):
        """
        :param size: The maximum number of results to return
        """
        self._max_results = size

    @property
    def period(self):
        """
        :return: The period of time from which news is requested, or None if not set
        """
        return self._period

    @period.setter
    def period(self, period):
        """
        :param period: The period of time from which you want the news (a number followed by h, d, m or y)
        """
        self._period = period

    @staticmethod
    def _to_datetime(value):
        """Convert a (YYYY, MM, DD) tuple to a datetime; any other value is returned unchanged."""
        if isinstance(value, tuple):
            return datetime.datetime(value[0], value[1], value[2])
        return value

    @staticmethod
    def _warn_if_bad_range(start, end):
        """Warn when a start/end pair is empty or reversed; both arguments must be datetimes."""
        # stacklevel=3 attributes the warning to the code that assigned the date, not to the setter.
        if end - start == datetime.timedelta(days=0):
            warnings.warn("The start and end dates should be at least 1 day apart, or GNews will return no results",
                          stacklevel=3)
        elif end < start:
            warnings.warn("End date should be after start date, or GNews will return no results", stacklevel=3)

    @property
    def start_date(self):
        """
        :return: string of start_date in form YYYY-MM-DD, or None if start_date is not set
        …NOTE this will reset period to None if start_date is not none
        """
        if self._start_date is None:
            return None
        self.period = None
        return self._start_date.strftime("%Y-%m-%d")

    @start_date.setter
    def start_date(self, start_date):
        """
        The function sets the start of the date range you want to search
        :param start_date: either a tuple in the form (YYYY, MM, DD) or a datetime; None clears the start date
        """
        start_date = self._to_datetime(start_date)
        # Only compare when both ends are set; None minus a datetime would raise TypeError.
        if start_date is not None and self._end_date is not None:
            self._warn_if_bad_range(start_date, self._end_date)
        self._start_date = start_date

    @property
    def end_date(self):
        """
        :return: string of end_date in form YYYY-MM-DD, or None if end_date is not set
        …NOTE this will reset period to None if end date is not None
        """
        if self._end_date is None:
            return None
        self.period = None
        return self._end_date.strftime("%Y-%m-%d")

    @end_date.setter
    def end_date(self, end_date):
        """
        The function sets the end of the date range you want to search
        :param end_date: either a tuple in the form (YYYY, MM, DD) or a datetime; None clears the end date
        """
        end_date = self._to_datetime(end_date)
        # Only compare when both ends are set; None minus a datetime would raise TypeError.
        if end_date is not None and self._start_date is not None:
            self._warn_if_bad_range(self._start_date, end_date)
        self._end_date = end_date

    @property
    def country(self):
        """
        :return: The country code currently in use
        """
        return self._country

    @country.setter
    def country(self, country):
        """
        :param country: The country you want headlines for; a key of AVAILABLE_COUNTRIES is translated to
         its code, any other value is stored unchanged
        """
        self._country = AVAILABLE_COUNTRIES.get(country, country)

    def get_full_article(self, url):
        """
        Download an article from the specified URL, parse it, and return an article object.
        :param url: The URL of the article you wish to summarize.
        :return: An `Article` object returned by the `newspaper3k` library if installed; otherwise, None.
         None is also returned when downloading or parsing fails.
        """
        try:
            import newspaper
        except ImportError:
            print("\nget_full_article() requires the `newspaper3k` library.")
            print("You can install it by running `pip3 install newspaper3k` in your shell.")
            return None

        try:
            article = newspaper.Article(url="%s" % url, language=self._language)
            article.download()
            article.parse()
        except Exception as error:
            # Best effort: any download/parse failure is reported and turned into a None result.
            print(f"An error occurred while fetching the article: {error}")
            return None

        return article

    @staticmethod
    def _clean(html):
        """
        Strip the markup from an HTML fragment.
        :param html: The HTML string to clean
        :return: The text content with non-breaking spaces replaced by regular spaces
        """
        soup = Soup(html, features="html.parser")
        text = soup.get_text()
        text = text.replace('\xa0', ' ')
        return text

    def _process(self, item):
        """
        Convert one feed entry into the article dictionary returned to callers.
        :param item: A feed entry as produced by feedparser
        :return: The article dictionary, or None when process_url yields no URL for the entry
        """
        url = process_url(item, self._exclude_websites)
        if not url:
            return None
        return {
            'title': item.get("title", ""),
            'description': self._clean(item.get("description", "")),
            'published date': item.get("published", ""),
            'url': url,
            'publisher': item.get("source", " ")
        }

    def docstring_parameter(*sub):
        """Class-body decorator factory that fills the positional placeholders of a docstring with *sub*."""
        def dec(obj):
            # __doc__ is None when docstrings are stripped (python -OO); there is nothing to format then.
            if obj.__doc__:
                obj.__doc__ = obj.__doc__.format(*sub)
            return obj

        return dec

    indent = '\n\t\t\t'
    indent2 = indent + '\t'
    standard_output = (indent + "{'title': Article Title," + indent + "'description': Google News summary of the "
                       "article," + indent + "'published date': publication date of the article," + indent +
                       "'url': link to the news article," + indent + "'publisher':" + indent2 +
                       "{'href': link to publisher's website," + indent2 + "'title': name of the publisher}}")

    @docstring_parameter(standard_output)
    def get_news(self, key):
        """
        The function takes in a key and returns a list of news articles
        :param key: The query you want to search for. For example, if you want to search for news about
         the "Yahoo", you would get results from Google News according to your key i.e "yahoo"
        :return: A list of dictionaries with structure: {0}.
         An empty list is returned when no key is given.
        """
        if not key:
            return []

        # Encode spaces before branching so the more-than-100 path also receives a URL-safe query.
        key = "%20".join(key.split(" "))
        if self._max_results > 100:
            return self._get_news_more_than_100(key)

        query = '/search?q={}'.format(key)
        return self._get_news(query)

    def _get_news_more_than_100(self, key):
        """
        Fetch more than 100 news articles by iterating backward in time, dynamically adjusting
        the date range based on the earliest date seen so far.
        :param key: The search query, with spaces already encoded
        :return: A list of at most max_results article dictionaries, without duplicate URLs
        """
        articles = []
        seen_urls = set()
        earliest_date = None

        if self._start_date or self._end_date or self._period:
            warnings.warn(message=("Searches for over 100 articles do not currently support date ranges. \nStart "
                                   "date, end date, and period will be ignored"), category=UserWarning, stacklevel=4)

        # The sliding window below overwrites the time filters, so keep the user's settings to restore them.
        saved_filters = (self._start_date, self._end_date, self._period)
        # Start with no specific date range for the first query
        self._start_date = None
        self._end_date = None
        self._period = None

        try:
            while len(articles) < self._max_results:
                # Fetch articles for the current range
                fetched_articles = self._get_news(f'/search?q={key}')
                if not fetched_articles:  # Stop if no more articles are found
                    break

                added = 0
                for article in fetched_articles:
                    if article['url'] not in seen_urls:
                        articles.append(article)
                        seen_urls.add(article['url'])
                        added += 1
                        if len(articles) >= self._max_results:
                            return articles

                    # Track the earliest published date
                    published_date = article.get("published date")
                    try:
                        published_date = datetime.datetime.strptime(published_date, '%a, %d %b %Y %H:%M:%S GMT')
                    except (TypeError, ValueError) as e:
                        logger.warning("Failed to parse published date: %s", e)
                        continue

                    if earliest_date is None or published_date < earliest_date:
                        earliest_date = published_date

                # If fewer than 100 articles were fetched, assume the range is exhausted
                if len(fetched_articles) < 100:
                    break

                # Without a new article or a usable date the window cannot move: stop instead of looping forever.
                if not added or earliest_date is None:
                    break

                # Update the sliding window to fetch older articles
                self._end_date = earliest_date
                self._start_date = earliest_date - datetime.timedelta(days=7)
        finally:
            self._start_date, self._end_date, self._period = saved_filters

        return articles

    @docstring_parameter(standard_output)
    def get_top_news(self):
        """
        This function returns top news stories for the current time
        :return: A list of dictionaries with structure: {0}.
        ..To implement date range try get_news('?')
        """
        query = "?"
        return self._get_news(query)

    @docstring_parameter(standard_output, ', '.join(TOPICS), ', '.join(SECTIONS.keys()))
    def get_news_by_topic(self, topic: str):
        """
        Function to get news from one of Google's key topics
        :param topic: TOPIC names i.e {1}
         or section names i.e {2}
        :return: A list of dictionaries with structure: {0}.
         An empty list is returned for an unknown topic.
        ..To implement date range try get_news('topic')
        """
        topic = topic.upper()
        if topic in TOPICS:
            query = '/headlines/section/topic/' + topic + '?'
            return self._get_news(query)
        elif topic in SECTIONS.keys():
            query = '/topics/' + SECTIONS[topic] + '?'
            return self._get_news(query)

        # Join both collections into one list; formatting them as a tuple printed its repr.
        available_topics = ', '.join([*TOPICS, *SECTIONS.keys()])
        logger.info(f"Invalid topic. \nAvailable topics are: {available_topics}.")
        return []

    @docstring_parameter(standard_output)
    def get_news_by_location(self, location: str):
        """
        This function is used to get news from a specific location (city, state, and country)
        :param location: (type: str) The location for which you want to get headlines
        :return: A list of dictionaries with structure: {0}.
         An empty list is returned when no location is given.
        ..To implement date range try get_news('location')
        """
        if location:
            # Encode spaces the same way get_news does, so multi-word locations form a valid URL.
            query = '/headlines/section/geo/' + "%20".join(location.split(" ")) + '?'
            return self._get_news(query)
        logger.warning("Enter a valid location.")
        return []

    @docstring_parameter(standard_output)
    def get_news_by_site(self, site: str):
        """
        This function is used to get news from a specific site
        :param site: (type: str) The site domain for which you want to get headlines. E.g., 'cnn.com'
        :return: A list of news articles from the specified site.
         An empty list is returned when no site is given.
        """
        if site:
            key = "site:{}".format(site)
            return self.get_news(key)
        logger.warning("Enter a valid site domain.")
        return []

    def _get_news(self, query):
        """
        Fetch the feed for *query* and convert its entries into article dictionaries.
        :param query: The path and query part appended to BASE_URL
        :return: A list of at most max_results article dictionaries; an empty list when fetching or
         parsing raises
        """
        # _ceid must be called from this frame: it identifies the public caller by stack depth.
        url = BASE_URL + query + self._ceid()
        try:
            if self._proxy:
                proxy_handler = urllib.request.ProxyHandler(self._proxy)
                feed_data = feedparser.parse(url, agent=USER_AGENT, handlers=[proxy_handler])
            else:
                feed_data = feedparser.parse(url, agent=USER_AGENT)

            return [item for item in
                    map(self._process, feed_data.entries[:self._max_results]) if item]
        except Exception as err:
            # Best effort: log the error itself (err.args may be empty) and return no results.
            logger.error("Failed to fetch or parse the news feed: %s", err)
            return []
Safari/537.36 10 | Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2224.3 Safari/537.36 11 | Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.93 Safari/537.36 12 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36 13 | Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36 14 | Mozilla/5.0 (Windows NT 4.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36 15 | Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36 16 | Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36 17 | Mozilla/5.0 (X11; OpenBSD i386) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36 18 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1944.0 Safari/537.36 19 | Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.3319.102 Safari/537.36 20 | Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2309.372 Safari/537.36 21 | Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2117.157 Safari/537.36 22 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36 23 | Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1866.237 Safari/537.36 24 | Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/4E423F 25 | Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.116 Safari/537.36 Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B334b Safari/531.21.10 26 | Mozilla/5.0 (X11; Linux x86_64) 
AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.517 Safari/537.36 27 | Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36 28 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36 29 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36 30 | Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.16 Safari/537.36 31 | Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1623.0 Safari/537.36 32 | Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.17 Safari/537.36 33 | Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.62 Safari/537.36 34 | Mozilla/5.0 (X11; CrOS i686 4319.74.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.57 Safari/537.36 35 | Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.2 Safari/537.36 36 | Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1468.0 Safari/537.36 37 | Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1467.0 Safari/537.36 38 | Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1464.0 Safari/537.36 39 | Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1500.55 Safari/537.36 40 | Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36 41 | Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36 42 | Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36 43 | Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36 44 | 
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36 45 | Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24 46 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24 47 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.22 (KHTML, like Gecko) Chrome/19.0.1047.0 Safari/535.22 48 | Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.21 (KHTML, like Gecko) Chrome/19.0.1042.0 Safari/535.21 49 | Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.21 (KHTML, like Gecko) Chrome/19.0.1041.0 Safari/535.21 50 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20 51 | Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/18.6.872.0 Safari/535.2 UNTRUSTED/1.0 3gpp-gba UNTRUSTED/1.0 52 | Mozilla/5.0 (Macintosh; AMD Mac OS X 10_8_2) AppleWebKit/535.22 (KHTML, like Gecko) Chrome/18.6.872 53 | Mozilla/5.0 (X11; CrOS i686 1660.57.0) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.46 Safari/535.19 54 | Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.45 Safari/535.19 55 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.45 Safari/535.19 56 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.45 Safari/535.19 57 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Safari/535.19 58 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.151 Safari/535.19 59 | Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.19 (KHTML, like Gecko) Ubuntu/11.10 Chromium/18.0.1025.142 Chrome/18.0.1025.142 Safari/535.19 60 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) 
AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.11 Safari/535.19 61 | Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11 62 | Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11 63 | Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11 64 | Mozilla/5.0 (Windows NT 6.2) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11 65 | Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11 66 | Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11 67 | Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11 68 | Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11 69 | Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11 70 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11 71 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11 72 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11 73 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11 74 | Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Ubuntu/11.10 Chromium/17.0.963.65 Chrome/17.0.963.65 Safari/535.11 75 | Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Ubuntu/11.04 Chromium/17.0.963.65 Chrome/17.0.963.65 Safari/535.11 76 | Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Ubuntu/10.10 Chromium/17.0.963.65 Chrome/17.0.963.65 Safari/535.11 77 | Mozilla/5.0 (X11; 
Linux i686) AppleWebKit/535.11 (KHTML, like Gecko) Ubuntu/11.10 Chromium/17.0.963.65 Chrome/17.0.963.65 Safari/535.11 78 | Mozilla/5.0 (Windows NT 5.1) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.700.3 Safari/534.24 79 | Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.699.0 Safari/534.24 80 | Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.699.0 Safari/534.24 81 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_6) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.698.0 Safari/534.24 82 | Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.697.0 Safari/534.24 83 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.71 Safari/534.24 84 | Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.68 Safari/534.24 85 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.68 Safari/534.24 86 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.68 Safari/534.24 87 | Mozilla/5.0 Slackware/13.37 (X11; U; Linux x86_64; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/11.0.696.50 88 | Mozilla/5.0 (Windows NT 5.1) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.43 Safari/534.24 89 | Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.34 Safari/534.24 90 | Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.34 Safari/534.24 91 | Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.3 Safari/534.24 92 | Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.3 Safari/534.24 93 | Mozilla/5.0 (Windows NT 6.0) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.3 Safari/534.24 94 | Mozilla/5.0 (X11; Linux i686) AppleWebKit/534.24 (KHTML, like Gecko) 
Chrome/11.0.696.14 Safari/534.24 95 | Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.12 Safari/534.24 96 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_6) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.12 Safari/534.24 97 | Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/534.24 (KHTML, like Gecko) Ubuntu/10.04 Chromium/11.0.696.0 Chrome/11.0.696.0 Safari/534.24 98 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.0 Safari/534.24 99 | Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.694.0 Safari/534.24 100 | Mozilla/5.0 (X11; Linux i686) AppleWebKit/534.23 (KHTML, like Gecko) Chrome/11.0.686.3 Safari/534.23 101 | Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.21 (KHTML, like Gecko) Chrome/11.0.682.0 Safari/534.21 102 | Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.21 (KHTML, like Gecko) Chrome/11.0.678.0 Safari/534.21 103 | Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_7_0; en-US) AppleWebKit/534.21 (KHTML, like Gecko) Chrome/11.0.678.0 Safari/534.21 104 | Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/534.20 (KHTML, like Gecko) Chrome/11.0.672.2 Safari/534.20 105 | Mozilla/5.0 (Windows NT) AppleWebKit/534.20 (KHTML, like Gecko) Chrome/11.0.672.2 Safari/534.20 106 | Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; en-US) AppleWebKit/534.20 (KHTML, like Gecko) Chrome/11.0.672.2 Safari/534.20 107 | Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.20 (KHTML, like Gecko) Chrome/11.0.669.0 Safari/534.20 108 | Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.19 (KHTML, like Gecko) Chrome/11.0.661.0 Safari/534.19 109 | Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.18 (KHTML, like Gecko) Chrome/11.0.661.0 Safari/534.18 110 | Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; en-US) AppleWebKit/534.18 (KHTML, like Gecko) Chrome/11.0.660.0 Safari/534.18 
111 | Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.17 (KHTML, like Gecko) Chrome/11.0.655.0 Safari/534.17 112 | Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-US) AppleWebKit/534.17 (KHTML, like Gecko) Chrome/11.0.655.0 Safari/534.17 113 | Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.17 (KHTML, like Gecko) Chrome/11.0.654.0 Safari/534.17 114 | Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/534.17 (KHTML, like Gecko) Chrome/11.0.652.0 Safari/534.17 115 | Mozilla/4.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/11.0.1245.0 Safari/537.36 116 | Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.17 (KHTML, like Gecko) Chrome/10.0.649.0 Safari/534.17 117 | Mozilla/5.0 (Windows; U; Windows NT 6.1; de-DE) AppleWebKit/534.17 (KHTML, like Gecko) Chrome/10.0.649.0 Safari/534.17 118 | Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.82 Safari/534.16 119 | Mozilla/5.0 (X11; U; Linux armv7l; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.204 Safari/534.16 120 | Mozilla/5.0 (X11; U; FreeBSD x86_64; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.204 Safari/534.16 121 | Mozilla/5.0 (X11; U; FreeBSD i386; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.204 Safari/534.16 122 | Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_5; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.204 123 | Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.134 Safari/534.16 124 | Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.134 Safari/534.16 125 | Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.134 Safari/534.16 126 | Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.134 Safari/534.16 127 | 
Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Ubuntu/10.10 Chromium/10.0.648.133 Chrome/10.0.648.133 Safari/534.16 128 | Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16 129 | Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Ubuntu/10.10 Chromium/10.0.648.133 Chrome/10.0.648.133 Safari/534.16 130 | Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16 131 | Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16 132 | Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_3; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16 133 | Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_2; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16 134 | Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Ubuntu/10.10 Chromium/10.0.648.127 Chrome/10.0.648.127 Safari/534.16 135 | Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.127 Safari/534.16 136 | Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.127 Safari/534.16 137 | Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.127 Safari/534.16 138 | Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.11 Safari/534.16 139 | Mozilla/5.0 (Windows; U; Windows NT 6.1; ru-RU; AppleWebKit/534.16; KHTML; like Gecko; Chrome/10.0.648.11;Safari/534.16) 140 | Mozilla/5.0 (Windows; U; Windows NT 6.1; ru-RU) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.11 Safari/534.16 141 | Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.11 Safari/534.16 142 | 
Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Ubuntu/10.10 Chromium/10.0.648.0 Chrome/10.0.648.0 Safari/534.16 143 | Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Ubuntu/10.10 Chromium/10.0.648.0 Chrome/10.0.648.0 Safari/534.16 144 | Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.0 Safari/534.16 145 | Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Ubuntu/10.10 Chromium/10.0.642.0 Chrome/10.0.642.0 Safari/534.16 146 | Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_5; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.639.0 Safari/534.16 147 | Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.638.0 Safari/534.16 148 | Mozilla/5.0 (X11; U; Linux i686 (x86_64); en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.634.0 Safari/534.16 149 | Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.634.0 Safari/534.16 150 | Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.16 SUSE/10.0.626.0 (KHTML, like Gecko) Chrome/10.0.626.0 Safari/534.16 151 | Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.15 (KHTML, like Gecko) Chrome/10.0.613.0 Safari/534.15 152 | Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.15 (KHTML, like Gecko) Ubuntu/10.10 Chromium/10.0.613.0 Chrome/10.0.613.0 Safari/534.15 153 | Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.15 (KHTML, like Gecko) Ubuntu/10.04 Chromium/10.0.612.3 Chrome/10.0.612.3 Safari/534.15 154 | Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.15 (KHTML, like Gecko) Chrome/10.0.612.1 Safari/534.15 155 | Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.15 (KHTML, like Gecko) Ubuntu/10.10 Chromium/10.0.611.0 Chrome/10.0.611.0 Safari/534.15 156 | Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.14 (KHTML, like 
Gecko) Chrome/10.0.602.0 Safari/534.14 157 | Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.14 (KHTML, like Gecko) Chrome/10.0.601.0 Safari/534.14 158 | Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.14 (KHTML, like Gecko) Chrome/10.0.601.0 Safari/534.14 159 | Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/540.0 (KHTML,like Gecko) Chrome/9.1.0.0 Safari/540.0 160 | Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/540.0 (KHTML, like Gecko) Ubuntu/10.10 Chrome/9.1.0.0 Safari/540.0 161 | Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/534.14 (KHTML, like Gecko) Chrome/9.0.601.0 Safari/534.14 162 | Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.14 (KHTML, like Gecko) Ubuntu/10.10 Chromium/9.0.600.0 Chrome/9.0.600.0 Safari/534.14 163 | Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.14 (KHTML, like Gecko) Chrome/9.0.600.0 Safari/534.14 164 | Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.599.0 Safari/534.13 165 | Mozilla/5.0 (Windows; U; Windows NT 5.1; en-CA) AppleWebKit/534.13 (KHTML like Gecko) Chrome/9.0.597.98 Safari/534.13 166 | Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.84 Safari/534.13 167 | Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.44 Safari/534.13 168 | Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.19 Safari/534.13 169 | Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.15 Safari/534.13 170 | Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_5; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.15 Safari/534.13 171 | Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.107 Safari/534.13 v1416758524.9051 172 | Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.13 
(KHTML, like Gecko) Chrome/9.0.597.107 Safari/534.13 v1416748405.3871 173 | Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.107 Safari/534.13 v1416670950.695 174 | Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.107 Safari/534.13 v1416664997.4379 175 | Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.107 Safari/534.13 v1333515017.9196 176 | Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.0 Safari/534.13 177 | Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.0 Safari/534.13 178 | Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.0 Safari/534.13 179 | Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.0 Safari/534.13 180 | Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_5; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.0 Safari/534.13 181 | Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.0 Safari/534.13 182 | Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.596.0 Safari/534.13 183 | Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Ubuntu/10.04 Chromium/9.0.595.0 Chrome/9.0.595.0 Safari/534.13 184 | Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Ubuntu/9.10 Chromium/9.0.592.0 Chrome/9.0.592.0 Safari/534.13 185 | Mozilla/5.0 (X11; U; Windows NT 6; en-US) AppleWebKit/534.12 (KHTML, like Gecko) Chrome/9.0.587.0 Safari/534.12 186 | Mozilla/5.0 (Windows U Windows NT 5.1 en-US) AppleWebKit/534.12 (KHTML, like Gecko) Chrome/9.0.583.0 Safari/534.12'''.split('\n') 187 | 188 | 189 | USER_AGENT = random.choice(USER_AGENTS) 190 | 191 | 192 | 
AVAILABLE_LANGUAGES = { 193 | "english": "en", 194 | "indonesian": "id", 195 | "czech": "cs", 196 | "german": "de", 197 | "spanish": "es-419", 198 | "french": "fr", 199 | "italian": "it", 200 | "latvian": "lv", 201 | "lithuanian": "lt", 202 | "hungarian": "hu", 203 | "dutch": "nl", 204 | "norwegian": "no", 205 | "polish": "pl", 206 | "portuguese brasil": "pt-419", 207 | "portuguese portugal": "pt-150", 208 | "romanian": "ro", 209 | "slovak": "sk", 210 | "slovenian": "sl", 211 | "swedish": "sv", 212 | "vietnamese": "vi", 213 | "turkish": "tr", 214 | "greek": "el", 215 | "bulgarian": "bg", 216 | "russian": "ru", 217 | "serbian": "sr", 218 | "ukrainian": "uk", 219 | "hebrew": "he", 220 | "arabic": "ar", 221 | "marathi": "mr", 222 | "hindi": "hi", 223 | "bengali": "bn", 224 | "tamil": "ta", 225 | "telugu": "te", 226 | "malyalam": "ml", 227 | "thai": "th", 228 | "chinese simplified": "zh-Hans", 229 | "chinese traditional": "zh-Hant", 230 | "japanese": "ja", 231 | "korean": "ko" 232 | } 233 | 234 | AVAILABLE_COUNTRIES = { 235 | "Australia": "AU", 236 | "Botswana": "BW", 237 | "Canada ": "CA", 238 | "Ethiopia": "ET", 239 | "Ghana": "GH", 240 | "India ": "IN", 241 | "Indonesia": "ID", 242 | "Ireland": "IE", 243 | "Israel ": "IL", 244 | "Kenya": "KE", 245 | "Latvia": "LV", 246 | "Malaysia": "MY", 247 | "Namibia": "NA", 248 | "New Zealand": "NZ", 249 | "Nigeria": "NG", 250 | "Pakistan": "PK", 251 | "Philippines": "PH", 252 | "Singapore": "SG", 253 | "South Africa": "ZA", 254 | "Tanzania": "TZ", 255 | "Uganda": "UG", 256 | "United Kingdom": "GB", 257 | "United States": "US", 258 | "Zimbabwe": "ZW", 259 | "Czech Republic": "CZ", 260 | "Germany": "DE", 261 | "Austria": "AT", 262 | "Switzerland": "CH", 263 | "Argentina": "AR", 264 | "Chile": "CL", 265 | "Colombia": "CO", 266 | "Cuba": "CU", 267 | "Mexico": "MX", 268 | "Peru": "PE", 269 | "Venezuela": "VE", 270 | "Belgium ": "BE", 271 | "France": "FR", 272 | "Morocco": "MA", 273 | "Senegal": "SN", 274 | "Italy": "IT", 275 | 
"Lithuania": "LT", 276 | "Hungary": "HU", 277 | "Netherlands": "NL", 278 | "Norway": "NO", 279 | "Poland": "PL", 280 | "Brazil": "BR", 281 | "Portugal": "PT", 282 | "Romania": "RO", 283 | "Slovakia": "SK", 284 | "Slovenia": "SI", 285 | "Sweden": "SE", 286 | "Vietnam": "VN", 287 | "Turkey": "TR", 288 | "Greece": "GR", 289 | "Bulgaria": "BG", 290 | "Russia": "RU", 291 | "Ukraine ": "UA", 292 | "Serbia": "RS", 293 | "United Arab Emirates": "AE", 294 | "Saudi Arabia": "SA", 295 | "Lebanon": "LB", 296 | "Egypt": "EG", 297 | "Bangladesh": "BD", 298 | "Thailand": "TH", 299 | "China": "CN", 300 | "Taiwan": "TW", 301 | "Hong Kong": "HK", 302 | "Japan": "JP", 303 | "Republic of Korea": "KR" 304 | } 305 | 306 | GOOGLE_NEWS_URL = 'https://news.google.com' 307 | BASE_URL = "{0}/rss".format(GOOGLE_NEWS_URL) 308 | 309 | GOOGLE_NEWS_REGEX = f'^http(s)?://(www.)?news.google.com*' 310 | 311 | TOPICS = ["WORLD", "NATION", "BUSINESS", "TECHNOLOGY", "ENTERTAINMENT", "SPORTS", "SCIENCE", "HEALTH"] 312 | 313 | SECTIONS = { 314 | "POLITICS": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNRFZ4ZERBU0FtVnVLQUFQAQ", 315 | "CELEBRITIES": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNREZ5Wm5vU0FtVnVLQUFQAQ", 316 | "TV": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNRGRqTlRJU0FtVnVLQUFQAQ", 317 | "MUSIC": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNRFJ5YkdZU0FtVnVLQUFQAQ", 318 | "MOVIES": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNREoyZUc0U0FtVnVLQUFQAQ", 319 | "THEATER": "CAAqJAgKIh5DQkFTRUFvS0wyMHZNRE54YzJSd2F4SUNaVzRvQUFQAQ", 320 | "SOCCER": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNREoyZURRU0FtVnVLQUFQAQ", 321 | "CYCLING": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNREZ6WjJ3U0FtVnVLQUFQAQ", 322 | "MOTOR SPORTS": "CAAqJAgKIh5DQkFTRUFvS0wyMHZNRFF4TUhSMGFCSUNaVzRvQUFQAQ", 323 | "TENNIS": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNRGRpY3pBU0FtVnVLQUFQAQ", 324 | "COMBAT SPORTS": "CAAqIggKIhxDQkFTRHdvSkwyMHZNRFZyWXpJNUVnSmxiaWdBUAE", 325 | "BASKETBALL": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNREU0ZHpnU0FtVnVLQUFQAQ", 326 | "BASEBALL": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNREU0YW5vU0FtVnVLQUFQAQ", 327 | "FOOTBALL": 
"CAAqIAgKIhpDQkFTRFFvSEwyMHZNR3B0WHhJQ1pXNG9BQVAB", 328 | "SPORTS BETTING": "CAAqIggKIhxDQkFTRHdvSkwyMHZNRFIwTXpsa0VnSmxiaWdBUAE", 329 | "WATER SPORTS": "CAAqIggKIhxDQkFTRHdvSkwyMHZNREptYUdSbUVnSmxiaWdBUAE", 330 | "HOCKEY": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNRE4wYlhJU0FtVnVLQUFQAQ", 331 | "GOLF": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNRE0zYUhvU0FtVnVLQUFQAQ", 332 | "CRICKET": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNRGw0Y0Y4U0FtVnVLQUFQAQ", 333 | "RUGBY": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNRFppY2pnU0FtVnVLQUFQAQ", 334 | "ECONOMY": "CAAqIggKIhxDQkFTRHdvSkwyMHZNR2RtY0hNekVnSmxiaWdBUAE", 335 | "PERSONAL FINANCE": "CAAqIggKIhxDQkFTRHdvSkwyMHZNREY1Tm1OeEVnSmxiaWdBUAE", 336 | "FINANCE": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNREpmTjNRU0FtVnVLQUFQAQ", 337 | "DIGITAL CURRENCIES": "CAAqJAgKIh5DQkFTRUFvS0wyMHZNSEk0YkhsM054SUNaVzRvQUFQAQ", 338 | "MOBILE": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNRFV3YXpnU0FtVnVLQUFQAQ", 339 | "ENERGY": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNREp0YlY4U0FtVnVLQUFQAQ", 340 | "GAMING": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNREZ0ZHpFU0FtVnVLQUFQAQ", 341 | "INTERNET SECURITY": "CAAqIggKIhxDQkFTRHdvSkwyMHZNRE5xWm01NEVnSmxiaWdBUAE", 342 | "GADGETS": "CAAqIggKIhxDQkFTRHdvSkwyMHZNREp0WmpGdUVnSmxiaWdBUAE", 343 | "VIRTUAL REALITY": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNRGRmYm5rU0FtVnVLQUFQAQ", 344 | "ROBOTICS": "CAAqJAgKIh5DQkFTRUFvS0wyMHZNREp3TUhRMVpoSUNaVzRvQUFQAQ", 345 | "NUTRITION": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNRFZrYW1NU0FtVnVLQUFQAQ", 346 | "PUBLIC HEALTH": "CAAqIggKIhxDQkFTRHdvSkwyMHZNREpqYlRZeEVnSmxiaWdBUAE", 347 | "MENTAL HEALTH": "CAAqIggKIhxDQkFTRHdvSkwyMHZNRE40TmpsbkVnSmxiaWdBUAE", 348 | "MEDICINE": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNRFJ6YURNU0FtVnVLQUFQAQ", 349 | "SPACE": "CAAqIggKIhxDQkFTRHdvSkwyMHZNREU0TXpOM0VnSmxiaWdBUAE", 350 | "WILDLIFE": "CAAqJAgKIh5DQkFTRUFvS0wyY3ZNVE5pWWw5MGN4SUNaVzRvQUFQAQ", 351 | "ENVIRONMENT": "CAAqIggKIhxDQkFTRHdvSkwyMHZNREp3ZVRBNUVnSmxiaWdBUAE", 352 | "NEUROSCIENCE": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNRFZpTm1NU0FtVnVLQUFQAQ", 353 | "PHYSICS": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNRFZ4YW5RU0FtVnVLQUFQAQ", 354 | 
"GEOLOGY": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNRE0yYUhZU0FtVnVLQUFQAQ", 355 | "PALEONTOLOGY": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNRFZ5YW13U0FtVnVLQUFQAQ", 356 | "SOCIAL SCIENCES": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNRFp1Tm5BU0FtVnVLQUFQAQ", 357 | "EDUCATION": "CAAqJQgKIh9DQkFTRVFvTEwyY3ZNVEl4Y0Raa09UQVNBbVZ1S0FBUAE", 358 | "JOBS": "CAAqJAgKIh5DQkFTRUFvS0wyMHZNRFF4TVRWME1oSUNaVzRvQUFQAQ", 359 | "ONLINE EDUCATION": "CAAqIggKIhxDQkFTRHdvSkwyMHZNRFYwYW5KaUVnSmxiaWdBUAE", 360 | "HIGHER EDUCATION": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNRE55TlRVU0FtVnVLQUFQAQ", 361 | "VEHICLES": "CAAqIAgKIhpDQkFTRFFvSEwyMHZNR3MwYWhJQ1pXNG9BQVAB", 362 | "ARTS-DESIGN": "CAAqIAgKIhpDQkFTRFFvSEwyMHZNR3BxZHhJQ1pXNG9BQVAB", 363 | "BEAUTY": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNREZtTkRNU0FtVnVLQUFQAQ", 364 | "FOOD": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNREozWW0wU0FtVnVLQUFQAQ", 365 | "TRAVEL": "CAAqIggKIhxDQkFTRHdvSkwyMHZNREUwWkhONEVnSmxiaWdBUAE", 366 | "SHOPPING": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNR2hvWkdJU0FtVnVLQUFQAQ", 367 | "HOME": "CAAqIggKIhxDQkFTRHdvSkwyMHZNREZzTUcxM0VnSmxiaWdBUAE", 368 | "OUTDOORS": "CAAqJAgKIh5DQkFTRUFvS0wyMHZNRFZpTUc0M2F4SUNaVzRvQUFQAQ", 369 | "FASHION": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNRE15ZEd3U0FtVnVLQUFQAQ", 370 | "BITCOIN": "CAAqJAgKIh5DQkFTRUFvS0wyMHZNRFZ3TUhKeWVCSUNaVzRvQUFQAQ", 371 | } 372 | -------------------------------------------------------------------------------- /gnews/utils/utils.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import json 3 | import logging 4 | import re 5 | 6 | import requests 7 | from gnews.utils.constants import AVAILABLE_COUNTRIES, AVAILABLE_LANGUAGES, GOOGLE_NEWS_REGEX 8 | 9 | 10 | def lang_mapping(lang): 11 | return AVAILABLE_LANGUAGES.get(lang) 12 | 13 | 14 | def country_mapping(country): 15 | return AVAILABLE_COUNTRIES.get(country) 16 | 17 | 18 | def process_url(item, exclude_websites): 19 | source = item.get('source').get('href') 20 | if not all([not re.match(website, source) for website in 21 | 
[f'^http(s)?://(www.)?{website.lower()}.*' for website in exclude_websites]]): 22 | return 23 | url = item.get('link') 24 | if re.match(GOOGLE_NEWS_REGEX, url): 25 | url = requests.head(url).headers.get('location', url) 26 | return url 27 | -------------------------------------------------------------------------------- /imgs/gnews.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranahaani/GNews/5058075ba13a6be20a101fbdc84294d396150510/imgs/gnews.gif -------------------------------------------------------------------------------- /imgs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ranahaani/GNews/5058075ba13a6be20a101fbdc84294d396150510/imgs/logo.png -------------------------------------------------------------------------------- /index.rst: -------------------------------------------------------------------------------- 1 | [![Contributors][contributors-shield]][contributors-url] 2 | [![Forks][forks-shield]][forks-url] 3 | [![Stargazers][stars-shield]][stars-url] 4 | [![Issues][issues-shield]][issues-url] 5 | [![MIT License][license-shield]][license-url] 6 | [![Download][download-sheild]][download-url] 7 | [![LinkedIn][linkedin-shield]][linkedin-url] 8 | 9 | 10 |
11 |

12 | 13 | GNews 14 | 15 | 16 |

GNews

17 | 18 |

19 | A Happy and lightweight Python Package that Provide an API to search for articles on Google News and returns a usable JSON response! 20 |
21 | Explore the docs » 22 |
23 |
24 | View Demo 25 | · 26 | Report Bug 27 | · 28 | Request Feature 29 |

30 | 31 | 32 |
33 | Table of Contents 34 |
    35 |
  1. 36 | About 37 | 40 |
  2. 41 |
  3. 42 | Getting Started 43 | 46 | 49 |
  4. 50 |
  5. 51 | Usage 52 | 55 | 58 | 61 | 64 | 67 | 70 | 73 | 76 |
  6. 77 |
  7. To do
  8. 78 |
  9. Roadmap
  10. 79 |
  11. Contributing
  12. 80 |
  13. License
  14. 81 |
  15. Contact
  16. 82 |
  17. Acknowledgements
  18. 83 |
84 | 85 |
86 | 87 | 88 | ## About GNews 89 | 90 | 🚩 GNews is a happy and lightweight Python package that searches the Google News RSS feed and returns a usable JSON 91 | response \ 92 | 🚩 You can also fetch the full article (**no need to write scrapers for article fetching anymore**) 93 | 94 | Google News covers **141+ countries** and **41+ languages**. On the bottom left side of the Google News page you 95 | may find a `Language & region` section where you can find all of the supported combinations. 96 | 97 | ### Demo 98 | 99 | [![GNews Demo][demo-gif]](https://github.com/ranahaani/GNews) 100 | 101 | 102 | 103 | ## Getting Started 104 | 105 | This is an example of how you may give instructions on setting up your project locally. To get a local copy up and 106 | running follow these simple example steps. 107 | 108 | ### Installation 109 | 110 | ``` shell 111 | pip install gnews 112 | ``` 113 | 114 | ### Setup with Docker 115 | 116 | #### Developing with docker 117 | 118 | 1. Install [docker and docker-compose](https://docs.docker.com/get-docker/). 119 | 2. Set up your `.env` file with the MongoDB credentials. 120 | 3. Run `docker-compose up --build` 121 | 122 | #### Install using clone 123 | 124 | 1. Clone this repository `git clone https://github.com/ranahaani/GNews.git` 125 | 2. Create your virtual environment `virtualenv gnews` 126 | 3.
Install the requirements with `pip install -r requirements.txt` 127 | 128 | 129 | 130 | ### Example usage 131 | 132 | ```python 133 | from gnews import GNews 134 | 135 | google_news = GNews() 136 | pakistan_news = google_news.get_news('Pakistan') 137 | print(pakistan_news) 138 | ``` 139 | 140 | ``` 141 | [{ 142 | 'publisher': 'Aljazeera.com', 143 | 'description': 'Pakistan accuses India of stoking conflict in Indian Ocean ' 144 | 'Aljazeera.com', 145 | 'published date': 'Tue, 16 Feb 2021 11:50:43 GMT', 146 | 'title': 'Pakistan accuses India of stoking conflict in Indian Ocean - ' 147 | 'Aljazeera.com', 148 | 'url': 'https://www.aljazeera.com/news/2021/2/16/pakistan-accuses-india-of-nuclearizing-indian-ocean' 149 | }, 150 | ...] 151 | ``` 152 | 153 | ### Get top news 154 | 155 | * `GNews.get_top_news()` 156 | 157 | ### Get news by keyword 158 | 159 | * `GNews.get_news(keyword)` 160 | 161 | ### Get news by major topic 162 | 163 | * `GNews.get_news_by_topic(topic)` 164 | * Available topics: `WORLD, NATION, BUSINESS, TECHNOLOGY, ENTERTAINMENT, SPORTS, SCIENCE, HEALTH`. 165 | 166 | ### Get news by geo location 167 | 168 | * `GNews.get_news_by_location(location)` 169 | * `location` can be the name of a city, state or country 170 | 171 | ### Results specification 172 | 173 | * It's possible to pass proxy, country, language, period, excluded websites and the maximum number of results during initialization 174 | 175 | ```python 176 | google_news = GNews(language='en', country='US', period='7d', max_results=10, exclude_websites=['yahoo.com', 'cnn.com'], 177 | proxy=proxy) 178 | ``` 179 | 180 | * Or change them on an existing object 181 | 182 | ```python 183 | google_news.period = '7d' # News from last 7 days 184 | google_news.max_results = 10 # number of responses across a keyword 185 | google_news.country = 'United States' # News from a specific country 186 | google_news.language = 'english' # News in a specific language 187 | google_news.exclude_websites = ['yahoo.com', 'cnn.com'] # Exclude news from specific
website i.e Yahoo.com and CNN.com 188 | ``` 189 | 190 | The format of the timeframe is a string comprised of a number, followed by a letter representing the time operator. For 191 | example 1y would signify 1 year. Full list of operators below: 192 | 193 | ``` 194 | - h = hours (eg: 12h) 195 | - d = days (eg: 7d) 196 | - m = months (eg: 6m) 197 | - y = years (eg: 1y) 198 | ``` 199 | 200 | #### Supported Countries 201 | 202 | ```python 203 | print(google_news.AVAILABLE_COUNTRIES) 204 | 205 | {'Australia': 'AU', 'Botswana': 'BW', 'Canada ': 'CA', 'Ethiopia': 'ET', 'Ghana': 'GH', 'India ': 'IN', 206 | 'Indonesia': 'ID', 'Ireland': 'IE', 'Israel ': 'IL', 'Kenya': 'KE', 'Latvia': 'LV', 'Malaysia': 'MY', 'Namibia': 'NA', 207 | 'New Zealand': 'NZ', 'Nigeria': 'NG', 'Pakistan': 'PK', 'Philippines': 'PH', 'Singapore': 'SG', 'South Africa': 'ZA', 208 | 'Tanzania': 'TZ', 'Uganda': 'UG', 'United Kingdom': 'GB', 'United States': 'US', 'Zimbabwe': 'ZW', 209 | 'Czech Republic': 'CZ', 'Germany': 'DE', 'Austria': 'AT', 'Switzerland': 'CH', 'Argentina': 'AR', 'Chile': 'CL', 210 | 'Colombia': 'CO', 'Cuba': 'CU', 'Mexico': 'MX', 'Peru': 'PE', 'Venezuela': 'VE', 'Belgium ': 'BE', 'France': 'FR', 211 | 'Morocco': 'MA', 'Senegal': 'SN', 'Italy': 'IT', 'Lithuania': 'LT', 'Hungary': 'HU', 'Netherlands': 'NL', 212 | 'Norway': 'NO', 'Poland': 'PL', 'Brazil': 'BR', 'Portugal': 'PT', 'Romania': 'RO', 'Slovakia': 'SK', 'Slovenia': 'SI', 213 | 'Sweden': 'SE', 'Vietnam': 'VN', 'Turkey': 'TR', 'Greece': 'GR', 'Bulgaria': 'BG', 'Russia': 'RU', 'Ukraine ': 'UA', 214 | 'Serbia': 'RS', 'United Arab Emirates': 'AE', 'Saudi Arabia': 'SA', 'Lebanon': 'LB', 'Egypt': 'EG', 215 | 'Bangladesh': 'BD', 'Thailand': 'TH', 'China': 'CN', 'Taiwan': 'TW', 'Hong Kong': 'HK', 'Japan': 'JP', 216 | 'Republic of Korea': 'KR'} 217 | ``` 218 | 219 | #### Supported Languages 220 | 221 | ```python 222 | print(google_news.AVAILABLE_LANGUAGES) 223 | 224 | {'english': 'en', 'indonesian': 'id', 'czech': 'cs', 'german': 'de', 
'spanish': 'es-419', 'french': 'fr', 225 | 'italian': 'it', 'latvian': 'lv', 'lithuanian': 'lt', 'hungarian': 'hu', 'dutch': 'nl', 'norwegian': 'no', 226 | 'polish': 'pl', 'portuguese brasil': 'pt-419', 'portuguese portugal': 'pt-150', 'romanian': 'ro', 'slovak': 'sk', 227 | 'slovenian': 'sl', 'swedish': 'sv', 'vietnamese': 'vi', 'turkish': 'tr', 'greek': 'el', 'bulgarian': 'bg', 228 | 'russian': 'ru', 'serbian': 'sr', 'ukrainian': 'uk', 'hebrew': 'he', 'arabic': 'ar', 'marathi': 'mr', 'hindi': 'hi', 229 | 'bengali': 'bn', 'tamil': 'ta', 'telugu': 'te', 'malyalam': 'ml', 'thai': 'th', 'chinese simplified': 'zh-Hans', 230 | 'chinese traditional': 'zh-Hant', 'japanese': 'ja', 'korean': 'ko'} 231 | ``` 232 | 233 | ### Article Properties 234 | 235 | - Get news returns the list with following keys: `title`, `published_date`, `description`, `url`, `publisher`. 236 | 237 | | Properties | Description | Example | 238 | |--------------|------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| 239 | | title | Title of the article | IMF Staff and Pakistan Reach Staff-Level Agreement on the Pending Reviews Under the Extended Fund Facility | 240 | | url | Google news link to article | [Article Link](http://news.google.com/news/url?sa=t&fd=R&ct2=us&usg=AFQjCNGNR4Qg8LGbjszT1yt2s2lMXvvufQ&clid=c3a7d30bb8a4878e06b80cf16b898331&cid=52779522121279&ei=VQU7WYjiFoLEhQHIs4HQCQ&url=https://www.theguardian.com/commentisfree/2017/jun/07/why-dont-unicorns-exist-google) | 241 | | published date | Published date | Wed, 07 Jun 2017 07:01:30 GMT | 242 | | description | Short description of article | IMF Staff and Pakistan Reach Staff-Level Agreement on the Pending Reviews Under the Extended Fund Facility ... 
| 243 | | publisher | Publisher of article | The Guardian | | 244 | 245 | ## Getting full article 246 | 247 | * To read a full article you can either: 248 | * Navigate to the url directly in your browser, or 249 | * Use `newspaper3k` library to scrape the article 250 | * The article url, needed for both methods, is accessed as `article['url']`. 251 | 252 | #### Using newspaper3k 253 | 254 | 1. Install the library - `pip3 install newspaper3k`. 255 | 2. Use `get_full_article` method from `GNews`, that creates an `newspaper.article.Article` object from the url. 256 | 257 | ```python 258 | from gnews import GNews 259 | 260 | google_news = GNews() 261 | json_resp = google_news.get_news('Pakistan') 262 | article = google_news.get_full_article( 263 | json_resp[0]['url']) # newspaper3k instance, you can access newspaper3k all attributes in article 264 | ``` 265 | 266 | This new object contains `title`, `text` (full article) or `images` attributes. Examples: 267 | 268 | ```python 269 | article.title 270 | ``` 271 | 272 | > IMF Staff and Pakistan Reach Staff-Level Agreement on the Pending Reviews Under the Extended Fund Facility' 273 | 274 | ```python 275 | article.text 276 | ``` 277 | 278 | > End-of-Mission press releases include statements of IMF staff teams that convey preliminary findings after a mission. The views expressed are those of the IMF staff and do not necessarily represent the views of the IMF’s Executive Board.\n\nIMF staff and the Pakistani authorities have reached an agreement on a package of measures to complete second to fifth reviews of the authorities’ reform program supported by the IMF Extended Fund Facility (EFF) ..... 
(full article) 279 | 280 | ```python 281 | article.images 282 | ``` 283 | 284 | > `{'https://www.imf.org/~/media/Images/IMF/Live-Page/imf-live-rgb-h.ashx?la=en', 'https://www.imf.org/-/media/Images/IMF/Data/imf-logo-eng-sep2019-update.ashx', 'https://www.imf.org/-/media/Images/IMF/Data/imf-seal-shadow-sep2019-update.ashx', 'https://www.imf.org/-/media/Images/IMF/Social/TW-Thumb/twitter-seal.ashx', 'https://www.imf.org/assets/imf/images/footer/IMF_seal.png'} 285 | ` 286 | 287 | ```python 288 | article.authors 289 | ``` 290 | 291 | > `[]` 292 | 293 | Read full documentation for `newspaper3k` 294 | [newspaper3k](https://newspaper.readthedocs.io/en/latest/user_guide/quickstart.html#parsing-an-article) 295 | 296 | 297 | ## Todo 298 | 299 | - Save to MongoDB 300 | - Save to SQLite 301 | - Save to JSON 302 | - Save to .CSV file 303 | - More than 100 articles 304 | 305 | 306 | 307 | ## Roadmap 308 | 309 | See the [open issues](https://github.com/ranahaani/GNews/issues) for a list of proposed features (and known issues). 310 | 311 | 312 | 313 | 314 | 315 | ## Contributing 316 | 317 | Contributions are what make the open source community such an amazing place to learn, inspire, and create. Any 318 | contributions you make are **greatly appreciated**. 319 | 320 | 1. Fork the Project 321 | 2. Create your Feature Branch (`git checkout -b feature/AmazingFeature`) 322 | 3. Commit your Changes (`git commit -m 'Add some AmazingFeature'`) 323 | 4. Push to the Branch (`git push origin feature/AmazingFeature`) 324 | 5. Open a Pull Request 325 | 326 | 327 | 328 | ## License 329 | 330 | Distributed under the MIT License. See `LICENSE.txt` for more information.
331 | 332 | 333 | 334 | 335 | 336 | ## Contact 337 | 338 | Muhammad Abdullah - [@ranahaani](https://twitter.com/ranahaani) - ranahaani@gmail.com 339 | 340 | Project Link: [https://github.com/ranahaani/GNews](https://github.com/ranahaani/GNews) 341 | 342 | [contributors-shield]: https://img.shields.io/github/contributors/ranahaani/GNews.svg?style=for-the-badge 343 | 344 | [contributors-url]: https://github.com/ranahaani/GNews/graphs/contributors 345 | 346 | [forks-shield]: https://img.shields.io/github/forks/ranahaani/GNews.svg?style=for-the-badge 347 | 348 | [forks-url]: https://github.com/ranahaani/GNews/network/members 349 | 350 | [stars-shield]: https://img.shields.io/github/stars/ranahaani/GNews.svg?style=for-the-badge 351 | 352 | [stars-url]: https://github.com/ranahaani/GNews/stargazers 353 | 354 | [issues-shield]: https://img.shields.io/github/issues/ranahaani/GNews.svg?style=for-the-badge 355 | 356 | [issues-url]: https://github.com/ranahaani/GNews/issues 357 | 358 | [license-shield]: https://img.shields.io/github/license/ranahaani/GNews.svg?style=for-the-badge 359 | 360 | [license-url]: https://github.com/ranahaani/GNews/blob/master/LICENSE.txt 361 | 362 | [download-sheild]: https://img.shields.io/pypi/dm/GNews.svg?style=for-the-badge 363 | 364 | [download-url]: https://pypistats.org/packages/gnews 365 | 366 | [linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555 367 | 368 | [linkedin-url]: https://linkedin.com/in/ranahaani 369 | 370 | [demo-gif]: https://github.com/ranahaani/GNews/raw/master/imgs/gnews.gif 371 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from gnews import GNews 2 | 3 | google_news = GNews() 4 | 5 | 6 | google_news.start_date = (2021, 1, 1) 7 | google_news.end_date = (2021, 2, 1) 8 | google_news.max_results = 2 9 | 10 | result = 
google_news.get_news('"WORLD"') 11 | print(result) 12 | 13 | print(google_news.get_news_by_topic.__doc__) 14 | 15 | google_news.get_news_by_location("WORLD") 16 | 17 | result = google_news.get_news('"WORLD"') 18 | print(result) -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | feedparser~=6.0.2 2 | beautifulsoup4>=4.9.3,<5 3 | dnspython 4 | requests 5 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | with open('requirements.txt') as f: 4 | requirements = f.read().splitlines() 5 | 6 | with open("README.md", "r") as fh: 7 | long_description = fh.read() 8 | 9 | setup( 10 | name='gnews', 11 | version='0.4.1', 12 | # setup_requires=['setuptools_scm'], 13 | # use_scm_version={ 14 | # "local_scheme": "no-local-version" 15 | # }, 16 | 17 | author="Muhammad Abdullah", 18 | author_email="ranahaani@gmail.com", 19 | description='Provide an API to search for articles on Google News and returns a usable JSON response.', 20 | long_description=long_description, 21 | long_description_content_type="text/markdown", 22 | packages=find_packages(), 23 | install_requires=requirements, 24 | url='https://github.com/ranahaani/GNews/', 25 | project_urls={ 26 | 'Documentation': 'https://github.com/ranahaani/GNews/blob/master/README.md', 27 | 'Source': 'https://github.com/ranahaani/GNews/', 28 | 'Tracker': 'https://github.com/ranahaani/GNews/issues', 29 | }, 30 | classifiers=[ 31 | 'Development Status :: 5 - Production/Stable', 32 | 'Intended Audience :: Developers', 33 | 'Programming Language :: Python :: 3', 34 | 'Programming Language :: Python :: 3.8', 35 | 'Programming Language :: Python :: 3.9', 36 | 'Programming Language :: Python :: 3.10', 37 | 'Programming Language :: Python 
:: 3.11', 38 | 'License :: OSI Approved :: MIT License', 39 | 'Operating System :: OS Independent', 40 | ], 41 | ) 42 | -------------------------------------------------------------------------------- /tests/test_gnews.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from gnews import GNews 3 | 4 | class TestGNews(unittest.TestCase): 5 | def setUp(self): 6 | # Create a GNews instance with default parameters for testing 7 | self.gnews = GNews() 8 | 9 | def test_get_news(self): 10 | # Test that get_news returns a non-empty list of news articles 11 | key = "Google" 12 | news_articles = self.gnews.get_news(key) 13 | self.assertTrue(isinstance(news_articles, list)) 14 | self.assertTrue(len(news_articles) > 0) 15 | 16 | def test_get_top_news(self): 17 | # Test that get_top_news returns a non-empty list of news articles 18 | top_news_articles = self.gnews.get_top_news() 19 | self.assertTrue(isinstance(top_news_articles, list)) 20 | self.assertTrue(len(top_news_articles) > 0) 21 | 22 | def test_get_news_by_topic(self): 23 | # Test that get_news_by_topic returns a non-empty list of news articles for a valid topic 24 | topic = "business" 25 | news_articles = self.gnews.get_news_by_topic(topic) 26 | self.assertTrue(isinstance(news_articles, list)) 27 | self.assertTrue(len(news_articles) > 0) 28 | 29 | def test_get_news_by_location(self): 30 | # Test that get_news_by_location returns a non-empty list of news articles for a valid location 31 | location = "India" 32 | news_articles = self.gnews.get_news_by_location(location) 33 | self.assertTrue(isinstance(news_articles, list)) 34 | self.assertTrue(len(news_articles) > 0) 35 | 36 | def test_get_news_by_site_valid(self): 37 | site = "cnn.com" 38 | news_articles = self.gnews.get_news_by_site(site) 39 | self.assertTrue(isinstance(news_articles, list)) 40 | self.assertTrue(len(news_articles) > 0) 41 | 42 | def test_get_news_by_site_invalid(self): 43 | # Test that 
get_news_by_site returns an empty list for an invalid site domain 44 | site = "invalidsite123.com" 45 | news_articles = self.gnews.get_news_by_site(site) 46 | self.assertEqual(news_articles, []) 47 | 48 | def test_get_news_more_than_100(self): 49 | # Set up a GNews instance with a high max_results value 50 | self.gnews = GNews(max_results=150) 51 | query = "technology" 52 | 53 | # Call get_news with the query 54 | news_articles = self.gnews.get_news(query) 55 | 56 | # Verify the result respects the maximum result cap 57 | self.assertTrue(isinstance(news_articles, list)) 58 | self.assertTrue(len(news_articles) > 0) 59 | self.assertTrue(len(news_articles) <= 150, "Should fetch no more than max_results") 60 | 61 | # Ensure no duplicates in the results 62 | urls = [article['url'] for article in news_articles] 63 | self.assertEqual(len(urls), len(set(urls)), "No duplicate articles should be fetched") 64 | 65 | def test_get_full_article(self): 66 | pass 67 | # Test that get_full_article returns a valid article object for a valid URL 68 | # url = "https://www.bbc.com/news/live/world-us-canada-66248859" 69 | # article = self.gnews.get_full_article(url) 70 | # self.assertIsNotNone(article) 71 | # self.assertTrue(hasattr(article, 'title')) 72 | # self.assertTrue(hasattr(article, 'text')) 73 | 74 | if __name__ == '__main__': 75 | unittest.main() 76 | --------------------------------------------------------------------------------