├── .dockerignore
├── .github
└── workflows
│ ├── python-publish.yml
│ ├── static.yml
│ └── tests.yml
├── .gitignore
├── CODE_OF_CONDUCT.md
├── Dockerfile
├── LICENSE.txt
├── README.md
├── docker-compose.yaml
├── gnews
├── __init__.py
├── gnews.py
└── utils
│ ├── __init__.py
│ ├── constants.py
│ └── utils.py
├── imgs
├── gnews.gif
└── logo.png
├── index.rst
├── main.py
├── requirements.txt
├── setup.py
└── tests
└── test_gnews.py
/.dockerignore:
--------------------------------------------------------------------------------
1 | ### JetBrains template
2 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
3 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
4 |
5 | # User-specific stuff
6 | .idea/**/workspace.xml
7 | .idea/**/tasks.xml
8 | .idea/**/usage.statistics.xml
9 | .idea/**/dictionaries
10 | .idea/**/shelf
11 |
12 | # Generated files
13 | .idea/**/contentModel.xml
14 |
15 | # Sensitive or high-churn files
16 | .idea/**/dataSources/
17 | .idea/**/dataSources.ids
18 | .idea/**/dataSources.local.xml
19 | .idea/**/sqlDataSources.xml
20 | .idea/**/dynamic.xml
21 | .idea/**/uiDesigner.xml
22 | .idea/**/dbnavigator.xml
23 |
24 | # Gradle
25 | .idea/**/gradle.xml
26 | .idea/**/libraries
27 |
28 | # Gradle and Maven with auto-import
29 | # When using Gradle or Maven with auto-import, you should exclude module files,
30 | # since they will be recreated, and may cause churn. Uncomment if using
31 | # auto-import.
32 | # .idea/artifacts
33 | # .idea/compiler.xml
34 | # .idea/jarRepositories.xml
35 | # .idea/modules.xml
36 | # .idea/*.iml
37 | # .idea/modules
38 | # *.iml
39 | # *.ipr
40 |
41 | # CMake
42 | cmake-build-*/
43 |
44 | # Mongo Explorer plugin
45 | .idea/**/mongoSettings.xml
46 |
47 | # File-based project format
48 | *.iws
49 |
50 | # IntelliJ
51 | out/
52 |
53 | # mpeltonen/sbt-idea plugin
54 | .idea_modules/
55 |
56 | # JIRA plugin
57 | atlassian-ide-plugin.xml
58 |
59 | # Cursive Clojure plugin
60 | .idea/replstate.xml
61 |
62 | # Crashlytics plugin (for Android Studio and IntelliJ)
63 | com_crashlytics_export_strings.xml
64 | crashlytics.properties
65 | crashlytics-build.properties
66 | fabric.properties
67 |
68 | # Editor-based Rest Client
69 | .idea/httpRequests
70 |
71 | # Android studio 3.1+ serialized cache file
72 | .idea/caches/build_file_checksums.ser
73 |
74 | ### Python template
75 | # Byte-compiled / optimized / DLL files
76 | __pycache__/
77 | *.py[cod]
78 | *$py.class
79 |
80 | # C extensions
81 | *.so
82 |
83 | # Distribution / packaging
84 | .Python
85 | build/
86 | develop-eggs/
87 | dist/
88 | downloads/
89 | eggs/
90 | .eggs/
91 | lib/
92 | lib64/
93 | parts/
94 | sdist/
95 | var/
96 | wheels/
97 | share/python-wheels/
98 | *.egg-info/
99 | .installed.cfg
100 | *.egg
101 | MANIFEST
102 |
103 | # PyInstaller
104 | # Usually these files are written by a python script from a template
105 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
106 | *.manifest
107 | *.spec
108 |
109 | # Installer logs
110 | pip-log.txt
111 | pip-delete-this-directory.txt
112 |
113 | # Unit test / coverage reports
114 | htmlcov/
115 | .tox/
116 | .nox/
117 | .coverage
118 | .coverage.*
119 | .cache
120 | nosetests.xml
121 | coverage.xml
122 | *.cover
123 | *.py,cover
124 | .hypothesis/
125 | .pytest_cache/
126 | cover/
127 |
128 | # Translations
129 | *.mo
130 | *.pot
131 |
132 | # Django stuff:
133 | *.log
134 | local_settings.py
135 | db.sqlite3
136 | db.sqlite3-journal
137 |
138 | # Flask stuff:
139 | instance/
140 | .webassets-cache
141 |
142 | # Scrapy stuff:
143 | .scrapy
144 |
145 | # Sphinx documentation
146 | docs/_build/
147 |
148 | # PyBuilder
149 | .pybuilder/
150 | target/
151 |
152 | # Jupyter Notebook
153 | .ipynb_checkpoints
154 |
155 | # IPython
156 | profile_default/
157 | ipython_config.py
158 |
159 | # pyenv
160 | # For a library or package, you might want to ignore these files since the code is
161 | # intended to run in multiple environments; otherwise, check them in:
162 | # .python-version
163 |
164 | # pipenv
165 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
166 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
167 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
168 | # install all needed dependencies.
169 | #Pipfile.lock
170 |
171 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
172 | __pypackages__/
173 |
174 | # Celery stuff
175 | celerybeat-schedule
176 | celerybeat.pid
177 |
178 | # SageMath parsed files
179 | *.sage.py
180 |
181 | # Environments
182 | .env
183 | .venv
184 | env/
185 | venv/
186 | ENV/
187 | env.bak/
188 | venv.bak/
189 |
190 | # Spyder project settings
191 | .spyderproject
192 | .spyproject
193 |
194 | # Rope project settings
195 | .ropeproject
196 |
197 | # mkdocs documentation
198 | /site
199 |
200 | # mypy
201 | .mypy_cache/
202 | .dmypy.json
203 | dmypy.json
204 |
205 | # Pyre type checker
206 | .pyre/
207 |
208 | # pytype static type analyzer
209 | .pytype/
210 |
211 | # Cython debug symbols
212 | cython_debug/
213 |
214 |
--------------------------------------------------------------------------------
/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
1 | # This workflow builds the Python package and publishes it to PyPI on every push to the master branch
2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
3 |
4 | name: Upload Python Package
5 |
6 | on:
7 | push:
8 | branches:
9 | - master
10 |
11 | jobs:
12 | pypi-publish:
13 | name: Publish release to PyPI
14 | runs-on: ubuntu-latest
15 | environment:
16 | name: pypi
17 | url: https://pypi.org/p/gnews
18 | permissions:
19 | id-token: write
20 | steps:
21 | - uses: actions/checkout@v4
22 | - name: Set up Python
23 | uses: actions/setup-python@v4
24 | with:
25 | python-version: "3.x"
26 | - name: Install dependencies
27 | run: |
28 | python -m pip install --upgrade pip
29 | pip install setuptools wheel
30 | - name: Build package
31 | run: |
32 | python setup.py sdist bdist_wheel # Could also be python -m build
33 | - name: Publish package distributions to PyPI
34 | uses: pypa/gh-action-pypi-publish@release/v1
35 | with:
36 | password: ${{ secrets.PYPI_API_TOKEN }}
--------------------------------------------------------------------------------
/.github/workflows/static.yml:
--------------------------------------------------------------------------------
1 | # Simple workflow for deploying static content to GitHub Pages
2 | name: Deploy static content to Pages
3 |
4 | on:
5 | # Runs on pushes targeting the default branch
6 | push:
7 | branches: ["master"]
8 |
9 | # Allows you to run this workflow manually from the Actions tab
10 | workflow_dispatch:
11 |
12 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
13 | permissions:
14 | contents: read
15 | pages: write
16 | id-token: write
17 |
18 | # Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
19 | # However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
20 | concurrency:
21 | group: "pages"
22 | cancel-in-progress: false
23 |
24 | jobs:
25 | # Single deploy job since we're just deploying
26 | deploy:
27 | environment:
28 | name: github-pages
29 | url: ${{ steps.deployment.outputs.page_url }}
30 | runs-on: ubuntu-latest
31 | steps:
32 | - name: Checkout
33 | uses: actions/checkout@v4
34 | - name: Setup Pages
35 | uses: actions/configure-pages@v5
36 | - name: Upload artifact
37 | uses: actions/upload-pages-artifact@v3
38 | with:
39 | # Upload entire repository
40 | path: '.'
41 | - name: Deploy to GitHub Pages
42 | id: deployment
43 | uses: actions/deploy-pages@v4
44 |
--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
1 | name: Run Tests
2 |
3 | on:
4 | push:
5 | branches:
6 | - master
7 | jobs:
8 | test:
9 | name: Run Test Cases
10 | runs-on: ubuntu-latest
11 | steps:
12 | - name: Checkout code
13 | uses: actions/checkout@v2
14 |
15 | - name: Set up Python
16 | uses: actions/setup-python@v2
17 | with:
18 | python-version: 3.8
19 |
20 | - name: Install dependencies
21 | run: |
22 | python -m pip install --upgrade pip
23 | pip install -r requirements.txt
24 |
25 | - name: Run test cases
26 | run: |
27 | python -m unittest tests/test_gnews.py
28 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | # Created by https://www.toptal.com/developers/gitignore/api/pycharm,python
3 | # Edit at https://www.toptal.com/developers/gitignore?templates=pycharm,python
4 |
5 | ### PyCharm ###
6 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
7 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
8 |
9 | # User-specific stuff
10 | .idea/**/workspace.xml
11 | .idea/**/tasks.xml
12 | .idea/**/usage.statistics.xml
13 | .idea/**/dictionaries
14 | .idea/**/shelf
15 |
16 | # Generated files
17 | .idea/**/contentModel.xml
18 |
19 | # Sensitive or high-churn files
20 | .idea/**/dataSources/
21 | .idea/**/dataSources.ids
22 | .idea/**/dataSources.local.xml
23 | .idea/**/sqlDataSources.xml
24 | .idea/**/dynamic.xml
25 | .idea/**/uiDesigner.xml
26 | .idea/**/dbnavigator.xml
27 |
28 | .idea/
29 |
30 | # Gradle
31 | .idea/**/gradle.xml
32 | .idea/**/libraries
33 |
34 | .main.py
35 |
36 | # Gradle and Maven with auto-import
37 | # When using Gradle or Maven with auto-import, you should exclude module files,
38 | # since they will be recreated, and may cause churn. Uncomment if using
39 | # auto-import.
40 | # .idea/artifacts
41 | # .idea/compiler.xml
42 | # .idea/jarRepositories.xml
43 | # .idea/modules.xml
44 | # .idea/*.iml
45 | # .idea/modules
46 | # *.iml
47 | # *.ipr
48 |
49 | # CMake
50 | cmake-build-*/
51 |
52 | # Mongo Explorer plugin
53 | .idea/**/mongoSettings.xml
54 |
55 | # File-based project format
56 | *.iws
57 |
58 | # IntelliJ
59 | out/
60 |
61 | # mpeltonen/sbt-idea plugin
62 | .idea_modules/
63 |
64 | # JIRA plugin
65 | atlassian-ide-plugin.xml
66 |
67 | # Cursive Clojure plugin
68 | .idea/replstate.xml
69 |
70 | # Crashlytics plugin (for Android Studio and IntelliJ)
71 | com_crashlytics_export_strings.xml
72 | crashlytics.properties
73 | crashlytics-build.properties
74 | fabric.properties
75 |
76 | # Editor-based Rest Client
77 | .idea/httpRequests
78 |
79 | # Android studio 3.1+ serialized cache file
80 | .idea/caches/build_file_checksums.ser
81 |
82 | ### PyCharm Patch ###
83 | # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721
84 |
85 | # *.iml
86 | # modules.xml
87 | # .idea/misc.xml
88 | # *.ipr
89 |
90 | # Sonarlint plugin
91 | # https://plugins.jetbrains.com/plugin/7973-sonarlint
92 | .idea/**/sonarlint/
93 |
94 | # SonarQube Plugin
95 | # https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin
96 | .idea/**/sonarIssues.xml
97 |
98 | # Markdown Navigator plugin
99 | # https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced
100 | .idea/**/markdown-navigator.xml
101 | .idea/**/markdown-navigator-enh.xml
102 | .idea/**/markdown-navigator/
103 |
104 | # Cache file creation bug
105 | # See https://youtrack.jetbrains.com/issue/JBR-2257
106 | .idea/$CACHE_FILE$
107 |
108 | # CodeStream plugin
109 | # https://plugins.jetbrains.com/plugin/12206-codestream
110 | .idea/codestream.xml
111 |
112 | ### Python ###
113 | # Byte-compiled / optimized / DLL files
114 | __pycache__/
115 | *.py[cod]
116 | *$py.class
117 |
118 | # C extensions
119 | *.so
120 |
121 | # Distribution / packaging
122 | .Python
123 | build/
124 | develop-eggs/
125 | dist/
126 | downloads/
127 | eggs/
128 | .eggs/
129 | lib/
130 | lib64/
131 | parts/
132 | sdist/
133 | var/
134 | wheels/
135 | pip-wheel-metadata/
136 | share/python-wheels/
137 | *.egg-info/
138 | .installed.cfg
139 | *.egg
140 | MANIFEST
141 |
142 | # PyInstaller
143 | # Usually these files are written by a python script from a template
144 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
145 | *.manifest
146 | *.spec
147 |
148 | # Installer logs
149 | pip-log.txt
150 | pip-delete-this-directory.txt
151 |
152 | # Unit test / coverage reports
153 | htmlcov/
154 | .tox/
155 | .nox/
156 | .coverage
157 | .coverage.*
158 | .cache
159 | nosetests.xml
160 | coverage.xml
161 | *.cover
162 | *.py,cover
163 | .hypothesis/
164 | .pytest_cache/
165 | pytestdebug.log
166 |
167 | # Translations
168 | *.mo
169 | *.pot
170 |
171 | # Django stuff:
172 | *.log
173 | local_settings.py
174 | db.sqlite3
175 | db.sqlite3-journal
176 |
177 | # Flask stuff:
178 | instance/
179 | .webassets-cache
180 |
181 | # Scrapy stuff:
182 | .scrapy
183 |
184 | # Sphinx documentation
185 | docs/_build/
186 | doc/_build/
187 |
188 | # PyBuilder
189 | target/
190 |
191 | # Jupyter Notebook
192 | .ipynb_checkpoints
193 |
194 | # IPython
195 | profile_default/
196 | ipython_config.py
197 |
198 | # pyenv
199 | .python-version
200 |
201 | # pipenv
202 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
203 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
204 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
205 | # install all needed dependencies.
206 | #Pipfile.lock
207 |
208 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
209 | __pypackages__/
210 |
211 | # Celery stuff
212 | celerybeat-schedule
213 | celerybeat.pid
214 |
215 | # SageMath parsed files
216 | *.sage.py
217 |
218 | # Environments
219 | .env
220 | .venv
221 | env/
222 | venv/
223 | ENV/
224 | env.bak/
225 | venv.bak/
226 | pythonenv*
227 |
228 | # Spyder project settings
229 | .spyderproject
230 | .spyproject
231 |
232 | # Rope project settings
233 | .ropeproject
234 |
235 | # mkdocs documentation
236 | /site
237 |
238 | # mypy
239 | .mypy_cache/
240 | .dmypy.json
241 | dmypy.json
242 |
243 | # Pyre type checker
244 | .pyre/
245 |
246 | # pytype static type analyzer
247 | .pytype/
248 |
249 | # profiling data
250 | .prof
251 |
252 | # End of https://www.toptal.com/developers/gitignore/api/pycharm,python
253 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | We as members, contributors, and leaders pledge to make participation in our
6 | community a harassment-free experience for everyone, regardless of age, body
7 | size, visible or invisible disability, ethnicity, sex characteristics, gender
8 | identity and expression, level of experience, education, socio-economic status,
9 | nationality, personal appearance, race, religion, or sexual identity
10 | and orientation.
11 |
12 | We pledge to act and interact in ways that contribute to an open, welcoming,
13 | diverse, inclusive, and healthy community.
14 |
15 | ## Our Standards
16 |
17 | Examples of behavior that contributes to a positive environment for our
18 | community include:
19 |
20 | * Demonstrating empathy and kindness toward other people
21 | * Being respectful of differing opinions, viewpoints, and experiences
22 | * Giving and gracefully accepting constructive feedback
23 | * Accepting responsibility and apologizing to those affected by our mistakes,
24 | and learning from the experience
25 | * Focusing on what is best not just for us as individuals, but for the
26 | overall community
27 |
28 | Examples of unacceptable behavior include:
29 |
30 | * The use of sexualized language or imagery, and sexual attention or
31 | advances of any kind
32 | * Trolling, insulting or derogatory comments, and personal or political attacks
33 | * Public or private harassment
34 | * Publishing others' private information, such as a physical or email
35 | address, without their explicit permission
36 | * Other conduct which could reasonably be considered inappropriate in a
37 | professional setting
38 |
39 | ## Enforcement Responsibilities
40 |
41 | Community leaders are responsible for clarifying and enforcing our standards of
42 | acceptable behavior and will take appropriate and fair corrective action in
43 | response to any behavior that they deem inappropriate, threatening, offensive,
44 | or harmful.
45 |
46 | Community leaders have the right and responsibility to remove, edit, or reject
47 | comments, commits, code, wiki edits, issues, and other contributions that are
48 | not aligned to this Code of Conduct, and will communicate reasons for moderation
49 | decisions when appropriate.
50 |
51 | ## Scope
52 |
53 | This Code of Conduct applies within all community spaces, and also applies when
54 | an individual is officially representing the community in public spaces.
55 | Examples of representing our community include using an official e-mail address,
56 | posting via an official social media account, or acting as an appointed
57 | representative at an online or offline event.
58 |
59 | ## Enforcement
60 |
61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
62 | reported to the community leaders responsible for enforcement at
63 | .
64 | All complaints will be reviewed and investigated promptly and fairly.
65 |
66 | All community leaders are obligated to respect the privacy and security of the
67 | reporter of any incident.
68 |
69 | ## Enforcement Guidelines
70 |
71 | Community leaders will follow these Community Impact Guidelines in determining
72 | the consequences for any action they deem in violation of this Code of Conduct:
73 |
74 | ### 1. Correction
75 |
76 | **Community Impact**: Use of inappropriate language or other behavior deemed
77 | unprofessional or unwelcome in the community.
78 |
79 | **Consequence**: A private, written warning from community leaders, providing
80 | clarity around the nature of the violation and an explanation of why the
81 | behavior was inappropriate. A public apology may be requested.
82 |
83 | ### 2. Warning
84 |
85 | **Community Impact**: A violation through a single incident or series
86 | of actions.
87 |
88 | **Consequence**: A warning with consequences for continued behavior. No
89 | interaction with the people involved, including unsolicited interaction with
90 | those enforcing the Code of Conduct, for a specified period of time. This
91 | includes avoiding interactions in community spaces as well as external channels
92 | like social media. Violating these terms may lead to a temporary or
93 | permanent ban.
94 |
95 | ### 3. Temporary Ban
96 |
97 | **Community Impact**: A serious violation of community standards, including
98 | sustained inappropriate behavior.
99 |
100 | **Consequence**: A temporary ban from any sort of interaction or public
101 | communication with the community for a specified period of time. No public or
102 | private interaction with the people involved, including unsolicited interaction
103 | with those enforcing the Code of Conduct, is allowed during this period.
104 | Violating these terms may lead to a permanent ban.
105 |
106 | ### 4. Permanent Ban
107 |
108 | **Community Impact**: Demonstrating a pattern of violation of community
109 | standards, including sustained inappropriate behavior, harassment of an
110 | individual, or aggression toward or disparagement of classes of individuals.
111 |
112 | **Consequence**: A permanent ban from any sort of public interaction within
113 | the community.
114 |
115 | ## Attribution
116 |
117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage],
118 | version 2.0, available at
119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
120 |
121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct
122 | enforcement ladder](https://github.com/mozilla/diversity).
123 |
124 | [homepage]: https://www.contributor-covenant.org
125 |
126 | For answers to common questions about this code of conduct, see the FAQ at
127 | https://www.contributor-covenant.org/faq. Translations are available at
128 | https://www.contributor-covenant.org/translations.
129 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.10.0
2 |
3 | RUN mkdir -p /usr/src/app
4 |
5 | WORKDIR /usr/src/app
6 |
7 | COPY . /usr/src/app
8 |
9 | RUN pip install -r requirements.txt
10 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) [year] [fullname]
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [![Contributors][contributors-shield]][contributors-url]
2 | [![Forks][forks-shield]][forks-url]
3 | [![Stargazers][stars-shield]][stars-url]
4 | [![Issues][issues-shield]][issues-url]
5 | [![MIT License][license-shield]][license-url]
6 | [![Download][download-sheild]][download-url]
7 | [![LinkedIn][linkedin-shield]][linkedin-url]
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
GNews 📰
19 |
20 |
21 | A happy and lightweight Python package that provides an API to search for articles on Google News and returns a usable JSON response! 🚀
22 |
23 | If you like ❤️ GNews or find it useful 🌟, support the project by buying me a coffee ☕.
24 |
25 |
26 |
27 |
28 | 🚀 View Demo
29 | ·
30 | 🐞 Report Bug
31 | ·
32 | 🚀 Request Feature
33 |
34 |
35 |
36 |
37 |
38 | Table of Contents 📑
39 |
40 | -
41 | About 🚩
42 |
45 |
46 | -
47 | Getting Started 🚀
48 |
51 |
54 |
55 | -
56 | Usage 🧩
57 |
69 |
70 | - To Do 📋
71 | - Roadmap 🛣️
72 | - Contributing 🤝
73 | - License ⚖️
74 | - Contact 📬
75 | - Acknowledgements 🙏
76 |
77 |
78 |
79 |
80 | ## About GNews
81 |
82 | 🚩 GNews is a happy and lightweight Python package that searches the Google News RSS feed and returns a usable JSON
83 | response \
84 | 🚩 You can also fetch the full article (**no need to write scrapers to fetch articles anymore**)
85 |
86 | Google News covers **141+ countries** in **41+ languages**. On the bottom left side of the Google News page you
87 | may find a `Language & region` section where you can find all of the supported combinations.
88 |
89 | ### Demo
90 |
91 | [![GNews Demo][demo-gif]](https://github.com/ranahaani/GNews)
92 |
93 |
94 |
95 |
96 |
97 | ## Getting Started
98 |
99 | This section provides instructions for two different use cases:
100 |
101 | 1. **Installing the GNews package** for immediate use.
102 | 2. **Setting up the GNews project** for local development.
103 |
104 | ### 1. Installing the GNews package
105 |
106 | To install the package and start using it in your own projects, follow these steps:
107 |
108 | ``` shell
109 | pip install gnews
110 | ```
111 | ### 2. Setting Up GNews for Local Development
112 |
113 | If you want to make modifications locally, follow these steps to set up the development environment.
114 |
115 | #### Option 1: Setup with Docker
116 |
117 | 1. Install [docker and docker-compose](https://docs.docker.com/get-docker/).
118 | 2. Configure the `.env` file by adding your MongoDB credentials.
119 | 3. Run the following command to build and start the Docker containers:
120 |
121 | ``` shell
122 | docker-compose up --build
123 | ```
124 |
125 | #### Option 2: Install Using Git Clone
126 |
127 | 1. Clone this repository:
128 | ``` shell
129 | git clone https://github.com/ranahaani/GNews.git
130 | ```
131 |
132 | 2. Set up a virtual environment:
133 | ```shell
134 | virtualenv venv
135 | source venv/bin/activate # MacOS/Linux
136 | .\venv\Scripts\activate # Windows
137 | ```
138 |
139 | 3. Install the required dependencies:
140 | ```shell
141 | pip install -r requirements.txt
142 | ```
143 |
144 |
145 |
146 | ### Example usage
147 |
148 | ```python
149 | from gnews import GNews
150 |
151 | google_news = GNews()
152 | pakistan_news = google_news.get_news('Pakistan')
153 | print(pakistan_news[0])
154 | ```
155 |
156 | ```
157 | [{
158 | 'publisher': 'Aljazeera.com',
159 | 'description': 'Pakistan accuses India of stoking conflict in Indian Ocean '
160 | 'Aljazeera.com',
161 | 'published date': 'Tue, 16 Feb 2021 11:50:43 GMT',
162 | 'title': 'Pakistan accuses India of stoking conflict in Indian Ocean - '
163 | 'Aljazeera.com',
164 | 'url': 'https://www.aljazeera.com/news/2021/2/16/pakistan-accuses-india-of-nuclearizing-indian-ocean'
165 | },
166 | ...]
167 | ```
168 |
169 | ### Get top news
170 |
171 | * `GNews.get_top_news()`
172 |
173 | ### Get news by keyword
174 |
175 | * `GNews.get_news(keyword)`
176 |
177 | ### Get news by major topic
178 |
179 | * `GNews.get_news_by_topic(topic)`
180 | * Available topics:` WORLD, NATION, BUSINESS, TECHNOLOGY, ENTERTAINMENT, SPORTS, SCIENCE, HEALTH, POLITICS, CELEBRITIES, TV, MUSIC, MOVIES, THEATER, SOCCER, CYCLING, MOTOR SPORTS, TENNIS, COMBAT SPORTS, BASKETBALL, BASEBALL, FOOTBALL, SPORTS BETTING, WATER SPORTS, HOCKEY, GOLF,
181 | CRICKET, RUGBY, ECONOMY, PERSONAL FINANCE, FINANCE, DIGITAL CURRENCIES, MOBILE, ENERGY, GAMING, INTERNET SECURITY, GADGETS, VIRTUAL REALITY, ROBOTICS, NUTRITION, PUBLIC HEALTH, MENTAL HEALTH, MEDICINE, SPACE, WILDLIFE, ENVIRONMENT, NEUROSCIENCE, PHYSICS, GEOLOGY, PALEONTOLOGY, SOCIAL SCIENCES, EDUCATION, JOBS, ONLINE EDUCATION, HIGHER EDUCATION, VEHICLES, ARTS-DESIGN, BEAUTY, FOOD, TRAVEL, SHOPPING, HOME, OUTDOORS, FASHION.`
182 |
183 | ### Get news by geo location
184 |
185 | * `GNews.get_news_by_location(location)`
186 | * `location` can be the name of a city, state, or country
187 |
188 | ### Get news by site
189 |
190 | * `GNews.get_news_by_site(site)`
191 | * site should be in the format of: `"cnn.com"`
192 |
193 | ### Results specification
194 | All parameters are optional and can be passed during initialization. Here’s a list of the available parameters:
195 |
196 | - **language**: The language in which to return results (default: 'en').
197 | - **country**: The country code for the headlines (default: 'US').
198 | - **period**: The time period for which you want news.
199 | - **start_date**: Date after which results must have been published.
200 | - **end_date**: Date before which results must have been published.
201 | - **max_results**: The maximum number of results to return (default: 100).
202 | - **exclude_websites**: A list of websites to exclude from results.
203 | - **proxy**: A dictionary specifying the proxy settings used to route requests. The dictionary should contain a single key-value pair where the key is the protocol (`http` or `https`) and the value is the proxy address. Example:
204 | ```python
205 | # Example with only HTTP proxy
206 | proxy = {
207 | 'http': 'http://your_proxy_address',
208 | }
209 |
210 | # Example with only HTTPS proxy
211 | proxy = {
212 | 'https': 'http://your_proxy_address',
213 | }
214 | ```
215 |
216 | #### Example Initialization
217 | ```python
218 | from gnews import GNews
219 |
220 | # Initialize GNews with various parameters, including proxy
221 | google_news = GNews(
222 | language='en',
223 | country='US',
224 | period='7d',
225 | start_date=None,
226 | end_date=None,
227 | max_results=10,
228 | exclude_websites=['yahoo.com', 'cnn.com'],
229 | proxy={
230 | 'https': 'https://your_proxy_address'
231 | }
232 | )
233 | ```
234 |
235 | * Or change them on an existing object
236 |
237 | ```python
238 | google_news.period = '7d' # News from last 7 days
239 | google_news.max_results = 10 # number of responses across a keyword
240 | google_news.country = 'United States' # News from a specific country
241 | google_news.language = 'english' # News in a specific language
242 | google_news.exclude_websites = ['yahoo.com', 'cnn.com'] # Exclude news from specific website i.e Yahoo.com and CNN.com
243 | google_news.start_date = (2020, 1, 1) # Search from 1st Jan 2020
244 | google_news.end_date = (2020, 3, 1) # Search until 1st March 2020
245 | ```
246 |
247 | The format of the timeframe is a string comprised of a number, followed by a letter representing the time operator. For
248 | example 1y would signify 1 year. Full list of operators below:
249 |
250 | ```
251 | - h = hours (eg: 12h)
252 | - d = days (eg: 7d)
253 | - m = months (eg: 6m)
254 | - y = years (eg: 1y)
255 | ```
256 |
257 | Setting the start and end dates can be done by passing in either a datetime or a tuple in the form (YYYY, MM, DD).
258 |
259 | ### Supported Countries
260 |
261 | ```python
262 | print(google_news.AVAILABLE_COUNTRIES)
263 |
264 | {'Australia': 'AU', 'Botswana': 'BW', 'Canada ': 'CA', 'Ethiopia': 'ET', 'Ghana': 'GH', 'India ': 'IN',
265 | 'Indonesia': 'ID', 'Ireland': 'IE', 'Israel ': 'IL', 'Kenya': 'KE', 'Latvia': 'LV', 'Malaysia': 'MY', 'Namibia': 'NA',
266 | 'New Zealand': 'NZ', 'Nigeria': 'NG', 'Pakistan': 'PK', 'Philippines': 'PH', 'Singapore': 'SG', 'South Africa': 'ZA',
267 | 'Tanzania': 'TZ', 'Uganda': 'UG', 'United Kingdom': 'GB', 'United States': 'US', 'Zimbabwe': 'ZW',
268 | 'Czech Republic': 'CZ', 'Germany': 'DE', 'Austria': 'AT', 'Switzerland': 'CH', 'Argentina': 'AR', 'Chile': 'CL',
269 | 'Colombia': 'CO', 'Cuba': 'CU', 'Mexico': 'MX', 'Peru': 'PE', 'Venezuela': 'VE', 'Belgium ': 'BE', 'France': 'FR',
270 | 'Morocco': 'MA', 'Senegal': 'SN', 'Italy': 'IT', 'Lithuania': 'LT', 'Hungary': 'HU', 'Netherlands': 'NL',
271 | 'Norway': 'NO', 'Poland': 'PL', 'Brazil': 'BR', 'Portugal': 'PT', 'Romania': 'RO', 'Slovakia': 'SK', 'Slovenia': 'SI',
272 | 'Sweden': 'SE', 'Vietnam': 'VN', 'Turkey': 'TR', 'Greece': 'GR', 'Bulgaria': 'BG', 'Russia': 'RU', 'Ukraine ': 'UA',
273 | 'Serbia': 'RS', 'United Arab Emirates': 'AE', 'Saudi Arabia': 'SA', 'Lebanon': 'LB', 'Egypt': 'EG',
274 | 'Bangladesh': 'BD', 'Thailand': 'TH', 'China': 'CN', 'Taiwan': 'TW', 'Hong Kong': 'HK', 'Japan': 'JP',
275 | 'Republic of Korea': 'KR'}
276 | ```
277 |
278 | ### Supported Languages
279 |
280 | ```python
281 | print(google_news.AVAILABLE_LANGUAGES)
282 |
283 | {'english': 'en', 'indonesian': 'id', 'czech': 'cs', 'german': 'de', 'spanish': 'es-419', 'french': 'fr',
284 | 'italian': 'it', 'latvian': 'lv', 'lithuanian': 'lt', 'hungarian': 'hu', 'dutch': 'nl', 'norwegian': 'no',
285 | 'polish': 'pl', 'portuguese brasil': 'pt-419', 'portuguese portugal': 'pt-150', 'romanian': 'ro', 'slovak': 'sk',
286 | 'slovenian': 'sl', 'swedish': 'sv', 'vietnamese': 'vi', 'turkish': 'tr', 'greek': 'el', 'bulgarian': 'bg',
287 | 'russian': 'ru', 'serbian': 'sr', 'ukrainian': 'uk', 'hebrew': 'he', 'arabic': 'ar', 'marathi': 'mr', 'hindi': 'hi',
288 | 'bengali': 'bn', 'tamil': 'ta', 'telugu': 'te', 'malyalam': 'ml', 'thai': 'th', 'chinese simplified': 'zh-Hans',
289 | 'chinese traditional': 'zh-Hant', 'japanese': 'ja', 'korean': 'ko'}
290 | ```
291 |
292 | ### Article Properties
293 |
294 | - `get_news` returns a list of articles, each with the following keys: `title`, `published date`, `description`, `url`, `publisher`.
295 |
296 | | Properties | Description | Example |
297 | |--------------|------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
298 | | title | Title of the article | IMF Staff and Pakistan Reach Staff-Level Agreement on the Pending Reviews Under the Extended Fund Facility |
299 | | url | Google news link to article | [Article Link](http://news.google.com/news/url?sa=t&fd=R&ct2=us&usg=AFQjCNGNR4Qg8LGbjszT1yt2s2lMXvvufQ&clid=c3a7d30bb8a4878e06b80cf16b898331&cid=52779522121279&ei=VQU7WYjiFoLEhQHIs4HQCQ&url=https://www.theguardian.com/commentisfree/2017/jun/07/why-dont-unicorns-exist-google) |
300 | | published date | Published date | Wed, 07 Jun 2017 07:01:30 GMT |
301 | | description | Short description of article | IMF Staff and Pakistan Reach Staff-Level Agreement on the Pending Reviews Under the Extended Fund Facility ... |
302 | | publisher | Publisher of article | The Guardian |
303 |
304 | ## Getting full article
305 |
306 | * To read a full article you can either:
307 | * Navigate to the url directly in your browser, or
308 | * Use `newspaper3k` library to scrape the article
309 | * The article url, needed for both methods, is accessed as `article['url']`.
310 |
311 | #### Using newspaper3k
312 |
313 | 1. Install the library - `pip3 install newspaper3k`.
314 | 2. Use the `get_full_article` method of `GNews`, which creates a `newspaper.article.Article` object from the url.
315 |
316 | ```python
317 | from gnews import GNews
318 |
319 | google_news = GNews()
320 | json_resp = google_news.get_news('Pakistan')
321 | article = google_news.get_full_article(
322 | json_resp[0]['url']) # newspaper3k Article instance; all newspaper3k attributes are available on it
323 | ```
324 |
325 | This new object exposes attributes such as `title`, `text` (full article), and `images`. Examples:
326 |
327 | ```python
328 | article.title
329 | ```
330 |
331 | > IMF Staff and Pakistan Reach Staff-Level Agreement on the Pending Reviews Under the Extended Fund Facility
332 |
333 | ```python
334 | article.text
335 | ```
336 |
337 | > End-of-Mission press releases include statements of IMF staff teams that convey preliminary findings after a mission. The views expressed are those of the IMF staff and do not necessarily represent the views of the IMF’s Executive Board.\n\nIMF staff and the Pakistani authorities have reached an agreement on a package of measures to complete second to fifth reviews of the authorities’ reform program supported by the IMF Extended Fund Facility (EFF) ..... (full article)
338 |
339 | ```python
340 | article.images
341 | ```
342 |
343 | > `{'https://www.imf.org/~/media/Images/IMF/Live-Page/imf-live-rgb-h.ashx?la=en', 'https://www.imf.org/-/media/Images/IMF/Data/imf-logo-eng-sep2019-update.ashx', 'https://www.imf.org/-/media/Images/IMF/Data/imf-seal-shadow-sep2019-update.ashx', 'https://www.imf.org/-/media/Images/IMF/Social/TW-Thumb/twitter-seal.ashx', 'https://www.imf.org/assets/imf/images/footer/IMF_seal.png'}`
344 |
345 |
346 | ```python
347 | article.authors
348 | ```
349 |
350 | > `[]`
351 |
352 | Read full documentation for `newspaper3k`
353 | [newspaper3k](https://newspaper.readthedocs.io/en/latest/user_guide/quickstart.html#parsing-an-article)
354 |
355 |
356 | ## Todo
357 |
358 | - Save to MongoDB
359 | - Save to SQLite
360 | - Save to JSON
361 | - Save to .CSV file
362 | - More than 100 articles
363 |
364 |
365 |
366 | ## Roadmap
367 |
368 | See the [open issues](https://github.com/ranahaani/GNews/issues) for a list of proposed features (and known issues).
369 |
370 |
371 |
372 |
373 |
374 | ## Contributing
375 |
376 | Contributions are what make the open source community such an amazing place to learn, inspire, and create. Any
377 | contributions you make are **greatly appreciated**.
378 |
379 | 1. Fork the Project
380 | 2. Create your Feature Branch (`git checkout -b feature/AmazingFeature`)
381 | 3. Commit your Changes (`git commit -m 'Add some AmazingFeature'`)
382 | 4. Push to the Branch (`git push origin feature/AmazingFeature`)
383 | 5. Open a Pull Request
384 |
385 |
386 |
387 | ## License
388 |
389 | Distributed under the MIT License. See `LICENSE` for more information.
390 |
391 |
392 |
393 |
394 |
395 | ## Contact
396 |
397 | Muhammad Abdullah - [@ranahaani](https://twitter.com/ranahaani) - ranahaani@gmail.com
398 |
399 | Project Link: [https://github.com/ranahaani/GNews](https://github.com/ranahaani/GNews)
400 |
401 | [Buy Me a Coffee](https://www.buymeacoffee.com/ranahaani)
402 |
403 | [contributors-shield]: https://img.shields.io/github/contributors/ranahaani/GNews.svg?style=for-the-badge
404 |
405 | [contributors-url]: https://github.com/ranahaani/GNews/graphs/contributors
406 |
407 | [forks-shield]: https://img.shields.io/github/forks/ranahaani/GNews.svg?style=for-the-badge
408 |
409 | [forks-url]: https://github.com/ranahaani/GNews/network/members
410 |
411 | [stars-shield]: https://img.shields.io/github/stars/ranahaani/GNews.svg?style=for-the-badge
412 |
413 | [stars-url]: https://github.com/ranahaani/GNews/stargazers
414 |
415 | [issues-shield]: https://img.shields.io/github/issues/ranahaani/GNews.svg?style=for-the-badge
416 |
417 | [issues-url]: https://github.com/ranahaani/GNews/issues
418 |
419 | [license-shield]: https://img.shields.io/github/license/ranahaani/GNews.svg?style=for-the-badge
420 |
421 | [license-url]: https://github.com/ranahaani/GNews/blob/master/LICENSE.txt
422 |
423 | [download-sheild]: https://img.shields.io/pypi/dm/GNews.svg?style=for-the-badge
424 |
425 | [download-url]: https://pypistats.org/packages/gnews
426 |
427 | [linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555
428 |
429 | [linkedin-url]: https://linkedin.com/in/ranahaani
430 |
431 | [demo-gif]: https://github.com/ranahaani/GNews/raw/master/imgs/gnews.gif
432 |
--------------------------------------------------------------------------------
/docker-compose.yaml:
--------------------------------------------------------------------------------
1 | version: "3.8"
2 | services:
3 |
4 | mongodb:
5 | image: mongo
6 | restart: always
7 | environment:
8 | MONGO_INITDB_ROOT_USERNAME: root
9 | MONGO_INITDB_ROOT_PASSWORD: example
10 | MONGO_INITDB_DATABASE: gnews
11 | ports:
12 | - 27017:27017
13 |
14 | gnews:
15 | build:
16 | context: .
17 | dockerfile: Dockerfile
18 | env_file:
19 | - .env
20 | working_dir: /usr/src/app
21 | volumes:
22 | - .:/usr/src/app
23 | depends_on:
24 | - mongodb
25 |
--------------------------------------------------------------------------------
/gnews/__init__.py:
--------------------------------------------------------------------------------
1 | from .gnews import GNews
2 |
3 | name = "gnews"
4 | __all__ = ["GNews"]
5 |
--------------------------------------------------------------------------------
/gnews/gnews.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import urllib.request
3 | import datetime
4 | import inspect
5 | import warnings
6 |
7 | import feedparser
8 | from bs4 import BeautifulSoup as Soup
9 |
10 | from gnews.utils.constants import AVAILABLE_COUNTRIES, AVAILABLE_LANGUAGES, SECTIONS, TOPICS, BASE_URL, USER_AGENT
11 | from gnews.utils.utils import process_url
12 |
13 | logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.INFO,
14 | datefmt='%m/%d/%Y %I:%M:%S %p')
15 | logger = logging.getLogger(__name__)
16 |
17 |
class GNews:
    """
    Small client for the Google News RSS feed.

    The instance stores the language, country, result limit and time filters, builds feed URLs from
    them, downloads the feeds with ``feedparser`` and converts every entry into a plain dictionary.
    """

    # Functions whose queries go to the search endpoint and therefore accept before:/after: filters.
    # The internal pager is listed as well because it drives its sliding window through the same filters.
    _DATE_RANGE_CALLERS = ('get_news', '_get_news_more_than_100')

    def __init__(self, language="en", country="US", max_results=100, period=None, start_date=None, end_date=None,
                 exclude_websites=None, proxy=None):
        """
        (optional parameters)
        :param language: The language in which to return results, defaults to en (optional)
        :param country: The country code of the country you want to get headlines for, defaults to US
        :param max_results: The maximum number of results to return, defaults to 100
        :param period: The period of time from which you want the news
        :param start_date: Date after which results must have been published
         (a datetime or a tuple in the form (YYYY, MM, DD))
        :param end_date: Date before which results must have been published
         (a datetime or a tuple in the form (YYYY, MM, DD))
        :param exclude_websites: A list of strings that indicate websites to exclude from results;
         anything that is not a non-empty list is replaced by an empty list
        :param proxy: The proxy address; the same value is used for both the http and the https protocol
        """
        # tuple(mapping) yields the keys of the mapping. No trailing comma here: a trailing comma
        # would wrap the result in a second, one-element tuple.
        self.countries = tuple(AVAILABLE_COUNTRIES)
        self.languages = tuple(AVAILABLE_LANGUAGES)
        self._max_results = max_results
        self._language = language
        self._country = country
        self._period = period
        self._end_date = None
        self._start_date = None
        # Assign through the property setters so that tuples are converted to datetimes
        # and the start/end ordering check runs.
        self.end_date = end_date
        self.start_date = start_date
        self._exclude_websites = exclude_websites if exclude_websites and isinstance(exclude_websites, list) else []
        self._proxy = {'http': proxy, 'https': proxy} if proxy else None

    def _ceid(self):
        """
        Build the URL suffix that carries the time filter and the language/country parameters.

        A start/end date range is only applied when the request originates from one of the
        functions named in ``_DATE_RANGE_CALLERS``; for any other caller a warning is emitted
        and only the period (if set) is used.
        :return: The suffix string to append to the feed query
        """
        time_query = ''
        if self._start_date or self._end_date:
            # Frame 0 is _ceid, frame 1 is _get_news, frame 2 is the function that asked for the news.
            frames = inspect.stack(0)
            caller = frames[2][3] if len(frames) > 2 else ''
            if caller in self._DATE_RANGE_CALLERS:
                if self._period:
                    warnings.warn(message=f'\nPeriod ({self._period}) will be ignored in favour of the start and '
                                          'end dates', category=UserWarning, stacklevel=4)
                # Reading the date properties also resets the period to None (see the property docstrings).
                end_date = self.end_date
                start_date = self.start_date
                if end_date is not None:
                    time_query += '%20before%3A{}'.format(end_date)
                if start_date is not None:
                    time_query += '%20after%3A{}'.format(start_date)
            else:
                warnings.warn(message=("Only searches using the function get_news support date ranges. Review the "
                                       f"documentation for {caller} for a partial workaround. \nStart "
                                       "date and end date will be ignored"), category=UserWarning, stacklevel=4)
                if self._period:
                    time_query += '%20when%3A{}'.format(self._period)
        elif self._period:
            time_query += '%20when%3A{}'.format(self._period)

        return time_query + '&hl={}&gl={}&ceid={}:{}'.format(self._language,
                                                             self._country,
                                                             self._country,
                                                             self._language)

    @property
    def language(self):
        return self._language

    @language.setter
    def language(self, language):
        """
        :param language: A key of AVAILABLE_LANGUAGES, or the language code itself
         (values that are not keys of AVAILABLE_LANGUAGES are stored unchanged)
        """
        self._language = AVAILABLE_LANGUAGES.get(language, language)

    @property
    def exclude_websites(self):
        return self._exclude_websites

    @exclude_websites.setter
    def exclude_websites(self, exclude_websites):
        """
        The function takes in a list of websites that you want to exclude
        :param exclude_websites: A list of strings that will be used to filter out websites
        """
        self._exclude_websites = exclude_websites

    @property
    def max_results(self):
        return self._max_results

    @max_results.setter
    def max_results(self, size):
        self._max_results = size

    @property
    def period(self):
        return self._period

    @period.setter
    def period(self, period):
        self._period = period

    @staticmethod
    def _coerce_date(value):
        """Convert a (YYYY, MM, DD) tuple to a datetime; return any other value unchanged."""
        if isinstance(value, tuple):
            return datetime.datetime(value[0], value[1], value[2])
        return value

    @staticmethod
    def _warn_if_range_invalid(start, end):
        """Warn when both dates are set and the range is empty or reversed."""
        if start is None or end is None:
            return
        if start == end:
            warnings.warn("The start and end dates should be at least 1 day apart, or GNews will return no results",
                          stacklevel=3)
        elif end < start:
            warnings.warn("End date should be after start date, or GNews will return no results", stacklevel=3)

    @property
    def start_date(self):
        """
        :return: string of start_date in form YYYY-MM-DD, or None if start_date is not set
        ..NOTE this will reset period to None if start_date is not None
        """
        if self._start_date is None:
            return None
        self.period = None
        return self._start_date.strftime("%Y-%m-%d")

    @start_date.setter
    def start_date(self, start_date):
        """
        The function sets the start of the date range you want to search
        :param start_date: either a tuple in the form (YYYY, MM, DD) or a datetime; None clears the start date
        """
        start_date = self._coerce_date(start_date)
        self._warn_if_range_invalid(start_date, self._end_date)
        self._start_date = start_date

    @property
    def end_date(self):
        """
        :return: string of end_date in form YYYY-MM-DD, or None if end_date is not set
        ..NOTE this will reset period to None if end_date is not None
        """
        if self._end_date is None:
            return None
        self.period = None
        return self._end_date.strftime("%Y-%m-%d")

    @end_date.setter
    def end_date(self, end_date):
        """
        The function sets the end of the date range you want to search
        :param end_date: either a tuple in the form (YYYY, MM, DD) or a datetime; None clears the end date
        """
        end_date = self._coerce_date(end_date)
        self._warn_if_range_invalid(self._start_date, end_date)
        self._end_date = end_date

    @property
    def country(self):
        return self._country

    @country.setter
    def country(self, country):
        """
        :param country: A key of AVAILABLE_COUNTRIES, or the country code itself
         (values that are not keys of AVAILABLE_COUNTRIES are stored unchanged)
        """
        self._country = AVAILABLE_COUNTRIES.get(country, country)

    def get_full_article(self, url):
        """
        Download an article from the specified URL, parse it, and return an article object.
        :param url: The URL of the article you wish to download.
        :return: An `Article` object created by the `newspaper` module; None if the module cannot be
         imported or if downloading/parsing raises an exception.
        """
        try:
            import newspaper
        except ImportError:
            print("\nget_full_article() requires the `newspaper3k` library.")
            print("You can install it by running `pip3 install newspaper3k` in your shell.")
            return None

        try:
            article = newspaper.Article(url=str(url), language=self._language)
            article.download()
            article.parse()
        except Exception as error:
            # Best effort: any failure while fetching or parsing is reported and turned into None.
            print(f"An error occurred while fetching the article: {error}")
            return None

        return article

    @staticmethod
    def _clean(html):
        """Strip the markup from an HTML fragment and replace non-breaking spaces with plain spaces."""
        soup = Soup(html, features="html.parser")
        return soup.get_text().replace('\xa0', ' ')

    def _process(self, item):
        """
        Convert one feed entry into the article dictionary returned to callers.
        :param item: A feed entry as produced by feedparser
        :return: The article dictionary, or None when process_url returns no URL for the entry
        """
        url = process_url(item, self._exclude_websites)
        if not url:
            return None
        return {
            'title': item.get("title", ""),
            'description': self._clean(item.get("description", "")),
            'published date': item.get("published", ""),
            'url': url,
            'publisher': item.get("source", " ")
        }

    @staticmethod
    def _search_query(key):
        """Build the search endpoint query for a key, encoding spaces as %20."""
        return '/search?q={}'.format(key.replace(" ", "%20"))

    @staticmethod
    def _parse_published(value):
        """Parse the 'published date' string of an article; return None when it cannot be parsed."""
        try:
            return datetime.datetime.strptime(value, '%a, %d %b %Y %H:%M:%S GMT')
        except (TypeError, ValueError) as error:
            logger.warning(f"Failed to parse published date: {error}")
            return None

    def docstring_parameter(*sub):
        """Decorator factory that substitutes the given values into the decorated object's docstring."""
        def dec(obj):
            # __doc__ is None when docstrings are stripped (python -OO); nothing to format then.
            if obj.__doc__:
                obj.__doc__ = obj.__doc__.format(*sub)
            return obj

        return dec

    indent = '\n\t\t\t'
    indent2 = indent + '\t'
    standard_output = (indent + "{'title': Article Title," + indent + "'description': Google News summary of the "
                       "article," + indent + "'published date': publication date string taken from the feed," +
                       indent + "'url': link to the news article," + indent + "'publisher':" + indent2 +
                       "{'href': link to publisher's website," + indent2 + "'title': name of the publisher}}")

    @docstring_parameter(standard_output)
    def get_news(self, key):
        """
        The function takes in a key and returns a list of news articles
        :param key: The query you want to search for. For example, if you want to search for news about
         the "Yahoo", you would get results from Google News according to your key i.e "yahoo".
         Spaces are encoded as %20; any other character is sent as given.
        :return: A list of dictionaries with structure: {0}.
         An empty list is returned when the key is empty.
        """
        if not key:
            logger.warning("Enter a valid search key.")
            return []
        if self._max_results > 100:
            return self._get_news_more_than_100(key)
        return self._get_news(self._search_query(key))

    def _get_news_more_than_100(self, key):
        """
        Fetch more than 100 news articles by iterating backward in time, dynamically adjusting
        the date range based on the earliest date seen so far.

        The start date, end date and period configured on the instance are not used for these
        requests; they are put back on the instance before the function returns.
        :param key: The search key (spaces are encoded as %20)
        :return: A list of at most max_results article dictionaries without duplicate URLs
        """
        articles = []
        seen_urls = set()
        earliest_date = None

        if self._start_date or self._end_date or self._period:
            warnings.warn(message=("Searches for over 100 articles do not currently support date ranges. \nStart "
                                   "date, end date, and period will be ignored"), category=UserWarning, stacklevel=3)

        query = self._search_query(key)
        saved_filters = (self._start_date, self._end_date, self._period)
        # Start with no time filter at all for the first query
        self._start_date = None
        self._end_date = None
        self._period = None
        try:
            while len(articles) < self._max_results:
                # Fetch articles for the current range
                fetched_articles = self._get_news(query)
                if not fetched_articles:  # Stop if no more articles are found
                    break

                added = 0
                for article in fetched_articles:
                    if article['url'] in seen_urls:
                        continue
                    seen_urls.add(article['url'])
                    articles.append(article)
                    added += 1

                    # Track the earliest published date
                    published_date = self._parse_published(article.get("published date"))
                    if published_date is not None and (earliest_date is None or published_date < earliest_date):
                        earliest_date = published_date

                    if len(articles) >= self._max_results:
                        return articles

                # If fewer than 100 articles were fetched, assume the range is exhausted
                if len(fetched_articles) < 100:
                    break
                # Without a new article or a known date the next request would repeat this one forever
                if added == 0 or earliest_date is None:
                    break

                # Update the sliding window to fetch older articles
                self._end_date = earliest_date
                self._start_date = earliest_date - datetime.timedelta(days=7)

            return articles
        finally:
            # Do not leak the sliding window into later calls on this instance
            self._start_date, self._end_date, self._period = saved_filters

    @docstring_parameter(standard_output)
    def get_top_news(self):
        """
        This function returns top news stories for the current time
        :return: A list of dictionaries with structure: {0}.
        ..To implement date range try get_news('?')
        """
        query = "?"
        return self._get_news(query)

    @docstring_parameter(standard_output, ', '.join(TOPICS), ', '.join(SECTIONS.keys()))
    def get_news_by_topic(self, topic: str):
        """
        Function to get news from one of Google's key topics
        :param topic: TOPIC names i.e {1}
         or SECTION names i.e {2}
         (the name is upper-cased before it is looked up)
        :return: A list of dictionaries with structure: {0}.
         An empty list is returned for an unknown topic.
        ..To implement date range try get_news('topic')
        """
        topic = topic.upper()
        if topic in TOPICS:
            query = '/headlines/section/topic/' + topic + '?'
            return self._get_news(query)
        if topic in SECTIONS:
            query = '/topics/' + SECTIONS[topic] + '?'
            return self._get_news(query)

        logger.info(f"Invalid topic. \nAvailable topics are: {', '.join(TOPICS)}, {', '.join(SECTIONS.keys())}.")
        return []

    @docstring_parameter(standard_output)
    def get_news_by_location(self, location: str):
        """
        This function is used to get news from a specific location (city, state, and country)
        :param location: (type: str) The location for which you want to get headlines
         (spaces are encoded as %20)
        :return: A list of dictionaries with structure: {0}.
         An empty list is returned when the location is empty.
        ..To implement date range try get_news('location')
        """
        if location:
            query = '/headlines/section/geo/' + location.replace(" ", "%20") + '?'
            return self._get_news(query)
        logger.warning("Enter a valid location.")
        return []

    @docstring_parameter(standard_output)
    def get_news_by_site(self, site: str):
        """
        This function is used to get news from a specific site
        :param site: (type: str) The site domain for which you want to get headlines. E.g., 'cnn.com'
        :return: A list of news articles from the specified site.
        """
        if site:
            key = "site:{}".format(site)
            return self.get_news(key)
        logger.warning("Enter a valid site domain.")
        return []

    def _get_news(self, query):
        """
        Download the feed for a query and convert its entries into article dictionaries.
        :param query: The path/query part that is appended to BASE_URL
        :return: At most max_results article dictionaries; an empty list if anything raises
        """
        url = BASE_URL + query + self._ceid()
        try:
            if self._proxy:
                proxy_handler = urllib.request.ProxyHandler(self._proxy)
                feed_data = feedparser.parse(url, agent=USER_AGENT, handlers=[proxy_handler])
            else:
                feed_data = feedparser.parse(url, agent=USER_AGENT)

            return [item for item in
                    map(self._process, feed_data.entries[:self._max_results]) if item]
        except Exception as err:
            # Best effort: log the error itself (err.args may be empty) and report no results.
            logger.error(err)
            return []
365 |
--------------------------------------------------------------------------------
/gnews/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ranahaani/GNews/5058075ba13a6be20a101fbdc84294d396150510/gnews/utils/__init__.py
--------------------------------------------------------------------------------
/gnews/utils/constants.py:
--------------------------------------------------------------------------------
1 | import random
2 |
3 | USER_AGENTS = '''Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36
4 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36
5 | Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36
6 | Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36
7 | Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2226.0 Safari/537.36
8 | Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36
9 | Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36
10 | Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2224.3 Safari/537.36
11 | Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.93 Safari/537.36
12 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36
13 | Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36
14 | Mozilla/5.0 (Windows NT 4.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36
15 | Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36
16 | Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36
17 | Mozilla/5.0 (X11; OpenBSD i386) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36
18 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1944.0 Safari/537.36
19 | Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.3319.102 Safari/537.36
20 | Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2309.372 Safari/537.36
21 | Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2117.157 Safari/537.36
22 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36
23 | Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1866.237 Safari/537.36
24 | Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/4E423F
25 | Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.116 Safari/537.36 Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B334b Safari/531.21.10
26 | Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.517 Safari/537.36
27 | Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36
28 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36
29 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36
30 | Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.16 Safari/537.36
31 | Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1623.0 Safari/537.36
32 | Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.17 Safari/537.36
33 | Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.62 Safari/537.36
34 | Mozilla/5.0 (X11; CrOS i686 4319.74.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.57 Safari/537.36
35 | Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.2 Safari/537.36
36 | Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1468.0 Safari/537.36
37 | Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1467.0 Safari/537.36
38 | Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1464.0 Safari/537.36
39 | Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1500.55 Safari/537.36
40 | Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36
41 | Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36
42 | Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36
43 | Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36
44 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36
45 | Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24
46 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24
47 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.22 (KHTML, like Gecko) Chrome/19.0.1047.0 Safari/535.22
48 | Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.21 (KHTML, like Gecko) Chrome/19.0.1042.0 Safari/535.21
49 | Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.21 (KHTML, like Gecko) Chrome/19.0.1041.0 Safari/535.21
50 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20
51 | Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/18.6.872.0 Safari/535.2 UNTRUSTED/1.0 3gpp-gba UNTRUSTED/1.0
52 | Mozilla/5.0 (Macintosh; AMD Mac OS X 10_8_2) AppleWebKit/535.22 (KHTML, like Gecko) Chrome/18.6.872
53 | Mozilla/5.0 (X11; CrOS i686 1660.57.0) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.46 Safari/535.19
54 | Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.45 Safari/535.19
55 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.45 Safari/535.19
56 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.45 Safari/535.19
57 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Safari/535.19
58 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.151 Safari/535.19
59 | Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.19 (KHTML, like Gecko) Ubuntu/11.10 Chromium/18.0.1025.142 Chrome/18.0.1025.142 Safari/535.19
60 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.11 Safari/535.19
61 | Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11
62 | Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11
63 | Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11
64 | Mozilla/5.0 (Windows NT 6.2) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11
65 | Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11
66 | Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11
67 | Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11
68 | Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11
69 | Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11
70 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11
71 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11
72 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11
73 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11
74 | Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Ubuntu/11.10 Chromium/17.0.963.65 Chrome/17.0.963.65 Safari/535.11
75 | Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Ubuntu/11.04 Chromium/17.0.963.65 Chrome/17.0.963.65 Safari/535.11
76 | Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Ubuntu/10.10 Chromium/17.0.963.65 Chrome/17.0.963.65 Safari/535.11
77 | Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.11 (KHTML, like Gecko) Ubuntu/11.10 Chromium/17.0.963.65 Chrome/17.0.963.65 Safari/535.11
78 | Mozilla/5.0 (Windows NT 5.1) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.700.3 Safari/534.24
79 | Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.699.0 Safari/534.24
80 | Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.699.0 Safari/534.24
81 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_6) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.698.0 Safari/534.24
82 | Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.697.0 Safari/534.24
83 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.71 Safari/534.24
84 | Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.68 Safari/534.24
85 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.68 Safari/534.24
86 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.68 Safari/534.24
87 | Mozilla/5.0 Slackware/13.37 (X11; U; Linux x86_64; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/11.0.696.50
88 | Mozilla/5.0 (Windows NT 5.1) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.43 Safari/534.24
89 | Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.34 Safari/534.24
90 | Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.34 Safari/534.24
91 | Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.3 Safari/534.24
92 | Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.3 Safari/534.24
93 | Mozilla/5.0 (Windows NT 6.0) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.3 Safari/534.24
94 | Mozilla/5.0 (X11; Linux i686) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.14 Safari/534.24
95 | Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.12 Safari/534.24
96 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_6) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.12 Safari/534.24
97 | Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/534.24 (KHTML, like Gecko) Ubuntu/10.04 Chromium/11.0.696.0 Chrome/11.0.696.0 Safari/534.24
98 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.0 Safari/534.24
99 | Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.694.0 Safari/534.24
100 | Mozilla/5.0 (X11; Linux i686) AppleWebKit/534.23 (KHTML, like Gecko) Chrome/11.0.686.3 Safari/534.23
101 | Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.21 (KHTML, like Gecko) Chrome/11.0.682.0 Safari/534.21
102 | Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.21 (KHTML, like Gecko) Chrome/11.0.678.0 Safari/534.21
103 | Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_7_0; en-US) AppleWebKit/534.21 (KHTML, like Gecko) Chrome/11.0.678.0 Safari/534.21
104 | Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/534.20 (KHTML, like Gecko) Chrome/11.0.672.2 Safari/534.20
105 | Mozilla/5.0 (Windows NT) AppleWebKit/534.20 (KHTML, like Gecko) Chrome/11.0.672.2 Safari/534.20
106 | Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; en-US) AppleWebKit/534.20 (KHTML, like Gecko) Chrome/11.0.672.2 Safari/534.20
107 | Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.20 (KHTML, like Gecko) Chrome/11.0.669.0 Safari/534.20
108 | Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.19 (KHTML, like Gecko) Chrome/11.0.661.0 Safari/534.19
109 | Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.18 (KHTML, like Gecko) Chrome/11.0.661.0 Safari/534.18
110 | Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; en-US) AppleWebKit/534.18 (KHTML, like Gecko) Chrome/11.0.660.0 Safari/534.18
111 | Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.17 (KHTML, like Gecko) Chrome/11.0.655.0 Safari/534.17
112 | Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-US) AppleWebKit/534.17 (KHTML, like Gecko) Chrome/11.0.655.0 Safari/534.17
113 | Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.17 (KHTML, like Gecko) Chrome/11.0.654.0 Safari/534.17
114 | Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/534.17 (KHTML, like Gecko) Chrome/11.0.652.0 Safari/534.17
115 | Mozilla/4.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/11.0.1245.0 Safari/537.36
116 | Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.17 (KHTML, like Gecko) Chrome/10.0.649.0 Safari/534.17
117 | Mozilla/5.0 (Windows; U; Windows NT 6.1; de-DE) AppleWebKit/534.17 (KHTML, like Gecko) Chrome/10.0.649.0 Safari/534.17
118 | Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.82 Safari/534.16
119 | Mozilla/5.0 (X11; U; Linux armv7l; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.204 Safari/534.16
120 | Mozilla/5.0 (X11; U; FreeBSD x86_64; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.204 Safari/534.16
121 | Mozilla/5.0 (X11; U; FreeBSD i386; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.204 Safari/534.16
122 | Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_5; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.204
123 | Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.134 Safari/534.16
124 | Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.134 Safari/534.16
125 | Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.134 Safari/534.16
126 | Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.134 Safari/534.16
127 | Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Ubuntu/10.10 Chromium/10.0.648.133 Chrome/10.0.648.133 Safari/534.16
128 | Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16
129 | Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Ubuntu/10.10 Chromium/10.0.648.133 Chrome/10.0.648.133 Safari/534.16
130 | Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16
131 | Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16
132 | Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_3; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16
133 | Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_2; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16
134 | Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Ubuntu/10.10 Chromium/10.0.648.127 Chrome/10.0.648.127 Safari/534.16
135 | Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.127 Safari/534.16
136 | Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.127 Safari/534.16
137 | Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.127 Safari/534.16
138 | Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.11 Safari/534.16
139 | Mozilla/5.0 (Windows; U; Windows NT 6.1; ru-RU; AppleWebKit/534.16; KHTML; like Gecko; Chrome/10.0.648.11;Safari/534.16)
140 | Mozilla/5.0 (Windows; U; Windows NT 6.1; ru-RU) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.11 Safari/534.16
141 | Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.11 Safari/534.16
142 | Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Ubuntu/10.10 Chromium/10.0.648.0 Chrome/10.0.648.0 Safari/534.16
143 | Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Ubuntu/10.10 Chromium/10.0.648.0 Chrome/10.0.648.0 Safari/534.16
144 | Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.0 Safari/534.16
145 | Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Ubuntu/10.10 Chromium/10.0.642.0 Chrome/10.0.642.0 Safari/534.16
146 | Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_5; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.639.0 Safari/534.16
147 | Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.638.0 Safari/534.16
148 | Mozilla/5.0 (X11; U; Linux i686 (x86_64); en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.634.0 Safari/534.16
149 | Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.634.0 Safari/534.16
150 | Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.16 SUSE/10.0.626.0 (KHTML, like Gecko) Chrome/10.0.626.0 Safari/534.16
151 | Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.15 (KHTML, like Gecko) Chrome/10.0.613.0 Safari/534.15
152 | Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.15 (KHTML, like Gecko) Ubuntu/10.10 Chromium/10.0.613.0 Chrome/10.0.613.0 Safari/534.15
153 | Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.15 (KHTML, like Gecko) Ubuntu/10.04 Chromium/10.0.612.3 Chrome/10.0.612.3 Safari/534.15
154 | Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.15 (KHTML, like Gecko) Chrome/10.0.612.1 Safari/534.15
155 | Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.15 (KHTML, like Gecko) Ubuntu/10.10 Chromium/10.0.611.0 Chrome/10.0.611.0 Safari/534.15
156 | Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.14 (KHTML, like Gecko) Chrome/10.0.602.0 Safari/534.14
157 | Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.14 (KHTML, like Gecko) Chrome/10.0.601.0 Safari/534.14
158 | Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.14 (KHTML, like Gecko) Chrome/10.0.601.0 Safari/534.14
159 | Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/540.0 (KHTML,like Gecko) Chrome/9.1.0.0 Safari/540.0
160 | Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/540.0 (KHTML, like Gecko) Ubuntu/10.10 Chrome/9.1.0.0 Safari/540.0
161 | Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/534.14 (KHTML, like Gecko) Chrome/9.0.601.0 Safari/534.14
162 | Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.14 (KHTML, like Gecko) Ubuntu/10.10 Chromium/9.0.600.0 Chrome/9.0.600.0 Safari/534.14
163 | Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.14 (KHTML, like Gecko) Chrome/9.0.600.0 Safari/534.14
164 | Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.599.0 Safari/534.13
165 | Mozilla/5.0 (Windows; U; Windows NT 5.1; en-CA) AppleWebKit/534.13 (KHTML like Gecko) Chrome/9.0.597.98 Safari/534.13
166 | Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.84 Safari/534.13
167 | Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.44 Safari/534.13
168 | Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.19 Safari/534.13
169 | Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.15 Safari/534.13
170 | Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_5; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.15 Safari/534.13
171 | Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.107 Safari/534.13 v1416758524.9051
172 | Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.107 Safari/534.13 v1416748405.3871
173 | Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.107 Safari/534.13 v1416670950.695
174 | Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.107 Safari/534.13 v1416664997.4379
175 | Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.107 Safari/534.13 v1333515017.9196
176 | Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.0 Safari/534.13
177 | Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.0 Safari/534.13
178 | Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.0 Safari/534.13
179 | Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.0 Safari/534.13
180 | Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_5; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.0 Safari/534.13
181 | Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.0 Safari/534.13
182 | Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.596.0 Safari/534.13
183 | Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Ubuntu/10.04 Chromium/9.0.595.0 Chrome/9.0.595.0 Safari/534.13
184 | Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Ubuntu/9.10 Chromium/9.0.592.0 Chrome/9.0.592.0 Safari/534.13
185 | Mozilla/5.0 (X11; U; Windows NT 6; en-US) AppleWebKit/534.12 (KHTML, like Gecko) Chrome/9.0.587.0 Safari/534.12
186 | Mozilla/5.0 (Windows U Windows NT 5.1 en-US) AppleWebKit/534.12 (KHTML, like Gecko) Chrome/9.0.583.0 Safari/534.12'''.split('\n')
187 |
188 |
# Draw one entry from USER_AGENTS when this module is imported. The choice is
# made once, so this constant keeps the same value for the life of the process.
USER_AGENT = random.choice(USER_AGENTS)
190 |
191 |
# Maps a lower-case language name to the language code string used for it.
# NOTE(review): the codes (e.g. "es-419", "zh-Hans") look like Google News
# language identifiers -- confirm against the code that consumes this mapping.
AVAILABLE_LANGUAGES = {
    "english": "en",
    "indonesian": "id",
    "czech": "cs",
    "german": "de",
    "spanish": "es-419",
    "french": "fr",
    "italian": "it",
    "latvian": "lv",
    "lithuanian": "lt",
    "hungarian": "hu",
    "dutch": "nl",
    "norwegian": "no",
    "polish": "pl",
    "portuguese brasil": "pt-419",
    "portuguese portugal": "pt-150",
    "romanian": "ro",
    "slovak": "sk",
    "slovenian": "sl",
    "swedish": "sv",
    "vietnamese": "vi",
    "turkish": "tr",
    "greek": "el",
    "bulgarian": "bg",
    "russian": "ru",
    "serbian": "sr",
    "ukrainian": "uk",
    "hebrew": "he",
    "arabic": "ar",
    "marathi": "mr",
    "hindi": "hi",
    "bengali": "bn",
    "tamil": "ta",
    "telugu": "te",
    # "malyalam" is a historical misspelling; it is kept so existing callers
    # keep working, and the correctly spelled name is accepted as well.
    "malyalam": "ml",
    "malayalam": "ml",
    "thai": "th",
    "chinese simplified": "zh-Hans",
    "chinese traditional": "zh-Hant",
    "japanese": "ja",
    "korean": "ko",
}
233 |
# Maps a country name to its two-letter country code.
#
# Five names were originally registered with a trailing space ("Canada ",
# "India ", "Israel ", "Belgium ", "Ukraine "), which made a lookup by the
# plain name fail. The clean spelling is now present for each of them; the
# legacy keys are kept so callers that relied on the old spelling still work.
AVAILABLE_COUNTRIES = {
    "Australia": "AU",
    "Botswana": "BW",
    "Canada": "CA",
    "Canada ": "CA",  # legacy key (trailing space)
    "Ethiopia": "ET",
    "Ghana": "GH",
    "India": "IN",
    "India ": "IN",  # legacy key (trailing space)
    "Indonesia": "ID",
    "Ireland": "IE",
    "Israel": "IL",
    "Israel ": "IL",  # legacy key (trailing space)
    "Kenya": "KE",
    "Latvia": "LV",
    "Malaysia": "MY",
    "Namibia": "NA",
    "New Zealand": "NZ",
    "Nigeria": "NG",
    "Pakistan": "PK",
    "Philippines": "PH",
    "Singapore": "SG",
    "South Africa": "ZA",
    "Tanzania": "TZ",
    "Uganda": "UG",
    "United Kingdom": "GB",
    "United States": "US",
    "Zimbabwe": "ZW",
    "Czech Republic": "CZ",
    "Germany": "DE",
    "Austria": "AT",
    "Switzerland": "CH",
    "Argentina": "AR",
    "Chile": "CL",
    "Colombia": "CO",
    "Cuba": "CU",
    "Mexico": "MX",
    "Peru": "PE",
    "Venezuela": "VE",
    "Belgium": "BE",
    "Belgium ": "BE",  # legacy key (trailing space)
    "France": "FR",
    "Morocco": "MA",
    "Senegal": "SN",
    "Italy": "IT",
    "Lithuania": "LT",
    "Hungary": "HU",
    "Netherlands": "NL",
    "Norway": "NO",
    "Poland": "PL",
    "Brazil": "BR",
    "Portugal": "PT",
    "Romania": "RO",
    "Slovakia": "SK",
    "Slovenia": "SI",
    "Sweden": "SE",
    "Vietnam": "VN",
    "Turkey": "TR",
    "Greece": "GR",
    "Bulgaria": "BG",
    "Russia": "RU",
    "Ukraine": "UA",
    "Ukraine ": "UA",  # legacy key (trailing space)
    "Serbia": "RS",
    "United Arab Emirates": "AE",
    "Saudi Arabia": "SA",
    "Lebanon": "LB",
    "Egypt": "EG",
    "Bangladesh": "BD",
    "Thailand": "TH",
    "China": "CN",
    "Taiwan": "TW",
    "Hong Kong": "HK",
    "Japan": "JP",
    "Republic of Korea": "KR",
}
305 |
# Root of the Google News site and of its RSS endpoint.
GOOGLE_NEWS_URL = 'https://news.google.com'
BASE_URL = f"{GOOGLE_NEWS_URL}/rss"

# Matches URLs hosted on news.google.com (optionally with a "www." prefix).
# The dots are escaped and the host must end right after ".com" (followed by
# "/", ":", "?", "#" or end of string); the previous pattern ended in "com*",
# which also accepted hosts such as "news.google.co" or "newsXgoogle.com".
GOOGLE_NEWS_REGEX = r'^http(s)?://(www\.)?news\.google\.com(?=[/:?#]|$)'

# Names of the top-level news topics.
TOPICS = ["WORLD", "NATION", "BUSINESS", "TECHNOLOGY", "ENTERTAINMENT", "SPORTS", "SCIENCE", "HEALTH"]
312 |
# Maps an upper-case section name to an opaque identifier string.
# NOTE(review): the values look like Google News topic tokens; how they are
# turned into a request URL is not visible in this file -- confirm in the caller.
SECTIONS = {
"POLITICS": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNRFZ4ZERBU0FtVnVLQUFQAQ",
"CELEBRITIES": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNREZ5Wm5vU0FtVnVLQUFQAQ",
"TV": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNRGRqTlRJU0FtVnVLQUFQAQ",
"MUSIC": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNRFJ5YkdZU0FtVnVLQUFQAQ",
"MOVIES": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNREoyZUc0U0FtVnVLQUFQAQ",
"THEATER": "CAAqJAgKIh5DQkFTRUFvS0wyMHZNRE54YzJSd2F4SUNaVzRvQUFQAQ",
"SOCCER": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNREoyZURRU0FtVnVLQUFQAQ",
"CYCLING": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNREZ6WjJ3U0FtVnVLQUFQAQ",
"MOTOR SPORTS": "CAAqJAgKIh5DQkFTRUFvS0wyMHZNRFF4TUhSMGFCSUNaVzRvQUFQAQ",
"TENNIS": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNRGRpY3pBU0FtVnVLQUFQAQ",
"COMBAT SPORTS": "CAAqIggKIhxDQkFTRHdvSkwyMHZNRFZyWXpJNUVnSmxiaWdBUAE",
"BASKETBALL": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNREU0ZHpnU0FtVnVLQUFQAQ",
"BASEBALL": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNREU0YW5vU0FtVnVLQUFQAQ",
"FOOTBALL": "CAAqIAgKIhpDQkFTRFFvSEwyMHZNR3B0WHhJQ1pXNG9BQVAB",
"SPORTS BETTING": "CAAqIggKIhxDQkFTRHdvSkwyMHZNRFIwTXpsa0VnSmxiaWdBUAE",
"WATER SPORTS": "CAAqIggKIhxDQkFTRHdvSkwyMHZNREptYUdSbUVnSmxiaWdBUAE",
"HOCKEY": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNRE4wYlhJU0FtVnVLQUFQAQ",
"GOLF": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNRE0zYUhvU0FtVnVLQUFQAQ",
"CRICKET": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNRGw0Y0Y4U0FtVnVLQUFQAQ",
"RUGBY": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNRFppY2pnU0FtVnVLQUFQAQ",
"ECONOMY": "CAAqIggKIhxDQkFTRHdvSkwyMHZNR2RtY0hNekVnSmxiaWdBUAE",
"PERSONAL FINANCE": "CAAqIggKIhxDQkFTRHdvSkwyMHZNREY1Tm1OeEVnSmxiaWdBUAE",
"FINANCE": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNREpmTjNRU0FtVnVLQUFQAQ",
"DIGITAL CURRENCIES": "CAAqJAgKIh5DQkFTRUFvS0wyMHZNSEk0YkhsM054SUNaVzRvQUFQAQ",
"MOBILE": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNRFV3YXpnU0FtVnVLQUFQAQ",
"ENERGY": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNREp0YlY4U0FtVnVLQUFQAQ",
"GAMING": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNREZ0ZHpFU0FtVnVLQUFQAQ",
"INTERNET SECURITY": "CAAqIggKIhxDQkFTRHdvSkwyMHZNRE5xWm01NEVnSmxiaWdBUAE",
"GADGETS": "CAAqIggKIhxDQkFTRHdvSkwyMHZNREp0WmpGdUVnSmxiaWdBUAE",
"VIRTUAL REALITY": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNRGRmYm5rU0FtVnVLQUFQAQ",
"ROBOTICS": "CAAqJAgKIh5DQkFTRUFvS0wyMHZNREp3TUhRMVpoSUNaVzRvQUFQAQ",
"NUTRITION": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNRFZrYW1NU0FtVnVLQUFQAQ",
"PUBLIC HEALTH": "CAAqIggKIhxDQkFTRHdvSkwyMHZNREpqYlRZeEVnSmxiaWdBUAE",
"MENTAL HEALTH": "CAAqIggKIhxDQkFTRHdvSkwyMHZNRE40TmpsbkVnSmxiaWdBUAE",
"MEDICINE": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNRFJ6YURNU0FtVnVLQUFQAQ",
"SPACE": "CAAqIggKIhxDQkFTRHdvSkwyMHZNREU0TXpOM0VnSmxiaWdBUAE",
"WILDLIFE": "CAAqJAgKIh5DQkFTRUFvS0wyY3ZNVE5pWWw5MGN4SUNaVzRvQUFQAQ",
"ENVIRONMENT": "CAAqIggKIhxDQkFTRHdvSkwyMHZNREp3ZVRBNUVnSmxiaWdBUAE",
"NEUROSCIENCE": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNRFZpTm1NU0FtVnVLQUFQAQ",
"PHYSICS": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNRFZ4YW5RU0FtVnVLQUFQAQ",
"GEOLOGY": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNRE0yYUhZU0FtVnVLQUFQAQ",
"PALEONTOLOGY": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNRFZ5YW13U0FtVnVLQUFQAQ",
"SOCIAL SCIENCES": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNRFp1Tm5BU0FtVnVLQUFQAQ",
"EDUCATION": "CAAqJQgKIh9DQkFTRVFvTEwyY3ZNVEl4Y0Raa09UQVNBbVZ1S0FBUAE",
"JOBS": "CAAqJAgKIh5DQkFTRUFvS0wyMHZNRFF4TVRWME1oSUNaVzRvQUFQAQ",
"ONLINE EDUCATION": "CAAqIggKIhxDQkFTRHdvSkwyMHZNRFYwYW5KaUVnSmxiaWdBUAE",
"HIGHER EDUCATION": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNRE55TlRVU0FtVnVLQUFQAQ",
"VEHICLES": "CAAqIAgKIhpDQkFTRFFvSEwyMHZNR3MwYWhJQ1pXNG9BQVAB",
"ARTS-DESIGN": "CAAqIAgKIhpDQkFTRFFvSEwyMHZNR3BxZHhJQ1pXNG9BQVAB",
"BEAUTY": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNREZtTkRNU0FtVnVLQUFQAQ",
"FOOD": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNREozWW0wU0FtVnVLQUFQAQ",
"TRAVEL": "CAAqIggKIhxDQkFTRHdvSkwyMHZNREUwWkhONEVnSmxiaWdBUAE",
"SHOPPING": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNR2hvWkdJU0FtVnVLQUFQAQ",
"HOME": "CAAqIggKIhxDQkFTRHdvSkwyMHZNREZzTUcxM0VnSmxiaWdBUAE",
"OUTDOORS": "CAAqJAgKIh5DQkFTRUFvS0wyMHZNRFZpTUc0M2F4SUNaVzRvQUFQAQ",
"FASHION": "CAAqIQgKIhtDQkFTRGdvSUwyMHZNRE15ZEd3U0FtVnVLQUFQAQ",
"BITCOIN": "CAAqJAgKIh5DQkFTRUFvS0wyMHZNRFZ3TUhKeWVCSUNaVzRvQUFQAQ",
}
372 |
--------------------------------------------------------------------------------
/gnews/utils/utils.py:
--------------------------------------------------------------------------------
1 | import hashlib
2 | import json
3 | import logging
4 | import re
5 |
6 | import requests
7 | from gnews.utils.constants import AVAILABLE_COUNTRIES, AVAILABLE_LANGUAGES, GOOGLE_NEWS_REGEX
8 |
9 |
def lang_mapping(lang):
    """Look up ``lang`` in ``AVAILABLE_LANGUAGES``.

    Returns the mapped value, or ``None`` when ``lang`` is not a key of the
    mapping.
    """
    code = AVAILABLE_LANGUAGES.get(lang)
    return code
12 |
13 |
def country_mapping(country):
    """Look up ``country`` in ``AVAILABLE_COUNTRIES``.

    Returns the mapped value, or ``None`` when ``country`` is not a key of the
    mapping.
    """
    code = AVAILABLE_COUNTRIES.get(country)
    return code
16 |
17 |
def process_url(item, exclude_websites):
    """Return the article URL for a feed entry, or ``None`` if it is excluded.

    Args:
        item: Mapping describing one feed entry. ``item['source']['href']`` is
            read as the publisher URL and ``item['link']`` as the article URL.
        exclude_websites: Iterable of domains such as ``'cnn.com'``. An entry
            whose publisher URL is on one of these domains (with or without a
            ``www.`` prefix) is dropped. ``None`` or an empty iterable disables
            the filter.

    Returns:
        ``None`` when the publisher is excluded. Otherwise the article URL;
        when that URL matches ``GOOGLE_NEWS_REGEX`` it is replaced by the
        ``Location`` header of a HEAD request, if the response carries one.
    """
    # Tolerate entries without a source instead of raising AttributeError.
    source = (item.get('source') or {}).get('href') or ''

    for website in exclude_websites or ():
        # Escape the domain so "." is literal, and require the host to end
        # right after it, so "cnn.com" does not also exclude "cnnxcom.org"
        # or "cnn.community.org".
        pattern = rf'^https?://(www\.)?{re.escape(website.lower())}(?=[/:?#]|$)'
        if re.match(pattern, source, flags=re.IGNORECASE):
            return None

    url = item.get('link')
    if url and re.match(GOOGLE_NEWS_REGEX, url):
        try:
            # A timeout keeps one unresponsive redirect from blocking forever.
            url = requests.head(url, timeout=10).headers.get('location', url)
        except requests.RequestException as err:
            # Best effort: keep the unresolved link when the lookup fails.
            logging.getLogger(__name__).warning(
                "Could not resolve redirect for %s: %s", url, err)
    return url
27 |
--------------------------------------------------------------------------------
/imgs/gnews.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ranahaani/GNews/5058075ba13a6be20a101fbdc84294d396150510/imgs/gnews.gif
--------------------------------------------------------------------------------
/imgs/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ranahaani/GNews/5058075ba13a6be20a101fbdc84294d396150510/imgs/logo.png
--------------------------------------------------------------------------------
/index.rst:
--------------------------------------------------------------------------------
1 | [![Contributors][contributors-shield]][contributors-url]
2 | [![Forks][forks-shield]][forks-url]
3 | [![Stargazers][stars-shield]][stars-url]
4 | [![Issues][issues-shield]][issues-url]
5 | [![MIT License][license-shield]][license-url]
6 | [![Download][download-sheild]][download-url]
7 | [![LinkedIn][linkedin-shield]][linkedin-url]
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
GNews
17 |
18 |
A happy and lightweight Python package that provides an API to search for articles on Google News and returns a usable JSON response!
20 |
21 | Explore the docs »
22 |
23 |
24 | View Demo
25 | ·
26 | Report Bug
27 | ·
28 | Request Feature
29 |
30 |
31 |
32 |
33 | Table of Contents
34 |
35 | -
36 | About
37 |
40 |
41 | -
42 | Getting Started
43 |
46 |
49 |
50 | -
51 | Usage
52 |
55 |
58 |
61 |
64 |
67 |
70 |
73 |
76 |
77 | - To do
78 | - Roadmap
79 | - Contributing
80 | - License
81 | - Contact
82 | - Acknowledgements
83 |
84 |
85 |
86 |
87 |
88 | ## About GNews
89 |
🚩 GNews is a happy and lightweight Python package that searches the Google News RSS feed and returns a usable JSON
response \
🚩 You can also fetch the full article (**no need to write scrapers for fetching articles anymore**)
93 |
Google News covers **141+ countries** and **41+ languages**. At the bottom left of the Google News page you
can find a `Language & region` section that lists all of the supported combinations.
96 |
97 | ### Demo
98 |
99 | [![GNews Demo][demo-gif]](https://github.com/ranahaani/GNews)
100 |
101 |
102 |
103 | ## Getting Started
104 |
105 | This is an example of how you may give instructions on setting up your project locally. To get a local copy up and
106 | running follow these simple example steps.
107 |
108 | ### Installation
109 |
110 | ``` shell
111 | pip install gnews
112 | ```
113 |
114 | ### Setup with Docker
115 |
116 | #### Developing with docker
117 |
118 | 1. Install [docker and docker-compose](https://docs.docker.com/get-docker/).
2. Set up your `.env` file with the MongoDB credentials.
120 | 3. Run `docker-compose up --build`
121 |
122 | #### Install using clone
123 |
1. Clone this repository `git clone https://github.com/ranahaani/GNews.git`
2. Create and activate your virtual environment `virtualenv gnews && source gnews/bin/activate`
126 | 3. Install the requirements with `pip install -r requirements.txt`
127 |
128 |
129 |
130 | ### Example usage
131 |
132 | ```python
133 | from gnews import GNews
134 |
135 | google_news = GNews()
136 | pakistan_news = google_news.get_news('Pakistan')
137 | print(pakistan_news[0])
138 | ```
139 |
140 | ```
141 | [{
142 | 'publisher': 'Aljazeera.com',
143 | 'description': 'Pakistan accuses India of stoking conflict in Indian Ocean '
144 | 'Aljazeera.com',
145 | 'published date': 'Tue, 16 Feb 2021 11:50:43 GMT',
146 | 'title': 'Pakistan accuses India of stoking conflict in Indian Ocean - '
147 | 'Aljazeera.com',
148 | 'url': 'https://www.aljazeera.com/news/2021/2/16/pakistan-accuses-india-of-nuclearizing-indian-ocean'
149 | },
150 | ...]
151 | ```
152 |
153 | ### Get top news
154 |
155 | * `GNews.get_top_news()`
156 |
157 | ### Get news by keyword
158 |
159 | * `GNews.get_news(keyword)`
160 |
161 | ### Get news by major topic
162 |
163 | * `GNews.get_news_by_topic(topic)`
* Available topics: `WORLD, NATION, BUSINESS, TECHNOLOGY, ENTERTAINMENT, SPORTS, SCIENCE, HEALTH`.
165 |
166 | ### Get news by geo location
167 |
168 | * `GNews.get_news_by_location(location)`
169 | * location can be name of city/state/country
170 |
171 | ### Results specification
172 |
173 | * It's possible to pass proxy, country, language, period, exclude websites and size during initialization
174 |
175 | ```python
176 | google_news = GNews(language='en', country='US', period='7d', max_results=10, exclude_websites=['yahoo.com', 'cnn.com'],
177 | proxy=proxy)
178 | ```
179 |
* Or change them on an existing object
181 |
182 | ```python
183 | google_news.period = '7d' # News from last 7 days
184 | google_news.results = 10 # number of responses across a keyword
185 | google_news.country = 'United States' # News from a specific country
186 | google_news.language = 'english' # News in a specific language
187 | google_news.exclude_websites = ['yahoo.com', 'cnn.com'] # Exclude news from specific website i.e Yahoo.com and CNN.com
188 | ```
189 |
190 | The format of the timeframe is a string comprised of a number, followed by a letter representing the time operator. For
191 | example 1y would signify 1 year. Full list of operators below:
192 |
193 | ```
194 | - h = hours (eg: 12h)
195 | - d = days (eg: 7d)
196 | - m = months (eg: 6m)
197 | - y = years (eg: 1y)
198 | ```
199 |
200 | #### Supported Countries
201 |
202 | ```python
203 | print(google_news.AVAILABLE_COUNTRIES)
204 |
205 | {'Australia': 'AU', 'Botswana': 'BW', 'Canada ': 'CA', 'Ethiopia': 'ET', 'Ghana': 'GH', 'India ': 'IN',
206 | 'Indonesia': 'ID', 'Ireland': 'IE', 'Israel ': 'IL', 'Kenya': 'KE', 'Latvia': 'LV', 'Malaysia': 'MY', 'Namibia': 'NA',
207 | 'New Zealand': 'NZ', 'Nigeria': 'NG', 'Pakistan': 'PK', 'Philippines': 'PH', 'Singapore': 'SG', 'South Africa': 'ZA',
208 | 'Tanzania': 'TZ', 'Uganda': 'UG', 'United Kingdom': 'GB', 'United States': 'US', 'Zimbabwe': 'ZW',
209 | 'Czech Republic': 'CZ', 'Germany': 'DE', 'Austria': 'AT', 'Switzerland': 'CH', 'Argentina': 'AR', 'Chile': 'CL',
210 | 'Colombia': 'CO', 'Cuba': 'CU', 'Mexico': 'MX', 'Peru': 'PE', 'Venezuela': 'VE', 'Belgium ': 'BE', 'France': 'FR',
211 | 'Morocco': 'MA', 'Senegal': 'SN', 'Italy': 'IT', 'Lithuania': 'LT', 'Hungary': 'HU', 'Netherlands': 'NL',
212 | 'Norway': 'NO', 'Poland': 'PL', 'Brazil': 'BR', 'Portugal': 'PT', 'Romania': 'RO', 'Slovakia': 'SK', 'Slovenia': 'SI',
213 | 'Sweden': 'SE', 'Vietnam': 'VN', 'Turkey': 'TR', 'Greece': 'GR', 'Bulgaria': 'BG', 'Russia': 'RU', 'Ukraine ': 'UA',
214 | 'Serbia': 'RS', 'United Arab Emirates': 'AE', 'Saudi Arabia': 'SA', 'Lebanon': 'LB', 'Egypt': 'EG',
215 | 'Bangladesh': 'BD', 'Thailand': 'TH', 'China': 'CN', 'Taiwan': 'TW', 'Hong Kong': 'HK', 'Japan': 'JP',
216 | 'Republic of Korea': 'KR'}
217 | ```
218 |
219 | #### Supported Languages
220 |
221 | ```python
222 | print(google_news.AVAILABLE_LANGUAGES)
223 |
224 | {'english': 'en', 'indonesian': 'id', 'czech': 'cs', 'german': 'de', 'spanish': 'es-419', 'french': 'fr',
225 | 'italian': 'it', 'latvian': 'lv', 'lithuanian': 'lt', 'hungarian': 'hu', 'dutch': 'nl', 'norwegian': 'no',
226 | 'polish': 'pl', 'portuguese brasil': 'pt-419', 'portuguese portugal': 'pt-150', 'romanian': 'ro', 'slovak': 'sk',
227 | 'slovenian': 'sl', 'swedish': 'sv', 'vietnamese': 'vi', 'turkish': 'tr', 'greek': 'el', 'bulgarian': 'bg',
228 | 'russian': 'ru', 'serbian': 'sr', 'ukrainian': 'uk', 'hebrew': 'he', 'arabic': 'ar', 'marathi': 'mr', 'hindi': 'hi',
229 | 'bengali': 'bn', 'tamil': 'ta', 'telugu': 'te', 'malyalam': 'ml', 'thai': 'th', 'chinese simplified': 'zh-Hans',
230 | 'chinese traditional': 'zh-Hant', 'japanese': 'ja', 'korean': 'ko'}
231 | ```
232 |
233 | ### Article Properties
234 |
- `get_news` returns a list of dictionaries with the following keys: `title`, `published date`, `description`, `url`, `publisher`.
236 |
237 | | Properties | Description | Example |
238 | |--------------|------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
239 | | title | Title of the article | IMF Staff and Pakistan Reach Staff-Level Agreement on the Pending Reviews Under the Extended Fund Facility |
240 | | url | Google news link to article | [Article Link](http://news.google.com/news/url?sa=t&fd=R&ct2=us&usg=AFQjCNGNR4Qg8LGbjszT1yt2s2lMXvvufQ&clid=c3a7d30bb8a4878e06b80cf16b898331&cid=52779522121279&ei=VQU7WYjiFoLEhQHIs4HQCQ&url=https://www.theguardian.com/commentisfree/2017/jun/07/why-dont-unicorns-exist-google) |
241 | | published date | Published date | Wed, 07 Jun 2017 07:01:30 GMT |
242 | | description | Short description of article | IMF Staff and Pakistan Reach Staff-Level Agreement on the Pending Reviews Under the Extended Fund Facility ... |
| publisher | Publisher of article | The Guardian |
244 |
245 | ## Getting full article
246 |
247 | * To read a full article you can either:
248 | * Navigate to the url directly in your browser, or
249 | * Use `newspaper3k` library to scrape the article
250 | * The article url, needed for both methods, is accessed as `article['url']`.
251 |
252 | #### Using newspaper3k
253 |
254 | 1. Install the library - `pip3 install newspaper3k`.
2. Use the `get_full_article` method of `GNews`, which creates a `newspaper.article.Article` object from the URL.
256 |
257 | ```python
258 | from gnews import GNews
259 |
260 | google_news = GNews()
261 | json_resp = google_news.get_news('Pakistan')
262 | article = google_news.get_full_article(
263 | json_resp[0]['url']) # newspaper3k instance, you can access newspaper3k all attributes in article
264 | ```
265 |
266 | This new object contains `title`, `text` (full article) or `images` attributes. Examples:
267 |
268 | ```python
269 | article.title
270 | ```
271 |
> IMF Staff and Pakistan Reach Staff-Level Agreement on the Pending Reviews Under the Extended Fund Facility
273 |
274 | ```python
275 | article.text
276 | ```
277 |
278 | > End-of-Mission press releases include statements of IMF staff teams that convey preliminary findings after a mission. The views expressed are those of the IMF staff and do not necessarily represent the views of the IMF’s Executive Board.\n\nIMF staff and the Pakistani authorities have reached an agreement on a package of measures to complete second to fifth reviews of the authorities’ reform program supported by the IMF Extended Fund Facility (EFF) ..... (full article)
279 |
280 | ```python
281 | article.images
282 | ```
283 |
> `{'https://www.imf.org/~/media/Images/IMF/Live-Page/imf-live-rgb-h.ashx?la=en', 'https://www.imf.org/-/media/Images/IMF/Data/imf-logo-eng-sep2019-update.ashx', 'https://www.imf.org/-/media/Images/IMF/Data/imf-seal-shadow-sep2019-update.ashx', 'https://www.imf.org/-/media/Images/IMF/Social/TW-Thumb/twitter-seal.ashx', 'https://www.imf.org/assets/imf/images/footer/IMF_seal.png'}`
286 |
287 | ```python
288 | article.authors
289 | ```
290 |
291 | > `[]`
292 |
293 | Read full documentation for `newspaper3k`
294 | [newspaper3k](https://newspaper.readthedocs.io/en/latest/user_guide/quickstart.html#parsing-an-article)
295 |
296 |
297 | ## Todo
298 |
299 | - Save to MongoDB
300 | - Save to SQLite
301 | - Save to JSON
302 | - Save to .CSV file
303 | - More than 100 articles
304 |
305 |
306 |
307 | ## Roadmap
308 |
309 | See the [open issues](https://github.com/ranahaani/GNews/issues) for a list of proposed features (and known issues).
310 |
311 |
312 |
313 |
314 |
315 | ## Contributing
316 |
317 | Contributions are what make the open source community such an amazing place to learn, inspire, and create. Any
318 | contributions you make are **greatly appreciated**.
319 |
320 | 1. Fork the Project
321 | 2. Create your Feature Branch (`git checkout -b feature/AmazingFeature`)
322 | 3. Commit your Changes (`git commit -m 'Add some AmazingFeature'`)
323 | 4. Push to the Branch (`git push origin feature/AmazingFeature`)
324 | 5. Open a Pull Request
325 |
326 |
327 |
328 | ## License
329 |
330 | Distributed under the MIT License. See `LICENSE.txt` for more information.
331 |
332 |
333 |
334 |
335 |
336 | ## Contact
337 |
338 | Muhammad Abdullah - [@ranahaani](https://twitter.com/ranahaani) - ranahaani@gmail.com
339 |
340 | Project Link: [https://github.com/ranahaani/GNews](https://github.com/ranahaani/GNews)
341 |
342 | [contributors-shield]: https://img.shields.io/github/contributors/ranahaani/GNews.svg?style=for-the-badge
343 |
344 | [contributors-url]: https://github.com/ranahaani/GNews/graphs/contributors
345 |
346 | [forks-shield]: https://img.shields.io/github/forks/ranahaani/GNews.svg?style=for-the-badge
347 |
348 | [forks-url]: https://github.com/ranahaani/GNews/network/members
349 |
350 | [stars-shield]: https://img.shields.io/github/stars/ranahaani/GNews.svg?style=for-the-badge
351 |
352 | [stars-url]: https://github.com/ranahaani/GNews/stargazers
353 |
354 | [issues-shield]: https://img.shields.io/github/issues/ranahaani/GNews.svg?style=for-the-badge
355 |
356 | [issues-url]: https://github.com/ranahaani/GNews/issues
357 |
358 | [license-shield]: https://img.shields.io/github/license/ranahaani/GNews.svg?style=for-the-badge
359 |
360 | [license-url]: https://github.com/ranahaani/GNews/blob/master/LICENSE.txt
361 |
362 | [download-sheild]: https://img.shields.io/pypi/dm/GNews.svg?style=for-the-badge
363 |
364 | [download-url]: https://pypistats.org/packages/gnews
365 |
366 | [linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555
367 |
368 | [linkedin-url]: https://linkedin.com/in/ranahaani
369 |
370 | [demo-gif]: https://github.com/ranahaani/GNews/raw/master/imgs/gnews.gif
371 |
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | from gnews import GNews
2 |
3 | google_news = GNews()
4 |
5 |
6 | google_news.start_date = (2021, 1, 1)
7 | google_news.end_date = (2021, 2, 1)
8 | google_news.max_results = 2
9 |
10 | result = google_news.get_news('"WORLD"')
11 | print(result)
12 |
13 | print(google_news.get_news_by_topic.__doc__)
14 |
15 | google_news.get_news_by_location("WORLD")
16 |
17 | result = google_news.get_news('"WORLD"')
18 | print(result)
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | feedparser~=6.0.2
2 | beautifulsoup4>=4.9.3,<5
3 | dnspython
4 | requests
5 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 |
3 | with open('requirements.txt') as f:
4 | requirements = f.read().splitlines()
5 |
6 | with open("README.md", "r") as fh:
7 | long_description = fh.read()
8 |
9 | setup(
10 | name='gnews',
11 | version='0.4.1',
12 | # setup_requires=['setuptools_scm'],
13 | # use_scm_version={
14 | # "local_scheme": "no-local-version"
15 | # },
16 |
17 | author="Muhammad Abdullah",
18 | author_email="ranahaani@gmail.com",
19 | description='Provide an API to search for articles on Google News and returns a usable JSON response.',
20 | long_description=long_description,
21 | long_description_content_type="text/markdown",
22 | packages=find_packages(),
23 | install_requires=requirements,
24 | url='https://github.com/ranahaani/GNews/',
25 | project_urls={
26 | 'Documentation': 'https://github.com/ranahaani/GNews/blob/master/README.md',
27 | 'Source': 'https://github.com/ranahaani/GNews/',
28 | 'Tracker': 'https://github.com/ranahaani/GNews/issues',
29 | },
30 | classifiers=[
31 | 'Development Status :: 5 - Production/Stable',
32 | 'Intended Audience :: Developers',
33 | 'Programming Language :: Python :: 3',
34 | 'Programming Language :: Python :: 3.8',
35 | 'Programming Language :: Python :: 3.9',
36 | 'Programming Language :: Python :: 3.10',
37 | 'Programming Language :: Python :: 3.11',
38 | 'License :: OSI Approved :: MIT License',
39 | 'Operating System :: OS Independent',
40 | ],
41 | )
42 |
--------------------------------------------------------------------------------
/tests/test_gnews.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from gnews import GNews
3 |
4 | class TestGNews(unittest.TestCase):
5 | def setUp(self):
6 | # Create a GNews instance with default parameters for testing
7 | self.gnews = GNews()
8 |
9 | def test_get_news(self):
10 | # Test that get_news returns a non-empty list of news articles
11 | key = "Google"
12 | news_articles = self.gnews.get_news(key)
13 | self.assertTrue(isinstance(news_articles, list))
14 | self.assertTrue(len(news_articles) > 0)
15 |
16 | def test_get_top_news(self):
17 | # Test that get_top_news returns a non-empty list of news articles
18 | top_news_articles = self.gnews.get_top_news()
19 | self.assertTrue(isinstance(top_news_articles, list))
20 | self.assertTrue(len(top_news_articles) > 0)
21 |
22 | def test_get_news_by_topic(self):
23 | # Test that get_news_by_topic returns a non-empty list of news articles for a valid topic
24 | topic = "business"
25 | news_articles = self.gnews.get_news_by_topic(topic)
26 | self.assertTrue(isinstance(news_articles, list))
27 | self.assertTrue(len(news_articles) > 0)
28 |
29 | def test_get_news_by_location(self):
30 | # Test that get_news_by_location returns a non-empty list of news articles for a valid location
31 | location = "India"
32 | news_articles = self.gnews.get_news_by_location(location)
33 | self.assertTrue(isinstance(news_articles, list))
34 | self.assertTrue(len(news_articles) > 0)
35 |
36 | def test_get_news_by_site_valid(self):
37 | site = "cnn.com"
38 | news_articles = self.gnews.get_news_by_site(site)
39 | self.assertTrue(isinstance(news_articles, list))
40 | self.assertTrue(len(news_articles) > 0)
41 |
42 | def test_get_news_by_site_invalid(self):
43 | # Test that get_news_by_site returns an empty list for an invalid site domain
44 | site = "invalidsite123.com"
45 | news_articles = self.gnews.get_news_by_site(site)
46 | self.assertEqual(news_articles, [])
47 |
48 | def test_get_news_more_than_100(self):
49 | # Set up a GNews instance with a high max_results value
50 | self.gnews = GNews(max_results=150)
51 | query = "technology"
52 |
53 | # Call get_news with the query
54 | news_articles = self.gnews.get_news(query)
55 |
56 | # Verify the result respects the maximum result cap
57 | self.assertTrue(isinstance(news_articles, list))
58 | self.assertTrue(len(news_articles) > 0)
59 | self.assertTrue(len(news_articles) <= 150, "Should fetch no more than max_results")
60 |
61 | # Ensure no duplicates in the results
62 | urls = [article['url'] for article in news_articles]
63 | self.assertEqual(len(urls), len(set(urls)), "No duplicate articles should be fetched")
64 |
65 | def test_get_full_article(self):
66 | pass
67 | # Test that get_full_article returns a valid article object for a valid URL
68 | # url = "https://www.bbc.com/news/live/world-us-canada-66248859"
69 | # article = self.gnews.get_full_article(url)
70 | # self.assertIsNotNone(article)
71 | # self.assertTrue(hasattr(article, 'title'))
72 | # self.assertTrue(hasattr(article, 'text'))
73 |
74 | if __name__ == '__main__':
75 | unittest.main()
76 |
--------------------------------------------------------------------------------