├── .gitattributes
├── .github
    └── workflows
    │   ├── codeql-analysis.yml
    │   ├── pylint.yml
    │   ├── pypi.yml
    │   └── pytest.yml
├── .gitignore
├── LICENSE
├── MANIFEST.in
├── README.md
├── googlesearch
    ├── LICENSE
    ├── __init__.py
    ├── __main__.py
    ├── constants.py
    ├── exceptions.py
    ├── googlesearch.py
    ├── models.py
    └── utils
    │   ├── __init__.py
    │   └── cleanup.py
├── playground
    └── README.md
├── requirements.txt
├── setup.py
└── tests
    ├── README.md
    └── test_search.py


/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 


--------------------------------------------------------------------------------
/.github/workflows/codeql-analysis.yml:
--------------------------------------------------------------------------------
 1 | # For most projects, this workflow file will not need changing; you simply need
 2 | # to commit it to your repository.
 3 | #
 4 | # You may wish to alter this file to override the set of languages analyzed,
 5 | # or to provide custom queries or build logic.
 6 | #
 7 | # ******** NOTE ********
 8 | # We have attempted to detect the languages in your repository. Please check
 9 | # the `language` matrix defined below to confirm you have the correct set of
10 | # supported CodeQL languages.
11 | #
12 | name: "CodeQL Python Analysis"
13 | 
14 | on:
15 |   push:
16 |     branches: [ main ]
17 |   pull_request:
18 |     # The branches below must be a subset of the branches above
19 |     branches: [ main ]
20 |   schedule:
21 |     - cron: '15 0 * * 6'
22 | 
23 | jobs:
24 |   analyze:
25 |     name: Analyze
26 |     runs-on: ubuntu-latest
27 | 
28 |     strategy:
29 |       fail-fast: false
30 |       matrix:
31 |         language: [ 'python' ]
32 |         # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ]
33 |         # Learn more:
34 |         # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed
35 | 
36 |     steps:
37 |     - name: Checkout repository
38 |       uses: actions/checkout@v2
39 | 
40 |     # Initializes the CodeQL tools for scanning.
41 |     - name: Initialize CodeQL
42 |       uses: github/codeql-action/init@v1
43 |       with:
44 |         languages: ${{ matrix.language }}
45 |         # If you wish to specify custom queries, you can do so here or in a config file.
46 |         # By default, queries listed here will override any specified in a config file.
47 |         # Prefix the list here with "+" to use these queries and those in the config file.
48 |         # queries: ./path/to/local/query, your-org/your-repo/queries@main
49 | 
50 |     # Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
51 |     # If this step fails, then you should remove it and run the build manually (see below)
52 |     - name: Autobuild
53 |       uses: github/codeql-action/autobuild@v1
54 | 
55 |     # ℹ️ Command-line programs to run using the OS shell.
56 |     # 📚 https://git.io/JvXDl
57 | 
58 |     # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
59 |     #    and modify them (or add more) to build your code if your project
60 |     #    uses a compiled language
61 | 
62 |     #- run: |
63 |     #   make bootstrap
64 |     #   make release
65 | 
66 |     - name: Perform CodeQL Analysis
67 |       uses: github/codeql-action/analyze@v1


--------------------------------------------------------------------------------
/.github/workflows/pylint.yml:
--------------------------------------------------------------------------------
 1 | # This workflow installs the Python dependencies and lints the `googlesearch` package with pylint
 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
 3 | 
 4 | name: Pylint
 5 | 
 6 | on:
 7 |   push:
 8 |     branches: [ main ]
 9 |   pull_request:
10 |     branches: [ main ]
11 | 
12 | jobs:
13 |   build:
14 | 
15 |     runs-on: ubuntu-latest
16 |     strategy:
17 |       matrix:
18 |         python-version: ['3.9']  # quoted: versions are strings (an unquoted 3.10 would be read as 3.1)
19 | 
20 |     steps:
21 |     - uses: actions/checkout@v2
22 |     - name: Set up Python ${{ matrix.python-version }}
23 |       uses: actions/setup-python@v2
24 |       with:
25 |         python-version: ${{ matrix.python-version }}
26 |     - name: Install dependencies
27 |       run: |
28 |         python -m pip install --upgrade pip
29 |         python -m pip install pylint
30 |         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
31 |     - name: Lint with pylint
32 |       run: pylint googlesearch
33 |       continue-on-error: true


--------------------------------------------------------------------------------
/.github/workflows/pypi.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will upload a Python Package using Twine when a release is created
 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
 3 | 
 4 | name: PyPI Upload
 5 | 
 6 | on:
 7 |   release:
 8 |     types: [created]
 9 | 
10 | jobs:
11 |   deploy:
12 | 
13 |     runs-on: ubuntu-latest
14 | 
15 |     steps:
16 |     - uses: actions/checkout@v2
17 |     - name: Set up Python
18 |       uses: actions/setup-python@v2
19 |       with:
20 |         python-version: '3.9'
21 |     - name: Install dependencies
22 |       run: |
23 |         python -m pip install --upgrade pip
24 |         pip install setuptools wheel twine
25 |     - name: Build and publish
26 |       env:
27 |         TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
28 |         TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
29 |       run: |
30 |         python setup.py sdist bdist_wheel
31 |         twine upload dist/*


--------------------------------------------------------------------------------
/.github/workflows/pytest.yml:
--------------------------------------------------------------------------------
 1 | # This workflow installs the Python dependencies and runs the test suite with pytest on several Python versions
 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
 3 | 
 4 | name: Pytest
 5 | 
 6 | on:
 7 |   push:
 8 |     branches: [ main ]
 9 |   pull_request:
10 |     branches: [ main ]
11 | 
12 | jobs:
13 |   build:
14 | 
15 |     runs-on: ubuntu-latest
16 |     strategy:
17 |       matrix:
18 |         python-version: ['3.5', '3.7', '3.9']  # quoted: versions are strings (an unquoted 3.10 would be read as 3.1)
19 | 
20 |     steps:
21 |     - uses: actions/checkout@v2
22 |     - name: Set up Python ${{ matrix.python-version }}
23 |       uses: actions/setup-python@v2
24 |       with:
25 |         python-version: ${{ matrix.python-version }}
26 |     - name: Install dependencies
27 |       run: |
28 |         python -m pip install --upgrade pip
29 |         python -m pip install pytest
30 |         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
31 |         python3 setup.py install
32 |     - name: Test with pytest
33 |       run: |
34 |         pytest -vv


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # celery beat schedule file
 95 | celerybeat-schedule
 96 | 
 97 | # SageMath parsed files
 98 | *.sage.py
 99 | 
100 | # Environments
101 | .env
102 | .venv
103 | env/
104 | venv/
105 | ENV/
106 | env.bak/
107 | venv.bak/
108 | 
109 | # Spyder project settings
110 | .spyderproject
111 | .spyproject
112 | 
113 | # Rope project settings
114 | .ropeproject
115 | 
116 | # mkdocs documentation
117 | /site
118 | 
119 | # mypy
120 | .mypy_cache/
121 | .dmypy.json
122 | dmypy.json
123 | 
124 | # Pyre type checker
125 | .pyre/
126 | 
127 | # macOS
128 | .DS_Store


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 Animenosekai
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include googlesearch/ *


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # `googlesearch`
  2 | 
  3 | > `googlesearch` lets you use Google Searching capabilities right from your Python code or from your CLI
  4 | 
  5 | ***Make any Google Search right from Python!***
  6 | 
  7 | [![PyPI version](https://badge.fury.io/py/python-googlesearch.svg)](https://pypi.org/project/python-googlesearch/)
  8 | [![PyPI - Downloads](https://img.shields.io/pypi/dm/python-googlesearch)](https://pypistats.org/packages/python-googlesearch)
  9 | [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/python-googlesearch)](https://pypi.org/project/python-googlesearch/)
 10 | [![PyPI - Status](https://img.shields.io/pypi/status/python-googlesearch)](https://pypi.org/project/python-googlesearch/)
 11 | [![GitHub - License](https://img.shields.io/github/license/Animenosekai/googlesearch)](https://github.com/Animenosekai/googlesearch/blob/master/LICENSE)
 12 | [![GitHub top language](https://img.shields.io/github/languages/top/Animenosekai/googlesearch)](https://github.com/Animenosekai/googlesearch)
 13 | [![CodeQL Checks Badge](https://github.com/Animenosekai/googlesearch/workflows/CodeQL%20Python%20Analysis/badge.svg)](https://github.com/Animenosekai/googlesearch/actions?query=workflow%3ACodeQL)
 14 | [![Pytest](https://github.com/Animenosekai/googlesearch/actions/workflows/pytest.yml/badge.svg)](https://github.com/Animenosekai/googlesearch/actions/workflows/pytest.yml)
 15 | ![Code Size](https://img.shields.io/github/languages/code-size/Animenosekai/googlesearch)
 16 | ![Repo Size](https://img.shields.io/github/repo-size/Animenosekai/googlesearch)
 17 | ![Issues](https://img.shields.io/github/issues/Animenosekai/googlesearch)
 18 | 
 19 | ## Getting Started
 20 | 
 21 | These instructions will get you a copy of the project up and running on your local machine for development and testing purposes. See deployment for notes on how to deploy the project on a live system.
 22 | 
 23 | ### Prerequisites
 24 | 
 25 | You will need Python 3 to use this module
 26 | 
 27 | ```bash
 28 | # vermin output
 29 | Minimum required versions: 3.2
 30 | Incompatible versions:     2
 31 | ```
 32 | 
 33 | According to Vermin, Python 3.2 is needed
 34 | 
 35 | Always check if your Python version works with `googlesearch` before using it in production
 36 | 
 37 | ## Installing
 38 | 
 39 | ### Option 1: From PyPI
 40 | 
 41 | ```bash
 42 | pip install python-googlesearch
 43 | ```
 44 | 
 45 | **Make sure to download `python-googlesearch` as `googlesearch` cannot be given to any package on PyPI**
 46 | 
 47 | **Even if you download `python-googlesearch`, `googlesearch` is the name used for the imports and the CLI, for convenience**
 48 | 
 49 | ### Option 2: From Git
 50 | 
 51 | ```bash
 52 | pip install git+https://github.com/Animenosekai/googlesearch
 53 | ```
 54 | 
 55 | You can check if you successfully installed it by printing out its version:
 56 | 
 57 | ```bash
 58 | $ python -c "import googlesearch; print(googlesearch.__version__)"
 59 | # output:
 60 | googlesearch v1.1.1
 61 | ```
 62 | 
 63 | <!--If a CLI version is available-->
 64 | 
 65 | or just:
 66 | 
 67 | ```bash
 68 | $ googlesearch --version
 69 | # output:
 70 | googlesearch v1.1.1
 71 | ```
 72 | 
 73 | ## Usage
 74 | 
 75 | You can use googlesearch in Python by importing it in your script:
 76 | 
 77 | ```python
 78 | >>> from googlesearch import Search
 79 | >>> python_results = Search("Python")
 80 | >>> python_results.results
 81 | [<SearchResult title="Python.org" (www.python.org)>, <SearchResult title="Python" ()>, <SearchResult title="Python (langage) — Wikipédia" (fr.wikipedia.org › wiki › Python_(langage))>, ...]
 82 | ```
 83 | 
 84 | ### CLI usage
 85 | 
 86 | You can use googlesearch in other apps by accessing it through the CLI version:
 87 | 
 88 | ```bash
 89 | $ googlesearch --query Python
 90 | {
 91 |     "query": "Python",
 92 |     "results": [
 93 |         {
 94 |             "url": "https://www.python.org/",
 95 |             "title": "Welcome to Python.org",
 96 |             "displayedURL": "www.python.org",
 97 |             "description": "The official home of the Python Programming Language.\nDownloads \u00b7 Python For Beginners \u00b7 Quotes about Python \u00b7 Python Essays"
 98 |         },
 99 |         [...]
100 |     ],
101 |     "relatedSearches": [
102 |         "Python serpent",
103 |         "Python openclassroom",
104 |         [...]
105 |     ],
106 |     "success": true
107 | }
108 | ```
109 | 
110 | #### Interactive Shell (REPL)
111 | 
112 | An interactive version of the CLI is also available
113 | 
114 | ```bash
115 | $ googlesearch
116 | Enter '.quit' to exit googlesearch
117 | [?] (googlesearch ~ Query) > : ... # enter your query
118 | 
119 | [?] What do you want to do?: # select the result with your keyboard's arrows and [enter]
120 | 
121 | —————————————————SEARCH RESULT—————————————————
122 | [...] # site's name
123 | 
124 | Description: # the site's description
125 | URL: ... # site's URL
126 | Related Searches: # a max of 3 related searches
127 | ```
128 | 
129 | **You can get help on this version by using:**
130 | 
131 | ```bash
132 | $ googlesearch --help
133 | usage: googlesearch [-h] [--version] [--query QUERY] [--langua...
134 | ```
135 | 
136 | ### As a Python module
137 | 
138 | ## Search
139 | 
140 | The search class represents a Google Search.
141 | 
142 | It lets you retrieve the different results/websites (`Search.results`) and the related searches (`Search.related_searches`)
143 | 
144 | ### How to use
145 | 
146 | This class loads its results lazily.
147 | 
148 | When you initialize it with `Search()`, it takes a `query` as the required parameter and the following parameters as optional parameters:
149 | 
150 | - `language`: The language to request the results in (not every result will necessarily be in the given language, as the results are biased by many factors, including the location of your IP address). This needs to be a two-letter ISO 639-1 language code (default: "en")
151 | - `number_of_results`: The max number of results to be passed to Google Search while requesting the results (This won't give you the **exact** number of results) (default: 10)
152 | - `retry_count`: A positive integer representing the number of retries done before raising an exception (useful as `googlesearch` seems to fail sometimes) (default: 3)
153 | - `parser`: The BeautifulSoup parser to use (default: "html.parser")
154 | 
155 | It will only load and parse the website when `results` or `related_searches` is called.
156 | 
157 | `parser` is the `BeautifulSoup` parser used to parse the website.
158 | 
159 | `results` is a list of `googlesearch.models.SearchResultElement`.
160 | 
161 | `related_searches` is a list of `Search` elements.
162 | 
163 | ## SearchResultElement
164 | 
165 | This class represents a result and is initialized by `googlesearch`.
166 | 
167 | It holds the following information:
168 | 
169 | - `url`: The URL of the website
170 | - `title`: The title of the website
171 | - `displayed_url`: The URL displayed on Google Search
172 | - `description`: The description of the website
173 | 
174 | ### Extra
175 | 
176 | Every class has the `as_dict` function, which converts the object into a dictionary. For `Search`, the `as_dict` function converts the other `Search` objects in `related_searches` to strings containing their query.
177 | 
178 | ### Exceptions
179 | 
180 | All of the exceptions inherit from the `GoogleSearchException` exception.
181 | 
182 | You can find a list of exceptions in the `exceptions.py` file
183 | 
184 | ## Deployment
185 | 
186 | This module is currently in development and might contain bugs.
187 | 
188 | Feel free to use it in production if you consider it suitable for your use case, keeping in mind that you may encounter issues.
189 | 
190 | ## Built With
191 | 
192 | - [beautifulsoup4](https://pypi.org/project/beautifulsoup4/) - To parse the HTML
193 | - [requests](https://github.com/psf/requests) - To make HTTP requests
194 | - [pyuseragents](https://github.com/Animenosekai/useragents) - To create the `User-Agent` HTTP header
195 | - [inquirer](https://github.com/magmax/python-inquirer) - To make a beautiful CLI interface
196 | 
197 | ## Authors
198 | 
199 | - **Anime no Sekai** - *Initial work* - [Animenosekai](https://github.com/Animenosekai)
200 | 
201 | ## License
202 | 
203 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for more details
204 | 


--------------------------------------------------------------------------------
/googlesearch/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 Animenosekai
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/googlesearch/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | googlesearch\n
 3 | Make any Google Search right from Python!
 4 | 
 5 | © Anime no Sekai — 2021
 6 | """
 7 | 
 8 | from googlesearch.googlesearch import Search
 9 | from googlesearch import exceptions, models, constants
10 | 
11 | # For backward compatibility
12 | # old_name = new_name
13 | # if something has been renamed in the new versions
14 | 
15 | __author__      = 'Anime no Sekai'
16 | __copyright__   = 'Copyright 2021, googlesearch'
17 | __credits__     = ['animenosekai']
18 | __license__     = 'MIT License'
19 | __version__     = 'googlesearch v1.1.1'
20 | __maintainer__  = 'Anime no Sekai'
21 | __email__       = 'niichannomail@gmail.com'
22 | __status__      = 'Stable'


--------------------------------------------------------------------------------
/googlesearch/__main__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | File containing the CLI version of googlesearch
 3 | """
 4 | 
 5 | import argparse
 6 | import inquirer
 7 | import googlesearch
 8 | from urllib.parse import urlparse
 9 | from json import dumps
10 | 
11 | INPUT_PREFIX = "(\033[90mgooglesearch ~ \033[0m{action}) > "
12 | 
13 | def boolean_type(value):
14 |     """
15 |     Defaults to False
16 |     """
17 |     return value in {'yes', 'true', 't', 'y', '1'}
18 |     
19 | def main():
20 |     parser = argparse.ArgumentParser(prog='googlesearch', description='This module lets you use Google Searching capabilities right from your code')
21 | 
22 |     parser.add_argument('--version', '-v', action='version', version=googlesearch.__version__)
23 |     
24 |     # optional
25 |     parser.add_argument('--query', '-q', type=str, help='The string query to search on Google (If not specified, the interactive mode will be enabled)', required=False, default=None)
26 |     parser.add_argument('--language', '-l', type=str, help='The language to be used to retrieve the results (Default: en)', required=False, default="en")
27 |     parser.add_argument('--number-of-results', '-n', type=int, help='The number of results to retrieve (Warning: a high number of results might not work) (Default: 10)', required=False, default=10)
28 |     parser.add_argument('--minify', '-m', type=boolean_type, help='If the response in the non-interactive mode should be minified or not (Default: False)', required=False, default=False)
29 |     parser.add_argument('--retry-count', '-r', type=int, help='The number of times the request should be retried before raising an exception (Default: 3)', required=False, default=3)
30 |     parser.add_argument('--parser', '-p', type=str, help='The HTML parser to use (Default: html.parser)', required=False, default="html.parser")
31 |     
32 |     args = parser.parse_args()
33 | 
34 |     if args.query is not None:
35 |         try:
36 |             result = googlesearch.Search(query=args.query, language=args.language, number_of_results=args.number_of_results, retry_count=args.retry_count, parser=args.parser).as_dict()
37 |             result["success"] = True
38 |         except googlesearch.exceptions.GoogleSearchException:
39 |             result = {"success": False}
40 |         if args.minify:
41 |             print(dumps(result, separators=(",", ":")))
42 |         else:
43 |             print(dumps(result, indent=4))
44 |     else:
45 |         while True:
46 |             print("\033[96mEnter '.quit' to exit googlesearch\033[0m")
47 |             answers = inquirer.prompt([
48 |                 inquirer.Text(
49 |                     name='query',
50 |                     message=INPUT_PREFIX.format(action="Query")
51 |                 )
52 |             ])
53 |             if answers["query"] == ".quit":
54 |                 break
55 |             result = googlesearch.Search(query=answers["query"], language=args.language, number_of_results=args.number_of_results, retry_count=args.retry_count, parser=args.parser)
56 |             print("")
57 |             try:
58 |                 answers = inquirer.prompt([
59 |                     inquirer.List(
60 |                         name='chosen',
61 |                         message="What do you want to do?",
62 |                         choices=[str(index) + " — " + result.title + " (" + urlparse(result.url).netloc + ")" for index, result in enumerate(result.results, start=1)] + ["Quit"],
63 |                         carousel=True
64 |                     )
65 |                 ])
66 |             except googlesearch.exceptions.GoogleSearchException:
67 |                 print("\033[90mAn error occured while searching up \033[0m" + str(answers["query"]) + " \033[90mon Google\033[0m")
68 |                 continue
69 |             if answers["chosen"] == "Quit":
70 |                 break
71 |             chosen_index = ""
72 |             for element in answers["chosen"]:
73 |                 element = str(element)
74 |                 if element.isdecimal():
75 |                     chosen_index += element
76 |             chosen_result = result.results[int(chosen_index) - 1]
77 |             print("—————————————————SEARCH RESULT—————————————————")
78 |             print("[" + chosen_result.title + "]")
79 |             print("")
80 |             print("\033[90mDescription:\033[0m", chosen_result.description)
81 |             print("\033[90mURL:\033[0m", chosen_result.url)
82 |             print("\033[90mRelated Searches:\033[0m", ", ".join([search.query for index, search in enumerate(result.related_searches) if index < 3]))
83 |             print("")
84 |             print("")


--------------------------------------------------------------------------------
/googlesearch/constants.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Constants.py
 3 | 
 4 | Stores the constants for the Python Google Search module
 5 | """
 6 | # request: search URL template, value sent as the "CONSENT" cookie and the default HTTP headers
 7 | BASE = "https://www.google.com"
 8 | BASE_URL = BASE + "/search?client=safari&rls=en&gbv=1&q={query}&hl={language}&num={number}"
 9 | CONSENT_VALUE = "YES+cb"
10 | HEADERS = {
11 |     "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.3 Safari/605.1.15",
12 |     "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
13 |     "Host": "www.google.com",
14 |     "Accept-Language": "en-us"
15 | }
16 | 
17 | # parser: names of the HTML tags removed from the retrieved page during the cleanup step
18 | # BEAUTIFULSOUP_PARSER = "html.parser"
19 | CLEANUP_TAGS = ["script", "style", "svg", "header", "textarea"]


--------------------------------------------------------------------------------
/googlesearch/exceptions.py:
--------------------------------------------------------------------------------
 1 | """
 2 | File containing the different exceptions which can be raised in googlesearch
 3 | """
 4 | 
 5 | class GoogleSearchException(Exception):
 6 |     def __init__(self, *args: object) -> None:
 7 |         super().__init__(*args)
 8 | 
 9 | class InvalidParameter(GoogleSearchException):
10 |     def __init__(self, *args: object) -> None:
11 |         super().__init__(*args)
12 | 
13 | class RequestError(GoogleSearchException):
14 |     def __init__(self, *args: object) -> None:
15 |         super().__init__(*args)
16 | 
17 | class ParsingError(GoogleSearchException):
18 |     def __init__(self, *args: object) -> None:
19 |         super().__init__(*args)
20 | 
21 | class CleanupError(ParsingError):
22 |     def __init__(self, *args: object) -> None:
23 |         super().__init__(*args)
24 | 
25 | class RelatedSearchError(ParsingError):
26 |     def __init__(self, *args: object) -> None:
27 |         super().__init__(*args)
28 | 
29 | class ResultsError(ParsingError):
30 |     def __init__(self, *args: object) -> None:
31 |         super().__init__(*args)


--------------------------------------------------------------------------------
/googlesearch/googlesearch.py:
--------------------------------------------------------------------------------
  1 | """
  2 | googlesearch v1.1.1 (Stable)
  3 | 
  4 | © Anime no Sekai — 2021
  5 | """
  6 | from urllib.parse import quote
  7 | 
  8 | from requests import get
  9 | from bs4 import BeautifulSoup
 10 | from pyuseragents import random
 11 | 
 12 | from googlesearch.utils.cleanup import remove_all
 13 | from googlesearch.models import SearchResultElement
 14 | from googlesearch.exceptions import CleanupError, InvalidParameter, ParsingError, RelatedSearchError, RequestError, ResultsError, GoogleSearchException
 15 | from googlesearch.constants import BASE_URL, CONSENT_VALUE, CLEANUP_TAGS, HEADERS
 16 | 
 17 | class Search():
 18 |     def __init__(self, query: str, language: str = "en", number_of_results: int = 10, retry_count: int = 3, parser: str = "html.parser") -> None:
 19 |         self.query = str(query)
 20 |         self.retry_count = int(retry_count)
 21 |         if self.retry_count < 1:
 22 |             raise InvalidParameter("'retry_count' cannot be less than 1")
 23 |         self.loaded = False
 24 |         
 25 |         # parameters
 26 |         self._query = quote(self.query, safe='')
 27 |         self._language = quote(language, safe='')
 28 |         if number_of_results < 1:
 29 |             raise InvalidParameter("'number_of_results' cannot be less than 1")
 30 |         self._requested_number_of_results = int(number_of_results)
 31 |         self._headers = HEADERS.copy()
 32 |         self._headers["User-Agent"] = random()
 33 |         self._parser = str(parser)
 34 |         
 35 |         # storing values, can be accessed without loading
 36 |         self._related_searches = []
 37 |         self._results = []
 38 | 
 39 |     def _check_loading_state(self):
 40 |         # def decorator(function):
 41 |         #     return function()
 42 |         if not self.loaded:
 43 |             for i in range(1, self.retry_count + 1):
 44 |                 try:
 45 |                     self.load()
 46 |                     break
 47 |                 except GoogleSearchException as e:
 48 |                     if i >= self.retry_count:
 49 |                         raise e
 50 |                     continue
 51 |             
 52 |         # return decorator
 53 | 
 54 |     def load(self):
 55 |         try:
 56 |             response = get(BASE_URL.format(query=self._query, language=self._language, number=str((int(self._requested_number_of_results) - 1 if self._requested_number_of_results >= 2 else 1))), headers=self._headers, cookies={"CONSENT": CONSENT_VALUE})
 57 |             if response.status_code >= 400:
 58 |                 raise RequestError("Google Returned Status Code: " + str(response.status_code))
 59 |         except:
 60 |             RequestError("An error occured while requesting for the webpage to parse")
 61 |         
 62 |         try:
 63 |             # parse the response
 64 |             website = BeautifulSoup(response.text, features=self._parser)
 65 | 
 66 |             # cleanup
 67 |             try:
 68 |                 for tag in CLEANUP_TAGS:
 69 |                     remove_all(website, tag)
 70 |             except Exception as e:
 71 |                 raise CleanupError("An error occured while cleaning up the retrieved webpage (error: {err})".format(err=str(e)))
 72 | 
 73 |             # retrieving
 74 |             try:
 75 |                 _related_history = []
 76 |                 for associate in website.find("div", {"id": "main"}).find_all("div", recursive=False)[-1].find("div").find_all("div"):
 77 |                     if len(associate.find_all("a")) >= 1:
 78 |                         _related_history.append(associate.text)
 79 |                 for element in set(_related_history):
 80 |                     self._related_searches.append(Search(element, parser=self._parser, retry_count=self.retry_count))
 81 |             except Exception as e:
 82 |                 if self._requested_number_of_results > 95:
 83 |                     extra_error = " This error might come from the high number of results asked for. "
 84 |                 else:
 85 |                     extra_error = ""
 86 |                 raise RelatedSearchError("An error occured while parsing the related searches.{extra}(error: {err})".format(extra=extra_error, err=str(e)))
 87 | 
 88 |             try:
 89 |                 for result in website.select("#main > div > div"):
 90 |                     try:
 91 |                         self._results.append(SearchResultElement(result))
 92 |                     except Exception:
 93 |                         continue
 94 |             except Exception as e:
 95 |                 raise ResultsError("An error occured while parsing the results (error: {err})").format(err=str(e))
 96 |             
 97 |             self.loaded = True
 98 |             self._response = str(website)
 99 |         except GoogleSearchException as e:
100 |             raise e
101 |         except Exception as e:
102 |             raise ParsingError("An error occured while parsing Google Search results (error: {err})").format(err=str(e))
103 | 
104 |     # properties declaration
105 | 
106 |     @property
107 |     def related_searches(self):
108 |         self._check_loading_state()
109 |         return self._related_searches
110 | 
111 |     @related_searches.setter
112 |     def related_searches(self, value):
113 |         self._related_searches = value
114 | 
115 |     @property
116 |     def results(self):
117 |         self._check_loading_state()
118 |         return self._results
119 | 
120 |     @results.setter
121 |     def results(self, value):
122 |         self._results = value
123 |     
124 |     # class functions
125 |     def __repr__(self) -> str:
126 |         return '<Search query="{query}" results={results_count}>'.format(query=self.query, results_count=(str(len(self.results)) + " elements" if self.loaded else "(Not loaded)"))
127 | 
128 |     def as_dict(self) -> str:
129 |         return {
130 |             "query": self.query,
131 |             "language": self._language,
132 |             "requestedNumberOfResults": self._requested_number_of_results,
133 |             "results": [elem.as_dict() for elem in self.results],
134 |             "relatedSearches": [elem.query for elem in self.related_searches]
135 |         }


--------------------------------------------------------------------------------
/googlesearch/models.py:
--------------------------------------------------------------------------------
 1 | """
 2 | File containing the different classes used in googlesearch
 3 | """
 4 | 
 5 | from urllib.parse import urlparse, parse_qsl
 6 | from bs4 import BeautifulSoup
 7 | from googlesearch.constants import BASE
 8 | 
 9 | class SearchResultElement():
10 |     def __init__(self, resultobj: BeautifulSoup) -> None:
11 |         # get the url
12 |         href = str(resultobj.find("a")["href"])
13 |         if href.startswith("/"):
14 |             href = BASE + href
15 |         self.url = str(dict(parse_qsl(urlparse(href).query))["q"])
16 | 
17 |         # get the title
18 |         self.title = str(resultobj.find("h3").text)
19 |         self.displayed_url = str(resultobj.select_one("div:nth-child(1) > a > div").text)
20 |         self.description = str(resultobj.find_all("div")[-1].text)
21 | 
22 |     def __repr__(self) -> str:
23 |         return '<SearchResult title="{title}" ({url})>'.format(title=self.title, url=self.displayed_url)
24 | 
25 |     def as_dict(self) -> str:
26 |         return {
27 |             "url": self.url,
28 |             "title": self.title,
29 |             "displayedURL": self.displayed_url,
30 |             "description": self.description
31 |         }


--------------------------------------------------------------------------------
/googlesearch/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Animenosekai/googlesearch/346bcaf7b344c6cf7c8fdd63d9493648dad9a163/googlesearch/utils/__init__.py


--------------------------------------------------------------------------------
/googlesearch/utils/cleanup.py:
--------------------------------------------------------------------------------
1 | from bs4 import BeautifulSoup
2 | 
def remove_all(bsobject: BeautifulSoup, tag: str):
    """
    Deletes from `bsobject`, in place, every element matching `tag`

    The matches are collected with `find_all` before any of them is
    decomposed. Nothing is returned.
    """
    matches = bsobject.find_all(tag)
    for node in matches:
        node.decompose()
9 | 


--------------------------------------------------------------------------------
/playground/README.md:
--------------------------------------------------------------------------------
1 | # playground
2 | 
3 | This directory is a scratch space for experiments; it is not included in the final module.
4 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | beautifulsoup4
2 | requests
3 | pyuseragents
4 | inquirer


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | from os import path
 3 | 
 4 | with open(path.join(path.abspath(path.dirname(__file__)), 'README.md'), encoding='utf-8') as f:
 5 |     readme_description = f.read()
 6 | 
 7 | setup(
 8 |     name ="python-googlesearch",
 9 |     packages = ["googlesearch"],
10 |     version = "1.1.1",
11 |     license = "MIT License",
12 |     description = "This module lets you use Google Searching capabilities right from your Python code",
13 |     author = "Anime no Sekai",
14 |     author_email = "niichannomail@gmail.com",
15 |     url = "https://github.com/Animenosekai/googlesearch",
16 |     download_url = "https://github.com/Animenosekai/googlesearch/archive/v1.1.1.tar.gz",
17 |     keywords = ['python', 'Anime no Sekai', "animenosekai", "googlesearch"],
18 |     install_requires = ['beautifulsoup4', 'requests', 'pyuseragents', 'inquirer'],
19 |     classifiers = ['Development Status :: 5 - Production/Stable', 'License :: OSI Approved :: MIT License', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.2', 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9'],
20 |     long_description = readme_description,
21 |     long_description_content_type = "text/markdown",
22 |     include_package_data=True,
23 |     python_requires='>=3.2, <4',
24 |     entry_points={
25 |         'console_scripts': [
26 |             'googlesearch = googlesearch.__main__:main'
27 |         ]
28 |     },
29 |     package_data={
30 |         'googlesearch': ['LICENSE'],
31 |     },
32 | )


--------------------------------------------------------------------------------
/tests/README.md:
--------------------------------------------------------------------------------
1 | # tests
2 | 
3 | Contains the CI tests run by the Pytest GitHub workflow.
4 | 
5 | The name of each test file needs to start with `test_`
6 | 
7 | Each test function in the files needs to start with `test_`
8 | 


--------------------------------------------------------------------------------
/tests/test_search.py:
--------------------------------------------------------------------------------
 1 | import googlesearch
 2 | from googlesearch import Search
 3 | from googlesearch.models import SearchResultElement
 4 | 
 5 | def test_search():
 6 |     print("[test] --> Testing Search")
 7 |     python = Search("Python", retry_count=10)
 8 |     assert python.query == "Python"
 9 |     assert python._query == "Python"
10 |     assert Search("How is the weather in Tokyo?")._query == "How%20is%20the%20weather%20in%20Tokyo%3F"
11 |     assert python.loaded == False
12 |     assert isinstance(python.results, list)
13 |     assert isinstance(python.related_searches, list)
14 |     assert python.loaded == True
15 |     assert isinstance(python.results[0], SearchResultElement)
16 |     assert isinstance(python.related_searches[0], Search)
17 |     assert python.related_searches[0].loaded == False


--------------------------------------------------------------------------------