├── .github
│   ├── ISSUE_TEMPLATE
│   │   ├── bug_report.md
│   │   └── feature_request.md
│   └── workflows
│       ├── heroku-deploy.yml
│       ├── pip-upload.yml
│       └── python-test.yml
├── .gitignore
├── CONTRIBUTING.md
├── Dockerfile
├── LICENSE
├── MANIFEST.in
├── README.md
├── bin
│   └── websearch
├── requirements.txt
├── setup.cfg
├── setup.py
├── tests
│   ├── __init__.py
│   └── test.py
└── websearch
    ├── __init__.py
    ├── __main__.py
    ├── extension.json
    └── script.py
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: "[BUG]"
5 | labels: bug
6 | assignees: gaetan1903
7 |
8 | ---
9 |
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 |
13 | **To Reproduce**
14 | Steps to reproduce the behavior
15 |
16 | **Expected behavior**
17 | A clear and concise description of what you expected to happen.
18 |
19 | **Screenshots**
20 | If applicable, add screenshots to help explain your problem.
21 |
22 | **Desktop (please complete the following information):**
23 | - OS: [e.g. Linux, Windows]
24 | - Python Version [e.g. 3.6, 3.9]
25 | - Version [e.g. 22]
26 |
27 |
28 | **Additional context**
29 | Add any other context about the problem here.
30 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: "[Features]"
5 | labels: enhancement
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 |
--------------------------------------------------------------------------------
/.github/workflows/heroku-deploy.yml:
--------------------------------------------------------------------------------
1 | name: Deploy on Heroku [CD]
2 |
3 | on:
4 | release:
5 | types:
6 | - created
7 |
8 | jobs:
9 | build:
10 | runs-on: ubuntu-latest
11 | steps:
12 | - uses: actions/checkout@v2
13 | - uses: akhileshns/heroku-deploy@v3.12.12
14 | with:
15 | heroku_api_key: ${{secrets.HEROKU_API_KEY}}
16 | heroku_app_name: "websearch-python"
17 | heroku_email: "gaetan.s118@gmail.com"
18 | usedocker: true
19 |
--------------------------------------------------------------------------------
/.github/workflows/pip-upload.yml:
--------------------------------------------------------------------------------
1 | name: CD Publish
2 | on:
3 | release:
4 |     types:
5 | - created
6 | jobs:
7 | publish:
8 | runs-on: ubuntu-latest
9 | steps:
10 | # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
11 | - uses: actions/checkout@v2
12 |
13 | # Sets up python
14 | - uses: actions/setup-python@v2
15 | with:
16 | python-version: 3.8
17 |
18 | # Install dependencies
19 | - name: "Installs dependencies"
20 | run: |
21 | python3 -m pip install --upgrade pip
22 | python3 -m pip install setuptools wheel twine
23 |
24 | # Build and upload to PyPI
25 | - name: "Builds and uploads to PyPI"
26 | run: |
27 | python3 setup.py sdist bdist_wheel
28 | python3 -m twine upload dist/*
29 | env:
30 | TWINE_USERNAME: __token__
31 | TWINE_PASSWORD: ${{ secrets.PIP_TOKEN }}
32 |
33 |
--------------------------------------------------------------------------------
/.github/workflows/python-test.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3 |
4 | name: CI Python Test
5 |
6 | on:
7 | push:
8 | branches: [ main ]
9 | pull_request:
10 | branches: [ main ]
11 |
12 | jobs:
13 | build:
14 |
15 | runs-on: ubuntu-latest
16 |
17 | steps:
18 | - uses: actions/checkout@v2
19 | - name: Set up Python 3.8
20 | uses: actions/setup-python@v2
21 | with:
22 | python-version: 3.8
23 | - name: Install dependencies
24 | run: |
25 | python -m pip install --upgrade pip
26 | pip install flake8
27 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
28 | - name: Lint with flake8
29 | run: |
30 | # stop the build if there are Python syntax errors or undefined names
31 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
32 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
33 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
34 | - name: Run UnitTest
35 | run: |
36 | python -m unittest -v
37 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
131 | env/
132 |
133 | .vscode/
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | **To contribute to this project**, make sure you update these 3 files:
2 | - the code file
3 | - the test file
4 | - the doc file (README)
5 |
6 | The steps are therefore:
7 | - Add your contribution
8 | - Write the unit test following the model already present
9 | - Update the README by specifying the new functionality
10 | - Open your PULL REQUEST
11 |
12 |
13 | Note: these links can help when adding a new extension
14 | - `https://developer.mozilla.org/fr/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Common_types`
15 | - `https://support.google.com/webmasters/answer/35287`
16 |
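As an illustration, here is a minimal, hedged sketch of what "adding a new extension" can look like (the `epub` extension below is hypothetical and not part of the project): add its MIME type to `websearch/extension.json`, then exercise it through `custom()`:

```python
from websearch import WebSearch

# Assumes the entry "epub": "application/epub+zip" has been added to
# websearch/extension.json; custom() looks the MIME type up automatically
# when it is not passed explicitly.
web = WebSearch('programming')
ebooks = web.custom('epub')
for link in ebooks[:3]:
    print(link)
```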
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.9-buster
2 |
3 | RUN pip install --no-cache-dir websearch-python
4 |
5 | CMD ["sh", "-c", "websearch --host 0.0.0.0 --port $PORT"]
6 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | « Copyright © 2021, iTeam-$ Community
2 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”),
3 | to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
4 | and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
5 |
6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
7 |
8 | The Software is provided “as is”, without warranty of any kind, express or implied, including but not limited to the warranties of merchantability,
9 | fitness for a particular purpose and noninfringement. In no event shall the authors or copyright holders X be liable for any claim, damages or other liability,
10 | whether in an action of contract, tort or otherwise, arising from, out of or in connection with the software or the use or other dealings in the Software.
11 |
12 | Except as contained in this notice, the name of the iTeam-$ Community shall not be used in advertising or otherwise to promote the sale,
13 | use or other dealings in this Software without prior written authorization from the iTeam-$ Community Manager. »
14 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include *.json
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # WebSearch
2 |
3 |
4 |
5 | > Python module allowing you to do various searches for links on the Web.
6 |
7 |
8 | [](https://github.com/iTeam-S/WebSearch/actions/workflows/python-test.yml)
9 | [](https://github.com/iTeam-S/WebSearch/actions/workflows/pip-upload.yml)
10 |
11 | [](https://pypi.org/project/websearch-python/)
12 | [](https://pypi.org/project/websearch-python/)
13 |
14 |
15 |
16 | ## Installation
17 |
18 | ```s
19 | pip3 install websearch-python
20 | ```
21 | **OR** you can install the dev version
22 | ```s
23 | pip3 install https://github.com/iTeam-S/WebSearch/archive/refs/heads/main.zip
24 | ```
25 |
26 | ## Use
27 |
28 | ### Quick Start as Module
29 |
30 | ```python
31 | from websearch import WebSearch as web
32 | for page in web('iTeam-$').pages[:2]:
33 | print(page)
34 | ```
35 |
36 | ```
37 | [RESULTS]
38 |
39 | https://iteam-s.mg/
40 | https://github.com/iTeam-S
41 | ```
42 |
43 |
44 | ### Quick Start as Webserver
45 |
46 | ```s
47 | # run webserver
48 | websearch --host 0.0.0.0 --port 7845
49 | ```
50 |
51 | **OR**
52 |
53 | ```s
54 | # run webserver
55 | python -m websearch --host 0.0.0.0 --port 7845
56 | ```
57 |
58 | ```s
59 | # requests contents
60 | curl http://0.0.0.0:7845/pages/botoravony+arleme
61 | ```
62 |
63 | ```json
64 | [
65 | "https://portfolio.iteam-s.mg/?id=2",
66 | "https://portfolio.iteam-s.mg/libs/cv/arleme.pdf",
67 | "https://madagascar.webcup.fr/team-webcup/iteams"
68 | ]
69 | ```
70 |
71 | ### Use Deployed Version
72 | ```s
73 | curl https://websearch-python.herokuapp.com/pages/botoravony+arleme
74 | ```
75 |
76 | __________________________
77 |
78 |
79 | FULL DOCUMENTATION
80 |
81 | ### Initialization
82 |
83 | ```python
84 | from websearch import WebSearch
85 | web = WebSearch('Gaetan Jonathan BAKARY')
86 | ```
87 | You can pass a `list` for multiple keywords.
88 |
89 | ```python
90 | web = WebSearch(['Gaetan Jonathan BAKARY', 'iTeam-S'])
91 | ```
92 | You can also restrict the search to a specific website with the `site` parameter.
93 |
94 | ```python
95 | web = WebSearch('Gaetan Jonathan', site='iteam-s.mg')
96 | ```
97 |
98 |
99 | ### Webpages results
100 |
101 | ```python
102 | from websearch import WebSearch
103 | web = WebSearch('Gaetan Jonathan BAKARY')
104 | webpages = web.pages
105 | for wp in webpages[:5]:
106 | print(wp)
107 | ```
108 |
109 | ```
110 | [RESULTS]
111 |
112 | https://mg.linkedin.com/in/gaetanj
113 | https://portfolio.iteam-s.mg/?u=gaetan
114 | https://github.com/gaetan1903
115 | https://medium.com/@gaetan1903
116 | https://gitlab.com/gaetan1903
117 | ```
118 |
119 |
120 | ### Images results
121 |
122 | ```python
123 | from websearch import WebSearch
124 | web = WebSearch('Gaetan Jonathan BAKARY')
125 | webimages = web.images
126 | for im in webimages[:5]:
127 | print(im)
128 | ```
129 |
130 | ```
131 | [RESULTS]
132 |
133 | https://tse3.mm.bing.net/th?id=OIP.-K25y8TqkOi9UG_40Ti8bgAAAA
134 | https://tse1.mm.bing.net/th?id=OIP.yJPVcDx6znFSOewLdQBbHgHaJA
135 | https://tse3.mm.bing.net/th?id=OIP.7rO2T_nDAS0bXm4tQ4LKQAHaJA
136 | https://tse2.mm.bing.net/th?id=OIP.IUIEkGQVzYRKaDA7WeeV7QHaEF
137 | https://tse3.explicit.bing.net/th?id=OIP.OmvVnMIVu2ZdNZHZzJK_hgAAAA
138 | ```
139 |
140 |
141 | ### PDF results
142 |
143 | ```python
144 | from websearch import WebSearch
145 | web = WebSearch('Math 220')
146 | pdfs = web.pdf
147 | for pdf in pdfs[:5]:
148 | print(pdf)
149 | ```
150 |
151 | ```
152 | [RESULTS]
153 |
154 | https://www.coconino.edu/resources/files/pdfs/registration/curriculum/course-outlines/m/mat/mat_220.pdf
155 | https://www.jmu.edu/mathstat/Files/ALEKSmatrix.pdf
156 | https://www.jjc.edu/sites/default/files/Academics/Math/M220%20Master%20Syllabus%20SP18.pdf
157 | https://www.sonoma.edu/sites/www/files/2018-19cat-11math.pdf
158 | https://www.svsd.net/cms/lib5/PA01001234/Centricity/Domain/1009/3.3-3.3B-Practice-KEY.pdf
159 | ```
160 |
161 | To skip format verification when searching for attachments, set `verif=False` (it is `True` by default).
162 |
163 | Format verification is presented [here](https://github.com/iTeam-S/WebSearch/pull/4)
164 |
165 | ```python
166 | from websearch import WebSearch
167 | web = WebSearch('Math 220', verif=False)
168 | ```
169 |
170 |
171 | ### DOCX results
172 | ```python
173 | from websearch import WebSearch
174 | web = WebSearch('python')
175 | words = web.docx
176 | for word in words[:3]:
177 | print(word)
178 | ```
179 |
180 | ```
181 | [RESULTS]
182 |
183 | https://www.ocr.org.uk/Images/572953-j277-programming-techniques-python.docx
184 | https://www.niu.edu/brown/_pdf/physics374_spring2021/l1-19-21.docx
185 | https://ent2d.ac-bordeaux.fr/disciplines/mathematiques/wp-content/uploads/sites/3/2017/09/de-Scratch-%C3%A0-Python.docx
186 | ```
187 |
188 |
189 | ### XLSX results
190 | ```python
191 | from websearch import WebSearch
192 | web = WebSearch('datalist')
193 | excels = web.xlsx
194 | for excel in excels[:3]:
195 | print(excel)
196 | ```
197 |
198 | ```
199 | [RESULTS]
200 |
201 | https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/979255/Detailed_Single_Data_List_-_2021-2022.xlsx
202 | https://www.jaist.ac.jp/top/data/list-achievement-research-e.xlsx
203 | https://img1.wsimg.com/blobby/go/bed8f8d7-d6c2-488d-9aa3-5910e18aa8d2/downloads/Datalist.xlsx
204 | ```
205 |
206 |
207 | ### PPTX results
208 | ```python
209 | from websearch import WebSearch
210 | web = WebSearch('Leadership')
211 | powerpoints = web.pptx
212 | for powerpoint in powerpoints[:3]:
213 | print(powerpoint)
214 | ```
215 |
216 | ```
217 | [RESULTS]
218 |
219 | https://www.plainviewisd.org/cms/lib6/TX01918200/Centricity/Domain/853/Leadership%20Behav.%20Styles.pptx
220 | https://www.yorksandhumberdeanery.nhs.uk/sites/default/files/leadership_activity_and_msf.pptx
221 | https://www.itfglobal.org/sites/default/files/node/resources/files/Stage%203.1%20Powerpoint.pptx
222 | ```
223 |
224 |
225 | ### ODT results
226 | ```python
227 | from websearch import WebSearch
228 | web = WebSearch('Finance')
229 | documents = web.odt
230 | for doc in documents[:2]:
231 | print(doc)
232 | ```
233 |
234 | ```
235 | [RESULTS]
236 | https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/970748/Green_Finance_Report.odt
237 | https://iati.fcdo.gov.uk/iati_documents/3678707.odt
238 |
239 | ```
240 |
241 | ### ODS results
242 | ```python
243 | from websearch import WebSearch
244 | web = WebSearch('Commerce')
245 | documents = web.ods
246 | for doc in documents[:2]:
247 | print(doc)
248 | ```
249 |
250 | ```
251 | [RESULTS]
252 | http://www.justice.gouv.fr/art_pix/Stat_RSJ_12.7_Civil_Les_tribunaux_de_commerce.ods
253 | https://www.insee.fr/fr/metadonnees/source/fichier/Precision-principaux-indicateurs-crise-sanitaire-2020.ods
254 | ```
255 |
256 | ### ODP results
257 | ```python
258 | from websearch import WebSearch
259 | web = WebSearch('Renaissance')
260 | documents = web.odp
261 | for doc in documents[:2]:
262 | print(doc)
263 | ```
264 |
265 | ```
266 | [RESULTS]
267 | http://ekladata.com/9sHTcbLYfwbNGKU9cpnZXjlsbfA/17-Art-Renaissance.odp
268 | https://www.college-yvescoppens-malestroit.ac-rennes.fr/sites/college-yvescoppens-malestroit.ac-rennes.fr/IMG/odp/diapo-presentation-voyage-5e.odp
269 | ```
270 |
271 | ### KML results
272 | ```python
273 | from websearch import WebSearch
274 | web = WebSearch('Madagascar')
275 | maps = web.kml
276 | for map in maps[:3]:
277 | print(map)
278 | ```
279 |
280 | ```
281 | [RESULTS]
282 | http://www.hydrosciences.fr/sierem/kmz_files/MGPLGRA.kml
283 | https://www.ngoaidmap.org/downloads?doc=kml&name=association-intercooperation-madagascar-aim_projects&partners%5B%5D=6160§ors%5B%5D=1&status=active
284 | https://ngoaidmap.org/downloads?doc=kml&name=nemp-madagascar-cyclone-enawo-response_projects&projects%5B%5D=20655&status=active
285 | ```
286 |
287 | ### CUSTOM results
288 |
289 | For extensions not listed above, use the `custom` function.
290 | 
291 | The second argument (the MIME type) can be found [here](https://developer.mozilla.org/fr/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Common_types)
292 |
293 | ```python
294 | from websearch import WebSearch
295 | web = WebSearch('Biologie')
296 | ps_documents = web.custom('ps', 'application/postscript')
297 | for doc in ps_documents[:3]:
298 | print(doc)
299 | ```
300 |
301 | ```
302 | [RESULTS]
303 |
304 | http://irma.math.unistra.fr/~fbertran/Master1_2020_2/L3Court.ps
305 | http://jfla.inria.fr/2002/actes/10-michel.ps
306 | https://www.crstra.dz/telechargement/pnr/ps/environnement/fadel-djamel.ps
307 | ```
308 |
309 |
310 | ### Webserver
311 |
312 | You can deploy it as a webserver and send HTTP requests.
313 |
314 | ```s
315 | python -m websearch --host [host] --port [port]
316 | [*] default host : 0.0.0.0
317 | [*] default port : 7845
318 | ```
319 | Example for pages:
320 | ```s
321 | curl http://<host>:<port>/pages/botoravony+arleme
322 |
323 |
324 | [
325 |
326 | "https://portfolio.iteam-s.mg/?id=2",
327 | "https://portfolio.iteam-s.mg/libs/cv/arleme.pdf",
328 | "https://madagascar.webcup.fr/team-webcup/iteams"
329 | ]
330 | ```
331 |
332 | Example for images:
333 | ```s
334 | curl http://<host>:<port>/images/one+piece
335 |
336 |
337 | [
338 | "https://tse1.mm.bing.net/th?id=OIP.GlNk7idD3RCI_SYLiVzSBAHaE7",
339 | "https://tse2.mm.bing.net/th?id=OIP.uePUN5rwpB-7wicu1uxQcgHaFj",
340 | "https://tse2.mm.bing.net/th?id=OIP.dwWBU-A_6KPvvEYsL2nhVgHaFc",
341 | "https://tse1.mm.bing.net/th?id=OIP.5M8tKIhIWvbqGO1prhUGfAHaJ4",
342 | .....
343 | "https://tse4.mm.bing.net/th?id=OIP.uvp3efwHRLDJnUWZ5KLWCwHaE8",
344 | "https://tse3.mm.bing.net/th?id=OIP.d_uUoc-8R13RZ1bb76yhZgHaKp",
345 | "https://tse1.mm.bing.net/th?id=OIP.cBWDvspBM036p6h4DS6RTAHaFj"
346 | ]
347 | ```
348 |
349 | Search by extension: `curl http://<host>:<port>/<extension>/<query>`
350 |
351 | Where extension is from this list:
352 |
353 | ```
354 | swf, pdf, ps, dwf, kml, kmz, gpx, hwp, htm, html, xls, xlsx,
355 | ppt, pptx, doc, docx, odp, ods, odt, rtf, svg, tex, txt, text,
356 | bas, c, cc, cpp, cxx, h, hpp, cs, java, pl, py, wml, wap, xml
357 | ```
358 |
359 | Example:
360 | ```s
361 | curl http://<host>:<port>/kml/madagascar+antananarivo
362 |
363 |
364 | [
365 | "https://ifl.francophonelibre.org/atelier/ActionOSMMG2019/wms/kml?layers=ActionOSMMG2019:MG_Antananarivo_pharmacy_point_OSM_20190427"
366 | ]
367 | ```
368 |
369 | You can use the `limit` parameter to limit the number of results:
370 | ```
371 | curl http://<host>:<port>/images/one+piece?limit=4
372 |
373 |
374 | [
375 | "https://tse1.mm.bing.net/th?id=OIP.GlNk7idD3RCI_SYLiVzSBAHaE7",
376 | "https://tse2.mm.bing.net/th?id=OIP.uePUN5rwpB-7wicu1uxQcgHaFj",
377 | "https://tse2.mm.bing.net/th?id=OIP.dwWBU-A_6KPvvEYsL2nhVgHaFc",
378 | "https://tse1.mm.bing.net/th?id=OIP.5M8tKIhIWvbqGO1prhUGfAHaJ4"
379 | ]
380 |
381 | ```
382 | ##### Note: the module's `site` and `verif` parameters can also be passed as URL query parameters
383 | `curl "http://<host>:<port>/pdf/statut?verif=false&site=iteam-s.mg"`
384 |
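For programmatic access, a minimal sketch (assuming the webserver is running locally on the default port 7845) that mirrors the `curl` example above using `requests`:

```python
import requests

# Query the local websearch webserver for PDF results, passing the same
# URL parameters as the curl example above, plus a result limit.
response = requests.get(
    "http://0.0.0.0:7845/pdf/statut",
    params={"limit": 5, "verif": "false", "site": "iteam-s.mg"},
)
print(response.json())
```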
385 |
386 |
387 |
388 |
389 | _____________________________________________________________________
390 |
391 |
392 | ## Show your support
393 | Give a star 🌟 if this project helped you!
394 |
395 | [](https://www.buymeacoffee.com/gaetan1903)
396 |
397 |
398 | ## License
399 |
400 | MIT License
401 |
402 | Copyright (c) 2021 [iTeam-$](https://iteam-s.mg)
403 |
404 |
405 | ___________________________________________________________________
406 |
407 | ## Contributors
408 | 
409 |
410 |
--------------------------------------------------------------------------------
/bin/websearch:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import argparse
4 | from http.client import error
5 | from websearch import WebSearch
6 | from gevent.pywsgi import WSGIServer
7 | from flask import Flask, redirect, request, jsonify
8 |
9 |
10 | webserver = Flask(__name__)
11 |
12 |
13 | @webserver.errorhandler(404)
14 | def page_not_found(e):
15 | return """
16 | Can't find what you want.
17 | Please change the query or the extensions
18 | """, 404
19 |
20 |
21 | @webserver.route('/v1/<ext>/<query>')
22 | def old_route(ext, query):
23 | return redirect(f'/{ext}/{query}', code=301)
24 |
25 |
26 | @webserver.route('/<ext>/<query>')
27 | def websearch(ext, query):
28 | limit = request.args.get('limit', '')
29 | if limit and limit.isdigit():
30 | limit = int(limit)
31 | else:
32 | limit = 100
33 | try:
34 | query = query.replace('+', ' ')
35 | web = WebSearch(query, **request.args.to_dict())
36 | if ext == 'pages':
37 | res = web.pages
38 | elif ext == 'images':
39 | res = web.images
40 | else:
41 | res = web.custom(extension=ext)
42 | except error as e:
43 | print(e)
44 | return "Error 500, Something Wrong", 500
45 |
46 | return jsonify(res[:limit]) \
47 | if res and type(res) == list else redirect('/404')
48 |
49 |
50 | parser = argparse.ArgumentParser(
51 | description='Webserver version for websearch-python'
52 | )
53 | parser.add_argument(
54 | '--host', help='HOST for server, default: 0.0.0.0', default='0.0.0.0'
55 | )
56 | parser.add_argument(
57 | '--port', type=int, help='PORT for server. default 7845', default=7845
58 | )
59 | args = parser.parse_args()
60 |
61 | print(f'''
62 | _ _ _____ _____ _____ _____ ___ _____ ____ _ _
63 | | | | | | ___| | ___ \ / ___| | ___| / _ \ | ___ \ / __ \ | | | |
64 | | | | | | |__ | |_/ / \ `--. | |__ / /_\ \ | |_/ / | / \/ | |_| |
65 | | |/\| | | __| | ___ \ `--. \ | __| | _ | | / | | | _ |
66 | \ /\ / | |___ | |_/ / /\__/ / | |___ | | | | | |\ \ | \__/\ | | | |
67 | \/ \/ \____/ \____/ \____/ \____/ \_| |_/ \_| \_| \____/ \_| |_/
68 |
69 | Server listening on {args.host}:{args.port}
70 | ''') # noqa: W605
71 |
72 | SERVER = WSGIServer((args.host, args.port), webserver)
73 | SERVER.serve_forever()
74 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | requests
2 | beautifulsoup4
3 | flask
4 | gevent
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | name = WebSearch
3 | version = 1.2.2
4 | author = iTeam-$
5 | author_email = contact@iteam-s.xyz
6 | description = Python module allowing you to do various searches for links on the Web.
7 | long_description = file: README.md
8 | long_description_content_type = text/markdown
9 | url = https://github.com/iTeam-S/WebSearch
10 | project_urls =
11 | Bug Tracker = https://github.com/iTeam-S/WebSearch/issues
12 | classifiers =
13 | Programming Language :: Python :: 3
14 | License :: OSI Approved :: MIT License
15 | Operating System :: OS Independent
16 |
17 | [options]
18 |
19 | packages = find:
20 | python_requires = >=3.6
21 |
22 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import setuptools
2 |
3 | with open("README.md", "r") as fh:
4 | long_description = fh.read()
5 |
6 | setuptools.setup(
7 | name="websearch-python", # This is the name of the package
8 | version="1.2.2", # The initial release version
9 | author="iTeam-$", # Full name of the author
10 | description="Module allowing you to do various searches for links on the Web",
11 | long_description=long_description, # Long description read from the readme
12 | long_description_content_type="text/markdown",
13 | packages=setuptools.find_packages(), # List of all modules to be installed
14 | classifiers=[
15 | "Programming Language :: Python :: 3",
16 | "License :: OSI Approved :: MIT License",
17 | "Operating System :: OS Independent",
18 | ], # Information to filter the project on PyPi website
19 | python_requires=">=3.6",
20 | py_modules=["websearch"], # Name of the python package
21 | install_requires=["BeautifulSoup4", "requests", "gevent", "flask"], # dependencies
22 | include_package_data=True, # Include all data file with the package
23 | package_data={"": ["*.json"]},
24 | scripts=["bin/websearch"],
25 | )
26 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iTeam-S/WebSearch/1f703455e39df384e303bbb7d93bb53951e3c51c/tests/__init__.py
--------------------------------------------------------------------------------
/tests/test.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import unittest
4 |
5 | sys.path.insert(0, os.path.dirname('/'.join(__file__.split('/')[:-1])))
6 | 
7 | import websearch  # noqa: E402
8 |
9 |
10 | class TestCaseModule(unittest.TestCase):
11 |
12 | def test1_pages(self):
13 | pages = websearch.WebSearch('iTeam-$').pages[:5]
14 | # Check the number of results
15 | self.assertTrue(len(pages))
16 | # Check the links
17 | for page in pages:
18 | self.assertTrue(page.startswith('http'))
19 |
20 | def test2_images(self):
21 | images = websearch.WebSearch('Madagascar').images[:5]
22 | # Check the number of results
23 | self.assertTrue(len(images))
24 | # Check the links
25 | for image in images:
26 | self.assertTrue(image.startswith('http'))
27 |
28 | def test3_pdf(self):
29 | pdfs = websearch.WebSearch('Math 220').pdf[:2]
30 | # Check the number of results
31 | self.assertTrue(len(pdfs))
32 | # Check the links
33 | for pdf in pdfs:
34 | self.assertTrue(pdf.startswith('http'))
35 |
36 | def test4_word(self):
37 | words = websearch.WebSearch('python').docx[:3]
38 | # Check the number of results
39 | self.assertTrue(len(words))
40 | # Check the links
41 | for word in words:
42 | self.assertTrue(word.startswith('http'))
43 |
44 | def test5_excel(self):
45 | excels = websearch.WebSearch('datalist').xlsx[:3]
46 | # Check the number of results
47 | self.assertTrue(len(excels))
48 | # Check the links
49 | for excel in excels:
50 | self.assertTrue(excel.startswith('http'))
51 |
52 | def test6_powerpoint(self):
53 | powerpoints = websearch.WebSearch('Communication').pptx[:3]
54 | # Check the number of results
55 | self.assertTrue(len(powerpoints))
56 | # Check the links
57 | for powerpoint in powerpoints:
58 | self.assertTrue(powerpoint.startswith('http'))
59 |
60 | def test7_odt(self):
61 | documents = websearch.WebSearch('Finance').odt[:3]
62 | # Check the number of results
63 | self.assertTrue(len(documents))
64 | # Check the links
65 | for doc in documents:
66 | self.assertTrue(doc.startswith('http'))
67 |
68 | def test8_ods(self):
69 | documents = websearch.WebSearch('Commerce').ods[:1]
70 | # Check the number of results
71 | self.assertTrue(len(documents))
72 | # Check the links
73 | for doc in documents:
74 | self.assertTrue(doc.startswith('http'))
75 |
76 | def test9_kml(self):
77 | maps = websearch.WebSearch('Madagascar').kml[:1]
78 | # Check the number of results
79 | self.assertTrue(len(maps))
80 | # Check the links
81 | for map in maps:
82 | self.assertTrue(map.startswith('http'))
83 |
84 | def test10_custom(self):
85 | web = websearch.WebSearch('Biologie')
86 | documents = web.custom('ps', 'application/postscript')[:1]
87 | # Check the number of results
88 | self.assertTrue(len(documents))
89 | # Check the links
90 | for doc in documents:
91 | self.assertTrue(doc.startswith('http'))
92 |
93 | def test11_odp(self):
94 | documents = websearch.WebSearch('Renaissance').odp[:1]
95 | # Check the number of results
96 | self.assertTrue(len(documents))
97 | # Check the links
98 | for doc in documents:
99 | self.assertTrue(doc.startswith('http'))
100 |
101 |
102 | if __name__ == '__main__':
103 | unittest.main()
105 |
--------------------------------------------------------------------------------
/websearch/__init__.py:
--------------------------------------------------------------------------------
1 | from . import __main__
2 | from .script import WebSearch
3 |
4 | __version__ = "1.2.2"
5 | __author__ = "iTeam-$"
6 | __license__ = "MIT"
7 | __all__ = ["WebSearch", "__main__"]
8 |
--------------------------------------------------------------------------------
/websearch/__main__.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 | if __name__ == '__main__':
5 | os.system(f"websearch {' '.join(sys.argv[1:])}")
--------------------------------------------------------------------------------
/websearch/extension.json:
--------------------------------------------------------------------------------
1 | {
2 | "swf": "application/x-shockwave-flash",
3 | "pdf": "application/pdf",
4 | "ps": "application/postscript",
5 | "dwf": "application/dwf",
6 | "kml": "application/vnd.google-earth.kml+xml",
7 | "kmz": "application/vnd.google-earth.kmz",
8 | "gpx": "application/gpx+xml",
9 | "hwp": "application/x-hwp",
10 | "htm": "text/html",
11 | "html": "text/html",
12 | "xls": "application/vnd.ms-excel",
13 | "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
14 | "ppt": "application/vnd.ms-powerpoint",
15 | "pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
16 | "doc": "application/msword",
17 | "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
18 | "odp": "application/vnd.oasis.opendocument.presentation",
19 | "ods": "application/vnd.oasis.opendocument.spreadsheet",
20 | "odt": "application/vnd.oasis.opendocument.text",
21 | "rtf": "application/rtf",
22 | "svg": "image/svg+xml",
23 | "tex": "application/x-tex",
24 | "txt": "text/plain",
25 | "text": "text/plain",
26 | "bas": "text/plain",
27 | "c": "text/plain",
28 | "cc": "text/plain",
29 | "cpp": "text/plain",
30 | "cxx": "text/plain",
31 | "h": "text/plain",
32 | "hpp": "text/plain",
33 | "cs": "text/plain",
34 | "java": "text/plain",
35 | "pl": "text/plain",
36 | "py": "text/plain",
37 | "wml": "text/vnd.wap.wml",
38 | "wap": "image/wap",
39 | "xml": "application/xml"
40 | }
--------------------------------------------------------------------------------
/websearch/script.py:
--------------------------------------------------------------------------------
1 | import urllib.parse
2 | from bs4 import BeautifulSoup
3 | from requests import get, head
4 |
5 | import os
6 | import json
7 |
8 | __location__ = os.path.dirname(os.path.abspath(__file__))
9 |
10 |
11 | class WebSearch:
12 | """
13 | Module for collecting various kinds of links from the web.
14 | * query: the expression to search for.
15 | * verif: if True, send a request to each URL to validate
16 | that the result has the expected format; True by default.
17 | Can be disabled by passing `verif=False` as an argument.
18 | * site: restrict the search to a specific website as source.
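
Illustrative example (the values below are placeholders taken from the README):
    web = WebSearch('Gaetan Jonathan', site='iteam-s.mg', verif=False)
    links = web.pages[:5]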
19 | """
20 |
21 | _headers = {"User-Agent": "Googlebot/2.1 (http://www.googlebot.com/bot.html)"}
22 |
23 | def __init__(self, query, **kwargs):
24 | # Check whether the search uses multiple keywords.
25 | if isinstance(query, list):
26 | self.query = "'"
27 | self.query += "' OR '".join(query)
28 | self.query += "'"
29 | else:
30 | self.query = query
31 |
32 | # Check whether a source site was specified.
33 | if kwargs.get("site"):
34 | self.query = f"site:{kwargs.get('site')} {self.query}"
35 |
36 | # Used for link verification.
37 | self.verif = kwargs.get("verif", True)
38 | # Used as a cache for optimization.
39 | self.__data = {}
40 |
41 | def __verif_content(self, urls, mimetype):
42 | """
43 | Check that each link returns the expected format.
44 | Possible `mimetype` values can be found here:
45 | https://developer.mozilla.org/fr/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Common_types
46 | """
47 | if not self.verif:
48 | # If verif is False, skip verification
49 | # and return the list of URLs as-is.
50 | return urls
51 |
52 | new_urls = []
53 | for url in urls:
54 | # Send a request that only fetches the headers.
55 | try:
56 | rq = head(url).headers
57 | except Exception as err:
58 | print(err)
59 | continue
60 | # Check that the link returns the expected format.
61 | if rq.get("content-type") == mimetype:
62 | new_urls.append(url)
63 | # Return the verified URLs.
64 | return new_urls
65 |
66 | @property
67 | def images(self):
68 | """
69 | Retrieve all image result links
70 | for the given keywords.
71 | """
72 | # Check whether the results are already cached.
73 | if self.__data.get("images"):
74 | if self.__data["images"][0] == self.query:
75 | return self.__data["images"][1]
76 |
77 | result = []
78 | url = (
79 | "https://fr.images.search.yahoo.com/search/images;_ylt=AwrJS5dMFghcBh4AgWpjAQx.;\
80 | _ylu=X3oDMTE0aDRlcHI2BGNvbG8DaXIyBHBvcwMxBHZ0aWQDQjY1NjlfMQRzZWMDcGl2cw--?p="
81 | + urllib.parse.quote(self.query)
82 | + "&fr2=piv-web&fr=yfp-t-905-s"
83 | )
84 |
85 | requete = get(url, headers=self._headers, timeout=10)
86 | soup = BeautifulSoup(requete.text, "html.parser")
87 | container = soup.find("ul", {"id": "sres"})
88 | try:
89 | lis = container.find_all("li")
90 | except Exception as e:
91 | print(e)
92 | return result
93 |
94 | if len(lis) == 0:
95 | return result
96 |
97 | for li in lis:
98 | try:
99 | img = li.find("img")
100 | img = str(img["data-src"]).split("&pid")
101 | result.append(str(img[0]))
102 |
103 | except Exception as e:
104 | print(e)
105 | continue
106 |
107 | # Cache the results to speed up the next identical call.
108 | self.__data["images"] = (self.query, result)
109 | return result
110 |
111 | @property
112 | def pages(self):
113 | """
114 | Retrieve all result page links
115 | for the given keywords.
116 | """
117 | # Check whether the results are already cached.
118 | if self.__data.get("pages"):
119 | if self.__data["pages"][0] == self.query:
120 | return self.__data["pages"][1]
121 | result = []
122 |
123 | url = (
124 | "https://www.google.com/search?client=firefox-b-d&q="
125 | + urllib.parse.quote(self.query)
126 | )
127 |
128 | requete = get(url, headers=self._headers, timeout=10)
129 | soup = BeautifulSoup(requete.text, "html.parser")
130 | a = soup.find_all("a")
131 | for link in a:
132 | tmp = link["href"][7:-1].split("&")
133 | if tmp[0].startswith("http"):
134 | result.append(urllib.parse.unquote(tmp[0]))
135 | # Cache the results to speed up the next identical call.
136 | self.__data["pages"] = (self.query, result)
137 | """ On enleve les deux liens non necessaire à la fin du liste
138 | -> https://support.google.com/websearch?p=...
139 | -> https://accounts.google.com/ServiceLogin?continue=...
140 | """
141 | return result[:-2]
142 |
143 | @property
144 | def pdf(self):
145 | """
146 | Retrieve only PDF results.
147 | """
148 | return self.custom("pdf", "application/pdf")
149 |
150 | @property
151 | def docx(self):
152 | """
153 | Retrieve Word (docx) documents.
154 | """
155 | # Check whether the results are already cached.
156 | if self.__data.get("docx"):
157 | if self.__data["docx"][0] == self.query:
158 | return self.__data["docx"][1]
159 | tmp = self.query
160 | self.query = "filetype:docx " + self.query
161 | result = self.__verif_content(
162 | self.pages,
163 | "application/vnd.openxmlformats-officedocument"
164 | ".wordprocessingml.document",
165 | )
166 |
167 | self.query = tmp
168 | # Cache the results to speed up the next identical call.
169 | self.__data["docx"] = (self.query, result)
170 | return result
171 |
172 | @property
173 | def xlsx(self):
174 | """
175 | Retrieve Excel (xlsx) spreadsheets.
176 | """
177 | # Check whether the results are already cached.
178 | if self.__data.get("xlsx"):
179 | if self.__data["xlsx"][0] == self.query:
180 | return self.__data["xlsx"][1]
181 | tmp = self.query
182 | self.query = "filetype:xlsx " + self.query
183 | result = self.__verif_content(
184 | self.pages,
185 | "application/vnd.openxmlformats-officedocument" ".spreadsheetml.sheet",
186 | )
187 | self.query = tmp
188 |
189 | # Cache the results to speed up the next identical call.
190 | self.__data["xlsx"] = (self.query, result)
191 | return result
192 |
193 | @property
194 | def pptx(self):
195 | """Fonction pour récupérer les excels"""
196 | # Vérifier si les résultat ne sont pas déjà enregistrer
197 | if self.__data.get("pptx"):
198 | if self.__data["pptx"][0] == self.query:
199 | return self.__data["pptx"][1]
200 | tmp = self.query
201 | self.query = "filetype:pptx " + self.query
202 |
203 | result = self.__verif_content(
204 | self.pages,
205 | "application/vnd.openxmlformats-officedocument"
206 | ".presentationml.presentation",
207 | )
208 | self.query = tmp
209 |
210 | # Cache the results to speed up the next identical call.
211 | self.__data["pptx"] = (self.query, result)
212 | return result
213 |
214 | @property
215 | def odt(self):
216 | """
217 | Retrieve only ODT documents.
218 | """
219 | # Check whether the results are already cached.
220 | if self.__data.get("odt"):
221 | if self.__data["odt"][0] == self.query:
222 | return self.__data["odt"][1]
223 | tmp = self.query
224 | self.query = "filetype:odt " + self.query
225 |
226 | result = self.__verif_content(
227 | self.pages, "application/vnd.oasis.opendocument.text"
228 | )
229 | self.query = tmp
230 |
231 | # Cache the results to speed up the next identical call.
232 | self.__data["odt"] = (self.query, result)
233 | return result
234 |
235 | @property
236 | def ods(self):
237 | """
238 | Retrieve only ODS documents.
239 | """
240 | # Check whether the results are already cached.
241 | if self.__data.get("ods"):
242 | if self.__data["ods"][0] == self.query:
243 | return self.__data["ods"][1]
244 | tmp = self.query
245 | self.query = "filetype:ods " + self.query
246 | result = self.__verif_content(
247 | self.pages, "application/vnd.oasis.opendocument.spreadsheet"
248 | )
249 | self.query = tmp
250 | # Cache the results to speed up the next identical call.
251 | self.__data["ods"] = (self.query, result)
252 | return result
253 |
254 | @property
255 | def odp(self):
256 | """
257 | Retrieve only ODP documents.
258 | """
259 | # Check whether the results are already cached.
260 | if self.__data.get("odp"):
261 | if self.__data["odp"][0] == self.query:
262 | return self.__data["odp"][1]
263 | tmp = self.query
264 | self.query = "filetype:odp " + self.query
265 | result = self.__verif_content(
266 | self.pages, "application/vnd.oasis.opendocument.presentation"
267 | )
268 | self.query = tmp
269 | # Cache the results to speed up the next identical call.
270 | self.__data["odp"] = (self.query, result)
271 | return result
272 |
273 | @property
274 | def kml(self):
275 | """
276 | Retrieve geographic project files
277 | for Google Earth in KML format.
278 | """
279 | # Check whether the results are already cached.
280 | if self.__data.get("kml"):
281 | if self.__data["kml"][0] == self.query:
282 | return self.__data["kml"][1]
283 | tmp = self.query
284 | self.query = "filetype:kml " + self.query
285 | result = self.__verif_content(
286 | self.pages, "application/vnd.google-earth.kml+xml"
287 | )
288 | self.query = tmp
289 |
290 | # Cache the results to speed up the next identical call.
291 | self.__data["kml"] = (self.query, result)
292 | return result
293 |
294 | def custom(self, extension="pdf", mimetype=None):
295 | """
296 | Retrieve files matching the requested extension
297 | and the MIME type that goes with it.
298 |
299 | Keyword arguments:
300 | extension -- the file extension (default "pdf")
301 | mimetype -- the MIME type matching the extension (looked up in extension.json if omitted)
302 | """
303 | # Check whether the results are already cached.
304 | if self.__data.get(extension):
305 | if self.__data[extension][0] == self.query:
306 | return self.__data[extension][1]
307 | tmp = self.query
308 | self.query = f"filetype:{extension} {self.query}"
309 |
310 | if not mimetype:
311 | with open(os.path.join(__location__, "extension.json")) as file:
312 | mimetype = json.load(file).get(extension)
313 |
314 | if mimetype:
315 | result = self.__verif_content(self.pages, mimetype)
316 | self.query = tmp
317 | # Cache the results to speed up the next identical call.
318 | self.__data[extension] = (self.query, result)
319 | return result
320 | else:
321 | return """Can't find mimetype that match this extension\n
322 | Please provide the mimetypes as arguments.
323 | """
324 |
325 | def custom_search(self, *args, **kwargs):
326 | raise Exception(
327 | "`custom_search` is deprecated since v1.0.4, use `custom` instead"
328 | )
329 |
--------------------------------------------------------------------------------