├── .bowerrc ├── .dockerignore ├── .editorconfig ├── .flake8.ini ├── .github ├── CONTRIBUTING.md ├── ISSUE_TEMPLATE.md └── PULL_REQUEST_TEMPLATE.md ├── .gitignore ├── .travis.yml ├── .yaydoc.yml ├── Dockerfile ├── LICENSE ├── Procfile ├── README.md ├── app.json ├── app ├── __init__.py ├── query_cache.py ├── scrapers │ ├── __init__.py │ ├── ask.py │ ├── baidu.py │ ├── bing.py │ ├── dailymotion.py │ ├── duckduckgo.py │ ├── exalead.py │ ├── generalized.py │ ├── google.py │ ├── mojeek.py │ ├── parsijoo.py │ ├── quora.py │ ├── twitter.py │ ├── yahoo.py │ └── youtube.py ├── server.py ├── static │ ├── css │ │ └── styles.css │ └── images │ │ ├── ask_icon.ico │ │ ├── baidu_icon.ico │ │ ├── bing_icon.ico │ │ ├── dailymotion_icon.png │ │ ├── duckduckgo_icon.png │ │ ├── exalead_icon.png │ │ ├── favicon.ico │ │ ├── forkme_right_green_007200.png │ │ ├── foss_asia.png │ │ ├── google_icon.png │ │ ├── mojeek_icon.png │ │ ├── parsijoo_icon.png │ │ ├── quora_icon.png │ │ ├── ripple.gif │ │ ├── twitter_icon.png │ │ ├── yahoo_icon.ico │ │ └── youtube_icon.png └── templates │ └── index.html ├── bandit.yml ├── bower.json ├── codecov.yml ├── docker-compose.yml ├── docs └── installation │ ├── docker.md │ ├── heroku.md │ └── local.md ├── manifest.yml ├── package-lock.json ├── package.json ├── requirements-dev.txt ├── requirements.txt ├── runtime.txt ├── scalingo.json └── test ├── __init__.py ├── test_ask.py ├── test_baidu.py ├── test_bing.py ├── test_duckduckgo.py ├── test_generalized.py ├── test_google.py ├── test_mojeek.py ├── test_parsijoo.py ├── test_quora.py ├── test_server.py ├── test_twitter.py ├── test_yahoo.py └── test_youtube.py /.bowerrc: -------------------------------------------------------------------------------- 1 | { 2 | "directory" : "app/static/bower_components" 3 | } -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | .git 2 | .dockerignore 3 | 4 | ##################### 5 | # .gitignore copied 6 | ##################### 7 | 8 | .cache/* 9 | __pycache__/* 10 | node_modules/* 11 | .coverage 12 | .idea/* 13 | *.pyc 14 | *.py.bak 15 | *.swp 16 | app/static/bower_components/ -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | charset = utf-8 5 | indent_style = space 6 | indent_size = 4 7 | end_of_line = lf 8 | insert_final_newline = true 9 | trim_trailing_whitespace = true 10 | 11 | [*.html] 12 | indent_size = 4 13 | 14 | [*.js] 15 | indent_size = 4 16 | 17 | [*.css] 18 | indent_size = 4 -------------------------------------------------------------------------------- /.flake8.ini: -------------------------------------------------------------------------------- 1 | [flake8] 2 | exclude = 3 | .git, 4 | __pycache__ 5 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## Contributions Best Practices 2 | 3 | **Commits** 4 | * Write clear meaningful git commit messages (Do read http://chris.beams.io/posts/git-commit/) 5 | * Make sure your PR's description contains GitHub's special keyword references that automatically close the related issue when the PR is merged. 
(More info at https://github.com/blog/1506-closing-issues-via-pull-requests ) 6 | * When you make very minor changes to a PR of yours (for example, fixing a failing Travis build, some small style corrections or minor changes requested by reviewers) make sure you squash your commits afterwards so that you don't have an absurd number of commits for a very small fix. (Learn how to squash at https://davidwalsh.name/squash-commits-git ) 7 | * When you're submitting a PR for a UI-related issue, it would be really awesome if you add a screenshot of your change or a link to a deployment where it can be tested out along with your PR. It makes it very easy for the reviewers and you'll also get reviews quicker. 8 | 9 | **Code Styleguide** 10 | * Do follow the .editorconfig file regarding code style (It's mandatory). 11 | * For more information regarding the .editorconfig file, see [editorconfig](http://editorconfig.org/#download) 12 | 13 | **Feature Requests and Bug Reports** 14 | * When you file a feature request or when you are submitting a bug report to the [issue tracker](https://github.com/fossasia/query-server/issues), make sure you add steps to reproduce it, especially if that bug is some weird/rare one. 15 | 16 | **Join the development** 17 | * Before you join development, please set up the project on your local machine, run it and go through the application completely. Press on any button you can find and see where it leads to. Explore. (Don't worry ... Nothing will happen to the app or to you due to the exploring :wink: Only thing that will happen is, you'll be more familiar with what is where and might even get some cool ideas on how to improve various aspects of the app.) 18 | * If you would like to work on an issue, drop in a comment at the issue. If it is already assigned to someone, but there is no sign of any work being done, please feel free to drop in a comment so that the issue can be assigned to you if the previous assignee has dropped it entirely. 19 | 20 | Do read the [Open Source Developer Guide and Best Practices at FOSSASIA](https://blog.fossasia.org/open-source-developer-guide-and-best-practices-at-fossasia). 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 2 | **I'm submitting a ...** 3 | - [ ] bug report 4 | - [ ] feature request 5 | 6 | **Current behavior:** 7 | 8 | 9 | **Expected behavior:** 10 | 11 | 12 | **Steps to reproduce:** 13 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 2 | Fixes # 3 | 4 | #### Checklist 5 | 6 | - [ ] I have read the [Contribution & Best practices Guide](https://blog.fossasia.org/open-source-developer-guide-and-best-practices-at-fossasia) and my PR follows them. 7 | - [ ] My branch is up-to-date with the Upstream `master` branch. 
8 | - [ ] I have added necessary documentation (if appropriate) 9 | 10 | #### Changes proposed in this pull request: 11 | 12 | - 13 | - 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .cache/* 2 | __pycache__/* 3 | venv/* 4 | node_modules/* 5 | app/static/bower_components/* 6 | .coverage 7 | .idea/* 8 | *.pyc 9 | *.py.bak 10 | *.swp 11 | Pipfile 12 | Pipfile.lock 13 | .vscode/* 14 | 15 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - "2.7" 5 | - "3.6" 6 | 7 | services: 8 | - mongodb 9 | 10 | install: 11 | - pip install codecov flake8 pytest 12 | - pip install -r requirements-dev.txt 13 | - pip install -r requirements.txt 14 | 15 | before_script: 16 | - flake8 . --count --max-complexity=16 --show-source --statistics 17 | 18 | script: 19 | - python -m app.server > /dev/null & 20 | - pytest --cov=./ 21 | - kill $(lsof -t -i:7001) 22 | 23 | after_success: 24 | - bash <(curl -s https://codecov.io/bash) 25 | -------------------------------------------------------------------------------- /.yaydoc.yml: -------------------------------------------------------------------------------- 1 | metadata: 2 | author: FOSSASIA 3 | projectname: "Query Server" 4 | version: development 5 | build: 6 | theme: 7 | name: sphinx_fossasia_theme 8 | source: . 9 | publish: 10 | ghpages: 11 | docurl: query-server.fossasia.org 12 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:boron 2 | MAINTAINER Afroz Ahamad 3 | 4 | RUN mkdir -p /usr/src/app 5 | WORKDIR /usr/src/app 6 | 7 | RUN apt-get update && apt-get install -y --no-install-recommends \ 8 | build-essential \ 9 | python3-dev \ 10 | libpq-dev \ 11 | libevent-dev \ 12 | libmagic-dev \ 13 | python3-pip && apt-get clean -y 14 | 15 | # copy requirements 16 | COPY package.json /usr/src/app/ 17 | COPY bower.json /usr/src/app/ 18 | COPY .bowerrc /usr/src/app 19 | COPY requirements.txt /usr/src/app/ 20 | 21 | # install requirements 22 | RUN npm install 23 | RUN npm install --global bower 24 | RUN bower --allow-root install 25 | RUN pip3 install -r requirements.txt 26 | 27 | # Bundle app source 28 | COPY . /usr/src/app 29 | 30 | EXPOSE 7001 31 | 32 | CMD [ "python3", "app/server.py" ] 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | web: python app/server.py -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Query-Server 2 | 3 | [![Build Status](https://travis-ci.org/fossasia/query-server.svg?branch=master)](https://travis-ci.org/fossasia/query-server) 4 | [![Dependency Status](https://david-dm.org/fossasia/query-server.svg)](https://david-dm.org/fossasia/query-server) 5 | [![Join the chat at https://gitter.im/fossasia/query-server](https://badges.gitter.im/fossasia/query-server.svg)](https://gitter.im/fossasia/query-server?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) 6 | [![codecov](https://codecov.io/gh/fossasia/query-server/branch/master/graph/badge.svg)](https://codecov.io/gh/fossasia/query-server) 7 | 8 | The query server can be used to search a keyword/phrase on a search engine (Google, Yahoo, Bing, Ask, DuckDuckGo, Baidu, Exalead, Quora, Parsijoo, Dailymotion, Mojeek and YouTube) and get the results as `json`, `xml` or `csv`. The tool also stores the searched query string in a MongoDB database for analytical purposes. 
9 | 10 | [![Deploy to Docker Cloud](https://files.cloud.docker.com/images/deploy-to-dockercloud.svg)](https://cloud.docker.com/stack/deploy/?repo=https://github.com/fossasia/query-server) [![Deploy](https://www.herokucdn.com/deploy/button.svg)](https://heroku.com/deploy?template=https://github.com/fossasia/query-server) [![Deploy on Scalingo](https://cdn.scalingo.com/deploy/button.svg)](https://my.scalingo.com/deploy?source=https://github.com/fossasia/query-server#master) [![Deploy to Bluemix](https://bluemix.net/deploy/button.png)](https://bluemix.net/deploy?repository=https://github.com/fossasia/query-server&branch=master) 11 | 12 | ## Table of Contents 13 | 14 | - [Test Deployment](#test-deployment) 15 | - [API](#api) 16 | - [Error Codes](#error-codes) 17 | - [Dependencies](#dependencies) 18 | - [Installation](#installation) 19 | - [Contribute](#contribute) 20 | 21 | ## Test Deployment 22 | 23 | A test deployment of the project is available here: https://query-server.herokuapp.com 24 | 25 | ## API 26 | 27 | The API provided by query-server is as follows: 28 | 29 | ` GET /api/v1/search/<search-engine>?query=query&format=format ` 30 | 31 | > *search-engine* : [`google`, `ask`, `bing`, `duckduckgo`, `yahoo`, `baidu`, `exalead`, `quora`, `youtube`, `parsijoo`, `mojeek`, `dailymotion`] 32 | 33 | > *query* : query can be any string 34 | 35 | > *format* : [`json`, `xml`, `csv`] 36 | 37 | A sample query : `/api/v1/search/bing?query=fossasia&format=xml&num=10` (the optional `num` parameter sets how many results are returned and defaults to 10) 38 | 39 | ## Error Codes 40 | 404 Not Found : Incorrect Search Engine, Zero Response 41 | 400 Bad Request : query and/or format is not in the correct format 42 | 500 Internal Server Error : Server Error from Search Engine 43 | 44 | ## Dependencies 45 | 46 | * [MongoDB](https://www.mongodb.com) 47 | * [Python 2.7](https://python.org) 48 | * [BeautifulSoup4](https://www.crummy.com/software/BeautifulSoup/bs4/doc) 49 | * [dicttoxml](https://github.com/quandyfactory/dicttoxml) 50 | * [Flask](http://flask.pocoo.org) 51 | * [pymongo](https://api.mongodb.com/python/current) 52 | * [requests](http://docs.python-requests.org) 53 | * [Node.js](https://nodejs.org/en) 54 | * [bower.io](https://bower.io) 55 | 56 | ## Installation 57 | 58 | 1. [Local Installation](/docs/installation/local.md) 59 | 60 | 2. [Deployment on Heroku](/docs/installation/heroku.md) 61 | 62 | 3. [Deployment with Docker](/docs/installation/docker.md) 63 | 64 | 65 | ## Contribute 66 | 67 | Found an issue? Post it in the [issue tracker](https://github.com/fossasia/query-server/issues). For pull requests, please read [Open Source Developer Guide and Best Practices at FOSSASIA](https://blog.fossasia.org/open-source-developer-guide-and-best-practices-at-fossasia/). 68 | 69 | ## License 70 | 71 | This project is currently licensed under the Apache License version 2.0. A copy of `LICENSE` should be present along with the source code. To obtain the software under a different license, please contact [FOSSASIA](http://blog.fossasia.org/contact/). 
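## Example Request

A minimal sketch of calling the API from Python (assuming a locally running instance on the default port `7001`; the engine, query and parameter values here are placeholders you can swap freely):

```python
import requests

# Ask the local query-server for 5 Bing results about "fossasia" as JSON.
response = requests.get(
    "http://localhost:7001/api/v1/search/bing",
    params={"query": "fossasia", "format": "json", "num": 5},
)
response.raise_for_status()

for result in response.json():
    print(result["title"], "->", result["link"])
```

For this request each result is a dictionary with a `title`, a `link` and usually a `desc` field; other engines and query types may return slightly different keys.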
72 | 73 | -------------------------------------------------------------------------------- /app.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "query-server", 3 | "description": "Query server that stores a query string on a server.", 4 | "repository": "https://github.com/fossasia/query-server/", 5 | "logo": "http://labs.fossasia.org/images/fossasia.png", 6 | "keywords": [ 7 | "query-server", 8 | "fossasia" 9 | ], 10 | "addons": [ 11 | { 12 | "plan": "mongolab:sandbox", 13 | "as": "MONGO" 14 | } 15 | ], 16 | "buildpacks": [ 17 | { 18 | "url": "heroku/python" 19 | }, 20 | { 21 | "url": "heroku/nodejs" 22 | } 23 | ], 24 | "scripts": { 25 | "postinstall": "bower install" 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /app/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fossasia/query-server/51c98716e1ec9cde6023a6b3ac2eb3081daa9e82/app/__init__.py -------------------------------------------------------------------------------- /app/query_cache.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | query_cache.py -- Implements a caching system for the query server based on MongoDB 5 | 6 | Before sending a query to a remote search engine, use lookup() to see if results 7 | from that same search engine and query are already in the cache. If so, then 8 | print a cache hit message and return the cached results. If not, then use store() 9 | to write the search engine, query, query results, and a creation datetime into 10 | the cache. MongoDB will use the datetime to automatically delete outdated 11 | query results. 12 | 13 | Ideas for improvement: 14 | * Add a lookup_count to see how often the cache actually saves us time. 
15 | """ 16 | 17 | import datetime as dt 18 | import os 19 | 20 | from pymongo import DESCENDING, MongoClient 21 | from pymongo.errors import OperationFailure 22 | 23 | client = MongoClient(os.environ.get('MONGO_URI', 'mongodb://localhost:27017/')) 24 | db = client['query-server-v2'] 25 | db = db['queries'] # Automatically delete records that are older than one day 26 | try: 27 | db.create_index([('createdAt', DESCENDING)], 28 | expireAfterSeconds=60 * 60 * 24) 29 | except OperationFailure: 30 | pass # Database index already exists 31 | 32 | 33 | def lookup(url): 34 | """return search result if the URL is in the db or None on a cache miss.""" 35 | data = db.find_one({'url': url}) or {} 36 | return data.get('links', None) 37 | 38 | 39 | def store(url, links): 40 | """write the URL, the links, and a UTC timestamp into the database.""" 41 | db.delete_many({'url': url}) # remove all records for this URL 42 | db.insert({'url': url, 'links': links, 'createdAt': dt.datetime.utcnow()}) 43 | 44 | 45 | if __name__ == '__main__': 46 | url = 'test_url' 47 | print(lookup(url)) 48 | store(url, 'a b c d e'.split()) 49 | print(lookup(url)) 50 | -------------------------------------------------------------------------------- /app/scrapers/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | from .ask import Ask 4 | from .baidu import Baidu 5 | from .bing import Bing 6 | from .dailymotion import DailyMotion 7 | from .duckduckgo import DuckDuckGo 8 | from .exalead import ExaLead 9 | from .google import Google 10 | from .mojeek import Mojeek 11 | from .parsijoo import Parsijoo 12 | from .quora import Quora 13 | from .twitter import Twitter 14 | from .yahoo import Yahoo 15 | from .youtube import Youtube 16 | 17 | scrapers = { 18 | 'ask': Ask(), 19 | 'baidu': Baidu(), 20 | 'bing': Bing(), 21 | 'dailymotion': DailyMotion(), 22 | 'duckduckgo': DuckDuckGo(), 23 | 'exalead': ExaLead(), 24 | 'google': Google(), 25 | 'mojeek': Mojeek(), 26 | 'parsijoo': Parsijoo(), 27 | 'quora': Quora(), 28 | 'twitter': Twitter(), 29 | 'yahoo': Yahoo(), 30 | 'youtube': Youtube() 31 | } 32 | 33 | 34 | def small_test(): 35 | assert isinstance(scrapers['google'].search('fossasia', 1), list) 36 | 37 | 38 | def feed_gen(query, engine, count=10, qtype=''): 39 | engine = engine.lower() 40 | # provide temporary backwards compatibility for old names 41 | old_names = {'ubaidu': 'baidu', 42 | 'vdailymotion': 'dailymotion', 43 | 'tyoutube': 'youtube'} 44 | engine = old_names.get(engine, engine) 45 | if engine in ('quora', 'youtube'): 46 | urls = scrapers[engine].search_without_count(query) 47 | else: 48 | urls = scrapers[engine].search(query, count, qtype) 49 | return urls 50 | -------------------------------------------------------------------------------- /app/scrapers/ask.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from .generalized import Scraper 3 | 4 | 5 | class Ask(Scraper): 6 | """Scrapper class for Ask""" 7 | def __init__(self): 8 | Scraper.__init__(self) 9 | self.url = 'http://ask.com/web' 10 | self.videoURL = 'https://www.ask.com/youtube' 11 | self.defaultStart = 1 12 | self.startKey = 'page' 13 | self.name = 'ask' 14 | 15 | @staticmethod 16 | def next_start(current_start, prev_results): 17 | return current_start + 1 18 | 19 | @staticmethod 20 | def parse_response(soup): 21 | """ Parse the response and return set of urls 22 | Returns: urls (list) 23 | 
[[Tile1,url1], [Title2, url2],..] 24 | """ 25 | urls = [] 26 | if soup.find('div', class_='PartialSearchResults-noresults'): 27 | return None 28 | for div in soup.findAll('div', class_='PartialSearchResults-item'): 29 | title = div.div.a.text 30 | url = div.div.a['href'] 31 | try: 32 | p = div.find('p', class_='PartialSearchResults-item-abstract') 33 | desc = p.text.replace('\n', '') 34 | urls.append({'title': title, 'link': url, 'desc': desc}) 35 | except Exception: 36 | urls.append({'title': title, 'link': url}) 37 | print('Ask parsed: ' + str(urls)) 38 | return urls 39 | 40 | @staticmethod 41 | def parse_video_response(soup): 42 | """ Parse response and returns the urls 43 | 44 | Returns: urls (list) 45 | [[Tile1, url1], [Title2, url2], ...] 46 | """ 47 | urls = [] 48 | for div in soup.findAll('div', attrs={'class': 'v-info'}): 49 | title = div.div.find('a').getText() 50 | url = 'https' + div.div.a.get('href') 51 | desc = div.find('div', attrs={'class': 'desc'}).getText() 52 | urls.append({ 53 | 'title': title, 54 | 'link': url, 55 | 'desc': desc 56 | }) 57 | 58 | print('Ask parsed: ' + str(urls)) 59 | 60 | return urls 61 | -------------------------------------------------------------------------------- /app/scrapers/baidu.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from .generalized import Scraper 3 | 4 | 5 | class Baidu(Scraper): 6 | """Scrapper class for Baidu""" 7 | 8 | def __init__(self): 9 | Scraper.__init__(self) 10 | self.url = 'https://www.baidu.com/s' 11 | self.newsURL = 'http://news.baidu.com/ns' 12 | self.defaultStart = 0 13 | self.queryKey = 'word' 14 | self.startKey = 'pn' 15 | self.name = 'baidu' 16 | 17 | @staticmethod 18 | def parse_response(soup): 19 | """ Parse the response and return set of urls 20 | Returns: urls (list) 21 | [[Tile1,url1], [Title2, url2],..] 22 | """ 23 | urls = [] 24 | for div in soup.findAll('div', {'class': 'result'}): 25 | title = div.h3.a.getText() 26 | url = div.h3.a['href'] 27 | urls.append({'title': title, 'link': url}) 28 | 29 | print('Baidu parsed: ' + str(urls)) 30 | 31 | return urls 32 | 33 | @staticmethod 34 | def parse_news_response(soup): 35 | """ Parse the response and return set of urls 36 | Returns: urls (list) 37 | [[Tile1,url1], [Title2, url2],..] 38 | """ 39 | urls = [] 40 | for h3 in soup.findAll('h3', {'class': 'c-title'}): 41 | title = h3.a.getText() 42 | link = h3.a.get('href') 43 | urls.append({'title': title, 'link': link}) 44 | 45 | print('Baidu parsed: ' + str(urls)) 46 | 47 | return urls 48 | -------------------------------------------------------------------------------- /app/scrapers/bing.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from .generalized import Scraper 3 | 4 | 5 | class Bing(Scraper): 6 | """Scrapper class for Bing""" 7 | 8 | def __init__(self): 9 | Scraper.__init__(self) 10 | self.url = 'http://www.bing.com/search' 11 | self.videoURL = 'https://www.bing.com/videos/search' 12 | self.imageURL = 'https://www.bing.com/images/search' 13 | self.newsURL = 'https://www.bing.com/news/search' 14 | self.defaultStart = 1 15 | self.startKey = 'first' 16 | self.name = 'bing' 17 | 18 | @staticmethod 19 | def parse_response(soup): 20 | """ Parses the reponse and return set of urls 21 | Returns: urls (list) 22 | [[Tile1,url1], [Title2, url2],..] 
23 | """ 24 | urls = [] 25 | for li in soup.findAll('li', {'class': 'b_algo'}): 26 | title = li.h2.text.replace('\n', '').replace(' ', '') 27 | url = li.h2.a['href'] 28 | desc = li.find('p').text 29 | url_entry = {'title': title, 30 | 'link': url, 31 | 'desc': desc} 32 | urls.append(url_entry) 33 | 34 | print('Bing parsed: ' + str(urls)) 35 | 36 | return urls 37 | 38 | @staticmethod 39 | def parse_video_response(soup): 40 | """ Parse response and returns the urls 41 | 42 | Returns: urls (list) 43 | [[Tile1, url1], [Title2, url2], ...] 44 | """ 45 | urls = [] 46 | for a in soup.findAll('a', attrs={'class': 'mc_vtvc_link'}): 47 | title = a.get('aria-label').split(' Duration')[0] 48 | url = 'https://www.bing.com' + a.get('href') 49 | urls.append({ 50 | 'title': title, 51 | 'link': url 52 | }) 53 | 54 | print('Bing parsed: ' + str(urls)) 55 | 56 | return urls 57 | 58 | @staticmethod 59 | def parse_image_response(soup): 60 | """ Parse response and returns the urls 61 | 62 | Returns: urls (list) 63 | [[url1], [url2], ...] 64 | """ 65 | urls = [] 66 | for a in soup.findAll('a', attrs={'class': 'iusc'}): 67 | url = 'https://www.bing.com' + a.get('href') 68 | urls.append({ 69 | 'link': url 70 | }) 71 | 72 | print('Bing parsed: ' + str(urls)) 73 | 74 | return urls 75 | 76 | @staticmethod 77 | def parse_news_response(soup): 78 | """ Parses the reponse and return set of urls 79 | Returns: urls (list) 80 | [[Tile1,url1], [Title2, url2],..] 81 | """ 82 | urls = [] 83 | for div in soup.findAll('div', {'class': 't_s'}): 84 | link = div.find('a', {'class': 'title'}) 85 | url = link['href'] 86 | title = link.getText() 87 | title = title.replace('\n', '').replace(' ', '') 88 | desc = div.find('div', {'class': 'snippet'}).getText() 89 | desc = desc.replace('\n', '').replace(' ', '') 90 | url_entry = {'title': title, 91 | 'link': url, 92 | 'desc': desc} 93 | urls.append(url_entry) 94 | 95 | print('Bing parsed: ' + str(urls)) 96 | 97 | return urls 98 | -------------------------------------------------------------------------------- /app/scrapers/dailymotion.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from .generalized import Scraper 3 | import json 4 | 5 | 6 | class DailyMotion(Scraper): 7 | """Scraper class for DailyMotion""" 8 | 9 | def __init__(self): 10 | Scraper.__init__(self) 11 | self.url = 'https://api.dailymotion.com/videos/' 12 | self.queryKey = 'search' 13 | self.startKey = 'page' 14 | self.defaultStart = 1 15 | self.name = 'dailymotion' 16 | 17 | @staticmethod 18 | def parse_response(soup): 19 | """ Parse the response and return set of urls 20 | Returns: urls (list) 21 | [[Tile1,url1], [Title2, url2],..] 
22 | """ 23 | urls = [] 24 | 25 | video_list = json.loads(str(soup))['list'] 26 | for item in video_list: 27 | title = item['title'] 28 | link = 'https://www.dailymotion.com/video/' + str(item['id']) 29 | urls.append({'title': title, 'link': link}) 30 | 31 | print('Dailymotion parsed: ' + str(urls)) 32 | 33 | return urls 34 | -------------------------------------------------------------------------------- /app/scrapers/duckduckgo.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from .generalized import Scraper 3 | 4 | 5 | class DuckDuckGo(Scraper): 6 | """Scrapper class for DuckDuckGo""" 7 | 8 | def __init__(self): 9 | Scraper.__init__(self) 10 | self.url = 'https://duckduckgo.com/html' 11 | self.defaultStart = 0 12 | self.startKey = 's' 13 | self.name = 'duckduckgo' 14 | 15 | @staticmethod 16 | def parse_response(soup): 17 | """ Parse the response and return set of urls 18 | Returns: urls (list) 19 | [[Tile1,url1], [Title2, url2],..] 20 | """ 21 | urls = [] 22 | for links in soup.findAll('a', {'class': 'result__a'}): 23 | urls.append({'title': links.getText(), 24 | 'link': links.get('href')}) 25 | 26 | print('DuckDuckGo parsed: ' + str(urls)) 27 | 28 | return urls 29 | -------------------------------------------------------------------------------- /app/scrapers/exalead.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from .generalized import Scraper 3 | 4 | 5 | class ExaLead(Scraper): 6 | """Scraper class for ExaLead""" 7 | 8 | def __init__(self): 9 | Scraper.__init__(self) 10 | self.url = 'https://www.exalead.com/search/web/results/' 11 | self.defaultStart = 0 12 | self.startKey = 'start_index' 13 | self.name = 'exalead' 14 | 15 | @staticmethod 16 | def parse_response(soup): 17 | """ Parse the response and return set of urls 18 | Returns: urls (list) 19 | [[Tile1,url1], [Title2, url2],..] 
20 | """ 21 | urls = [] 22 | for a in soup.findAll('a', {'class': 'title'}): 23 | urls.append({ 24 | 'title': a.getText(), 25 | 'link': a.get('href') 26 | }) 27 | print('Exalead parsed: ' + str(urls)) 28 | return urls 29 | -------------------------------------------------------------------------------- /app/scrapers/generalized.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import requests 3 | from bs4 import BeautifulSoup 4 | 5 | VID_SCRAPERS = ('ask', 'bing', 'parsijoo', 'yahoo') 6 | ISCH_SCRAPERS = ('bing', 'parsijoo', 'yahoo') 7 | NEWS_SCRAPERS = ('baidu', 'bing', 'parsijoo', 'mojeek') 8 | 9 | 10 | class Scraper: 11 | """Generalized scraper""" 12 | url = '' 13 | startKey = '' 14 | queryKey = 'q' 15 | defaultStart = 0 16 | qtype = '' 17 | headers = { 18 | 'User-Agent': ( 19 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) ' 20 | 'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 ' 21 | 'Safari/537.36' 22 | ) 23 | } 24 | 25 | def __init__(self): 26 | self.name = "general" 27 | pass 28 | 29 | def get_page(self, query, startIndex=0, qtype=''): 30 | """ Fetch the google search results page 31 | Returns : Results Page 32 | """ 33 | url = self.url 34 | if qtype == 'vid' and self.name in VID_SCRAPERS: 35 | url = self.videoURL 36 | elif qtype == 'isch' and self.name in ISCH_SCRAPERS: 37 | url = self.imageURL 38 | elif qtype == 'news' and self.name in NEWS_SCRAPERS: 39 | url = self.newsURL 40 | payload = {self.queryKey: query, self.startKey: startIndex, 41 | self.qtype: qtype} 42 | if self.name == 'mojeek' and qtype == 'news': 43 | payload['fmt'] = 'news' 44 | response = requests.get(url, headers=self.headers, params=payload) 45 | print(response.url) 46 | return response 47 | 48 | @staticmethod 49 | def parse_response(soup): 50 | raise NotImplementedError 51 | 52 | @staticmethod 53 | def parse_video_response(soup): 54 | raise NotImplementedError 55 | 56 | @staticmethod 57 | def next_start(current_start, prev_results): 58 | return current_start + len(prev_results) 59 | 60 | def search(self, query, num_results, qtype=''): 61 | """ 62 | Search for the query and return set of urls 63 | Returns: list 64 | """ 65 | urls = [] 66 | current_start = self.defaultStart 67 | 68 | while (len(urls) < num_results): 69 | response = self.get_page(query, current_start, qtype) 70 | soup = BeautifulSoup(response.text, 'html.parser') 71 | new_results = self.call_appropriate_parser(qtype, soup) 72 | if new_results is None: 73 | break 74 | urls.extend(new_results) 75 | current_start = self.next_start(current_start, new_results) 76 | return urls[: num_results] 77 | 78 | def call_appropriate_parser(self, qtype, soup): 79 | new_results = '' 80 | if qtype == 'vid' and self.name in VID_SCRAPERS: 81 | new_results = self.parse_video_response(soup) 82 | elif qtype == 'isch' and self.name in ISCH_SCRAPERS: 83 | new_results = self.parse_image_response(soup) 84 | elif qtype == 'news' and self.name in NEWS_SCRAPERS: 85 | new_results = self.parse_news_response(soup) 86 | else: 87 | new_results = self.parse_response(soup) 88 | return new_results 89 | 90 | def search_without_count(self, query): 91 | """ 92 | Search for the query and return set of urls 93 | Returns: list 94 | """ 95 | urls = [] 96 | payload = {self.queryKey: query} 97 | response = requests.get(self.url, headers=self.headers, params=payload) 98 | soup = BeautifulSoup(response.text, 'html.parser') 99 | urls = self.parse_response(soup) 100 | return urls 101 | 
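# --------------------------------------------------------------------------
# Illustrative sketch (not part of the original file): a minimal example of
# how a new engine plugs into the Scraper base class above. Everything below
# is hypothetical -- the engine name, URL and the 'result-link' CSS class are
# placeholders, not a real service. A subclass only sets url/startKey/name and
# implements parse_response(); Scraper.search() then handles pagination via
# get_page() and next_start(). To expose the engine through the HTTP API it
# would also be registered in the `scrapers` dict in app/scrapers/__init__.py.
class ExampleEngine(Scraper):
    """Scraper class for a hypothetical example engine (illustration only)"""

    def __init__(self):
        Scraper.__init__(self)
        self.url = 'https://searx.example.org/search'  # hypothetical endpoint
        self.defaultStart = 0
        self.startKey = 'start'
        self.name = 'example'

    @staticmethod
    def parse_response(soup):
        """Map the result page to the [{'title': ..., 'link': ...}, ...] shape
        the rest of the app expects; returning None stops pagination."""
        urls = []
        for a in soup.findAll('a', {'class': 'result-link'}):  # hypothetical class
            urls.append({'title': a.getText(), 'link': a.get('href')})
        return urls or None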
-------------------------------------------------------------------------------- /app/scrapers/google.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from .generalized import Scraper 3 | 4 | 5 | class Google(Scraper): 6 | """Scrapper class for Google""" 7 | 8 | def __init__(self): 9 | Scraper.__init__(self) 10 | self.url = 'https://www.google.com/search' 11 | self.defaultStart = 0 12 | self.startKey = 'start' 13 | self.qtype = 'tbm' 14 | self.name = 'google' 15 | 16 | @staticmethod 17 | def next_start(current_start, prev_results): 18 | return current_start + len(prev_results) 19 | 20 | @staticmethod 21 | def parse_response(soup): 22 | """ 23 | Parses the response and returns set of urls 24 | Returns: urls (list) 25 | [[Tile1,url1], [Title2, url2],..] 26 | """ 27 | urls = [] 28 | for h3 in soup.findAll('h3', {'class': 'r'}): 29 | links = h3.find('a') 30 | urls.append({'title': links.getText(), 'link': links.get('href')}) 31 | 32 | print('Google parsed: ' + str(urls)) 33 | 34 | return urls 35 | -------------------------------------------------------------------------------- /app/scrapers/mojeek.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from .generalized import Scraper 3 | 4 | 5 | class Mojeek(Scraper): 6 | """Scraper class for Mojeek""" 7 | 8 | def __init__(self): 9 | Scraper.__init__(self) 10 | self.url = 'https://www.mojeek.co.uk/search' 11 | self.newsURL = 'https://www.mojeek.co.uk/search' 12 | self.defaultStart = 1 13 | self.startKey = 's' 14 | self.name = 'mojeek' 15 | 16 | @staticmethod 17 | def parse_response(soup): 18 | """ Parse the response and return set of urls 19 | Returns: urls (list) 20 | [[Tile1,url1], [Title2, url2],..] 21 | """ 22 | urls = [] 23 | for a in soup.findAll('a', {'class': 'ob'}): 24 | title = a.getText() 25 | url = a.get('href') 26 | urls.append({'title': title, 'link': url}) 27 | 28 | print('Mojeek parsed: ' + str(urls)) 29 | 30 | return urls 31 | 32 | @staticmethod 33 | def parse_news_response(soup): 34 | """ Parse response and returns the urls 35 | 36 | Returns: urls (list) 37 | [[url1], [url2], ...] 38 | """ 39 | urls = [] 40 | for a in soup.findAll('a', attrs={'class': 'ob'}): 41 | title = a.getText() 42 | url = a.get('href') 43 | urls.append({ 44 | 'title': title, 45 | 'link': url 46 | }) 47 | 48 | print('Mojeek parsed: ' + str(urls)) 49 | 50 | return urls 51 | -------------------------------------------------------------------------------- /app/scrapers/parsijoo.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from .generalized import Scraper 3 | try: 4 | from urllib.parse import unquote # Python 3 5 | except ImportError: 6 | from urllib import unquote # Python 2 7 | 8 | 9 | class Parsijoo(Scraper): 10 | """Scraper class for Parsijoo""" 11 | 12 | def __init__(self): 13 | Scraper.__init__(self) 14 | self.url = 'https://parsijoo.ir/web' 15 | self.imageURL = 'https://image.parsijoo.ir/image' 16 | self.videoURL = 'https://video.parsijoo.ir/video' 17 | self.newsURL = 'http://khabar.parsijoo.ir/search/' 18 | self.defaultStart = 0 19 | self.newsStart = 1 20 | self.startKey = 'co' 21 | self.name = 'parsijoo' 22 | 23 | @staticmethod 24 | def parse_response(soup): 25 | """ Parse the response and return set of urls 26 | Returns: urls (list) 27 | [[Tile1,url1], [Title2, url2],..] 
28 | """ 29 | urls = [] 30 | for div in soup.findAll('div', {'class': 'result'}): 31 | result_title = div.find('span', {'class': 'result-title'}) 32 | title = result_title.getText()[23:-1] 33 | link = result_title.find('a').get('href') 34 | desc = div.find('span', {'class': 'result-desc'}).getText()[35:-1] 35 | urls.append({'title': title, 'link': link, 'desc': desc}) 36 | 37 | print('Parsijoo parsed: ' + str(urls)) 38 | 39 | return urls 40 | 41 | @staticmethod 42 | def parse_video_response(soup): 43 | """ Parse response and returns the urls 44 | 45 | Returns: urls (list) 46 | [[Tile1, url1], [Title2, url2], ...] 47 | """ 48 | urls = [] 49 | for a in soup.findAll('a', attrs={'class': 'over-page'}): 50 | title = a.get('title') 51 | url = 'https://video.parsijoo.ir' + a.get('href') 52 | urls.append({ 53 | 'title': title, 54 | 'link': url 55 | }) 56 | 57 | print('Parsijoo parsed: ' + str(urls)) 58 | 59 | return urls 60 | 61 | @staticmethod 62 | def parse_image_response(soup): 63 | """ Parse response and returns the urls 64 | 65 | Returns: urls (list) 66 | [[url1], [url2], ...] 67 | """ 68 | urls = [] 69 | for div in soup.find_all('div', class_='image-container overflow'): 70 | a = div.find('a') 71 | url = 'https://image.parsijoo.ir' + a.get('href') 72 | urls.append({ 73 | 'link': url 74 | }) 75 | 76 | print('Parsijoo parsed: ' + str(urls)) 77 | 78 | return urls 79 | 80 | @staticmethod 81 | def parse_news_response(soup): 82 | """ Parse the response and return set of urls 83 | Returns: urls (list) 84 | [[Tile1,url1], [Title2, url2],..] 85 | """ 86 | urls = [] 87 | for div in soup.findAll('div', {'class': 'news-title-link'}): 88 | title = div.a.getText() 89 | link = unquote(div.a.get('href')) 90 | urls.append({'title': title, 'link': link}) 91 | 92 | print('Parsijoo parsed: ' + str(urls)) 93 | 94 | return urls 95 | -------------------------------------------------------------------------------- /app/scrapers/quora.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from .generalized import Scraper 3 | 4 | 5 | class Quora(Scraper): 6 | """Scrapper class for Quora""" 7 | 8 | def __init__(self): 9 | Scraper.__init__(self) 10 | self.url = 'https://www.quora.com/search' 11 | self.name = 'quora' 12 | 13 | @staticmethod 14 | def parse_response(soup): 15 | """ Parse the response and return set of urls 16 | Returns: urls (list) 17 | [[Tile1,url1], [Title2, url2],..] 18 | """ 19 | urls = [] 20 | for a in soup.findAll('a', {'class': 'question_link'}): 21 | link = 'https://www.quora.com' + str(a.get('href')) 22 | urls.append({'title': a.getText(), 'link': link}) 23 | 24 | print('Quora parsed: ' + str(urls)) 25 | 26 | return urls 27 | -------------------------------------------------------------------------------- /app/scrapers/twitter.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from .generalized import Scraper 3 | 4 | 5 | class Twitter(Scraper): 6 | """This scraper takes a query and a count and returns the results of 7 | a Twitter search which is executed via the Loklak API""" 8 | 9 | def __init__(self): 10 | Scraper.__init__(self) 11 | self.loklakURL = 'http://api.loklak.org/api/search.json?q=' 12 | 13 | def search(self, query, num_results, qtype=''): 14 | """ Makes a GET request to Loklak API and returns the URLs 15 | Returns: urls (list) 16 | [[Title1,url1], [Title2, url2],..] 
17 | """ 18 | encodedQuery = requests.utils.quote(query, safe='') 19 | url = self.loklakURL+encodedQuery 20 | 21 | responses = requests.get(url).json() 22 | 23 | tweets = [] 24 | for response in responses['statuses']: 25 | tweets.append({'link': response['link'], 'text': response['text']}) 26 | 27 | print('Twitter parsed: ' + str(tweets)) 28 | 29 | return tweets[:num_results] 30 | -------------------------------------------------------------------------------- /app/scrapers/yahoo.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from .generalized import Scraper 3 | import re 4 | try: 5 | from urllib.parse import unquote # Python 3 6 | except ImportError: 7 | from urllib import unquote # Python 2 8 | 9 | 10 | class Yahoo(Scraper): 11 | """Scrapper class for Yahoo""" 12 | 13 | def __init__(self): 14 | Scraper.__init__(self) 15 | self.url = 'https://search.yahoo.com/search' 16 | self.videoURL = 'https://video.search.yahoo.com/search/video' 17 | self.imageURL = 'https://images.search.yahoo.com/search/images' 18 | self.newsURL = 'https://news.search.yahoo.com/search' 19 | self.defaultStart = 1 20 | self.startKey = 'b' 21 | self.name = 'yahoo' 22 | 23 | @staticmethod 24 | def parse_response(soup): 25 | """ Parse response and returns the urls 26 | 27 | Returns: urls (list) 28 | [[Tile1, url1], [Title2, url2], ...] 29 | """ 30 | urls = [] 31 | for h in soup.findAll('h3', attrs={'class': 'title'}): 32 | t = h.findAll('a', attrs={'class': ' ac-algo fz-l ac-21th lh-24'}) 33 | for y in t: 34 | r = y.get('href') 35 | f = r.split('RU=') 36 | e = f[-1].split('/RK=2') 37 | u = unquote(e[0]) 38 | urls.append({ 39 | 'title': y.getText(), 40 | 'link': u 41 | }) 42 | 43 | print('Yahoo parsed: ' + str(urls)) 44 | 45 | return urls 46 | 47 | @staticmethod 48 | def parse_video_response(soup): 49 | """ Parse response and returns the urls 50 | 51 | Returns: urls (list) 52 | [[Tile1, url1], [Title2, url2], ...] 53 | """ 54 | urls = [] 55 | for h in soup.findAll('li', attrs={'class': 'vr vres'}): 56 | t = h.find('a', attrs={'class': 'ng'}) 57 | r = t.get('data-rurl') 58 | titleDiv = t.find('div', attrs={'class': 'v-meta bx-bb'}) 59 | title = titleDiv.find('h3').getText() 60 | urls.append({ 61 | 'title': title, 62 | 'link': r 63 | }) 64 | 65 | print('Yahoo parsed: ' + str(urls)) 66 | 67 | return urls 68 | 69 | @staticmethod 70 | def parse_image_response(soup): 71 | """ Parse response and returns the urls 72 | 73 | Returns: urls (list) 74 | [[Tile1, url1], [Title2, url2], ...] 75 | """ 76 | urls = [] 77 | for h in soup.findAll('li', attrs={'class': 'ld'}): 78 | t = h.find('a') 79 | r = t.get('aria-label') 80 | cleanr = re.compile('<.*?>') 81 | r = re.sub(cleanr, '', r) 82 | cleanl = re.compile('&#[\d]+(;)') 83 | r = re.sub(cleanl, '\'', r) 84 | img = t.find('img', attrs={'class': 'process'}) 85 | url = img.get('data-src') 86 | urls.append({ 87 | 'title': r, 88 | 'link': url 89 | }) 90 | 91 | print('Yahoo parsed: ' + str(urls)) 92 | 93 | return urls 94 | 95 | @staticmethod 96 | def parse_news_response(soup): 97 | """ Parse response and returns the urls 98 | Returns: urls (list) 99 | [[Tile1, url1], [Title2, url2], ...] 
100 | """ 101 | urls = [] 102 | for div in soup.findAll('div', attrs={'class': 'dd algo NewsArticle'}): 103 | link = div.find('a', attrs={'class': 'fz-m'}) 104 | descDiv = div.find('div', attrs={'class': 'compText'}) 105 | unparsedURL = link.get('href') 106 | urlSearch = re.search('/RU=(.*?)/', unparsedURL, re.I) 107 | url = unquote(urlSearch.group(1)) 108 | urls.append({ 109 | 'title': link.getText(), 110 | 'link': url, 111 | 'desc': descDiv.find('p').getText() 112 | }) 113 | 114 | print('Yahoo parsed: ' + str(urls)) 115 | 116 | return urls 117 | -------------------------------------------------------------------------------- /app/scrapers/youtube.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from .generalized import Scraper 3 | 4 | 5 | class Youtube(Scraper): 6 | """Scraper class for Youtube""" 7 | 8 | def __init__(self): 9 | Scraper.__init__(self) 10 | self.url = 'https://www.youtube.com/results' 11 | self.queryKey = 'search_query' 12 | self.name = 'youtube' 13 | 14 | @staticmethod 15 | def parse_response(soup): 16 | """ Parse the response and return list of urls 17 | Returns: urls (list) 18 | [[Title1,url1], [Title2, url2],..] 19 | """ 20 | urls = [] 21 | for a in soup.findAll('a'): 22 | if a.get('href').startswith('/watch?'): 23 | link = 'https://www.youtube.com' + str(a.get('href')) 24 | if not a.getText().startswith('\n\n'): 25 | urls.append({'title': a.getText(), 'link': link}) 26 | else: 27 | continue 28 | 29 | print('Youtube parsed: ' + str(urls)) 30 | 31 | return urls 32 | -------------------------------------------------------------------------------- /app/server.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from argparse import ArgumentParser 4 | 5 | from defusedxml.minidom import parseString 6 | from dicttoxml import dicttoxml 7 | from flask import (Flask, Response, abort, jsonify, make_response, 8 | render_template, request) 9 | 10 | try: 11 | from scrapers import feed_gen, scrapers 12 | except Exception as e: 13 | from app.scrapers import feed_gen, scrapers 14 | 15 | DISABLE_CACHE = True # Temporarily disable the MongoDB cache 16 | if DISABLE_CACHE: 17 | def lookup(url): 18 | return False 19 | 20 | def store(url, links): 21 | pass 22 | else: 23 | from query_cache import lookup, store 24 | 25 | app = Flask(__name__) 26 | err = "" 27 | 28 | errorObj = { 29 | 'type': 'Internal Server Error', 30 | 'status_code': 500, 31 | 'error': 'Could not parse the page due to Internal Server Error' 32 | } 33 | 34 | 35 | @app.route('/') 36 | def index(): 37 | return render_template('index.html', engines_list=sorted(scrapers.keys())) 38 | 39 | 40 | def bad_request(error): 41 | message = {'Error': error[1], 'Status Code': error[0]} 42 | response = dicttoxml(message) if error[2] == 'xml' else json.dumps(message) 43 | return make_response(response, error[0]) 44 | 45 | 46 | @app.route('/api/v1/search/<search_engine>', methods=['GET']) 47 | def search(search_engine): 48 | try: 49 | count = int(request.args.get('num', 10)) 50 | qformat = request.args.get('format', 'json').lower() 51 | qtype = request.args.get('type', '') 52 | if qformat not in ('json', 'xml', 'csv'): 53 | abort(400, 'Not Found - undefined format') 54 | 55 | engine = search_engine 56 | if engine not in scrapers: 57 | error = [404, 'Incorrect search engine', engine] 58 | return bad_request(error) 59 | 60 | query = request.args.get('query') 61 | if not query: 62 | error = [400, 'Not Found - 
missing query', qformat] 63 | return bad_request(error) 64 | 65 | # first see if we can get the results for the cache 66 | engine_and_query = engine + ':' + query 67 | result = lookup(engine_and_query) 68 | if result: 69 | print("cache hit: {}".format(engine_and_query)) 70 | else: 71 | result = feed_gen(query, engine, count, qtype) 72 | if result: 73 | # store the result in the cache to speed up future searches 74 | store(engine_and_query, result) 75 | else: 76 | error = [404, 'No response', engine_and_query] 77 | return bad_request(error) 78 | 79 | try: 80 | unicode # unicode is undefined in Python 3 so NameError is raised 81 | for line in result: 82 | line['link'] = line['link'].encode('utf-8') 83 | if 'title' in line: 84 | line['title'] = line['title'].encode('utf-8') 85 | if 'desc' in line: 86 | line['desc'] = line['desc'].encode('utf-8') 87 | except NameError: 88 | pass # Python 3 strings are already Unicode 89 | if qformat == 'json': 90 | return jsonify(result) 91 | elif qformat == 'csv': 92 | csvfeed = '"' 93 | csvfeed += '","'.join(result[0].keys()) 94 | for line in result: 95 | csvfeed += '"\n"' 96 | csvfeed += '","'.join(line.values()) 97 | csvfeed += '"' 98 | return Response(csvfeed) 99 | 100 | xmlfeed = dicttoxml(result, custom_root='channel', attr_type=False) 101 | xmlfeed = parseString(xmlfeed).toprettyxml() 102 | return Response(xmlfeed, mimetype='application/xml') 103 | except Exception as e: 104 | print(e) 105 | return jsonify(errorObj) 106 | 107 | 108 | @app.after_request 109 | def set_header(r): 110 | r.headers["Cache-Control"] = "no-cache" 111 | return r 112 | 113 | 114 | if __name__ == '__main__': 115 | port = int(os.environ.get('PORT', 7001)) 116 | parser = ArgumentParser() 117 | help_msg = "Start the server in development mode with debug=True" 118 | parser.add_argument("--dev", help=help_msg, action="store_true") 119 | args = parser.parse_args() 120 | app.run(host='0.0.0.0', port=port, debug=args.dev) 121 | -------------------------------------------------------------------------------- /app/static/css/styles.css: -------------------------------------------------------------------------------- 1 | html { 2 | height: 100%; 3 | box-sizing: border-box; 4 | } 5 | 6 | body { 7 | padding-top: 0px; 8 | font: DroidSansMono; 9 | position: relative; 10 | margin: 0; 11 | padding-bottom: 6rem; 12 | min-height: 100%; 13 | font-family: Droid Sans Mono; 14 | } 15 | 16 | .github-fork-ribbon { 17 | position: absolute; 18 | top: 0; 19 | right: 0; 20 | border: 0; 21 | } 22 | 23 | .queryArea { 24 | min-width: 100%; 25 | } 26 | 27 | .custom{ 28 | padding-left: 0px!important; 29 | padding-right: 0px!important; 30 | } 31 | 32 | .responseType { 33 | display: inline-flex; 34 | } 35 | 36 | .engineDrop { 37 | padding: 0; 38 | margin-right: 30px; 39 | margin-left: 16px; 40 | margin-bottom: 10px; 41 | } 42 | 43 | .qType { 44 | padding: 0; 45 | margin-right: 30px; 46 | margin-left: 16px; 47 | } 48 | 49 | .miscInfo { 50 | margin: 0 auto; 51 | display: table; 52 | } 53 | 54 | .typeButton { 55 | padding: 0px 10px 10px; 56 | } 57 | 58 | .qCount { 59 | display: inline-block; 60 | margin: 0; 61 | min-width: 50%; 62 | } 63 | 64 | #submit { 65 | margin-top: 20px; 66 | } 67 | 68 | #a1 { 69 | cursor: pointer; 70 | } 71 | 72 | #search { 73 | padding: 0; 74 | margin: 0, auto; 75 | } 76 | 77 | #feed { 78 | display: none; 79 | font-size: 1.2em; 80 | margin: 0em 2em 0 2em; 81 | padding: -3em 0 -3em 0; 82 | z-index: -100000; 83 | } 84 | 85 | #load { 86 | position: relative; 87 | display: none; 88 | left: 50%; 89 
| top: 50%; 90 | margin: auto; 91 | } 92 | 93 | p { 94 | font-size: medium; 95 | } 96 | 97 | .jumbotron p { 98 | font-weight: 300; 99 | font-size: 1.5em; 100 | display: inline-block; 101 | box-shadow: aliceblue; 102 | } 103 | 104 | .btn-group-vertical { 105 | display: inline-flex; 106 | } 107 | 108 | .btn-toolbar { 109 | margin-top: 1em; 110 | } 111 | 112 | .btn { 113 | border: none; 114 | color: white; 115 | padding: 0.5em 1.5em; 116 | text-align: center; 117 | text-decoration: ghostwhite; 118 | display: inline-block; 119 | font-size: 1.4em; 120 | border-radius: 0.4em; 121 | } 122 | 123 | .btn-outline { 124 | background-color: transparent; 125 | color: inherit; 126 | transition: all .5s; 127 | border: 1px #428bca solid; 128 | } 129 | 130 | .btn-outline:hover { 131 | color: #286090; 132 | background-color: rgba(0, 0, 0, 0.09); 133 | } 134 | 135 | .search { 136 | margin: 0.3em; 137 | } 138 | 139 | #tweet { 140 | font-size: large; 141 | color:#0084b4; 142 | background-color:#fff; 143 | border: 2px solid #0084b4; 144 | } 145 | 146 | 147 | #tweet:hover{ 148 | font-size: large; 149 | background-color:#0084b4; 150 | color:#fff; 151 | } 152 | 153 | #fb { 154 | font-size: large; 155 | background-color:#fff; 156 | 157 | color:#3b5998; 158 | border: 2px solid #3b5998; 159 | } 160 | 161 | 162 | #fb:hover{ 163 | font-size: large; 164 | background-color:#3b5998; 165 | color:#fff; 166 | } 167 | 168 | .panel-heading { 169 | font-weight: 300; 170 | font-size: 1.5em; 171 | } 172 | 173 | .panel-body { 174 | line-height: 25px; 175 | margin: 0 auto; 176 | } 177 | 178 | .panel-body .label { 179 | font-size: 1em; 180 | } 181 | 182 | .footer { 183 | bottom:0; 184 | width: 100%; 185 | height: 100px; 186 | } 187 | 188 | 189 | .footer .text-muted { 190 | margin: 20px 0; 191 | } 192 | 193 | 194 | /* Media Queries */ 195 | 196 | @media only screen and (min-device-width: 320px) and (max-device-width: 480px) { 197 | .github-fork-ribbon img { 198 | width: 120px; 199 | } 200 | h1 { 201 | font-size: 2.5em; 202 | } 203 | .jumbotron p { 204 | font-size: 1.5em; 205 | } 206 | #query { 207 | width: 90%; 208 | } 209 | .btn-group { 210 | margin-top: 10px; 211 | line-height: 20px; 212 | } 213 | } 214 | -------------------------------------------------------------------------------- /app/static/images/ask_icon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fossasia/query-server/51c98716e1ec9cde6023a6b3ac2eb3081daa9e82/app/static/images/ask_icon.ico -------------------------------------------------------------------------------- /app/static/images/baidu_icon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fossasia/query-server/51c98716e1ec9cde6023a6b3ac2eb3081daa9e82/app/static/images/baidu_icon.ico -------------------------------------------------------------------------------- /app/static/images/bing_icon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fossasia/query-server/51c98716e1ec9cde6023a6b3ac2eb3081daa9e82/app/static/images/bing_icon.ico -------------------------------------------------------------------------------- /app/static/images/dailymotion_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fossasia/query-server/51c98716e1ec9cde6023a6b3ac2eb3081daa9e82/app/static/images/dailymotion_icon.png 
-------------------------------------------------------------------------------- /app/static/images/duckduckgo_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fossasia/query-server/51c98716e1ec9cde6023a6b3ac2eb3081daa9e82/app/static/images/duckduckgo_icon.png -------------------------------------------------------------------------------- /app/static/images/exalead_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fossasia/query-server/51c98716e1ec9cde6023a6b3ac2eb3081daa9e82/app/static/images/exalead_icon.png -------------------------------------------------------------------------------- /app/static/images/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fossasia/query-server/51c98716e1ec9cde6023a6b3ac2eb3081daa9e82/app/static/images/favicon.ico -------------------------------------------------------------------------------- /app/static/images/forkme_right_green_007200.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fossasia/query-server/51c98716e1ec9cde6023a6b3ac2eb3081daa9e82/app/static/images/forkme_right_green_007200.png -------------------------------------------------------------------------------- /app/static/images/foss_asia.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fossasia/query-server/51c98716e1ec9cde6023a6b3ac2eb3081daa9e82/app/static/images/foss_asia.png -------------------------------------------------------------------------------- /app/static/images/google_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fossasia/query-server/51c98716e1ec9cde6023a6b3ac2eb3081daa9e82/app/static/images/google_icon.png -------------------------------------------------------------------------------- /app/static/images/mojeek_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fossasia/query-server/51c98716e1ec9cde6023a6b3ac2eb3081daa9e82/app/static/images/mojeek_icon.png -------------------------------------------------------------------------------- /app/static/images/parsijoo_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fossasia/query-server/51c98716e1ec9cde6023a6b3ac2eb3081daa9e82/app/static/images/parsijoo_icon.png -------------------------------------------------------------------------------- /app/static/images/quora_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fossasia/query-server/51c98716e1ec9cde6023a6b3ac2eb3081daa9e82/app/static/images/quora_icon.png -------------------------------------------------------------------------------- /app/static/images/ripple.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fossasia/query-server/51c98716e1ec9cde6023a6b3ac2eb3081daa9e82/app/static/images/ripple.gif -------------------------------------------------------------------------------- /app/static/images/twitter_icon.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fossasia/query-server/51c98716e1ec9cde6023a6b3ac2eb3081daa9e82/app/static/images/twitter_icon.png -------------------------------------------------------------------------------- /app/static/images/yahoo_icon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fossasia/query-server/51c98716e1ec9cde6023a6b3ac2eb3081daa9e82/app/static/images/yahoo_icon.ico -------------------------------------------------------------------------------- /app/static/images/youtube_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fossasia/query-server/51c98716e1ec9cde6023a6b3ac2eb3081daa9e82/app/static/images/youtube_icon.png -------------------------------------------------------------------------------- /app/templates/index.html: -------------------------------------------------------------------------------- [The markup of this 295-line template did not survive the text extraction; only fragments of its visible text remain. From those fragments and the selectors in app/static/css/styles.css, the page is titled "query-server · FOSSASIA" and appears to contain a "Fork me on GitHub" ribbon, the tagline "Query server that stores a query string on a server.", the search form (query box, search-engine dropdown, response-format and query-type selectors), contribution links (Gitter badge, "Tweet about this" and "Share on Facebook" buttons) and a "Git Commit/Version number" field. See the repository for the full template.]
-------------------------------------------------------------------------------- /bandit.yml: -------------------------------------------------------------------------------- 1 | skips: ['B101'] 2 | -------------------------------------------------------------------------------- /bower.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "query-server", 3 | "description": "A simple server to store query strings", 4 | "main": "", 5 | "authors": [ 6 | "enigmaeth " 7 | ], 8 | "license": "MIT", 9 | "homepage": "https://github.com/fossasia/query-server", 10 | "ignore": [ 11 | "**/.*", 12 | "node_modules", 13 | "bower_components", 14 | "test", 15 | "tests" 16 | ], 17 | "dependencies": { 18 | "bootstrap": "^3.3.7", 19 | "jquery": "^3.1.1", 20 | "json3": "^3.3.2", 21 | "prismjs": "^1.6.0" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | codecov: 2 | notify: 3 | require_ci_to_pass: yes 4 | 5 | coverage: 6 | precision: 2 7 | round: down 8 | range: "70...100" 9 | 10 | status: 11 | project: yes 12 | patch: yes 13 | changes: no 14 | 15 | comment: 16 | layout: "reach, diff, flags, files, footer" 17 | behavior: default 18 | require_changes: no 19 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "2" 2 | services: 3 | web: 4 | build: . 5 | ports: 6 | - "7001:7001" 7 | links: 8 | - mongo 9 | environment: 10 | MONGO_URI: mongodb://mongo:27017/query-server # use the linked "mongo" service name, not localhost 11 | 12 | mongo: 13 | image: mongo 14 | volumes: 15 | - /data/mongodb/db:/data/db 16 | ports: 17 | - "27017:27017" -------------------------------------------------------------------------------- /docs/installation/docker.md: -------------------------------------------------------------------------------- 1 | # Docker 2 | 3 | [![Deploy to Docker Cloud](https://files.cloud.docker.com/images/deploy-to-dockercloud.svg)](https://cloud.docker.com/stack/deploy/?repo=https://github.com/fossasia/query-server) 4 | 5 | * Get the latest version of Docker. See the [official site](https://docs.docker.com/engine/installation/) for installation info for your platform. 6 | 7 | * Install the latest version of docker-compose. Windows and Mac users should already have docker-compose, as it is part of the Docker Toolbox. For Linux users, see the 8 | [official guide](https://docs.docker.com/compose/install/). 9 | 10 | * Run `docker` and `docker-compose` in a terminal to see if they are properly installed. 11 | 12 | * Clone the project and cd into it. 13 | 14 | ```bash 15 | git clone https://github.com/fossasia/query-server.git && cd query-server 16 | ``` 17 | 18 | * In the terminal window, run `docker build -t query-server:latest .` to build the query-server Docker image. This process can take some time. 19 | 20 | * After the build is done, run `docker run -d -p 7001:7001 query-server` to start the server. 
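As a quick sanity check (an addition to this guide, assuming you kept the default `7001:7001` port mapping above), you can call the search API from the host once the container is up. The route, port and parameters below mirror the ones exercised in `test/test_server.py`; the exact results returned depend on the upstream search engine:

```bash
# Ask the containerized API to search Google for "fossasia" and return JSON
curl "http://localhost:7001/api/v1/search/google?query=fossasia&format=json"
```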
21 | -------------------------------------------------------------------------------- /docs/installation/heroku.md: -------------------------------------------------------------------------------- 1 | # Heroku 2 | 3 | One-click Heroku deployment is available: 4 | 5 | [![Deploy](https://www.herokucdn.com/deploy/button.svg)](https://heroku.com/deploy?template=https://github.com/fossasia/query-server) 6 | 7 | ### Steps for Manual Deployment 8 | 9 | * We need to install the Heroku command-line tools on our machine. Type the following in your Linux terminal: 10 | * ```wget -O- https://toolbelt.heroku.com/install-ubuntu.sh | sh``` 11 | This installs the Heroku Toolbelt on your machine so you can access Heroku from the command line. Windows users can install it from [here](https://devcenter.heroku.com/articles/heroku-cli#windows) 12 | * Next we need to log in to Heroku (assuming that you have already created an account). Type the following in the terminal: 13 | * ```heroku login``` (for Windows users on Cygwin or Git Bash: ```winpty heroku login```) 14 | * Enter your credentials and log in. 15 | * Once logged in, we need to create a space on the Heroku server for our application. This is done with the following command: 16 | * ```heroku create``` 17 | * Add the Node.js buildpack to the app 18 | * ```heroku buildpacks:add --index 1 heroku/nodejs``` 19 | * Add the Python buildpack to the app 20 | * ```heroku buildpacks:add --index 2 heroku/python``` 21 | * Check that both buildpacks are set on the app 22 | * ```heroku buildpacks``` 23 | It should return: 24 | 25 | > 1. heroku/nodejs 26 | > 2. heroku/python 27 | 28 | * Then we deploy the code to Heroku. 29 | * ```git push heroku master``` or 30 | * ```git push heroku yourbranch:master``` if you are on a branch other than master 31 | -------------------------------------------------------------------------------- /docs/installation/local.md: -------------------------------------------------------------------------------- 1 | # Local Development Setup 2 | 3 | The instructions on this page will guide you in setting up a local development environment on your system. 4 | 5 | To start, fork Query-Server to your own GitHub account. Then clone it to your local system. 6 | 7 | ```sh 8 | git clone -b master https://github.com/<username>/query-server.git 9 | ``` 10 | 11 | Add an upstream remote so that you can push your patched branches for starting a PR. 12 | 13 | ```sh 14 | cd query-server 15 | git remote add upstream https://github.com/fossasia/query-server.git 16 | ``` 17 | 18 | Make sure you have [Node.js](https://nodejs.org/en/) installed. 19 | Running this tool requires installing both the Node.js and the Python dependencies. 20 | 21 | ``` 22 | npm install -g bower 23 | bower install 24 | pip install virtualenv 25 | virtualenv venv 26 | . venv/bin/activate # Linux 27 | venv\Scripts\activate # Windows 28 | pip install -r requirements.txt 29 | ``` 30 | 31 | Alternatively, use [`pipenv`](https://docs.pipenv.org) instead of `pip` and `virtualenv` separately: 
32 | 33 | ``` 34 | npm install -g bower 35 | bower install 36 | pip install pipenv 37 | pipenv --two # To setup python 2 virtual environment 38 | pipenv install -r requirements.txt 39 | pipenv shell # To activate virtual environment 40 | ``` 41 | 42 | To set up MongoDB on your server : 43 | 44 | ```bash 45 | sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 7F0CEB10 46 | echo "deb http://repo.mongodb.org/apt/ubuntu "$(lsb_release -sc)"/mongodb-org/3.0 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-3.0.list 47 | sudo apt-get update 48 | sudo apt-get install -y mongodb 49 | sudo service mongodb start 50 | ``` 51 | 52 | To run the project on a local machine. 53 | 54 | For development mode (with debugger active), use the following command 55 | ```sh 56 | python app/server.py --dev 57 | ``` 58 | 59 | To run the project on a production machine. 60 | 61 | ```sh 62 | python app/server.py 63 | ``` 64 | 65 | ## Preferred Development Workflow 66 | 67 | 1. Get the latest copy of code from upstream. 68 | 69 | ```sh 70 | git pull upstream master 71 | ``` 72 | 73 | 2. Once you get assigned an issue, create a new branch from `master`. 74 | 75 | ```sh 76 | git checkout -b XXX-mock-issue # XXX is the issue number 77 | ``` 78 | 79 | 3. Work on your patch, test it and when it's done, push it to your fork. 80 | 81 | ```sh 82 | git push origin XXX-mock-issue 83 | ``` 84 | 85 | 4. File a PR and wait for the maintainers to suggest reviews or in the best case 86 | merge the PR. Then just update `master` of your local clone. 87 | 88 | ```sh 89 | git pull upstream master 90 | ``` 91 | 92 | And then loop back again. For contribution guidelines, refer [here](https://github.com/fossasia/query-server/blob/master/.github/CONTRIBUTING.md) 93 | -------------------------------------------------------------------------------- /manifest.yml: -------------------------------------------------------------------------------- 1 | applications: 2 | - name: query-server 3 | memory: 256M 4 | command: python mongo-app.py 5 | buildpack: https://github.com/cloudfoundry/python-buildpack.git 6 | instances: 2 7 | services: 8 | - todo-mongo-db -------------------------------------------------------------------------------- /package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "query-server", 3 | "version": "0.1.0", 4 | "lockfileVersion": 1, 5 | "requires": true, 6 | "dependencies": { 7 | "bower": { 8 | "version": "1.8.2", 9 | "resolved": "https://registry.npmjs.org/bower/-/bower-1.8.2.tgz", 10 | "integrity": "sha1-rfU1KcjUrwLvJPuNU0HBQZ0z4vc=" 11 | }, 12 | "ci-info": { 13 | "version": "1.1.2", 14 | "resolved": "https://registry.npmjs.org/ci-info/-/ci-info-1.1.2.tgz", 15 | "integrity": "sha512-uTGIPNx/nSpBdsF6xnseRXLLtfr9VLqkz8ZqHXr3Y7b6SftyRxBGjwMtJj1OhNbmlc1wZzLNAlAcvyIiE8a6ZA==", 16 | "dev": true 17 | }, 18 | "husky": { 19 | "version": "0.14.3", 20 | "resolved": "https://registry.npmjs.org/husky/-/husky-0.14.3.tgz", 21 | "integrity": "sha512-e21wivqHpstpoiWA/Yi8eFti8E+sQDSS53cpJsPptPs295QTOQR0ZwnHo2TXy1XOpZFD9rPOd3NpmqTK6uMLJA==", 22 | "dev": true, 23 | "requires": { 24 | "is-ci": "1.1.0", 25 | "normalize-path": "1.0.0", 26 | "strip-indent": "2.0.0" 27 | } 28 | }, 29 | "is-ci": { 30 | "version": "1.1.0", 31 | "resolved": "https://registry.npmjs.org/is-ci/-/is-ci-1.1.0.tgz", 32 | "integrity": "sha512-c7TnwxLePuqIlxHgr7xtxzycJPegNHFuIrBkwbf8hc58//+Op1CqFkyS+xnIMkwn9UsJIwc174BIjkyBmSpjKg==", 33 | "dev": true, 34 | "requires": { 35 | "ci-info": 
"1.1.2" 36 | } 37 | }, 38 | "normalize-path": { 39 | "version": "1.0.0", 40 | "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-1.0.0.tgz", 41 | "integrity": "sha1-MtDkcvkf80VwHBWoMRAY07CpA3k=", 42 | "dev": true 43 | }, 44 | "strip-indent": { 45 | "version": "2.0.0", 46 | "resolved": "https://registry.npmjs.org/strip-indent/-/strip-indent-2.0.0.tgz", 47 | "integrity": "sha1-XvjbKV0B5u1sv3qrlpmNeCJSe2g=", 48 | "dev": true 49 | } 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "query-server", 3 | "version": "0.1.0", 4 | "dependencies": { 5 | "bower": "^1.8.0" 6 | }, 7 | "scripts": { 8 | "postinstall": "bower install", 9 | "precommit": "flake8 . --max-line-length=85" 10 | }, 11 | "devDependencies": { 12 | "husky": "^0.14.3" 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | autopep8==1.4 2 | coverage>=4.3.4 3 | coveralls>=1.1 4 | pytest>=3.0.6 5 | pytest-cov>=2.4.0 6 | mock>=2.0.0 -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4>=4.5.1 2 | dicttoxml>=1.7.4 3 | feedgen>=0.5.1 4 | Flask>=0.12 5 | futures>=3.0.5 6 | html5lib>=0.9999999 7 | Jinja2>=2.9.5 8 | lxml>=3.7.2 9 | pymongo>=3.6.0 10 | requests>=2.13.0 11 | webencodings>=0.5 12 | defusedxml>=0.5.0 13 | -------------------------------------------------------------------------------- /runtime.txt: -------------------------------------------------------------------------------- 1 | python-2.7.14 -------------------------------------------------------------------------------- /scalingo.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Query-Server", 3 | "description": "The query server can be used to search a keyword/phrase on a search engine (Google, Yahoo, Bing, Ask and DuckDuckGo) and get the results as json or xml.", 4 | "logo": "https://scalingo.com/logo.svg", 5 | "repository": "https://github.com/fossasia/query-server", 6 | "website": "https://query-server.herokuapp.com/", 7 | "env": { 8 | }, 9 | "addons": ["scalingo-redis"] 10 | } 11 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fossasia/query-server/51c98716e1ec9cde6023a6b3ac2eb3081daa9e82/test/__init__.py -------------------------------------------------------------------------------- /test/test_ask.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | 3 | from app.scrapers import Ask 4 | 5 | 6 | def test_next_start(): 7 | assert 3 == Ask().next_start(2, None) 8 | 9 | 10 | def test_parse_response_for_none(): 11 | html_text = """
12 |
13 |

No results for:

14 |

44754546546545545465465f4654f654654

15 |

Please try again.

16 |
17 |
""" 18 | stub_soup = BeautifulSoup(html_text, 'html.parser') 19 | resp = Ask().parse_response(stub_soup) 20 | assert resp is None 21 | 22 | 23 | def test_parse_response_with_desc(): 24 | html_div = """
25 |
26 | mock_title 28 |
29 |

mock_desc

30 |
""" 31 | stub_soup_div = BeautifulSoup(html_div, 'html.parser') 32 | resp = Ask().parse_response(stub_soup_div) 33 | expected_resp = [ 34 | { 35 | 'link': u'mock_url', 36 | 'title': u'mock_title', 37 | 'desc': u'mock_desc' 38 | } 39 | ] 40 | assert resp == expected_resp 41 | 42 | 43 | def test_parse_response_without_desc(): 44 | html_div = """
45 |
46 | mock_title 48 |
49 |
""" 50 | stub_soup_div = BeautifulSoup(html_div, 'html.parser') 51 | resp = Ask().parse_response(stub_soup_div) 52 | expected_resp = [ 53 | { 54 | 'link': u'mock_url', 55 | 'title': u'mock_title' 56 | } 57 | ] 58 | assert resp == expected_resp 59 | 60 | 61 | def test_parse_video_response(): 62 | html_div = """
64 |
mock_desc
65 |
""" 66 | stub_soup_div = BeautifulSoup(html_div, 'html.parser') 67 | resp = Ask().parse_video_response(stub_soup_div) 68 | url_video = 'https' + 'mock_url' 69 | expected_resp = [ 70 | { 71 | 'link': url_video, 72 | 'title': u'mock_title', 73 | 'desc': u'mock_desc' 74 | } 75 | ] 76 | assert resp == expected_resp 77 | -------------------------------------------------------------------------------- /test/test_baidu.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | 3 | from app.scrapers import Baidu 4 | 5 | 6 | def test_parse_response(): 7 | html_text = """

8 | mock_title 9 |

""" 10 | dummy_soup = BeautifulSoup(html_text, 'html.parser') 11 | resp = Baidu().parse_response(dummy_soup) 12 | expected_resp = [{ 13 | 'title': u'mock_title', 14 | 'link': u'mock_url' 15 | }] 16 | assert resp == expected_resp 17 | 18 | 19 | def test_parse_news_response(): 20 | html_text = """

21 | mock_title 22 |

""" 23 | dummy_soup = BeautifulSoup(html_text, 'html.parser') 24 | resp = Baidu().parse_news_response(dummy_soup) 25 | expected_resp = [{ 26 | 'title': u'mock_title', 27 | 'link': u'mock_url' 28 | }] 29 | assert resp == expected_resp 30 | -------------------------------------------------------------------------------- /test/test_bing.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | 3 | from app.scrapers import Bing 4 | 5 | 6 | def test_parse_response(): 7 | html_text = """
  • 8 |

    mock_title

    9 |

    mock_desc

    10 |
  • """ 11 | dummy_soup = BeautifulSoup(html_text, 'html.parser') 12 | resp = Bing().parse_response(dummy_soup) 13 | expected_resp = [{ 14 | 'title': u'mock_title', 15 | 'link': u'mock_url', 16 | 'desc': u'mock_desc' 17 | }] 18 | assert resp == expected_resp 19 | 20 | 21 | def test_parse_image_response(): 22 | html_text = """mock_title""" 23 | dummy_soup = BeautifulSoup(html_text, 'html.parser') 24 | resp = Bing().parse_image_response(dummy_soup) 25 | link_image = 'https://www.bing.com' + 'mock_url' 26 | expected_resp = [{ 27 | 'link': link_image 28 | }] 29 | assert resp == expected_resp 30 | 31 | 32 | def test_parse_video_response(): 33 | html_text = """""" 35 | dummy_soup = BeautifulSoup(html_text, 'html.parser') 36 | resp = Bing().parse_video_response(dummy_soup) 37 | link_video = 'https://www.bing.com' + 'mock_url' 38 | expected_resp = [{ 39 | 'title': u'mock_title', 40 | 'link': link_video, 41 | }] 42 | assert resp == expected_resp 43 | 44 | 45 | def test_parse_news_response(): 46 | html_text = """
    48 | mock_desc
    """ 49 | dummy_soup = BeautifulSoup(html_text, 'html.parser') 50 | resp = Bing().parse_news_response(dummy_soup) 51 | expected_resp = [{ 52 | 'title': u'mock_title', 53 | 'link': u'mock_url', 54 | 'desc': u'mock_desc', 55 | }] 56 | assert resp == expected_resp 57 | -------------------------------------------------------------------------------- /test/test_duckduckgo.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | 3 | from app.scrapers import DuckDuckGo 4 | 5 | 6 | def test_parse_response(): 7 | html_text = """

    8 | mock_title 9 |

    """ 10 | dummy_soup = BeautifulSoup(html_text, 'html.parser') 11 | resp = DuckDuckGo().parse_response(dummy_soup) 12 | expected_resp = [{ 13 | 'title': u'mock_title', 14 | 'link': u'mock_url' 15 | }] 16 | assert resp == expected_resp 17 | -------------------------------------------------------------------------------- /test/test_generalized.py: -------------------------------------------------------------------------------- 1 | from mock import patch 2 | import pytest 3 | 4 | from app.scrapers.generalized import Scraper 5 | 6 | 7 | @patch('requests.models.Response') 8 | @patch('app.scrapers.generalized.requests.get') 9 | def test_get_page(mock_request_get, mock_response): 10 | mock_request_get.return_value = mock_response 11 | mock_response.url = "Mock Url" 12 | response = Scraper().get_page("dummy_query") 13 | assert response == mock_response 14 | expected_payload = {'q': 'dummy_query', '': ''} 15 | expected_headers = { 16 | 'User-Agent': ( 17 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) ' 18 | 'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 ' 19 | 'Safari/537.36' 20 | ) 21 | } 22 | mock_request_get.assert_called_with( 23 | '', headers=expected_headers, params=expected_payload) 24 | 25 | 26 | def test_parse_response(): 27 | with pytest.raises(NotImplementedError): 28 | Scraper().parse_response(None) 29 | 30 | 31 | def test_next_start(): 32 | dummy_prev_results = ['dummy_value'] 33 | if not Scraper().next_start(3, dummy_prev_results) == 4: 34 | raise AssertionError() 35 | 36 | 37 | @patch('app.scrapers.generalized.Scraper.parse_response') 38 | @patch('app.scrapers.generalized.Scraper.get_page') 39 | @patch('requests.models.Response') 40 | def test_search(mock_resp, mock_get_page, mock_parse_resp): 41 | mock_get_page.return_value = mock_resp 42 | mock_resp.text = "Mock response" 43 | expected_resp = [{ 44 | 'title': 'mock_title', 45 | 'link': 'mock_url' 46 | }] 47 | # assuming parse_response is being implemented by 48 | # classes inheriting Scraper. 
Thus, returning dummy 49 | # response instead of raising NotImplementedError 50 | mock_parse_resp.return_value = expected_resp 51 | resp = Scraper().search('dummy_query', 1) 52 | assert resp == expected_resp 53 | 54 | 55 | @patch('app.scrapers.generalized.Scraper.get_page') 56 | @patch('requests.models.Response') 57 | def test_search_parsed_response_none(mock_resp, mock_get): 58 | mock_get.return_value = mock_resp 59 | mock_resp.text = "Mock Response" 60 | with patch('app.scrapers.generalized.Scraper.parse_response', 61 | return_value=None): 62 | resp = Scraper().search('dummy_query', 1) 63 | assert resp == [] 64 | 65 | 66 | @patch('app.scrapers.generalized.requests.get') 67 | @patch('app.scrapers.generalized.Scraper.parse_response') 68 | @patch('requests.models.Response') 69 | def test_search_without_count(mock_resp, mock_parse_resp, mock_get): 70 | mock_get.return_value = mock_resp 71 | mock_resp.text = 'mock response' 72 | expected_resp = [{ 73 | 'title': 'mock_title', 74 | 'link': 'mock_url' 75 | }] 76 | expected_payload = {'q': 'dummy_query'} 77 | expected_headers = { 78 | 'User-Agent': ( 79 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) ' 80 | 'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 ' 81 | 'Safari/537.36' 82 | ) 83 | } 84 | mock_parse_resp.return_value = expected_resp 85 | resp = Scraper().search_without_count('dummy_query') 86 | assert resp == expected_resp 87 | mock_get.assert_called_with( 88 | '', headers=expected_headers, params=expected_payload) 89 | -------------------------------------------------------------------------------- /test/test_google.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | 3 | from app.scrapers import Google 4 | 5 | 6 | def test_parse_response(): 7 | html_text = """

    8 | mock_title 9 |

    """ 10 | dummy_soup = BeautifulSoup(html_text, 'html.parser') 11 | expected_resp = [{ 12 | 'title': u'mock_title', 13 | 'link': u'mock_url' 14 | }] 15 | resp = Google().parse_response(dummy_soup) 16 | assert resp == expected_resp 17 | -------------------------------------------------------------------------------- /test/test_mojeek.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | 3 | from app.scrapers import Mojeek 4 | 5 | 6 | def test_parse_response(): 7 | html_text = 'mock_title' 8 | dummy_soup = BeautifulSoup(html_text, 'html.parser') 9 | expected_resp = [{ 10 | 'title': u'mock_title', 11 | 'link': u'mock_url' 12 | }] 13 | resp = Mojeek().parse_response(dummy_soup) 14 | assert resp == expected_resp 15 | 16 | 17 | def test_parse_news_response(): 18 | html_text = 'mock_title' 19 | dummy_soup = BeautifulSoup(html_text, 'html.parser') 20 | expected_resp = [{ 21 | 'title': u'mock_title', 22 | 'link': u'mock_url' 23 | }] 24 | resp = Mojeek().parse_news_response(dummy_soup) 25 | assert resp == expected_resp 26 | -------------------------------------------------------------------------------- /test/test_parsijoo.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | 3 | from app.scrapers import Parsijoo 4 | 5 | 6 | def test_parse_response(): 7 | html_text = """
    8 | 9 | """ + " " * 22 + """mock_title 10 | mock_url 11 | """ + " " * 34 + """ mock_desc 12 | mock_similar 14 |
    """ 15 | dummy_soup = BeautifulSoup(html_text, 'html.parser') 16 | expected_resp = [{ 17 | 'title': u'mock_title', 18 | 'link': u'mock_url', 19 | 'desc': u'mock_desc' 20 | }] 21 | resp = Parsijoo().parse_response(dummy_soup) 22 | assert resp == expected_resp 23 | 24 | 25 | def test_parse_video_response(): 26 | html_text = """mock_title""" 28 | dummy_soup = BeautifulSoup(html_text, 'html.parser') 29 | url = 'https://video.parsijoo.ir' + "mock_url" 30 | expected_resp = [{ 31 | 'title': u'mock_title', 32 | 'link': url, 33 | }] 34 | resp = Parsijoo().parse_video_response(dummy_soup) 35 | assert resp == expected_resp 36 | 37 | 38 | def test_parse_image_response(): 39 | html_text = """""" 41 | dummy_soup = BeautifulSoup(html_text, 'html.parser') 42 | image_url = 'https://image.parsijoo.ir' + 'mock_url' 43 | expected_resp = [{ 44 | 'link': image_url, 45 | }] 46 | resp = Parsijoo().parse_image_response(dummy_soup) 47 | assert resp == expected_resp 48 | 49 | 50 | def test_parse_news_response(): 51 | html_text = """""" 54 | dummy_soup = BeautifulSoup(html_text, 'html.parser') 55 | expected_resp = [{ 56 | 'title': u'mock_title', 57 | 'link': u'mock_url' 58 | }] 59 | resp = Parsijoo().parse_news_response(dummy_soup) 60 | assert resp == expected_resp 61 | -------------------------------------------------------------------------------- /test/test_quora.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | 3 | from app.scrapers import Quora 4 | 5 | 6 | def test_parse_response(): 7 | html_text = ("") 10 | dummy_soup = BeautifulSoup(html_text, 'html.parser') 11 | expected_resp = [{ 12 | 'title': u'mock_title', 13 | 'link': u'https://www.quora.com/mock_url' 14 | }] 15 | resp = Quora().parse_response(dummy_soup) 16 | assert resp == expected_resp 17 | -------------------------------------------------------------------------------- /test/test_server.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | import pytest 5 | import requests 6 | from defusedxml import ElementTree 7 | from mock import patch 8 | 9 | from app.scrapers import small_test 10 | from app.server import app 11 | 12 | REASON = 'Do you have query-server running on http://127.0.0.1:7001 ?' 13 | TRAVIS_CI = os.getenv('TRAVIS', False) # Running in Travis CI? 
14 | 15 | 16 | @pytest.mark.xfail(not TRAVIS_CI, reason=REASON) 17 | def test_small_test(): 18 | small_test() 19 | 20 | 21 | @pytest.mark.xfail(not TRAVIS_CI, reason=REASON) 22 | def test_invalid_url_api_call(): 23 | response = requests.get('http://localhost:7001/api/v1/search/invalid_url') 24 | assert response.json()['Status Code'] == 404 25 | 26 | 27 | def make_engine_api_call(engine_name): 28 | url = 'http://localhost:7001/api/v1/search/' + engine_name 29 | assert requests.get(url).json()['Status Code'] == 400 30 | 31 | 32 | @pytest.mark.xfail(not TRAVIS_CI, reason=REASON) 33 | def test_engine_api_calls(engine_names=None): 34 | engines = """ask baidu bing dailymotion duckduckgo exalead google 35 | mojeek parsijoo quora yahoo youtube""".split() 36 | for engine_name in (engine_names or engines): 37 | make_engine_api_call(engine_name) 38 | 39 | 40 | def test_api_index(): 41 | assert app.test_client().get('/').status_code == 200 42 | 43 | 44 | @patch('app.server.abort') 45 | def test_api_search_invalid_qformat(mock_abort): 46 | url = '/api/v1/search/google?query=fossasia&format=invalid' 47 | app.test_client().get(url) 48 | mock_abort.assert_called_with(400, 'Not Found - undefined format') 49 | 50 | 51 | @patch('app.server.bad_request', return_value="Mock Response") 52 | def test_api_search_invalid_engine(mock_bad_request): 53 | url = '/api/v1/search/invalid?query=fossasia' 54 | resp = app.test_client().get(url).get_data().decode('utf-8') 55 | mock_bad_request.assert_called_with( 56 | [404, 'Incorrect search engine', 'invalid']) 57 | assert resp == "Mock Response" 58 | 59 | 60 | @patch('app.server.bad_request', return_value="Mock Response") 61 | def test_api_search_missing_query(mock_bad_request): 62 | # invalid url with query parameter missing 63 | url = '/api/v1/search/google' 64 | resp = app.test_client().get(url).get_data().decode('utf-8') 65 | mock_bad_request.assert_called_with( 66 | [400, 'Not Found - missing query', 'json']) 67 | assert resp == "Mock Response" 68 | 69 | 70 | @patch('app.server.bad_request', return_value="Mock Response") 71 | def test_api_search_for_no_response(mock_bad_request): 72 | url = '/api/v1/search/google?query=fossasia' 73 | with patch('app.server.lookup', return_value=None): 74 | with patch('app.server.feed_gen', return_value=None): 75 | resp = app.test_client().get(url).get_data().decode('utf-8') 76 | mock_bad_request.assert_called_with([404, 'No response', 77 | 'google:fossasia']) 78 | assert resp == "Mock Response" 79 | 80 | 81 | def test_api_search_for_cache_hit(): 82 | url = '/api/v1/search/google?query=fossasia' 83 | mock_result = [{'title': 'mock_title', 'link': 'mock_link'}] 84 | with patch('app.server.lookup', return_value=mock_result): 85 | resp = app.test_client().get(url).get_data().decode('utf-8') 86 | assert json.loads(resp) == mock_result 87 | 88 | 89 | @patch('app.server.feed_gen') 90 | @patch('app.server.lookup') 91 | def test_api_search_for_format(mock_lookup, mock_feed_gen): 92 | for qformat in ['json', 'csv', 'xml']: 93 | url = '/api/v1/search/google?query=fossasia&format=' + qformat 94 | mock_result = [ 95 | { 96 | 'title': 'mock_title', 97 | 'link': 'mock_link', 98 | 'desc': 'mock_desc' 99 | } 100 | ] 101 | mock_lookup.return_value = None 102 | mock_feed_gen.return_value = mock_result 103 | resp = app.test_client().get(url).get_data().decode('utf-8') 104 | expected_resp = expected_response_for_format(qformat) 105 | if qformat == 'json': 106 | resp = json.loads(resp) 107 | elif qformat == 'xml': 108 | resp = resp.replace('\t', 
'').replace('\n', '') 109 | resp = get_json_equivalent_from_xml_feed(resp) 110 | expected_resp = get_json_equivalent_from_xml_feed(expected_resp) 111 | elif qformat == 'csv': 112 | resp = get_json_equivalent_from_csv_feed(resp) 113 | expected_resp = get_json_equivalent_from_csv_feed(expected_resp) 114 | assert expected_resp == resp 115 | 116 | 117 | def expected_response_for_format(qformat): 118 | if qformat == 'json': 119 | return [ 120 | {'title': 'mock_title', 121 | 'link': 'mock_link', 122 | 'desc': 'mock_desc'} 123 | ] 124 | elif qformat == 'csv': 125 | return '"link","title","desc"\n"mock_link","mock_title","mock_desc"' 126 | elif qformat == 'xml': 127 | return ('' 128 | 'mock_descmock_link' 129 | 'mock_title') 130 | 131 | 132 | def get_json_equivalent_from_csv_feed(feed): 133 | keys_feed1 = feed.split('\n')[0].split(',') 134 | json_result = [] 135 | for row_index, row in enumerate(feed.split('\n')): 136 | if row_index == 0: 137 | continue 138 | entry = {} 139 | for index, value in enumerate(row.split(',')): 140 | entry[keys_feed1[index].replace('"', '')] = value.replace('"', '') 141 | json_result.append(entry) 142 | return json_result 143 | 144 | 145 | def get_json_equivalent_from_xml_feed(feed): 146 | def internal_iter(tree, accum): 147 | if tree is None: 148 | return accum 149 | 150 | if tree.getchildren(): 151 | accum[tree.tag] = {} 152 | for each in tree.getchildren(): 153 | result = internal_iter(each, {}) 154 | if each.tag in accum[tree.tag]: 155 | if not isinstance(accum[tree.tag][each.tag], list): 156 | accum[tree.tag][each.tag] = [ 157 | accum[tree.tag][each.tag] 158 | ] 159 | accum[tree.tag][each.tag].append(result[each.tag]) 160 | else: 161 | accum[tree.tag].update(result) 162 | else: 163 | accum[tree.tag] = tree.text 164 | 165 | return accum 166 | 167 | return internal_iter(ElementTree.fromstring(feed), {}) 168 | -------------------------------------------------------------------------------- /test/test_twitter.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from mock import patch, MagicMock 4 | 5 | from app.scrapers import Twitter 6 | 7 | 8 | @patch('requests.models.Response') 9 | @patch('app.scrapers.twitter.requests.get') 10 | def test_search(mock_requests_get, mock_response): 11 | dummy_json = json.loads('''{ 12 | "aggregations": {}, 13 | "readme_3": "mock_data", 14 | "statuses": [ 15 | { 16 | "hosts_count": 1, 17 | "links": [ 18 | "http://Phimp.Me" 19 | ], 20 | "text": "mock_text", 21 | "retweet_count": 0, 22 | "source_type": "TWITTER", 23 | "link": "mock_link", 24 | "links_count": 1 25 | } 26 | ] 27 | }''') 28 | expected_resp = [ 29 | { 30 | 'text': u'mock_text', 31 | 'link': u'mock_link' 32 | } 33 | ] 34 | mock_requests_get.return_value = mock_response 35 | mock_response.json = MagicMock(return_value=dummy_json) 36 | resp = Twitter().search('dummy_query', 1) 37 | assert expected_resp == resp 38 | -------------------------------------------------------------------------------- /test/test_yahoo.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | 3 | from app.scrapers import Yahoo 4 | 5 | 6 | def test_parse_response(): 7 | html_text = ('

    mock_title

    ') 10 | dummy_soup = BeautifulSoup(html_text, 'html.parser') 11 | expected_resp = [{ 12 | 'title': u'mock_title', 13 | 'link': u'mock_url' 14 | }] 15 | resp = Yahoo().parse_response(dummy_soup) 16 | assert resp == expected_resp 17 | 18 | 19 | def test_parse_image_response(): 20 | html_text = """
  • 21 | """ 22 | dummy_soup = BeautifulSoup(html_text, 'html.parser') 23 | expected_resp = [{ 24 | 'title': u'mock_title', 25 | 'link': u'mock_url' 26 | }] 27 | resp = Yahoo().parse_image_response(dummy_soup) 28 | assert resp == expected_resp 29 | 30 | 31 | def test_parse_news_response(): 32 | html_text = '
    ' \ 33 | '

    mock_title

    mock_desc'\ 36 | '

    ' 37 | dummy_soup = BeautifulSoup(html_text, 'html.parser') 38 | expected_resp = [{ 39 | 'title': u'mock_title', 40 | 'link': u'mock_url', 41 | 'desc': u'mock_desc' 42 | }] 43 | resp = Yahoo().parse_news_response(dummy_soup) 44 | assert resp == expected_resp 45 | -------------------------------------------------------------------------------- /test/test_youtube.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | 3 | from app.scrapers import Youtube 4 | 5 | 6 | def test_parse_response(): 7 | html_text = ('mock_channelmock_title') 12 | dummy_soup = BeautifulSoup(html_text, 'html.parser') 13 | expected_resp = [{ 14 | 'title': u'mock_title', 15 | 'link': u'https://www.youtube.com/watch?v=mock' 16 | }] 17 | resp = Youtube().parse_response(dummy_soup) 18 | assert resp == expected_resp 19 | --------------------------------------------------------------------------------