├── .gitignore
├── README.md
├── app.py
├── config
    ├── Caddyfile
    └── gunicorn.py
├── docs
    └── 00.png
├── items
    ├── __init__.py
    ├── baidu.py
    ├── bing.py
    └── google.py
├── requirements.txt
├── settings.py
└── wsgi.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Created by .ignore support plugin (hsz.mobi)
  2 | ### Python template
  3 | # Byte-compiled / optimized / DLL files
  4 | __pycache__/
  5 | *.py[cod]
  6 | *$py.class
  7 | 
  8 | # C extensions
  9 | *.so
 10 | 
 11 | # Distribution / packaging
 12 | .Python
 13 | build/
 14 | develop-eggs/
 15 | dist/
 16 | downloads/
 17 | eggs/
 18 | .eggs/
 19 | lib/
 20 | lib64/
 21 | parts/
 22 | sdist/
 23 | var/
 24 | wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | .hypothesis/
 50 | 
 51 | # Translations
 52 | *.mo
 53 | *.pot
 54 | 
 55 | # Django stuff:
 56 | *.log
 57 | .static_storage/
 58 | .media/
 59 | local_settings.py
 60 | 
 61 | # Flask stuff:
 62 | instance/
 63 | .webassets-cache
 64 | 
 65 | # Scrapy stuff:
 66 | .scrapy
 67 | 
 68 | # Sphinx documentation
 69 | docs/_build/
 70 | 
 71 | # PyBuilder
 72 | target/
 73 | 
 74 | # Jupyter Notebook
 75 | .ipynb_checkpoints
 76 | 
 77 | # pyenv
 78 | .python-version
 79 | 
 80 | # celery beat schedule file
 81 | celerybeat-schedule
 82 | 
 83 | # SageMath parsed files
 84 | *.sage.py
 85 | 
 86 | # Environments
 87 | .env
 88 | .venv
 89 | env/
 90 | venv/
 91 | ENV/
 92 | env.bak/
 93 | venv.bak/
 94 | 
 95 | # Spyder project settings
 96 | .spyderproject
 97 | .spyproject
 98 | 
 99 | # Rope project settings
100 | .ropeproject
101 | 
102 | # mkdocs documentation
103 | /site
104 | 
105 | # mypy
106 | .mypy_cache/
107 | .html/
108 | 
109 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ## toapi-search
  2 | 
  3 | ### What is toapi-search?
  4 | 
  5 | This project uses [Toapi](https://github.com/gaojiuli/toapi) to build a friendly and robust JSON API on top of search engines such as Google, Bing, Baidu, So, DuckDuckGo, etc.
  6 | 
  7 | 
  8 | ``` shell
  9 | 
 10 | # or git clone https://github.com/toapi/toapi-search
 11 | toapi new toapi/toapi-search
 12 | cd toapi-search
 13 | # toapi run
 14 | python wsgi.py
 15 | ```
 16 | 
 17 | That's it. Once the server has started, output like the following is shown on screen:
 18 | 
 19 | ![RUN](./docs/00.png)
 20 | 
 21 | ### Usage:
 22 | 
 23 | Once the server is started, you can get JSON data from toapi-search:
 24 | 
 25 | Visit `http://0.0.0.0:5000/_items/` to list the registered items for each route:
 26 | 
 27 | ``` json
 28 | 
 29 | {
 30 |   "/:wd": [
 31 |     "google", 
 32 |     "bing", 
 33 |     "baidu"
 34 |   ]
 35 | }
 36 | 
 37 | ```
 38 | 
 39 | Visit `http://0.0.0.0:5000/python` to get the results for the keyword `python` from every registered item:
 40 | 
 41 | ``` json
 42 | 
 43 | {
 44 |     "baidu": [
 45 |         {
 46 |             "title": "Welcome to Python.org",
 47 |             "url": "http://www.baidu.com/link?url=g2_i_ThdQ0aA4WYZ5sIj5Lt3rly1xBA7XY0IkBza_W7DBcn_jJam4k1F9qiFBwZB"
 48 |         },
 49 |         {
 50 |             "title": "Download Python | Python.org",
 51 |             "url": "http://www.baidu.com/link?url=nb5pPKHJv403lz96-4EztfUBtWhiw6VDP-HPQrHVexuN8YoXaDnJILQl_Jy8r22j"
 52 |         },
 53 |         {
 54 |             "title": "Python 基础教程 | 菜鸟教程",
 55 |             "url": "http://www.baidu.com/link?url=ISKIBt6yMmDv6TM6rH9OvbyzM1j8r-3ZI6LcLg_w8-0BbWBC-OMaLFjl_JiKAkX88gcrlrvAUjcKNtG2Yxs5xa"
 56 |         },
 57 |         {
 58 |             "title": "Python教程 - 廖雪峰的官方网站",
 59 |             "url": "http://www.baidu.com/link?url=acz2VD5xN9J6e7R2GDE1vxa9ThpxF3uGHCPqQXvez04bUGUsxxz1S2PqnNhZWQ0ZBVQXTfIdwkInoVR1KmL-6solFSfCoM3C7TDkT5OdeTRK1ttSYRrbtv87-tufAVY9"
 60 |         },
 61 |         {
 62 |             "title": "你是如何自学 Python 的? - 知乎",
 63 |             "url": "http://www.baidu.com/link?url=ERdlWXCJNqIfvj-bKT8spkUeF6ORHWshUy6WdsRR2f6y9XWRK9tHg-2aWCxbndVpiXFf4Rn7zdy6LM_8wcz6gq"
 64 |         },
 65 |         {
 66 |             "title": "Python Releases for Windows | Python.org",
 67 |             "url": "http://www.baidu.com/link?url=-gL2jvZJVDsWI9aXZE4LO9G5IkMRbxApwf-yHMw9fLyYWXJ-_gylkX9jDNjKfH3EGTn0WocHOQ8-lNamkr9ega"
 68 |         },
 69 |         {
 70 |             "title": "Python - 伯乐在线",
 71 |             "url": "http://www.baidu.com/link?url=R0yId0pR9fQMXq615mHSkjNOtpq59wJOx5RWvFrV4Lfd6Ql26MV_teRi519oOqef"
 72 |         },
 73 |         {
 74 |             "title": "Python 简介 | 菜鸟教程",
 75 |             "url": "http://www.baidu.com/link?url=HBE46hWbBQim7NbuG7KukYnLSscD3YK4MdNY8MimB5Xq6OgLdkdeAcVYoDKrJTnAngnzcJx-oY-JC6fb5z1edq"
 76 |         }
 77 |     ],
 78 |     "bing": [
 79 |         {
 80 |             "title": "Python - Official Site",
 81 |             "url": "https://www.python.org/"
 82 |         },
 83 |         {
 84 |             "title": "Python (programming language) - Wikipedia",
 85 |             "url": "https://en.wikipedia.org/wiki/Python_%28programming_language%29"
 86 |         },
 87 |         {
 88 |             "title": "Python - Tutorial",
 89 |             "url": "https://www.tutorialspoint.com/python/"
 90 |         },
 91 |         {
 92 |             "title": "Python - Wikipedia",
 93 |             "url": "https://en.wikipedia.org/wiki/Python"
 94 |         },
 95 |         {
 96 |             "title": "Python - Free download and software reviews - …",
 97 |             "url": "http://download.cnet.com/Python/3000-2069_4-10080057.html"
 98 |         },
 99 |         {
100 |             "title": "2. Built-in Functions — Python 3.6.4 documentation",
101 |             "url": "https://docs.python.org/3/library/functions.html"
102 |         },
103 |         {
104 |             "title": "The Python Standard Library — Python 3.6.4 …",
105 |             "url": "https://docs.python.org/3/library/index.html"
106 |         },
107 |         {
108 |             "title": "Learn Python - Free Interactive Python Tutorial",
109 |             "url": "https://www.learnpython.org/"
110 |         },
111 |         {
112 |             "title": "Python - Basic Operators - tutorialspoint.com",
113 |             "url": "http://www.tutorialspoint.com/python/python_basic_operators.htm"
114 |         },
115 |         {
116 |             "title": "Learn Python | Codecademy",
117 |             "url": "https://www.codecademy.com/learn/learn-python"
118 |         }
119 |     ],
120 |     "google": [
121 |         {
122 |             "title": "Welcome to Python.org",
123 |             "url": "https://www.python.org/"
124 |         },
125 |         {
126 |             "title": "Python (programming language) - Wikipedia",
127 |             "url": "https://en.wikipedia.org/wiki/Python_(programming_language)"
128 |         },
129 |         {
130 |             "title": "Python | Codecademy",
131 |             "url": "https://www.codecademy.com/en/tracks/python"
132 |         },
133 |         {
134 |             "title": "Python - Learn Python | Codecademy",
135 |             "url": "https://www.codecademy.com/learn/learn-python"
136 |         },
137 |         {
138 |             "title": "Python Tutorial",
139 |             "url": "https://www.tutorialspoint.com/python/"
140 |         },
141 |         {
142 |             "title": "Python · GitHub",
143 |             "url": "https://github.com/python"
144 |         },
145 |         {
146 |             "title": "Images for python",
147 |             "url": null
148 |         },
149 |         {
150 |             "title": "",
151 |             "url": ""
152 |         }
153 |     ]
154 | }
155 | 
156 | ```
157 | 
158 | ### Deploy:
159 | 
160 | We recommend deploying with Caddy (or Nginx) as a reverse proxy in front of Gunicorn; sample configuration files are in the `config/` directory.
161 | 


--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
 1 | from toapi import Api
 2 | from items.google import Google
 3 | from items.bing import Bing
 4 | from items.baidu import Baidu
 5 | from settings import MySettings
 6 | 
 7 | # Application object, configured from the project-level settings class.
 8 | api = Api(settings=MySettings)
 9 | 
10 | # Register each search-engine item. All three declare the '/:wd' route in
11 | # their Meta class.
12 | api.register(Google)
13 | api.register(Bing)
14 | api.register(Baidu)
15 | 
16 | if __name__ == '__main__':
17 |     # Serve directly when run as a script; wsgi.py imports `api` from this
18 |     # module to run it under Gunicorn instead.
19 |     api.serve()
20 | 


--------------------------------------------------------------------------------
/config/Caddyfile:
--------------------------------------------------------------------------------
1 | www.toapi-search.com {
2 |     proxy / 127.0.0.1:5000
3 |     timeouts none
4 |     gzip
5 | }
6 | 
7 | toapi-search.com {
8 |     redir http://www.toapi-search.com
9 | }


--------------------------------------------------------------------------------
/config/gunicorn.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import os
 3 | 
 4 | # Worker count, overridable through the WORKERS environment variable.
 5 | # NOTE: os.getenv returns a string when the variable is set and the int
 6 | # default (4) otherwise, so the type of this value varies.
 7 | WORKERS = os.getenv('WORKERS', 4)
 8 | 
 9 | # Listen on port 5000 on all interfaces; config/Caddyfile proxies requests
10 | # to 127.0.0.1:5000.
11 | bind = '0.0.0.0:5000'
12 | max_requests = 1000
13 | # The gevent worker class needs the `gevent` package (see requirements.txt).
14 | worker_class = 'gevent'
15 | workers = WORKERS
16 | preload_app = True
17 | graceful_timeout = 30
18 | # NOTE(review): `preload_app` above looks like the actual Gunicorn setting;
19 | # a plain `preload` name is presumably ignored - confirm before removing.
20 | preload = True
21 | 


--------------------------------------------------------------------------------
/docs/00.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/toapi/toapi-search/3b30949b4a42563b266df0912065550503444480/docs/00.png


--------------------------------------------------------------------------------
/items/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/toapi/toapi-search/3b30949b4a42563b266df0912065550503444480/items/__init__.py


--------------------------------------------------------------------------------
/items/baidu.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | from toapi import Css
 3 | 
 4 | from .bing import Bing
 5 | 
 6 | 
 7 | class Baidu(Bing):
 8 |     """Search-result item scraped from Baidu.
 9 | 
10 |     Subclasses ``Bing`` and defines no ``clean_*`` methods of its own, so the
11 |     ones declared on ``Bing`` apply; only the name, base URL, selectors and
12 |     route are overridden here.
13 |     """
14 | 
15 |     __name__ = 'baidu'
16 |     __base_url__ = 'http://www.baidu.com'
17 | 
18 |     # Both fields are selected from the same ``h3.t a`` link.
19 |     url = Css('h3.t a', attr='href')
20 |     title = Css('h3.t a')
21 | 
22 |     class Meta:
23 |         # Presumably the selector for the elements that each hold one result.
24 |         source = Css('div.result')
25 |         # Presumably maps the public '/:wd' route to a path on __base_url__.
26 |         route = {'/:wd': '/s?wd=:wd&ie=utf-8&vf_bl=1'}
27 | 


--------------------------------------------------------------------------------
/items/bing.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | from toapi import Css, Item
 3 | 
 4 | 
 5 | class Bing(Item):
 6 |     __name__ = 'bing'
 7 |     __base_url__ = 'https://www.bing.com'
 8 | 
 9 |     url = Css('h2 a', attr='href')
10 |     title = Css('h2 a')
11 | 
12 |     def clean_url(self, url):
13 |         if isinstance(url, list) and len(url):
14 |             url = url[0].get('href')
15 |         return url if url else ''
16 | 
17 |     def clean_title(self, title):
18 |         if isinstance(title, list) and len(title):
19 |             text = ''
20 |             for node in title[0].itertext():
21 |                 text += node
22 |             title = text.strip()
23 |         return title if title else ''
24 | 
25 |     class Meta:
26 |         source = Css('li.b_algo')
27 |         route = {'/:wd': '/search?q=:wd&ensearch=1'}
28 | 


--------------------------------------------------------------------------------
/items/google.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | from urllib.parse import urlparse, parse_qs
 3 | 
 4 | from toapi import Css, Item
 5 | 
 6 | 
 7 | class Google(Item):
 8 |     __name__ = 'google'
 9 |     __base_url__ = 'https://www.google.com'
10 | 
11 |     url = Css('h3.r > a', attr='href')
12 |     title = Css('h3.r > a')
13 | 
14 |     def clean_url(self, url):
15 |         if isinstance(url, list) and len(url):
16 |             url = url[0].get('href')
17 |         return self.filter_link(link=url) if url else ''
18 | 
19 |     def clean_title(self, title):
20 |         if isinstance(title, list) and len(title):
21 |             text = ''
22 |             for node in title[0].itertext():
23 |                 text += node
24 |             title = text.strip()
25 |         return title if title else ''
26 | 
27 |     @classmethod
28 |     def filter_link(cls, link):
29 |         """
30 |         Returns None if the link doesn't yield a valid result.
31 |         Token from https://github.com/MarioVilas/google
32 |         :return: a valid result
33 |         """
34 |         try:
35 |             # Valid results are absolute URLs not pointing to a Google domain
36 |             # like images.google.com or googleusercontent.com
37 |             o = urlparse(link, 'http')
38 |             if o.netloc:
39 |                 return link
40 |             # Decode hidden URLs.
41 |             if link.startswith('/url?'):
42 |                 link = parse_qs(o.query)['q'][0]
43 |                 # Valid results are absolute URLs not pointing to a Google domain
44 |                 # like images.google.com or googleusercontent.com
45 |                 o = urlparse(link, 'http')
46 |                 if o.netloc:
47 |                     return link
48 |         # Otherwise, or on error, return None.
49 |         except Exception as e:
50 |             return ''
51 | 
52 |     class Meta:
53 |         source = Css('div.g')
54 |         route = {
55 |             '/:wd': '/search?hl=en&q=:wd&btnG=Search&gbv=1',
56 |         }
57 |         web = {
58 |             "with_ajax": False,
59 |             "request_config": {
60 |                 'headers': {
61 |                     'User-Agent': "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)"
62 |                 },
63 |                 'proxies': {
64 |                     'http': '0.0.0.0:8118',
65 |                     'https': '0.0.0.0:8118'
66 |                 }
67 |             },
68 |             "headers": None
69 |         }
70 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | gunicorn
2 | gevent
3 | toapi


--------------------------------------------------------------------------------
/settings.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from toapi.cache import RedisCache, PickleSerializer
 4 | from toapi.settings import Settings
 5 | 
 6 | 
 7 | class MySettings(Settings):
 8 |     """
 9 |     Project-specific configuration passed to ``Api`` in app.py.
10 |     http://www.toapi.org/topics/settings/
11 |     """
12 | 
13 |     # Cache backend: a Redis instance at 127.0.0.1:6379, database 0, with
14 |     # values serialized by PickleSerializer.
15 |     # NOTE(review): the unit of 'ttl' is not visible here - presumably
16 |     # seconds; confirm against the toapi documentation.
17 |     cache = {
18 |         'cache_class': RedisCache,
19 |         'cache_config': {
20 |             'host': '127.0.0.1',
21 |             'port': 6379,
22 |             'db': 0
23 |         },
24 |         'serializer': PickleSerializer,
25 |         'ttl': 10000
26 |     }
27 |     # "PATH" is the working directory at the moment this module is imported,
28 |     # because os.getcwd() is evaluated when the class body runs.
29 |     storage = {
30 |         "PATH": os.getcwd(),
31 |         "DB_URL": None
32 |     }
33 |     # Request options with a fixed browser User-Agent. items/google.py
34 |     # declares its own `web` dict (adding proxies) in its Meta class.
35 |     web = {
36 |         "with_ajax": False,
37 |         "request_config": {
38 |             'headers': {
39 |                 'User-Agent': "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)"
40 |             }
41 |         },
42 |         "headers": None
43 |     }
44 | 


--------------------------------------------------------------------------------
/wsgi.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | from app import api
 4 | from gunicorn.app.base import Application
 5 | 
 6 | 
 7 | class MyApplication(Application):
 8 |     """Gunicorn application wrapper around the ``api`` object from app.py."""
 9 | 
10 |     def load_config(self):
11 |         """Load the Gunicorn settings from ``config/gunicorn.py``."""
12 |         # NOTE(review): the path is relative, so this presumably only works
13 |         # when the process is started from the project root - confirm.
14 |         self.load_config_from_module_name_or_filename(
15 |             "config/gunicorn.py")
16 | 
17 |     def load(self):
18 |         """Return ``api.server.app`` (presumably the WSGI application)."""
19 |         return api.server.app
20 | 
21 | 
22 | if __name__ == '__main__':
23 |     # Entry point used by the README: `python wsgi.py`.
24 |     MyApplication().run()
25 | 


--------------------------------------------------------------------------------