├── __init__.py
├── test
├── __init__.py
└── unit
│ └── test_provider.py
├── spoon_server
├── __init__.py
├── browser
│ ├── __init__.py
│ ├── webdriver_pool_config.py
│ ├── webdriver_item.py
│ └── webdriver_pool.py
├── database
│ ├── __init__.py
│ ├── redis_config.py
│ └── redis_wrapper.py
├── example
│ ├── __init__.py
│ ├── example_multi.py
│ ├── example.py
│ └── provider_availble.py
├── forward
│ ├── __init__.py
│ ├── main.py
│ ├── forward.py
│ └── bak.py
├── main
│ ├── __init__.py
│ ├── checker.py
│ ├── proxy_pipe.py
│ ├── refresher.py
│ ├── validater.py
│ └── manager.py
├── proxy
│ ├── __init__.py
│ ├── file_provider.py
│ ├── provider.py
│ ├── xun_provider.py
│ ├── feilong_provider.py
│ ├── ip31_provider.py
│ ├── us_provider.py
│ ├── ip181_provider.py
│ ├── iphai_provider.py
│ ├── plp_provider.py
│ ├── ihuan_provider.py
│ ├── xici_provider.py
│ ├── xiaohexia_provider.py
│ ├── six_provider.py
│ ├── ip3366_provider.py
│ ├── nian_provider.py
│ ├── gou_provider.py
│ ├── fpl_provider.py
│ ├── ssl_provider.py
│ ├── wuyou_provider.py
│ ├── zdaye_provider.py
│ ├── nord_provider.py
│ ├── yao_provider.py
│ ├── web_provider.py
│ ├── you_provider.py
│ ├── pdb_provider.py
│ ├── busy_provider.py
│ ├── nntime_provider.py
│ ├── gp_provider.py
│ ├── cool_provider.py
│ ├── mipu_provider.py
│ ├── listende_provider.py
│ ├── kuai_provider.py
│ ├── prem_provider.py
│ └── fetcher.py
└── util
│ ├── __init__.py
│ ├── proxy_format.py
│ ├── logger.py
│ ├── captcha.py
│ ├── constant.py
│ ├── validate.py
│ ├── webdriver_parser.py
│ └── html_parser.py
├── spoon_web
├── api
│ ├── __init__.py
│ ├── urls.py
│ └── views.py
├── spoon
│ ├── __init__.py
│ ├── urls.py
│ ├── wsgi.py
│ └── settings.py
└── manage.py
├── _config.yml
├── .gitignore
├── requirements.txt
├── setup.py
├── README.md
└── LICENSE
/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/test/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/spoon_server/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/spoon_web/api/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/spoon_web/spoon/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/spoon_server/browser/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/spoon_server/database/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/spoon_server/example/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/spoon_server/forward/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/spoon_server/main/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/spoon_server/proxy/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/spoon_server/util/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-cayman
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | *.pyc
3 | *.log
4 | .pypirc
5 | dist/
6 | build/
7 | spoonproxy.egg-info/
8 | setup_pypi.py
9 | build.sh
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | selenium==3.5.0
2 | requests==2.20.0
3 | django==1.11.23
4 | lxml==3.8.0
5 | redis==2.10.5
6 | schedule==0.4.3
7 | PyExecJS==1.4.0
--------------------------------------------------------------------------------
/spoon_web/spoon/urls.py:
--------------------------------------------------------------------------------
1 | from django.conf.urls import include, url
2 |
# Project-level URL table: anything whose path starts with ``api/v1/`` is
# handed over to the patterns declared in ``api.urls``.
_api_v1_route = url(r'^api/v1/', include('api.urls'))

urlpatterns = [_api_v1_route]
6 |
--------------------------------------------------------------------------------
/spoon_server/util/proxy_format.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 |
def match_proxy_format(origin):
    """Tell whether ``origin`` starts with an ``ip:port`` shaped token.

    The check is anchored at the beginning of the string only, so extra
    characters after the port do not cause a rejection.  Octets and the port
    are matched by digit count, not by numeric range.

    :param origin: candidate proxy string, e.g. ``"61.160.190.34:8888"``.
    :return: ``True`` if the prefix has the ``d.d.d.d:port`` shape, else ``False``.
    """
    shape = re.compile(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5}')
    # A match object is always truthy, so bool() mirrors an "is not None" test.
    return bool(shape.match(origin))
6 |
--------------------------------------------------------------------------------
/spoon_server/proxy/file_provider.py:
--------------------------------------------------------------------------------
1 | from spoon_server.proxy.provider import Provider
2 |
3 |
class FileProvider(Provider):
    """Provider that serves a fixed, hard-coded proxy address."""

    @Provider.provider_exception
    def getter(self):
        """Yield the single built-in ``ip:port`` entry."""
        static_entries = ("61.160.190.34:8888",)
        for entry in static_entries:
            yield entry
8 |
--------------------------------------------------------------------------------
/spoon_server/database/redis_config.py:
--------------------------------------------------------------------------------
class RedisConfig(object):
    """Plain value holder for Redis connection settings."""

    def __init__(self, host, port, db=0, password=None):
        """Store the settings on the instance as given; nothing is validated.

        :param host: server host name or address.
        :param port: server port.
        :param db: database index, ``0`` by default.
        :param password: optional password, ``None`` by default.
        """
        self.host, self.port = host, port
        self.db, self.password = db, password
7 |
--------------------------------------------------------------------------------
/spoon_server/browser/webdriver_pool_config.py:
--------------------------------------------------------------------------------
class WebDriverPoolConfig(object):
    """Plain value holder for the settings of a webdriver pool."""

    def __init__(self, phantomjs_path, proxy=None, header=None, timeout=10):
        """Store the settings on the instance as given; nothing is validated.

        :param phantomjs_path: path to the PhantomJS executable.
        :param proxy: optional proxy setting, ``None`` by default.
        :param header: optional header setting, ``None`` by default.
        :param timeout: timeout value, ``10`` by default.
        """
        self.phantomjs_path, self.proxy = phantomjs_path, proxy
        self.header, self.timeout = header, timeout
7 |
--------------------------------------------------------------------------------
/spoon_web/api/urls.py:
--------------------------------------------------------------------------------
1 | from django.conf.urls import url
2 |
3 | from . import views
4 |
# Routes of the ``api`` app, as (regex, view) pairs.  Order is significant:
# the first matching pattern wins, and only ``get_keys`` is anchored at the
# end with ``$`` -- the others match any path that starts with the prefix.
_ROUTES = (
    (r'^get_keys$', views.get_keys),
    (r'^fetchone_from', views.fetchone_from),
    (r'^fetchall_from', views.fetchall_from),
    (r'^fetch_hundred_recent', views.fetch_hundred_recent),
    (r'^fetch_stale', views.fetch_stale),
    (r'^fetch_recent', views.fetch_recent),
)

urlpatterns = [url(regex, view) for regex, view in _ROUTES]
13 |
--------------------------------------------------------------------------------
/spoon_web/spoon/wsgi.py:
--------------------------------------------------------------------------------
1 | """
2 | WSGI config for spoon project.
3 |
4 | It exposes the WSGI callable as a module-level variable named ``application``.
5 |
6 | For more information on this file, see
7 | https://docs.djangoproject.com/en/1.11/howto/deployment/wsgi/
8 | """
9 |
10 | import os
11 |
12 | from django.core.wsgi import get_wsgi_application
13 |
# Fall back to the project's settings module only when the environment does
# not already name one.
if "DJANGO_SETTINGS_MODULE" not in os.environ:
    os.environ["DJANGO_SETTINGS_MODULE"] = "spoon.settings"

# Module-level WSGI callable.
application = get_wsgi_application()
17 |
--------------------------------------------------------------------------------
/spoon_server/proxy/provider.py:
--------------------------------------------------------------------------------
class Provider(object):
    """Base class for proxy sources.

    Subclasses override :meth:`getter` to produce proxy addresses and may
    decorate it with :meth:`provider_exception`.
    """

    def __init__(self):
        # Subclasses replace this with the URL(s) they read from.
        self.url_list = None

    def getter(self):
        """Produce proxy addresses; must be overridden by subclasses.

        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError("Getter method must be implemented.")

    @classmethod
    def provider_exception(cls, fun):
        """Decorator hook for provider ``getter`` methods.

        The wrapper forwards the call to ``fun`` and lets any exception
        propagate to the caller unchanged.  When ``fun`` is a generator
        function the call only creates the generator; errors raised while
        iterating surface at the consumer.

        :param fun: the method to wrap.
        :return: a wrapper carrying ``fun``'s name and docstring.
        """
        # Imported locally so the module needs no extra top-level import.
        from functools import wraps

        @wraps(fun)
        def wrapper(self, *args, **kwargs):
            return fun(self, *args, **kwargs)

        return wrapper

    # def __str__(self):
    #     return self.__class__.__name__
21 |
--------------------------------------------------------------------------------
/spoon_server/util/logger.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | from logging.handlers import TimedRotatingFileHandler
4 |
# The log file lives in the parent of this module's directory.  The path is
# left un-normalised (it still ends in ``os.pardir``).
_module_dir = os.path.dirname(os.path.abspath(__file__))
log_path = os.path.join(_module_dir, os.pardir)

log_fmt = '%(asctime)s\tFile \"%(filename)s\",line %(lineno)s\t%(levelname)s: %(message)s'
formatter = logging.Formatter(fmt=log_fmt)

# File handler that rolls over on a one-day interval; rolled files get a
# date-based suffix.
log_file_handler = TimedRotatingFileHandler(
    filename=os.path.join(log_path, "spoon.log"),
    when="D",
    interval=1,
)
log_file_handler.setFormatter(formatter)
log_file_handler.suffix = '%Y%m%d.log'

# Configure the root logger at INFO, then attach the file handler to it.
logging.basicConfig(level=logging.INFO)
log = logging.getLogger()
log.addHandler(log_file_handler)
15 |
--------------------------------------------------------------------------------
/spoon_server/example/example_multi.py:
--------------------------------------------------------------------------------
1 | import time
2 | from multiprocessing import Process
3 |
4 | from spoon_server.main.proxy_pipe import ProxyPipe
5 | from spoon_server.database.redis_config import RedisConfig
6 | from spoon_server.main.checker import CheckerBaidu
7 |
8 |
def main_run():
    """Run three ProxyPipe instances concurrently, one process each.

    All pipes share the same Redis settings.  Processes are launched one
    second apart and the function then waits for every one of them.
    """
    redis = RedisConfig("127.0.0.1", 21009)
    pipes = (
        ProxyPipe(url_prefix="https://www.baidu.com", database=redis, checker=CheckerBaidu()),
        ProxyPipe(url_prefix="https://www.google.com", database=redis),
        ProxyPipe(database=redis),
    )
    workers = [Process(target=pipe.start) for pipe in pipes]

    for worker in workers:
        worker.start()
        time.sleep(1)
    for worker in workers:
        worker.join()


if __name__ == '__main__':
    main_run()
26 |
--------------------------------------------------------------------------------
/spoon_web/manage.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import os
3 | import sys
4 |
def _run_from_command_line():
    """Select the settings module and dispatch ``sys.argv`` to Django's CLI."""
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "spoon.settings")
    try:
        from django.core.management import execute_from_command_line
    except ImportError:
        # The import above can fail for reasons other than Django being
        # absent.  Probe for Django itself so that only a genuinely missing
        # installation gets the friendly message; anything else is re-raised
        # untouched (avoids masking other exceptions on Python 2).
        try:
            import django
        except ImportError:
            raise ImportError(
                "Couldn't import Django. Are you sure it's installed and "
                "available on your PYTHONPATH environment variable? Did you "
                "forget to activate a virtual environment?"
            )
        raise
    execute_from_command_line(sys.argv)


if __name__ == "__main__":
    _run_from_command_line()
23 |
--------------------------------------------------------------------------------
/spoon_server/example/example.py:
--------------------------------------------------------------------------------
1 | from spoon_server.proxy.fetcher import Fetcher
2 | from spoon_server.main.proxy_pipe import ProxyPipe
3 | from spoon_server.proxy.kuai_provider import KuaiProvider
4 | from spoon_server.proxy.xici_provider import XiciProvider
5 | from spoon_server.database.redis_config import RedisConfig
6 | from spoon_server.main.checker import CheckerBaidu
7 |
8 |
def main_run():
    """Start one ProxyPipe against Baidu, fed by the Kuai and Xici providers.

    The fetcher is created with ``use_default=False``; the Kuai provider is
    then set on the pipe and the Xici provider added to it.
    """
    redis = RedisConfig("127.0.0.1", 21009)
    pipe = ProxyPipe(
        url_prefix="https://www.baidu.com",
        fetcher=Fetcher(use_default=False),
        database=redis,
        checker=CheckerBaidu(),
        validater_thread_num=30,
        refresher_thread_num=30,
    )
    pipe = pipe.set_fetcher([KuaiProvider()])
    pipe = pipe.add_fetcher([XiciProvider()])
    pipe.start()


if __name__ == '__main__':
    main_run()
23 |
--------------------------------------------------------------------------------
/spoon_server/proxy/xun_provider.py:
--------------------------------------------------------------------------------
1 | import json
2 | import requests
3 | from spoon_server.proxy.provider import Provider
4 |
5 |
class XunProvider(Provider):
    """Provider reading proxies from the xdaili.cn free-IP JSON endpoint."""

    def __init__(self, url_list=None):
        """
        :param url_list: optional list of endpoint URLs; the built-in default
            is used when omitted or empty.
        """
        # Name this class in super() so Provider.__init__ actually runs.
        super(XunProvider, self).__init__()
        # Honour a caller-supplied list instead of leaving the attribute unset.
        self.url_list = url_list or self._gen_url_list()

    @staticmethod
    def _gen_url_list():
        """Return the default endpoint list."""
        url_list = ["http://www.xdaili.cn/ipagent/freeip/getFreeIps?page=1&rows=10"]
        return url_list

    @Provider.provider_exception
    def getter(self):
        """Yield ``ip:port`` strings from each endpoint's JSON payload.

        The payload is read as UTF-8 JSON and must expose
        ``RESULT -> rows`` as an iterable of objects with ``ip`` and ``port``.
        """
        for url in self.url_list:
            content = json.loads(requests.get(url).content.decode("utf-8"))
            for row in content['RESULT']['rows']:
                yield '{}:{}'.format(row['ip'], row['port'])


if __name__ == "__main__":
    kd = XunProvider()
    for proxy in kd.getter():
        print(proxy)
29 |
--------------------------------------------------------------------------------
/spoon_server/proxy/feilong_provider.py:
--------------------------------------------------------------------------------
1 | import re
2 | from spoon_server.proxy.provider import Provider
3 | from spoon_server.util.html_parser import get_html
4 | from spoon_server.util.constant import HEADERS
5 |
6 |
class FeilongProvider(Provider):
    """Provider scraping ``ip:port`` pairs from the feilongip.com front page."""

    def __init__(self, url_list=None):
        """
        :param url_list: optional URL string or list of URL strings; the
            built-in default is used when omitted or empty.
        """
        # Name this class in super() so Provider.__init__ actually runs.
        super(FeilongProvider, self).__init__()
        # Honour a caller-supplied value instead of leaving the attribute unset.
        self.url_list = url_list or self._gen_url_list()

    @staticmethod
    def _gen_url_list():
        """Return the default source; note it is a single URL string."""
        url_list = "http://www.feilongip.com/"
        return url_list

    @Provider.provider_exception
    def getter(self):
        """Yield every ``ip:port`` shaped token found in the fetched page(s)."""
        # The default is a bare string; also accept a list of URLs.
        urls = [self.url_list] if isinstance(self.url_list, str) else self.url_list
        for url in urls:
            html = get_html(url, headers=HEADERS)
            if not html:
                # Nothing usable was fetched; skip rather than hand an empty
                # value to the regex search.
                continue
            for px in re.findall(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5}', html):
                yield px


if __name__ == "__main__":
    kd = FeilongProvider()
    for proxy in kd.getter():
        print(proxy)
31 |
--------------------------------------------------------------------------------
/spoon_server/proxy/ip31_provider.py:
--------------------------------------------------------------------------------
1 | import re
2 | from spoon_server.proxy.provider import Provider
3 | from spoon_server.util.html_parser import get_html
4 | from spoon_server.util.constant import HEADERS
5 |
6 |
class IP31Provider(Provider):
    """Provider scraping proxies from the 31f.cn HTTP proxy page."""

    def __init__(self, url_list=None):
        """
        :param url_list: optional URL string or list of URL strings; the
            built-in default is used when omitted or empty.
        """
        # Name this class in super() so Provider.__init__ actually runs.
        super(IP31Provider, self).__init__()
        # Honour a caller-supplied value instead of leaving the attribute unset.
        self.url_list = url_list or self._gen_url_list()

    @staticmethod
    def _gen_url_list():
        """Return the default source; note it is a single URL string."""
        url_list = "https://31f.cn/http-proxy/"
        return url_list

    @Provider.provider_exception
    def getter(self):
        """Yield ``ip:port`` strings built from adjacent address/port cells."""
        # The default is a bare string; also accept a list of URLs.
        urls = [self.url_list] if isinstance(self.url_list, str) else self.url_list
        for url in urls:
            html = get_html(url, headers=HEADERS)
            if not html:
                # Nothing usable was fetched; skip rather than hand an empty
                # value to the regex search.
                continue
            # NOTE(review): the cell markup around the two groups was lost in
            # the extracted source; <td>...</td> is a reconstruction -- confirm
            # against the live page before relying on it.
            pattern = r'<td>(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})</td>\n<td>(\d{1,5})</td>'
            for px in re.findall(pattern, html, re.S):
                yield ":".join(px)


if __name__ == "__main__":
    kd = IP31Provider()
    for proxy in kd.getter():
        print(proxy)
31 |
--------------------------------------------------------------------------------
/spoon_server/proxy/us_provider.py:
--------------------------------------------------------------------------------
1 | from spoon_server.proxy.provider import Provider
2 | from spoon_server.util.html_parser import get_html_tree
3 |
4 |
class UsProvider(Provider):
    """Provider scraping the proxy table on us-proxy.org."""

    def __init__(self, url_list=None):
        """
        :param url_list: optional list of page URLs; the built-in default is
            used when omitted or empty.
        """
        # Name this class in super() so Provider.__init__ actually runs.
        super(UsProvider, self).__init__()
        # Honour a caller-supplied list instead of leaving the attribute unset.
        self.url_list = url_list or self._gen_url_list()

    @staticmethod
    def _gen_url_list():
        """Return the default page list."""
        url_list = ['https://www.us-proxy.org/']
        return url_list

    @Provider.provider_exception
    def getter(self):
        """Yield the first two cell texts of each table row joined by ``:``."""
        for url in self.url_list:
            tree = get_html_tree(url)
            if tree is None:
                # Page could not be turned into a tree; try the next URL.
                continue
            proxy_list = tree.xpath('//*[@id="proxylisttable"]/tbody/tr')
            for px in proxy_list:
                yield ':'.join(px.xpath('./td/text()')[0:2])


if __name__ == "__main__":
    kd = UsProvider()
    for proxy in kd.getter():
        print(proxy)
31 |
--------------------------------------------------------------------------------
/spoon_server/util/captcha.py:
--------------------------------------------------------------------------------
1 | import base64
2 | import requests
3 |
4 | from spoon_server.util.logger import log
5 | from spoon_server.util.constant import HEADERS
6 |
7 |
class Captcha(object):
    """Client for a remote captcha-solving endpoint."""

    def __init__(self, crack_url):
        """
        :param crack_url: URL that receives the encoded image via POST.
        """
        self.crack_url = crack_url

    def get_image_result(self, image_url):
        """Download a captcha image and return the solver's answer as text.

        :param image_url: URL of the captcha image.
        :return: body of the solver's response decoded as UTF-8.
        :raises Exception: if the download fails or does not return HTTP 200.
        """
        try:
            ir = requests.get(image_url, headers=HEADERS, timeout=10)
        except Exception as e:
            log.error("Error fetching captcha {0}".format(e))
            raise Exception(e)

        if ir.status_code != 200:
            log.error("Error cracking captcha {0}".format(ir.status_code))
            raise Exception("Error cracking captcha {0}".format(ir.status_code))

        # The image travels base64-encoded in the "image" form field.
        encoded_image = base64.b64encode(ir.content)
        res = requests.post(self.crack_url, data={"image": encoded_image})
        return res.content.decode("utf-8")
27 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 |
# The author and the maintainer are the same person.
_OWNER = 'Jiramew'
_OWNER_EMAIL = 'hanbingflying@sina.com'

_CLASSIFIERS = [
    'Development Status :: 3 - Alpha',
    'License :: OSI Approved :: BSD License',
    'Programming Language :: Python',
    "Programming Language :: Python :: 3",
    'Programming Language :: Python :: 3.6',
    'Topic :: Software Development :: Libraries'
]

# Runtime dependencies declared to the installer (lower bounds only).
_INSTALL_REQUIRES = [
    'requests>=2.18.1',
    'lxml>=3.8.0',
    'redis>=2.10.5',
    'schedule>=0.4.3',
    'PyExecJS>=1.4.0',
]

setup(
    name='spoonproxy',
    version='git.latest',
    description='A package for building specific Proxy Pool for different Sites.',
    packages=find_packages(),
    url='https://github.com/Jiramew/spoon',
    license='BSD License',
    author=_OWNER,
    author_email=_OWNER_EMAIL,
    maintainer=_OWNER,
    maintainer_email=_OWNER_EMAIL,
    platforms=["all"],
    classifiers=_CLASSIFIERS,
    install_requires=_INSTALL_REQUIRES,
)
31 |
--------------------------------------------------------------------------------
/spoon_server/util/constant.py:
--------------------------------------------------------------------------------
def _build_headers(user_agent):
    """Return a fresh header dict that differs only in its User-Agent."""
    return {
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': user_agent,
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
    }


# Request headers presenting a desktop Chrome browser.
HEADERS = _build_headers(
    "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36"
)

# Request headers presenting an iPhone Safari browser.
HEADERS_IPHONE = _build_headers(
    'Mozilla/5.0 (iPhone; CPU iPhone OS 7_0 like Mac OS X; en-us) AppleWebKit'
    '/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11A465 Safari/9537.53'
)
22 |
--------------------------------------------------------------------------------
/spoon_server/proxy/ip181_provider.py:
--------------------------------------------------------------------------------
1 | from spoon_server.proxy.provider import Provider
2 | from spoon_server.util.html_parser import get_html_tree
3 |
4 |
class IP181Provider(Provider):
    """Provider scraping the proxy table on ip181.com."""

    def __init__(self, url_list=None):
        """
        :param url_list: optional list of page URLs; the built-in default is
            used when omitted or empty.
        """
        # Name this class in super() so Provider.__init__ actually runs.
        super(IP181Provider, self).__init__()
        # Honour a caller-supplied list instead of leaving the attribute unset.
        self.url_list = url_list or self._gen_url_list()

    @staticmethod
    def _gen_url_list():
        """Return the default page list."""
        url_list = ['http://www.ip181.com/',
                    ]
        return url_list

    @Provider.provider_exception
    def getter(self):
        """Yield the first two cell texts of each data row joined by ``:``."""
        for url in self.url_list:
            tree = get_html_tree(url)
            if tree is None:
                # Page could not be turned into a tree; try the next URL.
                continue
            proxy_list = tree.xpath('/html/body/div[2]/div[1]/div[2]/div/div[2]/table//tr')
            # The first row is skipped.
            for px in proxy_list[1:]:
                yield ':'.join(px.xpath('./td/text()')[0:2])


if __name__ == "__main__":
    kd = IP181Provider()
    for proxy in kd.getter():
        print(proxy)
32 |
--------------------------------------------------------------------------------
/spoon_server/proxy/iphai_provider.py:
--------------------------------------------------------------------------------
1 | from spoon_server.proxy.provider import Provider
2 | from spoon_server.util.html_parser import get_html_tree
3 |
4 |
class IphaiProvider(Provider):
    """Provider scraping the proxy table on iphai.com."""

    def __init__(self, url_list=None):
        """
        :param url_list: optional list of page URLs; the built-in default is
            used when omitted or empty.
        """
        # Name this class in super() so Provider.__init__ actually runs.
        super(IphaiProvider, self).__init__()
        # Honour a caller-supplied list instead of leaving the attribute unset.
        self.url_list = url_list or self._gen_url_list()

    @staticmethod
    def _gen_url_list():
        """Return the default page list."""
        url_list = ["http://www.iphai.com/"]
        return url_list

    @Provider.provider_exception
    def getter(self):
        """Yield ``ip:port`` from the stripped first and second cell of each row."""
        for url in self.url_list:
            tree = get_html_tree(url)
            if tree is None:
                # Page could not be turned into a tree; try the next URL.
                continue
            # The first row is skipped.
            proxy_list = tree.xpath('/html/body/div[4]/div[2]/table/tr')[1:]
            for px in proxy_list:
                yield ':'.join([px.xpath('./td[1]/text()')[0].strip(), px.xpath('./td[2]/text()')[0].strip()])


if __name__ == "__main__":
    kd = IphaiProvider()
    for proxy in kd.getter():
        print(proxy)
31 |
--------------------------------------------------------------------------------
/spoon_server/proxy/plp_provider.py:
--------------------------------------------------------------------------------
1 | from spoon_server.proxy.provider import Provider
2 | from spoon_server.util.html_parser import get_html_tree
3 |
4 |
class PlpProvider(Provider):
    """Provider scraping the fresh HTTP proxy lists on proxylistplus.com."""

    def __init__(self, url_list=None):
        """
        :param url_list: optional list of page URLs; the built-in default is
            used when omitted or empty.
        """
        # Name this class in super() so Provider.__init__ actually runs.
        super(PlpProvider, self).__init__()
        # Honour a caller-supplied list instead of leaving the attribute unset.
        self.url_list = url_list or self._gen_url_list()

    @staticmethod
    def _gen_url_list():
        """Return the default page list (list pages 1 through 6)."""
        url_list = ["https://list.proxylistplus.com/Fresh-HTTP-Proxy-List-{0}".format(i) for i in range(1, 7)]
        return url_list

    @Provider.provider_exception
    def getter(self):
        """Yield the first two cell texts of each data row joined by ``:``."""
        for url in self.url_list:
            tree = get_html_tree(url)
            if tree is None:
                # Page could not be turned into a tree; try the next URL.
                continue
            proxy_list = tree.xpath('//*[@id="page"]/table[2]/tr')
            # The first two rows are skipped.
            for px in proxy_list[2:]:
                yield ':'.join(px.xpath('./td/text()')[0:2])


if __name__ == "__main__":
    kd = PlpProvider()
    for proxy in kd.getter():
        print(proxy)
31 |
--------------------------------------------------------------------------------
/spoon_server/proxy/ihuan_provider.py:
--------------------------------------------------------------------------------
1 | from spoon_server.proxy.provider import Provider
2 | from spoon_server.util.html_parser import get_html_tree
3 |
4 |
class IhuanProvider(Provider):
    """Provider scraping the proxy table on ip.ihuan.me."""

    def __init__(self, url_list=None):
        """
        :param url_list: optional list of page URLs; the built-in default is
            used when omitted or empty.
        """
        # Name this class in super() so Provider.__init__ actually runs.
        super(IhuanProvider, self).__init__()
        # Honour a caller-supplied list instead of leaving the attribute unset.
        self.url_list = url_list or self._gen_url_list()

    @staticmethod
    def _gen_url_list():
        """Return the default page list (pages 1 through 30, ``anonymity=2``)."""
        url_list = ['https://ip.ihuan.me/?page={}&anonymity=2'.format(str(i)) for i in range(1, 31)]
        return url_list

    @Provider.provider_exception
    def getter(self):
        """Yield ``ip:port`` strings, one per table row.

        The address is the link text inside the first cell; the port is the
        first plain text node found directly under any cell of the row.
        """
        for url in self.url_list:
            tree = get_html_tree(url)
            if tree is None:
                # Page could not be turned into a tree; try the next URL.
                continue
            proxy_list = tree.xpath('/html/body/div[2]/div[2]/table/tbody/tr')
            for px in proxy_list:
                yield ':'.join([px.xpath('./td[1]/a/text()')[0], px.xpath('./td/text()')[0]])


if __name__ == "__main__":
    kd = IhuanProvider()
    for proxy in kd.getter():
        print(proxy)
31 |
--------------------------------------------------------------------------------
/spoon_server/proxy/xici_provider.py:
--------------------------------------------------------------------------------
1 | from spoon_server.proxy.provider import Provider
2 | from spoon_server.util.html_parser import get_html_tree
3 |
4 |
class XiciProvider(Provider):
    """Provider scraping the ``ip_list`` table on xicidaili.com."""

    def __init__(self, url_list=None):
        """
        :param url_list: optional list of page URLs; the built-in default is
            used when omitted or empty.
        """
        # Name this class in super() so Provider.__init__ actually runs.
        super(XiciProvider, self).__init__()
        # Honour a caller-supplied list instead of leaving the attribute unset.
        self.url_list = url_list or self._gen_url_list()

    @staticmethod
    def _gen_url_list():
        """Return the default page list."""
        url_list = ['http://www.xicidaili.com/nn',  # high anonymity
                    'http://www.xicidaili.com/nt',  # transparent
                    ]
        return url_list

    @Provider.provider_exception
    def getter(self):
        """Yield the first two cell texts of each table row joined by ``:``."""
        for url in self.url_list:
            tree = get_html_tree(url)
            if tree is None:
                # Page could not be turned into a tree; try the next URL.
                continue
            proxy_list = tree.xpath('.//table[@id="ip_list"]//tr')
            for px in proxy_list:
                cells = px.xpath('./td/text()')
                if len(cells) < 2:
                    # Every row is visited here (no header slice), and a row
                    # without two text cells cannot form an ip:port pair.
                    continue
                yield ':'.join(cells[0:2])


if __name__ == "__main__":
    kd = XiciProvider()
    for proxy in kd.getter():
        print(proxy)
33 |
--------------------------------------------------------------------------------
/spoon_server/proxy/xiaohexia_provider.py:
--------------------------------------------------------------------------------
1 | from spoon_server.proxy.provider import Provider
2 | from spoon_server.util.html_parser import get_html_tree
3 |
4 |
class XiaohexiaProvider(Provider):
    """Provider scraping the proxy table on xiaohexia.cn."""

    def __init__(self, url_list=None):
        """
        :param url_list: optional list of page URLs; the built-in default is
            used when omitted or empty.
        """
        # Name this class in super() so Provider.__init__ actually runs.
        super(XiaohexiaProvider, self).__init__()
        # Honour a caller-supplied list instead of leaving the attribute unset.
        self.url_list = url_list or self._gen_url_list()

    @staticmethod
    def _gen_url_list():
        """Return the default page list (pages 1 through 3)."""
        url_list = ['http://www.xiaohexia.cn/index.php?page={0}'.format(i) for i in range(1, 4)]

        return url_list

    @Provider.provider_exception
    def getter(self):
        """Yield the first two cell texts of each data row joined by ``:``."""
        for url in self.url_list:
            tree = get_html_tree(url)
            if tree is None:
                # Page could not be turned into a tree; try the next URL.
                continue
            # The first row is skipped.
            px_segment = tree.xpath('/html/body/div/div/div[2]/div/table/tr')[1:]
            for px in px_segment:
                yield ":".join(px.xpath(
                    "./td/text()")[0:2])


if __name__ == "__main__":
    kd = XiaohexiaProvider()
    for proxy in kd.getter():
        print(proxy)
33 |
--------------------------------------------------------------------------------
/spoon_server/proxy/six_provider.py:
--------------------------------------------------------------------------------
1 | import re
2 | from spoon_server.proxy.provider import Provider
3 | from spoon_server.util.html_parser import get_html
4 | from spoon_server.util.constant import HEADERS
5 |
6 |
class SixProvider(Provider):
    """Provider scraping ``ip:port`` pairs from the 66ip.cn export endpoint."""

    def __init__(self, url_list=None):
        """
        :param url_list: optional list of endpoint URLs; the built-in default
            is used when omitted or empty.
        """
        # Name this class in super() so Provider.__init__ actually runs.
        super(SixProvider, self).__init__()
        # Honour a caller-supplied list instead of leaving the attribute unset.
        self.url_list = url_list or self._gen_url_list()

    @staticmethod
    def _gen_url_list():
        """Return the default endpoint list (``anonymoustype`` 3 and 4)."""
        url_list = [
            "http://www.66ip.cn/nmtq.php?getnum=300&isp=0&anonymoustype={0}&start=&ports=&export=&api=66ip".format(i)
            for i in range(3, 5)]
        return url_list

    @Provider.provider_exception
    def getter(self):
        """Yield every ``ip:port`` shaped token found in each fetched page."""
        for url in self.url_list:
            html = get_html(url, headers=HEADERS)
            if not html:
                # Nothing usable was fetched; skip rather than hand an empty
                # value to the regex search.
                continue
            for px in re.findall(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5}', html):
                yield px


if __name__ == "__main__":
    kd = SixProvider()
    for proxy in kd.getter():
        print(proxy)
34 |
--------------------------------------------------------------------------------
/spoon_server/proxy/ip3366_provider.py:
--------------------------------------------------------------------------------
1 | from spoon_server.proxy.provider import Provider
2 | from spoon_server.util.html_parser import get_html_tree
3 |
4 |
class IP3366Provider(Provider):
    """Provider scraping the free proxy tables on ip3366.net."""

    def __init__(self, url_list=None):
        """
        :param url_list: optional list of page URLs; the built-in default is
            used when omitted or empty.
        """
        # Name this class in super() so Provider.__init__ actually runs.
        super(IP3366Provider, self).__init__()
        # Honour a caller-supplied list instead of leaving the attribute unset.
        self.url_list = url_list or self._gen_url_list()

    @staticmethod
    def _gen_url_list():
        """Return the default page list: ``stype`` 1-4 crossed with pages 1-4."""
        base_url_list = ['http://www.ip3366.net/free/?stype={0}&page='.format(i) for i in range(1, 5)]
        url_list = [url + str(j) for url in base_url_list for j in range(1, 5)]
        return url_list

    @Provider.provider_exception
    def getter(self):
        """Yield the first two cell texts of each data row joined by ``:``."""
        for url in self.url_list:
            tree = get_html_tree(url)
            if tree is None:
                # Page could not be turned into a tree; try the next URL.
                continue
            proxy_list = tree.xpath('//*[@id="list"]/table//tr')
            # The first row is skipped.
            for px in proxy_list[1:]:
                yield ':'.join(px.xpath('./td/text()')[0:2])


if __name__ == "__main__":
    kd = IP3366Provider()
    for proxy in kd.getter():
        print(proxy)
32 |
--------------------------------------------------------------------------------
/spoon_server/proxy/nian_provider.py:
--------------------------------------------------------------------------------
1 | from spoon_server.proxy.provider import Provider
2 | from spoon_server.util.html_parser import get_html_tree
3 |
4 |
class NianProvider(Provider):
    """Provider scraping the proxy tables on nianshao.me."""

    def __init__(self, url_list=None):
        """
        :param url_list: optional list of page URLs; the built-in default is
            used when omitted or empty.
        """
        # Name this class in super() so Provider.__init__ actually runs.
        super(NianProvider, self).__init__()
        # Honour a caller-supplied list instead of leaving the attribute unset.
        self.url_list = url_list or self._gen_url_list()

    @staticmethod
    def _gen_url_list():
        """Return the default page list: pages 1-3 for ``stype`` 1, then 2."""
        url_list = ['http://www.nianshao.me/?stype=1&page={0}'.format(i) for i in range(1, 4)]
        url_list.extend(['http://www.nianshao.me/?stype=2&page={0}'.format(i) for i in range(1, 4)])
        return url_list

    @Provider.provider_exception
    def getter(self):
        """Yield the first two cell texts of each data row joined by ``:``."""
        for url in self.url_list:
            tree = get_html_tree(url)
            if tree is None:
                # Page could not be turned into a tree; try the next URL.
                continue
            proxy_list = tree.xpath('//*[@id="main"]/div/div/table//tr')
            # The first row is skipped.
            for px in proxy_list[1:]:
                yield ':'.join(px.xpath('./td/text()')[0:2])


if __name__ == "__main__":
    kd = NianProvider()
    for proxy in kd.getter():
        print(proxy)
32 |
--------------------------------------------------------------------------------
/spoon_server/util/validate.py:
--------------------------------------------------------------------------------
1 | import requests
2 | from spoon_server.util.logger import log
3 | from spoon_server.util.constant import HEADERS_IPHONE
4 | from spoon_server.main.checker import CheckerBaidu
5 |
6 |
def validate(target_url, proxy, checker):
    """Check whether ``target_url`` can be fetched acceptably through ``proxy``.

    :param target_url: URL to request; the literal ``"default"`` selects
        ``https://www.baidu.com``.
    :param proxy: ``host:port`` string, used for both http and https traffic.
    :param checker: object providing ``timeout``, a ``status_code`` container
        of accepted codes, and ``checker_func(content)`` to judge the body.
    :return: ``True`` only if the status code is accepted and
        ``checker_func`` approves the body; ``False`` otherwise, including
        when any exception is raised along the way.
    """
    url = "https://www.baidu.com" if target_url == "default" else target_url
    proxy_url = "http://{proxy}".format(proxy=proxy)
    proxies = {"http": proxy_url, "https": proxy_url}
    try:
        # Certificate verification is switched off for this request.
        r = requests.get(url, proxies=proxies, timeout=checker.timeout, verify=False, headers=HEADERS_IPHONE)
        if r.status_code not in checker.status_code:
            return False
        if not checker.checker_func(r.content):
            return False
        log.info('validate success target {0} proxy {1}'.format(url, proxy))
        return True
    except Exception as e:
        log.error("validate failed with {0}".format(e))
        return False


if __name__ == "__main__":
    print(validate("https://www.baidu.com", "127.0.0.1:6666", checker=CheckerBaidu()))
28 |
--------------------------------------------------------------------------------
/spoon_server/util/webdriver_parser.py:
--------------------------------------------------------------------------------
1 | from lxml import etree
2 | from spoon_server.util.logger import log
3 | from spoon_server.util.constant import HEADERS
4 |
5 | from spoon_server.browser.webdriver_pool import WebdriverPool
6 | from spoon_server.browser.webdriver_pool_config import WebDriverPoolConfig
7 |
8 |
class WebdriverParser(object):
    """Fetch a page through a pooled webdriver and parse it with lxml."""

    # Location used when the caller does not supply one.
    DEFAULT_PHANTOMJS_PATH = "D:/program/phantomjs-2.1.1-windows/bin/phantomjs.exe"

    def __init__(self, header=None, proxy=None, phantomjs_path=None):
        """Create a pool from the given settings and acquire one driver.

        :param header: header setting for the pool; ``HEADERS`` when ``None``.
        :param proxy: optional proxy setting for the pool.
        :param phantomjs_path: path to the PhantomJS executable;
            ``DEFAULT_PHANTOMJS_PATH`` when ``None``.
        """
        if header is None:
            header = HEADERS
        if phantomjs_path is None:
            phantomjs_path = self.DEFAULT_PHANTOMJS_PATH
        self.wdp_config = WebDriverPoolConfig(
            phantomjs_path=phantomjs_path,
            header=header,
            proxy=proxy
        )
        self.wd = WebdriverPool(self.wdp_config)
        self.driver = self.wd.acquire()

    def parse(self, url):
        """Load ``url`` in the driver and return the page as an lxml tree.

        The driver is released and the pool stopped afterwards, whether or
        not the load succeeded.

        :param url: address to load.
        :return: the result of ``etree.HTML`` on the driver's page source.
        :raises Exception: wrapping any error raised while loading or parsing.
        """
        try:
            self.driver.get(url)
            html = self.driver.page_source

            return etree.HTML(html)
        except Exception as e:
            log.error("{0}".format(e))
            # Chain explicitly so the original error stays attached.
            raise Exception(e) from e
        finally:
            self.wd.release(self.driver)
            self.wd.stop()
33 |
--------------------------------------------------------------------------------
/spoon_server/proxy/gou_provider.py:
--------------------------------------------------------------------------------
1 | from spoon_server.proxy.provider import Provider
2 | from spoon_server.util.html_parser import get_html_tree
3 |
4 |
class GouProvider(Provider):
    """Proxy provider that scrapes ``http://www.goubanjia.com``."""

    def __init__(self, url_list=None):
        """
        :param url_list: pages to scrape; the built-in list is used when it
            is empty or ``None``.
        """
        # NOTE(review): super(Provider, self) starts the lookup after Provider,
        # so Provider.__init__ is skipped -- confirm this is intended.
        super(Provider, self).__init__()
        # Keep a caller-supplied list instead of silently dropping it.
        self.url_list = url_list if url_list else self._gen_url_list()

    @staticmethod
    def _gen_url_list():
        """Return the default list of pages to scrape."""
        return ["http://www.goubanjia.com"]

    @Provider.provider_exception
    def getter(self):
        """Yield ``ip:port`` strings assembled from the visible fragments of each row."""
        for url in self.url_list:
            tree = get_html_tree(url)
            if tree is None:
                continue
            for row in tree.xpath('//table/tbody/tr'):
                # Elements styled "display: none" are skipped by the XPath.
                component = row.xpath(
                    'td[@class="ip"]/*[not(@style="display: none;" or @style="display:none;")]/text()')
                # At least one address fragment plus the port is required;
                # otherwise the join would produce a malformed entry.
                if len(component) < 2:
                    continue
                # The last text fragment is treated as the port.
                component.insert(-1, ':')
                yield "".join(component)
27 |
28 |
if __name__ == "__main__":
    # Manual run: print every proxy, or the error that stopped the scrape.
    provider = GouProvider()
    try:
        for address in provider.getter():
            print(address)
    except Exception as err:
        print(err)
36 |
--------------------------------------------------------------------------------
/spoon_server/proxy/fpl_provider.py:
--------------------------------------------------------------------------------
1 | from spoon_server.proxy.provider import Provider
2 | from spoon_server.util.html_parser import get_html_tree
3 |
4 |
class FPLProvider(Provider):
    """Proxy provider that scrapes the tables published on free-proxy-list.net."""

    def __init__(self, url_list=None, proxy=None):
        """
        :param url_list: pages to scrape; the built-in list is used when it
            is empty or ``None``.
        :param proxy: forwarded to ``get_html_tree`` for every request.
        """
        # NOTE(review): super(Provider, self) starts the lookup after Provider,
        # so Provider.__init__ is skipped -- confirm this is intended.
        super(Provider, self).__init__()
        # Keep a caller-supplied list instead of silently dropping it.
        self.url_list = url_list if url_list else self._gen_url_list()
        self.proxy = proxy

    @staticmethod
    def _gen_url_list():
        """Return the default list of pages to scrape."""
        return ["https://free-proxy-list.net/", "https://free-proxy-list.net/anonymous-proxy.html"]

    @Provider.provider_exception
    def getter(self):
        """Yield ``ip:port`` strings from the first two columns of the proxy table."""
        for url in self.url_list:
            tree = get_html_tree(url, proxy=self.proxy)
            if tree is None:
                continue
            for item in tree.xpath("//table[@id='proxylisttable']/tbody/tr"):
                ip = item.xpath("td[1]/text()")[0].strip()
                port = item.xpath("td[2]/text()")[0].strip()
                yield ip + ":" + port
27 |
28 |
if __name__ == "__main__":
    # Manual run: print every proxy, or the error that stopped the scrape.
    provider = FPLProvider()
    try:
        for address in provider.getter():
            print(address)
    except Exception as err:
        print(err)
36 |
--------------------------------------------------------------------------------
/spoon_server/proxy/ssl_provider.py:
--------------------------------------------------------------------------------
1 | from spoon_server.proxy.provider import Provider
2 | from spoon_server.util.html_parser import get_html_tree
3 |
4 |
class SSLProvider(Provider):
    """Proxy provider that scrapes the table published on www.sslproxies.org."""

    def __init__(self, url_list=None, proxy=None):
        """
        :param url_list: pages to scrape; the built-in list is used when it
            is empty or ``None``.
        :param proxy: forwarded to ``get_html_tree``; the original code notes
            that a proxy needs to be configured for this source.
        """
        # NOTE(review): super(Provider, self) starts the lookup after Provider,
        # so Provider.__init__ is skipped -- confirm this is intended.
        super(Provider, self).__init__()
        # Keep a caller-supplied list instead of silently dropping it.
        self.url_list = url_list if url_list else self._gen_url_list()
        self.proxy = proxy

    @staticmethod
    def _gen_url_list():
        """Return the default list of pages to scrape."""
        return ["https://www.sslproxies.org/"]

    @Provider.provider_exception
    def getter(self):
        """Yield ``ip:port`` strings from the first two columns of the proxy table."""
        proxy = self.proxy  # you need to configure the proxy
        for url in self.url_list:
            tree = get_html_tree(url, proxy=proxy)
            if tree is None:
                continue
            for item in tree.xpath("//table[@id='proxylisttable']/tbody/tr"):
                ip = item.xpath("td[1]/text()")[0].strip()
                port = item.xpath("td[2]/text()")[0].strip()
                yield ip + ":" + port
28 |
29 |
if __name__ == "__main__":
    # Manual run: print every proxy, or the error that stopped the scrape.
    provider = SSLProvider()
    try:
        for address in provider.getter():
            print(address)
    except Exception as err:
        print(err)
37 |
--------------------------------------------------------------------------------
/spoon_server/proxy/wuyou_provider.py:
--------------------------------------------------------------------------------
1 | from spoon_server.proxy.provider import Provider
2 | from spoon_server.util.html_parser import get_html_tree
3 |
4 |
class WuyouProvider(Provider):
    """Proxy provider that scrapes the free lists on www.data5u.com."""

    def __init__(self, url_list=None):
        """
        :param url_list: pages to scrape; the built-in list is used when it
            is empty or ``None``.
        """
        # NOTE(review): super(Provider, self) starts the lookup after Provider,
        # so Provider.__init__ is skipped -- confirm this is intended.
        super(Provider, self).__init__()
        # Keep a caller-supplied list instead of silently dropping it.
        self.url_list = url_list if url_list else self._gen_url_list()

    @staticmethod
    def _gen_url_list():
        """Return the default list of pages to scrape."""
        return ["http://www.data5u.com/free/gngn/index.shtml",
                "http://www.data5u.com/free/gnpt/index.shtml",
                "http://www.data5u.com/free/gwgn/index.shtml",
                "http://www.data5u.com/free/gwpt/index.shtml"]

    @Provider.provider_exception
    def getter(self):
        """Yield the first two text cells of each row joined with ``:``."""
        for url in self.url_list:
            tree = get_html_tree(url)
            if tree is None:
                continue
            # The first matched <ul> is skipped (presumably the header row).
            px_segment = tree.xpath("/html/body/div[5]/ul/li[2]/ul")[1:]
            for px in px_segment:
                yield ":".join(px.xpath("./span/li/text()")[0:2])
30 |
31 |
if __name__ == "__main__":
    # Manual run: print every scraped proxy.
    provider = WuyouProvider()
    for address in provider.getter():
        print(address)
36 |
--------------------------------------------------------------------------------
/spoon_server/main/checker.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 |
class Checker(object):
    """Base acceptance policy: holds the timeout, URL and accepted status codes.

    The base ``checker_func`` accepts every response body.
    """

    def __init__(self, url=None, timeout=20, status_code=None):
        """
        :param url: stored as ``self.url``.
        :param timeout: stored as ``self.timeout``.
        :param status_code: list of accepted HTTP status codes; defaults to
            ``[200]``.
        """
        if status_code is None:
            status_code = [200]
        self.timeout = timeout
        self.url = url
        self.status_code = status_code

    def checker_func(self, html=None):
        """Accept any content."""
        return True

    @staticmethod
    def _to_text(html):
        """Return ``html`` as text, decoding bytes as UTF-8.

        Undecodable bytes are dropped so a page in another encoding cannot
        raise while it is being searched.
        """
        if isinstance(html, bytes):
            return html.decode('utf-8', errors='ignore')
        return html


class CheckerKeyword(Checker):
    """Checker that accepts content matching a regular-expression keyword."""

    def __init__(self, url=None, timeout=5, keyword=None):
        """
        :param keyword: default pattern used by ``checker_func`` when the
            call itself does not pass one, so the checker also works for
            callers that only pass the content.
        """
        super(CheckerKeyword, self).__init__(url, timeout)
        self.keyword = keyword

    def checker_func(self, html=None, keyword=None):
        """Return True when ``keyword`` is found in ``html``.

        :param html: response body as ``str`` or UTF-8 ``bytes``.
        :param keyword: regular expression; falls back to ``self.keyword``.
            With no keyword at all every content is accepted.
        """
        if keyword is None:
            keyword = getattr(self, "keyword", None)
        if keyword is None:
            return True
        text = self._to_text(html)
        if text is None:
            # Nothing to search in.
            return False
        return re.search(keyword, text, re.MULTILINE) is not None


class CheckerBaidu(Checker):
    """Checker that accepts content containing the Baidu home page title."""

    # Accept both the ASCII and the fullwidth comma in the title.
    _TITLE_PATTERN = re.compile(r"百度一下[,，]你就知道")

    def __init__(self, url=None, timeout=5):
        super(CheckerBaidu, self).__init__(url, timeout)

    def checker_func(self, html=None):
        """Return True when the Baidu title text occurs in ``html``.

        :param html: response body as ``str`` or UTF-8 ``bytes``; ``None``
            is rejected.
        """
        text = self._to_text(html)
        if text is None:
            return False
        return self._TITLE_PATTERN.search(text) is not None
42 |
--------------------------------------------------------------------------------
/spoon_server/browser/webdriver_item.py:
--------------------------------------------------------------------------------
1 | from selenium import webdriver
2 | from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
3 |
4 |
class WebDriverItem(object):
    """Factory that builds PhantomJS webdrivers from a pool configuration."""

    def __init__(self, webdriver_config):
        self.webdriver_config = webdriver_config

    def get_webdriver(self):
        """Create and return a new PhantomJS driver.

        Every call starts a new driver. Proxy, user agent, executable path
        and page-load timeout are all read from ``webdriver_config``.
        """
        config = self.webdriver_config

        # Command-line switches for the PhantomJS service.
        service_args = []
        if config.proxy:
            service_args += [
                "--proxy=" + config.proxy,
                "--proxy-type=http",
                "--ignore-ssl-errors=true",
            ]

        capabilities = dict(DesiredCapabilities.PHANTOMJS)
        if config.header:
            user_agent = config.header['User-Agent']
            capabilities["phantomjs.page.settings.userAgent"] = user_agent
            capabilities["phantomjs.page.customHeaders.User-Agent"] = user_agent
        # NOTE(review): set unconditionally here; confirm it was not meant to
        # apply only when a header is configured.
        capabilities["takesScreenshot"] = True

        driver = webdriver.PhantomJS(config.phantomjs_path,
                                     service_args=service_args,
                                     desired_capabilities=capabilities)
        driver.set_page_load_timeout(config.timeout)
        return driver
30 |
--------------------------------------------------------------------------------
/spoon_server/proxy/zdaye_provider.py:
--------------------------------------------------------------------------------
1 | from spoon_server.proxy.provider import Provider
2 | from spoon_server.util.html_parser import get_html_tree
3 |
4 |
class ZdayeProvider(Provider):
    """Proxy provider that queries ip.zdaye.com once per port."""

    def __init__(self, url_list=None, proxy_=None):
        """
        :param url_list: list of ``(port, url)`` pairs to scrape; the
            built-in list is used when it is empty or ``None``.
        :param proxy_: forwarded to ``get_html_tree`` for every request.
        """
        # NOTE(review): super(Provider, self) starts the lookup after Provider,
        # so Provider.__init__ is skipped -- confirm this is intended.
        super(Provider, self).__init__()
        # Keep a caller-supplied list instead of silently dropping it.
        self.url_list = url_list if url_list else self._gen_url_list()
        self.proxy_ = proxy_

    @staticmethod
    def _gen_url_list():
        """Return ``(port, url)`` pairs for the ports 8081, 8080 and 3128."""
        def concatenate(port_):
            return 'http://ip.zdaye.com/?ip=&port={0}&adr=&checktime=1&sleep=3' \
                   '&cunhuo=&nport=&nadr=&dengji=&https=&yys=&post=%D6%A7%B3%D6&px='.format(port_)

        port = ['8081', '8080', '3128']
        return [(p, concatenate(p)) for p in port]

    @Provider.provider_exception
    def getter(self):
        """Yield ``ip:port`` strings, pairing each scraped IP with the queried port."""
        for port, url in self.url_list:
            tree = get_html_tree(url, proxy=self.proxy_)
            if tree is None:
                continue
            # The first cell is skipped (presumably the column header).
            proxy_list = tree.xpath('//*[@id="ipc"]/tbody/tr/td[1]/text()')[1:]
            for ip in proxy_list:
                yield ip + ":" + port
31 |
32 |
if __name__ == "__main__":
    # Manual run: print every scraped proxy.
    provider = ZdayeProvider()
    for address in provider.getter():
        print(address)
37 |
--------------------------------------------------------------------------------
/spoon_server/proxy/nord_provider.py:
--------------------------------------------------------------------------------
1 | import json
2 | from spoon_server.proxy.provider import Provider
3 | import requests
4 |
5 |
class NordProvider(Provider):
    """Proxy provider that reads the JSON proxy list served by nordvpn.com."""

    def __init__(self, url_list=None, proxy=None):
        """
        :param url_list: endpoints to query; the built-in list is used when
            it is empty or ``None``.
        :param proxy: passed as the ``proxies`` argument of ``requests.get``.
        """
        # NOTE(review): super(Provider, self) starts the lookup after Provider,
        # so Provider.__init__ is skipped -- confirm this is intended.
        super(Provider, self).__init__()
        # Keep a caller-supplied list instead of silently dropping it.
        self.url_list = url_list if url_list else self._gen_url_list()
        self.proxy_ = proxy

    @staticmethod
    def _gen_url_list():
        """Return the default list of endpoints to query."""
        return [
            "https://nordvpn.com/wp-admin/admin-ajax.php?searchParameters%5B0%5D%5Bname%5D=proxy-country&searchParameters%5B0%5D%5Bvalue%5D=&searchParameters%5B1%5D%5Bname%5D=proxy-ports&searchParameters%5B1%5D%5Bvalue%5D=&searchParameters%5B2%5D%5Bname%5D=http&searchParameters%5B2%5D%5Bvalue%5D=on&searchParameters%5B3%5D%5Bname%5D=https&searchParameters%5B3%5D%5Bvalue%5D=on&offset=0&limit=500&action=getProxies"]

    @Provider.provider_exception
    def getter(self):
        """Yield ``ip:port`` strings built from the ``ip`` and ``port`` fields of each entry."""
        for url in self.url_list:
            content = requests.get(url, proxies=self.proxy_).content.decode("utf-8")
            for px in json.loads(content):
                yield px['ip'] + ":" + px['port']
26 |
27 |
if __name__ == "__main__":
    # Manual run: print every proxy returned by the endpoint.
    provider = NordProvider()
    for address in provider.getter():
        print(address)
32 |
--------------------------------------------------------------------------------
/spoon_server/proxy/yao_provider.py:
--------------------------------------------------------------------------------
1 | from spoon_server.proxy.provider import Provider
2 | from spoon_server.util.html_parser import get_html_tree
3 |
4 |
class YaoProvider(Provider):
    """Proxy provider that scrapes the listing pages of www.httpsdaili.com."""

    def __init__(self, url_list=None):
        """
        :param url_list: pages to scrape; the built-in list is used when it
            is empty or ``None``.
        """
        # NOTE(review): super(Provider, self) starts the lookup after Provider,
        # so Provider.__init__ is skipped -- confirm this is intended.
        super(Provider, self).__init__()
        # Keep a caller-supplied list instead of silently dropping it.
        self.url_list = url_list if url_list else self._gen_url_list()

    @staticmethod
    def _gen_url_list():
        """Return page 1 of each of the four ``stype`` listings, in stype order."""
        return ['http://www.httpsdaili.com/?stype={0}&page={1}'.format(stype, page)
                for stype in range(1, 5)
                for page in range(1, 2)]

    @Provider.provider_exception
    def getter(self):
        """Yield the first two text cells of each table row joined with ``:``."""
        for url in self.url_list:
            tree = get_html_tree(url)
            if tree is None:
                continue
            proxy_list = tree.xpath('//*[@id="list"]/table/tbody/tr')
            # The first row is skipped (presumably the table header).
            for px in proxy_list[1:]:
                yield ':'.join(px.xpath('./td/text()')[0:2])
28 |
29 |
if __name__ == "__main__":
    # Manual run: print every scraped proxy.
    provider = YaoProvider()
    for address in provider.getter():
        print(address)
34 |
--------------------------------------------------------------------------------
/spoon_server/proxy/web_provider.py:
--------------------------------------------------------------------------------
1 | import re
2 | import requests
3 | from spoon_server.proxy.provider import Provider
4 |
5 |
class WebProvider(Provider):
    """Proxy provider that extracts ``ip:port`` pairs from plain-text proxy lists."""

    # Raw string: the previous non-raw literal relied on invalid escape
    # sequences, which newer Python versions warn about.
    _PROXY_PATTERN = re.compile(r"\d+\.\d+\.\d+\.\d+:\d+")

    def __init__(self, url_list=None):
        """
        :param url_list: text lists to download; the built-in list is used
            when it is empty or ``None``.
        """
        # NOTE(review): super(Provider, self) starts the lookup after Provider,
        # so Provider.__init__ is skipped -- confirm this is intended.
        super(Provider, self).__init__()
        # Keep a caller-supplied list instead of silently dropping it.
        self.url_list = url_list if url_list else self._gen_url_list()

    @staticmethod
    def _gen_url_list():
        """Return the default list of plain-text sources."""
        # url_list = ["http://spys.one/pl.txt"]
        url_list = [
            "http://www.proxylists.net/http_highanon.txt",
            "http://ab57.ru/downloads/proxylist.txt",
            "http://pubproxy.com/api/proxy?limit=20&format=txt&type=http",
            # "http://www.ip3366.net/tiqv.php?sxb=&tqsl=10000&ports=&ktip=&xl=on&submit=%CC%E1++%C8%A1",
            # "http://comp0.ru/downloads/proxylist.txt",
            # 'https://www.rmccurdy.com/scripts/proxy/good.txt'
        ]
        return url_list

    @Provider.provider_exception
    def getter(self):
        """Yield every ``ip:port`` match found in each downloaded list."""
        for url in self.url_list:
            # Only ASCII digits, dots and colons matter, so undecodable bytes
            # are dropped rather than aborting the whole source.
            content = requests.get(url).content.decode("gbk", errors="ignore")
            for proxy in self._PROXY_PATTERN.findall(content):
                yield proxy
32 |
33 |
if __name__ == "__main__":
    # Manual run: print every proxy found in the text lists.
    provider = WebProvider()
    for address in provider.getter():
        print(address)
38 |
--------------------------------------------------------------------------------
/spoon_server/proxy/you_provider.py:
--------------------------------------------------------------------------------
1 | import re
2 | import requests
3 | from spoon_server.proxy.provider import Provider
4 | from spoon_server.util.html_parser import get_html_tree
5 | from spoon_server.util.constant import HEADERS
6 |
7 |
class YouProvider(Provider):
    """Proxy provider that reads the newest article linked from www.youdaili.net."""

    _PROXY_PATTERN = re.compile(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5}')

    def __init__(self, url_list=None):
        """
        :param url_list: article pages to scan; when empty or ``None`` the
            list is discovered from the index page (one HTTP request).
        """
        # NOTE(review): super(Provider, self) starts the lookup after Provider,
        # so Provider.__init__ is skipped -- confirm this is intended.
        super(Provider, self).__init__()
        # Keep a caller-supplied list instead of silently dropping it.
        self.url_list = url_list if url_list else self._gen_url_list()

    @staticmethod
    def _gen_url_list():
        """Return the first article link on the index page, as a list.

        An empty list is returned when the index page could not be loaded.
        """
        tree = get_html_tree("http://www.youdaili.net/Daili/http/")
        if tree is None:
            return []
        return tree.xpath('.//div[@class="chunlist"]/ul/li/p/a/@href')[0:1]

    @Provider.provider_exception
    def getter(self):
        """Yield every ``ip:port`` match found in each article page."""
        for url in self.url_list:
            html = requests.get(url, headers=HEADERS).content
            if not html:
                continue
            # ``content`` is bytes; a str pattern cannot be applied to bytes,
            # so decode first. Only ASCII characters matter for the match.
            text = html.decode("utf-8", errors="ignore")
            for px in self._PROXY_PATTERN.findall(text):
                yield px
32 |
33 |
if __name__ == "__main__":
    # Manual run: construction itself performs a request, so it is guarded too.
    try:
        provider = YouProvider()
        addresses = provider.getter()
        for address in addresses:
            print(address)
    except Exception as err:
        print(str(err))
42 |
--------------------------------------------------------------------------------
/spoon_server/proxy/pdb_provider.py:
--------------------------------------------------------------------------------
1 | import re
2 | import execjs
3 | from spoon_server.proxy.provider import Provider
4 | from spoon_server.util.html_parser import get_html_tree
5 |
6 |
class PdbProvider(Provider):
    """Proxy provider that scrapes proxydb.net, whose rows are filled in by inline JavaScript."""

    def __init__(self, url_list=None):
        """
        :param url_list: pages to scrape; the built-in list is used when it
            is empty or ``None``.
        """
        # NOTE(review): super(Provider, self) starts the lookup after Provider,
        # so Provider.__init__ is skipped -- confirm this is intended.
        super(Provider, self).__init__()
        # Keep a caller-supplied list instead of silently dropping it.
        self.url_list = url_list if url_list else self._gen_url_list()

    @staticmethod
    def _gen_url_list():
        """Return the default list of pages to scrape."""
        return ['http://proxydb.net/?protocol=http&protocol=https',
                ]

    @Provider.provider_exception
    def getter(self):
        """Yield the first entry of the ``proxies`` array computed by each row's script."""
        for url in self.url_list:
            tree = get_html_tree(url)
            if tree is None:
                continue
            proxy_list = tree.xpath('/html/body/div[2]/table//tr')
            # The first row is skipped (presumably the table header).
            for px in proxy_list[1:]:
                inline_script = ''.join(px.xpath('./td[1]/script/text()'))
                # Drop the ``document...;`` statements, then wrap what is
                # left in a function that returns ``proxies``.
                body = (re.sub("document.*?;", "", inline_script) + "; return proxies}").replace("\n", "")
                script_string = 'function func() {var proxies=[];' + body
                # NOTE(review): this executes JavaScript taken from a remote
                # page; only acceptable if the source is trusted.
                js_string = execjs.compile(script_string)
                result = js_string.call('func')
                if result:
                    yield result[0]
34 |
35 |
if __name__ == "__main__":
    # Manual run: print every scraped proxy.
    provider = PdbProvider()
    for address in provider.getter():
        print(address)
40 |
--------------------------------------------------------------------------------
/spoon_server/browser/webdriver_pool.py:
--------------------------------------------------------------------------------
1 | from queue import Queue, Empty
2 |
3 | from spoon_server.browser.webdriver_item import WebDriverItem
4 | from spoon_server.browser.webdriver_pool_config import WebDriverPoolConfig
5 |
6 |
class WebdriverPool(object):
    """Small pool of webdrivers built from one shared configuration."""

    def __init__(self, config):
        """
        :param config: pool configuration; ``phantomjs_path`` is read from it
            and the whole object is handed to every ``WebDriverItem``.
        """
        self.config = config
        self.phantomjs_path = config.phantomjs_path
        self.all = Queue()        # WebDriverItem factories created by this pool
        self.available = Queue()  # idle drivers waiting to be reused
        self.stopped = False
        self._drivers = []        # live drivers started by acquire()

    def acquire(self):
        """Return an idle driver, or start a new one when none is idle.

        :return: a driver, or ``None`` once the pool has been stopped.
        """
        if self.stopped:
            return None
        try:
            return self.available.get_nowait()
        except Empty:
            driver_item = WebDriverItem(self.config)
            self.all.put(driver_item)
            driver = driver_item.get_webdriver()
            # Remember the driver itself so stop() can quit it.
            self._drivers.append(driver)
            return driver

    def release(self, driver_item):
        """Hand a driver back to the pool so a later ``acquire`` can reuse it."""
        self.available.put(driver_item)

    def stop(self):
        """Mark the pool as stopped and quit every driver it knows about.

        This covers drivers started by ``acquire`` and drivers sitting in the
        idle queue. The previous implementation called ``get_webdriver()`` on
        each item, which started a fresh driver and quit that one, leaving
        the drivers actually handed out running.
        """
        self.stopped = True

        doomed = list(self._drivers)
        self._drivers = []

        # Drain the idle queue, adding any driver not already tracked.
        while True:
            try:
                idle = self.available.get_nowait()
            except Empty:
                break
            if all(idle is not known for known in doomed):
                doomed.append(idle)

        # The factories are no longer needed once the pool is stopped.
        while True:
            try:
                self.all.get_nowait()
            except Empty:
                break

        for driver in doomed:
            try:
                driver.quit()
            except Exception:
                # Best effort: one driver that fails to quit must not keep
                # the remaining ones from being shut down.
                pass
35 |
36 |
if __name__ == "__main__":
    # Manual smoke test of the acquire / release / stop cycle.
    wdp_config = WebDriverPoolConfig(phantomjs_path="D:/program/phantomjs-2.1.1-windows/bin/phantomjs.exe")
    wd = WebdriverPool(wdp_config)
    try:
        driver = wd.acquire()
        # Navigation needs an absolute URL, including the scheme.
        driver.get("http://www.baidu.com")
        wd.release(driver)
    finally:
        # Always shut the pool down, even when navigation fails.
        wd.stop()
    # A stopped pool no longer hands out drivers.
    print(wd.acquire() is None)
45 |
--------------------------------------------------------------------------------
/spoon_server/proxy/busy_provider.py:
--------------------------------------------------------------------------------
1 | from spoon_server.proxy.provider import Provider
2 | from spoon_server.util.html_parser import get_html_tree
3 |
4 |
class BusyProvider(Provider):
    """Proxy provider that scrapes the classical listings on proxy.coderbusy.com."""

    def __init__(self, url_list=None):
        """
        :param url_list: pages to scrape; the built-in list is used when it
            is empty or ``None``.
        """
        # NOTE(review): super(Provider, self) starts the lookup after Provider,
        # so Provider.__init__ is skipped -- confirm this is intended.
        super(Provider, self).__init__()
        # Keep a caller-supplied list instead of silently dropping it.
        self.url_list = url_list if url_list else self._gen_url_list()

    @staticmethod
    def _gen_url_list():
        """Return pages 1-5 of the highanonymous, transparent and anonymous listings."""
        template = 'https://proxy.coderbusy.com/classical/anonymous-type/{0}.aspx?page={1}'
        return [template.format(kind, page)
                for kind in ('highanonymous', 'transparent', 'anonymous')
                for page in range(1, 6)]

    @Provider.provider_exception
    def getter(self):
        """Yield ``ip:port`` built from the 2nd and 4th text nodes of each row."""
        for url in self.url_list:
            tree = get_html_tree(url)
            if tree is None:
                continue
            proxy_list = tree.xpath('//*[@id="site-app"]/div/div/div[1]/div/table/tbody/tr')
            for px in proxy_list:
                # Evaluate the XPath once per row instead of once per field.
                texts = px.xpath('*/text()')
                yield ':'.join([texts[1].strip(), texts[3].strip()])
32 |
33 |
if __name__ == "__main__":
    # Manual run: print every scraped proxy.
    provider = BusyProvider()
    for address in provider.getter():
        print(address)
38 |
--------------------------------------------------------------------------------
/spoon_server/proxy/nntime_provider.py:
--------------------------------------------------------------------------------
1 | import re
2 | from spoon_server.proxy.provider import Provider
3 | from spoon_server.util.html_parser import get_html_tree
4 |
5 |
class NNtimeProvider(Provider):
    """Proxy provider that scrapes nntime.com, where ports are obfuscated with script variables."""

    def __init__(self, url_list=None):
        """
        :param url_list: pages to scrape; the built-in list is used when it
            is empty or ``None``.
        """
        # NOTE(review): super(Provider, self) starts the lookup after Provider,
        # so Provider.__init__ is skipped -- confirm this is intended.
        super(Provider, self).__init__()
        # Keep a caller-supplied list instead of silently dropping it.
        self.url_list = url_list if url_list else self._gen_url_list()

    @staticmethod
    def _gen_url_list():
        """Return the first three ``proxy-updated-0N.htm`` pages."""
        return ['http://nntime.com/proxy-updated-0{0}.htm'.format(i) for i in range(1, 4)]

    @Provider.provider_exception
    def getter(self):
        """Yield ``ip:port`` strings, decoding each port from the page's variable table."""
        for url in self.url_list:
            tree = get_html_tree(url)
            if tree is None:
                continue

            # The first <head> script is a ';'-separated list of ``name=value``
            # assignments; it is used as the lookup table for the port pieces.
            port_dict = {}
            for assignment in tree.xpath("//head/script/text()")[0].strip().split(";"):
                if assignment == '':
                    continue
                pieces = assignment.split("=")
                port_dict[pieces[0]] = pieces[1]

            for px in tree.xpath('//*[@id="proxylist"]/tr'):
                ip = px.xpath('./td/text()')[0]
                # The row script lists variable names separated by '+'; the
                # port is the concatenation of their values.
                expression = re.findall(r"\+.*", px.xpath('./td/script/text()')[0])[0]
                names = expression.replace(")", "").split("+")
                port = "".join([port_dict[key] for key in names if key != ''])
                yield ip + ":" + port
33 |
34 |
if __name__ == "__main__":
    # Manual run: print every scraped proxy.
    provider = NNtimeProvider()
    for address in provider.getter():
        print(address)
39 |
--------------------------------------------------------------------------------
/spoon_server/proxy/gp_provider.py:
--------------------------------------------------------------------------------
1 | import re
2 | import json
3 | from spoon_server.proxy.provider import Provider
4 | from spoon_server.util.html_parser import get_html
5 |
6 |
class GPProvider(Provider):
    """Proxy provider that scrapes the per-country lists of www.gatherproxy.com."""

    # Compiled once. The raw string is equivalent to the previous non-raw
    # literal but no longer relies on invalid escape sequences.
    _PRX_PATTERN = re.compile(r'gp.insertPrx\((.*?)\)', re.S)

    def __init__(self, url_list=None, proxy=None):
        """
        :param url_list: pages to scrape; the built-in list is used when it
            is empty or ``None``.
        :param proxy: forwarded to ``get_html``; the original code notes that
            a proxy needs to be configured for this source.
        """
        # NOTE(review): super(Provider, self) starts the lookup after Provider,
        # so Provider.__init__ is skipped -- confirm this is intended.
        super(Provider, self).__init__()
        # Keep a caller-supplied list instead of silently dropping it.
        self.url_list = url_list if url_list else self._gen_url_list()
        self.proxy = proxy

    @staticmethod
    def _gen_url_list():
        """Return one listing URL per built-in country."""
        cy_list = ["China", "Indonesia", "United%20States", "Brazil", "Russia", "Thailand", "India", "United%20Kingdom",
                   "Bangladesh", "Germany", "Singapore"]
        return ["http://www.gatherproxy.com/proxylist/country/?c=" + cy for cy in cy_list]

    @Provider.provider_exception
    def getter(self):
        """Yield ``ip:port`` strings from the JSON passed to each ``gp.insertPrx(...)`` call."""
        proxy = self.proxy  # you need to configure the proxy
        for url in self.url_list:
            html = get_html(url, proxy=proxy)
            if html is None:
                continue
            for item in self._PRX_PATTERN.findall(html):
                data = json.loads(item)
                # PROXY_PORT is parsed as a hexadecimal number.
                port = str(int(data.get('PROXY_PORT'), 16))
                yield data.get('PROXY_IP') + ":" + port
38 |
39 |
if __name__ == "__main__":
    # Manual run: print every proxy, or the error that stopped the scrape.
    provider = GPProvider()
    try:
        for address in provider.getter():
            print(address)
    except Exception as err:
        print(err)
47 |
--------------------------------------------------------------------------------
/spoon_server/proxy/cool_provider.py:
--------------------------------------------------------------------------------
1 | import re
2 | import base64
3 | import codecs
4 | from spoon_server.proxy.provider import Provider
5 | from spoon_server.util.html_parser import get_html_tree
6 |
# Captures the quoted argument of Base64.decode(str_rot13("...")) in a row script.
ip_pattern = re.compile(r'Base64.decode\(str_rot13\("([^"]+)"\)\)', re.I)


class CoolProvider(Provider):
    """Proxy provider that scrapes www.cool-proxy.net, where IPs are ROT13 + Base64 encoded."""

    def __init__(self, url_list=None):
        """
        :param url_list: pages to scrape; the built-in list is used when it
            is empty or ``None``.
        """
        # NOTE(review): super(Provider, self) starts the lookup after Provider,
        # so Provider.__init__ is skipped -- confirm this is intended.
        super(Provider, self).__init__()
        # Keep a caller-supplied list instead of silently dropping it.
        self.url_list = url_list if url_list else self._gen_url_list()

    @staticmethod
    def _gen_url_list():
        """Return pages 1-5 of the score-sorted HTTP proxy list."""
        return ['https://www.cool-proxy.net/proxies/http_proxy_list/sort:score/direction:desc/page:{0}'.format(i)
                for i in range(1, 6)]

    @Provider.provider_exception
    def getter(self):
        """Yield ``ip:port`` strings, decoding each IP from the row's inline script."""
        for url in self.url_list:
            tree = get_html_tree(url)
            if tree is None:
                continue
            # The first row is skipped (presumably the table header).
            px_segment = tree.xpath('//*[@id="main"]/table/tr')[1:]
            for px in px_segment:
                cells = px.xpath("./td")
                if not cells:
                    continue
                ip_raw = cells[0].xpath("./script/text()")[0]
                ip_find_list = ip_pattern.findall(ip_raw)
                if not ip_find_list:
                    continue
                port = px.xpath("./td/text()")[0]
                # Undo ROT13 first, then Base64, mirroring the page's script.
                encoded = codecs.decode(ip_find_list[0].strip(), 'rot-13')
                ip = base64.b64decode(encoded).strip().decode('utf-8')
                yield ":".join([ip, port])
39 |
40 |
if __name__ == "__main__":
    # Manual run: print every scraped proxy.
    provider = CoolProvider()
    for address in provider.getter():
        print(address)
45 |
--------------------------------------------------------------------------------
/spoon_server/proxy/mipu_provider.py:
--------------------------------------------------------------------------------
1 | from spoon_server.util.captcha import Captcha
2 | from spoon_server.proxy.provider import Provider
3 | from spoon_server.util.html_parser import get_html_tree
4 |
5 |
class MimvpProvider(Provider):
    """Proxy provider for proxy.mimvp.com, where ports are rendered as images.

    Each port image is sent to a captcha recognition service to recover the
    port as text.
    """

    def __init__(self, captcha_recognize_url, url_list=None):
        """
        :param captcha_recognize_url: passed to ``Captcha`` to recognise the
            port images.
        :param url_list: pages to scrape; the built-in list is used when it
            is empty or ``None``.
        """
        # NOTE(review): super(Provider, self) starts the lookup after Provider,
        # so Provider.__init__ is skipped -- confirm this is intended.
        super(Provider, self).__init__()
        self.captcha_recognize_url = captcha_recognize_url
        # Keep a caller-supplied list instead of silently dropping it.
        self.url_list = url_list if url_list else self._gen_url_list()

    @staticmethod
    def _gen_url_list():
        """Return the default list of pages to scrape."""
        return ['https://proxy.mimvp.com/free.php?proxy=in_hp',
                'https://proxy.mimvp.com/free.php?proxy=in_tp']

    @Provider.provider_exception
    def getter(self):
        """Yield ``ip:port`` strings; ``None`` is yielded for an entry whose port cannot be recognised."""
        for url in self.url_list:
            tree = get_html_tree(url)
            if tree is None:
                continue

            image_tree = tree.xpath('//*[@id="mimvp-body"]/div[2]/div/table[1]/tbody/td/img/@src')
            proxy_tree = tree.xpath('//*[@id="mimvp-body"]/div[2]/div/table[1]/tbody/td')

            # Every second image is used (presumably the port image), and
            # every tenth cell starting at index 1 (presumably the IP cell).
            image_list = ["https://proxy.mimvp.com/" + px for px in image_tree[0::2]]
            ip_list = [px.xpath('./text()')[0] for px in proxy_tree[1::10]]

            # A length mismatch means images and IPs can no longer be paired.
            assert len(image_list) == len(ip_list)

            cap = Captcha(self.captcha_recognize_url)
            for image_url, ip in zip(image_list, ip_list):
                try:
                    port = cap.get_image_result(image_url)
                    yield ip + ":" + port
                except Exception:
                    yield None
42 |
43 |
if __name__ == "__main__":
    # Manual run: needs a real captcha recognition endpoint.
    provider = MimvpProvider("Your captcha recognize url.")
    for address in provider.getter():
        print(address)
48 |
--------------------------------------------------------------------------------
/spoon_server/proxy/listende_provider.py:
--------------------------------------------------------------------------------
1 | import re
2 | from spoon_server.proxy.provider import Provider
3 | from spoon_server.util.html_parser import get_html_tree, get_html
4 |
5 |
class ListendeProvider(Provider):
    """Proxy provider for www.proxy-listen.de, which needs a form token before listing."""

    # Hidden form field whose value must be echoed back in the POST.
    _KEY_PATTERN = re.compile('''name="fefefsfesf4tzrhtzuh" value="([^"]+)"''')

    def __init__(self, url_list=None):
        """
        :param url_list: pages to scrape; the built-in list is used when it
            is empty or ``None``.
        """
        # NOTE(review): super(Provider, self) starts the lookup after Provider,
        # so Provider.__init__ is skipped -- confirm this is intended.
        super(Provider, self).__init__()
        # Keep a caller-supplied list instead of silently dropping it.
        self.url_list = url_list if url_list else self._gen_url_list()

    @staticmethod
    def _gen_url_list():
        """Return the default list of pages to scrape."""
        return ['https://www.proxy-listen.de/Proxy/Proxyliste.html']

    @Provider.provider_exception
    def getter(self):
        """Yield ``ip:port`` strings from the table returned by the filter form.

        A page that cannot be fetched, or that carries no form token, is
        skipped instead of aborting the remaining URLs.
        """
        for url in self.url_list:
            response = get_html(url)
            # NOTE(review): assumes get_html returns None on failure -- confirm.
            if response is None:
                continue
            keysearch = self._KEY_PATTERN.findall(response)
            if not keysearch:
                continue

            post_data = {
                'filter_port': "",
                'filter_http_gateway': "",
                'filter_http_anon': "",
                'filter_response_time_http': "",
                'fefefsfesf4tzrhtzuh': keysearch[0],
                'filter_country': "",
                'filter_timeouts1': "",
                'liststyle': "info",
                'proxies': "200",
                'type': "httphttps",
                'submit': "Anzeigen"
            }

            tree = get_html_tree(url, data=post_data)
            if tree is None:
                continue
            # The first row is skipped (presumably the table header).
            px_segment = tree.xpath('//table[@class="proxyList center"]/tr')[1:]
            for px in px_segment:
                yield ":".join([px.xpath('./td/a/text()')[0], px.xpath('./td/text()')[0]])
45 |
46 |
if __name__ == "__main__":
    # Manual run: print every scraped proxy.
    provider = ListendeProvider()
    for address in provider.getter():
        print(address)
51 |
--------------------------------------------------------------------------------
/spoon_server/proxy/kuai_provider.py:
--------------------------------------------------------------------------------
1 | import re
2 | import execjs
3 | from spoon_server.proxy.provider import Provider
4 | from spoon_server.util.html_parser import get_html_tree, get_html
5 |
6 |
class KuaiProvider(Provider):
    """Proxy provider for www.kuaidaili.com, which may require a JavaScript-computed cookie."""

    def __init__(self, url_list=None):
        """
        :param url_list: pages to scrape; the built-in list is used when it
            is empty or ``None``.
        """
        # NOTE(review): super(Provider, self) starts the lookup after Provider,
        # so Provider.__init__ is skipped -- confirm this is intended.
        super(Provider, self).__init__()
        # Keep a caller-supplied list instead of silently dropping it.
        self.url_list = url_list if url_list else self._gen_url_list()

    @staticmethod
    def _gen_url_list(page=10):
        """Return the first ``page`` listing URLs."""
        return ['http://www.kuaidaili.com/ops/proxylist/{0}/'.format(i) for i in range(1, page + 1)]

    def _prepare(self):
        """Compute the clearance cookie from the challenge script on the first page.

        :return: a one-entry dict ``{cookie_name: cookie_value}``.
        :raises IndexError: when the challenge cannot be located or the
            cookie has no ``name=value`` form; ``getter`` treats this as
            "no cookie needed".
        """
        pre_text = get_html(self.url_list[0])
        # NOTE(review): the trailing lazy ``.*?`` can match the empty string,
        # so this captures only "function " -- confirm the pattern.
        js_string = ''.join(re.findall(r'(function .*?)', pre_text))
        arg = re.findall(r'setTimeout\(\"\D+\((\d+)\)\"', pre_text)[0]
        name = re.findall(r'function (\w+)', js_string)[0]
        # Make the challenge return the cookie statement instead of eval-ing it.
        js_string = js_string.replace('eval("qo=eval;qo(po);")', 'return po')
        # NOTE(review): this executes JavaScript taken from a remote page;
        # only acceptable if the source is trusted.
        func = execjs.compile(js_string)
        cookie_string = func.call(name, arg)
        cookie_string = cookie_string.replace("document.cookie='", "")
        clearance = cookie_string.split(';')[0]
        # partition keeps any '=' inside the value (e.g. Base64 padding).
        key, separator, value = clearance.partition('=')
        if not separator:
            raise IndexError("clearance cookie has no '=': {0!r}".format(clearance))
        return {key: value}

    @Provider.provider_exception
    def getter(self):
        """Yield the first two text cells of each row joined with ``:``."""
        try:
            cookie = self._prepare()
        except IndexError:
            # No challenge found: fetch the pages without a cookie.
            cookie = None
        for url in self.url_list:
            if cookie is not None:
                tree = get_html_tree(url, cookie=cookie)
            else:
                tree = get_html_tree(url)
            if tree is None:
                continue
            proxy_list = tree.xpath('//*[@id="freelist"]/table/tbody/tr')
            for px in proxy_list:
                yield ':'.join(px.xpath('./td/text()')[0:2])
47 |
48 |
if __name__ == "__main__":
    # Manual run: print every scraped proxy.
    provider = KuaiProvider()
    for address in provider.getter():
        print(address)
53 |
--------------------------------------------------------------------------------
/spoon_server/proxy/prem_provider.py:
--------------------------------------------------------------------------------
1 | from spoon_server.proxy.provider import Provider
2 | from spoon_server.util.html_parser import get_html_tree
3 |
# Maps the CSS class found on a row's second <span> to the port it stands for.
port_dict = {
    're4e8': '8080', 'r1819': '80', 'ra572': '53281',
    'r0a84': '3128', 'r0f89': '8888', 'raa30': '8088',
    'r93ce': '81', 'rf015': '65309', 'rc8e9': '544',
    'r479e': '20183', 'r6666': '8081', 'r70fb': '55555',
    're048': '8118', 'rac4c': '9000', 'r6446': '18118',
    'r23aa': '8060', 'r626c': '61234', 'r1adf': '62225',
    're04a': '54314', 'r4928': '3129', 'ra1ab': '3100',
    'rc6b5': '8383', 'rd026': '8380', 'r007c': '808',
    'r3d4e': '8000', 'rd376': '61588', 'refa9': '45618',
    'r1986': '31588', 'r39d8': '65301', 'r5665': '3355',
    'rb2b3': '53282', 'r562f': '53005', 'r6f48': '52136',
    'ra5c1': '443', 're42f': '65205', 'r0450': '54214',
    'r0b20': '3130', 'r74c4': '65103', 'r7d8f': '37777',
    'r96fd': '7777', 'r0df1': '87', 'r2da5': '52225'
}


class PremProvider(Provider):
    """Proxy provider that scrapes premproxy.com, where ports are encoded as CSS classes."""

    def __init__(self, url_list=None, proxy=None):
        """
        :param url_list: pages to scrape; the built-in list is used when it
            is empty or ``None``.
        :param proxy: forwarded to ``get_html_tree`` for every request.
        """
        # NOTE(review): super(Provider, self) starts the lookup after Provider,
        # so Provider.__init__ is skipped -- confirm this is intended.
        super(Provider, self).__init__()
        # Keep a caller-supplied list instead of silently dropping it.
        self.url_list = url_list if url_list else self._gen_url_list()
        self.proxy = proxy

    @staticmethod
    def _gen_url_list():
        """Return the first two list pages."""
        return ['https://premproxy.com/list/0{0}.htm'.format(i)
                for i in range(1, 3)]

    @Provider.provider_exception
    def getter(self):
        """Yield the address text followed by the port decoded through ``port_dict``.

        Rows whose port class is not in ``port_dict``, or that carry no
        address text, are skipped so one unknown class does not abort the page.
        """
        for url in self.url_list:
            tree = get_html_tree(url, proxy=self.proxy)
            if tree is None:
                continue
            # The last row is dropped (presumably a footer / paging row).
            px_segment = tree.xpath('//*[@id="proxylistt"]/tbody/tr')[:-1]
            for px in px_segment:
                spans = px.xpath('./td')[0].xpath('./span')
                # NOTE(review): the tail text presumably already ends with
                # ':' since no separator is added below -- confirm.
                ip = spans[0].tail
                port = port_dict.get(spans[1].xpath("@class")[0])
                if not ip or port is None:
                    continue
                yield ip + port
47 |
48 |
if __name__ == "__main__":
    # Manual run: print every scraped proxy.
    provider = PremProvider()
    for address in provider.getter():
        print(address)
53 |
--------------------------------------------------------------------------------
/spoon_server/main/proxy_pipe.py:
--------------------------------------------------------------------------------
1 | import time
2 | from multiprocessing import Process
3 |
4 | from spoon_server.main.checker import Checker
5 | from spoon_server.proxy.fetcher import Fetcher
6 | from spoon_server.main.refresher import refresher_run
7 | from spoon_server.main.validater import validater_run
8 |
9 |
class ProxyPipe(object):
    """Wire a fetcher, a checker and a database into the validater and refresher processes."""

    def __init__(self,
                 database=None,
                 fetcher=None,
                 url_prefix=None,
                 checker=None,
                 validater_thread_num=30,
                 refresher_thread_num=30):
        """
        :param database: handed to both worker processes.
        :param fetcher: proxy fetcher; a default ``Fetcher()`` is created
            when it is missing or falsy.
        :param url_prefix: handed to both worker processes.
        :param checker: response checker; a default ``Checker()`` is created
            when it is missing or falsy.
        :param validater_thread_num: thread count passed to ``validater_run``.
        :param refresher_thread_num: thread count passed to ``refresher_run``.
        """
        self._fetcher = fetcher or Fetcher()
        self._checker = checker or Checker()

        self._database = database
        self._url_prefix = url_prefix

        self.validater_thread_num = validater_thread_num
        self.refresher_thread_num = refresher_thread_num

    def set_fetcher(self, provider_list):
        """Replace the fetcher's providers and return ``self`` for chaining."""
        self._fetcher.set_provider(provider_list)
        return self

    def add_fetcher(self, provider_list):
        """Add providers to the fetcher and return ``self`` for chaining."""
        self._fetcher.add_provider(provider_list)
        return self

    def start(self):
        """Start the validater and refresher processes and wait for both to finish."""
        validater_proc = Process(target=validater_run,
                                 args=(self._url_prefix,
                                       self._database,
                                       self._checker,
                                       self.validater_thread_num,))

        refresher_proc = Process(target=refresher_run,
                                 args=(self._url_prefix,
                                       self._fetcher,
                                       self._database,
                                       self._checker,
                                       self.refresher_thread_num,))

        workers = [validater_proc, refresher_proc]

        for worker in workers:
            worker.start()
            # Pause one second after each start.
            time.sleep(1)
        for worker in workers:
            worker.join()
63 |
64 |
if __name__ == "__main__":
    # Run the pipe with every collaborator left at its default.
    ProxyPipe().start()
68 |
--------------------------------------------------------------------------------
/spoon_server/main/refresher.py:
--------------------------------------------------------------------------------
1 | import time
2 | import schedule
3 | import concurrent.futures
4 |
5 | from spoon_server.util.validate import validate
6 | from spoon_server.main.manager import Manager
7 |
8 |
class Refresher(Manager):
    """Validate freshly fetched proxies and promote the good ones.

    Proxies are popped from the "origin" hash, validated against the target
    URL and, when they pass and are not already known, stored in the
    "useful" hash.
    """

    def __init__(self, fetcher, url_prefix=None, database=None, checker=None, refresher_thread_num=30):
        """Forward the collaborators to ``Manager`` and keep the thread count."""
        super(Refresher, self).__init__(database=database, url_prefix=url_prefix, fetcher=fetcher, checker=checker)
        self.refresher_thread_num = refresher_thread_num

    def _validate_proxy(self):
        """Drain the origin hash, promoting every new proxy that validates."""
        origin_proxy = self.database.pop(self.generate_name(self._origin_prefix))
        # The popped proxy is a str, while get_all() may hand back bytes keys
        # (Validater decodes them for the same reason). Normalise to a set of
        # str so the "already known" test can actually match.
        exist_proxy = {
            known.decode('utf-8') if isinstance(known, bytes) else known
            for known in self.database.get_all(self.generate_name(self._useful_prefix))
        }
        while origin_proxy:
            if (origin_proxy not in exist_proxy) and validate(self._url_prefix, origin_proxy, self._checker):
                self.database.put(self.generate_name(self._useful_prefix), origin_proxy)
            origin_proxy = self.database.pop(self.generate_name(self._origin_prefix))

    def refresher_pool(self):
        """Worker entry point executed by each pool thread."""
        self._validate_proxy()

    def main(self):
        """Refresh the origin pool, then validate it with a thread pool.

        The ``with`` block waits for all submitted workers before returning.
        """
        self.refresh()
        with concurrent.futures.ThreadPoolExecutor(max_workers=self.refresher_thread_num) as executor:
            for _ in range(self.refresher_thread_num):
                executor.submit(self.refresher_pool)
40 |
41 |
def refresher_run(url=None, fetcher=None, database=None, checker=None, refresher_thread_num=30):
    """Run a ``Refresher`` every five minutes, forever.

    The job is registered with ``schedule`` and pending jobs are polled
    every five seconds; this function never returns.
    """
    worker = Refresher(fetcher,
                       url_prefix=url,
                       database=database,
                       checker=checker,
                       refresher_thread_num=refresher_thread_num)
    job = schedule.every(5).minutes
    job.do(worker.main)
    while True:
        schedule.run_pending()
        time.sleep(5)
52 |
53 |
# Script entry point: start the refresher loop with every argument left at
# its default value.
if __name__ == '__main__':
    refresher_run()
56 |
--------------------------------------------------------------------------------
/spoon_web/api/views.py:
--------------------------------------------------------------------------------
1 | import json
2 | import time
3 | import random
4 | from django.http import HttpResponse
5 |
6 | from spoon_server.database.redis_config import RedisConfig
7 | from spoon_server.main.manager import Manager
8 |
# Connection settings shared by every view below; each view passes this
# object to Manager(database=...).
redis = RedisConfig("127.0.0.1", 6379, 0)
10 |
11 |
def get_keys(request):
    """Return the JSON-encoded result of ``Manager.get_keys()``."""
    keys = Manager(database=redis).get_keys()
    payload = json.dumps(keys)
    return HttpResponse(payload)
15 |
16 |
def fetchone_from(request):
    """Return one random proxy for the target whose score exceeds the filter.

    Query parameters:
        target: host whose ``spoon:<target>:useful_proxy`` hash is read
            (default ``www.baidu.com``).
        filter: minimum score, exclusive (default 10).

    Returns:
        HttpResponse: the chosen proxy, or an empty body when no proxy
        qualifies (the same result the list views give in that case).
    """
    m = Manager(database=redis)
    target_name = request.GET.get("target", "www.baidu.com")
    filter_num = int(request.GET.get("filter", 10))
    search_name = ":".join(["spoon", target_name, "useful_proxy"])

    px_kv = m.get_all_kv_from(search_name)
    candidates = [k.decode('utf-8') for (k, v) in px_kv.items() if int(v.decode('utf-8')) > filter_num]

    # random.sample(candidates, 1) raised ValueError on an empty list and
    # turned "no proxy available" into a server error.
    if not candidates:
        return HttpResponse("")

    return HttpResponse(random.choice(candidates))
27 |
28 |
def fetchall_from(request):
    """Return every proxy for the target whose score exceeds the filter.

    Reads ``target`` (default ``www.baidu.com``) and ``filter`` (default 10)
    from the query string and answers with the matching proxies joined by
    CRLF.
    """
    manager = Manager(database=redis)
    target = request.GET.get("target", "www.baidu.com")
    threshold = int(request.GET.get("filter", 10))
    hash_name = ":".join(("spoon", target, "useful_proxy"))

    matches = []
    for raw_key, raw_score in manager.get_all_kv_from(hash_name).items():
        if int(raw_score) > threshold:
            matches.append(raw_key.decode('utf-8'))

    body = "\r\n".join(matches)
    return HttpResponse(body)
39 |
40 |
def fetch_hundred_recent(request):
    """Return proxies from the ``hundred_proxy`` hash stamped recently.

    A proxy is kept when the absolute difference between its stored value
    (parsed as a float timestamp) and the current time is below ``filter``
    (default 30). The result is joined by CRLF.
    """
    manager = Manager(database=redis)
    target = request.GET.get("target", "www.baidu.com")
    window = int(request.GET.get("filter", 30))
    hash_name = ":".join(("spoon", target, "hundred_proxy"))

    recent = []
    for raw_key, raw_stamp in manager.get_all_kv_from(hash_name).items():
        stamp = float(raw_stamp.decode('utf-8'))
        if abs(stamp - time.time()) < window:
            recent.append(raw_key.decode('utf-8'))

    return HttpResponse("\r\n".join(recent))
51 |
52 |
def fetch_stale(request):
    """Return the field names of one scan page of ``spoon:proxy_stale``.

    The scan is started from a random cursor between 0 and 1000000; the
    names found in the second element of the scan result are joined by CRLF.
    """
    manager = Manager(database=redis)
    scan_result = manager.scan_kv_from("spoon:proxy_stale", random.randint(0, 1000000))

    names = []
    for raw_key, raw_value in scan_result[1].items():
        name = raw_key.decode('utf-8')
        # The value is parsed but not returned; a malformed value therefore
        # still raises, exactly as before.
        float(raw_value.decode('utf-8'))
        names.append(name)

    return HttpResponse("\r\n".join(names))
60 |
61 |
def fetch_recent(request):
    """Return the entries of ``spoon:<target>:current_proxy`` joined by CRLF.

    ``target`` is read from the query string (default ``www.baidu.com``).
    """
    manager = Manager(database=redis)
    target = request.GET.get("target", "www.baidu.com")
    key_name = ":".join(("spoon", target, "current_proxy"))

    decoded = []
    for raw in manager.get_range_from(key_name):
        decoded.append(raw.decode('utf-8'))

    return HttpResponse("\r\n".join(decoded))
69 |
--------------------------------------------------------------------------------
/spoon_server/database/redis_wrapper.py:
--------------------------------------------------------------------------------
1 | import json
2 | import random
3 | import redis
4 |
5 |
class RedisWrapper(object):
    """Thin convenience layer over a single ``redis.Redis`` connection.

    Most methods forward straight to the redis command of the same name;
    ``get``/``put``/``pop`` treat a redis hash as a pool of items stored as
    field names, with the field value used as a counter.
    """

    def __init__(self, host, port, db=0, password=None):
        """Open the connection described by *host*, *port*, *db*, *password*."""
        self._connection = redis.Redis(host=host, port=port, db=db, encoding='utf-8', password=password)

    def get(self, name):
        """Return a random field name of hash *name* as str, or None if empty."""
        key = self._connection.hgetall(name=name)
        return random.choice([k.decode("utf-8") for k in key.keys()]) if key else None

    def put(self, name, key):
        """Increment field *key* of hash *name* by one and return the new value.

        Dicts and lists are stored as their JSON encoding.
        """
        key = json.dumps(key) if isinstance(key, (dict, list)) else key
        return self._connection.hincrby(name, key, 1)

    def getvalue(self, name, key):
        """Return the raw value of field *key*, or None when it is missing/empty."""
        value = self._connection.hget(name, key)
        return value if value else None

    def pop(self, name):
        """Remove and return a random field name of hash *name* (None if empty).

        The read and the delete are two separate commands, so this is not
        atomic.
        """
        key = self.get(name)
        if key:
            self._connection.hdel(name, key)
        return key

    def len(self, name):
        """Return the number of fields in hash *name* (0 when it is empty).

        The previous implementation computed the length but never returned
        it, and fetched the whole hash first.
        """
        return self._connection.hlen(name)

    def delete(self, name, key):
        """Delete field *key* from hash *name*."""
        self._connection.hdel(name, key)

    def inckey(self, name, key, value):
        """Add *value* to the integer stored in field *key* of hash *name*."""
        self._connection.hincrby(name, key, value)

    def set_value(self, name, key, value):
        """Set field *key* of hash *name* to *value*."""
        self._connection.hset(name, key, value)

    def rpush(self, name, value):
        """Append *value* to list *name*."""
        self._connection.rpush(name, value)

    def ltrim(self, name, from_index, to_index):
        """Trim list *name* to the inclusive index range given."""
        self._connection.ltrim(name, from_index, to_index)

    def sadd(self, name, value):
        """Add *value* to set *name*."""
        self._connection.sadd(name, value)

    def zadd(self, name, key, score):
        """Add *key* with *score* to sorted set *name*."""
        # NOTE(review): the positional (name, key, score) form matches the
        # legacy redis-py 2.x Redis.zadd; redis-py 3+ expects a mapping.
        # Confirm against the pinned redis version.
        self._connection.zadd(name, key, score)

    def zrem(self, name, key):
        """Remove *key* from sorted set *name*."""
        self._connection.zrem(name, key)

    def zremrangebyrank(self, name, low, high):
        """Remove the members of sorted set *name* ranked *low*..*high*."""
        self._connection.zremrangebyrank(name, low, high)

    def get_all(self, name):
        """Return the field names of hash *name* as returned by redis."""
        return self._connection.hgetall(name).keys()

    def get_status(self, name):
        """Return the number of fields in hash *name*."""
        return self._connection.hlen(name)

    def get_keys(self):
        """Return every key of the selected database."""
        return self._connection.keys()

    def get_all_kv(self, name):
        """Return the full field/value mapping of hash *name*."""
        return self._connection.hgetall(name)

    def scan_kv(self, name, cursor):
        """Return one HSCAN page of hash *name* starting at *cursor*."""
        return self._connection.hscan(name, cursor)

    def zrange(self, name, low, high):
        """Return the members of sorted set *name* ranked *low*..*high*."""
        return self._connection.zrange(name, low, high)
77 |
78 |
if __name__ == '__main__':
    # Manual check: list every key of the local redis instance.
    print(RedisWrapper('localhost', 6379).get_keys())
82 |
--------------------------------------------------------------------------------
/spoon_server/proxy/fetcher.py:
--------------------------------------------------------------------------------
1 | import copy
2 | from spoon_server.proxy.ip3366_provider import IP3366Provider
3 | from spoon_server.proxy.kuai_provider import KuaiProvider
4 | from spoon_server.proxy.xici_provider import XiciProvider
5 | from spoon_server.proxy.wuyou_provider import WuyouProvider
6 | from spoon_server.proxy.us_provider import UsProvider
7 | from spoon_server.proxy.ip181_provider import IP181Provider
8 | from spoon_server.proxy.six_provider import SixProvider
9 | from spoon_server.proxy.zdaye_provider import ZdayeProvider
10 | from spoon_server.proxy.busy_provider import BusyProvider
11 | from spoon_server.proxy.web_provider import WebProvider
12 |
13 |
class Fetcher(object):
    """Mutable container of proxy provider instances.

    ``len(fetcher)`` is the number of providers and ``str(fetcher)`` joins
    their class names with ``|``.
    """

    def __init__(self, use_default=True):
        """Start with the default providers, or empty when *use_default* is false."""
        if use_default:
            self.provider_list = self._generate_provider_list()
        else:
            self.provider_list = []
        self.origin_provider_list = []

    @staticmethod
    def _generate_provider_list():
        """Instantiate and return the default set of providers."""
        ip3366 = IP3366Provider()  # Maybe IP Block
        kp = KuaiProvider()  # Maybe malfunction
        xp = XiciProvider()
        wp = WuyouProvider()  # Maybe IP Block
        up = UsProvider()
        six = SixProvider()
        zdaye = ZdayeProvider()
        busy = BusyProvider()
        webp = WebProvider()
        return [up, ip3366, kp, xp, wp, six, zdaye, busy, webp]

    def clear(self):
        """Drop every provider; return self."""
        self.provider_list = []
        return self

    def set_provider(self, providers):
        """Replace the provider list with *providers*; return self."""
        self.provider_list = providers
        return self

    def add_provider(self, providers):
        """Append every item of *providers*; return self."""
        self.provider_list.extend(providers)
        return self

    def get_provider(self, index):
        """Return the provider at *index*."""
        return self.provider_list[index]

    def remove_provider(self, indices):
        """Remove the providers at the given positions.

        Args:
            indices: iterable of positions in the current list, in any
                order; repeated positions are removed once. The argument is
                not modified.
        """
        # Pop from the highest position down so earlier removals do not
        # shift the remaining targets. The old code reversed the caller's
        # list in place and was only correct for ascending input.
        for index in sorted(set(indices), reverse=True):
            self.provider_list.pop(index)

    def backup_provider(self):
        """Save a deep copy of the current provider list."""
        self.origin_provider_list = copy.deepcopy(self.provider_list)

    def restore_provider(self):
        """Restore the provider list from the last backup (shallow copy)."""
        self.provider_list = self.origin_provider_list[:]

    def __len__(self):
        return len(self.provider_list)

    def __str__(self):
        return "|".join(p.__class__.__name__ for p in self.provider_list)
69 |
70 |
if __name__ == '__main__':
    # Manual smoke run: print everything each default provider yields.
    for provider in Fetcher().provider_list:
        for found in provider.getter():
            print(found)
77 |
--------------------------------------------------------------------------------
/spoon_server/util/html_parser.py:
--------------------------------------------------------------------------------
1 | import time
2 | import requests
3 | from lxml import etree
4 |
5 | from spoon_server.util.constant import HEADERS
6 | from spoon_server.util.logger import log
7 |
8 |
def get_html(url, headers=None, cookie=None, proxy=None, data=None, verify=False):
    """Fetch *url* and return the response body as text.

    Args:
        url: address to request.
        headers: request headers; the module-level ``HEADERS`` when None.
        cookie: cookies passed to requests.
        proxy: proxies mapping passed to requests.
        data: when not None the request is a POST carrying this payload,
            otherwise a GET.
        verify: TLS verification flag passed to requests (off by default).

    Returns:
        str: the body decoded with the encoding requests detects from the
        content. HTTP error statuses are not turned into exceptions here.

    Raises:
        Exception: whatever the request raised, logged and re-raised with
        its original type (it used to be wrapped in a bare ``Exception``,
        unlike ``get_html_tree``).
    """
    if headers is None:
        headers = HEADERS

    request_kwargs = dict(url=url,
                          headers=headers,
                          cookies=cookie,
                          timeout=10,
                          proxies=proxy,
                          verify=verify)
    try:
        if data is not None:
            response = requests.post(data=data, **request_kwargs)
        else:
            response = requests.get(**request_kwargs)
        response.encoding = response.apparent_encoding
        return response.text
    except Exception as e:
        log.error("{0}".format(e))
        raise
35 |
36 |
def get_html_tree(url, headers=None, cookie=None, proxy=None, data=None, verify=False):
    """Fetch *url* and return the body parsed with ``lxml.etree.HTML``.

    Args:
        url: address to request.
        headers: request headers; the module-level ``HEADERS`` when None.
        cookie: cookies passed to requests.
        proxy: proxies mapping passed to requests.
        data: when not None the request is a POST carrying this payload,
            otherwise a GET.
        verify: TLS verification flag passed to requests (off by default).

    Returns:
        The value of ``etree.HTML`` for the decoded body.

    Raises:
        Exception: any request error, including HTTP error statuses (via
        ``raise_for_status``), logged and re-raised unchanged.
    """
    if headers is None:
        headers = HEADERS

    request_kwargs = dict(url=url,
                          headers=headers,
                          cookies=cookie,
                          timeout=10,
                          proxies=proxy,
                          verify=verify)
    try:
        if data is not None:
            response = requests.post(data=data, **request_kwargs)
        else:
            response = requests.get(**request_kwargs)
        response.raise_for_status()
        response.encoding = response.apparent_encoding
        # response.text is always str, so the former bytes-decoding branch
        # could never run and has been dropped.
        html = response.text
        # One-second pause after every successful fetch, kept from the
        # original implementation.
        time.sleep(1)
        return etree.HTML(html)
    except Exception as e:
        log.error("{0}".format(e))
        raise
67 |
68 |
# Manual smoke test: fetch and parse one page when the module is run directly.
if __name__ == "__main__":
    tree = get_html_tree(url="http://www.baidu.com")
71 |
--------------------------------------------------------------------------------
/spoon_server/main/validater.py:
--------------------------------------------------------------------------------
1 | import time
2 | import concurrent.futures
3 |
4 | from spoon_server.util.validate import validate
5 | from spoon_server.main.manager import Manager
6 |
7 | from spoon_server.database.redis_config import RedisConfig
8 |
9 |
class Validater(Manager):
    """Continuously re-validate the proxies stored in the "useful" hash.

    Each proxy carries an integer score in that hash. A successful check
    raises the score (capped at 100) and records the proxy in the "current"
    sorted set; a failed check halves and decrements it. Proxies whose score
    is negative are deleted.
    """

    def __init__(self, url_prefix=None, database=None, checker=None, validater_thread_num=30):
        """Forward the collaborators to ``Manager`` and keep the thread count."""
        super(Validater, self).__init__(database=database, url_prefix=url_prefix, checker=checker)
        self.validater_thread_num = validater_thread_num

    def _validate_proxy(self, each_proxy):
        """Check one proxy and update its score and bookkeeping entries."""
        if isinstance(each_proxy, bytes):
            each_proxy = each_proxy.decode('utf-8')
        raw_value = self.database.getvalue(self.generate_name(self._useful_prefix), each_proxy)
        if raw_value is None:
            # The entry disappeared between listing and validating (for
            # example removed by another worker); int(None) used to raise a
            # TypeError that was lost inside the executor.
            return
        value = int(raw_value)
        if value < 0:
            self.database.delete(self.generate_name(self._useful_prefix), each_proxy)
        else:
            if validate(self._url_prefix, each_proxy, self._checker):
                # Scored with the negated timestamp so the most recent
                # successes rank first; only ranks 0..99 are kept.
                self.database.zadd(self.generate_name(self._current_prefix), each_proxy, int(-1 * time.time()))
                self.database.zremrangebyrank(self.generate_name(self._current_prefix), 100, 10000)
                if not value >= 100:
                    if value == 99:
                        # About to reach 100: stamp it in the "hundred" hash.
                        self.database.set_value(self.generate_name(self._hundred_prefix), each_proxy, time.time())
                    self.database.inckey(self.generate_name(self._useful_prefix), each_proxy, 1)
                else:
                    self.database.set_value(self.generate_name(self._hundred_prefix), each_proxy, time.time())
                    self.database.set_value(self.generate_name(self._useful_prefix), each_proxy, 100)
            else:
                self.database.zrem(self.generate_name(self._current_prefix), each_proxy)
                if value > 0:
                    self.database.set_value(self.generate_name(self._useful_prefix), each_proxy, value // 2)
                self.database.inckey(self.generate_name(self._useful_prefix), each_proxy, -1)

    def main(self):
        """Validate the whole useful pool in a thread pool, forever."""
        while True:
            proxy_list = list(self.database.get_all(self.generate_name(self._useful_prefix)))
            if not proxy_list:
                # Nothing to validate yet: pause instead of hammering the
                # database in a tight loop.
                time.sleep(1)
                continue
            with concurrent.futures.ThreadPoolExecutor(max_workers=self.validater_thread_num) as executor:
                for proxy in proxy_list:
                    executor.submit(self._validate_proxy, proxy)
45 |
46 |
def validater_run(url=None, database=None, checker=None, validater_thread_num=30):
    """Build a ``Validater`` from the arguments and run its endless loop."""
    options = dict(url_prefix=url,
                   database=database,
                   checker=checker,
                   validater_thread_num=validater_thread_num)
    Validater(**options).main()
53 |
54 |
if __name__ == '__main__':
    # Manual run against a local redis on port 21009.
    local_db = RedisConfig("127.0.0.1", 21009)
    Validater(url_prefix="https://www.google.com", database=local_db).main()
59 |
--------------------------------------------------------------------------------
/spoon_server/example/provider_availble.py:
--------------------------------------------------------------------------------
1 | import concurrent.futures
2 | from spoon_server.proxy.busy_provider import BusyProvider
3 | from spoon_server.proxy.cool_provider import CoolProvider
4 | from spoon_server.proxy.feilong_provider import FeilongProvider
5 | from spoon_server.proxy.fpl_provider import FPLProvider
6 | from spoon_server.proxy.gou_provider import GouProvider
7 | from spoon_server.proxy.gp_provider import GPProvider
8 | from spoon_server.proxy.ihuan_provider import IhuanProvider
9 | from spoon_server.proxy.ip31_provider import IP31Provider
10 | from spoon_server.proxy.ip181_provider import IP181Provider
11 | from spoon_server.proxy.ip3366_provider import IP3366Provider
12 | from spoon_server.proxy.kuai_provider import KuaiProvider
13 | from spoon_server.proxy.listende_provider import ListendeProvider
14 | from spoon_server.proxy.mipu_provider import MimvpProvider
15 | from spoon_server.proxy.nian_provider import NianProvider
16 | from spoon_server.proxy.nntime_provider import NNtimeProvider
17 | from spoon_server.proxy.nord_provider import NordProvider
18 | from spoon_server.proxy.pdb_provider import PdbProvider
19 | from spoon_server.proxy.plp_provider import PlpProvider
20 | from spoon_server.proxy.prem_provider import PremProvider
21 | from spoon_server.proxy.six_provider import SixProvider
22 | from spoon_server.proxy.ssl_provider import SSLProvider
23 | from spoon_server.proxy.us_provider import UsProvider
24 | from spoon_server.proxy.web_provider import WebProvider
25 | from spoon_server.proxy.wuyou_provider import WuyouProvider
26 | from spoon_server.proxy.xiaohexia_provider import XiaohexiaProvider
27 | from spoon_server.proxy.xici_provider import XiciProvider
28 | from spoon_server.proxy.xun_provider import XunProvider
29 | from spoon_server.proxy.yao_provider import YaoProvider
30 | from spoon_server.proxy.you_provider import YouProvider
31 | from spoon_server.proxy.zdaye_provider import ZdayeProvider
32 |
# Provider classes (not instances) exercised by this script; check_provider()
# instantiates each one itself.
all_provider_list = [
    BusyProvider, CoolProvider, FeilongProvider, FPLProvider, GouProvider, GPProvider,
    IhuanProvider, IP31Provider, IP181Provider, IP3366Provider, KuaiProvider, ListendeProvider,
    MimvpProvider, NianProvider, NNtimeProvider, NordProvider, PdbProvider, PlpProvider,
    PremProvider, SixProvider, SSLProvider, UsProvider, WebProvider, WuyouProvider,
    XiaohexiaProvider, XiciProvider, XunProvider, YaoProvider, YouProvider, ZdayeProvider
]
40 |
41 |
def check_provider(pro):
    """Instantiate provider class *pro*, drain its getter and print a summary.

    Prints ``<class name> <error>`` when construction fails,
    ``<class name> <count>`` when the getter finishes, and
    ``<class name> <count so far> <error>`` when the getter raises part-way.
    Always returns None.
    """
    try:
        instance = pro()
    except Exception as e:
        print(pro.__name__, e)
        return

    collected = []
    label = instance.__class__.__name__
    try:
        for item in instance.getter():
            collected.append(item)
    except Exception as e:
        print(label, len(collected), e)
    else:
        print(label, len(collected))
56 |
57 |
if __name__ == '__main__':
    # Check every provider concurrently; leaving the block waits for all.
    pool = concurrent.futures.ThreadPoolExecutor(max_workers=20)
    with pool:
        for provider_cls in all_provider_list:
            pool.submit(check_provider, provider_cls)
62 |
--------------------------------------------------------------------------------
/test/unit/test_provider.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from spoon_server.util.proxy_format import match_proxy_format
3 | from spoon_server.proxy.busy_provider import BusyProvider
4 | from spoon_server.proxy.cool_provider import CoolProvider
5 | from spoon_server.proxy.feilong_provider import FeilongProvider
6 | from spoon_server.proxy.gou_provider import GouProvider
7 | from spoon_server.proxy.fpl_provider import FPLProvider
8 | from spoon_server.proxy.gp_provider import GPProvider
9 | from spoon_server.proxy.ihuan_provider import IhuanProvider
10 | from spoon_server.proxy.ip31_provider import IP31Provider
11 | from spoon_server.proxy.ip181_provider import IP181Provider
12 | from spoon_server.proxy.ip3366_provider import IP3366Provider
13 | from spoon_server.proxy.iphai_provider import IphaiProvider
14 | from spoon_server.proxy.kuai_provider import KuaiProvider
15 | from spoon_server.proxy.listende_provider import ListendeProvider
16 | from spoon_server.proxy.mipu_provider import MimvpProvider
17 | from spoon_server.proxy.nian_provider import NianProvider
18 | from spoon_server.proxy.nntime_provider import NNtimeProvider
19 | from spoon_server.proxy.nord_provider import NordProvider
20 | from spoon_server.proxy.pdb_provider import PdbProvider
21 | from spoon_server.proxy.plp_provider import PlpProvider
22 | from spoon_server.proxy.prem_provider import PremProvider
23 | from spoon_server.proxy.six_provider import SixProvider
24 | from spoon_server.proxy.ssl_provider import SSLProvider
25 | from spoon_server.proxy.us_provider import UsProvider
26 | from spoon_server.proxy.wuyou_provider import WuyouProvider
27 | from spoon_server.proxy.xiaohexia_provider import XiaohexiaProvider
28 | from spoon_server.proxy.xici_provider import XiciProvider
29 | from spoon_server.proxy.xun_provider import XunProvider
30 | from spoon_server.proxy.yao_provider import YaoProvider
31 | from spoon_server.proxy.you_provider import YouProvider
32 | from spoon_server.proxy.zdaye_provider import ZdayeProvider
33 |
34 |
35 | # class ProviderTestCase(unittest.TestCase):
36 | # def test_busy_provider(self):
37 | # pd = BusyProvider()
38 | # data = [proxy for proxy in pd.getter()]
39 | # self.assertTrue(len(data) > 1 and match_proxy_format(data[0]))
40 |
41 |
def add_test(name, provider):
    """Create a ``unittest.TestCase`` subclass called *name* for *provider*.

    The class has a single ``test`` method that instantiates *provider*,
    drains its getter and asserts that more than one proxy came back and
    that every one matches the expected proxy format. The class is stored in
    this module's globals under *name*.
    """
    def fn(self):
        pd = provider()
        data = list(pd.getter())
        self.assertTrue(len(data) > 1 and all([match_proxy_format(p) for p in data]))

    globals()[name] = type(name, (unittest.TestCase,), {'test': fn})
54 |
55 |
if __name__ == '__main__':
    # The test classes are generated here, so they only exist when this file
    # is executed as a script. FPLProvider used to appear twice in the list,
    # which just rebuilt the same test class a second time.
    for t in [BusyProvider, CoolProvider, FeilongProvider,
              FPLProvider, GouProvider, GPProvider,
              IhuanProvider, IP31Provider, IP181Provider, IP3366Provider,
              IphaiProvider, KuaiProvider, ListendeProvider, MimvpProvider,
              NianProvider, NNtimeProvider, NordProvider, PdbProvider,
              PlpProvider, PremProvider, SixProvider, SSLProvider,
              UsProvider, WuyouProvider, XiaohexiaProvider, XiciProvider,
              XunProvider, YaoProvider, YouProvider, ZdayeProvider]:
        add_test(f"Test{t.__name__}", t)

    unittest.main()
68 |
--------------------------------------------------------------------------------
/spoon_web/spoon/settings.py:
--------------------------------------------------------------------------------
1 | """
2 | Django settings for spoon project.
3 |
4 | Generated by 'django-admin startproject' using Django 1.11.3.
5 |
6 | For more information on this file, see
7 | https://docs.djangoproject.com/en/1.11/topics/settings/
8 |
9 | For the full list of settings and their values, see
10 | https://docs.djangoproject.com/en/1.11/ref/settings/
11 | """
12 |
13 | import os
14 |
# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))


# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/1.11/howto/deployment/checklist/

# SECURITY WARNING: keep the secret key used in production secret!
# The key can be supplied through the SPOON_SECRET_KEY environment variable;
# the committed value below is only the development fallback.
SECRET_KEY = os.environ.get(
    'SPOON_SECRET_KEY',
    'yle@$!9e)6k!s10bw)$77=bymzr&7x13p!1h4b3#wv71_qr&g1',
)

# SECURITY WARNING: don't run with debug turned on in production!
# Debug stays on unless SPOON_DEBUG is set to 0/false/no/off.
DEBUG = os.environ.get('SPOON_DEBUG', 'true').strip().lower() not in ('0', 'false', 'no', 'off')

ALLOWED_HOSTS = ["*"]
29 |
30 |
# Application definition

# Django's stock contrib apps plus the project's own 'api' app.
INSTALLED_APPS = [
    'django.contrib.admin',
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',
    'api'
]

MIDDLEWARE = [
    'django.middleware.security.SecurityMiddleware',
    'django.contrib.sessions.middleware.SessionMiddleware',
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
]

ROOT_URLCONF = 'spoon.urls'

TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        'DIRS': [],
        'APP_DIRS': True,
        'OPTIONS': {
            'context_processors': [
                'django.template.context_processors.debug',
                'django.template.context_processors.request',
                'django.contrib.auth.context_processors.auth',
                'django.contrib.messages.context_processors.messages',
            ],
        },
    },
]

WSGI_APPLICATION = 'spoon.wsgi.application'


# Database
# https://docs.djangoproject.com/en/1.11/ref/settings/#databases
# NOTE(review): no DATABASES setting is defined in this module even though
# the admin/auth/sessions apps are installed above - confirm this is intended.

# Password validation
# https://docs.djangoproject.com/en/1.11/ref/settings/#auth-password-validators

AUTH_PASSWORD_VALIDATORS = [
    {
        'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
    },
]


# Internationalization
# https://docs.djangoproject.com/en/1.11/topics/i18n/

LANGUAGE_CODE = 'zh-hans'

TIME_ZONE = 'UTC'

USE_I18N = True

USE_L10N = True

USE_TZ = True


# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/1.11/howto/static-files/

STATIC_URL = '/static/'
114 |
--------------------------------------------------------------------------------
/spoon_server/forward/main.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | import os
5 | import threading
6 | import sys
7 | import re
8 |
9 | import spoon_server.forward.forward as forward
10 |
11 | from spoon_server.util.logger import log
12 |
13 |
def pid_exists(pid):
    """Return True when a process with id *pid* exists.

    Adapted from
    http://stackoverflow.com/questions/568271/how-to-check-if-there-exists-a-process-with-a-given-pid

    On POSIX the check sends signal 0; a permission error still counts as
    "exists". On other platforms the process is opened through kernel32.
    """
    if os.name != 'posix':
        import ctypes
        kernel32 = ctypes.windll.kernel32
        SYNCHRONIZE = 0x100000

        handle = kernel32.OpenProcess(SYNCHRONIZE, 0, pid)
        if handle == 0:
            return False
        kernel32.CloseHandle(handle)
        return True

    import errno
    if pid < 0:
        return False
    try:
        os.kill(pid, 0)
    except OSError as e:
        # EPERM: the process is there but belongs to someone else.
        return e.errno == errno.EPERM
    return True
40 |
41 |
def run_proxy(local_addr, local_port,
              remote_addr, remote_port, redis_=None):
    """Create a ``ForwardServer`` and serve until it stops.

    Args:
        local_addr: address to listen on.
        local_port: port to listen on.
        remote_addr: upstream address connections are forwarded to.
        remote_port: upstream port connections are forwarded to.
        redis_: database configuration handed to ``ForwardServer``.
            NOTE(review): ``ForwardServer`` builds a ``Manager`` from it;
            whether ``None`` is acceptable depends on ``Manager`` - pass a
            real configuration when in doubt.
    """
    # ForwardServer requires the redis argument and exposes snake_case
    # setters; the previous call used no argument and setListen/setRemote,
    # which do not exist on that class.
    serv = forward.ForwardServer(redis_)
    print(local_addr, local_port,
          remote_addr, remote_port)
    # ForwardServer._forward connects to the *default* remote, so set that
    # one as well as the plain remote.
    serv.set_listen(local_addr, local_port) \
        .set_remote(remote_addr, remote_port) \
        .set_default_remote(remote_addr, remote_port)
    serv.serve()
50 |
51 |
def _parse_endpoint(text):
    """Split ``[host:]port`` into ``(host, port)``; return None if malformed.

    A missing host part defaults to ``0.0.0.0``.
    """
    matched = re.match(r'^(?P<addr>.+:)?(?P<port>[0-9]{1,5})$', text)
    if not matched:
        return None
    addr = (matched.group('addr') or '0.0.0.0').rstrip(':')
    return addr, int(matched.group('port'))


def start():
    """Fork into the background and start the forwarding proxy.

    The parent process exits immediately. The child refuses to start when
    ``./proxy_daemon.pid`` names a live process, otherwise records its own
    pid there and launches ``run_proxy`` in a thread for the hard-coded
    listen/remote endpoints.
    """
    try:
        pid = os.fork()
        if pid > 0:
            # exit first parent
            log.info('parent process exit')
            sys.exit(0)
    except OSError as e:
        sys.stderr.write("fork #1 failed: %d (%s)\n" % (e.errno, e.strerror))
        sys.exit(1)

    # write pid
    pid = str(os.getpid())
    pidfile = "./proxy_daemon.pid"

    if os.path.isfile(pidfile):
        # 'with' closes the handle even on the sys.exit() path below.
        with open(pidfile, 'r') as f:
            file_pid = int(f.read())
        log.info('read pid file pid=%s' % file_pid)
        if pid_exists(file_pid):
            log.info("%s already exists, and pid=%s exists exiting" %
                     (pidfile, file_pid))
            sys.exit(1)
        log.info('the pid file pid=%s not exists' % file_pid)

    with open(pidfile, 'w') as f:
        f.write(pid)
    log.info('write pid to %s' % pidfile)

    log.info('now is child process do')

    listen = "127.0.0.1:12001"
    remote = "119.39.48.205:9090"

    local = _parse_endpoint(listen)
    if local is None:
        log.info('listen format error!')
        sys.exit(1)
    local_addr, local_port = local

    upstream = _parse_endpoint(remote)
    if upstream is None:
        log.info('remote format error!')
        sys.exit(1)
    remote_addr, remote_port = upstream

    threading.Thread(
        target=run_proxy,
        args=(local_addr, local_port,
              remote_addr, remote_port)
    ).start()

    log.info('start all proxy done')
115 |
116 |
def exit():
    """Remove the daemon's pid file.

    Only the file ``./proxy_daemon.pid`` is removed; no signal is sent to
    the process it names. A missing file is logged instead of raising.
    """
    pidfile = "./proxy_daemon.pid"
    try:
        os.remove(pidfile)
    except FileNotFoundError:
        log.info('pid file %s not found, nothing to remove' % pidfile)
    log.info('exit')
121 |
122 |
def help():
    """Print the command-line usage text, one line per print call."""
    usage_lines = (
        'Usage: %s Command [Option]' % sys.argv[0],
        'Command List:',
        'start 根据配置启动worker',
        'stop 停止所有worker',
        'restart 重新启动所有worker',
    )
    for line in usage_lines:
        print(line)
129 |
130 |
if __name__ == '__main__':
    # Dispatch through an explicit table instead of eval(): eval executed
    # whatever expression was typed on the command line.
    commands = {'start': start, 'exit': exit, 'help': help}
    cmd = sys.argv[1] if len(sys.argv) > 1 else 'help'
    action = commands.get(cmd)
    if action is None:
        print('Unknown command: %s' % cmd)
        help()
        sys.exit(2)
    action()
137 |
--------------------------------------------------------------------------------
/spoon_server/forward/forward.py:
--------------------------------------------------------------------------------
1 | import re
2 | import socks
3 | import signal
4 | import socket
5 | import random
6 | import threading
7 |
8 | from spoon_server.util.logger import log
9 | from spoon_server.main.manager import Manager
10 | from spoon_server.database.redis_config import RedisConfig
11 |
# Shutdown flag: set to True by handler() and polled by ForwardServer.serve().
is_exit = False
13 |
14 |
class ForwardServer(object):
    """TCP relay: accept local connections and pipe them to a remote host.

    Every accepted connection gets an upstream socket to the *default*
    remote and two threads copying bytes, one per direction.
    """

    # Number of bytes read from a socket per recv() call.
    PAGE_SIZE = 4096
    # Seconds accept() may block before the module-level exit flag is
    # checked again.
    ACCEPT_TIMEOUT = 1.0

    def __init__(self, redis_):
        """Create an unconfigured server; *redis_* is handed to ``Manager``."""
        self.listen_host = None
        self.listen_port = None
        self.remote_host = None
        self.remote_port = None
        self.default_remote_host = None
        self.default_remote_port = None
        self.m = Manager(database=redis_)

    def set_listen(self, host, port):
        """Set the local address to listen on; return self."""
        self.listen_host = host
        self.listen_port = port
        return self

    def set_default_remote(self, host, port):
        """Set the upstream used for forwarded connections; return self."""
        self.default_remote_host = host
        self.default_remote_port = port
        return self

    def set_remote(self, host, port):
        """Set the ``remote_host``/``remote_port`` attributes; return self."""
        self.remote_host = host
        self.remote_port = port
        return self

    def _listen(self):
        """Create, bind and return the listening TCP socket."""
        sock_server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)  # tcp
        sock_server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        sock_server.bind((self.listen_host, self.listen_port))
        sock_server.listen(5)
        log.info('Listening at %s:%d ...' % (self.listen_host, self.listen_port))
        return sock_server

    def serve(self):
        """Accept connections until the exit flag is set or accept fails.

        The listening socket is always shut down and closed on the way out.
        """
        sock_server = self._listen()
        # Without a timeout accept() blocks indefinitely, so a flag set by
        # the signal handler was only noticed after the next client arrived.
        sock_server.settimeout(self.ACCEPT_TIMEOUT)

        try:
            while not is_exit:
                try:
                    sock, addr = sock_server.accept()
                except socket.timeout:
                    continue
                except (KeyboardInterrupt, SystemExit):
                    log.warn('Closing...')
                    break
                except Exception as e:
                    log.error('Exception exit {0}'.format(e))
                    break

                threading.Thread(target=self._forward, args=(sock,)).start()
                log.info('New clients from {0}'.format(addr))
        finally:
            try:
                sock_server.shutdown(socket.SHUT_RDWR)
            except OSError:
                # Best effort only: shutting down a listening socket is
                # rejected on some platforms; close() below still runs.
                pass
            sock_server.close()

        log.info('exit server')

    def _forward(self, sock_in):
        """Connect to the default remote and start both relay threads.

        Raises:
            Exception: when the upstream connection cannot be established;
            the client socket is closed first so it does not leak.
        """
        try:
            print("Remote host and remote port", self.default_remote_host, self.default_remote_port)
            sock_out = ForwardClient(self.default_remote_host, self.default_remote_port).get_client()
            log.info('get the client socks done')
        except Exception as e:
            log.error('Get Remote Client error: %s' % str(e))
            sock_in.close()
            raise

        threading.Thread(target=self._do_data_forward, args=(sock_in, sock_out)).start()
        threading.Thread(target=self._do_data_forward, args=(sock_out, sock_in)).start()

    def _do_data_forward(self, sock_in, sock_out):
        """Copy bytes from *sock_in* to *sock_out* until either side fails.

        Both sockets are closed when the loop ends, which also ends the
        relay thread running in the opposite direction.
        """
        try:
            addr_in = '%s:%d' % sock_in.getpeername()
            addr_out = '%s:%d' % sock_out.getpeername()

            while True:
                try:
                    data = sock_in.recv(ForwardServer.PAGE_SIZE)
                except Exception as e:
                    log.error('Socket read error of %s: %s' % (addr_in, str(e)))
                    break

                if not data:
                    log.info('Socket closed by ' + addr_in)
                    break

                try:
                    sock_out.sendall(data)
                except Exception as e:
                    log.error('Socket write error of %s: %s' % (addr_out, str(e)))
                    break

                log.info('%s -> %s (%d B)' % (addr_in, addr_out, len(data)))
        finally:
            # Also reached when getpeername() fails on an already closed
            # socket, which previously leaked both sockets.
            sock_in.close()
            sock_out.close()
112 |
113 |
class ForwardClient(object):
    """Remembers a remote ``(host, port)`` and opens a ``socks`` socket to it."""

    def __init__(self, host, port):
        self.remote_host, self.remote_port = host, port

    def set_remote(self, host, port):
        """Replace the remote endpoint and return ``self`` for chaining."""
        self.remote_host, self.remote_port = host, port
        return self

    def get_client(self):
        """Open and return a socket connected to the configured remote.

        Raises:
            Exception: with the message ``Remote connect error: ...`` when the
                connect fails with ``socket.error``; the socket is closed
                before raising.
        """
        client = socks.socksocket(socket.AF_INET, socket.SOCK_STREAM)

        try:
            print('remote,=', (self.remote_host, self.remote_port))
            client.connect((self.remote_host, self.remote_port))
        except socket.error as err:
            client.close()
            message = 'Remote connect error: %s' % str(err)
            log.error(message)
            raise Exception(message)
        else:
            return client
136 |
137 |
def handler(signum, frame):
    """Signal handler: print the signal and set the module-level ``is_exit`` flag.

    Args:
        signum: number of the received signal.
        frame: frame object passed in by the ``signal`` machinery.
    """
    global is_exit
    print(signum, frame)
    is_exit = True
    report = "receive a signal %d, is_exit = %d" % (signum, is_exit)
    print(report)
143 |
144 |
if __name__ == '__main__':
    # Route SIGINT and SIGTERM through handler(), which sets is_exit.
    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, handler)

    listen = ("127.0.0.1", 12001)
    default_remote = ("47.93.234.57", 42251)

    redis = RedisConfig("10.1.10.10", 6379, 0, 123456)

    serv = ForwardServer(redis)
    serv.set_listen(*listen)
    serv.set_default_remote(*default_remote)
    serv.serve()
157 |
--------------------------------------------------------------------------------
/spoon_server/main/manager.py:
--------------------------------------------------------------------------------
1 | import time
2 | from urllib.parse import urlparse
3 |
4 | from spoon_server.database.redis_wrapper import RedisWrapper
5 | from spoon_server.proxy.fetcher import Fetcher
6 | from spoon_server.main.checker import Checker
7 | from spoon_server.util.logger import log
8 |
9 |
class Manager(object):
    """Front end to the Redis-backed proxy tables of one target site.

    Table names have the form ``spoon:<netloc>:<prefix>`` (see
    ``generate_name``). ``refresh`` pulls proxies from the fetcher's
    providers into the origin table; the remaining methods are thin wrappers
    around the database object.
    """

    def __init__(self, database=None, url_prefix=None, fetcher=None, checker=None):
        """Wire up the database, fetcher and checker.

        Args:
            database: object exposing ``host``, ``port``, ``db`` and
                ``password``; when falsy, Redis at 127.0.0.1:6379 db 0 is used.
            url_prefix: target URL; when falsy the name ``"default"`` is used.
            fetcher: provider collection; when falsy a fresh ``Fetcher()`` is
                created, otherwise its ``backup_provider()`` is called.
            checker: when falsy a fresh ``Checker()`` is created.
        """
        if database:
            self.database = RedisWrapper(database.host, database.port, database.db, database.password)
        else:
            self.database = RedisWrapper("127.0.0.1", 6379, 0)

        self._origin_prefix = 'origin_proxy'
        self._useful_prefix = 'useful_proxy'
        self._hundred_prefix = 'hundred_proxy'
        self._current_prefix = 'current_proxy'

        self._url_prefix = url_prefix if url_prefix else "default"

        # NOTE(review): this is a truthiness test and the fetcher supports
        # len(), so an empty fetcher is presumably replaced as well - confirm
        # that this is intended.
        if fetcher:  # refresher
            self._fetcher = fetcher
            self._fetcher.backup_provider()
            log.info("REFRESH FETCHER BACKUP PROVIDER {0}".format(str(self._fetcher)))
        else:  # validater
            self._fetcher = Fetcher()

        self._checker = checker if checker else Checker()

        self.log = log

    def get_netloc(self):
        """Return the netloc of the target URL, or ``"default"`` if none was set."""
        prefix = self._url_prefix
        if prefix == "default":
            return "default"
        return urlparse(prefix).netloc

    def generate_name(self, prefix):
        """Return the table name ``spoon:<netloc>:<prefix>``."""
        return ":".join(("spoon", self.get_netloc(), prefix))

    def refresh_condition(self):
        """Decide whether ``refresh`` should fetch new proxies.

        Returns:
            True when the useful table holds at most 100 entries, or when at
            least 20% of its entries have a value of 95 or more; otherwise
            False.
        """
        stored = self.get_all_kv_from(self.generate_name(self._useful_prefix)).items()
        scored = [(key.decode('utf-8'), int(value.decode('utf-8'))) for key, value in stored]

        total = len(scored)
        high = sum(1 for _, score in scored if score >= 95)

        if total <= 100:
            return True
        return high / total >= 0.2

    def refresh(self):
        """Fetch proxies from every provider and store them in the origin table.

        Does nothing when ``refresh_condition`` is false. Providers whose
        ``getter()`` raises are collected and removed from the fetcher at the
        end; when fewer than 6 providers remain at the start, the fetcher's
        ``restore_provider()`` is called first.
        """
        log.info("REFRESH START WITH {0} TARGET {1}".format(str(self._fetcher), self.get_netloc()))
        if not self.refresh_condition():
            log.info("REFRESH DID NOT MEET CONDITION. TARGET{0}".format(self.get_netloc()))
            return

        if len(self._fetcher) < 6:
            self._fetcher.restore_provider()
            log.info("REFRESH FETCHER FAILED: NO ENOUGH PROVIDER, RESTORE PROVIDERS TO {0} for TARGET {1}".format(
                str(self._fetcher), self.get_netloc()))

        fetched = set()
        failed_indexes = []
        for position in range(len(self._fetcher)):
            provider = self._fetcher.get_provider(position)
            try:
                for raw in provider.getter():
                    cleaned = raw.strip()
                    if not cleaned:
                        continue
                    self.log.info(
                        "REFRESH FETCHER: TARGET {0} PROVIDER {1} PROXY {2}".format(self.get_netloc(),
                                                                                    provider.__class__.__name__,
                                                                                    cleaned))
                    fetched.add(cleaned)
            except Exception as err:
                # A failing provider is only recorded here; it is removed
                # after all providers have been visited.
                failed_indexes.append(position)
                log.error(
                    "REFRESH FETCHER FAILED: PROVIDER {0} WILL BE REMOVED ERROR {1}".format(provider.__class__.__name__,
                                                                                            err))

        for proxy in fetched:
            self.database.set_value("spoon:proxy_stale", proxy, time.time())
            self.database.put(self.generate_name(self._origin_prefix), proxy)

        log.info("REFRESH FETCHER DELETE {0}. TARGET {1}".format(failed_indexes, self.get_netloc()))
        self._fetcher.remove_provider(failed_indexes)

    def get(self):
        """Return ``database.get`` for the useful table."""
        useful_name = self.generate_name(self._useful_prefix)
        return self.database.get(useful_name)

    def set_value(self, key, value):
        """Store ``value`` under ``key`` in the useful table."""
        useful_name = self.generate_name(self._useful_prefix)
        return self.database.set_value(useful_name, key, value)

    def delete(self, proxy):
        """Delete ``proxy`` from the useful table."""
        useful_name = self.generate_name(self._useful_prefix)
        self.database.delete(useful_name, proxy)

    def get_all(self):
        """Return ``database.get_all`` for the useful table."""
        useful_name = self.generate_name(self._useful_prefix)
        return self.database.get_all(useful_name)

    def get_status(self):
        """Return the database status of the origin and useful tables as a dict."""
        origin_status = self.database.get_status(self.generate_name(self._origin_prefix))
        useful_status = self.database.get_status(self.generate_name(self._useful_prefix))
        return {'origin_proxy': origin_status, 'useful_proxy': useful_status}

    # For spoon_web
    def get_keys(self):
        """Return every database key decoded as UTF-8 text."""
        return [raw_key.decode("utf-8") for raw_key in self.database.get_keys()]

    def get_from(self, target):
        """Return ``database.get(target)`` for an explicit table name."""
        return self.database.get(target)

    def get_all_from(self, target):
        """Return ``database.get_all(target)`` for an explicit table name."""
        return self.database.get_all(target)

    def get_all_kv_from(self, target):
        """Return ``database.get_all_kv(target)`` for an explicit table name."""
        return self.database.get_all_kv(target)

    def get_range_from(self, target):
        """Return the full ``zrange`` (indexes 0 to -1) of ``target``."""
        return self.database.zrange(target, 0, -1)

    def scan_kv_from(self, target, cursor):
        """Return ``database.scan_kv(target, cursor)``."""
        return self.database.scan_kv(target, cursor)
136 |
if __name__ == "__main__":
    # Manual smoke run against the default local Redis.
    manager = Manager()
    manager.refresh()
    status = manager.get_status()
    print(status)
    aaa = manager.get_all_kv_from("spoon:www.gsxt.gov.cn:useful_proxy")
142 |
--------------------------------------------------------------------------------
/spoon_server/forward/bak.py:
--------------------------------------------------------------------------------
1 | import re
2 | import socks
3 | import signal
4 | import socket
5 | import random
6 | import threading
7 |
8 | from spoon_server.util.logger import log
9 | from spoon_server.main.manager import Manager
10 | from spoon_server.database.redis_config import RedisConfig
11 |
# Shutdown flag: set to True by handler() and read by ForwardServer.serve().
is_exit = False
13 |
14 |
class ForwardServer(object):
    """TCP forwarder that relays each client to an upstream proxy.

    The upstream is chosen once per connection from the first chunk the
    client sends. When that chunk carries a ``Host:`` header and the manager
    returns a non-empty ``spoon:<host>:current_proxy`` list, an entry of that
    list is used; otherwise the default remote is used. Both relay directions
    then share that one upstream socket.
    """

    PAGE_SIZE = 4096  # bytes requested per recv()
    ACCEPT_POLL_SECONDS = 1.0  # how often serve() re-checks is_exit

    def __init__(self, redis_):
        """Create an unconfigured server backed by ``Manager(database=redis_)``."""
        self.listen_host = None
        self.listen_port = None
        self.remote_host = None
        self.remote_port = None
        self.default_remote_host = None
        self.default_remote_port = None
        self.m = Manager(database=redis_)

    def set_listen(self, host, port):
        """Set the address to listen on; returns ``self`` for chaining."""
        self.listen_host = host
        self.listen_port = port
        return self

    def set_default_remote(self, host, port):
        """Set the fallback upstream; returns ``self`` for chaining."""
        self.default_remote_host = host
        self.default_remote_port = port
        return self

    def set_remote(self, host, port):
        """Set ``remote_host``/``remote_port``; returns ``self`` for chaining."""
        self.remote_host = host
        self.remote_port = port
        return self

    def _listen(self):
        """Return a TCP socket listening on the configured address.

        The socket is closed again if binding or listening fails, so the
        descriptor is not leaked; the error then propagates.
        """
        sock_server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)  # tcp
        try:
            sock_server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            sock_server.bind((self.listen_host, self.listen_port))
            sock_server.listen(5)
        except Exception:
            sock_server.close()
            raise
        log.info('Listening at %s:%d ...' % (self.listen_host, self.listen_port))
        return sock_server

    def serve(self):
        """Accept clients and hand each one to ``_forward`` on its own thread.

        The loop ends when the module-level ``is_exit`` flag becomes true, on
        ``KeyboardInterrupt``/``SystemExit``, or on any other ``accept()``
        error. The listening socket is closed on every exit path.
        """
        sock_server = self._listen()
        # A blocking accept() is retried after a signal handler returns
        # (PEP 475) and would never observe is_exit; poll with a timeout.
        sock_server.settimeout(ForwardServer.ACCEPT_POLL_SECONDS)

        try:
            while not is_exit:
                try:
                    sock, addr = sock_server.accept()
                except socket.timeout:
                    continue
                except (KeyboardInterrupt, SystemExit):
                    log.warn('Closing...')
                    break
                except Exception as e:
                    log.error('Exception exit {0}'.format(e))
                    break

                threading.Thread(target=self._forward, args=(sock,)).start()
                log.info('New clients from {0}'.format(addr))
        finally:
            # shutdown() on a listening socket can raise and skip the close.
            sock_server.close()

        log.info('exit server')

    def _select_remote(self, data):
        """Return the ``(host, port)`` upstream for a client whose first chunk is ``data``.

        Falls back to the default remote when ``data`` has no parsable
        ``Host:`` header or the manager returns no proxies for that host.
        """
        default = (self.default_remote_host, self.default_remote_port)
        if b'Host' not in data:
            return default

        # Match on bytes: decoding the whole payload as UTF-8 fails on binary
        # traffic and used to abort the connection.
        host_match = re.match(rb'.*Host:\s(.*?)\r\n.*', data, re.S)
        if not host_match:
            return default

        hostname = host_match.group(1).decode("utf-8", "replace")
        current_proxy_list = self.m.get_range_from(":".join(["spoon", hostname, "current_proxy"]))
        if not current_proxy_list:
            log.info("Change Remote Proxy: {0}:{1}".format(*default))
            return default

        # As before, only indexes 0..len//3 (inclusive) are candidates.
        ran_num = random.randint(0, len(current_proxy_list) // 3)
        proxy = current_proxy_list[ran_num].decode("utf-8")
        log.info("Change Remote Proxy: {0}".format(proxy))
        host, port = proxy.split(":")[:2]
        return host, int(port)

    def _forward(self, sock_in):
        """Pick an upstream from the client's first chunk and start relaying.

        Args:
            sock_in: the accepted client socket.

        Raises:
            Exception: re-raised when reading the first chunk, choosing the
                upstream or connecting to it fails; every socket opened so
                far is closed first.
        """
        sock_out = None
        try:
            data = sock_in.recv(ForwardServer.PAGE_SIZE)
            if not data:
                log.info('Socket closed before any data was received')
                sock_in.close()
                return
            print("sock_in", data)
            remote_host, remote_port = self._select_remote(data)
            sock_out = ForwardClient.get_client(remote_host, remote_port)
            log.info('get the client socks done')
            # The chunk used for routing still has to reach the upstream.
            sock_out.sendall(data)
        except Exception as e:
            log.error('Get Remote Client error: %s' % str(e))
            sock_in.close()
            if sock_out is not None:
                sock_out.close()
            raise

        threading.Thread(target=self._do_data_forward, args=(sock_in, sock_out)).start()
        threading.Thread(target=self._do_data_forward, args=(sock_out, sock_in)).start()

    def _do_data_forward(self, sock_in, sock_out):
        """Copy data from ``sock_in`` to ``sock_out`` until one side stops.

        Both arguments must be connected sockets. The loop ends on a read
        error, a write error, or an empty read; both sockets are closed on
        every exit path.
        """
        try:
            addr_in = '%s:%d' % sock_in.getpeername()
            addr_out = '%s:%d' % sock_out.getpeername()

            while True:
                try:
                    data = sock_in.recv(ForwardServer.PAGE_SIZE)
                except Exception as e:
                    log.error('Socket read error of %s: %s' % (addr_in, str(e)))
                    break

                if not data:
                    log.info('Socket closed by ' + addr_in)
                    break

                try:
                    sock_out.sendall(data)
                except Exception as e:
                    log.error('Socket write error of %s: %s' % (addr_out, str(e)))
                    break

                log.info('%s -> %s (%d B)' % (addr_in, addr_out, len(data)))
        finally:
            sock_in.close()
            sock_out.close()
131 |
132 |
class ForwardClient(object):
    """Factory for outbound ``socks`` sockets."""

    @staticmethod
    def get_client(remote_host, remote_port):
        """Open and return a socket connected to ``(remote_host, remote_port)``.

        Raises:
            Exception: with the message ``Remote connect error: ...`` when the
                connect fails with ``socket.error``; the socket is closed
                before raising.
        """
        client = socks.socksocket(socket.AF_INET, socket.SOCK_STREAM)

        try:
            print('remote,=', (remote_host, remote_port))
            client.connect((remote_host, remote_port))
        except socket.error as err:
            client.close()
            message = 'Remote connect error: %s' % str(err)
            log.error(message)
            raise Exception(message)
        else:
            return client
147 |
148 |
def handler(signum, frame):
    """Signal handler: print the signal and set the module-level ``is_exit`` flag.

    Args:
        signum: number of the received signal.
        frame: frame object passed in by the ``signal`` machinery.
    """
    global is_exit
    print(signum, frame)
    is_exit = True
    report = "receive a signal %d, is_exit = %d" % (signum, is_exit)
    print(report)
154 |
155 |
if __name__ == '__main__':
    # Route SIGINT and SIGTERM through handler(), which sets is_exit.
    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, handler)

    listen = ("127.0.0.1", 12001)
    default_remote = ("47.93.234.57", 42251)

    redis = RedisConfig("10.1.10.10", 6379, 0, 123456)

    serv = ForwardServer(redis)
    serv.set_listen(*listen)
    serv.set_default_remote(*default_remote)
    serv.serve()
168 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Spoon - A package for building specific Proxy Pool for different Sites.
2 | Spoon is a library for building a distributed proxy pool for each site you assign.
3 | It runs on Python 3 only.
4 |
5 | ## Install
6 | Simply run `pip install spoonproxy`, or clone the repo and add it to your PYTHONPATH.
7 |
8 | ## Run
9 |
10 | ### Spoon-server
11 | Please make sure Redis is running. The default configuration is "host:localhost, port:6379"; you can also change the Redis connection settings.
12 | As shown in `example.py` in `spoon_server/example`,
13 | you can assign many different proxy providers.
14 | ```python
15 | from spoon_server.proxy.fetcher import Fetcher
16 | from spoon_server.main.proxy_pipe import ProxyPipe
17 | from spoon_server.proxy.kuai_provider import KuaiProvider
18 | from spoon_server.proxy.xici_provider import XiciProvider
19 | from spoon_server.database.redis_config import RedisConfig
20 | from spoon_server.main.checker import CheckerBaidu
21 |
22 | def main_run():
23 | redis = RedisConfig("127.0.0.1", 21009)
24 | p1 = ProxyPipe(url_prefix="https://www.baidu.com",
25 | fetcher=Fetcher(use_default=False),
26 | database=redis,
27 | checker=CheckerBaidu()).set_fetcher([KuaiProvider()]).add_fetcher([XiciProvider()])
28 | p1.start()
29 |
30 |
31 | if __name__ == '__main__':
32 | main_run()
33 | ```
34 |
35 | With a different checker, you can also validate the result precisely.
36 | ```python
37 | class CheckerBaidu(Checker):
38 | def checker_func(self, html=None):
39 | if isinstance(html, bytes):
40 | html = html.decode('utf-8')
41 | if re.search(r".*百度一下,你就知道.*", html):
42 | return True
43 | else:
44 | return False
45 | ```
46 |
47 | As the code in `spoon_server/example/example_multi.py` shows, you can use multiple processes to run several queues that fetch and validate proxies.
48 | You can also assign different providers to different URLs.
49 | The default proxy providers are listed below; you can also write your own providers.
50 |
51 |
52 |
53 | | name |
54 | description |
55 |
56 |
57 |
58 |
59 | | WebProvider |
60 | Get proxy from http api |
61 |
62 |
63 | | FileProvider |
64 | Get proxy from file |
65 |
66 |
67 | | GouProvider |
68 | http://www.goubanjia.com |
69 |
70 |
71 | | KuaiProvider |
72 | http://www.kuaidaili.com |
73 |
74 |
75 | | SixProvider |
76 | http://m.66ip.cn |
77 |
78 |
79 | | UsProvider |
80 | https://www.us-proxy.org |
81 |
82 |
83 | | WuyouProvider |
84 | http://www.data5u.com |
85 |
86 |
87 | | XiciProvider |
88 | http://www.xicidaili.com |
89 |
90 |
91 | | IP181Provider |
92 | http://www.ip181.com |
93 |
94 |
95 | | XunProvider |
96 | http://www.xdaili.cn |
97 |
98 |
99 | | PlpProvider |
100 | https://list.proxylistplus.com |
101 |
102 |
103 | | IP3366Provider |
104 | http://www.ip3366.net |
105 |
106 |
107 | | BusyProvider |
108 | https://proxy.coderbusy.com |
109 |
110 |
111 | | NianProvider |
112 | http://www.nianshao.me |
113 |
114 |
115 | | PdbProvider |
116 | http://proxydb.net |
117 |
118 |
119 | | ZdayeProvider |
120 | http://ip.zdaye.com |
121 |
122 |
123 | | YaoProvider |
124 | http://www.httpsdaili.com/ |
125 |
126 |
127 | | FeilongProvider |
128 | http://www.feilongip.com/ |
129 |
130 |
131 | | IP31Provider |
132 | https://31f.cn/http-proxy/ |
133 |
134 |
135 | | XiaohexiaProvider |
136 | http://www.xiaohexia.cn/ |
137 |
138 |
139 | | CoolProvider |
140 | https://www.cool-proxy.net/ |
141 |
142 |
143 | | NNtimeProvider |
144 | http://nntime.com/ |
145 |
146 |
147 | | ListendeProvider |
148 | https://www.proxy-listen.de/ |
149 |
150 |
151 | | IhuanProvider |
152 | https://ip.ihuan.me/ |
153 |
154 |
155 | | IphaiProvider |
156 | http://www.iphai.com/ |
157 |
158 |
159 | | MimvpProvider(@NeedCaptcha) |
160 | https://proxy.mimvp.com/ |
161 |
162 |
163 | | GPProvider(@NeedProxy if you're in China) |
164 | http://www.gatherproxy.com |
165 |
166 |
167 | | FPLProvider(@NeedProxy if you're in China) |
168 | https://free-proxy-list.net |
169 |
170 |
171 | | SSLProvider(@NeedProxy if you're in China) |
172 | https://www.sslproxies.org |
173 |
174 |
175 | | NordProvider(@NeedProxy if you're in China) |
176 | https://nordvpn.com |
177 |
178 |
179 | | PremProvider(@NeedProxy if you're in China) |
180 | https://premproxy.com |
181 |
182 |
183 | | YouProvider(@Deprecated) |
184 | http://www.youdaili.net |
185 |
186 |
187 |
188 |
189 | ### Spoon-web
190 | A simple Django web API demo. You can use any web server and write your own API.
191 | Simply run `python manage.py runserver **.**.**.**:*****`
192 | The simple APIs include:
193 |
194 |
195 |
196 | | name |
197 | description |
198 |
199 |
200 |
201 |
202 | | http://127.0.0.1:21010/api/v1/get_keys |
203 | Get all keys from redis |
204 |
205 |
206 | | http://127.0.0.1:21010/api/v1/fetchone_from?target=www.google.com&filter=65 |
207 | Get one useful proxy. target: the specific url; filter: successful-revalidate times |
208 |
209 |
210 | | http://127.0.0.1:21010/api/v1/fetchall_from?target=www.google.com&filter=65 |
211 | Get all useful proxies. |
212 |
213 |
214 | | http://127.0.0.1:21010/api/v1/fetch_hundred_recent?target=www.baidu.com&filter=5 |
215 | Get recently joined full-scored proxies. target: the specific url; filter: time in seconds |
216 |
217 |
218 | | http://127.0.0.1:21010/api/v1/fetch_stale?num=100 |
219 | Get recent proxies that have not been checked. num: the number of proxies you want |
220 |
221 |
222 | | http://127.0.0.1:21010/api/v1/fetch_recent?target=www.baidu.com |
223 | Get recent proxies that were successfully validated. target: the specific url |
224 |
225 |
226 |
227 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 | Preamble
9 |
10 | The GNU General Public License is a free, copyleft license for
11 | software and other kinds of works.
12 |
13 | The licenses for most software and other practical works are designed
14 | to take away your freedom to share and change the works. By contrast,
15 | the GNU General Public License is intended to guarantee your freedom to
16 | share and change all versions of a program--to make sure it remains free
17 | software for all its users. We, the Free Software Foundation, use the
18 | GNU General Public License for most of our software; it applies also to
19 | any other work released this way by its authors. You can apply it to
20 | your programs, too.
21 |
22 | When we speak of free software, we are referring to freedom, not
23 | price. Our General Public Licenses are designed to make sure that you
24 | have the freedom to distribute copies of free software (and charge for
25 | them if you wish), that you receive source code or can get it if you
26 | want it, that you can change the software or use pieces of it in new
27 | free programs, and that you know you can do these things.
28 |
29 | To protect your rights, we need to prevent others from denying you
30 | these rights or asking you to surrender the rights. Therefore, you have
31 | certain responsibilities if you distribute copies of the software, or if
32 | you modify it: responsibilities to respect the freedom of others.
33 |
34 | For example, if you distribute copies of such a program, whether
35 | gratis or for a fee, you must pass on to the recipients the same
36 | freedoms that you received. You must make sure that they, too, receive
37 | or can get the source code. And you must show them these terms so they
38 | know their rights.
39 |
40 | Developers that use the GNU GPL protect your rights with two steps:
41 | (1) assert copyright on the software, and (2) offer you this License
42 | giving you legal permission to copy, distribute and/or modify it.
43 |
44 | For the developers' and authors' protection, the GPL clearly explains
45 | that there is no warranty for this free software. For both users' and
46 | authors' sake, the GPL requires that modified versions be marked as
47 | changed, so that their problems will not be attributed erroneously to
48 | authors of previous versions.
49 |
50 | Some devices are designed to deny users access to install or run
51 | modified versions of the software inside them, although the manufacturer
52 | can do so. This is fundamentally incompatible with the aim of
53 | protecting users' freedom to change the software. The systematic
54 | pattern of such abuse occurs in the area of products for individuals to
55 | use, which is precisely where it is most unacceptable. Therefore, we
56 | have designed this version of the GPL to prohibit the practice for those
57 | products. If such problems arise substantially in other domains, we
58 | stand ready to extend this provision to those domains in future versions
59 | of the GPL, as needed to protect the freedom of users.
60 |
61 | Finally, every program is threatened constantly by software patents.
62 | States should not allow patents to restrict development and use of
63 | software on general-purpose computers, but in those that do, we wish to
64 | avoid the special danger that patents applied to a free program could
65 | make it effectively proprietary. To prevent this, the GPL assures that
66 | patents cannot be used to render the program non-free.
67 |
68 | The precise terms and conditions for copying, distribution and
69 | modification follow.
70 |
71 | TERMS AND CONDITIONS
72 |
73 | 0. Definitions.
74 |
75 | "This License" refers to version 3 of the GNU General Public License.
76 |
77 | "Copyright" also means copyright-like laws that apply to other kinds of
78 | works, such as semiconductor masks.
79 |
80 | "The Program" refers to any copyrightable work licensed under this
81 | License. Each licensee is addressed as "you". "Licensees" and
82 | "recipients" may be individuals or organizations.
83 |
84 | To "modify" a work means to copy from or adapt all or part of the work
85 | in a fashion requiring copyright permission, other than the making of an
86 | exact copy. The resulting work is called a "modified version" of the
87 | earlier work or a work "based on" the earlier work.
88 |
89 | A "covered work" means either the unmodified Program or a work based
90 | on the Program.
91 |
92 | To "propagate" a work means to do anything with it that, without
93 | permission, would make you directly or secondarily liable for
94 | infringement under applicable copyright law, except executing it on a
95 | computer or modifying a private copy. Propagation includes copying,
96 | distribution (with or without modification), making available to the
97 | public, and in some countries other activities as well.
98 |
99 | To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies. Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 |
103 | An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License. If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 |
112 | 1. Source Code.
113 |
114 | The "source code" for a work means the preferred form of the work
115 | for making modifications to it. "Object code" means any non-source
116 | form of a work.
117 |
118 | A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 |
123 | The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form. A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 |
134 | The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities. However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work. For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 |
147 | The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 |
151 | The Corresponding Source for a work in source code form is that
152 | same work.
153 |
154 | 2. Basic Permissions.
155 |
156 | All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met. This License explicitly affirms your unlimited
159 | permission to run the unmodified Program. The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work. This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 |
164 | You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force. You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright. Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 |
175 | Conveying under any other circumstances is permitted solely under
176 | the conditions stated below. Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 |
179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 |
181 | No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 |
187 | When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 |
195 | 4. Conveying Verbatim Copies.
196 |
197 | You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 |
205 | You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 |
208 | 5. Conveying Modified Source Versions.
209 |
210 | You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 |
214 | a) The work must carry prominent notices stating that you modified
215 | it, and giving a relevant date.
216 |
217 | b) The work must carry prominent notices stating that it is
218 | released under this License and any conditions added under section
219 | 7. This requirement modifies the requirement in section 4 to
220 | "keep intact all notices".
221 |
222 | c) You must license the entire work, as a whole, under this
223 | License to anyone who comes into possession of a copy. This
224 | License will therefore apply, along with any applicable section 7
225 | additional terms, to the whole of the work, and all its parts,
226 | regardless of how they are packaged. This License gives no
227 | permission to license the work in any other way, but it does not
228 | invalidate such permission if you have separately received it.
229 |
230 | d) If the work has interactive user interfaces, each must display
231 | Appropriate Legal Notices; however, if the Program has interactive
232 | interfaces that do not display Appropriate Legal Notices, your
233 | work need not make them do so.
234 |
235 | A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit. Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 |
245 | 6. Conveying Non-Source Forms.
246 |
247 | You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 |
252 | a) Convey the object code in, or embodied in, a physical product
253 | (including a physical distribution medium), accompanied by the
254 | Corresponding Source fixed on a durable physical medium
255 | customarily used for software interchange.
256 |
257 | b) Convey the object code in, or embodied in, a physical product
258 | (including a physical distribution medium), accompanied by a
259 | written offer, valid for at least three years and valid for as
260 | long as you offer spare parts or customer support for that product
261 | model, to give anyone who possesses the object code either (1) a
262 | copy of the Corresponding Source for all the software in the
263 | product that is covered by this License, on a durable physical
264 | medium customarily used for software interchange, for a price no
265 | more than your reasonable cost of physically performing this
266 | conveying of source, or (2) access to copy the
267 | Corresponding Source from a network server at no charge.
268 |
269 | c) Convey individual copies of the object code with a copy of the
270 | written offer to provide the Corresponding Source. This
271 | alternative is allowed only occasionally and noncommercially, and
272 | only if you received the object code with such an offer, in accord
273 | with subsection 6b.
274 |
275 | d) Convey the object code by offering access from a designated
276 | place (gratis or for a charge), and offer equivalent access to the
277 | Corresponding Source in the same way through the same place at no
278 | further charge. You need not require recipients to copy the
279 | Corresponding Source along with the object code. If the place to
280 | copy the object code is a network server, the Corresponding Source
281 | may be on a different server (operated by you or a third party)
282 | that supports equivalent copying facilities, provided you maintain
283 | clear directions next to the object code saying where to find the
284 | Corresponding Source. Regardless of what server hosts the
285 | Corresponding Source, you remain obligated to ensure that it is
286 | available for as long as needed to satisfy these requirements.
287 |
288 | e) Convey the object code using peer-to-peer transmission, provided
289 | you inform other peers where the object code and Corresponding
290 | Source of the work are being offered to the general public at no
291 | charge under subsection 6d.
292 |
293 | A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 |
297 | A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling. In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage. For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product. A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 |
310 | "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source. The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 |
318 | If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information. But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 |
329 | The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed. Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 |
337 | Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 |
343 | 7. Additional Terms.
344 |
345 | "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law. If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 |
354 | When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it. (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.) You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 |
361 | Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 |
365 | a) Disclaiming warranty or limiting liability differently from the
366 | terms of sections 15 and 16 of this License; or
367 |
368 | b) Requiring preservation of specified reasonable legal notices or
369 | author attributions in that material or in the Appropriate Legal
370 | Notices displayed by works containing it; or
371 |
372 | c) Prohibiting misrepresentation of the origin of that material, or
373 | requiring that modified versions of such material be marked in
374 | reasonable ways as different from the original version; or
375 |
376 | d) Limiting the use for publicity purposes of names of licensors or
377 | authors of the material; or
378 |
379 | e) Declining to grant rights under trademark law for use of some
380 | trade names, trademarks, or service marks; or
381 |
382 | f) Requiring indemnification of licensors and authors of that
383 | material by anyone who conveys the material (or modified versions of
384 | it) with contractual assumptions of liability to the recipient, for
385 | any liability that these contractual assumptions directly impose on
386 | those licensors and authors.
387 |
388 | All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10. If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term. If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 |
398 | If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 |
403 | Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 |
407 | 8. Termination.
408 |
409 | You may not propagate or modify a covered work except as expressly
410 | provided under this License. Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 |
415 | However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 |
422 | Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 |
429 | Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License. If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 |
435 | 9. Acceptance Not Required for Having Copies.
436 |
437 | You are not required to accept this License in order to receive or
438 | run a copy of the Program. Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance. However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work. These actions infringe copyright if you do
443 | not accept this License. Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 |
446 | 10. Automatic Licensing of Downstream Recipients.
447 |
448 | Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License. You are not responsible
451 | for enforcing compliance by third parties with this License.
452 |
453 | An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations. If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 |
463 | You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License. For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 |
471 | 11. Patents.
472 |
473 | A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based. The
475 | work thus licensed is called the contributor's "contributor version".
476 |
477 | A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version. For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 |
487 | Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 |
492 | In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement). To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 |
499 | If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients. "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 |
513 | If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 |
521 | A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License. You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 |
536 | Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 |
540 | 12. No Surrender of Others' Freedom.
541 |
542 | If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License. If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all. For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 |
552 | 13. Use with the GNU Affero General Public License.
553 |
554 | Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work. The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 |
563 | 14. Revised Versions of this License.
564 |
565 | The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time. Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 |
570 | Each version is given a distinguishing version number. If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation. If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 |
579 | If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 |
584 | Later license versions may give you additional or different
585 | permissions. However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 |
589 | 15. Disclaimer of Warranty.
590 |
591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 |
600 | 16. Limitation of Liability.
601 |
602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 |
612 | 17. Interpretation of Sections 15 and 16.
613 |
614 | If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 |
621 | END OF TERMS AND CONDITIONS
622 |
623 | How to Apply These Terms to Your New Programs
624 |
625 | If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 |
629 | To do so, attach the following notices to the program. It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 |
634 | <one line to give the program's name and a brief idea of what it does.>
635 | Copyright (C) <year> <name of author>
636 |
637 | This program is free software: you can redistribute it and/or modify
638 | it under the terms of the GNU General Public License as published by
639 | the Free Software Foundation, either version 3 of the License, or
640 | (at your option) any later version.
641 |
642 | This program is distributed in the hope that it will be useful,
643 | but WITHOUT ANY WARRANTY; without even the implied warranty of
644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
645 | GNU General Public License for more details.
646 |
647 | You should have received a copy of the GNU General Public License
648 | along with this program. If not, see <https://www.gnu.org/licenses/>.
649 |
650 | Also add information on how to contact you by electronic and paper mail.
651 |
652 | If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 |
655 | <program> Copyright (C) <year> <name of author>
656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 | This is free software, and you are welcome to redistribute it
658 | under certain conditions; type `show c' for details.
659 |
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License. Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 |
664 | You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <https://www.gnu.org/licenses/>.
668 |
669 | The GNU General Public License does not permit incorporating your program
670 | into proprietary programs. If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library. If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License. But first, please read
674 | <https://www.gnu.org/licenses/why-not-lgpl.html>.
675 |
--------------------------------------------------------------------------------