├── docs ├── authors.rst ├── history.rst ├── readme.rst ├── contributing.rst ├── _static │ ├── logo.png │ └── logo.xcf ├── index.rst ├── installation.rst ├── usage.rst ├── Makefile └── conf.py ├── requirements.txt ├── images ├── proxy-db.png └── proxy-db.xcf ├── tests ├── __init__.py ├── _compat.py ├── test_migrations.py ├── test_utils.py ├── test_models.py ├── test_countries.py ├── test_proxies.py ├── test_management.py └── test_providers.py ├── requirements-dev.txt ├── scripts └── proxy-db ├── .bumpversion.cfg ├── AUTHORS.rst ├── proxy_db ├── exceptions.py ├── __init__.py ├── _compat.py ├── db.py ├── migrations │ ├── migration_0_3_0.py │ ├── __init__.py │ └── migration_base.py ├── utils.py ├── export.py ├── management.py ├── proxies.py ├── models.py ├── countries.py └── providers.py ├── MANIFEST.in ├── tox.ini ├── .editorconfig ├── .github ├── ISSUE_TEMPLATE.md └── workflows │ ├── publish.yml │ └── test.yml ├── setup.cfg ├── LICENSE ├── .gitignore ├── HISTORY.rst ├── Makefile ├── CONTRIBUTING.rst ├── README.rst └── setup.py /docs/authors.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../AUTHORS.rst 2 | -------------------------------------------------------------------------------- /docs/history.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../HISTORY.rst 2 | -------------------------------------------------------------------------------- /docs/readme.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../README.rst 2 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | .. 
include:: ../CONTRIBUTING.rst 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests 2 | click 3 | sqlalchemy 4 | beautifulsoup4 5 | six 6 | -------------------------------------------------------------------------------- /images/proxy-db.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nekmo/proxy-db/HEAD/images/proxy-db.png -------------------------------------------------------------------------------- /images/proxy-db.xcf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nekmo/proxy-db/HEAD/images/proxy-db.xcf -------------------------------------------------------------------------------- /docs/_static/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nekmo/proxy-db/HEAD/docs/_static/logo.png -------------------------------------------------------------------------------- /docs/_static/logo.xcf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nekmo/proxy-db/HEAD/docs/_static/logo.xcf -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """Unit test package for proxy_db.""" 4 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | requests-mock 3 | bumpversion 4 | tox>=1.8 5 | codecov 6 | mock; python_version < '3.6' 7 | -------------------------------------------------------------------------------- /scripts/proxy-db: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from proxy_db.management import cli 3 | 4 | 5 | if __name__ == "__main__": 6 | cli() 7 | -------------------------------------------------------------------------------- /.bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.3.1 3 | commit = True 4 | tag = True 5 | 6 | [bumpversion:file:proxy_db/__init__.py] 7 | 8 | -------------------------------------------------------------------------------- /tests/_compat.py: -------------------------------------------------------------------------------- 1 | 2 | try: 3 | from mock import patch, Mock, mock_open, call 4 | except ImportError: 5 | from unittest.mock import patch, Mock, mock_open, call 6 | -------------------------------------------------------------------------------- /AUTHORS.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | Credits 3 | ======= 4 | 5 | Development Lead 6 | ---------------- 7 | 8 | * Nekmo Com 9 | 10 | Contributors 11 | ------------ 12 | 13 | None yet. Why not be the first? 
class ProxyDB(Exception):
    """Base class of every exception defined in this module."""


class NoProvidersAvailable(ProxyDB):
    """No provider is available.

    NOTE(review): meaning taken from the class name; the raise sites live
    outside this module.
    """


class UnknownExportFormat(ProxyDB):
    """The requested export format is not known.

    NOTE(review): meaning taken from the class name; the raise sites live
    outside this module.
    """


class UnsupportedEngine(ProxyDB):
    """The engine in use is not supported.

    NOTE(review): meaning taken from the class name; the raise sites live
    outside this module.
    """
-------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org 2 | 3 | root = true 4 | 5 | [*] 6 | indent_style = space 7 | indent_size = 4 8 | trim_trailing_whitespace = true 9 | insert_final_newline = true 10 | charset = utf-8 11 | end_of_line = lf 12 | 13 | [*.bat] 14 | indent_style = tab 15 | end_of_line = crlf 16 | 17 | [LICENSE] 18 | insert_final_newline = false 19 | 20 | [Makefile] 21 | indent_style = tab 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | * proxy-db version: 2 | * Python version: 3 | * Operating System: 4 | 5 | ### Description 6 | 7 | Describe what you were trying to get done. 8 | Tell us what happened, what went wrong, and what you expected to happen. 9 | 10 | ### What I Did 11 | 12 | ``` 13 | Paste the command(s) you ran and the output. 14 | If there was a crash, please include the traceback here. 
def get_or_create(session, model, defaults=None, **kwargs):
    """Fetch the first ``model`` row matching ``kwargs`` or build a new one.

    :param session: object exposing ``query()`` and ``add()``.
    :param model: class to query and, when nothing matches, to instantiate.
    :param defaults: optional mapping of extra constructor arguments used
        only when a new instance is built; its values win over ``kwargs``.
    :param kwargs: passed to ``filter_by`` for the lookup and reused as
        constructor arguments, except values that are ``ClauseElement``.
    :return: ``(instance, found)``. ``found`` is ``True`` when the instance
        came from the query and ``False`` when it was just built.

    A newly built instance is added to the session; this function does not
    commit.
    """
    found = session.query(model).filter_by(**kwargs).first()
    if found:
        return found, True

    # SQL expressions are valid filters but not valid attribute values.
    attrs = {}
    for name, value in kwargs.items():
        if isinstance(value, ClauseElement):
            continue
        attrs[name] = value
    if defaults:
        attrs.update(defaults)

    created = model(**attrs)
    session.add(created)
    return created, False
class TestMigrateVersion(unittest.TestCase):
    """Tests for ``proxy_db.migrations.MigrateVersion``."""

    def test_pending_versions(self):
        """No version is reported as pending."""
        self.assertEqual(MigrateVersion().pending_versions(), set())

    def test_is_last_version(self):
        """The database is reported as being at the last known version."""
        migrator = MigrateVersion()
        self.assertTrue(migrator.is_last_version())

    @patch('proxy_db.migrations.migration_0_3_0.Migrate.migrate')
    def test_migrate_version(self, migrate_mock):
        """Migrating '0.3.0' calls that migration's ``migrate`` exactly once."""
        migrator = MigrateVersion()
        migrator.migrate_version('0.3.0')
        migrate_mock.assert_called_once()
class Migrate(MigrateBase):
    """Migration to schema version 0.3.0."""

    version = '0.3.0'
    # Models and the columns copied for each of them by
    # ``MigrateBase.migrate_data``.
    tables = [
        dict(
            model=ProviderRequest,
            fields=['id', 'provider', 'request_id', 'created_at', 'updated_at', 'results'],
        ),
        dict(
            model=Proxy,
            fields=['id', 'votes', 'country', 'created_at', 'updated_at', 'on_provider_at'],
        ),
    ]

    def migrate_data(self):
        """Copy the tables, then fill in ``protocol`` on every proxy.

        The protocol is the text before the first ``:`` of the proxy id.
        NOTE(review): presumably ids in the previous schema look like
        ``protocol://host:port`` -- confirm against the old data.
        """
        super(Migrate, self).migrate_data()
        session = self.create_new_database()
        for proxy in session.query(Proxy).all():
            scheme = proxy.id.split(':')[0]
            proxy.protocol = scheme
        session.commit()
Proxy(id=self.proxy_id).negative() 21 | 22 | def test_in(self): 23 | proxy = Proxy(id=self.proxy_id) 24 | for protocol in PROTOCOLS: 25 | self.assertTrue(protocol in proxy) 26 | 27 | def test_copy(self): 28 | self.assertEqual(Proxy(id=self.proxy_id).copy(), {key: self.proxy_id for key in PROTOCOLS}) 29 | 30 | def test_str(self): 31 | self.assertEqual(str(Proxy(id=self.proxy_id)), self.proxy_id) 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
class TestIpCountry(unittest.TestCase):
    """Tests for ``proxy_db.countries.ip_country``."""

    @patch('proxy_db.countries.geoip2_manager')
    def test_ip_country(self, manager_mock):
        """A lookup indexes the geoip2 manager exactly once."""
        ip_country('proxy')
        manager_mock.__getitem__.assert_called_once()

    def test_without_license(self):
        """An empty string is returned when no license key is available."""
        if geoip2_manager is None:
            # skipTest() raises, so nothing below runs in this case.
            self.skipTest("external module geoip-tools not available")
        target = 'proxy_db.countries.geoip2_manager.is_license_key_available'
        with patch(target, return_value=False) as license_mock:
            country = ip_country('proxy')
            self.assertEqual(country, '')
            # NOTE(review): license_mock replaces is_license_key_available,
            # not the manager, so this does not check manager item access.
            license_mock.__getitem__.assert_not_called()

    @patch('proxy_db.countries.geoip2_manager')
    def test_address_not_found(self, manager_mock):
        """An empty string is returned when the address is not found."""
        try:
            from geoip2.errors import AddressNotFoundError
        except ImportError:
            # skipTest() raises, so nothing below runs in this case.
            self.skipTest("external module geoip2 not available")
        manager_mock.__getitem__.side_effect = AddressNotFoundError
        country = ip_country('proxy')
        self.assertEqual(country, '')
def get_domain(address):
    """Return the host of ``address`` without credentials or port.

    Everything up to the last ``@`` of the network location is dropped, then
    everything from the first ``:`` onwards. An address without a scheme has
    an empty network location and yields ``''``. Bracketed IPv6 literals are
    not handled: they are cut at their first ``:``.
    """
    host_and_port = urlparse(address).netloc.rpartition('@')[2]
    return host_and_port.partition(':')[0]
class MigrateVersion(object):
    """Find and run the database migrations listed in ``versions``."""

    # Known migration versions, oldest first. Each entry 'X.Y.Z' maps to the
    # module ``proxy_db.migrations.migration_X_Y_Z`` (see import_migration).
    versions = [
        '0.3.0',
    ]

    def is_last_version(self):
        """Tell whether the newest recorded version is the last known one.

        :return: ``True`` when the ``Version`` row with the highest id holds
            the last entry of ``versions``; ``False`` otherwise, including
            when no ``Version`` row exists.
        """
        # Imported here rather than at module level, as in the original code.
        from proxy_db.models import create_session, Version
        session = create_session()
        version = session.query(Version).order_by(Version.id.desc()).first()
        return version.version == self.versions[-1] if version else False

    def pending_versions(self):
        """Return the set of known versions with no ``Version`` row.

        The result is an unordered ``set``. Use ``migrate_pending_versions``
        to apply them in order.
        """
        from proxy_db.models import create_session, Version
        session = create_session()
        rows = session.query(Version).order_by(Version.id.asc()).all()
        migrated_versions = {row.version for row in rows}
        return set(self.versions) - migrated_versions

    def migrate_pending_versions(self):
        """Run every pending migration in the order given by ``versions``.

        ``pending_versions`` returns a set, whose iteration order is
        arbitrary. Migrations build on one another, so they are sorted by
        their position in ``versions`` before being applied.
        """
        pending = self.pending_versions()
        for version in sorted(pending, key=self.versions.index):
            self.migrate_version(version)

    def create_all_versions(self):
        """Record every known version without running its migration."""
        for version in self.versions:
            migration_cls = self.import_migration(version)
            migration_cls().create_version_row()

    def import_migration(self, version):
        """Import and return the ``Migrate`` class for ``version``.

        :param version: dotted version string such as ``'0.3.0'``.
        :raises ImportError: propagated from ``import_string``.
        """
        version_alias = version.replace('.', '_')
        return import_string('proxy_db.migrations.migration_{}.Migrate'.format(version_alias))

    def migrate_version(self, version):
        """Instantiate the migration for ``version`` and run ``migrate()``."""
        migration_cls = self.import_migration(version)
        migration_cls().migrate()
class MigrateBase(object):
    """Base class for migrations that rebuild the database from a backup.

    ``migrate`` moves the current database file aside, creates a fresh
    database, copies the configured columns from the backup and records the
    migration's version.
    """

    # Dicts with a 'model' and the list of 'fields' to copy for it.
    tables = []
    # Version string stored by create_version_row().
    version = None

    def __init__(self, proxy_db_file=None):
        """Remember the database file, defaulting to ``PROXY_DB_FILE``."""
        if not proxy_db_file:
            proxy_db_file = PROXY_DB_FILE
        self.proxy_db_file = proxy_db_file

    def get_backup_path_file(self):
        """Return the database file path with ``.bak`` appended."""
        return '{path}.bak'.format(path=self.proxy_db_file)

    def create_backup_file(self):
        """Move the database file to its backup path."""
        backup_path = self.get_backup_path_file()
        shutil.move(self.proxy_db_file, backup_path)

    def create_new_database(self):
        """Create all tables at ``PROXY_DB_DB_URL`` and return a new session.

        NOTE(review): the URL does not take ``self.proxy_db_file`` into
        account -- confirm this is intended when a custom file is given.
        """
        engine = create_engine(PROXY_DB_DB_URL)
        Base.metadata.create_all(engine)
        session_factory = sessionmaker(bind=engine)
        return session_factory()

    def get_backup_database(self):
        """Return a session bound to ``PROXY_DB_DB_URL`` plus ``.bak``.

        NOTE(review): built from the module URL, not from
        ``get_backup_path_file()`` -- confirm both name the same file.
        """
        backup_url = '{url}.bak'.format(url=PROXY_DB_DB_URL)
        session_factory = sessionmaker(bind=create_engine(backup_url))
        return session_factory()

    def migrate_data(self):
        """Copy the columns listed in ``tables`` from the backup database."""
        source_session = self.get_backup_database()
        target_session = self.create_new_database()
        for spec in self.tables:
            model = spec['model']
            fields = spec['fields']
            columns = [getattr(model, name) for name in fields]
            old_rows = source_session.query(model).with_entities(*columns).all()
            # Rebuild each row as a model instance holding only these fields.
            copies = []
            for old_row in old_rows:
                values = {name: getattr(old_row, name) for name in fields}
                copies.append(model(**values))
            target_session.bulk_save_objects(copies)
        target_session.commit()

    def create_version_row(self):
        """Store a ``Version`` row holding this migration's ``version``."""
        session = self.create_new_database()
        session.add(Version(version=self.version))
        session.commit()

    def migrate(self):
        """Back up the database, copy the data and record the version."""
        self.create_backup_file()
        self.migrate_data()
        self.create_version_row()
m.return_value.request.return_value.now.assert_called_once() 45 | 46 | def test_iter(self): 47 | p = ProxiesList() 48 | p._proxies = None 49 | p2 = iter(p) 50 | self.assertEqual(p2._proxies, set()) 51 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean clean-test clean-pyc clean-build docs help 2 | .DEFAULT_GOAL := help 3 | define BROWSER_PYSCRIPT 4 | import os, webbrowser, sys 5 | try: 6 | from urllib import pathname2url 7 | except: 8 | from urllib.request import pathname2url 9 | 10 | webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1]))) 11 | endef 12 | export BROWSER_PYSCRIPT 13 | 14 | define PRINT_HELP_PYSCRIPT 15 | import re, sys 16 | 17 | for line in sys.stdin: 18 | match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) 19 | if match: 20 | target, help = match.groups() 21 | print("%-20s %s" % (target, help)) 22 | endef 23 | export PRINT_HELP_PYSCRIPT 24 | BROWSER := python -c "$$BROWSER_PYSCRIPT" 25 | 26 | help: 27 | @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) 28 | 29 | clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts 30 | 31 | 32 | clean-build: ## remove build artifacts 33 | rm -fr build/ 34 | rm -fr dist/ 35 | rm -fr .eggs/ 36 | find . -name '*.egg-info' -exec rm -fr {} + 37 | find . -name '*.egg' -exec rm -f {} + 38 | 39 | clean-pyc: ## remove Python file artifacts 40 | find . -name '*.pyc' -exec rm -f {} + 41 | find . -name '*.pyo' -exec rm -f {} + 42 | find . -name '*~' -exec rm -f {} + 43 | find . 
class TestAdd(unittest.TestCase):
    """Tests for the ``add`` command of the command line interface."""

    @patch('proxy_db.management.ManualProxy')
    def test_add_proxies(self, manual_proxy_cls):
        """A proxy URL argument is split into protocol and address."""
        runner = CliRunner()
        runner.invoke(add_command, ['http://1.2.3.4:999'])
        manual_proxy_cls.assert_called_once_with('manual')
        expected_proxies = [{'protocol': 'http', 'proxy': '1.2.3.4:999'}]
        manual_proxy_cls.return_value.add_proxies.assert_called_once_with(expected_proxies, 10)

    @patch('proxy_db.management.click.get_text_stream', return_value=[])
    def test_get_text_stream(self, get_text_stream_mock):
        """Without arguments the command asks click for a text stream."""
        runner = CliRunner()
        runner.invoke(add_command, [])
        get_text_stream_mock.assert_called_once()

    @patch('proxy_db.management.click.echo', return_value=[])
    def test_invalid_proxy(self, echo_mock):
        """An invalid proxy is reported through ``click.echo(..., err=True)``."""
        runner = CliRunner()
        runner.invoke(add_command, ['invalid-proxy'])
        first_call = echo_mock.call_args_list[0]
        call_kwargs = first_call[1]
        self.assertTrue(call_kwargs['err'])
-------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | .. highlight:: console 2 | 3 | ============ 4 | Installation 5 | ============ 6 | 7 | 8 | Stable release 9 | -------------- 10 | 11 | To install proxy-db, run these commands in your terminal: 12 | 13 | .. code-block:: console 14 | 15 | $ pip3 install -U proxy-db 16 | 17 | This is the preferred method to install proxy-db, as it will always install the most recent stable release. 18 | 19 | If you don't have `pip`_ installed, this `Python installation guide`_ can guide 20 | you through the process. 21 | 22 | .. _pip: https://pip.pypa.io 23 | .. _Python installation guide: http://docs.python-guide.org/en/latest/starting/installation/ 24 | 25 | Geoip support 26 | ------------- 27 | Some providers do not have the correct country for proxies. To determine the correct country, proxy-db can use geoip. 28 | To use it, install the optional dependencies:: 29 | 30 | $ pip3 install proxy-db[geoip] 31 | 32 | You also need a maxmind Geolite2 license (it's free). To obtain the license, follow these steps: 33 | 34 | 1. `Sign up for a Maxmind Geolite2 account <https://www.maxmind.com/en/geolite2/signup>`_ 35 | 2. `Log in to your Maxmind account <https://www.maxmind.com/en/account/login>`_ 36 | 3. In the menu on the left, navigate to ``Services > My License Key``. 37 | 4. Click ``Generate new license key``. 38 | 39 | Set the environment variable ``MAXMIND_LICENSE_KEY``. 
To set it from Python:: 40 | 41 | import os 42 | os.environ['MAXMIND_LICENSE_KEY'] = '28xjifHSTxVq93xZ' 43 | 44 | Or from the command line before starting the program:: 45 | 46 | $ export MAXMIND_LICENSE_KEY=28xjifHSTxVq93xZ 47 | 48 | HTTPS & SOCKS5 proxies 49 | ---------------------- 50 | To use **socks5 proxies** with requests you need to install socks support:: 51 | 52 | $ pip3 install proxy-db[socks] 53 | 54 | To use **HTTPS proxies** with requests/urllib3 you need to install the latests urllib3 version from sources:: 55 | 56 | $ pip install https://github.com/urllib3/urllib3/archive/master.zip 57 | 58 | Version 1.26 of urllib3 finally supports 59 | `TLS-in-TLS tunnels through proxies <( https://github.com/urllib3/urllib3/pull/2001 )>`_. This version will be 60 | available very soon but until then it is necessary to install it from source code. In case of incompatibility, 61 | `this (deprecated) dependency `_ allows the use of https proxies with 62 | requests. 63 | 64 | 65 | Other releases 66 | -------------- 67 | You can install other versions from Pypi using:: 68 | 69 | $ pip install proxy-db== 70 | 71 | For versions that are not in Pypi (it is a development version):: 72 | 73 | $ pip install git+https://github.com/Nekmo/proxy-db.git@#egg=proxy_db 74 | 75 | 76 | If you do not have git installed: 77 | 78 | $ pip install https://github.com/Nekmo/proxy-db/archive/.zip 79 | -------------------------------------------------------------------------------- /proxy_db/export.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import json 3 | 4 | from proxy_db.exceptions import UnknownExportFormat 5 | 6 | 7 | try: 8 | import tabulate 9 | except ImportError: 10 | tabulate = None 11 | 12 | 13 | DEFAULT_COLUMNS = [ 14 | 'id', 'votes', 'country', 'protocol', 'created_at', 'updated_at', 'on_provider_at', 'providers' 15 | ] 16 | 17 | 18 | def value_to_string(value): 19 | if isinstance(value, set): 20 | return ', 
def get_export_output_classes():
    """Return every export output class that is currently available.

    The built-in outputs listed in ``EXPORT_OUTPUTS`` come first. When the optional
    ``tabulate`` package could be imported, one ``TabulateBaseOutput`` subclass named
    ``<format>-table`` is appended for each key of ``tabulate._table_formats``.

    :return: a new list of output classes; ``EXPORT_OUTPUTS`` itself is not modified.
    :rtype: list
    """
    # Work on a copy. Appending to EXPORT_OUTPUTS directly made the module-level list
    # grow with duplicated tabulate classes every time this function was called.
    classes = list(EXPORT_OUTPUTS)
    if tabulate is None:
        return classes
    for tabulate_format in tabulate._table_formats:
        # ``name`` is evaluated while the class body runs, so each generated class keeps
        # the format of its own iteration.
        class TabulateOutput(TabulateBaseOutput):
            name = '{}-table'.format(tabulate_format)
        classes.append(TabulateOutput)
    return classes
95 | try: 96 | return next(filter(lambda x: x.name == name, get_export_output_classes())) 97 | except StopIteration: 98 | raise UnknownExportFormat('Unknown format: {}. Use one of the following available formats: {}'.format( 99 | name, ', '.join(map(lambda x: x.name, get_export_output_classes())) 100 | )) 101 | 102 | 103 | def get_export_output(name, data, columns=None): 104 | return get_export_output_class(name)(data, columns) 105 | -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | .. highlight:: shell 2 | 3 | ============ 4 | Contributing 5 | ============ 6 | 7 | Contributions are welcome, and they are greatly appreciated! Every 8 | little bit helps, and credit will always be given. 9 | 10 | You can contribute in many ways: 11 | 12 | Types of Contributions 13 | ---------------------- 14 | 15 | Report Bugs 16 | ~~~~~~~~~~~ 17 | 18 | Report bugs at https://github.com/Nekmo/proxy-db/issues. 19 | 20 | If you are reporting a bug, please include: 21 | 22 | * Your operating system name and version. 23 | * Any details about your local setup that might be helpful in troubleshooting. 24 | * Detailed steps to reproduce the bug. 25 | 26 | Fix Bugs 27 | ~~~~~~~~ 28 | 29 | Look through the GitHub issues for bugs. Anything tagged with "bug" 30 | and "help wanted" is open to whoever wants to implement it. 31 | 32 | Implement Features 33 | ~~~~~~~~~~~~~~~~~~ 34 | 35 | Look through the GitHub issues for features. Anything tagged with "enhancement" 36 | and "help wanted" is open to whoever wants to implement it. 37 | 38 | Write Documentation 39 | ~~~~~~~~~~~~~~~~~~~ 40 | 41 | proxy-db could always use more documentation, whether as part of the 42 | official proxy-db docs, in docstrings, or even on the web in blog posts, 43 | articles, and such. 
44 | 45 | Submit Feedback 46 | ~~~~~~~~~~~~~~~ 47 | 48 | The best way to send feedback is to file an issue at https://github.com/Nekmo/proxy-db/issues. 49 | 50 | If you are proposing a feature: 51 | 52 | * Explain in detail how it would work. 53 | * Keep the scope as narrow as possible, to make it easier to implement. 54 | * Remember that this is a volunteer-driven project, and that contributions 55 | are welcome :) 56 | 57 | Get Started! 58 | ------------ 59 | 60 | Ready to contribute? Here's how to set up `proxy-db` for local development. 61 | 62 | 1. Fork the `proxy-db` repo on GitHub. 63 | 2. Clone your fork locally:: 64 | 65 | $ git clone git@github.com:your_name_here/proxy-db.git 66 | 67 | 3. Install your local copy into a virtualenv. Assuming you have virtualenvwrapper installed, this is how you set up your fork for local development:: 68 | 69 | $ mkvirtualenv proxy-db 70 | $ cd proxy-db/ 71 | $ python setup.py develop 72 | 73 | 4. Create a branch for local development:: 74 | 75 | $ git checkout -b name-of-your-bugfix-or-feature 76 | 77 | Now you can make your changes locally. 78 | 79 | 5. When you're done making changes, check that your changes pass flake8 and the tests, including testing other Python versions with tox:: 80 | 81 | $ flake8 proxy_db tests 82 | $ python setup.py test or py.test 83 | $ tox 84 | 85 | To get flake8 and tox, just pip install them into your virtualenv. 86 | 87 | 6. Commit your changes and push your branch to GitHub:: 88 | 89 | $ git add . 90 | $ git commit -m "Your detailed description of your changes." 91 | $ git push origin name-of-your-bugfix-or-feature 92 | 93 | 7. Submit a pull request through the GitHub website. 94 | 95 | Pull Request Guidelines 96 | ----------------------- 97 | 98 | Before you submit a pull request, check that it meets these guidelines: 99 | 100 | 1. The pull request should include tests. 101 | 2. If the pull request adds functionality, the docs should be updated. 
Put 102 | your new functionality into a function with a docstring, and add the 103 | feature to the list in README.rst. 104 | 3. The pull request should work for Python 2.6, 2.7, 3.3, 3.4 and 3.5, and for PyPy. Check 105 | https://travis-ci.org/Nekmo/proxy-db/pull_requests 106 | and make sure that the tests pass for all supported Python versions. 107 | 108 | Tips 109 | ---- 110 | 111 | To run a subset of tests:: 112 | 113 | $ py.test tests.test_proxy_db 114 | 115 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | .. image:: https://raw.githubusercontent.com/Nekmo/proxy-db/master/images/proxy-db.png 2 | 3 | | 4 | 5 | .. image:: https://img.shields.io/github/workflow/status/Nekmo/proxy-db/Tests.svg?style=flat-square&maxAge=2592000 6 | :target: https://github.com/Nekmo/proxy-db/actions?query=workflow%3ATests 7 | :alt: Latest Tests CI build status 8 | 9 | .. image:: https://img.shields.io/pypi/v/proxy-db.svg?style=flat-square 10 | :target: https://pypi.org/project/proxy-db/ 11 | :alt: Latest PyPI version 12 | 13 | .. image:: https://img.shields.io/pypi/pyversions/proxy-db.svg?style=flat-square 14 | :target: https://pypi.org/project/proxy-db/ 15 | :alt: Python versions 16 | 17 | .. image:: https://img.shields.io/codeclimate/maintainability/Nekmo/proxy-db.svg?style=flat-square 18 | :target: https://codeclimate.com/github/Nekmo/proxy-db 19 | :alt: Code Climate 20 | 21 | .. image:: https://img.shields.io/codecov/c/github/Nekmo/proxy-db/master.svg?style=flat-square 22 | :target: https://codecov.io/github/Nekmo/proxy-db 23 | :alt: Test coverage 24 | 25 | .. 
image:: https://img.shields.io/requires/github/Nekmo/proxy-db.svg?style=flat-square 26 | :target: https://requires.io/github/Nekmo/proxy-db/requirements/?branch=master 27 | :alt: Requirements Status 28 | 29 | 30 | ======== 31 | proxy-db 32 | ======== 33 | 34 | 35 | Manage free and private proxies on local db for Python Projects. Each proxy has a score according to how it works. 36 | Add a positive vote if the proxy works correctly and a negative vote if it does not work. 37 | 38 | .. code-block:: python 39 | 40 | import requests 41 | from requests.exceptions import Timeout, ConnectionError, ProxyError 42 | from proxy_db.proxies import ProxiesList 43 | 44 | proxy = next(ProxiesList()) 45 | try: 46 | requests.get('http://site.com/', proxies=proxy) 47 | except (Timeout, ConnectionError, ProxyError): 48 | proxy.negative() 49 | else: 50 | proxy.positive() 51 | 52 | 53 | Proxy-db will return the best proxies first (more positive votes). You can also **filter by country**: 54 | 55 | .. code-block:: python 56 | 57 | from proxy_db.proxies import ProxiesList 58 | 59 | spain_proxy = next(ProxiesList('es')) 60 | # ... 61 | 62 | 63 | You can also **filter by provider**: 64 | 65 | .. code-block:: python 66 | 67 | from proxy_db.proxies import ProxiesList 68 | 69 | proxy_nova_proxy = next(ProxiesList(provider='Proxy Nova')) 70 | # ... 71 | 72 | 73 | Free proxies providers included: 74 | 75 | * Proxy Nova 76 | * Nord VPN (requires ``PROXYDB_NORDVPN_USERNAME`` & ``PROXYDB_NORDVPN_PASSWORD`` env. variables). 77 | 78 | For more information see `the docs `_. 79 | 80 | 81 | Install 82 | ======= 83 | If you have **Pip installed on your system**, you can use it to install the latest ProxyDB stable version:: 84 | 85 | $ pip3 install proxy-db 86 | 87 | Python 2.7 & 3.4-3.9 are supported but Python 3.x is recommended. Use ``pip2`` on install for Python2. 88 | `More info in the documentation `_ 89 | 90 | Some providers do not have the correct country for proxies. 
def proxy_is_valid(proxy):
    """Tell whether a parsed proxy URL has both a scheme and a network location.

    :param proxy: object exposing ``scheme`` and ``netloc`` attributes, such as the
                  result of ``urlparse``.
    :return: the (falsy) scheme when it is missing, otherwise the network location;
             the result is truthy only when both parts are present.
    """
    scheme = proxy.scheme
    if not scheme:
        return scheme
    return proxy.netloc
@cli.command(name='add')
@click.option('--file', help='Path to the file with the proxies.',
              type=click.File('r'), required=False)
@click.option('--votes', default=10, type=int,
              help='Default votes score. This counter sets the order in which the proxies will be obtained.')
@click.option('--provider', default='manual', type=str,
              help='Provider name for proxies. It allows to know the origin of the proxies and search by provider.')
@click.argument('proxies', type=str, required=False, nargs=-1)
def add_command(file=None, votes=10, provider='manual', proxies=None):
    """Add proxies in <protocol>://<address>:<port> format or
    <protocol>://<username>:<password>@<address>:<port> format.
    """
    # Source precedence is unchanged: --file first, then the positional arguments and,
    # when neither is given, standard input.
    if file:
        # One proxy per line. ``file.read()`` alone is a single string, and iterating
        # over it would yield characters instead of proxies.
        proxies = file.read().splitlines()
    elif not proxies:
        click.echo('Trying to read proxies from stdin. Use Ctrl + C to cancel. '
                   'To add proxies in another way use --help.')
        proxies = click.get_text_stream('stdin')
    # Remove surrounding whitespace (stdin lines keep their newline) and ignore blank
    # entries so that empty lines are not reported as invalid proxies.
    stripped_proxies = (strip_chars(proxy) for proxy in proxies)
    parsed_proxies = {urlparse(proxy) for proxy in stripped_proxies if proxy}
    invalid_proxies = set(filterfalse(proxy_is_valid, parsed_proxies))
    if invalid_proxies:
        click.echo('Invalid proxies entered: {}'.format(
            ', '.join(map(lambda x: x.geturl(), invalid_proxies))), err=True)
    parsed_proxies -= invalid_proxies
    proxies_data = [{'protocol': proxy.scheme, 'proxy': proxy.netloc} for proxy in parsed_proxies]
    proxy_instances = ManualProxy(provider).add_proxies(proxies_data, votes)
    # Instances whose ``updated_at`` is still None are counted as newly created.
    created_count = sum(1 for proxy in proxy_instances if proxy.updated_at is None)
    click.echo('Read {} proxies. {} new proxies have been created.'.format(len(parsed_proxies), created_count))
Examples: http, https, socks5.', default='') 76 | @click.option('--provider', help='Provider name to filter.', default='') 77 | def list_command(format, columns, min_votes, country, protocol, provider): 78 | """List proxies registered in proxy-db.'""" 79 | columns = [c.strip() for c in columns.split(',')] if columns else [] 80 | session = create_session() 81 | proxies = session.query(Proxy) 82 | if min_votes is not None: 83 | proxies = proxies.filter(Proxy.votes > min_votes) 84 | if country: 85 | proxies = proxies.filter(Proxy.country == country.upper()) 86 | if protocol: 87 | proxies = proxies.filter(Proxy.protocol == protocol.lower()) 88 | if provider: 89 | proxies = proxies.join(Proxy.provider_requests).filter( 90 | ProviderRequest.provider == provider, 91 | ) 92 | proxies = proxies.all() 93 | output = get_export_output(format, proxies, columns) 94 | click.echo(output) 95 | 96 | 97 | if __name__ == '__main__': 98 | cli() 99 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """Manage free and private proxies on local db for Python Projects. 
def read_requirement_file(path):
    """Return the lines of the requirements file located at ``path``.

    Lines are returned exactly as read, trailing newline included.

    :param path: path of the requirements file.
    :return: list with one string per line of the file.
    """
    with open(path) as requirements:
        lines = list(requirements)
    return lines
def get_python_versions(string_range):
    """Expand a Python version range into the list of versions it covers.

    ``'3.4-3.9'`` becomes ``['3.4', '3.5', ..., '3.9']``. A value without a dash is
    returned unchanged inside a list.

    Versions are parsed as integer ``major.minor`` pairs rather than floats, so two-digit
    minors such as ``3.10`` are handled (a float would read ``3.10`` as ``3.1``).

    :param string_range: a single version (``'2.7'``) or an inclusive range (``'3.8-3.12'``).
    :return: list of ``'major.minor'`` strings.
    :raises ValueError: if a bound is not numeric or the range does not have two bounds.
    """
    if '-' not in string_range:
        return [string_range]

    def parse(version):
        # A bound without a minor part ("3") counts as minor 0.
        major, _, minor = version.strip().partition('.')
        return int(major), int(minor or 0)

    (first_major, first_minor), (last_major, last_minor) = map(parse, string_range.split('-'))
    if first_major == last_major:
        return ['{}.{}'.format(first_major, minor) for minor in range(first_minor, last_minor + 1)]
    # Ranges that cross a major version keep the previous behaviour: versions are
    # enumerated as consecutive tenths (2.7, 2.8, 2.9, 3.0, ...).
    first = first_major * 10 + first_minor
    last = last_major * 10 + last_minor
    return ['{}.{}'.format(*divmod(number, 10)) for number in range(first, last + 1)]
class ListingStrategy(object):
    """Base strategy that decides which proxy a query returns next.

    :param filters: extra filter expressions applied to every query (optional).
    :param order_by: ordering expression; when ``None`` proxies are ordered by
                     ``Proxy.created_at`` descending.
    :param no_repeat: when true, proxies already returned by this strategy are excluded.
    """

    def __init__(self, filters=None, order_by=None, no_repeat=True):
        self._proxies = set()
        self.no_repeat = no_repeat
        self.order_by = order_by
        self.filters = filters

    def get_default_filters(self):
        """Return the filters the strategy always applies (none for the base class)."""
        return []

    def get_order_by(self, query):
        """Return the ordering expression to use for ``query``."""
        if self.order_by is None:
            return Proxy.created_at.desc()
        return self.order_by

    def get_filters(self, query):
        """Return the user filters, the default filters and, if enabled, the no-repeat filter."""
        combined = list(self.filters or [])
        combined += self.get_default_filters()
        if not self.no_repeat:
            return combined
        combined.append(~Proxy.id.in_(self._returned_proxies()))
        return combined

    def _returned_proxies(self):
        """Return the ids of the proxies already handed out."""
        returned_ids = []
        for returned in self._proxies:
            returned_ids.append(returned.id)
        return returned_ids

    def get_query(self, query):
        """Return ``query`` with the strategy filters and ordering applied."""
        filtered = query.filter(*self.get_filters(query))
        ordering = self.get_order_by(query)
        return filtered.order_by(ordering)

    def next(self, query):
        """Return the first proxy matching the strategy, or ``None`` when there is none."""
        candidate = self.get_query(query).first()
        if candidate is None:
            return None
        if self.no_repeat:
            # Remember it so the following calls skip this proxy.
            self._proxies.add(candidate)
        candidate._set_providers()
        return candidate
isinstance(country, six.string_types): 82 | country = country.upper() 83 | self.request_options = dict( 84 | country=country, 85 | protocol=protocol, 86 | ) 87 | provider_name = provider 88 | if provider is not None and isinstance(provider, str): 89 | provider = next(iter(filter(lambda x: x.name == provider, PROVIDERS)), NONE) 90 | if provider is NONE: 91 | manual_provider_exists = create_session().query( 92 | exists().where(ProviderRequest.provider == provider_name) 93 | ).scalar() 94 | assert manual_provider_exists is True, "Invalid provider name." 95 | provider = ManualProxy(provider_name) 96 | self.provider = provider 97 | if strategy and isinstance(strategy, type): 98 | # Is a class without initialize. Instance now. 99 | strategy = strategy() 100 | self.strategy = strategy or VotesListingStrategy() 101 | 102 | def available_providers(self): 103 | providers = PROVIDERS 104 | if self.provider: 105 | providers = [self.provider] 106 | return filter(lambda x: x.is_available(), providers) 107 | 108 | def find_db_proxy(self): 109 | query = create_session().query(Proxy).join(Proxy.provider_requests).filter( 110 | ProviderRequest.provider.in_([x.name for x in self.available_providers()]), 111 | ) 112 | country = self.request_options['country'] 113 | protocol = self.request_options['protocol'] 114 | if country: 115 | query = query.filter(Proxy.country == country) 116 | if protocol: 117 | query = query.filter(Proxy.protocol == protocol) 118 | return self.strategy.next(query) 119 | 120 | def find_provider(self): 121 | for provider in self.available_providers(): 122 | req = provider.request(**self.request_options) 123 | if req.requires_update(): 124 | return provider 125 | raise NoProvidersAvailable 126 | 127 | def reload_provider(self): 128 | provider = self.find_provider() 129 | provider.request(**self.request_options).now() 130 | 131 | def reload_provider_without_error(self): 132 | try: 133 | self.reload_provider() 134 | except NoProvidersAvailable: 135 | pass 136 | 137 
class ModelMixin:
    """Mixin that resolves parameter names, optionally nested, on an instance."""

    def get_param(self, name):
        """Return the value of the parameter ``name``.

        ``name`` is an attribute name. A double underscore continues the lookup on the
        related object(s): ``'relation__field'`` resolves ``field`` through the
        ``get_param`` method of whatever ``relation`` holds.

        * If the attribute is a tuple or list and a nested name is given, the result is
          a set with the nested parameter of every element.
        * If the attribute is a single object and a nested name is given, the nested
          parameter of that object is returned.
        * Otherwise a callable attribute is called without arguments and its result is
          returned; any other value is returned as is.

        :param name: parameter name, e.g. ``'votes'`` or ``'provider_requests__provider'``.
        :return: the resolved value.
        :raises AttributeError: if an attribute in the chain does not exist.
        """
        attribute, separator, nested_name = name.partition('__')
        param_value = getattr(self, attribute)
        is_nested = bool(separator)
        if is_nested and isinstance(param_value, (tuple, list)):
            return {value.get_param(nested_name) for value in param_value}
        if is_nested:
            # This branch used to call the non-existent ``param_parts`` method and
            # always raised AttributeError.
            return param_value.get_param(nested_name)
        if callable(param_value):
            return param_value()
        return param_value
class Proxy(ModelMixin, Base):
    """ORM model of a proxy stored in the ``proxies`` table.

    Besides the mapped columns, an instance offers a small read-only mapping interface
    (``in``, ``[]``, ``get`` and ``copy``) keyed by the names in ``PROTOCOLS``; every key
    maps to the proxy URL returned by ``str(self)``.
    """
    __tablename__ = 'proxies'
    _proxies_list = None

    id = Column(String(255), primary_key=True)
    votes = Column(Integer, default=0)
    country = Column(String(5), index=True)
    protocol = Column(String(32), index=True)
    created_at = Column(DateTime(timezone=True), server_default=func.now())
    updated_at = Column(DateTime(timezone=True), onupdate=func.now())
    on_provider_at = Column(DateTime(timezone=True))
    # Plain class-level defaults (not columns); ``_set_providers`` replaces them with
    # per-instance values.
    providers = {}
    credentials = ()

    def get_updated_proxy(self, session=None):
        """Query the database again for the proxy with this instance's id.

        :param session: session to use; a new one is created when omitted.
        :return: the first row whose id matches, or ``None`` if there is no such row.
        :rtype: Proxy
        """
        session = session or create_session()
        return session.query(Proxy).filter_by(id=self.id).first()

    def vote(self, vote):
        """Add ``vote`` to the stored votes of this proxy and commit the change.

        The row is re-read through a new session, so the update is applied to that fresh
        instance rather than to ``self``.
        """
        session = create_session()
        instance = self.get_updated_proxy(session)
        instance.votes += vote
        session.commit()

    def positive(self):
        """Increase the stored votes by one."""
        self.vote(1)

    def negative(self):
        """Decrease the stored votes by one."""
        self.vote(-1)

    def get(self, key, default=None):
        """Return ``self[key]``, or ``default`` when ``key`` is not in ``PROTOCOLS``."""
        try:
            return self[key]
        except KeyError:
            return default

    def set_proxies_list(self, proxies_list):
        """Store ``proxies_list`` on the instance."""
        self._proxies_list = proxies_list

    def __contains__(self, item):
        """Return whether ``item`` is one of the names in ``PROTOCOLS``."""
        return item in PROTOCOLS

    def __getitem__(self, item):
        """Return the proxy URL for a name in ``PROTOCOLS``; raise ``KeyError`` otherwise."""
        if item not in PROTOCOLS:
            raise KeyError
        return str(self)

    def copy(self):
        """Return a plain dict mapping every name in ``PROTOCOLS`` to the proxy URL."""
        return {key: str(self) for key in PROTOCOLS}

    def _set_providers(self):
        """Fill ``credentials`` and ``providers`` from the related provider requests."""
        provider_instances = map(lambda x: x.get_provider_instance(), self.provider_requests)
        # Use the first provider that reports having credentials, if any.
        credential_provider = next(filter(lambda x: x.has_credentials(), provider_instances), None)
        if credential_provider:
            self.credentials = credential_provider.credentials()
        # Set with the provider name of every related provider request.
        self.providers = {proxy.provider for proxy in self.provider_requests}

    def __repr__(self):
        # NOTE(review): the format template is an empty string here, so this returns ''
        # and both arguments are ignored. The template text looks lost - confirm.
        return "".format(self, ','.join(self.providers))

    def proxy_with_credentials(self):
        """Return the proxy URL, with ``username:password@`` inserted when credentials are set.

        Without credentials the ``id`` is returned unchanged.
        """
        if self.credentials:
            url_result = urlparse(self.id)
            return '{url_result.scheme}://{username}:{password}@{url_result.netloc}'.format(
                username=self.credentials[0], password=self.credentials[1],
                url_result=url_result
            )
        return self.id

    def __str__(self):
        """Return the proxy URL, including credentials when available."""
        return self.proxy_with_credentials()
def create_session():
    """Return a new session produced by the module-level ``session_maker``.

    ``session_maker`` is the ``sessionmaker`` bound to the module's ``engine``,
    so every call yields a separate session on the same database.
    """
    return session_maker()
code-block:: python 34 | 35 | import requests 36 | from requests.exceptions import Timeout, ConnectionError, ProxyError 37 | from proxy_db.proxies import ProxiesList 38 | 39 | proxy = next(ProxiesList()) 40 | 41 | try: 42 | requests.get('http://site.com/', proxies=proxy) 43 | except (Timeout, ConnectionError, ProxyError): 44 | proxy.negative() 45 | else: 46 | proxy.positive() 47 | 48 | This example tests proxies one by one **to find a proxy that works**: 49 | 50 | .. code-block:: python 51 | 52 | import requests 53 | from requests.exceptions import Timeout, ConnectionError, ProxyError 54 | from proxy_db.proxies import ProxiesList 55 | 56 | resp = None 57 | 58 | for proxy in ProxiesList(): 59 | try: 60 | resp = requests.get('http://site.com/', proxies=proxy) 61 | except (Timeout, ConnectionError, ProxyError): 62 | proxy.negative() 63 | else: 64 | proxy.positive() 65 | break 66 | 67 | if resp is not None: 68 | print(f'Response: {resp.text}') 69 | else: 70 | print('Could not get response') 71 | 72 | 73 | Countries 74 | --------- 75 | To force the country of the proxies use the country code in ``ProxiesList``: 76 | 77 | .. code-block:: 78 | 79 | >>> from proxy_db.proxies import ProxiesList 80 | >>> p = next(ProxiesList('es')) 81 | >>> p.country 82 | 'ES' 83 | 84 | Countries use `the 2-character ISO code <https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2>`_. 85 | 86 | 87 | Change list strategy 88 | ==================== 89 | By default proxy-db sorts proxies by votes and only returns those with more than 1 vote (proxies with negative votes are ignored). 90 | There are other strategies available, such as returning random proxies: 91 | 92 | .. code-block:: 93 | 94 | >>> from proxy_db.proxies import ProxiesList, RandomListingStrategy 95 | >>> p = next(ProxiesList(strategy=RandomListingStrategy)) 96 | 97 | It is also possible to change the default strategy options: 98 | 99 | ..
code-block:: 100 | 101 | >>> from proxy_db.proxies import ProxiesList, VotesListingStrategy 102 | >>> p = next(ProxiesList(strategy=VotesListingStrategy(min_votes=-5))) 103 | 104 | 105 | Change database 106 | --------------- 107 | To change the path to the sqlite database define the environment variable ``PROXY_DB_FILE``, by default 108 | ``~/.local/var/lib/proxy-db/db.sqlite3``. The environment variable ``PROXY_DB_DB_URL`` changes the 109 | database configuration, by default ``sqlite:///{PROXY_DB_FILE}``. 110 | 111 | proxy-db uses sqlalchemy. For more information about how to configure the url to the database, 112 | `check its documentation <https://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls>`_. 113 | 114 | Add proxies manually 115 | ==================== 116 | You can add one or more proxies from the command line to insert them into the database. To add proxies:: 117 | 118 | $ proxy-db add [<proxy 1>] [<proxy 2>] [<proxy n>] 119 | 120 | For example:: 121 | 122 | $ proxy-db add http://5.0.0.0:8080 http://6.0.0.0:8080 http://7.0.0.0:8080 123 | 124 | You can also import proxies from a file, with one proxy per line:: 125 | 126 | $ proxy-db add --file <file> 127 | 128 | For example:: 129 | 130 | $ proxy-db add --file proxies.txt 131 | 132 | You can also send the proxies through stdin, one proxy per line:: 133 | 134 | $ proxy-db add < proxies.txt 135 | 136 | By default the proxies are created with the *"manual"* provider but this can be changed using the 137 | ``--provider <provider>`` parameter. For example:: 138 | 139 | $ proxy-db add --provider "my_provider" http://5.0.0.0:8080 140 | 141 | Added proxies have 10 positive votes by default. These votes will increase with successful requests and 142 | will decrease if the proxy fails. To change the default votes use the ``--votes <votes>`` parameter.
For example:: 143 | 144 | $ proxy-db add --votes 50 http://5.0.0.0:8080 145 | 146 | 147 | List proxies 148 | ============ 149 | To list all the proxies already available you can use the following command:: 150 | 151 | $ proxy-db list 152 | http://185.146.167.226:1080 153 | http://185.89.182.32:86 154 | http://186.28.217.19:80 155 | http://185.116.137.248:1080 156 | http://185.176.129.14:1080 157 | ... 158 | 159 | 160 | By default the proxies will be listed line by line as in the previous example. You can change the format 161 | using ``--format <format>``. Available options: line, json. More options are available by installing 162 | the ``tabulate`` package using ``pip install tabulate``. To see all the options after installing 163 | *tabulate* use ``proxy-db list --help``. 164 | 165 | .. code-block:: shell 166 | 167 | $ proxy-db list --format json 168 | 169 | 170 | It is also possible to choose the columns to display. To see the available columns use ``proxy-db list --help``:: 171 | 172 | $ proxy-db list --columns <column 1>[,<column 2>] 173 | 174 | 175 | For example:: 176 | 177 | $ proxy-db list --columns id,votes,country,protocol,providers 178 | 179 | 180 | Proxies can be filtered using various options:: 181 | 182 | $ proxy-db list [--min-votes <votes>] [--country <country>] 183 | [--protocol <protocol>] [--provider <provider>] 184 | 185 | 186 | For example:: 187 | 188 | $ proxy-db list --min-votes 10 --country ES \ 189 | --protocol https --provider "Nord VPN" 190 | 191 | 192 | Payment providers 193 | ================= 194 | Some providers require a payment and additional steps to use. 195 | 196 | Nord VPN 197 | -------- 198 | 199 | 1. Log in to Nord VPN. 200 | 2. Go to `Nord VPN service details <https://my.nordaccount.com/dashboard/nordvpn/>`_. 201 | 3. In **advanced configuration** copy/create your **username** and **password** for *Service credentials 202 | (manual setup)*. These credentials are different from the username and password to log into the 203 | Nord VPN website. 204 | 4.
Set environment variables ``PROXYDB_NORDVPN_USERNAME`` and ``PROXYDB_NORDVPN_PASSWORD`` in your program. 205 | 206 | To set **environment variables in Python**: 207 | 208 | .. code-block:: python 209 | 210 | import os 211 | 212 | os.environ['PROXYDB_NORDVPN_USERNAME'] = '2dybg3pvxN4XYpLpF2iBE3wz' 213 | os.environ['PROXYDB_NORDVPN_PASSWORD'] = 'hjFq8QkKsnKM42o4Yzta8y2K' 214 | 215 | To set **environment variables in Bash** (before run your program): 216 | 217 | 218 | .. code-block:: shell 219 | 220 | $ export PROXYDB_NORDVPN_USERNAME=2dybg3pvxN4XYpLpF2iBE3wz 221 | $ export PROXYDB_NORDVPN_PASSWORD=hjFq8QkKsnKM42o4Yzta8y2K 222 | 223 | $ ./your-program.py 224 | 225 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXAPIDOC = sphinx-apidoc 8 | PAPER = 9 | BUILDDIR = _build 10 | PROJECT_NAME = proxy-db 11 | DRIVE_FOLDER = 12 | 13 | # User-friendly check for sphinx-build 14 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 15 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 16 | endif 17 | 18 | # Internal variables. 19 | PAPEROPT_a4 = -D latex_paper_size=a4 20 | PAPEROPT_letter = -D latex_paper_size=letter 21 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 22 | # the i18n builder cannot share the environment and doctrees with the others 23 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
# Print the list of build targets defined in this Makefile.
help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo "  pdf        to make standalone PDF files"
	@echo "  rinohpdf   to make a PDF file with the rinoh builder"
	@echo "  html       to make standalone HTML files"
	@echo "  dirhtml    to make HTML files named index.html in directories"
	@echo "  singlehtml to make a single large HTML file"
	@echo "  pickle     to make pickle files"
	@echo "  json       to make JSON files"
	@echo "  htmlhelp   to make HTML files and a HTML help project"
	@echo "  qthelp     to make HTML files and a qthelp project"
	@echo "  devhelp    to make HTML files and a Devhelp project"
	@echo "  epub       to make an epub"
	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
	@echo "  text       to make text files"
	@echo "  man        to make manual pages"
	@echo "  texinfo    to make Texinfo files"
	@echo "  info       to make Texinfo files and run them through makeinfo"
	@echo "  gettext    to make PO message catalogs"
	@echo "  changes    to make an overview of all changed/added/deprecated items"
	@echo "  xml        to make Docutils-native XML files"
	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
	@echo "  linkcheck  to check all external links for integrity"
	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
# Build the Qt help sources and print the follow-up commands.
# The hinted file names use $(PROJECT_NAME) instead of a name left over from another project.
qthelp:
	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
	@echo
	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/$(PROJECT_NAME).qhcp"
	@echo "To view the help file:"
	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/$(PROJECT_NAME).qhc"
# Build the Texinfo sources, then run makeinfo on them in the output directory.
info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	# Use $(MAKE) for the recursive call, as the latexpdf target does, so the
	# same make program and command-line flags are used by the sub-make.
	$(MAKE) -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
199 | -------------------------------------------------------------------------------- /proxy_db/countries.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import os 4 | 5 | try: 6 | from geoip2.errors import AddressNotFoundError 7 | from geoip2_tools.manager import Geoip2DataBaseManager 8 | except ImportError: 9 | Geoip2DataBaseManager = AddressNotFoundError = None 10 | 11 | 12 | MAXMIND_LICENSE_KEY_ENVNAME = 'MAXMIND_LICENSE_KEY' 13 | 14 | 15 | # https://dev.maxmind.com/geoip/legacy/codes/iso3166/ 16 | COUNTRIES = { 17 | "A1": "Anonymous Proxy", 18 | "A2": "Satellite Provider", 19 | "O1": "Other Country", 20 | "AD": "Andorra", 21 | "AE": "United Arab Emirates", 22 | "AF": "Afghanistan", 23 | "AG": "Antigua and Barbuda", 24 | "AI": "Anguilla", 25 | "AL": "Albania", 26 | "AM": "Armenia", 27 | "AO": "Angola", 28 | "AP": "Asia/Pacific Region", 29 | "AQ": "Antarctica", 30 | "AR": "Argentina", 31 | "AS": "American Samoa", 32 | "AT": "Austria", 33 | "AU": "Australia", 34 | "AW": "Aruba", 35 | "AX": "Aland Islands", 36 | "AZ": "Azerbaijan", 37 | "BA": "Bosnia and Herzegovina", 38 | "BB": "Barbados", 39 | "BD": "Bangladesh", 40 | "BE": "Belgium", 41 | "BF": "Burkina Faso", 42 | "BG": "Bulgaria", 43 | "BH": "Bahrain", 44 | "BI": "Burundi", 45 | "BJ": "Benin", 46 | "BL": "Saint Barthelemy", 47 | "BM": "Bermuda", 48 | "BN": "Brunei Darussalam", 49 | "BO": "Bolivia", 50 | "BQ": "Bonaire, Saint Eustatius and Saba", 51 | "BR": "Brazil", 52 | "BS": "Bahamas", 53 | "BT": "Bhutan", 54 | "BV": "Bouvet Island", 55 | "BW": "Botswana", 56 | "BY": "Belarus", 57 | "BZ": "Belize", 58 | "CA": "Canada", 59 | "CC": "Cocos (Keeling) Islands", 60 | "CD": "Congo, The Democratic Republic of the", 61 | "CF": "Central African Republic", 62 | "CG": "Congo", 63 | "CH": "Switzerland", 64 | "CI": "Cote d'Ivoire", 65 | "CK": "Cook Islands", 66 | "CL": "Chile", 67 | "CM": "Cameroon", 68 | "CN": "China", 69 | "CO": 
"Colombia", 70 | "CR": "Costa Rica", 71 | "CU": "Cuba", 72 | "CV": "Cape Verde", 73 | "CW": "Curacao", 74 | "CX": "Christmas Island", 75 | "CY": "Cyprus", 76 | "CZ": "Czech Republic", 77 | "DE": "Germany", 78 | "DJ": "Djibouti", 79 | "DK": "Denmark", 80 | "DM": "Dominica", 81 | "DO": "Dominican Republic", 82 | "DZ": "Algeria", 83 | "EC": "Ecuador", 84 | "EE": "Estonia", 85 | "EG": "Egypt", 86 | "EH": "Western Sahara", 87 | "ER": "Eritrea", 88 | "ES": "Spain", 89 | "ET": "Ethiopia", 90 | "EU": "Europe", 91 | "FI": "Finland", 92 | "FJ": "Fiji", 93 | "FK": "Falkland Islands (Malvinas)", 94 | "FM": "Micronesia, Federated States of", 95 | "FO": "Faroe Islands", 96 | "FR": "France", 97 | "GA": "Gabon", 98 | "GB": "United Kingdom", 99 | "GD": "Grenada", 100 | "GE": "Georgia", 101 | "GF": "French Guiana", 102 | "GG": "Guernsey", 103 | "GH": "Ghana", 104 | "GI": "Gibraltar", 105 | "GL": "Greenland", 106 | "GM": "Gambia", 107 | "GN": "Guinea", 108 | "GP": "Guadeloupe", 109 | "GQ": "Equatorial Guinea", 110 | "GR": "Greece", 111 | "GS": "South Georgia and the South Sandwich Islands", 112 | "GT": "Guatemala", 113 | "GU": "Guam", 114 | "GW": "Guinea-Bissau", 115 | "GY": "Guyana", 116 | "HK": "Hong Kong", 117 | "HM": "Heard Island and McDonald Islands", 118 | "HN": "Honduras", 119 | "HR": "Croatia", 120 | "HT": "Haiti", 121 | "HU": "Hungary", 122 | "ID": "Indonesia", 123 | "IE": "Ireland", 124 | "IL": "Israel", 125 | "IM": "Isle of Man", 126 | "IN": "India", 127 | "IO": "British Indian Ocean Territory", 128 | "IQ": "Iraq", 129 | "IR": "Iran, Islamic Republic of", 130 | "IS": "Iceland", 131 | "IT": "Italy", 132 | "JE": "Jersey", 133 | "JM": "Jamaica", 134 | "JO": "Jordan", 135 | "JP": "Japan", 136 | "KE": "Kenya", 137 | "KG": "Kyrgyzstan", 138 | "KH": "Cambodia", 139 | "KI": "Kiribati", 140 | "KM": "Comoros", 141 | "KN": "Saint Kitts and Nevis", 142 | "KP": "Korea, Democratic People's Republic of", 143 | "KR": "Korea, Republic of", 144 | "KW": "Kuwait", 145 | "KY": "Cayman 
Islands", 146 | "KZ": "Kazakhstan", 147 | "LA": "Lao People's Democratic Republic", 148 | "LB": "Lebanon", 149 | "LC": "Saint Lucia", 150 | "LI": "Liechtenstein", 151 | "LK": "Sri Lanka", 152 | "LR": "Liberia", 153 | "LS": "Lesotho", 154 | "LT": "Lithuania", 155 | "LU": "Luxembourg", 156 | "LV": "Latvia", 157 | "LY": "Libyan Arab Jamahiriya", 158 | "MA": "Morocco", 159 | "MC": "Monaco", 160 | "MD": "Moldova, Republic of", 161 | "ME": "Montenegro", 162 | "MF": "Saint Martin", 163 | "MG": "Madagascar", 164 | "MH": "Marshall Islands", 165 | "MK": "Macedonia", 166 | "ML": "Mali", 167 | "MM": "Myanmar", 168 | "MN": "Mongolia", 169 | "MO": "Macao", 170 | "MP": "Northern Mariana Islands", 171 | "MQ": "Martinique", 172 | "MR": "Mauritania", 173 | "MS": "Montserrat", 174 | "MT": "Malta", 175 | "MU": "Mauritius", 176 | "MV": "Maldives", 177 | "MW": "Malawi", 178 | "MX": "Mexico", 179 | "MY": "Malaysia", 180 | "MZ": "Mozambique", 181 | "NA": "Namibia", 182 | "NC": "New Caledonia", 183 | "NE": "Niger", 184 | "NF": "Norfolk Island", 185 | "NG": "Nigeria", 186 | "NI": "Nicaragua", 187 | "NL": "Netherlands", 188 | "NO": "Norway", 189 | "NP": "Nepal", 190 | "NR": "Nauru", 191 | "NU": "Niue", 192 | "NZ": "New Zealand", 193 | "OM": "Oman", 194 | "PA": "Panama", 195 | "PE": "Peru", 196 | "PF": "French Polynesia", 197 | "PG": "Papua New Guinea", 198 | "PH": "Philippines", 199 | "PK": "Pakistan", 200 | "PL": "Poland", 201 | "PM": "Saint Pierre and Miquelon", 202 | "PN": "Pitcairn", 203 | "PR": "Puerto Rico", 204 | "PS": "Palestinian Territory", 205 | "PT": "Portugal", 206 | "PW": "Palau", 207 | "PY": "Paraguay", 208 | "QA": "Qatar", 209 | "RE": "Reunion", 210 | "RO": "Romania", 211 | "RS": "Serbia", 212 | "RU": "Russian Federation", 213 | "RW": "Rwanda", 214 | "SA": "Saudi Arabia", 215 | "SB": "Solomon Islands", 216 | "SC": "Seychelles", 217 | "SD": "Sudan", 218 | "SE": "Sweden", 219 | "SG": "Singapore", 220 | "SH": "Saint Helena", 221 | "SI": "Slovenia", 222 | "SJ": "Svalbard and Jan 
def ip_country(ip):
    """Return the ISO country code that the GeoIP country database gives for *ip*.

    An empty string is returned whenever the country cannot be determined:
    the GeoIP manager is unavailable, no license key is available, the
    address is not found in the database, or the record has no ISO code.

    :param ip: IP address to look up.
    :return: ISO country code, or ``''`` when unknown.
    """
    if geoip2_manager is None or not geoip2_manager.is_license_key_available():
        return ''
    try:
        country = geoip2_manager['country'].reader.country(ip)
    except AddressNotFoundError:
        return ''
    # A record can lack an ISO code; normalise that to '' so every
    # "unknown" case yields the same value.
    return country.country.iso_code or ''
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # proxy-db documentation build configuration file, created by 5 | # sphinx-quickstart on Tue Jul 9 22:26:36 2013. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | import datetime 16 | import sys 17 | import os 18 | # import django 19 | 20 | # If extensions (or modules to document with autodoc) are in another 21 | # directory, add these directories to sys.path here. If the directory is 22 | # relative to the documentation root, use os.path.abspath to make it 23 | # absolute, like shown here. 24 | 25 | 26 | # Insert the project root dir as the first element in the PYTHONPATH. 27 | # This lets us ensure that the source package is imported, and that its 28 | # version is used. 29 | directory = os.path.dirname(os.path.abspath(__file__)) 30 | 31 | # sys.path.append(os.path.abspath(os.path.join(directory, '../'))) 32 | # os.environ['DJANGO_SETTINGS_MODULE'] = 'proxy-db.settings.develop' 33 | # django.setup() 34 | 35 | # -- General configuration --------------------------------------------- 36 | 37 | # If your documentation needs a minimal Sphinx version, state it here. 38 | #needs_sphinx = '1.0' 39 | 40 | # Add any Sphinx extension module names here, as strings. They can be 41 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 42 | extensions = [ 43 | 'sphinx.ext.autodoc', 44 | 'sphinx.ext.intersphinx', 45 | # 'sphinxcontrib.autohttp.drf', 46 | # 'sphinxcontrib_django', 47 | ] 48 | 49 | # Add any paths that contain templates here, relative to this directory. 
50 | templates_path = ['_templates'] 51 | 52 | # The suffix of source filenames. 53 | source_suffix = '.rst' 54 | 55 | # The encoding of source files. 56 | #source_encoding = 'utf-8-sig' 57 | 58 | # The master toctree document. 59 | master_doc = 'index' 60 | 61 | # General information about the project. 62 | project = u'proxy-db' 63 | copyright = u"%i, Nekmo" % datetime.date.today().year 64 | 65 | pdf_documents = [('index', u'rst2pdf', u'proxy-db', u'Nekmo'), ] 66 | 67 | rinoh_documents = [('index', # top-level file (index.rst) 68 | 'target', # output (target.pdf) 69 | 'proxy-db', # document title 70 | 'Nekmo')] # document author 71 | # rinoh_logo = '_static/logo.png' 72 | rinoh_domain_indices = False 73 | 74 | html_context = dict(docs_scope='external') 75 | 76 | # The version info for the project you're documenting, acts as replacement 77 | # for |version| and |release|, also used in various other places throughout 78 | # the built documents. 79 | # 80 | # The short X.Y version. 81 | version = '0.1.0' 82 | # The full version, including alpha/beta/rc tags. 83 | release = '0.1.0' 84 | 85 | # The language for content autogenerated by Sphinx. Refer to documentation 86 | # for a list of supported languages. 87 | #language = None 88 | 89 | # There are two options for replacing |today|: either, you set today to 90 | # some non-false value, then it is used: 91 | #today = '' 92 | # Else, today_fmt is used as the format for a strftime call. 93 | #today_fmt = '%B %d, %Y' 94 | 95 | # List of patterns, relative to source directory, that match files and 96 | # directories to ignore when looking for source files. 97 | exclude_patterns = ['_build'] 98 | 99 | # The reST default role (used for this markup: `text`) to use for all 100 | # documents. 101 | #default_role = None 102 | 103 | # If true, '()' will be appended to :func: etc. cross-reference text. 
104 | #add_function_parentheses = True 105 | 106 | # If true, the current module name will be prepended to all description 107 | # unit titles (such as .. function::). 108 | #add_module_names = True 109 | 110 | # If true, sectionauthor and moduleauthor directives will be shown in the 111 | # output. They are ignored by default. 112 | #show_authors = False 113 | 114 | # The name of the Pygments (syntax highlighting) style to use. 115 | # pygments_style = 'sphinx' 116 | 117 | # A list of ignored prefixes for module index sorting. 118 | #modindex_common_prefix = [] 119 | 120 | # If true, keep warnings as "system message" paragraphs in the built 121 | # documents. 122 | #keep_warnings = False 123 | 124 | 125 | # -- Options for HTML output ------------------------------------------- 126 | 127 | # The theme to use for HTML and HTML Help pages. See the documentation for 128 | # a list of builtin themes. 129 | html_theme = os.environ.get('HTML_THEME', 'alabaster') 130 | 131 | # Theme options are theme-specific and customize the look and feel of a 132 | # theme further. For a list of options available for each theme, see the 133 | # documentation. 134 | html_theme_options = { 135 | 'logo': 'logo.png', 136 | 'description': 'Manage free and private proxies on local db for Python Projects', 137 | 'github_user': 'Nekmo', 138 | 'github_repo': 'proxy-db', 139 | 'github_type': 'star', 140 | 'github_banner': True, 141 | 'travis_button': True, 142 | 'codecov_button': True, 143 | 'analytics_id': 'UA-62276079-1', 144 | 'canonical_url': 'http://docs.nekmo.org/proxy-db/' 145 | } 146 | 147 | 148 | # Add any paths that contain custom themes here, relative to this directory. 149 | html_theme_path = ['.'] 150 | 151 | # The name for this set of Sphinx documents. If None, it defaults to 152 | # " v documentation". 153 | #html_title = None 154 | 155 | # A shorter title for the navigation bar. Default is the same as 156 | # html_title. 
157 | #html_short_title = None 158 | 159 | # The name of an image file (relative to this directory) to place at the 160 | # top of the sidebar. 161 | #html_logo = None 162 | 163 | # The name of an image file (within the static path) to use as favicon 164 | # of the docs. This file should be a Windows icon file (.ico) being 165 | # 16x16 or 32x32 pixels large. 166 | #html_favicon = None 167 | 168 | # Add any paths that contain custom static files (such as style sheets) 169 | # here, relative to this directory. They are copied after the builtin 170 | # static files, so a file named "default.css" will overwrite the builtin 171 | # "default.css". 172 | html_static_path = ['_static'] 173 | 174 | # If not '', a 'Last updated on:' timestamp is inserted at every page 175 | # bottom, using the given strftime format. 176 | #html_last_updated_fmt = '%b %d, %Y' 177 | 178 | # If true, SmartyPants will be used to convert quotes and dashes to 179 | # typographically correct entities. 180 | #html_use_smartypants = True 181 | 182 | # Custom sidebar templates, maps document names to template names. 183 | # html_sidebars = { 184 | # '**': [ 185 | # 'about.html', 186 | # 'navigation.html', 187 | # 'relations.html', 188 | # 'searchbox.html', 189 | # 'donate.html', 190 | # ] 191 | # } 192 | 193 | # Additional templates that should be rendered to pages, maps page names 194 | # to template names. 195 | #html_additional_pages = {} 196 | 197 | # If false, no module index is generated. 198 | #html_domain_indices = True 199 | 200 | # If false, no index is generated. 201 | #html_use_index = True 202 | 203 | # If true, the index is split into individual pages for each letter. 204 | #html_split_index = False 205 | 206 | # If true, links to the reST sources are added to the pages. 207 | #html_show_sourcelink = True 208 | 209 | # If true, "Created using Sphinx" is shown in the HTML footer. 210 | # Default is True. 211 | #html_show_sphinx = True 212 | 213 | # If true, "(C) Copyright ..." 
is shown in the HTML footer. 214 | # Default is True. 215 | #html_show_copyright = True 216 | 217 | # If true, an OpenSearch description file will be output, and all pages 218 | # will contain a <link> tag referring to it. The value of this option 219 | # must be the base URL from which the finished HTML is served. 220 | #html_use_opensearch = '' 221 | 222 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 223 | #html_file_suffix = None 224 | 225 | # Output file base name for HTML help builder. 226 | htmlhelp_basename = 'proxy-dbdoc' 227 | 228 | 229 | # -- Options for LaTeX output ------------------------------------------ 230 | 231 | latex_elements = { 232 | # The paper size ('letterpaper' or 'a4paper'). 233 | 'papersize': 'letterpaper', 234 | 235 | # The font size ('10pt', '11pt' or '12pt'). 236 | 'pointsize': '10pt', 237 | 238 | # Additional stuff for the LaTeX preamble. 239 | 'preamble': '', 240 | } 241 | 242 | # Grouping the document tree into LaTeX files. List of tuples 243 | # (source start file, target name, title, author, documentclass 244 | # [howto/manual]). 245 | latex_documents = [ 246 | ('index', 'proxy-db.tex', 247 | u'proxy-db Documentation', 248 | u'Nekmo', 'manual'), 249 | ] 250 | 251 | # The name of an image file (relative to this directory) to place at 252 | # the top of the title page. 253 | #latex_logo = None 254 | 255 | # For "manual" documents, if this is true, then toplevel headings 256 | # are parts, not chapters. 257 | #latex_use_parts = False 258 | 259 | # If true, show page references after internal links. 260 | #latex_show_pagerefs = False 261 | 262 | # If true, show URL addresses after external links. 263 | #latex_show_urls = False 264 | 265 | # Documents to append as an appendix to all manuals. 266 | #latex_appendices = [] 267 | 268 | # If false, no module index is generated. 
269 | #latex_domain_indices = True 270 | 271 | 272 | # -- Options for manual page output ------------------------------------ 273 | 274 | # One entry per manual page. List of tuples 275 | # (source start file, name, description, authors, manual section). 276 | man_pages = [ 277 | ('index', 'proxy-db', 278 | u'proxy-db Documentation', 279 | [u'Nekmo'], 1) 280 | ] 281 | 282 | # If true, show URL addresses after external links. 283 | #man_show_urls = False 284 | 285 | 286 | # -- Options for Texinfo output ---------------------------------------- 287 | 288 | # Grouping the document tree into Texinfo files. List of tuples 289 | # (source start file, target name, title, author, 290 | # dir menu entry, description, category) 291 | texinfo_documents = [ 292 | ('index', 'proxy-db', 293 | u'proxy-db Documentation', 294 | u'Nekmo', 295 | 'proxy-db', 296 | 'One line description of project.', 297 | 'Miscellaneous'), 298 | ] 299 | 300 | # Documents to append as an appendix to all manuals. 301 | #texinfo_appendices = [] 302 | 303 | # If false, no module index is generated. 304 | #texinfo_domain_indices = True 305 | 306 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 307 | #texinfo_show_urls = 'footnote' 308 | 309 | # If true, do not generate a @detailmenu in the "Top" node's menu. 
def make_request(self):
    """Send the HTTP request described by this object.

    The request uses ``self.method`` and ``self.url`` and also forwards the
    ``self.headers`` and ``self.data`` values stored by ``__init__``, so the
    configured User-Agent (or any caller-supplied headers/body) actually
    reaches the server.

    :return: the response object returned by ``requests.request``.
    :raises requests.RequestException: propagated from ``requests``; ``now()``
        is the caller that catches and logs it.
    """
    return requests.request(
        self.method, self.url,
        headers=self.headers,
        data=self.data,
    )
class ProviderCredentialMixin:
    """Mixin for providers whose credentials come from environment variables.

    Subclasses either set ``env_key_username`` / ``env_key_password`` to the
    names of the environment variables to read, or override the
    ``get_env_key_*`` methods to build those names dynamically.
    """
    # Names of the environment variables holding the username and password.
    env_key_username = None
    env_key_password = None

    def get_env_key_username(self):
        """Return the name of the environment variable with the username."""
        return self.env_key_username

    def get_env_key_password(self):
        """Return the name of the environment variable with the password."""
        return self.env_key_password

    def is_available(self):
        """Return ``True`` when the provider can be used (credentials are set)."""
        return self.has_credentials()

    def _read_credentials(self):
        """Return ``(username, password)`` as read from the environment.

        A value is ``None`` when its variable name is not configured or the
        variable is not defined.
        """
        keys = (self.get_env_key_username(), self.get_env_key_password())
        # Guard unset key names: os.environ.get(None) raises TypeError on
        # Python 3 instead of returning None.
        return tuple(os.environ.get(key) if key else None for key in keys)

    def has_credentials(self):
        """Return ``True`` only if both username and password are non-empty.

        Always returns a real ``bool`` so the raw secret is never handed back
        to callers that only want to know whether credentials exist.
        """
        return all(self._read_credentials())

    def credentials(self):
        """Return ``(username, password)``, or ``()`` if either is missing."""
        values = self._read_credentials()
        return values if all(values) else ()
def process_proxies(self, proxies, session=None, update_votes=UPDATE_VOTES):
    """Store the scraped proxies and return their model instances.

    :param proxies: iterable of dicts with a ``'proxy'`` entry and optional
        ``'protocol'`` (defaults to ``'http'``) and ``'country_code'`` entries.
        ``'proxy'`` may be a ``'host:port'`` string or a ``(host, port)``
        sequence, which is what ``Provider.find_page_proxies`` produces from
        the regex ``findall``.
    :param session: database session; a new one is created when omitted.
    :param update_votes: amount added to the votes of every processed proxy.
    :return: list with one ``Proxy`` instance per input dict.
    """
    session = session or create_session()
    proxy_instances = []
    for proxy in proxies:
        protocol = proxy.get('protocol', 'http')
        address = proxy['proxy']
        if isinstance(address, (tuple, list)):
            # Formatting the tuple directly would give an id such as
            # "http://('1.2.3.4', '80')"; join the parts as "host:port".
            address = ':'.join('{}'.format(part) for part in address)
        instance, _ = get_or_create(
            session, Proxy, defaults=dict(votes=0, protocol=protocol),
            id='{}://{}'.format(protocol, address)
        )
        if not instance.country:
            # Prefer the country detected from the address; fall back to the
            # one reported by the provider, then to an empty string.
            detected_country = ip_country(get_domain(instance.id))
            instance.country = detected_country or proxy.get('country_code') or ''
        instance.votes += update_votes
        proxy_instances.append(instance)
    session.commit()
    return proxy_instances
class ProxyNovaCom(SoupProvider):
    """Provider that scrapes the proxy table rows of proxynova.com."""
    name = 'Proxy Nova'
    base_url = 'https://www.proxynova.com/proxy-server-list/'

    def request(self, url=None, country=None, protocol=None):
        """Build the provider request, adding the country segment to the URL."""
        target = url or self.base_url
        if country:
            target = target + 'country-{}/'.format(country.lower())
        return super(ProxyNovaCom, self).request(target, country, protocol)

    def soup_items(self, soup):
        """Return the table rows that describe a proxy."""
        return soup.select('tr[data-proxy-id]')

    def soup_item(self, item):
        """Convert one table row into a proxy dict, or ``None`` if it is unusable.

        The IP address is read from the row's script tag and the port from its
        second cell. A warning is logged for every reason a row is rejected.
        """
        script_tag = item.find('script')
        if script_tag is None:
            self.logger.warning('Script tag is no available in item {}'.format(item))
            return None
        cells = item.find_all('td')
        if len(cells) < 2:
            self.logger.warning('td tag including port is not available in item {}'.format(item))
            return None
        port = ''.join(cells[1].stripped_strings or '')
        ip_match = SIMPLE_IP_PATTERN.search(script_tag.string or '')
        if ip_match is None:
            self.logger.warning('Invalid script value for item {}'.format(item))
            return None
        country_code = self._country_code(item)
        return {
            'proxy': '{}:{}'.format(ip_match.group(1), port),
            'country_code': country_code,
        }

    def _country_code(self, item):
        """Return the upper-cased country from the row's flag image, if valid."""
        flag = item.find('img', class_='flag')
        if flag is None or 'alt' not in flag.attrs:
            self.logger.warning('Image with country is not available in item {}'.format(item))
            return None
        code = flag.attrs['alt'].upper()
        if code and code not in COUNTRIES:
            self.logger.warning('Invalid country code in item {}: {}'.format(item, code))
            return None
        return code
class NordVpn(ProviderCredentialMixin, Provider):
    """Provider backed by the NordVPN server list API.

    Each server can yield several proxies, one per entry of ``protocols``
    whose feature flag is enabled for that server.
    """
    name = 'Nord VPN'
    base_url = 'https://api.nordvpn.com/server'
    # Server feature flag -> proxy protocol and port offered for it.
    protocols = [
        {'feature': 'socks', 'protocol': 'socks5', 'port': 1080},
        {'feature': 'proxy', 'protocol': 'http', 'port': 80},
        {'feature': 'proxy_ssl', 'protocol': 'https', 'port': 89},
    ]
    env_key_username = 'PROXYDB_NORDVPN_USERNAME'
    env_key_password = 'PROXYDB_NORDVPN_PASSWORD'

    def request(self, url=None, country=None, protocol=None):
        """Build the provider request, defaulting to ``base_url``."""
        return super(NordVpn, self).request(url or self.base_url, country, protocol)

    def find_page_proxies(self, request):
        """Return the proxy dicts described by the JSON server list.

        https proxies are addressed by the server domain; the other protocols
        use its IP address. Servers with an unknown flag are kept, with
        ``country_code`` set to ``None`` and a warning logged.
        """
        found = []
        for server in request.json():
            country_code = server['flag']
            if country_code not in COUNTRIES:
                self.logger.warning('Invalid country in proxy {}: {}'.format(
                    server['ip_address'], country_code,
                ))
                country_code = None
            for spec in self.protocols:
                if server['features'].get(spec['feature']):
                    if spec['protocol'] == 'https':
                        host = server['domain']
                    else:
                        host = server['ip_address']
                    found.append({
                        'proxy': '{address}:{port}'.format(address=host, port=spec['port']),
                        'country_code': country_code,
                        'protocol': spec['protocol'],
                    })
        return found
def add_proxies(self, proxies, update_votes=UPDATE_VOTES):
    """Store manually supplied proxies and attach them to this provider.

    :param proxies: proxy dicts in the format accepted by ``process_proxies``.
    :param update_votes: votes added to every stored proxy.
    :return: the proxy instances returned by ``process_proxies``.
    """
    db_session = create_session()
    stored = self.process_proxies(proxies, db_session, update_votes)
    # The same session is reused so both steps work on the same instances.
    manual_request = self.get_provider_request(None, None, None)
    manual_request.add_proxies(stored, db_session)
    return stored
class TestProviderRequestBase(unittest.TestCase):
    """Tests for ``ProviderRequestBase``."""
    url = URL

    @patch('proxy_db.providers.create_session')
    @patch('proxy_db.providers.ProviderRequestBase.get_or_create', return_value=(Mock(), None))
    def test_now(self, m2, m1):
        """``now()`` requests the page and records the two proxies it contains."""
        # The context manager removes the HTTP mock even if an assertion
        # fails, so a failure here cannot leak into other tests.
        with requests_mock.Mocker() as session_mock:
            req_mock = session_mock.get(self.url, text=PROVIDER_HTML)
            self.get_provider_request().now()
            self.assertTrue(req_mock.called_once)
        m1.return_value.commit.assert_called()
        m2.assert_called_with(m1.return_value, {'results': 2})

    def test_requires_update(self):
        """A request updated one minute past the threshold requires an update."""
        instance = Mock()
        instance.updated_at = datetime.datetime.now() - datetime.timedelta(minutes=PROVIDER_REQUIRES_UPDATE_MINUTES)
        instance.updated_at -= datetime.timedelta(minutes=1)
        with patch('proxy_db.providers.ProviderRequestBase.get_or_create', return_value=(instance, True)):
            self.assertTrue(self.get_provider_request().requires_update())

    def test_requires_update_not_exists(self):
        """An update is required when nothing is patched.

        NOTE(review): this appears to use the real database session; confirm.
        """
        self.assertTrue(self.get_provider_request().requires_update())

    def test_get_or_create(self):
        """The stored request carries the default results, id and provider name."""
        request_provider = self.get_provider_request()
        instance, _ = request_provider.get_or_create()
        self.assertEqual(instance.results, 0)
        self.assertEqual(instance.request_id, request_provider.id)
        self.assertEqual(instance.provider, request_provider.provider.name)

    def get_provider(self):
        """Return a base provider pointing at the test URL."""
        return Provider(self.url)

    def get_provider_request(self):
        """Return a provider request with two options, used to build its id."""
        return ProviderRequestBase(self.get_provider(), self.url, options={'country': 'es', 'spam': 1})

    def test_id(self):
        """The id joins the option values, ordered by option name."""
        self.assertEqual(self.get_provider_request().id, 'es-1')
Provider(self.url).find_page_proxies(request) 186 | self.assertEqual(proxies, self.proxies) 187 | 188 | @patch('proxy_db.providers.create_session') 189 | def test_process_proxies(self, mock_session): 190 | Provider(self.url).process_proxies(self.proxies[:1]) 191 | mock_session.assert_called_once() 192 | mock_session.return_value.commit.assert_called_once() 193 | 194 | 195 | class TestProxyNovaCom(unittest.TestCase): 196 | url = 'https://domain.com/' 197 | 198 | @patch("proxy_db.providers.Provider.request") 199 | def test_request(self, m): 200 | provider = ProxyNovaCom() 201 | provider.request(self.url) 202 | m.assert_called_with(self.url, None, None) 203 | 204 | @patch("proxy_db.providers.Provider.request") 205 | def test_request_country(self, m): 206 | provider = ProxyNovaCom() 207 | country = 'es' 208 | provider.request(self.url, country) 209 | m.assert_called_with(self.url + 'country-{}/'.format(country), country, None) 210 | 211 | def test_find_page_proxies(self): 212 | provider = ProxyNovaCom() 213 | request = Mock() 214 | request.text = PROXY_NOVA_HTML 215 | self.assertEqual(provider.find_page_proxies(request), [ 216 | {'proxy': '91.217.28.125:3128', 'country_code': 'UA'}, 217 | {'proxy': '89.145.199.64:8080', 'country_code': 'HU'}, 218 | ]) 219 | 220 | @patch("proxy_db.providers.getLogger") 221 | def test_invalid_country(self, m): 222 | provider = ProxyNovaCom() 223 | request = Mock() 224 | request.text = PROXY_NOVA_INVALID_COUNTRY 225 | self.assertEqual(provider.find_page_proxies(request), [ 226 | {'proxy': '91.217.28.125:3128', 'country_code': None}, 227 | {'proxy': '89.145.199.64:8080', 'country_code': None}, 228 | ]) 229 | 230 | @patch("proxy_db.providers.getLogger") 231 | def test_invalid_rows(self, m): 232 | provider = ProxyNovaCom() 233 | request = Mock() 234 | request.text = PROXY_NOVA_INVALID_ROWS_HTML 235 | self.assertEqual(provider.find_page_proxies(request), []) 236 | self.assertEqual( 237 | m.return_value.warning.call_count, 3, 238 | "Expected 
class TestNordVPN(unittest.TestCase):
    """Tests for the ``NordVpn`` provider."""
    url = NordVpn.base_url

    @patch("proxy_db.providers.Provider.request")
    def test_request(self, m):
        """``request`` delegates to the base provider with the same URL."""
        NordVpn().request(self.url)
        m.assert_called_with(self.url, None, None)

    def test_find_page_proxies(self):
        """One proxy is produced for each enabled protocol of the server."""
        response = Mock()
        response.json.return_value = NORDVPN_SERVERS
        expected = [
            {'country_code': 'US', 'protocol': 'socks5', 'proxy': '123.123.123.123:1080'},
            {'country_code': 'US', 'protocol': 'http', 'proxy': '123.123.123.123:80'},
            {'country_code': 'US', 'protocol': 'https', 'proxy': 'us0.nordvpn.com:89'}
        ]
        self.assertEqual(NordVpn().find_page_proxies(response), expected)

    def test_invalid_country(self):
        """An unknown flag gives ``country_code`` ``None`` and keeps the proxy."""
        server_list = copy.deepcopy(NORDVPN_SERVERS)
        first_server = server_list[0]
        first_server['flag'] = 'FOO'
        first_server['features'] = {'socks': True}
        response = Mock()
        response.json.return_value = server_list
        expected = [
            {'country_code': None, 'protocol': 'socks5', 'proxy': '123.123.123.123:1080'},
        ]
        self.assertEqual(NordVpn().find_page_proxies(response), expected)
class TestManualProvider(unittest.TestCase):
    """Smoke test for ``ManualProxy``."""

    @patch('proxy_db.providers.create_session')
    def test_add_proxies(self, m):
        """Adding a manual proxy with a mocked session does not raise."""
        manual_provider = ManualProxy('manual')
        manual_proxies = [{'protocol': 'http', 'proxy': '1.2.3.4:999'}]
        manual_provider.add_proxies(manual_proxies)