├── accountpool ├── __init__.py ├── storages │ ├── __init__.py │ └── redis.py ├── utils │ ├── __init__.py │ └── parse.py ├── exceptions │ ├── __init__.py │ └── init.py ├── processors │ ├── __init__.py │ ├── server.py │ ├── tester.py │ └── generator.py ├── setting.py └── scheduler.py ├── Dockerfile ├── requirements.txt ├── importer.py ├── docker-compose.yml ├── run.py ├── .github ├── workflows │ └── build.yml └── ISSUE_TEMPLATE │ └── bug_report.md ├── supervisord.conf ├── register.py ├── .gitignore ├── deployment.yml └── README.md /accountpool/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /accountpool/storages/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /accountpool/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /accountpool/exceptions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /accountpool/processors/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.6 2 | WORKDIR /app 3 | COPY requirements.txt . 4 | RUN pip install -r requirements.txt 5 | COPY . . 
class InitException(Exception):
    """
    error signalling that an object could not be initialised
    """

    def __str__(self):
        """
        text of the error, it does not depend on the arguments of the exception
        :return: the fixed message, wrapped in quotes by repr
        """
        message = 'init failed'
        return repr(message)
def parse_redis_connection_string(connection_string):
    """
    parse a redis connection string, for example:
    redis://[password]@host:port
    rediss://[password]@host:port/db
    :param connection_string: connection string to parse
    :return: tuple of (host, port, password, db), password is None when it is empty
        and db is 0 when it is left out; ('localhost', 6379, None, 0) when the string
        does not look like a redis connection string
    """
    # the db part is optional, so both documented forms are accepted
    result = re.match(r'rediss?://(.*?)@(.*?):(\d+)(?:/(\d+))?', connection_string)
    if not result:
        # always four items, callers unpack host, port, password and db
        return 'localhost', 6379, None, 0
    password, host, port, db = result.groups()
    return host, int(port), password or None, int(db or 0)
steps: 14 | - name: Checkout Source 15 | uses: actions/checkout@v1 16 | - name: Docker Login 17 | run: docker login -u germey -p ${{ secrets.DOCKERHUB_LOGIN_PASSWORD }} 18 | - name: Build the Docker Image 19 | run: docker-compose build 20 | - name: Tag and Push Master Version 21 | run: | 22 | docker tag germey/accountpool germey/accountpool:master 23 | docker push germey/accountpool:master 24 | 25 | -------------------------------------------------------------------------------- /supervisord.conf: -------------------------------------------------------------------------------- 1 | [supervisord] 2 | nodaemon=true 3 | 4 | [program:tester] 5 | process_name=tester 6 | command=python3 run.py %(ENV_WEBSITE)s --processor tester 7 | directory=/app 8 | stdout_logfile=/dev/stdout 9 | stdout_logfile_maxbytes=0 10 | stderr_logfile=/dev/stderr 11 | stderr_logfile_maxbytes=0 12 | 13 | [program:generator] 14 | process_name=generator 15 | command=python3 run.py %(ENV_WEBSITE)s --processor generator 16 | directory=/app 17 | stdout_logfile=/dev/stdout 18 | stdout_logfile_maxbytes=0 19 | stderr_logfile=/dev/stderr 20 | stderr_logfile_maxbytes=0 21 | 22 | [program:server] 23 | process_name=server 24 | command=python3 run.py %(ENV_WEBSITE)s --processor server 25 | directory=/app 26 | stdout_logfile=/dev/stdout 27 | stdout_logfile_maxbytes=0 28 | stderr_logfile=/dev/stderr 29 | stderr_logfile_maxbytes=0 -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: Germey 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. 
@logger.catch()
def register(username, password, timeout=10):
    """
    register one account on the website given on the command line
    :param username: username to register
    :param password: password of the account
    :param timeout: seconds to wait for the server before giving up
    :return: None, the decoded json reply is printed
    """
    logger.debug(f'register using {username} and {password}')
    # requests waits forever by default, one stalled connection would block this worker
    response = requests.post(f'https://{website}.scrape.center/api/register', json={
        'username': username,
        'password': password
    }, timeout=timeout)
    print(response.json())
'credential' 13 | 14 | 15 | @app.route('/') 16 | def index(): 17 | return '

Welcome to Account Pool System

class RedisClient(object):
    """
    redis client, every instance reads and writes one hash named '<type>:<website>'
    """

    def __init__(self, type, website, host=REDIS_HOST, port=REDIS_PORT, password=REDIS_PASSWORD, db=REDIS_DB):
        """
        init redis client
        :param type: first part of the hash name, 'account' or 'credential' in this project
        :param website: second part of the hash name, name of the website
        :param host: redis host
        :param port: redis port
        :param password: redis password
        :param db: redis db index
        """
        # pass db on, otherwise redis falls back to db 0 whatever REDIS_DB is set to
        self.db = redis.StrictRedis(host=host, port=port, password=password, db=db, decode_responses=True)
        self.type = type
        self.website = website

    def name(self):
        """
        get hash name
        :return: name of hash, '<type>:<website>'
        """
        return f'{self.type}:{self.website}'

    def set(self, username, value):
        """
        set key-value
        :param username: username, field of the hash
        :param value: password or cookies
        :return: result of hset
        """
        return self.db.hset(self.name(), username, value)

    def get(self, username):
        """
        get value
        :param username: username
        :return: value stored for username, None when there is no such field
        """
        return self.db.hget(self.name(), username)

    def delete(self, username):
        """
        delete key-value
        :param username: username
        :return: result of hdel
        """
        return self.db.hdel(self.name(), username)

    def count(self):
        """
        get count
        :return: number of fields in the hash
        """
        return self.db.hlen(self.name())

    def random(self):
        """
        get random cookies or password
        :return: random cookies or password
        :raises IndexError: when the hash is empty
        """
        return random.choice(self.db.hvals(self.name()))

    def usernames(self):
        """
        get all usernames
        :return: all usernames
        """
        return self.db.hkeys(self.name())

    def all(self):
        """
        get all key-values
        :return: map of key-values
        """
        return self.db.hgetall(self.name())
files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | cover/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | .pybuilder/ 80 | target/ 81 | 82 | # Jupyter Notebook 83 | .ipynb_checkpoints 84 | 85 | # IPython 86 | profile_default/ 87 | ipython_config.py 88 | 89 | # pyenv 90 | # For a library or package, you might want to ignore these files since the code is 91 | # intended to run in multiple environments; otherwise, check them in: 92 | # .python-version 93 | 94 | # pipenv 95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 98 | # install all needed dependencies. 99 | #Pipfile.lock 100 | 101 | # PEP 582; used by e.g. 
class BaseTester(object):
    """
    base tester, subclasses implement test() for one website
    """

    def __init__(self, website=None):
        """
        init base tester
        :param website: name of website, key of TEST_URL_MAP
        :raises InitException: when website is empty
        """
        self.website = website
        if not self.website:
            raise InitException
        self.account_operator = RedisClient(type='account', website=self.website)
        self.credential_operator = RedisClient(type='credential', website=self.website)

    def test(self, username, credential):
        """
        test single credential
        :param username: owner of the credential
        :param credential: credential to test
        """
        raise NotImplementedError

    def run(self):
        """
        test all credentials
        """
        credentials = self.credential_operator.all()
        for username, credential in credentials.items():
            self.test(username, credential)

    def _check(self, username, headers):
        """
        request the test url with the given headers, delete the credential unless the answer is 200
        :param username: owner of the credential
        :param headers: request headers carrying the credential
        """
        logger.info(f'testing credential for {username}')
        test_url = TEST_URL_MAP[self.website]
        try:
            response = requests.get(test_url, headers=headers, timeout=5, allow_redirects=False)
        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
            # a read timeout is not a ConnectionError, uncaught it would stop the whole tester loop
            logger.info('test failed')
            return
        if response.status_code == 200:
            logger.info('credential is valid')
        else:
            logger.info('credential is not valid, delete it')
            self.credential_operator.delete(username)


class Antispider6Tester(BaseTester):
    """
    tester for antispider6, the credential is sent as Cookie header
    """

    def test(self, username, credential):
        """
        test single credential
        """
        self._check(username, {
            'Cookie': credential
        })


class Antispider7Tester(BaseTester):
    """
    tester for antispider7, the credential is sent as jwt in the authorization header
    """

    def test(self, username, credential):
        """
        test single credential
        """
        self._check(username, {
            'authorization': f'jwt {credential}'
        })
apiVersion: v1 23 | kind: Service 24 | metadata: 25 | annotations: 26 | kompose.cmd: kompose convert -f docker-compose.yml -o deployment.yml 27 | kompose.version: 1.20.0 () 28 | creationTimestamp: null 29 | labels: 30 | io.kompose.service: accountpool 31 | name: accountpool 32 | namespace: accountpool 33 | spec: 34 | ports: 35 | - name: "6777" 36 | port: 6777 37 | targetPort: 6777 38 | selector: 39 | io.kompose.service: accountpool 40 | status: 41 | loadBalancer: {} 42 | - apiVersion: apps/v1 43 | kind: Deployment 44 | metadata: 45 | annotations: 46 | kompose.cmd: kompose convert -f docker-compose.yml -o deployment.yml 47 | kompose.version: 1.20.0 () 48 | creationTimestamp: null 49 | labels: 50 | io.kompose.service: accountpool 51 | name: accountpool 52 | namespace: accountpool 53 | spec: 54 | replicas: 2 55 | revisionHistoryLimit: 1 56 | strategy: {} 57 | selector: 58 | matchLabels: 59 | io.kompose.service: accountpool 60 | template: 61 | metadata: 62 | annotations: 63 | kompose.cmd: kompose convert -f docker-compose.yml -o deployment.yml 64 | kompose.version: 1.20.0 () 65 | creationTimestamp: null 66 | labels: 67 | io.kompose.service: accountpool 68 | spec: 69 | containers: 70 | - env: 71 | - name: REDIS_CONNECTION_STRING 72 | valueFrom: 73 | secretKeyRef: 74 | name: redis 75 | key: connection_string 76 | - name: REDIS_PORT 77 | value: '6379' 78 | image: germey/accountpool:${TAG} 79 | name: accountpool 80 | resources: 81 | limits: 82 | memory: "500Mi" 83 | cpu: "300m" 84 | requests: 85 | memory: "500Mi" 86 | cpu: "300m" 87 | ports: 88 | - containerPort: 6777 89 | volumeMounts: 90 | - mountPath: "/app/accountpool/logs" 91 | name: accountpool 92 | restartPolicy: Always 93 | volumes: 94 | - name: accountpool 95 | persistentVolumeClaim: 96 | claimName: pvc-accountpool 97 | status: {} 98 | kind: List 99 | metadata: {} 100 | -------------------------------------------------------------------------------- /accountpool/setting.py: 
import platform
from os.path import dirname, abspath, join
from environs import Env
from loguru import logger
from accountpool.utils.parse import parse_redis_connection_string

# all settings below are read through env, the second argument of each call is the default
env = Env()
env.read_env()

# definition of flags
IS_WINDOWS = platform.system().lower() == 'windows'

# definition of dirs, ROOT_DIR is the parent of the directory holding this file
ROOT_DIR = dirname(dirname(abspath(__file__)))
LOG_DIR = join(ROOT_DIR, env.str('LOG_DIR', 'logs'))

# definition of environments
DEV_MODE, TEST_MODE, PROD_MODE = 'dev', 'test', 'prod'
APP_ENV = env.str('APP_ENV', DEV_MODE).lower()
# debug is on by default only in dev mode
APP_DEBUG = env.bool('APP_DEBUG', True if APP_ENV == DEV_MODE else False)
APP_DEV = IS_DEV = APP_ENV == DEV_MODE
APP_PROD = IS_PROD = APP_ENV == PROD_MODE
APP_TEST = IS_TEST = APP_ENV == TEST_MODE

# redis host
REDIS_HOST = env.str('REDIS_HOST', '127.0.0.1')
# redis port
REDIS_PORT = env.int('REDIS_PORT', 6379)
# redis password, if no password, set it to None
REDIS_PASSWORD = env.str('REDIS_PASSWORD', None)
# redis db, if no choice, set it to 0
REDIS_DB = env.int('REDIS_DB', 0)
# redis connection string, like redis://[password]@host:port/db or rediss://[password]@host:port/db
REDIS_CONNECTION_STRING = env.str('REDIS_CONNECTION_STRING', None)

# when a connection string is given it replaces the four separate redis values above
if REDIS_CONNECTION_STRING:
    REDIS_HOST, REDIS_PORT, REDIS_PASSWORD, REDIS_DB = parse_redis_connection_string(REDIS_CONNECTION_STRING)

# redis hash table key name
# NOTE(review): RedisClient.name() builds '<type>:<website>' itself, confirm these two are still used
REDIS_ACCOUNT_KEY = env.str('REDIS_ACCOUNT_KEY', 'accounts:%s')
REDIS_CREDENTIAL_KEY = env.str('REDIS_CREDENTIAL_KEY', 'credential:%s')

# integrated generator, website name -> name of the generator class
GENERATOR_MAP = {
    'antispider6': 'Antispider6Generator',
    'antispider7': 'Antispider7Generator'
}

# integrated tester, website name -> name of the tester class
TESTER_MAP = {
    'antispider6': 'Antispider6Tester',
    'antispider7': 'Antispider7Tester',
}

# definition of tester cycle, it will test every CYCLE_TESTER second
CYCLE_TESTER = env.int('CYCLE_TESTER', 600)
# definition of generator cycle, it will generate credentials every CYCLE_GENERATOR second
CYCLE_GENERATOR = env.int('CYCLE_GENERATOR', 600)
GET_TIMEOUT = env.int('GET_TIMEOUT', 10)

# definition of tester
TEST_URL = env.str('TEST_URL', 'http://www.baidu.com')
TEST_TIMEOUT = env.int('TEST_TIMEOUT', 10)
TEST_BATCH = env.int('TEST_BATCH', 20)
# test url, website name -> url requested to check a credential
TEST_URL_MAP = {
    'antispider6': 'https://antispider6.scrape.center/',
    'antispider7': 'https://antispider7.scrape.center/'
}

# definition of api
API_HOST = env.str('API_HOST', '0.0.0.0')
API_PORT = env.int('API_PORT', 6789)
API_THREADED = env.bool('API_THREADED', True)

# flags of enable
ENABLE_TESTER = env.bool('ENABLE_TESTER', True)
ENABLE_GENERATOR = env.bool('ENABLE_GENERATOR', True)
ENABLE_SERVER = env.bool('ENABLE_SERVER', True)

# log sinks are registered when this module is imported
logger.add(env.str('LOG_RUNTIME_FILE', join(LOG_DIR, 'runtime.log')), level='DEBUG', rotation='1 week',
           retention='20 days')
logger.add(env.str('LOG_ERROR_FILE', join(LOG_DIR, 'error.log')), level='ERROR', rotation='1 week')
class Antispider6Generator(BaseGenerator):
    """
    generator for antispider6, the cookies of a logged-in session are the credential
    """

    def init(self):
        """
        do init, add two default accounts when there is no account at all
        """
        if self.account_operator.count() == 0:
            self.account_operator.set('admin', 'admin')
            self.account_operator.set('admin2', 'admin2')

    def generate(self, username, password):
        """
        generate main process, log in and store the cookies as 'name=value;name=value'
        :param username: username
        :param password: password
        :return:
        """
        if self.credential_operator.get(username):
            logger.debug(f'credential of {username} exists, skip')
            return
        login_url = 'https://antispider6.scrape.center/login'
        # the session is closed on exit so its connections are not left open
        with requests.Session() as s:
            try:
                # requests waits forever by default, a stalled login would block the generator loop
                s.post(login_url, data={
                    'username': username,
                    'password': password
                }, timeout=10)
            except requests.RequestException as e:
                logger.error(f'error occurred while generating credential of {username}, {e}')
                return
            result = ';'.join(f'{cookie.name}={cookie.value}' for cookie in s.cookies)
        if not result:
            # an empty string must not be stored, the api would hand it out as a credential
            logger.error(f'error occurred while generating credential of {username}, no cookie returned')
            return
        logger.debug(f'get credential {result}')
        self.credential_operator.set(username, result)
class Scheduler(object):
    """
    scheduler, runs tester, generator and api server, each in its own process or alone
    """

    def run_tester(self, website, cycle=CYCLE_TESTER):
        """
        run tester forever, one round every cycle seconds
        :param website: name of website, key of TESTER_MAP
        :param cycle: seconds to sleep between two rounds
        """
        if not ENABLE_TESTER:
            logger.info('tester not enabled, exit')
            return
        tester = getattr(testers, TESTER_MAP[website])(website)
        loop = 0
        while True:
            logger.debug(f'tester loop {loop} start...')
            tester.run()
            loop += 1
            time.sleep(cycle)

    def run_generator(self, website, cycle=CYCLE_GENERATOR):
        """
        run generator forever, one round every cycle seconds
        :param website: name of website, key of GENERATOR_MAP
        :param cycle: seconds to sleep between two rounds
        """
        if not ENABLE_GENERATOR:
            logger.info('getter not enabled, exit')
            return
        generator = getattr(generators, GENERATOR_MAP[website])(website)
        loop = 0
        while True:
            logger.debug(f'getter loop {loop} start...')
            generator.run()
            loop += 1
            time.sleep(cycle)

    def run_server(self, _):
        """
        run server for api
        :param _: website, not used, kept so all run_* methods are called the same way
        """
        if not ENABLE_SERVER:
            logger.info('server not enabled, exit')
            return
        app.run(host=API_HOST, port=API_PORT, threaded=API_THREADED)

    def run(self, website):
        """
        start every enabled processor in its own process and wait for them
        :param website: name of website
        """
        global tester_process, generator_process, server_process
        # only processes that were really started, a disabled processor is never joined or terminated
        started = []
        try:
            logger.info(f'starting account pool for website {website}...')
            if ENABLE_TESTER:
                tester_process = multiprocessing.Process(target=self.run_tester, args=(website,))
                tester_process.start()
                started.append(('tester', tester_process))
                # pid is None before start(), so log it afterwards
                logger.info(f'starting tester, pid {tester_process.pid}...')

            if ENABLE_GENERATOR:
                generator_process = multiprocessing.Process(target=self.run_generator, args=(website,))
                generator_process.start()
                started.append(('getter', generator_process))
                logger.info(f'starting getter, pid{generator_process.pid}...')

            if ENABLE_SERVER:
                server_process = multiprocessing.Process(target=self.run_server, args=(website,))
                server_process.start()
                started.append(('server', server_process))
                logger.info(f'starting server, pid{server_process.pid}...')

            for _, process in started:
                process.join()
        except KeyboardInterrupt:
            logger.info('received keyboard interrupt signal')
            for _, process in started:
                process.terminate()
        finally:
            # must call join method before calling is_alive
            for _, process in started:
                process.join()
            for label, process in started:
                logger.info(f'{label} is {"alive" if process.is_alive() else "dead"}')
            logger.info('accountpool terminated')
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AccountPool 2 | 3 | ![build](https://github.com/Python3WebSpider/AccountPool/workflows/build/badge.svg) 4 | ![](https://img.shields.io/badge/python-3.6%2B-brightgreen) 5 | ![Docker Pulls](https://img.shields.io/docker/pulls/germey/accountpool) 6 | 7 | 简易高效的账号池,提供如下功能: 8 | 9 | - 定时模拟登录账号,将 Cookies 或 JWT 等信息存储到 Redis 数据库。 10 | - 定时测试,剔除不可用 Cookies 或 JWT。 11 | - 提供 API,随机取用测试通过的可用 Cookies 或 JWT。 12 | 13 | ## 使用要求 14 | 15 | 可以通过两种方式来运行账号池,一种方式是使用 Docker(推荐),另一种方式是常规方式运行。 16 | 17 | ### Docker 18 | 19 | 如果使用 Docker,则需要安装如下环境: 20 | 21 | - Docker 22 | - Docker-Compose 23 | 24 | ### 常规方式 25 | 26 | 常规方式要求有 Python 环境、Redis 环境,具体要求如下: 27 | 28 | - Python>=3.6 29 | - Redis 30 | 31 | ## Docker 运行 32 | 33 | 如果安装好了 Docker 和 Docker-Compose,只需要一条命令即可运行。 34 | 35 | ```shell script 36 | docker-compose up 37 | ``` 38 | 39 | 运行结果类似如下: 40 | 41 | ``` 42 | redis4accountpool is up-to-date 43 | Recreating accountpool ... done 44 | Attaching to redis4accountpool, accountpool 45 | redis4accountpool | 1:C 31 Aug 2023 03:53:10.335 * oO0OoO0OoO0Oo Redis is starting oO0OoO0OoO0Oo 46 | redis4accountpool | 1:C 31 Aug 2023 03:53:10.335 * Redis version=7.2.0, bits=64, commit=00000000, modified=0, pid=1, just started 47 | redis4accountpool | 1:C 31 Aug 2023 03:53:10.335 # Warning: no config file specified, using the default config. In order to specify a config file use redis-server /path/to/redis.conf 48 | redis4accountpool | 1:M 31 Aug 2023 03:53:10.335 * monotonic clock: POSIX clock_gettime 49 | redis4accountpool | 1:M 31 Aug 2023 03:53:10.336 * Running mode=standalone, port=6379. 
50 | redis4accountpool | 1:M 31 Aug 2023 03:53:10.336 * Server initialized 51 | redis4accountpool | 1:M 31 Aug 2023 03:53:10.336 * Ready to accept connections tcp 52 | redis4accountpool | 1:C 31 Aug 2023 04:03:11.226 * oO0OoO0OoO0Oo Redis is starting oO0OoO0OoO0Oo 53 | redis4accountpool | 1:C 31 Aug 2023 04:03:11.226 * Redis version=7.2.0, bits=64, commit=00000000, modified=0, pid=1, just started 54 | redis4accountpool | 1:C 31 Aug 2023 04:03:11.226 # Warning: no config file specified, using the default config. In order to specify a config file use redis-server /path/to/redis.conf 55 | redis4accountpool | 1:M 31 Aug 2023 04:03:11.226 * monotonic clock: POSIX clock_gettime 56 | redis4accountpool | 1:M 31 Aug 2023 04:03:11.227 * Running mode=standalone, port=6379. 57 | redis4accountpool | 1:M 31 Aug 2023 04:03:11.227 * Server initialized 58 | redis4accountpool | 1:M 31 Aug 2023 04:03:11.227 * Ready to accept connections tcp 59 | accountpool | 2023-08-31 04:06:20,737 CRIT Supervisor is running as root. Privileges were not dropped because no user is specified in the config file. If you intend to run as root, you can set user=root in the config file to avoid this message. 60 | accountpool | 2023-08-31 04:06:20,739 INFO supervisord started with pid 1 61 | accountpool | 2023-08-31 04:06:21,742 INFO spawned: 'generator' with pid 10 62 | accountpool | 2023-08-31 04:06:21,744 INFO spawned: 'server' with pid 11 63 | accountpool | 2023-08-31 04:06:21,746 INFO spawned: 'tester' with pid 12 64 | accountpool | 2023-08-31 04:06:21.990 | DEBUG | accountpool.scheduler:run_tester:31 - tester loop 0 start... 65 | accountpool | 2023-08-31 04:06:21.990 | DEBUG | accountpool.scheduler:run_generator:46 - getter loop 0 start... 66 | accountpool | * Running on all addresses. 67 | accountpool | WARNING: This is a development server. Do not use it in a production deployment. 
68 | accountpool | * Running on http://172.24.0.3:6777/ (Press CTRL+C to quit) 69 | accountpool | 2023-08-31 04:06:22.004 | DEBUG | accountpool.processors.generator:run:39 - start to run generator 70 | accountpool | 2023-08-31 04:06:22.005 | DEBUG | accountpool.processors.generator:run:43 - start to generate credential of admin1 71 | accountpool | 2023-08-31 04:06:23,007 INFO success: generator entered RUNNING state, process has stayed up for > than 1 seconds (startsecs) 72 | accountpool | 2023-08-31 04:06:23,007 INFO success: server entered RUNNING state, process has stayed up for > than 1 seconds (startsecs) 73 | accountpool | 2023-08-31 04:06:23,007 INFO success: tester entered RUNNING state, process has stayed up for > than 1 seconds (startsecs) 74 | ``` 75 | 76 | 可以看到 Redis、Generator、Server、Tester 都已经启动成功。 77 | 78 | 另外还需要导入一些账号信息到 Redis 数据库里面。由于已经用 Docker 启动了 Redis 数据库,并且它映射在本机的 6333 端口上,所以导入前需要先把 REDIS_PORT 设置为 6333。 79 | 80 | 这时候可以执行脚本: 81 | 82 | ``` 83 | export REDIS_PORT=6333 84 | python3 importer.py antispider7 85 | ``` 86 | 87 | 运行完成之后如果没有报错就说明账号导入成功了,可以自行连上 Redis 看下。 88 | 89 | 过一会访问 [http://localhost:6777/antispider7/random](http://localhost:6777/antispider7/random) 即可获取一个 [antispider7](https://antispider7.scrape.center) 的随机可用 Cookies。 90 | 91 | ## 常规方式运行 92 | 93 | 如果不使用 Docker 运行,配置好 Python、Redis 环境之后也可运行,步骤如下。 94 | 95 | ### 安装和配置 Redis 96 | 97 | 本地安装 Redis、Docker 启动 Redis、远程 Redis 都是可以的,只要能正常连接使用即可。 98 | 99 | 首先需要设置一下环境变量,账号池会通过环境变量读取这些值。 100 | 101 | 设置 Redis 的环境变量有两种方式,一种是分别设置 host、port、password,另一种是设置连接字符串,设置方法分别如下: 102 | 103 | 设置 host、port、password,如果 password 为空可以设置为空字符串,示例如下: 104 | 105 | ```shell script 106 | export REDIS_HOST='localhost' 107 | export REDIS_PORT=6379 108 | export REDIS_PASSWORD='' 109 | export REDIS_DB=0 110 | ``` 111 | 112 | 或者只设置连接字符串: 113 | 114 | ```shell script 115 | export REDIS_CONNECTION_STRING='redis://[password]@host:port/db' 116 | ``` 117 | 118 | 如果没有密码也要设置为: 119 | 120 | ```shell script 121 | export REDIS_CONNECTION_STRING='redis://@host:port/db' 122 | 
``` 123 | 124 | 这里连接字符串的格式需要符合 `redis://[password]@host:port/db` 的格式,注意不要遗漏 `@`。 125 | 126 | 以上两种设置任选其一即可。 127 | 128 | ### 安装依赖包 129 | 130 | 这里强烈推荐使用 [Conda](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#creating-an-environment-with-commands) 131 | 或 [virtualenv](https://virtualenv.pypa.io/en/latest/user_guide.html) 创建虚拟环境,Python 版本不低于 3.6。 132 | 133 | 然后 pip 安装依赖即可: 134 | 135 | ```shell script 136 | pip3 install -r requirements.txt 137 | ``` 138 | 139 | ### 运行账号池 140 | 141 | 有两种方式运行账号池,一种是 Tester、Generator、Server 全部运行,另一种是按需分别运行。 142 | 143 | 一般来说可以选择全部运行,命令如下: 144 | 145 | ```shell script 146 | python3 run.py 147 | ``` 148 | 149 | 运行之后会启动 Tester、Generator、Server,这时访问 `http://localhost:6777/{website}/random`(`{website}` 为站点名称,例如 [http://localhost:6777/antispider7/random](http://localhost:6777/antispider7/random))即可获取该站点的一个随机可用 Cookies 或 JWT。 150 | 151 | 或者如果你弄清楚了账号池的架构,可以按需分别运行,命令如下: 152 | 153 | ```shell script 154 | python3 run.py --processor getter 155 | python3 run.py --processor tester 156 | python3 run.py --processor server 157 | ``` 158 | 159 | 这里 processor 可以指定运行 Tester、Generator 还是 Server。 160 | 161 | ## 可配置项 162 | 163 | 账号池可以通过设置环境变量来配置一些参数。 164 | 165 | ### 开关 166 | 167 | - ENABLE_TESTER:允许 Tester 启动,默认 true 168 | - ENABLE_GENERATOR:允许 Generator 启动,默认 true 169 | - ENABLE_SERVER:允许 Server 启动,默认 true 170 | 171 | ### 环境 172 | 173 | - APP_ENV:运行环境,可以设置 dev、test、prod,即开发、测试、生产环境,默认 dev 174 | - APP_DEBUG:调试模式,可以设置 true 或 false,默认 true 175 | 176 | ### Redis 连接 177 | 178 | - REDIS_HOST:Redis 的 Host 179 | - REDIS_PORT:Redis 的端口 180 | - REDIS_PASSWORD:Redis 的密码 181 | - REDIS_DB:Redis 的数据库索引,如 0、1 182 | - REDIS_CONNECTION_STRING:Redis 连接字符串 183 | - REDIS_KEY:Redis 储存账号信息使用字典的名称 184 | 185 | ### 处理器 186 | 187 | - CYCLE_TESTER:Tester 运行周期,即间隔多久运行一次测试,默认 20 秒 188 | - CYCLE_GETTER:Getter 运行周期,即间隔多久运行一次账号登录信息的获取,默认 100 秒 189 | - API_HOST:API Server 运行 Host,默认 0.0.0.0 190 | - API_PORT:API Server 运行端口,默认 6777 191 | - API_THREADED:API Server 是否使用多线程,默认 true 192 | 193 | ### 日志 194 | 195 | - LOG_DIR:日志相对路径 196 | - LOG_RUNTIME_FILE:运行日志文件名称 197 | 
- LOG_ERROR_FILE:错误日志文件名称 198 | 199 | ## 部署 200 | 201 | 本项目提供了 Kubernetes 部署脚本,如需部署到 Kubernetes,执行如下命令即可: 202 | 203 | ```shell script 204 | cat deployment.yml | sed 's/\${TAG}/latest/g' | kubectl apply -f - 205 | ``` 206 | 207 | ## 待开发 208 | 209 | - [ ] 前端页面管理 210 | - [ ] 使用情况统计分析 211 | 212 | 如有一起开发的兴趣可以在 Issue 留言,非常感谢! 213 | 214 | ## LICENSE 215 | 216 | MIT 217 | --------------------------------------------------------------------------------