├── accountpool
├── __init__.py
├── storages
│ ├── __init__.py
│ └── redis.py
├── utils
│ ├── __init__.py
│ └── parse.py
├── exceptions
│ ├── __init__.py
│ └── init.py
├── processors
│ ├── __init__.py
│ ├── server.py
│ ├── tester.py
│ └── generator.py
├── setting.py
└── scheduler.py
├── Dockerfile
├── requirements.txt
├── importer.py
├── docker-compose.yml
├── run.py
├── .github
├── workflows
│ └── build.yml
└── ISSUE_TEMPLATE
│ └── bug_report.md
├── supervisord.conf
├── register.py
├── .gitignore
├── deployment.yml
└── README.md
/accountpool/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/accountpool/storages/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/accountpool/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/accountpool/exceptions/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/accountpool/processors/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.6
2 | WORKDIR /app
3 | COPY requirements.txt .
4 | RUN pip install -r requirements.txt
5 | COPY . .
6 | CMD ["supervisord", "-c", "supervisord.conf"]
7 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | requests==2.13.0
2 | selenium==3.4.0
3 | redis==2.10.5
4 | Flask==1.1.4
5 | environs==7.2.0
6 | loguru==0.3.2
7 | supervisor==4.1.0
8 | MarkupSafe==2.0.1
9 |
--------------------------------------------------------------------------------
/accountpool/exceptions/init.py:
--------------------------------------------------------------------------------
class InitException(Exception):
    """
    error signalling that initialisation failed
    """

    def __str__(self):
        """
        init error
        :return: the message 'init failed', including the surrounding quotes
        """
        message = 'init failed'
        return repr(message)
8 |
--------------------------------------------------------------------------------
/importer.py:
--------------------------------------------------------------------------------
1 | from accountpool.storages.redis import RedisClient
2 | import argparse
3 |
# seed the account hash of the given website with demo accounts
# admin<start> .. admin<end>, each using its username as password
parser = argparse.ArgumentParser(description='AccountPool')
parser.add_argument('website', type=str, help='website')
args = parser.parse_args()
website = args.website

conn = RedisClient('account', args.website)
start = 1
end = 100
for number in range(start, end + 1):
    credential = f'admin{number}'
    conn.set(credential, credential)
15 |
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: '3'
2 | services:
3 | redis4accountpool:
4 | image: redis:alpine
5 | container_name: redis4accountpool
6 | command: redis-server
7 | ports:
8 | - "6333:6379"
9 | accountpool:
10 | build: .
11 | image: 'germey/accountpool'
12 | container_name: accountpool
13 | ports:
14 | - "6777:6777"
15 | environment:
16 | REDIS_HOST: redis4accountpool
17 | REDIS_PORT: "6379"
18 | API_PORT: "6777"
19 | WEBSITE: antispider7
--------------------------------------------------------------------------------
/accountpool/utils/parse.py:
--------------------------------------------------------------------------------
1 | import re
2 |
def parse_redis_connection_string(connection_string):
    """
    parse a redis connection string, for example:
    redis://[password]@host:port
    rediss://[password]@host:port/db
    the ``password@`` part and the ``/db`` suffix are both optional
    :param connection_string: connection string to parse
    :return: tuple of (host, port, password, db); password is None when it is
        absent or empty, db is 0 when it is absent; the defaults
        ('localhost', 6379, None, 0) are returned when nothing can be parsed
    """
    # raw string, because '\d' inside a normal literal is an invalid escape
    result = re.match(r'rediss?://(?:(.*?)@)?(.*?):(\d+)(?:/(\d+))?', connection_string or '')
    if not result:
        # always four values, the caller unpacks host, port, password and db
        return 'localhost', 6379, None, 0
    password, host, port, db = result.groups()
    return host, int(port), (password or None), int(db or 0)
14 |
--------------------------------------------------------------------------------
/run.py:
--------------------------------------------------------------------------------
1 | from accountpool.scheduler import Scheduler
2 | import argparse
3 |
# command line: run.py <website> [--processor NAME]
parser = argparse.ArgumentParser(description='AccountPool')
parser.add_argument('website', type=str, help='website')
parser.add_argument('--processor', type=str, help='processor to run')
args = parser.parse_args()
website = args.website

if __name__ == '__main__':
    if not args.processor:
        # no processor given, let the scheduler start everything
        Scheduler().run(website)
    else:
        # if processor set, just run it
        runner = getattr(Scheduler(), f'run_{args.processor}')
        runner(website)
16 |
--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
1 | name: build
2 | on:
3 | push:
4 | branches:
5 | - master
6 | paths-ignore:
7 | - .gitignore
8 | - README.md
9 | - '.github/ISSUE_TEMPLATE/**'
10 | jobs:
11 | build:
12 | runs-on: ubuntu-latest
13 | steps:
14 | - name: Checkout Source
15 | uses: actions/checkout@v1
16 | - name: Docker Login
17 | run: docker login -u germey -p ${{ secrets.DOCKERHUB_LOGIN_PASSWORD }}
18 | - name: Build the Docker Image
19 | run: docker-compose build
20 | - name: Tag and Push Master Version
21 | run: |
22 | docker tag germey/accountpool germey/accountpool:master
23 | docker push germey/accountpool:master
24 |
25 |
--------------------------------------------------------------------------------
/supervisord.conf:
--------------------------------------------------------------------------------
1 | [supervisord]
2 | nodaemon=true
3 |
4 | [program:tester]
5 | process_name=tester
6 | command=python3 run.py %(ENV_WEBSITE)s --processor tester
7 | directory=/app
8 | stdout_logfile=/dev/stdout
9 | stdout_logfile_maxbytes=0
10 | stderr_logfile=/dev/stderr
11 | stderr_logfile_maxbytes=0
12 |
13 | [program:generator]
14 | process_name=generator
15 | command=python3 run.py %(ENV_WEBSITE)s --processor generator
16 | directory=/app
17 | stdout_logfile=/dev/stdout
18 | stdout_logfile_maxbytes=0
19 | stderr_logfile=/dev/stderr
20 | stderr_logfile_maxbytes=0
21 |
22 | [program:server]
23 | process_name=server
24 | command=python3 run.py %(ENV_WEBSITE)s --processor server
25 | directory=/app
26 | stdout_logfile=/dev/stdout
27 | stdout_logfile_maxbytes=0
28 | stderr_logfile=/dev/stderr
29 | stderr_logfile_maxbytes=0
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: ''
5 | labels: bug
6 | assignees: Germey
7 |
8 | ---
9 |
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 |
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 |
20 | **Expected behavior**
21 | A clear and concise description of what you expected to happen.
22 |
23 | **Screenshots**
24 | If applicable, add screenshots to help explain your problem.
25 |
26 | **Environments (please complete the following information):**
27 | - OS: [e.g. macOS 10.15.2]
28 | - Python [e.g. Python 3.6]
29 | - Browser [e.g. Chrome 67 ]
30 |
31 | **Additional context**
32 | Add any other context about the problem here.
33 |
--------------------------------------------------------------------------------
/register.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from acinonyx import run
3 | import requests
4 | from loguru import logger
5 |
6 | # This is a script for registering account for antispider7, using acinonyx to accelerate.
7 |
8 | parser = argparse.ArgumentParser(description='AccountPool')
9 | parser.add_argument('website', type=str, help='website')
10 | args = parser.parse_args()
11 | website = args.website
12 |
13 |
@logger.catch()
def register(username, password):
    """
    register one account through the register api of the website
    :param username: username to register
    :param password: password to register
    :return: None, the decoded json response is printed
    """
    logger.debug(f'register using {username} and {password}')
    # bound the request so that a stalled connection cannot block forever;
    # the timeout error is raised inside the logger.catch wrapper
    response = requests.post(f'https://{website}.scrape.center/api/register', json={
        'username': username,
        'password': password
    }, timeout=10)
    print(response.json())
22 |
23 |
if __name__ == '__main__':
    # admin1 .. admin999, each account uses its username as password
    accounts = [(f'admin{index}', f'admin{index}') for index in range(1, 1000)]
    run(register, accounts)
29 |
--------------------------------------------------------------------------------
/accountpool/processors/server.py:
--------------------------------------------------------------------------------
1 | import json
2 | from flask import Flask, g
3 | from accountpool.storages.redis import RedisClient
4 | from accountpool.setting import GENERATOR_MAP
5 | from loguru import logger
6 |
7 | __all__ = ['app']
8 |
9 | app = Flask(__name__)
10 |
11 | account = 'account'
12 | credential = 'credential'
13 |
14 |
@app.route('/')
def index():
    """
    landing page
    :return: welcome message as html
    """
    # the literal must stay on one line, a plain quoted string cannot span lines
    return '<h2>Welcome to Account Pool System</h2>'
18 |
19 |
def get_conn():
    """
    get connection
    attach a credential client and an account client for every website in
    GENERATOR_MAP to flask ``g``, creating only the ones not attached yet
    :return: ``g`` carrying the attributes ``<website>_credential`` and
        ``<website>_account``
    """
    for website in GENERATOR_MAP:
        for kind in (credential, account):
            attribute = f'{website}_{kind}'
            # check the attribute that is really set; the bare website name is
            # never assigned, so testing it rebuilt both clients on every call
            if not hasattr(g, attribute):
                setattr(g, attribute, RedisClient(kind, website))
    return g
30 |
31 |
@app.route('/<website>/random')
def random(website):
    """
    get random credential /weibo/random
    :param website: website
    :return: random credential, or a json error with status 404 when the
        website is unknown or no credential is stored for it
    """
    g = get_conn()
    operator = getattr(g, f'{website}_{credential}', None)
    if operator is None:
        return json.dumps({'status': 'error', 'message': f'unknown website {website}'}), 404
    try:
        result = operator.random()
    except IndexError:
        # RedisClient.random uses random.choice, which raises on an empty hash
        return json.dumps({'status': 'error', 'message': 'no credential available'}), 404
    logger.debug(f'get credential {result}')
    return result
42 |
43 |
@app.route('/<website>/add/<username>/<password>')
def add(website, username, password):
    """
    add account /weibo/add/user/password
    :param website: website
    :param username: username
    :param password: password
    :return: json string with status '1'
    """
    g = get_conn()
    getattr(g, f'{website}_{account}').set(username, password)
    return json.dumps({'status': '1'})
56 |
57 |
@app.route('/<website>/count')
def count(website):
    """
    get count of credential
    :param website: website
    :return: json string with status 'ok' and the number of credentials
    """
    g = get_conn()
    # separate name, so the view function itself is not shadowed
    total = getattr(g, f'{website}_{credential}').count()
    return json.dumps({'status': 'ok', 'count': total})
66 |
67 |
68 | if __name__ == '__main__':
69 | app.run(host='0.0.0.0')
70 |
--------------------------------------------------------------------------------
/accountpool/storages/redis.py:
--------------------------------------------------------------------------------
1 | import random
2 | import redis
3 | from accountpool.setting import *
4 |
5 |
class RedisClient(object):
    """
    redis client
    keeps username -> value pairs of one website in the hash ``<type>:<website>``
    """

    def __init__(self, type, website, host=REDIS_HOST, port=REDIS_PORT, password=REDIS_PASSWORD, db=REDIS_DB):
        """
        init redis client
        :param type: kind of data, first part of the hash name
        :param website: website, second part of the hash name
        :param host: redis host
        :param port: redis port
        :param password: redis password
        :param db: redis db, defaults to the configured REDIS_DB
        """
        # pass db along, otherwise the configured database is silently ignored
        self.db = redis.StrictRedis(host=host, port=port, password=password, db=db, decode_responses=True)
        self.type = type
        self.website = website

    def name(self):
        """
        get hash name
        :return: name of hash
        """
        return f'{self.type}:{self.website}'

    def set(self, username, value):
        """
        set key-value
        :param username: username
        :param value: password or cookies
        :return: result of hset
        """
        return self.db.hset(self.name(), username, value)

    def get(self, username):
        """
        get value
        :param username: username
        :return: result of hget
        """
        return self.db.hget(self.name(), username)

    def delete(self, username):
        """
        delete key-value
        :param username: username
        :return: result
        """
        return self.db.hdel(self.name(), username)

    def count(self):
        """
        get count
        :return: count
        """
        return self.db.hlen(self.name())

    def random(self):
        """
        get random cookies or password
        :return: random cookies or password
        :raises IndexError: when the hash holds no value
        """
        return random.choice(self.db.hvals(self.name()))

    def usernames(self):
        """
        get all usernames
        :return: all usernames
        """
        return self.db.hkeys(self.name())

    def all(self):
        """
        get all key-values
        :return: map of key-values
        """
        return self.db.hgetall(self.name())
81 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /.idea
2 | *.pyc
3 | ghostdriver.log
4 | ### Python template
5 | # Byte-compiled / optimized / DLL files
6 | __pycache__/
7 | *.py[cod]
8 | *$py.class
9 |
10 | # C extensions
11 | *.so
12 |
13 | # Distribution / packaging
14 | .Python
15 | build/
16 | develop-eggs/
17 | dist/
18 | downloads/
19 | eggs/
20 | .eggs/
21 | lib/
22 | lib64/
23 | parts/
24 | sdist/
25 | var/
26 | wheels/
27 | share/python-wheels/
28 | *.egg-info/
29 | .installed.cfg
30 | *.egg
31 | MANIFEST
32 |
33 | # PyInstaller
34 | # Usually these files are written by a python script from a template
35 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
36 | *.manifest
37 | *.spec
38 |
39 | # Installer logs
40 | pip-log.txt
41 | pip-delete-this-directory.txt
42 |
43 | # Unit test / coverage reports
44 | htmlcov/
45 | .tox/
46 | .nox/
47 | .coverage
48 | .coverage.*
49 | .cache
50 | nosetests.xml
51 | coverage.xml
52 | *.cover
53 | *.py,cover
54 | .hypothesis/
55 | .pytest_cache/
56 | cover/
57 |
58 | # Translations
59 | *.mo
60 | *.pot
61 |
62 | # Django stuff:
63 | *.log
64 | local_settings.py
65 | db.sqlite3
66 | db.sqlite3-journal
67 |
68 | # Flask stuff:
69 | instance/
70 | .webassets-cache
71 |
72 | # Scrapy stuff:
73 | .scrapy
74 |
75 | # Sphinx documentation
76 | docs/_build/
77 |
78 | # PyBuilder
79 | .pybuilder/
80 | target/
81 |
82 | # Jupyter Notebook
83 | .ipynb_checkpoints
84 |
85 | # IPython
86 | profile_default/
87 | ipython_config.py
88 |
89 | # pyenv
90 | # For a library or package, you might want to ignore these files since the code is
91 | # intended to run in multiple environments; otherwise, check them in:
92 | # .python-version
93 |
94 | # pipenv
95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
98 | # install all needed dependencies.
99 | #Pipfile.lock
100 |
101 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
102 | __pypackages__/
103 |
104 | # Celery stuff
105 | celerybeat-schedule
106 | celerybeat.pid
107 |
108 | # SageMath parsed files
109 | *.sage.py
110 |
111 | # Environments
112 | .env
113 | .venv
114 | env/
115 | venv/
116 | ENV/
117 | env.bak/
118 | venv.bak/
119 |
120 | # Spyder project settings
121 | .spyderproject
122 | .spyproject
123 |
124 | # Rope project settings
125 | .ropeproject
126 |
127 | # mkdocs documentation
128 | /site
129 |
130 | # mypy
131 | .mypy_cache/
132 | .dmypy.json
133 | dmypy.json
134 |
135 | # Pyre type checker
136 | .pyre/
137 |
138 | # pytype static type analyzer
139 | .pytype/
140 |
141 | # Cython debug symbols
142 | cython_debug/
143 |
144 | logs/
--------------------------------------------------------------------------------
/accountpool/processors/tester.py:
--------------------------------------------------------------------------------
1 | import json
2 | import requests
3 | from requests.exceptions import ConnectionError
4 | from accountpool.storages.redis import *
5 | from accountpool.exceptions.init import InitException
6 | from loguru import logger
7 |
8 |
class BaseTester(object):
    """
    base tester, subclasses implement ``test``
    """

    def __init__(self, website=None):
        """
        init base tester
        :param website: name of website, InitException is raised when it is empty
        """
        self.website = website
        if not self.website:
            raise InitException
        account_client = RedisClient(type='account', website=self.website)
        credential_client = RedisClient(type='credential', website=self.website)
        self.account_operator = account_client
        self.credential_operator = credential_client

    def test(self, username, credential):
        """
        test single credential
        :param username: username
        :param credential: credential stored for the username
        """
        raise NotImplementedError

    def run(self):
        """
        test all credentials
        """
        stored = self.credential_operator.all()
        for username in stored:
            self.test(username, stored[username])
37 |
38 |
class Antispider6Tester(BaseTester):
    """
    tester for antispider6
    """

    def test(self, username, credential):
        """
        test single credential
        requests the test url of the website with the credential as Cookie
        header; any status code other than 200 deletes the credential
        :param username: username
        :param credential: cookie string stored for the username
        """
        logger.info(f'testing credential for {username}')
        try:
            test_url = TEST_URL_MAP[self.website]
            response = requests.get(test_url, headers={
                'Cookie': credential
            }, timeout=5, allow_redirects=False)
        except requests.RequestException:
            # a read timeout is not a ConnectionError; catching the base class
            # keeps one failing request from aborting the whole test run
            logger.info('test failed')
            return
        if response.status_code == 200:
            logger.info('credential is valid')
        else:
            logger.info('credential is not valid, delete it')
            self.credential_operator.delete(username)
64 |
65 |
class Antispider7Tester(BaseTester):
    """
    tester for antispider7
    """

    def test(self, username, credential):
        """
        test single credential
        requests the test url of the website with the credential as jwt
        authorization header; any status code other than 200 deletes the
        credential
        :param username: username
        :param credential: token stored for the username
        """
        logger.info(f'testing credential for {username}')
        try:
            test_url = TEST_URL_MAP[self.website]
            response = requests.get(test_url, headers={
                'authorization': f'jwt {credential}'
            }, timeout=5, allow_redirects=False)
        except requests.RequestException:
            # a read timeout is not a ConnectionError; catching the base class
            # keeps one failing request from aborting the whole test run
            logger.info('test failed')
            return
        if response.status_code == 200:
            logger.info('credential is valid')
        else:
            logger.info('credential is not valid, delete it')
            self.credential_operator.delete(username)
91 |
--------------------------------------------------------------------------------
/deployment.yml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Namespace
3 | metadata:
4 | creationTimestamp: null
5 | name: accountpool
6 | ---
7 | apiVersion: v1
8 | kind: PersistentVolumeClaim
9 | metadata:
10 | name: accountpool
11 | namespace: accountpool
12 | spec:
13 | storageClassName: azure-file
14 | accessModes:
15 | - ReadWriteMany
16 | resources:
17 | requests:
18 | storage: 2Gi
19 | ---
20 | apiVersion: v1
21 | items:
22 | - apiVersion: v1
23 | kind: Service
24 | metadata:
25 | annotations:
26 | kompose.cmd: kompose convert -f docker-compose.yml -o deployment.yml
27 | kompose.version: 1.20.0 ()
28 | creationTimestamp: null
29 | labels:
30 | io.kompose.service: accountpool
31 | name: accountpool
32 | namespace: accountpool
33 | spec:
34 | ports:
35 | - name: "6777"
36 | port: 6777
37 | targetPort: 6777
38 | selector:
39 | io.kompose.service: accountpool
40 | status:
41 | loadBalancer: {}
42 | - apiVersion: apps/v1
43 | kind: Deployment
44 | metadata:
45 | annotations:
46 | kompose.cmd: kompose convert -f docker-compose.yml -o deployment.yml
47 | kompose.version: 1.20.0 ()
48 | creationTimestamp: null
49 | labels:
50 | io.kompose.service: accountpool
51 | name: accountpool
52 | namespace: accountpool
53 | spec:
54 | replicas: 2
55 | revisionHistoryLimit: 1
56 | strategy: {}
57 | selector:
58 | matchLabels:
59 | io.kompose.service: accountpool
60 | template:
61 | metadata:
62 | annotations:
63 | kompose.cmd: kompose convert -f docker-compose.yml -o deployment.yml
64 | kompose.version: 1.20.0 ()
65 | creationTimestamp: null
66 | labels:
67 | io.kompose.service: accountpool
68 | spec:
69 | containers:
70 | - env:
71 | - name: REDIS_CONNECTION_STRING
72 | valueFrom:
73 | secretKeyRef:
74 | name: redis
75 | key: connection_string
76 | - name: REDIS_PORT
77 | value: '6379'
78 | image: germey/accountpool:${TAG}
79 | name: accountpool
80 | resources:
81 | limits:
82 | memory: "500Mi"
83 | cpu: "300m"
84 | requests:
85 | memory: "500Mi"
86 | cpu: "300m"
87 | ports:
88 | - containerPort: 6777
89 | volumeMounts:
90 | - mountPath: "/app/accountpool/logs"
91 | name: accountpool
92 | restartPolicy: Always
93 | volumes:
94 | - name: accountpool
95 | persistentVolumeClaim:
96 | claimName: pvc-accountpool
97 | status: {}
98 | kind: List
99 | metadata: {}
100 |
--------------------------------------------------------------------------------
/accountpool/setting.py:
--------------------------------------------------------------------------------
1 | import platform
2 | from os.path import dirname, abspath, join
3 | from environs import Env
4 | from loguru import logger
5 | from accountpool.utils.parse import parse_redis_connection_string
6 |
env = Env()
env.read_env()

# platform flag
IS_WINDOWS = platform.system().lower() == 'windows'

# directories: project root and log dir below it
ROOT_DIR = dirname(dirname(abspath(__file__)))
LOG_DIR = join(ROOT_DIR, env.str('LOG_DIR', 'logs'))

# runtime environment, selected by APP_ENV
DEV_MODE = 'dev'
TEST_MODE = 'test'
PROD_MODE = 'prod'
APP_ENV = env.str('APP_ENV', DEV_MODE).lower()
# debug defaults to on only in dev mode
APP_DEBUG = env.bool('APP_DEBUG', APP_ENV == DEV_MODE)
APP_DEV = IS_DEV = APP_ENV == DEV_MODE
APP_PROD = IS_PROD = APP_ENV == PROD_MODE
APP_TEST = IS_TEST = APP_ENV == TEST_MODE

# redis host
REDIS_HOST = env.str('REDIS_HOST', '127.0.0.1')
# redis port
REDIS_PORT = env.int('REDIS_PORT', 6379)
# redis password, None means no password
REDIS_PASSWORD = env.str('REDIS_PASSWORD', None)
# redis db, 0 when nothing is chosen
REDIS_DB = env.int('REDIS_DB', 0)
# redis connection string, like redis://[password]@host:port or rediss://[password]@host:port/0
REDIS_CONNECTION_STRING = env.str('REDIS_CONNECTION_STRING', None)

# a connection string, when given, replaces the four single settings above
if REDIS_CONNECTION_STRING:
    REDIS_HOST, REDIS_PORT, REDIS_PASSWORD, REDIS_DB = parse_redis_connection_string(REDIS_CONNECTION_STRING)

# redis hash table key name
REDIS_ACCOUNT_KEY = env.str('REDIS_ACCOUNT_KEY', 'accounts:%s')
REDIS_CREDENTIAL_KEY = env.str('REDIS_CREDENTIAL_KEY', 'credential:%s')

# integrated generator, website -> generator class name
GENERATOR_MAP = {
    'antispider6': 'Antispider6Generator',
    'antispider7': 'Antispider7Generator'
}

# integrated tester, website -> tester class name
TESTER_MAP = {
    'antispider6': 'Antispider6Tester',
    'antispider7': 'Antispider7Tester',
}

# tester cycle, credentials are tested every CYCLE_TESTER seconds
CYCLE_TESTER = env.int('CYCLE_TESTER', 600)
# generator cycle, credentials are generated every CYCLE_GENERATOR seconds
CYCLE_GENERATOR = env.int('CYCLE_GENERATOR', 600)
GET_TIMEOUT = env.int('GET_TIMEOUT', 10)

# tester settings
TEST_URL = env.str('TEST_URL', 'http://www.baidu.com')
TEST_TIMEOUT = env.int('TEST_TIMEOUT', 10)
TEST_BATCH = env.int('TEST_BATCH', 20)
# test url per website
TEST_URL_MAP = {
    'antispider6': 'https://antispider6.scrape.center/',
    'antispider7': 'https://antispider7.scrape.center/'
}

# api settings
API_HOST = env.str('API_HOST', '0.0.0.0')
API_PORT = env.int('API_PORT', 6789)
API_THREADED = env.bool('API_THREADED', True)

# switches of the three processors
ENABLE_TESTER = env.bool('ENABLE_TESTER', True)
ENABLE_GENERATOR = env.bool('ENABLE_GENERATOR', True)
ENABLE_SERVER = env.bool('ENABLE_SERVER', True)

# log sinks: everything from DEBUG up, and errors separately
logger.add(
    env.str('LOG_RUNTIME_FILE', join(LOG_DIR, 'runtime.log')),
    level='DEBUG',
    rotation='1 week',
    retention='20 days',
)
logger.add(
    env.str('LOG_ERROR_FILE', join(LOG_DIR, 'error.log')),
    level='ERROR',
    rotation='1 week',
)
84 |
--------------------------------------------------------------------------------
/accountpool/processors/generator.py:
--------------------------------------------------------------------------------
1 | from accountpool.exceptions.init import InitException
2 | from accountpool.storages.redis import RedisClient
3 | from loguru import logger
4 |
5 |
class BaseGenerator(object):
    """
    base generator, subclasses implement ``generate`` and may override ``init``
    """

    def __init__(self, website=None):
        """
        init base generator
        :param website: name of website, InitException is raised when it is empty
        """
        self.website = website
        if not self.website:
            raise InitException
        account_client = RedisClient(type='account', website=self.website)
        credential_client = RedisClient(type='credential', website=self.website)
        self.account_operator = account_client
        self.credential_operator = credential_client

    def generate(self, username, password):
        """
        generate method
        :param username: username
        :param password: password
        :return:
        """
        raise NotImplementedError

    def init(self):
        """
        do init, nothing by default
        """
        return None

    def run(self):
        """
        run main process: call init, then generate a credential for every
        account that has none yet
        :return:
        """
        self.init()
        logger.debug('start to run generator')
        accounts = self.account_operator.all()
        for username, password in accounts.items():
            already_present = self.credential_operator.get(username)
            if not already_present:
                logger.debug(f'start to generate credential of {username}')
                self.generate(username, password)
45 |
46 |
47 | import requests
48 |
49 |
class Antispider6Generator(BaseGenerator):
    """
    generator for antispider6, the credential is the cookie string set by the
    login page
    """

    def init(self):
        """
        do init, add two default accounts when no account is stored
        """
        if self.account_operator.count() == 0:
            self.account_operator.set('admin', 'admin')
            self.account_operator.set('admin2', 'admin2')

    def generate(self, username, password):
        """
        generate main process
        :param username: username
        :param password: password
        :return: None, the credential is stored only when cookies were received
        """
        if self.credential_operator.get(username):
            logger.debug(f'credential of {username} exists, skip')
            return
        login_url = 'https://antispider6.scrape.center/login'
        s = requests.Session()
        try:
            # bounded, so a stalled login cannot block the generator forever
            s.post(login_url, data={
                'username': username,
                'password': password
            }, timeout=10)
        except requests.RequestException as e:
            logger.error(f'error occurred while generating credential of {username}, {e}')
            return
        result = ';'.join(f'{cookie.name}={cookie.value}' for cookie in s.cookies)
        if not result:
            # no cookie means the login did not succeed; an empty string must
            # not be stored as a credential
            logger.error(f'no cookies received while generating credential of {username}')
            return
        logger.debug(f'get credential {result}')
        self.credential_operator.set(username, result)
80 |
81 |
class Antispider7Generator(BaseGenerator):
    """
    generator for antispider7, the credential is the token returned by the
    login api
    """

    # number of default accounts admin1 .. admin<MAX_COUNT>
    MAX_COUNT = 100

    def init(self):
        """
        do init, store the default accounts
        """
        for i in range(1, self.MAX_COUNT + 1):
            self.account_operator.set(f'admin{i}', f'admin{i}')

    def generate(self, username, password):
        """
        generate main process
        :param username: username
        :param password: password
        :return: None, the credential is stored only when a token was received
        """
        if self.credential_operator.get(username):
            logger.debug(f'credential of {username} exists, skip')
            return
        login_url = 'https://antispider7.scrape.center/api/login'
        s = requests.Session()
        try:
            # bounded, so a stalled login cannot block the generator forever
            r = s.post(login_url, json={
                'username': username,
                'password': password
            }, timeout=10)
        except requests.RequestException as e:
            logger.error(f'error occurred while generating credential of {username}, {e}')
            return
        if r.status_code != 200:
            logger.error(f'error occurred while generating credential of {username}, error code {r.status_code}')
            return
        try:
            token = r.json().get('token')
        except ValueError:
            # body is not json
            token = None
        if not token:
            # a missing token must not be stored as a credential
            logger.error(f'no token received while generating credential of {username}')
            return
        logger.debug(f'get credential {token}')
        self.credential_operator.set(username, token)
112 |
--------------------------------------------------------------------------------
/accountpool/scheduler.py:
--------------------------------------------------------------------------------
1 | import time
2 | import multiprocessing
3 | from accountpool.processors.server import app
4 | from accountpool.processors import generator as generators
5 | from accountpool.processors import tester as testers
6 | from accountpool.setting import CYCLE_GENERATOR, CYCLE_TESTER, API_HOST, API_THREADED, API_PORT, ENABLE_SERVER, \
7 | ENABLE_GENERATOR, ENABLE_TESTER, IS_WINDOWS, TESTER_MAP, GENERATOR_MAP
8 | from loguru import logger
9 |
# required on windows before child processes are created
if IS_WINDOWS:
    multiprocessing.freeze_support()

# handles of the child processes, filled in by Scheduler.run
tester_process = generator_process = server_process = None
14 |
15 |
class Scheduler(object):
    """
    scheduler, runs tester, generator and api server of one website
    """

    def run_tester(self, website, cycle=CYCLE_TESTER):
        """
        run tester in an endless loop
        :param website: website, key of TESTER_MAP
        :param cycle: seconds to sleep between two runs
        """
        if not ENABLE_TESTER:
            logger.info('tester not enabled, exit')
            return
        tester = getattr(testers, TESTER_MAP[website])(website)
        loop = 0
        while True:
            logger.debug(f'tester loop {loop} start...')
            tester.run()
            loop += 1
            time.sleep(cycle)

    def run_generator(self, website, cycle=CYCLE_GENERATOR):
        """
        run generator in an endless loop
        :param website: website, key of GENERATOR_MAP
        :param cycle: seconds to sleep between two runs
        """
        if not ENABLE_GENERATOR:
            logger.info('getter not enabled, exit')
            return
        generator = getattr(generators, GENERATOR_MAP[website])(website)
        loop = 0
        while True:
            logger.debug(f'getter loop {loop} start...')
            generator.run()
            loop += 1
            time.sleep(cycle)

    def run_server(self, _):
        """
        run server for api
        :param _: unused, kept so all processors share one call signature
        """
        if not ENABLE_SERVER:
            logger.info('server not enabled, exit')
            return
        app.run(host=API_HOST, port=API_PORT, threaded=API_THREADED)

    def run(self, website):
        """
        start every enabled processor in its own process and wait for them
        :param website: website handed to each processor
        """
        global tester_process, generator_process, server_process
        # (label, process) of the processes really started; a disabled
        # processor has no process object and must not be joined or terminated
        started = []
        try:
            logger.info(f'starting account pool for website {website}...')
            if ENABLE_TESTER:
                tester_process = multiprocessing.Process(target=self.run_tester, args=(website,))
                # pid is None until start() was called, so start first
                tester_process.start()
                logger.info(f'starting tester, pid {tester_process.pid}...')
                started.append(('tester', tester_process))

            if ENABLE_GENERATOR:
                generator_process = multiprocessing.Process(target=self.run_generator, args=(website,))
                generator_process.start()
                logger.info(f'starting getter, pid{generator_process.pid}...')
                started.append(('getter', generator_process))

            if ENABLE_SERVER:
                server_process = multiprocessing.Process(target=self.run_server, args=(website,))
                server_process.start()
                logger.info(f'starting server, pid{server_process.pid}...')
                started.append(('server', server_process))

            for _, process in started:
                process.join()
        except KeyboardInterrupt:
            logger.info('received keyboard interrupt signal')
            for _, process in started:
                process.terminate()
        finally:
            # must call join method before calling is_alive
            for _, process in started:
                process.join()
            for label, process in started:
                logger.info(f'{label} is {"alive" if process.is_alive() else "dead"}')
            logger.info('accountpool terminated')
96 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # AccountPool
2 |
3 | 
4 | 
5 | 
6 |
7 | 简易高效的账号池,提供如下功能:
8 |
9 | - 定时模拟登录账号,将 Cookies 或 JWT 等信息存储到 Redis 数据库。
10 | - 定时测试,剔除不可用 Cookies 或 JWT。
11 | - 提供 API,随机取用测试通过的可用 Cookies 或 JWT。
12 |
13 | ## 使用要求
14 |
15 | 可以通过两种方式来运行账号池,一种方式是使用 Docker(推荐),另一种方式是常规方式运行。
16 |
17 | ### Docker
18 |
19 | 如果使用 Docker,则需要安装如下环境:
20 |
21 | - Docker
22 | - Docker-Compose
23 |
24 | ### 常规方式
25 |
26 | 常规方式要求有 Python 环境、Redis 环境,具体要求如下:
27 |
28 | - Python>=3.6
29 | - Redis
30 |
31 | ## Docker 运行
32 |
33 | 如果安装好了 Docker 和 Docker-Compose,只需要一条命令即可运行。
34 |
35 | ```shell script
36 | docker-compose up
37 | ```
38 |
39 | 运行结果类似如下:
40 |
41 | ```
42 | redis4accountpool is up-to-date
43 | Recreating accountpool ... done
44 | Attaching to redis4accountpool, accountpool
45 | redis4accountpool | 1:C 31 Aug 2023 03:53:10.335 * oO0OoO0OoO0Oo Redis is starting oO0OoO0OoO0Oo
46 | redis4accountpool | 1:C 31 Aug 2023 03:53:10.335 * Redis version=7.2.0, bits=64, commit=00000000, modified=0, pid=1, just started
47 | redis4accountpool | 1:C 31 Aug 2023 03:53:10.335 # Warning: no config file specified, using the default config. In order to specify a config file use redis-server /path/to/redis.conf
48 | redis4accountpool | 1:M 31 Aug 2023 03:53:10.335 * monotonic clock: POSIX clock_gettime
49 | redis4accountpool | 1:M 31 Aug 2023 03:53:10.336 * Running mode=standalone, port=6379.
50 | redis4accountpool | 1:M 31 Aug 2023 03:53:10.336 * Server initialized
51 | redis4accountpool | 1:M 31 Aug 2023 03:53:10.336 * Ready to accept connections tcp
52 | redis4accountpool | 1:C 31 Aug 2023 04:03:11.226 * oO0OoO0OoO0Oo Redis is starting oO0OoO0OoO0Oo
53 | redis4accountpool | 1:C 31 Aug 2023 04:03:11.226 * Redis version=7.2.0, bits=64, commit=00000000, modified=0, pid=1, just started
54 | redis4accountpool | 1:C 31 Aug 2023 04:03:11.226 # Warning: no config file specified, using the default config. In order to specify a config file use redis-server /path/to/redis.conf
55 | redis4accountpool | 1:M 31 Aug 2023 04:03:11.226 * monotonic clock: POSIX clock_gettime
56 | redis4accountpool | 1:M 31 Aug 2023 04:03:11.227 * Running mode=standalone, port=6379.
57 | redis4accountpool | 1:M 31 Aug 2023 04:03:11.227 * Server initialized
58 | redis4accountpool | 1:M 31 Aug 2023 04:03:11.227 * Ready to accept connections tcp
59 | accountpool | 2023-08-31 04:06:20,737 CRIT Supervisor is running as root. Privileges were not dropped because no user is specified in the config file. If you intend to run as root, you can set user=root in the config file to avoid this message.
60 | accountpool | 2023-08-31 04:06:20,739 INFO supervisord started with pid 1
61 | accountpool | 2023-08-31 04:06:21,742 INFO spawned: 'generator' with pid 10
62 | accountpool | 2023-08-31 04:06:21,744 INFO spawned: 'server' with pid 11
63 | accountpool | 2023-08-31 04:06:21,746 INFO spawned: 'tester' with pid 12
64 | accountpool | 2023-08-31 04:06:21.990 | DEBUG | accountpool.scheduler:run_tester:31 - tester loop 0 start...
65 | accountpool | 2023-08-31 04:06:21.990 | DEBUG | accountpool.scheduler:run_generator:46 - getter loop 0 start...
66 | accountpool | * Running on all addresses.
67 | accountpool | WARNING: This is a development server. Do not use it in a production deployment.
68 | accountpool | * Running on http://172.24.0.3:6777/ (Press CTRL+C to quit)
69 | accountpool | 2023-08-31 04:06:22.004 | DEBUG | accountpool.processors.generator:run:39 - start to run generator
70 | accountpool | 2023-08-31 04:06:22.005 | DEBUG | accountpool.processors.generator:run:43 - start to generate credential of admin1
71 | accountpool | 2023-08-31 04:06:23,007 INFO success: generator entered RUNNING state, process has stayed up for > than 1 seconds (startsecs)
72 | accountpool | 2023-08-31 04:06:23,007 INFO success: server entered RUNNING state, process has stayed up for > than 1 seconds (startsecs)
73 | accountpool | 2023-08-31 04:06:23,007 INFO success: tester entered RUNNING state, process has stayed up for > than 1 seconds (startsecs)
74 | ```
75 |
76 | 可以看到 Redis、Generator、Server、Tester 都已经启动成功。
77 |
78 | 另外还需要导入一些账号信息到 Redis 数据库里面。前面已经用 Docker 启动了 Redis 数据库,它运行在 6333 端口上。
79 |
80 | 这时候可以执行脚本:
81 |
82 | ```
83 | export REDIS_PORT=6333
84 | python3 importer.py antispider7
85 | ```
86 |
87 | 运行完成之后如果没有报错就说明账号导入成功了,可以自行连上 Redis 看下。
88 |
89 | 过一会访问 [http://localhost:6777/antispider7/random](http://localhost:6777/antispider7/random) 即可获取一个 [antispider7](https://antispider7.scrape.center) 的随机可用 Cookies。
90 |
91 | ## 常规方式运行
92 |
93 | 如果不使用 Docker 运行,配置好 Python、Redis 环境之后也可运行,步骤如下。
94 |
95 | ### 安装和配置 Redis
96 |
97 | 本地安装 Redis、Docker 启动 Redis、远程 Redis 都是可以的,只要能正常连接使用即可。
98 |
99 | 首先需要设置一下环境变量,账号池会通过环境变量读取这些值。
100 |
101 | 设置 Redis 的环境变量有两种方式,一种是分别设置 host、port、password,另一种是设置连接字符串,设置方法分别如下:
102 |
103 | 设置 host、port、password,如果 password 为空可以设置为空字符串,示例如下:
104 |
105 | ```shell script
106 | export REDIS_HOST='localhost'
107 | export REDIS_PORT=6379
108 | export REDIS_PASSWORD=''
109 | export REDIS_DB=0
110 | ```
111 |
112 | 或者只设置连接字符串:
113 |
114 | ```shell script
115 | export REDIS_CONNECTION_STRING='redis://[password]@host:port/db'
116 | ```
117 |
118 | 如果没有密码也要设置为:
119 |
120 | ```shell script
121 | export REDIS_CONNECTION_STRING='redis://@host:port/db'
122 | ```
123 |
124 | 这里连接字符串的格式需要符合 `redis://[password]@host:port/db` 的格式,注意不要遗漏 `@`。
125 |
126 | 以上两种设置任选其一即可。
127 |
128 | ### 安装依赖包
129 |
130 | 这里强烈推荐使用 [Conda](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#creating-an-environment-with-commands)
131 | 或 [virtualenv](https://virtualenv.pypa.io/en/latest/user_guide.html) 创建虚拟环境,Python 版本不低于 3.6。
132 |
133 | 然后 pip 安装依赖即可:
134 |
135 | ```shell script
136 | pip3 install -r requirements.txt
137 | ```
138 |
139 | ### 运行账号池
140 |
141 | 两种方式运行账号池,一种是 Tester、Generator、Server 全部运行,另一种是按需分别运行。
142 |
143 | 一般来说可以选择全部运行,命令如下:
144 |
145 | ```shell script
146 | python3 run.py
147 | ```
148 |
149 | 运行之后会启动 Tester、Generator、Server,这时访问 `http://localhost:6777/{website}/random`(例如 [http://localhost:6777/antispider7/random](http://localhost:6777/antispider7/random))即可获取对应网站的一个随机可用 Cookies 或 JWT。
150 |
151 | 或者如果你弄清楚了账号池的架构,可以按需分别运行,命令如下:
152 |
153 | ```shell script
154 | python3 run.py --processor getter
155 | python3 run.py --processor tester
156 | python3 run.py --processor server
157 | ```
158 |
159 | 这里 processor 可以指定运行 Tester、Generator 还是 Server。
160 |
161 | ## 可配置项
162 |
163 | 账号池可以通过设置环境变量来配置一些参数。
164 |
165 | ### 开关
166 |
167 | - ENABLE_TESTER:允许 Tester 启动,默认 true
168 | - ENABLE_GENERATOR:允许 Generator 启动,默认 true
169 | - ENABLE_SERVER:允许 Server 启动,默认 true
170 |
171 | ### 环境
172 |
173 | - APP_ENV:运行环境,可以设置 dev、test、prod,即开发、测试、生产环境,默认 dev
174 | - APP_DEBUG:调试模式,可以设置 true 或 false,默认 true
175 |
176 | ### Redis 连接
177 |
178 | - REDIS_HOST:Redis 的 Host
179 | - REDIS_PORT:Redis 的端口
180 | - REDIS_PASSWORD:Redis 的密码
181 | - REDIS_DB:Redis 的数据库索引,如 0、1
182 | - REDIS_CONNECTION_STRING:Redis 连接字符串
183 | - REDIS_KEY:Redis 储存账号信息使用字典的名称
184 |
185 | ### 处理器
186 |
187 | - CYCLE_TESTER:Tester 运行周期,即间隔多久运行一次测试,默认 20 秒
188 | - CYCLE_GETTER:Getter(即 Generator)运行周期,即间隔多久运行一次模拟登录以生成 Cookies 或 JWT,默认 100 秒
189 | - API_HOST:API Server 运行 Host,默认 0.0.0.0
190 | - API_PORT:API Server 运行端口,默认 6777
191 | - API_THREADED:API Server 是否使用多线程,默认 true
192 |
193 | ### 日志
194 |
195 | - LOG_DIR:日志相对路径
196 | - LOG_RUNTIME_FILE:运行日志文件名称
197 | - LOG_ERROR_FILE:错误日志文件名称
198 |
199 | ## 部署
200 |
201 | 本项目提供了 Kubernetes 部署脚本,如需部署到 Kubernetes,执行如下命令即可:
202 |
203 | ```shell script
204 | cat deployment.yml | sed 's/\${TAG}/latest/g' | kubectl apply -f -
205 | ```
206 |
207 | ## 待开发
208 |
209 | - [ ] 前端页面管理
210 | - [ ] 使用情况统计分析
211 |
212 | 如有一起开发的兴趣可以在 Issue 留言,非常感谢!
213 |
214 | ## LICENSE
215 |
216 | MIT
217 |
--------------------------------------------------------------------------------