├── .gitignore
├── README.md
├── alembic.ini
├── alembic
│   ├── README
│   ├── env.py
│   ├── script.py.mako
│   └── versions
│       ├── 1512a8979993_create_contest.py
│       └── d3d546e27222_create_tables.py
├── config.py
├── manage.py
├── requirements.txt
├── run.py
├── server
│   ├── __init__.py
│   └── views.py
└── vjudge
    ├── __init__.py
    ├── database.py
    ├── main.py
    ├── models.py
    └── site
        ├── __init__.py
        ├── base.py
        ├── exceptions.py
        ├── hdu
        │   ├── __init__.py
        │   └── client.py
        └── scu
            ├── __init__.py
            ├── captcha.db
            └── client.py
/.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | __pycache__/ 3 | data.sqlite 4 | tests/ 5 | venv/ 6 | accounts.json 7 | data.sqlite-journal 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This repository has been moved to https://github.com/gozssky/vjudge. 2 | -------------------------------------------------------------------------------- /alembic.ini: -------------------------------------------------------------------------------- 1 | # A generic, single database configuration. 2 | 3 | [alembic] 4 | # path to migration scripts 5 | script_location = alembic 6 | 7 | # template used to generate migration files 8 | # file_template = %%(rev)s_%%(slug)s 9 | 10 | # timezone to use when rendering the date 11 | # within the migration file as well as the filename. 12 | # string value is passed to dateutil.tz.gettz() 13 | # leave blank for localtime 14 | # timezone = 15 | 16 | # max length of characters to apply to the 17 | # "slug" field 18 | #truncate_slug_length = 40 19 | 20 | # set to 'true' to run the environment during 21 | # the 'revision' command, regardless of autogenerate 22 | # revision_environment = false 23 | 24 | # set to 'true' to allow .pyc and .pyo files without 25 | # a source .py file to be detected as revisions in the 26 | # versions/ directory 27 | # sourceless = false 28 | 29 | # version location specification; this defaults 30 | # to alembic/versions. When using multiple version 31 | # directories, initial revisions must be specified with --version-path 32 | # version_locations = %(here)s/bar %(here)s/bat alembic/versions 33 | 34 | # the output encoding used when revision files 35 | # are written from script.py.mako 36 | # output_encoding = utf-8 37 | 38 | sqlalchemy.url = driver://user:pass@localhost/dbname 39 | 40 | 41 | # Logging configuration 42 | [loggers] 43 | keys = root,sqlalchemy,alembic 44 | 45 | [handlers] 46 | keys = console 47 | 48 | [formatters] 49 | keys = generic 50 | 51 | [logger_root] 52 | level = WARN 53 | handlers = console 54 | qualname = 55 | 56 | [logger_sqlalchemy] 57 | level = WARN 58 | handlers = 59 | qualname = sqlalchemy.engine 60 | 61 | [logger_alembic] 62 | level = INFO 63 | handlers = 64 | qualname = alembic 65 | 66 | [handler_console] 67 | class = StreamHandler 68 | args = (sys.stderr,) 69 | level = NOTSET 70 | formatter = generic 71 | 72 | [formatter_generic] 73 | format = %(levelname)-5.5s [%(name)s] %(message)s 74 | datefmt = %H:%M:%S 75 | -------------------------------------------------------------------------------- /alembic/README: -------------------------------------------------------------------------------- 1 | Generic single-database configuration.
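A note on applying these migrations: alembic.ini deliberately leaves sqlalchemy.url as a placeholder, because alembic/env.py (below) overwrites it at runtime with SQLALCHEMY_DATABASE_URI from config.py, so running `alembic upgrade head` from the repository root is sufficient. For reference, a minimal sketch of the equivalent in-process invocation through Alembic's command API (this script is illustrative, not a file in this repository):

from alembic import command
from alembic.config import Config

# Load alembic.ini from the repo root; env.py then swaps in the real database URL.
alembic_cfg = Config('alembic.ini')
# Applies d3d546e27222 (create tables) and then 1512a8979993 (create contest).
command.upgrade(alembic_cfg, 'head')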
-------------------------------------------------------------------------------- /alembic/env.py: -------------------------------------------------------------------------------- 1 | from __future__ import with_statement 2 | from alembic import context 3 | from sqlalchemy import engine_from_config, pool 4 | from logging.config import fileConfig 5 | import os 6 | import sys 7 | 8 | sys.path.insert(0, os.path.realpath('.')) 9 | 10 | # this is the Alembic Config object, which provides 11 | # access to the values within the .ini file in use. 12 | config = context.config 13 | 14 | # Interpret the config file for Python logging. 15 | # This line sets up loggers basically. 16 | fileConfig(config.config_file_name) 17 | 18 | # add your model's MetaData object here 19 | # for 'autogenerate' support 20 | # from myapp import mymodel 21 | # target_metadata = mymodel.Base.metadata 22 | from vjudge.models import db 23 | from config import SQLALCHEMY_DATABASE_URI 24 | 25 | config.set_main_option('sqlalchemy.url', SQLALCHEMY_DATABASE_URI) 26 | target_metadata = db.Model.metadata 27 | 28 | 29 | # other values from the config, defined by the needs of env.py, 30 | # can be acquired: 31 | # my_important_option = config.get_main_option("my_important_option") 32 | # ... etc. 33 | 34 | 35 | def run_migrations_offline(): 36 | """Run migrations in 'offline' mode. 37 | 38 | This configures the context with just a URL 39 | and not an Engine, though an Engine is acceptable 40 | here as well. By skipping the Engine creation 41 | we don't even need a DBAPI to be available. 42 | 43 | Calls to context.execute() here emit the given string to the 44 | script output. 45 | 46 | """ 47 | url = config.get_main_option("sqlalchemy.url") 48 | context.configure( 49 | url=url, target_metadata=target_metadata, literal_binds=True) 50 | 51 | with context.begin_transaction(): 52 | context.run_migrations() 53 | 54 | 55 | def run_migrations_online(): 56 | """Run migrations in 'online' mode. 57 | 58 | In this scenario we need to create an Engine 59 | and associate a connection with the context. 60 | 61 | """ 62 | connectable = engine_from_config( 63 | config.get_section(config.config_ini_section), 64 | prefix='sqlalchemy.', 65 | poolclass=pool.NullPool) 66 | 67 | with connectable.connect() as connection: 68 | context.configure( 69 | connection=connection, 70 | target_metadata=target_metadata 71 | ) 72 | 73 | with context.begin_transaction(): 74 | context.run_migrations() 75 | 76 | 77 | if context.is_offline_mode(): 78 | run_migrations_offline() 79 | else: 80 | run_migrations_online() 81 | -------------------------------------------------------------------------------- /alembic/script.py.mako: -------------------------------------------------------------------------------- 1 | """${message} 2 | 3 | Revision ID: ${up_revision} 4 | Revises: ${down_revision | comma,n} 5 | Create Date: ${create_date} 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | ${imports if imports else ""} 11 | 12 | # revision identifiers, used by Alembic. 
13 | revision = ${repr(up_revision)} 14 | down_revision = ${repr(down_revision)} 15 | branch_labels = ${repr(branch_labels)} 16 | depends_on = ${repr(depends_on)} 17 | 18 | 19 | def upgrade(): 20 | ${upgrades if upgrades else "pass"} 21 | 22 | 23 | def downgrade(): 24 | ${downgrades if downgrades else "pass"} 25 | -------------------------------------------------------------------------------- /alembic/versions/1512a8979993_create_contest.py: -------------------------------------------------------------------------------- 1 | """create contest 2 | 3 | Revision ID: 1512a8979993 4 | Revises: d3d546e27222 5 | Create Date: 2018-07-22 01:21:44.372782 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = '1512a8979993' 14 | down_revision = 'd3d546e27222' 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade(): 20 | # ### commands auto generated by Alembic - please adjust! ### 21 | op.create_table('contests', 22 | sa.Column('oj_name', sa.String(), nullable=False), 23 | sa.Column('site', sa.String(), nullable=False), 24 | sa.Column('contest_id', sa.String(), nullable=False), 25 | sa.Column('title', sa.String(), nullable=True), 26 | sa.Column('public', sa.Boolean(), nullable=True), 27 | sa.Column('status', sa.String(), nullable=True), 28 | sa.Column('start_time', sa.DateTime(), nullable=True), 29 | sa.Column('end_time', sa.DateTime(), nullable=True), 30 | sa.PrimaryKeyConstraint('oj_name'), 31 | sa.UniqueConstraint('site', 'contest_id', name='_site_contest_id_uc') 32 | ) 33 | # ### end Alembic commands ### 34 | 35 | 36 | def downgrade(): 37 | # ### commands auto generated by Alembic - please adjust! ### 38 | op.drop_table('contests') 39 | # ### end Alembic commands ### 40 | -------------------------------------------------------------------------------- /alembic/versions/d3d546e27222_create_tables.py: -------------------------------------------------------------------------------- 1 | """create tables 2 | 3 | Revision ID: d3d546e27222 4 | Revises: 5 | Create Date: 2018-07-21 21:10:53.958643 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = 'd3d546e27222' 14 | down_revision = None 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade(): 20 | # ### commands auto generated by Alembic - please adjust! 
### 21 | op.create_table('problems', 22 | sa.Column('oj_name', sa.String(), nullable=False), 23 | sa.Column('problem_id', sa.String(), nullable=False), 24 | sa.Column('last_update', sa.DateTime(), nullable=False), 25 | sa.Column('title', sa.String(), nullable=True), 26 | sa.Column('description', sa.String(), nullable=True), 27 | sa.Column('input', sa.String(), nullable=True), 28 | sa.Column('output', sa.String(), nullable=True), 29 | sa.Column('sample_input', sa.String(), nullable=True), 30 | sa.Column('sample_output', sa.String(), nullable=True), 31 | sa.Column('time_limit', sa.Integer(), nullable=True), 32 | sa.Column('mem_limit', sa.Integer(), nullable=True), 33 | sa.PrimaryKeyConstraint('oj_name', 'problem_id') 34 | ) 35 | op.create_index(op.f('ix_problems_oj_name'), 'problems', ['oj_name'], unique=False) 36 | op.create_index(op.f('ix_problems_problem_id'), 'problems', ['problem_id'], unique=False) 37 | op.create_table('submissions', 38 | sa.Column('id', sa.Integer(), nullable=False), 39 | sa.Column('user_id', sa.String(), nullable=True), 40 | sa.Column('oj_name', sa.String(), nullable=False), 41 | sa.Column('problem_id', sa.String(), nullable=False), 42 | sa.Column('language', sa.String(), nullable=False), 43 | sa.Column('source_code', sa.String(), nullable=False), 44 | sa.Column('run_id', sa.String(), nullable=True), 45 | sa.Column('verdict', sa.String(), nullable=True), 46 | sa.Column('exe_time', sa.Integer(), nullable=True), 47 | sa.Column('exe_mem', sa.Integer(), nullable=True), 48 | sa.Column('time_stamp', sa.DateTime(), nullable=True), 49 | sa.PrimaryKeyConstraint('id') 50 | ) 51 | op.create_index(op.f('ix_submissions_user_id'), 'submissions', ['user_id'], unique=False) 52 | # ### end Alembic commands ### 53 | 54 | 55 | def downgrade(): 56 | # ### commands auto generated by Alembic - please adjust! 
### 57 | op.drop_index(op.f('ix_submissions_user_id'), table_name='submissions') 58 | op.drop_table('submissions') 59 | op.drop_index(op.f('ix_problems_problem_id'), table_name='problems') 60 | op.drop_index(op.f('ix_problems_oj_name'), table_name='problems') 61 | op.drop_table('problems') 62 | # ### end Alembic commands ### 63 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | import random 5 | import re 6 | 7 | from gunicorn.glogging import Logger 8 | 9 | LOG_ENV = os.environ.get('LOG_ENV') or 'NORMAL' 10 | LOG_LEVEL = os.environ.get('LOG_LEVEL') or 'info' 11 | LOG_LEVELS = { 12 | "critical": logging.CRITICAL, 13 | "error": logging.ERROR, 14 | "warning": logging.WARNING, 15 | "info": logging.INFO, 16 | "debug": logging.DEBUG 17 | } 18 | 19 | if LOG_ENV == 'JOURNAL': 20 | log_format = r'[%(levelname)s] %(message)s' 21 | else: 22 | log_format = r'[%(asctime)s] [%(process)d] [%(levelname)s] %(message)s' 23 | 24 | date_fmt = r'%Y-%m-%d %H:%M:%S %z' 25 | 26 | 27 | class GLogger(Logger): 28 | error_fmt = log_format 29 | datefmt = date_fmt 30 | 31 | 32 | log_level = LOG_LEVELS.get(LOG_LEVEL, logging.INFO) 33 | logging.basicConfig(level=log_level, format=log_format, datefmt=date_fmt) 34 | logger = logging.getLogger('vjudge-core') 35 | 36 | SQLALCHEMY_DATABASE_URI = (os.environ.get('DATABASE_URL') or 37 | 'sqlite:///' + os.path.dirname(__file__) + '/data.sqlite') 38 | 39 | OJ_CONFIG = os.path.dirname(__file__) + '/accounts.json' 40 | 41 | DEFAULT_REDIS_URI = 'redis://localhost:6379/0' 42 | 43 | REDIS_CONFIG = { 44 | 'host': 'localhost', 45 | 'port': 6379, 46 | 'db': 0, 47 | 'queue': { 48 | 'submitter_queue': 'vjudge-core-task-submitter', 49 | 'crawler_queue': 'vjudge-core-task-crawler' 50 | } 51 | } 52 | 53 | USER_AGENTS = [ 54 | "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)", 55 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)", 56 | "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)", 57 | "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)", 58 | "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)", 59 | "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)", 60 | "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)", 61 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)", 62 | "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6", 63 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1", 64 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0", 65 | "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5", 66 | "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6", 67 | "Mozilla/5.0 (Windows NT 6.1; WOW64) 
AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11", 68 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20", 69 | "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52", 70 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11", 71 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER", 72 | "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)", 73 | "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)", 74 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 LBBROWSER", 75 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)", 76 | "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)", 77 | "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)", 78 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; 360SE)", 79 | "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)", 80 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)", 81 | "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1", 82 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1", 83 | "Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5", 84 | "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0b13pre) Gecko/20110307 Firefox/4.0b13pre", 85 | "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0", 86 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11", 87 | "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10" 88 | ] 89 | 90 | 91 | def get_header(): 92 | return { 93 | 'User-Agent': random.choice(USER_AGENTS), 94 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 95 | 'Accept-Language': 'en-US,en;q=0.5', 96 | 'Connection': 'keep-alive', 97 | 'Accept-Encoding': 'gzip, deflate' 98 | } 99 | 100 | 101 | def get_accounts(): 102 | with open(OJ_CONFIG) as f: 103 | result = json.load(f) 104 | normal_accounts = {} 105 | for account in result['normal_accounts']: 106 | site = account['site'] 107 | authentications = [] 108 | for auth in account['auth']: 109 | authentications.append((auth['username'], auth['password'])) 110 | normal_accounts[site] = authentications 111 | contest_accounts = {} 112 | for account in result['contest_accounts']: 113 | site = account['site'] 114 | for auth in account['auth']: 115 | supported_contests = auth['supported_contests'] 
116 | for contest_id in supported_contests: 117 | oj_name = f'{site}_ct_{contest_id}' 118 | if oj_name not in contest_accounts: 119 | contest_accounts[oj_name] = [] 120 | authentications = contest_accounts.get(oj_name) 121 | authentications.append((auth['username'], auth['password'])) 122 | return normal_accounts, contest_accounts 123 | 124 | 125 | def init_redis_config(): 126 | redis_uri = os.environ.get('REDIS_URI') or DEFAULT_REDIS_URI 127 | match = re.match('^redis://(.*?):([0-9]+)/([0-9]+)$', redis_uri) 128 | if match: 129 | REDIS_CONFIG['host'], REDIS_CONFIG['port'], REDIS_CONFIG['db'] = match.groups() 130 | 131 | 132 | init_redis_config() 133 | -------------------------------------------------------------------------------- /manage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from flask_script import Manager, Shell 3 | 4 | from server import app 5 | from vjudge import db 6 | from vjudge.models import Submission, Problem 7 | 8 | 9 | def make_shell_context(): 10 | return dict(app=app, db=db, Submission=Submission, Problem=Problem) 11 | 12 | 13 | manager = Manager(app) 14 | manager.add_command('shell', Shell(make_context=make_shell_context)) 15 | 16 | if __name__ == '__main__': 17 | manager.run() 18 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | alembic==1.0.11 2 | beautifulsoup4==4.8.0 3 | certifi==2019.6.16 4 | chardet==3.0.4 5 | Click==7.0 6 | Flask==1.1.1 7 | Flask-Script==2.0.6 8 | gevent==1.4.0 9 | greenlet==0.4.15 10 | gunicorn==19.9.0 11 | idna==2.8 12 | itsdangerous==1.1.0 13 | Jinja2==2.11.3 14 | lxml==4.6.3 15 | Mako==1.0.13 16 | MarkupSafe==1.1.1 17 | python-dateutil==2.8.0 18 | python-editor==1.0.4 19 | redis==3.2.1 20 | requests==2.22.0 21 | six==1.12.0 22 | soupsieve==1.9.2 23 | SQLAlchemy==1.3.5 24 | urllib3==1.26.5 25 | Werkzeug==0.15.5 26 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import shlex 3 | import subprocess 4 | 5 | from config import get_accounts, logger, LOG_LEVEL 6 | from vjudge.main import VJudge 7 | 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument('-b', required=False, dest='address', default='localhost:5000', help='address to bind') 10 | args = parser.parse_args() 11 | 12 | p = subprocess.Popen( 13 | shlex.split(f"gunicorn -w 2 -k gevent --logger-class config.GLogger --log-level {LOG_LEVEL} " 14 | f"-b '{args.address}' manage:app")) 15 | 16 | try: 17 | normal_accounts, contest_accounts = get_accounts() 18 | vjudge = VJudge(normal_accounts=normal_accounts, contest_accounts=contest_accounts) 19 | vjudge.start() 20 | except KeyboardInterrupt: 21 | logger.info('VJudge exiting') 22 | finally: 23 | p.terminate() 24 | p.wait() 25 | -------------------------------------------------------------------------------- /server/__init__.py: -------------------------------------------------------------------------------- 1 | from .views import app 2 | -------------------------------------------------------------------------------- /server/views.py: -------------------------------------------------------------------------------- 1 | import json 2 | from datetime import datetime, timedelta 3 | 4 | import redis 5 | from flask import Flask, jsonify, request, abort, url_for 6 | from sqlalchemy import 
and_, or_ 7 | 8 | from config import REDIS_CONFIG 9 | from vjudge.models import db, Submission, Problem, Contest 10 | from vjudge.site import contest_clients, supported_sites, supported_contest_sites 11 | 12 | app = Flask(__name__) 13 | 14 | redis_con = redis.StrictRedis(host=REDIS_CONFIG['host'], port=REDIS_CONFIG['port'], db=REDIS_CONFIG['db']) 15 | submitter_queue = REDIS_CONFIG['queue']['submitter_queue'] 16 | crawler_queue = REDIS_CONFIG['queue']['crawler_queue'] 17 | 18 | 19 | @app.route('/problems/') 20 | def get_problem_list(): 21 | page = request.args.get('page', 1, type=int) 22 | per_page = request.args.get('per_page', 20, type=int) 23 | oj_name = request.args.get('oj_name', '') 24 | problem_id = request.args.get('problem_id', '') 25 | if oj_name: 26 | oj_name_filter = Problem.oj_name == oj_name 27 | else: 28 | filter_args = [] 29 | for site in supported_sites: 30 | filter_args.append(Problem.oj_name == site) 31 | oj_name_filter = or_(*filter_args) 32 | pagination = Problem.query.filter( 33 | and_(oj_name_filter, Problem.problem_id.like(problem_id or '%'))).paginate( 34 | page=page, per_page=per_page, error_out=False) 35 | problems = pagination.items 36 | page = pagination.page 37 | prev = None 38 | if pagination.has_prev: 39 | prev = url_for('get_problem_list', oj_name=oj_name, problem_id=problem_id, 40 | page=page - 1, per_page=per_page, _external=True) 41 | next = None 42 | if pagination.has_next: 43 | next = url_for('get_problem_list', oj_name=oj_name, problem_id=problem_id, 44 | page=page + 1, per_page=per_page, _external=True) 45 | return jsonify({ 46 | 'problems': [p.summary() for p in problems], 47 | 'prev': prev, 48 | 'next': next, 49 | 'count': pagination.total 50 | }) 51 | 52 | 53 | @app.route('/problems/', methods=['POST']) 54 | def refresh_all_problems(): 55 | oj_name = request.form.get('oj_name') 56 | if oj_name is None: 57 | return jsonify({'error': 'missing field oj_name'}), 422 58 | if oj_name not in supported_sites: 59 | return jsonify({'error': f'oj {oj_name} is not supported'}), 422 60 | redis_con.lpush(crawler_queue, json.dumps({ 61 | 'oj_name': oj_name, 62 | 'type': 'problem', 63 | 'all': True 64 | })) 65 | return jsonify({'status': 'success'}) 66 | 67 | 68 | @app.route('/problems/<oj_name>/<problem_id>') 69 | def get_problem(oj_name, problem_id): 70 | problem = Problem.query.filter_by(oj_name=oj_name, problem_id=problem_id).first() 71 | if problem is None: 72 | abort(404) 73 | if datetime.utcnow() - timedelta(days=1) > problem.last_update: 74 | redis_con.lpush(crawler_queue, json.dumps({ 75 | 'oj_name': oj_name, 76 | 'type': 'problem', 77 | 'all': False, 78 | 'problem_id': problem_id 79 | })) 80 | return jsonify(problem.to_json()) 81 | 82 | 83 | @app.route('/problems/<oj_name>/<problem_id>', methods=['POST']) 84 | def refresh_problem(oj_name, problem_id): 85 | redis_con.lpush(crawler_queue, json.dumps({ 86 | 'oj_name': oj_name, 87 | 'type': 'problem', 88 | 'all': False, 89 | 'problem_id': problem_id 90 | })) 91 | return jsonify({ 92 | 'status': 'success', 93 | 'url': url_for('get_problem', oj_name=oj_name, problem_id=problem_id, _external=True) 94 | }) 95 | 96 | 97 | @app.route('/submissions/') 98 | def get_submission_list(): 99 | page = request.args.get('page', 1, type=int) 100 | per_page = request.args.get('per_page', 20, type=int) 101 | pagination = Submission.query.order_by(Submission.id.desc()).paginate( 102 | page=page, per_page=per_page, error_out=False) 103 | submissions = pagination.items 104 | page = pagination.page 105 | prev = None 106 | if pagination.has_prev: 107 | prev =
url_for('get_submission_list', page=page - 1, per_page=per_page, _external=True) 108 | next = None 109 | if pagination.has_next: 110 | next = url_for('get_submission_list', page=page + 1, per_page=per_page, _external=True) 111 | return jsonify({ 112 | 'submissions': [s.to_json() for s in submissions], 113 | 'prev': prev, 114 | 'next': next, 115 | 'count': pagination.total 116 | }) 117 | 118 | 119 | @app.route('/submissions/', methods=['POST']) 120 | def submit_problem(): 121 | oj_name = request.form.get('oj_name') 122 | problem_id = request.form.get('problem_id') 123 | language = request.form.get('language') 124 | source_code = request.form.get('source_code') 125 | if None in (oj_name, problem_id, language, source_code): 126 | return jsonify({'error': 'missing field'}), 422 127 | if not Problem.query.filter_by(oj_name=oj_name, problem_id=problem_id).first(): 128 | return jsonify({'error': 'no such problem'}), 422 129 | submission = Submission(oj_name=oj_name, problem_id=problem_id, 130 | language=language, source_code=source_code) 131 | db.session.add(submission) 132 | db.session.commit() 133 | redis_con.lpush(submitter_queue, submission.id) 134 | url = url_for('get_submission', id=submission.id, _external=True) 135 | return jsonify({'status': 'success', 'id': submission.id, 'url': url}) 136 | 137 | 138 | @app.route('/submissions/<int:id>') 139 | def get_submission(id): 140 | submission = Submission.query.get(id) 141 | if submission is None: 142 | abort(404) 143 | return jsonify(submission.to_json()) 144 | 145 | 146 | @app.route('/submissions/<int:id>', methods=['POST']) 147 | def update_submission(id): 148 | submission = Submission.query.get(id) 149 | if submission is None: 150 | return jsonify({'error': 'no such submission'}), 422 151 | if submission.verdict not in ('Queuing', 'Being Judged'): 152 | submission.verdict = 'Being Judged' 153 | db.session.commit() 154 | redis_con.lpush(submitter_queue, submission.id) 155 | url = url_for('get_submission', id=submission.id, _external=True) 156 | return jsonify({'status': 'success', 'id': submission.id, 'url': url}) 157 | 158 | 159 | @app.route('/contests/<site>') 160 | def get_recent_contests(site): 161 | if site not in contest_clients: 162 | abort(404) 163 | c = contest_clients[site] 164 | contest_list = c.get_recent_contest() 165 | return jsonify({ 166 | 'contests': [x.to_json() for x in contest_list] 167 | }) 168 | 169 | 170 | @app.route('/contests/<site>/<contest_id>') 171 | def get_contest_info(site, contest_id): 172 | contest = Contest.query.filter_by(site=site, contest_id=contest_id).first() 173 | if contest is None: 174 | abort(404) 175 | problems = Problem.query.filter_by(oj_name=contest.oj_name).all() 176 | return jsonify({ 177 | 'contest': contest.to_json(), 178 | 'problems': [p.to_json() for p in problems] 179 | }) 180 | 181 | 182 | @app.route('/contests/<site>/<contest_id>', methods=['POST']) 183 | def crawl_contest_info(site, contest_id): 184 | if site not in supported_contest_sites: 185 | return jsonify({'error': f'site {site} is not supported'}), 422 186 | redis_con.lpush(crawler_queue, json.dumps({ 187 | 'oj_name': f'{site}_ct_{contest_id}', 188 | 'type': 'contest' 189 | })) 190 | url = url_for('get_contest_info', site=site, contest_id=contest_id, _external=True) 191 | return jsonify({'status': 'success', 'url': url}) 192 | 193 | 194 | @app.teardown_appcontext 195 | def shutdown_session(response_or_exc): 196 | db.session.remove() 197 | return response_or_exc 198 | 199 | 200 | @app.errorhandler(404) 201 | def page_not_found(e): 202 | return jsonify({'error': 'not found'}), 404 203 |
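# Usage sketch (not part of this module): the endpoints above can be exercised
# with the `requests` library, assuming run.py's default localhost:5000 bind.
# The hdu problem id and source code are illustrative, and the problem row must
# already exist (e.g. crawled via POST /problems/), otherwise the server
# answers 'no such problem'.
#
#   import requests
#   BASE = 'http://localhost:5000'
#   resp = requests.post(f'{BASE}/submissions/', data={
#       'oj_name': 'hdu', 'problem_id': '1000', 'language': 'G++',
#       'source_code': 'int main() { return 0; }'})
#   submission = resp.json()  # {'status': 'success', 'id': ..., 'url': ...}
#   status = requests.get(submission['url']).json()
#   print(status['verdict'], status['exe_time'], status['exe_mem'])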
204 | 205 | @app.errorhandler(500) 206 | def internal_server_error(e): 207 | return jsonify({'error': 'internal_server_error'}), 500 208 | -------------------------------------------------------------------------------- /vjudge/__init__.py: -------------------------------------------------------------------------------- 1 | from .database import SQLManager 2 | 3 | db = SQLManager() 4 | -------------------------------------------------------------------------------- /vjudge/database.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy import create_engine, orm 2 | from sqlalchemy.ext.declarative import declarative_base 3 | from sqlalchemy.orm import scoped_session, sessionmaker 4 | from math import ceil 5 | from config import SQLALCHEMY_DATABASE_URI 6 | 7 | 8 | class Pagination(object): 9 | def __init__(self, query, page, per_page, total, items): 10 | self.query = query 11 | self.page = page 12 | self.per_page = per_page 13 | self.total = total 14 | self.items = items 15 | 16 | @property 17 | def pages(self): 18 | if self.per_page == 0: 19 | pages = 0 20 | else: 21 | pages = int(ceil(self.total / float(self.per_page))) 22 | return pages 23 | 24 | def prev(self, error_out=False): 25 | assert self.query is not None, 'a query object is required ' \ 26 | 'for this method to work' 27 | return self.query.paginate(self.page - 1, self.per_page, error_out) 28 | 29 | def next(self, error_out=False): 30 | assert self.query is not None, 'a query object is required ' \ 31 | 'for this method to work' 32 | return self.query.paginate(self.page + 1, self.per_page, error_out) 33 | 34 | @property 35 | def has_prev(self): 36 | return self.page > 1 37 | 38 | @property 39 | def has_next(self): 40 | return self.page < self.pages 41 | 42 | @property 43 | def prev_num(self): 44 | if not self.has_prev: 45 | return None 46 | return self.page - 1 47 | 48 | @property 49 | def next_num(self): 50 | if not self.has_next: 51 | return None 52 | return self.page + 1 53 | 54 | 55 | class BaseQuery(orm.Query): 56 | def paginate(self, page=1, per_page=20, error_out=True): 57 | if page < 1: 58 | if error_out: 59 | raise IndexError 60 | else: 61 | page = 1 62 | if per_page < 0: 63 | if error_out: 64 | raise IndexError 65 | else: 66 | per_page = 20 67 | items = self.limit(per_page).offset((page - 1) * per_page).all() 68 | if not items and page != 1: 69 | if error_out: 70 | raise IndexError 71 | else: 72 | page = 1 73 | if page == 1 and len(items) < per_page: 74 | total = len(items) 75 | else: 76 | total = self.order_by(None).count() 77 | return Pagination(self, page, per_page, total, items) 78 | 79 | 80 | class SQLManager(object): 81 | def __init__(self): 82 | if 'sqlite' in SQLALCHEMY_DATABASE_URI: 83 | connect_args = {'check_same_thread': False} 84 | engine = create_engine(SQLALCHEMY_DATABASE_URI, echo=False, connect_args=connect_args) 85 | else: 86 | engine = create_engine(SQLALCHEMY_DATABASE_URI, echo=False) 87 | session_factory = sessionmaker(bind=engine) 88 | self._session = scoped_session(session_factory) 89 | self.Model = declarative_base(bind=engine) 90 | self.Model.query = self._session.query_property(query_cls=BaseQuery) 91 | 92 | @property 93 | def session(self): 94 | return self._session 95 | 96 | def create_all(self): 97 | self.Model.metadata.create_all() 98 | 99 | def drop_all(self): 100 | self.Model.metadata.drop_all() 101 | -------------------------------------------------------------------------------- /vjudge/main.py: 
-------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | import threading 4 | import time 5 | from datetime import datetime, timedelta, timezone 6 | from queue import Queue, Empty 7 | 8 | import redis 9 | from sqlalchemy import or_ 10 | 11 | from config import REDIS_CONFIG, logger 12 | from .models import db, Submission, Problem, Contest 13 | from .site import get_client_by_oj_name, exceptions 14 | 15 | 16 | class StatusCrawler(threading.Thread): 17 | def __init__(self, client, daemon=None): 18 | super().__init__(daemon=daemon) 19 | self._client = client 20 | self._user_id = client.get_user_id() 21 | self._name = client.get_name() 22 | self._start_event = threading.Event() 23 | self._stop_event = threading.Event() 24 | self._tasks = [] 25 | self._thread = None 26 | self._loop = None 27 | 28 | def run(self): 29 | self._thread = threading.current_thread() 30 | self._loop = asyncio.new_event_loop() 31 | asyncio.set_event_loop(self._loop) 32 | self._loop.call_soon(self._start_event.set) 33 | self._loop.run_forever() 34 | pending_tasks = self._pending_tasks() 35 | self._loop.run_until_complete(asyncio.gather(*pending_tasks)) 36 | 37 | def wait_start(self, timeout=None): 38 | return self._start_event.wait(timeout) 39 | 40 | def add_task(self, submission_id): 41 | if not self._start_event.is_set(): 42 | raise RuntimeError('Cannot add task before crawler is started') 43 | if self._stop_event.is_set(): 44 | raise RuntimeError('Cannot add task when crawler is stopping') 45 | self._loop.call_soon_threadsafe( 46 | asyncio.ensure_future, self._crawl_status(submission_id)) 47 | return True 48 | 49 | def stop(self): 50 | if not self._start_event.is_set(): 51 | raise RuntimeError('Cannot stop crawler before it is started') 52 | if self._stop_event.is_set(): 53 | raise RuntimeError('Crawler can only be stopped once') 54 | self._stop_event.set() 55 | self._loop.call_soon_threadsafe(self._loop.stop) 56 | 57 | async def _crawl_status(self, submission_id): 58 | submission = Submission.query.get(submission_id) 59 | if (not submission.run_id or submission.oj_name != self._name 60 | or submission.verdict != 'Being Judged'): 61 | return 62 | for delay in range(120): 63 | await asyncio.sleep(delay) 64 | try: 65 | verdict, exe_time, exe_mem = self._client.get_submit_status( 66 | submission.run_id, 67 | user_id=submission.user_id, 68 | problem_id=submission.problem_id) 69 | except exceptions.ConnectionError as e: 70 | submission.verdict = 'Judge Failed' 71 | db.session.commit() 72 | logger.error(f'Failed to crawl status, submission_id: {submission.id}, reason: {e}') 73 | return 74 | except exceptions.LoginRequired: 75 | try: 76 | self._client.update_cookies() 77 | logger.debug( 78 | f'StatusCrawler login expired, logging in again, name: {self._name}, user_id: {self._user_id}') 79 | continue 80 | except exceptions.ConnectionError as e: 81 | submission.verdict = 'Judge Failed' 82 | db.session.commit() 83 | logger.error(f'Failed to crawl status, submission_id: {submission.id}, reason: {e}') 84 | return 85 | if verdict not in ('Being Judged', 'Queuing', 'Compiling', 'Running'): 86 | submission.verdict = verdict 87 | submission.exe_time = exe_time 88 | submission.exe_mem = exe_mem 89 | db.session.commit() 90 | logger.info( 91 | f'Crawled status successfully, submission_id: {submission.id}, verdict: {submission.verdict}') 92 | return 93 | submission.verdict = 'Judge Failed' 94 | db.session.commit() 95 | logger.error(f'Failed to crawl status, submission_id:
{submission.id}, reason: Timeout') 96 | 97 | def _pending_tasks(self): 98 | if hasattr(asyncio, 'all_tasks'): 99 | pending_tasks = asyncio.all_tasks(self._loop) 100 | else: 101 | pending_tasks = {t for t in asyncio.Task.all_tasks(self._loop) if not t.done()} 102 | return pending_tasks 103 | 104 | def __repr__(self): 105 | return f'<StatusCrawler(name={self._name}, user_id={self._user_id})>' 106 | 107 | 108 | class Submitter(threading.Thread): 109 | def __init__(self, client, submit_queue, status_crawler, daemon=None): 110 | super().__init__(daemon=daemon) 111 | self._client = client 112 | self._user_id = client.get_user_id() 113 | self._name = client.get_name() 114 | self._submit_queue = submit_queue 115 | self._status_crawler = status_crawler 116 | self._stop_event = threading.Event() 117 | 118 | def run(self): 119 | self._status_crawler.start() 120 | self._status_crawler.wait_start() 121 | logger.info(f'Started submitter, name: {self._name}, user_id: {self._user_id}') 122 | while True: 123 | try: 124 | submission = Submission.query.get(self._submit_queue.get(timeout=60)) 125 | except Empty: 126 | if self._stop_event.is_set(): 127 | break 128 | continue 129 | logger.info(f'Start judging submission {submission.id}, verdict: {submission.verdict}') 130 | if submission.verdict not in ('Queuing', 'Being Judged'): 131 | continue 132 | if submission.verdict == 'Being Judged': 133 | self._status_crawler.add_task(submission.id) 134 | continue 135 | try: 136 | run_id = self._client.submit_problem( 137 | submission.problem_id, submission.language, submission.source_code) 138 | except (exceptions.SubmitError, exceptions.ConnectionError) as e: 139 | submission.verdict = 'Submit Failed' 140 | db.session.commit() 141 | logger.error(f'Failed to submit submission {submission.id}, reason: {e}') 142 | except exceptions.LoginRequired: 143 | try: 144 | self._client.update_cookies() 145 | self._submit_queue.put(submission.id) 146 | logger.debug( 147 | f'Submitter login expired, logging in again, name: {self._name}, user_id: {self._user_id}') 148 | except exceptions.ConnectionError as e: 149 | submission.verdict = 'Submit Failed' 150 | db.session.commit() 151 | logger.error(f'Failed to submit submission {submission.id}, reason: {e}') 152 | else: 153 | submission.run_id = run_id 154 | submission.user_id = self._user_id 155 | submission.verdict = 'Being Judged' 156 | db.session.commit() 157 | logger.info(f'Submission {submission.id} submitted successfully') 158 | self._status_crawler.add_task(submission.id) 159 | time.sleep(5) 160 | logger.info(f'Stopping submitter, name: {self._name}, user_id: {self._user_id}') 161 | self._status_crawler.stop() 162 | self._status_crawler.join() 163 | logger.info(f'Stopped submitter, name: {self._name}, user_id: {self._user_id}') 164 | 165 | def stop(self): 166 | self._stop_event.set() 167 | 168 | def __repr__(self): 169 | return f'<Submitter(name={self._name}, user_id={self._user_id})>' 170 | 171 | 172 | class PageCrawler(threading.Thread): 173 | def __init__(self, client, page_queue, daemon=None): 174 | super().__init__(daemon=daemon) 175 | self._client = client 176 | self._name = client.get_name() 177 | self._user_id = client.get_user_id() 178 | self._client_type = client.get_client_type() 179 | self._supported_crawl_type = ['problem'] 180 | if self._client_type == 'contest': 181 | self._supported_crawl_type.append('contest') 182 | self._page_queue = page_queue 183 | self._stop_event = threading.Event() 184 | 185 | def run(self): 186 | logger.info(f'Started PageCrawler, name: {self._name}, user_id: {self._user_id}') 187 | while True: 188 | try: 189 | data =
self._page_queue.get(timeout=60) 190 | except Empty: 191 | if self._stop_event.is_set(): 192 | break 193 | continue 194 | if not isinstance(data, dict): 195 | logger.error(f'PageCrawler: data type should be dict, data: "{data}"') 196 | continue 197 | crawl_type = data.get('type') 198 | if crawl_type not in self._supported_crawl_type: 199 | logger.error(f'Unsupported crawl_type: {crawl_type}') 200 | continue 201 | try: 202 | if crawl_type == 'problem': 203 | problem_id = data.get('problem_id') 204 | if problem_id: 205 | self._crawl_problem(problem_id) 206 | else: 207 | self._crawl_problem_all() 208 | elif crawl_type == 'contest': 209 | self._crawl_contest() 210 | except exceptions.ConnectionError as e: 211 | logger.error(f'Failed to crawl page, name: {self._name}, user_id: {self._user_id}, reason: {e}') 212 | except exceptions.LoginRequired: 213 | try: 214 | self._client.update_cookies() 215 | self._page_queue.put(data) 216 | logger.debug( 217 | f'PageCrawler login expired, logging in again, name: {self._name}, user_id: {self._user_id}') 218 | except exceptions.ConnectionError as e: 219 | logger.error(f'Failed to crawl page, name: {self._name}, user_id: {self._user_id}, reason: {e}') 220 | logger.info(f'Stopped PageCrawler, name: {self._name}, user_id: {self._user_id}') 221 | 222 | def stop(self): 223 | self._stop_event.set() 224 | 225 | def _crawl_problem(self, problem_id): 226 | result = self._client.get_problem(problem_id) 227 | if not isinstance(result, dict): 228 | logger.error(f'No such problem, name: {self._name}, ' 229 | f'user_id: {self._user_id}, problem_id: {problem_id}') 230 | return 231 | problem = Problem.query.filter_by(oj_name=self._name, problem_id=problem_id).first() or Problem() 232 | problem.oj_name = self._name 233 | problem.problem_id = problem_id 234 | problem.last_update = datetime.utcnow() 235 | problem.title = result.get('title') 236 | problem.description = result.get('description') 237 | problem.input = result.get('input') 238 | problem.output = result.get('output') 239 | problem.sample_input = result.get('sample_input') 240 | problem.sample_output = result.get('sample_output') 241 | problem.time_limit = result.get('time_limit') 242 | problem.mem_limit = result.get('mem_limit') 243 | db.session.add(problem) 244 | db.session.commit() 245 | logger.info(f'Crawled problem successfully, name: {self._name}, ' 246 | f'user_id: {self._user_id}, problem_id: {problem_id}') 247 | 248 | def _crawl_problem_all(self): 249 | problem_list = self._client.get_problem_list() 250 | for problem_id in problem_list: 251 | self._crawl_problem(problem_id) 252 | 253 | def _crawl_contest(self): 254 | contest = Contest.query.filter_by(oj_name=self._name).first() or Contest() 255 | self._client.refresh_contest_info() 256 | contest_info = self._client.get_contest_info() 257 | contest.oj_name = self._name 258 | contest.site = contest_info.site 259 | contest.contest_id = contest_info.contest_id 260 | contest.title = contest_info.title 261 | contest.public = contest_info.public 262 | contest.status = contest_info.status 263 | contest.start_time = datetime.fromtimestamp(contest_info.start_time, tz=timezone.utc) 264 | contest.end_time = datetime.fromtimestamp(contest_info.end_time, tz=timezone.utc) 265 | db.session.add(contest) 266 | db.session.commit() 267 | logger.info(f'Crawled contest successfully, name: {self._name}, ' 268 | f'user_id: {self._user_id}, contest_id: {contest.contest_id}') 269 | self._crawl_problem_all() 270 | 271 | 272 | class SubmitterHandler(threading.Thread): 273 | def
__init__(self, normal_accounts, contest_accounts, daemon=None): 274 | super().__init__(daemon=daemon) 275 | self._redis_key = REDIS_CONFIG['queue']['submitter_queue'] 276 | self._redis_con = redis.StrictRedis( 277 | host=REDIS_CONFIG['host'], port=REDIS_CONFIG['port'], db=REDIS_CONFIG['db']) 278 | self._normal_accounts = normal_accounts 279 | self._contest_accounts = contest_accounts 280 | self._running_submitters = {} 281 | self._stopping_submitters = set() 282 | self._queues = {} 283 | 284 | def run(self): 285 | self._scan_unfinished_tasks() 286 | last_clean = datetime.utcnow() 287 | while True: 288 | data = self._redis_con.brpop(self._redis_key, timeout=600) 289 | if datetime.utcnow() - last_clean > timedelta(hours=1): 290 | self._clean_free_submitters() 291 | last_clean = datetime.utcnow() 292 | if not data: 293 | continue 294 | try: 295 | submission_id = int(data[1]) 296 | except (ValueError, TypeError): 297 | logger.error(f'SubmitterHandler: received corrupt data "{data[1]}"') 298 | continue 299 | submission = Submission.query.get(submission_id) 300 | if not submission: 301 | logger.error(f'Submission {submission_id} not found') 302 | continue 303 | if submission.oj_name not in self._normal_accounts and submission.oj_name not in self._contest_accounts: 304 | logger.error(f'Unsupported oj_name: {submission.oj_name}') 305 | continue 306 | if submission.oj_name not in self._queues: 307 | self._queues[submission.oj_name] = Queue() 308 | submit_queue = self._queues.get(submission.oj_name) 309 | if submission.oj_name not in self._running_submitters: 310 | if not self._start_new_submitters(submission.oj_name, submit_queue): 311 | submission.verdict = 'Submit Failed' 312 | db.session.commit() 313 | logger.error(f'Cannot start client for {submission.oj_name}') 314 | continue 315 | assert submission.oj_name in self._running_submitters 316 | submit_queue.put(submission.id) 317 | 318 | def _scan_unfinished_tasks(self): 319 | submissions = Submission.query.filter( 320 | or_(Submission.verdict == 'Queuing', Submission.verdict == 'Being Judged')) 321 | for submission in submissions: 322 | self._redis_con.lpush(self._redis_key, submission.id) 323 | 324 | def _start_new_submitters(self, oj_name, submit_queue): 325 | submitter_info = {'submitters': {}} 326 | submitters = submitter_info.get('submitters') 327 | accounts = {} 328 | if oj_name in self._normal_accounts: 329 | accounts = self._normal_accounts[oj_name] 330 | if oj_name in self._contest_accounts: 331 | accounts = self._contest_accounts[oj_name] 332 | for auth in accounts: 333 | try: 334 | crawler = StatusCrawler(get_client_by_oj_name(oj_name, auth), daemon=True) 335 | submitter = Submitter(get_client_by_oj_name(oj_name, auth), submit_queue, crawler, daemon=True) 336 | except exceptions.JudgeException as e: 337 | logger.error(f'Failed to create submitter, name: {oj_name}, user_id: {auth[0]}, reason: {e}') 338 | continue 339 | submitter.start() 340 | submitters[auth[0]] = submitter 341 | if not submitters: 342 | return False 343 | submitter_info['start_time'] = datetime.utcnow() 344 | self._running_submitters[oj_name] = submitter_info 345 | return True 346 | 347 | def _clean_free_submitters(self): 348 | free_clients = [] 349 | for oj_name in self._running_submitters: 350 | submitter_info = self._running_submitters[oj_name] 351 | if datetime.utcnow() - submitter_info['start_time'] > timedelta(hours=1): 352 | free_clients.append(oj_name) 353 | for oj_name in free_clients: 354 | submitter_info = self._running_submitters[oj_name] 355 | submitters =
submitter_info.get('submitters') 356 | for user_id in submitters: 357 | submitter = submitters.get(user_id) 358 | submitter.stop() 359 | self._stopping_submitters.add(submitter) 360 | self._running_submitters.pop(oj_name) 361 | logger.info(f'No more tasks, stopping all {oj_name} submitters') 362 | stopped_submitters = [] 363 | for submitter in self._stopping_submitters: 364 | if not submitter.is_alive(): 365 | stopped_submitters.append(submitter) 366 | for submitter in stopped_submitters: 367 | self._stopping_submitters.remove(submitter) 368 | logger.info('Cleaned free submitters') 369 | logger.info(f'Running submitters: {self._running_submitters}') 370 | logger.info(f'Stopping submitters: {self._stopping_submitters}') 371 | 372 | 373 | class CrawlerHandler(threading.Thread): 374 | def __init__(self, normal_accounts, contest_accounts, daemon=None): 375 | super().__init__(daemon=daemon) 376 | self._redis_key = REDIS_CONFIG['queue']['crawler_queue'] 377 | self._redis_con = redis.StrictRedis( 378 | host=REDIS_CONFIG['host'], port=REDIS_CONFIG['port'], db=REDIS_CONFIG['db']) 379 | self._normal_accounts = normal_accounts 380 | self._contest_accounts = contest_accounts 381 | self._running_crawlers = {} 382 | self._stopping_crawlers = set() 383 | self._queues = {} 384 | 385 | def run(self): 386 | last_clean = datetime.utcnow() 387 | while True: 388 | data = self._redis_con.brpop(self._redis_key, timeout=600) 389 | if datetime.utcnow() - last_clean > timedelta(hours=1): 390 | self._clean_free_crawlers() 391 | last_clean = datetime.utcnow() 392 | if not data: 393 | continue 394 | try: 395 | data = json.loads(data[1]) 396 | except json.JSONDecodeError: 397 | logger.error(f'CrawlerHandler: received corrupt data "{data[1]}"') 398 | continue 399 | if not isinstance(data, dict): 400 | logger.error(f'CrawlerHandler: data type should be dict, data: "{data}"') 401 | continue 402 | crawl_type = data.get('type') 403 | oj_name = data.get('oj_name') 404 | if crawl_type not in ('problem', 'contest'): 405 | logger.error(f'Unsupported crawl_type: {crawl_type}') 406 | continue 407 | if oj_name not in self._normal_accounts and oj_name not in self._contest_accounts: 408 | logger.error(f'Unsupported oj_name: {oj_name}') 409 | continue 410 | if oj_name not in self._queues: 411 | self._queues[oj_name] = Queue() 412 | crawl_queue = self._queues.get(oj_name) 413 | if oj_name not in self._running_crawlers: 414 | if not self._start_new_crawlers(oj_name, crawl_queue): 415 | logger.error(f'Cannot start client for {oj_name}') 416 | continue 417 | assert oj_name in self._running_crawlers 418 | if crawl_type == 'problem': 419 | crawl_all = data.get('all') 420 | problem_id = data.get('problem_id') 421 | if crawl_all is not True: 422 | crawl_all = False 423 | if not crawl_all and problem_id is None: 424 | logger.error('Missing crawl_params: problem_id') 425 | continue 426 | data = {'type': 'problem'} 427 | if not crawl_all: 428 | data['problem_id'] = problem_id 429 | crawl_queue.put(data) 430 | elif crawl_type == 'contest': 431 | crawl_queue.put({'type': 'contest'}) 432 | 433 | def _start_new_crawlers(self, oj_name, crawl_queue): 434 | crawler_info = {'crawlers': {}} 435 | crawlers = crawler_info.get('crawlers') 436 | accounts = {} 437 | if oj_name in self._normal_accounts: 438 | accounts = self._normal_accounts[oj_name] 439 | if oj_name in self._contest_accounts: 440 | accounts = self._contest_accounts[oj_name] 441 | for auth in accounts: 442 | try: 443 | crawler = PageCrawler(get_client_by_oj_name(oj_name, auth), crawl_queue,
daemon=True) 444 | except exceptions.JudgeException as e: 445 | logger.error(f'Failed to create crawler, name: {oj_name}, user_id: {auth[0]}, reason: {e}') 446 | continue 447 | crawler.start() 448 | crawlers[auth[0]] = crawler 449 | if not crawlers: 450 | return False 451 | crawler_info['start_time'] = datetime.utcnow() 452 | self._running_crawlers[oj_name] = crawler_info 453 | return True 454 | 455 | def _clean_free_crawlers(self): 456 | free_clients = [] 457 | for oj_name in self._running_crawlers: 458 | crawler_info = self._running_crawlers[oj_name] 459 | if datetime.utcnow() - crawler_info['start_time'] > timedelta(hours=1): 460 | free_clients.append(oj_name) 461 | for oj_name in free_clients: 462 | crawler_info = self._running_crawlers[oj_name] 463 | crawlers = crawler_info.get('crawlers') 464 | for user_id in crawlers: 465 | crawler = crawlers.get(user_id) 466 | crawler.stop() 467 | self._stopping_crawlers.add(crawler) 468 | self._running_crawlers.pop(oj_name) 469 | logger.info(f'No more tasks, stopping all {oj_name} crawlers') 470 | stopped_crawlers = [] 471 | for crawler in self._stopping_crawlers: 472 | if not crawler.is_alive(): 473 | stopped_crawlers.append(crawler) 474 | for crawler in stopped_crawlers: 475 | self._stopping_crawlers.remove(crawler) 476 | logger.info('Cleaned free crawlers') 477 | logger.info(f'Running crawlers: {self._running_crawlers}') 478 | logger.info(f'Stopping crawlers: {self._stopping_crawlers}') 479 | 480 | 481 | class VJudge(object): 482 | def __init__(self, normal_accounts=None, contest_accounts=None): 483 | if not normal_accounts and not contest_accounts: 484 | logger.warning('Neither normal_accounts nor contest_accounts has any available accounts; ' 485 | 'the submitter and crawler will not work') 486 | self._normal_accounts = normal_accounts or {} 487 | self._contest_accounts = contest_accounts or {} 488 | 489 | @property 490 | def normal_accounts(self): 491 | return self._normal_accounts 492 | 493 | @property 494 | def contest_accounts(self): 495 | return self._contest_accounts 496 | 497 | def start(self): 498 | submitter_handle = SubmitterHandler(self._normal_accounts, self._contest_accounts, True) 499 | crawler_handle = CrawlerHandler(self._normal_accounts, self._contest_accounts, True) 500 | submitter_handle.start() 501 | crawler_handle.start() 502 | submitter_handle.join() 503 | crawler_handle.join() 504 | -------------------------------------------------------------------------------- /vjudge/models.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timezone 2 | from sqlalchemy import Column, Integer, Boolean, String, DateTime, UniqueConstraint 3 | 4 | from .
import db 5 | 6 | 7 | class Submission(db.Model): 8 | __tablename__ = 'submissions' 9 | id = Column(Integer, primary_key=True) 10 | user_id = Column(String, index=True) 11 | oj_name = Column(String, nullable=False) 12 | problem_id = Column(String, nullable=False) 13 | language = Column(String, nullable=False) 14 | source_code = Column(String, nullable=False) 15 | run_id = Column(String) 16 | verdict = Column(String, default='Queuing') 17 | exe_time = Column(Integer) 18 | exe_mem = Column(Integer) 19 | time_stamp = Column(DateTime, default=datetime.utcnow) 20 | 21 | def to_json(self): 22 | submission_json = { 23 | 'id': self.id, 24 | 'oj_name': self.oj_name, 25 | 'problem_id': self.problem_id, 26 | 'verdict': self.verdict, 27 | 'exe_time': self.exe_time, 28 | 'exe_mem': self.exe_mem 29 | } 30 | return submission_json 31 | 32 | def __repr__(self): 33 | return (f'<Submission(id={self.id}, oj_name={self.oj_name}, problem_id={self.problem_id}, ' 34 | f'verdict={self.verdict})>') 35 | 36 | 37 | class Problem(db.Model): 38 | __tablename__ = 'problems' 39 | oj_name = Column(String, primary_key=True, index=True) 40 | problem_id = Column(String, primary_key=True, index=True) 41 | last_update = Column(DateTime, nullable=False) 42 | title = Column(String) 43 | description = Column(String) 44 | input = Column(String) 45 | output = Column(String) 46 | sample_input = Column(String) 47 | sample_output = Column(String) 48 | time_limit = Column(Integer) 49 | mem_limit = Column(Integer) 50 | 51 | def to_json(self): 52 | problem_json = { 53 | 'oj_name': self.oj_name, 54 | 'problem_id': self.problem_id, 55 | 'last_update': self._to_timestamp(self.last_update), 56 | 'title': self.title, 57 | 'description': self.description, 58 | 'input': self.input, 59 | 'output': self.output, 60 | 'sample_input': self.sample_input, 61 | 'sample_output': self.sample_output, 62 | 'time_limit': self.time_limit, 63 | 'mem_limit': self.mem_limit 64 | } 65 | return problem_json 66 | 67 | def summary(self): 68 | summary_json = { 69 | 'oj_name': self.oj_name, 70 | 'problem_id': self.problem_id, 71 | 'title': self.title, 72 | } 73 | return summary_json 74 | 75 | @staticmethod 76 | def _to_timestamp(dt): 77 | dt = datetime(dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second, tzinfo=timezone.utc) 78 | return dt.timestamp() 79 | 80 | def __repr__(self): 81 | return f'<Problem(oj_name={self.oj_name}, problem_id={self.problem_id})>' 82 | 83 | 84 | class Contest(db.Model): 85 | __tablename__ = 'contests' 86 | oj_name = Column(String, primary_key=True) 87 | site = Column(String, nullable=False) 88 | contest_id = Column(String, nullable=False) 89 | title = Column(String, default='') 90 | public = Column(Boolean, default=False) 91 | status = Column(String, default='Pending') 92 | start_time = Column(DateTime, default=datetime.utcfromtimestamp(0)) 93 | end_time = Column(DateTime, default=datetime.utcfromtimestamp(0)) 94 | 95 | __table_args__ = (UniqueConstraint('site', 'contest_id', name='_site_contest_id_uc'),) 96 | 97 | def to_json(self): 98 | contest_json = { 99 | 'oj_name': self.oj_name, 100 | 'site': self.site, 101 | 'contest_id': self.contest_id, 102 | 'title': self.title, 103 | 'public': self.public, 104 | 'status': self.status, 105 | 'start_time': self._to_timestamp(self.start_time), 106 | 'end_time': self._to_timestamp(self.end_time), 107 | } 108 | return contest_json 109 | 110 | @staticmethod 111 | def _to_timestamp(dt): 112 | dt = datetime(dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second, tzinfo=timezone.utc) 113 | return dt.timestamp() 114 | 115 | def __repr__(self): 116 | return (f'<Contest(oj_name={self.oj_name}, site={self.site}, contest_id={self.contest_id}, ' 117 | f'title={self.title})>') 118 | --------------------------------------------------------------------------------
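For reference, a minimal sketch of using the models above outside the Flask app, assuming a throwaway SQLite database; DATABASE_URL must be set before config.py is first imported (it is read at import time), and the /tmp path is illustrative:

import os
os.environ['DATABASE_URL'] = 'sqlite:////tmp/vjudge-demo.sqlite'  # illustrative path

from vjudge import db
from vjudge.models import Submission

db.create_all()  # emits CREATE TABLE for problems, submissions and contests

sub = Submission(oj_name='hdu', problem_id='1000', language='G++', source_code='...')
db.session.add(sub)
db.session.commit()  # verdict defaults to 'Queuing', time_stamp to utcnow()

# BaseQuery.paginate mirrors the Flask-SQLAlchemy pagination API used in server/views.py.
page = Submission.query.order_by(Submission.id.desc()).paginate(page=1, per_page=20, error_out=False)
print(page.total, [s.to_json() for s in page.items])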
/vjudge/site/__init__.py: -------------------------------------------------------------------------------- 1 | import re 2 | from .hdu import * 3 | from .scu import * 4 | from . import exceptions 5 | 6 | __all__ = hdu.__all__ + scu.__all__ 7 | 8 | supported_sites = ('scu', 'hdu') 9 | supported_contest_sites = ('hdu',) 10 | 11 | normal_clients = {'scu': SOJClient, 'hdu': HDUClient} 12 | contest_clients = {'hdu': HDUContestClient} 13 | 14 | 15 | def get_normal_client(site, auth=None): 16 | if site not in supported_sites: 17 | raise exceptions.JudgeException(f'Site "{site}" is not supported') 18 | return normal_clients[site](auth) 19 | 20 | 21 | def get_contest_client(site, auth=None, contest_id=None): 22 | if site not in supported_contest_sites: 23 | raise exceptions.JudgeException(f'Site "{site}" is not supported') 24 | return contest_clients[site](auth, contest_id) 25 | 26 | 27 | def get_client_by_oj_name(name, auth=None): 28 | res = re.match(r'^(.*?)_ct_([0-9]+)$', name) 29 | if res: 30 | site, contest_id = res.groups() 31 | return get_contest_client(site, auth, contest_id) 32 | else: 33 | return get_normal_client(name, auth) 34 | -------------------------------------------------------------------------------- /vjudge/site/base.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from abc import abstractmethod, ABC 3 | 4 | import requests 5 | 6 | from config import get_header 7 | 8 | logging.basicConfig(level=logging.INFO) 9 | 10 | 11 | class BaseClient(ABC): 12 | def __init__(self): 13 | self._session = requests.session() 14 | self._session.headers.update(get_header()) 15 | 16 | @abstractmethod 17 | def get_name(self): 18 | pass 19 | 20 | @abstractmethod 21 | def get_user_id(self): 22 | pass 23 | 24 | @abstractmethod 25 | def get_client_type(self): 26 | pass 27 | 28 | @abstractmethod 29 | def login(self, username, password): 30 | pass 31 | 32 | @abstractmethod 33 | def check_login(self): 34 | pass 35 | 36 | @abstractmethod 37 | def update_cookies(self): 38 | pass 39 | 40 | @abstractmethod 41 | def get_problem(self, problem_id): 42 | pass 43 | 44 | @abstractmethod 45 | def get_problem_list(self): 46 | pass 47 | 48 | @abstractmethod 49 | def submit_problem(self, problem_id, language, source_code): 50 | pass 51 | 52 | @abstractmethod 53 | def get_submit_status(self, run_id, **kwargs): 54 | pass 55 | 56 | 57 | class ContestInfo(object): 58 | def __init__(self, site, contest_id, title='', public=True, status='Pending', 59 | start_time=0, end_time=0, problem_list=None): 60 | self.site = site 61 | self.contest_id = contest_id 62 | self.title = title 63 | self.public = public 64 | self.status = status 65 | self.start_time = start_time 66 | self.end_time = end_time 67 | self.problem_list = problem_list or [] 68 | 69 | def to_json(self): 70 | contest_json = { 71 | 'site': self.site, 72 | 'contest_id': self.contest_id, 73 | 'title': self.title, 74 | 'public': self.public, 75 | 'status': self.status, 76 | 'start_time': self.start_time, 77 | 'end_time': self.end_time, 78 | 'problem_list': self.problem_list 79 | } 80 | return contest_json 81 | 82 | def __repr__(self): 83 | return (f'<ContestInfo(site={self.site}, contest_id={self.contest_id}, ' 84 | f'title={self.title}, status={self.status})>') 85 | 86 | 87 | class ContestClient(ABC): 88 | def __init__(self): 89 | pass 90 | 91 | @abstractmethod 92 | def get_contest_id(self): 93 | pass 94 | 95 | @abstractmethod 96 | def get_contest_info(self): 97 | pass 98 | 99 | @abstractmethod 100 | def refresh_contest_info(self): 101 | pass 102 | 103 | @classmethod 104 | @abstractmethod 105 | def get_recent_contest(cls):
106 |         pass
107 | 
--------------------------------------------------------------------------------
/vjudge/site/exceptions.py:
--------------------------------------------------------------------------------
1 | class JudgeException(Exception):
2 |     pass
3 | 
4 | 
5 | class ConnectionError(JudgeException):
6 |     pass
7 | 
8 | 
9 | class LoginError(JudgeException):
10 |     pass
11 | 
12 | 
13 | class UserNotExist(LoginError):
14 |     pass
15 | 
16 | 
17 | class PasswordError(LoginError):
18 |     pass
19 | 
20 | 
21 | class LoginRequired(JudgeException):
22 |     pass
23 | 
24 | 
25 | class SubmitError(JudgeException):
26 |     pass
27 | 
--------------------------------------------------------------------------------
/vjudge/site/hdu/__init__.py:
--------------------------------------------------------------------------------
1 | from .client import *
2 | 
3 | __all__ = client.__all__
4 | 
--------------------------------------------------------------------------------
/vjudge/site/hdu/client.py:
--------------------------------------------------------------------------------
1 | import re
2 | from abc import abstractmethod
3 | from datetime import datetime, timedelta, timezone
4 | from urllib.parse import urljoin
5 | 
6 | import requests
7 | from bs4 import BeautifulSoup
8 | from bs4.element import NavigableString
9 | 
10 | from .. import exceptions
11 | from ..base import BaseClient, ContestClient, ContestInfo
12 | 
13 | __all__ = ('HDUClient', 'HDUContestClient')
14 | 
15 | BASE_URL = 'http://acm.hdu.edu.cn'
16 | 
17 | LANG_ID = {'G++': '0', 'GCC': '1', 'C++': '2',
18 |            'C': '3', 'Pascal': '4', 'Java': '5', 'C#': '6'}
19 | 
20 | PAGE_TITLES = {'Problem Description': 'description', 'Input': 'input', 'Output': 'output',
21 |                'Sample Input': 'sample_input', 'Sample Output': 'sample_output'}
22 | 
23 | 
24 | class _UniClient(BaseClient):
25 |     def __init__(self, auth=None, client_type='practice', contest_id='0', timeout=5):
26 |         super().__init__()
27 |         self.auth = auth
28 |         self.client_type = client_type
29 |         self.contest_id = contest_id
30 |         self.timeout = timeout
31 |         if auth is not None:
32 |             self.username, self.password = auth
33 |             self.login(self.username, self.password)
34 | 
35 |     @abstractmethod
36 |     def get_name(self):
37 |         pass
38 | 
39 |     def get_user_id(self):
40 |         if self.auth is None:
41 |             raise exceptions.LoginRequired('Login is required')
42 |         return self.username
43 | 
44 |     def get_client_type(self):
45 |         return self.client_type
46 | 
47 |     def login(self, username, password):
48 |         url = self._get_login_url()
49 |         data = {
50 |             'login': 'Sign in',
51 |             'username': username,
52 |             'userpass': password
53 |         }
54 |         try:
55 |             self._request_url('post', url, data=data)
56 |         except exceptions.LoginRequired:
57 |             raise exceptions.LoginError('User does not exist or wrong password')
58 |         self.auth = (username, password)
59 |         self.username = username
60 |         self.password = password
61 | 
62 |     @abstractmethod
63 |     def check_login(self):
64 |         pass
65 | 
66 |     def update_cookies(self):
67 |         if self.auth is None:
68 |             raise exceptions.LoginRequired('Login is required')
69 |         self.login(self.username, self.password)
70 | 
71 |     def get_problem(self, problem_id):
72 |         url = self._get_problem_url(problem_id)
73 |         resp = self._request_url('get', url)
74 |         return self._parse_problem(resp)
75 | 
76 |     @abstractmethod
77 |     def get_problem_list(self):
78 |         pass
79 | 
80 |     def submit_problem(self, problem_id, language, source_code):
81 |         if self.auth is None:
82 |             raise exceptions.LoginRequired('Login is required')
83 |         if language not in LANG_ID:
84 |             raise exceptions.SubmitError(f'Language "{language}" is not supported')
85 |         if self.client_type == 'contest':
86 |             source_code = self.__class__._encode_source_code(source_code)
87 |         data = {
88 |             'problemid': problem_id,
89 |             'language': LANG_ID[language],
90 |             'usercode': source_code
91 |         }
92 |         if self.client_type == 'contest':
93 |             data['submit'] = 'Submit'
94 |         else:
95 |             data['check'] = '0'
96 |         url = self._get_submit_url()
97 |         resp = self._request_url('post', url, data=data)
98 |         if re.search('Code length is improper', resp):
99 |             raise exceptions.SubmitError('Code length is improper')
100 |         if re.search("Please don't re-submit in 5 seconds, thank you.", resp):
101 |             raise exceptions.SubmitError('Submit too frequently')
102 |         if not re.search('Realtime Status', resp):
103 |             raise exceptions.SubmitError('Submit failed unexpectedly')
104 |         url = self._get_status_url(problem_id=problem_id, user_id=self.username)
105 |         resp = self._request_url('get', url)
106 |         try:
107 |             tables = BeautifulSoup(resp, 'lxml').find_all('table')
108 |             tables.reverse()
109 |             pattern = re.compile(r'Run ID.*Judge Status.*Author', re.DOTALL)
110 |             table = next(filter(lambda x: re.search(pattern, str(x)), tables))
111 |             tag = table.find('tr', align="center")
112 |             run_id = tag.find('td').text.strip()
113 |         except (AttributeError, StopIteration):
114 |             raise exceptions.SubmitError('Submit failed unexpectedly')
115 |         return run_id
116 | 
117 |     def get_submit_status(self, run_id, **kwargs):
118 |         user_id = kwargs.get('user_id', '')
119 |         problem_id = kwargs.get('problem_id', '')
120 |         url = self._get_status_url(run_id=run_id, problem_id=problem_id, user_id=user_id)
121 |         resp = self._request_url('get', url)
122 |         result = self.__class__._find_verdict(resp, run_id)
123 |         if result is not None:
124 |             return result
125 |         if self.client_type == 'contest':
126 |             for page in range(2, 5):
127 |                 status_url = url + f'&page={page}'
128 |                 resp = self._request_url('get', status_url)
129 |                 result = self.__class__._find_verdict(resp, run_id)
130 |                 if result is not None:
131 |                     return result
132 | 
133 |     def _request_url(self, method, url, data=None, timeout=None):
134 |         if timeout is None:
135 |             timeout = self.timeout
136 |         try:
137 |             r = self._session.request(method, url, data=data, timeout=timeout)
138 |         except requests.exceptions.RequestException:
139 |             raise exceptions.ConnectionError(f'Request "{url}" failed')
140 |         if re.search('Sign In Your Account', r.text):
141 |             raise exceptions.LoginRequired('Login is required')
142 |         return r.text
143 | 
144 |     def _get_login_url(self):
145 |         login_url = f'{BASE_URL}/userloginex.php?action=login'
146 |         if self.client_type == 'contest':
147 |             login_url += f'&cid={self.contest_id}&notice=0'
148 |         return login_url
149 | 
150 |     def _get_submit_url(self):
151 |         if self.client_type == 'contest':
152 |             return f'{BASE_URL}/contests/contest_submit.php?action=submit&cid={self.contest_id}'
153 |         else:
154 |             return f'{BASE_URL}/submit.php?action=submit'
155 | 
156 |     def _get_status_url(self, run_id='', problem_id='', user_id=''):
157 |         if self.client_type == 'contest':
158 |             return (f'{BASE_URL}/contests/contest_status.php?'
159 |                     f'cid={self.contest_id}&pid={problem_id}&user={user_id}&lang=0&status=0')
160 |         else:
161 |             return f'{BASE_URL}/status.php?first={run_id}&pid={problem_id}&user={user_id}&lang=0&status=0'
162 | 
163 |     def _get_problem_url(self, problem_id):
164 |         if self.client_type == 'contest':
165 |             return f'{BASE_URL}/contests/contest_showproblem.php?pid={problem_id}&cid={self.contest_id}'
166 |         else:
167 |             return f'{BASE_URL}/showproblem.php?pid={problem_id}'
168 | 
169 |     def _parse_problem(self, text):
170 |         result = {}
171 |         pattern = re.compile(r'Time Limit:.*?[0-9]*/([0-9]*).*?MS.*?\(Java/Others\).*?'
172 |                              r'Memory Limit:.*?[0-9]*/([0-9]*).*?K.*?\(Java/Others\)')
173 |         # find time limit and mem limit
174 |         limit = re.search(pattern, text)
175 |         if limit:
176 |             result['time_limit'] = limit.group(1)
177 |             result['mem_limit'] = limit.group(2)
178 |         soup = BeautifulSoup(text, 'lxml')
179 |         # replace relative url
180 |         img_tags = soup.find_all('img')
181 |         for tag in img_tags:
182 |             if tag.has_attr('src'):
183 |                 img_url = urljoin(self._get_problem_url(''), tag['src'])
184 |                 tag['src'] = img_url
185 |         if soup.h1:
186 |             result['title'] = soup.h1.text
187 |             if result['title'] == 'System Message':
188 |                 return
189 |         tags = soup.find_all('div', 'panel_title', align='left')
190 |         for t in tags:
191 |             title = t.string
192 |             if title in PAGE_TITLES:
193 |                 tag = t.next_sibling
194 |                 limit = 0
195 |                 while tag and type(tag) is NavigableString and limit < 3:
196 |                     tag = tag.next_sibling
197 |                     limit += 1
198 |                 if tag is None or type(tag) is NavigableString:
199 |                     continue
200 |                 res = re.match(r'<[^>]*>(.*)</[^>]*>$', str(tag), re.DOTALL)
201 |                 if res:
202 |                     result[PAGE_TITLES[title]] = res.group(1)
203 |         return result
204 | 
205 |     @staticmethod
206 |     def _find_verdict(text, run_id):
207 |         soup = BeautifulSoup(text, 'lxml')
208 |         tables = soup.find_all('table')
209 |         tables.reverse()
210 |         try:
211 |             pattern = re.compile(r'Run ID.*Judge Status.*Author', re.DOTALL)
212 |             table = next(filter(lambda x: re.search(pattern, str(x)), tables))
213 |         except StopIteration:
214 |             return
215 |         tags = table.find_all('tr', align="center")
216 |         for tag in tags:
217 |             result = [x.text.strip() for x in tag.find_all('td')]
218 |             if len(result) < 6:
219 |                 continue
220 |             if result[0] == run_id:
221 |                 verdict = result[2]
222 |                 try:
223 |                     exe_time = int(result[4].replace('MS', ''))
224 |                     exe_mem = int(result[5].replace('K', ''))
225 |                 except ValueError:
226 |                     continue
227 |                 if re.search('Runtime Error', verdict):
228 |                     verdict = 'Runtime Error'
229 |                 return verdict, exe_time, exe_mem
230 | 
231 |     @staticmethod
232 |     def _encode_source_code(code):
233 |         from urllib import parse
234 |         import base64
235 |         return base64.b64encode(parse.quote(code).encode('utf-8')).decode('utf-8')
236 | 
237 | 
238 | class HDUClient(_UniClient):
239 |     def __init__(self, auth=None, **kwargs):
240 |         super().__init__(auth, **kwargs)
241 |         self.name = 'hdu'
242 | 
243 |     def get_name(self):
244 |         return self.name
245 | 
246 |     def check_login(self):
247 |         url = BASE_URL + '/control_panel.php'
248 |         try:
249 |             self._request_url('get', url)
250 |         except exceptions.LoginRequired:
251 |             return False
252 |         return True
253 | 
254 |     def get_problem_list(self):
255 |         url = f'{BASE_URL}/listproblem.php'
256 |         resp = self._request_url('get', url)
257 |         vols = set(re.findall(r'listproblem.php\?vol=([0-9]+)', resp))
258 |         vols = [int(x) for x in vols]
259 |         vols.sort()
260 |         result = []
261 |         for vol in vols:
262 |             ex_url = url + f'?vol={vol}'
263 |             # fetch each volume's listing and collect its problem ids
264 |             try:
265 |                 resp = self._request_url('get', ex_url)
266 |             except exceptions.ConnectionError:
267 |                 break
268 |             ids = self.__class__._parse_problem_id(resp)
269 |             result += ids
270 |         result.sort()
271 |         return result
272 | 
273 |     @staticmethod
274 |     def _parse_problem_id(text):
275 |         pattern = re.compile(r'p\([^,()]+?,([^,()]+?)(,[^,()]+?){4}\);', re.DOTALL)
276 |         res = re.findall(pattern, text)
277 |         return [x[0] for x in res]
278 | 
279 | 
280 | class HDUContestClient(_UniClient, ContestClient):
281 |     def __init__(self, auth=None, contest_id=None, **kwargs):
282 |         timeout = kwargs.get('timeout', 5)
283 |         if contest_id is None:
284 |             raise exceptions.JudgeException('You must specify a contest id')
285 |         super().__init__(auth, 'contest', str(contest_id), timeout)
286 |         self.name = f'hdu_ct_{contest_id}'
287 |         self._contest_info = ContestInfo('hdu', self.contest_id)
288 |         self.refresh_contest_info()
289 | 
290 |     def get_name(self):
291 |         return self.name
292 | 
293 |     def get_contest_id(self):
294 |         return self.contest_id
295 | 
296 |     def check_login(self):
297 |         raise NotImplementedError
298 | 
299 |     def get_contest_info(self):
300 |         return self._contest_info
301 | 
302 |     def get_problem_list(self):
303 |         return self._contest_info.problem_list
304 | 
305 |     def get_problem(self, problem_id):
306 |         if not self._contest_info.public and self.auth is None:
307 |             raise exceptions.LoginRequired('Login is required')
308 |         return super().get_problem(problem_id)
309 | 
310 |     def submit_problem(self, problem_id, language, source_code):
311 |         self.refresh_contest_info()
312 |         if self._contest_info.status == 'Pending':
313 |             raise exceptions.SubmitError('Contest has not begun')
314 |         if self._contest_info.status == 'Ended':
315 |             raise exceptions.SubmitError('Contest has ended')
316 |         return super().submit_problem(problem_id, language, source_code)
317 | 
318 |     def get_submit_status(self, run_id, **kwargs):
319 |         if not self._contest_info.public and self.auth is None:
320 |             raise exceptions.LoginRequired('Login is required')
321 |         return super().get_submit_status(run_id, **kwargs)
322 | 
323 |     def refresh_contest_info(self):
324 |         url = f'{BASE_URL}/contests/contest_show.php?cid={self.contest_id}'
325 |         resp = self._request_url('get', url)
326 |         if re.search(r'System Message', resp):
327 |             raise exceptions.ConnectionError(f'Contest {self.contest_id} does not exist')
328 |         self._contest_info.problem_list = self.__class__._parse_problem_id(resp)
329 |         soup = BeautifulSoup(resp, 'lxml')
330 |         h1 = soup.h1
331 |         if h1:
332 |             self._contest_info.title = h1.get_text()
333 |         divs = soup.find_all('div')
334 |         divs.reverse()
335 |         try:
336 |             pattern = re.compile(r'Start.*Time.*Contest.*Type.*Contest.*Status', re.DOTALL)
337 |             div = next(filter(lambda x: re.search(pattern, str(x)), divs))
338 |         except StopIteration:
339 |             return
340 |         pattern = re.compile(
341 |             r'Start *?Time *?: *?([0-9]{4})-([0-9]{2})-([0-9]{2}) *?([0-9]{2}):([0-9]{2}):([0-9]{2}).*?'
342 |             r'End *?Time *?: *?([0-9]{4})-([0-9]{2})-([0-9]{2}) *?([0-9]{2}):([0-9]{2}):([0-9]{2}).*?'
343 |             r'Contest *?Type *?:(.*?)Contest *?Status.*?:(.*?)Current.*?Server.*?Time',
344 |             re.DOTALL)
345 |         res = re.search(pattern, div.get_text())
346 |         if res:
347 |             res = [x.strip() for x in res.groups()]
348 |             self._contest_info.start_time = self._to_timestamp(res[0:6])
349 |             self._contest_info.end_time = self._to_timestamp(res[6:12])
350 |             self._contest_info.public = res[12] == 'Public'
351 |             self._contest_info.status = res[13]
352 | 
353 |     @classmethod
354 |     def get_recent_contest(cls):
355 |         from ..base import get_header
356 |         session = requests.session()
357 |         session.headers.update(get_header())
358 |         url = f'{BASE_URL}/contests/contest_list.php'
359 |         try:
360 |             r = session.get(url, timeout=5)
361 |         except requests.exceptions.RequestException:
362 |             return []
363 |         soup = BeautifulSoup(r.text, 'lxml')
364 |         table = soup.find('table', 'table_text')
365 |         if table is None:
366 |             return []
367 |         tags = table.find_all('tr', align='center')
368 |         result = []
369 |         for tag in tags:
370 |             tds = tag.find_all('td')
371 |             tds = [x.text.strip() for x in tds]
372 |             if len(tds) < 6:
373 |                 continue
374 |             contest_info = ContestInfo('hdu', contest_id=tds[0], title=tds[1], status=tds[4])
375 |             r = re.search('([0-9]{4})-([0-9]{2})-([0-9]{2}) *?([0-9]{2}):([0-9]{2}):([0-9]{2})', tds[2])
376 |             if r:
377 |                 contest_info.start_time = cls._to_timestamp(r.groups())
378 |             if tds[3] != 'Public':
379 |                 contest_info.public = False
380 |             result.append(contest_info)
381 |         return result
382 | 
383 |     @staticmethod
384 |     def _parse_problem_id(text):
385 |         res = []
386 |         soup = BeautifulSoup(text, 'lxml')
387 |         tables = soup.find_all('table')
388 |         try:
389 |             pattern = re.compile(r'Solved.*Title.*Ratio', re.DOTALL)
390 |             table = next(filter(lambda x: re.search(pattern, str(x)), tables))
391 |         except StopIteration:
392 |             return res
393 |         tags = table.find_all('tr', align="center")
394 |         for tag in tags:
395 |             tds = [x.text for x in tag.find_all('td')]
396 |             if len(tds) >= 2:
397 |                 res.append(tds[1])
398 |         return res
399 | 
400 |     @staticmethod
401 |     def _to_timestamp(d):
402 |         try:
403 |             d = [int(x) for x in d]
404 |         except ValueError:
405 |             return 0
406 |         # HDU displays Beijing time (UTC+8), so shift to UTC before taking the timestamp
407 |         utc = datetime(*d, tzinfo=timezone.utc) - timedelta(hours=8)
408 |         return utc.timestamp()
409 | 
--------------------------------------------------------------------------------
/vjudge/site/scu/__init__.py:
--------------------------------------------------------------------------------
1 | from .client import *
2 | 
3 | __all__ = client.__all__
4 | 
--------------------------------------------------------------------------------
/vjudge/site/scu/captcha.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sleepymole/vjudge-core/cecfafd1ac7704477f0bb1382c6f5a4d561a01d5/vjudge/site/scu/captcha.db
--------------------------------------------------------------------------------
/vjudge/site/scu/client.py:
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 | import sqlite3
4 | 
5 | import requests
6 | from bs4 import BeautifulSoup
7 | 
8 | from .. import exceptions
9 | from ..base import BaseClient
10 | 
11 | __all__ = ('SOJClient',)
12 | 
13 | base_url = 'http://acm.scu.edu.cn/soj'
14 | base_dir = os.path.abspath(os.path.dirname(__file__))
15 | db = sqlite3.connect(os.path.join(base_dir, 'captcha.db'), check_same_thread=False)
16 | 
17 | 
18 | class SOJClient(BaseClient):
19 |     def __init__(self, auth=None, **kwargs):
20 |         super().__init__()
21 |         self.auth = auth
22 |         self.name = 'scu'
23 |         self.client_type = 'practice'
24 |         self.timeout = kwargs.get('timeout', 5)
25 |         if auth is not None:
26 |             self.username, self.password = auth
27 |             self.login(self.username, self.password)
28 | 
29 |     def get_name(self):
30 |         return self.name
31 | 
32 |     def get_user_id(self):
33 |         if self.auth is None:
34 |             raise exceptions.LoginRequired('Login is required')
35 |         return self.username
36 | 
37 |     def get_client_type(self):
38 |         return self.client_type
39 | 
40 |     def login(self, username, password):
41 |         url = base_url + '/login.action'
42 |         data = {
43 |             'back': 2,
44 |             'id': username,
45 |             'password': password,
46 |             'submit': 'login'
47 |         }
48 |         resp = self._request_url('post', url, data=data)
49 |         if re.search('USER_NOT_EXIST', resp):
50 |             raise exceptions.UserNotExist('User does not exist')
51 |         elif re.search('PASSWORD_ERROR', resp):
52 |             raise exceptions.PasswordError('Password error')
53 |         self.auth = (username, password)
54 |         self.username = username
55 |         self.password = password
56 | 
57 |     def check_login(self):
58 |         url = f'{base_url}/update_user_form.action'
59 |         resp = self._request_url('get', url)
60 |         if re.search('Please login first', resp):
61 |             return False
62 |         return True
63 | 
64 |     def update_cookies(self):
65 |         if self.auth is None:
66 |             raise exceptions.LoginRequired
67 |         self.login(self.username, self.password)
68 | 
69 |     def get_problem(self, problem_id):
70 |         url = f'{base_url}/problem.action?id={problem_id}'
71 |         resp = self._request_url('get', url)
72 |         if re.search('No such problem', resp):
73 |             return
74 |         try:
75 |             title = re.findall('<title>{}: (.*?)</title>'.format(problem_id), resp)[0]
76 |         except IndexError:
77 |             return
78 |         return {'title': title}
79 | 
80 |     def get_problem_list(self):
81 |         url = f'{base_url}/problems.action'
82 |         resp = self._request_url('get', url)
83 |         volume_list = []
84 |         try:
85 |             table = BeautifulSoup(resp, 'lxml').find('table')
86 |             tr = table.find('tr')
87 |             tr = tr.find_next_sibling('tr')
88 |             tags = tr.find_all('a')
89 |             for tag in tags:
90 |                 r = re.search(r'\[(.*)\]', tag.text.strip())
91 |                 volume_list.append(r.groups()[0])
92 |         except (AttributeError, IndexError):
93 |             pass
94 |         problem_list = []
95 |         for vol in volume_list:
96 |             page_url = f'{url}?volume={vol}'
97 |             resp = self._request_url('get', page_url)
98 |             problem_list += self.__class__._parse_problem_id(resp)
99 |         problem_list.sort()
100 |         return problem_list
101 | 
102 |     def submit_problem(self, problem_id, language, source_code):
103 |         if self.auth is None:
104 |             raise exceptions.LoginRequired('Login is required')
105 |         submit_url = f'{base_url}/submit.action'
106 |         status_url = f'{base_url}/solutions.action?userId={self.username}&problemId={problem_id}'
107 |         captcha = self._get_captcha()
108 |         if captcha is None:
109 |             raise exceptions.JudgeException('Cannot find a valid captcha')
110 |         data = {
111 |             'problemId': problem_id,
112 |             'validation': captcha,
113 |             'language': language,
114 |             'source': source_code,
115 |             'submit': 'Submit'
116 |         }
117 |         resp = self._request_url('post', submit_url, data=data)
118 |         if re.search('ERROR', resp):
119 |             if not self.check_login():
120 |                 raise exceptions.LoginRequired('Login is required')
121 |             else:
122 |                 raise exceptions.SubmitError('Submit failed unexpectedly')
123 |         resp = self._request_url('get', status_url)
124 |         soup = BeautifulSoup(resp, 'lxml')
125 |         try:
126 |             tag = soup.find_all('table')[1].find_all('tr')[1]
127 |             run_id = next(tag.stripped_strings)
128 |         except IndexError:
129 |             raise exceptions.SubmitError
130 |         return run_id
131 | 
132 |     def get_submit_status(self, run_id, **kwargs):
133 |         status_url = f'{base_url}/solutions.action?from={run_id}'
134 |         resp = self._request_url('get', status_url)
135 |         try:
136 |             soup = BeautifulSoup(resp, 'lxml')
137 |             tag = soup.find_all('table')[1].find_all('tr')[1]
138 |             col_tags = tag.find_all('td')
139 |             result = [' '.join(x.stripped_strings) for x in col_tags[5:]]
140 |             verdict, exe_time, exe_mem = result[0], int(result[1]), int(result[2])
141 |             return verdict, exe_time, exe_mem
142 |         except (IndexError, ValueError):
143 |             pass
144 | 
145 |     @staticmethod
146 |     def _parse_problem_id(text):
147 |         ids = []
148 |         table = BeautifulSoup(text, 'lxml').find('table')
149 |         if not table:
150 |             return ids
151 |         trs = table.find_all('tr')[3:]
152 |         for tr in trs:
153 |             try:
154 |                 tds = tr.find_all('td')
155 |                 pid = tds[1].text.strip()
156 |                 int(pid)
157 |             except (ValueError, IndexError):
158 |                 continue
159 |             ids.append(pid)
160 |         return ids
161 | 
162 |     def _request_url(self, method, url, data=None, timeout=None):
163 |         if timeout is None:
164 |             timeout = self.timeout
165 |         try:
166 |             r = self._session.request(method, url, data=data, timeout=timeout)
167 |         except requests.exceptions.RequestException:
168 |             raise exceptions.ConnectionError(f'Request "{url}" failed')
169 |         return r.text
170 | 
171 |     def _get_captcha(self):
172 |         url = f'{base_url}/validation_code'
173 |         try:
174 |             r = self._session.get(url, timeout=self.timeout)
175 |         except requests.exceptions.RequestException:
176 |             raise exceptions.ConnectionError(f'Request "{url}" failed')
177 |         import hashlib
178 |         h = hashlib.md5(r.content).hexdigest()
179 |         cursor = db.cursor()
180 |         # the captcha answer is looked up by the MD5 hash of the downloaded image bytes
181 |         cursor.execute("SELECT Code FROM Captcha WHERE Hash=?", (h,))
182 |         res = cursor.fetchall()
183 |         try:
184 |             return res[0][0]
185 |         except IndexError:
186 |             return
187 | 
--------------------------------------------------------------------------------
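A minimal usage sketch of the factory API defined in vjudge/site/__init__.py, assuming valid HDU credentials, network access, and a local main.cpp of acceptable length; the credentials, problem id, and file name below are placeholders, not values from this repository:

import time
from vjudge.site import get_client_by_oj_name

# 'hdu' selects HDUClient; a name like 'hdu_ct_1000' would select HDUContestClient instead
client = get_client_by_oj_name('hdu', auth=('user', 'pass'))
source_code = open('main.cpp').read()
run_id = client.submit_problem('1000', 'G++', source_code)
# poll until the status row has a parsed time and memory; until then the client returns None
status = client.get_submit_status(run_id, user_id='user', problem_id='1000')
while status is None:
    time.sleep(2)
    status = client.get_submit_status(run_id, user_id='user', problem_id='1000')
verdict, exe_time, exe_mem = status
print(verdict, exe_time, exe_mem)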