├── tests ├── __init__.py ├── api │ ├── __init__.py │ ├── _utils │ │ ├── __init__.py │ │ ├── asyncio.py │ │ └── database.py │ ├── _fixtures │ │ ├── __init__.py │ │ ├── config.json │ │ ├── grading_runs.py │ │ └── grading_configs.py │ ├── unit │ │ ├── test_utils.py │ │ └── test_daos.py │ ├── integration │ │ ├── test_worker.py │ │ └── test_client.py │ └── base.py ├── integration │ ├── __init__.py │ ├── base.py │ └── test_basic.py └── _fixtures │ ├── roster.json │ ├── test-course.json │ └── assignment.json ├── broadway ├── __init__.py ├── api │ ├── handlers │ │ ├── __init__.py │ │ ├── stream.py │ │ ├── base.py │ │ ├── worker_ws.py │ │ ├── worker.py │ │ └── client.py │ ├── utils │ │ ├── __init__.py │ │ ├── time.py │ │ ├── multiqueue.py │ │ ├── streamqueue.py │ │ ├── run.py │ │ └── bootstrap.py │ ├── decorators │ │ ├── __init__.py │ │ └── auth.py │ ├── models │ │ ├── base.py │ │ ├── course.py │ │ ├── grading_job_log.py │ │ ├── __init__.py │ │ ├── worker_node.py │ │ ├── assignment_config.py │ │ ├── grading_run.py │ │ └── grading_job.py │ ├── callbacks │ │ ├── __init__.py │ │ ├── job.py │ │ └── worker.py │ ├── daos │ │ ├── __init__.py │ │ ├── decorators.py │ │ ├── base.py │ │ ├── course.py │ │ ├── grading_job_log.py │ │ ├── assignment_config.py │ │ ├── grading_run.py │ │ ├── grading_job.py │ │ └── worker_node.py │ ├── __main__.py │ ├── definitions.py │ └── flags.py └── grader │ ├── api.py │ ├── __main__.py │ ├── flags.py │ ├── ws.py │ └── http.py ├── setup.cfg ├── requirements-test.txt ├── requirements.txt ├── ops ├── api.Dockerfile ├── grader.Dockerfile └── tests.Dockerfile ├── .travis.yml ├── docker-compose.test.yml ├── install-secrets-guard-hook.sh ├── LICENSE ├── .gitignore ├── README.md └── CONTRIBUTING.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /broadway/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/api/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/api/_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /broadway/api/handlers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /broadway/api/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/api/_fixtures/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/integration/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /broadway/api/decorators/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /broadway/api/models/base.py: 
-------------------------------------------------------------------------------- 1 | class BaseModel: 2 | pass 3 | -------------------------------------------------------------------------------- /tests/_fixtures/roster.json: -------------------------------------------------------------------------------- 1 | { 2 | "students_env": [ 3 | { "NETID": "student-id" } 4 | ] 5 | } 6 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | extend-ignore = E203 3 | max-line-length = 88 4 | exclude = .git/,venv/,logs/,test_logs/ 5 | -------------------------------------------------------------------------------- /requirements-test.txt: -------------------------------------------------------------------------------- 1 | black 2 | flake8 3 | python-coveralls 4 | pytest-cov 5 | coverage 6 | websockets 7 | requests 8 | -------------------------------------------------------------------------------- /tests/api/_utils/asyncio.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | 4 | def to_sync(awaitable): 5 | return asyncio.get_event_loop().run_until_complete(awaitable) 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/illinois-cs241/chainlink@0.0.8 2 | git+https://github.com/illinois-cs241/flagset@0.0.3 3 | pymongo==3.9.0 4 | tornado==5.1.1 5 | Tornado-JSON==1.3.3 6 | jsonschema==2.6.0 7 | websockets==7.0 8 | -------------------------------------------------------------------------------- /tests/_fixtures/test-course.json: -------------------------------------------------------------------------------- 1 | { 2 | "test-course": { 3 | "tokens": [ 4 | "course-token" 5 | ], 6 | "query_tokens": [ 7 | "query-token" 8 | ] 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /broadway/api/callbacks/__init__.py: -------------------------------------------------------------------------------- 1 | from broadway.api.callbacks.job import job_update_callback 2 | from broadway.api.callbacks.worker import worker_heartbeat_callback 3 | 4 | __all__ = ["job_update_callback", "worker_heartbeat_callback"] 5 | -------------------------------------------------------------------------------- /broadway/api/models/course.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | 4 | class Course: 5 | def __init__(self, id_: str, tokens: List[str] = [], query_tokens: List[str] = []): 6 | self.id = id_ 7 | self.tokens = tokens 8 | self.query_tokens = query_tokens 9 | -------------------------------------------------------------------------------- /tests/api/_fixtures/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "heartbeat_interval": 1, 3 | 4 | "logs": { 5 | "dir": "test_logs", 6 | "rotate": "midnight", 7 | "backup": 7 8 | }, 9 | 10 | "mongodb": { 11 | "primary": "__test", 12 | "logs": "__test_logs", 13 | "timeout": 1 14 | } 15 | } 16 | 17 | -------------------------------------------------------------------------------- /ops/api.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.6.9-alpine 2 | 3 | ARG INSTALL_PATH=/opt/broadway 4 | RUN mkdir -p ${INSTALL_PATH} 5 | 6 | ADD requirements.txt 
${INSTALL_PATH} 7 | 8 | RUN apk add --no-cache git && \ 9 | pip install -r ${INSTALL_PATH}/requirements.txt 10 | 11 | ADD broadway ${INSTALL_PATH}/broadway 12 | 13 | ENV PYTHONPATH "${PYTHONPATH}:${INSTALL_PATH}" 14 | 15 | ENTRYPOINT ["python", "-m", "broadway.api"] 16 | CMD [] 17 | -------------------------------------------------------------------------------- /broadway/api/models/grading_job_log.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | 4 | class GradingJobLog: 5 | def __init__( 6 | self, 7 | job_id: str, 8 | id_: Optional[str] = None, 9 | stdout: Optional[str] = "", 10 | stderr: Optional[str] = "", 11 | ): 12 | self.id = id_ 13 | self.job_id = job_id 14 | self.stdout = stdout 15 | self.stderr = stderr 16 | -------------------------------------------------------------------------------- /tests/_fixtures/assignment.json: -------------------------------------------------------------------------------- 1 | { 2 | "student_pipeline": [ 3 | { 4 | "image": "alpine:3.5", 5 | "entrypoint": ["sleep", "2"] 6 | }, 7 | { 8 | "image": "alpine:3.5", 9 | "entrypoint": ["sh", "-c", "echo $NETID > /job/student"] 10 | }, 11 | { 12 | "image": "alpine:3.5", 13 | "entrypoint": ["cat", "/job/student"] 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /ops/grader.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.6.9-alpine 2 | 3 | ARG INSTALL_PATH=/opt/broadway 4 | RUN mkdir -p ${INSTALL_PATH} 5 | 6 | ADD requirements.txt ${INSTALL_PATH} 7 | 8 | RUN apk add --no-cache git && \ 9 | pip install -r ${INSTALL_PATH}/requirements.txt 10 | 11 | ADD broadway ${INSTALL_PATH}/broadway 12 | 13 | ENV PYTHONPATH "${PYTHONPATH}:${INSTALL_PATH}" 14 | 15 | WORKDIR /srv/cs241/broadway-grader 16 | ENTRYPOINT ["python", "-m", "broadway.grader"] 17 | CMD [] 18 | -------------------------------------------------------------------------------- /ops/tests.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.6.9-alpine 2 | 3 | ARG INSTALL_PATH=/opt/broadway 4 | RUN mkdir -p ${INSTALL_PATH} 5 | 6 | ADD requirements-test.txt ${INSTALL_PATH} 7 | 8 | # python3-dev, gcc, and build-base are required for building some python packages (typed-ast in particular) 9 | RUN apk add --no-cache git python3-dev gcc build-base && \ 10 | pip install -r ${INSTALL_PATH}/requirements-test.txt 11 | 12 | ADD tests ${INSTALL_PATH}/tests 13 | 14 | ENV PYTHONPATH "${PYTHONPATH}:${INSTALL_PATH}" 15 | 16 | ENTRYPOINT ["py.test", "-v", "tests/integration"] 17 | CMD [] 18 | 19 | -------------------------------------------------------------------------------- /broadway/api/daos/__init__.py: -------------------------------------------------------------------------------- 1 | from broadway.api.daos.assignment_config import AssignmentConfigDao 2 | from broadway.api.daos.course import CourseDao 3 | from broadway.api.daos.grading_job import GradingJobDao 4 | from broadway.api.daos.grading_job_log import GradingJobLogDao 5 | from broadway.api.daos.grading_run import GradingRunDao 6 | from broadway.api.daos.worker_node import WorkerNodeDao 7 | 8 | __all__ = [ 9 | "AssignmentConfigDao", 10 | "CourseDao", 11 | "GradingJobDao", 12 | "GradingJobLogDao", 13 | "GradingRunDao", 14 | "WorkerNodeDao", 15 | ] 16 | -------------------------------------------------------------------------------- /broadway/api/daos/decorators.py: 
-------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from broadway.api.daos.base import BaseDao 4 | 5 | logger = logging.getLogger(__name__) 6 | 7 | 8 | def validate_obj_size(func): 9 | def wrapper(*args, **kwargs): 10 | base_dao: BaseDao = args[0] 11 | obj = args[1] 12 | 13 | if base_dao.is_obj_size_valid(obj): 14 | return func(*args, **kwargs) 15 | 16 | logger.critical( 17 | "BSON document larger than the maximum BSON size as specified by the mongo" 18 | " client. Not saving the object." 19 | ) 20 | return 21 | 22 | return wrapper 23 | -------------------------------------------------------------------------------- /broadway/api/models/__init__.py: -------------------------------------------------------------------------------- 1 | from broadway.api.models.assignment_config import AssignmentConfig 2 | from broadway.api.models.course import Course 3 | from broadway.api.models.grading_job import GradingJob, GradingJobType, GradingJobState 4 | from broadway.api.models.grading_job_log import GradingJobLog 5 | from broadway.api.models.grading_run import GradingRun, GradingRunState 6 | from broadway.api.models.worker_node import WorkerNode 7 | 8 | __all__ = [ 9 | "AssignmentConfig", 10 | "Course", 11 | "GradingJob", 12 | "GradingJobType", 13 | "GradingJobState", 14 | "GradingJobLog", 15 | "GradingRun", 16 | "GradingRunState", 17 | "WorkerNode", 18 | ] 19 | -------------------------------------------------------------------------------- /broadway/api/models/worker_node.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | from datetime import datetime 3 | 4 | 5 | class WorkerNode: 6 | def __init__( 7 | self, 8 | id_: str, 9 | hostname: str, 10 | running_job_id: Optional[str] = None, 11 | last_seen: Optional[datetime] = None, 12 | jobs_processed: int = 0, 13 | is_alive: bool = True, 14 | use_ws: bool = False, 15 | ): 16 | self.id = id_ 17 | self.running_job_id = running_job_id 18 | self.last_seen = last_seen 19 | self.hostname = hostname 20 | self.jobs_processed = jobs_processed 21 | self.is_alive = is_alive 22 | self.use_ws = use_ws 23 | -------------------------------------------------------------------------------- /tests/api/_utils/database.py: -------------------------------------------------------------------------------- 1 | from broadway.api.models.course import Course 2 | from broadway.api.daos.course import CourseDao 3 | 4 | 5 | def initialize_db(settings, course_config): 6 | course_dao = CourseDao(settings) 7 | for course_id, course in course_config.items(): 8 | course = Course( 9 | id_=course_id, 10 | tokens=course["tokens"], 11 | query_tokens=course.get("query_tokens", []), 12 | ) 13 | course_dao.insert_or_update(course) 14 | 15 | 16 | def clear_db(settings): 17 | db = settings["DB"] 18 | config = settings["FLAGS"] 19 | db.drop_database(config["mongodb_primary"]) 20 | db.drop_database(config["mongodb_logs"]) 21 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # We need xenial, otherwise we get an SSL error when installing tornado 2 | dist: xenial 3 | sudo: required 4 | language: python 5 | python: 6 | - "3.6" 7 | services: 8 | - mongodb 9 | - docker 10 | cache: pip 11 | branches: 12 | only: 13 | - /.*/ 14 | install: 15 | - pip install --upgrade pip 16 | - pip install -r requirements.txt 17 | - pip install -r requirements-test.txt 18 
| script: 19 | - black --check broadway/ tests/ 20 | - flake8 --count --config=setup.cfg 21 | - py.test -v --cov=broadway/ tests/api --cov coveralls --cov-report term-missing --doctest-modules 22 | - docker-compose -f docker-compose.test.yml up --build --force-recreate --exit-code-from tests 23 | after_success: 24 | - coveralls 25 | -------------------------------------------------------------------------------- /tests/api/_fixtures/grading_runs.py: -------------------------------------------------------------------------------- 1 | one_student_job = {"students_env": [{"netid": "test net id"}]} 2 | one_student_and_pre = { 3 | "pre_processing_env": {"type": "pre"}, 4 | "students_env": [{"netid": "test net id"}], 5 | } 6 | one_student_and_post = { 7 | "students_env": [{"netid": "test net id"}], 8 | "post_processing_env": {"type": "post"}, 9 | } 10 | one_student_and_both = { 11 | "pre_processing_env": {"type": "pre"}, 12 | "students_env": [{"netid": "test net id"}], 13 | "post_processing_env": {"type": "post"}, 14 | } 15 | 16 | two_student_job = { 17 | "students_env": [{"netid": "student id 1"}, {"netid": "student id 2"}] 18 | } 19 | 20 | 21 | def generate_n_student_jobs(n): 22 | return {"students_env": [{"netid": "test net id"}] * n} 23 | -------------------------------------------------------------------------------- /broadway/api/models/assignment_config.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional 2 | 3 | from broadway.api.models.base import BaseModel 4 | 5 | 6 | class AssignmentConfig(BaseModel): 7 | def __init__( 8 | self, 9 | id_: str, 10 | env: Optional[Dict[str, str]] = None, 11 | student_pipeline: List[Dict[str, Any]] = [], 12 | pre_processing_pipeline: Optional[List[Dict[str, Any]]] = None, 13 | post_processing_pipeline: Optional[List[Dict[str, Any]]] = None, 14 | ) -> None: 15 | self.id = id_ 16 | self.env = env 17 | self.student_pipeline = student_pipeline 18 | self.pre_processing_pipeline = pre_processing_pipeline 19 | self.post_processing_pipeline = post_processing_pipeline 20 | 21 | def to_dict(self): 22 | result = {} 23 | for var in vars(self): 24 | if var != "id": 25 | result[var] = getattr(self, var) 26 | return result 27 | -------------------------------------------------------------------------------- /broadway/api/__main__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import tornado.ioloop 4 | 5 | from broadway.api.utils.bootstrap import ( 6 | initialize_global_settings, 7 | initialize_database, 8 | initialize_logger, 9 | initialize_course_tokens, 10 | initialize_signal_handler, 11 | initialize_app, 12 | ) 13 | 14 | from broadway.api.flags import app_flags 15 | 16 | logger = logging.getLogger(__name__) 17 | 18 | 19 | def __main__(): 20 | flags = app_flags.parse() 21 | initialize_logger(flags) 22 | 23 | settings = initialize_global_settings(flags) 24 | 25 | initialize_database(settings, flags) 26 | initialize_course_tokens(settings, flags) 27 | initialize_signal_handler(settings, flags) 28 | initialize_app(settings, flags) 29 | 30 | logger.info("ready to serve") 31 | 32 | tornado.ioloop.IOLoop.current().start() 33 | 34 | tornado.ioloop.IOLoop.current().close(all_fds=True) 35 | settings["DB"].close() 36 | 37 | logger.info("shut down") 38 | 39 | 40 | if __name__ == "__main__": 41 | __main__() 42 | -------------------------------------------------------------------------------- /broadway/api/daos/base.py: 
-------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import bson 4 | 5 | from pymongo import MongoClient 6 | from pymongo.collection import Collection 7 | 8 | 9 | class BaseDao: 10 | ID = "_id" 11 | 12 | def __init__(self, settings): 13 | self._config: dict = settings["FLAGS"] 14 | self._client: MongoClient = settings["DB"] 15 | 16 | def _get_log_collection(self, collection_name) -> Collection: 17 | return self._client[self._config["mongodb_logs"]][collection_name] 18 | 19 | def _get_primary_collection(self, collection_name) -> Collection: 20 | return self._client[self._config["mongodb_primary"]][collection_name] 21 | 22 | def _to_store(self, obj) -> dict: 23 | raise NotImplementedError("_to_store not implemented") 24 | 25 | def _from_store(self, obj) -> Optional["BaseDao"]: 26 | raise NotImplementedError("_from_store not implemented") 27 | 28 | def is_obj_size_valid(self, obj) -> bool: 29 | return len(bson.BSON.encode(self._to_store(obj))) <= self._client.max_bson_size 30 | -------------------------------------------------------------------------------- /broadway/api/utils/time.py: -------------------------------------------------------------------------------- 1 | import datetime as dt 2 | import time 3 | 4 | 5 | def get_time(): 6 | """ 7 | Get current time represented by a datetime object 8 | 9 | :rtype: dt.datetime 10 | :return: the datetime object representing the current time 11 | """ 12 | return dt.datetime.fromtimestamp(time.time()) 13 | 14 | 15 | def get_time_from_string(str_time, stamp_format="%Y-%m-%d %H:%M:%S"): 16 | """ 17 | Convert string representation of time to datetime using the specified format 18 | 19 | :param str_time: string representation of time in the given format 20 | :type str_time: str 21 | :param stamp_format: the format to use to parse the string 22 | :rtype: dt.datetime 23 | :return: the datetime object representing the time 24 | """ 25 | return dt.datetime.strptime(str_time, stamp_format) 26 | 27 | 28 | def get_string_from_time(stamp_format="%Y-%m-%d %H:%M:%S"): 29 | """ 30 | Get the string representation of current time 31 | 32 | :param stamp_format: the format to return the time in 33 | :rtype: str 34 | :return: string representation of the current time in the given format 35 | """ 36 | return get_time().strftime(stamp_format) 37 | -------------------------------------------------------------------------------- /docker-compose.test.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | services: 3 | mongodb: 4 | image: mongo:4.0-xenial 5 | 6 | api: 7 | build: 8 | context: . 9 | dockerfile: ops/api.Dockerfile 10 | environment: 11 | - BROADWAY_MONGODB_DSN=mongodb://mongodb:27017 12 | - BROADWAY_BIND_ADDR=api 13 | - BROADWAY_BIND_PORT=1470 14 | - BROADWAY_TOKEN=test-token 15 | ports: 16 | - 1470:1470 17 | depends_on: 18 | - mongodb 19 | 20 | # load test course config 21 | volumes: 22 | - ./tests/_fixtures/test-course.json:/tmp/course.json 23 | command: 24 | - --course-config=/tmp/course.json 25 | 26 | docker: 27 | image: docker:dind 28 | privileged: true 29 | command: dockerd --tls=false -H docker:1471 30 | 31 | grader: 32 | build: 33 | context: . 34 | dockerfile: ops/grader.Dockerfile 35 | command: 36 | - test-token 37 | - grd-01 38 | environment: 39 | - BROADWAY_API_HOST=ws://api:1470 40 | - DOCKER_HOST=tcp://docker:1471 41 | - BROADWAY_VERBOSE=true 42 | depends_on: 43 | - api 44 | - docker 45 | 46 | tests: 47 | build: 48 | context: . 
49 | dockerfile: ops/tests.Dockerfile 50 | depends_on: 51 | - api 52 | - grader 53 | -------------------------------------------------------------------------------- /tests/integration/base.py: -------------------------------------------------------------------------------- 1 | import json 2 | import requests 3 | import unittest 4 | 5 | 6 | class BaseTest(unittest.TestCase): 7 | def get_api_host(self): 8 | return "http://api:1470" 9 | 10 | def get_token(self): 11 | return "course-token" 12 | 13 | def get_default_headers(self): 14 | return {"Authorization": "Bearer {}".format(self.get_token())} 15 | 16 | def api_get(self, endpoint, *args, **kwargs): 17 | if "headers" not in kwargs: 18 | # use a correct header if not otherwise overwritten 19 | kwargs["headers"] = self.get_default_headers() 20 | 21 | return requests.get( 22 | "{}{}".format(self.get_api_host(), endpoint), *args, **kwargs 23 | ) 24 | 25 | def api_post(self, endpoint, data, *args, **kwargs): 26 | if "headers" not in kwargs: 27 | # use a correct header if not otherwise overwritten 28 | kwargs["headers"] = self.get_default_headers() 29 | 30 | return requests.post( 31 | "{}{}".format(self.get_api_host(), endpoint), 32 | *args, 33 | data=json.dumps(data), 34 | **kwargs 35 | ) 36 | 37 | def assertResponse(self, resp, status): 38 | self.assertEqual( 39 | resp.status_code, 40 | status, 41 | "assert status code for request to {}: {}".format(resp.url, resp.text), 42 | ) 43 | -------------------------------------------------------------------------------- /broadway/grader/api.py: -------------------------------------------------------------------------------- 1 | # API keys 2 | AUTH = "Authorization" 3 | HOSTNAME = "hostname" 4 | HEARTBEAT = "heartbeat" 5 | GRADING_JOB_ID = "grading_job_id" 6 | RESULTS = "results" 7 | SUCCESS = "success" 8 | LOGS = "logs" 9 | STAGES = "stages" 10 | ENV = "env" 11 | 12 | # API endpoints 13 | HEARTBEAT_ENDPOINT = "/api/v1/heartbeat" 14 | GRADING_JOB_ENDPOINT = "/api/v1/grading_job" 15 | GRADER_REGISTER_ENDPOINT = "/api/v1/worker" 16 | WORKER_WS_ENDPOINT = "/api/v1/worker_ws" 17 | 18 | SUCCESS_CODE = 200 19 | QUEUE_EMPTY_CODE = 498 20 | JOB_POLL_INTERVAL = 5 21 | HEARTBEAT_INTERVAL = 10 22 | 23 | GRADING_STAGE_DEF = { 24 | "type": ["object", "null"], 25 | "properties": { 26 | "image": {"type": "string"}, 27 | "env": {"type": "object"}, 28 | "entrypoint": {"type": "array", "items": {"type": "string"}}, 29 | "networking": {"type": "boolean"}, 30 | "privileged": {"type": "boolean"}, 31 | "hostname": {"type": "string"}, 32 | "timeout": {"type": "number"}, 33 | "memory": {"type": "string"}, 34 | "cpuset_cpus": {"type": "string"}, 35 | }, 36 | "required": ["image"], 37 | "additionalProperties": False, 38 | } 39 | 40 | GRADING_JOB_DEF = { 41 | "type": "object", 42 | "properties": { 43 | GRADING_JOB_ID: {"type": "string"}, 44 | STAGES: {"type": "array", "items": GRADING_STAGE_DEF}, 45 | }, 46 | "required": [GRADING_JOB_ID, STAGES], 47 | "additionalProperties": False, 48 | } 49 | -------------------------------------------------------------------------------- /broadway/api/models/grading_run.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from typing import Dict, List, Optional 3 | 4 | from broadway.api.models.base import BaseModel 5 | 6 | 7 | class GradingRunState(Enum): 8 | READY = "ready to be started" 9 | PRE_PROCESSING_STAGE = "pre processing job has been scheduled" 10 | STUDENTS_STAGE = "students grading jobs have been scheduled" 11 | 
POST_PROCESSING_STAGE = "post processing job has been scheduled" 12 | FINISHED = "grading run is complete" 13 | FAILED = "grading run failed" 14 | 15 | 16 | class GradingRun(BaseModel): 17 | def __init__( 18 | self, 19 | assignment_id: str, 20 | state: GradingRunState, 21 | id_: Optional[str] = None, 22 | started_at: Optional[str] = None, 23 | finished_at: Optional[str] = None, 24 | pre_processing_env: Optional[List[Dict[str, str]]] = None, 25 | post_processing_env: Optional[List[Dict[str, str]]] = None, 26 | students_env: List[Dict[str, str]] = [], 27 | student_jobs_left: int = 0, 28 | success: Optional[bool] = None, 29 | ) -> None: 30 | self.id = id_ 31 | self.state = state 32 | self.assignment_id = assignment_id 33 | self.started_at = started_at 34 | self.finished_at = finished_at 35 | self.pre_processing_env = pre_processing_env 36 | self.post_processing_env = post_processing_env 37 | self.students_env = students_env 38 | self.student_jobs_left = student_jobs_left 39 | self.success = success 40 | -------------------------------------------------------------------------------- /broadway/api/daos/course.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from broadway.api.daos.base import BaseDao 4 | from broadway.api.daos.decorators import validate_obj_size 5 | from broadway.api.models import Course 6 | 7 | 8 | class CourseDao(BaseDao): 9 | ID = "_id" 10 | TOKENS = "tokens" 11 | QUERY_TOKENS = "query_tokens" 12 | _COLLECTION = "course" 13 | 14 | def __init__(self, app): 15 | super().__init__(app) 16 | self._collection = self._get_primary_collection(CourseDao._COLLECTION) 17 | 18 | @validate_obj_size 19 | def insert_or_update(self, obj): 20 | document = self._to_store(obj) 21 | return self._collection.update_one( 22 | {CourseDao.ID: obj.id}, {"$set": document}, upsert=True 23 | ) 24 | 25 | def find_by_id(self, id_): 26 | return self._from_store(self._collection.find_one({CourseDao.ID: id_})) 27 | 28 | def drop_all(self): 29 | return self._collection.delete_many({}) 30 | 31 | def _from_store(self, obj) -> Optional[Course]: 32 | if obj is None: 33 | return None 34 | attrs = { 35 | "id_": obj.get(CourseDao.ID), 36 | "tokens": obj.get(CourseDao.TOKENS), 37 | "query_tokens": obj.get(CourseDao.QUERY_TOKENS), 38 | } 39 | return Course(**attrs) 40 | 41 | def _to_store(self, obj) -> dict: 42 | return { 43 | CourseDao.ID: obj.id, 44 | CourseDao.TOKENS: obj.tokens, 45 | CourseDao.QUERY_TOKENS: obj.query_tokens, 46 | } 47 | -------------------------------------------------------------------------------- /broadway/grader/__main__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | 4 | from logging.handlers import TimedRotatingFileHandler 5 | 6 | from broadway.grader.flags import fset 7 | from broadway.grader.ws import run_ws_grader 8 | from broadway.grader.http import run_http_grader 9 | 10 | 11 | def _initialize_logger(flags): 12 | log_dir = flags["log_dir"] 13 | log_level = flags["log_level"] 14 | log_rotate = flags["log_rotate"] 15 | log_backup = flags["log_backup"] 16 | log_timestamps = flags["log_timestamps"] 17 | 18 | os.makedirs(log_dir, exist_ok=True) 19 | 20 | rotating_handler = TimedRotatingFileHandler( 21 | "{}/log".format(log_dir), when=log_rotate, backupCount=log_backup 22 | ) 23 | 24 | if log_timestamps: 25 | format = "%(asctime)s %(levelname)s %(module)s.%(funcName)s: %(message)s" 26 | else: 27 | format = "%(levelname)s %(module)s.%(funcName)s: 
%(message)s" 28 | 29 | logging.basicConfig( 30 | handlers=[rotating_handler, logging.StreamHandler()], 31 | format=format, 32 | level=log_level, 33 | ) 34 | 35 | 36 | def __main__(): 37 | flags = fset.parse() 38 | _initialize_logger(flags) 39 | 40 | api_host = flags["api_host"] 41 | 42 | if api_host.startswith("wss://") or api_host.startswith("ws://"): 43 | run_ws_grader(flags) 44 | elif api_host.startswith("https://") or api_host.startswith("http://"): 45 | run_http_grader(flags) 46 | else: 47 | raise RuntimeError("unsupported protocol: {}".format(api_host)) 48 | 49 | 50 | if __name__ == "__main__": 51 | __main__() 52 | -------------------------------------------------------------------------------- /broadway/api/definitions.py: -------------------------------------------------------------------------------- 1 | course_config = { 2 | "type": "object", 3 | "patternProperties": { 4 | "": { 5 | "properties": { 6 | "tokens": {"type": "array", "items": {"type": "string"}}, 7 | "query_tokens": {"type": "array", "items": {"type": "string"}}, 8 | }, 9 | "required": ["tokens"], 10 | } 11 | }, 12 | } 13 | 14 | grading_stage = { 15 | "type": ["object", "null"], 16 | "properties": { 17 | "image": {"type": "string"}, 18 | "env": {"type": "object"}, 19 | "entrypoint": {"type": "array", "items": {"type": "string"}}, 20 | "networking": {"type": "boolean"}, 21 | "privileged": {"type": "boolean"}, 22 | "hostname": {"type": "string"}, 23 | "timeout": {"type": "number"}, 24 | "cpuset_cpus": {"type": "string"}, 25 | "memory": {"type": "string"}, 26 | "logs": {"type": "boolean"}, 27 | }, 28 | "required": ["image"], 29 | "additionalProperties": False, 30 | } 31 | 32 | grading_pipeline = {"type": ["array", "null"], "items": grading_stage} 33 | 34 | grading_config = { 35 | "type": "object", 36 | "properties": { 37 | "pre_processing_pipeline": grading_pipeline, 38 | "student_pipeline": grading_pipeline, 39 | "post_processing_pipeline": grading_pipeline, 40 | "env": {"type": ["object", "null"]}, 41 | }, 42 | "required": ["student_pipeline"], 43 | "additionalProperties": False, 44 | } 45 | 46 | ws_api_msg = { 47 | "type": "object", 48 | "properties": {"type": {"type": "string"}, "args": {"type": "object"}}, 49 | "required": ["type", "args"], 50 | "additionalProperties": False, 51 | } 52 | -------------------------------------------------------------------------------- /install-secrets-guard-hook.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | precommit_hookfile="./.git/hooks/pre-commit" 5 | 6 | echo -n "Write pre-commit hook to $precommit_hookfile (Y/N)? " 7 | read response 8 | 9 | if [ "$response" != "Y" ]; then 10 | exit 1 11 | fi 12 | 13 | cat <<\FIN > "$precommit_hookfile" 14 | #!/bin/bash 15 | 16 | if git rev-parse --verify HEAD >/dev/null 2>&1 17 | then 18 | against=HEAD 19 | else 20 | # Initial commit: diff against an empty tree object 21 | against=$(git hash-object -t tree /dev/null) 22 | fi 23 | 24 | # If you want to allow possible secrets set this variable to true. 25 | allowpossiblesecrets=$(git config --type=bool hooks.allowpossiblesecrets) 26 | 27 | # Redirect output to stderr. 
28 | exec 1>&2 29 | 30 | UUIDregex='[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}' 31 | GitHubPATregex='ghp_[a-zA-Z0-9]{36}' 32 | 33 | forbiddenRegexes=(\ 34 | "$UUIDregex"\ 35 | "$GitHubPATregex"\ 36 | ) 37 | forbiddenRegexDescriptions=(\ 38 | "UUID"\ 39 | "GitHub Personal Authentication Token"\ 40 | ) 41 | 42 | for i in "${!forbiddenRegexes[@]}"; do 43 | diffRes=$(git diff --cached --diff-filter=AM -G "${forbiddenRegexes[i]}" -z $against) 44 | diffNames=$(git diff --name-only --cached --diff-filter=AM -G "${forbiddenRegexes[i]}" -z $against) 45 | 46 | if [ "$allowpossiblesecrets" != "true" ] && test -n "$diffRes" 47 | then 48 | cat < Optional[GradingJobLog]: 39 | if obj is None: 40 | return None 41 | attrs = { 42 | "id_": str(obj.get(GradingJobLogDao.ID)), 43 | "job_id": obj.get(GradingJobLogDao.GRADING_JOB_ID), 44 | "stdout": obj.get(GradingJobLogDao.STDOUT), 45 | "stderr": obj.get(GradingJobLogDao.STDERR), 46 | } 47 | return GradingJobLog(**attrs) 48 | 49 | def _to_store(self, obj) -> dict: 50 | return { 51 | GradingJobLogDao.ID: ObjectId(obj.id) if obj.id is not None else obj.id, 52 | GradingJobLogDao.GRADING_JOB_ID: obj.job_id, 53 | GradingJobLogDao.STDOUT: obj.stdout, 54 | GradingJobLogDao.STDERR: obj.stderr, 55 | } 56 | -------------------------------------------------------------------------------- /tests/integration/test_basic.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import time 4 | 5 | from tests.integration.base import BaseTest 6 | 7 | logging.basicConfig(level=logging.INFO) 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | class TestBasic(BaseTest): 12 | def testSingleJobRun(self): 13 | COURSE = "test-course" 14 | ASSIGNMENT = "test-assignment" 15 | 16 | ASSIGNMENT_CONFIG = "tests/_fixtures/assignment.json" 17 | ROSTER = "tests/_fixtures/roster.json" 18 | 19 | with open(ASSIGNMENT_CONFIG) as f: 20 | config = json.load(f) 21 | 22 | # register assignment config 23 | r = self.api_post( 24 | "/api/v1/grading_config/{}/{}".format(COURSE, ASSIGNMENT), config 25 | ) 26 | self.assertResponse(r, 200) 27 | 28 | with open(ROSTER) as f: 29 | roster = json.load(f) 30 | 31 | # initiate grading run 32 | r = self.api_post( 33 | "/api/v1/grading_run/{}/{}".format(COURSE, ASSIGNMENT), roster 34 | ) 35 | self.assertResponse(r, 200) 36 | 37 | run_id = json.loads(r.text)["data"]["grading_run_id"] 38 | logger.info("grading run {} started".format(run_id)) 39 | 40 | while True: # wait for grading run to complete 41 | r = self.api_get("/api/v1/grading_run_status/{}/{}".format(COURSE, run_id)) 42 | self.assertResponse(r, 200) 43 | 44 | res = json.loads(r.text)["data"] 45 | job_state = res["student_jobs_state"] 46 | 47 | logger.info("run status: {}".format(res)) 48 | 49 | if "complete" in res["state"]: 50 | break 51 | 52 | time.sleep(3) 53 | 54 | self.assertEqual(len(job_state), 1, "expecting only one job") 55 | 56 | job_id, status = list(job_state.items())[0] 57 | 58 | self.assertTrue("successful" in status, "expecting job to succeed") 59 | 60 | # check grading run output 61 | r = self.api_get("/api/v1/grading_job_log/{}/{}".format(COURSE, job_id)) 62 | self.assertResponse(r, 200) 63 | 64 | log = json.loads(r.text)["data"] 65 | logger.info("job log: {}".format(log)) 66 | 67 | self.assertTrue("student-id" in log["stdout"], "unable to find expected output") 68 | -------------------------------------------------------------------------------- /broadway/api/utils/multiqueue.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | A wrapper around Python's queue module that contains multiple queues 3 | and round-robins between them when pulling. 4 | """ 5 | 6 | from queue import Queue, Empty 7 | 8 | 9 | class MultiQueue: 10 | def __init__(self): 11 | self.queues = {} 12 | self.keys = [] 13 | self.round_robin_idx = 0 14 | 15 | def _add_queue(self, queue_id): 16 | if queue_id in self.queues: 17 | raise Exception(f"{queue_id} already exists in the MultiQueue.") 18 | 19 | self.queues[queue_id] = Queue() 20 | self.keys.append(queue_id) 21 | 22 | def _ensure_queue_exists(self, queue_id): 23 | if queue_id not in self.queues: 24 | raise Exception(f"{queue_id} does not exist in the MultiQueue.") 25 | 26 | def push(self, queue_id, elem): 27 | if queue_id not in self.queues: 28 | self._add_queue(queue_id) 29 | 30 | self.queues[queue_id].put(elem) 31 | 32 | def pull(self): 33 | N = len(self.keys) 34 | if N == 0: 35 | raise Empty("MultiQueue has no queues in it.") 36 | 37 | for i in range(N): 38 | idx = (self.round_robin_idx + i) % N 39 | 40 | try: 41 | rv = self.queues[self.keys[idx]].get_nowait() 42 | self.round_robin_idx = (idx + 1) % N 43 | return rv 44 | 45 | except Empty: 46 | # continue to next queue 47 | pass 48 | 49 | raise Empty("All the queues in the MultiQueue are empty.") 50 | 51 | def contains_key(self, key): 52 | return key in self.queues 53 | 54 | def get_queue_length(self, queue_id): 55 | self._ensure_queue_exists(queue_id) 56 | return self.queues[queue_id].qsize() 57 | 58 | def get_position_in_queue(self, queue_id, key): 59 | self._ensure_queue_exists(queue_id) 60 | try: 61 | return self.queues[queue_id].queue.index(key) 62 | except ValueError: 63 | return -1 64 | 65 | def update_all_job_positions(self, stream_queue): 66 | for queue_id, job_queue in self.queues.items(): 67 | # enumerate gives each job its own position, even if a job id repeats 68 | for pos, job_id in enumerate(list(job_queue.queue)): 69 | stream_queue.update_queue_position(job_id, pos) 70 | -------------------------------------------------------------------------------- /broadway/api/daos/assignment_config.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from broadway.api.daos.base import BaseDao 4 | from broadway.api.daos.decorators import validate_obj_size 5 | from broadway.api.models.assignment_config import AssignmentConfig 6 | 7 | 8 | class AssignmentConfigDao(BaseDao): 9 | ID = "_id" 10 | ENV = "env" 11 | STUDENT_PIPELINE = "student_pipeline" 12 | PRE_PROCESSING_PIPELINE = "pre_processing_pipeline" 13 | POST_PROCESSING_PIPELINE = "post_processing_pipeline" 14 | _COLLECTION = "assignment_config" 15 | 16 | def __init__(self, app): 17 | super().__init__(app) 18 | self._collection = self._get_primary_collection(AssignmentConfigDao._COLLECTION) 19 | 20 | @staticmethod 21 | def id_from(course_id, assignment_name): 22 | return "{}/{}".format(course_id, assignment_name) 23 | 24 | @validate_obj_size 25 | def insert(self, obj): 26 | return self._collection.insert_one(self._to_store(obj)) 27 | 28 | def find_by_id(self, id_): 29 | return self._from_store( 30 | self._collection.find_one({AssignmentConfigDao.ID: id_}) 31 | ) 32 | 33 | def delete_by_id(self, id_): 34 | return self._collection.delete_one({AssignmentConfigDao.ID: id_}) 35 | 36 | def _from_store(self, obj) -> Optional[AssignmentConfig]: 37 | if obj is None: 38 | return None 39 | attrs = { 40 | "id_": obj.get(AssignmentConfigDao.ID), 41 | "env": 
obj.get(AssignmentConfigDao.ENV), 42 | "student_pipeline": obj.get(AssignmentConfigDao.STUDENT_PIPELINE), 43 | "pre_processing_pipeline": obj.get( 44 | AssignmentConfigDao.PRE_PROCESSING_PIPELINE 45 | ), 46 | "post_processing_pipeline": obj.get( 47 | AssignmentConfigDao.POST_PROCESSING_PIPELINE 48 | ), 49 | } 50 | return AssignmentConfig(**attrs) 51 | 52 | def _to_store(self, obj) -> dict: 53 | return { 54 | AssignmentConfigDao.ID: obj.id, 55 | AssignmentConfigDao.ENV: obj.env, 56 | AssignmentConfigDao.STUDENT_PIPELINE: obj.student_pipeline, 57 | AssignmentConfigDao.PRE_PROCESSING_PIPELINE: obj.pre_processing_pipeline, 58 | AssignmentConfigDao.POST_PROCESSING_PIPELINE: obj.post_processing_pipeline, 59 | } 60 | -------------------------------------------------------------------------------- /broadway/api/handlers/stream.py: -------------------------------------------------------------------------------- 1 | import json 2 | from tornado import gen, web 3 | from tornado.iostream import StreamClosedError 4 | from tornado.ioloop import PeriodicCallback 5 | 6 | from broadway.api.handlers.base import BaseAPIHandler 7 | from broadway.api.decorators.auth import authenticate_course_member_or_admin 8 | from broadway.api.utils.streamqueue import StreamQueue 9 | 10 | HEARTBEAT_TIME_MILLI = 20 * 1000 # 20 seconds 11 | 12 | 13 | class GradingJobStreamHandler(BaseAPIHandler): 14 | def initialize(self): 15 | # Prepare for Server-Sent Events 16 | self.set_header("Content-Type", "text/event-stream") 17 | self.set_header("Cache-Control", "no-cache") 18 | 19 | # Needed for SSEs to work with nginx. 20 | # See https://serverfault.com/a/801629 21 | self.set_header("X-Accel-Buffering", "no") 22 | 23 | # No need to register stream or callback when it's a preflight request 24 | if self.request.method == "OPTIONS": 25 | return 26 | 27 | self._id = id(self) 28 | self._callback = PeriodicCallback( 29 | callback=self._heartbeat, callback_time=HEARTBEAT_TIME_MILLI 30 | ) 31 | self._callback.start() 32 | 33 | def _stop_listening(self): 34 | self.get_stream_queue().unregister_stream(self._job_id, self._id) 35 | self._callback.stop() 36 | raise web.Finish 37 | 38 | @gen.coroutine 39 | def _send_sse(self, message): 40 | try: 41 | self.write(message) 42 | yield self.flush() 43 | except StreamClosedError: 44 | self._stop_listening() 45 | 46 | @gen.coroutine 47 | def _heartbeat(self): 48 | yield self._send_sse(":\n\n") 49 | 50 | @gen.coroutine 51 | def publish(self, event, data): 52 | blob = json.dumps({"type": event, "data": data}) 53 | yield self._send_sse(f"event: status_update\ndata: {blob}\n\n") 54 | 55 | @authenticate_course_member_or_admin 56 | @gen.coroutine 57 | def get(self, **kwargs): 58 | self._job_id = kwargs.get("job_id") 59 | 60 | sq = self.get_stream_queue() 61 | sq.register_stream(self._job_id, self._id) 62 | 63 | while True: 64 | res = yield sq.get(self._job_id, self._id) 65 | # If we receive the sentinel value, stop listening 66 | if res is StreamQueue.CLOSE_EVENT: 67 | self._stop_listening() 68 | yield self.publish(*res) 69 | -------------------------------------------------------------------------------- /broadway/api/models/grading_job.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from typing import Any, Dict, List, Optional 3 | from datetime import datetime 4 | 5 | from broadway.api.models.base import BaseModel 6 | 7 | 8 | class GradingJobType(Enum): 9 | PRE_PROCESSING = "pre processing job" 10 | STUDENT = "student grading job" 11 | 
POST_PROCESSING = "post processing job" 12 | 13 | 14 | class GradingJobState(Enum): 15 | QUEUED = "grading job has been scheduled" 16 | STARTED = "grading job is running" 17 | FAILED = "grading job failed" 18 | SUCCEEDED = "grading job was successful" 19 | 20 | 21 | class GradingJob(BaseModel): 22 | def __init__( 23 | self, 24 | job_type: GradingJobType, 25 | run_id: str, 26 | stages: List[Any] = [], 27 | students: Optional[List[Dict[str, str]]] = None, 28 | id_: Optional[str] = None, 29 | worker_id: Optional[str] = None, 30 | queued_at: Optional[datetime] = None, 31 | started_at: Optional[datetime] = None, 32 | finished_at: Optional[datetime] = None, 33 | results: Optional[List[Any]] = None, 34 | success: Optional[bool] = None, 35 | ) -> None: 36 | self.id = id_ 37 | self.type = job_type 38 | self.run_id = run_id 39 | self.worker_id = worker_id 40 | self.queued_at = queued_at 41 | self.started_at = started_at 42 | self.finished_at = finished_at 43 | self.results = results 44 | self.success = success 45 | self.stages = stages 46 | self.students = students 47 | 48 | def get_state(self) -> GradingJobState: 49 | if self.finished_at is not None: 50 | if self.success: 51 | return GradingJobState.SUCCEEDED 52 | return GradingJobState.FAILED 53 | if self.started_at is not None: 54 | return GradingJobState.STARTED 55 | if self.queued_at is not None: 56 | return GradingJobState.QUEUED 57 | raise ValueError("Invalid state") 58 | 59 | def set_stages(self, stages, global_environ, run_environ): 60 | # there are three types of environments: global, stage, and run 61 | # (run overrides stage, which in turn overrides global when merged) 62 | 63 | # we need to make sure that the incoming stages have these environments 64 | # merged correctly before setting them 65 | stages_copy = stages.copy() 66 | for stage in stages_copy: 67 | stage["env"] = {**global_environ, **stage.get("env", {}), **run_environ} 68 | self.stages = stages_copy 69 | -------------------------------------------------------------------------------- /broadway/grader/flags.py: -------------------------------------------------------------------------------- 1 | from flagset import Flag, FlagSet 2 | 3 | fset = FlagSet( 4 | { 5 | "token": Flag( 6 | str, 7 | cmdline_name="token", 8 | env_name="BROADWAY_TOKEN", 9 | config_name="token", 10 | help="cluster token", 11 | ), 12 | "grader_id": Flag( 13 | str, 14 | cmdline_name="grader-id", 15 | env_name="BROADWAY_GRADER_ID", 16 | config_name="id", 17 | help="unique identifier of the grader instance", 18 | ), 19 | "api_host": Flag( 20 | str, 21 | default="http://127.0.0.1:1470", 22 | cmdline_name="--api-host", 23 | env_name="BROADWAY_API_HOST", 24 | config_name="api_host", 25 | help="api host. no trailing slash. " 26 | + "supported protocols: ws(s) and http(s)", 27 | ), 28 | "verbose": Flag( 29 | bool, 30 | default=False, 31 | cmdline_name=["-v", "--verbose"], 32 | env_name="BROADWAY_VERBOSE", 33 | config_name="verbose", 34 | help="verbose mode", 35 | ), 36 | "log_dir": Flag( 37 | str, 38 | default="logs", 39 | cmdline_name="--log-dir", 40 | env_name="BROADWAY_LOG_DIR", 41 | config_name="log.dir", 42 | help="directory for logs", 43 | ), 44 | "log_level": Flag( 45 | str, 46 | default="INFO", 47 | cmdline_name="--log-level", 48 | env_name="BROADWAY_LOG_LEVEL", 49 | config_name="log.level", 50 | help="logging level, e.g. 
INFO, DEBUG", 51 | ), 52 | "log_timestamps": Flag( 53 | bool, 54 | default=True, 55 | cmdline_name="--log-timestamps", 56 | env_name="BROADWAY_LOG_TIMESTAMPS", 57 | config_name="log.timestamps", 58 | help="whether to include timestamps in logs", 59 | ), 60 | "log_rotate": Flag( 61 | str, 62 | default="midnight", 63 | cmdline_name="--log-rotate", 64 | env_name="BROADWAY_LOG_ROTATE", 65 | config_name="log.rotate", 66 | help="time for log rotate", 67 | ), 68 | "log_backup": Flag( 69 | int, 70 | default=7, 71 | cmdline_name="--log-backup", 72 | env_name="BROADWAY_LOG_BACKUP", 73 | config_name="log.backup", 74 | help="backup count for logs", 75 | ), 76 | } 77 | ) 78 | -------------------------------------------------------------------------------- /broadway/api/handlers/base.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | from jsonschema import validate 4 | 5 | from tornado_json.requesthandlers import APIHandler 6 | from tornado.websocket import WebSocketHandler 7 | 8 | from broadway.api.definitions import ws_api_msg 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | class BaseAPIHandler(APIHandler): 14 | def set_default_headers(self, *args, **kwargs): 15 | self.set_header("Access-Control-Allow-Origin", "*") 16 | 17 | def options(self, *args, **kwargs): 18 | """ 19 | For CORS, browsers will send a preflight request with "OPTIONS" method 20 | before the actual request. We want to respond with a status of "HTTP OK", 21 | a header with proper "Access-Control-Allow-Origin" field for all origins 22 | we allow and a proper "Access-Control-Allow-Headers" for all special 23 | header fields we allow, and no body. 24 | """ 25 | self.set_header("Access-Control-Allow-Headers", "Authorization") 26 | self.set_status(204) 27 | self.finish() 28 | 29 | def abort(self, data, status=400): 30 | self.set_status(status) 31 | self.fail(data) 32 | 33 | def get_flags(self): 34 | return self.settings["FLAGS"] 35 | 36 | def get_token(self): 37 | return self.settings["FLAGS"]["token"] 38 | 39 | def get_queue(self): 40 | return self.settings["QUEUE"] 41 | 42 | def get_stream_queue(self): 43 | return self.settings["STREAM_QUEUE"] 44 | 45 | 46 | class BaseWSAPIHandler(BaseAPIHandler, WebSocketHandler): 47 | msg_type_map = {} 48 | 49 | @staticmethod 50 | def msg_type(type_id, decl): 51 | def decor(handler): 52 | BaseWSAPIHandler.msg_type_map[type_id] = (decl, handler) 53 | return handler 54 | 55 | return decor 56 | 57 | def on_message(self, msg): 58 | try: 59 | data = json.loads(msg) 60 | 61 | # check msg decl 62 | validate(instance=data, schema=ws_api_msg) 63 | 64 | msg_type = data["type"] 65 | 66 | decl, handler = BaseWSAPIHandler.msg_type_map[msg_type] 67 | 68 | # check argument decl 69 | validate(instance=data["args"], schema=decl) 70 | 71 | return handler(self, **data["args"]) 72 | 73 | except Exception as e: 74 | self.close(code=1011, reason="internal error") 75 | logger.warning("connection {} closed: {}".format(repr(self), repr(e))) 76 | 77 | def send(self, data): 78 | self.write_message(json.dumps(data)) 79 | 80 | def get_ws_conn_map(self): 81 | return self.settings.get("WS_CONN_MAP") 82 | -------------------------------------------------------------------------------- /broadway/api/callbacks/job.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import broadway.api.daos as daos 4 | from broadway.api.models.grading_job import GradingJobType 5 | from 
broadway.api.models.grading_run import GradingRunState 6 | from broadway.api.utils.run import continue_grading_run, fail_grading_run 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | def job_update_callback(settings, grading_job_id, grading_run_id): 12 | job_dao = daos.GradingJobDao(settings) 13 | job = job_dao.find_by_id(grading_job_id) 14 | if job is None: 15 | logger.critical( 16 | "cannot update non-existent job with ID '{}'".format(grading_job_id) 17 | ) 18 | return 19 | 20 | run_dao = daos.GradingRunDao(settings) 21 | run = run_dao.find_by_id(grading_run_id) 22 | if run is None: 23 | logger.critical( 24 | "cannot update non-existent run with ID '{}'".format(grading_run_id) 25 | ) 26 | return 27 | if run.finished_at is not None: 28 | logger.critical( 29 | "cannot update run with ID '{}' (already finished)".format(grading_run_id) 30 | ) 31 | return 32 | 33 | stream_queue = settings["STREAM_QUEUE"] 34 | if job.success: 35 | stream_queue.update_job_state(job.id, GradingRunState.FINISHED.name) 36 | else: 37 | stream_queue.update_job_state(job.id, GradingRunState.FAILED.name) 38 | stream_queue.send_close_event(job.id) 39 | 40 | if job.type == GradingJobType.PRE_PROCESSING: 41 | if job.success: 42 | continue_grading_run(settings, run) 43 | else: 44 | fail_grading_run(settings, run) 45 | elif job.type == GradingJobType.POST_PROCESSING: 46 | if run.student_jobs_left != 0: 47 | logger.critical( 48 | "post-processing job finished when {} student jobs remain".format( 49 | run.student_jobs_left 50 | ) 51 | ) 52 | return 53 | 54 | if job.success: 55 | continue_grading_run(settings, run) 56 | else: 57 | fail_grading_run(settings, run) 58 | elif job.type == GradingJobType.STUDENT: 59 | if run.student_jobs_left <= 0: 60 | logger.critical( 61 | "student job finished when {} student jobs remain".format( 62 | run.student_jobs_left 63 | ) 64 | ) 65 | return 66 | 67 | run.student_jobs_left -= 1 68 | run_dao.update(run) 69 | 70 | if run.student_jobs_left == 0: 71 | # last job in this stage is complete 72 | continue_grading_run(settings, run) 73 | else: 74 | logger.critical("cannot update run with last job type '{}'".format(job.type)) 75 | -------------------------------------------------------------------------------- /broadway/api/daos/grading_run.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from bson import ObjectId 4 | 5 | from broadway.api.daos.base import BaseDao 6 | from broadway.api.daos.decorators import validate_obj_size 7 | from broadway.api.models import GradingRun, GradingRunState 8 | 9 | 10 | class GradingRunDao(BaseDao): 11 | ID = "_id" 12 | STATE = "state" 13 | ASSIGNMENT_ID = "assignment_id" 14 | STARTED_AT = "started_at" 15 | FINISHED_AT = "finished_at" 16 | PRE_PROCESSING_ENV = "pre_processing_env" 17 | POST_PROCESSING_ENV = "post_processing_env" 18 | STUDENTS_ENV = "students_env" 19 | STUDENT_JOBS_LEFT = "students_jobs_left" 20 | SUCCESS = "success" 21 | _COLLECTION = "grading_run" 22 | 23 | def __init__(self, app): 24 | super().__init__(app) 25 | self._collection = self._get_primary_collection(GradingRunDao._COLLECTION) 26 | 27 | @validate_obj_size 28 | def insert(self, obj): 29 | document = self._to_store(obj) 30 | del document[GradingRunDao.ID] 31 | return self._collection.insert_one(document) 32 | 33 | def find_by_id(self, id_): 34 | if not ObjectId.is_valid(id_): 35 | return None 36 | return self._from_store( 37 | self._collection.find_one({GradingRunDao.ID: ObjectId(id_)}) 38 | ) 39 | 40 | 
@validate_obj_size 41 | def update(self, obj): 42 | return self._collection.update_one( 43 | {GradingRunDao.ID: ObjectId(obj.id)}, {"$set": self._to_store(obj)} 44 | ) 45 | 46 | def _from_store(self, obj) -> Optional[GradingRun]: 47 | if obj is None: 48 | return None 49 | attrs = { 50 | "id_": str(obj.get(GradingRunDao.ID)), 51 | "state": GradingRunState(obj.get(GradingRunDao.STATE)), 52 | "assignment_id": obj.get(GradingRunDao.ASSIGNMENT_ID), 53 | "started_at": obj.get(GradingRunDao.STARTED_AT), 54 | "finished_at": obj.get(GradingRunDao.FINISHED_AT), 55 | "pre_processing_env": obj.get(GradingRunDao.PRE_PROCESSING_ENV), 56 | "post_processing_env": obj.get(GradingRunDao.POST_PROCESSING_ENV), 57 | "students_env": obj.get(GradingRunDao.STUDENTS_ENV), 58 | "student_jobs_left": obj.get(GradingRunDao.STUDENT_JOBS_LEFT), 59 | "success": obj.get(GradingRunDao.SUCCESS), 60 | } 61 | return GradingRun(**attrs) 62 | 63 | def _to_store(self, obj) -> dict: 64 | return { 65 | GradingRunDao.ID: ObjectId(obj.id) if obj.id is not None else None, 66 | GradingRunDao.STATE: obj.state.value, 67 | GradingRunDao.ASSIGNMENT_ID: obj.assignment_id, 68 | GradingRunDao.STARTED_AT: obj.started_at, 69 | GradingRunDao.FINISHED_AT: obj.finished_at, 70 | GradingRunDao.PRE_PROCESSING_ENV: obj.pre_processing_env, 71 | GradingRunDao.POST_PROCESSING_ENV: obj.post_processing_env, 72 | GradingRunDao.STUDENTS_ENV: obj.students_env, 73 | GradingRunDao.STUDENT_JOBS_LEFT: obj.student_jobs_left, 74 | GradingRunDao.SUCCESS: obj.success, 75 | } 76 | -------------------------------------------------------------------------------- /broadway/api/daos/grading_job.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from bson import ObjectId 4 | 5 | from broadway.api.daos.base import BaseDao 6 | from broadway.api.daos.decorators import validate_obj_size 7 | from broadway.api.models.grading_job import GradingJob, GradingJobType 8 | 9 | 10 | class GradingJobDao(BaseDao): 11 | ID = "_id" 12 | TYPE = "type" 13 | RUN_ID = "grading_run_id" 14 | WORKER_ID = "worker_id" 15 | QUEUED_AT = "queued_at" 16 | STARTED_AT = "started_at" 17 | FINISHED_AT = "finished_at" 18 | RESULTS = "results" 19 | SUCCESS = "success" 20 | STAGES = "stages" 21 | STUDENTS = "students" 22 | _COLLECTION = "grading_job" 23 | 24 | def __init__(self, app): 25 | super().__init__(app) 26 | self._collection = self._get_primary_collection(GradingJobDao._COLLECTION) 27 | 28 | @validate_obj_size 29 | def insert(self, obj): 30 | document = self._to_store(obj) 31 | del document[GradingJobDao.ID] 32 | return self._collection.insert_one(document) 33 | 34 | def find_by_id(self, id_): 35 | if not ObjectId.is_valid(id_): 36 | return None 37 | return self._from_store( 38 | self._collection.find_one({GradingJobDao.ID: ObjectId(id_)}) 39 | ) 40 | 41 | def find_by_run_id(self, run_id): 42 | return list( 43 | map(self._from_store, self._collection.find({GradingJobDao.RUN_ID: run_id})) 44 | ) 45 | 46 | @validate_obj_size 47 | def update(self, obj): 48 | return self._collection.update_one( 49 | {GradingJobDao.ID: ObjectId(obj.id)}, {"$set": self._to_store(obj)} 50 | ) 51 | 52 | def _from_store(self, obj) -> Optional[GradingJob]: 53 | if obj is None: 54 | return None 55 | attrs = { 56 | "id_": str(obj.get(GradingJobDao.ID)), 57 | "job_type": GradingJobType(obj.get(GradingJobDao.TYPE)), 58 | "run_id": obj.get(GradingJobDao.RUN_ID), 59 | "worker_id": obj.get(GradingJobDao.WORKER_ID), 60 | "queued_at": 
obj.get(GradingJobDao.QUEUED_AT), 61 | "started_at": obj.get(GradingJobDao.STARTED_AT), 62 | "finished_at": obj.get(GradingJobDao.FINISHED_AT), 63 | "results": obj.get(GradingJobDao.RESULTS), 64 | "success": obj.get(GradingJobDao.SUCCESS), 65 | "stages": obj.get(GradingJobDao.STAGES), 66 | "students": obj.get(GradingJobDao.STUDENTS), 67 | } 68 | return GradingJob(**attrs) 69 | 70 | def _to_store(self, obj) -> dict: 71 | return { 72 | GradingJobDao.ID: ObjectId(obj.id) if obj.id is not None else obj.id, 73 | GradingJobDao.TYPE: obj.type.value, 74 | GradingJobDao.RUN_ID: obj.run_id, 75 | GradingJobDao.WORKER_ID: obj.worker_id, 76 | GradingJobDao.QUEUED_AT: obj.queued_at, 77 | GradingJobDao.STARTED_AT: obj.started_at, 78 | GradingJobDao.FINISHED_AT: obj.finished_at, 79 | GradingJobDao.RESULTS: obj.results, 80 | GradingJobDao.SUCCESS: obj.success, 81 | GradingJobDao.STAGES: obj.stages, 82 | GradingJobDao.STUDENTS: obj.students, 83 | } 84 | -------------------------------------------------------------------------------- /broadway/api/daos/worker_node.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from broadway.api.daos.base import BaseDao 4 | from broadway.api.daos.decorators import validate_obj_size 5 | from broadway.api.models import WorkerNode 6 | 7 | 8 | class WorkerNodeDao(BaseDao): 9 | ID = "_id" 10 | RUNNING_JOB_ID = "running_job_id" 11 | LAST_SEEN = "last_seen" 12 | WORKER_HOSTNAME = "worker_hostname" 13 | JOBS_PROCESSED = "jobs_processed" 14 | ALIVE = "alive" 15 | USE_WS = "use_ws" 16 | _COLLECTION = "worker_node" 17 | 18 | def __init__(self, app): 19 | super().__init__(app) 20 | self._collection = self._get_primary_collection(WorkerNodeDao._COLLECTION) 21 | 22 | @validate_obj_size 23 | def insert(self, obj): 24 | document = self._to_store(obj) 25 | return self._collection.insert_one(document) 26 | 27 | @validate_obj_size 28 | def update(self, obj): 29 | document = self._to_store(obj) 30 | return self._collection.update_one( 31 | {WorkerNodeDao.ID: obj.id}, {"$set": document} 32 | ) 33 | 34 | def find_all(self): 35 | return list(map(self._from_store, self._collection.find())) 36 | 37 | def find_by_id(self, id_): 38 | return self._from_store(self._collection.find_one({WorkerNodeDao.ID: id_})) 39 | 40 | def find_by_hostname(self, hostname): 41 | return self._from_store( 42 | self._collection.find_one({WorkerNodeDao.WORKER_HOSTNAME: hostname}) 43 | ) 44 | 45 | def find_by_liveness(self, alive, use_ws=None): 46 | pattern = {WorkerNodeDao.ALIVE: alive} 47 | 48 | if use_ws is not None: 49 | pattern[WorkerNodeDao.USE_WS] = use_ws 50 | 51 | return list(map(self._from_store, self._collection.find(pattern))) 52 | 53 | def reset_worker_nodes(self): 54 | return self._collection.update_many( 55 | {WorkerNodeDao.USE_WS: True}, {"$set": {WorkerNodeDao.ALIVE: False}} 56 | ) 57 | 58 | def find_by_idleness(self): 59 | return list( 60 | map( 61 | self._from_store, 62 | self._collection.find({WorkerNodeDao.RUNNING_JOB_ID: None}), 63 | ) 64 | ) 65 | 66 | def _from_store(self, obj) -> Optional[WorkerNode]: 67 | if obj is None: 68 | return None 69 | attrs = { 70 | "id_": obj.get(WorkerNodeDao.ID), 71 | "running_job_id": obj.get(WorkerNodeDao.RUNNING_JOB_ID), 72 | "last_seen": obj.get(WorkerNodeDao.LAST_SEEN), 73 | "hostname": obj.get(WorkerNodeDao.WORKER_HOSTNAME), 74 | "jobs_processed": obj.get(WorkerNodeDao.JOBS_PROCESSED), 75 | "is_alive": obj.get(WorkerNodeDao.ALIVE), 76 | "use_ws": obj.get(WorkerNodeDao.USE_WS), 77 | } 78 | 
return WorkerNode(**attrs)
79 |
80 |     def _to_store(self, obj) -> dict:
81 |         return {
82 |             WorkerNodeDao.ID: obj.id,
83 |             WorkerNodeDao.RUNNING_JOB_ID: obj.running_job_id,
84 |             WorkerNodeDao.LAST_SEEN: obj.last_seen,
85 |             WorkerNodeDao.WORKER_HOSTNAME: obj.hostname,
86 |             WorkerNodeDao.JOBS_PROCESSED: obj.jobs_processed,
87 |             WorkerNodeDao.ALIVE: obj.is_alive,
88 |             WorkerNodeDao.USE_WS: obj.use_ws,
89 |         }
90 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Broadway
2 |
3 | [![Build Status](https://www.travis-ci.com/illinois-cs241/broadway.svg?branch=master)](https://www.travis-ci.com/illinois-cs241/broadway)
4 | [![Coverage Status](https://coveralls.io/repos/github/illinois-cs241/broadway/badge.svg?branch=master)](https://coveralls.io/github/illinois-cs241/broadway?branch=master)
5 | ![License](https://img.shields.io/badge/license-NCSA%2FIllinois-blue.svg)
6 | ![Python Versions](https://img.shields.io/badge/python-3.6-blue.svg)
7 |
8 | Broadway is a distributed grading service that receives, executes, and keeps track of grading jobs and runs.
9 |
10 | The aim of this project is to provide a generic interface to a distributed autograding system that can be used by multiple courses. Broadway aims to provide the following benefits:
11 | * More stable and reliable grading runs. No one student can break the entire AG run.
12 | * Faster grading runs. Multiple machines can grade the same assignment.
13 | * Easier tracking and debugging of student failures during grading.
14 | * A more consistent environment to grade student code.
15 | * Easier to scale out the infrastructure.
16 |
17 | **_Please read the [Wiki](https://github.com/illinois-cs241/broadway/wiki) for documentation._** It explains how Broadway works and how to interact with it. Please be sure to read all the pages if you are planning on using Broadway.
18 |
19 | See our [contribution guidelines](CONTRIBUTING.md) if you want to contribute.
20 |
21 | ## Requirements
22 |
23 | MongoDB must be installed and the `mongod` daemon must be running locally before starting the API. Default options are usually sufficient (but for security purposes, be sure to disallow external access to the store).
24 |
25 | Python 3.6 is the minimum supported interpreter version. Python 3.7 should also work just fine.
26 |
27 | To install the dependencies (with venv):
28 |
29 |     python3 -m venv venv
30 |     source venv/bin/activate
31 |     pip3 install -r requirements.txt
32 |
33 | ## Configuration
34 |
35 | Most of our configuration variables can be set from three sources: command-line flags,
36 | environment variables, and a config file, in decreasing order of precedence.
37 |
38 | ## Broadway API and grader
39 |
40 | The API and the graders are the two major parts of a Broadway cluster. The API is in charge of receiving and scheduling jobs
41 | across graders, while graders have the simple job of executing them in containers.
42 |
43 | To bring up a functioning Broadway cluster, you have to spin up the API first, then connect graders to the API
44 | using the authentication token (either given or automatically generated).
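For example, a minimal local bring-up might look like the following. This is an illustrative sketch: the token and grader ID are placeholder values, and the grader's arguments are described under "Running the grader" below.

    # start the API; a command-line flag overrides the corresponding
    # environment variable, which overrides the config file
    python3 -m broadway.api --token my-cluster-token --bind-addr 127.0.0.1 --bind-port 1470

    # connect a grader to the running API using the same cluster token
    python3 -m broadway.grader --api-host ws://127.0.0.1:1470 my-cluster-token grader-1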
45 |
46 | ## Running the API
47 |
48 | (After installing requirements)
49 |
50 |     python3 -m broadway.api [--token TOKEN] [--bind-addr ADDR] [--bind-port PORT]
51 |
52 | More info can be found by running `python3 -m broadway.api --help`
53 |
54 | ## Running the grader
55 |
56 | `broadway.grader` takes two positional arguments: `TOKEN` is the cluster token configured in the API,
57 | and `GRADER_ID` is a unique identifier for the grader (only letters, digits, and dashes are allowed).
58 |
59 | `API_ADDR` points to the address the API is bound to, along with the protocol you wish to use.
60 | For example, `ws://127.0.0.1:1470` means the grader should reach the API at `127.0.0.1:1470` and
61 | use the websocket version of our protocol.
62 |
63 |     python3 -m broadway.grader [--api-host API_ADDR] TOKEN GRADER_ID
64 |
65 | More info can be found by running `python3 -m broadway.grader --help`
66 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to Broadway
2 |
3 | First of all, thank you for contributing to Broadway! :tada:
4 |
5 | ### How can I contribute?
6 | There are many ways in which you can contribute. Broadway has several sister open-source projects that need maintenance too.
7 | * [Broadway On-Demand](https://github.com/illinois-cs241/broadway-on-demand)
8 | * [Chainlink](https://github.com/illinois-cs241/chainlink)
9 | * [FlagSet](https://github.com/illinois-cs241/flagset)
10 |
11 | If you find issues in any of these, please feel free to open Issues or Pull Requests. Reporting issues is itself a contribution!
12 |
13 | ### Issues
14 | Please make issues as detailed as possible. Include error messages, logs, and output in code blocks. If you are making a feature request,
15 | please explain in detail what purpose the feature would serve. **Please keep in mind that Broadway should not contain course-specific
16 | features or code.** Broadway's design is intentionally generic so that any course has the flexibility to use it. Consider building
17 | services around Broadway if you need course-specific behavior.
18 |
19 | ### Pull Requests
20 | Please keep PRs as small as possible so it's easier for reviewers to review them and get back to you. If you have a lot of changes,
21 | try splitting them up into smaller parts and stacking PRs. It makes a world of difference for reviewers. It also makes it easier
22 | to spot potential errors.
23 |
24 | Please use meaningful commit messages, branch names, and descriptions. Summarize your changes in the description. Link all the issues
25 | your PR is attempting to fix. Consider prefixing the issue links with "Resolves " so that each issue is automatically closed when the PR
26 | is merged to `master`.
27 |
28 | **Please squash and merge** so that the commit history on master stays clean and easy to navigate.
29 |
30 | ### Testing
31 | Please run/modify the [tests](tests) each time a change is made to the logic or structure. Make sure you have a local `mongod` instance running or else all tests will fail. You can run the tests using:
32 | ```shell
33 | pytest -v tests/api --doctest-modules
34 | ```
35 |
36 | followed by our integration tests with:
37 | ```shell
38 | docker-compose -f docker-compose.test.yml up --build --force-recreate --exit-code-from tests
39 | ```
40 |
41 | In addition, we run a linter/formatter to keep things standard and clean.
For formatting, be sure to execute `black`
42 | ```shell
43 | black broadway/ tests/
44 | ```
45 |
46 | and then `flake8`
47 |
48 | ```shell
49 | flake8 --config=setup.cfg
50 | ```
51 |
52 | before opening a pull request.
53 |
54 | ### Blocking calls
55 | Please be cautious about adding blocking calls in the application logic, because Tornado uses a single-threaded event loop. Therefore,
56 | a single blocking call will prevent the API from serving requests and can stall the entire distributed system. For instance,
57 | it might prevent the API from processing heartbeats, causing the server to consider live worker nodes dead.
58 |
59 | If you need to perform blocking operations, make them asynchronous instead; see the [Asynchronous and non-blocking IO guide for Tornado](http://www.tornadoweb.org/en/stable/guide/async.html) and the sketch below.
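For example, a blocking call such as a synchronous HTTP request can be pushed onto a thread pool so the event loop stays responsive. The sketch below is illustrative only; `fetch_report` and its URL are hypothetical and not part of this codebase:

```python
import asyncio
from concurrent.futures import ThreadPoolExecutor

import requests  # requests.get blocks the calling thread

_executor = ThreadPoolExecutor(max_workers=4)


async def fetch_report(url):
    # hypothetical example: run_in_executor runs the blocking call on a
    # worker thread and yields control back to the event loop until the
    # result is ready, so heartbeats and other handlers keep being served
    loop = asyncio.get_event_loop()
    response = await loop.run_in_executor(_executor, requests.get, url)
    return response.text
```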
logger.info("registered as {}".format(flags["grader_id"])) 81 | 82 | while True: 83 | job = json.loads(await ws.recv()) 84 | 85 | validate(instance=job, schema=GRADING_JOB_DEF) 86 | 87 | job_result = await _exec_job(flags, job) 88 | 89 | await ws.send(json.dumps({"type": "job_result", "args": job_result})) 90 | 91 | except websockets.ConnectionClosed as e: 92 | logger.critical("connection closed: {}".format(repr(e))) 93 | 94 | except ValidationError as e: 95 | logger.critical("validation error: {}".format(repr(e))) 96 | 97 | except SchemaError as e: 98 | logger.critical("schema error: {}".format(repr(e))) 99 | 100 | 101 | def _shutdown(sig, task): 102 | logger.info("signal received: {}, shutting down".format(signal.Signals(sig).name)) 103 | task.cancel() 104 | 105 | 106 | def run_ws_grader(flags): 107 | loop = asyncio.get_event_loop() 108 | task = loop.create_task(_run(flags)) 109 | 110 | loop.add_signal_handler(signal.SIGINT, lambda: _shutdown(signal.SIGINT, task)) 111 | 112 | try: 113 | loop.run_until_complete(task) 114 | except asyncio.CancelledError: 115 | logger.info("task cancelled") 116 | -------------------------------------------------------------------------------- /broadway/api/decorators/auth.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from broadway.api.daos import AssignmentConfigDao, CourseDao, WorkerNodeDao 4 | 5 | logger = logging.getLogger(__name__) 6 | 7 | 8 | def _is_token_valid(token): 9 | return ( 10 | (token is not None) 11 | and token.startswith("Bearer ") 12 | and len(token.split(" ")) == 2 13 | ) 14 | 15 | 16 | def authenticate_worker(func): 17 | def wrapper(*args, **kwargs): 18 | handler = args[0] 19 | worker_id = kwargs.get("worker_id") 20 | 21 | dao = WorkerNodeDao(handler.settings) 22 | 23 | worker = dao.find_by_id(worker_id) 24 | 25 | if worker is None: 26 | handler.abort({"message": "worker not found"}, status=401) 27 | return 28 | 29 | return func(*args, **kwargs) 30 | 31 | return wrapper 32 | 33 | 34 | def validate_assignment(func): 35 | def wrapper(*args, **kwargs): 36 | handler = args[0] 37 | course_id = kwargs.get("course_id") 38 | assignment_name = kwargs.get("assignment_name") 39 | assignment_id = AssignmentConfigDao.id_from(course_id, assignment_name) 40 | 41 | dao = AssignmentConfigDao(handler.settings) 42 | if dao.find_by_id(assignment_id) is None: 43 | handler.abort({"message": "assignment not found"}, status=401) 44 | return 45 | return func(*args, **kwargs) 46 | 47 | return wrapper 48 | 49 | 50 | def authenticate_cluster_token(func): 51 | def wrapper(*args, **kwargs): 52 | handler = args[0] 53 | expected_token = handler.get_token() 54 | request_token = handler.request.headers.get("Authorization") 55 | 56 | if not _is_token_valid(request_token): 57 | handler.abort({"message": "invalid token format"}, status=401) 58 | return 59 | elif expected_token != request_token.split(" ")[1]: 60 | handler.abort({"message": "invalid token"}, status=401) 61 | return 62 | return func(*args, **kwargs) 63 | 64 | return wrapper 65 | 66 | 67 | def authenticate_cluster_token_ws(func): 68 | def wrapper(*args, **kwargs): 69 | handler = args[0] 70 | expected_token = handler.get_token() 71 | request_token = handler.request.headers.get("Authorization") 72 | 73 | if not _is_token_valid(request_token): 74 | handler.close(reason="invalid token format", code=1008) 75 | return 76 | elif expected_token != request_token.split(" ")[1]: 77 | handler.close(reason="invalid token", code=1008) 78 | return 79 | return 
func(*args, **kwargs) 80 | 81 | return wrapper 82 | 83 | 84 | def authenticate_course_wrapper_generator(admin_only, func): 85 | def wrapper(*args, **kwargs): 86 | handler = args[0] 87 | 88 | request_token = handler.request.headers.get("Authorization") 89 | if not _is_token_valid(request_token): 90 | handler.abort({"message": "invalid token format"}, status=401) 91 | return 92 | 93 | request_token = request_token.split(" ")[1] 94 | course_id = kwargs.get("course_id") 95 | 96 | dao = CourseDao(handler.settings) 97 | course = dao.find_by_id(course_id) 98 | if course is None: 99 | handler.abort({"message": "course not found"}, status=401) 100 | return 101 | 102 | if admin_only: 103 | allowed_tokens = set(course.tokens) 104 | else: 105 | allowed_tokens = set(course.tokens).union(set(course.query_tokens)) 106 | 107 | if request_token not in allowed_tokens: 108 | handler.abort({"message": "invalid token"}, status=401) 109 | return 110 | 111 | return func(*args, **kwargs) 112 | 113 | return wrapper 114 | 115 | 116 | def authenticate_course_member_or_admin(func): 117 | return authenticate_course_wrapper_generator(False, func) 118 | 119 | 120 | def authenticate_course_admin(func): 121 | return authenticate_course_wrapper_generator(True, func) 122 | 123 | 124 | __all__ = [ 125 | "authenticate_cluster_token", 126 | "authenticate_course_member_or_admin", 127 | "authenticate_course_admin", 128 | "authenticate_worker", 129 | "validate_assignment", 130 | ] 131 | -------------------------------------------------------------------------------- /broadway/api/utils/streamqueue.py: -------------------------------------------------------------------------------- 1 | from tornado.queues import Queue 2 | from collections import defaultdict 3 | 4 | """ 5 | Used in conjunction with server-sent events (SSE) to service updates about grading jobs. 6 | Updates on a job's queue position and state are saved here. 7 | """ 8 | 9 | 10 | class StreamQueue: 11 | POSITION_EVENT = "position" 12 | STATE_EVENT = "state" 13 | CLOSE_EVENT = None 14 | 15 | def __init__(self): 16 | self._streams = defaultdict(lambda: defaultdict(lambda: Queue())) 17 | 18 | def _ensure_stream_exists(self, job_id, iid) -> bool: 19 | """ 20 | Raise an exception if there is no corresponding listener. 21 | 22 | :param job_id: Target job ID. 23 | :param iid: ID of the listener. 24 | :raises Exception: If there is no corresponding listener. 25 | """ 26 | if job_id not in self._streams or iid not in self._streams[job_id]: 27 | raise Exception(f"KeyError: ({job_id}:{iid}) is not in the StreamQueue") 28 | 29 | def register_stream(self, job_id, iid) -> None: 30 | """ 31 | Register a new stream to listen for events for the given job ID. 32 | 33 | :param job_id: Target job ID. 34 | :param iid: A unique identifier for the listener (Using `id(self)` in handlers). 35 | """ 36 | self._streams[job_id][iid] = Queue() 37 | 38 | def unregister_stream(self, job_id, iid) -> None: 39 | """ 40 | Remove a listener for the given job ID. 41 | 42 | :param job_id: Target job ID. 43 | :param iid: ID of the listener. 44 | :raises Exception: If there is no corresponding listener. 45 | """ 46 | self._ensure_stream_exists(job_id, iid) 47 | del self._streams[job_id][iid] 48 | if not self._streams[job_id]: 49 | del self._streams[job_id] 50 | 51 | def has_update(self, job_id, iid) -> bool: 52 | """ 53 | Returns whether a listener has any new events. 54 | 55 | :param job_id: Target job ID. 56 | :param iid: ID of the listener. 57 | :raises Exception: If there is no corresponding listener. 
58 | """ 59 | self._ensure_stream_exists(job_id, iid) 60 | return not self._streams[job_id][iid].empty() 61 | 62 | def get(self, job_id, iid): 63 | """ 64 | Pops and returns the next message from the listener's event queue. Returns a 65 | tuple `(event, data)` or a sentinel value signifying there are no more events 66 | for the job. See docstring for `send_close_event` for information about this 67 | value. 68 | 69 | Blocks until there is an item in the queue. 70 | 71 | :param job_id: Target job ID. 72 | :param iid: ID of the listener. 73 | :raises Exception: If there is no corresponding listener. 74 | """ 75 | self._ensure_stream_exists(job_id, iid) 76 | return self._streams[job_id][iid].get() 77 | 78 | def _update(self, job_id, event) -> None: 79 | """ 80 | General function for adding events to listener queues. 81 | 82 | :param job_id: Target job ID. 83 | :param event: Event tuple to add to the queues. 84 | """ 85 | if job_id not in self._streams: 86 | return 87 | for iid in self._streams[job_id]: 88 | self._streams[job_id][iid].put(event) 89 | 90 | def update_queue_position(self, job_id, position) -> None: 91 | """ 92 | Add a queue position change event to all listeners of the given job ID. 93 | 94 | :param job_id: Target job ID. 95 | :param position: New position of the job. 96 | """ 97 | self._update(job_id, (self.POSITION_EVENT, position)) 98 | 99 | def update_job_state(self, job_id, state) -> None: 100 | """ 101 | Add a job state change event to all listeners of the given job ID. 102 | 103 | :param job_id: Target job ID. 104 | :param state: New state of the job. 105 | """ 106 | self._update(job_id, (self.STATE_EVENT, state)) 107 | 108 | def send_close_event(self, job_id) -> None: 109 | """ 110 | Add a sentinel event to all listeners to signify that there will be no more 111 | updates for the given job. We expect the listener to unregister itself once it 112 | gets this value. `None` is used as this sentinel value. 113 | 114 | :param job_id: Target job ID. 115 | """ 116 | self._update(job_id, self.CLOSE_EVENT) 117 | -------------------------------------------------------------------------------- /broadway/api/flags.py: -------------------------------------------------------------------------------- 1 | from flagset import Flag, FlagSet 2 | 3 | app_flags = FlagSet( 4 | { 5 | "debug": Flag( 6 | bool, 7 | default=False, 8 | cmdline_name=["-d", "--debug"], 9 | env_name="BROADWAY_DEBUG", 10 | config_name="debug", 11 | help="enable debug mode", 12 | ), 13 | "token": Flag( 14 | str, 15 | cmdline_name="--token", 16 | env_name="BROADWAY_TOKEN", 17 | config_name="token", 18 | help="access token for communication between workers and api", 19 | ), 20 | "heartbeat_interval": Flag( 21 | int, 22 | default=10, 23 | cmdline_name="--heartbeat-interval", 24 | config_name="heartbeat_interval", 25 | help="heartbeat interval in seconds", 26 | ), 27 | "course_config": Flag( 28 | str, 29 | cmdline_name="--course-config", 30 | config_name="course_config", 31 | help="optional course config file. 
" 32 | + "if given, the existing config will be overwritten; " 33 | + "otherwise, the existing config will be used", 34 | ), 35 | # web app flags 36 | "bind_addr": Flag( 37 | str, 38 | default="localhost", 39 | cmdline_name="--bind-addr", 40 | env_name="BROADWAY_BIND_ADDR", 41 | config_name="bind_addr", 42 | help="web app bind address", 43 | ), 44 | "bind_port": Flag( 45 | str, 46 | default="1470", 47 | cmdline_name="--bind-port", 48 | env_name="BROADWAY_BIND_PORT", 49 | config_name="bind_port", 50 | help="web app bind port", 51 | ), 52 | # log flags 53 | "log_dir": Flag( 54 | str, 55 | default="logs", 56 | cmdline_name="--log-dir", 57 | env_name="BROADWAY_LOG_DIR", 58 | config_name="log.dir", 59 | help="directory for logs", 60 | ), 61 | "log_level": Flag( 62 | str, 63 | default="INFO", 64 | cmdline_name="--log-level", 65 | env_name="BROADWAY_LOG_LEVEL", 66 | config_name="log.level", 67 | help="logging level, e.g. INFO, DEBUG", 68 | ), 69 | "log_timestamps": Flag( 70 | bool, 71 | default=True, 72 | cmdline_name="--log-timestamps", 73 | env_name="BROADWAY_LOG_TIMESTAMPS", 74 | config_name="log.timestamps", 75 | help="whether to include timestamps in logs", 76 | ), 77 | "log_rotate": Flag( 78 | str, 79 | default="midnight", 80 | cmdline_name="--log-rotate", 81 | env_name="BROADWAY_LOG_ROTATE", 82 | config_name="log.rotate", 83 | help="time for log rotate", 84 | ), 85 | "log_backup": Flag( 86 | int, 87 | default=7, 88 | cmdline_name="--log-backup", 89 | env_name="BROADWAY_LOG_BACKUP", 90 | config_name="log.backup", 91 | help="backup count for logs", 92 | ), 93 | # mongo db flags 94 | "mongodb_dsn": Flag( 95 | str, 96 | default="mongodb://localhost:27017", 97 | cmdline_name="--mongodb-dsn", 98 | env_name="BROADWAY_MONGODB_DSN", 99 | config_name="mongodb.dsn", 100 | help="data source name for mongodb", 101 | ), 102 | "mongodb_primary": Flag( 103 | str, 104 | default="AG", 105 | cmdline_name="--mongodb-primary", 106 | env_name="BROADWAY_MONGODB_PRIMARY", 107 | config_name="mongodb.primary", 108 | help="name of the primary database", 109 | ), 110 | "mongodb_logs": Flag( 111 | str, 112 | default="logs", 113 | cmdline_name="--mongodb-logs", 114 | env_name="BROADWAY_MONGODB_LOGS", 115 | config_name="mongodb.logs", 116 | help="name of the logging database", 117 | ), 118 | "mongodb_timeout": Flag( 119 | int, 120 | default=5, 121 | cmdline_name="--mongodb-timeout", 122 | env_name="BROADWAY_MONGODB_TIMEOUT", 123 | config_name="mongodb.timeout", 124 | help="timeout for mongodb connection", 125 | ), 126 | } 127 | ) 128 | -------------------------------------------------------------------------------- /broadway/api/utils/run.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import broadway.api.daos as daos 4 | import broadway.api.models as models 5 | from broadway.api.models.grading_job import GradingJobType 6 | from broadway.api.models.grading_run import GradingRunState 7 | from broadway.api.utils.time import get_time 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | def continue_grading_run(settings, grading_run): 13 | """ 14 | Moves grading run into next state or finishes it 15 | """ 16 | assignment_config_dao = daos.AssignmentConfigDao(settings) 17 | assignment = assignment_config_dao.find_by_id(grading_run.assignment_id) 18 | 19 | course_id = assignment.id.split("/")[0] 20 | 21 | global_environ = assignment.env or {} 22 | global_environ["GRADING_RUN_ID"] = grading_run.id 23 | 24 | queue = settings["QUEUE"] 25 | if 
grading_run.state == GradingRunState.READY:
26 |         if assignment.pre_processing_pipeline:
27 |             _update_run_state(
28 |                 settings, grading_run, GradingRunState.PRE_PROCESSING_STAGE
29 |             )
30 |             next_job = _prepare_next_job(
31 |                 settings,
32 |                 grading_run,
33 |                 global_environ,
34 |                 grading_run.pre_processing_env or {},
35 |                 assignment.pre_processing_pipeline,
36 |                 GradingJobType.PRE_PROCESSING,
37 |             )
38 |             queue.push(course_id, next_job)
39 |             return True
40 |     if (
41 |         grading_run.state == GradingRunState.READY
42 |         or grading_run.state == GradingRunState.PRE_PROCESSING_STAGE
43 |     ):
44 |         _update_run_state(settings, grading_run, GradingRunState.STUDENTS_STAGE)
45 |         for runtime_environ in grading_run.students_env:
46 |             next_job = _prepare_next_job(
47 |                 settings,
48 |                 grading_run,
49 |                 global_environ,
50 |                 runtime_environ,
51 |                 assignment.student_pipeline,
52 |                 GradingJobType.STUDENT,
53 |             )
54 |             queue.push(course_id, next_job)
55 |         return True
56 |     if grading_run.state == GradingRunState.STUDENTS_STAGE:
57 |         if assignment.post_processing_pipeline:
58 |             _update_run_state(
59 |                 settings, grading_run, GradingRunState.POST_PROCESSING_STAGE
60 |             )
61 |             next_job = _prepare_next_job(
62 |                 settings,
63 |                 grading_run,
64 |                 global_environ,
65 |                 grading_run.post_processing_env or {},
66 |                 assignment.post_processing_pipeline,
67 |                 GradingJobType.POST_PROCESSING,
68 |             )
69 |             queue.push(course_id, next_job)
70 |             return True
71 |         else:
72 |             _finish_grading_run(settings, grading_run)
73 |             return True
74 |     if grading_run.state == GradingRunState.POST_PROCESSING_STAGE:
75 |         _finish_grading_run(settings, grading_run)
76 |         return True
77 |     logger.critical("invalid grading run state for run '{}'".format(grading_run.id))
78 |     return False
79 |
80 |
81 | def fail_grading_run(settings, run):
82 |     run_dao = daos.GradingRunDao(settings)
83 |     if run is None:
84 |         logger.critical("cannot fail non-existent grading run")  # run is None here, so there is no ID to log
85 |         return
86 |
87 |     run.finished_at = get_time()
88 |     run.state = GradingRunState.FAILED
89 |     run.success = False
90 |     run_dao.update(run)
91 |
92 |
93 | def _update_run_state(settings, grading_run, state):
94 |     """
95 |     Updates the state for a grading run
96 |     """
97 |     grading_run_dao = daos.GradingRunDao(settings)
98 |     grading_run.state = state
99 |     grading_run_dao.update(grading_run)
100 |
101 |
102 | def _prepare_next_job(
103 |     settings, grading_run, global_job_environ, runtime_job_environ, job_stages, job_type
104 | ):
105 |     """
106 |     Prepares a job to be submitted to queue
107 |     """
108 |     grading_job_dao = daos.GradingJobDao(settings)
109 |     grading_job = models.GradingJob(
110 |         job_type=job_type, run_id=grading_run.id, queued_at=get_time()
111 |     )
112 |     grading_job.id = str(grading_job_dao.insert(grading_job).inserted_id)
113 |
114 |     runtime_job_environ["GRADING_JOB_ID"] = grading_job.id
115 |     grading_job.set_stages(job_stages, global_job_environ, runtime_job_environ)
116 |     grading_job_dao.update(grading_job)
117 |
118 |     return grading_job.id
119 |
120 |
121 | def _finish_grading_run(settings, grading_run):
122 |     grading_run_dao = daos.GradingRunDao(settings)
123 |     grading_run.state = GradingRunState.FINISHED
124 |     grading_run.finished_at = get_time()
125 |     grading_run.success = True
126 |     grading_run_dao.update(grading_run)
127 |
--------------------------------------------------------------------------------
/broadway/api/callbacks/worker.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | from queue import Empty
4 |
5 | import tornado 6 | import tornado.ioloop 7 | 8 | from broadway.api.callbacks import job_update_callback 9 | from broadway.api.daos import GradingJobDao, WorkerNodeDao 10 | from broadway.api.utils.time import get_time 11 | 12 | import random 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | 17 | def worker_heartbeat_callback(settings): 18 | """ 19 | Checks if any workers went offline (after 2 * heartbeat_interval seconds) 20 | """ 21 | heartbeat_timestamp = get_time() 22 | heartbeat_interval = settings["FLAGS"]["heartbeat_interval"] 23 | conn_map = settings["WS_CONN_MAP"] 24 | 25 | dao = WorkerNodeDao(settings) 26 | 27 | for node in dao.find_by_liveness(alive=True): 28 | if ( 29 | heartbeat_timestamp - node.last_seen 30 | ).total_seconds() >= 2 * heartbeat_interval: 31 | if node.use_ws and node.id in conn_map: 32 | conn_map[node.id].close() 33 | 34 | _handle_lost_worker_node(settings, node) 35 | 36 | 37 | def worker_lost_callback(settings, worker_id, reason="closed connection"): 38 | dao = WorkerNodeDao(settings) 39 | worker = dao.find_by_id(worker_id) 40 | 41 | if worker is None: 42 | logger.critical("dead worker {} not found".format(worker_id)) 43 | return 44 | 45 | _handle_lost_worker_node(settings, worker, reason=reason) 46 | 47 | 48 | # assign available jobs to whoever is currently not working 49 | # triggered upon the following events 50 | # 1. client submitting a new job 51 | # 2. worker finishing a job 52 | def worker_schedule_job(settings): 53 | conn_map = settings["WS_CONN_MAP"] 54 | job_queue = settings["QUEUE"] 55 | stream_queue = settings["STREAM_QUEUE"] 56 | 57 | grading_job_dao = GradingJobDao(settings) 58 | worker_node_dao = WorkerNodeDao(settings) 59 | 60 | idle_workers = worker_node_dao.find_by_idleness() 61 | random.shuffle(idle_workers) 62 | 63 | for idle_worker in idle_workers: 64 | if idle_worker.use_ws and idle_worker.id in conn_map: 65 | conn = conn_map[idle_worker.id] 66 | 67 | try: 68 | grading_job_id = job_queue.pull() 69 | job_queue.update_all_job_positions(stream_queue) 70 | grading_job = grading_job_dao.find_by_id(grading_job_id) 71 | 72 | if not grading_job: 73 | logger.critical( 74 | "found job ID '{}' in queue, but job does not exist".format( 75 | grading_job_id 76 | ) 77 | ) 78 | return 79 | 80 | grading_job.started_at = get_time() 81 | grading_job.worker_id = idle_worker.id 82 | grading_job_dao.update(grading_job) 83 | 84 | idle_worker.running_job_id = grading_job_id 85 | idle_worker.jobs_processed += 1 86 | worker_node_dao.update(idle_worker) 87 | 88 | conn.send( 89 | {"grading_job_id": grading_job_id, "stages": grading_job.stages} 90 | ) 91 | 92 | except Empty: 93 | # no more jobs available 94 | return 95 | 96 | except Exception as e: 97 | logger.critical( 98 | "failed to assign job to {}: {}".format(idle_worker.id, repr(e)) 99 | ) 100 | 101 | 102 | def _handle_lost_worker_node(settings, worker, reason="timeout"): 103 | lost_run_id = worker.running_job_id 104 | 105 | worker.is_alive = False 106 | worker.running_job_id = None 107 | worker_dao = WorkerNodeDao(settings) 108 | worker_dao.update(worker) 109 | 110 | if not lost_run_id: 111 | logger.critical( 112 | "worker '{}' went offline unexpectedly on '{}' due to {}".format( 113 | worker.id, worker.hostname, reason 114 | ) 115 | ) 116 | return 117 | 118 | logger.critical( 119 | "worker '{}' went offline unexpectedly on '{}' while" 120 | " executing '{}' due to {}".format( 121 | worker.id, worker.hostname, lost_run_id, reason 122 | ) 123 | ) 124 | 125 | jobs_dao = GradingJobDao(settings) 126 | 
job = jobs_dao.find_by_id(lost_run_id) 127 | if job is None: 128 | logger.critical( 129 | ( 130 | "worker was reportedly executing job '{}' " 131 | "but this job does not exist" 132 | ).format(lost_run_id) 133 | ) 134 | return 135 | 136 | job.finished_at = get_time() 137 | job.success = False 138 | job.results = [{"result": "worker died while executing job"}] 139 | jobs_dao.update(job) 140 | 141 | tornado.ioloop.IOLoop.current().add_callback( 142 | job_update_callback, settings, lost_run_id, job.run_id 143 | ) 144 | 145 | 146 | __all__ = ["worker_heartbeat_callback", "worker_lost_callback", "worker_schedule_job"] 147 | -------------------------------------------------------------------------------- /tests/api/unit/test_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import unittest.mock as mock 4 | 5 | from broadway.api.utils.bootstrap import ( 6 | initialize_course_tokens, 7 | initialize_global_settings, 8 | ) 9 | from broadway.api.utils.multiqueue import MultiQueue 10 | 11 | from broadway.api.flags import app_flags 12 | from broadway.api.daos.course import CourseDao 13 | 14 | from tests.api.base import BaseTest 15 | 16 | from queue import Empty 17 | 18 | logging.disable(logging.WARNING) 19 | 20 | 21 | class TestClusterTokenUtils(BaseTest): 22 | def test_cluster_token_generated(self): 23 | flags = app_flags.parse([], env={}) 24 | settings = initialize_global_settings(flags) 25 | self.assertIsNotNone(flags.get("token")) 26 | self.assertIsNotNone(settings["FLAGS"]["token"]) 27 | 28 | 29 | class TestCourseTokenUtils(BaseTest): 30 | def test_init_tokens(self): 31 | course_tokens = { 32 | "cs225": {"tokens": ["token1"]}, 33 | "cs241": {"tokens": ["token1"], "query_tokens": ["token2"]}, 34 | } 35 | 36 | with mock.patch( 37 | "builtins.open", mock.mock_open(read_data=json.dumps(course_tokens)) 38 | ): 39 | initialize_course_tokens(self.app.settings, self.app.settings["FLAGS"]) 40 | 41 | dao = CourseDao(self.app.settings) 42 | cs225 = dao.find_by_id("cs225") 43 | cs241 = dao.find_by_id("cs241") 44 | 45 | self.assertIsNotNone(cs225) 46 | self.assertIsNotNone(cs241) 47 | 48 | self.assertIn("token1", cs225.tokens) 49 | self.assertNotIn("token2", cs225.tokens) 50 | self.assertNotIn("token1", cs225.query_tokens) 51 | self.assertIn("token1", cs241.tokens) 52 | self.assertNotIn("token1", cs241.query_tokens) 53 | self.assertIn("token2", cs241.query_tokens) 54 | self.assertNotIn("token2", cs241.tokens) 55 | 56 | 57 | class TestMultiQueue(BaseTest): 58 | def setUp(self): 59 | super().setUp() 60 | self.multiqueue = MultiQueue() 61 | 62 | def test_push(self): 63 | self.multiqueue.push("cs225", 225) 64 | self.multiqueue.push("cs225", 296 - 25) 65 | 66 | self.multiqueue.push("cs233", 233) 67 | 68 | self.multiqueue.push("cs241", 241) 69 | self.multiqueue.push("cs241", 295 - 41) 70 | 71 | self.assertTrue(self.multiqueue.contains_key("cs225")) 72 | self.assertTrue(self.multiqueue.contains_key("cs233")) 73 | self.assertTrue(self.multiqueue.contains_key("cs241")) 74 | self.assertFalse(self.multiqueue.contains_key("ece411")) 75 | 76 | self.assertEqual(2, self.multiqueue.get_queue_length("cs225")) 77 | self.assertEqual(1, self.multiqueue.get_queue_length("cs233")) 78 | self.assertEqual(2, self.multiqueue.get_queue_length("cs241")) 79 | 80 | self.assertEqual(3, len(self.multiqueue.queues)) 81 | self.assertEqual(2, self.multiqueue.queues["cs225"].qsize()) 82 | self.assertEqual(1, self.multiqueue.queues["cs233"].qsize()) 83 | 
self.assertEqual(2, self.multiqueue.queues["cs241"].qsize()) 84 | 85 | def test_position(self): 86 | 87 | # try getting the position of some key in a non-existent queue 88 | with self.assertRaises(Exception): 89 | self.multiqueue.get_position_in_queue("foo", "") 90 | 91 | for i in range(10): 92 | self.multiqueue.push("cs225", "cs225-" + str(i)) 93 | 94 | for i in range(100): 95 | self.multiqueue.push("cs233", "cs233-" + str(i)) 96 | 97 | for i in range(241): 98 | self.multiqueue.push("cs241", "cs241-" + str(i)) 99 | 100 | for i in range(10): 101 | self.assertEqual( 102 | i, self.multiqueue.get_position_in_queue("cs225", "cs225-" + str(i)) 103 | ) 104 | 105 | for i in range(100): 106 | self.assertEqual( 107 | i, self.multiqueue.get_position_in_queue("cs233", "cs233-" + str(i)) 108 | ) 109 | 110 | for i in range(241): 111 | self.assertEqual( 112 | i, self.multiqueue.get_position_in_queue("cs241", "cs241-" + str(i)) 113 | ) 114 | 115 | # try getting the position of a non-existent key 116 | self.assertEqual(-1, self.multiqueue.get_position_in_queue("cs225", "foo")) 117 | 118 | def test_pull(self): 119 | 120 | # try pulling from a multiqueue that is empty 121 | with self.assertRaises(Empty): 122 | rv = self.multiqueue.pull() 123 | 124 | for i in range(10): 125 | self.multiqueue.push("cs225", "cs225-" + str(i)) 126 | 127 | for i in range(100): 128 | self.multiqueue.push("cs233", "cs233-" + str(i)) 129 | 130 | for i in range(241): 131 | self.multiqueue.push("cs241", "cs241-" + str(i)) 132 | 133 | for i in range(10): 134 | rv = self.multiqueue.pull() 135 | self.assertEqual("cs225-" + str(i), rv) 136 | rv = self.multiqueue.pull() 137 | self.assertEqual("cs233-" + str(i), rv) 138 | rv = self.multiqueue.pull() 139 | self.assertEqual("cs241-" + str(i), rv) 140 | 141 | for i in range(10, 100): 142 | rv = self.multiqueue.pull() 143 | self.assertEqual("cs233-" + str(i), rv) 144 | rv = self.multiqueue.pull() 145 | self.assertEqual("cs241-" + str(i), rv) 146 | 147 | for i in range(100, 241): 148 | rv = self.multiqueue.pull() 149 | self.assertEqual("cs241-" + str(i), rv) 150 | 151 | # the multiqueue should be empty now 152 | with self.assertRaises(Empty): 153 | self.multiqueue.pull() 154 | -------------------------------------------------------------------------------- /broadway/grader/http.py: -------------------------------------------------------------------------------- 1 | import os 2 | import signal 3 | import socket 4 | import asyncio 5 | import logging 6 | 7 | from threading import Event 8 | from concurrent.futures import ThreadPoolExecutor, wait, FIRST_COMPLETED 9 | 10 | import requests 11 | from chainlink import Chainlink 12 | 13 | import broadway.grader.api as api 14 | from broadway.grader.api import ( 15 | GRADER_REGISTER_ENDPOINT, 16 | GRADING_JOB_ENDPOINT, 17 | HEARTBEAT_ENDPOINT, 18 | SUCCESS_CODE, 19 | QUEUE_EMPTY_CODE, 20 | JOB_POLL_INTERVAL, 21 | HEARTBEAT_INTERVAL, 22 | ) 23 | 24 | # globals 25 | _grader_id = None 26 | _hostname = None 27 | _worker_thread = None 28 | _heartbeat_interval = HEARTBEAT_INTERVAL 29 | _api_host = None 30 | _header = None 31 | _verbose = None 32 | 33 | _event_loop = asyncio.new_event_loop() 34 | _exit_event = Event() 35 | 36 | logger = logging.getLogger(__name__) 37 | 38 | 39 | def _halt_all(): 40 | _exit_event.set() 41 | 42 | 43 | def _signal_handler(sig, frame): 44 | _halt_all() 45 | 46 | 47 | def _get_url(endpoint): 48 | return "{}{}".format(_api_host, endpoint) 49 | 50 | 51 | def _heartbeat_routine(): 52 | while not _exit_event.is_set(): 53 | response = 
requests.post(
54 |             _get_url("{}/{}".format(HEARTBEAT_ENDPOINT, _grader_id)),
55 |             headers=_header,
56 |             data="",
57 |         )
58 |         if response.status_code != SUCCESS_CODE:
59 |             logger.critical("Heartbeat failed!\nError: {}".format(response.text))
60 |             return
61 |
62 |         _exit_event.wait(_heartbeat_interval)
63 |
64 |
65 | def _worker_routine():
66 |     asyncio.set_event_loop(_event_loop)
67 |
68 |     while not _exit_event.is_set():
69 |         # poll from queue
70 |         response = requests.get(
71 |             _get_url("{}/{}".format(GRADING_JOB_ENDPOINT, _grader_id)), headers=_header
72 |         )
73 |
74 |         # if the queue is empty then sleep for a while
75 |         if response.status_code == QUEUE_EMPTY_CODE:
76 |             _exit_event.wait(JOB_POLL_INTERVAL)
77 |             continue
78 |
79 |         if response.status_code != SUCCESS_CODE:
80 |             logger.critical(
81 |                 "Bad server response while trying to poll job.\nError: {}".format(
82 |                     response.text
83 |                 )
84 |             )
85 |             return
86 |
87 |         # we successfully polled a job
88 |         job = response.json()["data"]
89 |         job_id = job.get(api.GRADING_JOB_ID)
90 |         logger.info("Starting job {}".format(job_id))
91 |
92 |         # execute job
93 |         try:
94 |             chain = Chainlink(job[api.STAGES], workdir=os.getcwd())
95 |             job_results = chain.run({})
96 |         except Exception as ex:
97 |             logger.critical("Grading job failed with exception:\n{}".format(ex))
98 |             job_results = [
99 |                 {
100 |                     "logs": {
101 |                         "stdout": b"The container crashed",
102 |                         "stderr": bytes(str(ex), "utf-8"),
103 |                     },
104 |                     "success": False,
105 |                 }
106 |             ]
107 |
108 |         job_stdout = "\n".join(
109 |             [r["logs"]["stdout"].decode("utf-8") for r in job_results]
110 |         )
111 |         job_stderr = "\n".join(
112 |             [r["logs"]["stderr"].decode("utf-8") for r in job_results]
113 |         )
114 |
115 |         # remove logs from result array because logs can be bulky
116 |         # we will store them separately
117 |         for r in job_results:
118 |             del r["logs"]
119 |
120 |         logger.info("Finished job {}".format(job_id))
121 |         if _verbose:
122 |             logger.info("Job stdout:\n" + job_stdout)
123 |             logger.info("Job stderr:\n" + job_stderr)
124 |
125 |         grading_job_result = {
126 |             api.RESULTS: job_results,
127 |             api.SUCCESS: job_results[-1]["success"],
128 |             api.LOGS: {"stdout": job_stdout, "stderr": job_stderr},
129 |             api.GRADING_JOB_ID: job_id,
130 |         }
131 |
132 |         logger.info("Sending job results")
133 |         response = requests.post(
134 |             _get_url("{}/{}".format(GRADING_JOB_ENDPOINT, _grader_id)),
135 |             json=grading_job_result,
136 |             headers=_header,
137 |         )
138 |         if response.status_code != SUCCESS_CODE:
139 |             logger.critical(
140 |                 (
141 |                     "Bad server response while updating "
142 |                     + "about job status.\nError: {}"
143 |                 ).format(response.text)
144 |             )
145 |             return
146 |
147 |
148 | def _register_node():
149 |     global _grader_id
150 |     global _heartbeat_interval
151 |
152 |     response = requests.post(
153 |         _get_url("{}/{}".format(GRADER_REGISTER_ENDPOINT, _grader_id)),
154 |         headers=_header,
155 |         json={api.HOSTNAME: _hostname},
156 |     )
157 |     if response.status_code != SUCCESS_CODE:
158 |         raise RuntimeError("failed to register: {}".format(response.text))
159 |
160 |     logger.info("Registered to server")
161 |     server_response = response.json()["data"]
162 |
163 |     # set heartbeat interval
164 |     if api.HEARTBEAT in server_response:
165 |         _heartbeat_interval = server_response[api.HEARTBEAT]
166 |     else:
167 |         logger.info(
168 |             "Server response did not include heartbeat, using default {}".format(
169 |                 _heartbeat_interval
170 |             )
171 |         )
172 |
173 |
174 | def run_http_grader(flags):
175 |     global _grader_id
176 |     global _hostname
177 |     global
_header
178 |     global _api_host
179 |     global _verbose
180 |
181 |     signal.signal(signal.SIGINT, _signal_handler)
182 |
183 |     _grader_id = flags["grader_id"]
184 |     _hostname = socket.gethostname()
185 |     _api_host = flags["api_host"]
186 |     _verbose = flags["verbose"]
187 |
188 |     # register node to server
189 |     _header = {api.AUTH: "Bearer {}".format(flags["token"])}
190 |     _register_node()
191 |
192 |     # run the grader on two separate threads.
193 |     # If any of the routines fail, the grader shuts down
194 |     executor = ThreadPoolExecutor(max_workers=2)
195 |     futures = [executor.submit(_heartbeat_routine), executor.submit(_worker_routine)]
196 |     wait(futures, return_when=FIRST_COMPLETED)
197 |     executor.shutdown()
--------------------------------------------------------------------------------
/broadway/api/handlers/worker_ws.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | import tornado.ioloop
4 |
5 | from broadway.api.handlers.base import BaseWSAPIHandler
6 | from broadway.api.decorators.auth import authenticate_cluster_token_ws
7 |
8 | import broadway.api.daos as daos
9 | import broadway.api.models as models
10 |
11 | from broadway.api.callbacks.job import job_update_callback
12 | from broadway.api.callbacks.worker import worker_lost_callback, worker_schedule_job
13 | from broadway.api.utils.time import get_time
14 |
15 | logger = logging.getLogger(__name__)
16 |
17 |
18 | class WorkerConnectionHandler(BaseWSAPIHandler):
19 |     def __init__(self, *args, **kwargs):
20 |         self.worker_id = None
21 |         super().__init__(*args, **kwargs)
22 |
23 |     @authenticate_cluster_token_ws
24 |     def open(self, worker_id):
25 |         self.worker_id = worker_id
26 |         self.registered = False
27 |         logger.info("worker '{}' opened a connection".format(self.worker_id))
28 |
29 |     @BaseWSAPIHandler.msg_type(
30 |         "register",
31 |         {
32 |             "type": "object",
33 |             "properties": {"hostname": {"type": "string"}},
34 |             "required": ["hostname"],
35 |             "additionalProperties": False,
36 |         },
37 |     )
38 |     def handler_register(self, hostname):
39 |         if self.worker_id is None:
40 |             return
41 |
42 |         worker_node_dao = daos.WorkerNodeDao(self.settings)
43 |
44 |         dup = worker_node_dao.find_by_id(self.worker_id)
45 |
46 |         if dup is None:
47 |             self.worker_node = models.WorkerNode(
48 |                 id_=self.worker_id,
49 |                 hostname=hostname,
50 |                 last_seen=get_time(),
51 |                 is_alive=True,
52 |                 use_ws=True,
53 |             )
54 |             logger.info(
55 |                 "new worker '{}' joined on '{}'".format(self.worker_id, hostname)
56 |             )
57 |             worker_node_dao.insert(self.worker_node)
58 |         elif not dup.is_alive:
59 |             self.worker_node = dup
60 |             self.worker_node.hostname = hostname
61 |             self.worker_node.last_seen = get_time()
62 |             self.worker_node.is_alive = True
63 |             self.worker_node.use_ws = True  # the node may have previously registered over HTTP
64 |             logger.info(
65 |                 "worker '{}' alive again on '{}'".format(self.worker_id, hostname)
66 |             )
67 |             worker_node_dao.update(self.worker_node)
68 |         else:
69 |             msg = "worker id '{}' already exists".format(self.worker_id)
70 |             logger.info(msg)
71 |             self.send({"success": False})
72 |             self.close(reason=msg, code=1002)
73 |             return
74 |
75 |         self.registered = True
76 |         self.get_ws_conn_map()[self.worker_id] = self
77 |
78 |         self.send({"success": True})
79 |
80 |         # trigger schedule event
81 |         tornado.ioloop.IOLoop.current().add_callback(worker_schedule_job, self.settings)
82 |
83 |     @BaseWSAPIHandler.msg_type(
84 |         "job_result",
85 |         {
86 |             "type": "object",
87 |             "properties": {
88 |                 "grading_job_id": {"type": "string"},
89 |                 "success": {"type": "boolean"},
90
| "results": {"type": "array", "items": {"type": "object"}}, 91 | "logs": {"type": "object"}, 92 | }, 93 | "required": ["grading_job_id", "success", "results", "logs"], 94 | "additionalProperties": False, 95 | }, 96 | ) 97 | def handler_job_result(self, grading_job_id, success, results, logs): 98 | if not self.registered: 99 | logger.info( 100 | "worker '{}' submitted before registering".format(self.worker_id) 101 | ) 102 | self.close(reason="submitting before registering", code=1002) 103 | return 104 | 105 | grading_job_dao = daos.GradingJobDao(self.settings) 106 | job = grading_job_dao.find_by_id(grading_job_id) 107 | 108 | if not job: 109 | self.close(reason="job with the given ID not found", code=1002) 110 | return 111 | 112 | job_state = job.get_state() 113 | 114 | if job_state != models.GradingJobState.STARTED: 115 | logger.critical( 116 | "job with id '{}' updated when in state '{}'".format( 117 | grading_job_id, job_state.value 118 | ) 119 | ) 120 | self.close( 121 | reason="cannot update job that is not in STARTED state", code=1002 122 | ) 123 | return 124 | 125 | worker_node_dao = daos.WorkerNodeDao(self.settings) 126 | worker_node = worker_node_dao.find_by_id(self.worker_id) 127 | 128 | if not worker_node: 129 | msg = "unknown worker '{}' successfully updated job".format(self.worker_id) 130 | logger.critical(msg) 131 | self.close(reason=msg, code=1002) 132 | return 133 | 134 | logger.info( 135 | "worker '{}' submitted job result for job '{}'".format( 136 | self.worker_id, grading_job_id 137 | ) 138 | ) 139 | 140 | # clear the worker node's job 141 | worker_node.running_job_id = None 142 | worker_node_dao.update(worker_node) 143 | 144 | # finish the job 145 | job.finished_at = get_time() 146 | job.results = results 147 | job.success = success 148 | grading_job_dao.update(job) 149 | 150 | # store the logs 151 | job_log_dao = daos.GradingJobLogDao(self.settings) 152 | job_log = models.GradingJobLog(job_id=grading_job_id, **logs) 153 | job_log_dao.insert(job_log) 154 | 155 | # thread safe callback 156 | tornado.ioloop.IOLoop.current().add_callback( 157 | job_update_callback, self.settings, grading_job_id, job.run_id 158 | ) 159 | 160 | # trigger schedule event 161 | tornado.ioloop.IOLoop.current().add_callback(worker_schedule_job, self.settings) 162 | 163 | def on_close(self): 164 | if self.worker_id is not None and self.registered: 165 | tornado.ioloop.IOLoop.current().add_callback( 166 | worker_lost_callback, self.settings, self.worker_id 167 | ) 168 | 169 | del self.get_ws_conn_map()[self.worker_id] 170 | else: 171 | logger.info( 172 | "worker '{}' went down before registering".format(self.worker_id) 173 | ) 174 | 175 | def on_ping(self, data): 176 | # ping messages have the same function as heartbeat requests 177 | # for normal http workers 178 | 179 | if self.worker_id is None: 180 | logger.critical("worker is not initialized") 181 | return 182 | 183 | worker_node_dao = daos.WorkerNodeDao(self.settings) 184 | worker_node = worker_node_dao.find_by_id(self.worker_id) 185 | 186 | if not worker_node: 187 | logger.critical( 188 | "unknown ws node with ID '{}' successfully sent heartbeat".format( 189 | self.worker_id 190 | ) 191 | ) 192 | return 193 | 194 | worker_node.last_seen = get_time() 195 | worker_node_dao.update(worker_node) 196 | -------------------------------------------------------------------------------- /broadway/api/handlers/worker.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import tornado.ioloop 3 | from 
queue import Empty 4 | from tornado_json import schema 5 | 6 | import broadway.api.daos as daos 7 | import broadway.api.definitions as definitions 8 | import broadway.api.models as models 9 | from broadway.api.decorators.auth import authenticate_cluster_token, authenticate_worker 10 | from broadway.api.callbacks.job import job_update_callback 11 | from broadway.api.handlers.base import BaseAPIHandler 12 | from broadway.api.utils.time import get_time 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | 17 | class WorkerRegisterHandler(BaseAPIHandler): 18 | @authenticate_cluster_token 19 | @schema.validate( 20 | on_empty_404=True, 21 | input_schema={ 22 | "type": "object", 23 | "properties": {"hostname": {"type": "string"}}, 24 | "required": ["hostname"], 25 | "additionalProperties": False, 26 | }, 27 | output_schema={ 28 | "type": "object", 29 | "properties": {"heartbeat": {"type": "number"}}, 30 | "required": ["heartbeat"], 31 | "additionalProperties": False, 32 | }, 33 | ) 34 | def post(self, *args, **kwargs): 35 | worker_id = kwargs.get("worker_id") 36 | hostname = self.body.get("hostname") 37 | 38 | worker_node_dao = daos.WorkerNodeDao(self.settings) 39 | 40 | worker_node = models.WorkerNode( 41 | id_=worker_id, hostname=hostname, last_seen=get_time(), is_alive=True 42 | ) 43 | 44 | dup = worker_node_dao.find_by_id(worker_id) 45 | 46 | if dup is None: 47 | logger.info("new worker {} joined on {}".format(worker_id, hostname)) 48 | worker_node_dao.insert(worker_node) 49 | elif not dup.is_alive: 50 | dup.is_alive = True 51 | logger.info("worker {} alive again on {}".format(worker_id, hostname)) 52 | worker_node_dao.update(dup) 53 | else: 54 | msg = "worker id '{}' already exists".format(worker_id) 55 | logger.info(msg) 56 | self.abort({"message": msg}, status=400) 57 | return 58 | 59 | return {"heartbeat": self.get_flags()["heartbeat_interval"]} 60 | 61 | 62 | class GradingJobHandler(BaseAPIHandler): 63 | @authenticate_cluster_token 64 | @authenticate_worker 65 | @schema.validate( 66 | on_empty_404=True, 67 | output_schema={ 68 | "type": "object", 69 | "properties": { 70 | "grading_job_id": {"type": "string"}, 71 | "stages": {"type": "array", "items": definitions.grading_stage}, 72 | }, 73 | "required": ["grading_job_id", "stages"], 74 | "additionalProperties": False, 75 | }, 76 | ) 77 | def get(self, *args, **kwargs): 78 | """ 79 | Allows workers to request their next grading job 80 | """ 81 | worker_id = kwargs.get("worker_id") 82 | worker_node_dao = daos.WorkerNodeDao(self.settings) 83 | worker_node = worker_node_dao.find_by_id(worker_id) 84 | if not worker_node: 85 | logger.critical( 86 | "unknown node with ID '{}' successfully requested job".format(worker_id) 87 | ) 88 | self.abort({"message": ""}, status=404) 89 | return 90 | 91 | try: 92 | grading_job_id = self.get_queue().pull() 93 | self.get_stream_queue().update_job_state( 94 | grading_job_id, models.GradingJobState.STARTED.name 95 | ) 96 | self.get_queue().update_all_job_positions(self.get_stream_queue()) 97 | grading_job_dao = daos.GradingJobDao(self.settings) 98 | grading_job = grading_job_dao.find_by_id(grading_job_id) 99 | if not grading_job: 100 | logger.critical( 101 | "found job ID '{}' in queue, but job does not exist".format( 102 | grading_job_id 103 | ) 104 | ) 105 | self.abort( 106 | {"message": "a failure occurred while getting next job"}, status=500 107 | ) 108 | return 109 | 110 | grading_job.started_at = get_time() 111 | grading_job.worker_id = worker_id 112 | grading_job_dao.update(grading_job) 113 | 114 | 
worker_node.running_job_id = grading_job_id 115 | worker_node.jobs_processed += 1 116 | worker_node.is_alive = True 117 | worker_node_dao.update(worker_node) 118 | 119 | return {"grading_job_id": grading_job_id, "stages": grading_job.stages} 120 | except Empty: 121 | self.abort({"message": "no jobs available"}, status=498) 122 | 123 | @authenticate_cluster_token 124 | @authenticate_worker 125 | @schema.validate( 126 | input_schema={ 127 | "type": "object", 128 | "properties": { 129 | "grading_job_id": {"type": "string"}, 130 | "success": {"type": "boolean"}, 131 | "results": {"type": "array", "items": {"type": "object"}}, 132 | "logs": {"type": "object"}, 133 | }, 134 | "required": ["grading_job_id", "success", "results", "logs"], 135 | "additionalProperties": False, 136 | } 137 | ) 138 | def post(self, *args, **kwargs): 139 | """ 140 | Allows workers to update grading job status on completion 141 | """ 142 | worker_id = kwargs.get("worker_id") 143 | job_id = self.body.get("grading_job_id") 144 | 145 | grading_job_dao = daos.GradingJobDao(self.settings) 146 | job = grading_job_dao.find_by_id(job_id) 147 | if not job: 148 | self.abort({"message": "job with the given ID not found"}) 149 | return 150 | 151 | job_state = job.get_state() 152 | if job_state != models.GradingJobState.STARTED: 153 | logger.critical( 154 | "job with id '{}' updated when in state '{}'".format( 155 | job_id, job_state.value 156 | ) 157 | ) 158 | self.abort({"message": "cannot update job that is not in STARTED state"}) 159 | return 160 | 161 | worker_node_dao = daos.WorkerNodeDao(self.settings) 162 | worker_node = worker_node_dao.find_by_id(worker_id) 163 | if not worker_node: 164 | logger.critical( 165 | "unknown node with ID '{}' successfully updated job".format(worker_id) 166 | ) 167 | self.abort({"message": ""}, status=404) 168 | return 169 | 170 | # clear the worker node's job 171 | worker_node.running_job_id = None 172 | worker_node.is_alive = True 173 | worker_node_dao.update(worker_node) 174 | 175 | # finish the job 176 | job.finished_at = get_time() 177 | job.results = self.body.get("results") 178 | job.success = self.body.get("success") 179 | grading_job_dao.update(job) 180 | 181 | # store the logs 182 | job_log_dao = daos.GradingJobLogDao(self.settings) 183 | job_log = models.GradingJobLog(job_id=job_id, **self.body.get("logs")) 184 | job_log_dao.insert(job_log) 185 | 186 | # thread safe callback 187 | tornado.ioloop.IOLoop.current().add_callback( 188 | job_update_callback, self.settings, job_id, job.run_id 189 | ) 190 | 191 | 192 | class HeartBeatHandler(BaseAPIHandler): 193 | @authenticate_cluster_token 194 | @authenticate_worker 195 | def post(self, *args, **kwargs): 196 | worker_id = kwargs.get("worker_id") 197 | 198 | worker_node_dao = daos.WorkerNodeDao(self.settings) 199 | worker_node = worker_node_dao.find_by_id(worker_id) 200 | if not worker_node: 201 | logger.critical( 202 | "unknown node with ID '{}' successfully sent heartbeat".format( 203 | worker_id 204 | ) 205 | ) 206 | self.abort({"message": ""}, status=404) 207 | return 208 | 209 | worker_node.last_seen = get_time() 210 | worker_node.is_alive = True 211 | worker_node_dao.update(worker_node) 212 | -------------------------------------------------------------------------------- /tests/api/unit/test_daos.py: -------------------------------------------------------------------------------- 1 | import datetime as dt 2 | import logging 3 | 4 | import broadway.api.daos as daos 5 | import broadway.api.models as models 6 | 7 | from tests.api.base 
import BaseTest 8 | 9 | logging.disable(logging.WARNING) 10 | 11 | 12 | class AssignmentConfigDaoTest(BaseTest): 13 | 14 | DEFAULT_OBJECT = models.AssignmentConfig( 15 | id_="obj", 16 | env={"key": "value"}, 17 | pre_processing_pipeline=[{"image": "alpine"}], 18 | student_pipeline=[{"image": "alpine"}], 19 | ) 20 | 21 | def setUp(self): 22 | super().setUp() 23 | self.dao = daos.AssignmentConfigDao(self.app.settings) 24 | 25 | def _insert_obj(self): 26 | return self.dao.insert(AssignmentConfigDaoTest.DEFAULT_OBJECT) 27 | 28 | def test_id_from(self): 29 | _id = self.dao.id_from("course", "assignment") 30 | self.assertEqual("course/assignment", _id) 31 | 32 | def test_insert(self): 33 | result = self._insert_obj() 34 | self.assertIsNotNone(result.inserted_id) 35 | 36 | def test_find_by_id(self): 37 | result = self._insert_obj() 38 | obj = self.dao.find_by_id(result.inserted_id) 39 | 40 | self.assertIsNotNone(obj) 41 | for var in vars(obj): 42 | self.assertEqual( 43 | getattr(obj, var), getattr(AssignmentConfigDaoTest.DEFAULT_OBJECT, var) 44 | ) 45 | 46 | def test_delete(self): 47 | insert_result = self._insert_obj() 48 | delete_result = self.dao.delete_by_id(insert_result.inserted_id) 49 | self.assertGreater(delete_result.deleted_count, 0) 50 | 51 | 52 | class CourseDaoTest(BaseTest): 53 | 54 | DEFAULT_OBJECT = models.Course(id_="course", tokens=["value"]) 55 | 56 | def setUp(self): 57 | super().setUp() 58 | self.dao = daos.CourseDao(self.app.settings) 59 | 60 | def _insert_obj(self): 61 | return self.dao.insert_or_update(CourseDaoTest.DEFAULT_OBJECT) 62 | 63 | def test_insert(self): 64 | result = self._insert_obj() 65 | self.assertIsNotNone(result.upserted_id) 66 | 67 | def test_find_by_id(self): 68 | result = self._insert_obj() 69 | obj = self.dao.find_by_id(result.upserted_id) 70 | 71 | self.assertIsNotNone(obj) 72 | for var in vars(obj): 73 | self.assertEqual( 74 | getattr(obj, var), getattr(CourseDaoTest.DEFAULT_OBJECT, var) 75 | ) 76 | 77 | def test_update(self): 78 | insert_result = self._insert_obj() 79 | obj = self.dao.find_by_id(insert_result.upserted_id) 80 | obj.tokens = ["new_value"] 81 | update_result = self.dao.insert_or_update(obj) 82 | 83 | self.assertGreater(update_result.matched_count, 0) 84 | 85 | 86 | class GradingJobLogDaoTest(BaseTest): 87 | 88 | DEFAULT_OBJECT = models.GradingJobLog( 89 | job_id="job_id", stdout="output", stderr="errors" 90 | ) 91 | 92 | def setUp(self): 93 | super().setUp() 94 | self.dao = daos.GradingJobLogDao(self.app.settings) 95 | 96 | def _insert_obj(self): 97 | return self.dao.insert(GradingJobLogDaoTest.DEFAULT_OBJECT) 98 | 99 | def test_insert(self): 100 | result = self._insert_obj() 101 | self.assertIsNotNone(result.inserted_id) 102 | 103 | def test_find_by_id(self): 104 | result = self._insert_obj() 105 | obj = self.dao.find_by_id(result.inserted_id) 106 | 107 | self.assertIsNotNone(obj) 108 | for var in vars(obj): 109 | if var == "id": 110 | self.assertIsNotNone(obj.id) 111 | else: 112 | self.assertEqual( 113 | getattr(obj, var), getattr(GradingJobLogDaoTest.DEFAULT_OBJECT, var) 114 | ) 115 | 116 | def test_find_by_invalid_id(self): 117 | result = self.dao.find_by_id("$$$") 118 | self.assertIsNone(result) 119 | 120 | 121 | class GradingJobDaoTest(BaseTest): 122 | 123 | DEFAULT_OBJECT = models.GradingJob( 124 | job_type=models.GradingJobType.STUDENT, 125 | run_id="run123", 126 | worker_id="worker123", 127 | queued_at=dt.datetime.utcnow(), 128 | success=True, 129 | stages=[{"image": "alpine"}], 130 | students=[{"STUDENT_ID": "example"}], 131 
| ) 132 | 133 | def setUp(self): 134 | super().setUp() 135 | self.dao = daos.GradingJobDao(self.app.settings) 136 | 137 | def _insert_obj(self): 138 | return self.dao.insert(GradingJobDaoTest.DEFAULT_OBJECT) 139 | 140 | def test_insert(self): 141 | result = self._insert_obj() 142 | self.assertIsNotNone(result.inserted_id) 143 | 144 | def test_find_by_id(self): 145 | result = self._insert_obj() 146 | obj = self.dao.find_by_id(result.inserted_id) 147 | 148 | self.assertIsNotNone(obj) 149 | for var in vars(obj): 150 | if var == "id": 151 | self.assertIsNotNone(obj.id) 152 | elif var == "queued_at": 153 | delta = GradingJobDaoTest.DEFAULT_OBJECT.queued_at - obj.queued_at 154 | self.assertEqual(delta.seconds, 0) 155 | else: 156 | self.assertEqual( 157 | getattr(obj, var), getattr(GradingJobDaoTest.DEFAULT_OBJECT, var) 158 | ) 159 | 160 | def test_find_by_run_id(self): 161 | result = self._insert_obj() 162 | objs = self.dao.find_by_run_id(GradingJobDaoTest.DEFAULT_OBJECT.run_id) 163 | 164 | self.assertEqual(len(objs), 1) 165 | self.assertEqual(objs[0].id, str(result.inserted_id)) 166 | 167 | def test_update(self): 168 | insert_result = self._insert_obj() 169 | obj = self.dao.find_by_id(insert_result.inserted_id) 170 | obj.success = False 171 | update_result = self.dao.update(obj) 172 | 173 | self.assertGreater(update_result.matched_count, 0) 174 | 175 | 176 | class GradingRunDaoTest(BaseTest): 177 | 178 | DEFAULT_OBJECT = models.GradingRun( 179 | assignment_id="assignment123", 180 | state=models.GradingRunState.READY, 181 | students_env=[{"override": "1"}], 182 | student_jobs_left=1, 183 | ) 184 | 185 | def setUp(self): 186 | super().setUp() 187 | self.dao = daos.GradingRunDao(self.app.settings) 188 | 189 | def _insert_obj(self): 190 | return self.dao.insert(GradingRunDaoTest.DEFAULT_OBJECT) 191 | 192 | def test_insert(self): 193 | result = self._insert_obj() 194 | self.assertIsNotNone(result.inserted_id) 195 | 196 | def test_find_by_id(self): 197 | result = self._insert_obj() 198 | obj = self.dao.find_by_id(result.inserted_id) 199 | 200 | self.assertIsNotNone(obj) 201 | for var in vars(obj): 202 | if var == "id": 203 | self.assertIsNotNone(obj.id) 204 | else: 205 | self.assertEqual( 206 | getattr(obj, var), getattr(GradingRunDaoTest.DEFAULT_OBJECT, var) 207 | ) 208 | 209 | def test_update(self): 210 | insert_result = self._insert_obj() 211 | obj = self.dao.find_by_id(insert_result.inserted_id) 212 | obj.student_jobs_left = 0 213 | update_result = self.dao.update(obj) 214 | 215 | self.assertGreater(update_result.matched_count, 0) 216 | 217 | 218 | class WorkerNodeDaoTest(BaseTest): 219 | 220 | DEFAULT_OBJECT = models.WorkerNode(id_="holygoply", hostname="example.com") 221 | 222 | def setUp(self): 223 | super().setUp() 224 | self.dao = daos.WorkerNodeDao(self.app.settings) 225 | 226 | def _insert_obj(self): 227 | self.dao.insert(WorkerNodeDaoTest.DEFAULT_OBJECT) 228 | return WorkerNodeDaoTest.DEFAULT_OBJECT.id 229 | 230 | def test_insert(self): 231 | worker_id = self._insert_obj() 232 | self.assertIsNotNone(worker_id) 233 | 234 | def test_find_by_hostname(self): 235 | self._insert_obj() 236 | obj = self.dao.find_by_hostname(WorkerNodeDaoTest.DEFAULT_OBJECT.hostname) 237 | self.assertIsNotNone(obj) 238 | 239 | def test_find_by_liveness(self): 240 | worker_id = self._insert_obj() 241 | obj_list = self.dao.find_by_liveness(alive=True) 242 | no_obj_list = self.dao.find_by_liveness(alive=False) 243 | self.assertEqual(len(obj_list), 1) 244 | self.assertEqual(len(no_obj_list), 0) 245 | 
self.assertEqual(obj_list[0].id, worker_id) 246 | 247 | def test_update(self): 248 | worker_id = self._insert_obj() 249 | obj = self.dao.find_by_id(worker_id) 250 | obj.is_alive = False 251 | update_result = self.dao.update(obj) 252 | 253 | self.assertGreater(update_result.matched_count, 0) 254 | -------------------------------------------------------------------------------- /broadway/api/utils/bootstrap.py: -------------------------------------------------------------------------------- 1 | import json 2 | import jsonschema 3 | import logging 4 | import os 5 | import sys 6 | import uuid 7 | import signal 8 | 9 | from typing import Dict, Any 10 | 11 | from pymongo import MongoClient 12 | from pymongo.errors import ConnectionFailure 13 | 14 | import tornado 15 | import tornado.web 16 | import tornado.ioloop 17 | import tornado.httpserver 18 | 19 | from logging.handlers import TimedRotatingFileHandler 20 | 21 | from broadway.api.definitions import course_config 22 | from broadway.api.daos import CourseDao, WorkerNodeDao 23 | from broadway.api.models import Course 24 | from broadway.api.utils.multiqueue import MultiQueue 25 | from broadway.api.utils.streamqueue import StreamQueue 26 | 27 | import broadway.api.callbacks as callbacks 28 | import broadway.api.handlers.client as client_handlers 29 | import broadway.api.handlers.worker as worker_handlers 30 | import broadway.api.handlers.stream as stream_handlers 31 | import broadway.api.handlers.worker_ws as worker_ws_handlers 32 | 33 | logger = logging.getLogger(__name__) 34 | 35 | 36 | def initialize_logger(flags: Dict[str, Any]): 37 | log_dir = flags["log_dir"] 38 | log_level = flags["log_level"] 39 | log_rotate = flags["log_rotate"] 40 | log_backup = flags["log_backup"] 41 | log_timestamps = flags["log_timestamps"] 42 | 43 | os.makedirs(log_dir, exist_ok=True) 44 | 45 | rotating_handler = TimedRotatingFileHandler( 46 | "{}/log".format(log_dir), when=log_rotate, backupCount=log_backup 47 | ) 48 | 49 | if log_timestamps: 50 | format = "%(asctime)s %(levelname)s %(module)s.%(funcName)s: %(message)s" 51 | else: 52 | format = "%(levelname)s %(module)s.%(funcName)s: %(message)s" 53 | 54 | logging.basicConfig( 55 | handlers=[rotating_handler, logging.StreamHandler()], 56 | format=format, 57 | level=log_level, 58 | ) 59 | 60 | # redirecting tornado logs to the file handler 61 | logging.getLogger("tornado").addHandler(rotating_handler) 62 | logging.getLogger("tornado").propagate = False 63 | 64 | 65 | def initialize_global_settings(flags: Dict[str, Any]) -> Dict[str, Any]: 66 | if flags["token"] is None: 67 | flags["token"] = str(uuid.uuid4()) 68 | logger.info("token not given, using {}".format(flags["token"])) 69 | 70 | return { 71 | "FLAGS": flags, 72 | "DB": None, 73 | "QUEUE": MultiQueue(), 74 | "STREAM_QUEUE": StreamQueue(), 75 | "WS_CONN_MAP": {}, 76 | } 77 | 78 | 79 | def initialize_course_tokens(settings: Dict[str, Any], flags: Dict[str, Any]): 80 | logger.info("initializing course config") 81 | 82 | if flags["course_config"] is None: 83 | logger.warning( 84 | "no course configuration specified, using existing configuration" 85 | ) 86 | return 87 | 88 | with open(flags["course_config"]) as f: 89 | courses = json.load(f) 90 | 91 | jsonschema.validate(courses, course_config) 92 | 93 | logger.info("course config found for {} courses".format(len(courses))) 94 | logger.info("dropping existing courses and loading new configuration") 95 | 96 | course_dao = CourseDao(settings) 97 | course_dao.drop_all() 98 | 99 | for course_id, course in 
courses.items(): 100 | course = Course( 101 | id_=course_id, 102 | tokens=course["tokens"], 103 | query_tokens=course.get("query_tokens", []), 104 | ) 105 | course_dao.insert_or_update(course) 106 | 107 | 108 | def initialize_database(settings: Dict[str, Any], flags: Dict[str, Any]): 109 | logger.info("initializing database") 110 | 111 | try: 112 | db_client = MongoClient( 113 | flags["mongodb_dsn"], 114 | serverSelectionTimeoutMS=flags["mongodb_timeout"] * 1000, 115 | ) 116 | db_client.server_info() 117 | 118 | settings["DB"] = db_client 119 | 120 | dao = WorkerNodeDao(settings) 121 | logger.info("resetting ws worker nodes") 122 | dao.reset_worker_nodes() 123 | 124 | except ConnectionFailure as e: 125 | logger.critical("failed to connect to mongo server: {}".format(repr(e))) 126 | sys.exit(1) 127 | 128 | 129 | def initialize_signal_handler(settings: Dict[str, Any], flags: Dict[str, Any]): 130 | logger.info("initializing signal handler") 131 | 132 | def shutdown(): 133 | logger.info("shutting down") 134 | 135 | ioloop = tornado.ioloop.IOLoop.current() 136 | ioloop.add_callback(ioloop.stop) 137 | 138 | def handler(sig, frame): 139 | tornado.ioloop.IOLoop.current().add_callback_from_signal(shutdown) 140 | 141 | signal.signal(signal.SIGINT, handler) 142 | 143 | 144 | def initialize_app( 145 | settings: Dict[str, Any], flags: Dict[str, Any] 146 | ) -> tornado.web.Application: 147 | id_regex = r"(?P<{}>[-\w0-9]+)" 148 | string_regex = r"(?P<{}>[^()]+)" 149 | 150 | app = tornado.web.Application( 151 | [ 152 | # -------- Client Endpoints -------- 153 | ( 154 | r"/api/v1/grading_config/{}/{}".format( 155 | id_regex.format("course_id"), id_regex.format("assignment_name") 156 | ), 157 | client_handlers.GradingConfigHandler, 158 | ), 159 | ( 160 | r"/api/v1/grading_run/{}/{}".format( 161 | id_regex.format("course_id"), id_regex.format("assignment_name") 162 | ), 163 | client_handlers.GradingRunHandler, 164 | ), 165 | ( 166 | r"/api/v1/grading_run_status/{}/{}".format( 167 | id_regex.format("course_id"), id_regex.format("run_id") 168 | ), 169 | client_handlers.GradingRunStatusHandler, 170 | ), 171 | ( 172 | r"/api/v1/grading_run_env/{}/{}".format( 173 | id_regex.format("course_id"), id_regex.format("run_id") 174 | ), 175 | client_handlers.GradingRunEnvHandler, 176 | ), 177 | ( 178 | r"/api/v1/grading_job_log/{}/{}".format( 179 | id_regex.format("course_id"), id_regex.format("job_id") 180 | ), 181 | client_handlers.GradingJobLogHandler, 182 | ), 183 | ( 184 | r"/api/v1/worker/{}/{}".format( 185 | id_regex.format("course_id"), string_regex.format("scope") 186 | ), 187 | client_handlers.CourseWorkerNodeHandler, 188 | ), 189 | ( 190 | r"/api/v1/queue/{}/length".format(id_regex.format("course_id")), 191 | client_handlers.CourseQueueLengthHandler, 192 | ), 193 | ( 194 | r"/api/v1/queue/{}/{}/position".format( 195 | id_regex.format("course_id"), id_regex.format("job_id") 196 | ), 197 | client_handlers.GradingJobQueuePositionHandler, 198 | ), 199 | # ---------------------------------- 200 | # ------- Worker Endpoints --------- 201 | ( 202 | r"/api/v1/worker/{}".format(id_regex.format("worker_id")), 203 | worker_handlers.WorkerRegisterHandler, 204 | ), 205 | ( 206 | r"/api/v1/grading_job/{}".format(id_regex.format("worker_id")), 207 | worker_handlers.GradingJobHandler, 208 | ), 209 | ( 210 | r"/api/v1/heartbeat/{}".format(id_regex.format("worker_id")), 211 | worker_handlers.HeartBeatHandler, 212 | ), 213 | ( 214 | r"/api/v1/worker_ws/{}".format(id_regex.format("worker_id")), 215 | 
worker_ws_handlers.WorkerConnectionHandler, 216 | ), 217 | # ---------------------------------- 218 | # -------- Stream Endpoints -------- 219 | ( 220 | r"/api/v1/stream/{}/{}".format( 221 | id_regex.format("course_id"), id_regex.format("job_id") 222 | ), 223 | stream_handlers.GradingJobStreamHandler, 224 | ) 225 | # ---------------------------------- 226 | ], 227 | **settings 228 | ) 229 | 230 | app.listen(flags["bind_port"], flags["bind_addr"]) 231 | 232 | logger.info("app bound on {}:{}".format(flags["bind_addr"], flags["bind_port"])) 233 | 234 | # registering heartbeat callback 235 | tornado.ioloop.PeriodicCallback( 236 | lambda: callbacks.worker_heartbeat_callback(app.settings), 237 | flags["heartbeat_interval"] * 1000, 238 | ).start() 239 | 240 | return app 241 | -------------------------------------------------------------------------------- /tests/api/_fixtures/grading_configs.py: -------------------------------------------------------------------------------- 1 | valid_configs = [ 2 | { 3 | "student_pipeline": [ 4 | {"image": "alpine:3.5", "timeout": 20}, 5 | {"image": "alpine:3.5", "hostname": "123456"}, 6 | {"image": "alpine:3.5", "networking": True}, 7 | {"image": "alpine:3.5", "env": {"var1": "val1", "var2": "val2"}}, 8 | {"image": "alpine:3.5", "entrypoint": ["echo", "student-job"]}, 9 | ], 10 | "pre_processing_pipeline": [ 11 | { 12 | "image": "alpine:3.5", 13 | "env": {"STAGE": "pre"}, 14 | "entrypoint": ["echo", "pre-processing-job"], 15 | "timeout": 20, 16 | "hostname": "123456", 17 | "networking": False, 18 | "privileged": True, 19 | "memory": "2g", 20 | "logs": False, 21 | } 22 | ], 23 | "post_processing_pipeline": [ 24 | { 25 | "image": "alpine:3.5", 26 | "env": {"STAGE": "post"}, 27 | "entrypoint": ["echo", "post-processing-job"], 28 | "timeout": 20, 29 | "hostname": "123456", 30 | "networking": False, 31 | } 32 | ], 33 | "env": {"TEST": "testing", "temp": "val3"}, 34 | }, 35 | {"student_pipeline": [{"image": "alpine:3.5"}]}, 36 | { 37 | "student_pipeline": [ 38 | {"image": "alpine:3.5", "timeout": 20}, 39 | {"image": "alpine:3.5", "hostname": "123456"}, 40 | {"image": "alpine:3.5", "entrypoint": ["echo", "student-job"]}, 41 | ], 42 | "env": {"TEST": "testing", "temp": "val3"}, 43 | }, 44 | { 45 | "student_pipeline": [ 46 | {"image": "alpine:3.5", "entrypoint": ["echo", "student-job"]} 47 | ], 48 | "pre_processing_pipeline": [ 49 | { 50 | "image": "alpine:3.5", 51 | "env": {"STAGE": "pre"}, 52 | "entrypoint": ["echo", "pre-processing-job"], 53 | "timeout": 20, 54 | "hostname": "123456", 55 | "networking": False, 56 | } 57 | ], 58 | }, 59 | { 60 | "student_pipeline": [{"image": "alpine:3.5", "hostname": "123456"}], 61 | "post_processing_pipeline": [ 62 | { 63 | "image": "alpine:3.5", 64 | "env": {"STAGE": "post"}, 65 | "entrypoint": ["echo", "post-processing-job"], 66 | "timeout": 20, 67 | "hostname": "123456", 68 | "networking": False, 69 | } 70 | ], 71 | "env": {"TEST": "testing", "temp": "val3"}, 72 | }, 73 | { 74 | "student_pipeline": [ 75 | {"image": "alpine:3.5", "hostname": "654321", "privileged": True} 76 | ], 77 | "post_processing_pipeline": [ 78 | { 79 | "image": "alpine:3.5", 80 | "env": {"STAGE": "post"}, 81 | "entrypoint": ["echo", "post-processing-job"], 82 | "timeout": 20, 83 | "hostname": "654321", 84 | "networking": False, 85 | } 86 | ], 87 | "env": {"TEST": "testing", "temp": "val3"}, 88 | }, 89 | { 90 | "student_pipeline": [ 91 | {"image": "alpine:3.5", "hostname": "654321", "privileged": True} 92 | ], 93 | "post_processing_pipeline": [ 94 | { 95 |
"image": "alpine:3.5", 96 | "env": {"STAGE": "post"}, 97 | "entrypoint": ["echo", "post-processing-job"], 98 | "timeout": 20, 99 | "hostname": "654321", 100 | "networking": False, 101 | } 102 | ], 103 | "env": {"TEST": "testing", "temp": "val3"}, 104 | }, 105 | ] 106 | 107 | invalid_configs = [ 108 | { 109 | "student_pipeline": [ 110 | { 111 | "image": "alpine:3.5", 112 | "hostname": "654321", 113 | "privileged": "neither true or false", 114 | } 115 | ], 116 | "post_processing_pipeline": [ 117 | { 118 | "image": "alpine:3.5", 119 | "env": {"STAGE": "post"}, 120 | "entrypoint": ["echo", "post-processing-job"], 121 | "timeout": 20, 122 | "hostname": "654321", 123 | "networking": False, 124 | } 125 | ], 126 | "env": {"TEST": "testing", "temp": "val3"}, 127 | }, 128 | { 129 | "student_pipeline": [ 130 | {"image": "alpine:3.5", "hostname": "654321", "privileged": "very high"} 131 | ], 132 | "post_processing_pipeline": [ 133 | { 134 | "image": "alpine:3.5", 135 | "env": {"STAGE": "post"}, 136 | "entrypoint": ["echo", "post-processing-job"], 137 | "timeout": 20, 138 | "hostname": "654321", 139 | "networking": False, 140 | } 141 | ], 142 | "env": {"TEST": "testing", "temp": "val3"}, 143 | }, 144 | { 145 | "student_pipeline": [ 146 | {"hostname": "123456"}, 147 | {"image": "alpine:3.5", "networking": True}, 148 | {"image": "alpine:3.5", "env": {"var1": "val1", "var2": "val2"}}, 149 | {"image": "alpine:3.5", "entrypoint": ["echo", "student-job"]}, 150 | ], 151 | "pre_processing_pipeline": [ 152 | { 153 | "image": "alpine:3.5", 154 | "env": {"STAGE": "pre"}, 155 | "entrypoint": ["echo", "pre-processing-job"], 156 | "timeout": 20, 157 | "hostname": "123456", 158 | "networking": False, 159 | } 160 | ], 161 | "post_processing_pipeline": [ 162 | { 163 | "image": "alpine:3.5", 164 | "env": {"STAGE": "post"}, 165 | "entrypoint": ["echo", "post-processing-job"], 166 | "timeout": 20, 167 | "hostname": "123456", 168 | "networking": False, 169 | } 170 | ], 171 | "env": {"TEST": "testing", "temp": "val3"}, 172 | }, 173 | { 174 | "pre_processing_pipeline": [ 175 | { 176 | "image": "alpine:3.5", 177 | "env": {"STAGE": "pre"}, 178 | "entrypoint": ["echo", "pre-processing-job"], 179 | "timeout": 20, 180 | "hostname": "123456", 181 | "networking": False, 182 | } 183 | ], 184 | "post_processing_pipeline": [ 185 | { 186 | "image": "alpine:3.5", 187 | "env": {"STAGE": "post"}, 188 | "entrypoint": ["echo", "post-processing-job"], 189 | "timeout": 20, 190 | "hostname": "123456", 191 | "networking": False, 192 | } 193 | ], 194 | "env": {"TEST": "testing", "temp": "val3"}, 195 | }, 196 | { 197 | "student_pipeline": [{"image": "alpine:3.5", "timeout": 20}], 198 | "pre_processing_pipeline": [ 199 | { 200 | "env": {"STAGE": "pre"}, 201 | "entrypoint": ["echo", "pre-processing-job"], 202 | "timeout": 20, 203 | "hostname": "123456", 204 | "networking": False, 205 | } 206 | ], 207 | }, 208 | {"student_pipeline": [{"image": "alpine:3.5", "hello": "world"}]}, 209 | {"student_pipeline": [{"image": "alpine:3.5", "timeout": 20}], "hello": "world"}, 210 | {"student_pipeline": "hello"}, 211 | {}, 212 | ] 213 | 214 | only_student_config = { 215 | "student_pipeline": [{"image": "alpine:3.5"}], 216 | "env": {"env1": "global1", "env2": "global2"}, 217 | } 218 | 219 | pre_processing_config = { 220 | "student_pipeline": [{"image": "alpine:3.5"}], 221 | "pre_processing_pipeline": [{"image": "alpine:3.5"}], 222 | } 223 | 224 | post_processing_config = { 225 | "student_pipeline": [{"image": "alpine:3.5"}], 226 | "post_processing_pipeline": 
[{"image": "alpine:3.5"}], 227 | } 228 | 229 | both_config = { 230 | "pre_processing_pipeline": [{"image": "alpine:3.5"}], 231 | "student_pipeline": [{"image": "alpine:3.5"}], 232 | "post_processing_pipeline": [{"image": "alpine:3.5"}], 233 | } 234 | 235 | complete_config = { 236 | "pre_processing_pipeline": [ 237 | { 238 | "image": "alpine:3.1", 239 | "env": {"STAGE": "pre"}, 240 | "entrypoint": ["echo", "pre-processing-job"], 241 | "timeout": 20, 242 | "hostname": "12", 243 | "networking": True, 244 | } 245 | ], 246 | "student_pipeline": [ 247 | { 248 | "image": "alpine:3.2", 249 | "env": {"var1": "val1", "var2": "val2"}, 250 | "entrypoint": ["echo", "student-job-1"], 251 | "timeout": 30, 252 | "hostname": "34", 253 | "networking": False, 254 | }, 255 | { 256 | "image": "alpine:3.3", 257 | "env": {"var3": "val3", "var4": "val4"}, 258 | "entrypoint": ["echo", "student-job-2"], 259 | "timeout": 40, 260 | "hostname": "56", 261 | "networking": True, 262 | }, 263 | ], 264 | "post_processing_pipeline": [ 265 | { 266 | "image": "alpine:3.4", 267 | "env": {"STAGE": "post"}, 268 | "entrypoint": ["echo", "post-processing-job"], 269 | "timeout": 50, 270 | "hostname": "78", 271 | "networking": False, 272 | } 273 | ], 274 | "env": {"global_var1": "global_val1", "global_var2": "global_val2"}, 275 | } 276 | -------------------------------------------------------------------------------- /tests/api/integration/test_worker.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import time 3 | import json 4 | import websockets 5 | 6 | from tests.api.base import BaseTest 7 | 8 | import tornado.testing 9 | 10 | from broadway.api.callbacks import worker_heartbeat_callback 11 | 12 | logging.disable(logging.WARNING) 13 | 14 | 15 | class RegisterGraderEndpointsTest(BaseTest): 16 | def test_register(self): 17 | self.assertIsNotNone(self.register_worker(self.get_header())) 18 | 19 | def test_duplicate_id(self): 20 | worker_id = "duplicate" 21 | self.register_worker(self.get_header(), worker_id=worker_id, expected_code=200) 22 | self.register_worker(self.get_header(), worker_id=worker_id, expected_code=400) 23 | 24 | def test_reregister_id(self): 25 | worker_id = self.register_worker(self.get_header(), expected_code=200) 26 | time.sleep(self.app.settings["FLAGS"]["heartbeat_interval"] * 2 + 1) 27 | worker_heartbeat_callback(self.app.settings) 28 | self.register_worker(self.get_header(), worker_id=worker_id, expected_code=200) 29 | 30 | def test_unauthorized(self): 31 | self.register_worker(None, 401) 32 | 33 | def test_wrong_token(self): 34 | self.register_worker(self.get_header("invalid"), 401) 35 | 36 | 37 | class PollGradingJobEndpointsTest(BaseTest): 38 | def test_unauthorized(self): 39 | worker_id = self.register_worker(self.get_header()) 40 | self.assertEqual(self.poll_job(worker_id, None), 401) 41 | 42 | def test_wrong_token(self): 43 | worker_id = self.register_worker(self.get_header()) 44 | self.assertEqual(self.poll_job(worker_id, self.get_header("invalid")), 401) 45 | 46 | def test_invalid_worker_id(self): 47 | self.assertEqual(self.poll_job("1234", self.get_header()), 401) 48 | 49 | def test_empty_poll(self): 50 | worker_id = self.register_worker(self.get_header()) 51 | self.assertEqual(self.poll_job(worker_id, self.get_header()), 498) 52 | 53 | 54 | class UpdateGradingJobEndpointsTest(BaseTest): 55 | def test_unauthorized(self): 56 | worker_id = self.register_worker(self.get_header()) 57 | self.post_job_result(worker_id, None, "1234", True, 401) 58 
| 59 | def test_wrong_token(self): 60 | worker_id = self.register_worker(self.get_header()) 61 | self.post_job_result(worker_id, self.get_header("invalid"), "1234", True, 401) 62 | 63 | def test_invalid_worker_id(self): 64 | self.post_job_result("1234", self.get_header(), "1234", True, 401) 65 | 66 | 67 | class HeartBeatEndpointsTest(BaseTest): 68 | def test_unauthorized(self): 69 | worker_id = self.register_worker(self.get_header()) 70 | self.send_heartbeat(worker_id, None, 401) 71 | 72 | def test_wrong_token(self): 73 | worker_id = self.register_worker(self.get_header()) 74 | self.send_heartbeat(worker_id, self.get_header("fake"), 401) 75 | 76 | def test_invalid_worker_id(self): 77 | self.send_heartbeat("1234", self.get_header(), 401) 78 | 79 | def test_valid_heartbeat(self): 80 | worker_id = self.register_worker(self.get_header()) 81 | self.send_heartbeat(worker_id, self.get_header()) 82 | 83 | 84 | class WorkerWSEndpointTest(BaseTest): 85 | @tornado.testing.gen_test 86 | async def test_decode_error(self): 87 | async with self.worker_ws_conn( 88 | worker_id="test_worker", headers=self.get_header() 89 | ) as conn: 90 | try: 91 | await conn.send("i'm not json") 92 | except Exception as e: 93 | self.assertEqual(e.code, 1011) 94 | 95 | # submit job result before registering 96 | @tornado.testing.gen_test 97 | async def test_bad_job_result(self): 98 | async with self.worker_ws_conn( 99 | worker_id="test_worker", headers=self.get_header() 100 | ) as conn: 101 | try: 102 | await conn.send( 103 | json.dumps( 104 | { 105 | "type": "job_result", 106 | "args": { 107 | "grading_job_id": "someid", 108 | "success": True, 109 | "results": [{"res": "spoof"}], 110 | "logs": {"stdout": "stdout", "stderr": "stderr"}, 111 | }, 112 | } 113 | ) 114 | ) 115 | 116 | await conn.recv() 117 | except Exception as e: 118 | self.assertEqual(e.code, 1002) 119 | 120 | @tornado.testing.gen_test 121 | async def test_register(self): 122 | async with self.worker_ws_conn( 123 | worker_id="test_worker", headers=self.get_header() 124 | ) as conn: 125 | await conn.send( 126 | json.dumps({"type": "register", "args": {"hostname": "eniac"}}) 127 | ) 128 | 129 | ack = json.loads(await conn.recv()) 130 | self.assertTrue(ack["success"]) 131 | 132 | @tornado.testing.gen_test 133 | async def test_pong(self): 134 | async with self.worker_ws_conn( 135 | worker_id="test_worker", headers=self.get_header() 136 | ) as conn: 137 | await conn.send( 138 | json.dumps({"type": "register", "args": {"hostname": "eniac"}}) 139 | ) 140 | 141 | ack = json.loads(await conn.recv()) 142 | self.assertTrue(ack["success"]) 143 | 144 | await conn.pong() 145 | 146 | @tornado.testing.gen_test 147 | async def test_no_token(self): 148 | async with self.worker_ws_conn(worker_id="test_worker", headers=None) as conn: 149 | try: 150 | await conn.send( 151 | json.dumps({"type": "register", "args": {"hostname": "eniac"}}) 152 | ) 153 | 154 | ack = json.loads(await conn.recv()) 155 | self.assertFalse(ack["success"]) 156 | except websockets.exceptions.ConnectionClosed as e: 157 | self.assertEqual(e.code, 1008) 158 | 159 | @tornado.testing.gen_test 160 | async def test_wrong_token(self): 161 | async with self.worker_ws_conn( 162 | worker_id="test_worker", headers=self.get_header("invalid") 163 | ) as conn: 164 | try: 165 | await conn.send( 166 | json.dumps({"type": "register", "args": {"hostname": "eniac"}}) 167 | ) 168 | 169 | ack = json.loads(await conn.recv()) 170 | self.assertFalse(ack["success"]) 171 | except websockets.exceptions.ConnectionClosed as e: 172 | 
self.assertEqual(e.code, 1008) 173 | 174 | @tornado.testing.gen_test 175 | async def test_duplicate_token(self): 176 | async with self.worker_ws_conn( 177 | worker_id="test_worker", headers=self.get_header() 178 | ) as conn1: 179 | await conn1.send( 180 | json.dumps({"type": "register", "args": {"hostname": "eniac"}}) 181 | ) 182 | 183 | # worker 1 should successfully register 184 | ack = json.loads(await conn1.recv()) 185 | self.assertTrue(ack["success"]) 186 | 187 | async with self.worker_ws_conn( 188 | worker_id="test_worker", headers=self.get_header() 189 | ) as conn2: 190 | 191 | try: 192 | await conn2.send( 193 | json.dumps({"type": "register", "args": {"hostname": "eniac"}}) 194 | ) 195 | 196 | # worker 2 should fail 197 | ack = json.loads(await conn2.recv()) 198 | self.assertFalse(ack["success"]) 199 | except websockets.exceptions.ConnectionClosed as e: 200 | self.assertEqual(e.code, 1002) 201 | 202 | @tornado.testing.gen_test 203 | async def test_reregister(self): 204 | async with self.worker_ws_conn( 205 | worker_id="test_worker", headers=self.get_header() 206 | ) as conn1: 207 | await conn1.send( 208 | json.dumps({"type": "register", "args": {"hostname": "eniac"}}) 209 | ) 210 | 211 | # worker 1 should succeed 212 | ack = json.loads(await conn1.recv()) 213 | self.assertTrue(ack["success"]) 214 | 215 | async with self.worker_ws_conn( 216 | worker_id="test_worker", headers=self.get_header() 217 | ) as conn2: 218 | await conn2.send( 219 | json.dumps({"type": "register", "args": {"hostname": "eniac"}}) 220 | ) 221 | 222 | # worker 2 should also succeed 223 | ack = json.loads(await conn2.recv()) 224 | self.assertTrue(ack["success"]) 225 | 226 | @tornado.testing.gen_test 227 | async def test_wrong_job_id(self): 228 | async with self.worker_ws_conn( 229 | worker_id="test_worker", headers=self.get_header() 230 | ) as conn: 231 | await conn.send( 232 | json.dumps({"type": "register", "args": {"hostname": "eniac"}}) 233 | ) 234 | 235 | ack = json.loads(await conn.recv()) 236 | self.assertTrue(ack["success"]) 237 | 238 | try: 239 | await conn.send( 240 | json.dumps( 241 | { 242 | "type": "job_result", 243 | "args": { 244 | "grading_job_id": "no_such_id", 245 | "success": True, 246 | "results": [{"res": "spoof"}], 247 | "logs": {"stdout": "stdout", "stderr": "stderr"}, 248 | }, 249 | } 250 | ) 251 | ) 252 | except websockets.exceptions.ConnectionClosed as e: 253 | self.assertEqual(e.code, 1002) 254 | -------------------------------------------------------------------------------- /tests/api/base.py: -------------------------------------------------------------------------------- 1 | import json 2 | import jsonschema 3 | import uuid 4 | import unittest 5 | 6 | import websockets 7 | 8 | from tornado.testing import AsyncHTTPTestCase 9 | from tornado.httpclient import AsyncHTTPClient 10 | 11 | import broadway.api.definitions as definitions 12 | 13 | from broadway.api.utils.bootstrap import ( 14 | initialize_global_settings, 15 | initialize_database, 16 | initialize_app, 17 | ) 18 | from broadway.api.flags import app_flags 19 | 20 | import tests.api._utils.database as database_utils 21 | 22 | MOCK_COURSE1 = "mock_course1" 23 | MOCK_COURSE2 = "mock_course2" 24 | 25 | MOCK_CLIENT_TOKEN1 = "12345" 26 | MOCK_CLIENT_TOKEN2 = "67890" 27 | 28 | MOCK_CLIENT_QUERY_TOKEN = "C4OWEM2XHD" 29 | 30 | 31 | class AsyncHTTPMixin(AsyncHTTPTestCase): 32 | def __init__(self, *args, **kwargs): 33 | super().__init__(*args, **kwargs) 34 | 35 | def get_app(self): 36 | """ 37 | Note: this is called by setUp in 
AsyncHTTPTestCase 38 | """ 39 | 40 | flags = app_flags.parse( 41 | [ 42 | "tests/api/_fixtures/config.json", 43 | "--token", 44 | "test", 45 | "--debug", 46 | "--course-config=''", 47 | # provide an empty path for testing course config 48 | ], 49 | use_exc=True, 50 | ) 51 | 52 | self.app = initialize_app(initialize_global_settings(flags), flags) 53 | 54 | initialize_database(self.app.settings, flags) 55 | 56 | database_utils.initialize_db( 57 | self.app.settings, 58 | { 59 | MOCK_COURSE1: { 60 | "tokens": [MOCK_CLIENT_TOKEN1], 61 | "query_tokens": [MOCK_CLIENT_QUERY_TOKEN], 62 | }, 63 | MOCK_COURSE2: { 64 | "tokens": [MOCK_CLIENT_TOKEN1, MOCK_CLIENT_TOKEN2], 65 | "query_tokens": [], 66 | }, 67 | }, 68 | ) 69 | 70 | return self.app 71 | 72 | def get_token(self): 73 | return self.app.settings["FLAGS"]["token"] 74 | 75 | def get_header(self, override=None): 76 | return { 77 | "Authorization": "Bearer " 78 | + (self.get_token() if not override else override) 79 | } 80 | 81 | def tearDown(self): 82 | super().tearDown() 83 | database_utils.clear_db(self.app.settings) 84 | 85 | 86 | class ClientMixin(AsyncHTTPMixin): 87 | def __init__(self, *args, **kwargs): 88 | super().__init__(*args, **kwargs) 89 | self.client_header1 = {"Authorization": "Bearer " + MOCK_CLIENT_TOKEN1} 90 | self.client_header2 = {"Authorization": "Bearer " + MOCK_CLIENT_TOKEN2} 91 | self.client_header_query_token = { 92 | "Authorization": "Bearer " + MOCK_CLIENT_QUERY_TOKEN 93 | } 94 | self.course1 = MOCK_COURSE1 95 | self.course2 = MOCK_COURSE2 96 | 97 | def upload_grading_config( 98 | self, course_id, assignment_name, header, grading_config, expected_code 99 | ): 100 | response = self.fetch( 101 | self.get_url( 102 | "/api/v1/grading_config/{}/{}".format(course_id, assignment_name) 103 | ), 104 | method="POST", 105 | body=json.dumps(grading_config), 106 | headers=header, 107 | ) 108 | self.assertEqual(response.code, expected_code) 109 | 110 | def get_grading_config(self, course_id, assignment_name, header, expected_code): 111 | response = self.fetch( 112 | self.get_url( 113 | "/api/v1/grading_config/{}/{}".format(course_id, assignment_name) 114 | ), 115 | method="GET", 116 | headers=header, 117 | ) 118 | self.assertEqual(response.code, expected_code) 119 | 120 | if response.code == 200: 121 | response_body = json.loads(response.body.decode("utf-8")) 122 | return response_body["data"] 123 | 124 | def start_grading_run( 125 | self, course_id, assignment_name, header, students, expected_code 126 | ): 127 | response = self.fetch( 128 | self.get_url( 129 | "/api/v1/grading_run/{}/{}".format(course_id, assignment_name) 130 | ), 131 | method="POST", 132 | headers=header, 133 | body=json.dumps(students), 134 | ) 135 | self.assertEqual(response.code, expected_code) 136 | 137 | if response.code == 200: 138 | response_body = json.loads(response.body.decode("utf-8")) 139 | return response_body["data"]["grading_run_id"] 140 | 141 | def get_grading_run_state(self, course_id, grading_run_id, header): 142 | response = self.fetch( 143 | self.get_url( 144 | "/api/v1/grading_run_status/{}/{}".format(course_id, grading_run_id) 145 | ), 146 | method="GET", 147 | headers=header, 148 | ) 149 | self.assertEqual(response.code, 200) 150 | 151 | response_body = json.loads(response.body.decode("utf-8")) 152 | return response_body["data"] 153 | 154 | def check_grading_run_status( 155 | self, course_id, grading_run_id, header, expected_code, expected_state=None 156 | ): 157 | response = self.fetch( 158 | self.get_url( 159 | 
"/api/v1/grading_run_status/{}/{}".format(course_id, grading_run_id) 160 | ), 161 | method="GET", 162 | headers=header, 163 | ) 164 | self.assertEqual(response.code, expected_code) 165 | 166 | if response.code == 200: 167 | response_body = json.loads(response.body.decode("utf-8")) 168 | self.assertEqual(response_body["data"].get("state"), expected_state) 169 | 170 | def get_grading_run_env(self, course_id, grading_run_id, header): 171 | response = self.fetch( 172 | self.get_url( 173 | "/api/v1/grading_run_env/{}/{}".format(course_id, grading_run_id) 174 | ), 175 | method="GET", 176 | headers=header, 177 | ) 178 | 179 | self.assertEqual(response.code, 200) 180 | 181 | response_body = json.loads(response.body.decode("utf-8")) 182 | return response_body["data"] 183 | 184 | def get_grading_job_log(self, course_id, job_id, header, expected_code): 185 | response = self.fetch( 186 | self.get_url("/api/v1/grading_job_log/{}/{}".format(course_id, job_id)), 187 | method="GET", 188 | headers=header, 189 | ) 190 | self.assertEqual(response.code, expected_code) 191 | 192 | if response.code == 200: 193 | response_body = json.loads(response.body.decode("utf-8")) 194 | return response_body["data"] 195 | 196 | def get_course_worker_nodes(self, course_id, scope, header, expected_code): 197 | response = self.fetch( 198 | self.get_url("/api/v1/worker/{}/{}".format(course_id, scope)), 199 | method="GET", 200 | headers=header, 201 | ) 202 | self.assertEqual(response.code, expected_code) 203 | 204 | if response.code == 200: 205 | response_body = json.loads(response.body.decode("utf-8")) 206 | return response_body["data"] 207 | 208 | def get_course_queue_length(self, course_id, header, expected_code): 209 | response = self.fetch( 210 | self.get_url("/api/v1/queue/{}/length".format(course_id)), 211 | method="GET", 212 | headers=header, 213 | ) 214 | self.assertEqual(response.code, expected_code) 215 | 216 | if response.code == 200: 217 | response_body = json.loads(response.body.decode("utf-8")) 218 | return response_body["data"] 219 | 220 | def get_grading_job_queue_position( 221 | self, course_id, grading_job_id, header, expected_code 222 | ): 223 | response = self.fetch( 224 | self.get_url( 225 | "/api/v1/queue/{}/{}/position".format(course_id, grading_job_id) 226 | ), 227 | method="GET", 228 | headers=header, 229 | ) 230 | self.assertEqual(response.code, expected_code) 231 | 232 | if response.code == 200: 233 | response_body = json.loads(response.body.decode("utf-8")) 234 | return response_body["data"] 235 | 236 | def get_grading_job_stream(self, course_id, grading_job_id, header, callback): 237 | # We have to create a new client as to not block other requests while receiving 238 | # streaming chunks 239 | AsyncHTTPClient().fetch( 240 | self.get_url("/api/v1/stream/{}/{}".format(course_id, grading_job_id)), 241 | method="GET", 242 | headers=header, 243 | header_callback=lambda _: None, 244 | streaming_callback=callback, 245 | ) 246 | 247 | 248 | class GraderMixin(AsyncHTTPMixin): 249 | def register_worker( 250 | self, header, expected_code=200, worker_id=None, hostname="mock_hostname" 251 | ): 252 | worker_id = worker_id or str(uuid.uuid4()) 253 | 254 | response = self.fetch( 255 | self.get_url("/api/v1/worker/{}".format(worker_id)), 256 | method="POST", 257 | headers=header, 258 | body=json.dumps({"hostname": hostname}), 259 | ) 260 | 261 | self.assertEqual(response.code, expected_code) 262 | 263 | if expected_code == 200: 264 | return worker_id 265 | 266 | def poll_job(self, worker_id, header): 267 | response 
248 | class GraderMixin(AsyncHTTPMixin): 249 | def register_worker( 250 | self, header, expected_code=200, worker_id=None, hostname="mock_hostname" 251 | ): 252 | worker_id = worker_id or str(uuid.uuid4()) 253 | 254 | response = self.fetch( 255 | self.get_url("/api/v1/worker/{}".format(worker_id)), 256 | method="POST", 257 | headers=header, 258 | body=json.dumps({"hostname": hostname}), 259 | ) 260 | 261 | self.assertEqual(response.code, expected_code) 262 | 263 | if expected_code == 200: 264 | return worker_id 265 | 266 | def poll_job(self, worker_id, header): 267 | response = self.fetch( 268 | self.get_url("/api/v1/grading_job/{}".format(worker_id)), 269 | method="GET", 270 | headers=header, 271 | ) 272 | 273 | if response.code == 200: 274 | self.assertEqual(response.code, 200) 275 | response_body = json.loads(response.body.decode("utf-8")) 276 | self.assertIn("grading_job_id", response_body["data"]) 277 | self.assertIn("stages", response_body["data"]) 278 | return response_body["data"] 279 | 280 | return response.code 281 | 282 | def post_job_result( 283 | self, worker_id, header, job_id, job_success=True, expected_code=200 284 | ): 285 | body = { 286 | "grading_job_id": job_id, 287 | "success": job_success, 288 | "results": [{"res": "container 1 res"}, {"res": "container 2 res"}], 289 | "logs": {"stdout": "stdout", "stderr": "stderr"}, 290 | } 291 | response = self.fetch( 292 | self.get_url("/api/v1/grading_job/{}".format(worker_id)), 293 | method="POST", 294 | headers=header, 295 | body=json.dumps(body), 296 | ) 297 | self.assertEqual(response.code, expected_code) 298 | 299 | def send_heartbeat(self, worker_id, header, expected_code=200): 300 | response = self.fetch( 301 | self.get_url("/api/v1/heartbeat/{}".format(worker_id)), 302 | method="POST", 303 | body="", 304 | headers=header, 305 | ) 306 | self.assertEqual(response.code, expected_code) 307 | 308 | 309 | class EqualityMixin(unittest.TestCase): 310 | def assert_equal_grading_config(self, actual_config, expected_config): 311 | jsonschema.validate(actual_config, definitions.grading_config) 312 | jsonschema.validate(expected_config, definitions.grading_config) 313 | 314 | for config_key in expected_config: 315 | if config_key == "env": 316 | self.assertEqual( 317 | sorted(actual_config.get(config_key)), 318 | sorted(expected_config[config_key]), 319 | ) 320 | else: 321 | self.assert_equal_grading_pipeline( 322 | actual_config.get(config_key), expected_config[config_key] 323 | ) 324 | 325 | def assert_equal_grading_pipeline(self, actual_pipeline, expected_pipeline): 326 | jsonschema.validate(actual_pipeline, definitions.grading_pipeline) 327 | jsonschema.validate(expected_pipeline, definitions.grading_pipeline) 328 | 329 | for i in range(len(expected_pipeline)): 330 | self.assert_equal_grading_stage(actual_pipeline[i], expected_pipeline[i]) 331 | 332 | def assert_equal_grading_stage(self, actual_stage, expected_stage): 333 | jsonschema.validate(actual_stage, definitions.grading_stage) 334 | jsonschema.validate(expected_stage, definitions.grading_stage) 335 | 336 | for stage_key in expected_stage: 337 | if stage_key == "env": 338 | self.assertTrue( 339 | set(expected_stage["env"].keys()).issubset( 340 | set(actual_stage["env"].keys()) 341 | ) 342 | ) 343 | for env_key in expected_stage[stage_key]: 344 | self.assertEqual( 345 | actual_stage["env"].get(env_key), expected_stage["env"][env_key] 346 | ) 347 | else: 348 | self.assertEqual(actual_stage.get(stage_key), expected_stage[stage_key]) 349 | 350 | 351 | class WorkerWSMixin(AsyncHTTPMixin): 352 | # lower-level connection helper 353 | def worker_ws_conn(self, worker_id, headers): 354 | url = self.get_url("/api/v1/worker_ws/{}".format(worker_id)).replace( 355 | "http://", "ws://" 356 | ) 357 | return websockets.connect(url, extra_headers=headers) 358 | 359 | def worker_ws_conn_register(self, conn, hostname): 360 | return conn.send( 361 | json.dumps({"type": "register", "args": {"hostname": hostname}}) 362 | ) 363 | 364 | def worker_ws_conn_result(self, conn, job_id, job_success): 365 | args = { 366 | "grading_job_id": job_id, 367 | "success": job_success, 368 |
"results": [{"res": "container 1 res"}, {"res": "container 2 res"}], 369 | "logs": {"stdout": "stdout", "stderr": "stderr"}, 370 | } 371 | 372 | return conn.send(json.dumps({"type": "job_result", "args": args})) 373 | 374 | # need to be closed 375 | async def worker_ws(self, worker_id, headers, hostname="eniac"): 376 | conn = await self.worker_ws_conn(worker_id=worker_id, headers=headers) 377 | 378 | await self.worker_ws_conn_register(conn, hostname) 379 | 380 | ack = json.loads(await conn.recv()) 381 | self.assertTrue(ack["success"]) 382 | 383 | return conn 384 | 385 | 386 | class BaseTest(WorkerWSMixin, EqualityMixin, ClientMixin, GraderMixin): 387 | pass 388 | -------------------------------------------------------------------------------- /broadway/api/handlers/client.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import tornado.ioloop 3 | 4 | from tornado_json import schema 5 | 6 | import broadway.api.daos as daos 7 | import broadway.api.definitions as definitions 8 | import broadway.api.models as models 9 | from broadway.api.decorators.auth import ( 10 | authenticate_course_admin, 11 | authenticate_course_member_or_admin, 12 | ) 13 | from broadway.api.handlers.base import BaseAPIHandler 14 | from broadway.api.utils.run import continue_grading_run 15 | from broadway.api.utils.time import get_time 16 | from broadway.api.callbacks.worker import worker_schedule_job 17 | 18 | logger = logging.getLogger(__name__) 19 | 20 | 21 | class ClientAPIHandler(BaseAPIHandler): 22 | def get_assignment_id(self, **kwargs): 23 | return daos.AssignmentConfigDao.id_from( 24 | kwargs.get("course_id"), kwargs.get("assignment_name") 25 | ) 26 | 27 | 28 | class GradingConfigHandler(ClientAPIHandler): 29 | @authenticate_course_admin 30 | @schema.validate(input_schema=definitions.grading_config) 31 | def post(self, *args, **kwargs): 32 | assignment_id = self.get_assignment_id(**kwargs) 33 | config = models.AssignmentConfig(id_=assignment_id, **self.body) 34 | config.id = assignment_id 35 | 36 | config_dao = daos.AssignmentConfigDao(self.settings) 37 | config_dao.delete_by_id(assignment_id) 38 | config_dao.insert(config) 39 | 40 | @authenticate_course_admin 41 | @schema.validate(on_empty_404=True, output_schema=definitions.grading_config) 42 | def get(self, *args, **kwargs): 43 | assignment_id = self.get_assignment_id(**kwargs) 44 | 45 | config_dao = daos.AssignmentConfigDao(self.settings) 46 | config = config_dao.find_by_id(assignment_id) 47 | if not config: 48 | self.abort({"message": "assignment configuration not found"}) 49 | return 50 | 51 | return config.to_dict() 52 | 53 | 54 | class GradingRunHandler(ClientAPIHandler): 55 | @authenticate_course_admin 56 | @schema.validate( 57 | input_schema={ 58 | "type": "object", 59 | "properties": { 60 | "pre_processing_env": {"type": "object"}, 61 | "students_env": {"type": "array", "items": {"type": "object"}}, 62 | "post_processing_env": {"type": "object"}, 63 | }, 64 | "required": ["students_env"], 65 | "additionalProperties": False, 66 | }, 67 | output_schema={ 68 | "type": "object", 69 | "properties": {"grading_run_id": {"type": "string"}}, 70 | "required": ["grading_run_id"], 71 | "additionalProperties": False, 72 | }, 73 | on_empty_404=True, 74 | ) 75 | def post(self, *args, **kwargs): 76 | assignment_id = self.get_assignment_id(**kwargs) 77 | 78 | config_dao = daos.AssignmentConfigDao(self.settings) 79 | config = config_dao.find_by_id(assignment_id) 80 | if not config: 81 | self.abort({"message": 
"assignment configuration not found"}) 82 | return 83 | 84 | if not self._assert_run_valid(config): 85 | # abort in valid-run method for conciseness 86 | return 87 | 88 | run_attrs = { 89 | **self.body, 90 | "assignment_id": assignment_id, 91 | "started_at": get_time(), 92 | "state": models.GradingRunState.READY, 93 | "student_jobs_left": len(self.body.get("students_env")), 94 | } 95 | run = models.GradingRun(**run_attrs) 96 | 97 | run_dao = daos.GradingRunDao(self.settings) 98 | run.id = str(run_dao.insert(run).inserted_id) 99 | 100 | if not continue_grading_run(self.settings, run): 101 | self.abort({"message": "failed to start grading run"}, status=500) 102 | return 103 | 104 | # trigger schedule event 105 | tornado.ioloop.IOLoop.current().add_callback(worker_schedule_job, self.settings) 106 | 107 | return {"grading_run_id": run.id} 108 | 109 | def _assert_run_valid(self, config): 110 | if "pre_processing_env" in self.body and not config.pre_processing_pipeline: 111 | self.abort( 112 | { 113 | "message": ( 114 | "pre-processing runtime environment provided, but no" 115 | "pre-processing stage was associated with this assignment" 116 | ) 117 | }, 118 | status=400, 119 | ) 120 | return False 121 | 122 | if "post_processing_env" in self.body and not config.post_processing_pipeline: 123 | self.abort( 124 | { 125 | "message": ( 126 | "post-processing runtime environment provided, but no" 127 | "post-processing stage was associated with this assignment" 128 | ) 129 | }, 130 | status=400, 131 | ) 132 | return False 133 | return True 134 | 135 | 136 | class GradingRunStatusHandler(ClientAPIHandler): 137 | @authenticate_course_member_or_admin 138 | @schema.validate( 139 | output_schema={ 140 | "type": "object", 141 | "properties": { 142 | "state": {"type": "string"}, 143 | "pre_processing_job_state": {"type": ["null", "object"]}, 144 | "post_processing_job_state": {"type": ["null", "object"]}, 145 | "student_jobs_state": {"type": "object"}, 146 | }, 147 | "required": ["state"], 148 | "additionalProperties": False, 149 | }, 150 | on_empty_404=True, 151 | ) 152 | def get(self, *args, **kwargs): 153 | grading_run_id = kwargs.get("run_id") 154 | 155 | grading_run_dao = daos.GradingRunDao(self.settings) 156 | grading_run = grading_run_dao.find_by_id(grading_run_id) 157 | if grading_run is None: 158 | self.abort({"message": "grading run with the given ID not found"}) 159 | return 160 | 161 | grading_job_dao = daos.GradingJobDao(self.settings) 162 | grading_jobs = grading_job_dao.find_by_run_id(grading_run_id) 163 | pre_processing_job = next( 164 | filter( 165 | lambda j: j.type == models.GradingJobType.PRE_PROCESSING, grading_jobs 166 | ), 167 | None, 168 | ) 169 | post_processing_job = next( 170 | filter( 171 | lambda j: j.type == models.GradingJobType.POST_PROCESSING, grading_jobs 172 | ), 173 | None, 174 | ) 175 | student_jobs = filter( 176 | lambda j: j.type == models.GradingJobType.STUDENT, grading_jobs 177 | ) 178 | 179 | # [jobs] -> { job_id: job_state } 180 | def get_job_id_to_state_map(jobs): 181 | if jobs is None: 182 | return None 183 | else: 184 | return {job.id: job.get_state().value for job in jobs} 185 | 186 | return { 187 | "state": grading_run.state.value, 188 | "pre_processing_job_state": get_job_id_to_state_map( 189 | [pre_processing_job] if pre_processing_job else None 190 | ), 191 | "post_processing_job_state": get_job_id_to_state_map( 192 | [post_processing_job] if post_processing_job else None 193 | ), 194 | "student_jobs_state": get_job_id_to_state_map(student_jobs), 195 | } 196 
197 | 198 | class GradingRunEnvHandler(ClientAPIHandler): 199 | @authenticate_course_admin 200 | @schema.validate( 201 | output_schema={ 202 | "type": "object", 203 | "properties": { 204 | "pre_processing_env": {"type": ["null", "object"]}, 205 | "post_processing_env": {"type": ["null", "object"]}, 206 | "student_env": {"type": "object"}, 207 | }, 208 | "required": ["student_env"], 209 | "additionalProperties": False, 210 | }, 211 | on_empty_404=True, 212 | ) 213 | def get(self, *args, **kwargs): 214 | grading_run_id = kwargs.get("run_id") 215 | 216 | grading_run_dao = daos.GradingRunDao(self.settings) 217 | grading_run = grading_run_dao.find_by_id(grading_run_id) 218 | if grading_run is None: 219 | self.abort({"message": "grading run with the given ID not found"}) 220 | return 221 | 222 | grading_job_dao = daos.GradingJobDao(self.settings) 223 | grading_jobs = grading_job_dao.find_by_run_id(grading_run_id) 224 | 225 | # Helper: map each job id to the env values collected from its stages 226 | def get_job_id_to_env_map(jobs): 227 | job_id_to_env_map = {} 228 | for job in jobs: 229 | env_dict = {} 230 | for stage in job.stages: 231 | env = stage["env"] 232 | for key in env: 233 | env_dict[key] = env_dict.get(key, set()) 234 | env_dict[key].add(env[key]) 235 | # Convert each set into a list for JSON 236 | for key in env_dict: 237 | env_values = list(env_dict[key]) 238 | env_dict[key] = env_values 239 | 240 | job_id_to_env_map[job.id] = env_dict 241 | 242 | return job_id_to_env_map 243 | 244 | pre_processing_job = next( 245 | filter( 246 | lambda j: j.type == models.GradingJobType.PRE_PROCESSING, grading_jobs 247 | ), 248 | None, 249 | ) 250 | post_processing_job = next( 251 | filter( 252 | lambda j: j.type == models.GradingJobType.POST_PROCESSING, grading_jobs 253 | ), 254 | None, 255 | ) 256 | student_jobs = filter( 257 | lambda j: j.type == models.GradingJobType.STUDENT, grading_jobs 258 | ) 259 | 260 | # Make sure pre_processing_env exists 261 | if pre_processing_job is None: 262 | pre_processing_dict = None 263 | else: 264 | pre_processing_dict = get_job_id_to_env_map([pre_processing_job]) 265 | 266 | # Make sure post_processing_env exists 267 | if post_processing_job is None: 268 | post_processing_dict = None 269 | else: 270 | post_processing_dict = get_job_id_to_env_map([post_processing_job]) 271 | 272 | # We are guaranteed that this dict exists in the run, so no need to check 273 | student_dict = get_job_id_to_env_map(student_jobs) 274 | 275 | return { 276 | "pre_processing_env": pre_processing_dict, 277 | "post_processing_env": post_processing_dict, 278 | "student_env": student_dict, 279 | } 280 | 281 |
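# A worked example of get_job_id_to_env_map above: for one job whose two
# stages carry env {"NETID": "alice"} and {"NETID": "alice", "STAGE": "post"},
# each key's values are first collected into a set (deduplicating the
# repeated "alice") and then converted to a list for JSON, yielding
# (the job id is a placeholder):
#
#   {"<job id>": {"NETID": ["alice"], "STAGE": ["post"]}}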
282 | class GradingJobLogHandler(ClientAPIHandler): 283 | @authenticate_course_admin 284 | @schema.validate( 285 | output_schema={ 286 | "type": "object", 287 | "properties": {"stderr": {"type": "string"}, "stdout": {"type": "string"}}, 288 | "required": ["stderr", "stdout"], 289 | "additionalProperties": False, 290 | }, 291 | on_empty_404=True, 292 | ) 293 | def get(self, *args, **kwargs): 294 | job_id = kwargs["job_id"] 295 | 296 | job_log_dao = daos.GradingJobLogDao(self.settings) 297 | job_log = job_log_dao.find_by_job_id(job_id) 298 | 299 | if job_log is None: 300 | self.abort( 301 | { 302 | "message": "grading job with the given ID " 303 | "not found or has not finished" 304 | } 305 | ) 306 | return 307 | 308 | return {"stderr": job_log.stderr, "stdout": job_log.stdout} 309 | 310 | 311 | class CourseWorkerNodeHandler(ClientAPIHandler): 312 | @authenticate_course_admin 313 | @schema.validate( 314 | output_schema={ 315 | "type": "object", 316 | "properties": { 317 | "worker_nodes": { 318 | "type": "array", 319 | "items": { 320 | "type": "object", 321 | "properties": { 322 | "hostname": {"type": "string"}, 323 | "jobs_processed": {"type": "number"}, 324 | "busy": {"type": "boolean"}, 325 | "alive": {"type": "boolean"}, 326 | }, 327 | "required": ["hostname", "jobs_processed", "busy", "alive"], 328 | "additionalProperties": False, 329 | }, 330 | } 331 | }, 332 | "required": ["worker_nodes"], 333 | "additionalProperties": False, 334 | }, 335 | on_empty_404=True, 336 | ) 337 | def get(self, *args, **kwargs): 338 | scope = kwargs.get("scope") 339 | worker_node_dao = daos.WorkerNodeDao(self.settings) 340 | 341 | if scope == "all": 342 | return { 343 | "worker_nodes": list( 344 | map( 345 | lambda worker_node: { 346 | "hostname": worker_node.hostname, 347 | "jobs_processed": worker_node.jobs_processed, 348 | "busy": (worker_node.running_job_id is not None), 349 | "alive": worker_node.is_alive, 350 | }, 351 | worker_node_dao.find_all(), 352 | ) 353 | ) 354 | } 355 | else: 356 | self.abort( 357 | {"message": "scope {} has not been implemented yet".format(scope)}, 404 358 | ) 359 | return 360 | 361 | 362 | class CourseQueueLengthHandler(ClientAPIHandler): 363 | @authenticate_course_admin 364 | @schema.validate( 365 | output_schema={ 366 | "type": "object", 367 | "properties": {"length": {"type": "number"}}, 368 | "required": ["length"], 369 | "additionalProperties": False, 370 | }, 371 | on_empty_404=True, 372 | ) 373 | def get(self, *args, **kwargs): 374 | course_id = kwargs["course_id"] 375 | queue = self.settings["QUEUE"] 376 | 377 | length = 0 378 | if queue.contains_key(course_id): 379 | length = queue.get_queue_length(course_id) 380 | 381 | return {"length": length} 382 | 383 | 384 | class GradingJobQueuePositionHandler(ClientAPIHandler): 385 | @authenticate_course_member_or_admin 386 | @schema.validate( 387 | output_schema={ 388 | "type": "object", 389 | "properties": {"position": {"type": "number"}}, 390 | "required": ["position"], 391 | "additionalProperties": False, 392 | }, 393 | on_empty_404=True, 394 | ) 395 | def get(self, *args, **kwargs): 396 | 397 | grading_job_id = kwargs.get("job_id") 398 | 399 | grading_job_dao = daos.GradingJobDao(self.settings) 400 | if grading_job_dao.find_by_id(grading_job_id) is None: 401 | self.abort({"message": "grading job with the given ID not found"}) 402 | return 403 | 404 | course_id = kwargs["course_id"] 405 | queue = self.settings["QUEUE"] 406 | 407 | if not queue.contains_key(course_id): 408 | self.abort( 409 | {"message": f"{course_id} does not exist as a course in the queue"} 410 | ) 411 | return 412 | 413 | queue_position = queue.get_position_in_queue(course_id, grading_job_id) 414 | if queue_position == -1: 415 | self.abort( 416 | {"message": f"{grading_job_id} has already passed through the queue"} 417 | ) 418 | return 419 | 420 | return {"position": queue_position} 421 | -------------------------------------------------------------------------------- /tests/api/integration/test_client.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import json 3 | from collections import deque 4 | 5 | import tests.api._fixtures.grading_configs as grading_configs 6 | import tests.api._fixtures.grading_runs as grading_runs 7 | 8 | from tests.api.base import BaseTest 9 | 10 | logging.disable(logging.WARNING) 11 | 12 | 13 | class GradingConfigEndpointsTest(BaseTest): 14 | def
test_no_token(self): 15 | self.upload_grading_config( 16 | self.course1, "assignment1", None, grading_configs.valid_configs[0], 401 17 | ) 18 | self.get_grading_config(self.course1, "assignment1", None, 401) 19 | 20 | def test_wrong_token(self): 21 | # course 1 can only be authenticated with client header 1, 22 | # course 2 can be authenticated with either 23 | self.upload_grading_config( 24 | self.course1, 25 | "assignment1", 26 | self.client_header2, 27 | grading_configs.valid_configs[0], 28 | 401, 29 | ) 30 | self.get_grading_config(self.course1, "assignment1", self.client_header2, 401) 31 | 32 | def test_invalid_course_id(self): 33 | self.upload_grading_config( 34 | "wrong_id", 35 | "assignment1", 36 | self.client_header1, 37 | grading_configs.valid_configs[0], 38 | 401, 39 | ) 40 | self.get_grading_config("wrong_id", "assignment1", self.client_header1, 401) 41 | 42 | def test_update_old_config(self): 43 | self.get_grading_config(self.course1, "assignment1", self.client_header1, 400) 44 | self.upload_grading_config( 45 | self.course1, 46 | "assignment1", 47 | self.client_header1, 48 | grading_configs.valid_configs[0], 49 | 200, 50 | ) 51 | self.assert_equal_grading_config( 52 | self.get_grading_config( 53 | self.course1, "assignment1", self.client_header1, 200 54 | ), 55 | grading_configs.valid_configs[0], 56 | ) 57 | 58 | self.upload_grading_config( 59 | self.course1, 60 | "assignment1", 61 | self.client_header1, 62 | grading_configs.valid_configs[1], 63 | 200, 64 | ) 65 | self.assert_equal_grading_config( 66 | self.get_grading_config( 67 | self.course1, "assignment1", self.client_header1, 200 68 | ), 69 | grading_configs.valid_configs[1], 70 | ) 71 | 72 | def test_same_assignment_name(self): 73 | self.upload_grading_config( 74 | self.course1, 75 | "assignment1", 76 | self.client_header1, 77 | grading_configs.valid_configs[0], 78 | 200, 79 | ) 80 | self.assert_equal_grading_config( 81 | self.get_grading_config( 82 | self.course1, "assignment1", self.client_header1, 200 83 | ), 84 | grading_configs.valid_configs[0], 85 | ) 86 | 87 | self.upload_grading_config( 88 | self.course2, 89 | "assignment1", 90 | self.client_header2, 91 | grading_configs.valid_configs[1], 92 | 200, 93 | ) 94 | self.assert_equal_grading_config( 95 | self.get_grading_config( 96 | self.course2, "assignment1", self.client_header2, 200 97 | ), 98 | grading_configs.valid_configs[1], 99 | ) 100 | 101 | def test_multiple_tokens(self): 102 | # a course can use multiple tokens 103 | self.upload_grading_config( 104 | self.course2, 105 | "assignment1", 106 | self.client_header1, 107 | grading_configs.valid_configs[0], 108 | 200, 109 | ) 110 | self.assert_equal_grading_config( 111 | self.get_grading_config( 112 | self.course2, "assignment1", self.client_header1, 200 113 | ), 114 | grading_configs.valid_configs[0], 115 | ) 116 | 117 | self.upload_grading_config( 118 | self.course2, 119 | "assignment2", 120 | self.client_header2, 121 | grading_configs.valid_configs[1], 122 | 200, 123 | ) 124 | self.assert_equal_grading_config( 125 | self.get_grading_config( 126 | self.course2, "assignment2", self.client_header2, 200 127 | ), 128 | grading_configs.valid_configs[1], 129 | ) 130 | 131 | def test_shared_token(self): 132 | # the same token can be used by multiple courses 133 | self.upload_grading_config( 134 | self.course1, 135 | "assignment1", 136 | self.client_header1, 137 | grading_configs.valid_configs[0], 138 | 200, 139 | ) 140 | self.assert_equal_grading_config( 141 | self.get_grading_config( 142 | self.course1, 
"assignment1", self.client_header1, 200 143 | ), 144 | grading_configs.valid_configs[0], 145 | ) 146 | 147 | self.upload_grading_config( 148 | self.course2, 149 | "assignment2", 150 | self.client_header1, 151 | grading_configs.valid_configs[1], 152 | 200, 153 | ) 154 | self.assert_equal_grading_config( 155 | self.get_grading_config( 156 | self.course2, "assignment2", self.client_header1, 200 157 | ), 158 | grading_configs.valid_configs[1], 159 | ) 160 | 161 | def test_valid_configs(self): 162 | for idx, valid_config in enumerate(grading_configs.valid_configs): 163 | self.upload_grading_config( 164 | self.course1, str(idx), self.client_header1, valid_config, 200 165 | ) 166 | self.assert_equal_grading_config( 167 | self.get_grading_config( 168 | self.course1, str(idx), self.client_header1, 200 169 | ), 170 | valid_config, 171 | ) 172 | 173 | def test_invalid_configs(self): 174 | for idx, invalid_config in enumerate(grading_configs.invalid_configs): 175 | self.upload_grading_config( 176 | self.course1, str(idx), self.client_header1, invalid_config, 400 177 | ) 178 | self.get_grading_config(self.course1, str(idx), self.client_header1, 400) 179 | 180 | 181 | class GradingRunEndpointsTest(BaseTest): 182 | def test_no_token(self): 183 | self.start_grading_run( 184 | self.course1, "assignment1", None, grading_runs.one_student_job, 401 185 | ) 186 | self.check_grading_run_status(self.course1, "temp", None, 401) 187 | 188 | def test_wrong_token(self): 189 | # course 1 can only be authenticated with client header 1 190 | # course 2 can be authenticated with either 191 | self.start_grading_run( 192 | self.course1, 193 | "assignment1", 194 | self.client_header2, 195 | grading_runs.one_student_job, 196 | 401, 197 | ) 198 | self.check_grading_run_status(self.course1, "temp", self.client_header2, 401) 199 | 200 | def test_invalid_course_id(self): 201 | self.start_grading_run( 202 | "wrong_id", 203 | "assignment1", 204 | self.client_header1, 205 | grading_runs.one_student_job, 206 | 401, 207 | ) 208 | self.check_grading_run_status("wrong_id", "temp", self.client_header1, 401) 209 | 210 | def test_invalid_assignment_id(self): 211 | self.start_grading_run( 212 | self.course1, 213 | "assignment1", 214 | self.client_header1, 215 | grading_runs.one_student_job, 216 | 400, 217 | ) 218 | 219 | def test_invalid_run_id(self): 220 | self.upload_grading_config( 221 | self.course1, 222 | "assignment1", 223 | self.client_header1, 224 | grading_configs.valid_configs[0], 225 | 200, 226 | ) 227 | self.check_grading_run_status(self.course1, "temp", self.client_header1, 400) 228 | 229 | def test_invalid_run(self): 230 | # the config only has student env vars defined. 
class GradingRunEndpointsTest(BaseTest):
    def test_no_token(self):
        self.start_grading_run(
            self.course1, "assignment1", None, grading_runs.one_student_job, 401
        )
        self.check_grading_run_status(self.course1, "temp", None, 401)

    def test_wrong_token(self):
        # course 1 can only be authenticated with client header 1,
        # course 2 can be authenticated with either
        self.start_grading_run(
            self.course1,
            "assignment1",
            self.client_header2,
            grading_runs.one_student_job,
            401,
        )
        self.check_grading_run_status(self.course1, "temp", self.client_header2, 401)

    def test_invalid_course_id(self):
        self.start_grading_run(
            "wrong_id",
            "assignment1",
            self.client_header1,
            grading_runs.one_student_job,
            401,
        )
        self.check_grading_run_status("wrong_id", "temp", self.client_header1, 401)

    def test_invalid_assignment_id(self):
        # no config has been uploaded for this assignment
        self.start_grading_run(
            self.course1,
            "assignment1",
            self.client_header1,
            grading_runs.one_student_job,
            400,
        )

    def test_invalid_run_id(self):
        self.upload_grading_config(
            self.course1,
            "assignment1",
            self.client_header1,
            grading_configs.valid_configs[0],
            200,
        )
        self.check_grading_run_status(self.course1, "temp", self.client_header1, 400)

    def test_invalid_run(self):
        # the config only defines student env vars, so supplying
        # pre-processing or post-processing env vars is invalid
        self.upload_grading_config(
            self.course1,
            "assignment1",
            self.client_header1,
            grading_configs.only_student_config,
            200,
        )

        self.start_grading_run(
            self.course1,
            "assignment1",
            self.client_header1,
            grading_runs.one_student_and_pre,
            400,
        )
        self.start_grading_run(
            self.course1,
            "assignment1",
            self.client_header1,
            grading_runs.one_student_and_post,
            400,
        )
        self.start_grading_run(
            self.course1,
            "assignment1",
            self.client_header1,
            grading_runs.one_student_and_both,
            400,
        )

    def test_run_course_private(self):
        # a config uploaded to course 1 is not visible to course 2
        self.upload_grading_config(
            self.course1,
            "assignment1",
            self.client_header1,
            grading_configs.only_student_config,
            200,
        )
        self.start_grading_run(
            self.course2,
            "assignment1",
            self.client_header2,
            grading_runs.one_student_job,
            400,
        )
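

# Response shape assumed by the env-endpoint tests below (inferred from their
# assertions rather than from an API spec): after JSON decoding, the payload
# looks roughly like
#     {
#         "pre_processing_env": None,
#         "post_processing_env": None,
#         "student_env": {"<job id>": {"netid": ["<value>"]}},
#     }
# student_env is keyed by job id, which the client does not know up front,
# hence the tests iterate over .values() instead of indexing by id.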
class GradingRunEnvEndpointTest(BaseTest):
    def test_one_student(self):
        self.upload_grading_config(
            self.course1,
            "assignment1",
            self.client_header1,
            grading_configs.only_student_config,
            200,
        )

        run_id = self.start_grading_run(
            self.course1,
            "assignment1",
            self.client_header1,
            grading_runs.one_student_job,
            200,
        )
        read_value = self.get_grading_run_env(self.course1, run_id, self.client_header1)

        self.assertEqual(read_value["pre_processing_env"], None)
        self.assertEqual(read_value["post_processing_env"], None)

        # the test doesn't have access to job ids, so take the values of
        # student_env as a list and check the first (only) entry
        self.assertEqual(
            list(read_value["student_env"].values())[0]["netid"],
            ["test net id"],
        )

    def test_two_students(self):
        self.upload_grading_config(
            self.course1,
            "assignment1",
            self.client_header1,
            grading_configs.only_student_config,
            200,
        )

        run_id = self.start_grading_run(
            self.course1,
            "assignment1",
            self.client_header1,
            grading_runs.two_student_job,
            200,
        )
        read_value = self.get_grading_run_env(self.course1, run_id, self.client_header1)

        self.assertEqual(read_value["pre_processing_env"], None)
        self.assertEqual(read_value["post_processing_env"], None)
        netids = []
        for student_env in read_value["student_env"].values():
            netids.append(student_env["netid"][0])

        self.assertIn("student id 1", netids)
        self.assertIn("student id 2", netids)


class GradingJobLogEndpointTest(BaseTest):
    def test_no_token(self):
        self.get_grading_job_log(self.course1, "weird", None, 401)

    def test_wrong_token(self):
        # course 1 can only be authenticated with client header 1,
        # course 2 can be authenticated with either
        self.get_grading_job_log(self.course1, "weird", self.client_header2, 401)

    def test_invalid_job_id(self):
        self.get_grading_job_log(self.course1, "weird", self.client_header1, 400)


class CourseWorkerNodeEndpointTest(BaseTest):
    def test_no_token(self):
        self.get_course_worker_nodes(self.course1, "all", None, 401)

    def test_wrong_token(self):
        # course 1 can only be authenticated with client header 1,
        # course 2 can be authenticated with either
        self.get_course_worker_nodes(self.course1, "all", self.client_header2, 401)

    def test_invalid_course_id(self):
        self.get_course_worker_nodes("wrong_id", "all", self.client_header1, 401)

    def test_invalid_scope(self):
        # an unrecognized worker scope (anything other than a supported value
        # such as "all") is routed as 404 rather than 400
        self.get_course_worker_nodes(
            self.course1, "invalid_scope", self.client_header1, 404
        )
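

# The queue-length tests below treat the endpoint as a per-course view over a
# shared FIFO of jobs: starting a run with N student jobs raises that course's
# reported length by N, and every successful worker poll drains exactly one
# job, whichever course it belongs to.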
class CourseQueueLengthEndpointTest(BaseTest):
    def assertLengthEquals(self, course_id, header, expected_len):
        length = self.get_course_queue_length(course_id, header, 200)["length"]
        self.assertEqual(expected_len, length)

    def test_single_course(self):
        num_students = 10

        # Upload the jobs
        self.upload_grading_config(
            self.course1,
            "assignment1",
            self.client_header1,
            grading_configs.only_student_config,
            200,
        )
        self.start_grading_run(
            self.course1,
            "assignment1",
            self.client_header1,
            grading_runs.generate_n_student_jobs(num_students),
            200,
        )

        self.assertLengthEquals(self.course1, self.client_header1, num_students)

        # Now, run those jobs
        worker_id = self.register_worker(self.get_header())
        for _ in range(num_students):
            self.poll_job(worker_id, self.get_header())

        # No more jobs should be left
        self.assertLengthEquals(self.course1, self.client_header1, 0)

    def test_multiple_courses(self):
        num_students1 = 32
        num_students2 = 56

        # Upload the jobs
        self.upload_grading_config(
            self.course1,
            "assignment1",
            self.client_header1,
            grading_configs.only_student_config,
            200,
        )
        self.upload_grading_config(
            self.course2,
            "assignment2",
            self.client_header2,
            grading_configs.only_student_config,
            200,
        )
        self.start_grading_run(
            self.course1,
            "assignment1",
            self.client_header1,
            grading_runs.generate_n_student_jobs(num_students1),
            200,
        )
        self.start_grading_run(
            self.course2,
            "assignment2",
            self.client_header2,
            grading_runs.generate_n_student_jobs(num_students2),
            200,
        )

        self.assertLengthEquals(self.course1, self.client_header1, num_students1)
        self.assertLengthEquals(self.course2, self.client_header2, num_students2)

        # Now, run the jobs
        worker_id = self.register_worker(self.get_header())
        for _ in range(num_students1 + num_students2):
            self.poll_job(worker_id, self.get_header())

        # No more jobs should be left.
        self.assertLengthEquals(self.course1, self.client_header1, 0)
        self.assertLengthEquals(self.course2, self.client_header2, 0)

    def test_invalid_course(self):
        self.upload_grading_config(
            self.course1,
            "assignment1",
            self.client_header1,
            grading_configs.only_student_config,
            200,
        )
        self.upload_grading_config(
            self.course2,
            "assignment2",
            self.client_header2,
            grading_configs.only_student_config,
            200,
        )

        # No jobs should have been pushed to the queue yet,
        # since we didn't start any grading runs.
        self.assertLengthEquals(self.course1, self.client_header1, 0)
        self.assertLengthEquals(self.course2, self.client_header2, 0)


class GradingJobQueuePositionEndpointTest(BaseTest):
    def assert_position_equals(self, course_id, grading_job_id, header, expected_pos):
        pos = self.get_grading_job_queue_position(
            course_id, grading_job_id, header, 200
        )["position"]
        self.assertEqual(expected_pos, pos)

    def test_single_job(self):
        # Upload the job
        self.upload_grading_config(
            self.course1,
            "assignment1",
            self.client_header1,
            grading_configs.only_student_config,
            200,
        )
        grading_run_id = self.start_grading_run(
            self.course1,
            "assignment1",
            self.client_header1,
            grading_runs.one_student_job,
            200,
        )

        run_state = self.get_grading_run_state(
            self.course1, grading_run_id, self.client_header1
        )

        # There should only be one job in the run
        job_id = list(run_state["student_jobs_state"].keys())[0]

        # There should be 0 jobs ahead of this job in the queue
        self.assert_position_equals(self.course1, job_id, self.client_header1, 0)

        # Now, run the job
        worker_id = self.register_worker(self.get_header())
        self.poll_job(worker_id, self.get_header())

        # The job should no longer be in the queue
        self.get_grading_job_queue_position(
            self.course1, job_id, self.client_header1, 400
        )

    def test_multiple_jobs(self):
        num_jobs = 10

        # Upload the jobs
        self.upload_grading_config(
            self.course1,
            "assignment1",
            self.client_header1,
            grading_configs.only_student_config,
            200,
        )

        job_ids = []
        for _ in range(num_jobs):
            grading_run_id = self.start_grading_run(
                self.course1,
                "assignment1",
                self.client_header1,
                grading_runs.one_student_job,
                200,
            )

            # Keep track of the job ids
            run_state = self.get_grading_run_state(
                self.course1, grading_run_id, self.client_header1
            )
            job_ids.append(list(run_state["student_jobs_state"].keys())[0])

        for ind, job_id in enumerate(job_ids):
            self.assert_position_equals(self.course1, job_id, self.client_header1, ind)

        # Now, run the jobs
        worker_id = self.register_worker(self.get_header())
        for starting_ind, _ in enumerate(job_ids):
            # Run the job
            self.poll_job(worker_id, self.get_header())
            # Make sure the remaining jobs have each moved forward one position
            for expected_pos, waiting_job in enumerate(job_ids[starting_ind + 1 :]):
                self.assert_position_equals(
                    self.course1, waiting_job, self.client_header1, expected_pos
                )

        # The jobs should no longer be in the queue
        for job_id in job_ids:
            self.get_grading_job_queue_position(
                self.course1, job_id, self.client_header1, 400
            )
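

# Illustrative frame layout for the stream endpoint (mirrors the test-local
# create_chunk helper below; the server is expected to emit byte-identical
# SSE frames): a queue-position update for a job arrives as
#     event: status_update
#     data: {"type": "position", "data": 0}
# followed later by "state" frames for STARTED and FINISHED once a worker
# picks the job up and posts its result.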
class StreamEndpointTest(BaseTest):
    def test_stream(self):
        num_jobs = 5

        # Upload the jobs
        self.upload_grading_config(
            self.course1,
            "assignment1",
            self.client_header1,
            grading_configs.only_student_config,
            200,
        )

        job_ids = []
        for _ in range(num_jobs):
            grading_run_id = self.start_grading_run(
                self.course1,
                "assignment1",
                self.client_header1,
                grading_runs.one_student_job,
                200,
            )

            # Keep track of the job ids
            # Also tests that the query token can be used for this endpoint
            run_state = self.get_grading_run_state(
                self.course1, grading_run_id, self.client_header_query_token
            )
            job_ids.append(list(run_state["student_jobs_state"].keys())[0])

        for ind, job_id in enumerate(job_ids):

            def _create_callback(chunks):
                def _callback(chunk):
                    # each received chunk must match the most recently
                    # appended expectation (the deque is used as a stack)
                    self.assertNotEqual(len(chunks), 0)
                    self.assertEqual(chunk, chunks.pop())

                return _callback

            def create_chunk(event, data):
                blob = json.dumps({"type": event, "data": data})
                return f"event: status_update\ndata: {blob}\n\n".encode()

            # expected arrival order (popped from the right): position updates
            # counting down as the jobs ahead are polled, then STARTED, then
            # FINISHED
            chunks = deque()
            chunks.append(create_chunk("state", "FINISHED"))
            chunks.append(create_chunk("state", "STARTED"))
            for pos in range(ind):
                chunks.append(create_chunk("position", pos))

            self.get_grading_job_stream(
                self.course1,
                job_id,
                self.client_header_query_token,
                _create_callback(chunks),
            )

        worker_id = self.register_worker(self.get_header())
        for job_id in job_ids:
            # Run and post the job
            self.poll_job(worker_id, self.get_header())
            self.post_job_result(worker_id, self.get_header(), job_id)
--------------------------------------------------------------------------------