├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md └── app ├── codeforces_wrapper.py ├── main.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # Jetbrains pycharm 132 | .idea/ 133 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM tiangolo/uwsgi-nginx-flask:python3.6-alpine3.7 2 | COPY ./app/requirements.txt /app 3 | RUN pip3 install -r requirements.txt 4 | COPY ./app /app 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Kerollos Magdy 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this 
permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CodeForces Problem Scraper API 2 | 3 | A straightforward Flask-Python API designed to parse Codeforces problems into JSON format. 4 | 5 | ## Endpoint 6 | 7 | Make a **GET** request to the root `/?id=1325/A` and provide the required query parameter `id`. The `id` should be in the following format `contest_id/problem_letter`. 8 | 9 | For example, parsing [this](https://codeforces.com/contest/1325/problem/A) problem yields the following JSON response: 10 | 11 | ```json 12 | { 13 | "inputSpecification": "
The first line contains a single integer $$$t$$$ $$$(1 \\le t \\le 100)$$$ — the number of testcases.
Each testcase consists of one line containing a single integer, $$$x$$$ $$$(2 \\le x \\le 10^9)$$$.
", 14 | "memoryLimit": { 15 | "unit": "megabytes", 16 | "value": 256 17 | }, 18 | "note": "In the first testcase of the sample, $$$GCD(1,1)+LCM(1,1)=1+1=2$$$.
In the second testcase of the sample, $$$GCD(6,4)+LCM(6,4)=2+12=14$$$.
", 19 | "outputSpecification": "For each testcase, output a pair of positive integers $$$a$$$ and $$$b$$$ ($$$1 \\le a, b \\le 10^9)$$$ such that $$$GCD(a,b)+LCM(a,b)=x$$$. It's guaranteed that the solution always exists. If there are several such pairs $$$(a, b)$$$, you can output any of them.
", 20 | "samples": [ 21 | { 22 | "input": "\n2\n2\n14\n", 23 | "output": "\n1 1\n6 4\n" 24 | } 25 | ], 26 | "statement": "You are given a positive integer $$$x$$$. Find any such $$$2$$$ positive integers $$$a$$$ and $$$b$$$ such that $$$GCD(a,b)+LCM(a,b)=x$$$.
As a reminder, $$$GCD(a,b)$$$ is the greatest integer that divides both $$$a$$$ and $$$b$$$. Similarly, $$$LCM(a,b)$$$ is the smallest integer such that both $$$a$$$ and $$$b$$$ divide it.
It's guaranteed that the solution always exists. If there are several such pairs $$$(a, b)$$$, you can output any of them.
", 27 | "timeLimit": { 28 | "unit": "second", 29 | "value": 1 30 | }, 31 | "title": "A. EhAb AnD gCd" 32 | } 33 | ``` 34 | 35 | ## Usage 36 | 37 | After cloning the repository, you have two options: 38 | 39 | - **Option 1: Python3 Installed** 40 | - Navigate to the `/app` directory. 41 | - Install packages in `requirements.txt` using pip3. 42 | `pip3 install -r requirements.txt` 43 | - Start the server. 44 | `FLASK_APP=main.py flask run` 45 | 46 | - **Option 2: Docker Installed** 47 | - Build the image. 48 | `docker build -t cf-problem-scraper-api .` 49 | - Create a container. 50 | `docker run -d --name cf-api -p 80:80 cf-problem-scraper-api` 51 | 52 | ## JSON Schema 53 | 54 | ```json 55 | { 56 | "inputSpecification": String, 57 | "memoryLimit": { 58 | "unit": String, 59 | "value": Number 60 | }, 61 | "note": String, // or null 62 | "outputSpecification": String, 63 | "samples": [ 64 | { 65 | "input": String, 66 | "output": String 67 | } 68 | ], 69 | "statement": String, 70 | "timeLimit": { 71 | "unit": String, 72 | "value": Number 73 | }, 74 | "title": String 75 | } 76 | ``` 77 | 78 | Feel free to explore and integrate this API into your projects! 
def parse_problem(problem_link, timeout=10):
    """Download a Codeforces problem page and extract its parts.

    Args:
        problem_link: Absolute URL of the problem page.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may block indefinitely.

    Returns:
        A dict with the keys ``title``, ``timeLimit``, ``memoryLimit``,
        ``statement``, ``inputSpecification``, ``outputSpecification``,
        ``samples`` and ``note``. The two specifications and ``note`` are
        ``None`` when the corresponding ``div`` is absent.

    Raises:
        requests.RequestException: On network failure, timeout, or a
            non-success HTTP status.
        AttributeError: When the page lacks one of the mandatory ``div``
            elements (``find`` returns ``None`` for them).
    """
    # Imported here so the pure helpers below stay importable (e.g. for
    # unit tests) when the third-party packages are not installed.
    import bs4
    import requests

    response = requests.get(problem_link, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    soup = bs4.BeautifulSoup(response.text, "html.parser")
    return {
        "title": soup.find('div', 'title').string,
        "timeLimit": split_limit(soup.find('div', 'time-limit').contents[1].string),
        "memoryLimit": split_limit(soup.find('div', 'memory-limit').contents[1].string),
        "statement": get_statement(soup),
        "inputSpecification": get_content(soup, 'input-specification'),
        "outputSpecification": get_content(soup, 'output-specification'),
        "samples": get_sample_tests(soup),
        "note": get_content(soup, 'note'),
    }


def split_limit(soup):
    """Split a limit string such as ``"256 megabytes"`` into value and unit.

    Args:
        soup: The limit text (a string, despite the parameter name, which
            is kept for backward compatibility).

    Returns:
        ``{"value": <number>, "unit": <str>}``. The value is an ``int``
        when the text is integral and a ``float`` otherwise, so fractional
        limits like ``"1.5 seconds"`` no longer raise.

    Raises:
        IndexError: If the text has fewer than two whitespace-separated
            tokens.
        ValueError: If the first token is not a number.
    """
    tokens = soup.split()
    number, unit = tokens[0], tokens[1]
    try:
        value = int(number)
    except ValueError:
        value = float(number)
    return {"value": value, "unit": unit}


def group_tests(lst):
    """Return a list of ``{"input": ..., "output": ...}`` dicts.

    Consecutive items of ``lst`` are paired up in order; a trailing
    unpaired item is dropped.
    """
    return [{"input": _in, "output": _out} for _in, _out in pairwise(lst)]


def get_sample_tests(souped_html):
    """Return the sample tests found in a souped problem page.

    Collects the contents of every ``pre`` tag in document order and pairs
    them up as input/output.
    NOTE(review): assumes the only ``pre`` tags on the page are the
    alternating sample inputs and outputs -- confirm against the markup.
    """
    return group_tests(get_tags_contents(souped_html, 'pre'))


def get_tags_contents(souped_html, tag_name, class_name=None):
    """Return the concatenated contents of every matching tag.

    Args:
        souped_html: Object exposing ``find_all(tag_name, class_name)``.
        tag_name: Name of the tags to collect.
        class_name: Optional class filter passed through to ``find_all``.

    Returns:
        A list with one string per matching tag.
    """
    return [concat_contents(tag.contents) for tag in souped_html.find_all(tag_name, class_name)]


def pairwise(iterable):
    """Iterate over non-overlapping pairs: ``s -> (s0, s1), (s2, s3), ...``.

    Both slots of ``zip`` draw from the same iterator, so each item is used
    once. This differs from ``itertools.pairwise``, which yields overlapping
    pairs. A trailing unpaired item is dropped.
    """
    iterator = iter(iterable)
    return zip(iterator, iterator)


def get_statement(soup):
    """Return the markup of the element that follows the ``header`` div."""
    return concat_contents(soup.find('div', 'header').next_sibling.contents)


def get_content(soup, _class=''):
    """Return the markup of a ``div`` section without its first child.

    Args:
        soup: Object exposing ``find('div', _class)``.
        _class: CSS class of the section to extract.

    Returns:
        The concatenated children after the first one, or ``None`` when no
        such ``div`` exists. An empty section yields ``''``.
        NOTE(review): the skipped first child is presumably the section's
        title element -- confirm against the page markup.
    """
    element = soup.find('div', _class)
    if element is None:
        return None
    # Slice instead of pop(0): popping edits the element's own child list,
    # so a second call on the same tree would drop one more child.
    return concat_contents(element.contents[1:])


def concat_contents(ls):
    """Return the string forms of all items in ``ls`` joined together."""
    return ''.join(str(item) for item in ls)
import re

import flask

import codeforces_wrapper


app = flask.Flask(__name__)
PROBLEM_LINK = 'https://codeforces.com/problemset/problem/'
# Accepted shape of the ``id`` query parameter: "<contest id>/<problem
# index>", e.g. "1325/A". Anything else is rejected before it reaches the
# outbound URL.
_PROBLEM_ID = re.compile(r'\d+/[A-Za-z0-9]+')


@app.route('/', methods=['GET'])
def home():
    """Return the requested Codeforces problem as JSON.

    Reads the ``id`` query parameter (``contest_id/problem_letter``),
    appends it to ``PROBLEM_LINK`` and returns the parsed problem.

    Responds with HTTP 400 when ``id`` is missing or does not look like a
    problem id. The value is user-controlled and ends up in an outbound
    URL, so it is validated first.
    """
    problem_id = flask.request.args.get('id', '')
    if _PROBLEM_ID.fullmatch(problem_id) is None:
        flask.abort(400, "Query parameter 'id' must look like 'contest_id/problem_letter', e.g. 1325/A")
    return flask.jsonify(codeforces_wrapper.parse_problem(PROBLEM_LINK + problem_id))