├── .github └── workflows │ └── testcase-downloader.yml ├── README.md ├── crawler ├── .gitignore ├── package-lock.json ├── package.json ├── playwright.config.ts └── tests │ └── dropbox-downloader.spec.ts ├── downloader.py ├── generate-list.py └── requirements.txt /.github/workflows/testcase-downloader.yml: -------------------------------------------------------------------------------- 1 | name: Download test case 2 | #on: workflow_dispatch 3 | 4 | on: 5 | workflow_dispatch: 6 | # schedule: 7 | # - cron: "0 0 * * *" 8 | # - cron: "0 6 * * *" 9 | # - cron: "0 9 * * *" 10 | # - cron: "0 12 * * *" 11 | # - cron: "0 15 * * *" 12 | # - cron: "0 18 * * *" 13 | # - cron: "0 21 * * *" 14 | 15 | jobs: 16 | crawl: 17 | runs-on: ubuntu-latest 18 | defaults: 19 | run: 20 | working-directory: ./crawler 21 | steps: 22 | - uses: actions/checkout@v4 23 | - uses: actions/setup-node@v4 24 | with: 25 | node-version: lts/* 26 | - name: Install dependencies 27 | run: npm ci 28 | - name: Install Playwright Browsers 29 | run: npx playwright install --with-deps chromium 30 | - name: Run Playwright tests 31 | run: npx playwright test 32 | - uses: actions/upload-artifact@v4 33 | if: always() 34 | with: 35 | name: playwright-artifact 36 | path: crawler/ 37 | 38 | download: 39 | needs: crawl 40 | runs-on: ubuntu-latest 41 | permissions: 42 | contents: write 43 | defaults: 44 | run: 45 | working-directory: ./ 46 | strategy: 47 | matrix: 48 | python-version: ["3.10"] 49 | 50 | steps: 51 | - uses: actions/checkout@v4 52 | - name: Set up Python ${{ matrix.python-version }} 53 | uses: actions/setup-python@v5 54 | with: 55 | python-version: ${{ matrix.python-version }} 56 | 57 | - uses: actions/download-artifact@main 58 | with: 59 | name: playwright-artifact 60 | path: crawler/ 61 | 62 | - name: Set Github identity 63 | run: | 64 | git config --global user.email "phamlong15297@gmail.com" 65 | git config --global user.name "conlacda" 66 | 67 | - name: Download test case 68 | run: | 69 | pip install -r requirements.txt 70 | python downloader.py 71 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # atcoder-testcases -------------------------------------------------------------------------------- /crawler/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | /test-results/ 3 | /playwright-report/ 4 | /blob-report/ 5 | /playwright/.cache/ 6 | dump.json -------------------------------------------------------------------------------- /crawler/package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "crawler", 3 | "version": "1.0.0", 4 | "lockfileVersion": 3, 5 | "requires": true, 6 | "packages": { 7 | "": { 8 | "name": "crawler", 9 | "version": "1.0.0", 10 | "license": "ISC", 11 | "devDependencies": { 12 | "@playwright/test": "^1.43.1", 13 | "@types/node": "^20.12.8" 14 | } 15 | }, 16 | "node_modules/@playwright/test": { 17 | "version": "1.43.1", 18 | "resolved": "https://registry.npmjs.org/@playwright/test/-/test-1.43.1.tgz", 19 | "integrity": "sha512-HgtQzFgNEEo4TE22K/X7sYTYNqEMMTZmFS8kTq6m8hXj+m1D8TgwgIbumHddJa9h4yl4GkKb8/bgAl2+g7eDgA==", 20 | "dev": true, 21 | "dependencies": { 22 | "playwright": "1.43.1" 23 | }, 24 | "bin": { 25 | "playwright": "cli.js" 26 | }, 27 | "engines": { 28 | "node": ">=16" 29 | } 30 | }, 31 | "node_modules/@types/node": { 32 | "version": "20.12.8", 33 | "resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.8.tgz", 34 | "integrity": "sha512-NU0rJLJnshZWdE/097cdCBbyW1h4hEg0xpovcoAQYHl8dnEyp/NAOiE45pvc+Bd1Dt+2r94v2eGFpQJ4R7g+2w==", 35 | "dev": true, 36 | "dependencies": { 37 | "undici-types": "~5.26.4" 38 | } 39 | }, 40 | "node_modules/fsevents": { 41 | "version": "2.3.2", 42 | "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", 43 | "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", 44 | "dev": true, 45 | "hasInstallScript": true, 46 | "optional": true, 47 | "os": [ 48 | "darwin" 49 | ], 50 | "engines": { 51 | "node": "^8.16.0 || ^10.6.0 || >=11.0.0" 52 | } 53 | }, 54 | "node_modules/playwright": { 55 | "version": "1.43.1", 56 | "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.43.1.tgz", 57 | "integrity": "sha512-V7SoH0ai2kNt1Md9E3Gwas5B9m8KR2GVvwZnAI6Pg0m3sh7UvgiYhRrhsziCmqMJNouPckiOhk8T+9bSAK0VIA==", 58 | "dev": true, 59 | "dependencies": { 60 | "playwright-core": "1.43.1" 61 | }, 62 | "bin": { 63 | "playwright": "cli.js" 64 | }, 65 | "engines": { 66 | "node": ">=16" 67 | }, 68 | "optionalDependencies": { 69 | "fsevents": "2.3.2" 70 | } 71 | }, 72 | "node_modules/playwright-core": { 73 | "version": "1.43.1", 74 | "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.43.1.tgz", 75 | "integrity": "sha512-EI36Mto2Vrx6VF7rm708qSnesVQKbxEWvPrfA1IPY6HgczBplDx7ENtx+K2n4kJ41sLLkuGfmb0ZLSSXlDhqPg==", 76 | "dev": true, 77 | "bin": { 78 | "playwright-core": "cli.js" 79 | }, 80 | "engines": { 81 | "node": ">=16" 82 | } 83 | }, 84 | "node_modules/undici-types": { 85 | "version": "5.26.5", 86 | "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", 87 | "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", 88 | "dev": true 89 | } 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /crawler/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "crawler", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": {}, 7 | "keywords": [], 8 | "author": "", 9 | "license": "ISC", 10 | "devDependencies": { 11 | "@playwright/test": "^1.43.1", 12 | "@types/node": "^20.12.8" 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /crawler/playwright.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig, devices } from '@playwright/test'; 2 | 3 | /** 4 | * Read environment variables from file. 5 | * https://github.com/motdotla/dotenv 6 | */ 7 | // require('dotenv').config(); 8 | 9 | /** 10 | * See https://playwright.dev/docs/test-configuration. 11 | */ 12 | export default defineConfig({ 13 | testDir: './tests', 14 | /* Run tests in files in parallel */ 15 | fullyParallel: true, 16 | /* Fail the build on CI if you accidentally left test.only in the source code. */ 17 | forbidOnly: !!process.env.CI, 18 | /* Retry on CI only */ 19 | retries: process.env.CI ? 2 : 0, 20 | /* Opt out of parallel tests on CI. */ 21 | workers: process.env.CI ? 1 : undefined, 22 | /* Reporter to use. See https://playwright.dev/docs/test-reporters */ 23 | reporter: 'html', 24 | /* Shared settings for all the projects below. See https://playwright.dev/docs/api/class-testoptions. */ 25 | use: { 26 | /* Base URL to use in actions like `await page.goto('/')`. */ 27 | // baseURL: 'http://127.0.0.1:3000', 28 | 29 | /* Collect trace when retrying the failed test. See https://playwright.dev/docs/trace-viewer */ 30 | trace: 'on-first-retry', 31 | 32 | headless: process.env.CI ? true : false 33 | }, 34 | 35 | /* Configure projects for major browsers */ 36 | projects: [ 37 | { 38 | name: 'chromium', 39 | use: { ...devices['Desktop Chrome'] }, 40 | }, 41 | 42 | // { 43 | // name: 'firefox', 44 | // use: { ...devices['Desktop Firefox'] }, 45 | // }, 46 | // 47 | // { 48 | // name: 'webkit', 49 | // use: { ...devices['Desktop Safari'] }, 50 | // }, 51 | 52 | /* Test against mobile viewports. */ 53 | // { 54 | // name: 'Mobile Chrome', 55 | // use: { ...devices['Pixel 5'] }, 56 | // }, 57 | // { 58 | // name: 'Mobile Safari', 59 | // use: { ...devices['iPhone 12'] }, 60 | // }, 61 | 62 | /* Test against branded browsers. */ 63 | // { 64 | // name: 'Microsoft Edge', 65 | // use: { ...devices['Desktop Edge'], channel: 'msedge' }, 66 | // }, 67 | // { 68 | // name: 'Google Chrome', 69 | // use: { ...devices['Desktop Chrome'], channel: 'chrome' }, 70 | // }, 71 | ], 72 | 73 | /* Run your local dev server before starting the tests */ 74 | // webServer: { 75 | // command: 'npm run start', 76 | // url: 'http://127.0.0.1:3000', 77 | // reuseExistingServer: !process.env.CI, 78 | // }, 79 | }); 80 | -------------------------------------------------------------------------------- /crawler/tests/dropbox-downloader.spec.ts: -------------------------------------------------------------------------------- 1 | import {test, expect, Locator} from '@playwright/test'; 2 | import fs from 'fs'; 3 | 4 | test.setTimeout(120000); 5 | test('Get all contests that have test cases published.', async ({ page }) => { 6 | // Load dumped data 7 | const dumpFile = 'dump.json'; 8 | let data = {}; 9 | // Access page to get new data 10 | await page.goto('https://www.dropbox.com/sh/nx3tnilzqz7df8a/AAAYlTq2tiEHl5hsESw6-yfLa?e=1&dl=0'); 11 | await page.evaluate(async () => { 12 | const delay = ms => new Promise(resolve => setTimeout(resolve, ms)); 13 | let len = 0; 14 | while (true) { 15 | const newLen = document.querySelectorAll('div[role="row"]').length; 16 | if (len === newLen) 17 | break; 18 | len = newLen; 19 | document.querySelectorAll('div[role="row"]')[len-1].scrollIntoView({ behavior: "smooth", block: "end", inline: "nearest" }); 20 | await delay(10000); 21 | } 22 | }); 23 | 24 | const body: Locator = page.locator('.dig-Table-body').first(); 25 | const rows: Locator = body.locator('div[role="row"]'); 26 | const cnt = await rows.count(); 27 | for (let index= 0; index < cnt ; index++) { 28 | const row: Locator = rows.nth(index); 29 | const cell: Locator = row.locator('div[role="cell"]').first(); 30 | const atag: Locator = cell.locator('a').first(); 31 | const title = await cell.innerText(); 32 | const link = await atag.getAttribute('href'); 33 | data[title.toLowerCase()] = link; 34 | } 35 | fs.writeFileSync(dumpFile, JSON.stringify(data)); 36 | }); 37 | -------------------------------------------------------------------------------- /downloader.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | import subprocess 3 | from typing import List 4 | from requests import get 5 | import json 6 | import os 7 | from tqdm import tqdm 8 | import random 9 | 10 | 11 | def download(url, fileName): 12 | with open(fileName, "wb+") as file: 13 | response = get(url) 14 | file.write(response.content) 15 | 16 | 17 | def get_git_branches(): 18 | # Dont use git branch -a because it will get list of branches on local. git ls-remote origin will fetch from the original source from the internet. 19 | result = subprocess.run(['git', 'ls-remote', 'origin'], capture_output=True, text=True) 20 | branches = [] 21 | for line in result.stdout.splitlines(): 22 | if 'refs/heads' in line: 23 | branch_name = line.strip().split('\t')[-1].replace('refs/heads/', '').replace('"', '') 24 | branches.append(branch_name) 25 | 26 | return branches 27 | 28 | 29 | with open("./crawler/dump.json") as f: 30 | data: dict[str, str] = json.load(f) 31 | # Dont use git branch -a because it will get list of branches on local. git ls-remote origin will fetch from the original source from the internet. 32 | branches = get_git_branches() 33 | 34 | # Remove some random branches to prevent incomplete data (it should be newest branches but can not get remote branches list then sort them by created date) 35 | NUMBER_OF_REMOVED_BRANCHES = 3 36 | for i in range(NUMBER_OF_REMOVED_BRANCHES): 37 | abrgcBranches = list(filter(lambda branch: branch.startswith('abc') or branch.startswith('arc') or branch.startswith('agc'), branches)) 38 | randomBranch = random.choice(abrgcBranches) 39 | branches.remove(randomBranch) 40 | 41 | # Download missing testcases 42 | for contest in tqdm(data.keys()): 43 | if contest in branches: 44 | continue 45 | 46 | # dl=0 -> dl=1 47 | url = data[contest][:-1] + "1" 48 | print(f"Downloading {contest} from {url}") 49 | download(url, f"{contest}.zip") 50 | shutil.unpack_archive(f"{contest}.zip", f"{contest}/") 51 | os.remove(f"{contest}.zip") 52 | subprocess.run(["python", "generate-list.py", contest]) 53 | subprocess.run(['git', 'push', 'origin', '--delete', contest]) 54 | subprocess.run(["git", "checkout", "-b", contest]) 55 | subprocess.run(["git", "add", "."]) 56 | subprocess.run(["git", "commit", "-m", f"add {contest}"]) 57 | subprocess.run(["git", "push", "origin", contest]) 58 | subprocess.run(["git", "checkout", "main"]) 59 | -------------------------------------------------------------------------------- /generate-list.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | from typing import List 4 | 5 | # python genenrate-list.py contest_name 6 | contest = sys.argv[1] 7 | 8 | problems: List[str] = [p for p in os.listdir(f"./{contest}")] 9 | 10 | # Rename file 11 | for problem in problems: 12 | # input directory 13 | for path, subdirs, fileNames in os.walk(f"./{contest}/{problem}/in"): 14 | for fileName in fileNames: 15 | fileNameWithoutExtension = fileName.split('.')[0] 16 | # sometimes we have 2 files with same prefix but different extension 17 | if not os.path.exists(f"{path}/{fileNameWithoutExtension}"): 18 | os.rename(f"{path}/{fileName}", f"{path}/{fileNameWithoutExtension}") 19 | 20 | # output directory 21 | for path, subdirs, fileNames in os.walk(f"./{contest}/{problem}/out"): 22 | for fileName in fileNames: 23 | fileNameWithoutExtension = fileName.split('.')[0] 24 | # sometimes we have 2 files with same prefix but different extension 25 | if not os.path.exists(f"{path}/{fileNameWithoutExtension}"): 26 | os.rename(f"{path}/{fileName}", f"{path}/{fileNameWithoutExtension}") 27 | 28 | # Generate list.txt file 29 | for problem in problems: 30 | # if (os.path.exists(f"./{folder}/{problem}/list.txt")): 31 | # os.remove(f"./{folder}/{problem}/list.txt") 32 | if not os.path.exists(f"./{contest}/{problem}/list.txt"): 33 | for path, subdirs, inFiles in os.walk(f"./{contest}/{problem}/in"): 34 | f = open(f"./{contest}/{problem}/list.txt", "a+") 35 | for inFile in sorted(inFiles): 36 | outFile = inFile 37 | if (inFile.endswith('.in')): 38 | outFile = inFile[:-2] + "out" 39 | 40 | # missing output files will raise an error (check abc350.C on dropbox) 41 | # create if not exists 42 | if not os.path.exists(f"./{contest}/{problem}/out/{outFile}"): 43 | with open(f"./{contest}/{problem}/out/{outFile}", 'w') as outf: 44 | outf.write("Sorry, this output file is not provided.") 45 | 46 | # file_name,input_file_size_in_bytes,output_file_size_in_bytes 47 | if os.path.exists(f"./{contest}/{problem}/in/{inFile}") and os.path.exists(f"./{contest}/{problem}/out/{outFile}"): 48 | inputSz = os.path.getsize(f"./{contest}/{problem}/in/{inFile}") 49 | outputSz = os.path.getsize(f"./{contest}/{problem}/out/{outFile}") 50 | commonName = inFile.split(".")[0] 51 | f.write(f"{commonName},{inputSz},{outputSz}\n") 52 | f.close() 53 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests 2 | tqdm --------------------------------------------------------------------------------