├── .gitignore ├── AddProblem ├── add_problem.py ├── add_problemV1.1.py ├── add_problem_V2.py ├── config.py ├── delete.py ├── function.py ├── pack_sample.py └── settings.py ├── BZOJ └── bzoj.zip ├── LICENSE ├── README.md ├── README_1.0.md ├── WebSpider └── OnlineJudgeProblem_BZOJ │ ├── OnlineJudgeProblem_BZOJ │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── items.cpython-36.pyc │ │ ├── pipelines.cpython-36.pyc │ │ └── settings.cpython-36.pyc │ ├── items.py │ ├── middlewares.py │ ├── pipelines.py │ ├── settings.py │ └── spiders │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ └── bzoj.cpython-36.pyc │ │ └── bzoj.py │ └── scrapy.cfg └── docs ├── Scrapy安装详解.pdf └── scrapy安装文档.pdf /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | WebSpider/OnlineJudgeProblem_BZOJ/.idea/misc.xml 3 | WebSpider/OnlineJudgeProblem_BZOJ/.idea/modules.xml 4 | WebSpider/OnlineJudgeProblem_BZOJ/.idea/OnlineJudgeProblem_BZOJ.iml 5 | WebSpider/OnlineJudgeProblem_BZOJ/.idea/workspace.xml 6 | -------------------------------------------------------------------------------- /AddProblem/add_problem.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | :function: 通过python与selenium将mongoDB中的题目数据自动添加网站中 4 | :author:hefengen 5 | :date:2018/04/14 6 | :email:hefengen@hotmail.com 7 | """ 8 | import time 9 | 10 | import pymongo 11 | 12 | import requests 13 | from selenium.webdriver.common.keys import Keys 14 | from config import * 15 | from function import * 16 | from selenium import webdriver 17 | from selenium.webdriver.support.ui import WebDriverWait 18 | from selenium.webdriver.support import expected_conditions as EC 19 | from selenium.webdriver.common.by import By 20 | import re 21 | 22 | 23 | # 题目的url 24 | problem_url = 'http://172.16.72.4/admin/problems' 25 | 26 | # 创建题目的url 27 | create_problem_url = 'http://172.16.72.4/admin/problem/create' 28 | 29 | # login_url 30 | url = 'http://172.16.72.4/admin' 31 | 32 | # 使用selenium模拟在浏览器中导入数据 33 | browser = webdriver.Chrome(service_args=SERVICE_ARGS) 34 | wait = WebDriverWait(browser, 10) 35 | 36 | # 设置窗口大小 37 | browser.set_window_size(1400, 900) 38 | 39 | zip_dir = "E:\\Problem\\Testcase\\ok" # 解压后的目录 40 | 41 | 42 | def handle_login(): 43 | """ 44 | :function: 模拟实现登陆 45 | :return: 46 | """ 47 | try: 48 | browser.get(url=url) 49 | username = wait.until( 50 | EC.presence_of_element_located((By.CSS_SELECTOR, '#app > form > div:nth-child(2) > div > div.el-input > input')) 51 | ) 52 | password = wait.until( 53 | EC.presence_of_element_located((By.CSS_SELECTOR, '#app > form > div:nth-child(3) > div > div.el-input > input')) 54 | ) 55 | submit = wait.until( 56 | EC.element_to_be_clickable((By.CSS_SELECTOR, '#app > form > div:nth-child(4) > div > button')) 57 | ) 58 | # 自动填充密码 59 | username.send_keys(USERNAME) 60 | password.send_keys(PASSWORD) 61 | submit.click() 62 | 63 | create_problem = wait.until( 64 | EC.presence_of_element_located((By.CSS_SELECTOR, 65 | '#app > div > div.content-app > div.view > div.panel > div > div.panel-options > button > span')) 66 | ) 67 | create_problem.click() 68 | add_data_to_page() 69 | except TimeoutError: 70 | handle_login() 71 | 72 | def add_data_to_page(): 73 | """ 74 | :param url: 75 | :return: 76 | """ 77 | data = query_data_from_mongo() 78 | for problem in data: 79 | if(problem['sample_input'] != "" and problem['sample_output'] != ""): 80 | try: 81 | browser.get(create_problem_url) 82 | display_id = wait.until( 83 | EC.presence_of_element_located((By.CSS_SELECTOR, 84 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(1) > div.el-col.el-col-6 > div > div > div.el-input > input')) 85 | ) 86 | title = wait.until( 87 | EC.presence_of_element_located((By.CSS_SELECTOR, 88 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(1) > div.el-col.el-col-18 > div > div > div.el-input > input')) 89 | ) 90 | description = wait.until( 91 | EC.presence_of_element_located((By.CSS_SELECTOR, 92 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(2) > div > div > div > div > div.simditor-wrapper > div.markdown-editor > textarea')) 93 | ) 94 | input = wait.until( 95 | EC.presence_of_element_located((By.CSS_SELECTOR, 96 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(3) > div:nth-child(1) > div > div > div > div.simditor-wrapper > div.markdown-editor > textarea')) 97 | ) 98 | output = wait.until( 99 | EC.presence_of_element_located((By.CSS_SELECTOR, 100 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(3) > div:nth-child(2) > div > div > div > div.simditor-wrapper > div.markdown-editor > textarea')) 101 | ) 102 | time_limit = wait.until( 103 | EC.presence_of_element_located((By.CSS_SELECTOR, 104 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(4) > div:nth-child(1) > div > div > div > input')) 105 | ) 106 | memory_limit = wait.until( 107 | EC.presence_of_element_located((By.CSS_SELECTOR, 108 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(4) > div:nth-child(2) > div > div > div > input')) 109 | ) 110 | difficulty = wait.until( 111 | EC.presence_of_element_located((By.CSS_SELECTOR, 112 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(4) > div:nth-child(3) > div > div > div')) 113 | ) 114 | tag = wait.until( 115 | EC.element_to_be_clickable((By.CSS_SELECTOR, 116 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(5) > div:nth-child(2) > div > div > button > span')) 117 | ) 118 | tag.click() 119 | tags = wait.until( 120 | EC.presence_of_element_located((By.CSS_SELECTOR, 121 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(5) > div:nth-child(2) > div > div > div > div.el-input.el-input--mini > input')) 122 | ) 123 | sample_input = wait.until( 124 | EC.presence_of_element_located((By.CSS_SELECTOR, 125 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(6) > div > div > div > div > div > div:nth-child(1) > div > div > div > textarea')) 126 | ) 127 | sample_output = wait.until( 128 | EC.presence_of_element_located((By.CSS_SELECTOR, 129 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(6) > div > div > div > div > div > div:nth-child(2) > div > div > div > textarea')) 130 | ) 131 | file_input = wait.until( 132 | EC.presence_of_element_located((By.CSS_SELECTOR, 133 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(10) > div.el-col.el-col-4 > div > div > div > div > input')) 134 | ) 135 | hint = wait.until( 136 | EC.presence_of_element_located((By.CSS_SELECTOR, 137 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(11) > div > div > div.simditor-wrapper > div.markdown-editor > textarea')) 138 | ) 139 | source = wait.until( 140 | EC.presence_of_element_located((By.CSS_SELECTOR, 141 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(12) > div > div > input')) 142 | ) 143 | create = wait.until( 144 | EC.element_to_be_clickable((By.CSS_SELECTOR, 145 | '#app > div > div.content-app > div.problem > div > div > form > button')) 146 | ) 147 | 148 | # 发送相关的信息 149 | display_id.send_keys(problem['problem_no']) 150 | title.send_keys(problem['problem_name']) 151 | 152 | pat = 'src="(.*?)"' 153 | regex = re.compile(pat, re.S) 154 | is_description = re.search(pat, problem['description'], re.S) 155 | if (is_description != None): 156 | description_text = regex.sub(lambda m: 'src="/public/' + m.group(1) + '"', problem['description']) 157 | else: 158 | description_text = problem['description'] 159 | description.send_keys(description_text) 160 | 161 | # 输入框 162 | is_input = re.search(pat, problem['input'], re.S) 163 | if (is_input != None): 164 | # 通过正则表达式替换 165 | input_text = regex.sub(lambda m: 'src="/public/' + m.group(1) + '"', problem['input']) 166 | else: 167 | input_text = problem['input'] 168 | input.send_keys(input_text) 169 | 170 | # 输出框 171 | is_output = re.search(pat, problem['output'], re.S) 172 | if (is_output != None): 173 | output_text = regex.sub(lambda m: 'src="/public/' + m.group(1) + '"', problem['output']) 174 | else: 175 | output_text = problem['output'] 176 | output.send_keys(output_text) 177 | 178 | # 时间、内存、标签、样例 179 | for i in range(0, 5): 180 | time_limit.send_keys(Keys.BACKSPACE) 181 | time_limit.send_keys('4000') 182 | 183 | memory_limit.clear() 184 | memory_limit.send_keys(problem['memory_limit']) 185 | tags.send_keys('bzoj-problem') 186 | tags.send_keys(Keys.ENTER) 187 | sample_input.send_keys(problem['sample_input']) 188 | sample_output.send_keys(problem['sample_output']) 189 | 190 | # 点击上传文件 191 | file_dir = zip_dir + "\\" + problem['problem_no'] + ".zip" 192 | file_input.send_keys(file_dir) 193 | 194 | is_hint = re.search(pat, problem['hint'], re.S) 195 | if (is_hint != None): 196 | hint_text = regex.sub(lambda m: 'src="/public/' + m.group(1) + '"', problem['hint']) 197 | else: 198 | hint_text = problem['hint'] 199 | hint.send_keys(hint_text) 200 | source.send_keys(problem['source']) 201 | time.sleep(5) 202 | 203 | # 提交 204 | create.click() 205 | except TimeoutError: 206 | add_data_to_page() 207 | 208 | def main(): 209 | """ 210 | :return: 211 | """ 212 | handle_login() 213 | 214 | if __name__ == '__main__': 215 | main() 216 | 217 | -------------------------------------------------------------------------------- /AddProblem/add_problemV1.1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | :function: 通过python与selenium将mongoDB中的题目数据自动添加网站中 4 | :author:hefengen 5 | :date:2018/04/14 6 | :email:hefengen@hotmail.com 7 | """ 8 | import time 9 | 10 | import pymongo 11 | 12 | import re 13 | 14 | import requests 15 | from selenium.webdriver.common.keys import Keys 16 | from config import * 17 | from function import * 18 | from selenium import webdriver 19 | from selenium.webdriver.support.ui import WebDriverWait 20 | from selenium.webdriver.support import expected_conditions as EC 21 | from selenium.webdriver.common.by import By 22 | 23 | 24 | # 题目的url 25 | problem_url = 'http://192.168.94.137/admin/problems' 26 | 27 | # 创建题目的url 28 | create_problem_url = 'http://192.168.94.137/admin/problem/create' 29 | 30 | # login_url 31 | url = 'http://192.168.94.137/admin' 32 | 33 | # 使用selenium模拟在浏览器中导入数据 34 | browser = webdriver.Chrome(service_args=SERVICE_ARGS) 35 | wait = WebDriverWait(browser, 10) 36 | 37 | # 设置窗口大小 38 | browser.set_window_size(1400, 900) 39 | 40 | zip_dir = "E:\\Problem\\Testcase\\ok\\" # 解压后的目录 41 | 42 | 43 | def handle_login(): 44 | """ 45 | :function: 模拟实现登陆 46 | :return: 47 | """ 48 | try: 49 | browser.get(url=url) 50 | username = wait.until( 51 | EC.presence_of_element_located((By.CSS_SELECTOR, '#app > form > div:nth-child(2) > div > div.el-input > input')) 52 | ) 53 | password = wait.until( 54 | EC.presence_of_element_located((By.CSS_SELECTOR, '#app > form > div:nth-child(3) > div > div > input')) 55 | ) 56 | submit = wait.until( 57 | EC.element_to_be_clickable((By.CSS_SELECTOR, '#app > form > div:nth-child(4) > div > button')) 58 | ) 59 | # 自动填充密码 60 | username.send_keys(USERNAME) 61 | password.send_keys(PASSWORD) 62 | submit.click() 63 | 64 | #clickit = wait.until( 65 | # EC.element_to_be_clickable((By.CSS_SELECTOR, '#app > div > div:nth-child(1) > ul > li:nth-child(4) > div > i.el-submenu__icon-arrow.el-icon-arrow-down')) 66 | #) 67 | #clickit.click() 68 | 69 | #addproblemsbutton = wait.until( 70 | # EC.presence_of_element_located((By.CSS_SELECTOR, '#app > div > div:nth-child(1) > ul > li.el-submenu.is-active.is-opened > ul > li.el-menu-item.is-active')) 71 | #) 72 | 73 | #clickita.click() 74 | time.sleep(9) 75 | 76 | browser.get('http://192.168.94.137/admin/problems') 77 | create_problem = wait.until( 78 | EC.presence_of_element_located((By.CSS_SELECTOR, '#app > div > div.content-app > div.view > div.panel > div > div.panel-options > button')) 79 | ) 80 | create_problem.click() 81 | add_data_to_page() 82 | except TimeoutError: 83 | handle_login() 84 | 85 | 86 | def add_data_to_page(): 87 | """ 88 | :param url: 89 | :return: 90 | """ 91 | data = query_data_from_mongo() 92 | for problem in data: 93 | if(problem['sample_input'] != "" and problem['sample_output'] != ""): 94 | try: 95 | browser.get(create_problem_url) 96 | display_id = wait.until( 97 | EC.presence_of_element_located((By.CSS_SELECTOR, 98 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(1) > div.el-col.el-col-6 > div > div > div.el-input > input')) 99 | ) 100 | title = wait.until( 101 | EC.presence_of_element_located((By.CSS_SELECTOR, 102 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(1) > div.el-col.el-col-18 > div > div > div.el-input > input')) 103 | ) 104 | description = wait.until( 105 | EC.presence_of_element_located((By.CSS_SELECTOR, 106 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(2) > div > div > div > div > div.simditor-wrapper > div.markdown-editor > textarea')) 107 | ) 108 | input = wait.until( 109 | EC.presence_of_element_located((By.CSS_SELECTOR, 110 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(3) > div:nth-child(1) > div > div > div > div.simditor-wrapper > div.markdown-editor > textarea')) 111 | ) 112 | output = wait.until( 113 | EC.presence_of_element_located((By.CSS_SELECTOR, 114 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(3) > div:nth-child(2) > div > div > div > div.simditor-wrapper > div.markdown-editor > textarea')) 115 | ) 116 | time_limit = wait.until( 117 | EC.presence_of_element_located((By.CSS_SELECTOR, 118 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(4) > div:nth-child(1) > div > div > div > input')) 119 | ) 120 | memory_limit = wait.until( 121 | EC.presence_of_element_located((By.CSS_SELECTOR, 122 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(4) > div:nth-child(2) > div > div > div > input')) 123 | ) 124 | difficulty = wait.until( 125 | EC.presence_of_element_located((By.CSS_SELECTOR, 126 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(4) > div:nth-child(3) > div > div > div')) 127 | ) 128 | tag = wait.until( 129 | EC.element_to_be_clickable((By.CSS_SELECTOR, 130 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(5) > div:nth-child(2) > div > div > button > span')) 131 | ) 132 | tag.click() 133 | tags = wait.until( 134 | EC.presence_of_element_located((By.CSS_SELECTOR, 135 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(5) > div:nth-child(2) > div > div > div > div.el-input.el-input--mini > input')) 136 | ) 137 | sample_input = wait.until( 138 | EC.presence_of_element_located((By.CSS_SELECTOR, 139 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(6) > div > div > div > div > div > div:nth-child(1) > div > div > div > textarea')) 140 | ) 141 | sample_output = wait.until( 142 | EC.presence_of_element_located((By.CSS_SELECTOR, 143 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(6) > div > div > div > div > div > div:nth-child(2) > div > div > div > textarea')) 144 | ) 145 | #hint = wait.until( 146 | # EC.presence_of_element_located((By.CSS_SELECTOR, 147 | # '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(7) > div > div > div.simditor-wrapper > div.markdown-editor > textarea')) 148 | #) 149 | file_input = wait.until( 150 | EC.presence_of_element_located((By.CSS_SELECTOR, 151 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(11) > div.el-col.el-col-4 > div > div > div > div > input')) 152 | ) 153 | source = wait.until( 154 | EC.presence_of_element_located((By.CSS_SELECTOR, 155 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(12) > div > div > input')) 156 | ) 157 | create = wait.until( 158 | EC.element_to_be_clickable((By.CSS_SELECTOR, 159 | '#app > div > div.content-app > div.problem > div > div > form > button')) 160 | ) 161 | 162 | # 发送相关的信息 163 | display_id.send_keys(problem['problem_no']) 164 | title.send_keys(problem['problem_name']) 165 | 166 | pat = 'src="(.*?)"' 167 | regex = re.compile(pat, re.S) 168 | is_description = re.search(pat, problem['description'], re.S) 169 | if (is_description != None): 170 | description_text = regex.sub(lambda m: 'src="/public/' + m.group(1) + '"', problem['description']) 171 | else: 172 | description_text = problem['description'] 173 | description.send_keys(description_text) 174 | 175 | # 输入框 176 | is_input = re.search(pat, problem['input'], re.S) 177 | if (is_input != None): 178 | # 通过正则表达式替换 179 | input_text = regex.sub(lambda m: 'src="/public/' + m.group(1) + '"', problem['input']) 180 | else: 181 | input_text = problem['input'] 182 | input.send_keys(input_text) 183 | 184 | # 输出框 185 | is_output = re.search(pat, problem['output'], re.S) 186 | if (is_output != None): 187 | output_text = regex.sub(lambda m: 'src="/public/' + m.group(1) + '"', problem['output']) 188 | else: 189 | output_text = problem['output'] 190 | output.send_keys(output_text) 191 | 192 | # 时间、内存、标签、样例 193 | for i in range(0, 5): 194 | time_limit.send_keys(Keys.BACKSPACE) 195 | time_limit.send_keys('4000') 196 | 197 | memory_limit.clear() 198 | memory_limit.send_keys(problem['memory_limit']) 199 | tags.send_keys('bzoj-problem') 200 | tags.send_keys(Keys.ENTER) 201 | sample_input.send_keys(problem['sample_input']) 202 | sample_output.send_keys(problem['sample_output']) 203 | 204 | #is_hint = re.search(pat, problem['hint'], re.S) 205 | #if (is_hint != None): 206 | # hint_text = regex.sub(lambda m: 'src="/public/' + m.group(1) + '"', problem['hint']) 207 | #else: 208 | # hint_text = problem['hint'] 209 | #hint.send_keys(hint_text) 210 | #time.sleep(5) 211 | 212 | # 点击上传文件 213 | file_dir = zip_dir + "\\" + problem['problem_no'] + ".zip" 214 | file_input.send_keys(file_dir) 215 | 216 | source.send_keys(problem['source']) 217 | time.sleep(5) 218 | 219 | # 提交 220 | create.click() 221 | except TimeoutError: 222 | add_data_to_page() 223 | 224 | def main(): 225 | """ 226 | :return: 227 | """ 228 | handle_login() 229 | 230 | if __name__ == '__main__': 231 | main() 232 | 233 | -------------------------------------------------------------------------------- /AddProblem/add_problem_V2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | :function: 通过python与selenium将mongoDB中的题目数据自动添加网站中 4 | :author:hefengen 5 | :date:2018/04/14 6 | :email:hefengen@hotmail.com 7 | """ 8 | import time 9 | 10 | import pymongo 11 | 12 | import re 13 | 14 | import requests 15 | from selenium.webdriver.common.keys import Keys 16 | from config import * 17 | from function import * 18 | from selenium import webdriver 19 | from selenium.webdriver.support.ui import WebDriverWait 20 | from selenium.webdriver.support import expected_conditions as EC 21 | from selenium.webdriver.common.by import By 22 | 23 | 24 | # 题目的url 25 | problem_url = 'http://ip/admin/problems' 26 | 27 | # 创建题目的url 28 | create_problem_url = 'http://ip/admin/problem/create' 29 | 30 | # login_url 31 | url = 'http://ip/admin' 32 | 33 | # 使用selenium模拟在浏览器中导入数据 34 | browser = webdriver.Chrome(service_args=SERVICE_ARGS) 35 | wait = WebDriverWait(browser, 10) 36 | 37 | # 设置窗口大小 38 | browser.set_window_size(1400, 900) 39 | 40 | zip_dir = "F:\a\" # 解压后的目录 41 | 42 | 43 | def handle_login(): 44 | """ 45 | :function: 模拟实现登陆 46 | :return: 47 | """ 48 | try: 49 | browser.get(url=url) 50 | username = wait.until( 51 | EC.presence_of_element_located((By.CSS_SELECTOR, '#app > form > div:nth-child(2) > div > div.el-input > input')) 52 | ) 53 | password = wait.until( 54 | EC.presence_of_element_located((By.CSS_SELECTOR, '#app > form > div:nth-child(3) > div > div > input')) 55 | ) 56 | submit = wait.until( 57 | EC.element_to_be_clickable((By.CSS_SELECTOR, '#app > form > div:nth-child(4) > div > button')) 58 | ) 59 | # 自动填充密码 60 | username.send_keys(USERNAME) 61 | password.send_keys(PASSWORD) 62 | submit.click() 63 | 64 | #clickit = wait.until( 65 | # EC.element_to_be_clickable((By.CSS_SELECTOR, '#app > div > div:nth-child(1) > ul > li:nth-child(4) > div > i.el-submenu__icon-arrow.el-icon-arrow-down')) 66 | #) 67 | #clickit.click() 68 | 69 | #addproblemsbutton = wait.until( 70 | # EC.presence_of_element_located((By.CSS_SELECTOR, '#app > div > div:nth-child(1) > ul > li.el-submenu.is-active.is-opened > ul > li.el-menu-item.is-active')) 71 | #) 72 | 73 | #clickita.click() 74 | time.sleep(9) ###这里改了,让他可以等一会 75 | 76 | browser.get('http://39.106.50.179/admin/problems') ###这里改了,指向问题列表 77 | create_problem = wait.until( 78 | EC.presence_of_element_located((By.CSS_SELECTOR, '#app > div > div.content-app > div.view > div.panel > div > div.panel-options > button')) ###更新 79 | ) 80 | create_problem.click() 81 | add_data_to_page() 82 | except TimeoutError: 83 | handle_login() 84 | 85 | 86 | def add_data_to_page(): 87 | """ 88 | :param url: 89 | :return: 90 | """ 91 | data = query_data_from_mongo() 92 | for problem in data: 93 | if(problem['sample_input'] != "" and problem['sample_output'] != ""): 94 | try: 95 | browser.get(create_problem_url) 96 | display_id = wait.until( 97 | EC.presence_of_element_located((By.CSS_SELECTOR, 98 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(1) > div.el-col.el-col-6 > div > div > div.el-input > input')) 99 | ) 100 | title = wait.until( 101 | EC.presence_of_element_located((By.CSS_SELECTOR, 102 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(1) > div.el-col.el-col-18 > div > div > div.el-input > input')) 103 | ) 104 | description = wait.until( 105 | EC.presence_of_element_located((By.CSS_SELECTOR, 106 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(2) > div > div > div > div > div.simditor-wrapper > div.markdown-editor > textarea')) 107 | ) 108 | input = wait.until( 109 | EC.presence_of_element_located((By.CSS_SELECTOR, 110 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(3) > div:nth-child(1) > div > div > div > div.simditor-wrapper > div.markdown-editor > textarea')) 111 | ) 112 | output = wait.until( 113 | EC.presence_of_element_located((By.CSS_SELECTOR, 114 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(3) > div:nth-child(2) > div > div > div > div.simditor-wrapper > div.markdown-editor > textarea')) 115 | ) 116 | time_limit = wait.until( 117 | EC.presence_of_element_located((By.CSS_SELECTOR, 118 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(4) > div:nth-child(1) > div > div > div > input')) 119 | ) 120 | memory_limit = wait.until( 121 | EC.presence_of_element_located((By.CSS_SELECTOR, 122 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(4) > div:nth-child(2) > div > div > div > input')) 123 | ) 124 | difficulty = wait.until( 125 | EC.presence_of_element_located((By.CSS_SELECTOR, 126 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(4) > div:nth-child(3) > div > div > div')) 127 | ) 128 | tag = wait.until( 129 | EC.element_to_be_clickable((By.CSS_SELECTOR, 130 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(5) > div:nth-child(2) > div > div > button > span')) 131 | ) 132 | tag.click() 133 | tags = wait.until( 134 | EC.presence_of_element_located((By.CSS_SELECTOR, 135 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(5) > div:nth-child(2) > div > div > div > div.el-input.el-input--mini > input')) 136 | ) 137 | sample_input = wait.until( 138 | EC.presence_of_element_located((By.CSS_SELECTOR, 139 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(6) > div > div > div > div > div > div:nth-child(1) > div > div > div > textarea')) 140 | ) 141 | sample_output = wait.until( 142 | EC.presence_of_element_located((By.CSS_SELECTOR, 143 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(6) > div > div > div > div > div > div:nth-child(2) > div > div > div > textarea')) 144 | ) 145 | ###hint和file_input上下换了个位置,然后两个的元素路线也要改成下边 146 | hint = wait.until( 147 | EC.presence_of_element_located((By.CSS_SELECTOR, 148 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(7) > div > div > div.simditor-wrapper > div.markdown-editor > textarea')) 149 | ) 150 | file_input = wait.until( 151 | EC.presence_of_element_located((By.CSS_SELECTOR, 152 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(11) > div.el-col.el-col-4 > div > div > div > div > button > i')) 153 | ) 154 | source = wait.until( 155 | EC.presence_of_element_located((By.CSS_SELECTOR, 156 | '#app > div > div.content-app > div.problem > div > div > form > div:nth-child(12) > div > div > input')) 157 | ) 158 | create = wait.until( 159 | EC.element_to_be_clickable((By.CSS_SELECTOR, 160 | '#app > div > div.content-app > div.problem > div > div > form > button')) 161 | ) 162 | 163 | # 发送相关的信息 164 | display_id.send_keys(problem['problem_no']) 165 | title.send_keys(problem['problem_name']) 166 | 167 | pat = 'src="(.*?)"' 168 | regex = re.compile(pat, re.S) 169 | is_description = re.search(pat, problem['description'], re.S) 170 | if (is_description != None): 171 | description_text = regex.sub(lambda m: 'src="/public/' + m.group(1) + '"', problem['description']) 172 | else: 173 | description_text = problem['description'] 174 | description.send_keys(description_text) 175 | 176 | # 输入框 177 | is_input = re.search(pat, problem['input'], re.S) 178 | if (is_input != None): 179 | # 通过正则表达式替换 180 | input_text = regex.sub(lambda m: 'src="/public/' + m.group(1) + '"', problem['input']) 181 | else: 182 | input_text = problem['input'] 183 | input.send_keys(input_text) 184 | 185 | # 输出框 186 | is_output = re.search(pat, problem['output'], re.S) 187 | if (is_output != None): 188 | output_text = regex.sub(lambda m: 'src="/public/' + m.group(1) + '"', problem['output']) 189 | else: 190 | output_text = problem['output'] 191 | output.send_keys(output_text) 192 | 193 | # 时间、内存、标签、样例 194 | for i in range(0, 5): 195 | time_limit.send_keys(Keys.BACKSPACE) 196 | time_limit.send_keys('4000') 197 | 198 | memory_limit.clear() 199 | memory_limit.send_keys(problem['memory_limit']) 200 | tags.send_keys('bzoj-problem') 201 | tags.send_keys(Keys.ENTER) 202 | sample_input.send_keys(problem['sample_input']) 203 | sample_output.send_keys(problem['sample_output']) 204 | 205 | is_hint = re.search(pat, problem['hint'], re.S) 206 | if (is_hint != None): 207 | hint_text = regex.sub(lambda m: 'src="/public/' + m.group(1) + '"', problem['hint']) 208 | else: 209 | hint_text = problem['hint'] 210 | hint.send_keys(hint_text) 211 | 212 | ### 点击上传文件,,,,这部分本来在hint的上边,,现在改到了下边 213 | file_dir = zip_dir + "\\" + problem['problem_no'] + ".zip" 214 | file_input.send_keys(file_dir) 215 | 216 | source.send_keys(problem['source']) 217 | time.sleep(5) 218 | 219 | # 提交 220 | create.click() 221 | except TimeoutError: 222 | add_data_to_page() 223 | 224 | def main(): 225 | """ 226 | :return: 227 | """ 228 | handle_login() 229 | 230 | if __name__ == '__main__': 231 | main() 232 | 233 | -------------------------------------------------------------------------------- /AddProblem/config.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | :function: 用户信息配置 4 | :author:hefengen 5 | :date:2018/04/15 6 | :email:hefengen@hotmail.com 7 | """ 8 | 9 | # 用户账号密码 10 | USERNAME = 'root' 11 | PASSWORD = 'xxxxxxxxx' 12 | 13 | 14 | -------------------------------------------------------------------------------- /AddProblem/delete.py: -------------------------------------------------------------------------------- 1 | from function import * 2 | from pymongo import * 3 | 4 | def delete(): 5 | try: 6 | # 创建连接对象 7 | client = MongoClient(host="localhost", port=27017) 8 | 9 | db = client.bzoj 10 | 11 | # 删除编号自1200开始,至1245的所有文档数据 12 | for i in range(0, 46): 13 | count = 1200 14 | count = count+i 15 | print(count) 16 | db.problem.delete_one({"problem_no": str(count)}) 17 | # 所有符合条件数据都删除 18 | print("删除成功") 19 | 20 | except Exception as e: 21 | print(e) 22 | 23 | if __name__ == '__main__': 24 | delete() 25 | -------------------------------------------------------------------------------- /AddProblem/function.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | :function: mongoDB数据库连接函数 4 | :author:hefengen 5 | :date:2018/04/15 6 | :email:hefengen@hotmail.com 7 | """ 8 | 9 | from settings import * 10 | import pymongo 11 | 12 | def get_collection(): 13 | """ 14 | :function: 连接数据库 15 | :return: collection 16 | """ 17 | client = pymongo.MongoClient(MONGO_URI) 18 | db = client[MONGO_DATABASE] 19 | collection = db[MONGO_TABLE] 20 | return collection 21 | 22 | def query_data_from_mongo(): 23 | """ 24 | :function:从mongoDB中取数据 25 | :return: 26 | """ 27 | collection = get_collection() 28 | query_filter = collection.find().sort("problem_no") 29 | # query_filter = collection.find_one({"problem_no": "1019"}) 30 | return query_filter 31 | -------------------------------------------------------------------------------- /AddProblem/pack_sample.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | :function: 通过MongoDB进行配置选择zip文件解压与重新压缩 4 | :author:hefengen 5 | :date:2018/04/15 6 | :email:hefengen@hotmail.com 7 | """ 8 | 9 | from function import * 10 | import os, zipfile 11 | 12 | start_dir = "E:\\Problem\\Testcase\\no" # 需要遍历的目录 13 | zip_dir = "E:\\Problem\\Testcase\\ok" # 解压后的目录 14 | 15 | 16 | def unzip(): 17 | """ 18 | :function: 解压文件 19 | :return: 20 | """ 21 | # 从mongodb中获取数据 22 | data = query_data_from_mongo() 23 | # 根据数据进行解压 24 | for problem in data: 25 | problem_no = problem['problem_no'] 26 | file_name = start_dir + "\\" + problem_no + ".zip" 27 | if (os.path.exists(file_name)): 28 | zip_file = zipfile.ZipFile(file_name) 29 | zip_file.extractall(zip_dir) 30 | else: 31 | print(file_name + "Not Exist") 32 | 33 | def zip_compress(): 34 | """ 35 | :function: 对指定目录下的文件进行压缩 36 | :return: 37 | """ 38 | 39 | data = query_data_from_mongo() 40 | for problem in data: 41 | problem_no = problem['problem_no'] 42 | new_dir = zip_dir + "\\" + problem_no 43 | new_file = new_dir + ".zip" 44 | zip_file = zipfile.ZipFile(new_file, 'w', zipfile.ZIP_DEFLATED) 45 | for dirpath, dirnames, filenames in os.walk(new_dir, topdown=False): 46 | fpath = dirpath.replace(new_dir, '') 47 | fpath = fpath and fpath + os.sep or '' 48 | for filename in filenames: 49 | zip_file.write(os.path.join(dirpath, filename), fpath + filename) 50 | zip_file.close() 51 | 52 | 53 | if __name__ == '__main__': 54 | 55 | unzip() 56 | 57 | # 开启多线程压缩文件 58 | zip_compress() 59 | 60 | 61 | -------------------------------------------------------------------------------- /AddProblem/settings.py: -------------------------------------------------------------------------------- 1 | """ 2 | :function: MongoDB配置以及其他配置 3 | :author:hefengen 4 | :date:2018/04/15 5 | :email:hefengen@hotmail.com 6 | """ 7 | 8 | # request headers 9 | Headers = { 10 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8', 11 | 'Accept-Encoding': 'gzip, deflate', 12 | 'Accept-Language': 'zh-CN,zh;q=0.9', 13 | 'Cache-Control': 'max-age=0', 14 | 'Connection': 'keep-alive', 15 | 'Host': 'www.yiwailian.cn', 16 | 'If-Modified-Since': 'Sat, 24 Feb 2018 13:17:46 GMT', 17 | 'If-None-Match': 'W/"5a9165fa-2f2"', 18 | 'Upgrade-Insecure-Requests': '1', 19 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36', 20 | } 21 | 22 | # MongoDB配置 23 | MONGO_URI = 'localhost' 24 | MONGO_DATABASE = 'bzoj' 25 | MONGO_TABLE = 'problem' 26 | 27 | # Service_Agrs配置 28 | SERVICE_ARGS = ['--load-images=true', '--disk-cache=true'] -------------------------------------------------------------------------------- /BZOJ/bzoj.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moremind/AutoAddProblem/39a949dd8608162972350c08a3920113dfbe595b/BZOJ/bzoj.zip -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 HiCodd 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | BZOJ数据:链接: https://pan.baidu.com/s/1ug7XIaOKxHPyZHV4iNxS8Q?pwd=uyj9 提取码: uyj9 复制这段内容后打开百度网盘手机App,操作更方便哦 2 | 3 | ## 缘由 4 | 5 | QDUOJ的开发以及众多OJ的题库,使得我们自动化添加题目更加轻松。前提是您需要获得各OJ的测试数据。**请注意本方法只用在您的机器上运行即可,不用再OJ服务器运行!** 6 | 7 | ## 概况 8 | 9 | 目前主要模块分为 10 | 11 | * 爬虫部分-WebSpider 12 | * 自动化加题部分-AddProblem 13 | 14 | 用了爬虫scrapy,并且爬虫获取到的数据相对而言文档更容易解析与添加。如果您能够通过pandoc转文件后,将文本提取出来也可以。 15 | 16 | ## 需要安装的软件 17 | 18 | * Python3.6 19 | * MongoDB(V3.4) 20 | * MongoDB 客户端工具-推荐使用RoBo 3T 21 | 22 | Mongo安装教程:[mongo安装](https://blog.csdn.net/heshushun/article/details/77776706)(教程源于-李子园的梦想) 23 | 24 | 25 | ## 数据(某OJ数据&已经解压完成的) 26 | 1000-1999-Testcase数据链接:https://pan.baidu.com/s/1SvCgulQt8rn8m7w0cbMExQ 密码:jg7m 27 | 28 | 2000-2499-Testcase数据链接:https://pan.baidu.com/s/1tgr1M-VsRrzaEjXPmA3NCA 密码:4mpf 29 | 30 | 2500-2999-Testcase数据链接:https://pan.baidu.com/s/1X3HVJTGCFhHo-p6R_G0sSw 密码:scgq 31 | 32 | 3000-3499-Testcase数据链接:https://pan.baidu.com/s/1KjosY3Sr7XbSqbZo4Cdfkg 密码:rdr4 33 | 34 | 3500-3999-Testcase数据链接:https://pan.baidu.com/s/16eJWeheUgKJeuQDGVSKHuw 密码:goii 35 | 36 | 4000-4499-Testcase数据连接:https://pan.baidu.com/s/1Yt-MZHvDPGtQooUgt9yJ1Q 密码:1pu0 37 | 38 | 4500-4999-Testcase数据连接:https://pan.baidu.com/s/1Dz9bDHzkpsx9jOxHSp2IeQ 密码:tv9g 39 | 40 | ## 题目数据 41 | 42 | https://finen-1251602255.cos.ap-shanghai.myqcloud.com/file/bzoj_problem.zip 43 | 44 | 您可以直接通过MongoDB将将该数据导入到您的Mongo中。 45 | 导入命令如下: 46 | ``` 47 | linux下可以使用:mongorestore -d 48 | windows下可以使用:mongorestore.exe -d 49 | 50 | windows下: mongorestore.exe -d bzoj D:\Mongo\bin\dump\bzoj_problem\problem.bson 51 | linux下: mongorestore -d bzoj /usr/DB/bzoj_problem/problem.bson 52 | ``` 53 | 54 | > 如果您对爬虫有兴趣可以参看1.0版本进行对题目数据进行爬取。 55 | https://github.com/hirCodd/AutoAddProblem/blob/master/README_1.0.md 56 | 57 | ## 图片位置 58 | 在本项目中已经提供BZOJ,所以您可以直接在BZOJ解压包中看到JudgeOnline找到upload以及images两个图片目录,你只需要将这个两个目录复制到已经部署好的qduoj的public目录下即可。 59 | ![dir][2] 60 | 61 | ## 安装自动加题所需要的库 62 | 1. webdriver 63 | 2. selenium 64 | 3. pymongo 65 | 66 | 安装方法: 67 | ``` 68 | pip install selenium 69 | pip install pymongo 70 | ``` 71 | 72 | webdriver下载地址:[chromedriver](https://finen-1251602255.cos.ap-shanghai.myqcloud.com/file/chromedriver.exe) 73 | webdriver放置位置如下: 74 | 75 | ![webdriver][4] 76 | 77 | 78 | * 执行自动加题 79 | > 也需要修改您的url以及mongoDB配置,以及OJ的管理员的用户名、密码。 80 | 81 | 1. url在add_problem.py 82 | 2. MongoDB配置在settings.py 83 | 3. OJ用户名以及密码在config.py 84 | 4. 修改zip_dir = "E:\\Problem\\Testcase\\ok"为您重新压缩后的目录。 85 | 86 | * 执行:python add_problem.py 87 | 88 | 89 | ## 某些bug 90 | * 因为BZOJ数据问题,可能导致添加题目突然中止,你可能需要执行删除数据库文档的命令,然后重新执行:python add_problem.py即可再次添加题目。 91 | > 在delete.py中,你需要修改count的值以及for循环的值,删除已经添加得文档。示例如下: 92 | 93 | # 删除编号自1200开始,至1245的所有文档数据 94 | for i in range(0, 46): 95 | count = 1200 96 | count = count+i 97 | print(count) 98 | db.problem.delete_one({"problem_no": str(count)}) 99 | 100 | 101 | ![oj][1] 102 | ![oj1][3] 103 | 104 | 105 | [1]: https://s1.ax2x.com/2018/06/02/71uIJ.png 106 | [2]: https://finen-1251602255.cos.ap-shanghai.myqcloud.com/images/github/autoaddproblem/dir.png 107 | [3]: https://finen-1251602255.cos.ap-shanghai.myqcloud.com/images/github/autoaddproblem/p.png 108 | [4]: https://finen-1251602255.cos.ap-shanghai.myqcloud.com/images/github/autoaddproblem/webdriver.png 109 | -------------------------------------------------------------------------------- /README_1.0.md: -------------------------------------------------------------------------------- 1 | # 缘由 2 | 3 | QDUOJ的开发以及众多OJ的题库,使得我们自动化添加题目更加轻松。前提是您需要获得各OJ的测试数据。 4 | 5 | # 数据(某OJ数据&已经解压完成的) 6 | 1000-1999数据链接:https://pan.baidu.com/s/1SvCgulQt8rn8m7w0cbMExQ 密码:jg7m 7 | 8 | 9 | # 概况 10 | 11 | 目前主要模块分为 12 | 13 | * 爬虫部分-WebSpider 14 | * 自动化加题部分-AddProblem 15 | 16 | 因为懒得解析markdown文档,所有用了爬虫scrapy,并且爬虫获取到的数据相对而言文档更容易解析与添加。如果您能够通过pandoc转文件后,将文本提取出来也可以。 17 | 18 | # 需要安装的软件 19 | 20 | * Python 21 | 22 | * MongoDB(V3.4) 23 | 24 | * MongoDB 客户端工具-推荐使用RoBo 3T 25 | 26 | # 准备 27 | ## 解压获取已准备好的BZOJ 28 | 你需要将BZOJ目录下的压缩文件解压,解压至你的web服务器目录下。 29 | 30 | 31 | ## 安装scrapy 32 | 具体安装文档请见docs。 33 | 34 | 35 | ## 启动爬虫并检查数据库是否存在数据 36 | * 修改爬虫配置并执行爬虫: 37 | 38 | 1. url在bzoj.py 39 | 2. MongoDB数据配置在settings.py 40 | 41 | * 执行爬虫:scrapy crawl bzoj 42 | 43 | ## 解压与重新压缩数据 44 | 45 | * 你需要修改文件路径与压缩路径: 46 | 47 | 路径设置在pack_sample.py。 48 | start_dir = "E:\\Problem\\Testcase\\no" # 需要遍历的目录 49 | zip_dir = "E:\\Problem\\Testcase\\ok" # 解压后的目录 50 | 51 | 52 | 你需要执行以下命令: 53 | 54 | python pack_sample.py 55 | # 如果您懂python程序设计,可以写多线程解压缩。 56 | 57 | ## 图片位置 58 | 在本项目中已经提供BZOJ,所以您可以直接在BZOJ解压包中看到JudgeOnline找到upload以及images两个图片目录,你只需要将这个两个目录复制到已经部署好的qduoj的 public目录下。 59 | ![dir][2] 60 | 61 | ## 安装自动加题所需要的库 62 | 1. webdriver 63 | 2. selenium 64 | 3. pymongo 65 | 66 | * 执行自动加题 67 | > 也需要修改您的url以及mongoDB配置,以及OJ的管理员的用户名、密码。 68 | 69 | 1. url在add_problem.py 70 | 2. MongoDB配置在settings.py 71 | 3. OJ用户名以及密码在config.py 72 | 4. 修改zip_dir = "E:\\Problem\\Testcase\\ok"为您重新压缩后的目录。 73 | 74 | * 执行:python add_problem.py 75 | 76 | 77 | # 某些bug 78 | * 因为BZOJ数据问题,可能导致添加题目突然中止,你可能需要执行删除数据库文档的命令,然后重新执行:python add_problem.py即可再次添加题目。 79 | > 在delete.py中,你需要修改count的值以及for循环的值,删除已经添加得文档。示例如下: 80 | 81 | # 删除编号自1200开始,至1245的所有文档数据 82 | for i in range(0, 46): 83 | count = 1200 84 | count = count+i 85 | print(count) 86 | db.problem.delete_one({"problem_no": str(count)}) 87 | 88 | 89 | ![oj][1] 90 | 91 | [1]: https://s1.ax2x.com/2018/06/02/71uIJ.png 92 | [2]: https://finen-1251602255.cos.ap-shanghai.myqcloud.com/images/github/autoaddproblem/dir.png -------------------------------------------------------------------------------- /WebSpider/OnlineJudgeProblem_BZOJ/OnlineJudgeProblem_BZOJ/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moremind/AutoAddProblem/39a949dd8608162972350c08a3920113dfbe595b/WebSpider/OnlineJudgeProblem_BZOJ/OnlineJudgeProblem_BZOJ/__init__.py -------------------------------------------------------------------------------- /WebSpider/OnlineJudgeProblem_BZOJ/OnlineJudgeProblem_BZOJ/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moremind/AutoAddProblem/39a949dd8608162972350c08a3920113dfbe595b/WebSpider/OnlineJudgeProblem_BZOJ/OnlineJudgeProblem_BZOJ/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /WebSpider/OnlineJudgeProblem_BZOJ/OnlineJudgeProblem_BZOJ/__pycache__/items.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moremind/AutoAddProblem/39a949dd8608162972350c08a3920113dfbe595b/WebSpider/OnlineJudgeProblem_BZOJ/OnlineJudgeProblem_BZOJ/__pycache__/items.cpython-36.pyc -------------------------------------------------------------------------------- /WebSpider/OnlineJudgeProblem_BZOJ/OnlineJudgeProblem_BZOJ/__pycache__/pipelines.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moremind/AutoAddProblem/39a949dd8608162972350c08a3920113dfbe595b/WebSpider/OnlineJudgeProblem_BZOJ/OnlineJudgeProblem_BZOJ/__pycache__/pipelines.cpython-36.pyc -------------------------------------------------------------------------------- /WebSpider/OnlineJudgeProblem_BZOJ/OnlineJudgeProblem_BZOJ/__pycache__/settings.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moremind/AutoAddProblem/39a949dd8608162972350c08a3920113dfbe595b/WebSpider/OnlineJudgeProblem_BZOJ/OnlineJudgeProblem_BZOJ/__pycache__/settings.cpython-36.pyc -------------------------------------------------------------------------------- /WebSpider/OnlineJudgeProblem_BZOJ/OnlineJudgeProblem_BZOJ/items.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your scraped items 4 | # 5 | # See documentation in: 6 | # https://doc.scrapy.org/en/latest/topics/items.html 7 | 8 | from scrapy import Item, Field 9 | 10 | 11 | class ProblemItem(Item): 12 | # define the fields for your item here like: 13 | # name = scrapy.Field() 14 | problem_no = Field() 15 | problem_name = Field() 16 | description = Field() 17 | input = Field() 18 | output = Field() 19 | sample_input = Field() 20 | sample_output = Field() 21 | hint = Field() 22 | source = Field() 23 | memory_limit = Field() 24 | time_limit = Field() 25 | 26 | -------------------------------------------------------------------------------- /WebSpider/OnlineJudgeProblem_BZOJ/OnlineJudgeProblem_BZOJ/middlewares.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your spider middleware 4 | # 5 | # See documentation in: 6 | # https://doc.scrapy.org/en/latest/topics/spider-middleware.html 7 | 8 | from scrapy import signals 9 | 10 | 11 | class OnlinejudgeproblemBzojSpiderMiddleware(object): 12 | # Not all methods need to be defined. If a method is not defined, 13 | # scrapy acts as if the spider middleware does not modify the 14 | # passed objects. 15 | 16 | @classmethod 17 | def from_crawler(cls, crawler): 18 | # This method is used by Scrapy to create your spiders. 19 | s = cls() 20 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) 21 | return s 22 | 23 | def process_spider_input(self, response, spider): 24 | # Called for each response that goes through the spider 25 | # middleware and into the spider. 26 | 27 | # Should return None or raise an exception. 28 | return None 29 | 30 | def process_spider_output(self, response, result, spider): 31 | # Called with the results returned from the Spider, after 32 | # it has processed the response. 33 | 34 | # Must return an iterable of Request, dict or Item objects. 35 | for i in result: 36 | yield i 37 | 38 | def process_spider_exception(self, response, exception, spider): 39 | # Called when a spider or process_spider_input() method 40 | # (from other spider middleware) raises an exception. 41 | 42 | # Should return either None or an iterable of Response, dict 43 | # or Item objects. 44 | pass 45 | 46 | def process_start_requests(self, start_requests, spider): 47 | # Called with the start requests of the spider, and works 48 | # similarly to the process_spider_output() method, except 49 | # that it doesn’t have a response associated. 50 | 51 | # Must return only requests (not items). 52 | for r in start_requests: 53 | yield r 54 | 55 | def spider_opened(self, spider): 56 | spider.logger.info('Spider opened: %s' % spider.name) 57 | 58 | 59 | class OnlinejudgeproblemBzojDownloaderMiddleware(object): 60 | # Not all methods need to be defined. If a method is not defined, 61 | # scrapy acts as if the downloader middleware does not modify the 62 | # passed objects. 63 | 64 | @classmethod 65 | def from_crawler(cls, crawler): 66 | # This method is used by Scrapy to create your spiders. 67 | s = cls() 68 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) 69 | return s 70 | 71 | def process_request(self, request, spider): 72 | # Called for each request that goes through the downloader 73 | # middleware. 74 | 75 | # Must either: 76 | # - return None: continue processing this request 77 | # - or return a Response object 78 | # - or return a Request object 79 | # - or raise IgnoreRequest: process_exception() methods of 80 | # installed downloader middleware will be called 81 | return None 82 | 83 | def process_response(self, request, response, spider): 84 | # Called with the response returned from the downloader. 85 | 86 | # Must either; 87 | # - return a Response object 88 | # - return a Request object 89 | # - or raise IgnoreRequest 90 | return response 91 | 92 | def process_exception(self, request, exception, spider): 93 | # Called when a download handler or a process_request() 94 | # (from other downloader middleware) raises an exception. 95 | 96 | # Must either: 97 | # - return None: continue processing this exception 98 | # - return a Response object: stops process_exception() chain 99 | # - return a Request object: stops process_exception() chain 100 | pass 101 | 102 | def spider_opened(self, spider): 103 | spider.logger.info('Spider opened: %s' % spider.name) 104 | -------------------------------------------------------------------------------- /WebSpider/OnlineJudgeProblem_BZOJ/OnlineJudgeProblem_BZOJ/pipelines.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define your item pipelines here 4 | # 5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting 6 | # See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html 7 | import pymongo 8 | 9 | class OnlinejudgeproblemBzojPipeline(object): 10 | def process_item(self, item, spider): 11 | return item 12 | 13 | 14 | class MongoPipeline(object): 15 | 16 | def __init__(self, mongo_uri, mongo_db): 17 | self.mongo_uri = mongo_uri 18 | self.mongo_db = mongo_db 19 | 20 | @classmethod 21 | def from_crawler(cls, crawler): 22 | return cls( 23 | mongo_uri=crawler.settings.get('MONGO_URI'), 24 | mongo_db=crawler.settings.get('MONGO_DATABASE', 'items') 25 | ) 26 | 27 | def open_spider(self, spider): 28 | self.client = pymongo.MongoClient(self.mongo_uri) 29 | self.db = self.client[self.mongo_db] 30 | 31 | def close_spider(self, spider): 32 | self.client.close() 33 | 34 | def process_item(self, item, spider): 35 | """ 36 | :function: 更新至mongo数据库 37 | :param item: ProblemItem 38 | :param spider: 39 | :return: item 40 | """ 41 | self.db['problem'].update({'problem_no': item['problem_no']}, {'$set': item}, True) 42 | # book_info = dict(item) 43 | # self.db['problem'].insert(book_info) 44 | return item 45 | 46 | 47 | -------------------------------------------------------------------------------- /WebSpider/OnlineJudgeProblem_BZOJ/OnlineJudgeProblem_BZOJ/settings.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Scrapy settings for OnlineJudgeProblem_BZOJ project 4 | # 5 | # For simplicity, this file contains only settings considered important or 6 | # commonly used. You can find more settings consulting the documentation: 7 | # 8 | # https://doc.scrapy.org/en/latest/topics/settings.html 9 | # https://doc.scrapy.org/en/latest/topics/downloader-middleware.html 10 | # https://doc.scrapy.org/en/latest/topics/spider-middleware.html 11 | 12 | BOT_NAME = 'OnlineJudgeProblem_BZOJ' 13 | 14 | SPIDER_MODULES = ['OnlineJudgeProblem_BZOJ.spiders'] 15 | NEWSPIDER_MODULE = 'OnlineJudgeProblem_BZOJ.spiders' 16 | 17 | 18 | # Crawl responsibly by identifying yourself (and your website) on the user-agent 19 | #USER_AGENT = 'OnlineJudgeProblem_BZOJ (+http://www.yourdomain.com)' 20 | 21 | # Obey robots.txt rules 22 | ROBOTSTXT_OBEY = False 23 | 24 | # Configure maximum concurrent requests performed by Scrapy (default: 16) 25 | CONCURRENT_REQUESTS = 100 26 | 27 | # Configure a delay for requests for the same website (default: 0) 28 | # See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay 29 | # See also autothrottle settings and docs 30 | #DOWNLOAD_DELAY = 3 31 | # The download delay setting will honor only one of: 32 | #CONCURRENT_REQUESTS_PER_DOMAIN = 16 33 | #CONCURRENT_REQUESTS_PER_IP = 16 34 | 35 | # Disable cookies (enabled by default) 36 | COOKIES_ENABLED = False 37 | 38 | # Disable Telnet Console (enabled by default) 39 | #TELNETCONSOLE_ENABLED = False 40 | 41 | # Override the default request headers: 42 | #DEFAULT_REQUEST_HEADERS = { 43 | # 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 44 | # 'Accept-Language': 'en', 45 | #} 46 | 47 | # Enable or disable spider middlewares 48 | # See https://doc.scrapy.org/en/latest/topics/spider-middleware.html 49 | #SPIDER_MIDDLEWARES = { 50 | # 'OnlineJudgeProblem_BZOJ.middlewares.OnlinejudgeproblemBzojSpiderMiddleware': 543, 51 | #} 52 | 53 | # Enable or disable downloader middlewares 54 | # See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html 55 | #DOWNLOADER_MIDDLEWARES = { 56 | # 'OnlineJudgeProblem_BZOJ.middlewares.OnlinejudgeproblemBzojDownloaderMiddleware': 543, 57 | #} 58 | 59 | # Enable or disable extensions 60 | # See https://doc.scrapy.org/en/latest/topics/extensions.html 61 | #EXTENSIONS = { 62 | # 'scrapy.extensions.telnet.TelnetConsole': None, 63 | #} 64 | 65 | # Configure item pipelines 66 | # See https://doc.scrapy.org/en/latest/topics/item-pipeline.html 67 | ITEM_PIPELINES = { 68 | 'OnlineJudgeProblem_BZOJ.pipelines.MongoPipeline': 300, 69 | } 70 | 71 | # Enable and configure the AutoThrottle extension (disabled by default) 72 | # See https://doc.scrapy.org/en/latest/topics/autothrottle.html 73 | AUTOTHROTTLE_ENABLED = True 74 | # The initial download delay 75 | # AUTOTHROTTLE_START_DELAY = 5 76 | # The maximum download delay to be set in case of high latencies 77 | #AUTOTHROTTLE_MAX_DELAY = 60 78 | # The average number of requests Scrapy should be sending in parallel to 79 | # each remote server 80 | #AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0 81 | # Enable showing throttling stats for every response received: 82 | #AUTOTHROTTLE_DEBUG = False 83 | 84 | # Enable and configure HTTP caching (disabled by default) 85 | # See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings 86 | #HTTPCACHE_ENABLED = True 87 | #HTTPCACHE_EXPIRATION_SECS = 0 88 | #HTTPCACHE_DIR = 'httpcache' 89 | #HTTPCACHE_IGNORE_HTTP_CODES = [] 90 | #HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage' 91 | 92 | MONGO_URI = '127.0.0.1' 93 | MONGO_DATABASE = 'bzoj' 94 | 95 | DB_HOST = 'localhost' 96 | DB_PORT = 3306 97 | DB_USER = 'root' 98 | DB_PASSWORD = 'admin@123456' 99 | DB_NAME = 'mysql' 100 | DB_CHARSET = 'utf8' 101 | 102 | 103 | -------------------------------------------------------------------------------- /WebSpider/OnlineJudgeProblem_BZOJ/OnlineJudgeProblem_BZOJ/spiders/__init__.py: -------------------------------------------------------------------------------- 1 | # This package will contain the spiders of your Scrapy project 2 | # 3 | # Please refer to the documentation for information on how to create and manage 4 | # your spiders. 5 | -------------------------------------------------------------------------------- /WebSpider/OnlineJudgeProblem_BZOJ/OnlineJudgeProblem_BZOJ/spiders/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moremind/AutoAddProblem/39a949dd8608162972350c08a3920113dfbe595b/WebSpider/OnlineJudgeProblem_BZOJ/OnlineJudgeProblem_BZOJ/spiders/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /WebSpider/OnlineJudgeProblem_BZOJ/OnlineJudgeProblem_BZOJ/spiders/__pycache__/bzoj.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moremind/AutoAddProblem/39a949dd8608162972350c08a3920113dfbe595b/WebSpider/OnlineJudgeProblem_BZOJ/OnlineJudgeProblem_BZOJ/spiders/__pycache__/bzoj.cpython-36.pyc -------------------------------------------------------------------------------- /WebSpider/OnlineJudgeProblem_BZOJ/OnlineJudgeProblem_BZOJ/spiders/bzoj.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import scrapy 3 | from scrapy import Request, Spider 4 | from urllib import parse 5 | import re 6 | from bs4 import BeautifulSoup 7 | from lxml import etree 8 | from OnlineJudgeProblem_BZOJ.items import ProblemItem 9 | 10 | 11 | class BzojSpider(scrapy.Spider): 12 | name = 'bzoj' 13 | allowed_domains = [] 14 | start_urls = 'http://172.16.72.4:83/' 15 | url = 'http://172.16.72.4:83/' 16 | 17 | def start_requests(self): 18 | yield Request(self.start_urls, callback=self.parse, dont_filter=False) 19 | 20 | def parse(self, response): 21 | """ 22 | :function: 解析所有题目的url 23 | :param response: response 24 | :return: 25 | """ 26 | 27 | # 解析所有题目url 28 | problem_urls = response.xpath('//table[@class="ui celled table"]/tbody//tr//td//a//@href').extract() 29 | 30 | for problem_url in problem_urls: 31 | 32 | detail_url = parse.urljoin(self.url, problem_url) 33 | 34 | yield Request(detail_url, self.parse_problem, dont_filter=False) 35 | 36 | # detail_url = 'http://172.16.72.4:83/JudgeOnline/1169.html' 37 | # yield Request(detail_url, self.parse_problem, dont_filter=False) 38 | 39 | def parse_problem(self, response): 40 | """ 41 | :function:解析 42 | :param response: 43 | :return: 44 | """ 45 | # 题号与题目数据 46 | problem = response.xpath('//div[@class="ui existing segment"]/center/h1//text()').extract_first() 47 | problem = str(problem) 48 | problem_no = re.sub("\D+", "", problem)[0:4] 49 | problem_names = re.findall(r": (.*)", problem, re.S) 50 | problem_name = problem_names[0] 51 | 52 | # 题目具体数据处理 53 | contents = response.xpath('.//div[@class="content"]') 54 | description = ''.join(contents[0].xpath(".").extract()).strip() 55 | input = ''.join(contents[1].xpath(".").extract()).strip() 56 | output = ''.join(contents[2].xpath(".").extract()).strip() 57 | sample_input = ''.join(contents[3].xpath(".//text()").extract()).strip() 58 | sample_output = ''.join(contents[4].xpath(".//text()").extract()).strip() 59 | hint = ''.join(contents[5].xpath(".").extract()).strip() 60 | source = ''.join(contents[6].xpath(".//text()").extract()).strip() 61 | 62 | if(hint == ""): 63 | hint = "没有提示" 64 | if(source == ""): 65 | source = "bzoj数据" 66 | 67 | memory_limit = 512 68 | time_limit = 1500 69 | 70 | problem_item = ProblemItem() 71 | for field in problem_item.fields: 72 | try: 73 | problem_item[field] = eval(field) 74 | except NameError: 75 | self.logger.debug('Field is Not Defined' + field) 76 | 77 | yield problem_item 78 | -------------------------------------------------------------------------------- /WebSpider/OnlineJudgeProblem_BZOJ/scrapy.cfg: -------------------------------------------------------------------------------- 1 | # Automatically created by: scrapy startproject 2 | # 3 | # For more information about the [deploy] section see: 4 | # https://scrapyd.readthedocs.io/en/latest/deploy.html 5 | 6 | [settings] 7 | default = OnlineJudgeProblem_BZOJ.settings 8 | 9 | [deploy] 10 | #url = http://localhost:6800/ 11 | project = OnlineJudgeProblem_BZOJ 12 | -------------------------------------------------------------------------------- /docs/Scrapy安装详解.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moremind/AutoAddProblem/39a949dd8608162972350c08a3920113dfbe595b/docs/Scrapy安装详解.pdf -------------------------------------------------------------------------------- /docs/scrapy安装文档.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moremind/AutoAddProblem/39a949dd8608162972350c08a3920113dfbe595b/docs/scrapy安装文档.pdf --------------------------------------------------------------------------------