├── .gitignore ├── AddProblem ├── add_problem.py ├── add_problemV1.1.py ├── add_problem_V2.py ├── config.py ├── delete.py ├── function.py ├── pack_sample.py └── settings.py ├── BZOJ └── bzoj.zip ├── LICENSE ├── README.md ├── README_1.0.md ├── WebSpider └── OnlineJudgeProblem_BZOJ │ ├── OnlineJudgeProblem_BZOJ │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── items.cpython-36.pyc │ │ ├── pipelines.cpython-36.pyc │ │ └── settings.cpython-36.pyc │ ├── items.py │ ├── middlewares.py │ ├── pipelines.py │ ├── settings.py │ └── spiders │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ └── bzoj.cpython-36.pyc │ │ └── bzoj.py │ └── scrapy.cfg └── docs ├── Scrapy安装详解.pdf └── scrapy安装文档.pdf /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | WebSpider/OnlineJudgeProblem_BZOJ/.idea/misc.xml 3 | WebSpider/OnlineJudgeProblem_BZOJ/.idea/modules.xml 4 | WebSpider/OnlineJudgeProblem_BZOJ/.idea/OnlineJudgeProblem_BZOJ.iml 5 | WebSpider/OnlineJudgeProblem_BZOJ/.idea/workspace.xml 6 | -------------------------------------------------------------------------------- /AddProblem/add_problem.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | :function: 通过python与selenium将mongoDB中的题目数据自动添加网站中 4 | :author:hefengen 5 | :date:2018/04/14 6 | :email:hefengen@hotmail.com 7 | """ 8 | import time 9 | 10 | import pymongo 11 | 12 | import requests 13 | from selenium.webdriver.common.keys import Keys 14 | from config import * 15 | from function import * 16 | from selenium import webdriver 17 | from selenium.webdriver.support.ui import WebDriverWait 18 | from selenium.webdriver.support import expected_conditions as EC 19 | from selenium.webdriver.common.by import By 20 | import re 21 | 22 | 23 | # 题目的url 24 | problem_url = 'http://172.16.72.4/admin/problems' 25 | 26 | # 创建题目的url 27 | create_problem_url = 
create_problem_url = 'http://172.16.72.4/admin/problem/create'

# Admin login page (the create-problem form lives behind it).
url = 'http://172.16.72.4/admin'

# The Chrome driver is created lazily by _setup_browser() so that merely
# importing this module does not launch a browser (no import-time side effects).
browser = None
wait = None

zip_dir = "E:\\Problem\\Testcase\\ok"  # directory holding the re-packed testcase zips

# Matches src="..." attributes; re.S so values spanning newlines still match.
_IMG_SRC_RE = re.compile(r'src="(.*?)"', re.S)

# All form fields live under this root selector on the create-problem page.
_FORM = '#app > div > div.content-app > div.problem > div > div > form'


def rewrite_img_src(text):
    """Return *text* with every src="X" attribute rewritten to src="/public/X".

    The crawler stored image paths relative to the original site; the OJ
    serves them from its /public/ directory.  Text without src attributes
    is returned unchanged (re.sub is a no-op), so the original's separate
    re.search pre-check was redundant.
    """
    return _IMG_SRC_RE.sub(lambda m: 'src="/public/' + m.group(1) + '"', text)


def _setup_browser():
    """Create the module-level Chrome driver and wait object (idempotent)."""
    global browser, wait
    if browser is None:
        browser = webdriver.Chrome(service_args=SERVICE_ARGS)
        wait = WebDriverWait(browser, 10)
        browser.set_window_size(1400, 900)


def _css(selector, clickable=False):
    """Wait for and return the element at CSS *selector*.

    With clickable=True the element must also be visible and enabled.
    """
    condition = EC.element_to_be_clickable if clickable else EC.presence_of_element_located
    return wait.until(condition((By.CSS_SELECTOR, selector)))


def handle_login(max_retries=3):
    """Log in to the admin site, then start importing problems.

    Retries at most *max_retries* times on a selenium timeout.  The original
    recursed unboundedly on failure and caught the builtin TimeoutError,
    which selenium never raises (it raises TimeoutException).
    """
    from selenium.common.exceptions import TimeoutException
    _setup_browser()
    for _ in range(max_retries):
        try:
            browser.get(url=url)
            username = _css('#app > form > div:nth-child(2) > div > div.el-input > input')
            password = _css('#app > form > div:nth-child(3) > div > div.el-input > input')
            submit = _css('#app > form > div:nth-child(4) > div > button', clickable=True)
            # Fill the credentials from config.py and submit.
            username.send_keys(USERNAME)
            password.send_keys(PASSWORD)
            submit.click()

            create_problem = _css(
                '#app > div > div.content-app > div.view > div.panel > div '
                '> div.panel-options > button > span')
            create_problem.click()
            add_data_to_page()
            return
        except TimeoutException:
            continue


def add_data_to_page():
    """Fill and submit the create-problem form once per MongoDB document.

    Documents without both sample input and sample output are skipped.
    On a timeout the current problem is skipped and the loop continues;
    the original restarted the whole import from scratch, re-submitting
    every problem that had already been added.
    """
    from selenium.common.exceptions import TimeoutException
    data = query_data_from_mongo()
    for problem in data:
        if problem['sample_input'] == "" or problem['sample_output'] == "":
            continue
        try:
            browser.get(create_problem_url)
            _fill_form(problem)
        except TimeoutException:
            continue


def _fill_form(problem):
    """Locate every field of the create-problem form, fill it from *problem*, submit."""
    display_id = _css(_FORM + ' > div:nth-child(1) > div.el-col.el-col-6 > div > div > div.el-input > input')
    title = _css(_FORM + ' > div:nth-child(1) > div.el-col.el-col-18 > div > div > div.el-input > input')
    description = _css(_FORM + ' > div:nth-child(2) > div > div > div > div > div.simditor-wrapper > div.markdown-editor > textarea')
    input_field = _css(_FORM + ' > div:nth-child(3) > div:nth-child(1) > div > div > div > div.simditor-wrapper > div.markdown-editor > textarea')
    output_field = _css(_FORM + ' > div:nth-child(3) > div:nth-child(2) > div > div > div > div.simditor-wrapper > div.markdown-editor > textarea')
    time_limit = _css(_FORM + ' > div:nth-child(4) > div:nth-child(1) > div > div > div > input')
    memory_limit = _css(_FORM + ' > div:nth-child(4) > div:nth-child(2) > div > div > div > input')
    # Open the tag selector, then grab its text input.
    _css(_FORM + ' > div:nth-child(5) > div:nth-child(2) > div > div > button > span', clickable=True).click()
    tags = _css(_FORM + ' > div:nth-child(5) > div:nth-child(2) > div > div > div > div.el-input.el-input--mini > input')
    sample_input = _css(_FORM + ' > div:nth-child(6) > div > div > div > div > div > div:nth-child(1) > div > div > div > textarea')
    sample_output = _css(_FORM + ' > div:nth-child(6) > div > div > div > div > div > div:nth-child(2) > div > div > div > textarea')
    file_input = _css(_FORM + ' > div:nth-child(10) > div.el-col.el-col-4 > div > div > div > div > input')
    hint = _css(_FORM + ' > div:nth-child(11) > div > div > div.simditor-wrapper > div.markdown-editor > textarea')
    source = _css(_FORM + ' > div:nth-child(12) > div > div > input')
    create = _css(_FORM + ' > button', clickable=True)

    display_id.send_keys(problem['problem_no'])
    title.send_keys(problem['problem_name'])
    description.send_keys(rewrite_img_src(problem['description']))
    input_field.send_keys(rewrite_img_src(problem['input']))
    output_field.send_keys(rewrite_img_src(problem['output']))

    # Clear the pre-filled time limit (up to 5 digits) and force 4000 ms.
    for _ in range(5):
        time_limit.send_keys(Keys.BACKSPACE)
    time_limit.send_keys('4000')

    memory_limit.clear()
    memory_limit.send_keys(problem['memory_limit'])
    tags.send_keys('bzoj-problem')
    tags.send_keys(Keys.ENTER)
    sample_input.send_keys(problem['sample_input'])
    sample_output.send_keys(problem['sample_output'])

    # Upload the pre-packed testcase archive for this problem.
    file_input.send_keys(zip_dir + "\\" + problem['problem_no'] + ".zip")

    hint.send_keys(rewrite_img_src(problem['hint']))
    source.send_keys(problem['source'])
    time.sleep(5)  # give the testcase upload time to finish before submitting
    create.click()


def main():
    """Entry point: log in and import every problem from MongoDB."""
    handle_login()


if __name__ == '__main__':
    main()
import pymongo

import re

import requests
from selenium.webdriver.common.keys import Keys
from config import *
from function import *
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By


# Problem-list / create-problem / login URLs of the target OJ admin.
problem_url = 'http://192.168.94.137/admin/problems'
create_problem_url = 'http://192.168.94.137/admin/problem/create'
url = 'http://192.168.94.137/admin'

# Drive a real Chrome via selenium.
browser = webdriver.Chrome(service_args=SERVICE_ARGS)
wait = WebDriverWait(browser, 10)
browser.set_window_size(1400, 900)

zip_dir = "E:\\Problem\\Testcase\\ok\\"  # re-packed testcase zips (note trailing backslash)

# Matches src="..." attributes; re.S so values spanning newlines still match.
_IMG_SRC_RE = re.compile(r'src="(.*?)"', re.S)

# All form fields live under this root selector on the create-problem page.
_FORM = '#app > div > div.content-app > div.problem > div > div > form'


def rewrite_img_src(text):
    """Rewrite every src="X" in *text* to src="/public/X" (no-op without matches)."""
    return _IMG_SRC_RE.sub(lambda m: 'src="/public/' + m.group(1) + '"', text)


def _css(selector, clickable=False):
    """Wait for the element at CSS *selector*; clickable=True also requires it enabled."""
    condition = EC.element_to_be_clickable if clickable else EC.presence_of_element_located
    return wait.until(condition((By.CSS_SELECTOR, selector)))


def handle_login(max_retries=3):
    """Log in to the admin site, open the problem list and start the import.

    Bounded retry on selenium TimeoutException; the original recursed
    unboundedly and caught the builtin TimeoutError, which selenium never
    raises.
    """
    for _ in range(max_retries):
        try:
            browser.get(url=url)
            _css('#app > form > div:nth-child(2) > div > div.el-input > input').send_keys(USERNAME)
            _css('#app > form > div:nth-child(3) > div > div > input').send_keys(PASSWORD)
            _css('#app > form > div:nth-child(4) > div > button', clickable=True).click()

            time.sleep(9)  # let the dashboard finish loading after login

            browser.get('http://192.168.94.137/admin/problems')
            _css('#app > div > div.content-app > div.view > div.panel > div '
                 '> div.panel-options > button').click()
            add_data_to_page()
            return
        except TimeoutException:
            continue


def add_data_to_page():
    """Submit one create-problem form per MongoDB document that has samples.

    On a timeout the current problem is skipped; the original restarted the
    whole import, re-submitting everything already added.
    """
    for problem in query_data_from_mongo():
        if problem['sample_input'] == "" or problem['sample_output'] == "":
            continue
        try:
            browser.get(create_problem_url)
            _fill_form(problem)
        except TimeoutException:
            continue


def _fill_form(problem):
    """Locate the form fields, fill them from *problem* and submit."""
    display_id = _css(_FORM + ' > div:nth-child(1) > div.el-col.el-col-6 > div > div > div.el-input > input')
    title = _css(_FORM + ' > div:nth-child(1) > div.el-col.el-col-18 > div > div > div.el-input > input')
    description = _css(_FORM + ' > div:nth-child(2) > div > div > div > div > div.simditor-wrapper > div.markdown-editor > textarea')
    input_field = _css(_FORM + ' > div:nth-child(3) > div:nth-child(1) > div > div > div > div.simditor-wrapper > div.markdown-editor > textarea')
    output_field = _css(_FORM + ' > div:nth-child(3) > div:nth-child(2) > div > div > div > div.simditor-wrapper > div.markdown-editor > textarea')
    time_limit = _css(_FORM + ' > div:nth-child(4) > div:nth-child(1) > div > div > div > input')
    memory_limit = _css(_FORM + ' > div:nth-child(4) > div:nth-child(2) > div > div > div > input')
    # Open the tag selector, then grab its text input.
    _css(_FORM + ' > div:nth-child(5) > div:nth-child(2) > div > div > button > span', clickable=True).click()
    tags = _css(_FORM + ' > div:nth-child(5) > div:nth-child(2) > div > div > div > div.el-input.el-input--mini > input')
    sample_input = _css(_FORM + ' > div:nth-child(6) > div > div > div > div > div > div:nth-child(1) > div > div > div > textarea')
    sample_output = _css(_FORM + ' > div:nth-child(6) > div > div > div > div > div > div:nth-child(2) > div > div > div > textarea')
    file_input = _css(_FORM + ' > div:nth-child(11) > div.el-col.el-col-4 > div > div > div > div > input')
    source = _css(_FORM + ' > div:nth-child(12) > div > div > input')
    create = _css(_FORM + ' > button', clickable=True)

    display_id.send_keys(problem['problem_no'])
    title.send_keys(problem['problem_name'])
    description.send_keys(rewrite_img_src(problem['description']))
    input_field.send_keys(rewrite_img_src(problem['input']))
    output_field.send_keys(rewrite_img_src(problem['output']))

    # Clear the pre-filled time limit (up to 5 digits) and force 4000 ms.
    for _ in range(5):
        time_limit.send_keys(Keys.BACKSPACE)
    time_limit.send_keys('4000')

    memory_limit.clear()
    memory_limit.send_keys(problem['memory_limit'])
    tags.send_keys('bzoj-problem')
    tags.send_keys(Keys.ENTER)
    sample_input.send_keys(problem['sample_input'])
    sample_output.send_keys(problem['sample_output'])

    # zip_dir already ends with a backslash; the original appended another
    # "\\", producing a doubled separator in the upload path.
    file_input.send_keys(zip_dir + problem['problem_no'] + ".zip")

    source.send_keys(problem['source'])
    time.sleep(5)  # let the testcase upload finish before submitting
    create.click()


def main():
    """Entry point: log in and import every problem from MongoDB."""
    handle_login()


if __name__ == '__main__':
    main()
# NOTE(review): the original module-level literal  zip_dir = "F:\a\"  is a
# SyntaxError — the trailing backslash escapes the closing quote.  Re-declared
# here with properly escaped separators; confirm the intended target directory.
zip_dir = "F:\\a"

# Matches src="..." attributes; re.S so values spanning newlines still match.
_IMG_SRC_RE = re.compile(r'src="(.*?)"', re.S)

# All form fields live under this root selector on the create-problem page.
_FORM = '#app > div > div.content-app > div.problem > div > div > form'


def rewrite_img_src(text):
    """Rewrite every src="X" in *text* to src="/public/X" (no-op without matches)."""
    return _IMG_SRC_RE.sub(lambda m: 'src="/public/' + m.group(1) + '"', text)


def _css(selector, clickable=False):
    """Wait for the element at CSS *selector*; clickable=True also requires it enabled."""
    condition = EC.element_to_be_clickable if clickable else EC.presence_of_element_located
    return wait.until(condition((By.CSS_SELECTOR, selector)))


def handle_login(max_retries=3):
    """Log in to the admin site, open the problem list and start the import.

    Bounded retry on selenium TimeoutException; the original recursed
    unboundedly and caught the builtin TimeoutError, which selenium never
    raises.
    """
    from selenium.common.exceptions import TimeoutException
    for _ in range(max_retries):
        try:
            browser.get(url=url)
            _css('#app > form > div:nth-child(2) > div > div.el-input > input').send_keys(USERNAME)
            _css('#app > form > div:nth-child(3) > div > div > input').send_keys(PASSWORD)
            _css('#app > form > div:nth-child(4) > div > button', clickable=True).click()

            time.sleep(9)  # let the dashboard finish loading after login

            # Jump straight to the problem list, then open the create form.
            browser.get('http://39.106.50.179/admin/problems')
            _css('#app > div > div.content-app > div.view > div.panel > div '
                 '> div.panel-options > button').click()
            add_data_to_page()
            return
        except TimeoutException:
            continue


def add_data_to_page():
    """Submit one create-problem form per MongoDB document that has samples.

    On a timeout the current problem is skipped; the original restarted the
    whole import, re-submitting everything already added.
    """
    from selenium.common.exceptions import TimeoutException
    for problem in query_data_from_mongo():
        if problem['sample_input'] == "" or problem['sample_output'] == "":
            continue
        try:
            browser.get(create_problem_url)
            _fill_form(problem)
        except TimeoutException:
            continue


def _fill_form(problem):
    """Locate the form fields, fill them from *problem* and submit."""
    display_id = _css(_FORM + ' > div:nth-child(1) > div.el-col.el-col-6 > div > div > div.el-input > input')
    title = _css(_FORM + ' > div:nth-child(1) > div.el-col.el-col-18 > div > div > div.el-input > input')
    description = _css(_FORM + ' > div:nth-child(2) > div > div > div > div > div.simditor-wrapper > div.markdown-editor > textarea')
    input_field = _css(_FORM + ' > div:nth-child(3) > div:nth-child(1) > div > div > div > div.simditor-wrapper > div.markdown-editor > textarea')
    output_field = _css(_FORM + ' > div:nth-child(3) > div:nth-child(2) > div > div > div > div.simditor-wrapper > div.markdown-editor > textarea')
    time_limit = _css(_FORM + ' > div:nth-child(4) > div:nth-child(1) > div > div > div > input')
    memory_limit = _css(_FORM + ' > div:nth-child(4) > div:nth-child(2) > div > div > div > input')
    # Open the tag selector, then grab its text input.
    _css(_FORM + ' > div:nth-child(5) > div:nth-child(2) > div > div > button > span', clickable=True).click()
    tags = _css(_FORM + ' > div:nth-child(5) > div:nth-child(2) > div > div > div > div.el-input.el-input--mini > input')
    sample_input = _css(_FORM + ' > div:nth-child(6) > div > div > div > div > div > div:nth-child(1) > div > div > div > textarea')
    sample_output = _css(_FORM + ' > div:nth-child(6) > div > div > div > div > div > div:nth-child(2) > div > div > div > textarea')
    # In this page layout the hint editor sits at position 7 (it swapped
    # places with the file-upload row relative to earlier versions).
    hint = _css(_FORM + ' > div:nth-child(7) > div > div > div.simditor-wrapper > div.markdown-editor > textarea')
    # NOTE(review): this selector points at the upload *button icon* (<i>);
    # send_keys with a file path only works on an <input type="file"> element.
    # Verify against the live page and retarget if uploads silently fail.
    file_input = _css(_FORM + ' > div:nth-child(11) > div.el-col.el-col-4 > div > div > div > div > button > i')
    source = _css(_FORM + ' > div:nth-child(12) > div > div > input')
    create = _css(_FORM + ' > button', clickable=True)

    display_id.send_keys(problem['problem_no'])
    title.send_keys(problem['problem_name'])
    description.send_keys(rewrite_img_src(problem['description']))
    input_field.send_keys(rewrite_img_src(problem['input']))
    output_field.send_keys(rewrite_img_src(problem['output']))

    # Clear the pre-filled time limit (up to 5 digits) and force 4000 ms.
    for _ in range(5):
        time_limit.send_keys(Keys.BACKSPACE)
    time_limit.send_keys('4000')

    memory_limit.clear()
    memory_limit.send_keys(problem['memory_limit'])
    tags.send_keys('bzoj-problem')
    tags.send_keys(Keys.ENTER)
    sample_input.send_keys(problem['sample_input'])
    sample_output.send_keys(problem['sample_output'])

    hint.send_keys(rewrite_img_src(problem['hint']))

    # Upload the pre-packed testcase archive for this problem.
    file_input.send_keys(zip_dir + "\\" + problem['problem_no'] + ".zip")

    source.send_keys(problem['source'])
    time.sleep(5)  # let the testcase upload finish before submitting
    create.click()


def main():
    """Entry point: log in and import every problem from MongoDB."""
    handle_login()


if __name__ == '__main__':
    main()
# ---- AddProblem/config.py: admin credentials used by the import scripts ----

# OJ admin account (used by the selenium login).
USERNAME = 'root'
PASSWORD = 'xxxxxxxxx'


# ---- AddProblem/delete.py: remove a range of imported problems ----

from function import *
from pymongo import *


def delete():
    """Delete problem documents numbered 1200..1245 (inclusive) from bzoj.problem.

    problem_no is stored as a string in MongoDB, so each number is
    stringified before the query.  The original computed 1200 + i inside a
    range(0, 46) loop, re-assigning count every iteration; iterating the
    target numbers directly is equivalent and clearer.
    """
    try:
        client = MongoClient(host="localhost", port=27017)
        db = client.bzoj
        for number in range(1200, 1246):
            print(number)
            db.problem.delete_one({"problem_no": str(number)})
        print("删除成功")
    except Exception as e:
        # Best-effort script: report the failure instead of crashing.
        print(e)


if __name__ == '__main__':
    delete()


# ---- AddProblem/function.py: MongoDB access helpers shared by all scripts ----

from settings import *
import pymongo


def get_collection():
    """Connect to MongoDB and return the configured problem collection."""
    client = pymongo.MongoClient(MONGO_URI)
    db = client[MONGO_DATABASE]
    return db[MONGO_TABLE]


def query_data_from_mongo():
    """Return a cursor over every problem document, sorted by problem_no."""
    collection = get_collection()
    return collection.find().sort("problem_no")
# -*- coding: utf-8 -*-
"""
Unzip the downloaded testcase archives and re-pack each problem's directory
into a flat zip suitable for the QDUOJ testcase upload.

:author: hefengen
:date: 2018/04/15
:email: hefengen@hotmail.com
"""

from function import *
import os
import zipfile

start_dir = "E:\\Problem\\Testcase\\no"  # downloaded archives, one <problem_no>.zip each
zip_dir = "E:\\Problem\\Testcase\\ok"    # extraction target / re-packed output


def unzip():
    """Extract <start_dir>/<problem_no>.zip into zip_dir for every problem in MongoDB."""
    data = query_data_from_mongo()
    for problem in data:
        problem_no = problem['problem_no']
        file_name = os.path.join(start_dir, problem_no + ".zip")
        if os.path.exists(file_name):
            # Context manager guarantees the archive handle is closed
            # (the original left every ZipFile open).
            with zipfile.ZipFile(file_name) as zip_file:
                zip_file.extractall(zip_dir)
        else:
            print(file_name + "Not Exist")


def zip_compress():
    """Re-pack <zip_dir>/<problem_no>/ into <problem_no>.zip with paths relative to it."""
    data = query_data_from_mongo()
    for problem in data:
        problem_no = problem['problem_no']
        new_dir = os.path.join(zip_dir, problem_no)
        with zipfile.ZipFile(new_dir + ".zip", 'w', zipfile.ZIP_DEFLATED) as zip_file:
            for dirpath, dirnames, filenames in os.walk(new_dir, topdown=False):
                # Strip the problem-directory prefix so entries are archive-relative.
                fpath = dirpath.replace(new_dir, '')
                fpath = fpath and fpath + os.sep or ''
                for filename in filenames:
                    zip_file.write(os.path.join(dirpath, filename), fpath + filename)


if __name__ == '__main__':
    unzip()
    # Re-pack the extracted directories (runs sequentially; the original
    # comment claimed multithreading, but none exists).
    zip_compress()
'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8', 11 | 'Accept-Encoding': 'gzip, deflate', 12 | 'Accept-Language': 'zh-CN,zh;q=0.9', 13 | 'Cache-Control': 'max-age=0', 14 | 'Connection': 'keep-alive', 15 | 'Host': 'www.yiwailian.cn', 16 | 'If-Modified-Since': 'Sat, 24 Feb 2018 13:17:46 GMT', 17 | 'If-None-Match': 'W/"5a9165fa-2f2"', 18 | 'Upgrade-Insecure-Requests': '1', 19 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36', 20 | } 21 | 22 | # MongoDB配置 23 | MONGO_URI = 'localhost' 24 | MONGO_DATABASE = 'bzoj' 25 | MONGO_TABLE = 'problem' 26 | 27 | # Service_Agrs配置 28 | SERVICE_ARGS = ['--load-images=true', '--disk-cache=true'] -------------------------------------------------------------------------------- /BZOJ/bzoj.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moremind/AutoAddProblem/39a949dd8608162972350c08a3920113dfbe595b/BZOJ/bzoj.zip -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 HiCodd 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | BZOJ数据:链接: https://pan.baidu.com/s/1ug7XIaOKxHPyZHV4iNxS8Q?pwd=uyj9 提取码: uyj9 复制这段内容后打开百度网盘手机App,操作更方便哦 2 | 3 | ## 缘由 4 | 5 | QDUOJ的开发以及众多OJ的题库,使得我们自动化添加题目更加轻松。前提是您需要获得各OJ的测试数据。**请注意本方法只用在您的机器上运行即可,不用再OJ服务器运行!** 6 | 7 | ## 概况 8 | 9 | 目前主要模块分为 10 | 11 | * 爬虫部分-WebSpider 12 | * 自动化加题部分-AddProblem 13 | 14 | 用了爬虫scrapy,并且爬虫获取到的数据相对而言文档更容易解析与添加。如果您能够通过pandoc转文件后,将文本提取出来也可以。 15 | 16 | ## 需要安装的软件 17 | 18 | * Python3.6 19 | * MongoDB(V3.4) 20 | * MongoDB 客户端工具-推荐使用RoBo 3T 21 | 22 | Mongo安装教程:[mongo安装](https://blog.csdn.net/heshushun/article/details/77776706)(教程源于-李子园的梦想) 23 | 24 | 25 | ## 数据(某OJ数据&已经解压完成的) 26 | 1000-1999-Testcase数据链接:https://pan.baidu.com/s/1SvCgulQt8rn8m7w0cbMExQ 密码:jg7m 27 | 28 | 2000-2499-Testcase数据链接:https://pan.baidu.com/s/1tgr1M-VsRrzaEjXPmA3NCA 密码:4mpf 29 | 30 | 2500-2999-Testcase数据链接:https://pan.baidu.com/s/1X3HVJTGCFhHo-p6R_G0sSw 密码:scgq 31 | 32 | 3000-3499-Testcase数据链接:https://pan.baidu.com/s/1KjosY3Sr7XbSqbZo4Cdfkg 密码:rdr4 33 | 34 | 3500-3999-Testcase数据链接:https://pan.baidu.com/s/16eJWeheUgKJeuQDGVSKHuw 密码:goii 35 | 36 | 4000-4499-Testcase数据连接:https://pan.baidu.com/s/1Yt-MZHvDPGtQooUgt9yJ1Q 密码:1pu0 37 | 38 | 4500-4999-Testcase数据连接:https://pan.baidu.com/s/1Dz9bDHzkpsx9jOxHSp2IeQ 密码:tv9g 39 | 40 | ## 题目数据 41 | 42 | 
https://finen-1251602255.cos.ap-shanghai.myqcloud.com/file/bzoj_problem.zip 43 | 44 | 您可以直接通过MongoDB将将该数据导入到您的Mongo中。 45 | 导入命令如下: 46 | ``` 47 | linux下可以使用:mongorestore -d 48 | windows下可以使用:mongorestore.exe -d 49 | 50 | windows下: mongorestore.exe -d bzoj D:\Mongo\bin\dump\bzoj_problem\problem.bson 51 | linux下: mongorestore -d bzoj /usr/DB/bzoj_problem/problem.bson 52 | ``` 53 | 54 | > 如果您对爬虫有兴趣可以参看1.0版本进行对题目数据进行爬取。 55 | https://github.com/hirCodd/AutoAddProblem/blob/master/README_1.0.md 56 | 57 | ## 图片位置 58 | 在本项目中已经提供BZOJ,所以您可以直接在BZOJ解压包中看到JudgeOnline找到upload以及images两个图片目录,你只需要将这个两个目录复制到已经部署好的qduoj的public目录下即可。 59 | ![dir][2] 60 | 61 | ## 安装自动加题所需要的库 62 | 1. webdriver 63 | 2. selenium 64 | 3. pymongo 65 | 66 | 安装方法: 67 | ``` 68 | pip install selenium 69 | pip install pymongo 70 | ``` 71 | 72 | webdriver下载地址:[chromedriver](https://finen-1251602255.cos.ap-shanghai.myqcloud.com/file/chromedriver.exe) 73 | webdriver放置位置如下: 74 | 75 | ![webdriver][4] 76 | 77 | 78 | * 执行自动加题 79 | > 也需要修改您的url以及mongoDB配置,以及OJ的管理员的用户名、密码。 80 | 81 | 1. url在add_problem.py 82 | 2. MongoDB配置在settings.py 83 | 3. OJ用户名以及密码在config.py 84 | 4. 
修改zip_dir = "E:\\Problem\\Testcase\\ok"为您重新压缩后的目录。 85 | 86 | * 执行:python add_problem.py 87 | 88 | 89 | ## 某些bug 90 | * 因为BZOJ数据问题,可能导致添加题目突然中止,你可能需要执行删除数据库文档的命令,然后重新执行:python add_problem.py即可再次添加题目。 91 | > 在delete.py中,你需要修改count的值以及for循环的值,删除已经添加得文档。示例如下: 92 | 93 | # 删除编号自1200开始,至1245的所有文档数据 94 | for i in range(0, 46): 95 | count = 1200 96 | count = count+i 97 | print(count) 98 | db.problem.delete_one({"problem_no": str(count)}) 99 | 100 | 101 | ![oj][1] 102 | ![oj1][3] 103 | 104 | 105 | [1]: https://s1.ax2x.com/2018/06/02/71uIJ.png 106 | [2]: https://finen-1251602255.cos.ap-shanghai.myqcloud.com/images/github/autoaddproblem/dir.png 107 | [3]: https://finen-1251602255.cos.ap-shanghai.myqcloud.com/images/github/autoaddproblem/p.png 108 | [4]: https://finen-1251602255.cos.ap-shanghai.myqcloud.com/images/github/autoaddproblem/webdriver.png 109 | -------------------------------------------------------------------------------- /README_1.0.md: -------------------------------------------------------------------------------- 1 | # 缘由 2 | 3 | QDUOJ的开发以及众多OJ的题库,使得我们自动化添加题目更加轻松。前提是您需要获得各OJ的测试数据。 4 | 5 | # 数据(某OJ数据&已经解压完成的) 6 | 1000-1999数据链接:https://pan.baidu.com/s/1SvCgulQt8rn8m7w0cbMExQ 密码:jg7m 7 | 8 | 9 | # 概况 10 | 11 | 目前主要模块分为 12 | 13 | * 爬虫部分-WebSpider 14 | * 自动化加题部分-AddProblem 15 | 16 | 因为懒得解析markdown文档,所有用了爬虫scrapy,并且爬虫获取到的数据相对而言文档更容易解析与添加。如果您能够通过pandoc转文件后,将文本提取出来也可以。 17 | 18 | # 需要安装的软件 19 | 20 | * Python 21 | 22 | * MongoDB(V3.4) 23 | 24 | * MongoDB 客户端工具-推荐使用RoBo 3T 25 | 26 | # 准备 27 | ## 解压获取已准备好的BZOJ 28 | 你需要将BZOJ目录下的压缩文件解压,解压至你的web服务器目录下。 29 | 30 | 31 | ## 安装scrapy 32 | 具体安装文档请见docs。 33 | 34 | 35 | ## 启动爬虫并检查数据库是否存在数据 36 | * 修改爬虫配置并执行爬虫: 37 | 38 | 1. url在bzoj.py 39 | 2. 
MongoDB数据配置在settings.py 40 | 41 | * 执行爬虫:scrapy crawl bzoj 42 | 43 | ## 解压与重新压缩数据 44 | 45 | * 你需要修改文件路径与压缩路径: 46 | 47 | 路径设置在pack_sample.py。 48 | start_dir = "E:\\Problem\\Testcase\\no" # 需要遍历的目录 49 | zip_dir = "E:\\Problem\\Testcase\\ok" # 解压后的目录 50 | 51 | 52 | 你需要执行以下命令: 53 | 54 | python pack_sample.py 55 | # 如果您懂python程序设计,可以写多线程解压缩。 56 | 57 | ## 图片位置 58 | 在本项目中已经提供BZOJ,所以您可以直接在BZOJ解压包中看到JudgeOnline找到upload以及images两个图片目录,你只需要将这个两个目录复制到已经部署好的qduoj的 public目录下。 59 | ![dir][2] 60 | 61 | ## 安装自动加题所需要的库 62 | 1. webdriver 63 | 2. selenium 64 | 3. pymongo 65 | 66 | * 执行自动加题 67 | > 也需要修改您的url以及mongoDB配置,以及OJ的管理员的用户名、密码。 68 | 69 | 1. url在add_problem.py 70 | 2. MongoDB配置在settings.py 71 | 3. OJ用户名以及密码在config.py 72 | 4. 修改zip_dir = "E:\\Problem\\Testcase\\ok"为您重新压缩后的目录。 73 | 74 | * 执行:python add_problem.py 75 | 76 | 77 | # 某些bug 78 | * 因为BZOJ数据问题,可能导致添加题目突然中止,你可能需要执行删除数据库文档的命令,然后重新执行:python add_problem.py即可再次添加题目。 79 | > 在delete.py中,你需要修改count的值以及for循环的值,删除已经添加得文档。示例如下: 80 | 81 | # 删除编号自1200开始,至1245的所有文档数据 82 | for i in range(0, 46): 83 | count = 1200 84 | count = count+i 85 | print(count) 86 | db.problem.delete_one({"problem_no": str(count)}) 87 | 88 | 89 | ![oj][1] 90 | 91 | [1]: https://s1.ax2x.com/2018/06/02/71uIJ.png 92 | [2]: https://finen-1251602255.cos.ap-shanghai.myqcloud.com/images/github/autoaddproblem/dir.png -------------------------------------------------------------------------------- /WebSpider/OnlineJudgeProblem_BZOJ/OnlineJudgeProblem_BZOJ/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moremind/AutoAddProblem/39a949dd8608162972350c08a3920113dfbe595b/WebSpider/OnlineJudgeProblem_BZOJ/OnlineJudgeProblem_BZOJ/__init__.py -------------------------------------------------------------------------------- /WebSpider/OnlineJudgeProblem_BZOJ/OnlineJudgeProblem_BZOJ/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/moremind/AutoAddProblem/39a949dd8608162972350c08a3920113dfbe595b/WebSpider/OnlineJudgeProblem_BZOJ/OnlineJudgeProblem_BZOJ/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /WebSpider/OnlineJudgeProblem_BZOJ/OnlineJudgeProblem_BZOJ/__pycache__/items.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moremind/AutoAddProblem/39a949dd8608162972350c08a3920113dfbe595b/WebSpider/OnlineJudgeProblem_BZOJ/OnlineJudgeProblem_BZOJ/__pycache__/items.cpython-36.pyc -------------------------------------------------------------------------------- /WebSpider/OnlineJudgeProblem_BZOJ/OnlineJudgeProblem_BZOJ/__pycache__/pipelines.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moremind/AutoAddProblem/39a949dd8608162972350c08a3920113dfbe595b/WebSpider/OnlineJudgeProblem_BZOJ/OnlineJudgeProblem_BZOJ/__pycache__/pipelines.cpython-36.pyc -------------------------------------------------------------------------------- /WebSpider/OnlineJudgeProblem_BZOJ/OnlineJudgeProblem_BZOJ/__pycache__/settings.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moremind/AutoAddProblem/39a949dd8608162972350c08a3920113dfbe595b/WebSpider/OnlineJudgeProblem_BZOJ/OnlineJudgeProblem_BZOJ/__pycache__/settings.cpython-36.pyc -------------------------------------------------------------------------------- /WebSpider/OnlineJudgeProblem_BZOJ/OnlineJudgeProblem_BZOJ/items.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your scraped items 4 | # 5 | # See documentation in: 6 | # https://doc.scrapy.org/en/latest/topics/items.html 7 | 8 | from scrapy import Item, Field 9 | 10 | 11 
class ProblemItem(Item):
    """Container for one scraped BZOJ problem.

    Mirrors the fields stored in the ``problem`` MongoDB collection and
    consumed by the auto-add scripts under AddProblem/.
    """

    problem_no = Field()      # four-digit problem id, e.g. "1000"
    problem_name = Field()    # title text after "Problem NNNN: "
    description = Field()     # problem statement section
    input = Field()           # input-format section
    output = Field()          # output-format section
    sample_input = Field()    # sample input (plain text)
    sample_output = Field()   # sample output (plain text)
    hint = Field()            # hint section; spider substitutes a default when empty
    source = Field()          # problem source; spider substitutes a default when empty
    memory_limit = Field()    # spider hard-codes 512
    time_limit = Field()      # spider hard-codes 1500
35 | for i in result: 36 | yield i 37 | 38 | def process_spider_exception(self, response, exception, spider): 39 | # Called when a spider or process_spider_input() method 40 | # (from other spider middleware) raises an exception. 41 | 42 | # Should return either None or an iterable of Response, dict 43 | # or Item objects. 44 | pass 45 | 46 | def process_start_requests(self, start_requests, spider): 47 | # Called with the start requests of the spider, and works 48 | # similarly to the process_spider_output() method, except 49 | # that it doesn’t have a response associated. 50 | 51 | # Must return only requests (not items). 52 | for r in start_requests: 53 | yield r 54 | 55 | def spider_opened(self, spider): 56 | spider.logger.info('Spider opened: %s' % spider.name) 57 | 58 | 59 | class OnlinejudgeproblemBzojDownloaderMiddleware(object): 60 | # Not all methods need to be defined. If a method is not defined, 61 | # scrapy acts as if the downloader middleware does not modify the 62 | # passed objects. 63 | 64 | @classmethod 65 | def from_crawler(cls, crawler): 66 | # This method is used by Scrapy to create your spiders. 67 | s = cls() 68 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) 69 | return s 70 | 71 | def process_request(self, request, spider): 72 | # Called for each request that goes through the downloader 73 | # middleware. 74 | 75 | # Must either: 76 | # - return None: continue processing this request 77 | # - or return a Response object 78 | # - or return a Request object 79 | # - or raise IgnoreRequest: process_exception() methods of 80 | # installed downloader middleware will be called 81 | return None 82 | 83 | def process_response(self, request, response, spider): 84 | # Called with the response returned from the downloader. 
class OnlinejudgeproblemBzojPipeline(object):
    """Default no-op pipeline generated by ``scrapy startproject``."""

    def process_item(self, item, spider):
        # Pass the item through unchanged.
        return item


class MongoPipeline(object):
    """Persist scraped problem items into MongoDB.

    Connection parameters come from ``MONGO_URI`` / ``MONGO_DATABASE`` in
    the project settings. Items are upserted by ``problem_no`` so
    re-running the spider updates documents instead of duplicating them.
    """

    def __init__(self, mongo_uri, mongo_db):
        self.mongo_uri = mongo_uri
        self.mongo_db = mongo_db

    @classmethod
    def from_crawler(cls, crawler):
        # Alternate constructor wired up by scrapy with project settings.
        return cls(
            mongo_uri=crawler.settings.get('MONGO_URI'),
            mongo_db=crawler.settings.get('MONGO_DATABASE', 'items'),
        )

    def open_spider(self, spider):
        self.client = pymongo.MongoClient(self.mongo_uri)
        self.db = self.client[self.mongo_db]

    def close_spider(self, spider):
        self.client.close()

    def process_item(self, item, spider):
        """Upsert *item* into the ``problem`` collection keyed by problem_no.

        :param item: scraped ProblemItem (mapping with a 'problem_no' key)
        :param spider: the running spider (unused)
        :return: the item, unchanged, for the next pipeline stage

        Uses ``update_one(..., upsert=True)`` instead of the deprecated
        ``Collection.update(..., True)`` form, and converts the item to a
        plain dict for the ``$set`` payload.
        """
        self.db['problem'].update_one(
            {'problem_no': item['problem_no']},
            {'$set': dict(item)},
            upsert=True,
        )
        return item
# Item pipelines (lower order value runs first; valid range 0-1000).
# See https://doc.scrapy.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
    'OnlineJudgeProblem_BZOJ.pipelines.MongoPipeline': 300,
}

# Let the AutoThrottle extension adapt the crawl rate to server load.
# See https://doc.scrapy.org/en/latest/topics/autothrottle.html
AUTOTHROTTLE_ENABLED = True
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage' 91 | 92 | MONGO_URI = '127.0.0.1' 93 | MONGO_DATABASE = 'bzoj' 94 | 95 | DB_HOST = 'localhost' 96 | DB_PORT = 3306 97 | DB_USER = 'root' 98 | DB_PASSWORD = 'admin@123456' 99 | DB_NAME = 'mysql' 100 | DB_CHARSET = 'utf8' 101 | 102 | 103 | -------------------------------------------------------------------------------- /WebSpider/OnlineJudgeProblem_BZOJ/OnlineJudgeProblem_BZOJ/spiders/__init__.py: -------------------------------------------------------------------------------- 1 | # This package will contain the spiders of your Scrapy project 2 | # 3 | # Please refer to the documentation for information on how to create and manage 4 | # your spiders. 5 | -------------------------------------------------------------------------------- /WebSpider/OnlineJudgeProblem_BZOJ/OnlineJudgeProblem_BZOJ/spiders/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moremind/AutoAddProblem/39a949dd8608162972350c08a3920113dfbe595b/WebSpider/OnlineJudgeProblem_BZOJ/OnlineJudgeProblem_BZOJ/spiders/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /WebSpider/OnlineJudgeProblem_BZOJ/OnlineJudgeProblem_BZOJ/spiders/__pycache__/bzoj.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moremind/AutoAddProblem/39a949dd8608162972350c08a3920113dfbe595b/WebSpider/OnlineJudgeProblem_BZOJ/OnlineJudgeProblem_BZOJ/spiders/__pycache__/bzoj.cpython-36.pyc -------------------------------------------------------------------------------- /WebSpider/OnlineJudgeProblem_BZOJ/OnlineJudgeProblem_BZOJ/spiders/bzoj.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import scrapy 3 | from scrapy import Request, Spider 4 | from urllib 
import parse 5 | import re 6 | from bs4 import BeautifulSoup 7 | from lxml import etree 8 | from OnlineJudgeProblem_BZOJ.items import ProblemItem 9 | 10 | 11 | class BzojSpider(scrapy.Spider): 12 | name = 'bzoj' 13 | allowed_domains = [] 14 | start_urls = 'http://172.16.72.4:83/' 15 | url = 'http://172.16.72.4:83/' 16 | 17 | def start_requests(self): 18 | yield Request(self.start_urls, callback=self.parse, dont_filter=False) 19 | 20 | def parse(self, response): 21 | """ 22 | :function: 解析所有题目的url 23 | :param response: response 24 | :return: 25 | """ 26 | 27 | # 解析所有题目url 28 | problem_urls = response.xpath('//table[@class="ui celled table"]/tbody//tr//td//a//@href').extract() 29 | 30 | for problem_url in problem_urls: 31 | 32 | detail_url = parse.urljoin(self.url, problem_url) 33 | 34 | yield Request(detail_url, self.parse_problem, dont_filter=False) 35 | 36 | # detail_url = 'http://172.16.72.4:83/JudgeOnline/1169.html' 37 | # yield Request(detail_url, self.parse_problem, dont_filter=False) 38 | 39 | def parse_problem(self, response): 40 | """ 41 | :function:解析 42 | :param response: 43 | :return: 44 | """ 45 | # 题号与题目数据 46 | problem = response.xpath('//div[@class="ui existing segment"]/center/h1//text()').extract_first() 47 | problem = str(problem) 48 | problem_no = re.sub("\D+", "", problem)[0:4] 49 | problem_names = re.findall(r": (.*)", problem, re.S) 50 | problem_name = problem_names[0] 51 | 52 | # 题目具体数据处理 53 | contents = response.xpath('.//div[@class="content"]') 54 | description = ''.join(contents[0].xpath(".").extract()).strip() 55 | input = ''.join(contents[1].xpath(".").extract()).strip() 56 | output = ''.join(contents[2].xpath(".").extract()).strip() 57 | sample_input = ''.join(contents[3].xpath(".//text()").extract()).strip() 58 | sample_output = ''.join(contents[4].xpath(".//text()").extract()).strip() 59 | hint = ''.join(contents[5].xpath(".").extract()).strip() 60 | source = ''.join(contents[6].xpath(".//text()").extract()).strip() 61 | 62 | if(hint 
== ""): 63 | hint = "没有提示" 64 | if(source == ""): 65 | source = "bzoj数据" 66 | 67 | memory_limit = 512 68 | time_limit = 1500 69 | 70 | problem_item = ProblemItem() 71 | for field in problem_item.fields: 72 | try: 73 | problem_item[field] = eval(field) 74 | except NameError: 75 | self.logger.debug('Field is Not Defined' + field) 76 | 77 | yield problem_item 78 | -------------------------------------------------------------------------------- /WebSpider/OnlineJudgeProblem_BZOJ/scrapy.cfg: -------------------------------------------------------------------------------- 1 | # Automatically created by: scrapy startproject 2 | # 3 | # For more information about the [deploy] section see: 4 | # https://scrapyd.readthedocs.io/en/latest/deploy.html 5 | 6 | [settings] 7 | default = OnlineJudgeProblem_BZOJ.settings 8 | 9 | [deploy] 10 | #url = http://localhost:6800/ 11 | project = OnlineJudgeProblem_BZOJ 12 | -------------------------------------------------------------------------------- /docs/Scrapy安装详解.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moremind/AutoAddProblem/39a949dd8608162972350c08a3920113dfbe595b/docs/Scrapy安装详解.pdf -------------------------------------------------------------------------------- /docs/scrapy安装文档.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moremind/AutoAddProblem/39a949dd8608162972350c08a3920113dfbe595b/docs/scrapy安装文档.pdf --------------------------------------------------------------------------------