├── .gitignore
├── README.md
├── all
├── all_2022.csv
├── all_2022.txt
├── all_2023.csv
├── all_2023.txt
├── all_2024.csv
├── all_2024.txt
└── all_with_problem_2022.txt
├── analysis.ipynb
├── analysis_2024.ipynb
├── config.py
├── download.py
├── exception.txt
├── gitpush.bat
├── pdf2text.py
├── requirements.txt
├── txt_joint.py
└── 统计结果.png
/.gitignore:
--------------------------------------------------------------------------------
1 | paper/*.pdf
2 | paper_2022/*.pdf
3 | paper_2023/*.pdf
4 | .ipynb_checkpoints
5 | __pycache__
6 | gitpush.bat
7 | .idea
8 | tmp*.txt
9 | all/tmp*.txt
10 | /all/tmp*.txt
11 | your_university/*.txt
12 | /your_university/*.txt
13 | your_university
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # comap_crawler
2 |
3 | 2024更新:将config.py中的年份由23改为24,即可爬取2024美赛证书并进行分析
4 |
5 | 2023美赛爬虫,2024美赛获奖证书爬虫
6 |
7 | 美国大学生数学建模竞赛证书爬取及信息OCR识别分析
8 |
9 | ## 运行步骤
10 |
11 | 1. 安装tesseractOCR,参考版本:v5.0.1.20220118,其他版本不保证可用
12 |
13 | v5.0.1.20220118下载地址
14 |
15 | https://digi.bib.uni-mannheim.de/tesseract/tesseract-ocr-w64-setup-v5.0.1.20220118.exe
16 |
17 | 其他版本下载:https://digi.bib.uni-mannheim.de/tesseract/
18 |
19 | 2. `pip install -r requirements.txt`
20 |
21 | 3. config.py中设置年份、进程数、你的学校、TesseractOCR安装路径等,进程数根据CPU和内存情况设置
22 |
23 | 4. 运行download.py下载证书,单次运行时部分证书可能下载失败,需要多次运行,直到全部证书下载完成
24 |
25 | 5. pdf2text.py识别学校、姓名、获奖等级、队伍ID等
26 |
27 | 6. txt_joint.py合并OCR识别的txt结果
28 |
29 | 7. analysis.ipynb分析数据
30 |
31 | ## 识别结果:
32 |
33 | 更多分析结果请自行下载代码,运行analysis.ipynb进行分析
34 |
35 | ### 2024美赛
36 |
37 | 2024美赛结果,证书数量28905张,最终识别28796条信息
38 | https://raw.githubusercontent.com/personqianduixue/comap_crawler_2023/master/all/all_2024.txt
39 |
40 | cdn加速镜像:https://fastly.jsdelivr.net/gh/personqianduixue/comap_crawler_2023@master/all/all_2024.txt
41 |
42 |
43 |
44 | ### 2023美赛
45 |
46 | 2023美赛结果,证书数量20858张,最终识别20818条信息
47 |
48 | https://raw.githubusercontent.com/personqianduixue/comap_crawler_2023/master/all/all_2023.txt
49 |
50 | cdn加速镜像:https://ghproxy.net/https://raw.githubusercontent.com/personqianduixue/comap_crawler_2023/master/all/all_2023.txt
51 |
52 |
53 |
54 | ### 2022美赛
55 |
56 | 2022美赛结果,证书数量27205张,最终识别27161条信息
57 |
58 | https://raw.githubusercontent.com/personqianduixue/comap_crawler_2023/master/all/all_2022.txt
59 |
60 | cdn加速镜像:https://ghproxy.net/https://raw.githubusercontent.com/personqianduixue/comap_crawler_2023/master/all/all_2022.txt
61 |
62 |
63 |
--------------------------------------------------------------------------------
/analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import shutil\n",
10 | "import pandas as pd\n",
11 | "from config import *"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 2,
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "your_university='Huazhong University of Science and Technology'"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": 3,
26 | "metadata": {},
27 | "outputs": [],
28 | "source": [
29 | "year=23"
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 4,
35 | "metadata": {
36 | "scrolled": true
37 | },
38 | "outputs": [
39 | {
40 | "data": {
41 | "text/html": [
42 | "
\n",
43 | "\n",
56 | "
\n",
57 | " \n",
58 | " \n",
59 | " | \n",
60 | " control_number | \n",
61 | " student1 | \n",
62 | " student2 | \n",
63 | " student3 | \n",
64 | " advisor | \n",
65 | " university | \n",
66 | " prize | \n",
67 | "
\n",
68 | " \n",
69 | " \n",
70 | " \n",
71 | " 0 | \n",
72 | " 2300005 | \n",
73 | " Zheng Yaxin | \n",
74 | " Du Jingiu | \n",
75 | " Gao Qihang | \n",
76 | " UIC Math Modeling Team | \n",
77 | " BNU-HKBU United International College | \n",
78 | " Meritorious Winner | \n",
79 | "
\n",
80 | " \n",
81 | " 1 | \n",
82 | " 2300006 | \n",
83 | " Wu Peixin | \n",
84 | " Chen Ningyun | \n",
85 | " Wu You | \n",
86 | " UIC Math Modeling Team | \n",
87 | " BNU-HKBU United International College | \n",
88 | " Successful Participant | \n",
89 | "
\n",
90 | " \n",
91 | " 2 | \n",
92 | " 2300007 | \n",
93 | " Gou RuoXuan | \n",
94 | " Huang JianFei | \n",
95 | " Ye YuJun | \n",
96 | " UIC Math Modeling Team | \n",
97 | " BNU-HKBU United International College | \n",
98 | " Successful Participant | \n",
99 | "
\n",
100 | " \n",
101 | " 3 | \n",
102 | " 2300008 | \n",
103 | " Wan Yeqian | \n",
104 | " Gan Yining | \n",
105 | " Guo Wenhao | \n",
106 | " UIC Math Modeling Team | \n",
107 | " BNU-HKBU United International College | \n",
108 | " Successful Participant | \n",
109 | "
\n",
110 | " \n",
111 | " 4 | \n",
112 | " 2300009 | \n",
113 | " Lu Qianyu | \n",
114 | " Li Can | \n",
115 | " Zhao Xiaoxuan | \n",
116 | " UIC Math Modeling Team | \n",
117 | " BNU-HKBU United International College | \n",
118 | " Successful Participant | \n",
119 | "
\n",
120 | " \n",
121 | "
\n",
122 | "
"
123 | ],
124 | "text/plain": [
125 | " control_number student1 student2 student3 \\\n",
126 | "0 2300005 Zheng Yaxin Du Jingiu Gao Qihang \n",
127 | "1 2300006 Wu Peixin Chen Ningyun Wu You \n",
128 | "2 2300007 Gou RuoXuan Huang JianFei Ye YuJun \n",
129 | "3 2300008 Wan Yeqian Gan Yining Guo Wenhao \n",
130 | "4 2300009 Lu Qianyu Li Can Zhao Xiaoxuan \n",
131 | "\n",
132 | " advisor university \\\n",
133 | "0 UIC Math Modeling Team BNU-HKBU United International College \n",
134 | "1 UIC Math Modeling Team BNU-HKBU United International College \n",
135 | "2 UIC Math Modeling Team BNU-HKBU United International College \n",
136 | "3 UIC Math Modeling Team BNU-HKBU United International College \n",
137 | "4 UIC Math Modeling Team BNU-HKBU United International College \n",
138 | "\n",
139 | " prize \n",
140 | "0 Meritorious Winner \n",
141 | "1 Successful Participant \n",
142 | "2 Successful Participant \n",
143 | "3 Successful Participant \n",
144 | "4 Successful Participant "
145 | ]
146 | },
147 | "execution_count": 4,
148 | "metadata": {},
149 | "output_type": "execute_result"
150 | }
151 | ],
152 | "source": [
153 | "txtpath='./all/all_20'+str(year)+'.txt'\n",
154 | "csvpath='./all/all_20'+str(year)+'.csv'\n",
155 | "shutil.copyfile(txtpath, csvpath)\n",
156 | "data = pd.read_csv(csvpath)\n",
157 | "data=data.iloc[:,0:7]\n",
158 | "data.head()"
159 | ]
160 | },
161 | {
162 | "cell_type": "code",
163 | "execution_count": 5,
164 | "metadata": {
165 | "scrolled": true
166 | },
167 | "outputs": [
168 | {
169 | "data": {
170 | "text/plain": [
171 | "Harbin Engineering University 580\n",
172 | "XIDIAN UNIVERSITY 448\n",
173 | "Beihang University 407\n",
174 | "Jilin University 364\n",
175 | "Huazhong University of Science and Technology 338\n",
176 | "Northwestern Polytechnical University 323\n",
177 | "Nanjing University of Posts and Telecommunication 298\n",
178 | "Dalian University of Technology 293\n",
179 | "Shanghai Jiao Tong University 263\n",
180 | "South China University of Technology 228\n",
181 | "Hohai University 215\n",
182 | "National University of Defense Technology 207\n",
183 | "Northeastern University 206\n",
184 | "Harbin Institute of Technology 200\n",
185 | "Beijing Institute of Technology 197\n",
186 | "Southwest University 193\n",
187 | "Central University of Finance and Economics 192\n",
188 | "Renmin University of China 183\n",
189 | "Xi'an Jiaotong University 162\n",
190 | "Hangzhou Normal University 154\n",
191 | "Name: university, dtype: int64"
192 | ]
193 | },
194 | "execution_count": 5,
195 | "metadata": {},
196 | "output_type": "execute_result"
197 | }
198 | ],
199 | "source": [
200 | "univ_data=data['university'].value_counts().iloc[0:20]\n",
201 | "univ_data"
202 | ]
203 | },
204 | {
205 | "cell_type": "code",
206 | "execution_count": 6,
207 | "metadata": {
208 | "scrolled": true
209 | },
210 | "outputs": [
211 | {
212 | "data": {
213 | "image/png": "\n",
214 | "text/plain": [
215 | ""
216 | ]
217 | },
218 | "metadata": {
219 | "needs_background": "light"
220 | },
221 | "output_type": "display_data"
222 | }
223 | ],
224 | "source": [
225 | "\n",
226 | "ax=univ_data.sort_values().plot.barh()\n",
227 | "fig=ax.get_figure()\n",
228 | "fig.savefig('统计结果.png',bbox_inches='tight')"
229 | ]
230 | },
231 | {
232 | "cell_type": "code",
233 | "execution_count": 7,
234 | "metadata": {},
235 | "outputs": [
236 | {
237 | "data": {
238 | "text/plain": [
239 | "Successful Participant 13180\n",
240 | "Honorable Mention 4531\n",
241 | "Meritorious Winner 1352\n",
242 | "Disqualified - P 990\n",
243 | "Finalist 545\n",
244 | "Unsuccessful - I 177\n",
245 | "Not Judged 24\n",
246 | "Outstanding Winner 14\n",
247 | "AMS Award 6\n",
248 | "INFORMS Award 5\n",
249 | "SIAM Award 5\n",
250 | "MAA Award 3\n",
251 | "COMAP Scholarship Award 2\n",
252 | "Leonhard Euler Award 1\n",
253 | "Ben Fusaro Award 1\n",
254 | "Vilfredo Pareto Award 1\n",
255 | "Rachel Carson Award 1\n",
256 | "Frank Giordano Award 1\n",
257 | "Unsuccessful - [ 1\n",
258 | "Name: prize, dtype: int64"
259 | ]
260 | },
261 | "execution_count": 7,
262 | "metadata": {},
263 | "output_type": "execute_result"
264 | }
265 | ],
266 | "source": [
267 | "data['prize'].value_counts()"
268 | ]
269 | },
270 | {
271 | "cell_type": "code",
272 | "execution_count": 8,
273 | "metadata": {},
274 | "outputs": [
275 | {
276 | "data": {
277 | "text/html": [
278 | "\n",
279 | "\n",
292 | "
\n",
293 | " \n",
294 | " \n",
295 | " | \n",
296 | " control_number | \n",
297 | " student1 | \n",
298 | " student2 | \n",
299 | " student3 | \n",
300 | " advisor | \n",
301 | " university | \n",
302 | " prize | \n",
303 | "
\n",
304 | " \n",
305 | " \n",
306 | " \n",
307 | " 96 | \n",
308 | " 2300136 | \n",
309 | " Hanzhang Zhou | \n",
310 | " Huangyii Zheng | \n",
311 | " Xiaoyang Cao | \n",
312 | " Heng Liang | \n",
313 | " Tsinghua University | \n",
314 | " INFORMS Award | \n",
315 | "
\n",
316 | " \n",
317 | " 169 | \n",
318 | " 2300229 | \n",
319 | " Cao Honglin | \n",
320 | " Zhou Zijian | \n",
321 | " Hu Yongqi | \n",
322 | " Li Mingqi | \n",
323 | " Jniversity of Electronic Science and Technolog... | \n",
324 | " Leonhard Euler Award | \n",
325 | "
\n",
326 | " \n",
327 | " 260 | \n",
328 | " 2300336 | \n",
329 | " Jiahao Luo | \n",
330 | " Tianyu Xiong | \n",
331 | " Yuting Zhang | \n",
332 | " Guoliang He | \n",
333 | " University of Electronic Science and Technolog... | \n",
334 | " AMS Award | \n",
335 | "
\n",
336 | " \n",
337 | " 1037 | \n",
338 | " 2309229 | \n",
339 | " Yuhao Sun | \n",
340 | " Ziruo Wang | \n",
341 | " Shuo Zhang | \n",
342 | " Hai Jin | \n",
343 | " Beijing Institute of Technology | \n",
344 | " Ben Fusaro Award | \n",
345 | "
\n",
346 | " \n",
347 | " 1532 | \n",
348 | " 2309766 | \n",
349 | " Carrie Cox | \n",
350 | " Jobi Lo | \n",
351 | " Elias Coppock | \n",
352 | " Wai Lau | \n",
353 | " Seattle Pacific University | \n",
354 | " MAA Award | \n",
355 | "
\n",
356 | " \n",
357 | " 2406 | \n",
358 | " 2310776 | \n",
359 | " Duffy Anderson | \n",
360 | " Matthew Helmer | \n",
361 | " Madeline Rue | \n",
362 | " Mei Zhu | \n",
363 | " Pacific Lutheran University | \n",
364 | " MAA Award | \n",
365 | "
\n",
366 | " \n",
367 | " 2832 | \n",
368 | " 2311258 | \n",
369 | " Haiyan Zhang | \n",
370 | " Tao Zhuang | \n",
371 | " Yunuo Lei | \n",
372 | " Qingsong Zou | \n",
373 | " XIDIAN UNIVERSITY | \n",
374 | " AMS Award | \n",
375 | "
\n",
376 | " \n",
377 | " 3077 | \n",
378 | " 2311517 | \n",
379 | " Taining Yan | \n",
380 | " Kaiqun Wu | \n",
381 | " Lijie Chen | \n",
382 | " Taining Yan | \n",
383 | " Renmin University of China | \n",
384 | " Vilfredo Pareto Award | \n",
385 | "
\n",
386 | " \n",
387 | " 3912 | \n",
388 | " 2312411 | \n",
389 | " Dingkai Wei | \n",
390 | " Yuecheng Wang | \n",
391 | " Weilong Zhu | \n",
392 | " Yuchao Li | \n",
393 | " Chang'an University | \n",
394 | " Rachel Carson Award | \n",
395 | "
\n",
396 | " \n",
397 | " 5561 | \n",
398 | " 2314151 | \n",
399 | " Jing Ren | \n",
400 | " Chenyue Xia | \n",
401 | " AnyYu Pan | \n",
402 | " Guibing Guo | \n",
403 | " Northeastern University of China | \n",
404 | " COMAP Scholarship Award | \n",
405 | "
\n",
406 | " \n",
407 | " 6398 | \n",
408 | " 2315018 | \n",
409 | " Hongyu Lang | \n",
410 | " Yutong Shao | \n",
411 | " Xiaqing Zhou | \n",
412 | " Jigao Yan | \n",
413 | " Soochow University | \n",
414 | " SIAM Award | \n",
415 | "
\n",
416 | " \n",
417 | " 6692 | \n",
418 | " 2315321 | \n",
419 | " Baoyang Zhang | \n",
420 | " Lei Tian | \n",
421 | " Zihan Wu | \n",
422 | " Jun Lu | \n",
423 | " National University of Defense Technology | \n",
424 | " SIAM Award | \n",
425 | "
\n",
426 | " \n",
427 | " 6749 | \n",
428 | " 2315379 | \n",
429 | " Zhu Xiangyu | \n",
430 | " Liang Katyin | \n",
431 | " Wei Xiaoqin | \n",
432 | " Wang Xiaoyin | \n",
433 | " Tiangong University | \n",
434 | " Frank Giordano Award | \n",
435 | "
\n",
436 | " \n",
437 | " 8305 | \n",
438 | " 2316994 | \n",
439 | " Zichen Cao | \n",
440 | " Yunyujie Du | \n",
441 | " Xinyi Jiang | \n",
442 | " Xiaofeng Gao | \n",
443 | " Shanghai Jiao Tong University | \n",
444 | " INFORMS Award | \n",
445 | "
\n",
446 | " \n",
447 | " 9488 | \n",
448 | " 2318300 | \n",
449 | " Enzo Moraes Mescall | \n",
450 | " Nicolas Salazar | \n",
451 | " Erik Mendes Novak | \n",
452 | " Maria-Veronica Ciocanel | \n",
453 | " Duke University | \n",
454 | " COMAP Scholarship Award | \n",
455 | "
\n",
456 | " \n",
457 | " 10156 | \n",
458 | " 2318982 | \n",
459 | " Kehan Tong | \n",
460 | " Yuqi Yang | \n",
461 | " Yan Du | \n",
462 | " Qiang Yao | \n",
463 | " East China Normal University | \n",
464 | " SIAM Award | \n",
465 | "
\n",
466 | " \n",
467 | " 10351 | \n",
468 | " 2301192 | \n",
469 | " Jianjie Zheng | \n",
470 | " Weikang Li | \n",
471 | " Yao Hou | \n",
472 | " Lei Liu | \n",
473 | " Zhejiang University of Finance and Economics | \n",
474 | " AMS Award | \n",
475 | "
\n",
476 | " \n",
477 | " 12128 | \n",
478 | " 2320131 | \n",
479 | " Song Yushuai | \n",
480 | " Zhao Chenxia | \n",
481 | " Hu Yunbo | \n",
482 | " Song Yushuai | \n",
483 | " Tianjin University | \n",
484 | " SIAM Award | \n",
485 | "
\n",
486 | " \n",
487 | " 14508 | \n",
488 | " 2322645 | \n",
489 | " Steven Sofos DiSilvio | \n",
490 | " Anthony Ozerov | \n",
491 | " Leon Zhou | \n",
492 | " George Dragomir | \n",
493 | " Columbia University | \n",
494 | " INFORMS Award | \n",
495 | "
\n",
496 | " \n",
497 | " 14546 | \n",
498 | " 2322687 | \n",
499 | " Caden Lin | \n",
500 | " Maksym Bondarenko | \n",
501 | " Phillip M Yan | \n",
502 | " George Dragomir | \n",
503 | " Columbia University | \n",
504 | " MAA Award | \n",
505 | "
\n",
506 | " \n",
507 | " 16300 | \n",
508 | " 2303950 | \n",
509 | " Yuewen Yang | \n",
510 | " Yifan Qi | \n",
511 | " Yuechuan Ma | \n",
512 | " Bo Wang | \n",
513 | " Beiing Institute of Technology | \n",
514 | " SIAM Award | \n",
515 | "
\n",
516 | " \n",
517 | " 16313 | \n",
518 | " 2303967 | \n",
519 | " Jingjia Peng | \n",
520 | " Xinyi Huang | \n",
521 | " Xuejun Zhang | \n",
522 | " Xiaofeng Gao | \n",
523 | " Shanghai Jiao Tong University | \n",
524 | " AMS Award | \n",
525 | "
\n",
526 | " \n",
527 | " 17181 | \n",
528 | " 2304962 | \n",
529 | " Zhu Xiaotian | \n",
530 | " Liu Jingwen | \n",
531 | " Liu Xinjie | \n",
532 | " Chen Hua | \n",
533 | " China University of Petroleum | \n",
534 | " INFORMS Award | \n",
535 | "
\n",
536 | " \n",
537 | " 17912 | \n",
538 | " 2305794 | \n",
539 | " Zhang Chuxiao | \n",
540 | " Wang Chenghan | \n",
541 | " Zhang Ying | \n",
542 | " Yuli ZHANG | \n",
543 | " Beijing Institute of Technology | \n",
544 | " INFORMS Award | \n",
545 | "
\n",
546 | " \n",
547 | " 19308 | \n",
548 | " 2307336 | \n",
549 | " Zhaohong Liao | \n",
550 | " Enyang Li | \n",
551 | " Yingyi Liu | \n",
552 | " Zhi Gao | \n",
553 | " Wuhan University | \n",
554 | " AMS Award | \n",
555 | "
\n",
556 | " \n",
557 | " 20749 | \n",
558 | " 2308899 | \n",
559 | " Ruomu Li | \n",
560 | " Chenyu Ma | \n",
561 | " Mengyuan Dai | \n",
562 | " Dongxue Yan | \n",
563 | " Nanjing University of Posts and Telecommunication | \n",
564 | " AMS Award | \n",
565 | "
\n",
566 | " \n",
567 | "
\n",
568 | "
"
569 | ],
570 | "text/plain": [
571 | " control_number student1 student2 \\\n",
572 | "96 2300136 Hanzhang Zhou Huangyii Zheng \n",
573 | "169 2300229 Cao Honglin Zhou Zijian \n",
574 | "260 2300336 Jiahao Luo Tianyu Xiong \n",
575 | "1037 2309229 Yuhao Sun Ziruo Wang \n",
576 | "1532 2309766 Carrie Cox Jobi Lo \n",
577 | "2406 2310776 Duffy Anderson Matthew Helmer \n",
578 | "2832 2311258 Haiyan Zhang Tao Zhuang \n",
579 | "3077 2311517 Taining Yan Kaiqun Wu \n",
580 | "3912 2312411 Dingkai Wei Yuecheng Wang \n",
581 | "5561 2314151 Jing Ren Chenyue Xia \n",
582 | "6398 2315018 Hongyu Lang Yutong Shao \n",
583 | "6692 2315321 Baoyang Zhang Lei Tian \n",
584 | "6749 2315379 Zhu Xiangyu Liang Katyin \n",
585 | "8305 2316994 Zichen Cao Yunyujie Du \n",
586 | "9488 2318300 Enzo Moraes Mescall Nicolas Salazar \n",
587 | "10156 2318982 Kehan Tong Yuqi Yang \n",
588 | "10351 2301192 Jianjie Zheng Weikang Li \n",
589 | "12128 2320131 Song Yushuai Zhao Chenxia \n",
590 | "14508 2322645 Steven Sofos DiSilvio Anthony Ozerov \n",
591 | "14546 2322687 Caden Lin Maksym Bondarenko \n",
592 | "16300 2303950 Yuewen Yang Yifan Qi \n",
593 | "16313 2303967 Jingjia Peng Xinyi Huang \n",
594 | "17181 2304962 Zhu Xiaotian Liu Jingwen \n",
595 | "17912 2305794 Zhang Chuxiao Wang Chenghan \n",
596 | "19308 2307336 Zhaohong Liao Enyang Li \n",
597 | "20749 2308899 Ruomu Li Chenyu Ma \n",
598 | "\n",
599 | " student3 advisor \\\n",
600 | "96 Xiaoyang Cao Heng Liang \n",
601 | "169 Hu Yongqi Li Mingqi \n",
602 | "260 Yuting Zhang Guoliang He \n",
603 | "1037 Shuo Zhang Hai Jin \n",
604 | "1532 Elias Coppock Wai Lau \n",
605 | "2406 Madeline Rue Mei Zhu \n",
606 | "2832 Yunuo Lei Qingsong Zou \n",
607 | "3077 Lijie Chen Taining Yan \n",
608 | "3912 Weilong Zhu Yuchao Li \n",
609 | "5561 AnyYu Pan Guibing Guo \n",
610 | "6398 Xiaqing Zhou Jigao Yan \n",
611 | "6692 Zihan Wu Jun Lu \n",
612 | "6749 Wei Xiaoqin Wang Xiaoyin \n",
613 | "8305 Xinyi Jiang Xiaofeng Gao \n",
614 | "9488 Erik Mendes Novak Maria-Veronica Ciocanel \n",
615 | "10156 Yan Du Qiang Yao \n",
616 | "10351 Yao Hou Lei Liu \n",
617 | "12128 Hu Yunbo Song Yushuai \n",
618 | "14508 Leon Zhou George Dragomir \n",
619 | "14546 Phillip M Yan George Dragomir \n",
620 | "16300 Yuechuan Ma Bo Wang \n",
621 | "16313 Xuejun Zhang Xiaofeng Gao \n",
622 | "17181 Liu Xinjie Chen Hua \n",
623 | "17912 Zhang Ying Yuli ZHANG \n",
624 | "19308 Yingyi Liu Zhi Gao \n",
625 | "20749 Mengyuan Dai Dongxue Yan \n",
626 | "\n",
627 | " university \\\n",
628 | "96 Tsinghua University \n",
629 | "169 Jniversity of Electronic Science and Technolog... \n",
630 | "260 University of Electronic Science and Technolog... \n",
631 | "1037 Beijing Institute of Technology \n",
632 | "1532 Seattle Pacific University \n",
633 | "2406 Pacific Lutheran University \n",
634 | "2832 XIDIAN UNIVERSITY \n",
635 | "3077 Renmin University of China \n",
636 | "3912 Chang'an University \n",
637 | "5561 Northeastern University of China \n",
638 | "6398 Soochow University \n",
639 | "6692 National University of Defense Technology \n",
640 | "6749 Tiangong University \n",
641 | "8305 Shanghai Jiao Tong University \n",
642 | "9488 Duke University \n",
643 | "10156 East China Normal University \n",
644 | "10351 Zhejiang University of Finance and Economics \n",
645 | "12128 Tianjin University \n",
646 | "14508 Columbia University \n",
647 | "14546 Columbia University \n",
648 | "16300 Beiing Institute of Technology \n",
649 | "16313 Shanghai Jiao Tong University \n",
650 | "17181 China University of Petroleum \n",
651 | "17912 Beijing Institute of Technology \n",
652 | "19308 Wuhan University \n",
653 | "20749 Nanjing University of Posts and Telecommunication \n",
654 | "\n",
655 | " prize \n",
656 | "96 INFORMS Award \n",
657 | "169 Leonhard Euler Award \n",
658 | "260 AMS Award \n",
659 | "1037 Ben Fusaro Award \n",
660 | "1532 MAA Award \n",
661 | "2406 MAA Award \n",
662 | "2832 AMS Award \n",
663 | "3077 Vilfredo Pareto Award \n",
664 | "3912 Rachel Carson Award \n",
665 | "5561 COMAP Scholarship Award \n",
666 | "6398 SIAM Award \n",
667 | "6692 SIAM Award \n",
668 | "6749 Frank Giordano Award \n",
669 | "8305 INFORMS Award \n",
670 | "9488 COMAP Scholarship Award \n",
671 | "10156 SIAM Award \n",
672 | "10351 AMS Award \n",
673 | "12128 SIAM Award \n",
674 | "14508 INFORMS Award \n",
675 | "14546 MAA Award \n",
676 | "16300 SIAM Award \n",
677 | "16313 AMS Award \n",
678 | "17181 INFORMS Award \n",
679 | "17912 INFORMS Award \n",
680 | "19308 AMS Award \n",
681 | "20749 AMS Award "
682 | ]
683 | },
684 | "execution_count": 8,
685 | "metadata": {},
686 | "output_type": "execute_result"
687 | }
688 | ],
689 | "source": [
690 | "avard_result=data.loc[(data['prize'].str.contains('Award'))]\n",
691 | "avard_result"
692 | ]
693 | },
694 | {
695 | "cell_type": "code",
696 | "execution_count": 9,
697 | "metadata": {
698 | "scrolled": false
699 | },
700 | "outputs": [
701 | {
702 | "data": {
703 | "text/html": [
704 | "\n",
705 | "\n",
718 | "
\n",
719 | " \n",
720 | " \n",
721 | " | \n",
722 | " control_number | \n",
723 | " student1 | \n",
724 | " student2 | \n",
725 | " student3 | \n",
726 | " advisor | \n",
727 | " university | \n",
728 | " prize | \n",
729 | "
\n",
730 | " \n",
731 | " \n",
732 | " \n",
733 | " 157 | \n",
734 | " 2300214 | \n",
735 | " Hao Yang | \n",
736 | " Yuxin Hu | \n",
737 | " Di Liu | \n",
738 | " ZhiHong Yang | \n",
739 | " Huazhong University of Science and Technology | \n",
740 | " Disqualified - P | \n",
741 | "
\n",
742 | " \n",
743 | " 165 | \n",
744 | " 2300223 | \n",
745 | " Yingzhong Hua | \n",
746 | " Yanwei Tan | \n",
747 | " Jichen Bian | \n",
748 | " Yingzhong Hua | \n",
749 | " Huazhong University of Science and Technology | \n",
750 | " Successful Participant | \n",
751 | "
\n",
752 | " \n",
753 | " 588 | \n",
754 | " 2300708 | \n",
755 | " Xiang Li | \n",
756 | " Yu Hu | \n",
757 | " JunHang Ma | \n",
758 | " Xiang Li | \n",
759 | " Huazhong University of Science and Technology | \n",
760 | " Successful Participant | \n",
761 | "
\n",
762 | " \n",
763 | " 637 | \n",
764 | " 2300772 | \n",
765 | " Yitian Han | \n",
766 | " Anyang Li | \n",
767 | " Ruoxin Wu | \n",
768 | " Gang Xu | \n",
769 | " Huazhong University of Science and Technology | \n",
770 | " Successful Participant | \n",
771 | "
\n",
772 | " \n",
773 | " 722 | \n",
774 | " 2300883 | \n",
775 | " Lesi Hu | \n",
776 | " Hannan Chen | \n",
777 | " Yulang Hong | \n",
778 | " Suyang Ma | \n",
779 | " Huazhong University of Science and Technology | \n",
780 | " Successful Participant | \n",
781 | "
\n",
782 | " \n",
783 | " ... | \n",
784 | " ... | \n",
785 | " ... | \n",
786 | " ... | \n",
787 | " ... | \n",
788 | " ... | \n",
789 | " ... | \n",
790 | " ... | \n",
791 | "
\n",
792 | " \n",
793 | " 20816 | \n",
794 | " 2308976 | \n",
795 | " Xiaokai Zhang | \n",
796 | " Muchu Chen | \n",
797 | " Bowen Zhou | \n",
798 | " Chen Yu | \n",
799 | " Huazhong University of Science and Technology | \n",
800 | " Honorable Mention | \n",
801 | "
\n",
802 | " \n",
803 | " 20820 | \n",
804 | " 2308980 | \n",
805 | " Xuebin Zhou | \n",
806 | " Yutong Ding | \n",
807 | " Chenmin Ke | \n",
808 | " Haoyuan Xu | \n",
809 | " Huazhong University of Science and Technology | \n",
810 | " Honorable Mention | \n",
811 | "
\n",
812 | " \n",
813 | " 20822 | \n",
814 | " 2308982 | \n",
815 | " Jiongfan Zhu | \n",
816 | " Yilong Chen | \n",
817 | " Yuezhang Long | \n",
818 | " Jiongfan Zhu | \n",
819 | " Huazhong University of Science and Technology | \n",
820 | " Successful Participant | \n",
821 | "
\n",
822 | " \n",
823 | " 20826 | \n",
824 | " 2308986 | \n",
825 | " Lihui Zhang | \n",
826 | " Xin Hu | \n",
827 | " Chenyang Xiong | \n",
828 | " Li Li | \n",
829 | " Huazhong University of Science and Technology | \n",
830 | " Finalist | \n",
831 | "
\n",
832 | " \n",
833 | " 20827 | \n",
834 | " 2308987 | \n",
835 | " Ziheng Huang | \n",
836 | " Zihe Liu | \n",
837 | " Wensheng Yang | \n",
838 | " Ziheng Huang | \n",
839 | " Huazhong University of Science and Technology | \n",
840 | " Successful Participant | \n",
841 | "
\n",
842 | " \n",
843 | "
\n",
844 | "
338 rows × 7 columns
\n",
845 | "
"
846 | ],
847 | "text/plain": [
848 | " control_number student1 student2 student3 \\\n",
849 | "157 2300214 Hao Yang Yuxin Hu Di Liu \n",
850 | "165 2300223 Yingzhong Hua Yanwei Tan Jichen Bian \n",
851 | "588 2300708 Xiang Li Yu Hu JunHang Ma \n",
852 | "637 2300772 Yitian Han Anyang Li Ruoxin Wu \n",
853 | "722 2300883 Lesi Hu Hannan Chen Yulang Hong \n",
854 | "... ... ... ... ... \n",
855 | "20816 2308976 Xiaokai Zhang Muchu Chen Bowen Zhou \n",
856 | "20820 2308980 Xuebin Zhou Yutong Ding Chenmin Ke \n",
857 | "20822 2308982 Jiongfan Zhu Yilong Chen Yuezhang Long \n",
858 | "20826 2308986 Lihui Zhang Xin Hu Chenyang Xiong \n",
859 | "20827 2308987 Ziheng Huang Zihe Liu Wensheng Yang \n",
860 | "\n",
861 | " advisor university \\\n",
862 | "157 ZhiHong Yang Huazhong University of Science and Technology \n",
863 | "165 Yingzhong Hua Huazhong University of Science and Technology \n",
864 | "588 Xiang Li Huazhong University of Science and Technology \n",
865 | "637 Gang Xu Huazhong University of Science and Technology \n",
866 | "722 Suyang Ma Huazhong University of Science and Technology \n",
867 | "... ... ... \n",
868 | "20816 Chen Yu Huazhong University of Science and Technology \n",
869 | "20820 Haoyuan Xu Huazhong University of Science and Technology \n",
870 | "20822 Jiongfan Zhu Huazhong University of Science and Technology \n",
871 | "20826 Li Li Huazhong University of Science and Technology \n",
872 | "20827 Ziheng Huang Huazhong University of Science and Technology \n",
873 | "\n",
874 | " prize \n",
875 | "157 Disqualified - P \n",
876 | "165 Successful Participant \n",
877 | "588 Successful Participant \n",
878 | "637 Successful Participant \n",
879 | "722 Successful Participant \n",
880 | "... ... \n",
881 | "20816 Honorable Mention \n",
882 | "20820 Honorable Mention \n",
883 | "20822 Successful Participant \n",
884 | "20826 Finalist \n",
885 | "20827 Successful Participant \n",
886 | "\n",
887 | "[338 rows x 7 columns]"
888 | ]
889 | },
890 | "execution_count": 9,
891 | "metadata": {},
892 | "output_type": "execute_result"
893 | }
894 | ],
895 | "source": [
896 | "your_univ_result=data.loc[data['university']==your_university]\n",
897 | "your_univ_result"
898 | ]
899 | },
900 | {
901 | "cell_type": "code",
902 | "execution_count": 10,
903 | "metadata": {
904 | "scrolled": true
905 | },
906 | "outputs": [
907 | {
908 | "data": {
909 | "text/plain": [
910 | "Successful Participant 223\n",
911 | "Honorable Mention 72\n",
912 | "Meritorious Winner 20\n",
913 | "Disqualified - P 14\n",
914 | "Finalist 8\n",
915 | "Unsuccessful - I 1\n",
916 | "Name: prize, dtype: int64"
917 | ]
918 | },
919 | "execution_count": 10,
920 | "metadata": {},
921 | "output_type": "execute_result"
922 | }
923 | ],
924 | "source": [
925 | "your_univ_result['prize'].value_counts()"
926 | ]
927 | },
928 | {
929 | "cell_type": "code",
930 | "execution_count": 11,
931 | "metadata": {
932 | "scrolled": false
933 | },
934 | "outputs": [
935 | {
936 | "data": {
937 | "text/html": [
938 | "\n",
939 | "\n",
952 | "
\n",
953 | " \n",
954 | " \n",
955 | " | \n",
956 | " control_number | \n",
957 | " student1 | \n",
958 | " student2 | \n",
959 | " student3 | \n",
960 | " advisor | \n",
961 | " university | \n",
962 | " prize | \n",
963 | "
\n",
964 | " \n",
965 | " \n",
966 | " \n",
967 | "
\n",
968 | "
"
969 | ],
970 | "text/plain": [
971 | "Empty DataFrame\n",
972 | "Columns: [control_number, student1, student2, student3, advisor, university, prize]\n",
973 | "Index: []"
974 | ]
975 | },
976 | "execution_count": 11,
977 | "metadata": {},
978 | "output_type": "execute_result"
979 | }
980 | ],
981 | "source": [
982 | "prize_result=your_univ_result.loc[(your_univ_result['prize']=='Outstanding Winner') | (your_univ_result['prize'].str.contains('Award'))]\n",
983 | "prize_result"
984 | ]
985 | },
986 | {
987 | "cell_type": "code",
988 | "execution_count": null,
989 | "metadata": {},
990 | "outputs": [],
991 | "source": []
992 | },
993 | {
994 | "cell_type": "code",
995 | "execution_count": 12,
996 | "metadata": {},
997 | "outputs": [
998 | {
999 | "data": {
1000 | "text/html": [
1001 | "\n",
1002 | "\n",
1015 | "
\n",
1016 | " \n",
1017 | " \n",
1018 | " | \n",
1019 | " control_number | \n",
1020 | " student1 | \n",
1021 | " student2 | \n",
1022 | " student3 | \n",
1023 | " advisor | \n",
1024 | " university | \n",
1025 | " prize | \n",
1026 | "
\n",
1027 | " \n",
1028 | " \n",
1029 | " \n",
1030 | " 2859 | \n",
1031 | " 2311286 | \n",
1032 | " Shituo Ma | \n",
1033 | " Anqi Liu | \n",
1034 | " Zixiong Wang | \n",
1035 | " Shituo Ma | \n",
1036 | " Huazhong University of Science and Technology | \n",
1037 | " Finalist | \n",
1038 | "
\n",
1039 | " \n",
1040 | " 4385 | \n",
1041 | " 2312921 | \n",
1042 | " Fanjun Kong | \n",
1043 | " Chenglong Zeng | \n",
1044 | " Di Wu | \n",
1045 | " Kong Fanjun | \n",
1046 | " Huazhong University of Science and Technology | \n",
1047 | " Finalist | \n",
1048 | "
\n",
1049 | " \n",
1050 | " 4670 | \n",
1051 | " 2313212 | \n",
1052 | " Chen Yihua | \n",
1053 | " Lv Xinsheng | \n",
1054 | " Lin JunJie | \n",
1055 | " Yang Kai | \n",
1056 | " Huazhong University of Science and Technology | \n",
1057 | " Finalist | \n",
1058 | "
\n",
1059 | " \n",
1060 | " 4952 | \n",
1061 | " 2313511 | \n",
1062 | " Sitong Zheng | \n",
1063 | " Yuanmeng Shan | \n",
1064 | " Ziying Chen | \n",
1065 | " Zhao Pan | \n",
1066 | " Huazhong University of Science and Technology | \n",
1067 | " Finalist | \n",
1068 | "
\n",
1069 | " \n",
1070 | " 19632 | \n",
1071 | " 2307685 | \n",
1072 | " Xiangqian Yan | \n",
1073 | " Keyi Chen | \n",
1074 | " Ling Luo | \n",
1075 | " Haoyuan Xu | \n",
1076 | " Huazhong University of Science and Technology | \n",
1077 | " Finalist | \n",
1078 | "
\n",
1079 | " \n",
1080 | " 19692 | \n",
1081 | " 2307750 | \n",
1082 | " Zuoming Fu | \n",
1083 | " Hatyue Chen | \n",
1084 | " Haoran Zhu | \n",
1085 | " Zuoming Fu | \n",
1086 | " Huazhong University of Science and Technology | \n",
1087 | " Finalist | \n",
1088 | "
\n",
1089 | " \n",
1090 | " 20752 | \n",
1091 | " 2308903 | \n",
1092 | " Chenshen Mao | \n",
1093 | " Zhixiong Xia | \n",
1094 | " Shuning Luo | \n",
1095 | " Haoyuan Xu | \n",
1096 | " Huazhong University of Science and Technology | \n",
1097 | " Finalist | \n",
1098 | "
\n",
1099 | " \n",
1100 | " 20826 | \n",
1101 | " 2308986 | \n",
1102 | " Lihui Zhang | \n",
1103 | " Xin Hu | \n",
1104 | " Chenyang Xiong | \n",
1105 | " Li Li | \n",
1106 | " Huazhong University of Science and Technology | \n",
1107 | " Finalist | \n",
1108 | "
\n",
1109 | " \n",
1110 | "
\n",
1111 | "
"
1112 | ],
1113 | "text/plain": [
1114 | " control_number student1 student2 student3 \\\n",
1115 | "2859 2311286 Shituo Ma Anqi Liu Zixiong Wang \n",
1116 | "4385 2312921 Fanjun Kong Chenglong Zeng Di Wu \n",
1117 | "4670 2313212 Chen Yihua Lv Xinsheng Lin JunJie \n",
1118 | "4952 2313511 Sitong Zheng Yuanmeng Shan Ziying Chen \n",
1119 | "19632 2307685 Xiangqian Yan Keyi Chen Ling Luo \n",
1120 | "19692 2307750 Zuoming Fu Hatyue Chen Haoran Zhu \n",
1121 | "20752 2308903 Chenshen Mao Zhixiong Xia Shuning Luo \n",
1122 | "20826 2308986 Lihui Zhang Xin Hu Chenyang Xiong \n",
1123 | "\n",
1124 | " advisor university prize \n",
1125 | "2859 Shituo Ma Huazhong University of Science and Technology Finalist \n",
1126 | "4385 Kong Fanjun Huazhong University of Science and Technology Finalist \n",
1127 | "4670 Yang Kai Huazhong University of Science and Technology Finalist \n",
1128 | "4952 Zhao Pan Huazhong University of Science and Technology Finalist \n",
1129 | "19632 Haoyuan Xu Huazhong University of Science and Technology Finalist \n",
1130 | "19692 Zuoming Fu Huazhong University of Science and Technology Finalist \n",
1131 | "20752 Haoyuan Xu Huazhong University of Science and Technology Finalist \n",
1132 | "20826 Li Li Huazhong University of Science and Technology Finalist "
1133 | ]
1134 | },
1135 | "execution_count": 12,
1136 | "metadata": {},
1137 | "output_type": "execute_result"
1138 | }
1139 | ],
1140 | "source": [
1141 | "prize_result=your_univ_result.loc[your_univ_result['prize']=='Finalist']\n",
1142 | "prize_result"
1143 | ]
1144 | },
1145 | {
1146 | "cell_type": "code",
1147 | "execution_count": null,
1148 | "metadata": {},
1149 | "outputs": [],
1150 | "source": []
1151 | },
1152 | {
1153 | "cell_type": "code",
1154 | "execution_count": null,
1155 | "metadata": {},
1156 | "outputs": [],
1157 | "source": []
1158 | }
1159 | ],
1160 | "metadata": {
1161 | "kernelspec": {
1162 | "display_name": "Python 3",
1163 | "language": "python",
1164 | "name": "python3"
1165 | },
1166 | "language_info": {
1167 | "codemirror_mode": {
1168 | "name": "ipython",
1169 | "version": 3
1170 | },
1171 | "file_extension": ".py",
1172 | "mimetype": "text/x-python",
1173 | "name": "python",
1174 | "nbconvert_exporter": "python",
1175 | "pygments_lexer": "ipython3",
1176 | "version": "3.7.6"
1177 | }
1178 | },
1179 | "nbformat": 4,
1180 | "nbformat_minor": 4
1181 | }
1182 |
--------------------------------------------------------------------------------
/analysis_2024.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import shutil\n",
10 | "import pandas as pd\n",
11 | "from config import your_university"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 2,
17 | "metadata": {
18 | "scrolled": true
19 | },
20 | "outputs": [
21 | {
22 | "data": {
23 | "text/html": [
24 | "\n",
25 | "\n",
38 | "
\n",
39 | " \n",
40 | " \n",
41 | " | \n",
42 | " control_number | \n",
43 | " student1 | \n",
44 | " student2 | \n",
45 | " student3 | \n",
46 | " university | \n",
47 | " prize | \n",
48 | "
\n",
49 | " \n",
50 | " \n",
51 | " \n",
52 | " 0 | \n",
53 | " 2400002 | \n",
54 | " Huang Jiarui | \n",
55 | " Luo Zimeng | \n",
56 | " Zhang Yifa | \n",
57 | " Beijing Normal University | \n",
58 | " Honorable Mention | \n",
59 | "
\n",
60 | " \n",
61 | " 1 | \n",
62 | " 2400004 | \n",
63 | " Chongqin Chen | \n",
64 | " Junyi Liu | \n",
65 | " Ruishu Huang | \n",
66 | " Ningbo University | \n",
67 | " Successful Participant | \n",
68 | "
\n",
69 | " \n",
70 | " 2 | \n",
71 | " 2400006 | \n",
72 | " Xinkai Wu | \n",
73 | " Gaoyuan Feng | \n",
74 | " Nuoheng Zhou | \n",
75 | " Ningbo University | \n",
76 | " Honorable Mention | \n",
77 | "
\n",
78 | " \n",
79 | " 3 | \n",
80 | " 2400007 | \n",
81 | " Xiatian Zhang | \n",
82 | " Runyi Lin | \n",
83 | " Yuehan Yang | \n",
84 | " Ningbo University | \n",
85 | " Finalist | \n",
86 | "
\n",
87 | " \n",
88 | " 4 | \n",
89 | " 2400008 | \n",
90 | " Kang-liang Wang | \n",
91 | " Fei Ni | \n",
92 | " Zi-shuo Wang | \n",
93 | " Ningbo University | \n",
94 | " Successful Participant | \n",
95 | "
\n",
96 | " \n",
97 | "
\n",
98 | "
"
99 | ],
100 | "text/plain": [
101 | " control_number student1 student2 student3 \\\n",
102 | "0 2400002 Huang Jiarui Luo Zimeng Zhang Yifa \n",
103 | "1 2400004 Chongqin Chen Junyi Liu Ruishu Huang \n",
104 | "2 2400006 Xinkai Wu Gaoyuan Feng Nuoheng Zhou \n",
105 | "3 2400007 Xiatian Zhang Runyi Lin Yuehan Yang \n",
106 | "4 2400008 Kang-liang Wang Fei Ni Zi-shuo Wang \n",
107 | "\n",
108 | " university prize \n",
109 | "0 Beijing Normal University Honorable Mention \n",
110 | "1 Ningbo University Successful Participant \n",
111 | "2 Ningbo University Honorable Mention \n",
112 | "3 Ningbo University Finalist \n",
113 | "4 Ningbo University Successful Participant "
114 | ]
115 | },
116 | "execution_count": 2,
117 | "metadata": {},
118 | "output_type": "execute_result"
119 | }
120 | ],
121 | "source": [
122 | "shutil.copyfile('./all/all_2024.txt', './all/all_2024.csv')\n",
123 | "data = pd.read_csv('./all/all_2024.csv')\n",
124 | "data=data.iloc[:,0:6]\n",
125 | "data.head()"
126 | ]
127 | },
128 | {
129 | "cell_type": "code",
130 | "execution_count": 3,
131 | "metadata": {},
132 | "outputs": [
133 | {
134 | "data": {
135 | "text/plain": [
136 | "Jilin University 572\n",
137 | "Harbin Engineering University 560\n",
138 | "Huazhong University of Science and Technology 511\n",
139 | "Northwestern Polytechnical University 497\n",
140 | "Xi'an Jiaotong University 494\n",
141 | "Beihang University 451\n",
142 | "Nanjing University of Posts and Telecommunication 410\n",
143 | "Dalian University of Technology 387\n",
144 | "South China University of Technology 372\n",
145 | "XIDIAN UNIVERSITY 368\n",
146 | "Chongqing University 337\n",
147 | "Wuhan University 337\n",
148 | "Shanghai Jiao Tong University 330\n",
149 | "Harbin Institute of Technology 326\n",
150 | "Xidian University 285\n",
151 | "SUN YAT-SEN UNIVERSITY 273\n",
152 | "Northeastern University 270\n",
153 | "National University of Defense Technology 267\n",
154 | "Being Jiaotong University 266\n",
155 | "Beijing Institute of Technology 262\n",
156 | "Name: university, dtype: int64"
157 | ]
158 | },
159 | "execution_count": 3,
160 | "metadata": {},
161 | "output_type": "execute_result"
162 | }
163 | ],
164 | "source": [
165 | "univ_data=data['university'].value_counts().iloc[0:20]\n",
166 | "univ_data"
167 | ]
168 | },
169 | {
170 | "cell_type": "code",
171 | "execution_count": 4,
172 | "metadata": {
173 | "scrolled": true
174 | },
175 | "outputs": [
176 | {
177 | "data": {
178 | "image/png": "\n",
179 | "text/plain": [
180 | ""
181 | ]
182 | },
183 | "metadata": {
184 | "needs_background": "light"
185 | },
186 | "output_type": "display_data"
187 | }
188 | ],
189 | "source": [
190 | "\n",
191 | "ax=univ_data.sort_values().plot.barh()\n",
192 | "fig=ax.get_figure()\n",
193 | "fig.savefig('统计结果.png',bbox_inches='tight')"
194 | ]
195 | },
196 | {
197 | "cell_type": "code",
198 | "execution_count": 5,
199 | "metadata": {},
200 | "outputs": [
201 | {
202 | "data": {
203 | "text/plain": [
204 | "Successful Participant 18103\n",
205 | "Honorable Mention 6643\n",
206 | "Meritorious Winner 1918\n",
207 | "Disqualified - P 1432\n",
208 | "Finalist 506\n",
209 | "Unsuccessful - I 127\n",
210 | "Not Judged 24\n",
211 | "Outstanding Winner 13\n",
212 | "AMS Award 6\n",
213 | "INFORMS Award 6\n",
214 | "SIAM Award 5\n",
215 | "MAA Award 3\n",
216 | "COMAP Scholarship Award 2\n",
217 | "Leonhard Euler Award 1\n",
218 | "Vilfredo Pareto Award 1\n",
219 | "Rachel Carson Award 1\n",
220 | "ASA Award 1\n",
221 | "Cc 1\n",
222 | "Ben Fusaro Award 1\n",
223 | "Frank Giordano Award 1\n",
224 | "Name: prize, dtype: int64"
225 | ]
226 | },
227 | "execution_count": 5,
228 | "metadata": {},
229 | "output_type": "execute_result"
230 | }
231 | ],
232 | "source": [
233 | "data['prize'].value_counts()"
234 | ]
235 | },
236 | {
237 | "cell_type": "code",
238 | "execution_count": 6,
239 | "metadata": {},
240 | "outputs": [
241 | {
242 | "data": {
243 | "text/html": [
244 | "\n",
245 | "\n",
258 | "
\n",
259 | " \n",
260 | " \n",
261 | " | \n",
262 | " control_number | \n",
263 | " student1 | \n",
264 | " student2 | \n",
265 | " student3 | \n",
266 | " university | \n",
267 | " prize | \n",
268 | "
\n",
269 | " \n",
270 | " \n",
271 | " \n",
272 | " 2850 | \n",
273 | " 2407038 | \n",
274 | " Jiechuan Gong | \n",
275 | " Fantong Xia | \n",
276 | " Junhao Zeng | \n",
277 | " UESTC | \n",
278 | " INFORMS Award | \n",
279 | "
\n",
280 | " \n",
281 | " 2901 | \n",
282 | " 2407093 | \n",
283 | " Haoran Yi | \n",
284 | " Junrong Liu | \n",
285 | " Zhe Zhao | \n",
286 | " Beijing Institute of Technology | \n",
287 | " AMS Award | \n",
288 | "
\n",
289 | " \n",
290 | " 3208 | \n",
291 | " 2407414 | \n",
292 | " Jiajun Lei | \n",
293 | " Junjie Wang | \n",
294 | " Ruiyang Liu | \n",
295 | " Xi'an Jiaotong University | \n",
296 | " AMS Award | \n",
297 | "
\n",
298 | " \n",
299 | " 5532 | \n",
300 | " 2400860 | \n",
301 | " Xinyu Hu | \n",
302 | " Jiarui Liang | \n",
303 | " Dongsheng Su | \n",
304 | " Zhejiang Sci-Tech University | \n",
305 | " Rachel Carson Award | \n",
306 | "
\n",
307 | " \n",
308 | " 5656 | \n",
309 | " 2400996 | \n",
310 | " Zhen Huang | \n",
311 | " Honghui Cao | \n",
312 | " Zhenning Liu | \n",
313 | " Wuhan University | \n",
314 | " INFORMS Award | \n",
315 | "
\n",
316 | " \n",
317 | " 6086 | \n",
318 | " 2409949 | \n",
319 | " YANHENG LI | \n",
320 | " HONG JIANG | \n",
321 | " LING GAN | \n",
322 | " Renmin University of China | \n",
323 | " COMAP Scholarship Award | \n",
324 | "
\n",
325 | " \n",
326 | " 6599 | \n",
327 | " 2410482 | \n",
328 | " ZIXUAN TANG | \n",
329 | " YITING YANG | \n",
330 | " XINYU CHEN | \n",
331 | " Beijing Normal University | \n",
332 | " INFORMS Award | \n",
333 | "
\n",
334 | " \n",
335 | " 6947 | \n",
336 | " 2410846 | \n",
337 | " Heyu Huang | \n",
338 | " Jiahui Zhai | \n",
339 | " Weibin Li | \n",
340 | " Xiamen University | \n",
341 | " Ben Fusaro Award | \n",
342 | "
\n",
343 | " \n",
344 | " 6989 | \n",
345 | " 2410889 | \n",
346 | " Wenlong Chen | \n",
347 | " Yihui Li | \n",
348 | " Wei Zhu | \n",
349 | " Shenzhen University | \n",
350 | " SIAM Award | \n",
351 | "
\n",
352 | " \n",
353 | " 7640 | \n",
354 | " 2411570 | \n",
355 | " Yutong Wang | \n",
356 | " Haoqi Lv | \n",
357 | " Pengkun Wang | \n",
358 | " China University of Geosciences Being | \n",
359 | " Frank Giordano Award | \n",
360 | "
\n",
361 | " \n",
362 | " 9540 | \n",
363 | " 2413552 | \n",
364 | " Qianjing Zhu | \n",
365 | " Wenhao Yuan | \n",
366 | " Yunjie Shao | \n",
367 | " Soochow University | \n",
368 | " INFORMS Award | \n",
369 | "
\n",
370 | " \n",
371 | " 9553 | \n",
372 | " 2413565 | \n",
373 | " Xu Yan | \n",
374 | " Zhou Yangyujie | \n",
375 | " Huo Zhengyji | \n",
376 | " Xi'an University of Posts and Telecommunications | \n",
377 | " INFORMS Award | \n",
378 | "
\n",
379 | " \n",
380 | " 10727 | \n",
381 | " 2401298 | \n",
382 | " Junjie Fei | \n",
383 | " Yuxin Shen | \n",
384 | " Kaijie Du | \n",
385 | " Fudan University | \n",
386 | " AMS Award | \n",
387 | "
\n",
388 | " \n",
389 | " 13320 | \n",
390 | " 2417004 | \n",
391 | " Jiahao Zhu | \n",
392 | " Chengzhen Han | \n",
393 | " Dongyi Jiang | \n",
394 | " East China Normal University | \n",
395 | " INFORMS Award | \n",
396 | "
\n",
397 | " \n",
398 | " 14127 | \n",
399 | " 2417831 | \n",
400 | " Yue Zhong | \n",
401 | " Yueming Cao | \n",
402 | " Bo Wu | \n",
403 | " Tsinghua University | \n",
404 | " Leonhard Euler Award | \n",
405 | "
\n",
406 | " \n",
407 | " 16133 | \n",
408 | " 2401919 | \n",
409 | " Shang Yue Li | \n",
410 | " YuChun Yang | \n",
411 | " HuiXue Su | \n",
412 | " Renmin University of China | \n",
413 | " SIAM Award | \n",
414 | "
\n",
415 | " \n",
416 | " 16293 | \n",
417 | " 2419588 | \n",
418 | " Haoyang Peng | \n",
419 | " Wei Li | \n",
420 | " Siyuan Wang | \n",
421 | " National University of Defense Technology | \n",
422 | " AMS Award | \n",
423 | "
\n",
424 | " \n",
425 | " 16681 | \n",
426 | " 2419984 | \n",
427 | " Zeyang Wu | \n",
428 | " Xinhiang Li | \n",
429 | " Xiran Ni | \n",
430 | " Tongji University | \n",
431 | " AMS Award | \n",
432 | "
\n",
433 | " \n",
434 | " 18654 | \n",
435 | " 2422054 | \n",
436 | " Yi Wu | \n",
437 | " Jiankai Li | \n",
438 | " Chenyang Pan | \n",
439 | " Shanghai Jiao Tong University | \n",
440 | " Vilfredo Pareto Award | \n",
441 | "
\n",
442 | " \n",
443 | " 19231 | \n",
444 | " 2422656 | \n",
445 | " Aranjinsuren Enkhbat | \n",
446 | " Bisrat Kassie | \n",
447 | " Jordan Sims | \n",
448 | " Brandeis University | \n",
449 | " MAA Award | \n",
450 | "
\n",
451 | " \n",
452 | " 20886 | \n",
453 | " 2424371 | \n",
454 | " Xinyu Sui | \n",
455 | " Ji Zheng | \n",
456 | " Yuhang Ma | \n",
457 | " Xi'an Jiaotong University | \n",
458 | " SIAM Award | \n",
459 | "
\n",
460 | " \n",
461 | " 22721 | \n",
462 | " 2425792 | \n",
463 | " Cunxin Fan | \n",
464 | " Haonan Wang | \n",
465 | " Ke Zhang | \n",
466 | " Shanghai Jiao Tong University | \n",
467 | " SIAM Award | \n",
468 | "
\n",
469 | " \n",
470 | " 23819 | \n",
471 | " 2426941 | \n",
472 | " Wei Sun | \n",
473 | " Ting Mei | \n",
474 | " Fangyuan Liu | \n",
475 | " Hunan Normal University | \n",
476 | " AMS Award | \n",
477 | "
\n",
478 | " \n",
479 | " 25273 | \n",
480 | " 2428463 | \n",
481 | " Yilin Ma | \n",
482 | " Xuantong Wang | \n",
483 | " Jingyi Sun | \n",
484 | " North China University of Technology | \n",
485 | " ASA Award | \n",
486 | "
\n",
487 | " \n",
488 | " 25929 | \n",
489 | " 2429166 | \n",
490 | " Christopher Kan | \n",
491 | " Brandon Lu | \n",
492 | " Benny Sun | \n",
493 | " Duke University | \n",
494 | " MAA Award | \n",
495 | "
\n",
496 | " \n",
497 | " 25967 | \n",
498 | " 2429211 | \n",
499 | " Arjun Taneja | \n",
500 | " Paco Navarro | \n",
501 | " Max Collins | \n",
502 | " Harvey Mudd College | \n",
503 | " COMAP Scholarship Award | \n",
504 | "
\n",
505 | " \n",
506 | " 26675 | \n",
507 | " 2402960 | \n",
508 | " Hangyi Yao | \n",
509 | " Jiting Chen | \n",
510 | " Zeyu Liang | \n",
511 | " Ningbo University | \n",
512 | " SIAM Award | \n",
513 | "
\n",
514 | " \n",
515 | " 27150 | \n",
516 | " 2429973 | \n",
517 | " Henry Li | \n",
518 | " Yuxin Li | \n",
519 | " Crystal Su | \n",
520 | " Columbia University | \n",
521 | " MAA Award | \n",
522 | "
\n",
523 | " \n",
524 | "
\n",
525 | "
"
526 | ],
527 | "text/plain": [
528 | " control_number student1 student2 student3 \\\n",
529 | "2850 2407038 Jiechuan Gong Fantong Xia Junhao Zeng \n",
530 | "2901 2407093 Haoran Yi Junrong Liu Zhe Zhao \n",
531 | "3208 2407414 Jiajun Lei Junjie Wang Ruiyang Liu \n",
532 | "5532 2400860 Xinyu Hu Jiarui Liang Dongsheng Su \n",
533 | "5656 2400996 Zhen Huang Honghui Cao Zhenning Liu \n",
534 | "6086 2409949 YANHENG LI HONG JIANG LING GAN \n",
535 | "6599 2410482 ZIXUAN TANG YITING YANG XINYU CHEN \n",
536 | "6947 2410846 Heyu Huang Jiahui Zhai Weibin Li \n",
537 | "6989 2410889 Wenlong Chen Yihui Li Wei Zhu \n",
538 | "7640 2411570 Yutong Wang Haoqi Lv Pengkun Wang \n",
539 | "9540 2413552 Qianjing Zhu Wenhao Yuan Yunjie Shao \n",
540 | "9553 2413565 Xu Yan Zhou Yangyujie Huo Zhengyji \n",
541 | "10727 2401298 Junjie Fei Yuxin Shen Kaijie Du \n",
542 | "13320 2417004 Jiahao Zhu Chengzhen Han Dongyi Jiang \n",
543 | "14127 2417831 Yue Zhong Yueming Cao Bo Wu \n",
544 | "16133 2401919 Shang Yue Li YuChun Yang HuiXue Su \n",
545 | "16293 2419588 Haoyang Peng Wei Li Siyuan Wang \n",
546 | "16681 2419984 Zeyang Wu Xinhiang Li Xiran Ni \n",
547 | "18654 2422054 Yi Wu Jiankai Li Chenyang Pan \n",
548 | "19231 2422656 Aranjinsuren Enkhbat Bisrat Kassie Jordan Sims \n",
549 | "20886 2424371 Xinyu Sui Ji Zheng Yuhang Ma \n",
550 | "22721 2425792 Cunxin Fan Haonan Wang Ke Zhang \n",
551 | "23819 2426941 Wei Sun Ting Mei Fangyuan Liu \n",
552 | "25273 2428463 Yilin Ma Xuantong Wang Jingyi Sun \n",
553 | "25929 2429166 Christopher Kan Brandon Lu Benny Sun \n",
554 | "25967 2429211 Arjun Taneja Paco Navarro Max Collins \n",
555 | "26675 2402960 Hangyi Yao Jiting Chen Zeyu Liang \n",
556 | "27150 2429973 Henry Li Yuxin Li Crystal Su \n",
557 | "\n",
558 | " university \\\n",
559 | "2850 UESTC \n",
560 | "2901 Beijing Institute of Technology \n",
561 | "3208 Xi'an Jiaotong University \n",
562 | "5532 Zhejiang Sci-Tech University \n",
563 | "5656 Wuhan University \n",
564 | "6086 Renmin University of China \n",
565 | "6599 Beijing Normal University \n",
566 | "6947 Xiamen University \n",
567 | "6989 Shenzhen University \n",
568 | "7640 China University of Geosciences Being \n",
569 | "9540 Soochow University \n",
570 | "9553 Xi'an University of Posts and Telecommunications \n",
571 | "10727 Fudan University \n",
572 | "13320 East China Normal University \n",
573 | "14127 Tsinghua University \n",
574 | "16133 Renmin University of China \n",
575 | "16293 National University of Defense Technology \n",
576 | "16681 Tongji University \n",
577 | "18654 Shanghai Jiao Tong University \n",
578 | "19231 Brandeis University \n",
579 | "20886 Xi'an Jiaotong University \n",
580 | "22721 Shanghai Jiao Tong University \n",
581 | "23819 Hunan Normal University \n",
582 | "25273 North China University of Technology \n",
583 | "25929 Duke University \n",
584 | "25967 Harvey Mudd College \n",
585 | "26675 Ningbo University \n",
586 | "27150 Columbia University \n",
587 | "\n",
588 | " prize \n",
589 | "2850 INFORMS Award \n",
590 | "2901 AMS Award \n",
591 | "3208 AMS Award \n",
592 | "5532 Rachel Carson Award \n",
593 | "5656 INFORMS Award \n",
594 | "6086 COMAP Scholarship Award \n",
595 | "6599 INFORMS Award \n",
596 | "6947 Ben Fusaro Award \n",
597 | "6989 SIAM Award \n",
598 | "7640 Frank Giordano Award \n",
599 | "9540 INFORMS Award \n",
600 | "9553 INFORMS Award \n",
601 | "10727 AMS Award \n",
602 | "13320 INFORMS Award \n",
603 | "14127 Leonhard Euler Award \n",
604 | "16133 SIAM Award \n",
605 | "16293 AMS Award \n",
606 | "16681 AMS Award \n",
607 | "18654 Vilfredo Pareto Award \n",
608 | "19231 MAA Award \n",
609 | "20886 SIAM Award \n",
610 | "22721 SIAM Award \n",
611 | "23819 AMS Award \n",
612 | "25273 ASA Award \n",
613 | "25929 MAA Award \n",
614 | "25967 COMAP Scholarship Award \n",
615 | "26675 SIAM Award \n",
616 | "27150 MAA Award "
617 | ]
618 | },
619 | "execution_count": 6,
620 | "metadata": {},
621 | "output_type": "execute_result"
622 | }
623 | ],
624 | "source": [
625 | "avard_result=data.loc[(data['prize'].str.contains('Award'))]\n",
626 | "avard_result"
627 | ]
628 | },
629 | {
630 | "cell_type": "code",
631 | "execution_count": 7,
632 | "metadata": {
633 | "scrolled": false
634 | },
635 | "outputs": [
636 | {
637 | "data": {
638 | "text/html": [
639 | "\n",
640 | "\n",
653 | "
\n",
654 | " \n",
655 | " \n",
656 | " | \n",
657 | " control_number | \n",
658 | " student1 | \n",
659 | " student2 | \n",
660 | " student3 | \n",
661 | " university | \n",
662 | " prize | \n",
663 | "
\n",
664 | " \n",
665 | " \n",
666 | " \n",
667 | " 102 | \n",
668 | " 2400114 | \n",
669 | " Zishuo Wang | \n",
670 | " Yucheng Chen | \n",
671 | " Yanjinghao Xu | \n",
672 | " Huazhong University of Science and Technology | \n",
673 | " Successful Participant | \n",
674 | "
\n",
675 | " \n",
676 | " 124 | \n",
677 | " 2400139 | \n",
678 | " Boheng Lin | \n",
679 | " Zhipu Hu | \n",
680 | " Zhenghao Gao | \n",
681 | " Huazhong University of Science and Technology | \n",
682 | " Honorable Mention | \n",
683 | "
\n",
684 | " \n",
685 | " 132 | \n",
686 | " 2400147 | \n",
687 | " He Yin | \n",
688 | " HaiZhuo Wang | \n",
689 | " Haoze Li | \n",
690 | " Huazhong University of Science and Technology | \n",
691 | " Successful Participant | \n",
692 | "
\n",
693 | " \n",
694 | " 150 | \n",
695 | " 2400167 | \n",
696 | " Jingchao Lu | \n",
697 | " RuiXin Dong | \n",
698 | " Yaxiang Gao | \n",
699 | " Huazhong University of Science and Technology | \n",
700 | " Successful Participant | \n",
701 | "
\n",
702 | " \n",
703 | " 165 | \n",
704 | " 2400185 | \n",
705 | " Jinlong Ma | \n",
706 | " Zhimin Luo | \n",
707 | " Leying Fu | \n",
708 | " Huazhong University of Science and Technology | \n",
709 | " Successful Participant | \n",
710 | "
\n",
711 | " \n",
712 | " ... | \n",
713 | " ... | \n",
714 | " ... | \n",
715 | " ... | \n",
716 | " ... | \n",
717 | " ... | \n",
718 | " ... | \n",
719 | "
\n",
720 | " \n",
721 | " 28081 | \n",
722 | " 2403746 | \n",
723 | " Jiaye Peng | \n",
724 | " Yuanzheng Li | \n",
725 | " Weiping Shen | \n",
726 | " Huazhong University of Science and Technology | \n",
727 | " Successful Participant | \n",
728 | "
\n",
729 | " \n",
730 | " 28083 | \n",
731 | " 2403748 | \n",
732 | " Zichuan Wang | \n",
733 | " XAuanze Young | \n",
734 | " Yuhang Ding | \n",
735 | " Huazhong University of Science and Technology | \n",
736 | " Honorable Mention | \n",
737 | "
\n",
738 | " \n",
739 | " 28096 | \n",
740 | " 2403761 | \n",
741 | " Xiangyi Li | \n",
742 | " Lin Chen | \n",
743 | " Litong Shi | \n",
744 | " Huazhong University of Science and Technology | \n",
745 | " Disqualified - P | \n",
746 | "
\n",
747 | " \n",
748 | " 28291 | \n",
749 | " 2403964 | \n",
750 | " Huadong Song | \n",
751 | " Hanxiang Lv | \n",
752 | " Liang Zhang | \n",
753 | " Huazhong University of Science and Technology | \n",
754 | " Successful Participant | \n",
755 | "
\n",
756 | " \n",
757 | " 28297 | \n",
758 | " 2403971 | \n",
759 | " Leon Huang | \n",
760 | " jared zhou | \n",
761 | " Joe | \n",
762 | " Huazhong University of Science and Technology | \n",
763 | " Honorable Mention | \n",
764 | "
\n",
765 | " \n",
766 | "
\n",
767 | "
511 rows × 6 columns
\n",
768 | "
"
769 | ],
770 | "text/plain": [
771 | " control_number student1 student2 student3 \\\n",
772 | "102 2400114 Zishuo Wang Yucheng Chen Yanjinghao Xu \n",
773 | "124 2400139 Boheng Lin Zhipu Hu Zhenghao Gao \n",
774 | "132 2400147 He Yin HaiZhuo Wang Haoze Li \n",
775 | "150 2400167 Jingchao Lu RuiXin Dong Yaxiang Gao \n",
776 | "165 2400185 Jinlong Ma Zhimin Luo Leying Fu \n",
777 | "... ... ... ... ... \n",
778 | "28081 2403746 Jiaye Peng Yuanzheng Li Weiping Shen \n",
779 | "28083 2403748 Zichuan Wang XAuanze Young Yuhang Ding \n",
780 | "28096 2403761 Xiangyi Li Lin Chen Litong Shi \n",
781 | "28291 2403964 Huadong Song Hanxiang Lv Liang Zhang \n",
782 | "28297 2403971 Leon Huang jared zhou Joe \n",
783 | "\n",
784 | " university prize \n",
785 | "102 Huazhong University of Science and Technology Successful Participant \n",
786 | "124 Huazhong University of Science and Technology Honorable Mention \n",
787 | "132 Huazhong University of Science and Technology Successful Participant \n",
788 | "150 Huazhong University of Science and Technology Successful Participant \n",
789 | "165 Huazhong University of Science and Technology Successful Participant \n",
790 | "... ... ... \n",
791 | "28081 Huazhong University of Science and Technology Successful Participant \n",
792 | "28083 Huazhong University of Science and Technology Honorable Mention \n",
793 | "28096 Huazhong University of Science and Technology Disqualified - P \n",
794 | "28291 Huazhong University of Science and Technology Successful Participant \n",
795 | "28297 Huazhong University of Science and Technology Honorable Mention \n",
796 | "\n",
797 | "[511 rows x 6 columns]"
798 | ]
799 | },
800 | "execution_count": 7,
801 | "metadata": {},
802 | "output_type": "execute_result"
803 | }
804 | ],
805 | "source": [
806 | "your_univ_result=data.loc[data['university']==your_university]\n",
807 | "your_univ_result"
808 | ]
809 | },
810 | {
811 | "cell_type": "code",
812 | "execution_count": 8,
813 | "metadata": {
814 | "scrolled": true
815 | },
816 | "outputs": [
817 | {
818 | "data": {
819 | "text/plain": [
820 | "Successful Participant 311\n",
821 | "Honorable Mention 130\n",
822 | "Meritorious Winner 39\n",
823 | "Disqualified - P 19\n",
824 | "Finalist 11\n",
825 | "Unsuccessful - I 1\n",
826 | "Name: prize, dtype: int64"
827 | ]
828 | },
829 | "execution_count": 8,
830 | "metadata": {},
831 | "output_type": "execute_result"
832 | }
833 | ],
834 | "source": [
835 | "your_univ_result['prize'].value_counts()"
836 | ]
837 | },
838 | {
839 | "cell_type": "code",
840 | "execution_count": 9,
841 | "metadata": {
842 | "scrolled": false
843 | },
844 | "outputs": [
845 | {
846 | "data": {
847 | "text/html": [
848 | "\n",
849 | "\n",
862 | "
\n",
863 | " \n",
864 | " \n",
865 | " | \n",
866 | " control_number | \n",
867 | " student1 | \n",
868 | " student2 | \n",
869 | " student3 | \n",
870 | " university | \n",
871 | " prize | \n",
872 | "
\n",
873 | " \n",
874 | " \n",
875 | " \n",
876 | "
\n",
877 | "
"
878 | ],
879 | "text/plain": [
880 | "Empty DataFrame\n",
881 | "Columns: [control_number, student1, student2, student3, university, prize]\n",
882 | "Index: []"
883 | ]
884 | },
885 | "execution_count": 9,
886 | "metadata": {},
887 | "output_type": "execute_result"
888 | }
889 | ],
890 | "source": [
891 | "prize_result=your_univ_result.loc[(your_univ_result['prize']=='Outstanding Winner') | (your_univ_result['prize'].str.contains('Award'))]\n",
892 | "prize_result"
893 | ]
894 | },
895 | {
896 | "cell_type": "code",
897 | "execution_count": null,
898 | "metadata": {},
899 | "outputs": [],
900 | "source": []
901 | },
902 | {
903 | "cell_type": "code",
904 | "execution_count": 10,
905 | "metadata": {},
906 | "outputs": [
907 | {
908 | "data": {
909 | "text/html": [
910 | "\n",
911 | "\n",
924 | "
\n",
925 | " \n",
926 | " \n",
927 | " | \n",
928 | " control_number | \n",
929 | " student1 | \n",
930 | " student2 | \n",
931 | " student3 | \n",
932 | " university | \n",
933 | " prize | \n",
934 | "
\n",
935 | " \n",
936 | " \n",
937 | " \n",
938 | " 6465 | \n",
939 | " 2410343 | \n",
940 | " Qianyu Zhao | \n",
941 | " Yanheng jiang | \n",
942 | " Shoukun Gao | \n",
943 | " Huazhong University of Science and Technology | \n",
944 | " Finalist | \n",
945 | "
\n",
946 | " \n",
947 | " 6721 | \n",
948 | " 2410612 | \n",
949 | " Xinlong Chen | \n",
950 | " Wanting Wang | \n",
951 | " Yining Deng | \n",
952 | " Huazhong University of Science and Technology | \n",
953 | " Finalist | \n",
954 | "
\n",
955 | " \n",
956 | " 6849 | \n",
957 | " 2410744 | \n",
958 | " Sheng Dong | \n",
959 | " Xiang Fan | \n",
960 | " Lewen Yan | \n",
961 | " Huazhong University of Science and Technology | \n",
962 | " Finalist | \n",
963 | "
\n",
964 | " \n",
965 | " 7138 | \n",
966 | " 2411045 | \n",
967 | " Qu JiaJun | \n",
968 | " Li Junhao | \n",
969 | " Yu Hao | \n",
970 | " Huazhong University of Science and Technology | \n",
971 | " Finalist | \n",
972 | "
\n",
973 | " \n",
974 | " 7227 | \n",
975 | " 2411137 | \n",
976 | " Bin Wang | \n",
977 | " Jiajun Zhang | \n",
978 | " Tiantong Sun | \n",
979 | " Huazhong University of Science and Technology | \n",
980 | " Finalist | \n",
981 | "
\n",
982 | " \n",
983 | " 7425 | \n",
984 | " 2411343 | \n",
985 | " Tingjia Hu | \n",
986 | " Yinda Li | \n",
987 | " Zehua Cao | \n",
988 | " Huazhong University of Science and Technology | \n",
989 | " Finalist | \n",
990 | "
\n",
991 | " \n",
992 | " 7922 | \n",
993 | " 2411866 | \n",
994 | " Hatying Lao | \n",
995 | " Yongye Lai | \n",
996 | " Muze Zhou | \n",
997 | " Huazhong University of Science and Technology | \n",
998 | " Finalist | \n",
999 | "
\n",
1000 | " \n",
1001 | " 9305 | \n",
1002 | " 2413307 | \n",
1003 | " Feryang Huang | \n",
1004 | " Shuchen Pu | \n",
1005 | " Chaoyang Zheng | \n",
1006 | " Huazhong University of Science and Technology | \n",
1007 | " Finalist | \n",
1008 | "
\n",
1009 | " \n",
1010 | " 11233 | \n",
1011 | " 2414823 | \n",
1012 | " Bowen Zheng | \n",
1013 | " Yiging Yuan | \n",
1014 | " Huanyu Liu | \n",
1015 | " Huazhong University of Science and Technology | \n",
1016 | " Finalist | \n",
1017 | "
\n",
1018 | " \n",
1019 | " 11839 | \n",
1020 | " 2415449 | \n",
1021 | " Wang Lingyu | \n",
1022 | " Li Shengzhen | \n",
1023 | " Huang Xinyue | \n",
1024 | " Huazhong University of Science and Technology | \n",
1025 | " Finalist | \n",
1026 | "
\n",
1027 | " \n",
1028 | " 26800 | \n",
1029 | " 2429594 | \n",
1030 | " Jieang Ma | \n",
1031 | " Jiaxing Feng | \n",
1032 | " Qinglin JI | \n",
1033 | " Huazhong University of Science and Technology | \n",
1034 | " Finalist | \n",
1035 | "
\n",
1036 | " \n",
1037 | "
\n",
1038 | "
"
1039 | ],
1040 | "text/plain": [
1041 | " control_number student1 student2 student3 \\\n",
1042 | "6465 2410343 Qianyu Zhao Yanheng jiang Shoukun Gao \n",
1043 | "6721 2410612 Xinlong Chen Wanting Wang Yining Deng \n",
1044 | "6849 2410744 Sheng Dong Xiang Fan Lewen Yan \n",
1045 | "7138 2411045 Qu JiaJun Li Junhao Yu Hao \n",
1046 | "7227 2411137 Bin Wang Jiajun Zhang Tiantong Sun \n",
1047 | "7425 2411343 Tingjia Hu Yinda Li Zehua Cao \n",
1048 | "7922 2411866 Hatying Lao Yongye Lai Muze Zhou \n",
1049 | "9305 2413307 Feryang Huang Shuchen Pu Chaoyang Zheng \n",
1050 | "11233 2414823 Bowen Zheng Yiging Yuan Huanyu Liu \n",
1051 | "11839 2415449 Wang Lingyu Li Shengzhen Huang Xinyue \n",
1052 | "26800 2429594 Jieang Ma Jiaxing Feng Qinglin JI \n",
1053 | "\n",
1054 | " university prize \n",
1055 | "6465 Huazhong University of Science and Technology Finalist \n",
1056 | "6721 Huazhong University of Science and Technology Finalist \n",
1057 | "6849 Huazhong University of Science and Technology Finalist \n",
1058 | "7138 Huazhong University of Science and Technology Finalist \n",
1059 | "7227 Huazhong University of Science and Technology Finalist \n",
1060 | "7425 Huazhong University of Science and Technology Finalist \n",
1061 | "7922 Huazhong University of Science and Technology Finalist \n",
1062 | "9305 Huazhong University of Science and Technology Finalist \n",
1063 | "11233 Huazhong University of Science and Technology Finalist \n",
1064 | "11839 Huazhong University of Science and Technology Finalist \n",
1065 | "26800 Huazhong University of Science and Technology Finalist "
1066 | ]
1067 | },
1068 | "execution_count": 10,
1069 | "metadata": {},
1070 | "output_type": "execute_result"
1071 | }
1072 | ],
1073 | "source": [
1074 | "prize_result=your_univ_result.loc[your_univ_result['prize']=='Finalist']\n",
1075 | "prize_result"
1076 | ]
1077 | },
1078 | {
1079 | "cell_type": "code",
1080 | "execution_count": null,
1081 | "metadata": {},
1082 | "outputs": [],
1083 | "source": []
1084 | },
1085 | {
1086 | "cell_type": "code",
1087 | "execution_count": null,
1088 | "metadata": {},
1089 | "outputs": [],
1090 | "source": []
1091 | }
1092 | ],
1093 | "metadata": {
1094 | "kernelspec": {
1095 | "display_name": "Python 3",
1096 | "language": "python",
1097 | "name": "python3"
1098 | },
1099 | "language_info": {
1100 | "codemirror_mode": {
1101 | "name": "ipython",
1102 | "version": 3
1103 | },
1104 | "file_extension": ".py",
1105 | "mimetype": "text/x-python",
1106 | "name": "python",
1107 | "nbconvert_exporter": "python",
1108 | "pygments_lexer": "ipython3",
1109 | "version": "3.7.6"
1110 | }
1111 | },
1112 | "nbformat": 4,
1113 | "nbformat_minor": 4
1114 | }
1115 |
--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
# Two-digit contest year. download.py builds the certificate URL and the
# local folder name as "20" + str(year), and control numbers as
# year * 100000 + sequence number.
year = 23
# Exclusive upper bound of the sequence numbers tried by download.py
# (it iterates range(1, total_num)).
total_num = 30000
# Choose the number of processes according to your CPU and memory capacity.
# step = 30000 / number of processes
# With the values here this starts 30 download processes (30000 / 1000).
download_step = 1000
# Likewise 30 processes for the OCR step.
# NOTE(review): presumably consumed by pdf2text.py, which is not visible here - confirm.
pdf2text_step = 1000
# Your university; the analysis notebooks filter rows whose 'university'
# column equals this string exactly.
your_university= 'Huazhong University of Science and Technology'
# Path to the Tesseract OCR executable.
TesseractOCR_path = 'E:/prog/TesseractOCR/tesseract.exe'
13 |
--------------------------------------------------------------------------------
/download.py:
--------------------------------------------------------------------------------
1 | """
2 | 爬取美赛获奖证书,并以控制号命名
3 | 只运行一次部分下载会失败,需要运行多次,确保全部下载
4 | """
5 | import os
6 | import requests
7 | from multiprocessing import Process
8 | from random import shuffle
9 | from config import *
10 |
11 |
class CMcmCertificateCrawler():
    """Download one MCM/ICM certificate PDF, identified by its control number.

    The contest year is read from the module-level ``year`` setting (imported
    from ``config``); it selects both the remote URL and the local
    ``./paper_20<year>/`` folder the PDF is written to.
    """

    # Seconds to wait on the server before giving up, so that a stalled
    # connection cannot block a worker process forever.
    REQUEST_TIMEOUT = 30

    def __init__(self, contol_nmuber):
        """Store the control number and the browser-like request headers.

        Args:
            contol_nmuber: Control number of the certificate to fetch. The
                misspelled parameter name is kept so existing callers that
                pass it by keyword keep working.
        """
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/73.0.3683.103 Safari/537.36'
        }
        self.control_number = contol_nmuber

    def FGetResponse(self):
        """Request the certificate PDF for ``self.control_number``.

        Returns:
            The ``requests.Response`` of the GET request (any status code).

        Raises:
            requests.RequestException: On connection failure or timeout.
        """
        url = "http://www.comap-math.com/mcm/20" + str(year) + "Certs/" + str(self.control_number) + ".pdf"
        return requests.get(url=url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)

    def FSavePDF(self, control_number):
        """Download the certificate and save it as ``<control_number>.pdf``.

        The request itself is made for ``self.control_number``;
        ``control_number`` only determines the output file name and the
        progress messages.

        Only a 200 response is written to disk. Previously every non-404
        response (for example a 403/500 error page) was saved as a "PDF",
        and because the main script skips non-empty files on later runs,
        such a certificate was never retried.

        Args:
            control_number: Number used for the file name and log output.

        Returns:
            None. Failures are reported on stdout instead of being raised,
            so one bad certificate does not stop the whole batch.
        """
        try:
            path = './paper_20' + str(year) + '/' + str(control_number) + '.pdf'
            response = self.FGetResponse()
            if response.status_code == 200:
                with open(path, 'wb') as f:
                    f.write(response.content)
                print(str(control_number) + ".pdf" + "存储成功")
            elif response.status_code == 404:
                print(control_number, ' -- 404')
            else:
                # Transient or unexpected server answer: leave no file behind
                # so the next run tries this control number again.
                print(control_number, ' -- HTTP', response.status_code)
        except Exception as exc:
            # Deliberately broad: this is the top of the per-certificate work
            # inside a worker process. Report what failed rather than a bare
            # "Exception" so the failure can be diagnosed.
            print("Exception", control_number, repr(exc))
40 |
41 |
def download(start, end):
    """Download the certificates with sequence numbers ``start`` .. ``end - 1``.

    Every sequence number is combined with the configured ``year`` into the
    full control number ``year * 100000 + sequence_number``.
    """
    year_prefix = year * 100000
    for sequence_number in range(start, end):
        full_number = year_prefix + sequence_number
        CMcmCertificateCrawler(full_number).FSavePDF(full_number)
48 |
49 |
def downloadlist(control_number_list):
    """Download the certificate of every full control number in the list."""
    for full_number in control_number_list:
        CMcmCertificateCrawler(full_number).FSavePDF(full_number)
54 |
55 |
if __name__ == '__main__':
    # Full control number = two-digit year followed by a five-digit sequence number.
    all_control_numbers = [year * 100000 + n for n in range(1, total_num)]

    paper_dir = './paper_20' + str(year) + '/'
    os.makedirs(paper_dir, exist_ok=True)

    # Control numbers that already have a non-empty file on disk. A set makes
    # the membership test O(1) and tolerates numbers outside the scanned range.
    downloaded = set()
    for filename in os.listdir(paper_dir):
        try:
            file_number = int(filename[0:7])
        except ValueError:
            # Not a "<control_number>.pdf" file (e.g. a stray system file).
            continue
        if os.path.getsize(paper_dir + filename):
            downloaded.add(file_number)

    pending = [n for n in all_control_numbers if n not in downloaded]
    print(len(downloaded), 'already downloaded,', len(pending), 'pending')

    # Randomize the order so every worker receives a random subset.
    shuffle(pending)
    step = download_step
    for i in range(0, len(pending), step):
        # Slice ends are exclusive: [i:i + step] yields exactly `step` items.
        # The former [i:i + step - 1] silently dropped one number per chunk.
        chunk = pending[i:i + step]
        p = Process(target=downloadlist, args=(chunk,))
        p.start()
82 |
--------------------------------------------------------------------------------
/exception.txt:
--------------------------------------------------------------------------------
1 | 2308010
--------------------------------------------------------------------------------
/gitpush.bat:
--------------------------------------------------------------------------------
:: Stage every change in the working tree.
git add .
:: Commit the staged changes with a fixed message.
git commit -m "update"
:: Push to the "master" branch of "origin" and record it as the upstream.
git push -u origin master
--------------------------------------------------------------------------------
/pdf2text.py:
--------------------------------------------------------------------------------
1 | """
2 | 美赛获奖证书信息OCR
3 | """
4 |
5 | import fitz
6 | import PIL
7 | import pytesseract
8 | import os
9 | from multiprocessing import Process
10 | import re
11 | from config import *
12 |
# Point pytesseract at the Tesseract executable configured in config.py.
pytesseract.pytesseract.tesseract_cmd = TesseractOCR_path
14 |
15 |
def _locate_name_lines(lines):
    """Return ``(students_index, advisor_index)`` for the OCR'd text lines.

    The markers are tried in order: 'With Student Advisor', 'With Faculty
    Advisor', 'Of', and finally 'Was Designated As'.

    Raises:
        ValueError: If none of the markers is present.
    """
    for marker in ('With Student Advisor', 'With Faculty Advisor'):
        if marker in lines:
            marker_index = lines.index(marker)
            return marker_index, marker_index + 1
    if 'Of' in lines:
        before_of = lines.index('Of') - 1
        return before_of, before_of
    designated = lines.index('Was Designated As')
    return designated - 3, designated - 2


def _fix_ocr(value):
    """Replace commas (the output field separator) and map '1' to 'i'."""
    # NOTE(review): the 1 -> i mapping presumably undoes a common OCR
    # misreading in names; confirm before changing it.
    return value.replace(',', ' ').replace('1', 'i')


def pdf2text(pdfPath, control_number, zoom_x=6, zoom_y=6, rotation_angle=0):
    """OCR a certificate PDF and extract students, advisor, university, prize.

    For every page the region between 25%-80% of the width and 27%-70% of the
    height is rendered (scaled by ``zoom_x``/``zoom_y``, rotated by
    ``rotation_angle``) and passed to Tesseract. When the PDF has several
    pages, the values of the last page win.

    Args:
        pdfPath: Path of the certificate PDF.
        control_number: Control number, used only for error reporting.
        zoom_x: Horizontal render scale.
        zoom_y: Vertical render scale.
        rotation_angle: Rotation applied before rendering, in degrees.

    Returns:
        ``(students, advisor, university, prize)`` where ``students`` is a
        list of names. If processing fails, the values gathered so far are
        returned (``['']`` and empty strings when nothing was read), the
        control number is appended to ``exception.txt`` and nothing is raised.
    """
    students = ['']
    university = ''
    prize = ''
    advisor = ''
    try:
        pdf = fitz.open(pdfPath)
        try:
            # len(pdf) is the page count; it avoids the deprecated camelCase
            # ``pageCount`` alias.
            for pg in range(len(pdf)):
                page = pdf[pg]
                rect = page.rect
                clip = fitz.Rect(rect.width * 0.25, rect.height * 0.27,
                                 rect.width * 0.8, rect.height * 0.7)
                trans = fitz.Matrix(zoom_x, zoom_y).prerotate(rotation_angle)
                pix = page.get_pixmap(matrix=trans, alpha=False, clip=clip)
                img = PIL.Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                text = pytesseract.image_to_string(img)
                # Keep only the non-empty lines.
                text = [s for s in text.split('\n') if s]

                students_index, advisor_index = _locate_name_lines(text)
                try:
                    univ_index = text.index('Was Designated As') - 1
                    students = text[0:students_index]
                    advisor = text[advisor_index]
                    university = text[univ_index]
                except (ValueError, IndexError):
                    # Fall back to the fixed layout: three student lines, one
                    # marker line, then advisor and university.
                    students = text[0:3]
                    advisor = text[4]
                    university = text[5]
                prize = text[-1]
        finally:
            # Release the document even when OCR or parsing fails.
            pdf.close()
    except Exception as exc:
        print(control_number, 'Exception', repr(exc))
        # Append so that every failed control number is kept; the previous
        # 'w+' mode overwrote the file on each failure.
        with open('exception.txt', 'a') as exception_file:
            exception_file.write(str(control_number) + '\n')

    university = _fix_ocr(university)
    prize = prize.replace(',', ' ')
    advisor = _fix_ocr(advisor)
    stus = [_fix_ocr(student) for student in students]
    return stus, advisor, university, prize
79 |
80 |
def savetext(start, end, count):
    """OCR the certificates numbered ``start`` .. ``end - 1`` and save the rows.

    Every existing, non-empty ``./paper_20<year>/<control_number>.pdf`` is
    passed to ``pdf2text``. Certificates for which a prize was recognized
    become one comma-separated row. All rows go to ``./all/tmp<count>.txt``;
    rows whose university equals ``your_university`` additionally go to
    ``./your_university/tmp<count>.txt``.

    Args:
        start: First sequence number (inclusive).
        end: Last sequence number (exclusive).
        count: Index of this chunk, used in the output file names.
    """
    # Padding that keeps three student columns when fewer names were found.
    padding_by_count = {0: ', , ', 1: ', , ', 2: ', '}
    all_rows = []
    your_university_rows = []

    for sequence_number in range(start, end):
        control_number = year * 100000 + sequence_number
        path = "./paper_20" + str(year) + "/" + str(control_number) + ".pdf"
        if not (os.path.exists(path) and os.path.getsize(path) > 0):
            continue

        students, advisor, university, prize = pdf2text(path, control_number)
        if not prize:
            # Nothing recognized (or OCR failed): no row for this certificate.
            continue

        names = students[0:3]
        students = ','.join(names) + padding_by_count.get(len(names), '')

        row = '%s,%s,%s,%s,%s,\n' % (control_number, students, advisor, university, prize)
        # Characters that GBK cannot encode are turned into backslash escapes.
        row = row.encode('gbk', 'backslashreplace').decode('gbk', 'backslashreplace')
        try:
            print(row)
        except Exception:
            # Printing is best effort; the row is still saved below.
            print(control_number, ' -- gbk encoding error')

        all_rows.append(row)
        if university == your_university:
            your_university_rows.append(row)

    all_path = './all/tmp' + str(count) + '.txt'
    with open(all_path, 'w', encoding='utf-8') as all_file:
        all_file.write(''.join(all_rows))
    print(all_path + ' save sucessfully')

    your_university_path = './your_university/tmp' + str(count) + '.txt'
    with open(your_university_path, 'w', encoding='utf-8') as your_university_file:
        your_university_file.write(''.join(your_university_rows))
    print(your_university_path + ' save sucessfully')
123 |
124 |
if __name__ == '__main__':
    os.makedirs('./all/', exist_ok=True)
    os.makedirs('./your_university/', exist_ok=True)

    step = pdf2text_step
    # One worker process per chunk; `count` numbers the tmp<count>.txt files.
    for count, start in enumerate(range(1, total_num, step), start=1):
        # savetext() iterates range(start, end), i.e. `end` is exclusive. The
        # former `start + step - 1` skipped the last certificate of every
        # chunk (1000, 2000, ...). Stay below total_num, like the downloader.
        end = min(start + step, total_num)
        p = Process(target=savetext, args=(start, end, count))
        p.start()
139 |
140 | # students, advisor, university, prize = pdf2text('./paper_2023/2300009.pdf', 2300009)
141 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | requests==2.22.0
2 | pytesseract==0.3.9
3 | fitz==0.0.1.dev2
4 | PyMuPDF==1.19.6
5 |
--------------------------------------------------------------------------------
/txt_joint.py:
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 | from config import year
4 |
def txtjoint(dir):
    """Merge the ``tmp<N>.txt`` fragments in *dir* into ``all_20<year>.txt``.

    The fragments are concatenated in ascending numeric order of ``N`` after a
    CSV header line, so the merged file is the same on every run regardless of
    the order in which the operating system lists the directory.

    Args:
        dir: Directory containing the fragments. It is concatenated directly
            with the file names, so it must end with a path separator.
    """
    # Raw string with an escaped dot and an end anchor: only names of the
    # exact form tmp<digits>.txt are merged.
    fragment_pattern = re.compile(r'^tmp(\d+)\.txt$')
    header = 'control_number,student1,student2,student3,advisor,university,prize,,,\n'

    fragments = []
    for file in os.listdir(dir):
        match = fragment_pattern.search(file)
        if match:
            fragments.append((int(match.group(1)), file))
    fragments.sort()

    parts = [header]
    for _, file in fragments:
        with open(dir + file, "r", encoding='utf-8') as f:
            parts.append(f.read())

    with open(dir + "all_20" + str(year) + ".txt", "w", encoding='utf-8') as outFile:
        outFile.write(''.join(parts))
    print('txtjoint sucessfully')
19 |
# Merge the OCR fragments of both output folders.
all_dir = "./all/"
your_university_dir = './your_university/'
for fragment_dir in (all_dir, your_university_dir):
    txtjoint(fragment_dir)
--------------------------------------------------------------------------------
/统计结果.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/personqianduixue/comap_crawler_2023/6ec3537cd279d39d80f4be73b5589477a29defc2/统计结果.png
--------------------------------------------------------------------------------