├── .gitignore ├── README.md ├── all ├── all_2022.csv ├── all_2022.txt ├── all_2023.csv ├── all_2023.txt ├── all_2024.csv ├── all_2024.txt └── all_with_problem_2022.txt ├── analysis.ipynb ├── analysis_2024.ipynb ├── config.py ├── download.py ├── exception.txt ├── gitpush.bat ├── pdf2text.py ├── requirements.txt ├── txt_joint.py └── 统计结果.png /.gitignore: -------------------------------------------------------------------------------- 1 | paper/*.pdf 2 | paper_2022/*.pdf 3 | paper_2023/*.pdf 4 | .ipynb_checkpoints 5 | __pycache__ 6 | gitpush.bat 7 | .idea 8 | tmp*.txt 9 | all/tmp*.txt 10 | /all/tmp*.txt 11 | your_university/*.txt 12 | /your_university/*.txt 13 | your_university -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # comap_crawler 2 | 3 | 2024更新:修改config.py中的23为24即可爬取2024美赛证书并进行分析 4 | 5 | 2023美赛爬虫,2024美赛获奖证书爬虫 6 | 7 | 美国大学生数学建模竞赛证书爬取及信息OCR识别分析 8 | 9 | ## 运行步骤 10 | 11 | 1. 安装tesseractOCR,参考版本:v5.0.1.20220118,其他版本不保证可用。 12 | 13 | v5.0.1.20220118下载地址 14 | 15 | https://digi.bib.uni-mannheim.de/tesseract/tesseract-ocr-w64-setup-v5.0.1.20220118.exe 16 | 17 | 其他版本下载:https://digi.bib.uni-mannheim.de/tesseract/ 18 | 19 | 2. `pip install -r requirements.txt` 20 | 21 | 3. config.py中设置年份、进程数、你的学校、TesseractOCR安装路径等,进程数根据CPU和内存情况设置 22 | 23 | 4. download.py下载证书,由于只运行一次时部分证书可能下载失败,需要多次运行,确保全部证书下载完成 24 | 25 | 5. pdf2text.py识别学校、姓名、获奖等级、队伍ID等 26 | 27 | 6. txt_joint.py合并OCR识别的txt结果 28 | 29 | 7. 
analysis.ipynb分析数据 30 | 31 | ## 识别结果: 32 | 33 | 更多分析结果请自行下载代码,运行analysis.ipynb进行分析 34 | 35 | ### 2024美赛 36 | 37 | 2024美赛结果,证书数量28905张,最终识别28796条信息 38 | https://raw.githubusercontent.com/personqianduixue/comap_crawler_2023/master/all/all_2024.txt 39 | 40 | cdn加速镜像:https://fastly.jsdelivr.net/gh/personqianduixue/comap_crawler_2023@master/all/all_2024.txt 41 | 42 | 43 | 44 | ### 2023美赛 45 | 46 | 2023美赛结果,证书数量20858张,最终识别20818条信息 47 | 48 | https://raw.githubusercontent.com/personqianduixue/comap_crawler_2023/master/all/all_2023.txt 49 | 50 | cdn加速镜像:https://ghproxy.net/https://raw.githubusercontent.com/personqianduixue/comap_crawler_2023/master/all/all_2023.txt 51 | 52 | 53 | 54 | ### 2022美赛 55 | 56 | 2022美赛结果,证书数量27205张,最终识别27161条信息 57 | 58 | https://raw.githubusercontent.com/personqianduixue/comap_crawler_2023/master/all/all_2022.txt 59 | 60 | cdn加速镜像:https://ghproxy.net/https://raw.githubusercontent.com/personqianduixue/comap_crawler_2023/master/all/all_2022.txt 61 | 62 | 63 | -------------------------------------------------------------------------------- /analysis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import shutil\n", 10 | "import pandas as pd\n", 11 | "from config import *" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "your_university='Huazhong University of Science and Technology'" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 3, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "year=23" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 4, 35 | "metadata": { 36 | "scrolled": true 37 | }, 38 | "outputs": [ 39 | { 40 | "data": { 41 | "text/html": [ 42 | "
\n", 43 | "\n", 56 | "\n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | "
control_numberstudent1student2student3advisoruniversityprize
02300005Zheng YaxinDu JingiuGao QihangUIC Math Modeling TeamBNU-HKBU United International CollegeMeritorious Winner
12300006Wu PeixinChen NingyunWu YouUIC Math Modeling TeamBNU-HKBU United International CollegeSuccessful Participant
22300007Gou RuoXuanHuang JianFeiYe YuJunUIC Math Modeling TeamBNU-HKBU United International CollegeSuccessful Participant
32300008Wan YeqianGan YiningGuo WenhaoUIC Math Modeling TeamBNU-HKBU United International CollegeSuccessful Participant
42300009Lu QianyuLi CanZhao XiaoxuanUIC Math Modeling TeamBNU-HKBU United International CollegeSuccessful Participant
\n", 122 | "
" 123 | ], 124 | "text/plain": [ 125 | " control_number student1 student2 student3 \\\n", 126 | "0 2300005 Zheng Yaxin Du Jingiu Gao Qihang \n", 127 | "1 2300006 Wu Peixin Chen Ningyun Wu You \n", 128 | "2 2300007 Gou RuoXuan Huang JianFei Ye YuJun \n", 129 | "3 2300008 Wan Yeqian Gan Yining Guo Wenhao \n", 130 | "4 2300009 Lu Qianyu Li Can Zhao Xiaoxuan \n", 131 | "\n", 132 | " advisor university \\\n", 133 | "0 UIC Math Modeling Team BNU-HKBU United International College \n", 134 | "1 UIC Math Modeling Team BNU-HKBU United International College \n", 135 | "2 UIC Math Modeling Team BNU-HKBU United International College \n", 136 | "3 UIC Math Modeling Team BNU-HKBU United International College \n", 137 | "4 UIC Math Modeling Team BNU-HKBU United International College \n", 138 | "\n", 139 | " prize \n", 140 | "0 Meritorious Winner \n", 141 | "1 Successful Participant \n", 142 | "2 Successful Participant \n", 143 | "3 Successful Participant \n", 144 | "4 Successful Participant " 145 | ] 146 | }, 147 | "execution_count": 4, 148 | "metadata": {}, 149 | "output_type": "execute_result" 150 | } 151 | ], 152 | "source": [ 153 | "txtpath='./all/all_20'+str(year)+'.txt'\n", 154 | "csvpath='./all/all_20'+str(year)+'.csv'\n", 155 | "shutil.copyfile(txtpath, csvpath)\n", 156 | "data = pd.read_csv(csvpath)\n", 157 | "data=data.iloc[:,0:7]\n", 158 | "data.head()" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 5, 164 | "metadata": { 165 | "scrolled": true 166 | }, 167 | "outputs": [ 168 | { 169 | "data": { 170 | "text/plain": [ 171 | "Harbin Engineering University 580\n", 172 | "XIDIAN UNIVERSITY 448\n", 173 | "Beihang University 407\n", 174 | "Jilin University 364\n", 175 | "Huazhong University of Science and Technology 338\n", 176 | "Northwestern Polytechnical University 323\n", 177 | "Nanjing University of Posts and Telecommunication 298\n", 178 | "Dalian University of Technology 293\n", 179 | "Shanghai Jiao Tong University 263\n", 180 | "South 
China University of Technology 228\n", 181 | "Hohai University 215\n", 182 | "National University of Defense Technology 207\n", 183 | "Northeastern University 206\n", 184 | "Harbin Institute of Technology 200\n", 185 | "Beijing Institute of Technology 197\n", 186 | "Southwest University 193\n", 187 | "Central University of Finance and Economics 192\n", 188 | "Renmin University of China 183\n", 189 | "Xi'an Jiaotong University 162\n", 190 | "Hangzhou Normal University 154\n", 191 | "Name: university, dtype: int64" 192 | ] 193 | }, 194 | "execution_count": 5, 195 | "metadata": {}, 196 | "output_type": "execute_result" 197 | } 198 | ], 199 | "source": [ 200 | "univ_data=data['university'].value_counts().iloc[0:20]\n", 201 | "univ_data" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 6, 207 | "metadata": { 208 | "scrolled": true 209 | }, 210 | "outputs": [ 211 | { 212 | "data": { 213 | "image/png": "\n", 214 | "text/plain": [ 215 | "
" 216 | ] 217 | }, 218 | "metadata": { 219 | "needs_background": "light" 220 | }, 221 | "output_type": "display_data" 222 | } 223 | ], 224 | "source": [ 225 | "\n", 226 | "ax=univ_data.sort_values().plot.barh()\n", 227 | "fig=ax.get_figure()\n", 228 | "fig.savefig('统计结果.png',bbox_inches='tight')" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": 7, 234 | "metadata": {}, 235 | "outputs": [ 236 | { 237 | "data": { 238 | "text/plain": [ 239 | "Successful Participant 13180\n", 240 | "Honorable Mention 4531\n", 241 | "Meritorious Winner 1352\n", 242 | "Disqualified - P 990\n", 243 | "Finalist 545\n", 244 | "Unsuccessful - I 177\n", 245 | "Not Judged 24\n", 246 | "Outstanding Winner 14\n", 247 | "AMS Award 6\n", 248 | "INFORMS Award 5\n", 249 | "SIAM Award 5\n", 250 | "MAA Award 3\n", 251 | "COMAP Scholarship Award 2\n", 252 | "Leonhard Euler Award 1\n", 253 | "Ben Fusaro Award 1\n", 254 | "Vilfredo Pareto Award 1\n", 255 | "Rachel Carson Award 1\n", 256 | "Frank Giordano Award 1\n", 257 | "Unsuccessful - [ 1\n", 258 | "Name: prize, dtype: int64" 259 | ] 260 | }, 261 | "execution_count": 7, 262 | "metadata": {}, 263 | "output_type": "execute_result" 264 | } 265 | ], 266 | "source": [ 267 | "data['prize'].value_counts()" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": 8, 273 | "metadata": {}, 274 | "outputs": [ 275 | { 276 | "data": { 277 | "text/html": [ 278 | "
\n", 279 | "\n", 292 | "\n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " 
\n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | "
control_numberstudent1student2student3advisoruniversityprize
962300136Hanzhang ZhouHuangyii ZhengXiaoyang CaoHeng LiangTsinghua UniversityINFORMS Award
1692300229Cao HonglinZhou ZijianHu YongqiLi MingqiJniversity of Electronic Science and Technolog...Leonhard Euler Award
2602300336Jiahao LuoTianyu XiongYuting ZhangGuoliang HeUniversity of Electronic Science and Technolog...AMS Award
10372309229Yuhao SunZiruo WangShuo ZhangHai JinBeijing Institute of TechnologyBen Fusaro Award
15322309766Carrie CoxJobi LoElias CoppockWai LauSeattle Pacific UniversityMAA Award
24062310776Duffy AndersonMatthew HelmerMadeline RueMei ZhuPacific Lutheran UniversityMAA Award
28322311258Haiyan ZhangTao ZhuangYunuo LeiQingsong ZouXIDIAN UNIVERSITYAMS Award
30772311517Taining YanKaiqun WuLijie ChenTaining YanRenmin University of ChinaVilfredo Pareto Award
39122312411Dingkai WeiYuecheng WangWeilong ZhuYuchao LiChang'an UniversityRachel Carson Award
55612314151Jing RenChenyue XiaAnyYu PanGuibing GuoNortheastern University of ChinaCOMAP Scholarship Award
63982315018Hongyu LangYutong ShaoXiaqing ZhouJigao YanSoochow UniversitySIAM Award
66922315321Baoyang ZhangLei TianZihan WuJun LuNational University of Defense TechnologySIAM Award
67492315379Zhu XiangyuLiang KatyinWei XiaoqinWang XiaoyinTiangong UniversityFrank Giordano Award
83052316994Zichen CaoYunyujie DuXinyi JiangXiaofeng GaoShanghai Jiao Tong UniversityINFORMS Award
94882318300Enzo Moraes MescallNicolas SalazarErik Mendes NovakMaria-Veronica CiocanelDuke UniversityCOMAP Scholarship Award
101562318982Kehan TongYuqi YangYan DuQiang YaoEast China Normal UniversitySIAM Award
103512301192Jianjie ZhengWeikang LiYao HouLei LiuZhejiang University of Finance and EconomicsAMS Award
121282320131Song YushuaiZhao ChenxiaHu YunboSong YushuaiTianjin UniversitySIAM Award
145082322645Steven Sofos DiSilvioAnthony OzerovLeon ZhouGeorge DragomirColumbia UniversityINFORMS Award
145462322687Caden LinMaksym BondarenkoPhillip M YanGeorge DragomirColumbia UniversityMAA Award
163002303950Yuewen YangYifan QiYuechuan MaBo WangBeiing Institute of TechnologySIAM Award
163132303967Jingjia PengXinyi HuangXuejun ZhangXiaofeng GaoShanghai Jiao Tong UniversityAMS Award
171812304962Zhu XiaotianLiu JingwenLiu XinjieChen HuaChina University of PetroleumINFORMS Award
179122305794Zhang ChuxiaoWang ChenghanZhang YingYuli ZHANGBeijing Institute of TechnologyINFORMS Award
193082307336Zhaohong LiaoEnyang LiYingyi LiuZhi GaoWuhan UniversityAMS Award
207492308899Ruomu LiChenyu MaMengyuan DaiDongxue YanNanjing University of Posts and TelecommunicationAMS Award
\n", 568 | "
" 569 | ], 570 | "text/plain": [ 571 | " control_number student1 student2 \\\n", 572 | "96 2300136 Hanzhang Zhou Huangyii Zheng \n", 573 | "169 2300229 Cao Honglin Zhou Zijian \n", 574 | "260 2300336 Jiahao Luo Tianyu Xiong \n", 575 | "1037 2309229 Yuhao Sun Ziruo Wang \n", 576 | "1532 2309766 Carrie Cox Jobi Lo \n", 577 | "2406 2310776 Duffy Anderson Matthew Helmer \n", 578 | "2832 2311258 Haiyan Zhang Tao Zhuang \n", 579 | "3077 2311517 Taining Yan Kaiqun Wu \n", 580 | "3912 2312411 Dingkai Wei Yuecheng Wang \n", 581 | "5561 2314151 Jing Ren Chenyue Xia \n", 582 | "6398 2315018 Hongyu Lang Yutong Shao \n", 583 | "6692 2315321 Baoyang Zhang Lei Tian \n", 584 | "6749 2315379 Zhu Xiangyu Liang Katyin \n", 585 | "8305 2316994 Zichen Cao Yunyujie Du \n", 586 | "9488 2318300 Enzo Moraes Mescall Nicolas Salazar \n", 587 | "10156 2318982 Kehan Tong Yuqi Yang \n", 588 | "10351 2301192 Jianjie Zheng Weikang Li \n", 589 | "12128 2320131 Song Yushuai Zhao Chenxia \n", 590 | "14508 2322645 Steven Sofos DiSilvio Anthony Ozerov \n", 591 | "14546 2322687 Caden Lin Maksym Bondarenko \n", 592 | "16300 2303950 Yuewen Yang Yifan Qi \n", 593 | "16313 2303967 Jingjia Peng Xinyi Huang \n", 594 | "17181 2304962 Zhu Xiaotian Liu Jingwen \n", 595 | "17912 2305794 Zhang Chuxiao Wang Chenghan \n", 596 | "19308 2307336 Zhaohong Liao Enyang Li \n", 597 | "20749 2308899 Ruomu Li Chenyu Ma \n", 598 | "\n", 599 | " student3 advisor \\\n", 600 | "96 Xiaoyang Cao Heng Liang \n", 601 | "169 Hu Yongqi Li Mingqi \n", 602 | "260 Yuting Zhang Guoliang He \n", 603 | "1037 Shuo Zhang Hai Jin \n", 604 | "1532 Elias Coppock Wai Lau \n", 605 | "2406 Madeline Rue Mei Zhu \n", 606 | "2832 Yunuo Lei Qingsong Zou \n", 607 | "3077 Lijie Chen Taining Yan \n", 608 | "3912 Weilong Zhu Yuchao Li \n", 609 | "5561 AnyYu Pan Guibing Guo \n", 610 | "6398 Xiaqing Zhou Jigao Yan \n", 611 | "6692 Zihan Wu Jun Lu \n", 612 | "6749 Wei Xiaoqin Wang Xiaoyin \n", 613 | "8305 Xinyi Jiang Xiaofeng Gao \n", 614 | "9488 Erik Mendes 
Novak Maria-Veronica Ciocanel \n", 615 | "10156 Yan Du Qiang Yao \n", 616 | "10351 Yao Hou Lei Liu \n", 617 | "12128 Hu Yunbo Song Yushuai \n", 618 | "14508 Leon Zhou George Dragomir \n", 619 | "14546 Phillip M Yan George Dragomir \n", 620 | "16300 Yuechuan Ma Bo Wang \n", 621 | "16313 Xuejun Zhang Xiaofeng Gao \n", 622 | "17181 Liu Xinjie Chen Hua \n", 623 | "17912 Zhang Ying Yuli ZHANG \n", 624 | "19308 Yingyi Liu Zhi Gao \n", 625 | "20749 Mengyuan Dai Dongxue Yan \n", 626 | "\n", 627 | " university \\\n", 628 | "96 Tsinghua University \n", 629 | "169 Jniversity of Electronic Science and Technolog... \n", 630 | "260 University of Electronic Science and Technolog... \n", 631 | "1037 Beijing Institute of Technology \n", 632 | "1532 Seattle Pacific University \n", 633 | "2406 Pacific Lutheran University \n", 634 | "2832 XIDIAN UNIVERSITY \n", 635 | "3077 Renmin University of China \n", 636 | "3912 Chang'an University \n", 637 | "5561 Northeastern University of China \n", 638 | "6398 Soochow University \n", 639 | "6692 National University of Defense Technology \n", 640 | "6749 Tiangong University \n", 641 | "8305 Shanghai Jiao Tong University \n", 642 | "9488 Duke University \n", 643 | "10156 East China Normal University \n", 644 | "10351 Zhejiang University of Finance and Economics \n", 645 | "12128 Tianjin University \n", 646 | "14508 Columbia University \n", 647 | "14546 Columbia University \n", 648 | "16300 Beiing Institute of Technology \n", 649 | "16313 Shanghai Jiao Tong University \n", 650 | "17181 China University of Petroleum \n", 651 | "17912 Beijing Institute of Technology \n", 652 | "19308 Wuhan University \n", 653 | "20749 Nanjing University of Posts and Telecommunication \n", 654 | "\n", 655 | " prize \n", 656 | "96 INFORMS Award \n", 657 | "169 Leonhard Euler Award \n", 658 | "260 AMS Award \n", 659 | "1037 Ben Fusaro Award \n", 660 | "1532 MAA Award \n", 661 | "2406 MAA Award \n", 662 | "2832 AMS Award \n", 663 | "3077 Vilfredo Pareto Award \n", 664 
| "3912 Rachel Carson Award \n", 665 | "5561 COMAP Scholarship Award \n", 666 | "6398 SIAM Award \n", 667 | "6692 SIAM Award \n", 668 | "6749 Frank Giordano Award \n", 669 | "8305 INFORMS Award \n", 670 | "9488 COMAP Scholarship Award \n", 671 | "10156 SIAM Award \n", 672 | "10351 AMS Award \n", 673 | "12128 SIAM Award \n", 674 | "14508 INFORMS Award \n", 675 | "14546 MAA Award \n", 676 | "16300 SIAM Award \n", 677 | "16313 AMS Award \n", 678 | "17181 INFORMS Award \n", 679 | "17912 INFORMS Award \n", 680 | "19308 AMS Award \n", 681 | "20749 AMS Award " 682 | ] 683 | }, 684 | "execution_count": 8, 685 | "metadata": {}, 686 | "output_type": "execute_result" 687 | } 688 | ], 689 | "source": [ 690 | "avard_result=data.loc[(data['prize'].str.contains('Award'))]\n", 691 | "avard_result" 692 | ] 693 | }, 694 | { 695 | "cell_type": "code", 696 | "execution_count": 9, 697 | "metadata": { 698 | "scrolled": false 699 | }, 700 | "outputs": [ 701 | { 702 | "data": { 703 | "text/html": [ 704 | "
\n", 705 | "\n", 718 | "\n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | "
control_numberstudent1student2student3advisoruniversityprize
1572300214Hao YangYuxin HuDi LiuZhiHong YangHuazhong University of Science and TechnologyDisqualified - P
1652300223Yingzhong HuaYanwei TanJichen BianYingzhong HuaHuazhong University of Science and TechnologySuccessful Participant
5882300708Xiang LiYu HuJunHang MaXiang LiHuazhong University of Science and TechnologySuccessful Participant
6372300772Yitian HanAnyang LiRuoxin WuGang XuHuazhong University of Science and TechnologySuccessful Participant
7222300883Lesi HuHannan ChenYulang HongSuyang MaHuazhong University of Science and TechnologySuccessful Participant
........................
208162308976Xiaokai ZhangMuchu ChenBowen ZhouChen YuHuazhong University of Science and TechnologyHonorable Mention
208202308980Xuebin ZhouYutong DingChenmin KeHaoyuan XuHuazhong University of Science and TechnologyHonorable Mention
208222308982Jiongfan ZhuYilong ChenYuezhang LongJiongfan ZhuHuazhong University of Science and TechnologySuccessful Participant
208262308986Lihui ZhangXin HuChenyang XiongLi LiHuazhong University of Science and TechnologyFinalist
208272308987Ziheng HuangZihe LiuWensheng YangZiheng HuangHuazhong University of Science and TechnologySuccessful Participant
\n", 844 | "

338 rows × 7 columns

\n", 845 | "
" 846 | ], 847 | "text/plain": [ 848 | " control_number student1 student2 student3 \\\n", 849 | "157 2300214 Hao Yang Yuxin Hu Di Liu \n", 850 | "165 2300223 Yingzhong Hua Yanwei Tan Jichen Bian \n", 851 | "588 2300708 Xiang Li Yu Hu JunHang Ma \n", 852 | "637 2300772 Yitian Han Anyang Li Ruoxin Wu \n", 853 | "722 2300883 Lesi Hu Hannan Chen Yulang Hong \n", 854 | "... ... ... ... ... \n", 855 | "20816 2308976 Xiaokai Zhang Muchu Chen Bowen Zhou \n", 856 | "20820 2308980 Xuebin Zhou Yutong Ding Chenmin Ke \n", 857 | "20822 2308982 Jiongfan Zhu Yilong Chen Yuezhang Long \n", 858 | "20826 2308986 Lihui Zhang Xin Hu Chenyang Xiong \n", 859 | "20827 2308987 Ziheng Huang Zihe Liu Wensheng Yang \n", 860 | "\n", 861 | " advisor university \\\n", 862 | "157 ZhiHong Yang Huazhong University of Science and Technology \n", 863 | "165 Yingzhong Hua Huazhong University of Science and Technology \n", 864 | "588 Xiang Li Huazhong University of Science and Technology \n", 865 | "637 Gang Xu Huazhong University of Science and Technology \n", 866 | "722 Suyang Ma Huazhong University of Science and Technology \n", 867 | "... ... ... \n", 868 | "20816 Chen Yu Huazhong University of Science and Technology \n", 869 | "20820 Haoyuan Xu Huazhong University of Science and Technology \n", 870 | "20822 Jiongfan Zhu Huazhong University of Science and Technology \n", 871 | "20826 Li Li Huazhong University of Science and Technology \n", 872 | "20827 Ziheng Huang Huazhong University of Science and Technology \n", 873 | "\n", 874 | " prize \n", 875 | "157 Disqualified - P \n", 876 | "165 Successful Participant \n", 877 | "588 Successful Participant \n", 878 | "637 Successful Participant \n", 879 | "722 Successful Participant \n", 880 | "... ... 
\n", 881 | "20816 Honorable Mention \n", 882 | "20820 Honorable Mention \n", 883 | "20822 Successful Participant \n", 884 | "20826 Finalist \n", 885 | "20827 Successful Participant \n", 886 | "\n", 887 | "[338 rows x 7 columns]" 888 | ] 889 | }, 890 | "execution_count": 9, 891 | "metadata": {}, 892 | "output_type": "execute_result" 893 | } 894 | ], 895 | "source": [ 896 | "your_univ_result=data.loc[data['university']==your_university]\n", 897 | "your_univ_result" 898 | ] 899 | }, 900 | { 901 | "cell_type": "code", 902 | "execution_count": 10, 903 | "metadata": { 904 | "scrolled": true 905 | }, 906 | "outputs": [ 907 | { 908 | "data": { 909 | "text/plain": [ 910 | "Successful Participant 223\n", 911 | "Honorable Mention 72\n", 912 | "Meritorious Winner 20\n", 913 | "Disqualified - P 14\n", 914 | "Finalist 8\n", 915 | "Unsuccessful - I 1\n", 916 | "Name: prize, dtype: int64" 917 | ] 918 | }, 919 | "execution_count": 10, 920 | "metadata": {}, 921 | "output_type": "execute_result" 922 | } 923 | ], 924 | "source": [ 925 | "your_univ_result['prize'].value_counts()" 926 | ] 927 | }, 928 | { 929 | "cell_type": "code", 930 | "execution_count": 11, 931 | "metadata": { 932 | "scrolled": false 933 | }, 934 | "outputs": [ 935 | { 936 | "data": { 937 | "text/html": [ 938 | "
\n", 939 | "\n", 952 | "\n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | "
control_numberstudent1student2student3advisoruniversityprize
\n", 968 | "
" 969 | ], 970 | "text/plain": [ 971 | "Empty DataFrame\n", 972 | "Columns: [control_number, student1, student2, student3, advisor, university, prize]\n", 973 | "Index: []" 974 | ] 975 | }, 976 | "execution_count": 11, 977 | "metadata": {}, 978 | "output_type": "execute_result" 979 | } 980 | ], 981 | "source": [ 982 | "prize_result=your_univ_result.loc[(your_univ_result['prize']=='Outstanding Winner') | (your_univ_result['prize'].str.contains('Award'))]\n", 983 | "prize_result" 984 | ] 985 | }, 986 | { 987 | "cell_type": "code", 988 | "execution_count": null, 989 | "metadata": {}, 990 | "outputs": [], 991 | "source": [] 992 | }, 993 | { 994 | "cell_type": "code", 995 | "execution_count": 12, 996 | "metadata": {}, 997 | "outputs": [ 998 | { 999 | "data": { 1000 | "text/html": [ 1001 | "
\n", 1002 | "\n", 1015 | "\n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1084 | " \n", 1085 | " \n", 1086 | " \n", 1087 | " \n", 1088 | " \n", 1089 | " \n", 1090 | " \n", 1091 | " \n", 1092 | " \n", 1093 | " \n", 1094 | " \n", 1095 | " \n", 1096 | " \n", 1097 | " \n", 1098 | " \n", 1099 | " \n", 1100 | " \n", 1101 | " \n", 1102 | " \n", 1103 | " \n", 1104 | " \n", 1105 | " \n", 1106 | " \n", 1107 | " \n", 1108 | " \n", 1109 | " \n", 1110 | "
control_numberstudent1student2student3advisoruniversityprize
28592311286Shituo MaAnqi LiuZixiong WangShituo MaHuazhong University of Science and TechnologyFinalist
43852312921Fanjun KongChenglong ZengDi WuKong FanjunHuazhong University of Science and TechnologyFinalist
46702313212Chen YihuaLv XinshengLin JunJieYang KaiHuazhong University of Science and TechnologyFinalist
49522313511Sitong ZhengYuanmeng ShanZiying ChenZhao PanHuazhong University of Science and TechnologyFinalist
196322307685Xiangqian YanKeyi ChenLing LuoHaoyuan XuHuazhong University of Science and TechnologyFinalist
196922307750Zuoming FuHatyue ChenHaoran ZhuZuoming FuHuazhong University of Science and TechnologyFinalist
207522308903Chenshen MaoZhixiong XiaShuning LuoHaoyuan XuHuazhong University of Science and TechnologyFinalist
208262308986Lihui ZhangXin HuChenyang XiongLi LiHuazhong University of Science and TechnologyFinalist
\n", 1111 | "
" 1112 | ], 1113 | "text/plain": [ 1114 | " control_number student1 student2 student3 \\\n", 1115 | "2859 2311286 Shituo Ma Anqi Liu Zixiong Wang \n", 1116 | "4385 2312921 Fanjun Kong Chenglong Zeng Di Wu \n", 1117 | "4670 2313212 Chen Yihua Lv Xinsheng Lin JunJie \n", 1118 | "4952 2313511 Sitong Zheng Yuanmeng Shan Ziying Chen \n", 1119 | "19632 2307685 Xiangqian Yan Keyi Chen Ling Luo \n", 1120 | "19692 2307750 Zuoming Fu Hatyue Chen Haoran Zhu \n", 1121 | "20752 2308903 Chenshen Mao Zhixiong Xia Shuning Luo \n", 1122 | "20826 2308986 Lihui Zhang Xin Hu Chenyang Xiong \n", 1123 | "\n", 1124 | " advisor university prize \n", 1125 | "2859 Shituo Ma Huazhong University of Science and Technology Finalist \n", 1126 | "4385 Kong Fanjun Huazhong University of Science and Technology Finalist \n", 1127 | "4670 Yang Kai Huazhong University of Science and Technology Finalist \n", 1128 | "4952 Zhao Pan Huazhong University of Science and Technology Finalist \n", 1129 | "19632 Haoyuan Xu Huazhong University of Science and Technology Finalist \n", 1130 | "19692 Zuoming Fu Huazhong University of Science and Technology Finalist \n", 1131 | "20752 Haoyuan Xu Huazhong University of Science and Technology Finalist \n", 1132 | "20826 Li Li Huazhong University of Science and Technology Finalist " 1133 | ] 1134 | }, 1135 | "execution_count": 12, 1136 | "metadata": {}, 1137 | "output_type": "execute_result" 1138 | } 1139 | ], 1140 | "source": [ 1141 | "prize_result=your_univ_result.loc[your_univ_result['prize']=='Finalist']\n", 1142 | "prize_result" 1143 | ] 1144 | }, 1145 | { 1146 | "cell_type": "code", 1147 | "execution_count": null, 1148 | "metadata": {}, 1149 | "outputs": [], 1150 | "source": [] 1151 | }, 1152 | { 1153 | "cell_type": "code", 1154 | "execution_count": null, 1155 | "metadata": {}, 1156 | "outputs": [], 1157 | "source": [] 1158 | } 1159 | ], 1160 | "metadata": { 1161 | "kernelspec": { 1162 | "display_name": "Python 3", 1163 | "language": "python", 1164 | "name": 
"python3" 1165 | }, 1166 | "language_info": { 1167 | "codemirror_mode": { 1168 | "name": "ipython", 1169 | "version": 3 1170 | }, 1171 | "file_extension": ".py", 1172 | "mimetype": "text/x-python", 1173 | "name": "python", 1174 | "nbconvert_exporter": "python", 1175 | "pygments_lexer": "ipython3", 1176 | "version": "3.7.6" 1177 | } 1178 | }, 1179 | "nbformat": 4, 1180 | "nbformat_minor": 4 1181 | } 1182 | -------------------------------------------------------------------------------- /analysis_2024.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import shutil\n", 10 | "import pandas as pd\n", 11 | "from config import your_university" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": { 18 | "scrolled": true 19 | }, 20 | "outputs": [ 21 | { 22 | "data": { 23 | "text/html": [ 24 | "
\n", 25 | "\n", 38 | "\n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | "
control_numberstudent1student2student3universityprize
02400002Huang JiaruiLuo ZimengZhang YifaBeijing Normal UniversityHonorable Mention
12400004Chongqin ChenJunyi LiuRuishu HuangNingbo UniversitySuccessful Participant
22400006Xinkai WuGaoyuan FengNuoheng ZhouNingbo UniversityHonorable Mention
32400007Xiatian ZhangRunyi LinYuehan YangNingbo UniversityFinalist
42400008Kang-liang WangFei NiZi-shuo WangNingbo UniversitySuccessful Participant
\n", 98 | "
" 99 | ], 100 | "text/plain": [ 101 | " control_number student1 student2 student3 \\\n", 102 | "0 2400002 Huang Jiarui Luo Zimeng Zhang Yifa \n", 103 | "1 2400004 Chongqin Chen Junyi Liu Ruishu Huang \n", 104 | "2 2400006 Xinkai Wu Gaoyuan Feng Nuoheng Zhou \n", 105 | "3 2400007 Xiatian Zhang Runyi Lin Yuehan Yang \n", 106 | "4 2400008 Kang-liang Wang Fei Ni Zi-shuo Wang \n", 107 | "\n", 108 | " university prize \n", 109 | "0 Beijing Normal University Honorable Mention \n", 110 | "1 Ningbo University Successful Participant \n", 111 | "2 Ningbo University Honorable Mention \n", 112 | "3 Ningbo University Finalist \n", 113 | "4 Ningbo University Successful Participant " 114 | ] 115 | }, 116 | "execution_count": 2, 117 | "metadata": {}, 118 | "output_type": "execute_result" 119 | } 120 | ], 121 | "source": [ 122 | "shutil.copyfile('./all/all_2024.txt', './all/all_2024.csv')\n", 123 | "data = pd.read_csv('./all/all_2024.csv')\n", 124 | "data=data.iloc[:,0:6]\n", 125 | "data.head()" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 3, 131 | "metadata": {}, 132 | "outputs": [ 133 | { 134 | "data": { 135 | "text/plain": [ 136 | "Jilin University 572\n", 137 | "Harbin Engineering University 560\n", 138 | "Huazhong University of Science and Technology 511\n", 139 | "Northwestern Polytechnical University 497\n", 140 | "Xi'an Jiaotong University 494\n", 141 | "Beihang University 451\n", 142 | "Nanjing University of Posts and Telecommunication 410\n", 143 | "Dalian University of Technology 387\n", 144 | "South China University of Technology 372\n", 145 | "XIDIAN UNIVERSITY 368\n", 146 | "Chongqing University 337\n", 147 | "Wuhan University 337\n", 148 | "Shanghai Jiao Tong University 330\n", 149 | "Harbin Institute of Technology 326\n", 150 | "Xidian University 285\n", 151 | "SUN YAT-SEN UNIVERSITY 273\n", 152 | "Northeastern University 270\n", 153 | "National University of Defense Technology 267\n", 154 | "Being Jiaotong University 266\n", 155 | 
"Beijing Institute of Technology 262\n", 156 | "Name: university, dtype: int64" 157 | ] 158 | }, 159 | "execution_count": 3, 160 | "metadata": {}, 161 | "output_type": "execute_result" 162 | } 163 | ], 164 | "source": [ 165 | "univ_data=data['university'].value_counts().iloc[0:20]\n", 166 | "univ_data" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 4, 172 | "metadata": { 173 | "scrolled": true 174 | }, 175 | "outputs": [ 176 | { 177 | "data": { 178 | "image/png": "\n", 179 | "text/plain": [ 180 | "
" 181 | ] 182 | }, 183 | "metadata": { 184 | "needs_background": "light" 185 | }, 186 | "output_type": "display_data" 187 | } 188 | ], 189 | "source": [ 190 | "\n", 191 | "ax=univ_data.sort_values().plot.barh()\n", 192 | "fig=ax.get_figure()\n", 193 | "fig.savefig('统计结果.png',bbox_inches='tight')" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 5, 199 | "metadata": {}, 200 | "outputs": [ 201 | { 202 | "data": { 203 | "text/plain": [ 204 | "Successful Participant 18103\n", 205 | "Honorable Mention 6643\n", 206 | "Meritorious Winner 1918\n", 207 | "Disqualified - P 1432\n", 208 | "Finalist 506\n", 209 | "Unsuccessful - I 127\n", 210 | "Not Judged 24\n", 211 | "Outstanding Winner 13\n", 212 | "AMS Award 6\n", 213 | "INFORMS Award 6\n", 214 | "SIAM Award 5\n", 215 | "MAA Award 3\n", 216 | "COMAP Scholarship Award 2\n", 217 | "Leonhard Euler Award 1\n", 218 | "Vilfredo Pareto Award 1\n", 219 | "Rachel Carson Award 1\n", 220 | "ASA Award 1\n", 221 | "Cc 1\n", 222 | "Ben Fusaro Award 1\n", 223 | "Frank Giordano Award 1\n", 224 | "Name: prize, dtype: int64" 225 | ] 226 | }, 227 | "execution_count": 5, 228 | "metadata": {}, 229 | "output_type": "execute_result" 230 | } 231 | ], 232 | "source": [ 233 | "data['prize'].value_counts()" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 6, 239 | "metadata": {}, 240 | "outputs": [ 241 | { 242 | "data": { 243 | "text/html": [ 244 | "
\n", 245 | "\n", 258 | "\n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " 
\n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | "
control_numberstudent1student2student3universityprize
28502407038Jiechuan GongFantong XiaJunhao ZengUESTCINFORMS Award
29012407093Haoran YiJunrong LiuZhe ZhaoBeijing Institute of TechnologyAMS Award
32082407414Jiajun LeiJunjie WangRuiyang LiuXi'an Jiaotong UniversityAMS Award
55322400860Xinyu HuJiarui LiangDongsheng SuZhejiang Sci-Tech UniversityRachel Carson Award
56562400996Zhen HuangHonghui CaoZhenning LiuWuhan UniversityINFORMS Award
60862409949YANHENG LIHONG JIANGLING GANRenmin University of ChinaCOMAP Scholarship Award
65992410482ZIXUAN TANGYITING YANGXINYU CHENBeijing Normal UniversityINFORMS Award
69472410846Heyu HuangJiahui ZhaiWeibin LiXiamen UniversityBen Fusaro Award
69892410889Wenlong ChenYihui LiWei ZhuShenzhen UniversitySIAM Award
76402411570Yutong WangHaoqi LvPengkun WangChina University of Geosciences BeingFrank Giordano Award
95402413552Qianjing ZhuWenhao YuanYunjie ShaoSoochow UniversityINFORMS Award
95532413565Xu YanZhou YangyujieHuo ZhengyjiXi'an University of Posts and TelecommunicationsINFORMS Award
107272401298Junjie FeiYuxin ShenKaijie DuFudan UniversityAMS Award
133202417004Jiahao ZhuChengzhen HanDongyi JiangEast China Normal UniversityINFORMS Award
141272417831Yue ZhongYueming CaoBo WuTsinghua UniversityLeonhard Euler Award
161332401919Shang Yue LiYuChun YangHuiXue SuRenmin University of ChinaSIAM Award
162932419588Haoyang PengWei LiSiyuan WangNational University of Defense TechnologyAMS Award
166812419984Zeyang WuXinhiang LiXiran NiTongji UniversityAMS Award
186542422054Yi WuJiankai LiChenyang PanShanghai Jiao Tong UniversityVilfredo Pareto Award
192312422656Aranjinsuren EnkhbatBisrat KassieJordan SimsBrandeis UniversityMAA Award
208862424371Xinyu SuiJi ZhengYuhang MaXi'an Jiaotong UniversitySIAM Award
227212425792Cunxin FanHaonan WangKe ZhangShanghai Jiao Tong UniversitySIAM Award
238192426941Wei SunTing MeiFangyuan LiuHunan Normal UniversityAMS Award
252732428463Yilin MaXuantong WangJingyi SunNorth China University of TechnologyASA Award
259292429166Christopher KanBrandon LuBenny SunDuke UniversityMAA Award
259672429211Arjun TanejaPaco NavarroMax CollinsHarvey Mudd CollegeCOMAP Scholarship Award
266752402960Hangyi YaoJiting ChenZeyu LiangNingbo UniversitySIAM Award
271502429973Henry LiYuxin LiCrystal SuColumbia UniversityMAA Award
\n", 525 | "
" 526 | ], 527 | "text/plain": [ 528 | " control_number student1 student2 student3 \\\n", 529 | "2850 2407038 Jiechuan Gong Fantong Xia Junhao Zeng \n", 530 | "2901 2407093 Haoran Yi Junrong Liu Zhe Zhao \n", 531 | "3208 2407414 Jiajun Lei Junjie Wang Ruiyang Liu \n", 532 | "5532 2400860 Xinyu Hu Jiarui Liang Dongsheng Su \n", 533 | "5656 2400996 Zhen Huang Honghui Cao Zhenning Liu \n", 534 | "6086 2409949 YANHENG LI HONG JIANG LING GAN \n", 535 | "6599 2410482 ZIXUAN TANG YITING YANG XINYU CHEN \n", 536 | "6947 2410846 Heyu Huang Jiahui Zhai Weibin Li \n", 537 | "6989 2410889 Wenlong Chen Yihui Li Wei Zhu \n", 538 | "7640 2411570 Yutong Wang Haoqi Lv Pengkun Wang \n", 539 | "9540 2413552 Qianjing Zhu Wenhao Yuan Yunjie Shao \n", 540 | "9553 2413565 Xu Yan Zhou Yangyujie Huo Zhengyji \n", 541 | "10727 2401298 Junjie Fei Yuxin Shen Kaijie Du \n", 542 | "13320 2417004 Jiahao Zhu Chengzhen Han Dongyi Jiang \n", 543 | "14127 2417831 Yue Zhong Yueming Cao Bo Wu \n", 544 | "16133 2401919 Shang Yue Li YuChun Yang HuiXue Su \n", 545 | "16293 2419588 Haoyang Peng Wei Li Siyuan Wang \n", 546 | "16681 2419984 Zeyang Wu Xinhiang Li Xiran Ni \n", 547 | "18654 2422054 Yi Wu Jiankai Li Chenyang Pan \n", 548 | "19231 2422656 Aranjinsuren Enkhbat Bisrat Kassie Jordan Sims \n", 549 | "20886 2424371 Xinyu Sui Ji Zheng Yuhang Ma \n", 550 | "22721 2425792 Cunxin Fan Haonan Wang Ke Zhang \n", 551 | "23819 2426941 Wei Sun Ting Mei Fangyuan Liu \n", 552 | "25273 2428463 Yilin Ma Xuantong Wang Jingyi Sun \n", 553 | "25929 2429166 Christopher Kan Brandon Lu Benny Sun \n", 554 | "25967 2429211 Arjun Taneja Paco Navarro Max Collins \n", 555 | "26675 2402960 Hangyi Yao Jiting Chen Zeyu Liang \n", 556 | "27150 2429973 Henry Li Yuxin Li Crystal Su \n", 557 | "\n", 558 | " university \\\n", 559 | "2850 UESTC \n", 560 | "2901 Beijing Institute of Technology \n", 561 | "3208 Xi'an Jiaotong University \n", 562 | "5532 Zhejiang Sci-Tech University \n", 563 | "5656 Wuhan University \n", 564 | "6086 
Renmin University of China \n", 565 | "6599 Beijing Normal University \n", 566 | "6947 Xiamen University \n", 567 | "6989 Shenzhen University \n", 568 | "7640 China University of Geosciences Being \n", 569 | "9540 Soochow University \n", 570 | "9553 Xi'an University of Posts and Telecommunications \n", 571 | "10727 Fudan University \n", 572 | "13320 East China Normal University \n", 573 | "14127 Tsinghua University \n", 574 | "16133 Renmin University of China \n", 575 | "16293 National University of Defense Technology \n", 576 | "16681 Tongji University \n", 577 | "18654 Shanghai Jiao Tong University \n", 578 | "19231 Brandeis University \n", 579 | "20886 Xi'an Jiaotong University \n", 580 | "22721 Shanghai Jiao Tong University \n", 581 | "23819 Hunan Normal University \n", 582 | "25273 North China University of Technology \n", 583 | "25929 Duke University \n", 584 | "25967 Harvey Mudd College \n", 585 | "26675 Ningbo University \n", 586 | "27150 Columbia University \n", 587 | "\n", 588 | " prize \n", 589 | "2850 INFORMS Award \n", 590 | "2901 AMS Award \n", 591 | "3208 AMS Award \n", 592 | "5532 Rachel Carson Award \n", 593 | "5656 INFORMS Award \n", 594 | "6086 COMAP Scholarship Award \n", 595 | "6599 INFORMS Award \n", 596 | "6947 Ben Fusaro Award \n", 597 | "6989 SIAM Award \n", 598 | "7640 Frank Giordano Award \n", 599 | "9540 INFORMS Award \n", 600 | "9553 INFORMS Award \n", 601 | "10727 AMS Award \n", 602 | "13320 INFORMS Award \n", 603 | "14127 Leonhard Euler Award \n", 604 | "16133 SIAM Award \n", 605 | "16293 AMS Award \n", 606 | "16681 AMS Award \n", 607 | "18654 Vilfredo Pareto Award \n", 608 | "19231 MAA Award \n", 609 | "20886 SIAM Award \n", 610 | "22721 SIAM Award \n", 611 | "23819 AMS Award \n", 612 | "25273 ASA Award \n", 613 | "25929 MAA Award \n", 614 | "25967 COMAP Scholarship Award \n", 615 | "26675 SIAM Award \n", 616 | "27150 MAA Award " 617 | ] 618 | }, 619 | "execution_count": 6, 620 | "metadata": {}, 621 | "output_type": "execute_result" 
622 | } 623 | ], 624 | "source": [ 625 | "avard_result=data.loc[(data['prize'].str.contains('Award'))]\n", 626 | "avard_result" 627 | ] 628 | }, 629 | { 630 | "cell_type": "code", 631 | "execution_count": 7, 632 | "metadata": { 633 | "scrolled": false 634 | }, 635 | "outputs": [ 636 | { 637 | "data": { 638 | "text/html": [ 639 | "
\n", 640 | "\n", 653 | "\n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | "
control_numberstudent1student2student3universityprize
1022400114Zishuo WangYucheng ChenYanjinghao XuHuazhong University of Science and TechnologySuccessful Participant
1242400139Boheng LinZhipu HuZhenghao GaoHuazhong University of Science and TechnologyHonorable Mention
1322400147He YinHaiZhuo WangHaoze LiHuazhong University of Science and TechnologySuccessful Participant
1502400167Jingchao LuRuiXin DongYaxiang GaoHuazhong University of Science and TechnologySuccessful Participant
1652400185Jinlong MaZhimin LuoLeying FuHuazhong University of Science and TechnologySuccessful Participant
.....................
280812403746Jiaye PengYuanzheng LiWeiping ShenHuazhong University of Science and TechnologySuccessful Participant
280832403748Zichuan WangXAuanze YoungYuhang DingHuazhong University of Science and TechnologyHonorable Mention
280962403761Xiangyi LiLin ChenLitong ShiHuazhong University of Science and TechnologyDisqualified - P
282912403964Huadong SongHanxiang LvLiang ZhangHuazhong University of Science and TechnologySuccessful Participant
282972403971Leon Huangjared zhouJoeHuazhong University of Science and TechnologyHonorable Mention
\n", 767 | "

511 rows × 6 columns

\n", 768 | "
" 769 | ], 770 | "text/plain": [ 771 | " control_number student1 student2 student3 \\\n", 772 | "102 2400114 Zishuo Wang Yucheng Chen Yanjinghao Xu \n", 773 | "124 2400139 Boheng Lin Zhipu Hu Zhenghao Gao \n", 774 | "132 2400147 He Yin HaiZhuo Wang Haoze Li \n", 775 | "150 2400167 Jingchao Lu RuiXin Dong Yaxiang Gao \n", 776 | "165 2400185 Jinlong Ma Zhimin Luo Leying Fu \n", 777 | "... ... ... ... ... \n", 778 | "28081 2403746 Jiaye Peng Yuanzheng Li Weiping Shen \n", 779 | "28083 2403748 Zichuan Wang XAuanze Young Yuhang Ding \n", 780 | "28096 2403761 Xiangyi Li Lin Chen Litong Shi \n", 781 | "28291 2403964 Huadong Song Hanxiang Lv Liang Zhang \n", 782 | "28297 2403971 Leon Huang jared zhou Joe \n", 783 | "\n", 784 | " university prize \n", 785 | "102 Huazhong University of Science and Technology Successful Participant \n", 786 | "124 Huazhong University of Science and Technology Honorable Mention \n", 787 | "132 Huazhong University of Science and Technology Successful Participant \n", 788 | "150 Huazhong University of Science and Technology Successful Participant \n", 789 | "165 Huazhong University of Science and Technology Successful Participant \n", 790 | "... ... ... 
\n", 791 | "28081 Huazhong University of Science and Technology Successful Participant \n", 792 | "28083 Huazhong University of Science and Technology Honorable Mention \n", 793 | "28096 Huazhong University of Science and Technology Disqualified - P \n", 794 | "28291 Huazhong University of Science and Technology Successful Participant \n", 795 | "28297 Huazhong University of Science and Technology Honorable Mention \n", 796 | "\n", 797 | "[511 rows x 6 columns]" 798 | ] 799 | }, 800 | "execution_count": 7, 801 | "metadata": {}, 802 | "output_type": "execute_result" 803 | } 804 | ], 805 | "source": [ 806 | "your_univ_result=data.loc[data['university']==your_university]\n", 807 | "your_univ_result" 808 | ] 809 | }, 810 | { 811 | "cell_type": "code", 812 | "execution_count": 8, 813 | "metadata": { 814 | "scrolled": true 815 | }, 816 | "outputs": [ 817 | { 818 | "data": { 819 | "text/plain": [ 820 | "Successful Participant 311\n", 821 | "Honorable Mention 130\n", 822 | "Meritorious Winner 39\n", 823 | "Disqualified - P 19\n", 824 | "Finalist 11\n", 825 | "Unsuccessful - I 1\n", 826 | "Name: prize, dtype: int64" 827 | ] 828 | }, 829 | "execution_count": 8, 830 | "metadata": {}, 831 | "output_type": "execute_result" 832 | } 833 | ], 834 | "source": [ 835 | "your_univ_result['prize'].value_counts()" 836 | ] 837 | }, 838 | { 839 | "cell_type": "code", 840 | "execution_count": 9, 841 | "metadata": { 842 | "scrolled": false 843 | }, 844 | "outputs": [ 845 | { 846 | "data": { 847 | "text/html": [ 848 | "
\n", 849 | "\n", 862 | "\n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | "
control_numberstudent1student2student3universityprize
\n", 877 | "
" 878 | ], 879 | "text/plain": [ 880 | "Empty DataFrame\n", 881 | "Columns: [control_number, student1, student2, student3, university, prize]\n", 882 | "Index: []" 883 | ] 884 | }, 885 | "execution_count": 9, 886 | "metadata": {}, 887 | "output_type": "execute_result" 888 | } 889 | ], 890 | "source": [ 891 | "prize_result=your_univ_result.loc[(your_univ_result['prize']=='Outstanding Winner') | (your_univ_result['prize'].str.contains('Award'))]\n", 892 | "prize_result" 893 | ] 894 | }, 895 | { 896 | "cell_type": "code", 897 | "execution_count": null, 898 | "metadata": {}, 899 | "outputs": [], 900 | "source": [] 901 | }, 902 | { 903 | "cell_type": "code", 904 | "execution_count": 10, 905 | "metadata": {}, 906 | "outputs": [ 907 | { 908 | "data": { 909 | "text/html": [ 910 | "
\n", 911 | "\n", 924 | "\n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | "
control_numberstudent1student2student3universityprize
64652410343Qianyu ZhaoYanheng jiangShoukun GaoHuazhong University of Science and TechnologyFinalist
67212410612Xinlong ChenWanting WangYining DengHuazhong University of Science and TechnologyFinalist
68492410744Sheng DongXiang FanLewen YanHuazhong University of Science and TechnologyFinalist
71382411045Qu JiaJunLi JunhaoYu HaoHuazhong University of Science and TechnologyFinalist
72272411137Bin WangJiajun ZhangTiantong SunHuazhong University of Science and TechnologyFinalist
74252411343Tingjia HuYinda LiZehua CaoHuazhong University of Science and TechnologyFinalist
79222411866Hatying LaoYongye LaiMuze ZhouHuazhong University of Science and TechnologyFinalist
93052413307Feryang HuangShuchen PuChaoyang ZhengHuazhong University of Science and TechnologyFinalist
112332414823Bowen ZhengYiging YuanHuanyu LiuHuazhong University of Science and TechnologyFinalist
118392415449Wang LingyuLi ShengzhenHuang XinyueHuazhong University of Science and TechnologyFinalist
268002429594Jieang MaJiaxing FengQinglin JIHuazhong University of Science and TechnologyFinalist
\n", 1038 | "
" 1039 | ], 1040 | "text/plain": [ 1041 | " control_number student1 student2 student3 \\\n", 1042 | "6465 2410343 Qianyu Zhao Yanheng jiang Shoukun Gao \n", 1043 | "6721 2410612 Xinlong Chen Wanting Wang Yining Deng \n", 1044 | "6849 2410744 Sheng Dong Xiang Fan Lewen Yan \n", 1045 | "7138 2411045 Qu JiaJun Li Junhao Yu Hao \n", 1046 | "7227 2411137 Bin Wang Jiajun Zhang Tiantong Sun \n", 1047 | "7425 2411343 Tingjia Hu Yinda Li Zehua Cao \n", 1048 | "7922 2411866 Hatying Lao Yongye Lai Muze Zhou \n", 1049 | "9305 2413307 Feryang Huang Shuchen Pu Chaoyang Zheng \n", 1050 | "11233 2414823 Bowen Zheng Yiging Yuan Huanyu Liu \n", 1051 | "11839 2415449 Wang Lingyu Li Shengzhen Huang Xinyue \n", 1052 | "26800 2429594 Jieang Ma Jiaxing Feng Qinglin JI \n", 1053 | "\n", 1054 | " university prize \n", 1055 | "6465 Huazhong University of Science and Technology Finalist \n", 1056 | "6721 Huazhong University of Science and Technology Finalist \n", 1057 | "6849 Huazhong University of Science and Technology Finalist \n", 1058 | "7138 Huazhong University of Science and Technology Finalist \n", 1059 | "7227 Huazhong University of Science and Technology Finalist \n", 1060 | "7425 Huazhong University of Science and Technology Finalist \n", 1061 | "7922 Huazhong University of Science and Technology Finalist \n", 1062 | "9305 Huazhong University of Science and Technology Finalist \n", 1063 | "11233 Huazhong University of Science and Technology Finalist \n", 1064 | "11839 Huazhong University of Science and Technology Finalist \n", 1065 | "26800 Huazhong University of Science and Technology Finalist " 1066 | ] 1067 | }, 1068 | "execution_count": 10, 1069 | "metadata": {}, 1070 | "output_type": "execute_result" 1071 | } 1072 | ], 1073 | "source": [ 1074 | "prize_result=your_univ_result.loc[your_univ_result['prize']=='Finalist']\n", 1075 | "prize_result" 1076 | ] 1077 | }, 1078 | { 1079 | "cell_type": "code", 1080 | "execution_count": null, 1081 | "metadata": {}, 1082 | "outputs": [], 
1083 | "source": [] 1084 | }, 1085 | { 1086 | "cell_type": "code", 1087 | "execution_count": null, 1088 | "metadata": {}, 1089 | "outputs": [], 1090 | "source": [] 1091 | } 1092 | ], 1093 | "metadata": { 1094 | "kernelspec": { 1095 | "display_name": "Python 3", 1096 | "language": "python", 1097 | "name": "python3" 1098 | }, 1099 | "language_info": { 1100 | "codemirror_mode": { 1101 | "name": "ipython", 1102 | "version": 3 1103 | }, 1104 | "file_extension": ".py", 1105 | "mimetype": "text/x-python", 1106 | "name": "python", 1107 | "nbconvert_exporter": "python", 1108 | "pygments_lexer": "ipython3", 1109 | "version": "3.7.6" 1110 | } 1111 | }, 1112 | "nbformat": 4, 1113 | "nbformat_minor": 4 1114 | } 1115 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | year = 23 2 | total_num = 30000 3 | # 进程数根据CPU和内存能力设置 4 | # step=30000/进程数 5 | #开30进程 6 | download_step = 1000 7 | #开30进程 8 | pdf2text_step = 1000 9 | #你的学校 10 | your_university= 'Huazhong University of Science and Technology' 11 | #TesseractOCR路径 12 | TesseractOCR_path = 'E:/prog/TesseractOCR/tesseract.exe' 13 | -------------------------------------------------------------------------------- /download.py: -------------------------------------------------------------------------------- 1 | """ 2 | 爬取美赛获奖证书,并以控制号命名 3 | 只运行一次部分下载会失败,需要运行多次,确保全部下载 4 | """ 5 | import os 6 | import requests 7 | from multiprocessing import Process 8 | from random import shuffle 9 | from config import * 10 | 11 | 12 | class CMcmCertificateCrawler(): 13 | def __init__(self, contol_nmuber): 14 | self.headers = { 15 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) ' 16 | 'Chrome/73.0.3683.103 Safari/537.36' 17 | } 18 | self.control_number = contol_nmuber 19 | 20 | def FGetResponse(self): 21 | url = "http://www.comap-math.com/mcm/20" + str(year) + "Certs/" + 
str(self.control_number) + ".pdf" 22 | response = requests.get(url=url, headers=self.headers) 23 | # print(response.status_code) 24 | return response 25 | 26 | def FSavePDF(self, control_number): 27 | # 下载证书PDF 28 | try: 29 | path = './paper_20' + str(year) + '/' + str(control_number) + '.pdf' 30 | response = self.FGetResponse() 31 | # print(response.status_code) 32 | if response.status_code != 404: 33 | with open(path, 'wb') as f: 34 | f.write(response.content) 35 | print(str(control_number) + ".pdf" + "存储成功") 36 | else: 37 | print(control_number, ' -- 404') 38 | except Exception: 39 | print("Exception") 40 | 41 | 42 | def download(start, end): 43 | for control_number in range(start, end): 44 | control_number = '%05d' % control_number 45 | control_number = year * 100000 + int(control_number) 46 | mcc = CMcmCertificateCrawler(control_number) 47 | mcc.FSavePDF(control_number) 48 | 49 | 50 | def downloadlist(control_number_list): 51 | for control_number in control_number_list: 52 | mcc = CMcmCertificateCrawler(control_number) 53 | mcc.FSavePDF(control_number) 54 | 55 | 56 | if __name__ == '__main__': 57 | all_control_list = [] 58 | for control_number in range(1, total_num): 59 | control_number = '%05d' % control_number 60 | control_number = year * 100000 + int(control_number) 61 | all_control_list.append(control_number) 62 | 63 | dir = './paper_20' + str(year) + '/' 64 | if not os.path.exists(dir): 65 | os.mkdir(dir) 66 | download_filelist = os.listdir(dir) 67 | for filename in download_filelist: 68 | filenum = int(filename[0:7]) 69 | print(filenum) 70 | filesize = os.path.getsize(dir + filename) 71 | if filesize: 72 | all_control_list.remove(filenum) 73 | 74 | step = download_step 75 | shuffle(all_control_list) 76 | for i in range(0, len(all_control_list), step): 77 | start = i 78 | end = i + step - 1 79 | control_numbers = all_control_list[start:end] 80 | p = Process(target=downloadlist, args=(control_numbers,)) 81 | p.start() 82 | 
# --------------------------------------------------------------------------------
# /exception.txt:
# --------------------------------------------------------------------------------
# 2308010
#
# --------------------------------------------------------------------------------
# /gitpush.bat:
# --------------------------------------------------------------------------------
# git add .
# git commit -m "update"
# git push -u origin master
#
# --------------------------------------------------------------------------------
# /pdf2text.py:
# --------------------------------------------------------------------------------
"""
OCR extraction of MCM/ICM award certificate information.

Each downloaded certificate PDF is rendered to an image, run through
Tesseract, and reduced to one CSV-style row:
control number, up to three students, advisor, university, prize.
"""

import os
import re
from multiprocessing import Process

# NOTE: the OCR stack (fitz / PIL / pytesseract) and the project-local
# ``config`` module are imported inside the functions that need them, so the
# pure text helpers below stay importable without those packages installed.

# Placeholders used to pad the student columns up to three entries.  They
# reproduce the historical output exactly: '' for a missing first student,
# a single space for a missing second / third one.
_STUDENT_PADDING = ['', ' ', ' ']


def _clean(value):
    """Strip commas (the column separator) and map the digit 1 to the letter i.

    The ``1`` -> ``i`` substitution is applied to names and universities only;
    presumably it compensates for a frequent OCR misread - verify on samples.
    """
    return value.replace(',', ' ').replace('1', 'i')


def _locate_indices(lines):
    """Return ``(students_end, advisor_index)`` for the OCR'd ``lines``.

    Markers are tried in order of reliability; the first one present wins.

    Raises:
        ValueError: if none of the known marker lines is present.
    """
    for marker in ('With Student Advisor', 'With Faculty Advisor'):
        if marker in lines:
            position = lines.index(marker)
            # Students are above the marker, the advisor right below it.
            return position, position + 1
    if 'Of' in lines:
        # The advisor line directly precedes 'Of'.
        position = lines.index('Of') - 1
        return position, position
    position = lines.index('Was Designated As')
    # NOTE(review): ``position - 2`` is kept from the original code; by analogy
    # with the 'Of' branch the advisor may actually sit at ``position - 3``.
    # Confirm against a certificate where only this marker is recognised.
    return position - 3, position - 2


def _parse_certificate_lines(lines):
    """Split the non-empty OCR ``lines`` of one page into certificate fields.

    Returns:
        ``(students, advisor, university, prize)`` where ``students`` is a
        list of strings and the rest are strings.

    Raises:
        ValueError / IndexError: if the page does not look like a certificate.
    """
    students_end, advisor_index = _locate_indices(lines)
    try:
        university_index = lines.index('Was Designated As') - 1
        # Clamp so that a marker found near the top of the page yields "no
        # students" instead of a negative slice bound that grabs most of it.
        students = lines[0:max(students_end, 0)]
        advisor = lines[advisor_index]
        university = lines[university_index]
    except (ValueError, IndexError):
        # Positional fallback: three students, one marker line, advisor,
        # university.
        students = lines[0:3]
        advisor = lines[4]
        university = lines[5]
    prize = lines[-1]
    return students, advisor, university, prize


def pdf2text(pdfPath, control_number, zoom_x=6, zoom_y=6, rotation_angle=0):
    """OCR one certificate PDF and return its fields.

    Args:
        pdfPath: path of the certificate PDF.
        control_number: team control number; only used for error reporting.
        zoom_x, zoom_y: rendering scale factors passed to ``fitz.Matrix``.
        rotation_angle: rotation in degrees applied before rendering.

    Returns:
        ``(students, advisor, university, prize)``.  On any failure the
        defaults ``([''], '', '', '')`` (or the values of the last page that
        parsed successfully) are returned and the control number is appended
        to ``exception.txt``.
    """
    import fitz
    import pytesseract
    from PIL import Image
    from config import TesseractOCR_path

    pytesseract.pytesseract.tesseract_cmd = TesseractOCR_path

    students = ['']
    advisor = university = prize = ''
    try:
        pdf = fitz.open(pdfPath)
        try:
            transform = fitz.Matrix(zoom_x, zoom_y).prerotate(rotation_angle)
            for page in pdf:
                rect = page.rect
                # Only the central area of the page is rendered and OCR'd.
                clip = fitz.Rect(rect.width * 0.25, rect.height * 0.27,
                                 rect.width * 0.8, rect.height * 0.7)
                pix = page.get_pixmap(matrix=transform, alpha=False, clip=clip)
                img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
                raw_text = pytesseract.image_to_string(img)
                lines = [line for line in raw_text.split('\n') if line]
                # With several pages the last one wins, as before.
                students, advisor, university, prize = _parse_certificate_lines(lines)
        finally:
            # Release the document even when OCR or parsing fails.
            pdf.close()
    except Exception as exc:
        # Best effort per certificate: report it, remember it, keep going.
        print(control_number, 'Exception', repr(exc))
        # Append (one number per line) so earlier failures are not overwritten.
        with open('exception.txt', 'a') as exception_file:
            exception_file.write('%s\n' % control_number)

    stus = [_clean(student) for student in students]
    return stus, _clean(advisor), _clean(university), prize.replace(',', ' ')


def _format_row(control_number, students, advisor, university, prize):
    """Build one output row with exactly three student columns.

    Characters that GBK cannot represent are turned into backslash escapes so
    the row can be printed on a GBK console.
    """
    names = list(students[:3])
    names += _STUDENT_PADDING[len(names):]
    row = '%s,%s,%s,%s,%s,\n' % (control_number, ','.join(names),
                                 advisor, university, prize)
    return row.encode('gbk', 'backslashreplace').decode('gbk', 'backslashreplace')


def savetext(start, end, count):
    """OCR the certificates numbered ``start`` .. ``end - 1`` and save the rows.

    Args:
        start: first sequence number (inclusive).
        end: last sequence number (exclusive).
        count: index of this chunk; selects the ``tmp<count>.txt`` outputs.

    Writes ``./all/tmp<count>.txt`` with every recognised row and
    ``./your_university/tmp<count>.txt`` with the rows whose university equals
    ``config.your_university``.
    """
    from config import year, your_university

    all_rows = []
    your_university_rows = []
    for sequence in range(start, end):
        control_number = year * 100000 + sequence
        path = "./paper_20" + str(year) + "/" + str(control_number) + ".pdf"
        # Skip certificates that were never downloaded or are empty files.
        if not (os.path.exists(path) and os.path.getsize(path) > 0):
            continue
        students, advisor, university, prize = pdf2text(path, control_number)
        if not prize:
            # OCR failed for this certificate; pdf2text already logged it.
            continue

        row = _format_row(control_number, students, advisor, university, prize)
        try:
            print(row)
        except UnicodeError:
            print(control_number, ' -- gbk encoding error')

        all_rows.append(row)
        if university == your_university:
            your_university_rows.append(row)

    os.makedirs('./all/', exist_ok=True)
    os.makedirs('./your_university/', exist_ok=True)
    with open('./all/tmp' + str(count) + '.txt', 'w', encoding='utf-8') as all_file:
        all_file.write(''.join(all_rows))
        print('./all/tmp' + str(count) + '.txt save sucessfully')
    with open('./your_university/tmp' + str(count) + '.txt', 'w', encoding='utf-8') as your_university_file:
        your_university_file.write(''.join(your_university_rows))
        print('./your_university/tmp' + str(count) + '.txt save sucessfully')


if __name__ == '__main__':
    # Entry point of pdf2text.py: one worker process per chunk of
    # ``pdf2text_step`` control numbers.
    from config import pdf2text_step, total_num

    os.makedirs('./all/', exist_ok=True)
    os.makedirs('./your_university/', exist_ok=True)

    for count, start in enumerate(range(1, total_num, pdf2text_step), start=1):
        # ``end`` is exclusive (savetext iterates range(start, end)).  The old
        # ``start + step - 1`` silently skipped the last certificate of every
        # chunk.
        end = min(start + pdf2text_step, total_num)
        Process(target=savetext, args=(start, end, count)).start()

    # Single-file example:
    # students, advisor, university, prize = pdf2text('./paper_2023/2300009.pdf', 2300009)

# --------------------------------------------------------------------------------
# /requirements.txt:
# --------------------------------------------------------------------------------
# requests==2.22.0
# pytesseract==0.3.9
# fitz==0.0.1.dev2
# PyMuPDF==1.19.6
# NOTE(review): ``import fitz`` is provided by PyMuPDF; the separate ``fitz``
# distribution on PyPI appears to be an unrelated package - confirm whether
# that pin is needed at all.
#
# --------------------------------------------------------------------------------
# /txt_joint.py:
# --------------------------------------------------------------------------------

# Matches exactly the per-chunk files written by savetext(): tmp<N>.txt
_TMP_FILE_PATTERN = re.compile(r'^tmp(\d+)\.txt$')


def txtjoint(dir, year=None):
    """Merge every ``tmp<N>.txt`` in ``dir`` into ``all_20<year>.txt``.

    Args:
        dir: directory holding the ``tmp<N>.txt`` parts.
        year: two-digit year used in the output file name; defaults to
            ``config.year``.

    The parts are concatenated in ascending ``N`` order below a CSV header
    line, so the result does not depend on directory listing order.
    """
    if year is None:
        from config import year as configured_year
        year = configured_year

    numbered_parts = []
    for name in os.listdir(dir):
        match = _TMP_FILE_PATTERN.match(name)
        if match:
            numbered_parts.append((int(match.group(1)), name))

    pieces = ['control_number,student1,student2,student3,advisor,university,prize,,,\n']
    for _, name in sorted(numbered_parts):
        with open(os.path.join(dir, name), "r", encoding='utf-8') as part_file:
            pieces.append(part_file.read())

    out_path = os.path.join(dir, "all_20" + str(year) + ".txt")
    with open(out_path, "w", encoding='utf-8') as outFile:
        outFile.write(''.join(pieces))
        print('txtjoint sucessfully')


if __name__ == '__main__':
    # Entry point of txt_joint.py: merge the OCR results of both output folders.
    all_dir = "./all/"
    your_university_dir = './your_university/'
    txtjoint(all_dir)
    txtjoint(your_university_dir)

# --------------------------------------------------------------------------------
# /统计结果.png:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/personqianduixue/comap_crawler_2023/6ec3537cd279d39d80f4be73b5589477a29defc2/统计结果.png
# --------------------------------------------------------------------------------