├── .gitattributes ├── README.md ├── .gitignore ├── Tc_AdjustV1.0.py └── Tc_AdjustV2.0.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Adjust_code 2 | 基于python3.7.2 3 | 4 | 4.10 更新 5 | 两个网页源代码发生变化,爬不了了,现在有点懒不想维护.. 6 | 7 | 2月15日成绩出来后如果真的有调剂这方面的需求可以尝试一下此软件。 8 | 9 | 研招网没爬是因为... 10 | 对调剂同学太不友好。 11 | 12 | 求大佬指正不足之处! 13 | 2019.01.31 14 | V2.0 15 | 优化界面,使用内置tkinter进行设计 16 | 17 | /*********************************************************** 18 | 19 | 2019.01.29 20 | V1.0 21 | 使用正则表达式和xpath进行匹配信息,在黑框中能够定时查询。 22 | 有对爬虫感兴趣的同学可以一起学习~ 23 | 24 | 小木虫 http://muchong.com/bbs/kaoyan.php?action=adjust&type=1&page=1 25 | 26 | 中国考研 http://www.chinakaoyan.com/tiaoji/schoollist/pagenum/1.shtml 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .nox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *.cover 48 | .hypothesis/ 49 | .pytest_cache/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | db.sqlite3 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # Jupyter Notebook 74 | .ipynb_checkpoints 75 | 76 | # IPython 77 | profile_default/ 78 | ipython_config.py 79 | 80 | # pyenv 81 | .python-version 82 | 83 | # celery beat schedule file 84 | celerybeat-schedule 85 | 86 | # SageMath parsed files 87 | *.sage.py 88 | 89 | # Environments 90 | .env 91 | .venv 92 | env/ 93 | venv/ 94 | ENV/ 95 | env.bak/ 96 | venv.bak/ 97 | 98 | # Spyder project settings 99 | .spyderproject 100 | .spyproject 101 | 102 | # Rope project settings 103 | .ropeproject 104 | 105 | # mkdocs documentation 106 | /site 107 | 108 | # mypy 109 | .mypy_cache/ 110 | .dmypy.json 111 | dmypy.json 112 | 113 | # Pyre type checker 114 | .pyre/ 115 | -------------------------------------------------------------------------------- /Tc_AdjustV1.0.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import re 3 | import time 4 | from lxml import etree 5 | search_profession = input("你要查找的专业或学校是:") 6 | print("\n","Powered by Dong dong Xu") 7 | print("\n"," 更多代码均放在 -->Github : https://github.com/Tcloser") 8 | print("\n") 9 | print("写在前面:本程序用于实时爬取最新调剂信息,关键词可以根据所爬内容适当修改,1.0版本主要进行测试信息及界面优化。") 10 | print("\n") 11 | print(" 调剂信息最新发表内容如下所示:") 12 | # 中国考研网 http://www.chinakaoyan.com/tiaoji/schoollist/pagenum/1.shtml 13 | chinakaoyan_index = 1 14 | web_site_chinakaoyan = 'http://www.chinakaoyan.com/tiaoji/schoollist/pagenum/'+str(chinakaoyan_index)+'.shtml' #网址第一页 可用循环多次 15 | school_sign_chinakaoyan = '(.*?)' #标题特征 16 | profession_sign_chinakaoyan = '(.*?)' #专业特征 17 | title_sign_chinakaoyan = 'target="_blank">(.*?)' #发表标题 18 | url_sign_chinakaoyan = 'Github : https://github.com/Tcloser") 7 | # print("\n") 8 | # print("写在前面:本程序用于实时爬取最新调剂信息,关键词可以根据所爬内容适当修改,1.0版本主要进行测试信息及界面优化。") 9 | # print("\n") 10 | # print(" 调剂信息最新发表内容如下所示:") 11 | # 中国考研网 http://www.chinakaoyan.com/tiaoji/schoollist/pagenum/1.shtml 12 | chinakaoyan_index = 1 13 | web_site_chinakaoyan = 'http://www.chinakaoyan.com/tiaoji/schoollist/pagenum/'+str(chinakaoyan_index)+'.shtml' #网址第一页 可用循环多次 14 | school_sign_chinakaoyan = '(.*?)' #标题特征 15 | profession_sign_chinakaoyan = '(.*?)' #专业特征 16 | title_sign_chinakaoyan = 'target="_blank">(.*?)' #发表标题 17 | url_sign_chinakaoyan = '