├── .gitattributes
├── README.md
├── .gitignore
├── Tc_AdjustV1.0.py
└── Tc_AdjustV2.0.py
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Adjust_code
2 | 基于 Python 3.7.2
3 |
4 | 4.10 更新
5 | 两个目标网站的网页源代码发生了变化,爬虫目前已无法正常抓取;作者暂时没有精力维护。
6 |
7 | 2 月 15 日成绩公布后,如果确实有调剂方面的需求,可以尝试使用本软件。
8 |
9 | 没有爬取研招网,是因为……
10 | 它对调剂同学太不友好。
11 |
12 | 求大佬指正不足之处!
13 | 2019.01.31
14 | V2.0
15 | 优化界面,使用 Python 内置的 tkinter 进行设计。
16 |
17 | ---
18 |
19 | 2019.01.29
20 | V1.0
21 | 使用正则表达式和 XPath 匹配信息,可在命令行窗口(黑框)中定时查询。
22 | 对爬虫感兴趣的同学可以一起学习~
23 |
24 | 小木虫 http://muchong.com/bbs/kaoyan.php?action=adjust&type=1&page=1
25 |
26 | 中国考研 http://www.chinakaoyan.com/tiaoji/schoollist/pagenum/1.shtml
27 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .nox/
42 | .coverage
43 | .coverage.*
44 | .cache
45 | nosetests.xml
46 | coverage.xml
47 | *.cover
48 | .hypothesis/
49 | .pytest_cache/
50 |
51 | # Translations
52 | *.mo
53 | *.pot
54 |
55 | # Django stuff:
56 | *.log
57 | local_settings.py
58 | db.sqlite3
59 |
60 | # Flask stuff:
61 | instance/
62 | .webassets-cache
63 |
64 | # Scrapy stuff:
65 | .scrapy
66 |
67 | # Sphinx documentation
68 | docs/_build/
69 |
70 | # PyBuilder
71 | target/
72 |
73 | # Jupyter Notebook
74 | .ipynb_checkpoints
75 |
76 | # IPython
77 | profile_default/
78 | ipython_config.py
79 |
80 | # pyenv
81 | .python-version
82 |
83 | # celery beat schedule file
84 | celerybeat-schedule
85 |
86 | # SageMath parsed files
87 | *.sage.py
88 |
89 | # Environments
90 | .env
91 | .venv
92 | env/
93 | venv/
94 | ENV/
95 | env.bak/
96 | venv.bak/
97 |
98 | # Spyder project settings
99 | .spyderproject
100 | .spyproject
101 |
102 | # Rope project settings
103 | .ropeproject
104 |
105 | # mkdocs documentation
106 | /site
107 |
108 | # mypy
109 | .mypy_cache/
110 | .dmypy.json
111 | dmypy.json
112 |
113 | # Pyre type checker
114 | .pyre/
115 |
--------------------------------------------------------------------------------
/Tc_AdjustV1.0.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import re
3 | import time
4 | from lxml import etree
5 | search_profession = input("你要查找的专业或学校是:")
6 | print("\n","Powered by Dong dong Xu")
7 | print("\n"," 更多代码均放在 -->Github : https://github.com/Tcloser")
8 | print("\n")
9 | print("写在前面:本程序用于实时爬取最新调剂信息,关键词可以根据所爬内容适当修改,1.0版本主要进行测试信息及界面优化。")
10 | print("\n")
11 | print(" 调剂信息最新发表内容如下所示:")
12 | # 中国考研网 http://www.chinakaoyan.com/tiaoji/schoollist/pagenum/1.shtml
13 | chinakaoyan_index = 1
14 | web_site_chinakaoyan = 'http://www.chinakaoyan.com/tiaoji/schoollist/pagenum/'+str(chinakaoyan_index)+'.shtml' #网址第一页 可用循环多次
15 | school_sign_chinakaoyan = '(.*?)' #标题特征
16 | profession_sign_chinakaoyan = '(.*?)' #专业特征
17 | title_sign_chinakaoyan = 'target="_blank">(.*?)' #发表标题
18 | url_sign_chinakaoyan = 'Github : https://github.com/Tcloser")
7 | # print("\n")
8 | # print("写在前面:本程序用于实时爬取最新调剂信息,关键词可以根据所爬内容适当修改,1.0版本主要进行测试信息及界面优化。")
9 | # print("\n")
10 | # print(" 调剂信息最新发表内容如下所示:")
11 | # 中国考研网 http://www.chinakaoyan.com/tiaoji/schoollist/pagenum/1.shtml
12 | chinakaoyan_index = 1
13 | web_site_chinakaoyan = 'http://www.chinakaoyan.com/tiaoji/schoollist/pagenum/'+str(chinakaoyan_index)+'.shtml' #网址第一页 可用循环多次
14 | school_sign_chinakaoyan = '(.*?)' #标题特征
15 | profession_sign_chinakaoyan = '(.*?)' #专业特征
16 | title_sign_chinakaoyan = 'target="_blank">(.*?)' #发表标题
17 | url_sign_chinakaoyan = '