'
65 | re_pdf = re.compile(r'data-class="media"[\s\S]*?data-title="([\s\S]*?)"[\s\S]*?data-url="(.*?)"')
66 | for _id, name in zip(chapter_ids, chapter_names):
67 | self.infos.append({'id': _id, 'name': winre.sub('',name)[:WIN_LENGTH], 'units':[], 'pdfs':[]})
68 | for index, ptext in chapter_ptext:
69 | inx = int(index)-1
70 | pdfs = re_pdf.findall(ptext)
71 | pdf_list = [{'name':winre.sub('', pdf[0])[:WIN_LENGTH], 'url':pdf[1]} for pdf in pdfs]
72 | self.infos[inx]['pdfs'] = pdf_list
73 | unit_list = re.findall(r'
(\d+).\s*?(\d+)(.*?)', text1)
74 | for unit_id,unit_inx1, unit_inx2,unit_name in unit_list:
75 | inx1 = int(unit_inx1)-1
76 | inx2 = int(unit_inx2)-1
77 | self.infos[inx1]['units'].append({'id': unit_id, 'name': winre.sub('',unit_name)[:WIN_LENGTH], 'pdfs':[]})
78 | m_str = match_str.format(unit_inx1, unit_inx2)
79 | match_ptext = re.search(m_str, text2)
80 | if match_ptext:
81 | ptext = match_ptext.group(1)
82 | pdfs = re_pdf.findall(ptext)
83 | pdf_list = [{'name':winre.sub('', pdf[0])[:WIN_LENGTH], 'url':pdf[1]} for pdf in pdfs]
84 | self.infos[inx1]['units'][inx2]['pdfs'] = pdf_list
85 |
def _get_course_links(self, sid):
    """Return the video and document links listed for one section.

    Posts ``{'sectionId': sid}`` to ``self.url_course`` through
    ``request_post`` and parses the reply as JSON, reading the entries
    stored under ``model`` -> ``listRes``.

    Args:
        sid: Section identifier sent as ``sectionId``.

    Returns:
        A ``(mp4_list, pdf_list)`` tuple of lists of
        ``(fullResUrl, title)`` tuples. Entries whose ``mediaType`` is
        ``'mp4'`` go to ``mp4_list``; ``'ppt'`` and ``'pdf'`` entries go
        to ``pdf_list``. Entries without ``fullResUrl`` are skipped.
        Both lists are empty when the reply is not valid JSON or holds
        no resource list.
    """
    mp4_list = []
    pdf_list = []
    text = request_post(self.url_course, {'sectionId': sid})
    try:
        infos = json.loads(text)
    except json.JSONDecodeError:
        # A non-JSON reply is treated as "no resources for this section"
        # so one bad response does not abort the whole download run.
        return mp4_list, pdf_list

    # Walk down model -> listRes defensively: either level may be absent
    # or null in the reply.
    model = infos.get('model') if isinstance(infos, dict) else None
    reslist = model.get('listRes') if isinstance(model, dict) else None

    for res in reslist or []:
        media_type = res.get('mediaType')
        if media_type == 'mp4':
            bucket = mp4_list
        elif media_type in ('ppt', 'pdf'):
            bucket = pdf_list
        else:
            continue
        if 'fullResUrl' in res:
            bucket.append((res['fullResUrl'], res['title']))
    return mp4_list, pdf_list
103 |
# Collect (url, title) pairs for the course's exam papers.
# Fetches self.url_paper + self.cid with request_get, extracts fragments of
# the page with re.findall, then reads the data-url and data-title attributes
# found in each fragment and pairs them up positionally with zip.
# Returns the accumulated list of (url, title) tuples.
104 | def _get_paper_links(self):
105 | url = self.url_paper + self.cid
106 | paper_list = []
107 | text = request_get(url)
108 | match_text = re.findall(r'
', text)
# NOTE(review): the pattern literal above is split across two lines and shows
# no capture groups, while the loop below indexes each match with [0] and
# passes it to findall as a string. Presumably the original pattern contained
# markup with capture groups that was lost - TODO restore it from the
# upstream file before relying on this method.
109 | re_url = re.compile(r'data-url="(.*?)"')
110 | re_title = re.compile(r'data-title="(.*?)"')
111 | for m_text in match_text:
# Only the first element of each match is searched for attributes.
112 | link_list = re_url.findall(m_text[0])
113 | title_list = re_title.findall(m_text[0])
# zip stops at the shorter list, so an unmatched url or title is dropped.
114 | paper_list += list(zip(link_list, title_list))
115 | return paper_list
116 |
# Collect (url, title) pairs for the course's extra resources.
# Fetches self.url_source + self.cid with request_get, extracts fragments of
# the page with re.findall, then reads the data-url and data-title attributes
# found in each fragment and pairs them up positionally with zip.
# Returns the accumulated list of (url, title) tuples.
117 | def _get_source_links(self):
118 | url = self.url_source + self.cid
119 | source_list = []
120 | text = request_get(url)
# NOTE(review): the pattern below is an empty string, which yields only empty
# matches, so m_text[0] in the loop would raise IndexError. Presumably the
# original pattern contained markup with capture groups that was lost -
# TODO restore it from the upstream file before relying on this method.
121 | match_text = re.findall(r'', text)
122 | re_url = re.compile(r'data-url="(.*?)"')
123 | re_title = re.compile(r'data-title="(.*?)"')
124 | for m_text in match_text:
# Only the first element of each match is searched for attributes.
125 | link_list = re_url.findall(m_text[0])
126 | title_list = re_title.findall(m_text[0])
# zip stops at the shorter list, so an unmatched url or title is dropped.
127 | source_list += list(zip(link_list, title_list))
128 | return source_list
129 |
def _download(self):
    """Download everything selected by ``self.mode`` into ``PATH/<title>``.

    The root directory is stored in ``self.rootDir`` and handed to
    ``Icourse_Base.potplayer.init``. Depending on the bits set in
    ``self.mode``:

    * ``IS_MP4`` / ``IS_PDF``: walks ``self.infos`` and, for every chapter
      and each of its units, creates a ``{n}--<name>`` directory under
      ``COURSENAME`` and downloads the links returned by
      ``_get_course_links`` plus the PDFs already recorded on the entry.
    * ``IS_PAPER``: downloads the links from ``_get_paper_links`` into
      ``PAPERNAME``.
    * ``IS_SOURCE``: downloads the ``.pdf`` and ``.mp4`` links from
      ``_get_source_links`` into ``SOURCENAME``.
    """

    def make_dir(parent, name):
        # exist_ok removes the check-then-create race and makes reruns
        # over an existing tree a no-op.
        path = os.path.join(parent, name)
        os.makedirs(path, exist_ok=True)
        return path

    def fetch_section(section, target_dir, prefix):
        # Shared by chapters and units: both carry an 'id' to query and a
        # 'pdfs' list collected earlier from the course page.
        mp4_list, pdf_list = self._get_course_links(section['id'])
        pdf_list += [(pdf['url'], pdf['name']) for pdf in section['pdfs']]
        if self.mode & IS_PDF:
            self.download_pdf_list(target_dir, pdf_list, prefix)
        if self.mode & IS_MP4:
            self.download_video_list(target_dir, mp4_list, prefix)

    print('\n{:^{}s}'.format(self.title, LEN_S))
    self.rootDir = rootDir = make_dir(PATH, self.title)
    Icourse_Base.potplayer.init(rootDir)

    if (self.mode & IS_MP4) or (self.mode & IS_PDF):
        courseDir = make_dir(rootDir, COURSENAME)
        print('-'*LEN_+'下载课程'+'-'*LEN_)
        Icourse_Base.potplayer.enable()
        for cnt1, info in enumerate(self.infos, 1):
            chapter = '{'+str(cnt1)+'}--'+info['name']
            print(chapter)
            chapterDir = make_dir(courseDir, chapter)
            fetch_section(info, chapterDir, '{}.'.format(cnt1))
            for cnt2, unit in enumerate(info['units'], 1):
                lesson = '{'+str(cnt2)+'}--'+unit['name']
                print(" "+lesson)
                lessonDir = make_dir(chapterDir, lesson)
                fetch_section(unit, lessonDir, '{}.{}.'.format(cnt1, cnt2))

    if self.mode & IS_PAPER:
        paperDir = make_dir(rootDir, PAPERNAME)
        print("-"*LEN_+"下载试卷"+"-"*LEN_)
        paper_list = self._get_paper_links()
        self.download_pdf_list(paperDir, paper_list)

    if self.mode & IS_SOURCE:
        sourceDir = make_dir(rootDir, SOURCENAME)
        print("-"*LEN_+"下载资源"+"-"*LEN_)
        Icourse_Base.potplayer.disable()
        source_list = self._get_source_links()
        # Resources are split by URL suffix; anything else is ignored.
        pdf_list = [item for item in source_list if item[0].endswith('.pdf')]
        mp4_list = [item for item in source_list if item[0].endswith('.mp4')]
        self.download_pdf_list(sourceDir, pdf_list)
        self.download_video_list(sourceDir, mp4_list)
184 |
def set_mode(self):
    """Prompt until the user picks a download mode or asks to quit.

    The prompt lists the selectable values 1, 2, 4 and 8 and asks for
    their sum, a number from 1 to 15. Empty input re-prompts silently;
    non-numeric or out-of-range input prints a hint and re-prompts;
    Ctrl-C prints a newline and re-prompts.

    Returns:
        True after storing the chosen number in ``self.mode``; False when
        the user enters 0 or standard input is exhausted.
    """
    prompt = (
        " 视频:[1] + 课件:[2] + 试卷:[4] + 资源:[8]\n"
        "请输入一个0-15的数选择性下载内容(如15表示全部下载,15=1+2+4+8) [0退出]: "
    )
    while True:
        try:
            instr = input(prompt)
        except KeyboardInterrupt:
            print()
            continue
        except EOFError:
            # stdin is closed or exhausted, so no answer can ever arrive;
            # treat it like the explicit "0 = quit" choice.
            return False

        if not instr:
            continue
        try:
            innum = int(instr)
        except ValueError:
            innum = None

        if innum == 0:
            return False
        if innum is not None and 1 <= innum <= 15:
            self.mode = innum
            return True
        print("请输入一个0-15之间的整数!")
208 |
209 |
def main():
    """Ask for a download mode, then fetch one hard-coded sample course."""
    # Other course URLs left by the author as alternatives:
    #   http://www.icourses.cn/sCourse/course_4860.html
    #   https://www.icourses.cn/sCourse/course_6661.html
    #   http://www.icourses.cn/sCourse/course_3459.html
    course_url = 'http://www.icourses.cn/web/sword/portal/shareDetails?cId=4860#/course/chapter'
    downloader = Icourse_Mooc()
    # set_mode() is falsy when the user chose to quit; do nothing then.
    if not downloader.set_mode():
        return
    downloader.prepare(course_url)
    downloader.download()


if __name__ == '__main__':
    main()
223 |
--------------------------------------------------------------------------------