├── foxford_downloader ├── .gitignore ├── lib │ ├── __init__.py │ ├── browser.py │ ├── helpers.py │ ├── requests_cache.py │ └── fns.py ├── requirements.txt ├── README.md └── fdl.py ├── CONTRIBUTING.md ├── PULL_REQUEST_TEMPLATE.md ├── .github └── ISSUE_TEMPLATE │ ├── feature_request.md │ └── bug_report.md ├── LICENSE ├── CODE_OF_CONDUCT.md └── README.md /foxford_downloader/.gitignore: -------------------------------------------------------------------------------- 1 | **/ 2 | !lib/ 3 | -------------------------------------------------------------------------------- /foxford_downloader/lib/__init__.py: -------------------------------------------------------------------------------- 1 | # module init 2 | -------------------------------------------------------------------------------- /foxford_downloader/requirements.txt: -------------------------------------------------------------------------------- 1 | requests==2.22.0 2 | beautifulsoup4==4.7.1 3 | more-itertools==7.1.0 4 | PyInquirer==1.0.3 5 | git+git://github.com/limitedeternity/pyppeteer@0.0.26#egg=pyppeteer 6 | async_lru==1.0.2 7 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to contribute 2 | --------------- 3 | 4 | ### **Make sure that all requirements are satisfied:** 5 | 6 | * You are using the latest version of script. 7 | 8 | * You have the latest Node.js installed. 
9 | 10 | **ONLY THEN YOU CAN SUBMIT ANY BUG REPORTS** 11 | -------------------------------------------------------------------------------- /PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Types of changes 2 | 3 | - [ ] Bug fix (non-breaking change which fixes an issue) 4 | - [ ] New feature (non-breaking change which adds functionality) 5 | - [ ] Breaking change (fix or feature that would cause existing functionality to change) 6 | - [ ] I have read the **CONTRIBUTING.md** document. 7 | - [ ] My code follows the code style of this project. 8 | - [ ] My change requires a change to the documentation. 9 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | 5 | --- 6 | 7 | **Is your feature request related to a problem? Please describe.** 8 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 9 | 10 | **Describe the solution you'd like** 11 | A clear and concise description of what you want to happen. 12 | 13 | **Describe alternatives you've considered** 14 | A clear and concise description of any alternative solutions or features you've considered. 15 | 16 | **Additional context** 17 | Add any other context or screenshots about the feature request here. 18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | 5 | --- 6 | 7 | **Describe the bug** 8 | A clear and concise description of what the bug is. 9 | 10 | **To Reproduce** 11 | Steps to reproduce the behavior: 12 | 1. Go to '...' 13 | 2. Click on '....' 
14 | 3. Scroll down to '....' 15 | 4. See error 16 | 17 | **Expected behavior** 18 | A clear and concise description of what you expected to happen. 19 | 20 | **Screenshots** 21 | If applicable, add screenshots to help explain your problem. 22 | 23 | **Desktop (please complete the following information):** 24 | - OS: [e.g. iOS] 25 | - Browser [e.g. chrome, safari] 26 | - Version [e.g. 22] 27 | 28 | **Smartphone (please complete the following information):** 29 | - Device: [e.g. iPhone6] 30 | - OS: [e.g. iOS8.1] 31 | - Browser [e.g. stock browser, safari] 32 | - Version [e.g. 22] 33 | 34 | **Additional context** 35 | Add any other context about the problem here. 36 | -------------------------------------------------------------------------------- /foxford_downloader/lib/browser.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | from async_lru import alru_cache 4 | from pyppeteer import connect, launch 5 | 6 | 7 | @alru_cache(maxsize=1, typed=False) 8 | async def get_browser_connection_url() -> str: 9 | browser = await launch( 10 | ignoreHTTPSErrors=True, 11 | headless=True, 12 | slowMo=0, 13 | args=[ 14 | "--no-sandbox", 15 | "--disable-setuid-sandbox", 16 | "--disable-gpu", 17 | "--disable-dev-shm-usage", 18 | '--proxy-server="direct://"', 19 | "--proxy-bypass-list=*" 20 | ] 21 | ) 22 | 23 | connectionUrl = browser.wsEndpoint 24 | await browser.disconnect() 25 | return connectionUrl 26 | 27 | 28 | async def terminate_browser_instance() -> None: 29 | browser_endpoint = await get_browser_connection_url() 30 | browser = await connect(browserWSEndpoint=browser_endpoint) 31 | get_browser_connection_url.cache_clear() 32 | await browser.close() 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Marise Hayashi 4 | 5 | Permission is hereby granted, 
free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /foxford_downloader/lib/helpers.py: -------------------------------------------------------------------------------- 1 | from functools import reduce 2 | from traceback import format_exc 3 | from typing import Any, Callable, Dict, Tuple, Union 4 | 5 | 6 | class Logger(): 7 | @staticmethod 8 | def error(message: str) -> None: 9 | print(f"[\033[91mE\033[0m]: \033[1m{message}\033[0m") 10 | 11 | @staticmethod 12 | def warn(message: str) -> None: 13 | print(f"[\033[93mW\033[0m]: \033[1m{message}\033[0m") 14 | 15 | @staticmethod 16 | def log(message: str) -> None: 17 | print(f"[\033[94mL\033[0m]: \033[1m{message}\033[0m") 18 | 19 | 20 | def pipe(*args: Tuple[Callable]) -> Callable: 21 | return lambda val: reduce(lambda prev, fn: fn(prev), args, val) 22 | 23 | 24 | def error_handler(fn: Callable) -> Callable: 25 | def wrapper(*args: Tuple, **kwargs: Dict): 26 | try: 27 | result: Any = fn(*args, **kwargs) 28 | if isinstance(result, dict) and "fatal_error" in result: 29 | Logger.error(result["fatal_error"]) 30 | exit(1) 31 | 32 | return result 33 | except Exception: 34 | Logger.error(format_exc()) 35 | exit(1) 36 | 37 | return wrapper 38 | -------------------------------------------------------------------------------- /foxford_downloader/lib/requests_cache.py: -------------------------------------------------------------------------------- 1 | from functools import lru_cache 2 | 3 | from requests import Response, Session 4 | from requests.adapters import HTTPAdapter 5 | from requests.cookies import cookiejar_from_dict, extract_cookies_to_jar 6 | from requests.structures import CaseInsensitiveDict 7 | from requests.utils import get_encoding_from_headers 8 | 9 | 10 | class CachedResponse(Response): 11 | @property 12 | @lru_cache(maxsize=1, typed=False) 13 | def content(self): 14 | return super().content 15 | 16 | @property 17 | @lru_cache(maxsize=1, typed=False) 18 | def text(self): 19 | 
return super().text 20 | 21 | @lru_cache(maxsize=1, typed=False) 22 | def json(self, **kwargs): 23 | return super().json(**kwargs) 24 | 25 | 26 | class CachedHTTPAdapter(HTTPAdapter): 27 | def build_response(self, req, resp): 28 | response = CachedResponse() 29 | response.status_code = getattr(resp, "status", None) 30 | response.headers = CaseInsensitiveDict(getattr(resp, "headers", {})) 31 | response.encoding = get_encoding_from_headers(response.headers) 32 | response.raw = resp 33 | response.reason = resp.reason 34 | 35 | if isinstance(req.url, bytes): 36 | response.url = req.url.decode("utf-8") 37 | 38 | else: 39 | response.url = req.url 40 | 41 | extract_cookies_to_jar(response.cookies, req, resp) 42 | response.request = req 43 | response.connection = self 44 | return response 45 | 46 | 47 | class CachedSession(): 48 | def __new__(self): 49 | s = Session() 50 | a = CachedHTTPAdapter(max_retries=3) 51 | s.mount("http://", a) 52 | s.mount("https://", a) 53 | return s 54 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 
6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | * Trolling, insulting/derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | ## Our Responsibilities 26 | 27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 28 | 29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | ## Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 
34 | 35 | ## Enforcement 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at Telegram: @limitedeternity. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 38 | 39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 40 | 41 | ## Attribution 42 | 43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] 44 | 45 | [homepage]: http://contributor-covenant.org 46 | [version]: http://contributor-covenant.org/version/1/4/ 47 | -------------------------------------------------------------------------------- /foxford_downloader/README.md: -------------------------------------------------------------------------------- 1 | # Как сохранить материалы? 2 | 3 | ## Метод 1 4 | 5 | _Сложность: средняя (для пользователей десктопов)_ 6 | 7 | 1. Устанавливаем Python 3.7+ и Git. На Linux есть `apt-get` (или что там), на OSX есть [`homebrew`](https://brew.sh/), под Windows есть [`chocolatey`](https://chocolatey.org/install). 8 | 9 | 2. [Клонируем репозиторий](https://github.com/limitedeternity/foxford_courses/archive/master.zip), распаковываем, переходим в папку, где лежит **этот** гайд. 10 | 11 | 3. Открываем **здесь** терминал и выполняем `pip install -Ur requirements.txt`. (На Linux/OSX - `pip3 install -Ur requirements.txt`) 12 | 13 | 4. Выполняем `python fdl.py` (На Linux/OSX - `python3 fdl.py`). 14 | 15 | 5. Логинимся, выбираем курс и скачиваемые материалы. 16 | 17 | 6. Всё. 

### Примечания:

- Это метод для скачивания всего необходимого.

- Есть возобновление процесса. Нужно удалить поврежденные материалы и запустить `fdl.py` снова.

- Если во время скачивания выдало ошибку, нужно остановить выполнение программы, нажав в терминале `Ctrl + C`, и перезапустить `fdl.py`.

- Для проверки целостности **обязательно** запускай `fdl.py` повторно до тех пор, пока программа не завершится "со всеми галочками" и без ошибок.

## Метод 2

_Сложность: средняя (для любителей эмуляторов)_

1. [Устанавливаем MEmu](https://www.memuplay.com/).

2. [Включаем Root-права](https://youtu.be/UYl5zPSnugA).

3. Устанавливаем приложение Фоксфорд.Курсы.

4. Логинимся, выбираем курс, жмем рядом с уроком кнопочку, как бы намекающую на "Скачать".

5. [Устанавливаем Root Browser](https://play.google.com/store/apps/details?id=com.jrummy.root.browserfree).

6. Переходим в `/data/data/ru.foxford.webinars`, ищем .mp4 файлы, копируем их в `/storage/emulated/0/Download/`.

7. [Забираем на ПК](https://www.memuplay.com/blog/2016/06/04/how-to-share-file-between-android-and-windows/).

### Примечания:

- Это метод для скачивания видео

# Чейнджлог (Крупные апдейты)

## 18.06.2017 (v1)

Реализована первая версия методом "проб и ошибок".

С помощью расширения (https://chrome.google.com/webstore/detail/network-sniffer/coblekblkacfilmgdghecpekhadldjfj) необходимо перехватить момент перехода к видео. Если нажать на поле со ссылкой, соответствующей видео, откроется поле, где в Request Headers будет параметр Cookie.

В скрипт вводится 2 значения: адрес видео и куки.
На выходе получается файл b64.html, в котором находится ссылка, при переходе по которой происходило перенаправление на плеер, выдающий mp4, который качался с помощью расширения (https://chrome.google.com/webstore/detail/video-downloader-pro/ilppkoakomgpcblpemgbloapenijdcho). 60 | 61 | (Отдельное спасибо _Paravozik_Lesha_ за тестирование пре-релизной версии и терпение моего характера c: ) 62 | 63 | --- 64 | 65 | ## 21.06.2017 (v2) 66 | 67 | Реализована полу-автоматическая система. 68 | 69 | Вбивается ссылка на видео и человека перенаправляет сразу в плеер, где с помощью нажатия "Сохранить как..." можно сохранить вебинар на диск (логично). 70 | 71 | Примерно тогда же была реализована система сохранения ДЗ. 72 | 73 | --- 74 | 75 | ## 30.07.2017 (v3) 76 | 77 | Реализована полная автоматика и какой-никакой интерфейс. 78 | 79 | Все скрипты были объединены в один. 80 | 81 | Перевод на русский язык (да-да, наканецта). 82 | 83 | Загрузка курсов была переписана _Stanley Kowalski_. Пришлось много чинить, конечно, но это не отменяет моего "спасиба" за факт реализации "другим способом" и то, что именно он подтолкнул меня на объединение обоих скриптов. 84 | 85 | --- 86 | 87 | ## 05.08.2017 (v4) 88 | 89 | Исправлено множество ошибок. 90 | 91 | Разделение на модули для легкости починки. 92 | 93 | Добавлено сохранение теории и ДЗ. 94 | 95 | Объединение всех действий в единый оператор. За один проход теперь можно скачать весь материал полностью. 96 | 97 | Сделан режим "только видео" и сортировка материала. (Отдельное спасибо _@kuzminovdmit_) 98 | 99 | Добавлено возобновление загрузки на случай, если что-то пойдет не так, чтобы не ждать по-новой. Необходимо просто выбрать после перезапуска пункт меню, который выбирался до этого. 100 | 101 | Создание файла "video.skips", содержащего число, приведет к пропуску видео на соответствующее число. (0 - Вводное занятие, 1 - Первое + Вводное ...) 102 | 103 | --- 104 | 105 | ## 25.12.2017 (v5) 106 | 107 | Перенесен на Node.js. 
Прекращена поддержка сохранения ДЗ. 108 | 109 | --- 110 | 111 | ## 29.08.2018 (v5.5) 112 | 113 | Скомпилирован. Проведена работа над дизайном. 114 | 115 | --- 116 | 117 | ## 29.01.2019 (v5.6) 118 | 119 | Возвращение легенды - автоматическое сохранение ДЗ. 120 | 121 | --- 122 | 123 | ## 16.07.2019 (v6) 124 | 125 | FDL, написанный на Node.js, был переименован в HWDL и сделан утилитой для сохранения ДЗ. 126 | 127 | Сам же скрипт и его алгоритмическая составляющая были переписаны на Python 3.7. 128 | 129 | Добавлено сохранение сообщений из чата и презентаций. 130 | 131 | --- 132 | 133 | ## 07.08.2019 (v6.5) 134 | 135 | HWDL, написанный на Node.js, был удалён, а его функционал - встроен в FDL. 136 | 137 | --- 138 | 139 | _Текущая версия_: **v6.5** 140 | 141 | --- 142 | 143 | - _Идея, поддержка и написание: `limitedeternity`_ 144 | 145 | - _Тестирование и идеи: `Stanley Kowalski` и `Paravozik_Lesha` (v1-v3)_ 146 | 147 | - _VideoDownloader(): `Stanley Kowalski` (v3)_ 148 | 149 | (Специально для [2ch.hk/un/](https://2ch.hk/un/)) 150 | -------------------------------------------------------------------------------- /foxford_downloader/fdl.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from argparse import ArgumentParser, Namespace 3 | from itertools import chain 4 | from multiprocessing import Pool, cpu_count 5 | from pathlib import Path 6 | from typing import Dict, Iterable, List, Tuple 7 | 8 | from PyInquirer import prompt 9 | 10 | from lib.browser import terminate_browser_instance 11 | from lib.fns import * 12 | from lib.helpers import Logger, pipe 13 | from lib.requests_cache import CachedSession 14 | 15 | 16 | def main(params: Dict) -> None: 17 | session: CachedSession = CachedSession() 18 | credential_query: Dict[str, str] = params if params["email"] and params["password"] else prompt([ 19 | { 20 | "type": "input", 21 | "name": "email", 22 | "message": "Email" 23 | }, 24 | { 25 | "type": "password", 26 | 
"name": "password", 27 | "message": "Password" 28 | } 29 | ]) 30 | 31 | Logger.log("Fetching course list...") 32 | 33 | user_courses: Tuple[Dict] = get_user_courses( 34 | login( 35 | credential_query["email"], 36 | credential_query["password"], 37 | session 38 | ) 39 | ) 40 | 41 | course_query: Dict[str, str] = prompt([ 42 | { 43 | "type": "list", 44 | "name": "course", 45 | "message": "Select course", 46 | "choices": map(lambda obj: f"({obj['grades_range']}) {obj['name']} - {obj['subtitle']}", user_courses) 47 | } 48 | ]) 49 | 50 | selected_course: Dict = next( 51 | filter( 52 | lambda obj: f"({obj['grades_range']}) {obj['name']} - {obj['subtitle']}" == course_query["course"], 53 | user_courses 54 | ) 55 | ) 56 | 57 | Logger.log("Fetching lesson list...") 58 | 59 | ( 60 | course_lessons_with_video, 61 | course_lessons_with_homework, 62 | course_lessons_with_conspect 63 | ) = pipe( 64 | lambda course_id: get_course_lessons(course_id, session), 65 | lambda all_lessons: filter( 66 | lambda lesson: lesson["access_state"] == "available" and not lesson["is_locked"], 67 | all_lessons 68 | ), 69 | tuple, 70 | lambda available_lessons: map( 71 | lambda that_include: filter( 72 | lambda lesson: 73 | "available" in lesson[f"{that_include}_status"] and 74 | "not" not in lesson[f"{that_include}_status"], 75 | available_lessons 76 | ), [ 77 | "webinar", 78 | "homework", 79 | "conspect" 80 | ] 81 | ), 82 | lambda map_of_filters: map(tuple, map_of_filters) 83 | )(selected_course["resource_id"]) 84 | 85 | options_check: Dict[str, List[str]] = prompt([ 86 | { 87 | "type": "checkbox", 88 | "message": "What to fetch", 89 | "name": "actions", 90 | "choices": [ 91 | { 92 | "name": "Resources", 93 | "checked": True 94 | }, 95 | { 96 | "name": "Homework" 97 | }, 98 | { 99 | "name": "Conspects" 100 | } 101 | ] 102 | } 103 | ]) 104 | 105 | if "Resources" in options_check["actions"]: 106 | Logger.warn("Resources collection started") 107 | Logger.log("Fetching resources links...") 108 | 109 
| resources_for_lessons: Tuple[Dict] = get_resources_for_lessons( 110 | selected_course["resource_id"], 111 | map( 112 | lambda obj: obj["webinar_id"], 113 | course_lessons_with_video 114 | ), 115 | session 116 | ) 117 | 118 | paths: Iterable[Path] = build_dir_hierarchy( 119 | selected_course["name"], 120 | selected_course["subtitle"], 121 | selected_course["grades_range"], 122 | course_lessons_with_video 123 | ) 124 | 125 | Logger.log("Downloading resources...") 126 | 127 | pool = Pool(cpu_count()) 128 | pool.starmap( 129 | download_resources, 130 | map( 131 | lambda res_obj, path: [ 132 | { 133 | **res_obj, 134 | "destination": path 135 | }, 136 | session 137 | ], 138 | resources_for_lessons, 139 | paths 140 | ) 141 | ) 142 | 143 | pool.close() 144 | pool.join() 145 | Logger.warn("Resources collection finished") 146 | 147 | coro_list = [] 148 | semaphore = asyncio.Semaphore(2 if cpu_count() > 1 else 1) 149 | 150 | if "Homework" in options_check["actions"]: 151 | Logger.warn("Homework collection started") 152 | Logger.log("Collecting tasks...") 153 | 154 | lesson_tasks: Iterable[List[Dict]] = get_lesson_tasks( 155 | map( 156 | lambda obj: obj["id"], 157 | course_lessons_with_homework 158 | ), 159 | session 160 | ) 161 | 162 | task_urls: Iterable[Iterable[str]] = construct_task_urls( 163 | map( 164 | lambda obj: obj["id"], 165 | course_lessons_with_homework 166 | ), 167 | lesson_tasks 168 | ) 169 | 170 | paths: Iterable[Path] = build_dir_hierarchy( 171 | selected_course["name"], 172 | selected_course["subtitle"], 173 | selected_course["grades_range"], 174 | course_lessons_with_homework 175 | ) 176 | 177 | Logger.warn( 178 | "Fetched tasks details. Homework collection will start soon..." 
179 | ) 180 | 181 | coro_list.extend( 182 | chain.from_iterable( 183 | map( 184 | lambda url_tuple, path: map( 185 | lambda url: save_page( 186 | url, 187 | path, 188 | "homework", 189 | map( 190 | lambda item: { 191 | "name": item[0], 192 | "value": item[1], 193 | "domain": ".foxford.ru", 194 | "path": "/" 195 | }, 196 | session.cookies.get_dict().items() 197 | ), 198 | semaphore 199 | ), 200 | url_tuple 201 | ), 202 | task_urls, 203 | paths 204 | ) 205 | ) 206 | ) 207 | 208 | if "Conspects" in options_check["actions"]: 209 | Logger.warn("Conspects collection started") 210 | 211 | conspect_urls: Iterable[Tuple[str]] = construct_conspect_urls( 212 | map( 213 | lambda obj: obj["id"], 214 | course_lessons_with_conspect 215 | ), 216 | map( 217 | lambda obj: obj["conspect_blocks_count"], 218 | course_lessons_with_conspect 219 | ) 220 | ) 221 | 222 | paths: Iterable[Path] = build_dir_hierarchy( 223 | selected_course["name"], 224 | selected_course["subtitle"], 225 | selected_course["grades_range"], 226 | course_lessons_with_conspect 227 | ) 228 | 229 | Logger.warn( 230 | "Fetched conspects details. Conspects collection will start soon..." 231 | ) 232 | 233 | coro_list.extend( 234 | chain.from_iterable( 235 | map( 236 | lambda url_tuple, path: map( 237 | lambda url: save_page( 238 | url, 239 | path, 240 | "conspects", 241 | map( 242 | lambda item: { 243 | "name": item[0], 244 | "value": item[1], 245 | "domain": ".foxford.ru", 246 | "path": "/" 247 | }, 248 | session.cookies.get_dict().items() 249 | ), 250 | semaphore 251 | ), 252 | url_tuple 253 | ), 254 | conspect_urls, 255 | paths 256 | ) 257 | ) 258 | ) 259 | 260 | if coro_list: 261 | Logger.warn("Actual collection started") 262 | 263 | asyncio.get_event_loop().run_until_complete( 264 | asyncio.wait( 265 | coro_list 266 | ) 267 | ) 268 | 269 | Logger.warn("Collection finished. 
Quitting...") 270 | asyncio.get_event_loop().run_until_complete(asyncio.sleep(0.5)) 271 | asyncio.get_event_loop().run_until_complete(terminate_browser_instance()) 272 | 273 | 274 | if __name__ == "__main__": 275 | parser: ArgumentParser = ArgumentParser() 276 | parser.add_argument("--email", type=str, required=False) 277 | parser.add_argument("--password", type=str, required=False) 278 | 279 | args: Namespace = parser.parse_args() 280 | main(args.__dict__) 281 | -------------------------------------------------------------------------------- /foxford_downloader/lib/fns.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from collections import deque 3 | from datetime import datetime 4 | from pathlib import Path 5 | from re import Match, match 6 | from typing import Dict, Iterable, List, Tuple, Union 7 | from urllib import parse 8 | 9 | import requests 10 | from bs4 import BeautifulSoup, Tag 11 | from more_itertools import unique_everseen 12 | from pyppeteer import connect 13 | 14 | from .browser import get_browser_connection_url 15 | from .helpers import error_handler, pipe 16 | from .requests_cache import CachedResponse, CachedSession 17 | 18 | 19 | @error_handler 20 | def get_csrf_token(session: CachedSession) -> str: 21 | csrf_token_get_response: CachedResponse = session.get( 22 | "https://foxford.ru/api/csrf_token", 23 | headers={ 24 | "X-Requested-With": "XMLHttpRequest" 25 | } 26 | ) 27 | 28 | if csrf_token_get_response.status_code != 200: 29 | return {"fatal_error": "CSRF token fetch has failed"} 30 | 31 | if "token" not in csrf_token_get_response.json(): 32 | return {"fatal_error": "CSRF token structure is unknown"} 33 | 34 | return csrf_token_get_response.json()["token"] 35 | 36 | 37 | @error_handler 38 | def login(email: str, password: str, session: CachedSession) -> CachedSession: 39 | if not email or not password: 40 | return {"fatal_error": "No credentials provided"} 41 | 42 | 
credential_post_response: CachedResponse = session.post( 43 | "https://foxford.ru/user/login", 44 | headers={ 45 | "X-CSRF-Token": get_csrf_token(session), 46 | "X-Requested-With": "XMLHttpRequest" 47 | }, 48 | json={ 49 | "user": { 50 | "email": email, 51 | "password": password 52 | } 53 | } 54 | ) 55 | 56 | if credential_post_response.status_code != 200: 57 | return {"fatal_error": "Wrong credentials"} 58 | 59 | return session 60 | 61 | 62 | def get_user_courses(session: CachedSession) -> Tuple[Dict]: 63 | @error_handler 64 | def recursive_collection(page_num: int) -> Tuple[Dict]: 65 | course_list_response: CachedResponse = session.get( 66 | f"https://foxford.ru/api/user/bookmarks?page={page_num}&archived=false", 67 | headers={ 68 | "X-CSRF-Token": get_csrf_token(session), 69 | "X-Requested-With": "XMLHttpRequest" 70 | } 71 | ) 72 | 73 | if course_list_response.status_code != 200: 74 | return {"fatal_error": "Course list fetch has failed"} 75 | 76 | if "bookmarks" not in course_list_response.json(): 77 | return {"fatal_error": "Course list structure is unknown"} 78 | 79 | if all(False for _ in course_list_response.json()["bookmarks"]): 80 | return () 81 | 82 | if not {"name", "subtitle", "resource_id"}.issubset(set(course_list_response.json()["bookmarks"][0])): 83 | return {"fatal_error": "Course structure is unknown"} 84 | 85 | return ( 86 | *course_list_response.json()["bookmarks"], 87 | *recursive_collection(page_num + 1) 88 | ) 89 | 90 | return recursive_collection(1) 91 | 92 | 93 | class get_course_lessons(): 94 | @error_handler 95 | def __new__(self, course_id: int, session: CachedSession) -> Iterable[Dict]: 96 | lesson_list_at_somewhere_response: CachedResponse = session.get( 97 | f"https://foxford.ru/api/courses/{course_id}/lessons", 98 | headers={ 99 | "X-Requested-With": "XMLHttpRequest" 100 | } 101 | ) 102 | 103 | if lesson_list_at_somewhere_response.status_code != 200: 104 | return {"fatal_error": "Lesson list fetch has failed"} 105 | 106 | if not 
{"lessons", "cursors"}.issubset(set(lesson_list_at_somewhere_response.json())): 107 | return {"fatal_error": "Lesson list structure is unknown"} 108 | 109 | if "id" not in lesson_list_at_somewhere_response.json()["lessons"][0]: 110 | return {"fatal_error": "Lesson structure is unknown"} 111 | 112 | self.course_id = course_id 113 | self.session = session 114 | 115 | return pipe( 116 | lambda json: ( 117 | *self.recursive_collection( 118 | self, 119 | "before", 120 | json["cursors"]["before"] 121 | ), 122 | *json["lessons"], 123 | *self.recursive_collection( 124 | self, 125 | "after", 126 | json["cursors"]["after"] 127 | ) 128 | ), 129 | lambda lessons: map( 130 | lambda lesson: self.lesson_extension(self, lesson), 131 | lessons 132 | ) 133 | )(lesson_list_at_somewhere_response.json()) 134 | 135 | @error_handler 136 | def recursive_collection(self, direction: str, cursor: Union[int, None]) -> Tuple[Dict]: 137 | if not cursor: 138 | return () 139 | 140 | lesson_list_at_direction_response: CachedResponse = self.session.get( 141 | f"https://foxford.ru/api/courses/{self.course_id}/lessons?{direction}={cursor}", 142 | headers={ 143 | "X-Requested-With": "XMLHttpRequest" 144 | } 145 | ) 146 | 147 | if lesson_list_at_direction_response.status_code != 200: 148 | return {"fatal_error": "Lesson list fetch has failed"} 149 | 150 | if not {"lessons", "cursors"}.issubset(set(lesson_list_at_direction_response.json())): 151 | return {"fatal_error": "Lesson list structure is unknown"} 152 | 153 | if "id" not in lesson_list_at_direction_response.json()["lessons"][0]: 154 | return {"fatal_error": "Lesson structure is unknown"} 155 | 156 | if direction == "before": 157 | return ( 158 | *self.recursive_collection( 159 | self, 160 | direction, 161 | lesson_list_at_direction_response 162 | .json()["cursors"][direction] 163 | ), 164 | *lesson_list_at_direction_response.json()["lessons"] 165 | ) 166 | else: 167 | return ( 168 | *lesson_list_at_direction_response.json()["lessons"], 169 | 
*self.recursive_collection( 170 | self, 171 | direction, 172 | lesson_list_at_direction_response 173 | .json()["cursors"][direction] 174 | ) 175 | ) 176 | 177 | @error_handler 178 | def lesson_extension(self, lesson: Dict) -> Dict: 179 | lesson_extension_response: CachedResponse = self.session.get( 180 | f"https://foxford.ru/api/courses/{self.course_id}/lessons/{lesson['id']}", 181 | headers={ 182 | "X-Requested-With": "XMLHttpRequest" 183 | } 184 | ) 185 | 186 | if lesson_extension_response.status_code != 200: 187 | return {"fatal_error": "Lesson extension fetch has failed"} 188 | 189 | if not {"webinar_id", "access_state", "webinar_status", "is_locked"}.issubset(set(lesson_extension_response.json())): 190 | return {"fatal_error": "Lesson extension structure is unknown"} 191 | 192 | return lesson_extension_response.json() 193 | 194 | 195 | class get_resources_for_lessons(): 196 | def __new__(self, course_id: int, webinar_ids: Iterable[int], session: CachedSession) -> Tuple[Dict]: 197 | self.course_id = course_id 198 | self.webinar_ids = webinar_ids 199 | self.session = session 200 | return self.recursive_collection(self) 201 | 202 | @error_handler 203 | def recursive_collection(self) -> Tuple[Dict]: 204 | webinar_id: Union[int, None] = next(self.webinar_ids, None) 205 | 206 | if not webinar_id: 207 | return () 208 | 209 | video_source_response: CachedResponse = self.session.get( 210 | f"https://foxford.ru/groups/{webinar_id}" 211 | ) 212 | 213 | if video_source_response.status_code != 200: 214 | return {"fatal_error": "Video source fetch has failed"} 215 | 216 | return ( 217 | pipe( 218 | lambda res: self.retrieve_erly_iframe_src(self, res), 219 | lambda src: self.construct_resource_links(self, src) 220 | )(video_source_response), 221 | *self.recursive_collection(self) 222 | ) 223 | 224 | @error_handler 225 | def retrieve_erly_iframe_src(self, video_source_response: CachedResponse) -> str: 226 | erly_iframe: Union[Tag, None] = pipe( 227 | lambda r_content: 
BeautifulSoup( 228 | r_content, 229 | "html.parser" 230 | ), 231 | lambda soup: soup.select_one( 232 | "div.full_screen > iframe" 233 | ) 234 | )(video_source_response.content) 235 | 236 | if not erly_iframe: 237 | return {"fatal_error": ".full_screen > iframe wasn't found"} 238 | 239 | erly_iframe_src: Union[str, None] = erly_iframe.get("src") 240 | 241 | if not erly_iframe_src: 242 | return {"fatal_error": ".full_screen > iframe doesn't have src attribute"} 243 | 244 | return erly_iframe_src 245 | 246 | @error_handler 247 | def construct_resource_links(self, erly_iframe_src: str) -> Dict: 248 | search_params: Dict = dict( 249 | parse.parse_qsl( 250 | parse.urlparse(erly_iframe_src).query 251 | ) 252 | ) 253 | 254 | if not {"conf", "access_token"}.issubset(set(search_params)): 255 | return {"fatal_error": "Iframe src search params structure is unknown"} 256 | 257 | webinar_id_match: Union[Match, None] = match( 258 | r"^webinar-(\d+)$", search_params.get("conf") 259 | ) 260 | 261 | if not webinar_id_match: 262 | return {"fatal_error": "Unable to extract webinar id"} 263 | 264 | return { 265 | "video": f"https://storage.netology-group.services/api/v1/buckets/ms.webinar.foxford.ru/sets/{webinar_id_match[1]}/objects/mp4?access_token={search_params.get('access_token')}", 266 | "events": f"https://storage.netology-group.services/api/v1/buckets/meta.webinar.foxford.ru/sets/{webinar_id_match[1]}/objects/events.json?access_token={search_params.get('access_token')}" 267 | } 268 | 269 | 270 | def get_lesson_tasks(lesson_ids: Iterable[int], session: CachedSession) -> Iterable[List[Dict]]: 271 | @error_handler 272 | def fetch(lesson_id: int) -> List[Dict]: 273 | tasks_response: CachedResponse = session.get( 274 | f"https://foxford.ru/api/lessons/{lesson_id}/tasks", 275 | headers={ 276 | "X-Requested-With": "XMLHttpRequest" 277 | } 278 | ) 279 | 280 | if tasks_response.status_code != 200: 281 | return {"fatal_error": "Tasks fetch has failed"} 282 | 283 | if "id" not in 
tasks_response.json()[0]: 284 | return {"fatal_error": "Task structure is unknown"} 285 | 286 | return tasks_response.json() 287 | 288 | return map(fetch, lesson_ids) 289 | 290 | 291 | def construct_task_urls(lesson_ids: Iterable[int], lesson_tasks: Iterable[List[Dict]]) -> Iterable[Iterable[str]]: 292 | def combination(lesson_id: int, task_list: List[Dict]) -> Iterable[str]: 293 | return map( 294 | lambda task: f"https://foxford.ru/lessons/{lesson_id}/tasks/{task['id']}", 295 | task_list 296 | ) 297 | 298 | return map( 299 | combination, 300 | lesson_ids, 301 | lesson_tasks 302 | ) 303 | 304 | 305 | def construct_conspect_urls(lesson_ids: Iterable[int], conspect_amount: Iterable[int]) -> Iterable[Tuple[str]]: 306 | def recursive_collection(lesson_id: int, amount: int) -> Tuple[str]: 307 | if amount == 0: 308 | return () 309 | 310 | return ( 311 | *recursive_collection(lesson_id, amount - 1), 312 | f"https://foxford.ru/lessons/{lesson_id}/conspects/{amount}" 313 | ) 314 | 315 | return map( 316 | recursive_collection, 317 | lesson_ids, 318 | conspect_amount 319 | ) 320 | 321 | 322 | def build_dir_hierarchy(course_name: str, course_subtitle: str, grade: str, lessons: Iterable[Dict]) -> Iterable[Path]: 323 | def sanitize_string(string: str) -> str: 324 | return pipe( 325 | lambda char_list: filter( 326 | lambda char: char.isalpha() or char.isdigit() or char == " ", char_list 327 | ), 328 | lambda iterable: "".join(iterable), 329 | lambda filtered_char_list: filtered_char_list[:30].strip() 330 | )(string) 331 | 332 | def create_dir(lesson: Dict) -> Path: 333 | constructed_path: Path = Path( 334 | Path.cwd(), 335 | ( 336 | f"({grade}) " + 337 | sanitize_string(course_name) + 338 | " - " + 339 | sanitize_string(course_subtitle) 340 | ).strip(), 341 | ( 342 | f"({lesson['number']}) " + 343 | sanitize_string(lesson['title']) 344 | ).strip() 345 | ) 346 | 347 | if not constructed_path.exists(): 348 | constructed_path.mkdir(parents=True) 349 | 350 | return constructed_path 
351 | 352 | return map( 353 | create_dir, 354 | lessons 355 | ) 356 | 357 | 358 | def download_resources(res_with_path: Dict, session: CachedSession) -> None: 359 | @error_handler 360 | def download_url(url: str, dest: Path) -> None: 361 | with requests.get(url, stream=True) as r: 362 | if r.status_code != 200: 363 | return {"fatal_error": "Video fetch has failed"} 364 | 365 | with dest.open("wb") as f: 366 | deque( 367 | map( 368 | lambda chunk: f.write(chunk), 369 | filter(None, r.iter_content(10 * 1024)) 370 | ), 371 | 0 372 | ) 373 | 374 | def save_video() -> None: 375 | if res_with_path["destination"].joinpath("video.mp4").exists(): 376 | return 377 | 378 | download_url( 379 | res_with_path["video"], 380 | res_with_path["destination"].joinpath("video.mp4") 381 | ) 382 | 383 | @error_handler 384 | def parse_and_save_event_data() -> None: 385 | if res_with_path["destination"].joinpath("message_log.txt").exists(): 386 | return 387 | 388 | events_response: CachedResponse = session.get( 389 | res_with_path["events"] 390 | ) 391 | 392 | if events_response.status_code != 200: 393 | return {"fatal_error": "Events fetch has failed"} 394 | 395 | if "meta" not in events_response.json()[0]: 396 | return {"fatal_error": "Events structure is unknown"} 397 | 398 | with res_with_path["destination"].joinpath("message_log.txt").open("w", errors="replace") as f: 399 | pipe( 400 | lambda json: filter( 401 | lambda obj: obj["meta"]["action"] == "message", 402 | json 403 | ), 404 | lambda messages: map( 405 | lambda msg: f"[{datetime.fromtimestamp(msg['meta']['time'])}] {msg['meta']['user_name']}: {parse.unquote(msg['meta']['body'])}", 406 | messages 407 | ), 408 | lambda message_log: "\n".join(message_log), 409 | f.write 410 | )(events_response.json()) 411 | 412 | pipe( 413 | lambda json: filter( 414 | lambda obj: 415 | (obj["meta"]["action"] == "add_tab" or 416 | obj["meta"]["action"] == "change_tab") and 417 | obj["meta"]["content_type"] == "pdf", 418 | json 419 | ), 420 | 
lambda pdfs: map( 421 | lambda pdf: pdf["meta"]["url"], 422 | pdfs 423 | ), 424 | unique_everseen, 425 | lambda urls: enumerate(urls, 1), 426 | lambda enumed_urls: map( 427 | lambda item: download_url( 428 | item[1], 429 | res_with_path["destination"] 430 | .joinpath(f"{item[0]}.pdf") 431 | ), 432 | enumed_urls 433 | ), 434 | lambda task_map: deque(task_map, 0) 435 | )(events_response.json()) 436 | 437 | save_video() 438 | parse_and_save_event_data() 439 | print( 440 | f"-> {res_with_path['destination'].name}: \033[92m\u2713\033[0m" 441 | ) 442 | 443 | 444 | async def save_page(url: str, path: Path, folder: str, cookies: Iterable[Dict], semaphore: asyncio.Semaphore) -> None: 445 | async with semaphore: 446 | if not path.joinpath(folder).joinpath(url.split("/")[-1] + ".pdf").exists(): 447 | browser_endpoint = await get_browser_connection_url() 448 | browser = await connect(browserWSEndpoint=browser_endpoint) 449 | page = await browser.newPage() 450 | await page.emulateMedia("screen") 451 | await page.setViewport({"width": 411, "height": 823}) 452 | await page.setCookie(*cookies) 453 | await page.goto(url, {"waitUntil": "domcontentloaded"}) 454 | 455 | if await page.waitForFunction("() => window.MathJax", timeout=10000): 456 | await asyncio.sleep(3.5) 457 | await page.evaluate(""" 458 | async function() { 459 | await new Promise(function(resolve) { 460 | window.MathJax.Hub.Register.StartupHook( 461 | "End", 462 | resolve 463 | ) 464 | }) 465 | } 466 | """) 467 | await asyncio.sleep(0.1) 468 | 469 | await page.evaluate(""" 470 | document.querySelectorAll(".toggle_element > .toggle_content").forEach(el => el.style.display = "block") 471 | """, force_expr=True) 472 | await asyncio.sleep(0.1) 473 | 474 | await page.evaluate(""" 475 | document.querySelector("#cc_container").remove() 476 | """, force_expr=True) 477 | await asyncio.sleep(0.1) 478 | 479 | if not path.joinpath(folder).exists(): 480 | path.joinpath(folder).mkdir() 481 | 482 | 
path.joinpath(folder).joinpath(url.split("/")[-1] + ".pdf").touch() 483 | 484 | await page.pdf({ 485 | "path": str(path.joinpath(folder).joinpath(url.split("/")[-1] + ".pdf")), 486 | "printBackground": True 487 | }) 488 | 489 | await page.close() 490 | await browser.disconnect() 491 | 492 | print( 493 | f"-> {folder}/{url.split('/')[-3]}/{url.split('/')[-1]}: \033[92m\u2713\033[0m" 494 | ) 495 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # /un/ 2 | 3 |
Incoming message:
Друзья, много времени прошло с того момента, как я начал работу над FDL. Три года, если быть точным. И последней версией оказалась 6.5. Я внёс последнюю строчку кода с этим коммитом и решил, что мне пора двигаться дальше, поэтому я архивирую репозиторий, никаких изменений вноситься больше не будет. Я его оставлю для себя на память.