├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── documentation-improvement.md │ └── feature_request.md ├── dependabot.yml ├── issue_label_bot.yaml ├── stale.yml └── workflows │ ├── code-checks.yml │ └── unittest.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── codecov.yml ├── config.yaml ├── dev-requirements.txt ├── media_downloader.py ├── mypy.ini ├── pylintrc ├── requirements.txt ├── setup.py ├── tests ├── __init__.py ├── test_media_downloader.py └── utils │ ├── __init__.py │ ├── test_file_management.py │ ├── test_log.py │ ├── test_meta.py │ └── test_updates.py └── utils ├── __init__.py ├── file_management.py ├── log.py ├── meta.py └── updates.py /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: Dineshkarthik 2 | custom: ["https://www.buymeacoffee.com/dkraveendran"] 3 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve by fixing bugs 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Share the config: `Please don't share your api_hash & api_id` 15 | ```yaml 16 | chat_id: telegram_chat_id 17 | last_read_message_id: 0 18 | media_types: 19 | - audio 20 | - photo 21 | - video 22 | - document 23 | - voice 24 | file_formats: 25 | audio: 26 | - all 27 | document: 28 | - all 29 | video: 30 | - all 31 | ``` 32 | 33 | **Python Version** 34 | Python: [e.g. 3.7.7] 35 | 36 | **OS:** 37 | The OS and its version: [e.g. 
Ubuntu 20.04] 38 | 39 | **Logs** 40 | Logs showing the exception 41 | 42 | **Additional context** 43 | Add any other context about the problem here. 44 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/documentation-improvement.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Documentation Improvement 3 | about: Report wrong or missing documentation. 4 | title: 'DOC:' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | #### Location of the documentation 11 | 12 | [this should provide the location of the documentation, e.g. "CONTRIBUTION.md" or the URL of the documentation, e.g. "https://github.com/Dineshkarthik/telegram_media_downloader/blob/master/CONTRIBUTING.md"] 13 | 14 | #### Documentation problem 15 | 16 | [this should provide a description of what documentation you believe needs to be fixed/improved] 17 | 18 | #### Suggested fix for documentation 19 | 20 | [this should explain the suggested fix and **why** it's better than the existing documentation] 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: pip 4 | directory: "/" 5 | schedule: 6 | interval: weekly 7 | time: "12:00" 8 | timezone: CET 9 | open-pull-requests-limit: 10 10 | reviewers: 11 | - Dineshkarthik 12 | -------------------------------------------------------------------------------- /.github/issue_label_bot.yaml: -------------------------------------------------------------------------------- 1 | label-alias: 2 | bug: 'kind/bug' 3 | feature_request: 'enhancement' 4 | question: 'question' 5 | -------------------------------------------------------------------------------- /.github/stale.yml: -------------------------------------------------------------------------------- 1 | # Number of days of inactivity before an issue becomes stale 2 | daysUntilStale: 90 3 | # Number of days of inactivity before a stale issue is closed 4 | daysUntilClose: 7 5 | # Issues with these labels will never be considered stale 6 | exemptLabels: 7 | - enhancement 8 | - feature_request 9 | - help wanted 10 | - good first issue 11 | - dependencies 12 | - bug 13 | # Label to use when marking as stale 14 | staleLabel: stale 15 | # Comment to post when marking an issue as stale. Set to `false` to disable 16 | markComment: > 17 | This issue has been automatically marked as stale because it has not had 18 | recent activity in the past 45 days. It will be closed if no further activity 19 | occurs in the next 7 days. Thank you for your contributions. 
20 | 21 | # Limit to only `issues` 22 | only: issues 23 | -------------------------------------------------------------------------------- /.github/workflows/code-checks.yml: -------------------------------------------------------------------------------- 1 | name: Code Quality 2 | 3 | on: 4 | pull_request: 5 | branches: [ master ] 6 | paths-ignore: 7 | - 'README.md' 8 | push: 9 | branches: [ master ] 10 | paths-ignore: 11 | - 'README.md' 12 | 13 | jobs: 14 | pre-commit: 15 | name: Linting 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v3 19 | - uses: actions/setup-python@v4 20 | with: 21 | python-version: '3.10' 22 | - name: Install dependencies 23 | run: make dev_install 24 | - uses: pre-commit/action@v3.0.0 25 | -------------------------------------------------------------------------------- /.github/workflows/unittest.yml: -------------------------------------------------------------------------------- 1 | name: Unittest 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | paths-ignore: 7 | - 'README.md' 8 | pull_request: 9 | branches: [ master ] 10 | paths-ignore: 11 | - 'README.md' 12 | 13 | jobs: 14 | build: 15 | 16 | runs-on: ${{ matrix.os }} 17 | strategy: 18 | matrix: 19 | os: [ubuntu-latest, macos-latest, windows-latest] 20 | python-version: ['3.7', '3.8', '3.9', '3.10', '3.11.0-beta.4' ] 21 | name: Test - Python ${{ matrix.python-version }} on ${{ matrix.os }} 22 | 23 | steps: 24 | - uses: actions/checkout@v3 25 | - name: Set up Python 26 | uses: actions/setup-python@v4 27 | with: 28 | python-version: ${{ matrix.python-version }} 29 | - name: Get setuptools Unix 30 | if: ${{ matrix.os != 'windows-latest' }} 31 | run: pip install --upgrade pip setuptools codecov 32 | - name: Get setuptools Windows 33 | if: ${{ matrix.os == 'windows-latest' }} 34 | run: pip install --upgrade --user pip setuptools codecov 35 | - name: Install dependencies 36 | run: make dev_install 37 | - name: Test with pytest 38 | run: | 39 | make -e test 40 | 
codecov 41 | env: 42 | CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} 43 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | *.pyc 3 | *.pid 4 | *.cfg 5 | *.db 6 | *.env 7 | .DS_Store 8 | .cache/ 9 | .mypy_cache/ 10 | .coverage 11 | settings.json 12 | 13 | # Distribution / packaging 14 | .Python 15 | .pytest_cache 16 | .python-version 17 | env/ 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | logs/ 27 | parts/ 28 | sdist/ 29 | share/ 30 | var/ 31 | wheels/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | 36 | 37 | # Jupyter Notebook 38 | .ipynb_checkpoints 39 | *.ipynb 40 | 41 | # virtualenv 42 | .venv 43 | venv/ 44 | ENV/ 45 | bin/ 46 | include/ 47 | pip-selfcheck.json 48 | lib64 49 | 50 | #Telegram Sessions 51 | *.session 52 | *.session-journal 53 | 54 | #Downloaded documents 55 | documents/ 56 | audio/ 57 | document/ 58 | photo/ 59 | voice/ 60 | video/ 61 | video_note/ 62 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | repos: 4 | - repo: https://github.com/pre-commit/pre-commit-hooks 5 | rev: v4.2.0 6 | hooks: 7 | - id: trailing-whitespace 8 | - id: end-of-file-fixer 9 | - repo: https://github.com/psf/black 10 | rev: 22.3.0 11 | hooks: 12 | - id: black 13 | name: black 14 | entry: black 15 | types: [python] 16 | - repo: https://github.com/pycqa/isort 17 | rev: 5.10.1 18 | hooks: 19 | - id: isort 20 | name: isort 21 | entry: isort 22 | types: [python] 23 | args: ["--profile", "black", "--filter-files"] 24 | - repo: https://github.com/pre-commit/mirrors-mypy 25 | rev: v0.961 26 | hooks: 27 | - id: mypy 28 | name: 
mypy 29 | entry: mypy 30 | types: [python] 31 | args: [--ignore-missing-imports] 32 | files: utils/|media_downloader.py 33 | exclude: tests/ 34 | - repo: https://github.com/pycqa/pylint 35 | rev: v2.14.5 36 | hooks: 37 | - id: pylint 38 | name: pylint 39 | entry: pylint 40 | language: system 41 | types: [python] 42 | args: [ 43 | "-rn", # Only display messages 44 | "-sn", # Don't display the score 45 | "--rcfile=pylintrc" # Link to your config file 46 | ] 47 | files: utils/|media_downloader.py 48 | exclude: tests/ 49 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. 
Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at hello@dineshkarthik.me. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## Contributing 2 | 3 | First off, thank you for considering contributing to Telegram Media Downloader. It's people like you that make telegram-media-downloader such a great tool. 4 | Please take a moment to review this document in order to make the contribution process easy and effective for everyone involved. 5 | 6 | ### Where do I go from here? 7 | 8 | If you've noticed a bug or have a feature request, [make one](https://github.com/Dineshkarthik/telegram_media_downloader/issues)! 
It's generally best if you get confirmation of your bug or approval for your feature request this way before starting to code. 9 | 10 | If you have a general question about telegram-media-downloader, you can ask it on [Discussion](https://github.com/Dineshkarthik/telegram_media_downloader/discussions) under `Q&A` category and any ideas/suggestions go under `Ideas` category; the issue tracker is only for bugs and feature requests. 11 | 12 | ### Fork & create a branch 13 | 14 | If this is something you think you can fix, then [fork telegram-media-downloader](https://help.github.com/articles/fork-a-repo) and create a branch with a descriptive name. 15 | 16 | A good branch name would be (where issue #52 is the ticket you're working on): 17 | 18 | ```sh 19 | git checkout -b 52-fix-expired-file-reference 20 | ``` 21 | 22 | ### For new Contributors 23 | 24 | If you never created a pull request before, welcome! [Here is a great tutorial](https://egghead.io/series/how-to-contribute-to-an-open-source-project-on-github) on how to send one :) 25 | 26 | 1. [Fork](http://help.github.com/fork-a-repo/) the project, clone your fork, and configure the remotes: 27 | ```sh 28 | # Clone your fork of the repo into the current directory 29 | git clone https://github.com/<your-username>/telegram_media_downloader 30 | # Navigate to the newly cloned directory 31 | cd telegram_media_downloader 32 | # Install dependencies 33 | make dev_install 34 | # Assign the original repo to a remote called "upstream" 35 | git remote add upstream https://github.com/Dineshkarthik/telegram_media_downloader 36 | ``` 37 | 38 | 2. If you cloned a while ago, get the latest changes from upstream: 39 | ```sh 40 | git checkout master 41 | git pull upstream master 42 | ``` 43 | 44 | 3. Create a new branch (off the main project master branch) to contain your feature, change, or fix based on the branch name convention described above: 45 | ```sh 46 | git checkout -b <topic-branch-name> 47 | ``` 48 | 49 | 4. Make sure to update, or add to the tests when appropriate. Patches and features will not be accepted without tests.
Run `make test` to check that all tests pass after you've made changes. 50 | 51 | 5. If you added or changed a feature, make sure to document it accordingly in the `README.md` file. 52 | 53 | 6. Push your branch up to your fork: 54 | ```sh 55 | git push origin 56 | ``` 57 | 58 | 7. [Open a Pull Request](https://help.github.com/articles/using-pull-requests/) with a clear title and description. 59 | 60 | 61 | ### Coding Standards 62 | 63 | #### Python style 64 | 65 | Please follow these coding standards when writing code for inclusion in telegram-media-downloader. 66 | 67 | Telegram-media-downloader follows the [PEP8](https://www.python.org/dev/peps/pep-0008/) standard and uses [Black](https://black.readthedocs.io/en/stable/) and [Pylint](https://pylint.pycqa.org/en/latest/) to ensure a consistent code format throughout the project. 68 | 69 | [Continuous Integration](https://github.com/Dineshkarthik/telegram_media_downloader/actions) using GitHub Actions will run those tools and report any stylistic errors in your code. Therefore, it is helpful before submitting code to run the check yourself: 70 | ```sh 71 | black media_downloader.py utils 72 | ``` 73 | to auto-format your code. Additionally, many editors have plugins that will apply `black` as you edit files. 74 | 75 | Writing good code is not just about what you write. It is also about _how_ you write it. During [Continuous Integration](https://github.com/Dineshkarthik/telegram_media_downloader/actions) testing, several tools will be run to check your code for stylistic errors. Generating any warnings will cause the test to fail. Thus, good style is a requirement for submitting code to telegram-media-downloader. 
76 | 77 | This is already added in the repo to help contributors verify their changes before contributing them to the project: 78 | ```sh 79 | make style_check 80 | ``` 81 | 82 | #### Type hints 83 | 84 | Telegram-media-downloader strongly encourages the use of [**PEP 484**](https://www.python.org/dev/peps/pep-0484) style type hints. New development should contain type hints and pull requests to annotate existing code are accepted as well! 85 | 86 | Types imports should follow the `from typing import ...` convention. So rather than 87 | ```py 88 | import typing 89 | 90 | primes: typing.List[int] = [] 91 | ``` 92 | You should write 93 | ```py 94 | from typing import List, Optional, Union 95 | 96 | primes: List[int] = [] 97 | ``` 98 | 99 | `Optional` should be used where applicable, so instead of 100 | ```py 101 | maybe_primes: List[Union[int, None]] = [] 102 | ``` 103 | You should write 104 | ```py 105 | maybe_primes: List[Optional[int]] = [] 106 | ``` 107 | 108 | #### Validating type hints 109 | 110 | telegram-media-downloader uses [mypy](http://mypy-lang.org/) to statically analyze the code base and type hints. After making any change you can ensure your type hints are correct by running 111 | ```sh 112 | make static_type_check 113 | ``` 114 | 115 | #### Docstrings and standards 116 | 117 | A Python docstring is a string used to document a Python module, class, function or method, so programmers can understand what it does without having to read the details of the implementation. 118 | 119 | The next example gives an idea of what a docstring looks like: 120 | ```py 121 | def add(num1: int, num2: int) -> int: 122 | """ 123 | Add up two integer numbers. 124 | 125 | This function simply wraps the ``+`` operator, and does not 126 | do anything interesting, except for illustrating what 127 | the docstring of a very simple function looks like. 128 | 129 | Parameters 130 | ---------- 131 | num1: int 132 | First number to add. 133 | num2: int 134 | Second number to add. 
135 | 136 | Returns 137 | ------- 138 | int 139 | The sum of ``num1`` and ``num2``. 140 | 141 | See Also 142 | -------- 143 | subtract : Subtract one integer from another. 144 | 145 | Examples 146 | -------- 147 | >>> add(2, 2) 148 | 4 149 | >>> add(25, 0) 150 | 25 151 | >>> add(10, -10) 152 | 0 153 | """ 154 | return num1 + num2 155 | ``` 156 | Some standards regarding docstrings exist, which make them easier to read, and allow them to be easily exported to other formats such as html or pdf. 157 | 158 | ### Commit Message 159 | 160 | telegram-media-downloader uses a convention for commit message prefixes and layout. Here are some common prefixes along with general guidelines for when to use them: 161 | ``` 162 | <prefix>: <subject> 163 | <-- OPTIONAL --> 164 | 165 | <body> 166 | ``` 167 | 168 | #### Prefix: 169 | 170 | Must be one of the following: 171 | - **add**: Adding a new file 172 | - **ci**: Changes to CI configuration files and scripts (example: files inside `.github` folder) 173 | - **clean**: Code cleanup 174 | - **docs**: Additions/updates to documentation 175 | - **enh**: Enhancement, new functionality 176 | - **fix**: Bug fix 177 | - **perf**: A code change that improves performance 178 | - **refactor**: A code change that neither fixes a bug nor adds a feature 179 | - **style**: Changes that do not affect the meaning of the code (white-space, formatting, etc) 180 | - **test**: Additions/updates to tests 181 | - **type**: Type annotations 182 | 183 | #### Subject: 184 | 185 | Please reference the relevant GitHub issues in your commit message using #1234. 186 | - a subject line with `< 80` chars. 187 | - summary in present tense. 188 | - not capitalized. 189 | - no period at the end. 190 | 191 | #### Commit Message Body 192 | 193 | Just as in the summary, use the imperative, present tense. 194 | 195 | Explain the motivation for the change in the commit message body. This commit message should explain _why_ you are making the change.
You can include a comparison of the previous behavior with the new behavior in order to illustrate the impact of the change. 196 | 197 | ### Code of Conduct 198 | 199 | As a contributor, you can help us keep the community open and inclusive. Please read and follow our [Code of Conduct](https://github.com/Dineshkarthik/telegram_media_downloader/blob/master/CODE_OF_CONDUCT.md). 200 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Dineshkarthik R 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | TEST_ARTIFACTS ?= /tmp/coverage 2 | 3 | .PHONY: install dev_install static_type_check pylint style_check test 4 | 5 | install: 6 | python3 -m pip install --upgrade pip setuptools 7 | python3 -m pip install -r requirements.txt 8 | 9 | dev_install: install 10 | python3 -m pip install -r dev-requirements.txt 11 | 12 | static_type_check: 13 | mypy media_downloader.py utils --ignore-missing-imports 14 | 15 | pylint: 16 | pylint media_downloader.py utils -r y 17 | 18 | style_check: static_type_check pylint 19 | 20 | test: 21 | py.test --cov media_downloader --doctest-modules \ 22 | --cov utils \ 23 | --cov-report term-missing \ 24 | --cov-report html:${TEST_ARTIFACTS} \ 25 | --junit-xml=${TEST_ARTIFACTS}/media-downloader.xml \ 26 | tests/ 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 |

Telegram Media Downloader

3 | 4 |

5 | Unittest 6 | Coverage Status 7 | License: MIT 8 | Code style: black 9 |

10 | 11 |

12 | Feature request 13 | · 14 | Report a bug 15 | · 16 | Support: Discussions 17 | & 18 | Telegram Community 19 |

20 | 21 | ### Overview: 22 | Download all media files from a conversation or a channel that you are a part of from telegram. 23 | A meta of last read/downloaded message is stored in the config file so that in such a way it won't download the same media file again. 24 | 25 | ### Support: 26 | | Category | Support | 27 | |--|--| 28 | |Language | `Python 3.7 ` and above| 29 | |Download media types| audio, document, photo, video, video_note, voice| 30 | 31 | ### ToDo: 32 | - Add support for multiple channels/chats. 33 | 34 | ### Installation 35 | 36 | For *nix os distributions with `make` availability 37 | ```sh 38 | $ git clone https://github.com/Dineshkarthik/telegram_media_downloader.git 39 | $ cd telegram_media_downloader 40 | $ make install 41 | ``` 42 | For Windows which doesn't have `make` inbuilt 43 | ```sh 44 | $ git clone https://github.com/Dineshkarthik/telegram_media_downloader.git 45 | $ cd telegram_media_downloader 46 | $ pip3 install -r requirements.txt 47 | ``` 48 | 49 | ## Configuration 50 | 51 | All the configurations are passed to the Telegram Media Downloader via `config.yaml` file. 52 | 53 | **Getting your API Keys:** 54 | The very first step requires you to obtain a valid Telegram API key (API id/hash pair): 55 | 1. Visit [https://my.telegram.org/apps](https://my.telegram.org/apps) and log in with your Telegram Account. 56 | 2. Fill out the form to register a new Telegram application. 57 | 3. Done! The API key consists of two parts: **api_id** and **api_hash**. 58 | 59 | 60 | **Getting chat id:** 61 | 62 | **1. Using web telegram:** 63 | 1. Open https://web.telegram.org/?legacy=1#/im 64 | 2. Now go to the chat/channel and you will see the URL as something like 65 | - `https://web.telegram.org/?legacy=1#/im?p=u853521067_2449618633394` here `853521067` is the chat id. 66 | - `https://web.telegram.org/?legacy=1#/im?p=@somename` here `somename` is the chat id. 
67 | - `https://web.telegram.org/?legacy=1#/im?p=s1301254321_6925449697188775560` here take `1301254321` and add `-100` to the start of the id => `-1001301254321`. 68 | - `https://web.telegram.org/?legacy=1#/im?p=c1301254321_6925449697188775560` here take `1301254321` and add `-100` to the start of the id => `-1001301254321`. 69 | 70 | 71 | **2. Using bot:** 72 | 1. Use [@username_to_id_bot](https://t.me/username_to_id_bot) to get the chat_id of 73 | - almost any telegram user: send username to the bot or just forward their message to the bot 74 | - any chat: send chat username or copy and send its joinchat link to the bot 75 | - public or private channel: same as chats, just copy and send to the bot 76 | - id of any telegram bot 77 | 78 | 79 | ### config.yaml 80 | ```yaml 81 | api_hash: your_api_hash 82 | api_id: your_api_id 83 | chat_id: telegram_chat_id 84 | last_read_message_id: 0 85 | ids_to_retry: [] 86 | media_types: 87 | - audio 88 | - document 89 | - photo 90 | - video 91 | - voice 92 | file_formats: 93 | audio: 94 | - all 95 | document: 96 | - pdf 97 | - epub 98 | video: 99 | - mp4 100 | ``` 101 | 102 | - api_hash - The api_hash you got from telegram apps 103 | - api_id - The api_id you got from telegram apps 104 | - chat_id - The id of the chat/channel you want to download media. Which you get from the above-mentioned steps. 105 | - last_read_message_id - If it is the first time you are going to read the channel let it be `0` or if you have already used this script to download media it will have some numbers which are auto-updated after the scripts successful execution. Don't change it. 106 | - ids_to_retry - `Leave it as it is.` This is used by the downloader script to keep track of all skipped downloads so that it can be downloaded during the next execution of the script. 107 | - media_types - Type of media to download, you can update which type of media you want to download it can be one or any of the available types. 
108 | - file_formats - File types to download for supported media types which are `audio`, `document` and `video`. Default format is `all`, downloads all files. 109 | 110 | ## Execution 111 | ```sh 112 | $ python3 media_downloader.py 113 | ``` 114 | All the downloaded media will be stored inside the respective directory, named after the media type, in the same path as the Python script. 115 | 116 | | Media type | Download directory | 117 | |--|--| 118 | | audio | path/to/project/audio | 119 | | document | path/to/project/document | 120 | | photo | path/to/project/photo | 121 | | video | path/to/project/video | 122 | | voice | path/to/project/voice | 123 | | video_note | path/to/project/video_note | 124 | 125 | ## Proxy 126 | `socks4, socks5, http` proxies are supported in this project currently. To use it, add the following to the bottom of your `config.yaml` file 127 | 128 | ```yaml 129 | proxy: 130 | scheme: socks5 131 | hostname: 11.22.33.44 132 | port: 1234 133 | username: your_username 134 | password: your_password 135 | ``` 136 | If your proxy doesn’t require authorization you can omit username and password. Then the proxy will automatically be enabled. 137 | 138 | ## Contributing 139 | ### Contributing Guidelines 140 | Read through our [contributing guidelines](https://github.com/Dineshkarthik/telegram_media_downloader/blob/master/CONTRIBUTING.md) to learn about our submission process, coding rules and more. 141 | 142 | ### Want to Help? 143 | Want to file a bug, contribute some code, or improve documentation? Excellent! Read up on our guidelines for [contributing](https://github.com/Dineshkarthik/telegram_media_downloader/blob/master/CONTRIBUTING.md). 144 | 145 | ### Code of Conduct 146 | Help us keep Telegram Media Downloader open and inclusive. Please read and follow our [Code of Conduct](https://github.com/Dineshkarthik/telegram_media_downloader/blob/master/CODE_OF_CONDUCT.md).
147 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | status: 3 | project: 4 | default: 5 | target: auto 6 | threshold: 1% 7 | if_no_uploads: error 8 | if_not_found: success 9 | if_ci_failed: error 10 | patch: no 11 | 12 | comment: 13 | require_changes: true 14 | -------------------------------------------------------------------------------- /config.yaml: -------------------------------------------------------------------------------- 1 | api_hash: your_api_hash 2 | api_id: your_api_id 3 | chat_id: telegram_chat_id 4 | last_read_message_id: 0 5 | ids_to_retry: [] 6 | media_types: 7 | - audio 8 | - photo 9 | - video 10 | - document 11 | - voice 12 | - video_note 13 | file_formats: 14 | audio: 15 | - all 16 | document: 17 | - all 18 | video: 19 | - all 20 | -------------------------------------------------------------------------------- /dev-requirements.txt: -------------------------------------------------------------------------------- 1 | black==22.6.0 2 | isort==5.10.1 3 | mock==4.0.3 4 | mypy==0.971 5 | pre-commit==2.20.0 6 | pylint==2.14.5 7 | pytest==7.0.1 8 | pytest-cov==3.0.0 9 | types-PyYAML==6.0.11 10 | -------------------------------------------------------------------------------- /media_downloader.py: -------------------------------------------------------------------------------- 1 | """Downloads media from telegram.""" 2 | import asyncio 3 | import logging 4 | import os 5 | from typing import List, Optional, Tuple, Union 6 | 7 | import pyrogram 8 | import yaml 9 | from pyrogram.types import Audio, Document, Photo, Video, VideoNote, Voice 10 | from rich.logging import RichHandler 11 | 12 | from utils.file_management import get_next_name, manage_duplicate_file 13 | from utils.log import LogFilter 14 | from utils.meta import print_meta 15 | from utils.updates import check_for_updates 16 | 17 | 
logging.basicConfig( 18 | level=logging.INFO, 19 | format="%(message)s", 20 | datefmt="[%X]", 21 | handlers=[RichHandler()], 22 | ) 23 | logging.getLogger("pyrogram.session.session").addFilter(LogFilter()) 24 | logging.getLogger("pyrogram.client").addFilter(LogFilter()) 25 | logger = logging.getLogger("media_downloader") 26 | 27 | THIS_DIR = os.path.dirname(os.path.abspath(__file__)) 28 | FAILED_IDS: list = [] 29 | DOWNLOADED_IDS: list = [] 30 | 31 | 32 | def update_config(config: dict): 33 | """ 34 | Update existing configuration file. 35 | 36 | Parameters 37 | ---------- 38 | config: dict 39 | Configuration to be written into config file. 40 | """ 41 | config["ids_to_retry"] = ( 42 | list(set(config["ids_to_retry"]) - set(DOWNLOADED_IDS)) + FAILED_IDS 43 | ) 44 | with open("config.yaml", "w") as yaml_file: 45 | yaml.dump(config, yaml_file, default_flow_style=False) 46 | logger.info("Updated last read message_id to config file") 47 | 48 | 49 | def _can_download(_type: str, file_formats: dict, file_format: Optional[str]) -> bool: 50 | """ 51 | Check if the given file format can be downloaded. 52 | 53 | Parameters 54 | ---------- 55 | _type: str 56 | Type of media object. 57 | file_formats: dict 58 | Dictionary containing the list of file_formats 59 | to be downloaded for `audio`, `document` & `video` 60 | media types 61 | file_format: str 62 | Format of the current file to be downloaded. 63 | 64 | Returns 65 | ------- 66 | bool 67 | True if the file format can be downloaded else False. 68 | """ 69 | if _type in ["audio", "document", "video"]: 70 | allowed_formats: list = file_formats[_type] 71 | if not file_format in allowed_formats and allowed_formats[0] != "all": 72 | return False 73 | return True 74 | 75 | 76 | def _is_exist(file_path: str) -> bool: 77 | """ 78 | Check if a file exists and it is not a directory. 79 | 80 | Parameters 81 | ---------- 82 | file_path: str 83 | Absolute path of the file to be checked. 
84 | 85 | Returns 86 | ------- 87 | bool 88 | True if the file exists else False. 89 | """ 90 | return not os.path.isdir(file_path) and os.path.exists(file_path) 91 | 92 | 93 | async def _get_media_meta( 94 | media_obj: Union[Audio, Document, Photo, Video, VideoNote, Voice], 95 | _type: str, 96 | ) -> Tuple[str, Optional[str]]: 97 | """Extract file name and file id from media object. 98 | 99 | Parameters 100 | ---------- 101 | media_obj: Union[Audio, Document, Photo, Video, VideoNote, Voice] 102 | Media object to be extracted. 103 | _type: str 104 | Type of media object. 105 | 106 | Returns 107 | ------- 108 | Tuple[str, Optional[str]] 109 | file_name, file_format 110 | """ 111 | if _type in ["audio", "document", "video"]: 112 | # pylint: disable = C0301 113 | file_format: Optional[str] = media_obj.mime_type.split("/")[-1] # type: ignore 114 | else: 115 | file_format = None 116 | 117 | if _type in ["voice", "video_note"]: 118 | # pylint: disable = C0209 119 | file_format = media_obj.mime_type.split("/")[-1] # type: ignore 120 | file_name: str = os.path.join( 121 | THIS_DIR, 122 | _type, 123 | "{}_{}.{}".format( 124 | _type, 125 | media_obj.date.isoformat(), # type: ignore 126 | file_format, 127 | ), 128 | ) 129 | else: 130 | file_name = os.path.join( 131 | THIS_DIR, _type, getattr(media_obj, "file_name", None) or "" 132 | ) 133 | return file_name, file_format 134 | 135 | 136 | async def download_media( 137 | client: pyrogram.client.Client, 138 | message: pyrogram.types.Message, 139 | media_types: List[str], 140 | file_formats: dict, 141 | ): 142 | """ 143 | Download media from Telegram. 144 | 145 | Each of the files to download are retried 3 times with a 146 | delay of 5 seconds each. 147 | 148 | Parameters 149 | ---------- 150 | client: pyrogram.client.Client 151 | Client to interact with Telegram APIs. 152 | message: pyrogram.types.Message 153 | Message object retrieved from telegram. 154 | media_types: list 155 | List of strings of media types to be downloaded. 
156 | Ex : `["audio", "photo"]` 157 | Supported formats: 158 | * audio 159 | * document 160 | * photo 161 | * video 162 | * voice 163 | file_formats: dict 164 | Dictionary containing the list of file_formats 165 | to be downloaded for `audio`, `document` & `video` 166 | media types. 167 | 168 | Returns 169 | ------- 170 | int 171 | Current message id. 172 | """ 173 | for retry in range(3): 174 | try: 175 | if message.media is None: 176 | return message.id 177 | for _type in media_types: 178 | _media = getattr(message, _type, None) 179 | if _media is None: 180 | continue 181 | file_name, file_format = await _get_media_meta(_media, _type) 182 | if _can_download(_type, file_formats, file_format): 183 | if _is_exist(file_name): 184 | file_name = get_next_name(file_name) 185 | download_path = await client.download_media( 186 | message, file_name=file_name 187 | ) 188 | # pylint: disable = C0301 189 | download_path = manage_duplicate_file(download_path) # type: ignore 190 | else: 191 | download_path = await client.download_media( 192 | message, file_name=file_name 193 | ) 194 | if download_path: 195 | logger.info("Media downloaded - %s", download_path) 196 | DOWNLOADED_IDS.append(message.id) 197 | break 198 | except pyrogram.errors.exceptions.bad_request_400.BadRequest: 199 | logger.warning( 200 | "Message[%d]: file reference expired, refetching...", 201 | message.id, 202 | ) 203 | message = await client.get_messages( # type: ignore 204 | chat_id=message.chat.id, # type: ignore 205 | message_ids=message.id, 206 | ) 207 | if retry == 2: 208 | # pylint: disable = C0301 209 | logger.error( 210 | "Message[%d]: file reference expired for 3 retries, download skipped.", 211 | message.id, 212 | ) 213 | FAILED_IDS.append(message.id) 214 | except TypeError: 215 | # pylint: disable = C0301 216 | logger.warning( 217 | "Timeout Error occurred when downloading Message[%d], retrying after 5 seconds", 218 | message.id, 219 | ) 220 | await asyncio.sleep(5) 221 | if retry == 2: 222 | 
logger.error( 223 | "Message[%d]: Timing out after 3 reties, download skipped.", 224 | message.id, 225 | ) 226 | FAILED_IDS.append(message.id) 227 | except Exception as e: 228 | # pylint: disable = C0301 229 | logger.error( 230 | "Message[%d]: could not be downloaded due to following exception:\n[%s].", 231 | message.id, 232 | e, 233 | exc_info=True, 234 | ) 235 | FAILED_IDS.append(message.id) 236 | break 237 | return message.id 238 | 239 | 240 | async def process_messages( 241 | client: pyrogram.client.Client, 242 | messages: List[pyrogram.types.Message], 243 | media_types: List[str], 244 | file_formats: dict, 245 | ) -> int: 246 | """ 247 | Download media from Telegram. 248 | 249 | Parameters 250 | ---------- 251 | client: pyrogram.client.Client 252 | Client to interact with Telegram APIs. 253 | messages: list 254 | List of telegram messages. 255 | media_types: list 256 | List of strings of media types to be downloaded. 257 | Ex : `["audio", "photo"]` 258 | Supported formats: 259 | * audio 260 | * document 261 | * photo 262 | * video 263 | * voice 264 | file_formats: dict 265 | Dictionary containing the list of file_formats 266 | to be downloaded for `audio`, `document` & `video` 267 | media types. 268 | 269 | Returns 270 | ------- 271 | int 272 | Max value of list of message ids. 273 | """ 274 | message_ids = await asyncio.gather( 275 | *[ 276 | download_media(client, message, media_types, file_formats) 277 | for message in messages 278 | ] 279 | ) 280 | 281 | last_message_id: int = max(message_ids) 282 | return last_message_id 283 | 284 | 285 | async def begin_import(config: dict, pagination_limit: int) -> dict: 286 | """ 287 | Create pyrogram client and initiate download. 288 | 289 | The pyrogram client is created using the ``api_id``, ``api_hash`` 290 | from the config and iter through message offset on the 291 | ``last_message_id`` and the requested file_formats. 
292 | 293 | Parameters 294 | ---------- 295 | config: dict 296 | Dict containing the config to create pyrogram client. 297 | pagination_limit: int 298 | Number of message to download asynchronously as a batch. 299 | 300 | Returns 301 | ------- 302 | dict 303 | Updated configuration to be written into config file. 304 | """ 305 | client = pyrogram.Client( 306 | "media_downloader", 307 | api_id=config["api_id"], 308 | api_hash=config["api_hash"], 309 | proxy=config.get("proxy"), 310 | ) 311 | await client.start() 312 | last_read_message_id: int = config["last_read_message_id"] 313 | messages_iter = client.get_chat_history( 314 | config["chat_id"], offset_id=last_read_message_id, reverse=True 315 | ) 316 | messages_list: list = [] 317 | pagination_count: int = 0 318 | if config["ids_to_retry"]: 319 | logger.info("Downloading files failed during last run...") 320 | skipped_messages: list = await client.get_messages( # type: ignore 321 | chat_id=config["chat_id"], message_ids=config["ids_to_retry"] 322 | ) 323 | for message in skipped_messages: 324 | pagination_count += 1 325 | messages_list.append(message) 326 | 327 | async for message in messages_iter: # type: ignore 328 | if pagination_count != pagination_limit: 329 | pagination_count += 1 330 | messages_list.append(message) 331 | else: 332 | last_read_message_id = await process_messages( 333 | client, 334 | messages_list, 335 | config["media_types"], 336 | config["file_formats"], 337 | ) 338 | pagination_count = 0 339 | messages_list = [] 340 | messages_list.append(message) 341 | config["last_read_message_id"] = last_read_message_id 342 | update_config(config) 343 | if messages_list: 344 | last_read_message_id = await process_messages( 345 | client, 346 | messages_list, 347 | config["media_types"], 348 | config["file_formats"], 349 | ) 350 | 351 | await client.stop() 352 | config["last_read_message_id"] = last_read_message_id 353 | return config 354 | 355 | 356 | def main(): 357 | """Main function of the 
downloader.""" 358 | with open(os.path.join(THIS_DIR, "config.yaml")) as f: 359 | config = yaml.safe_load(f) 360 | updated_config = asyncio.get_event_loop().run_until_complete( 361 | begin_import(config, pagination_limit=100) 362 | ) 363 | if FAILED_IDS: 364 | logger.info( 365 | "Downloading of %d files failed. " 366 | "Failed message ids are added to config file.\n" 367 | "These files will be downloaded on the next run.", 368 | len(set(FAILED_IDS)), 369 | ) 370 | update_config(updated_config) 371 | check_for_updates() 372 | 373 | 374 | if __name__ == "__main__": 375 | print_meta(logger) 376 | main() 377 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | warn_return_any = True 3 | 4 | [mypy-yaml.*] 5 | ignore_missing_imports = True 6 | 7 | [mypy-tests.*] 8 | ignore_errors = True 9 | -------------------------------------------------------------------------------- /pylintrc: -------------------------------------------------------------------------------- 1 | # pylint-version: 2.2 2 | 3 | [MASTER] 4 | 5 | # A comma-separated list of package or module names from where C extensions may 6 | # be loaded. Extensions are loading into the active Python interpreter and may 7 | # run arbitrary code. 8 | extension-pkg-whitelist= 9 | pycurl, 10 | cdecimal, 11 | 12 | 13 | # Add files or directories to the blacklist. They should be base names, not 14 | # paths. 15 | ignore=CVS .git 16 | 17 | # Add files or directories matching the regex patterns to the blacklist. The 18 | # regex matches against base names, not paths. 19 | ignore-patterns= 20 | 21 | # Python code to execute, usually for sys.path manipulation such as 22 | # pygtk.require(). 23 | #init-hook= 24 | 25 | # Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the 26 | # number of processors available to use. 
27 | jobs=1 28 | 29 | # Control the amount of potential inferred values when inferring a single 30 | # object. This can help the performance when dealing with large functions or 31 | # complex, nested conditions. 32 | limit-inference-results=100 33 | 34 | # List of plugins (as comma separated values of python modules names) to load, 35 | # usually to register additional checkers. 36 | load-plugins= 37 | 38 | # Pickle collected data for later comparisons. 39 | persistent=no 40 | 41 | # Specify a configuration file. 42 | #rcfile= 43 | 44 | # When enabled, pylint would attempt to guess common misconfiguration and emit 45 | # user-friendly hints instead of false-positive error messages. 46 | suggestion-mode=yes 47 | 48 | # Allow loading of arbitrary C extensions. Extensions are imported into the 49 | # active Python interpreter and may run arbitrary code. 50 | unsafe-load-any-extension=no 51 | 52 | 53 | [MESSAGES CONTROL] 54 | 55 | # Only show warnings with the listed confidence levels. Leave empty to show 56 | # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED. 57 | confidence= 58 | 59 | # Disable the message, report, category or checker with the given id(s). You 60 | # can either give multiple identifiers separated by comma (,) or put this 61 | # option multiple times (only on the command line, not in the configuration 62 | # file where it should appear only once). You can also use "--disable=all" to 63 | # disable everything first and then reenable specific checks. For example, if 64 | # you want to run only the similarities checker, you can use "--disable=all 65 | # --enable=similarities". If you want to run only the classes checker, but have 66 | # no Warning level messages displayed, use "--disable=all --enable=classes 67 | # --disable=W". 
68 | disable= 69 | locally-disabled, 70 | file-ignored, 71 | fixme, 72 | useless-object-inheritance, 73 | 74 | redefined-variable-type, 75 | redefined-argument-from-local, 76 | wrong-import-position, 77 | consider-using-ternary, 78 | redefined-outer-name, 79 | 80 | invalid-name, 81 | bad-continuation, 82 | import-error, 83 | broad-except, 84 | 85 | unspecified-encoding, 86 | 87 | 88 | # Enable the message, report, category or checker with the given id(s). You can 89 | # either give multiple identifier separated by comma (,) or put this option 90 | # multiple time (only on the command line, not in the configuration file where 91 | # it should appear only once). See also the "--disable" option for examples. 92 | enable= 93 | 94 | 95 | [REPORTS] 96 | 97 | # Python expression which should return a note less than 10 (10 is the highest 98 | # note). You have access to the variables errors warning, statement which 99 | # respectively contain the number of errors / warnings messages and the total 100 | # number of statements analyzed. This is used by the global evaluation report 101 | # (RP0004). 102 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) 103 | 104 | # Template used to display messages. This is a python new-style format string 105 | # used to format the message information. See doc for all details. 106 | msg-template={path}:{line}: [{msg_id}({symbol}), {obj}] {msg} 107 | 108 | # Set the output format. Available formats are text, parseable, colorized, json 109 | # and msvs (visual studio). You can also give a reporter class, e.g. 110 | # mypackage.mymodule.MyReporterClass. 111 | output-format=text 112 | 113 | # Tells whether to display a full report or only the messages. 114 | reports=no 115 | 116 | # Activate the evaluation score. 
117 | score=no 118 | 119 | 120 | [REFACTORING] 121 | 122 | # Maximum number of nested blocks for function / method body 123 | max-nested-blocks=5 124 | 125 | # Complete name of functions that never returns. When checking for 126 | # inconsistent-return-statements if a never returning function is called then 127 | # it will be considered as an explicit return statement and no message will be 128 | # printed. 129 | never-returning-functions=sys.exit 130 | 131 | 132 | [MISCELLANEOUS] 133 | 134 | # List of note tags to take in consideration, separated by a comma. 135 | notes=FIXME, XXX, TODO 136 | 137 | 138 | [LOGGING] 139 | 140 | # Format style used to check logging format string. `old` means using % 141 | # formatting, while `new` is for `{}` formatting. 142 | logging-format-style=old 143 | 144 | # Logging modules to check that the string format arguments are in logging 145 | # function parameter format. 146 | logging-modules=logging 147 | 148 | 149 | [SIMILARITIES] 150 | 151 | # Ignore comments when computing similarities. 152 | ignore-comments=yes 153 | 154 | # Ignore docstrings when computing similarities. 155 | ignore-docstrings=yes 156 | 157 | # Ignore imports when computing similarities. 158 | ignore-imports=no 159 | 160 | # Minimum lines number of a similarity. 161 | min-similarity-lines=4 162 | 163 | 164 | [SPELLING] 165 | 166 | # Limits count of emitted suggestions for spelling mistakes. 167 | max-spelling-suggestions=4 168 | 169 | # Spelling dictionary name. Available dictionaries: none. To make it working 170 | # install python-enchant package.. 171 | spelling-dict= 172 | 173 | # List of comma separated words that should not be checked. 174 | spelling-ignore-words= 175 | 176 | # A path to a file that contains private dictionary; one word per line. 177 | spelling-private-dict-file= 178 | 179 | # Tells whether to store unknown words to indicated private dictionary in 180 | # --spelling-private-dict-file option instead of raising a message. 
181 | spelling-store-unknown-words=no 182 | 183 | 184 | [TYPECHECK] 185 | 186 | # List of decorators that produce context managers, such as 187 | # contextlib.contextmanager. Add to this list to register other decorators that 188 | # produce valid context managers. 189 | contextmanager-decorators=contextlib.contextmanager 190 | 191 | # List of members which are set dynamically and missed by pylint inference 192 | # system, and so shouldn't trigger E1101 when accessed. Python regular 193 | # expressions are accepted. 194 | generated-members= 195 | 196 | # Tells whether missing members accessed in mixin class should be ignored. A 197 | # mixin class is detected if its name ends with "mixin" (case insensitive). 198 | ignore-mixin-members=yes 199 | 200 | # Tells whether to warn about missing members when the owner of the attribute 201 | # is inferred to be None. 202 | ignore-none=yes 203 | 204 | # This flag controls whether pylint should warn about no-member and similar 205 | # checks whenever an opaque object is returned when inferring. The inference 206 | # can return multiple potential results while evaluating a Python object, but 207 | # some branches might not be evaluated, which results in partial inference. In 208 | # that case, it might be useful to still emit no-member and other checks for 209 | # the rest of the inferred objects. 210 | ignore-on-opaque-inference=yes 211 | 212 | # List of class names for which member attributes should not be checked (useful 213 | # for classes with dynamically set attributes). This supports the use of 214 | # qualified names. 215 | ignored-classes= 216 | st.config._config._section._unset, 217 | 218 | 219 | # List of module names for which member attributes should not be checked 220 | # (useful for modules/projects where namespaces are manipulated during runtime 221 | # and thus existing member attributes cannot be deduced by static analysis. It 222 | # supports qualified module names, as well as Unix pattern matching. 
223 | ignored-modules= 224 | 225 | # Show a hint with possible names when a member name was not found. The aspect 226 | # of finding the hint is based on edit distance. 227 | missing-member-hint=yes 228 | 229 | # The minimum edit distance a name should have in order to be considered a 230 | # similar match for a missing member name. 231 | missing-member-hint-distance=1 232 | 233 | # The total number of similar names that should be taken in consideration when 234 | # showing a hint for a missing member. 235 | missing-member-max-choices=1 236 | 237 | 238 | [FORMAT] 239 | 240 | # Expected format of line ending, e.g. empty (any line ending), LF or CRLF. 241 | expected-line-ending-format= 242 | 243 | # Regexp for a line that is allowed to be longer than the limit. 244 | ignore-long-lines=^\s*(# )??$ 245 | 246 | # Number of spaces of indent required inside a hanging or continued line. 247 | indent-after-paren=4 248 | 249 | # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 250 | # tab). 251 | indent-string=' ' 252 | 253 | # Maximum number of characters on a single line. 254 | max-line-length=90 255 | 256 | # Maximum number of lines in a module. 257 | max-module-lines= 258 | 1500 259 | 260 | 261 | # List of optional constructs for which whitespace checking is disabled. `dict- 262 | # separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. 263 | # `trailing-comma` allows a space between comma and closing bracket: (a, ). 264 | # `empty-line` allows space-only lines. 265 | no-space-check= 266 | 267 | # Allow the body of a class to be on the same line as the declaration if body 268 | # contains single statement. 269 | single-line-class-stmt=no 270 | 271 | # Allow the body of an if to be on the same line as the test if there is no 272 | # else. 273 | single-line-if-stmt=no 274 | 275 | 276 | [VARIABLES] 277 | 278 | # List of additional names supposed to be defined in builtins. 
Remember that 279 | # you should avoid defining new builtins when possible. 280 | additional-builtins= 281 | 282 | # Tells whether unused global variables should be treated as a violation. 283 | allow-global-unused-variables=yes 284 | 285 | # List of strings which can identify a callback function by name. A callback 286 | # name must start or end with one of those strings. 287 | callbacks=cb_, _cb 288 | 289 | # A regular expression matching the name of dummy variables (i.e. expected to 290 | # not be used). 291 | dummy-variables-rgx=(?x) 292 | (_|dummy)$ 293 | 294 | 295 | # Argument names that match this expression will be ignored. Default to name 296 | # with leading underscore. 297 | ignored-argument-names=(?x) 298 | _|(?:dummy|(?:kw)?args|request|response|context|ctx)$ 299 | 300 | 301 | # Tells whether we should check for unused import in __init__ files. 302 | init-import=no 303 | 304 | # List of qualified module names which can have objects that can redefine 305 | # builtins. 306 | redefining-builtins-modules=six.moves,future.builtins 307 | 308 | 309 | [BASIC] 310 | 311 | # Naming style matching correct argument names. 312 | argument-naming-style=snake_case 313 | 314 | # Regular expression matching correct argument names. Overrides argument- 315 | # naming-style. 316 | argument-rgx=[a-z_][a-z0-9_]{2,30}$ 317 | 318 | # Naming style matching correct attribute names. 319 | attr-naming-style=snake_case 320 | 321 | # Regular expression matching correct attribute names. Overrides attr-naming- 322 | # style. 323 | attr-rgx=[a-z_][a-z0-9_]{2,30}$ 324 | 325 | # Bad variable names which should always be refused, separated by a comma. 326 | bad-names=foo, bar, baz, toto, tutu, tata 327 | 328 | # Naming style matching correct class attribute names. 329 | class-attribute-naming-style=any 330 | 331 | # Regular expression matching correct class attribute names. Overrides class- 332 | # attribute-naming-style. 
333 | class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ 334 | 335 | # Naming style matching correct class names. 336 | class-naming-style=PascalCase 337 | 338 | # Regular expression matching correct class names. Overrides class-naming- 339 | # style. 340 | class-rgx=[A-Z_][a-zA-Z0-9]+$ 341 | 342 | # Naming style matching correct constant names. 343 | const-naming-style=UPPER_CASE 344 | 345 | # Regular expression matching correct constant names. Overrides const-naming- 346 | # style. 347 | const-rgx=(?x)( 348 | ([A-Z_][A-Z0-9_]*) 349 | |(__.*__) 350 | |(.+_)?logger 351 | |(.+_)?predicate 352 | |t_[a-z0-9]+(_[a-z0-9]+)* 353 | |(.*_)?templates 354 | )$ 355 | 356 | 357 | # Minimum line length for functions/classes that require docstrings, shorter 358 | # ones are exempt. 359 | docstring-min-length=-1 360 | 361 | # Naming style matching correct function names. 362 | function-naming-style=snake_case 363 | 364 | # Regular expression matching correct function names. Overrides function- 365 | # naming-style. 366 | function-rgx=[a-z_][a-z0-9_]{2,30}$ 367 | 368 | # Good variable names which should always be accepted, separated by a comma. 369 | good-names= 370 | _, j, db, e, fd, fp, 371 | 372 | 373 | # Include a hint for the correct naming format with invalid-name. 374 | include-naming-hint=no 375 | 376 | # Naming style matching correct inline iteration names. 377 | inlinevar-naming-style=any 378 | 379 | # Regular expression matching correct inline iteration names. Overrides 380 | # inlinevar-naming-style. 381 | inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ 382 | 383 | # Naming style matching correct method names. 384 | method-naming-style=snake_case 385 | 386 | # Regular expression matching correct method names. Overrides method-naming- 387 | # style. 388 | method-rgx=[a-z_][a-z0-9_]{2,30}$ 389 | 390 | # Naming style matching correct module names. 391 | module-naming-style=snake_case 392 | 393 | # Regular expression matching correct module names. 
Overrides module-naming- 394 | # style. 395 | module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ 396 | 397 | # Colon-delimited sets of names that determine each other's naming style when 398 | # the name regexes allow several styles. 399 | name-group= 400 | 401 | # Regular expression which should only match function or class names that do 402 | # not require a docstring. 403 | no-docstring-rgx=(?x)( 404 | __.*__ 405 | |test_.* 406 | |.+Test 407 | |render_.+ 408 | |repeat_.+ 409 | |(?:Pre)?Render 410 | )$ 411 | 412 | 413 | # List of decorators that produce properties, such as abc.abstractproperty. Add 414 | # to this list to register other decorators that produce valid properties. 415 | # These decorators are taken in consideration only for invalid-name. 416 | property-classes=abc.abstractproperty 417 | 418 | # Naming style matching correct variable names. 419 | variable-naming-style=snake_case 420 | 421 | # Regular expression matching correct variable names. Overrides variable- 422 | # naming-style. 423 | variable-rgx=[a-z_][a-z0-9_]{2,30}$ 424 | 425 | 426 | [IMPORTS] 427 | 428 | # Allow wildcard imports from modules that define __all__. 429 | allow-wildcard-with-all=no 430 | 431 | # Analyse import fallback blocks. This can be used to support both Python 2 and 432 | # 3 compatible code, which means that the block might have code that exists 433 | # only in one or another interpreter, leading to false positives when analysed. 434 | analyse-fallback-blocks=no 435 | 436 | # Deprecated modules which should not be used, separated by a comma. 437 | deprecated-modules=regsub, TERMIOS, Bastion, rexec 438 | 439 | # Create a graph of external dependencies in the given file (report RP0402 must 440 | # not be disabled). 441 | ext-import-graph= 442 | 443 | # Create a graph of every (i.e. internal and external) dependencies in the 444 | # given file (report RP0402 must not be disabled). 
445 | import-graph= 446 | 447 | # Create a graph of internal dependencies in the given file (report RP0402 must 448 | # not be disabled). 449 | int-import-graph= 450 | 451 | # Force import order to recognize a module as part of the standard 452 | # compatibility libraries. 453 | known-standard-library= 454 | 455 | # Force import order to recognize a module as part of a third party library. 456 | known-third-party=enchant 457 | 458 | 459 | [CLASSES] 460 | 461 | # List of method names used to declare (i.e. assign) instance attributes. 462 | defining-attr-methods= 463 | __init__, 464 | __new__, 465 | setUp, 466 | 467 | 468 | # List of member names, which should be excluded from the protected access 469 | # warning. 470 | exclude-protected=_asdict, _fields, _replace, _source, _make 471 | 472 | # List of valid names for the first argument in a class method. 473 | valid-classmethod-first-arg=cls 474 | 475 | # List of valid names for the first argument in a metaclass class method. 476 | valid-metaclass-classmethod-first-arg=mcs 477 | 478 | 479 | [DESIGN] 480 | 481 | # Maximum number of arguments for function / method. 482 | max-args=10 483 | 484 | # Maximum number of attributes for a class (see R0902). 485 | max-attributes=8 486 | 487 | # Maximum number of boolean expressions in an if statement. 488 | max-bool-expr=5 489 | 490 | # Maximum number of branch for function / method body. 491 | max-branches=13 492 | 493 | # Maximum number of locals for function / method body. 494 | max-locals=15 495 | 496 | # Maximum number of parents for a class (see R0901). 497 | max-parents=7 498 | 499 | # Maximum number of public methods for a class (see R0904). 500 | max-public-methods=20 501 | 502 | # Maximum number of return / yield for function / method body. 503 | max-returns=6 504 | 505 | # Maximum number of statements in function / method body. 506 | max-statements=50 507 | 508 | # Minimum number of public methods for a class (see R0903). 
509 | min-public-methods=0 510 | 511 | 512 | [EXCEPTIONS] 513 | 514 | # Exceptions that will emit a warning when being caught. Defaults to 515 | # "Exception". 516 | overgeneral-exceptions=Exception 517 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | https://github.com/Dineshkarthik/pyrogram/archive/refs/heads/master.zip 2 | PyYAML==6.0 3 | rich==12.5.1 4 | TgCrypto==1.2.3 5 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | 3 | from utils import __version__ 4 | 5 | setup( 6 | name="telegram-media-downloader", 7 | version=__version__, 8 | author="Dineshkarthik Raveendran", 9 | author_email="hello@dineshkarthik.me", 10 | description="A simple script to download media from telegram", 11 | url="https://github.com/Dineshkarthik/telegram_media_downloader", 12 | download_url="https://github.com/Dineshkarthik/telegram_media_downloader/releases/latest", 13 | py_modules=["media_downloader"], 14 | classifiers=[ 15 | "Development Status :: 5 - Production/Stable", 16 | "Environment :: Console", 17 | "Operating System :: OS Independent", 18 | "Intended Audience :: Developers", 19 | "Intended Audience :: End Users/Desktop", 20 | "Intended Audience :: Science/Research", 21 | "License :: OSI Approved :: MIT License", 22 | "Natural Language :: English", 23 | "Programming Language :: Python", 24 | "Programming Language :: Python :: 3", 25 | "Programming Language :: Python :: 3.7", 26 | "Programming Language :: Python :: 3.8", 27 | "Programming Language :: Python :: 3.9", 28 | "Programming Language :: Python :: 3.10", 29 | "Programming Language :: Python :: 3.11", 30 | "Topic :: Internet", 31 | "Topic :: Communications", 32 | "Topic :: Communications :: Chat", 33 | "Topic :: Software 
Development :: Libraries", 34 | "Topic :: Software Development :: Libraries :: Python Modules", 35 | ], 36 | project_urls={ 37 | "Tracker": "https://github.com/Dineshkarthik/telegram_media_downloader/issues", 38 | "Community": "https://t.me/tgmdnews", 39 | "Source": "https://github.com/Dineshkarthik/telegram_media_downloader", 40 | }, 41 | python_requires="~=3.7", 42 | ) 43 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dineshkarthik/telegram_media_downloader/68636daf0d9b2c410d8255f1556f367b6da4a60b/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_media_downloader.py: -------------------------------------------------------------------------------- 1 | """Unittest module for media downloader.""" 2 | import asyncio 3 | import copy 4 | import os 5 | import platform 6 | import unittest 7 | from datetime import datetime 8 | 9 | import mock 10 | import pyrogram 11 | 12 | from media_downloader import ( 13 | _can_download, 14 | _get_media_meta, 15 | _is_exist, 16 | begin_import, 17 | download_media, 18 | main, 19 | process_messages, 20 | update_config, 21 | ) 22 | 23 | MOCK_DIR: str = "/root/project" 24 | if platform.system() == "Windows": 25 | MOCK_DIR = "\\root\\project" 26 | MOCK_CONF = { 27 | "api_id": 123, 28 | "api_hash": "hasw5Tgawsuj67", 29 | "last_read_message_id": 0, 30 | "chat_id": 8654123, 31 | "ids_to_retry": [1], 32 | "media_types": ["audio", "voice"], 33 | "file_formats": {"audio": ["all"], "voice": ["all"]}, 34 | } 35 | 36 | 37 | def platform_generic_path(_path: str) -> str: 38 | platform_specific_path: str = _path 39 | if platform.system() == "Windows": 40 | platform_specific_path = platform_specific_path.replace("/", "\\") 41 | return platform_specific_path 42 | 43 | 44 | def mock_manage_duplicate_file(file_path: str) -> str: 
45 | return file_path 46 | 47 | 48 | class Chat: 49 | def __init__(self, chat_id): 50 | self.id = chat_id 51 | 52 | 53 | class MockMessage: 54 | def __init__(self, **kwargs): 55 | self.id = kwargs.get("id") 56 | self.media = kwargs.get("media") 57 | self.audio = kwargs.get("audio", None) 58 | self.document = kwargs.get("document", None) 59 | self.photo = kwargs.get("photo", None) 60 | self.video = kwargs.get("video", None) 61 | self.voice = kwargs.get("voice", None) 62 | self.video_note = kwargs.get("video_note", None) 63 | self.chat = Chat(kwargs.get("chat_id", None)) 64 | 65 | 66 | class MockAudio: 67 | def __init__(self, **kwargs): 68 | self.file_name = kwargs["file_name"] 69 | self.mime_type = kwargs["mime_type"] 70 | 71 | 72 | class MockDocument: 73 | def __init__(self, **kwargs): 74 | self.file_name = kwargs["file_name"] 75 | self.mime_type = kwargs["mime_type"] 76 | 77 | 78 | class MockPhoto: 79 | def __init__(self, **kwargs): 80 | self.date = kwargs["date"] 81 | 82 | 83 | class MockVoice: 84 | def __init__(self, **kwargs): 85 | self.mime_type = kwargs["mime_type"] 86 | self.date = kwargs["date"] 87 | 88 | 89 | class MockVideo: 90 | def __init__(self, **kwargs): 91 | self.mime_type = kwargs["mime_type"] 92 | 93 | 94 | class MockVideoNote: 95 | def __init__(self, **kwargs): 96 | self.mime_type = kwargs["mime_type"] 97 | self.date = kwargs["date"] 98 | 99 | 100 | class MockEventLoop: 101 | def __init__(self): 102 | pass 103 | 104 | def run_until_complete(self, *args, **kwargs): 105 | return {"api_id": 1, "api_hash": "asdf", "ids_to_retry": [1, 2, 3]} 106 | 107 | 108 | class MockAsync: 109 | def __init__(self): 110 | pass 111 | 112 | def get_event_loop(self): 113 | return MockEventLoop() 114 | 115 | 116 | async def async_get_media_meta(message_media, _type): 117 | result = await _get_media_meta(message_media, _type) 118 | return result 119 | 120 | 121 | async def async_download_media(client, message, media_types, file_formats): 122 | result = await 
download_media(client, message, media_types, file_formats) 123 | return result 124 | 125 | 126 | async def async_begin_import(conf, pagination_limit): 127 | result = await begin_import(conf, pagination_limit) 128 | return result 129 | 130 | 131 | async def mock_process_message(*args, **kwargs): 132 | return 5 133 | 134 | 135 | async def async_process_messages(client, messages, media_types, file_formats): 136 | result = await process_messages(client, messages, media_types, file_formats) 137 | return result 138 | 139 | 140 | class MockClient: 141 | def __init__(self, *args, **kwargs): 142 | pass 143 | 144 | def __aiter__(self): 145 | return self 146 | 147 | async def start(self): 148 | pass 149 | 150 | async def stop(self): 151 | pass 152 | 153 | async def get_chat_history(self, *args, **kwargs): 154 | items = [ 155 | MockMessage( 156 | id=1213, 157 | media=True, 158 | voice=MockVoice( 159 | mime_type="audio/ogg", 160 | date=datetime(2019, 7, 25, 14, 53, 50), 161 | ), 162 | ), 163 | MockMessage( 164 | id=1214, 165 | media=False, 166 | text="test message 1", 167 | ), 168 | MockMessage( 169 | id=1215, 170 | media=False, 171 | text="test message 2", 172 | ), 173 | MockMessage( 174 | id=1216, 175 | media=False, 176 | text="test message 3", 177 | ), 178 | ] 179 | for item in items: 180 | yield item 181 | 182 | async def get_messages(self, *args, **kwargs): 183 | if kwargs["message_ids"] == 7: 184 | message = MockMessage( 185 | id=7, 186 | media=True, 187 | chat_id=123456, 188 | video=MockVideo( 189 | file_name="sample_video.mov", 190 | mime_type="video/mov", 191 | ), 192 | ) 193 | elif kwargs["message_ids"] == 8: 194 | message = MockMessage( 195 | id=8, 196 | media=True, 197 | chat_id=234567, 198 | video=MockVideo( 199 | file_name="sample_video.mov", 200 | mime_type="video/mov", 201 | ), 202 | ) 203 | elif kwargs["message_ids"] == [1]: 204 | message = [ 205 | MockMessage( 206 | id=1, 207 | media=True, 208 | chat_id=234568, 209 | video=MockVideo( 210 | 
file_name="sample_video.mov", 211 | mime_type="video/mov", 212 | ), 213 | ) 214 | ] 215 | return message 216 | 217 | async def download_media(self, *args, **kwargs): 218 | mock_message = args[0] 219 | if mock_message.id in [7, 8]: 220 | raise pyrogram.errors.exceptions.bad_request_400.BadRequest 221 | elif mock_message.id == 9: 222 | raise pyrogram.errors.exceptions.unauthorized_401.Unauthorized 223 | elif mock_message.id == 11: 224 | raise TypeError 225 | return kwargs["file_name"] 226 | 227 | 228 | class MediaDownloaderTestCase(unittest.TestCase): 229 | @classmethod 230 | def setUpClass(cls): 231 | cls.loop = asyncio.get_event_loop() 232 | 233 | @mock.patch("media_downloader.THIS_DIR", new=MOCK_DIR) 234 | def test_get_media_meta(self): 235 | # Test Voice notes 236 | message = MockMessage( 237 | id=1, 238 | media=True, 239 | voice=MockVoice( 240 | mime_type="audio/ogg", 241 | date=datetime(2019, 7, 25, 14, 53, 50), 242 | ), 243 | ) 244 | result = self.loop.run_until_complete( 245 | async_get_media_meta(message.voice, "voice") 246 | ) 247 | 248 | self.assertEqual( 249 | ( 250 | platform_generic_path( 251 | "/root/project/voice/voice_2019-07-25T14:53:50.ogg" 252 | ), 253 | "ogg", 254 | ), 255 | result, 256 | ) 257 | 258 | # Test photos 259 | message = MockMessage( 260 | id=2, 261 | media=True, 262 | photo=MockPhoto(date=datetime(2019, 8, 5, 14, 35, 12)), 263 | ) 264 | result = self.loop.run_until_complete( 265 | async_get_media_meta(message.photo, "photo") 266 | ) 267 | self.assertEqual( 268 | ( 269 | platform_generic_path("/root/project/photo/"), 270 | None, 271 | ), 272 | result, 273 | ) 274 | 275 | # Test Documents 276 | message = MockMessage( 277 | id=3, 278 | media=True, 279 | document=MockDocument( 280 | file_name="sample_document.pdf", 281 | mime_type="application/pdf", 282 | ), 283 | ) 284 | result = self.loop.run_until_complete( 285 | async_get_media_meta(message.document, "document") 286 | ) 287 | self.assertEqual( 288 | ( 289 | 
platform_generic_path("/root/project/document/sample_document.pdf"), 290 | "pdf", 291 | ), 292 | result, 293 | ) 294 | 295 | # Test audio 296 | message = MockMessage( 297 | id=4, 298 | media=True, 299 | audio=MockAudio( 300 | file_name="sample_audio.mp3", 301 | mime_type="audio/mp3", 302 | ), 303 | ) 304 | result = self.loop.run_until_complete( 305 | async_get_media_meta(message.audio, "audio") 306 | ) 307 | self.assertEqual( 308 | ( 309 | platform_generic_path("/root/project/audio/sample_audio.mp3"), 310 | "mp3", 311 | ), 312 | result, 313 | ) 314 | 315 | # Test Video 316 | message = MockMessage( 317 | id=5, 318 | media=True, 319 | video=MockVideo( 320 | mime_type="video/mp4", 321 | ), 322 | ) 323 | result = self.loop.run_until_complete( 324 | async_get_media_meta(message.video, "video") 325 | ) 326 | self.assertEqual( 327 | ( 328 | platform_generic_path("/root/project/video/"), 329 | "mp4", 330 | ), 331 | result, 332 | ) 333 | 334 | # Test VideoNote 335 | message = MockMessage( 336 | id=6, 337 | media=True, 338 | video_note=MockVideoNote( 339 | mime_type="video/mp4", 340 | date=datetime(2019, 7, 25, 14, 53, 50), 341 | ), 342 | ) 343 | result = self.loop.run_until_complete( 344 | async_get_media_meta(message.video_note, "video_note") 345 | ) 346 | self.assertEqual( 347 | ( 348 | platform_generic_path( 349 | "/root/project/video_note/video_note_2019-07-25T14:53:50.mp4" 350 | ), 351 | "mp4", 352 | ), 353 | result, 354 | ) 355 | 356 | @mock.patch("media_downloader.THIS_DIR", new=MOCK_DIR) 357 | @mock.patch("media_downloader.asyncio.sleep", return_value=None) 358 | @mock.patch("media_downloader.logger") 359 | def test_download_media(self, mock_logger, patched_time_sleep): 360 | client = MockClient() 361 | message = MockMessage( 362 | id=5, 363 | media=True, 364 | video=MockVideo( 365 | file_name="sample_video.mp4", 366 | mime_type="video/mp4", 367 | ), 368 | ) 369 | result = self.loop.run_until_complete( 370 | async_download_media( 371 | client, message, ["video", 
"photo"], {"video": ["mp4"]} 372 | ) 373 | ) 374 | self.assertEqual(5, result) 375 | 376 | message_1 = MockMessage( 377 | id=6, 378 | media=True, 379 | video=MockVideo( 380 | file_name="sample_video.mov", 381 | mime_type="video/mov", 382 | ), 383 | ) 384 | result = self.loop.run_until_complete( 385 | async_download_media( 386 | client, message_1, ["video", "photo"], {"video": ["all"]} 387 | ) 388 | ) 389 | self.assertEqual(6, result) 390 | 391 | # Test re-fetch message success 392 | message_2 = MockMessage( 393 | id=7, 394 | media=True, 395 | video=MockVideo( 396 | file_name="sample_video.mov", 397 | mime_type="video/mov", 398 | ), 399 | ) 400 | result = self.loop.run_until_complete( 401 | async_download_media( 402 | client, message_2, ["video", "photo"], {"video": ["all"]} 403 | ) 404 | ) 405 | self.assertEqual(7, result) 406 | mock_logger.warning.assert_called_with( 407 | "Message[%d]: file reference expired, refetching...", 7 408 | ) 409 | 410 | # Test re-fetch message failure 411 | message_3 = MockMessage( 412 | id=8, 413 | media=True, 414 | video=MockVideo( 415 | file_name="sample_video.mov", 416 | mime_type="video/mov", 417 | ), 418 | ) 419 | result = self.loop.run_until_complete( 420 | async_download_media( 421 | client, message_3, ["video", "photo"], {"video": ["all"]} 422 | ) 423 | ) 424 | self.assertEqual(8, result) 425 | mock_logger.error.assert_called_with( 426 | "Message[%d]: file reference expired for 3 retries, download skipped.", 427 | 8, 428 | ) 429 | 430 | # Test other exception 431 | message_4 = MockMessage( 432 | id=9, 433 | media=True, 434 | video=MockVideo( 435 | file_name="sample_video.mov", 436 | mime_type="video/mov", 437 | ), 438 | ) 439 | result = self.loop.run_until_complete( 440 | async_download_media( 441 | client, message_4, ["video", "photo"], {"video": ["all"]} 442 | ) 443 | ) 444 | self.assertEqual(9, result) 445 | mock_logger.error.assert_called_with( 446 | "Message[%d]: could not be downloaded due to following exception:\n[%s].", 
447 | 9, 448 | mock.ANY, 449 | exc_info=True, 450 | ) 451 | 452 | # Check no media 453 | message_5 = MockMessage( 454 | id=10, 455 | media=None, 456 | ) 457 | result = self.loop.run_until_complete( 458 | async_download_media( 459 | client, message_5, ["video", "photo"], {"video": ["all"]} 460 | ) 461 | ) 462 | self.assertEqual(10, result) 463 | 464 | # Test timeout 465 | message_6 = MockMessage( 466 | id=11, 467 | media=True, 468 | video=MockVideo( 469 | file_name="sample_video.mov", 470 | mime_type="video/mov", 471 | ), 472 | ) 473 | result = self.loop.run_until_complete( 474 | async_download_media( 475 | client, message_6, ["video", "photo"], {"video": ["all"]} 476 | ) 477 | ) 478 | self.assertEqual(11, result) 479 | mock_logger.error.assert_called_with( 480 | "Message[%d]: Timing out after 3 reties, download skipped.", 11 481 | ) 482 | 483 | @mock.patch("__main__.__builtins__.open", new_callable=mock.mock_open) 484 | @mock.patch("media_downloader.yaml", autospec=True) 485 | def test_update_config(self, mock_yaml, mock_open): 486 | conf = { 487 | "api_id": 123, 488 | "api_hash": "hasw5Tgawsuj67", 489 | "ids_to_retry": [], 490 | } 491 | update_config(conf) 492 | mock_open.assert_called_with("config.yaml", "w") 493 | mock_yaml.dump.assert_called_with(conf, mock.ANY, default_flow_style=False) 494 | 495 | @mock.patch("media_downloader.update_config") 496 | @mock.patch("media_downloader.pyrogram.Client", new=MockClient) 497 | @mock.patch("media_downloader.process_messages", new=mock_process_message) 498 | def test_begin_import(self, mock_update_config): 499 | result = self.loop.run_until_complete(async_begin_import(MOCK_CONF, 3)) 500 | conf = copy.deepcopy(MOCK_CONF) 501 | conf["last_read_message_id"] = 5 502 | self.assertDictEqual(result, conf) 503 | 504 | def test_process_message(self): 505 | client = MockClient() 506 | result = self.loop.run_until_complete( 507 | async_process_messages( 508 | client, 509 | [ 510 | MockMessage( 511 | id=1213, 512 | media=True, 513 | 
voice=MockVoice( 514 | mime_type="audio/ogg", 515 | date=datetime(2019, 7, 25, 14, 53, 50), 516 | ), 517 | ), 518 | MockMessage( 519 | id=1214, 520 | media=False, 521 | text="test message 1", 522 | ), 523 | MockMessage( 524 | id=1215, 525 | media=False, 526 | text="test message 2", 527 | ), 528 | MockMessage( 529 | id=1216, 530 | media=False, 531 | text="test message 3", 532 | ), 533 | ], 534 | ["voice", "photo"], 535 | {"audio": ["all"], "voice": ["all"]}, 536 | ) 537 | ) 538 | self.assertEqual(result, 1216) 539 | 540 | @mock.patch("media_downloader._is_exist", return_value=True) 541 | @mock.patch( 542 | "media_downloader.manage_duplicate_file", 543 | new=mock_manage_duplicate_file, 544 | ) 545 | def test_process_message_when_file_exists(self, mock_is_exist): 546 | client = MockClient() 547 | result = self.loop.run_until_complete( 548 | async_process_messages( 549 | client, 550 | [ 551 | MockMessage( 552 | id=1213, 553 | media=True, 554 | voice=MockVoice( 555 | mime_type="audio/ogg", 556 | date=datetime(2019, 7, 25, 14, 53, 50), 557 | ), 558 | ), 559 | MockMessage( 560 | id=1214, 561 | media=False, 562 | text="test message 1", 563 | ), 564 | MockMessage( 565 | id=1215, 566 | media=False, 567 | text="test message 2", 568 | ), 569 | MockMessage( 570 | id=1216, 571 | media=False, 572 | text="test message 3", 573 | ), 574 | ], 575 | ["voice", "photo"], 576 | {"audio": ["all"], "voice": ["all"]}, 577 | ) 578 | ) 579 | self.assertEqual(result, 1216) 580 | 581 | def test_can_download(self): 582 | file_formats = { 583 | "audio": ["mp3"], 584 | "video": ["mp4"], 585 | "document": ["all"], 586 | } 587 | result = _can_download("audio", file_formats, "mp3") 588 | self.assertEqual(result, True) 589 | 590 | result1 = _can_download("audio", file_formats, "ogg") 591 | self.assertEqual(result1, False) 592 | 593 | result2 = _can_download("document", file_formats, "pdf") 594 | self.assertEqual(result2, True) 595 | 596 | result3 = _can_download("document", file_formats, "epub") 597 | 
self.assertEqual(result3, True) 598 | 599 | def test_is_exist(self): 600 | this_dir = os.path.dirname(os.path.abspath(__file__)) 601 | result = _is_exist(os.path.join(this_dir, "__init__.py")) 602 | self.assertEqual(result, True) 603 | 604 | result1 = _is_exist(os.path.join(this_dir, "init.py")) 605 | self.assertEqual(result1, False) 606 | 607 | result2 = _is_exist(this_dir) 608 | self.assertEqual(result2, False) 609 | 610 | @mock.patch("media_downloader.FAILED_IDS", [2, 3]) 611 | @mock.patch("media_downloader.yaml.safe_load") 612 | @mock.patch("media_downloader.update_config", return_value=True) 613 | @mock.patch("media_downloader.begin_import") 614 | @mock.patch("media_downloader.asyncio", new=MockAsync()) 615 | def test_main(self, mock_import, mock_update, mock_yaml): 616 | conf = { 617 | "api_id": 1, 618 | "api_hash": "asdf", 619 | "ids_to_retry": [1, 2], 620 | } 621 | mock_yaml.return_value = conf 622 | main() 623 | mock_import.assert_called_with(conf, pagination_limit=100) 624 | conf["ids_to_retry"] = [1, 2, 3] 625 | mock_update.assert_called_with(conf) 626 | 627 | @classmethod 628 | def tearDownClass(cls): 629 | cls.loop.close() 630 | -------------------------------------------------------------------------------- /tests/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dineshkarthik/telegram_media_downloader/68636daf0d9b2c410d8255f1556f367b6da4a60b/tests/utils/__init__.py -------------------------------------------------------------------------------- /tests/utils/test_file_management.py: -------------------------------------------------------------------------------- 1 | """Unittest module for media downloader.""" 2 | import os 3 | import sys 4 | import tempfile 5 | import unittest 6 | from pathlib import Path 7 | 8 | import mock 9 | 10 | sys.path.append("..") # Adds higher directory to python modules path. 
from utils.file_management import get_next_name, manage_duplicate_file


class FileManagementTestCase(unittest.TestCase):
    """Exercise the duplicate-file helpers against real files on disk."""

    def setUp(self):
        """Create one file with content and two empty ``-copyN`` siblings."""
        self.this_dir = os.path.dirname(os.path.abspath(__file__))
        self.test_file = os.path.join(self.this_dir, "file-test.txt")
        self.test_file_copy_1 = os.path.join(self.this_dir, "file-test-copy1.txt")
        self.test_file_copy_2 = os.path.join(self.this_dir, "file-test-copy2.txt")
        # Context manager so the handle is closed even if the write fails.
        with open(self.test_file, "w+") as handle:
            handle.write("dummy file")
        Path(self.test_file_copy_1).touch()
        Path(self.test_file_copy_2).touch()

    def test_get_next_name(self):
        """copy1 and copy2 already exist, so the next free name is copy3."""
        result = get_next_name(self.test_file)
        expected_result = os.path.join(self.this_dir, "file-test-copy3.txt")
        self.assertEqual(result, expected_result)

    def test_manage_duplicate_file(self):
        """Identical (empty) copies collapse into one; unique files are kept."""
        # copy2 has the same content as copy1, so copy2 is removed and the
        # surviving path (copy1) is returned.
        result = manage_duplicate_file(self.test_file_copy_2)
        self.assertEqual(result, self.test_file_copy_1)

        # copy1 now only has "file-test.txt" as a sibling, whose content
        # differs, so copy1 is returned untouched.
        result1 = manage_duplicate_file(self.test_file_copy_1)
        self.assertEqual(result1, self.test_file_copy_1)

    def tearDown(self):
        """Remove every fixture file that is still present."""
        # copy2 is deleted by test_manage_duplicate_file but survives
        # test_get_next_name, so each path is checked before removal.
        for path in (
            self.test_file,
            self.test_file_copy_1,
            self.test_file_copy_2,
        ):
            if os.path.exists(path):
                os.remove(path)


# ------------------------------------------------------------------------------
# /tests/utils/test_log.py
# ------------------------------------------------------------------------------
"""Unittest module for log handlers."""
import os
import sys
import unittest

import mock

sys.path.append("..")  # Adds higher directory to python modules path.
from utils.log import LogFilter


class MockLog:
    """
    Mock logs.

    Stand-in for a log record exposing only the ``funcName`` attribute.
    """

    def __init__(self, **kwargs):
        self.funcName = kwargs["funcName"]


class MetaTestCase(unittest.TestCase):
    """Tests for ``LogFilter``."""

    def test_log_filter(self):
        """Records from ``invoke`` are rejected, the others are accepted."""
        result = LogFilter().filter(MockLog(funcName="invoke"))
        self.assertEqual(result, False)

        result1 = LogFilter().filter(MockLog(funcName="get_file"))
        self.assertEqual(result1, True)

        result2 = LogFilter().filter(MockLog(funcName="Synced"))
        self.assertEqual(result2, True)


# ------------------------------------------------------------------------------
# /tests/utils/test_meta.py
# ------------------------------------------------------------------------------
"""Unittest module for meta info utilities."""
import os
import sys
import unittest

import mock

sys.path.append("..")  # Adds higher directory to python modules path.
from utils.meta import print_meta


class MetaTestCase(unittest.TestCase):
    """Tests for ``print_meta``."""

    @mock.patch("utils.meta.APP_VERSION", "test-version 1.0.0")
    @mock.patch("utils.meta.DEVICE_MODEL", "CPython X.X.X")
    @mock.patch("utils.meta.SYSTEM_VERSION", "System xx.x.xx")
    @mock.patch("media_downloader.logger")
    def test_print_meta(self, mock_logger):
        """The patched device and system strings are sent to ``logger.info``."""
        print_meta(mock_logger)
        calls = [
            mock.call.info("Device: CPython X.X.X - test-version 1.0.0"),
            mock.call.info("System: System xx.x.xx (EN)"),
        ]
        mock_logger.assert_has_calls(calls, any_order=True)


# ------------------------------------------------------------------------------
# /tests/utils/test_updates.py
# ------------------------------------------------------------------------------
"""Unittest module for update checker."""
import os
import sys
import unittest

import mock
from rich.markdown import Markdown

sys.path.append("..")  # Adds higher directory to python modules path.
from utils.updates import check_for_updates


class FakeHTTPSConnection:
    """Minimal stand-in for ``http.client.HTTPSConnection``."""

    def __init__(self, status):
        self.status = status

    def request(self, *args, **kwargs):
        pass

    def getresponse(self):
        return FakeHTTPSResponse(self.status)

    def close(self):
        # check_for_updates closes its connection when done.
        pass


class FakeHTTPSResponse:
    """Fake response: a release payload for status 200, invalid JSON otherwise."""

    def __init__(self, status):
        self.status = status

    def read(self):
        if self.status == 200:
            return b'{"name":"v0.0.0 2022-03-02","tag_name":"v0.0.0", "html_url":"https://github.com/Dineshkarthik/telegram_media_downloader/releases/tag/v0.0.0"}'
        else:
            return b"{error}"


class UpdatesTestCase(unittest.TestCase):
    """Tests for ``check_for_updates``."""

    @mock.patch(
        "utils.updates.http.client.HTTPSConnection",
        new=mock.MagicMock(return_value=FakeHTTPSConnection(200)),
    )
    @mock.patch("utils.updates.__version__", new="0.0.1")
    @mock.patch("utils.updates.Console")
    @mock.patch("utils.updates.Markdown")
    def test_update(self, mock_markdown, mock_console):
        """A differing release tag prints the update notice once."""
        check_for_updates()
        name: str = "v0.0.0 2022-03-02"
        html_url: str = "https://github.com/Dineshkarthik/telegram_media_downloader/releases/tag/v0.0.0"
        expected_message: str = (
            f"## New version of Telegram-Media-Downloader is available - {name}\n"
            "You are using an outdated version v0.0.1 please pull in the changes using `git pull` or download the latest release.\n\n"
            f"Find more details about the latest release here - {html_url}"
        )
        mock_markdown.assert_called_with(expected_message)
        mock_console.return_value.print.assert_called_once()

    @mock.patch(
        "utils.updates.http.client.HTTPSConnection",
        new=mock.MagicMock(return_value=FakeHTTPSConnection(500)),
    )
    @mock.patch("utils.updates.Console")
    def test_exception(self, mock_console):
        """An unparsable response body is logged instead of raised."""
        check_for_updates()
        exception_message: str = (
            "Following error occured when checking for updates\n"
            ", Expecting property name enclosed in double quotes: line 1 column 2 (char 1)"
        )
        mock_console.return_value.log.assert_called_with(exception_message)


# ------------------------------------------------------------------------------
# /utils/__init__.py
# ------------------------------------------------------------------------------
"""Init namespace"""

__version__ = "2.0.2"
__license__ = "MIT License"
__copyright__ = "Copyright (C) 2019 Dineshkarthik "


# ------------------------------------------------------------------------------
# /utils/file_management.py
# ------------------------------------------------------------------------------
"""Utility functions to handle downloaded files."""
import glob
import os
import pathlib
from hashlib import md5


def get_next_name(file_path: str) -> str:
    """
    Get next available name to download file.

    The name is built as ``<stem>-copy<N><suffix>`` in the same directory,
    where ``N`` is the smallest positive integer for which no file exists.

    Parameters
    ----------
    file_path: str
        Absolute path of the file for which next available name to
        be generated.

    Returns
    -------
    str
        Absolute path of the next available name for the file.
    """
    posix_path = pathlib.Path(file_path)
    counter: int = 1
    while True:
        # ``stem`` already contains every suffix except the last one, so
        # only the final suffix is appended; joining all suffixes would
        # repeat the inner ones ("a.b.mp3" -> "a.b-copy1.b.mp3").
        candidate: str = os.path.join(
            str(posix_path.parent),
            f"{posix_path.stem}-copy{counter}{posix_path.suffix}",
        )
        if not os.path.isfile(candidate):
            return candidate
        counter += 1


def _file_md5(path: str, chunk_size: int = 1024 * 1024) -> str:
    """Return the md5 hex digest of a file, read in fixed-size chunks."""
    digest = md5()
    with open(path, "rb") as handle:
        for chunk in iter(lambda: handle.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def manage_duplicate_file(file_path: str) -> str:
    """
    Check if a file is duplicate.

    Compare the md5 of files with copy name pattern
    and remove if the md5 hash is same.

    Parameters
    ----------
    file_path: str
        Absolute path of the file for which duplicates needs to
        be managed.

    Returns
    -------
    str
        Absolute path of the duplicate managed file: the already existing
        file when ``file_path`` was a duplicate (and has been removed),
        otherwise ``file_path`` itself.
    """
    posix_path = pathlib.Path(file_path)
    # "name-copy3" and "name" share the base "name".
    file_base_name: str = posix_path.stem.split("-copy")[0]
    # Escape glob metacharacters that are part of the real file name, then
    # append the only intended wildcard.
    name_pattern: str = glob.escape(f"{posix_path.parent}/{file_base_name}") + "*"
    # Only regular files can be hashed, and the file must never be compared
    # with itself: a self-match would delete it. ``samefile`` is used so a
    # differently spelled path to the same file is still recognised.
    old_files: list = sorted(
        candidate
        for candidate in glob.glob(name_pattern)
        if os.path.isfile(candidate)
        and not os.path.samefile(candidate, file_path)
    )
    current_file_md5: str = _file_md5(file_path)
    for old_file_path in old_files:
        if current_file_md5 == _file_md5(old_file_path):
            os.remove(file_path)
            return old_file_path
    return file_path


# ------------------------------------------------------------------------------
# /utils/log.py
# ------------------------------------------------------------------------------
"""Util module to handle logs."""
import logging


class LogFilter(logging.Filter):
    """
    Custom Log Filter.

    Ignore logs from specific functions.
    """

    # pylint: disable = W0221
    def filter(self, record):
        """Return False for records emitted from a function named ``invoke``."""
        # The trailing comma makes this a tuple; ``("invoke")`` is a plain
        # string, which turned the check into a substring test that also
        # dropped records from functions named e.g. "in" or "".
        if record.funcName in ("invoke",):
            return False
        return True


# ------------------------------------------------------------------------------
# /utils/meta.py
# ------------------------------------------------------------------------------
"""Utility module to manage meta info."""
import platform

from rich.console import Console

from . import __copyright__, __license__, __version__

APP_VERSION = f"Telegram Media Downloader {__version__}"
DEVICE_MODEL = f"{platform.python_implementation()} {platform.python_version()}"
SYSTEM_VERSION = f"{platform.system()} {platform.release()}"
LANG_CODE = "en"


def print_meta(logger):
    """Prints meta-data of the downloader script.

    The version, copyright and license banner is written through a rich
    ``Console``; the device and system lines go to ``logger.info``.

    Parameters
    ----------
    logger
        Object exposing an ``info(message)`` method.
    """
    console = Console()
    # pylint: disable = C0301
    console.log(
        f"[bold]Telegram Media Downloader v{__version__}[/bold],\n[i]{__copyright__}[/i]"
    )
    console.log(f"Licensed under the terms of the {__license__}", end="\n\n")
    logger.info(f"Device: {DEVICE_MODEL} - {APP_VERSION}")
    logger.info(f"System: {SYSTEM_VERSION} ({LANG_CODE.upper()})")


# ------------------------------------------------------------------------------
# /utils/updates.py
# ------------------------------------------------------------------------------
"""Utility module to check for new release of telegram-media-downloader"""
import http.client
import json

from rich.console import Console
from rich.markdown import Markdown

from . import __version__


# pylint: disable = C0301
def check_for_updates() -> None:
    """Checks for new releases.

    Using Github API checks for new release and prints information of new release if available.

    Any failure (network error, timeout, unexpected payload) is logged to
    the console and never raised to the caller.
    """
    console = Console()
    try:
        headers: dict = {
            "Content-Type": "application/json",
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36",
        }
        # Bounded wait: without a timeout an unreachable host would block
        # the caller indefinitely.
        conn = http.client.HTTPSConnection("api.github.com", timeout=10)
        try:
            conn.request(
                method="GET",
                url="/repos/Dineshkarthik/telegram_media_downloader/releases/latest",
                headers=headers,
            )
            res = conn.getresponse()
            latest_release: dict = json.loads(res.read().decode("utf-8"))
        finally:
            # Release the socket whether or not the request succeeded.
            conn.close()
        if f"v{__version__}" != latest_release["tag_name"]:
            update_message: str = (
                f"## New version of Telegram-Media-Downloader is available - {latest_release['name']}\n"
                f"You are using an outdated version v{__version__} please pull in the changes using `git pull` or download the latest release.\n\n"
                f"Find more details about the latest release here - {latest_release['html_url']}"
            )
            console.print(Markdown(update_message))
    except Exception as e:
        # Deliberate best-effort: an update check must never stop the app.
        console.log(
            f"Following error occured when checking for updates\n{e.__class__}, {e}"
        )