├── .github └── dependabot.yml ├── .gitignore ├── .travis.yml ├── CHANGELOG.rst ├── COPYING ├── MANIFEST.in ├── README.rst ├── ci └── travis │ └── before_script.sh ├── docs ├── ISSUE_TEMPLATE.md ├── Makefile ├── requirements.txt └── source │ ├── batch.rst │ ├── conf.py │ ├── cron.rst │ ├── examples.rst │ ├── index.rst │ ├── install.rst │ ├── instalooter │ ├── batch.rst │ ├── cli.rst │ ├── index.rst │ ├── looters.rst │ ├── medias.rst │ ├── pages.rst │ ├── pbar.rst │ └── worker.rst │ └── usage.rst ├── instalooter ├── __init__.py ├── __main__.py ├── _impl.py ├── _uadetect.py ├── _utils.py ├── batch.py ├── cli │ ├── __init__.py │ ├── constants.py │ ├── login.py │ ├── logutils.py │ ├── threadutils.py │ └── time.py ├── looters.py ├── medias.py ├── pages.py ├── pbar.py ├── static │ └── splash.html └── worker.py ├── setup.cfg ├── setup.py └── tests ├── __init__.py ├── test_batch.py ├── test_cli.py ├── test_issues.py ├── test_login.py ├── test_looter.py ├── test_pbar.py └── utils ├── __init__.py ├── ig_mock.py ├── ig_mock.tar.gz └── method_names.py /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: pip 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | time: "04:00" 8 | open-pull-requests-limit: 10 9 | ignore: 10 | - dependency-name: sphinx 11 | versions: 12 | - 3.4.0 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are 
written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | docs/source/changelog.rst 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # IPython Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # dotenv 80 | .env 81 | 82 | # virtualenv 83 | venv/ 84 | ENV/ 85 | 86 | # Spyder project settings 87 | .spyderproject 88 | 89 | # Rope project settings 90 | .ropeproject 91 | 92 | # Codacy token 93 | .codacy.token 94 | 95 | # MyPy 96 | .mypy_cache 97 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: python 3 | cache: pip 4 | dist: xenial 5 | 6 | python: 7 | - 2.7 8 | - 3.7 9 | 10 | before_install: 11 | - pip install -U pip wheel setuptools 12 | 13 | install: 14 | - pip install -e .[dev] 15 | 16 | before_script: 17 | - ci/travis/before_script.sh 18 | 19 | script: 20 | - python -m coverage run -m unittest discover -v 21 | 22 | after_success: 23 | - python -m codecov 24 | - python -m codacy -r coverage.xml 25 | 26 | deploy: 27 | provider: pypi 28 | distributions: sdist bdist_wheel 29 | on: 30 | python: 3.7 31 | tags: true 32 | repo: althonos/InstaLooter 33 | server: https://upload.pypi.org/legacy/ 34 | skip_upload_docs: true 35 
| user: althonos 36 | password: 37 | secure: ExaoUT+/7v8VJp/vsllDm9AsO3/6s7Nka5rf+0TVz8Z7pZ+azFXK5hZmQ6nTtvSRYpZCfGH8ecYLbE0ek02BFqLc25/VfMfPao+6eUh1v3MGvHxH9ml+/9aCwWIv5C/T5dGURVj0udXxXuuTsdJwzxrb+K9taVZSjBbk6Ti8Fdu8yRMhOvI4xoiG69tunk6IXnOZjDCQix8O3Cn0OfA/zeD0IX1n8SVlKwsS+dOgFAJCujP865VuUh+2kM63Xx3OEb3caBGc1HXaVxNRXdhGykFeWrT4Mzrzd0T458Odc3S+DJG+2WVZbNC+chGAUBMq77z8JVirdWzydhdnhzi0DNZohRO0itstz53DuyqAtyTZ04xLO3+06svU5grdwilZGwy2KNZ1S1wTRlUgpxBhRL6dwHEAfaq4JlKlijEJJnFBMq2O2TKB/h+CdIALfWKSRNfOUm2GdG4ZwZOOLczbe4ATYoxGsqGP/lqnH2/iCrJFtoYmdFR8QZxJtNNwcNvVLT3MNx7eze7OpGgHWQjJq/m5hHoKJx6yW6U5XiRSt5gEowiq7vNrdwHSDRLe5CZJVndkiwelQQN5womKtWHFb1w9DRgBFj0ZscgJuxrsRl9uLfqcLB77tyS9q4BpQKsMIDw0P2bVI/P50KUsG0OUjHkstxS4nl9DLsxVPb+NmiA= 38 | 39 | notifications: 40 | email: 41 | - althonosdev@gmail.com 42 | -------------------------------------------------------------------------------- /CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | Changelog 2 | ========= 3 | 4 | All notable changes to this project will be documented in this file. 5 | 6 | The format is based on `Keep a Changelog `_ and this 7 | project adheres to `Semantic Versioning `_. 8 | 9 | Unreleased_ 10 | ----------- 11 | 12 | v2.4.4_ - 2020-07-15 13 | -------------------- 14 | 15 | Changed 16 | ''''''' 17 | - Bumped ``verboselogs`` to ``v14`` in requirements. 18 | 19 | 20 | v2.4.3_ - 2020-06-25 21 | -------------------- 22 | 23 | Changed 24 | ''''''' 25 | - Bumped ``tenacity`` to ``v6`` in requirements. 26 | 27 | Fixed 28 | ''''' 29 | - Change in Instagram login policy causing plaintext password to stop 30 | working. 31 | 32 | 33 | v2.4.2_ - 2019-12-27 34 | -------------------- 35 | 36 | Changed 37 | ''''''' 38 | - CLI `--time` option will now always use higher and lower time given as the 39 | timeframe, independently of the order they are given. 40 | 41 | Fixed 42 | ''''' 43 | - JSON files also get a proper timestamp set (pr #275). 
44 | 45 | 46 | v2.4.1_ - 2019-12-10 47 | -------------------- 48 | 49 | Fixed 50 | ''''' 51 | - Issue with additional data not being loaded from certain pages (#271) (pr #273) 52 | 53 | 54 | v2.4.0_ - 2019-06-29 55 | -------------------- 56 | 57 | Fixed 58 | ''''' 59 | - Attempt fix for `rhx_gis` issue (#247) (pr #248) 60 | - Fix crashes when downloading hashtag medias 61 | 62 | Changed 63 | ''''''' 64 | - Removed ``fake-useragent`` dependency. 65 | - Use a custom HTTP server to detect the user agent of the default web browser. 66 | 67 | v2.3.4_ - 2019-02-22 68 | -------------------- 69 | 70 | Fixed 71 | ''''' 72 | - Bumped supported ``fs`` version to ``~=2.1``. 73 | 74 | v2.3.3_ - 2019-02-11 75 | -------------------- 76 | 77 | Fixed 78 | ''''' 79 | - Bumped supported ``fs`` version to ``2.3.0``. 80 | 81 | v2.3.2_ - 2019-01-06 82 | --------------------- 83 | 84 | Added 85 | ''''' 86 | - Add zero padding for date and time in filenames (pr #224) 87 | 88 | Changed 89 | ''''''' 90 | - Add `tests` to source distribution (pr #228). 91 | - Bumped supported ``fs`` version to ``2.2.0``. 92 | 93 | v2.3.1_ - 2018-10-13 94 | -------------------- 95 | 96 | Fixed 97 | ''''' 98 | - Allow extracting post codes of length 10 from URLs. 99 | 100 | 101 | v2.3.0_ - 2018-09-05 102 | -------------------- 103 | 104 | Changed 105 | ''''''' 106 | - Bumped required ``tenacity`` version to ``5.0``. 107 | 108 | v2.2.0_ - 2018-08-19 109 | -------------------- 110 | 111 | Changed 112 | ''''''' 113 | - Bumped required ``fs`` version to ``2.1.0``. 114 | 115 | 116 | v2.1.0_ - 2018-07-31 117 | -------------------- 118 | 119 | Added 120 | ''''' 121 | - Posts can now be downloaded by giving directly the post URL (implement #184). 122 | 123 | Fixed 124 | ''''' 125 | - Batch will now log the name of the current account as well as occurring 126 | errors (fix #185) 127 | - CLI login will now properly display logger messages.
128 | - Library loggers do not have a `logging.StreamHandler` set by default 129 | anymore. 130 | - Attempt fixing login procedure in ``InstaLooter._login``. 131 | 132 | Changed 133 | ''''''' 134 | - Trying to download media from a non-existing user will display a nicer 135 | message: ``user not found: '...'`` (fix #194). 136 | - Batch mode will now continue to the next job if any error occurs, showing 137 | an error message instead of crashing (fix #185). 138 | 139 | 140 | v2.0.3_ - 2018-05-29 141 | -------------------- 142 | 143 | Fixed 144 | ''''' 145 | - Use the webpage shared data to find the CSRF token instead of response 146 | cookies. 147 | 148 | v2.0.2_ - 2018-05-17 149 | -------------------- 150 | 151 | Changed 152 | ''''''' 153 | - Bump ``coloredlogs`` required version to `10.0`. 154 | - Use ``verboselogs`` as the backend logging library. 155 | 156 | 157 | v2.0.1_ - 2018-04-18 158 | -------------------- 159 | 160 | Changed 161 | ''''''' 162 | - Updated the query hash in ``ProfileIterator`` (although previous seemed 163 | to keep working). 164 | 165 | Fixed 166 | ''''' 167 | - *RHX-GIS* computation not using the CSRF token anymore. 168 | - Lowered ``PageIterator.PAGE_SIZE`` to 50 to comply with Instagram. 169 | 170 | 171 | v2.0.0_ - 2018-04-16 172 | -------------------- 173 | 174 | Changed 175 | ''''''' 176 | - Passing a pre-initialised ``Session`` to ``PageIterator`` constructor 177 | is now mandatory. 178 | - ``HashtagIterator`` must be provided a ``rhx`` (it is inferred for ``ProfileIterator``). 179 | 180 | Fixed 181 | ''''' 182 | - API changes made by Instagram ca. April 2018 (excluding logging in / out). 183 | - Calling `operator.length_hint` on ``PageIterator`` objects will no longer 184 | cause duplicate server queries. 185 | 186 | 187 | v1.0.0_ - 2018-04-05 188 | -------------------- 189 | 190 | Added 191 | ''''' 192 | - This CHANGELOG file. 193 | - Typing annotations using the ``typing`` module.
194 | - Limited retries on connection failure, using `tenacity `_. 195 | - Real-world User Agent spoofing, using `fake-useragent `_ 196 | 197 | Fixed 198 | ''''' 199 | - API changes made by Instagram ca. March 2018. 200 | 201 | Changed 202 | ''''''' 203 | - Whole new API following major code refactor and rewrite. 204 | - Requests to the API directly use JSON and GraphQL queries when possible. 205 | - License is now GPLv3 *or later* instead of GPLv3. 206 | - I/O now uses PyFilesystem (FS URLs can be passed as CLI arguments). 207 | 208 | Removed 209 | ''''''' 210 | - Exif metadata handling (*will be added back in later release*). 211 | - ``urlgen`` capabilities (Instagram signs picture URL since 2018). 212 | - Python 3.5.1 support (lacks the required ``typing`` version). 213 | - ``progressbar2`` dependency, replaced by ``tqdm`` 214 | - ``hues`` dependency, replaced by ``coloredlogs`` 215 | - ``BeautifulSoup4`` dependency 216 | 217 | .. _Unreleased: https://github.com/althonos/InstaLooter/compare/v2.4.4...HEAD 218 | .. _v2.4.4: https://github.com/althonos/InstaLooter/compare/v2.4.3...v2.4.4 219 | .. _v2.4.3: https://github.com/althonos/InstaLooter/compare/v2.4.2...v2.4.3 220 | .. _v2.4.2: https://github.com/althonos/InstaLooter/compare/v2.4.1...v2.4.2 221 | .. _v2.4.1: https://github.com/althonos/InstaLooter/compare/v2.4.0...v2.4.1 222 | .. _v2.4.0: https://github.com/althonos/InstaLooter/compare/v2.3.4...v2.4.0 223 | .. _v2.3.4: https://github.com/althonos/InstaLooter/compare/v2.3.3...v2.3.4 224 | .. _v2.3.3: https://github.com/althonos/InstaLooter/compare/v2.3.2...v2.3.3 225 | .. _v2.3.2: https://github.com/althonos/InstaLooter/compare/v2.3.1...v2.3.2 226 | .. _v2.3.1: https://github.com/althonos/InstaLooter/compare/v2.3.0...v2.3.1 227 | .. _v2.3.0: https://github.com/althonos/InstaLooter/compare/v2.2.0...v2.3.0 228 | .. _v2.2.0: https://github.com/althonos/InstaLooter/compare/v2.1.0...v2.2.0 229 | ..
_v2.1.0: https://github.com/althonos/InstaLooter/compare/v2.0.3...v2.1.0 230 | .. _v2.0.3: https://github.com/althonos/InstaLooter/compare/v2.0.2...v2.0.3 231 | .. _v2.0.2: https://github.com/althonos/InstaLooter/compare/v2.0.1...v2.0.2 232 | .. _v2.0.1: https://github.com/althonos/InstaLooter/compare/v2.0.0...v2.0.1 233 | .. _v2.0.0: https://github.com/althonos/InstaLooter/compare/v1.0.0...v2.0.0 234 | .. _v1.0.0: https://github.com/althonos/InstaLooter/compare/v0.14.0...v1.0.0 235 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include COPYING 2 | include CHANGELOG.rst 3 | include setup.cfg 4 | 5 | recursive-include instalooter/static *.html 6 | 7 | graft tests 8 | global-exclude __pycache__ *.pyc 9 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | InstaLooter |starme| 2 | ==================== 3 | 4 | .. |starme| image:: https://img.shields.io/github/stars/althonos/InstaLooter.svg?style=social&label=Star 5 | :target: https://github.com/althonos/InstaLooter 6 | 7 | *Not all treasure's silver and gold, mate.* 8 | 9 | |build| |repo| |versions| |format| |coverage| |doc| |grade| |license| 10 | |keepachangelog| |saythanks| 11 | 12 | 13 | .. |build| image:: https://img.shields.io/travis/althonos/InstaLooter/master.svg?label=travis-ci&style=flat-square 14 | :target: https://travis-ci.org/althonos/InstaLooter/ 15 | 16 | .. |repo| image:: https://img.shields.io/badge/source-GitHub-303030.svg?style=flat-square 17 | :target: https://github.com/althonos/InstaLooter 18 | 19 | .. |versions| image:: https://img.shields.io/pypi/v/instalooter.svg?style=flat-square 20 | :target: https://pypi.org/project/instalooter 21 | 22 | .. 
|format| image:: https://img.shields.io/pypi/format/instalooter.svg?style=flat-square 23 | :target: https://pypi.org/project/instalooter 24 | 25 | .. |grade| image:: https://img.shields.io/codacy/grade/9b8c7da6887c4195b9e960cb04b59a91/master.svg?style=flat-square 26 | :target: https://www.codacy.com/app/althonos/InstaLooter/dashboard 27 | 28 | .. |coverage| image:: https://img.shields.io/codecov/c/github/althonos/InstaLooter/master.svg?style=flat-square 29 | :target: https://codecov.io/gh/althonos/InstaLooter 30 | 31 | .. |doc| image:: https://img.shields.io/readthedocs/instalooter.svg?style=flat-square 32 | :target: http://instalooter.readthedocs.io/en/stable/?badge=stable 33 | 34 | .. .. |requirements| image:: https://img.shields.io/requires/github/althonos/InstaLooter/master.svg?style=flat-square 35 | .. :target: https://requires.io/github/althonos/InstaLooter/requirements/?branch=master 36 | 37 | .. .. |health| image:: https://landscape.io/github/althonos/InstaLooter/master/landscape.svg?style=flat-square 38 | .. :target: https://landscape.io/github/althonos/InstaLooter/master 39 | 40 | .. |license| image:: https://img.shields.io/pypi/l/instalooter.svg?style=flat-square 41 | :target: https://choosealicense.com/licenses/gpl-3.0/ 42 | 43 | .. |keepachangelog| image:: https://img.shields.io/badge/keep%20a-changelog-8A0707.svg?maxAge=86400&style=flat-square 44 | :target: http://keepachangelog.com/ 45 | 46 | .. |saythanks| image:: https://img.shields.io/badge/say-thanks!-1EAEDB.svg?maxAge=86400&style=flat-square 47 | :target: https://saythanks.io/to/althonos 48 | 49 | 50 | InstaLooter is a program that can download any picture or video associated 51 | with an Instagram profile, without any API access. It can be seen as a 52 | re-implementation of the now deprecated `InstaRaider `_ 53 | developed by `@akurtovic `_. 54 | 55 | ``v1.0.0`` *was completely rewritten from scratch, and as such, will 56 | probably break compatibility with your homemade scripts.
Meanwhile, great care 57 | was taken to keep the CLI as consistent as possible with the previous versions, 58 | so it'll hopefully feel like home.* 59 | 60 | 61 | Requirements 62 | ------------ 63 | 64 | +--------------------+----------------------------+----------------------+------------------------+-------------------------+ 65 | | **coloredlogs** | Colored output | |PyPI coloredlogs| | |Source coloredlogs| | |License coloredlogs| | 66 | +--------------------+----------------------------+----------------------+------------------------+-------------------------+ 67 | | **dateutil** | Date manipulation | |PyPI dateutil| | |Source dateutil| | |License dateutil| | 68 | +--------------------+----------------------------+----------------------+------------------------+-------------------------+ 69 | | **docopt** | CLI arguments parsing | |PyPI docopt| | |Source docopt| | |License docopt| | 70 | +--------------------+----------------------------+----------------------+------------------------+-------------------------+ 71 | | **fs** | Filesystem handling | |PyPI fs| | |Source fs| | |License fs| | 72 | +--------------------+----------------------------+----------------------+------------------------+-------------------------+ 73 | | **requests** | HTTP handling | |PyPI requests| | |Source requests| | |License requests| | 74 | +--------------------+----------------------------+----------------------+------------------------+-------------------------+ 75 | | **six** | Python 2/3 compatibility | |PyPI six| | |Source six| | |License six| | 76 | +--------------------+----------------------------+----------------------+------------------------+-------------------------+ 77 | | **tenacity** | Retry until success | |PyPI tenacity| | |Source tenacity| | |License tenacity| | 78 | +--------------------+----------------------------+----------------------+------------------------+-------------------------+ 79 | | **tqdm** | Dynamic output in CLI | |PyPI tqdm| | |Source tqdm| | 
|License tqdm| | 80 | +--------------------+----------------------------+----------------------+------------------------+-------------------------+ 81 | | **verboselogs** | More detailed logs | |PyPI verboselogs| | |Source verboselogs| | |License verboselogs| | 82 | +--------------------+----------------------------+----------------------+------------------------+-------------------------+ 83 | 84 | 85 | .. |PyPI coloredlogs| image:: https://img.shields.io/pypi/v/coloredlogs.svg?style=flat-square 86 | :target: https://pypi.org/project/coloredlogs 87 | 88 | .. |PyPI dateutil| image:: https://img.shields.io/pypi/v/python-dateutil.svg?style=flat-square 89 | :target: https://pypi.org/project/python-dateutil/ 90 | 91 | .. |PyPI docopt| image:: https://img.shields.io/pypi/v/docopt.svg?style=flat-square 92 | :target: https://pypi.org/project/docopt/ 93 | 94 | .. |PyPI fs| image:: https://img.shields.io/pypi/v/fs.svg?style=flat-square 95 | :target: https://pypi.org/project/fs/ 96 | 97 | .. |PyPI fakeua| image:: https://img.shields.io/pypi/v/fake-useragent.svg?style=flat-square 98 | :target: https://pypi.org/project/fake-useragent/ 99 | 100 | .. |PyPI requests| image:: https://img.shields.io/pypi/v/requests.svg?style=flat-square 101 | :target: https://pypi.org/project/requests 102 | 103 | .. |PyPI six| image:: https://img.shields.io/pypi/v/six.svg?style=flat-square 104 | :target: https://pypi.org/project/six 105 | 106 | .. |PyPI tenacity| image:: https://img.shields.io/pypi/v/tenacity.svg?style=flat-square 107 | :target: https://pypi.org/project/tenacity 108 | 109 | .. |PyPI tqdm| image:: https://img.shields.io/pypi/v/tqdm.svg?style=flat-square 110 | :target: https://pypi.org/project/tqdm 111 | 112 | .. |PyPI verboselogs| image:: https://img.shields.io/pypi/v/verboselogs.svg?style=flat-square 113 | :target: https://pypi.org/project/verboselogs 114 | 115 | .. 
|Source coloredlogs| image:: https://img.shields.io/badge/source-GitHub-303030.svg?style=flat-square 116 | :target: https://github.com/xolox/python-coloredlogs 117 | 118 | .. |Source dateutil| image:: https://img.shields.io/badge/source-GitHub-303030.svg?style=flat-square 119 | :target: https://github.com/dateutil/dateutil/ 120 | 121 | .. |Source docopt| image:: https://img.shields.io/badge/source-GitHub-303030.svg?style=flat-square 122 | :target: https://github.com/docopt/docopt 123 | 124 | .. |Source fs| image:: https://img.shields.io/badge/source-GitHub-303030.svg?style=flat-square 125 | :target: https://github.com/PyFilesystem/pyfilesystem2 126 | 127 | .. |Source fakeua| image:: https://img.shields.io/badge/source-GitHub-303030.svg?style=flat-square 128 | :target: https://github.com/hellysmile/fake-useragent 129 | 130 | .. |Source requests| image:: https://img.shields.io/badge/source-GitHub-303030.svg?style=flat-square 131 | :target: https://github.com/kennethreitz/requests 132 | 133 | .. |Source six| image:: https://img.shields.io/badge/source-GitHub-303030.svg?style=flat-square 134 | :target: https://github.com/benjaminp/six 135 | 136 | .. |Source tenacity| image:: https://img.shields.io/badge/source-GitHub-303030.svg?style=flat-square 137 | :target: https://github.com/jd/tenacity 138 | 139 | .. |Source tqdm| image:: https://img.shields.io/badge/source-GitHub-303030.svg?style=flat-square 140 | :target: https://github.com/tqdm/tqdm 141 | 142 | .. |Source verboselogs| image:: https://img.shields.io/badge/source-GitHub-303030.svg?style=flat-square 143 | :target: https://github.com/xolox/python-verboselogs 144 | 145 | .. |License coloredlogs| image:: https://img.shields.io/badge/license-MIT-blue.svg?style=flat-square 146 | :target: https://choosealicense.com/licenses/mit/ 147 | 148 | .. |License dateutil| image:: https://img.shields.io/pypi/l/python-dateutil.svg?style=flat-square 149 | :target: https://choosealicense.com/licenses/apache-2.0/ 150 | 151 | .. 
|License docopt| image:: https://img.shields.io/pypi/l/docopt.svg?style=flat-square 152 | :target: https://choosealicense.com/licenses/mit/ 153 | 154 | .. |License fs| image:: https://img.shields.io/pypi/l/fs.svg?style=flat-square 155 | :target: https://choosealicense.com/licenses/mit/ 156 | 157 | .. |License fakeua| image:: https://img.shields.io/badge/license-Apache_2.0-blue.svg?style=flat-square 158 | :target: https://choosealicense.com/licenses/apache-2.0/ 159 | 160 | .. |License requests| image:: https://img.shields.io/pypi/l/requests.svg?style=flat-square 161 | :target: https://choosealicense.com/licenses/apache-2.0/ 162 | 163 | .. |License six| image:: https://img.shields.io/pypi/l/six.svg?style=flat-square 164 | :target: https://choosealicense.com/licenses/mit/ 165 | 166 | .. |License tenacity| image:: https://img.shields.io/badge/license-Apache_2.0-blue.svg?style=flat-square 167 | :target: https://choosealicense.com/licenses/apache-2.0/ 168 | 169 | .. |License tqdm| image:: https://img.shields.io/pypi/l/tqdm.svg?style=flat-square 170 | :target: https://choosealicense.com/licenses/mpl-2.0/ 171 | 172 | .. |License verboselogs| image:: https://img.shields.io/badge/license-MIT-blue.svg?style=flat-square 173 | :target: https://choosealicense.com/licenses/mit/ 174 | 175 | 176 | Installation 177 | ------------ 178 | 179 | InstaLooter is available on PyPI to install with ``pip``. If you are not 180 | familiar with the package management of the Python ecosystem, please see the 181 | `Installation page `_ 182 | of the `documentation `_. 
183 | Yet, you will probably end up using the following command:: 184 | 185 | pip install --user instalooter --pre 186 | 187 | 188 | Usage 189 | ----- 190 | 191 | instalooter comes with its CLI:: 192 | 193 | $ instalooter user [] [options] 194 | $ instalooter hashtag [] [options] 195 | $ instalooter post [] [options] 196 | $ instalooter batch [] 197 | 198 | See ``instalooter --usage`` for all possible uses, or ``instalooter --help`` 199 | for a complete usage guide. 200 | 201 | 202 | Logging in and out 203 | ------------------ 204 | There are two ways to login on Instagram through instalooter: 205 | 206 | * use the *login* subcommand (``instalooter login``) to interactively login 207 | using your username and password. 208 | * give a ``--username`` (and, if you want, a ``--password``) argument to any of 209 | the download commands. 210 | 211 | In both cases, a session cookie will be created in a cache-specific folder. 212 | To delete it and close your session on the server, use the ``logout`` 213 | subcommand. 214 | 215 | 216 | Examples 217 | -------- 218 | 219 | Download all **pictures** from the *instagram* profile in the current directory:: 220 | 221 | $ instalooter user instagram 222 | 223 | Download the latest 20 pictures or videos tagged with *python* to */tmp*:: 224 | 225 | $ instalooter hashtag python /tmp -n 20 --get-videos -c MYLOGIN 226 | 227 | Download a single post from an url in the `~/Pictures` directory:: 228 | 229 | $ instalooter post "https://www.instagram.com/p/BFB6znLg5s1/" ~/Pictures 230 | 231 | Use a configuration file to download from several accounts using custom parameters 232 | (see `Batch mode `_):: 233 | 234 | $ instalooter batch /path/to/a/config/file.ini 235 | 236 | See more on the `Usage page `_ 237 | of the `online documentation `_.
238 | -------------------------------------------------------------------------------- /ci/travis/before_script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | UA="Mozilla/5.0 (X11; Linux x86_64; rv:66.0) Gecko/20100101 Firefox/66.0" 4 | python -c "from instalooter.looters import InstaLooter; InstaLooter._cachefs().settext(u'user-agent.txt', u'$UA')" 5 | -------------------------------------------------------------------------------- /docs/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 5 | 6 | ## Library version 7 | 8 | *What's the installed library version ? Check with `instalooter --version`*: 9 | 10 | ``` 11 | instalooter vX.Y.Z 12 | ``` 13 | 14 | ## Environment 15 | 16 | *Describe here your environment, including:* 17 | 18 | * *OS* 19 | * *Python version* 20 | * *`setuptools` version if reporting an issue with installation* 21 | * *non-standard Python implementation if any* 22 | 23 | 24 | ## Error description - installation 25 | 26 | *If you have an issue with installation, make sure you use a recent `setuptools` version 27 | before filing a bug ! If the error is still there, describe the command you used to 28 | install, and make sure you reported your environment in details. In particular, 29 | if you encounter a critical error with the CLI, please post the program output when 30 | running with the `--traceback` flag.* 31 | 32 | 33 | ## Error description - runtime 34 | 35 | *If you have an issue at runtime, include the required information below:* 36 | 37 | ### Reproducible test case 38 | 39 | *Are you using the CLI ? If so, include a command that can be used to re-raise the 40 | error, with actual arguments anybody can try:* 41 | 42 | ``` 43 | instalooter ... 44 | ``` 45 | 46 | *Are you using the API ? If so, include a small snippet that can be used to re-raise the 47 | error:* 48 | 49 | ```python 50 | from instalooter.looters import ... 
51 | ``` 52 | 53 | 54 | ### Expected behaviour 55 | 56 | *What's supposed to happen ? That's where you can ask for a new feature as well* 57 | 58 | ### Actual behaviour 59 | 60 | *What's actually happening ? Leave empty if asking for a new feature* 61 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 16 | 17 | .PHONY: help 18 | help: 19 | @echo "Please use \`make ' where is one of" 20 | @echo " html to make standalone HTML files" 21 | @echo " dirhtml to make HTML files named index.html in directories" 22 | @echo " singlehtml to make a single large HTML file" 23 | @echo " pickle to make pickle files" 24 | @echo " json to make JSON files" 25 | @echo " htmlhelp to make HTML files and a HTML help project" 26 | @echo " qthelp to make HTML files and a qthelp project" 27 | @echo " applehelp to make an Apple Help Book" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " epub3 to make an epub3" 31 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 32 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 33 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 34 | @echo " text to make text files" 35 | @echo " man to make manual pages" 36 | @echo " texinfo to make Texinfo files" 37 | @echo "
info to make Texinfo files and run them through makeinfo" 38 | @echo " gettext to make PO message catalogs" 39 | @echo " changes to make an overview of all changed/added/deprecated items" 40 | @echo " xml to make Docutils-native XML files" 41 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 42 | @echo " linkcheck to check all external links for integrity" 43 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 44 | @echo " coverage to run coverage check of the documentation (if enabled)" 45 | @echo " dummy to check syntax errors of document sources" 46 | 47 | .PHONY: clean 48 | clean: 49 | rm -rf $(BUILDDIR)/* 50 | 51 | .PHONY: 52 | cleanhtml: 53 | rm -rf $(BUILDDIR)/*html* 54 | 55 | .PHONY: html 56 | html: 57 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 58 | @echo 59 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 60 | #xdg-open $(BUILDDIR)/html/index.html 61 | 62 | .PHONY: dirhtml 63 | dirhtml: 64 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 65 | @echo 66 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 67 | 68 | .PHONY: singlehtml 69 | singlehtml: 70 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 71 | @echo 72 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 73 | 74 | .PHONY: pickle 75 | pickle: 76 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 77 | @echo 78 | @echo "Build finished; now you can process the pickle files." 79 | 80 | .PHONY: json 81 | json: 82 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 83 | @echo 84 | @echo "Build finished; now you can process the JSON files." 85 | 86 | .PHONY: htmlhelp 87 | htmlhelp: 88 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 89 | @echo 90 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 91 | ".hhp project file in $(BUILDDIR)/htmlhelp." 
92 | 93 | .PHONY: qthelp 94 | qthelp: 95 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 96 | @echo 97 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 98 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 99 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/InstaLooter.qhcp" 100 | @echo "To view the help file:" 101 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/InstaLooter.qhc" 102 | 103 | .PHONY: applehelp 104 | applehelp: 105 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 106 | @echo 107 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 108 | @echo "N.B. You won't be able to view it unless you put it in" \ 109 | "~/Library/Documentation/Help or install it in your application" \ 110 | "bundle." 111 | 112 | .PHONY: devhelp 113 | devhelp: 114 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 115 | @echo 116 | @echo "Build finished." 117 | @echo "To view the help file:" 118 | @echo "# mkdir -p $$HOME/.local/share/devhelp/InstaLooter" 119 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/InstaLooter" 120 | @echo "# devhelp" 121 | 122 | .PHONY: epub 123 | epub: 124 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 125 | @echo 126 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 127 | 128 | .PHONY: epub3 129 | epub3: 130 | $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 131 | @echo 132 | @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." 133 | 134 | .PHONY: latex 135 | latex: 136 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 137 | @echo 138 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 139 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 140 | "(use \`make latexpdf' here to do that automatically)." 
141 | 142 | .PHONY: latexpdf 143 | latexpdf: 144 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 145 | @echo "Running LaTeX files through pdflatex..." 146 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 147 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 148 | 149 | .PHONY: latexpdfja 150 | latexpdfja: 151 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 152 | @echo "Running LaTeX files through platex and dvipdfmx..." 153 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 154 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 155 | 156 | .PHONY: text 157 | text: 158 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 159 | @echo 160 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 161 | 162 | .PHONY: man 163 | man: 164 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 165 | @echo 166 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 167 | 168 | .PHONY: texinfo 169 | texinfo: 170 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 171 | @echo 172 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 173 | @echo "Run \`make' in that directory to run these through makeinfo" \ 174 | "(use \`make info' here to do that automatically)." 175 | 176 | .PHONY: info 177 | info: 178 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 179 | @echo "Running Texinfo files through makeinfo..." 180 | make -C $(BUILDDIR)/texinfo info 181 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 182 | 183 | .PHONY: gettext 184 | gettext: 185 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 186 | @echo 187 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 188 | 189 | .PHONY: changes 190 | changes: 191 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 192 | @echo 193 | @echo "The overview file is in $(BUILDDIR)/changes." 
194 | 195 | .PHONY: linkcheck 196 | linkcheck: 197 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 198 | @echo 199 | @echo "Link check complete; look for any errors in the above output " \ 200 | "or in $(BUILDDIR)/linkcheck/output.txt." 201 | 202 | .PHONY: doctest 203 | doctest: 204 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 205 | @echo "Testing of doctests in the sources finished, look at the " \ 206 | "results in $(BUILDDIR)/doctest/output.txt." 207 | 208 | .PHONY: coverage 209 | coverage: 210 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 211 | @echo "Testing of coverage in the sources finished, look at the " \ 212 | "results in $(BUILDDIR)/coverage/python.txt." 213 | 214 | .PHONY: xml 215 | xml: 216 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 217 | @echo 218 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 219 | 220 | .PHONY: pseudoxml 221 | pseudoxml: 222 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 223 | @echo 224 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 225 | 226 | .PHONY: dummy 227 | dummy: 228 | $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy 229 | @echo 230 | @echo "Build finished. Dummy builder generates no files." 231 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | setuptools >=30.3 2 | Sphinx ~=3.1 3 | sphinx-bootstrap-theme ~=0.7 4 | semantic-version ~=2.8 5 | -------------------------------------------------------------------------------- /docs/source/batch.rst: -------------------------------------------------------------------------------- 1 | Batch mode 2 | ========== 3 | 4 | ``instaLooter`` supports a batch mode for use cases that are more requiring than 5 | just download from a profile once or twice. 
To use it, you must specify a 6 | *batch config file* to the CLI. The file is in the Python configuration format, 7 | very close to the Windows **INI** format. 8 | 9 | Format 10 | ------ 11 | A *config file* contains at least one section, but can contain more if needed. 12 | A section is organised as shown below, with a header and key-value pairs using 13 | the ``=`` sign: 14 | 15 | .. code-block:: ini 16 | 17 | [my section header] 18 | key = value 19 | other_key = other_value 20 | 21 | Specifying targets 22 | ------------------ 23 | 24 | Users can be specified in the *users* parameter of each section, and hashtags 25 | in the *hashtags* parameter. Those sections take a ``key: value`` pair per line, 26 | where *key* is the name of the user, and *value* the path to the directory where 27 | the medias will be downloaded. For instance: 28 | 29 | .. code-block:: ini 30 | 31 | [Video Games] 32 | users = 33 | borderlands: /tmp/borderlands 34 | ffxv: /tmp/ffxv 35 | hashtags = 36 | nierautomata: /tmp/nier 37 | 38 | [Music] 39 | users = 40 | perm36 : ~/Music/Perm36 41 | 42 | 43 | Logging in 44 | ---------- 45 | 46 | Each section can be provided with a ``username`` and a ``password`` parameter: 47 | 48 | * if none are given, the scraping is done anonymously or using the last session 49 | you logged with (through ``instaLooter login`` for instance, or the session 50 | of the previous section). 51 | * if only ``username`` is given, ``instaLooter`` will interactively ask for the 52 | associated password and then login. 53 | * if both ``username`` and ``password`` are given, then ``instaLooter`` will 54 | logout from any previous session and login quietly. 
55 | 56 | 57 | Passing parameters 58 | ------------------ 59 | 60 | Each section can be given the same parameters as the command line: 61 | 62 | ``add-metadata`` 63 | set to *True* to add metadata to the downloaded images 64 | ``get-videos`` 65 | set to *True* to download videos as well as images 66 | ``jobs`` 67 | the number of threads to use, defaults to ``16`` 68 | ``template`` 69 | the template to use, without quotes, defaults to ``{id}`` 70 | ``videos-only`` 71 | set to *True* to download only videos 72 | ``quiet`` 73 | set to *True* to hide the progress bar 74 | ``new`` 75 | set to *True* to only download new medias 76 | ``num-to-dl`` 77 | the number of images to download 78 | ``dump-json`` 79 | set to *True* to dump metadata in JSON format 80 | ``dump-only`` 81 | set to *True* to only dump metadata, not downloading anything. 82 | ``extended-dump`` 83 | set to *True* to fetch additional information when dumping metadata. 84 | 85 | For instance, to download 3 new videos from ``#funny`` and ``#nsfw``: 86 | 87 | .. code-block:: ini 88 | 89 | [Vids] 90 | videos-only = true 91 | new = true 92 | num-to-dl = 3 93 | hashtags = 94 | funny: ~/Videos 95 | nsfw: ~/Videos 96 | 97 | 98 | Running the program 99 | ------------------- 100 | 101 | Simply run the following command 102 | 103 | .. code-block:: console 104 | 105 | instaLooter batch /path/to/your/batch.ini 106 | 107 | 108 | Bugs 109 | ---- 110 | 111 | .. warning:: 112 | 113 | This feature may not be completely functional yet ! I would say that it is 114 | still in beta, were the whole ``instaLooter`` program not in beta too **:D**. 115 | 116 | Please report any bugs caused by this feature to the `Github 117 | issue tracker `_, adding the 118 | configuration file as an attachment! 
119 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | # 3 | # Configuration file for the Sphinx documentation builder. 4 | # 5 | # This file does only contain a selection of the most common options. For a 6 | # full list see the documentation: 7 | # http://www.sphinx-doc.org/en/stable/config 8 | 9 | 10 | # -- Imports ----------------------------------------------------------------- 11 | 12 | import os 13 | import sys 14 | import shutil 15 | import collections 16 | import datetime 17 | import semantic_version 18 | import sphinx_bootstrap_theme 19 | 20 | # -- Path setup -------------------------------------------------------------- 21 | 22 | # If extensions (or modules to document with autodoc) are in another directory, 23 | # add these directories to sys.path here. If the directory is relative to the 24 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
25 | 26 | docsrc_dir = os.path.abspath(os.path.join(__file__, '..')) 27 | project_dir = os.path.abspath(os.path.join(docsrc_dir, '..', '..')) 28 | 29 | sys.path.insert(0, project_dir) 30 | import instalooter 31 | 32 | # -- Files setup ------------------------------------------------------------- 33 | 34 | with open(os.path.join(project_dir, "CHANGELOG.rst"), 'rb') as src: 35 | with open(os.path.join(docsrc_dir, "changelog.rst"), 'wb') as dst: 36 | dst.write(b":tocdepth: 2\n\n") 37 | shutil.copyfileobj(src, dst) 38 | 39 | # -- Project information ----------------------------------------------------- 40 | 41 | project = 'InstaLooter' 42 | author = instalooter.__author__ 43 | copyright = '2016-{}, {}'.format(datetime.date.today().year, author) 44 | 45 | # The full version, including alpha/beta/rc tags 46 | release = instalooter.__version__ 47 | # Semantic version 48 | semver = semantic_version.Version(instalooter.__version__) 49 | # The short X.Y version 50 | version = "{v.major}.{v.minor}.{v.patch}".format(v=semver) 51 | 52 | # -- General configuration --------------------------------------------------- 53 | 54 | # If your documentation needs a minimal Sphinx version, state it here. 55 | # 56 | needs_sphinx = '1.7' 57 | 58 | # Add any Sphinx extension module names here, as strings. They can be 59 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 60 | # ones. 61 | extensions = [ 62 | 'sphinx.ext.autodoc', 63 | 'sphinx.ext.autosummary', 64 | 'sphinx.ext.doctest', 65 | 'sphinx.ext.intersphinx', 66 | 'sphinx.ext.todo', 67 | 'sphinx.ext.coverage', 68 | 'sphinx.ext.mathjax', 69 | 'sphinx.ext.ifconfig', 70 | 'sphinx.ext.viewcode', 71 | 'sphinx.ext.githubpages', 72 | 'sphinx.ext.napoleon', 73 | 'sphinx_bootstrap_theme', 74 | ] 75 | 76 | # Add any paths that contain templates here, relative to this directory. 77 | templates_path = ['_templates'] 78 | 79 | # The suffix(es) of source filenames. 
80 | # You can specify multiple suffix as a list of string: 81 | # 82 | # source_suffix = ['.rst', '.md'] 83 | source_suffix = '.rst' 84 | 85 | # The master toctree document. 86 | master_doc = 'index' 87 | 88 | # The language for content autogenerated by Sphinx. Refer to documentation 89 | # for a list of supported languages. 90 | # 91 | # This is also used if you do content translation via gettext catalogs. 92 | # Usually you set "language" from the command line for these cases. 93 | language = None 94 | 95 | # List of patterns, relative to source directory, that match files and 96 | # directories to ignore when looking for source files. 97 | # This pattern also affects html_static_path and html_extra_path . 98 | exclude_patterns = [] 99 | 100 | # The name of the Pygments (syntax highlighting) style to use. 101 | pygments_style = 'sphinx' 102 | 103 | # The name of the default role for inline references 104 | default_role = "py:obj" 105 | 106 | # -- Options for HTML output ------------------------------------------------- 107 | 108 | # The theme to use for HTML and HTML Help pages. See the documentation for 109 | # a list of builtin themes. 110 | # 111 | html_theme = 'bootstrap' 112 | 113 | # Add any paths that contain custom themes here, relative to this directory. 114 | html_theme_path = sphinx_bootstrap_theme.get_html_theme_path() 115 | 116 | # Theme options are theme-specific and customize the look and feel of a theme 117 | # further. For a list of options available for each theme, see the 118 | # documentation. 119 | # 120 | html_theme_options = { 121 | 122 | # Bootswatch (http://bootswatch.com/) theme. 123 | 'bootswatch_theme': "simplex", 124 | 125 | # Choose Bootstrap version. 126 | 'bootstrap_version': "3", 127 | 128 | # Tab name for entire site. (Default: "Site") 129 | 'navbar_site_name': "Documentation", 130 | 131 | # HTML navbar class (Default: "navbar") to attach to
element. 132 | # For black navbar, do "navbar navbar-inverse" 133 | 'navbar_class': "navbar navbar-inverse", 134 | 135 | # Render the next and previous page links in navbar. (Default: true) 136 | 'navbar_sidebarrel': True, 137 | 138 | # Render the current pages TOC in the navbar. (Default: true) 139 | 'navbar_pagenav': False, 140 | 141 | # A list of tuples containing pages or urls to link to. 142 | 'navbar_links': [ 143 | ("GitHub", "https://github.com/althonos/InstaLooter", True), 144 | ("PyPI", "https://pypi.org/project/InstaLooter", True), 145 | ], 146 | 147 | } 148 | 149 | # Add any paths that contain custom static files (such as style sheets) here, 150 | # relative to this directory. They are copied after the builtin static files, 151 | # so a file named "default.css" will overwrite the builtin "default.css". 152 | html_static_path = ['_static'] 153 | 154 | # Custom sidebar templates, must be a dictionary that maps document names 155 | # to template names. 156 | # 157 | # The default sidebars (for documents that don't match any pattern) are 158 | # defined by theme itself. Builtin themes are using these templates by 159 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', 160 | # 'searchbox.html']``. 161 | # 162 | html_sidebars = { 163 | "*": ['localtoc.html'], 164 | os.path.join("instalooter", "*"): [], 165 | } 166 | 167 | # -- Options for HTMLHelp output --------------------------------------------- 168 | 169 | # Output file base name for HTML help builder. 170 | htmlhelp_basename = 'InstaLooter' 171 | 172 | # -- Options for LaTeX output ------------------------------------------------ 173 | 174 | latex_elements = { 175 | # The paper size ('letterpaper' or 'a4paper'). 176 | # 177 | # 'papersize': 'letterpaper', 178 | 179 | # The font size ('10pt', '11pt' or '12pt'). 180 | # 181 | # 'pointsize': '10pt', 182 | 183 | # Additional stuff for the LaTeX preamble. 
184 | # 185 | # 'preamble': '', 186 | 187 | # Latex figure (float) alignment 188 | # 189 | # 'figure_align': 'htbp', 190 | } 191 | 192 | # Grouping the document tree into LaTeX files. List of tuples 193 | # (source start file, target name, title, 194 | # author, documentclass [howto, manual, or own class]). 195 | latex_documents = [ 196 | (master_doc, 'InstaLooter.tex', 'InstaLooter Documentation', 197 | 'Martin Larralde', 'manual'), 198 | ] 199 | 200 | # -- Options for manual page output ------------------------------------------ 201 | 202 | # One entry per manual page. List of tuples 203 | # (source start file, name, description, authors, manual section). 204 | man_pages = [ 205 | (master_doc, 'instalooter', 'InstaLooter Documentation', 206 | [author], 1) 207 | ] 208 | 209 | # -- Options for Texinfo output ---------------------------------------------- 210 | 211 | # Grouping the document tree into Texinfo files. List of tuples 212 | # (source start file, target name, title, author, 213 | # dir menu entry, description, category) 214 | texinfo_documents = [ 215 | (master_doc, 'InstaLooter', 'InstaLooter Documentation', 216 | author, 'InstaLooter', 'One line description of project.', 217 | 'Miscellaneous'), 218 | ] 219 | 220 | # -- Extension configuration ------------------------------------------------- 221 | 222 | # -- Options for autosummary ------------------------------------------------- 223 | 224 | autosummary_generate = [os.path.join("instalooter", "index")] 225 | 226 | # -- Options for autodoc ----------------------------------------------------- 227 | 228 | autodoc_member_order = "groupwise" 229 | autoclass_content = "both" 230 | 231 | # -- Options for intersphinx extension --------------------------------------- 232 | 233 | # Example configuration for intersphinx: refer to the Python standard library. 
234 | intersphinx_mapping = { 235 | 'python': ('https://docs.python.org/3/', None), 236 | 'fs': ('https://docs.pyfilesystem.org/en/latest/', None), 237 | 'requests': ('https://requests.readthedocs.io/en/master/', None), 238 | 'coloredlogs': ('https://coloredlogs.readthedocs.io/en/latest/', None), 239 | } 240 | 241 | # -- Options for todo extension ---------------------------------------------- 242 | 243 | # If true, `todo` and `todoList` produce output, else they produce nothing. 244 | todo_include_todos = True 245 | -------------------------------------------------------------------------------- /docs/source/cron.rst: -------------------------------------------------------------------------------- 1 | Periodic downloads 2 | ================== 3 | 4 | ``instaLooter`` may be used to update a local mirror of an instagram account, 5 | and as such it may be desired to run it periodically, without needing to update 6 | manually. 7 | 8 | 9 | 10 | UNIX 11 | ---- 12 | 13 | To support the UNIX philosophy, the program does not implement this feature itself 14 | but should integrate well with established alternatives. The following examples 15 | make use of either `Cron `_ or 16 | `SystemD timers `_. 17 | 18 | 19 | Cron 20 | ^^^^ 21 | First of all, make sure ``Cron`` is installed, and if not, refer to the 22 | package manager of your distribution (if you're on MacOS, give a try to 23 | `homebrew `_ if not using it already !). 24 | 25 | Then, edit ``Cron`` to add a scheduled task: 26 | 27 | .. code-block:: console 28 | 29 | $ crontab -e 30 | 31 | This will open a file using the **$EDITOR** system variable to find a text 32 | editor, such as *nano*, *pico*, *vi*, etc.
Then, add one line as one of the 33 | examples below to run instaLooter periodically (you can add more than one line 34 | if you have more than one goal in mind): 35 | 36 | * Download maximum 3 new ``#funny`` videos to ``~/Videos`` every hour:: 37 | 38 | @hourly /usr/bin/env python -m instaLooter hashtag funny ~/Videos -N -n 3 -V 39 | 40 | * Download new pictures w/ metadata from the ``instagram`` account at every reboot:: 41 | 42 | @reboot /usr/bin/env python -m instaLooter instagram ~/Pictures/instagram -Nm 43 | 44 | * Use a configuration file to download in :doc:`batch` every week on Sunday, 00:00 :: 45 | 46 | @weekly /usr/bin/env python -m instaLooter batch ~/myLooter.ini 47 | 48 | 49 | To disable a scheduled task, simply remove the line associated to that task within 50 | *crontab*. 51 | 52 | .. seealso:: 53 | 54 | * The `CronHowTo `_ hosted 55 | on *ubuntu.org* for a complete understanding of the crontab line format. 56 | 57 | SystemD 58 | ^^^^^^^ 59 | You'll probably use this alternative if your system is already running on top of 60 | SystemD. If not, you should probably turn to ``Cron``. Simply check for the 61 | existence of a ``systemctl`` executable (e.g. running ``systemctl --help``) to 62 | see if you're using SystemD. 63 | 64 | Create a new service file, either in ``/etc/systemd/system/`` for system-wide jobs, 65 | or in ``~/.config/systemd/user/`` for user-only jobs, named for instance 66 | ``looter.service`` (you can use any name as long as the file has a *.service* 67 | extension), with the following content: 68 | 69 | .. code-block:: ini 70 | 71 | [Unit] 72 | Description=my custom periodic instagram looter 73 | 74 | [Service] 75 | Type=oneshot 76 | ExecStart=/usr/bin/env python -m instaLooter 77 | 78 | Make sure the ``instaLooter`` module is accessible to the ``systemd`` manager, 79 | i.e. if you're using system-wide jobs that the module was installed in */usr* (not 80 | with ``pip install --user instaLooter`` but with ``pip install instaLooter``).
81 | 82 | To test your service, run ``systemctl start looter.service`` (using the name of 83 | your file), or ``systemctl --user start looter.service`` if you want to use 84 | user-only jobs. There should be no output if everything works fine. 85 | 86 | If a bug occurs check the logs with *journalctl*: 87 | 88 | .. code-block:: console 89 | 90 | # journalctl looter.service 91 | $ journalctl --user --user-unit looter.service 92 | 93 | Once your service works fine, create a timer for your new service, named like 94 | and located next to your service file, but with a ``.timer`` extension, and 95 | the following content: 96 | 97 | .. code-block:: ini 98 | 99 | [Unit] 100 | Description=run my custom periodic instagram looter hourly 101 | 102 | [Timer] 103 | # Time to wait after booting before we run first time 104 | OnBootSec=10min 105 | # Time between running each consecutive time 106 | OnUnitActiveSec=1h 107 | Unit=looter.service 108 | 109 | Finally, enable and start your timer with one of the following commands: 110 | 111 | .. code-block:: console 112 | 113 | # systemctl start looter.timer && systemctl enable looter.timer 114 | $ systemctl --user start looter.timer && systemctl --user enable looter.timer 115 | 116 | To disable the timer, use the same command as above, replacing ``start`` with 117 | ``stop`` and ``enable`` by ``disable``, and remove the service and timer files 118 | if you want to completely uninstall the timer. 119 | 120 | .. seealso:: 121 | 122 | * The `SystemD/timers `_ 123 | and the whole `SystemD `_ 124 | pages on the *Archlinux wiki* for more details about timer and services. 125 | * The `post on Jason's blog `_ 126 | that helped shaping this tutorial. 127 | -------------------------------------------------------------------------------- /docs/source/examples.rst: -------------------------------------------------------------------------------- 1 | API Examples 2 | ============ 3 | 4 | .. 
toctree:: 5 | 6 | ``instaLooter`` also provides an :abbr:`API (Application Programmable Interface)` 7 | that can be used to extend the capabilities of ``instaLooter``, to fit your 8 | needs more tightly or to integrate ``instaLooter`` to your program. 9 | 10 | 11 | Download pictures 12 | ----------------- 13 | 14 | Download 50 posts from the `Dream Wife band `_ 15 | account to the `Pictures` directory in your home folder (you better be checking 16 | their music though): 17 | 18 | .. code:: python 19 | 20 | from instalooter.looters import ProfileLooter 21 | looter = ProfileLooter("dreamwifetheband") 22 | looter.download('~/Pictures', media_count=50) 23 | 24 | 25 | Dump media links 26 | ---------------- 27 | 28 | Create a list with all the links to picture and video files tagged with 29 | `#ramones `_ in a file 30 | named `ramones.txt`: 31 | 32 | .. code:: python 33 | 34 | def links(media, looter): 35 | if media.get('__typename') == "GraphSidecar": 36 | media = looter.get_post_info(media['shortcode']) 37 | nodes = [e['node'] for e in media['edge_sidecar_to_children']['edges']] 38 | return [n.get('video_url') or n.get('display_url') for n in nodes] 39 | elif media['is_video']: 40 | media = looter.get_post_info(media['shortcode']) 41 | return [media['video_url']] 42 | else: 43 | return [media['display_url']] 44 | 45 | from instalooter.looters import HashtagLooter 46 | looter = HashtagLooter("ramones") 47 | 48 | with open("ramones.txt", "w") as f: 49 | for media in looter.medias(): 50 | for link in links(media, looter): 51 | f.write("{}\n".format(link)) 52 | 53 | 54 | Users from comments 55 | ------------------- 56 | 57 | Obtain a subset of users that commented on some of the posts of 58 | `Franz Ferdinand `_. 59 | 60 | .. 
code:: python 61 | 62 | from instalooter.looters import ProfileLooter 63 | looter = ProfileLooter("franz_ferdinand") 64 | 65 | users = set() 66 | for media in looter.medias(): 67 | info = looter.get_post_info(media['shortcode']) 68 | for comment in info['edge_media_to_comment']['edges']: 69 | user = comment['node']['owner']['username'] 70 | users.add(user) 71 | 72 | 73 | Users from mentions 74 | ------------------- 75 | 76 | 77 | 78 | .. code:: python 79 | 80 | from instalooter.looters import ProfileLooter 81 | looter = ProfileLooter("mandodiaomusic") 82 | 83 | users = set() 84 | for media in looter.medias(): 85 | info = looter.get_post_info(media['shortcode']) 86 | for comment in info['edge_media_to_tagged_user']['edges']: 87 | user = comment['node']['user']['username'] 88 | users.add(user) 89 | 90 | 91 | Download resized pictures 92 | ------------------------- 93 | 94 | Unfortunately, this is not possible anymore as Instagram added a hash signature 95 | to prevent messing with their URLs. 96 | 97 | .. 98 | .. Downloaded pictures will all be resized by IG to be 320 pixels wide 99 | .. with the same aspect ratio before being downloaded. 100 | .. 101 | .. .. code:: 102 | .. 103 | .. from instaLooter import InstaLooter 104 | .. from instaLooter.urlgen import resizer 105 | .. 106 | .. looter = InstaLooter(profile="xxxx", get_videos=True, url_generator=resizer(320)) 107 | .. looter.download() 108 | 109 | 110 | .. Download thumbnails 111 | .. ------------------- 112 | .. .. code:: 113 | .. 114 | .. from instaLooter import InstaLooter 115 | .. from instaLooter.urlgen import thumbnail 116 | .. 117 | .. looter = InstaLooter(profile="xxxx", get_videos=True, url_generator=thumbnail) 118 | ..
looter.download() 119 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | InstaLooter 2 | =========== 3 | 4 | *Not all treasure's silver and gold, mate.* 5 | 6 | |build| |repo| |versions| |format| |coverage| |doc| |requirements| |grade| 7 | 8 | InstaLooter is a program that can download pictures and videos from any profile 9 | or hashtag on `Instagram `_, without any API token. It is 10 | even possible to download pictures and videos from a private profile you are 11 | following using your credentials to log in. 12 | 13 | See more details about one of the following topics: 14 | 15 | .. rubric:: Guides 16 | 17 | .. toctree:: 18 | :maxdepth: 1 19 | 20 | Installation 21 | Usage 22 | Batch mode 23 | Periodic Downloads 24 | 25 | 26 | .. rubric:: Library 27 | 28 | .. toctree:: 29 | :maxdepth: 1 30 | 31 | API Examples 32 | Changelog 33 | API Reference 34 | 35 | 36 | License |license| 37 | ----------------- 38 | 39 | InstaLooter is released under the 40 | `GNU General Public License v3 `_ 41 | *or later*, and is fully open-source. The ``COPYING`` file distributed with 42 | the software contains the complete license text. 43 | 44 | 45 | Issues |issues| 46 | --------------- 47 | 48 | If you want to request a feature, or report a bug, please file 49 | an issue on the `issue tracker `_. 50 | 51 | About 52 | ----- 53 | 54 | InstaLooter is maintained by: 55 | * `Martin Larralde `_ 56 | 57 | Special thanks to the following contributors: 58 | * `Mohaned Magdy `_ 59 | * `Daniel Lee Harple `_ 60 | * `Bryan Massoth `_ 61 | * `AndCycle `_ 62 | * `Pauli Salmenrinne `_ 63 | * `Georp `_ 64 | * `Lev Velykoivanenko `_ 65 | * `Maksymilian Ratajczyk `_ 66 | * `Henning Kowalk `_ 67 | * `Daniel M.
Capella `_ 68 | * `tgandor `_ 69 | * `Denis Emelyanov `_ 70 | * `Pavel Sutyrin `_ 71 | 72 | Indices and tables 73 | ------------------ 74 | 75 | * :ref:`genindex` 76 | * :ref:`modindex` 77 | * :ref:`search` 78 | 79 | 80 | .. |repo| image:: https://img.shields.io/badge/source-GitHub-303030.svg?maxAge=3600&style=flat-square 81 | :target: https://github.com/althonos/InstaLooter 82 | 83 | .. |versions| image:: https://img.shields.io/pypi/v/instaLooter.svg?maxAge=3600&style=flat-square 84 | :target: https://pypi.org/project/instaLooter 85 | 86 | .. |format| image:: https://img.shields.io/pypi/format/instaLooter.svg?maxAge=3600&style=flat-square 87 | :target: https://pypi.org/project/instaLooter 88 | 89 | .. |grade| image:: https://img.shields.io/codacy/grade/9b8c7da6887c4195b9e960cb04b59a91/master.svg?maxAge=3600&style=flat-square 90 | :target: https://www.codacy.com/app/althonos/InstaLooter/dashboard 91 | 92 | .. |coverage| image:: https://img.shields.io/codecov/c/github/althonos/InstaLooter/master.svg?maxAge=3600&style=flat-square 93 | :target: https://codecov.io/gh/althonos/InstaLooter 94 | 95 | .. |build| image:: https://img.shields.io/travis/althonos/InstaLooter/master.svg?label=travis-ci&maxAge=3600&style=flat-square 96 | :target: https://travis-ci.org/althonos/InstaLooter/ 97 | 98 | .. |doc| image:: https://img.shields.io/readthedocs/instalooter.svg?style=flat-square&maxAge=3600 99 | :target: http://instalooter.readthedocs.io/en/latest/?badge=latest 100 | 101 | .. |requirements| image:: https://img.shields.io/requires/github/althonos/InstaLooter/master.svg?style=flat-square&maxAge=3600 102 | :target: https://requires.io/github/althonos/InstaLooter/requirements/?branch=master 103 | 104 | .. |health| image:: https://landscape.io/github/althonos/InstaLooter/master/landscape.svg?style=flat-square&maxAge=3600 105 | :target: https://landscape.io/github/althonos/InstaLooter/master 106 | 107 | .. 
|license| image:: https://img.shields.io/pypi/l/InstaLooter.svg?maxAge=3600&style=flat-square 108 | :target: https://choosealicense.com/licenses/gpl-3.0/ 109 | 110 | .. |issues| image:: https://img.shields.io/github/issues/althonos/InstaLooter.svg?maxAge=3600&style=flat-square 111 | :target: https://github.com/althonos/InstaLooter/issues 112 | -------------------------------------------------------------------------------- /docs/source/install.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | .. toctree:: 5 | 6 | InstaLooter is available from 2 different sources: either a git repository, 7 | shared publicly on GitHub, or a Python wheel, available on PyPI. Instructions 8 | on how to set up each version are available below. 9 | 10 | The python modules ``pip`` and ``setuptools`` are required before you start 11 | installing InstaLooter. Although not strictly required, there will be no 12 | explanations on how to set up instaLooter without those. 13 | 14 | .. hint:: 15 | 16 | See the `PyPA web page `_ 17 | to install ``pip`` if it is not already installed. 18 | 19 | .. attention:: 20 | 21 | Using ``pip`` will install InstaLooter with the default Python version. 22 | InstaLooter is known to work with Python versions **2.7**, **3.4** 23 | and **3.5**, but encoding errors have been reported with Python **2.7**. If 24 | you are not familiar with the default Python version on your system, consider 25 | enforcing an installation with Python 3 using ``pip3`` instead of ``pip``. 26 | 27 | PyPI |pypi| 28 | ----------- 29 | 30 | If you have super user rights, open up a terminal and type the following: 31 | 32 | .. code-block:: console 33 | 34 | # pip install instaLooter 35 | 36 | If you don't have admin rights, then type the following to install only for 37 | the current user instead: 38 | 39 | 40 | ..
code-block:: console 41 | 42 | $ pip install instaLooter --user 43 | 44 | 45 | If you want to use the *exif* metadata features, install the ``metadata`` extras 46 | as well: 47 | 48 | .. code-block:: console 49 | 50 | $ pip install instaLooter[metadata] --user 51 | 52 | 53 | GitHub |build| 54 | -------------- 55 | 56 | With ``git`` installed, do the following in a directory on your machine to 57 | clone the remote repository and install instaLooter from source: 58 | 59 | .. code-block:: console 60 | 61 | $ git clone https://github.com/althonos/InstaLooter 62 | $ cd InstaLooter 63 | 64 | Then use pip to install the local version of the program and all the required 65 | dependencies: 66 | 67 | .. code-block:: console 68 | 69 | # pip install . 70 | 71 | To install development dependencies (to test the program and/or build the 72 | documentation), use the *test* and/or *doc* extras: 73 | 74 | .. code-block:: console 75 | 76 | $ pip install --user ".[test]" # install only test dependencies 77 | $ pip install --user ".[doc]" # install only doc dependencies 78 | $ pip install --user ".[dev]" # install all dev dependencies 79 | 80 | 81 | .. |pypi| image:: https://img.shields.io/pypi/v/instaLooter.svg?maxAge=3600&style=flat-square 82 | :target: https://pypi.org/project/instaLooter 83 | 84 | .. |build| image:: https://img.shields.io/travis/althonos/InstaLooter/master.svg?label=travis-ci&maxAge=3600&style=flat-square 85 | :target: https://travis-ci.org/althonos/InstaLooter/ 86 | -------------------------------------------------------------------------------- /docs/source/instalooter/batch.rst: -------------------------------------------------------------------------------- 1 | Batch Runner (`instalooter.batch`) 2 | ================================== 3 | 4 | .. currentmodule:: instalooter.batch 5 | 6 | .. 
automodule:: instalooter.batch 7 | :members: 8 | :inherited-members: 9 | :show-inheritance: 10 | -------------------------------------------------------------------------------- /docs/source/instalooter/cli.rst: -------------------------------------------------------------------------------- 1 | Command Line Interface (`instalooter.cli`) 2 | ========================================== 3 | 4 | .. currentmodule:: instalooter.cli 5 | 6 | .. automodule:: instalooter.cli 7 | :members: 8 | -------------------------------------------------------------------------------- /docs/source/instalooter/index.rst: -------------------------------------------------------------------------------- 1 | API Reference 2 | ============== 3 | 4 | .. toctree:: 5 | :hidden: 6 | 7 | looters 8 | cli 9 | medias 10 | pages 11 | batch 12 | pbar 13 | worker 14 | 15 | 16 | Main 17 | ---- 18 | 19 | .. rubric:: Looters (`instalooter.looters`) 20 | 21 | .. currentmodule:: instalooter.looters 22 | 23 | .. autosummary:: 24 | :nosignatures: 25 | 26 | InstaLooter 27 | HashtagLooter 28 | ProfileLooter 29 | PostLooter 30 | 31 | 32 | .. rubric:: Command Line Interface (`instalooter.cli`) 33 | 34 | .. currentmodule:: instalooter.cli 35 | 36 | .. autosummary:: 37 | 38 | main 39 | 40 | 41 | .. rubric:: Batch Runner (`instalooter.batch`) 42 | 43 | .. currentmodule:: instalooter.batch 44 | 45 | .. autosummary:: 46 | :nosignatures: 47 | 48 | BatchRunner 49 | 50 | 51 | Iterators 52 | --------- 53 | 54 | .. rubric:: Medias Iterators (`instalooter.medias`) 55 | 56 | .. currentmodule:: instalooter.medias 57 | 58 | .. autosummary:: 59 | :nosignatures: 60 | 61 | MediasIterator 62 | TimedMediasIterator 63 | 64 | 65 | .. rubric:: Pages Iterators (`instalooter.pages`) 66 | 67 | .. currentmodule:: instalooter.pages 68 | 69 | .. autosummary:: 70 | :nosignatures: 71 | 72 | PageIterator 73 | HashtagIterator 74 | ProfileIterator 75 | 76 | 77 | Miscellaneous 78 | ------------- 79 | 80 | .. 
rubric:: Progress Bars (`instalooter.pbar`) 81 | 82 | .. currentmodule:: instalooter.pbar 83 | 84 | .. autosummary:: 85 | :nosignatures: 86 | 87 | ProgressBar 88 | TqdmProgressBar 89 | 90 | 91 | .. rubric:: Background Downloader (`instalooter.worker`) 92 | 93 | .. currentmodule:: instalooter.worker 94 | 95 | .. autosummary:: 96 | :nosignatures: 97 | 98 | InstaDownloader 99 | -------------------------------------------------------------------------------- /docs/source/instalooter/looters.rst: -------------------------------------------------------------------------------- 1 | Looters (`instalooter.looters`) 2 | =============================== 3 | 4 | .. currentmodule:: instalooter.looters 5 | 6 | .. automodule:: instalooter.looters 7 | :members: 8 | :show-inheritance: 9 | :inherited-members: 10 | -------------------------------------------------------------------------------- /docs/source/instalooter/medias.rst: -------------------------------------------------------------------------------- 1 | Medias Iterators (`instalooter.medias`) 2 | ======================================= 3 | 4 | .. currentmodule:: instalooter.medias 5 | 6 | .. automodule:: instalooter.medias 7 | :members: 8 | :special-members: __iter__, __next__, __length_hint__ 9 | :show-inheritance: 10 | :inherited-members: 11 | -------------------------------------------------------------------------------- /docs/source/instalooter/pages.rst: -------------------------------------------------------------------------------- 1 | Pages Iterators (`instalooter.pages`) 2 | ===================================== 3 | 4 | .. currentmodule:: instalooter.pages 5 | 6 | .. 
automodule:: instalooter.pages 7 | :members: 8 | :special-members: __iter__, __next__, __length_hint__ 9 | :show-inheritance: 10 | :inherited-members: 11 | -------------------------------------------------------------------------------- /docs/source/instalooter/pbar.rst: -------------------------------------------------------------------------------- 1 | Progress Bars (`instalooter.pbar`) 2 | ============================================ 3 | 4 | .. currentmodule:: instalooter.pbar 5 | 6 | .. automodule:: instalooter.pbar 7 | :members: 8 | -------------------------------------------------------------------------------- /docs/source/instalooter/worker.rst: -------------------------------------------------------------------------------- 1 | Background Downloader (`instalooter.worker`) 2 | ============================================ 3 | 4 | .. currentmodule:: instalooter.worker 5 | 6 | .. automodule:: instalooter.worker 7 | :members: 8 | -------------------------------------------------------------------------------- /docs/source/usage.rst: -------------------------------------------------------------------------------- 1 | Usage 2 | ===== 3 | 4 | .. toctree:: 5 | 6 | instaLooter provides a command line interface, that you can call with 7 | the ``instaLooter`` command. 8 | 9 | .. note:: 10 | 11 | In some cases, the ``instaLooter`` command is not added into 12 | the ``$PATH`` after installation. It is possible to perform 13 | all the following actions nevertheless by replacing occurrences 14 | of ``instaLooter`` with ``python -m instaLooter`` (or 15 | ``python3 -m instaLooter``). 16 | 17 | Command Line Interface 18 | ---------------------- 19 | 20 | Download pictures/videos from the profile of a single user: 21 | 22 | .. code-block:: console 23 | 24 | $ instaLooter user [] [options] 25 | 26 | 27 | Download pictures/videos tagged with a given *#hashtag*: 28 | 29 | ..
code-block:: console 30 | 31 | $ instaLooter hashtag [options] 32 | 33 | Download pictures/videos from a single post: 34 | 35 | .. code-block:: console 36 | 37 | $ instaLooter post [options] 38 | 39 | Download pictures/videos in :doc:`batch`: 40 | 41 | .. code-block:: console 42 | 43 | $ instaLooter batch 44 | 45 | Positional Arguments 46 | -------------------- 47 | 48 | ``username`` 49 | the username of the Instagram profile to download pictures/videos from. 50 | 51 | ``hashtag`` 52 | the hashtag to download pictures/videos from. 53 | 54 | ``post_token`` 55 | the URL or the code of the post to download. 56 | 57 | ``directory`` 58 | the directory in which to download pictures/videos. Optional for 59 | profile download, will then use current directory. 60 | 61 | ``batch_file`` 62 | the path to the batch file containing batch download instructions 63 | (see the :doc:`batch` page for the format specification). 64 | 65 | 66 | Options - Credentials 67 | --------------------- 68 | 69 | ``-u USER, --username USER`` 70 | The username to connect to Instagram with. 71 | 72 | ``-p PASS, --password PASS`` 73 | The password to connect to Instagram with (will be asked in the shell 74 | if the ``--username`` option was given without the corresponding 75 | ``--password``). 76 | 77 | Options - Files 78 | --------------- 79 | 80 | ``-n NUM, --num-to-dl NUM`` 81 | Maximum number of new files to download 82 | 83 | ``-j JOBS, --jobs JOBS`` 84 | Number of parallel threads to use to download files **[default: 16]** 85 | 86 | ``-T TMPL, --template TMPL`` 87 | A filename template to use to write the files (see :ref:`Template`). 88 | **[default: {id}]** 89 | 90 | ``-v, --get-videos`` 91 | Get videos as well as photos 92 | 93 | ``-V, --videos-only`` 94 | Get videos only (implies ``--get-videos``) 95 | 96 | ``-N, --new`` 97 | Only look for files newer than the ones in the destination directory 98 | (faster). 
99 | 100 | ``-t TIME, --time TIME`` 101 | The time limit within which to download pictures and video 102 | (see :ref:`Time`) 103 | 104 | 105 | Options - Metadata 106 | ------------------ 107 | 108 | ``-d, --dump-json`` 109 | Save metadata to a JSON file next to downloaded videos and pictures. 110 | 111 | ``-m, --add-metadata`` 112 | Add date and caption metadata to downloaded pictures (requires 113 | `PIL `_ or 114 | `Pillow `_ as well as 115 | `piexif `_). 116 | 117 | ``-D, --dump-only`` 118 | Save only the metadata and no video / picture. 119 | 120 | ``-e, --extended-dump`` 121 | Always dump the maximum amount of extractable information, at the cost 122 | of more time. 123 | 124 | 125 | Options - Miscellaneous 126 | ----------------------- 127 | 128 | ``-q, --quiet`` 129 | Do not produce any output 130 | 131 | ``-h, --help`` 132 | Display the help message 133 | 134 | ``--version`` 135 | Show program version and quit 136 | 137 | ``--traceback`` 138 | Print error traceback if any (debug). 139 | 140 | ``-W WARNINGCTL`` 141 | Change warning behaviour (same as ``python -W``) **[default: default]** 142 | 143 | 144 | .. _Template: 145 | 146 | Template 147 | -------- 148 | 149 | The default filename of the pictures and videos on Instagram doesn't show 150 | anything about the file you just downloaded. 
But using the ``-T`` argument 151 | allows you to give instaLooter a filename template, using the following 152 | format with brackets-enclosed (``{}``) variable names among: 153 | 154 | - ``id``\*\² and ``code``\² of the instagram id of the media 155 | - ``ownerid``\*, ``username`` and ``fullname`` of the owner 156 | - ``datetime``\*: the date and time of the post (YYYY-MM-DD hh:mm:ss) 157 | - ``date``\*: the date of the post (YYYY-MM-DD) 158 | - ``width``\* and ``height``\* 159 | - ``likescount``\* and ``commentscount``\* 160 | 161 | :\*: 162 | use these only to quicken download, since fetching the others may take 163 | a tad longer (in particular in hashtag download mode). 164 | 165 | :\²: 166 | use at least one of these in your filename to make sure the generated 167 | filename is unique. 168 | 169 | Examples of acceptable values: 170 | 171 | .. code-block:: console 172 | 173 | $ instaLooter -T {username}.{datetime} 174 | $ instaLooter -T {username}-{likescount}-{width}x{height}.{id} 175 | $ instaLooter -T {username}.{code}.something_constant 176 | 177 | 178 | .. _Time: 179 | 180 | Time 181 | ---- 182 | 183 | The ``--time`` parameter can be given either a combination of start and stop 184 | date in ISO format (e.g. ``2016-12-21:2016-12-18``, ``2015-03-07:``, 185 | ``:2016-08-02``) or a special value among: *thisday*, *thisweek*, *thismonth*, 186 | *thisyear*. 187 | 188 | Edges are included in the time frame, so if using the following value: 189 | ``--time 2016-05-10:2016-04-03``, then all medias will be downloaded 190 | including the ones posted the 10th of May 2016 and the 3rd of April 2016. 191 | 192 | .. _Credentials: 193 | 194 | Credentials 195 | ----------- 196 | 197 | The ``--username`` and ``--password`` parameters can be used to log to 198 | Instagram. This allows you to download pictures/videos from private profiles 199 | you are following. You can either provide your password directly 200 | or type it in later for privacy purposes. 201 | 202 | .. 
code-block:: console 203 | 204 | $ instaLooter ... --username USERNAME --password PASSWORD 205 | $ instaLooter ... --username USERNAME 206 | Password: # type PASSWORD privately here 207 | -------------------------------------------------------------------------------- /instalooter/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | __author__ = "Martin Larralde" 6 | __author_email__ = "martin.larralde@ens-paris-saclay.fr" 7 | __version__ = "2.4.4" 8 | __license___ = "GPLv3+" 9 | -------------------------------------------------------------------------------- /instalooter/__main__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import absolute_import 3 | 4 | import sys 5 | from .cli import main 6 | 7 | sys.exit(main()) 8 | -------------------------------------------------------------------------------- /instalooter/_impl.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """Backports and alternative library implementations. 3 | """ 4 | from __future__ import absolute_import 5 | from __future__ import unicode_literals 6 | 7 | import typing 8 | 9 | try: 10 | import simplejson as json 11 | except ImportError: 12 | import json # type: ignore 13 | 14 | try: 15 | import PIL.Image 16 | import piexif 17 | except ImportError: 18 | PIL = None 19 | piexif = None 20 | 21 | try: 22 | from operator import length_hint 23 | except ImportError: 24 | def length_hint(obj, default=0): # type: ignore 25 | # type: (typing.Any, int) -> int 26 | """Return an estimate of the number of items in obj. 27 | 28 | This is useful for presizing containers when building from an 29 | iterable. 30 | 31 | If the object supports len(), the result will be 32 | exact. 
class UserAgentRequestHandler(six.moves.BaseHTTPServer.BaseHTTPRequestHandler):
    """HTTP request handler reporting the ``User-Agent`` of incoming requests.

    The handler reads two class-level attributes, ``queue`` and ``cache``,
    which are assigned on the class by `get_user_agent` before the server
    is started.
    """

    def do_HEAD(self):
        """Serve a HEAD request."""
        # Publish the browser identification first, so that whoever is
        # waiting on the queue is released as early as possible.
        user_agent = self.headers.get("User-Agent")
        self.queue.put(user_agent)
        # Then answer with a bare successful response.
        status = six.moves.BaseHTTPServer.HTTPStatus.OK
        self.send_response(status)
        self.send_header("Location", self.path)
        self.end_headers()

    def do_GET(self):
        """Serve a GET request."""
        # Status line and headers are the same as for a HEAD request.
        self.do_HEAD()
        # The body is the packaged splash page, filled with the user agent
        # of the request and the ``cache`` class attribute.
        raw_template = pkg_resources.resource_string(__name__, "static/splash.html")
        html_template = raw_template.decode('utf-8')
        body = html_template.format(self.headers.get("User-Agent"), self.cache)
        self.wfile.write(body.encode('utf-8'))

    def log_message(self, format, *args):
        """Discard log messages to silence the server."""
        return None
class NameGenerator(object):
    """Generator for filenames using a template.

    The template is a `str.format` string; its named fields are filled from
    the dictionary built by `_get_info` out of a media dictionary.
    """

    @classmethod
    def _get_info(cls, media):
        # type: (Mapping[Text, Any]) -> Mapping[Text, Any]
        """Collect the template fields available for ``media``.

        Arguments:
            media (dict): a media dictionary; the ``id``, ``shortcode`` and
                ``owner`` (with an ``id``) keys are required, every other
                key is optional.

        Returns:
            dict: the fields that could be extracted. Fields whose value is
            missing (`None`) are left out, so that formatting a template
            that needs them raises a `KeyError`.

        Raises:
            KeyError: when one of the required keys is missing.

        """
        owner = media['owner']
        info = {
            'id': media['id'],
            'code': media['shortcode'],
            'ownerid': owner['id'],
            'username': owner.get('username'),
            'fullname': owner.get('full_name'),
            'commentscount': media.get('edge_media_to_comment', {}).get('count'),
            'likescount': media.get('edge_media_preview_like', {}).get('count'),
            'width': media.get('dimensions', {}).get('width'),
            'height': media.get('dimensions', {}).get('height'),
        }  # type: Dict[Text, Any]

        timestamp = media.get('date') or media.get('taken_at_timestamp')
        if timestamp is not None:
            # No timezone is given, so both values are in local time.
            dt = datetime.datetime.fromtimestamp(timestamp)
            info['datetime'] = ("{0.year}-{0.month:02d}-{0.day:02d} {0.hour:02d}"
                "h{0.minute:02d}m{0.second:02d}s{0.microsecond}").format(dt)
            info['date'] = datetime.date.fromtimestamp(timestamp)

        # Drop only the fields that are really missing: a count of ``0`` or
        # an empty string is a legitimate value and must stay available to
        # the template (filtering on truthiness would discard them).
        return {key: value for key, value in info.items() if value is not None}

    def __init__(self, template="{id}"):
        # type: (Text) -> None
        """Create a new generator using the given filename ``template``."""
        self.template = template

    def base(self, media):
        # type: (Mapping[Text, Any]) -> Text
        """Return the filename of ``media`` without any extension.

        Raises:
            KeyError: when the template uses a field that is not available
                for this media.

        """
        info = self._get_info(media)
        return self.template.format(**info)

    def file(self, media, ext=None):
        # type: (Mapping[Text, Any], Optional[Text]) -> Text
        """Return the complete filename of ``media``.

        When ``ext`` is not given, the extension is ``mp4`` for a video and
        ``jpg`` otherwise, based on ``media['is_video']``.
        """
        ext = ext or ("mp4" if media['is_video'] else "jpg")
        return os.path.extsep.join([self.base(media), ext])

    def needs_extended(self, media):
        # type: (Mapping[Text, Any]) -> bool
        """Check whether ``media`` lacks a field required by the template.

        Returns:
            bool: `True` if formatting the template with the fields of
            ``media`` raises a `KeyError`, `False` otherwise.

        """
        try:
            self.base(media)
            return False
        except KeyError:
            return True
class BatchRunner(object):
    """Run ``InstaLooter`` in batch mode, using a configuration file.

    The configuration is read with a ``ConfigParser``: every section is a
    job. Within a section, the options named after the keys of `_CLS_MAP`
    (``users`` and ``hashtag``) list the targets to download, one
    ``target: directory`` pair per line, and the other options (``jobs``,
    ``template``, ``get-videos``, ``username``, ...) configure the looter.
    """

    # Maps the name of a section option to the looter class handling the
    # targets listed under that option.
    _CLS_MAP = {
        'users': ProfileLooter,
        'hashtag': HashtagLooter,
    }  # type: Mapping[Text, Type[InstaLooter]]

    def __init__(self, handle, args=None):
        # type: (Any, Optional[Mapping[Text, Any]]) -> None
        """Load a batch configuration.

        Arguments:
            handle: either the path to the configuration file (as text, or
                as UTF-8 encoded bytes), or an already opened file handle.
                A handle given by the caller is left open; a file opened
                here from a path is closed before returning.
            args (dict, optional): the command line arguments; only the
                ``--quiet`` key is read (see `run_job`).

        """
        close_handle = False
        if isinstance(handle, six.binary_type):
            handle = handle.decode('utf-8')
        if isinstance(handle, six.text_type):
            _handle = open(handle)  # type: typing.IO
            close_handle = True
        else:
            _handle = handle

        try:
            self.args = args or {}
            self.parser = six.moves.configparser.ConfigParser()
            # ``readfp`` is the Python 2 name of ``read_file``.
            getattr(self.parser, "readfp" if six.PY2 else "read_file")(_handle)
        finally:
            if close_handle:
                _handle.close()

    @typing.overload
    def _getboolean(self, section_id, key, default):
        # type: (Text, Text, bool) -> bool
        pass

    @typing.overload
    def _getboolean(self, section_id, key):
        # type: (Text, Text) -> Optional[bool]
        pass

    @typing.overload
    def _getboolean(self, section_id, key, default):
        # type: (Text, Text, None) -> Optional[bool]
        pass

    def _getboolean(self, section_id, key, default=None):
        # type: (Text, Text, Optional[bool]) -> Optional[bool]
        """Get a boolean option, or ``default`` if the option is not set."""
        if self.parser.has_option(section_id, key):
            return self.parser.getboolean(section_id, key)
        return default

    @typing.overload
    def _getint(self, section_id, key, default):
        # type: (Text, Text, None) -> Optional[int]
        pass

    @typing.overload
    def _getint(self, section_id, key):
        # type: (Text, Text) -> Optional[int]
        pass

    @typing.overload
    def _getint(self, section_id, key, default):
        # type: (Text, Text, int) -> int
        pass

    def _getint(self, section_id, key, default=None):
        # type: (Text, Text, Optional[int]) -> Optional[int]
        """Get an integer option, or ``default`` if the option is not set."""
        if self.parser.has_option(section_id, key):
            return self.parser.getint(section_id, key)
        return default

    @typing.overload
    def _get(self, section_id, key, default):
        # type: (Text, Text, None) -> Optional[Text]
        pass

    @typing.overload
    def _get(self, section_id, key):
        # type: (Text, Text) -> Optional[Text]
        pass

    @typing.overload
    def _get(self, section_id, key, default):
        # type: (Text, Text, Text) -> Text
        pass

    def _get(self, section_id, key, default=None):
        # type: (Text, Text, Optional[Text]) -> Optional[Text]
        """Get a text option, or ``default`` if the option is not set."""
        if self.parser.has_option(section_id, key):
            return self.parser.get(section_id, key)
        return default

    def run_all(self):
        # type: () -> None
        """Run all the jobs specified in the configuration file.

        A single session is created and shared by every job.
        """
        logger.debug("Creating batch session")
        session = Session()

        for section_id in self.parser.sections():
            self.run_job(section_id, session=session)

    def run_job(self, section_id, session=None):
        # type: (Text, Optional[Session]) -> None
        """Run a job as described in the section named ``section_id``.

        Arguments:
            section_id (str): the name of the section to run.
            session (~requests.Session, optional): the session to use for
                the looters; a new one is created if not given.

        Raises:
            KeyError: when the section could not be found.
            ValueError: when a target line of the section is malformed
                (see `get_targets`).

        Errors raised while downloading a target are logged and do not
        prevent the remaining targets from being processed.

        """
        if not self.parser.has_section(section_id):
            raise KeyError('section not found: {}'.format(section_id))

        session = session or Session()

        # The ``quiet`` setting does not depend on the kind of looter, so it
        # is resolved once; the value of the section takes precedence over
        # the ``--quiet`` command line flag.
        quiet = self._getboolean(
            section_id, "quiet", self.args.get("--quiet", False))
        pbar_cls = None if quiet else TqdmProgressBar

        for name, looter_cls in six.iteritems(self._CLS_MAP):

            targets = self.get_targets(self._get(section_id, name))
            if not targets:
                continue

            logger.info("Launching {} job for section {}".format(name, section_id))

            for target, directory in six.iteritems(targets):
                try:
                    logger.info("Downloading {} to {}".format(target, directory))
                    looter = looter_cls(
                        target,
                        add_metadata=self._getboolean(section_id, 'add-metadata', False),
                        get_videos=self._getboolean(section_id, 'get-videos', False),
                        videos_only=self._getboolean(section_id, 'videos-only', False),
                        jobs=self._getint(section_id, 'jobs', 16),
                        template=self._get(section_id, 'template', '{id}'),
                        dump_json=self._getboolean(section_id, 'dump-json', False),
                        dump_only=self._getboolean(section_id, 'dump-only', False),
                        extended_dump=self._getboolean(section_id, 'extended-dump', False),
                        session=session)

                    # Log in with the credentials of the section, asking for
                    # the password interactively if it is not configured.
                    if self.parser.has_option(section_id, 'username'):
                        looter.logout()
                        username = self._get(section_id, 'username')
                        password = self._get(section_id, 'password') or \
                            getpass.getpass('Password for "{}": '.format(username))
                        looter.login(username, password)

                    n = looter.download(
                        directory,
                        media_count=self._getint(section_id, 'num-to-dl'),
                        # FIXME: timeframe=self._get(section_id, 'timeframe'),
                        new_only=self._getboolean(section_id, 'new', False),
                        pgpbar_cls=pbar_cls,
                        dlpbar_cls=pbar_cls)

                    logger.success("Downloaded %i medias !", n)

                except Exception as exception:
                    # Deliberate best effort: report the failure and carry
                    # on with the next target.
                    logger.error(six.text_type(exception))

    def get_targets(self, raw_string):
        # type: (Optional[Text]) -> Dict[Text, Text]
        """Extract targets from a string in 'key: value' format.

        Arguments:
            raw_string (str, optional): the raw option value, holding one
                ``target: directory`` pair per line. Blank lines are
                ignored, and only the first ``:`` of a line is used as the
                separator.

        Returns:
            dict: the directory of each target (empty if ``raw_string`` is
            `None`), with surrounding whitespace stripped from both.

        Raises:
            ValueError: when a non-blank line contains no ``:`` separator.

        """
        targets = {}  # type: Dict[Text, Text]
        if raw_string is None:
            return targets
        for line in raw_string.splitlines():
            line = line.strip()
            if not line:
                continue
            target, separator, directory = line.partition(':')
            if not separator:
                raise ValueError(
                    "invalid target (expected 'target: directory'): "
                    "{!r}".format(line))
            targets[target.strip()] = directory.strip()
        return targets
Defaults to 50 | `sys.argv` to use CLI arguments. 51 | stream (~io.IOBase): A file where to write error messages. 52 | Leave to `None` to use the `~coloredlogs.StandardErrorHandler` 53 | for logs, and `sys.stderr` for error messages. 54 | 55 | Returns: 56 | int: An error code, or 0 if the program executed successfully. 57 | """ 58 | 59 | _print = functools.partial(print, file=stream or sys.stderr) 60 | 61 | # Parse command line arguments 62 | try: 63 | args = docopt.docopt( 64 | HELP, argv, version='instalooter {}'.format(__version__)) 65 | except docopt.DocoptExit as de: 66 | _print(de) 67 | return 1 68 | 69 | # Print usage and exit if required (docopt does not do this !) 70 | if args['--usage']: 71 | _print(USAGE) 72 | return 0 73 | 74 | # Set the loggers up with the requested logging level 75 | level = "ERROR" if args['--quiet'] else args.get("--loglevel", "INFO") 76 | for logger_ in (logger, login_logger, batch_logger): 77 | coloredlogs.install( 78 | level=int(level) if level.isdigit() else level, 79 | stream=stream, 80 | logger=logger_) 81 | 82 | # Check the requested logging level 83 | if args['-W'] not in WARNING_ACTIONS: 84 | _print("Unknown warning action:", args['-W']) 85 | _print(" available actions:", ', '.join(WARNING_ACTIONS)) 86 | return 1 87 | 88 | with warnings.catch_warnings(): 89 | warnings.simplefilter(args['-W']) 90 | 91 | try: 92 | # Run in batch mode 93 | if args['batch']: 94 | # Load the batch configuration from the given file 95 | with open(args['']) as batch_file: 96 | batch_runner = BatchRunner(batch_file, args) 97 | # Run the batch 98 | batch_runner.run_all() 99 | return 0 100 | 101 | # Login if requested 102 | if args['login']: 103 | try: 104 | if not args['--username']: 105 | args['--username'] = six.moves.input('Username: ') 106 | login(args) 107 | return 0 108 | except ValueError as ve: 109 | logger.error("%s", ve) 110 | if args["--traceback"]: 111 | traceback.print_exc() 112 | return 1 113 | 114 | # Logout if requested 115 | if 
args['logout']: 116 | if InstaLooter._cachefs().exists(InstaLooter._COOKIE_FILE): 117 | InstaLooter._logout() 118 | logger.success('Logged out.') 119 | else: 120 | warnings.warn('Cookie file not found.') 121 | return 0 122 | 123 | # Normal download mode: 124 | if args['user']: 125 | looter_cls = ProfileLooter 126 | target = args[''] 127 | elif args['hashtag']: 128 | looter_cls = HashtagLooter 129 | target = args[''] 130 | elif args['post']: 131 | looter_cls = PostLooter 132 | target = args[''] 133 | else: 134 | raise NotImplementedError("TODO") 135 | 136 | # Instantiate the looter 137 | looter = looter_cls( 138 | target, 139 | add_metadata=args['--add-metadata'], 140 | get_videos=args['--get-videos'], 141 | videos_only=args['--videos-only'], 142 | jobs=int(args['--jobs']) if args['--jobs'] is not None else 16, 143 | template=args['--template'], 144 | dump_json=args['--dump-json'], 145 | dump_only=args['--dump-only'], 146 | extended_dump=args['--extended-dump'] 147 | ) 148 | 149 | # Attempt to login and extract the timeframe 150 | if args['--username']: 151 | login(args) 152 | if args['--num-to-dl']: 153 | args['--num-to-dl'] = int(args['--num-to-dl']) 154 | try: 155 | if args['--time'] is not None: 156 | args['--time'] = get_times_from_cli(args['--time']) 157 | except ValueError as ve: 158 | _print("invalid format for --time parameter:", args["--time"]) 159 | _print(" (format is [D]:[D] where D is an ISO 8601 date)") 160 | return 1 161 | 162 | logger.debug("Opening destination filesystem") 163 | dest_url = args.get('') or os.getcwd() 164 | dest_fs = fs.open_fs(dest_url, create=True) 165 | 166 | logger.notice("Starting download of `%s`", target) 167 | n = looter.download( 168 | destination=dest_fs, 169 | media_count=args['--num-to-dl'], 170 | timeframe=args['--time'], 171 | new_only=args['--new'], 172 | pgpbar_cls=None if args['--quiet'] else TqdmProgressBar, 173 | dlpbar_cls=None if args['--quiet'] else TqdmProgressBar) 174 | if n > 1: 175 | 
logger.success("Downloaded %i posts.", n) 176 | elif n == 1: 177 | logger.success("Downloaded %i post.", n) 178 | 179 | except (Exception, KeyboardInterrupt) as e: 180 | from .threadutils import threads_force_join, threads_count 181 | # Show error traceback if any 182 | if not isinstance(e, KeyboardInterrupt): 183 | logger.critical("%s", e) 184 | if args["--traceback"]: 185 | traceback.print_exc() 186 | else: 187 | logger.critical("Interrupted") 188 | # Close remaining threads spawned by InstaLooter.download 189 | count = threads_count() 190 | if count: 191 | logger.notice("Terminating %i remaining workers...", count) 192 | threads_force_join() 193 | # Return the error number if any 194 | errno = e.errno if hasattr(e, "errno") else None 195 | return errno if errno is not None else 1 196 | 197 | else: 198 | return 0 199 | 200 | finally: 201 | logger.debug("Closing destination filesystem") 202 | try: 203 | dest_fs.close() 204 | except Exception: 205 | pass 206 | -------------------------------------------------------------------------------- /instalooter/cli/constants.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | import textwrap 6 | 7 | 8 | WARNING_ACTIONS = {'error', 'ignore', 'always', 'default', 'module', 'once'} 9 | 10 | 11 | HELP = textwrap.dedent( 12 | """ 13 | instalooter - Another API-less Instagram media downloader 14 | 15 | Usage: 16 | instalooter (-h | --help | --version | --usage) 17 | instalooter batch [] [options] 18 | instalooter hashtag [] [options] 19 | instalooter user [] [options] 20 | instalooter post [] [options] 21 | instalooter logout 22 | instalooter login [options] 23 | 24 | Arguments: 25 | The username of the profile to download 26 | pictures and optionally videos from. 27 | A hashtag to download pictures and 28 | optionally videos from. 
29 | Either the url or the code of a post to 30 | download the picture or video from. 31 | The directory in which to download files. 32 | Can actually be a Pyfilesystem2 FS URL 33 | (see http://pyfilesystem2.rtfd.io). 34 | The path to the batch file containing 35 | batch download instructions (see the 36 | online documentation). 37 | 38 | Options - Credentials: 39 | -u USER, --username USER The username to connect to Instagram with. 40 | -p PASS, --password PASS The password to connect to Instagram with 41 | (will be asked in the shell if the 42 | `--username` option was given without 43 | the corresponding `--password`). 44 | 45 | Options - Files: 46 | -n NUM, --num-to-dl NUM Maximum number of new files to download 47 | -j JOBS, --jobs JOBS Number of parallel threads to use to 48 | download files. [default: 16] 49 | -T TMPL, --template TMPL A filename template to use to write the 50 | files (see *Template*). [default: {id}] 51 | -v, --get-videos Get videos as well as photos. 52 | -V, --videos-only Get videos only. Implies `--get-videos`. 53 | -N, --new Only look for files newer than the ones 54 | in the destination directory (faster). 55 | -t TIME, --time TIME The time limit within which to download 56 | pictures and video (see *Time*). 57 | 58 | Options - Metadata: 59 | -m, --add-metadata Add date and caption metadata to downloaded 60 | pictures (requires PIL/Pillow and piexif). 61 | -d, --dump-json Save metadata to a JSON file next to 62 | downloaded videos/pictures. 63 | -D, --dump-only Save only the metadata and no video/picture. 64 | Implies `--dump-json`. 65 | -e, --extended-dump Always dump the maximum amount of extracted 66 | information, at the cost of more time. 67 | 68 | Options - Miscellaneous: 69 | -l LEVEL, --loglevel LEVEL The level of log to produce, as an 70 | integer or a level name. [default: INFO] 71 | -q, --quiet Do not display any output or progress 72 | bar. Implies `--loglevel ERROR`. 73 | -h, --help Display this message and quit. 
74 | --version Show program version and quit. 75 | --traceback Print error traceback if any (use when 76 | reporting an issue on GitHub, please!). 77 | -W WARNINGCTL Change warning behaviour (same as the 78 | Python `-W` flag). [default: default] 79 | 80 | Template: 81 | The default filename of the pictures and videos on Instagram doesn't 82 | show anything about the file you just downloaded. But using the -T 83 | argument allows you to give instalooter a filename template, using the 84 | the following format with brackets-enclosed ({}) variable names among: 85 | - ``id``*² and ``code``*² of the instagram id of the media 86 | - ``ownerid``*, ``username`` and ``fullname`` of the owner 87 | - ``datetime``*: the date and time of the post (YYYY-MM-DD hh:mm:ss) 88 | - ``date``*: the date of the post (YYYY-MM-DD) 89 | - ``width``* and ``height``* 90 | - ``likescount``* and ``commentscount``* 91 | 92 | ²: use at least one of these to make sure the generated file name 93 | is unique (``datetime`` is not unique anymore since multiple posts). 94 | 95 | *: use these only to quicken download, since fetching the others may 96 | take a tad longer (in particular in hashtag download mode). 97 | 98 | You are however to make sure that the generated filename is unique, 99 | so you should use at least id, code or datetime somewhere. 100 | Examples of acceptable values: 101 | - {username}.{datetime}.{code} 102 | - {username}-{likescount}-{width}x{height}.{id} 103 | 104 | Time: 105 | The --time parameter can be given either a combination of start and stop 106 | date in ISO format (e.g. 2016-12-21:2016-12-18, 2015-03-07:, :2016-08-02) 107 | or a special value among: "thisday", "thisweek", "thismonth", "thisyear". 108 | 109 | Edges are included in the time frame, so if using the following value: 110 | `--time 2016-05-10:2016-04-03`, then all medias will be downloaded 111 | including the ones posted the 10th of May 2016 and the 3rd of April 2016. 
def warn_logging(logger):
    # type: (logging.Logger) -> Callable
    """Build a replacement for `warnings.showwarning` bound to a logger.

    Arguments:
        logger (~logging.Logger): the logger that will receive warnings.

    Returns:
        function: a callable with the `warnings.showwarning` signature
            that forwards only the warning message to ``logger.warning``
            (category, location and file are ignored).

    """
    def showwarning(message, category, filename, lineno, file=None, line=None):
        logger.warning(message)

    return showwarning


def wrap_warnings(logger):
    """Create a decorator redirecting warnings to ``logger`` during a call.

    Arguments:
        logger (~logging.Logger): the logger that receives the warnings
            emitted while the decorated function is running.

    Returns:
        `function`: a decorator; the function it produces swaps
            `warnings.showwarning` for the duration of each call and
            restores the previous hook afterwards, even on error.

    """
    def decorator(func):

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # Remember the hook currently installed, then install ours.
            previous, warnings.showwarning = (
                warnings.showwarning,
                warn_logging(logger),
            )
            try:
                return func(*args, **kwargs)
            finally:
                # Always put back whatever hook was active before the call.
                warnings.showwarning = previous

        return wrapper

    return decorator
def get_times_from_cli(cli_token):
    """Convert a CLI token to a datetime tuple.

    Argument:
        cli_token (str): an isoformat datetime token ([ISO date]:[ISO date])
            or a special value among:
                * thisday
                * thisweek
                * thismonth
                * thisyear

    Returns:
        tuple: a couple of `datetime.date` objects (or `None`).

            * special values give ``(today, today - period)``;
            * when both dates are given, the most recent one comes
              first, whatever the order on the command line;
            * when a single date is given, the date written on the left
              of the colon is returned in second position and the date
              written on the right in first position;
            * ``":"`` gives ``(None, None)``.

    Raises:
        ValueError: when the CLI token is not in the right format
            (no colon in the token, more than one colon, not one of the
            special values, dates are not in proper ISO-8601 format.)

    See Also:
        `ISO-8601 specification <https://en.wikipedia.org/wiki/ISO_8601>`_.

    """
    today = datetime.date.today()

    # Special values: a window that ends today.
    if cli_token == "thisday":
        return today, today
    elif cli_token == "thisweek":
        return today, today - dateutil.relativedelta.relativedelta(days=7)
    elif cli_token == "thismonth":
        return today, today - dateutil.relativedelta.relativedelta(months=1)
    elif cli_token == "thisyear":
        return today, today - dateutil.relativedelta.relativedelta(years=1)

    # Generic form: two (possibly empty) dates around exactly one colon.
    parts = cli_token.split(':')
    if len(parts) < 2:
        raise ValueError("--time parameter must contain a colon (:)")
    if len(parts) > 2:
        # Previously reported as a *missing* colon, which was misleading.
        raise ValueError("--time parameter must contain a single colon (:)")
    start_date, stop_date = parts

    if not start_date and not stop_date:  # ':', no start date, no stop date
        return None, None

    try:
        start_date = date_from_isoformat(start_date) if start_date else None
        stop_date = date_from_isoformat(stop_date) if stop_date else None
    except ValueError:
        raise ValueError("--time parameter was not provided ISO formatted dates")

    if start_date is not None and stop_date is not None:
        # Normalise so that the most recent bound always comes first.
        return max(start_date, stop_date), min(start_date, stop_date)
    return stop_date, start_date
@six.add_metaclass(abc.ABCMeta)
class InstaLooter(object):
    """A brutal Instagram looter that raids without API tokens.

    This abstract base class holds the session / cookie handling shared by
    all looters and the download machinery; subclasses only have to
    implement `pages`.
    """

    @classmethod
    def _cachefs(cls):
        # type: () -> FS
        """Get a persistent filesystem to store the program cache.

        Returns:
            ~fs.base.FS: a ``usercache://`` filesystem keyed by the
                application name, author and version (created if needed).

        """
        url = "usercache://{}:{}:{}".format(__appname__, __author__, __version__)
        return fs.open_fs(url, create=True)

    @classmethod
    def _user_agent(cls):
        # type: () -> Text
        """Get the user agent of the default web browser on the local machine.

        The value is read from `_USERAGENT_FILE` in the cache filesystem.
        When that file does not exist yet, `get_user_agent` is asked to
        detect it, a hard-coded Firefox user agent is used if detection
        returns `None`, and the result is stored in the cache.

        Returns:
            str: the content of the cached user agent file.

        """
        cache = cls._cachefs()
        if not cache.isfile(cls._USERAGENT_FILE):
            ua = get_user_agent(cache=cache.getsyspath(cls._USERAGENT_FILE))
            if ua is None:
                warnings.warn("Could not detect user agent, using default")
                ua = "Mozilla/5.0 (X11; Linux x86_64; rv:66.0) Gecko/20100101 Firefox/66.0"
            # Write to the very file that is checked above and read below
            # (this used to be a separate "user-agent.txt" literal).
            with cache.open(cls._USERAGENT_FILE, "w") as f:
                f.write(ua)
        with cache.open(cls._USERAGENT_FILE) as f:
            return f.read()

    # str: The name of the user agent file in the cache filesystem
    _USERAGENT_FILE = "user-agent.txt"

    # str: The name of the cookie file in the cache filesystem
    _COOKIE_FILE = "cookies.txt"

    @classmethod
    def _init_session(cls, session=None):
        # type: (Optional[Session]) -> Session
        """Initialise the given session and load class cookies to its jar.

        Note that the cookie jar of ``session`` is *replaced* by a fresh
        `LWPCookieJar` backed by the cached cookie file.

        Arguments:
            session (~requests.Session, optional): a `requests`
                session, or `None` to create a new one.

        Returns:
            ~requests.Session: an initialised session instance.

        """
        session = session or Session()
        # Load cookies
        path = cls._cachefs().getsyspath(cls._COOKIE_FILE)
        session.cookies = LWPCookieJar(path)  # type: ignore
        try:
            typing.cast(FileCookieJar, session.cookies).load()
        except IOError:
            # Best effort: an unreadable or missing cookie file simply
            # leaves the jar empty.
            pass
        session.cookies.clear_expired_cookies()  # type: ignore
        return session

    @classmethod
    def _login(cls, username, password, session=None):
        # type: (str, str, Optional[Session]) -> None
        """Login with provided credentials and session.

        The session headers are temporarily overridden for the login
        requests and restored afterwards, whatever the outcome.

        Arguments:
            username (str): the username to log in with.
            password (str): the password to log in with.
            session (~requests.Session, optional): the session to use,
                or `None` to create a new session.

        Raises:
            SystemError: when the login request did not succeed.
            ValueError: when the server did not authenticate the
                credentials, or the homepage does not mention the
                username after logging in.

        Note:
            Code taken from LevPasha/instabot.py

        """
        session = cls._init_session(session)
        headers = copy.deepcopy(session.headers)
        homepage = "https://www.instagram.com/"
        login_url = "https://www.instagram.com/accounts/login/ajax/"
        enc_password = "#PWD_INSTAGRAM_BROWSER:0:{}:{}".format(time.time(), password)
        data = {'username': username, 'enc_password': enc_password}

        try:
            session.headers.update({
                'Accept-Encoding': 'gzip, deflate',
                'Accept-Language': 'en-US,en;q=0.8',
                'Connection': 'keep-alive',
                'Content-Length': '0',
                'Host': 'www.instagram.com',
                'Origin': 'https://www.instagram.com',
                'Referer': 'https://www.instagram.com',
                'User-Agent': cls._user_agent(),
                'X-Instagram-AJAX': '1',
                'X-Requested-With': 'XMLHttpRequest'
            })

            with session.get(homepage) as res:
                token = get_shared_data(res.text)['config']['csrf_token']
                session.headers.update({'X-CSRFToken': token})

            time.sleep(5 * random.random())  # nosec
            with session.post(login_url, data, allow_redirects=True) as login:
                # Check the response first, so that a failed request is
                # reported as such instead of leaking a `StopIteration`
                # when the CSRF cookie is absent.
                if not login.ok:
                    raise SystemError("Login error: check your connection")
                token = next(
                    (c.value for c in login.cookies if c.name == 'csrftoken'),
                    None)
                if token is not None:
                    session.headers.update({'X-CSRFToken': token})
                data = json.loads(login.text)
                if not data.get('authenticated', False):
                    raise ValueError('Login error: check your login data')

            time.sleep(5 * random.random())  # nosec
            with session.get(homepage) as res:
                if res.text.find(username) == -1:
                    raise ValueError('Login error: check your login data')
            try:
                typing.cast(FileCookieJar, session.cookies).save()
            except IOError:
                # Best effort: failing to persist cookies must not fail
                # an otherwise successful login.
                pass

        finally:
            session.headers = headers

    @classmethod
    def _logout(cls, session=None):
        # type: (Optional[Session]) -> None
        """Log out from current session.

        Also deletes the eventual cookie file left in the cache directory,
        to prevent new connections from using the old session ID.

        Arguments:
            session (~requests.Session): the session to use, or `None`
                to create a new session.

        Note:
            Code taken from LevPasha/instabot.py

        """
        session = cls._init_session(session)
        sessionid = cls._sessionid(session)
        if sessionid is not None:
            url = "https://www.instagram.com/accounts/logout/"
            session.post(url, data={"csrfmiddlewaretoken": sessionid})

        cache = cls._cachefs()
        if cache.exists(cls._COOKIE_FILE):
            cache.remove(cls._COOKIE_FILE)

    @classmethod
    def _logged_in(cls, session=None):
        # type: (Optional[Session]) -> bool
        """Check if there is an open Instagram session.

        Arguments:
            session (~requests.Session): the session to use, or `None`
                to create a new session.

        Returns:
            bool: `True` if there's an active session, `False` otherwise.

        """
        return cls._sessionid(session) is not None

    @classmethod
    def _sessionid(cls, session=None):
        # type: (Optional[Session]) -> Optional[Text]
        """Get the ID of the currently opened Instagram session.

        The ID is the value of the ``ds_user_id`` cookie set for the
        ``.instagram.com`` domain and the ``/`` path, looked up in the
        cookies loaded by `_init_session`.

        Arguments:
            session (~requests.Session): the session to use, or `None`
                to create a new session.

        Returns:
            str or None: the session ID, if any, or `None`.

        """
        _session = cls._init_session(session)
        _cookies = typing.cast(FileCookieJar, _session.cookies)
        return next((ck.value for ck in _cookies
                     if ck.domain == ".instagram.com"
                     and ck.name == "ds_user_id"
                     and ck.path == "/"), None)

    def __init__(self,
                 add_metadata=False,     # type: bool
                 get_videos=False,       # type: bool
                 videos_only=False,      # type: bool
                 jobs=16,                # type: int
                 template="{id}",        # type: Text
                 dump_json=False,        # type: bool
                 dump_only=False,        # type: bool
                 extended_dump=False,    # type: bool
                 session=None            # type: Optional[Session]
                 ):
        # type: (...) -> None
        """Create a new looter instance.

        Arguments:
            add_metadata (bool): Add date and comment metadata to
                the downloaded pictures.
            get_videos (bool): Also get the videos from the given target.
            videos_only (bool): Only download videos (implies
                ``get_videos=True``).
            jobs (int): the number of parallel threads to use to
                download media (12 or more is advised to have a true parallel
                download of media files).
            template (str): a filename format, in Python new-style-formatting
                format. See the :ref:`Template` page of the documentation
                for available keys.
            dump_json (bool): Save each resource metadata to a
                JSON file next to the actual image/video.
            dump_only (bool): Only save metadata and discard the actual
                resource (implies ``dump_json=True``).
            extended_dump (bool): Attempt to fetch as much metadata as
                possible, at the cost of more time. Set to `True` if, for
                instance, you always want the top comments to be downloaded
                in the dump.
            session (~requests.Session or None): a `requests` session,
                or `None` to create a new one.

        """
        self.add_metadata = add_metadata
        self.get_videos = get_videos or videos_only
        self.videos_only = videos_only
        self.jobs = jobs
        self.namegen = NameGenerator(template)
        self.dump_only = dump_only
        self.dump_json = dump_json or dump_only
        self.extended_dump = extended_dump
        self.session = self._init_session(session)
        atexit.register(self.session.close)

        # Set the default webbrowser user agent
        if self.session.headers['User-Agent'].startswith('python-requests'):
            self.session.headers['User-Agent'] = self._user_agent()

        # Get CSRFToken and RHX (parse the homepage shared data only once)
        with self.session.get('https://www.instagram.com/') as res:
            shared_data = get_shared_data(res.text)
        self.session.headers['X-CSRFToken'] = shared_data['config']['csrf_token']
        self.rhx = shared_data.get('rhx_gis', '')

    @abc.abstractmethod
    def pages(self):
        # type: () -> Iterator[Dict[Text, Any]]
        """Obtain an iterator over Instagram post pages.

        Returns:
            PageIterator: an iterator over the instagram post pages.

        """
        return NotImplemented

    def _medias(self,
                pages_iterator,     # type: Iterable[Dict[Text, Any]]
                timeframe=None      # type: Optional[_Timeframe]
                ):
        # type: (...) -> Iterator[Dict[Text, Any]]
        """Obtain an iterator over the medias of the given pages iterator.

        Arguments:
            pages_iterator (Iterator): an iterator over the Instagram
                pages, returned by `InstaLooter.pages`
            timeframe (tuple or None): when given, a `TimedMediasIterator`
                restricted to that time frame is returned instead of a
                plain `MediasIterator`.

        Returns:
            MediasIterator: an iterator over the medias in every pages.

        """
        if timeframe is not None:
            return TimedMediasIterator(pages_iterator, timeframe)
        return MediasIterator(pages_iterator)

    def medias(self, timeframe=None):
        # type: (Optional[_Timeframe]) -> Iterator[Dict[Text, Any]]
        """Obtain an iterator over the Instagram medias.

        Wraps the iterator returned by `InstaLooter.pages` to seamlessly
        iterate over the medias of all the pages.

        Returns:
            MediasIterator: an iterator over the medias in every pages.

        """
        return self._medias(self.pages(), timeframe)

    def get_post_info(self, code):
        # type: (str) -> dict
        """Get media information from a given post code.

        Arguments:
            code (str): the code of the post (can be obtained either
                from the ``shortcode`` attribute of media dictionaries, or
                from a post URL: ``https://www.instagram.com/p/<code>/``)

        Returns:
            dict: a media dictionary, in the format used by Instagram.

        """
        url = "https://www.instagram.com/p/{}/".format(code)
        with self.session.get(url) as res:
            data = get_shared_data(res.text)
            if 'graphql' in data['entry_data']['PostPage'][0]:
                return data['entry_data']['PostPage'][0]['graphql']['shortcode_media']
            # Otherwise the media is looked up in the additional data
            # extracted from the same page.
            data = get_additional_data(res.text)
            return data['graphql']['shortcode_media']

    def download_pictures(self,
                          destination,      # type: Union[str, fs.base.FS]
                          media_count=None,  # type: Optional[int]
                          timeframe=None,   # type: Optional[_Timeframe]
                          new_only=False,   # type: bool
                          pgpbar_cls=None,  # type: Optional[Type[ProgressBar]]
                          dlpbar_cls=None   # type: Optional[Type[ProgressBar]]
                          ):
        # type: (...) -> int
        """Download all the pictures to the provided destination.

        Actually a shortcut for `.download` with ``condition`` set
        to accept only images.

        """
        return self.download(
            destination,
            condition=lambda media: not media["is_video"],
            media_count=media_count,
            timeframe=timeframe,
            new_only=new_only,
            pgpbar_cls=pgpbar_cls,
            dlpbar_cls=dlpbar_cls,
        )

    def download_videos(self,
                        destination,        # type: Union[str, fs.base.FS]
                        media_count=None,   # type: Optional[int]
                        timeframe=None,     # type: Optional[_Timeframe]
                        new_only=False,     # type: bool
                        pgpbar_cls=None,    # type: Optional[Type[ProgressBar]]
                        dlpbar_cls=None,    # type: Optional[Type[ProgressBar]]
                        ):
        # type: (...) -> int
        """Download all videos to the provided destination.

        Actually a shortcut for `.download` with ``condition`` set
        to accept only videos.

        """
        return self.download(
            destination,
            condition=lambda media: media["is_video"],
            media_count=media_count,
            timeframe=timeframe,
            new_only=new_only,
            pgpbar_cls=pgpbar_cls,
            dlpbar_cls=dlpbar_cls,
        )

    def download(self,
                 destination,        # type: Union[str, fs.base.FS]
                 condition=None,     # type: Optional[Callable[[dict], bool]]
                 media_count=None,   # type: Optional[int]
                 timeframe=None,     # type: Optional[_Timeframe]
                 new_only=False,     # type: bool
                 pgpbar_cls=None,    # type: Optional[Type[ProgressBar]]
                 dlpbar_cls=None,    # type: Optional[Type[ProgressBar]]
                 ):
        # type: (...) -> int
        """Download all medias passing ``condition`` to destination.

        Arguments:
            destination (~fs.base.FS or str): the filesystem where to
                store the downloaded files, as a filesystem instance or
                FS URL.
            condition (function): the condition to filter the
                medias with. If `None` is given, a function is created using
                the ``get_videos`` and ``videos_only`` passed at object
                initialisation.
            media_count (int or None): the maximum number of medias
                to download. Leave to ``None`` to download everything from
                the target. *Note that more files can be downloaded, since
                a post with multiple images/videos is considered to be a
                single media*.
            timeframe (tuple or None): a tuple of two `~datetime.datetime`
                objects to enforce a time frame (the first item must be
                more recent). Leave to `None` to ignore times.
            new_only (bool): stop media discovery when already
                downloaded medias are encountered.
            pgpbar_cls (type or None): an optional `~.pbar.ProgressBar`
                subclass to use to display page scraping progress.
            dlpbar_cls (type or None): an optional `~.pbar.ProgressBar`
                subclass to use to display file download progress.

        Returns:
            int: the number of queued medias.

            May not be equal to the number of downloaded medias if some
            errors occurred during background download.

        """
        # Open the destination filesystem
        destination, close_destination = self._init_destfs(destination)

        # Create an iterator over the pages with an optional progress bar
        pages_iterator = self.pages()  # type: Iterable[Dict[Text, Any]]
        pages_iterator = pgpbar = self._init_pbar(pages_iterator, pgpbar_cls)

        # Create an iterator over the medias
        medias_iterator = self._medias(iter(pages_iterator), timeframe)

        # Create the media download bar from a dummy iterator
        dlpbar = self._init_pbar(
            six.moves.range(length_hint(medias_iterator)), dlpbar_cls)

        # Start a group of workers
        workers, queue = self._init_workers(
            dlpbar if dlpbar_cls is not None else None, destination)

        # Make sure exiting the main thread will shutdown workers
        atexit.register(self._shutdown_workers, workers)

        # Queue all medias
        medias_queued = self._fill_media_queue(
            queue, destination, medias_iterator, media_count,
            new_only, condition)

        # Once queuing the medias is finished, finish the page progress bar
        # and set a new maximum on the download progress bar.
        if pgpbar_cls is not None:
            pgpbar.finish()  # type: ignore
        if dlpbar_cls is not None:
            dlpbar.set_maximum(medias_queued)  # type: ignore

        # If no medias were queued, issue a warning
        # TODO: refine warning depending on download parameters
        if medias_queued == 0:
            warnings.warn("No medias found.")

        # Add poison pills to the queue and wait for workers to finish
        self._poison_workers(workers, queue)
        self._join_workers(workers, queue)

        # Once downloading is finished, finish the download progress bar
        # and close the destination if needed.
        if dlpbar_cls is not None:
            dlpbar.finish()  # type: ignore
        if close_destination:
            destination.close()

        return medias_queued

    def login(self, username, password):
        # type: (str, str) -> None
        """Log the instance in using the given credentials.

        Arguments:
            username (str): the username to log in with.
            password (str): the password to log in with.

        """
        self._login(username, password, session=self.session)

    def logout(self):
        # type: () -> None
        """Log the instance out from the currently opened session.
        """
        self._logout(session=self.session)

    def logged_in(self):
        # type: () -> bool
        """Check if there's an open Instagram session.
        """
        return self._logged_in(self.session)

    def _init_pbar(self,
                   it,              # type: Iterable[_T]
                   pbar_cls=None,   # type: Optional[Type[ProgressBar]]
                   ):
        # type: (...) -> Iterable[_T]
        """Wrap an iterable within a `ProgressBar`.

        Arguments:
            it (~collections.Iterable): an iterable to wrap.
            pbar_cls (type or None): an optional `ProgressBar` subclass
                to use, or `None` to avoid using a progress bar.

        Returns:
            ~collections.Iterable: the wrapped iterable (``it`` itself
                when ``pbar_cls`` is `None`).

        Raises:
            TypeError: when ``pbar_cls`` is not a `ProgressBar` subclass.

        """
        if pbar_cls is not None:
            if not issubclass(pbar_cls, ProgressBar):
                raise TypeError("pbar must implement the ProgressBar interface !")
            # The length hint is taken before the iterable gets wrapped.
            maximum = length_hint(it)
            it = pbar = pbar_cls(it)
            pbar.set_maximum(maximum)
            pbar.set_lock(threading.RLock())
        return it

    def _init_destfs(self, destination, create=True):
        # type: (Union[str, fs.base.FS], bool) -> Tuple[fs.base.FS, bool]
        """Open a filesystem either from a FS URL or filesystem instance.

        Arguments:
            destination (~fs.base.FS or str): the destination filesystem
                to open, as a filesystem instance or FS URL.
            create (bool): whether or not to create a new
                filesystem if it does not exist.

        Returns:
            (~fs.base.FS, bool): the open FS, and whether to close it
                (`True` only when it was opened here from a URL).

        Raises:
            TypeError: when ``destination`` is neither a string nor a
                filesystem instance.

        """
        close_destination = False
        if isinstance(destination, six.binary_type):
            destination = destination.decode('utf-8')
        if isinstance(destination, six.text_type):
            destination = fs.open_fs(destination, create=create)
            close_destination = True
        if not isinstance(destination, fs.base.FS):
            raise TypeError("<destination> must be a FS URL or FS instance.")
        return destination, close_destination

    def _fill_media_queue(self,
                          queue,              # type: Queue
                          destination,        # type: fs.base.FS
                          medias_iter,        # type: Iterable[Any]
                          media_count=None,   # type: Optional[int]
                          new_only=False,     # type: bool
                          condition=None,     # type: Optional[Callable[[dict], bool]]
                          ):
        # type: (...) -> int
        """Fill the download queue with medias from the provided iterator.

        Arguments:
            queue (~queue.Queue): the download queue to fill.
            destination (~fs.base.FS): the filesystem where to download
                the files.
            medias_iter (~collections.Iterable): an iterable over the
                Instagram medias to download.
            media_count (int or None): the maximum number of new medias to
                download, or ``None`` to download all discoverable medias.
            new_only (bool): stop media discovery when a media that was
                already downloaded is encountered.
            condition (function or None): the condition to filter the medias
                with. If `None` is given, a function is created using the
                ``get_videos`` and ``videos_only`` passed at object
                initialisation.

        Returns:
            int: the number of queued medias.

            May not be equal to the number of downloaded medias if some
            errors occurred during downloads.

        """
        # Create a condition from parameters if needed
        if condition is not None:
            _condition = condition  # type: Callable[[dict], bool]
        else:
            if self.videos_only:
                def _condition(media): return media['is_video']
            elif not self.get_videos:
                def _condition(media): return not media['is_video']
            else:
                def _condition(media): return True

        # Queue all media filling the condition
        medias_queued = 0
        for media in six.moves.filter(_condition, medias_iter):

            # Check if the whole post info is required
            if self.namegen.needs_extended(media) or media["__typename"] != "GraphImage":
                media = self.get_post_info(media['shortcode'])

            # Check that sidecar children fit the condition
            if media['__typename'] == "GraphSidecar":
                # Keep only the nodes fitting the condition; the list is
                # updated in place so the media dictionary sees the change.
                edges = media['edge_sidecar_to_children']['edges']
                edges[:] = [edge for edge in edges if _condition(edge['node'])]

                # Check that the nodelist is not depleted
                if not edges:
                    continue

            # Check that the file does not exist
            # FIXME: not working well with sidecar
            if new_only and destination.exists(self.namegen.file(media)):
                break

            # Put the medias in the queue
            queue.put(media)
            medias_queued += 1

            if media_count is not None and medias_queued >= media_count:
                break

        return medias_queued

    # WORKERS UTILS

    def _init_workers(self,
                      pbar,         # type: Union[ProgressBar, Iterable, None]
                      destination,  # type: fs.base.FS
                      ):
        # type: (...) -> Tuple[List[InstaDownloader], Queue]
        """Start ``self.jobs`` download workers sharing a single queue.

        Arguments:
            pbar (ProgressBar or None): the progress bar handed to every
                worker, if any.
            destination (~fs.base.FS): the filesystem handed to every
                worker.

        Returns:
            (list, ~queue.Queue): the started workers and their queue.

        """
        workers = []        # type: List[InstaDownloader]
        queue = Queue()     # type: Queue

        for _ in six.moves.range(self.jobs):
            worker = InstaDownloader(
                queue=queue,
                destination=destination,
                namegen=self.namegen,
                add_metadata=self.add_metadata,
                dump_json=self.dump_json,
                dump_only=self.dump_only,
                pbar=pbar,
                session=self.session)
            worker.start()
            workers.append(worker)

        return workers, queue

    def _poison_workers(self, workers, queue):
        # type: (List[InstaDownloader], Queue) -> None
        """Put one `None` (poison pill) in the queue for each worker.
        """
        for worker in workers:
            queue.put(None)

    def _join_workers(self, workers, queue):
        # type: (List[InstaDownloader], Queue) -> None
        """Join every worker, provided at least one of them is alive.
        """
        if any(w.is_alive() for w in workers):
            for worker in workers:
                worker.join()

    def _shutdown_workers(self, workers):
        # type: (List[InstaDownloader]) -> None
        """Call ``terminate`` on every given worker.
        """
        for worker in workers:
            worker.terminate()
702 | 703 | See `InstaLooter.__init__` for more details about accepted 704 | keyword arguments. 705 | 706 | """ 707 | super(ProfileLooter, self).__init__(**kwargs) 708 | self._username = username 709 | self._owner_id = None 710 | 711 | def pages(self): 712 | # type: () -> ProfileIterator 713 | """Obtain an iterator over Instagram post pages. 714 | 715 | Returns: 716 | PageIterator: an iterator over the instagram post pages. 717 | 718 | Raises: 719 | ValueError: when the requested user does not exist. 720 | RuntimeError: when the user is a private account 721 | and there is no logged user (or the logged user 722 | does not follow that account). 723 | 724 | """ 725 | if self._owner_id is None: 726 | it = ProfileIterator.from_username(self._username, self.session) 727 | self._owner_id = it.owner_id 728 | return it 729 | return ProfileIterator(self._owner_id, self.session, self.rhx) 730 | 731 | 732 | class HashtagLooter(InstaLooter): 733 | """A looter targeting medias tagged with a hashtag. 734 | """ 735 | 736 | def __init__(self, hashtag, **kwargs): 737 | # type: (str, **Any) -> None 738 | """Create a new hashtag looter. 739 | 740 | Arguments: 741 | username (str): the hashtag to search for. 742 | 743 | See `InstaLooter.__init__` for more details about accepted 744 | keyword arguments. 745 | 746 | """ 747 | super(HashtagLooter, self).__init__(**kwargs) 748 | self._hashtag = hashtag 749 | 750 | def pages(self): # noqa: D102 751 | # type: () -> HashtagIterator 752 | return HashtagIterator(self._hashtag, self.session, self.rhx) 753 | 754 | 755 | class PostLooter(InstaLooter): 756 | """A looter targeting a specific post. 757 | """ 758 | 759 | _RX_URL = re.compile( 760 | r'(?:https?://)?(?:www\.instagram\.com|instagr\.am)/p/([0-9a-zA-Z_\-]{10,11})' 761 | ) 762 | 763 | _RX_CODE = re.compile( 764 | r'^[0-9a-zA-Z_\-]{10,11}$' 765 | ) 766 | 767 | def __init__(self, code, **kwargs): 768 | # type: (str, **Any) -> None 769 | """Create a new hashtag looter. 
class PostLooter(InstaLooter):
    """A looter targeting a specific post.

    The post is identified by its shortcode, given either directly or
    as part of a post URL on ``www.instagram.com`` or ``instagr.am``.
    """

    _RX_URL = re.compile(
        r'(?:https?://)?(?:www\.instagram\.com|instagr\.am)/p/([0-9a-zA-Z_\-]{10,11})'
    )

    _RX_CODE = re.compile(
        r'^[0-9a-zA-Z_\-]{10,11}$'
    )

    def __init__(self, code, **kwargs):
        # type: (str, **Any) -> None
        """Create a new post looter.

        Arguments:
            code (str): the code of the post to get, or an URL to
                that post.

        Raises:
            ValueError: when ``code`` is neither a post URL nor a
                well-formed post code.

        See `InstaLooter.__init__` for more details about accepted
        keyword arguments.

        """
        super(PostLooter, self).__init__(**kwargs)

        # Post metadata is fetched lazily, see the `info` property
        self._info = None  # type: Optional[dict]

        # Try the URL form first: the code is then the captured group
        match = self._RX_URL.match(code)
        if match is not None:
            self.code = match.group(1)
        elif self._RX_CODE.match(code) is None:
            raise ValueError("invalid post code: '{}'".format(code))
        else:
            self.code = code

    @property
    def info(self):
        # type: () -> dict
        """dict: the post metadata, fetched on first access and cached.
        """
        if self._info is None:
            self._info = self.get_post_info(self.code)
        return self._info

    def pages(self):
        # type: () -> Iterator[Dict[Text, Any]]
        """Return a generator that yields a page with only the referred post.

        Yields:
            dict: a page dictionary with only a single media.

        """
        yield {"edge_owner_to_timeline_media": {
            "count": 1,
            "page_info": {
                "has_next_page": False,
                "end_cursor": None,
            },
            "edges": [
                {"node": self.info}
            ],
        }}

    def medias(self, timeframe=None):
        """Return a generator that yields only the referred post.

        Arguments:
            timeframe (tuple or None): a couple of dates, most recent
                first, as accepted by `TimedMediasIterator.get_times`,
                or `None` to skip the date check.

        Yields:
            dict: a media dictionary obtained from the given post.
            Nothing is yielded if the post does not fit the timeframe.

        """
        info = self.info
        if timeframe is not None:
            start, end = TimedMediasIterator.get_times(timeframe)
            # Same fallback key as the other timestamp readers of the
            # package (`taken_at_timestamp`, then `date`).
            timestamp = info.get("taken_at_timestamp") or info["date"]
            # The bounds are date objects: convert the raw timestamp to
            # the same type before comparing.
            media_date = type(start).fromtimestamp(timestamp)
            if not (start >= media_date >= end):
                # A plain return ends the generator; raising StopIteration
                # here would surface as RuntimeError (PEP 479).
                return
        yield info

    def download(self,
                 destination,       # type: Union[str, fs.base.FS]
                 condition=None,    # type: Optional[Callable[[dict], bool]]
                 media_count=None,  # type: Optional[int]
                 timeframe=None,    # type: Optional[_Timeframe]
                 new_only=False,    # type: bool
                 pgpbar_cls=None,   # type: Optional[Type[ProgressBar]]
                 dlpbar_cls=None,   # type: Optional[Type[ProgressBar]]
                 ):
        # type: (...) -> int
        """Download the referred post to the destination.

        See `InstaLooter.download` for argument reference. The progress
        bar classes are accepted for signature compatibility but are not
        used, since a single media is downloaded.

        Returns:
            int: the number of queued medias.

        Note:
            This function, opposed to other *looter* implementations, will
            not spawn new threads, but simply use the main thread to download
            the files.

            Since a worker is in charge of downloading a *media* at a time
            (and not a *file*), there would be no point in spawning more.

        """
        # NOTE(review): `close_destination` is returned by `_init_destfs`
        # but never acted upon here -- confirm whether the destination
        # filesystem should be closed once the download is over.
        destination, close_destination = self._init_destfs(destination)

        queue = Queue()  # type: Queue[Optional[Dict]]
        medias_queued = self._fill_media_queue(
            queue, destination, iter(self.medias(timeframe)), media_count,
            new_only, condition)
        # Poison pill: makes the worker loop exit once the queue is drained
        queue.put(None)

        worker = InstaDownloader(
            queue=queue,
            destination=destination,
            namegen=self.namegen,
            add_metadata=self.add_metadata,
            dump_json=self.dump_json,
            dump_only=self.dump_only,
            pbar=None,
            session=self.session)
        # `run` (not `start`): the work is done in the calling thread
        worker.run()

        return medias_queued
855 | 856 | """ 857 | destination, close_destination = self._init_destfs(destination) 858 | 859 | queue = Queue() # type: Queue[Optional[Dict]] 860 | medias_queued = self._fill_media_queue( 861 | queue, destination, iter(self.medias()), media_count, 862 | new_only, condition) 863 | queue.put(None) 864 | 865 | worker = InstaDownloader( 866 | queue=queue, 867 | destination=destination, 868 | namegen=self.namegen, 869 | add_metadata=self.add_metadata, 870 | dump_json=self.dump_json, 871 | dump_only=self.dump_only, 872 | pbar=None, 873 | session=self.session) 874 | worker.run() 875 | 876 | return medias_queued 877 | -------------------------------------------------------------------------------- /instalooter/medias.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """Iterators over Instagram medias. 3 | 4 | Iterators defined in this module wrap `PageIterator` instances to yield 5 | individual medias defined in each page instead of whole pages. 6 | """ 7 | from __future__ import absolute_import 8 | from __future__ import unicode_literals 9 | 10 | import datetime 11 | import typing 12 | 13 | import six 14 | 15 | from .pages import PageIterator 16 | 17 | if typing.TYPE_CHECKING: 18 | from typing import Any, Dict, List, Optional, Iterable, Set, Text 19 | 20 | 21 | _I = typing.TypeVar('_I', bound='MediasIterator') 22 | 23 | 24 | __all__ = [ 25 | "MediasIterator", 26 | "TimedMediasIterator", 27 | ] 28 | 29 | 30 | class MediasIterator(typing.Iterator[typing.Dict[typing.Text, typing.Any]]): 31 | """An iterator over the medias obtained from a page iterator. 
class MediasIterator(typing.Iterator[typing.Dict[typing.Text, typing.Any]]):
    """An iterator over the medias obtained from a page iterator.

    Pages are pulled lazily from the wrapped iterable; the ``node`` of
    each edge of the page is then yielded one at a time.
    """

    def __init__(self, page_iterator):
        # type: (Iterable[Dict[Text, Any]]) -> None
        """Create a new media iterator.

        Arguments:
            page_iterator (~collections.Iterable): an iterable of page
                dictionaries, each having a key ending with ``_media``
                that maps to a dict with ``count`` and ``edges``.

        """
        self._it = iter(page_iterator)
        self._seen = set()      # type: Set[Text]
        self._edges = []        # type: List[Dict[Text, Dict[Text, Any]]]
        self._finished = False
        self._total = None      # type: Optional[int]
        self._done = 0

    def __iter__(self):
        # type: (_I) -> _I
        return self

    def _next_page(self):
        # type: () -> Dict[Text, Any]
        """Fetch the next page and return its ``*_media`` section.

        Raises:
            StopIteration: when the page iterator is exhausted, or when
                the page has no key ending with ``_media``.

        """
        data = next(self._it)
        section = next(s for s in data if s.endswith('_media'))
        return data[section]

    def __next__(self):
        # type: () -> Dict[Text, Any]
        if self._finished:
            raise StopIteration

        # Refill the buffer from the next page once it is drained
        if not self._edges:
            page = self._next_page()
            self._total = page['count']
            self._edges.extend(page['edges'])
            if not page['edges']:
                raise StopIteration

        media = self._edges.pop(0)
        self._done += 1

        # A media seen twice marks the end of the iteration: it is still
        # returned this time, but the next call will stop.
        if media['node']['id'] in self._seen:
            self._finished = True

        self._seen.add(media['node']['id'])
        return media['node']

    def __length_hint__(self):
        # type: () -> int
        """Estimate the number of medias left, from the page ``count``.

        The first page is fetched (and buffered) if no page was loaded
        yet. The result is never negative, as required from a length
        hint, even when more medias were yielded than ``count`` reported.
        """
        if self._total is None:
            try:
                page = self._next_page()
                self._total = page['count']
                self._edges.extend(page['edges'])
            except StopIteration:
                self._total = 0
        return max(0, self._total - self._done)

    # Python 2 iterator protocol; a harmless extra alias on Python 3
    next = __next__


class TimedMediasIterator(MediasIterator):
    """An iterator over the medias within a specific timeframe.

    The timeframe is a couple ``(start, end)`` where ``start`` is the
    most recent bound and ``end`` the oldest one.
    """

    @staticmethod
    def get_times(timeframe):
        """Normalise a timeframe into a ``(start_time, end_time)`` couple.

        Arguments:
            timeframe (tuple or None): a couple of dates, most recent
                first. A missing (`None`) start defaults to today, a
                missing end defaults to the Epoch; `None` is the same
                as ``(None, None)``.

        Returns:
            tuple: the start and end time. A default bound has the same
            type as the bound that was given (`datetime.date` when none
            was), so that both can be compared with each other.

        Raises:
            TypeError: when ``timeframe`` is not a couple of dates.

        """
        if timeframe is None:
            timeframe = (None, None)
        try:
            start_time, end_time = timeframe[0], timeframe[1]
            # Build the defaults with the class of the given bound, since
            # `datetime.datetime` and `datetime.date` cannot be compared.
            date_cls = type(start_time or end_time or datetime.date.today())
            start_time = start_time or date_cls.today()
            end_time = end_time or date_cls.fromtimestamp(0)
        except (IndexError, KeyError, TypeError, AttributeError):
            raise TypeError("'timeframe' must be a couple of dates!")
        return start_time, end_time

    def __init__(self, page_iterator, timeframe=None):
        """Create a new media iterator restricted to a timeframe.

        Arguments:
            page_iterator (~collections.Iterable): see `MediasIterator`.
            timeframe (tuple or None): see `get_times`.

        """
        super(TimedMediasIterator, self).__init__(page_iterator)
        self.start_time, self.end_time = self.get_times(timeframe)

    def __next__(self):
        # Medias older than the timeframe seen during this call
        number_old = 0
        while True:
            media = super(TimedMediasIterator, self).__next__()
            timestamp = media.get('taken_at_timestamp') or media['date']
            media_date = type(self.start_time).fromtimestamp(timestamp)

            if self.start_time >= media_date >= self.end_time:
                return media
            elif media_date < self.end_time:
                # Give up after a page worth of too-old medias
                number_old += 1
                if number_old >= PageIterator.PAGE_SIZE:
                    self._finished = True
                    raise StopIteration

    # Python 2 iterator protocol; a harmless extra alias on Python 3
    next = __next__
@six.add_metaclass(abc.ABCMeta)
class PageIterator(typing.Iterator[typing.Dict[typing.Text, typing.Any]]):
    """An abstract Instagram page iterator.

    Subclasses provide the query URL template (``_URL``), the names of
    the sections to read in the response (``_section_generic`` and
    ``_section_media``) and the query variables (`_getparams`).
    """

    PAGE_SIZE = 50   # number of medias requested per page
    INTERVAL = 2     # seconds slept between two queries

    _BASE_URL = "https://www.instagram.com/graphql/query/"
    _section_generic = NotImplemented    # type: Text
    _section_media = NotImplemented      # type: Text
    _URL = NotImplemented                # type: Text

    def __init__(self, session, rhx):
        # type: (Session, Text) -> None
        """Create a new page iterator.

        Arguments:
            session (~requests.Session): the session used for queries.
            rhx (str): the value hashed together with the query
                variables to sign each request.

        """
        self._finished = False
        self._cursor = None         # type: Optional[Text]
        self._current_page = 0
        self._data_it = iter(self._page_loader(session, rhx))

    @abc.abstractmethod
    def _getparams(self, cursor):
        # type: (Optional[Text]) -> Dict[Text, Any]
        """Get the query variables for the page starting at ``cursor``.
        """
        return NotImplemented

    def _page_loader(self, session, rhx):
        # type: (Session, Text) -> Iterable[Dict[Text, Dict[Text, Any]]]
        """Yield the ``data`` section of the response for each page.

        The same page is yielded again and again until ``self._cursor``
        is changed (by `__next__`), which triggers the next query.

        Raises:
            RuntimeError: when the server answers that the query rate
                was exceeded.

        """
        while True:
            # Cache cursor for later
            cursor = self._cursor
            # Query data
            try:
                # Prepare the query
                params = self._getparams(cursor)
                json_params = json.dumps(params, separators=(',', ':'))
                # The request is signed with the MD5 hex digest of
                # "<rhx>:<variables>", sent as a header
                magic = "{}:{}".format(rhx, json_params)
                session.headers['x-instagram-gis'] = hashlib.md5(magic.encode('utf-8')).hexdigest()
                url = self._URL.format(json_params)
                # Query the server for data
                with session.get(url) as res:
                    self._last_page = data = res.json()
                # Yield that same data until cursor is updated
                while self._cursor == cursor:
                    yield data['data']
            except KeyError:
                # The response has no 'data' section: either the query
                # rate was exceeded, or the query is retried later
                if data.get('message') == 'rate limited':
                    raise RuntimeError("Query rate exceeded (wait before next run)")
                time.sleep(10)
            # Sleep before next query
            time.sleep(self.INTERVAL)

    def __length_hint__(self):
        # type: () -> int
        """Estimate the number of pages left from the media ``count``.

        The result is never negative, as required from a length hint.
        """
        try:
            data = next(self._data_it)
            c = data[self._section_generic][self._section_media]['count']
            # Force a true division: on Python 2, `/` between two ints
            # would floor the result before `ceil` is applied.
            total = int(math.ceil(c / float(self.PAGE_SIZE)))
        except (StopIteration, TypeError, KeyError):
            # No usable page: same conditions `__next__` treats as the end
            total = 0
        return max(0, total - self._current_page)

    def __iter__(self):
        return self

    def __next__(self):

        if self._finished:
            raise StopIteration

        data = next(self._data_it)

        # A missing (or null) section means there is nothing to iterate
        try:
            media_info = data[self._section_generic][self._section_media]
        except (TypeError, KeyError):
            self._finished = True
            raise StopIteration

        if not media_info['page_info']['has_next_page']:
            # Last page: return it, then stop on the next call
            self._finished = True
        elif not media_info['edges']:
            self._finished = True
            raise StopIteration
        else:
            # Moving the cursor makes the page loader query the next page
            self._cursor = media_info['page_info']['end_cursor']
            self._current_page += 1

        return data[self._section_generic]

    if six.PY2:
        next = __next__
125 | """ 126 | 127 | _QUERY_ID = "17882293912014529" 128 | _URL = "{}?query_id={}&variables={{}}".format(PageIterator._BASE_URL, _QUERY_ID) 129 | _section_generic = "hashtag" 130 | _section_media = "edge_hashtag_to_media" 131 | 132 | def __init__(self, hashtag, session, rhx): 133 | super(HashtagIterator, self).__init__(session, rhx) 134 | self.hashtag = hashtag 135 | 136 | def _getparams(self, cursor): 137 | return { 138 | "tag_name": self.hashtag, 139 | "first": self.PAGE_SIZE, 140 | "after": cursor 141 | } 142 | 143 | def __next__(self): 144 | item = super(HashtagIterator, self).__next__() 145 | for media in item[self._section_media].get("edges", []): 146 | media["node"].setdefault( 147 | "__typename", 148 | "GraphVideo" if media["node"].get("is_video", False) else "GraphImage" 149 | ) 150 | return item 151 | 152 | if six.PY2: 153 | next = __next__ 154 | 155 | 156 | class ProfileIterator(PageIterator): 157 | """An iterator over the pages of a user profile. 158 | """ 159 | 160 | _QUERY_HASH = "42323d64886122307be10013ad2dcc44" 161 | #_QUERY_HASH = "472f257a40c653c64c666ce877d59d2b" 162 | _URL = "{}?query_hash={}&variables={{}}".format(PageIterator._BASE_URL, _QUERY_HASH) 163 | _section_generic = "user" 164 | _section_media = "edge_owner_to_timeline_media" 165 | 166 | @classmethod 167 | def _user_data(cls, username, session): 168 | url = "https://www.instagram.com/{}/".format(username) 169 | try: 170 | with session.get(url) as res: 171 | return get_shared_data(res.text) 172 | except (ValueError, AttributeError): 173 | raise ValueError("user not found: '{}'".format(username)) 174 | 175 | @classmethod 176 | def from_username(cls, username, session): 177 | user_data = cls._user_data(username, session) 178 | if 'ProfilePage' not in user_data['entry_data']: 179 | raise ValueError("user not found: '{}'".format(username)) 180 | data = user_data['entry_data']['ProfilePage'][0]['graphql']['user'] 181 | if data['is_private'] and not data['followed_by_viewer']: 182 | con_id 
= next((c.value for c in session.cookies if c.name == "ds_user_id"), None) 183 | if con_id != data['id']: 184 | raise RuntimeError("user '{}' is private".format(username)) 185 | return cls(data['id'], session, user_data.get('rhx_gis', '')) 186 | 187 | def __init__(self, owner_id, session, rhx): 188 | super(ProfileIterator, self).__init__(session, rhx) 189 | self.owner_id = owner_id 190 | 191 | def _getparams(self, cursor): 192 | return { 193 | "id": self.owner_id, 194 | "first": self.PAGE_SIZE, 195 | "after": cursor, 196 | } 197 | -------------------------------------------------------------------------------- /instalooter/pbar.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """Progress bars used to report `InstaLooter.download` progress. 3 | 4 | The module exposes and abstract class that can be derived to implement 5 | your own progress displayer. The default implementation (which uses the 6 | `tqdm` library) is used by the CLI. 7 | """ 8 | from __future__ import absolute_import 9 | from __future__ import unicode_literals 10 | 11 | import abc 12 | import typing 13 | 14 | import six 15 | import tqdm 16 | 17 | if typing.TYPE_CHECKING: 18 | from threading import Lock, RLock 19 | from typing import Union 20 | 21 | 22 | _T = typing.TypeVar('_T', covariant=True) 23 | _L = typing.TypeVar('_L') 24 | 25 | 26 | @six.add_metaclass(abc.ABCMeta) 27 | class ProgressBar(typing.Iterator[_T]): 28 | """An abstract progess bar used to report interal progress. 
29 | """ 30 | 31 | def __init__(self, it, *args, **kwargs): 32 | self.it = it 33 | self.__lock = None # type: Union[Lock, RLock, None] 34 | 35 | def __iter__(self): 36 | # type: () -> ProgressBar[_T] 37 | return self 38 | 39 | def __next__(self): 40 | # type: () -> _T 41 | item = next(self.it) 42 | self.update() 43 | return item 44 | 45 | if six.PY2: 46 | next = __next__ 47 | 48 | @abc.abstractmethod 49 | def update(self): 50 | # type: () -> None 51 | """Update the progress bar by one step. 52 | """ 53 | return NotImplemented 54 | 55 | @abc.abstractmethod 56 | def set_maximum(self, maximum): 57 | # type: (int) -> None 58 | """Set the maximum number of steps of the operation. 59 | """ 60 | return NotImplemented 61 | 62 | def finish(self): 63 | # type: () -> None 64 | """Notify the progress bar the operation is finished. 65 | """ 66 | pass 67 | 68 | def set_lock(self, lock): 69 | # type: (Union[Lock, RLock]) -> None 70 | """Set a lock to be used by parallel workers. 71 | """ 72 | self.__lock = lock 73 | 74 | def get_lock(self): 75 | # type: () -> Union[Lock, RLock] 76 | """Obtain the progress bar lock. 77 | """ 78 | if self.__lock is None: 79 | raise RuntimeError("lock was not initialised") 80 | return self.__lock 81 | 82 | 83 | class TqdmProgressBar(tqdm.tqdm, ProgressBar): 84 | """A progress bar using the `tqdm` library. 
85 | """ 86 | 87 | def __init__(self, it, *args, **kwargs): # noqa: D102, D107 88 | kwargs["leave"] = False 89 | super(TqdmProgressBar, self).__init__(it, *args, **kwargs) 90 | ProgressBar.__init__(self, it) 91 | 92 | def set_maximum(self, maximum): # noqa: D102 93 | self.total = maximum 94 | 95 | def finish(self): # noqa: D102 96 | self.close() 97 | -------------------------------------------------------------------------------- /instalooter/static/splash.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | InstaLooter - UserAgent catcher 15 | 16 | 17 | 18 |
19 |
20 |
21 | 22 |
Don't Panic
23 |
24 |

25 | Hi! You recently installed or updated InstaLooter, and this page opened 26 | because it needs to detect your User Agent. This way, we can trick Instagram 27 | into thinking that you are using your usual web browser! 28 | You will not see this page on the next run. 29 |

30 |
31 |

Your User Agent is: {}

32 |

It has been cached in: {}

33 |
34 |
35 |
36 |
37 |
38 |
39 | © 2016-2019, Martin Larralde (GPLv3) 40 |
41 |
42 |
class InstaDownloader(threading.Thread):
    """The background InstaLooter worker class.

    A worker takes media dictionaries from a queue and downloads them to
    the destination filesystem until it receives `None` (poison pill)
    or `terminate` is called.
    """

    # Image and video downloads are attempted up to 5 times, with an
    # exponential wait between attempts
    _tenacity_options = {
        "stop": tenacity.stop_after_attempt(5),
        "wait": tenacity.wait_exponential(1, 10),
    }

    def __init__(self,
                 queue,
                 destination,
                 namegen,
                 add_metadata=False,
                 dump_json=False,
                 dump_only=False,
                 pbar=None,
                 session=None):
        """Create a new worker.

        Arguments:
            queue: the queue to take the medias from.
            destination: the filesystem where to write the files.
            namegen: the object giving the name of the file of a media
                (``namegen.file``) and its base name (``namegen.base``).
            add_metadata (bool): stored on the worker as given.
            dump_json (bool): also write the media dictionary to a JSON
                file next to the downloaded file.
            dump_only (bool): only write the JSON file (implies
                ``dump_json``).
            pbar: a progress bar updated after each media, or `None`.
            session: the session to download with; a new
                `requests.Session` is created if none is given.

        """
        super(InstaDownloader, self).__init__()

        self.queue = queue
        self.destination = destination
        self.namegen = namegen
        self.session = session or requests.Session()
        self.pbar = pbar

        self.dump_only = dump_only
        self.dump_json = dump_json or dump_only
        self.add_metadata = add_metadata

        self._killed = False
        self._downloading = None

        # Dispatch on the media type; a sidecar is not retried as a
        # whole since each of its children already is
        retry = tenacity.retry(**self._tenacity_options)
        self._DOWNLOAD_METHODS = {
            "GraphImage": retry(self._download_image),
            "GraphVideo": retry(self._download_video),
            "GraphSidecar": self._download_sidecar,
        }

    def _discard(self, filename):
        """Remove a partially written file, ignoring any failure."""
        try:
            if self.destination.exists(filename):
                self.destination.remove(filename)
        except Exception:
            # Best-effort cleanup: it must not hide the download error
            pass

    def _download_image(self, media):
        """Download the picture of ``media``, unless the file exists."""
        url = media['display_url']
        filename = self.namegen.file(media)

        if self.destination.exists(filename):
            return

        try:
            with self.destination.open(filename, "wb") as f:
                with self.session.get(url) as res:
                    f.write(res.content)
        except Exception:
            # A leftover file would make the next attempt believe the
            # media was already downloaded
            self._discard(filename)
            raise
        self._set_time(media, filename)

    def _download_video(self, media):
        """Download the video of ``media``, unless the file exists."""
        url = media['video_url']
        filename = self.namegen.file(media)

        if self.destination.exists(filename):
            return

        try:
            with self.destination.open(filename, "wb") as f:
                with self.session.get(url) as res:
                    for chunk in res.iter_content(io.DEFAULT_BUFFER_SIZE):
                        f.write(chunk)
        except Exception:
            # See `_download_image`
            self._discard(filename)
            raise
        self._set_time(media, filename)

    def _download_sidecar(self, media):
        """Download every child of a sidecar (multi-media post).

        The children are removed from ``media``, and each child inherits
        the keys of the parent it does not define itself.
        """
        edges = media.pop('edge_sidecar_to_children')['edges']
        for edge in six.moves.map(operator.itemgetter('node'), edges):
            for key, value in six.iteritems(media):
                edge.setdefault(key, value)
            self._DOWNLOAD_METHODS[edge['__typename']](edge)

    def _set_time(self, media, filename):
        """Set the times of ``filename`` to the timestamp of ``media``."""
        details = {}
        details["modified"] = details["accessed"] = details["created"] = \
            media.get('taken_at_timestamp') or media['date']
        self.destination.setinfo(filename, {"details": details})

    def _dump(self, media):
        """Write ``media`` to a JSON file named after its base name."""
        basename = self.namegen.base(media)
        filename = "{}.json".format(basename)
        mode = "w" if six.PY3 else "wb"
        with self.destination.open(filename, mode) as dest:
            json.dump(media, dest, indent=4, sort_keys=True)
        self._set_time(media, filename)

    def run(self):
        """Process the queue until poisoned or terminated."""
        while not self._killed:
            try:
                media = self.queue.get_nowait()
            except six.moves.queue.Empty:
                time.sleep(1)
                continue

            try:
                # Received a poison pill: break the loop
                if media is None:
                    self._killed = True

                else:
                    # Download media
                    if not self.dump_only:
                        self._DOWNLOAD_METHODS[media["__typename"]](media)
                    # Dump JSON metadata if needed
                    if self.dump_json:
                        self._dump(media)
                    # Update progress bar if any
                    if self.pbar is not None and not self._killed:
                        with self.pbar.get_lock():
                            self.pbar.update()
            finally:
                # Every item taken from the queue is marked as done, even
                # when its download failed
                self.queue.task_done()

    def terminate(self):
        """Ask the worker to stop after the media being processed."""
        self._killed = True
133 | 134 | def terminate(self): 135 | self._killed = True 136 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = instalooter 3 | version = attr: instalooter.__version__ 4 | author = Martin Larralde 5 | author-email = martin.larralde@ens-paris-saclay.fr 6 | home-page = https://github.com/althonos/instalooter 7 | description = Another API-less Instagram pictures and videos downloader 8 | long-description = file: README.rst 9 | license = GPLv3+ 10 | license-file = COPYING 11 | platform = any 12 | keywords = instagram, download, web, web scraping, looter 13 | classifiers = 14 | Development Status :: 4 - Beta 15 | Intended Audience :: Developers 16 | Intended Audience :: End Users/Desktop 17 | License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+) 18 | Programming Language :: Python 19 | Programming Language :: Python :: 2.7 20 | Programming Language :: Python :: 3.3 21 | Programming Language :: Python :: 3.4 22 | Programming Language :: Python :: 3.5 23 | Programming Language :: Python :: 3.6 24 | Topic :: Internet 25 | Topic :: Software Development :: Libraries :: Python Modules 26 | Operating System :: OS Independent 27 | 28 | [options] 29 | zip_safe = true 30 | include_package_data = true 31 | python_requires = >= 2.7, != 3.0.*, != 3.1.*, != 3.2.*, != 3.5.1 32 | packages = find: 33 | test_suite = tests 34 | install_requires = 35 | coloredlogs ~=14.0 36 | python-dateutil ~=2.1 37 | docopt ~=0.4 38 | fs ~=2.1 39 | requests ~=2.18 40 | six ~=1.4 41 | tqdm ~=4.19 42 | tenacity ~=6.0 43 | typing ~=3.6 ; python_version < '3.6' 44 | verboselogs ~=1.7 45 | tests_require = 46 | instalooter[test] 47 | 48 | [options.entry_points] 49 | console_scripts = 50 | instalooter = instalooter.cli:main 51 | 52 | [options.packages.find] 53 | exclude = 54 | tests 55 | tests.utils 56 | 57 | [options.extras_require] 58 | # 
add EXIF metadata to downloaded pictures 59 | metadata = 60 | piexif 61 | Pillow 62 | # all features 63 | all = 64 | %(metadata)s 65 | # test dependencies 66 | test = 67 | contexter ~=0.1 68 | mock ~=2.0 ; python_version < '3.4' 69 | parameterized ~=0.6 70 | green ~=2.12 71 | %(metadata)s 72 | # coverage dependencies 73 | coverage = 74 | coverage 75 | codecov 76 | codacy-coverage 77 | # documentation dependencies 78 | doc = 79 | sphinx ~=1.7 80 | sphinx-bootstrap-theme ~=0.6 81 | semantic-version ~=2.6 82 | # development dependencies 83 | dev = 84 | docutils 85 | Pygments 86 | %(test)s 87 | %(coverage)s 88 | 89 | [bdist_wheel] 90 | universal=1 91 | 92 | [coverage:report] 93 | exclude_lines = 94 | pragma: no cover 95 | raise AssertionError 96 | raise NotImplementedError 97 | return NotImplemented 98 | if 0: 99 | if __name__ == .__main__.: 100 | except ImportError 101 | if six.PY2: 102 | if six.PY3: 103 | @typing.overload 104 | @abc.abstractmethod 105 | if typing.TYPE_CHECKING: 106 | 107 | [coverage:run] 108 | branch=True 109 | 110 | [green] 111 | file-pattern = test_*.py 112 | verbose = 2 113 | no-skip-report = true 114 | quiet-stdout = true 115 | run-coverage = true 116 | processes = 1 117 | 118 | [pydocstyle] 119 | inherit = false 120 | match-dir = (?!tests)(?!resources)(?!docs)[^\.].* 121 | match = (?!test)(?!setup)[^\._].*\.py 122 | ignore = D200, D203, D213, D406, D407 123 | 124 | [flake8] 125 | max-line-length = 99 126 | doctests = True 127 | ignore = D200, D203, D213, D406, D407 128 | exclude = 129 | .git, 130 | .eggs, 131 | __pycache__, 132 | tests/, 133 | docs/, 134 | build/, 135 | dist/, 136 | setup.py 137 | 138 | [mypy] 139 | disallow_any_decorated = false 140 | disallow_any_generics = false 141 | disallow_any_unimported = false 142 | disallow_subclassing_any = false 143 | disallow_untyped_calls = false 144 | disallow_untyped_defs = false 145 | ignore_missing_imports = true 146 | warn_unused_ignores = true 147 | warn_return_any = false 148 | 
-------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # released under the GNU General Public License version 3.0 (GPLv3) 3 | 4 | from setuptools import setup 5 | setup() 6 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import os 4 | import sys 5 | sys.path.insert(0, os.path.abspath('..')) 6 | -------------------------------------------------------------------------------- /tests/test_batch.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | import textwrap 6 | import unittest 7 | import warnings 8 | 9 | import fs 10 | import requests 11 | 12 | from instalooter.cli import main 13 | from instalooter.batch import BatchRunner 14 | from instalooter.looters import InstaLooter 15 | 16 | 17 | try: 18 | CONNECTION_FAILURE = not requests.get("https://instagr.am/instagram").ok 19 | except requests.exceptions.ConnectionError: 20 | CONNECTION_FAILURE = True 21 | 22 | 23 | class TestBatchRunner(unittest.TestCase): 24 | 25 | @classmethod 26 | def setUpClass(cls): 27 | cls.session = requests.Session() 28 | 29 | @classmethod 30 | def tearDownClass(cls): 31 | cls.session.close() 32 | 33 | def setUp(self): 34 | self.destfs = fs.open_fs("temp://") 35 | self.tmpdir = self.destfs.getsyspath("/") 36 | 37 | def tearDown(self): 38 | self.destfs.close() 39 | 40 | @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram") 41 | def test_cli(self): 42 | cfg = textwrap.dedent( 43 | """ 44 | [my job] 45 | 46 | num-to-dl = 3 47 | quiet = true 48 | 49 | users: 50 | therock: {self.tmpdir} 51 | nintendo: {self.tmpdir} 52 | """ 53 | 
).format(self=self) 54 | 55 | with self.destfs.open('batch.ini', 'w') as batch_file: 56 | batch_file.write(cfg) 57 | 58 | retcode = main(["batch", self.destfs.getsyspath('batch.ini')]) 59 | self.assertEqual(retcode, 0) 60 | self.assertGreaterEqual( 61 | len(list(self.destfs.filterdir("/", ["*.jpg"]))), 6) 62 | 63 | 64 | def setUpModule(): 65 | warnings.simplefilter('ignore') 66 | 67 | 68 | def tearDownModule(): 69 | warnings.simplefilter(warnings.defaultaction) 70 | -------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | import datetime 6 | import unittest 7 | import json 8 | import os 9 | import time 10 | 11 | import contexter 12 | import fs 13 | import parameterized 14 | import requests 15 | import six 16 | from six.moves.queue import Queue 17 | 18 | from instalooter.cli import main 19 | from instalooter.cli import time as timeutils 20 | from instalooter.cli import threadutils 21 | from instalooter.cli.constants import USAGE 22 | from instalooter.cli.login import login 23 | from instalooter.worker import InstaDownloader 24 | 25 | from .utils import mock 26 | from .utils.method_names import firstparam 27 | from .utils.ig_mock import MockPages 28 | 29 | 30 | try: 31 | CONNECTION_FAILURE = not requests.get("https://instagr.am/instagram").ok 32 | except requests.exceptions.ConnectionError: 33 | CONNECTION_FAILURE = True 34 | 35 | 36 | class TestCLI(unittest.TestCase): 37 | 38 | @classmethod 39 | def setUpClass(cls): 40 | cls.session = requests.Session() 41 | 42 | @classmethod 43 | def tearDownClass(cls): 44 | cls.session.close() 45 | 46 | def setUp(self): 47 | self.destfs = fs.open_fs("temp://") 48 | self.tmpdir = self.destfs.getsyspath("/") 49 | 50 | def tearDown(self): 51 | self.destfs.close() 52 | if os.getenv("CI") == "true": 
class TestCLI(unittest.TestCase):
    """End-to-end checks of the ``instalooter`` command line entry point."""

    @classmethod
    def setUpClass(cls):
        # One HTTP session shared by the whole class.
        cls.session = requests.Session()

    @classmethod
    def tearDownClass(cls):
        cls.session.close()

    def setUp(self):
        # Every test downloads into its own throw-away directory.
        self.destfs = fs.open_fs("temp://")
        self.tmpdir = self.destfs.getsyspath("/")

    def tearDown(self):
        self.destfs.close()
        on_ci = os.getenv("CI") == "true"
        if on_ci:
            # Pause between tests when running on CI.
            time.sleep(1)

    def _check_dump(self, flag, picture_expected):
        """Run ``post BIqZ8L8AHmH`` with *flag* and verify the JSON dump."""
        retcode = main(["post", "BIqZ8L8AHmH", self.tmpdir, '-q', flag])
        self.assertEqual(retcode, 0)

        self.assertTrue(self.destfs.exists("1308972728853756295.json"))
        picture_found = self.destfs.exists("1308972728853756295.jpg")
        if picture_expected:
            self.assertTrue(picture_found)
        else:
            self.assertFalse(picture_found)

        with self.destfs.open("1308972728853756295.json") as dump_file:
            dumped = json.load(dump_file)

        self.assertEqual("1308972728853756295", dumped["id"])
        self.assertEqual("BIqZ8L8AHmH", dumped["shortcode"])

    @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
    def test_user(self):
        """``user`` downloads exactly the requested number of medias."""
        with contexter.Contexter() as stack:
            stack << mock.patch('instalooter.cli.ProfileLooter.pages', MockPages('nintendo'))
            retcode = main(["user", "nintendo", self.tmpdir, "-q", '-n', '10'])
        self.assertEqual(retcode, 0)
        downloaded = self.destfs.listdir('/')
        self.assertEqual(len(downloaded), 10)

    @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
    def test_single_post(self):
        """``post`` accepts a bare shortcode."""
        retcode = main(["post", "BFB6znLg5s1", self.tmpdir, "-q"])
        self.assertEqual(retcode, 0)
        self.assertTrue(self.destfs.exists("1243533605591030581.jpg"))

    @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
    def test_dump_json(self):
        """``-d`` writes the metadata next to the picture."""
        self._check_dump('-d', picture_expected=True)

    @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
    def test_dump_only(self):
        """``-D`` writes the metadata and skips the picture."""
        self._check_dump('-D', picture_expected=False)

    @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
    def test_usage(self):
        """``--usage`` prints the USAGE constant to the given stream."""
        captured = six.moves.StringIO()
        main(["--usage"], stream=captured)
        printed = captured.getvalue().strip()
        self.assertEqual(printed, USAGE.strip())

    @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
    def test_single_post_from_url(self):
        """``post`` accepts a full post URL as well as a shortcode."""
        post_url = "https://www.instagram.com/p/BFB6znLg5s1/"
        main(["post", post_url, self.tmpdir, "-q"])
        self.assertIn("1243533605591030581.jpg", os.listdir(self.tmpdir))


class TestTimeUtils(unittest.TestCase):
    """Checks for ``instalooter.cli.time.get_times_from_cli``."""

    @parameterized.parameterized.expand([
        (":", (None, None)),
        ("2017-03-12:", (None, datetime.date(2017, 3, 12))),
        (":2016-08-04", (datetime.date(2016, 8, 4), None)),
        ("2017-03-01:2017-02-01", (datetime.date(2017, 3, 1), datetime.date(2017, 2, 1))),
    ], testcase_func_name=firstparam)
    def test_get_times_from_cli(self, token, expected):
        """Explicit ``a:b`` tokens are returned as the expected date pair."""
        parsed = timeutils.get_times_from_cli(token)
        self.assertEqual(parsed, expected)

    @parameterized.parameterized.expand([
        ("thisday", 0, 0),
        ("thisweek", 7, 7),
        ("thismonth", 28, 31),
        ("thisyear", 365, 366),
    ], testcase_func_name=firstparam)
    def test_get_times_from_cli_keywords(self, token, inf, sup):
        """Keyword tokens span *inf* to *sup* days and start today."""
        start, stop = timeutils.get_times_from_cli(token)
        shortest = datetime.timedelta(inf)
        longest = datetime.timedelta(sup)
        self.assertGreaterEqual(start - stop, shortest)
        self.assertLessEqual(start - stop, longest)
        self.assertEqual(start, datetime.date.today())

    @parameterized.parameterized.expand([
        ["x"],
        ["x:y"],
        ["x:y:z"],
    ], testcase_func_name=firstparam)
    def test_get_times_from_cli_bad_format(self, token):
        """Malformed tokens are rejected with ``ValueError``."""
        with self.assertRaises(ValueError):
            timeutils.get_times_from_cli(token)
@mock.patch('instalooter.looters.InstaLooter._login')
@mock.patch('getpass.getpass')
class TestLoginUtils(unittest.TestCase):
    """Checks for the CLI ``login`` helper.

    ``InstaLooter._login`` and ``getpass.getpass`` are mocked for every
    test in the class.
    """

    def test_cli_login_no_username(self, getpass_, login_):
        """Without a username no login attempt is made."""
        cli_args = {'--username': None, "--password": None}
        login(cli_args)
        login_.assert_not_called()

    @mock.patch('instalooter.looters.InstaLooter._logged_in')
    def test_cli_login_no_password(self, logged_in_, getpass_, login_):
        """A missing password is read through ``getpass``."""
        logged_in_.return_value = False
        getpass_.return_value = "pasw"
        cli_args = {'--username': "user", "--password": None, "--quiet": False}
        login(cli_args)
        login_.assert_called_once_with("user", "pasw")

    @mock.patch('instalooter.looters.InstaLooter._logged_in')
    def test_cli_login(self, logged_in_, getpass_, login_):
        """Username and password from the CLI are forwarded as given."""
        logged_in_.return_value = False
        cli_args = {'--username': "user", "--password": "pasw", "--quiet": False}
        login(cli_args)
        login_.assert_called_once_with("user", "pasw")

    @mock.patch('instalooter.looters.InstaLooter._logged_in')
    def test_cli_already_logged_in(self, logged_in_, getpass_, login_):
        """No login attempt is made when already logged in."""
        logged_in_.return_value = True
        cli_args = {'--username': "user", "--password": "pasw", "--quiet": False}
        login(cli_args)
        login_.assert_not_called()


class TestThreadUtils(unittest.TestCase):
    """Checks for the helpers in ``instalooter.cli.threadutils``."""

    def test_threads_count(self):
        """``threads_count`` follows the number of started workers."""
        queue = Queue()
        workers = [InstaDownloader(queue, None, None) for _ in range(2)]

        try:
            # The count is checked before each start, then once more at the end.
            for started, worker in enumerate(workers):
                self.assertEqual(threadutils.threads_count(), started)
                worker.start()
            self.assertEqual(threadutils.threads_count(), len(workers))
        finally:
            for worker in workers:
                worker.terminate()

    def test_threads_force_join(self):
        """``threads_force_join`` leaves no worker alive."""
        queue = Queue()
        workers = [InstaDownloader(queue, None, None) for _ in range(2)]

        for worker in workers:
            worker.start()
        for worker in workers:
            self.assertTrue(worker.is_alive())

        threadutils.threads_force_join()

        for worker in workers:
            self.assertFalse(worker.is_alive())
class TestResolvedIssues(unittest.TestCase):
    """Regression tests, one per resolved GitHub issue (``test_issue_NNN``).

    Most tests are skipped when the module-level connectivity probe
    (``CONNECTION_FAILURE``) reports a failure.
    """

    if six.PY2:
        # Python 2 only knows the old name of ``assertRegex``.
        assertRegex = unittest.TestCase.assertRegexpMatches

    @classmethod
    def setUpClass(cls):
        cls.session = requests.Session()
        # Replace InstaLooter._user_agent with a mock returning the
        # session's own User-Agent header.
        _user_agent = mock.Mock(return_value=cls.session.headers["User-Agent"])
        cls.patch = mock.patch.object(InstaLooter, "_user_agent", new=_user_agent)
        cls.patch.__enter__()

    @classmethod
    def tearDownClass(cls):
        cls.session.close()
        cls.patch.__exit__(None, None, None)

    def setUp(self):
        self.destfs = fs.open_fs("temp://")
        self.tmpdir = self.destfs.getsyspath("/")
        # Remember the warning hook on the test instance (not on the
        # ``warnings`` module) so tearDown can restore it.
        self._showwarning = warnings.showwarning

    def tearDown(self):
        self.destfs.close()
        warnings.showwarning = self._showwarning
        if os.getenv("CI") == "true":
            # Pause between tests when running on CI.
            time.sleep(1)

    @unittest.expectedFailure
    @unittest.skipUnless(piexif, "piexif required for this test")
    def test_issue_009(self):
        """
        Thanks to @kurtmaia for reporting this bug.

        Checks that adding metadata to pictures downloaded from a hashtag
        works as well.
        """
        looter = HashtagLooter("fluoxetine", add_metadata=True, session=self.session)
        with contexter.Contexter() as ctx:
            ctx << mock.patch.object(looter, 'pages', MockPages('fluoxetine'))
            looter.download(self.destfs, media_count=10)
        for f in self.destfs.listdir("/"):
            exif = piexif.load(self.destfs.getbytes(f))
            self.assertTrue(exif['Exif'])  # Date & Caption
            self.assertTrue(exif['0th'])  # Image creator

    @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
    def test_issue_012(self):
        """Feature request by @paramjitrohit.

        Allows downloading pictures and videos only within a timeframe.
        """
        looter = ProfileLooter("nintendo", session=self.session)
        day = datetime.date(2018, 3, 16)
        with contexter.Contexter() as ctx:
            ctx << mock.patch.object(looter, 'pages', MockPages('nintendo'))
            medias_in_timeframe = list(looter.medias(timeframe=[day, day]))
        self.assertEqual(len(medias_in_timeframe), 2)

    @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
    def test_issue_019(self):
        """
        Thanks to @emijawdo for reporting this bug.

        Checks that instalooter does not crash when not given a destination
        directory and uses the current directory.
        """
        initial_dir = os.getcwd()
        os.chdir(self.tmpdir)

        try:
            with contexter.Contexter() as ctx:
                ctx << mock.patch('instalooter.looters.InstaLooter.pages', MockPages('nintendo'))
                main(["user", "nintendo", "-n", "3", "-q"])
                self.assertGreaterEqual(len(self.destfs.listdir("/")), 3)
        finally:
            # Always leave the temporary directory again.
            os.chdir(initial_dir)

    @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
    def test_issue_014(self):
        """Feature request by @JFLarsen.

        Allows customizing filenames using a template following Python
        `.format()` minilanguage.
        """
        looter = ProfileLooter("nintendo", template="{username}.{id}", session=self.session)
        with contexter.Contexter() as ctx:
            ctx << mock.patch.object(looter, 'pages', MockPages('nintendo'))
            looter.download(self.destfs, media_count=5)
        for f in self.destfs.scandir("/"):
            self.assertTrue(f.name.startswith('nintendo.'))

    @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
    @unittest.skipIf(os.getenv("IG_USERNAME") is None, "need private user account")
    def test_issue_006(self):
        """
        Checks that instalooter does not iterate forever on a private
        profile.
        """
        with self.assertRaises(RuntimeError):
            username = os.getenv("IG_USERNAME")
            looter = ProfileLooter(username, session=self.session)
            looter.logout()
            next(looter.medias())

    @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
    def test_issue_015(self):
        """
        Feature request by @MohamedIM.

        Checks that videos are not downloaded several times if present
        already in the destination directory.
        """
        looter = ProfileLooter("nintendo", session=self.session)

        with contexter.Contexter() as ctx:
            ctx << mock.patch.object(looter, 'pages', MockPages('nintendo'))
            looter.download_videos(self.destfs, media_count=1)
            video_file = next(self.destfs.filterdir("/", ["*.mp4"]))
            # The access time must be the same after a second download.
            accessed_before = self.destfs.getdetails(video_file.name).accessed
            looter.download_videos(self.destfs, media_count=1)
            self.assertEqual(accessed_before, self.destfs.getdetails(video_file.name).accessed)

    @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
    def test_issue_022(self):
        """
        Thanks to @kuchenmitsahne for reporting this bug.

        Checks that using ``{datetime}`` in the template does not put
        a Windows forbidden character in the filename.
        """
        # The backslash is written as ``\\``: a bare ``\|`` is an invalid
        # escape sequence, although it yields the same two characters.
        FORBIDDEN = set('<>:"/\\|?*')
        looter = ProfileLooter("nintendo", template="{datetime}", session=self.session)
        with contexter.Contexter() as ctx:
            ctx << mock.patch.object(looter, 'pages', MockPages('nintendo'))
            looter.download(self.destfs, media_count=5)
        for f in self.destfs.scandir("/"):
            self.assertFalse(FORBIDDEN.intersection(f.name))

    @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
    @unittest.skipUnless(PIL, "PIL required for this test")
    def test_issue_026(self):
        """
        Feature request by @verafide.

        Checks that pictures that are downloaded are not
        resized.
        """
        PostLooter("BO0XpEshejh", session=self.session).download(self.destfs)
        pic = PIL.Image.open(self.destfs.getsyspath("1419863760138791137.jpg"))
        self.assertEqual(pic.size, (525, 612))

    @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
    def test_issue_039(self):
        """
        Feature request by @verafide

        Checks that all pictures are downloaded from posts
        with more than one picture.
        """
        looter = PostLooter("BRHecUuFhPl", session=self.session)
        looter.download(self.destfs)
        self.assertEqual(
            set(self.destfs.listdir("/")),
            {
                "1461270165803344956.jpg",
                "1461270167497776767.jpg",
                "1461270174435133336.jpg",
                "1461270172581471925.jpg",
                "1461270181565655668.jpg",
            }
        )

    @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
    def test_issue_042(self):
        """
        Thanks to @MohamedIM for reporting this bug.

        Checks that a multipost is successfully downloaded from
        the CLI `post` option.
        """
        looter = PostLooter('BRW-j_dBI6F', get_videos=True, session=self.session)
        looter.download(self.destfs)
        self.assertEqual(
            set(self.destfs.listdir("/")),
            {
                '1465633492745668095.mp4',
                '1465633517836005761.mp4',
                '1465633541559037966.mp4',
                '1465633561523918792.mp4',
            }
        )

    # OUTDATED: warn_windows is not used anymore
    #
    # def test_issue_044(self):
    #     """
    #     Thanks to @Bangaio64 for reporting this bug.
    #
    #     Checks that warn_windows does not trigger an exception.
    #     """
    #     import instalooter.utils
    #     warnings.showwarning = instalooter.utils.warn_windows
    #     looter = instalooter.InstaLooter(
    #         directory=self.tmpdir,
    #         profile="akjhdskjhfkjsdhfkjhdskjhfkjdshkfjhsdkjfdhkjdfshdfskhfd"
    #     )
    #     try:
    #         looter.download()
    #     except Exception:
    #         self.fail()

    @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
    def test_issue_041(self):
        """Feature request by @liorlior

        Allow downloading only videos.
        """
        looter = ProfileLooter("nintendo", videos_only=True, session=self.session)
        day = datetime.date(2017, 3, 10)
        with contexter.Contexter() as ctx:
            ctx << mock.patch.object(looter, 'pages', MockPages('nintendo'))
            looter.download(self.destfs, timeframe=[day, day])
        self.assertEqual(self.destfs.listdir("/"), ["1467639884243493431.mp4"])

    @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
    def test_issue_052(self):
        """Thanks to @cyrusclarke for reporting this bug.

        Checks that on hashtags with a lot of posts, the time parameter
        doesn't cause the program to crash without finding any media to
        download.
        """
        main(["hashtag", "happy", self.tmpdir, "-q", "-t", "thisweek", "-n", "5"])
        self.assertGreaterEqual(len(self.destfs.listdir('/')), 5)

    # OUTDATED: Sidecar info dicts are not converted anymore but passed
    #           to the workers directly.
    #
    # def test_issue_057(self):
    #     """
    #     Thanks to @VasiliPupkin256 for reporting this bug.
    #
    #     Checks that metadata can successfully extract caption
    #     out of multiposts containing images.
    #     """
    #     looter = ProfileLooter("awwwwshoot_ob", session=self.session)
    #     sidecar = next(m for m in looter.medias() if m['__typename'] == "GraphSidecar")
    #
    #     looter = PostLooter(sidecar['shortcode'], session=self.session)
    #     looter.download(self.destfs)
    #
    #     for key in ('caption', 'code', 'date'):
    #         self.assertIn(key, media)
    #         self.assertIsNotNone(media[key])

    @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
    def test_issue_066(self):
        """Thanks to @douglasrizzo for reporting this bug.

        Check that likescount and commentscount can be used
        in filename templates without causing the program to
        crash.
        """
        looter = ProfileLooter(
            "nintendo", get_videos=True, add_metadata=True,
            template='{id}-{likescount}-{commentscount}',
            session=self.session)
        with contexter.Contexter() as ctx:
            ctx << mock.patch.object(looter, 'pages', MockPages('nintendo'))
            looter.download(self.destfs, media_count=10)
        for image in self.destfs.listdir("/"):
            # Raw string with an escaped dot, so the extension separator
            # must be a literal "." and not any character.
            self.assertRegex(image, r'[a-zA-Z0-9]*-[0-9]*-[0-9]*\.(jpg|mp4)')

    @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
    def test_issue_076(self):
        """Thanks to @zeshuaro for reporting this bug.

        Check that when downloading hashtags, the downloader
        actually stops.
        """
        looter = HashtagLooter("oulianov", session=self.session)

        medias_it = looter.medias()
        postcount = length_hint(medias_it)

        # Only the number of yielded medias matters, not their content.
        for i, _ in enumerate(medias_it):
            if i > postcount:
                self.fail("looter.medias() did not stop.")

    # OUTDATED: URLs are not modified anymore as Instagram prevents
    #           any modification
    #
    # def test_issue_082(self):
    #     """
    #     Thanks to @MohamedIM for reporting this bug.
    #
    #     Check that urls containing 'h-ak-igx' are not stripped from all
    #     their parameters.
    #     """
    #     looter = instalooter.looter.PostLooter('BWOYSYQDCo5', template='{code}')
    #     info = next(looter.medias())
    #
    #     info['display_url'] = \
    #         'https://ig-s-c-a.akamaihd.net/h-ak-igx/19764472_1586345694718446_4011887281420894208_n.jpg'
    #     looter.get_post_info = lambda code: info
    #
    #     looter.download_post('BWOYSYQDCo5')
    #
    #     with open(os.path.join(self.tmpdir, 'BWOYSYQDCo5.jpg'), 'rb') as f:
    #         self.assertNotIn(b'5xx Server Error', f.read())

    @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
    def test_issue_084(self):
        """Thanks to @raphaelbernardino for reporting this bug.

        Make sure private profiles with few pictures (less than a page worth)
        raise the private error as expected.
        """
        looter = ProfileLooter("rararudo", session=self.session)
        self.assertRaises(RuntimeError, looter.medias)

    @unittest.expectedFailure
    @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
    @unittest.skipUnless(piexif, "piexif required for this test")
    def test_issue_094(self):
        """Thanks to @jeanmarctst for raising this issue.

        Make sure caption is properly extracted from images downloaded
        from a post code and written to the metadata.
        """
        looter = PostLooter("BY77tSfBnRm",
            add_metadata=True, template='{code}', session=self.session)
        looter.download(self.destfs)
        metadata = piexif.load(self.destfs.getbytes("BY77tSfBnRm.jpg"), True)
        self.assertTrue(metadata['Exif']['UserComment'])

    def test_issue_125(self):
        """Thanks to @applepanda for reporting this bug.

        Make sure colons in path do not cause issue in batch mode.
        """
        configfile = six.StringIO(textwrap.dedent(
            """
            [Family]
            users =
                instagram: D:\\Instagram\\Profiles\\instagram
                therock: D:\\Instagram\\Profiles\\therock
            """
        ))
        runner = BatchRunner(configfile)
        self.assertEqual(
            runner.get_targets(runner._get('Family', 'users')),
            {'instagram': 'D:\\Instagram\\Profiles\\instagram',
             'therock': 'D:\\Instagram\\Profiles\\therock'}
        )

    @mock.patch('instalooter.looters.InstaLooter.__init__')
    def test_issue_184(self, _):
        """Feature request by @ghost.

        Allow downloading a post directly from its URL.
        """
        looter = PostLooter("https://www.instagram.com/p/BJlIB9WhdRn/?taken-by=2k")
        self.assertEqual(looter.code, "BJlIB9WhdRn")

    @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
    def test_issue_185(self):
        """Feature request by @JPNYC81.

        Make sure an ``instalooter`` batch keeps going even if it encounters
        errors on a specific job. This test tries with a non-existing profile.
        """
        configfile = six.StringIO(textwrap.dedent(
            """
            [Family]
            num-to-dl = 3
            users =
                jdskjhjkfhkdshfkjdhsfjsfdkjhfksdjhf: {tmp}
                instagram: {tmp}
                therock: {tmp}
            """
        ).format(tmp=self.tmpdir))
        runner = BatchRunner(configfile)
        # The batch logger is mocked out for the duration of the run.
        with mock.patch('instalooter.batch.logger'):
            runner.run_all()
        self.assertGreaterEqual(len(self.destfs.listdir('/')), 6)

    @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
    def test_issue_194(self):
        """Feature request by @raphaelbernardino

        When trying to download from a non-existing user, try to display a
        meaningful message instead of a cryptic error.
        """
        username = "jdhfdjkhdlqdhfdhqfqjqlhfhdsdjquryerhdjfhqlkdfhkqhfqkure"
        looter = ProfileLooter(username)
        with self.assertRaises(ValueError) as ctx:
            next(looter.medias())
        self.assertEqual(str(ctx.exception), "user not found: '{}'".format(username))
# @mock.patch('instalooter.looter.requests.Session', lambda: TestPullRequests.session)
class TestPullRequests(unittest.TestCase):
    """Tests for features contributed through pull requests."""

    @classmethod
    def setUpClass(cls):
        # One HTTP session shared by the whole class.
        cls.session = requests.Session()

    @classmethod
    def tearDownClass(cls):
        cls.session.close()

    def setUp(self):
        self.destfs = fs.open_fs("temp://")
        self.tmpdir = self.destfs.getsyspath("/")

    def tearDown(self):
        self.destfs.close()
        on_ci = os.getenv("CI") == "true"
        if on_ci:
            # Pause between tests when running on CI.
            time.sleep(1)

    def _pr_122_looter(self):
        """Build the ``nintendo`` looter naming files after the post code."""
        return ProfileLooter('nintendo', template='{code}', session=self.session)

    def _assert_stamped(self, filename, media):
        """Check that *filename* carries the IG timestamp of *media*."""
        details = self.destfs.getdetails(filename).raw["details"]
        self.assertEqual(details["accessed"], media['taken_at_timestamp'])
        self.assertEqual(details["modified"], media['taken_at_timestamp'])

    @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
    def test_pr_122_download_post(self):
        """Feature implemented by @susundberg.

        Set the access time and modification time of a downloaded media
        according to its IG date.
        """
        code = 'BY77tSfBnRm'
        post_looter = PostLooter(code, session=self.session, template='{code}')
        info = post_looter.get_post_info(code)
        post_looter.download(self.destfs)
        self._assert_stamped('{}.jpg'.format(code), info)

    @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
    def test_pr_122_download_pictures(self):
        """Feature implemented by @susundberg.

        Set the access time and modification time of a downloaded media
        according to its IG date.
        """
        # Test download_pictures
        looter = self._pr_122_looter()
        with contexter.Contexter() as stack:
            stack << mock.patch.object(looter, 'pages', MockPages('nintendo'))
            picture = next(m for m in looter.medias() if not m['is_video'])
            looter.download_pictures(self.destfs, media_count=1)
        self._assert_stamped('{}.jpg'.format(picture['shortcode']), picture)

    @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
    def test_pr_122_download_videos(self):
        """Feature implemented by @susundberg.

        Set the access time and modification time of a downloaded media
        according to its IG date.
        """
        # Test download_videos
        looter = self._pr_122_looter()
        with contexter.Contexter() as stack:
            stack << mock.patch.object(looter, 'pages', MockPages('nintendo'))
            video = next(m for m in looter.medias() if m['is_video'])
            looter.download_videos(self.destfs, media_count=1)
        self._assert_stamped('{}.mp4'.format(video['shortcode']), video)
496 | """ 497 | # Test download_videos 498 | looter = self._pr_122_looter() 499 | with contexter.Contexter() as ctx: 500 | ctx << mock.patch.object(looter, 'pages', MockPages('nintendo')) 501 | vid = next(m for m in looter.medias() if m['is_video']) 502 | looter.download_videos(self.destfs, media_count=1) 503 | stat = self.destfs.getdetails('{}.mp4'.format(vid['shortcode'])) 504 | self.assertEqual(stat.raw["details"]["accessed"], vid['taken_at_timestamp']) 505 | self.assertEqual(stat.raw["details"]["modified"], vid['taken_at_timestamp']) 506 | 507 | 508 | def setUpModule(): 509 | warnings.simplefilter('ignore') 510 | 511 | 512 | def tearDownModule(): 513 | warnings.simplefilter(warnings.defaultaction) 514 | -------------------------------------------------------------------------------- /tests/test_login.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | import os 6 | import unittest 7 | 8 | import requests 9 | import fs.memoryfs 10 | 11 | from instalooter.looters import InstaLooter, ProfileLooter 12 | 13 | 14 | USERNAME = os.getenv("IG_USERNAME") 15 | PASSWORD = os.getenv("IG_PASSWORD") 16 | 17 | try: 18 | CONNECTION_FAILURE = not requests.get("https://instagr.am/instagram").ok 19 | except requests.exceptions.ConnectionError: 20 | CONNECTION_FAILURE = True 21 | 22 | 23 | @unittest.skipIf(os.getenv("CI") == "true", "not supported in CI") 24 | @unittest.skipUnless(USERNAME and PASSWORD, "credentials required") 25 | class TestLogin(unittest.TestCase): 26 | 27 | @classmethod 28 | def setUpClass(cls): 29 | cls.session = requests.Session() 30 | 31 | @classmethod 32 | def tearDownClass(cls): 33 | cls.session.close() 34 | 35 | def setUp(self): 36 | self.looter = ProfileLooter(USERNAME, template="test") 37 | self.destfs = fs.memoryfs.MemoryFS() 38 | 39 | def tearDown(self): 40 | self.destfs.close() 41 | 42 | def 
@unittest.skipIf(os.getenv("CI") == "true", "not supported in CI")
@unittest.skipUnless(USERNAME and PASSWORD, "credentials required")
class TestLogin(unittest.TestCase):
    """Login round-trip tests; need ``IG_USERNAME`` and ``IG_PASSWORD``."""

    @classmethod
    def setUpClass(cls):
        cls.session = requests.Session()

    @classmethod
    def tearDownClass(cls):
        cls.session.close()

    def setUp(self):
        # Downloads go to an in-memory filesystem.
        self.looter = ProfileLooter(USERNAME, template="test")
        self.destfs = fs.memoryfs.MemoryFS()

    def tearDown(self):
        self.destfs.close()

    def _cookie_saved(self):
        """Tell whether the looter's cookie file exists in its cache."""
        return self.looter._cachefs().exists(self.looter._COOKIE_FILE)

    def test_login(self):
        """Logging in creates the cookie file; logging out removes it."""
        self.assertFalse(self.looter.logged_in())
        self.assertRaises(RuntimeError, self.looter.medias)
        self.assertFalse(self._cookie_saved())

        try:
            self.looter.login(USERNAME, PASSWORD)
            self.assertTrue(self.looter.logged_in())
            self.assertTrue(self._cookie_saved())
            first_media = next(self.looter.medias())
            self.assertTrue(first_media)
        finally:
            self.looter.logout()
            self.assertFalse(self._cookie_saved())

    def test_download(self):
        """A logged-in looter downloads a JFIF picture named ``test.jpg``."""
        try:
            self.looter.login(USERNAME, PASSWORD)
            self.looter.download(self.destfs)
            self.assertTrue(self.destfs.exists('test.jpg'))
            # Bytes 6-9 of the file must read "JFIF".
            marker = self.destfs.getbytes('test.jpg')[6:10]
            self.assertEqual(marker, b'JFIF')
        finally:
            self.looter.logout()
class TestInstaLooter(unittest.TestCase):
    """Download tests for profile and hashtag looters."""

    MEDIA_COUNT = 5

    @classmethod
    def setUpClass(cls):
        cls.session = requests.Session()

    @classmethod
    def tearDownClass(cls):
        cls.session.close()

    def setUp(self):
        # Downloads go to an in-memory filesystem.
        self.destfs = fs.memoryfs.MemoryFS()

    def tearDown(self):
        self.destfs.close()
        on_ci = os.getenv("CI") == "true"
        if on_ci:
            # Pause between tests when running on CI.
            time.sleep(1)

    @parameterized.parameterized.expand([
        parameterized.param("instagram",),
        parameterized.param("instagram", get_videos=True),
        # parameterized.param("serotonine",),
    ], testcase_func_name=signature)
    @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
    def test_profile(self, profile, **kwargs):
        """A profile download yields at least ``MEDIA_COUNT`` files."""
        looter = ProfileLooter(profile, session=self.session, **kwargs)
        looter.download(self.destfs, media_count=self.MEDIA_COUNT)
        downloaded = self.destfs.listdir("/")
        self.assertGreaterEqual(len(downloaded), self.MEDIA_COUNT)

    @parameterized.parameterized.expand([
        parameterized.param("eggs"),
        parameterized.param("python", videos_only=True),
    ], testcase_func_name=signature)
    @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
    def test_hashtag(self, hashtag, **kwargs):
        """A hashtag download yields at least ``MEDIA_COUNT`` files."""
        looter = HashtagLooter(hashtag, session=self.session, **kwargs)
        looter.download(self.destfs, media_count=self.MEDIA_COUNT)
        downloaded = self.destfs.listdir("/")
        self.assertGreaterEqual(len(downloaded), self.MEDIA_COUNT)

    @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
    def test_timeframe_datetime(self):
        """A ``datetime`` timeframe bounds the first yielded media."""
        looter = HashtagLooter("protein")
        now = datetime.datetime.now()
        newest = now - datetime.timedelta(5)
        oldest = now - datetime.timedelta(7)
        media = next(looter.medias(timeframe=(newest, oldest)))

        taken_at = datetime.datetime.fromtimestamp(media["taken_at_timestamp"])
        self.assertLessEqual(taken_at, newest)
        self.assertGreaterEqual(taken_at, oldest)

    @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
    def test_timeframe_date(self):
        """A ``date`` timeframe bounds the first yielded media."""
        looter = HashtagLooter("protein")
        today = datetime.date.today()
        newest = today - datetime.timedelta(5)
        oldest = today - datetime.timedelta(7)
        media = next(looter.medias(timeframe=(newest, oldest)))

        taken_on = datetime.datetime.fromtimestamp(media["taken_at_timestamp"]).date()
        self.assertLessEqual(taken_on, newest)
        self.assertGreaterEqual(taken_on, oldest)


class TestPostLooter(unittest.TestCase):
    """Checks of the post code extraction done by ``PostLooter``."""

    def tearDown(self):
        on_ci = os.getenv("CI") == "true"
        if on_ci:
            time.sleep(1)

    @mock.patch('instalooter.looters.InstaLooter.__init__')
    def test_post_url(self, _):
        """Every supported URL spelling resolves to the same post code."""
        hosts = ("www.instagram.com", "instagr.am")
        schemes = ("http://", "https://", "")
        for host in hosts:
            for scheme in schemes:
                url = "{}{}/p/BJlIB9WhdRn/?taken-by=2k".format(scheme, host)
                self.assertEqual(PostLooter(url).code, "BJlIB9WhdRn")

    @mock.patch('instalooter.looters.InstaLooter.__init__')
    def test_invalid_post_code(self, _):
        """A string that is neither a code nor a post URL is rejected."""
        with self.assertRaises(ValueError):
            PostLooter("instagram")  # invalid code


# class TestTemplate(_TempTestCase):
#
#     MEDIA_COUNT = 30
#
#     def test_template_1(self):
#         profile = "therock"
#         looter = instaLooter.InstaLooter(
#             self.tmpdir, profile=profile, get_videos=True,
#             template='{username}-{id}'
#         )
#         looter.download(media_count=self.MEDIA_COUNT, with_pbar=False)
#         for f in os.listdir(self.tmpdir):
#             self.assertTrue(f.startswith(profile))
#
#
# class TestDump(_TempTestCase):
#
#     def assertMediaEqual(self, media, dump):
#         for key in ['__typename', 'date', 'dimensions', 'display_src',
#                     'is_video', 'media_preview']:
#             self.assertEqual(media[key], dump[key])
#
#         self.assertEqual(
#             media.get('code') or media['shortcode'],
#             dump.get('code' or
dump['shortcode']) 143 | # ) 144 | # self.assertEqual( 145 | # media['owner']['id'], 146 | # dump['owner']['id'] 147 | # ) 148 | # self.assertIn('likes', dump) 149 | # self.assertIn('comments', dump) 150 | # 151 | # def test_dump_json(self): 152 | # looter = instaLooter.InstaLooter( 153 | # self.tmpdir, 154 | # profile="instagram", 155 | # dump_json=True, 156 | # ) 157 | # test_medias = list(itertools.islice( 158 | # (m for m in looter.medias() if not m['is_video']), 3)) 159 | # looter.download(media_count=3) 160 | # 161 | # # Check all files were downloaded as expected 162 | # self.assertEqual( 163 | # sorted(os.listdir(self.tmpdir)), 164 | # sorted(f for media in test_medias for f in ( 165 | # str("{}.jpg").format(media['id']), 166 | # str("{}.json").format(media['id']), 167 | # )) 168 | # ) 169 | # 170 | # # Check the metadata are OK 171 | # for media in test_medias: 172 | # with open(os.path.join(self.tmpdir, "{}.json").format(media['id'])) as f: 173 | # dump = json.load(f) 174 | # self.assertMediaEqual(media, dump) 175 | # 176 | # def test_dump_only(self): 177 | # looter = instaLooter.InstaLooter( 178 | # self.tmpdir, 179 | # profile="instagram", 180 | # dump_only=True, 181 | # ) 182 | # test_medias = list(itertools.islice( 183 | # (m for m in looter.medias() if not m['is_video']), 3)) 184 | # looter.download(media_count=3) 185 | # 186 | # # Check all files were downloaded as expected 187 | # self.assertEqual( 188 | # sorted(os.listdir(self.tmpdir)), 189 | # sorted(str("{}.json").format(media['id']) for media in test_medias) 190 | # ) 191 | # 192 | # # Check the metadata are OK 193 | # for media in test_medias: 194 | # with open(os.path.join(self.tmpdir, "{}.json").format(media['id'])) as f: 195 | # dump = json.load(f) 196 | # self.assertMediaEqual(media, dump) 197 | # 198 | # def test_extended_dump(self): 199 | # looter = instaLooter.InstaLooter( 200 | # self.tmpdir, 201 | # profile="instagram", 202 | # dump_only=True, 203 | # extended_dump=True, 204 | # ) 
205 | # test_medias = list(itertools.islice( 206 | # (m for m in looter.medias() if not m['is_video']), 3)) 207 | # looter.download(media_count=3) 208 | # 209 | # # Check all files were downloaded as expected 210 | # self.assertEqual( 211 | # sorted(os.listdir(self.tmpdir)), 212 | # sorted(str("{}.json").format(media['id']) for media in test_medias) 213 | # ) 214 | # 215 | # # Check the metadata are OK 216 | # for media in test_medias: 217 | # with open(os.path.join(self.tmpdir, "{}.json").format(media['id'])) as f: 218 | # dump = json.load(f) 219 | # self.assertMediaEqual(media, dump) 220 | # 221 | # # Check the dump was "extended" 222 | # self.assertIn('edge_media_to_comment', dump) 223 | # self.assertIn('edge_media_to_caption', dump) 224 | # 225 | # 226 | # class TestUtils(_TempTestCase): 227 | # 228 | # MEDIA_COUNT = 30 229 | # 230 | # def setUp(self): 231 | # super(TestUtils, self).setUp() 232 | # self.looter = instaLooter.InstaLooter() 233 | # 234 | # def test_extract_post_code_from_url(self): 235 | # url = "https://www.instagram.com/p/BFB6znLg5s1/" 236 | # 237 | # self.assertEqual( 238 | # self.looter._extract_code_from_url(url), 239 | # 'BFB6znLg5s1', 240 | # ) 241 | # 242 | # with self.assertRaises(ValueError): 243 | # self.looter._extract_code_from_url( 244 | # 'https://www.instagram.com/' 245 | # ) 246 | # 247 | # def test_get_owner_info(self): 248 | # therock = self.looter.get_owner_info("BTHqEhWFR4y") 249 | # self.assertEqual(therock['username'], 'therock') 250 | # self.assertEqual(therock['id'], '232192182') 251 | # self.assertFalse(therock['is_private']) 252 | # 253 | # gearbox = self.looter.get_owner_info("BfMWE3aFsEh") 254 | # self.assertEqual(gearbox['username'], 'gearboxsoftware') 255 | # self.assertEqual(gearbox['id'], '1409542965') 256 | # self.assertFalse(gearbox['is_private']) 257 | # 258 | # def test_url_generator_nocallable(self): 259 | # with self.assertRaises(ValueError): 260 | # self.looter = instaLooter.InstaLooter( 261 | # self.tmpdir, 
profile="instagram", url_generator=1 262 | # ) 263 | # 264 | # @unittest.skipIf(sys.version_info < (3,4), 265 | # "operator.length_hint is a 3.4+ feature.") 266 | # def test_length_hint_empty(self): 267 | # 268 | # looter = instaLooter.InstaLooter(profile="jkshksjdhfjkhdkfhk") 269 | # self.assertEqual(operator.length_hint(looter), 0) 270 | # 271 | # looter = instaLooter.InstaLooter(hashtag="jkshksjdhfjkhdkfhk") 272 | # self.assertEqual(operator.length_hint(looter), 0) 273 | # 274 | # @unittest.skipIf(sys.version_info < (3,4), 275 | # "operator.length_hint is a 3.4+ feature.") 276 | # def test_length_hint(self): 277 | # 278 | # looter = instaLooter.InstaLooter(self.tmpdir, profile="tide") 279 | # hint = operator.length_hint(looter) 280 | # 281 | # # Check the post count is greater than 0 282 | # self.assertGreater(hint, 0) 283 | # 284 | # # Download pictures and check if the count 285 | # # match (at most as many posts downloaded) 286 | # looter.download() 287 | # self.assertLessEqual(len(os.listdir(self.tmpdir)), hint) 288 | 289 | 290 | # def load_tests(loader, tests, pattern): 291 | # suite = unittest.TestSuite() 292 | # TestProfileLooter.register_tests() 293 | # suite.addTests(loader.loadTestsFromTestCase(TestProfileLooter)) 294 | # # suite.addTests(loader.loadTestsFromTestCase(TestHashtagDownload)) 295 | # # suite.addTests(loader.loadTestsFromTestCase(TestTemplate)) 296 | # return suite 297 | 298 | 299 | def setUpModule(): 300 | warnings.simplefilter('ignore') 301 | 302 | 303 | def tearDownModule(): 304 | warnings.simplefilter(warnings.defaultaction) 305 | -------------------------------------------------------------------------------- /tests/test_pbar.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | import threading 6 | import unittest 7 | import warnings 8 | 9 | import six 10 | 11 | from instalooter.pbar import 
ProgressBar, TqdmProgressBar 12 | 13 | 14 | class TestProgressBar(unittest.TestCase): 15 | 16 | def test_derived_progress_bar(self): 17 | 18 | class MyProgressBar(ProgressBar): 19 | _test = {"update": 0, "max": None} 20 | def update(self): 21 | self._test['update'] += 1 22 | def set_maximum(self, maximum): 23 | self._test['max'] = maximum 24 | 25 | pb = MyProgressBar(iter(range(10))) 26 | self.assertEqual(pb._test['update'], 0) 27 | self.assertIs(pb._test['max'], None) 28 | 29 | self.assertEqual(next(pb), 0) 30 | self.assertEqual(pb._test['update'], 1) 31 | 32 | pb.set_maximum(10) 33 | self.assertEqual(pb._test['max'], 10) 34 | 35 | self.assertEqual(list(pb), list(range(1, 10))) 36 | self.assertRaises(StopIteration, next, pb) 37 | self.assertEqual(pb._test['update'], 10) 38 | pb.finish() 39 | 40 | self.assertRaises(RuntimeError, pb.get_lock) 41 | lock = threading.RLock() 42 | pb.set_lock(lock) 43 | self.assertIs(pb.get_lock(), lock) 44 | 45 | def test_tqdm_progress_bar(self): 46 | 47 | fh = six.moves.StringIO() 48 | pb = TqdmProgressBar(iter(range(10)), file=fh) 49 | 50 | self.assertEqual(pb.n, 0) 51 | self.assertIs(pb.total, None) 52 | 53 | self.assertEqual(next(pb), 0) 54 | self.assertEqual(pb.n, 1) 55 | self.assertIs(pb.total, None) 56 | 57 | pb.set_maximum(10) 58 | self.assertEqual(pb.total, 10) 59 | 60 | self.assertEqual(list(pb), list(range(1, 10))) 61 | self.assertRaises(StopIteration, next, pb) 62 | self.assertEqual(pb.n, 10) 63 | pb.finish() 64 | 65 | lock = threading.RLock() 66 | pb.set_lock(lock) 67 | self.assertIs(pb.get_lock(), lock) 68 | 69 | 70 | def setUpModule(): 71 | warnings.simplefilter('ignore') 72 | 73 | 74 | def tearDownModule(): 75 | warnings.simplefilter(warnings.defaultaction) 76 | -------------------------------------------------------------------------------- /tests/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import absolute_import 3 | 4 | 
try: 5 | from unittest import mock 6 | except ImportError: 7 | import mock 8 | 9 | from . import ig_mock 10 | from . import method_names 11 | -------------------------------------------------------------------------------- /tests/utils/ig_mock.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from __future__ import absolute_import 3 | from __future__ import unicode_literals 4 | 5 | import os 6 | import json 7 | 8 | import fs.path 9 | import instalooter.looters 10 | 11 | 12 | _test_dir = os.path.abspath(os.path.join(__file__, "..")) 13 | _url = "tar://{}/ig_mock.tar.gz".format(_test_dir) 14 | 15 | 16 | def get_mock_fs(): 17 | return fs.open_fs(_url) 18 | 19 | 20 | class MockPages(object): 21 | 22 | def __init__(self, profile): 23 | self.profile = profile 24 | 25 | def __call__(self): 26 | with get_mock_fs() as mockfs: 27 | with mockfs.open("pages/{}".format(self.profile)) as f: 28 | return iter(json.load(f)) 29 | 30 | 31 | if __name__ == "__main__": 32 | 33 | with fs.open_fs(_test_dir) as test_fs: 34 | if test_fs.exists(fs.path.basename(_url)): 35 | test_fs.remove(fs.path.basename(_url)) 36 | 37 | with fs.open_fs(_url, create=True) as mockfs: 38 | mockfs.makedir("pages", recreate=True) 39 | nintendo = instalooter.looters.ProfileLooter("nintendo") 40 | with mockfs.open("pages/nintendo", "w") as f: 41 | json.dump(list(nintendo.pages()), f) 42 | 43 | fluoxetine = instalooter.looters.HashtagLooter("fluoxetine") 44 | with mockfs.open("pages/fluoxetine", "w") as f: 45 | pages_it = fluoxetine.pages_it 46 | json.dump([next(pages_it) for _ in range(3)], f) 47 | -------------------------------------------------------------------------------- /tests/utils/ig_mock.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/althonos/InstaLooter/468f76caced67560214d5e2e6e745d7ffb2c0674/tests/utils/ig_mock.tar.gz 
# --------------------------------------------------------------------------------
# /tests/utils/method_names.py:
# --------------------------------------------------------------------------------
# coding: utf-8
from __future__ import absolute_import


def signature(func, param_num, params):
    """Name a parameterized test after the call it represents.

    Meant to be passed as ``testcase_func_name`` to
    ``parameterized.expand``; the result looks like
    ``test_profile('instagram',get_videos=True)``.

    Arguments:
        func: the decorated test function; only ``__name__`` is read.
        param_num: index of the parameter set (unused here).
        params: object exposing the ``args`` sequence and ``kwargs``
            mapping of the parameter set.

    Returns:
        str: ``"<name>(<positional reprs>,<key>=<repr>...)"``.
    """
    positional = ','.join("{!r}".format(a) for a in params.args)
    # Keyword arguments are emitted sorted by name: relying on the mapping's
    # own iteration order makes the generated test name vary between
    # interpreters where dict ordering is arbitrary.
    keywords = ','.join(
        "{}={!r}".format(k, params.kwargs[k]) for k in sorted(params.kwargs)
    )
    # Join only the non-empty halves so no stray comma is produced.
    rendered = ','.join(part for part in (positional, keywords) if part)
    return "{}({})".format(func.__name__, rendered)


def firstparam(func, param_num, params):
    """Name a parameterized test after its first positional argument.

    Arguments:
        func: the decorated test function; only ``__name__`` is read.
        param_num: index of the parameter set (unused here).
        params: object exposing an ``args`` sequence; it must hold at
            least one item, since ``args[0]`` is read unconditionally.

    Returns:
        str: ``"<name>(<repr of first positional argument>)"``.
    """
    return "{}({!r})".format(func.__name__, params.args[0])


def num(func, param_num, params):
    """Name a parameterized test after the index of its parameter set.

    Arguments:
        func: the decorated test function; only ``__name__`` is read.
        param_num: index of the parameter set, appended to the name.
        params: the parameter set (unused here).

    Returns:
        str: ``"<name>_<param_num>"``.
    """
    return "{}_{}".format(func.__name__, param_num)

# --------------------------------------------------------------------------------