├── .github
    └── dependabot.yml
├── .gitignore
├── .travis.yml
├── CHANGELOG.rst
├── COPYING
├── MANIFEST.in
├── README.rst
├── ci
    └── travis
    │   └── before_script.sh
├── docs
    ├── ISSUE_TEMPLATE.md
    ├── Makefile
    ├── requirements.txt
    └── source
    │   ├── batch.rst
    │   ├── conf.py
    │   ├── cron.rst
    │   ├── examples.rst
    │   ├── index.rst
    │   ├── install.rst
    │   ├── instalooter
    │       ├── batch.rst
    │       ├── cli.rst
    │       ├── index.rst
    │       ├── looters.rst
    │       ├── medias.rst
    │       ├── pages.rst
    │       ├── pbar.rst
    │       └── worker.rst
    │   └── usage.rst
├── instalooter
    ├── __init__.py
    ├── __main__.py
    ├── _impl.py
    ├── _uadetect.py
    ├── _utils.py
    ├── batch.py
    ├── cli
    │   ├── __init__.py
    │   ├── constants.py
    │   ├── login.py
    │   ├── logutils.py
    │   ├── threadutils.py
    │   └── time.py
    ├── looters.py
    ├── medias.py
    ├── pages.py
    ├── pbar.py
    ├── static
    │   └── splash.html
    └── worker.py
├── setup.cfg
├── setup.py
└── tests
    ├── __init__.py
    ├── test_batch.py
    ├── test_cli.py
    ├── test_issues.py
    ├── test_login.py
    ├── test_looter.py
    ├── test_pbar.py
    └── utils
        ├── __init__.py
        ├── ig_mock.py
        ├── ig_mock.tar.gz
        └── method_names.py


/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | updates:
 3 | - package-ecosystem: pip
 4 |   directory: "/"
 5 |   schedule:
 6 |     interval: daily
 7 |     time: "04:00"
 8 |   open-pull-requests-limit: 10
 9 |   ignore:
10 |   - dependency-name: sphinx
11 |     versions:
12 |     - 3.4.0
13 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | 
 6 | # C extensions
 7 | *.so
 8 | 
 9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | 
27 | # PyInstaller
28 | #  Usually these files are written by a python script from a template
29 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 | 
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 | 
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *,cover
46 | .hypothesis/
47 | 
48 | # Translations
49 | *.mo
50 | *.pot
51 | 
52 | # Django stuff:
53 | *.log
54 | local_settings.py
55 | 
56 | # Flask stuff:
57 | instance/
58 | .webassets-cache
59 | 
60 | # Scrapy stuff:
61 | .scrapy
62 | 
63 | # Sphinx documentation
64 | docs/_build/
65 | docs/source/changelog.rst  
66 | 
67 | # PyBuilder
68 | target/
69 | 
70 | # IPython Notebook
71 | .ipynb_checkpoints
72 | 
73 | # pyenv
74 | .python-version
75 | 
76 | # celery beat schedule file
77 | celerybeat-schedule
78 | 
79 | # dotenv
80 | .env
81 | 
82 | # virtualenv
83 | venv/
84 | ENV/
85 | 
86 | # Spyder project settings
87 | .spyderproject
88 | 
89 | # Rope project settings
90 | .ropeproject
91 | 
92 | # Codacy token
93 | .codacy.token
94 | 
95 | # MyPy
96 | .mypy_cache
97 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | sudo: false
 2 | language: python
 3 | cache: pip
 4 | dist: xenial
 5 | 
 6 | python:
 7 | - 2.7
 8 | - 3.7
 9 | 
10 | before_install:
11 | - pip install -U pip wheel setuptools
12 | 
13 | install:
14 | - pip install -e .[dev]
15 | 
16 | before_script:
17 | - ci/travis/before_script.sh
18 | 
19 | script:
20 | - python -m coverage run -m unittest discover -v
21 | 
22 | after_success:
23 | - python -m codecov
24 | - python -m codacy -r coverage.xml
25 | 
26 | deploy:
27 |   provider: pypi
28 |   distributions: sdist bdist_wheel
29 |   on:
30 |     python: 3.7
31 |     tags: true
32 |     repo: althonos/InstaLooter
33 |   server: https://upload.pypi.org/legacy/
34 |   skip_upload_docs: true
35 |   user: althonos
36 |   password:
37 |     secure: ExaoUT+/7v8VJp/vsllDm9AsO3/6s7Nka5rf+0TVz8Z7pZ+azFXK5hZmQ6nTtvSRYpZCfGH8ecYLbE0ek02BFqLc25/VfMfPao+6eUh1v3MGvHxH9ml+/9aCwWIv5C/T5dGURVj0udXxXuuTsdJwzxrb+K9taVZSjBbk6Ti8Fdu8yRMhOvI4xoiG69tunk6IXnOZjDCQix8O3Cn0OfA/zeD0IX1n8SVlKwsS+dOgFAJCujP865VuUh+2kM63Xx3OEb3caBGc1HXaVxNRXdhGykFeWrT4Mzrzd0T458Odc3S+DJG+2WVZbNC+chGAUBMq77z8JVirdWzydhdnhzi0DNZohRO0itstz53DuyqAtyTZ04xLO3+06svU5grdwilZGwy2KNZ1S1wTRlUgpxBhRL6dwHEAfaq4JlKlijEJJnFBMq2O2TKB/h+CdIALfWKSRNfOUm2GdG4ZwZOOLczbe4ATYoxGsqGP/lqnH2/iCrJFtoYmdFR8QZxJtNNwcNvVLT3MNx7eze7OpGgHWQjJq/m5hHoKJx6yW6U5XiRSt5gEowiq7vNrdwHSDRLe5CZJVndkiwelQQN5womKtWHFb1w9DRgBFj0ZscgJuxrsRl9uLfqcLB77tyS9q4BpQKsMIDw0P2bVI/P50KUsG0OUjHkstxS4nl9DLsxVPb+NmiA=
38 | 
39 | notifications:
40 |   email:
41 |   - althonosdev@gmail.com
42 | 


--------------------------------------------------------------------------------
/CHANGELOG.rst:
--------------------------------------------------------------------------------
  1 | Changelog
  2 | =========
  3 | 
  4 | All notable changes to this project will be documented in this file.
  5 | 
  6 | The format is based on `Keep a Changelog <http://keepachangelog.com>`_ and this
  7 | project adheres to `Semantic Versioning <http://semver.org/spec/v2.0.0.html>`_.
  8 | 
  9 | Unreleased_
 10 | -----------
 11 | 
 12 | v2.4.4_ - 2020-07-15
 13 | --------------------
 14 | 
 15 | Changed
 16 | '''''''
 17 | - Bumped ``verboselogs`` to ``v14`` in requirements.
 18 | 
 19 | 
 20 | v2.4.3_ - 2020-06-25
 21 | --------------------
 22 | 
 23 | Changed
 24 | '''''''
 25 | - Bumped ``tenacity`` to ``v6`` in requirements.
 26 | 
 27 | Fixed
 28 | '''''
 29 | - Change in Instagram login policy causing plaintext password to stop 
 30 |   working.
 31 | 
 32 | 
 33 | v2.4.2_ - 2019-12-27
 34 | --------------------
 35 | 
 36 | Changed
 37 | '''''''
 38 | - CLI `--time` option will now always use higher and lower time given as the 
 39 |   timeframe, independently of the order they are given.
 40 | 
 41 | Fixed
 42 | '''''
 43 | - JSON files also get a proper timestamp set (pr #275).
 44 | 
 45 | 
 46 | v2.4.1_ - 2019-12-10
 47 | --------------------
 48 | 
 49 | Fixed
 50 | '''''
 51 | - Issue with additional data not being loaded from certain pages (#271) (pr #273)
 52 | 
 53 | 
 54 | v2.4.0_ - 2019-06-29
 55 | --------------------
 56 | 
 57 | Fixed
 58 | '''''
 59 | - Attempt fix for `rhx_gis` issue (#247) (pr #248)
 60 | - Fix crashes when downloading hashtag medias
 61 | 
 62 | Changed
 63 | '''''''
 64 | - Removed ``fake-useragent`` dependency.
 65 | - Use a custom HTTP server to detect the user agent of the default web browser.
 66 | 
 67 | v2.3.4_ - 2019-02-22
 68 | --------------------
 69 | 
 70 | Fixed
 71 | '''''
 72 | - Bumped supported ``fs`` version to ``~=2.1``.
 73 | 
 74 | v2.3.3_ - 2019-02-11
 75 | --------------------
 76 | 
 77 | Fixed
 78 | '''''
 79 | - Bumped supported ``fs`` version to ``2.3.0``.
 80 | 
 81 | v2.3.2_ - 2019-01-06
 82 | ---------------------
 83 | 
 84 | Added
 85 | '''''
 86 | - Add zero padding for date and time in filenames (pr #224)
 87 | 
 88 | Changed
 89 | '''''''
 90 | - Add `tests` to source distribution (pr #228).
 91 | - Bumped supported ``fs`` version to ``2.2.0``.
 92 | 
 93 | v2.3.1_ - 2018-10-13
 94 | --------------------
 95 | 
 96 | Fixed
 97 | '''''
 98 | - Allow extracting post codes of length 10 from URLs.
 99 | 
100 | 
101 | v2.3.0_ - 2018-09-05
102 | --------------------
103 | 
104 | Changed
105 | '''''''
106 | - Bumped required ``tenacity`` version to ``5.0``.
107 | 
108 | v2.2.0_ - 2018-08-19
109 | --------------------
110 | 
111 | Changed
112 | '''''''
113 | - Bumped required ``fs`` version to ``2.1.0``.
114 | 
115 | 
116 | v2.1.0_ - 2018-07-31
117 | --------------------
118 | 
119 | Added
120 | '''''
121 | - Posts can now be downloaded by giving directly the post URL (implement #184).
122 | 
123 | Fixed
124 | '''''
125 | - Batch will now log the name of the current account as well as occurring
126 |   errors (fix #185)
127 | - CLI login will now properly display logger messages.
128 | - Library loggers do not have a `logging.StreamHandler` set by default
129 |   anymore.
130 | - Attempt fixing login procedure in ``InstaLooter._login``.
131 | 
132 | Changed
133 | '''''''
134 | - Trying to download media from a non-existing user will display a nicer
135 |   message: ``user not found: '...'`` (fix #194).
136 | - Batch mode will now continue to the next job if any error occurs, showing
137 |   an error message instead of crashing (fix #185).
138 | 
139 | 
140 | v2.0.3_ - 2018-05-29
141 | --------------------
142 | 
143 | Fixed
144 | '''''
145 | - Use the webpage shared data to find the CSRF token instead of response
146 |   cookies.
147 | 
148 | v2.0.2_ - 2018-05-17
149 | --------------------
150 | 
151 | Changed
152 | '''''''
153 | - Bump ``coloredlogs`` required version to `10.0`.
154 | - Use ``verboselogs`` as the backend logging library.
155 | 
156 | 
157 | v2.0.1_ - 2018-04-18
158 | --------------------
159 | 
160 | Changed
161 | '''''''
162 | - Updated the query hash in ``ProfileIterator`` (although previous seemed
163 |   to keep working).
164 | 
165 | Fixed
166 | '''''
167 | - *RHX-GIS* computation not using the CSRF token anymore.
168 | - Lowered ``PageIterator.PAGE_SIZE`` to 50 to comply with Instagram.
169 | 
170 | 
171 | v2.0.0_ - 2018-04-16
172 | --------------------
173 | 
174 | Changed
175 | '''''''
176 | - Passing a pre-initialised ``Session`` to ``PageIterator`` constructor
177 |   is now mandatory.
178 | - ``HashtagIterator`` must be provided a ``rhx`` (it is inferred for ``ProfileIterator``).
179 | 
180 | Fixed
181 | '''''
182 | - API changes made by Instagram ca. April 2018 (excluding logging in / out).
183 | - Calling `operator.length_hint` on ``PageIterator`` objects will no longer
184 |   cause duplicate server queries.
185 | 
186 | 
187 | v1.0.0_ - 2018-04-05
188 | --------------------
189 | 
190 | Added
191 | '''''
192 | - This CHANGELOG file.
193 | - Typing annotations using the ``typing`` module.
194 | - Limited retries on connection failure, using `tenacity <https://pypi.org/project/tenacity/>`_.
195 | - Real-world User Agent spoofing, using `fake-useragent <https://pypi.org/project/fake-useragent/>`_
196 | 
197 | Fixed
198 | '''''
199 | - API changes made by Instagram ca. March 2018.
200 | 
201 | Changed
202 | '''''''
203 | - Whole new API following major code refactor and rewrite.
204 | - Requests to the API directly use JSON and GraphQL queries when possible.
205 | - License is now GPLv3 *or later* instead of GPLv3.
206 | - I/O now uses PyFilesystem (FS URLs can be passed as CLI arguments).
207 | 
208 | Removed
209 | '''''''
210 | - Exif metadata handling (*will be added back in later release*).
211 | - ``urlgen`` capabilities (Instagram signs picture URL since 2018).
212 | - Python 3.5.1 support (lacks the required ``typing`` version).
213 | - ``progressbar2`` dependency, replaced by ``tqdm``
214 | - ``hues`` dependency, replaced by ``coloredlogs``
215 | - ``BeautifulSoup4`` dependency
216 | 
217 | .. _Unreleased: https://github.com/althonos/InstaLooter/compare/v2.4.4...HEAD
218 | .. _v2.4.4: https://github.com/althonos/InstaLooter/compare/v2.4.3...v2.4.4
219 | .. _v2.4.3: https://github.com/althonos/InstaLooter/compare/v2.4.2...v2.4.3
220 | .. _v2.4.2: https://github.com/althonos/InstaLooter/compare/v2.4.1...v2.4.2
221 | .. _v2.4.1: https://github.com/althonos/InstaLooter/compare/v2.4.0...v2.4.1
222 | .. _v2.4.0: https://github.com/althonos/InstaLooter/compare/v2.3.4...v2.4.0
223 | .. _v2.3.4: https://github.com/althonos/InstaLooter/compare/v2.3.3...v2.3.4
224 | .. _v2.3.3: https://github.com/althonos/InstaLooter/compare/v2.3.2...v2.3.3
225 | .. _v2.3.2: https://github.com/althonos/InstaLooter/compare/v2.3.1...v2.3.2
226 | .. _v2.3.1: https://github.com/althonos/InstaLooter/compare/v2.3.0...v2.3.1
227 | .. _v2.3.0: https://github.com/althonos/InstaLooter/compare/v2.2.0...v2.3.0
228 | .. _v2.2.0: https://github.com/althonos/InstaLooter/compare/v2.1.0...v2.2.0
229 | .. _v2.1.0: https://github.com/althonos/InstaLooter/compare/v2.0.3...v2.1.0
230 | .. _v2.0.3: https://github.com/althonos/InstaLooter/compare/v2.0.2...v2.0.3
231 | .. _v2.0.2: https://github.com/althonos/InstaLooter/compare/v2.0.1...v2.0.2
232 | .. _v2.0.1: https://github.com/althonos/InstaLooter/compare/v2.0.0...v2.0.1
233 | .. _v2.0.0: https://github.com/althonos/InstaLooter/compare/v1.0.0...v2.0.0
234 | .. _v1.0.0: https://github.com/althonos/InstaLooter/compare/v0.14.0...v1.0.0
235 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include COPYING
2 | include CHANGELOG.rst
3 | include setup.cfg
4 | 
5 | recursive-include instalooter/static *.html
6 | 
7 | graft tests
8 | global-exclude __pycache__ *.pyc
9 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
  1 | InstaLooter |starme|
  2 | ====================
  3 | 
  4 | .. |starme| image:: https://img.shields.io/github/stars/althonos/InstaLooter.svg?style=social&label=Star
  5 |    :target: https://github.com/althonos/InstaLooter
  6 | 
  7 | *Not all treasure's silver and gold, mate.*
  8 | 
  9 | |build| |repo| |versions| |format| |coverage| |doc| |grade| |license|
 10 | |keepachangelog| |saythanks|
 11 | 
 12 | 
 13 | .. |build| image:: https://img.shields.io/travis/althonos/InstaLooter/master.svg?label=travis-ci&style=flat-square
 14 |    :target: https://travis-ci.org/althonos/InstaLooter/
 15 | 
 16 | .. |repo| image:: https://img.shields.io/badge/source-GitHub-303030.svg?style=flat-square
 17 |    :target: https://github.com/althonos/InstaLooter
 18 | 
 19 | .. |versions| image:: https://img.shields.io/pypi/v/instalooter.svg?style=flat-square
 20 |    :target: https://pypi.org/project/instalooter
 21 | 
 22 | .. |format| image:: https://img.shields.io/pypi/format/instalooter.svg?style=flat-square
 23 |    :target: https://pypi.org/project/instalooter
 24 | 
 25 | .. |grade| image:: https://img.shields.io/codacy/grade/9b8c7da6887c4195b9e960cb04b59a91/master.svg?style=flat-square
 26 |    :target: https://www.codacy.com/app/althonos/InstaLooter/dashboard
 27 | 
 28 | .. |coverage| image:: https://img.shields.io/codecov/c/github/althonos/InstaLooter/master.svg?style=flat-square
 29 |    :target: https://codecov.io/gh/althonos/InstaLooter
 30 | 
 31 | .. |doc| image:: https://img.shields.io/readthedocs/instalooter.svg?style=flat-square
 32 |    :target: http://instalooter.readthedocs.io/en/stable/?badge=stable
 33 | 
 34 | .. .. |requirements| image:: https://img.shields.io/requires/github/althonos/InstaLooter/master.svg?style=flat-square
 35 | ..    :target: https://requires.io/github/althonos/InstaLooter/requirements/?branch=master
 36 | 
 37 | .. .. |health| image:: https://landscape.io/github/althonos/InstaLooter/master/landscape.svg?style=flat-square
 38 | ..    :target: https://landscape.io/github/althonos/InstaLooter/master
 39 | 
 40 | .. |license| image:: https://img.shields.io/pypi/l/instalooter.svg?style=flat-square
 41 |    :target: https://choosealicense.com/licenses/gpl-3.0/
 42 | 
 43 | .. |keepachangelog| image:: https://img.shields.io/badge/keep%20a-changelog-8A0707.svg?maxAge=86400&style=flat-square
 44 |    :target: http://keepachangelog.com/
 45 | 
 46 | .. |saythanks| image:: https://img.shields.io/badge/say-thanks!-1EAEDB.svg?maxAge=86400&style=flat-square
 47 |    :target: https://saythanks.io/to/althonos
 48 | 
 49 | 
 50 | InstaLooter is a program that can download any picture or video associated
 51 | with an Instagram profile, without any API access. It can be seen as a
 52 | re-implementation of the now deprecated `InstaRaider <https://github.com/akurtovic/InstaRaider>`_
 53 | developed by `@akurtovic <https://github.com/akurtovic>`_.
 54 | 
 55 | ``v1.0.0`` *was completely rewritten from scratch, and as such, will
 56 | probably break compatibility with your homemade scripts. Meanwhile, great care
 57 | was taken to keep the CLI as consistent as possible with the previous versions,
 58 | so it'll hopefully feel like home.*
 59 | 
 60 | 
 61 | Requirements
 62 | ------------
 63 | 
 64 | +--------------------+----------------------------+----------------------+------------------------+-------------------------+
 65 | | **coloredlogs**    |  Colored output            | |PyPI coloredlogs|   | |Source coloredlogs|   | |License coloredlogs|   |
 66 | +--------------------+----------------------------+----------------------+------------------------+-------------------------+
 67 | | **dateutil**       |  Date manipulation         | |PyPI dateutil|      | |Source dateutil|      | |License dateutil|      |
 68 | +--------------------+----------------------------+----------------------+------------------------+-------------------------+
 69 | | **docopt**         |  CLI arguments parsing     | |PyPI docopt|        | |Source docopt|        | |License docopt|        |
 70 | +--------------------+----------------------------+----------------------+------------------------+-------------------------+
 71 | | **fs**             |  Filesystem handling       | |PyPI fs|            | |Source fs|            | |License fs|            |
 72 | +--------------------+----------------------------+----------------------+------------------------+-------------------------+
 73 | | **requests**       |  HTTP handling             | |PyPI requests|      | |Source requests|      | |License requests|      |
 74 | +--------------------+----------------------------+----------------------+------------------------+-------------------------+
 75 | | **six**            |  Python 2/3 compatibility  | |PyPI six|           | |Source six|           | |License six|           |
 76 | +--------------------+----------------------------+----------------------+------------------------+-------------------------+
 77 | | **tenacity**       |  Retry until success       | |PyPI tenacity|      | |Source tenacity|      | |License tenacity|      |
 78 | +--------------------+----------------------------+----------------------+------------------------+-------------------------+
 79 | | **tqdm**           |  Dynamic output in CLI     | |PyPI tqdm|          | |Source tqdm|          | |License tqdm|          |
 80 | +--------------------+----------------------------+----------------------+------------------------+-------------------------+
 81 | | **verboselogs**    |  More detailed logs        | |PyPI verboselogs|   | |Source verboselogs|   | |License verboselogs|   |
 82 | +--------------------+----------------------------+----------------------+------------------------+-------------------------+
 83 | 
 84 | 
 85 | .. |PyPI coloredlogs| image:: https://img.shields.io/pypi/v/coloredlogs.svg?style=flat-square
 86 |    :target: https://pypi.org/project/coloredlogs
 87 | 
 88 | .. |PyPI dateutil| image:: https://img.shields.io/pypi/v/python-dateutil.svg?style=flat-square
 89 |    :target: https://pypi.org/project/python-dateutil/
 90 | 
 91 | .. |PyPI docopt| image:: https://img.shields.io/pypi/v/docopt.svg?style=flat-square
 92 |    :target: https://pypi.org/project/docopt/
 93 | 
 94 | .. |PyPI fs| image:: https://img.shields.io/pypi/v/fs.svg?style=flat-square
 95 |    :target: https://pypi.org/project/fs/
 96 | 
 97 | .. |PyPI fakeua| image:: https://img.shields.io/pypi/v/fake-useragent.svg?style=flat-square
 98 |    :target: https://pypi.org/project/fake-useragent/
 99 | 
100 | .. |PyPI requests| image:: https://img.shields.io/pypi/v/requests.svg?style=flat-square
101 |    :target: https://pypi.org/project/requests
102 | 
103 | .. |PyPI six| image:: https://img.shields.io/pypi/v/six.svg?style=flat-square
104 |    :target: https://pypi.org/project/six
105 | 
106 | .. |PyPI tenacity| image:: https://img.shields.io/pypi/v/tenacity.svg?style=flat-square
107 |    :target: https://pypi.org/project/tenacity
108 | 
109 | .. |PyPI tqdm| image:: https://img.shields.io/pypi/v/tqdm.svg?style=flat-square
110 |    :target: https://pypi.org/project/tqdm
111 | 
112 | .. |PyPI verboselogs| image:: https://img.shields.io/pypi/v/verboselogs.svg?style=flat-square
113 |    :target: https://pypi.org/project/verboselogs
114 | 
115 | .. |Source coloredlogs| image:: https://img.shields.io/badge/source-GitHub-303030.svg?style=flat-square
116 |    :target: https://github.com/xolox/python-coloredlogs
117 | 
118 | .. |Source dateutil| image:: https://img.shields.io/badge/source-GitHub-303030.svg?style=flat-square
119 |    :target: https://github.com/dateutil/dateutil/
120 | 
121 | .. |Source docopt| image:: https://img.shields.io/badge/source-GitHub-303030.svg?style=flat-square
122 |    :target: https://github.com/docopt/docopt
123 | 
124 | .. |Source fs| image:: https://img.shields.io/badge/source-GitHub-303030.svg?style=flat-square
125 |    :target: https://github.com/PyFilesystem/pyfilesystem2
126 | 
127 | .. |Source fakeua| image:: https://img.shields.io/badge/source-GitHub-303030.svg?style=flat-square
128 |    :target: https://github.com/hellysmile/fake-useragent
129 | 
130 | .. |Source requests| image:: https://img.shields.io/badge/source-GitHub-303030.svg?style=flat-square
131 |    :target: https://github.com/kennethreitz/requests
132 | 
133 | .. |Source six| image:: https://img.shields.io/badge/source-GitHub-303030.svg?style=flat-square
134 |    :target: https://github.com/benjaminp/six
135 | 
136 | .. |Source tenacity| image:: https://img.shields.io/badge/source-GitHub-303030.svg?style=flat-square
137 |    :target: https://github.com/jd/tenacity
138 | 
139 | .. |Source tqdm| image:: https://img.shields.io/badge/source-GitHub-303030.svg?style=flat-square
140 |    :target: https://github.com/tqdm/tqdm
141 | 
142 | .. |Source verboselogs| image:: https://img.shields.io/badge/source-GitHub-303030.svg?style=flat-square
143 |    :target: https://github.com/xolox/python-verboselogs
144 | 
145 | .. |License coloredlogs| image:: https://img.shields.io/badge/license-MIT-blue.svg?style=flat-square
146 |    :target: https://choosealicense.com/licenses/mit/
147 | 
148 | .. |License dateutil| image:: https://img.shields.io/pypi/l/python-dateutil.svg?style=flat-square
149 |    :target: https://choosealicense.com/licenses/apache-2.0/
150 | 
151 | .. |License docopt| image:: https://img.shields.io/pypi/l/docopt.svg?style=flat-square
152 |    :target: https://choosealicense.com/licenses/mit/
153 | 
154 | .. |License fs| image:: https://img.shields.io/pypi/l/fs.svg?style=flat-square
155 |    :target: https://choosealicense.com/licenses/mit/
156 | 
157 | .. |License fakeua| image:: https://img.shields.io/badge/license-Apache_2.0-blue.svg?style=flat-square
158 |    :target: https://choosealicense.com/licenses/apache-2.0/
159 | 
160 | .. |License requests| image:: https://img.shields.io/pypi/l/requests.svg?style=flat-square
161 |    :target: https://choosealicense.com/licenses/apache-2.0/
162 | 
163 | .. |License six| image:: https://img.shields.io/pypi/l/six.svg?style=flat-square
164 |    :target: https://choosealicense.com/licenses/mit/
165 | 
166 | .. |License tenacity| image:: https://img.shields.io/badge/license-Apache_2.0-blue.svg?style=flat-square
167 |    :target: https://choosealicense.com/licenses/apache-2.0/
168 | 
169 | .. |License tqdm| image:: https://img.shields.io/pypi/l/tqdm.svg?style=flat-square
170 |    :target: https://choosealicense.com/licenses/mpl-2.0/
171 | 
172 | .. |License verboselogs| image:: https://img.shields.io/badge/license-MIT-blue.svg?style=flat-square
173 |    :target: https://choosealicense.com/licenses/mit/
174 | 
175 | 
176 | Installation
177 | ------------
178 | 
179 | InstaLooter is available on PyPI to install with ``pip``. If you are not
180 | familiar with the package management of the Python ecosystem, please see the
181 | `Installation page <http://instalooter.readthedocs.io/en/latest/install.html>`_
182 | of the `documentation <http://instalooter.readthedocs.io/en/latest/index.html>`_.
183 | Yet, you will probably end up using the following command::
184 | 
185 |   pip install --user instalooter --pre
186 | 
187 | 
188 | Usage
189 | -----
190 | 
191 | instalooter comes with its CLI::
192 | 
193 |     $ instalooter user <username> [<directory>] [options]
194 |     $ instalooter hashtag <hashtag> [<directory>] [options]
195 |     $ instalooter post <post_token> [<directory>] [options]
196 |     $ instalooter batch [<batch_file>]
197 | 
198 | See ``instalooter --usage`` for all possible uses, or ``instalooter --help``
199 | for a complete usage guide.
200 | 
201 | 
202 | Logging in and out
203 | ------------------
204 | There are two ways to login on Instagram through instalooter:
205 | 
206 | * use the *login* subcommand (``instalooter login``) to interactively login
207 |   using your username and password.
208 | * give a ``--username`` (and, if you want, a ``--password``) argument to any of
209 |   the download commands.
210 | 
211 | In both cases, a session cookie will be created in a cache-specific folder.
212 | To delete it and close your session on the server, use the ``logout``
213 | subcommand.
214 | 
215 | 
216 | Examples
217 | --------
218 | 
219 | Download all **pictures** from the *instagram* profile in the current directory::
220 | 
221 |     $ instalooter user instagram
222 | 
223 | Download the latest 20 pictures or videos tagged with *python* to */tmp*::
224 | 
225 |     $ instalooter hashtag python /tmp -n 20 --get-videos -c MYLOGIN
226 | 
227 | Download a single post from a URL in the `~/Pictures` directory::
228 | 
229 |     $ instalooter post "https://www.instagram.com/p/BFB6znLg5s1/" ~/Pictures
230 | 
231 | Use a configuration file to download from several accounts using custom parameters
232 | (see `Batch mode <http://instalooter.readthedocs.io/en/latest/batch.html>`_)::
233 | 
234 |     $ instalooter batch /path/to/a/config/file.ini
235 | 
236 | See more on the `Usage page <http://instalooter.readthedocs.io/en/latest/usage.html>`_
237 | of the `online documentation <http://instalooter.readthedocs.io/en/latest/index.html>`_.
238 | 


--------------------------------------------------------------------------------
/ci/travis/before_script.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | 
3 | UA="Mozilla/5.0 (X11; Linux x86_64; rv:66.0) Gecko/20100101 Firefox/66.0"
4 | python -c "from instalooter.looters import InstaLooter; InstaLooter._cachefs().settext(u'user-agent.txt', u'$UA')"
5 | 


--------------------------------------------------------------------------------
/docs/ISSUE_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | <!-- Below you'll find a template to create an issue, with information you're
 2 | expected to provide to help debugging. Failure to do so will most likely
 3 | end up in your issue being ignored. Let's try being adults here, an issue
 4 | named IT DOESN'T WORK without a description is not helping anybody. -->
 5 | 
 6 | ## Library version
 7 | 
 8 | *What's the installed library version ? Check with `instalooter --version`*:
 9 | 
10 | ```
11 | instalooter vX.Y.Z
12 | ```
13 | 
14 | ## Environment
15 | 
16 | *Describe here your environment, including:*
17 | 
18 | * *OS*
19 | * *Python version*
20 | * *`setuptools` version if reporting an issue with installation*
21 | * *non-standard Python implementation if any*
22 | 
23 | 
24 | ## Error description - installation
25 | 
26 | *If you have an issue with installation, make sure you use a recent `setuptools` version
27 | before filing a bug ! If the error is still there, describe the command you used to
28 | install, and make sure you reported your environment in details. In particular,
29 | if you encounter a critical error with the CLI, please post the program output when
30 | running with the `--traceback` flag.*
31 | 
32 | 
33 | ## Error description - runtime
34 | 
35 | *If you have an issue at runtime, include the required information below:*
36 | 
37 | ### Reproducible test case
38 | 
39 | *Are you using the CLI ? If so, include a command that can be used to re-raise the
40 | error, with actual arguments anybody can try:*
41 | 
42 | ```
43 | instalooter ...
44 | ```
45 | 
46 | *Are you using the API ? If so, include a small snippet that can be used to re-raise the
47 | error:*
48 | 
49 | ```python
50 | from instalooter.looters import ...
51 | ```
52 | 
53 | 
54 | ### Expected behaviour
55 | 
56 | *What's supposed to happen ? That's where you can ask for a new feature as well*
57 | 
58 | ### Actual behaviour
59 | 
60 | *What's actually happening ? Leave empty if asking for a new feature*
61 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
  1 | # Makefile for Sphinx documentation
  2 | #
  3 | 
  4 | # You can set these variables from the command line.
  5 | SPHINXOPTS    =
  6 | SPHINXBUILD   = sphinx-build
  7 | PAPER         =
  8 | BUILDDIR      = build
  9 | 
 10 | # Internal variables.
 11 | PAPEROPT_a4     = -D latex_paper_size=a4
 12 | PAPEROPT_letter = -D latex_paper_size=letter
 13 | ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
 14 | # the i18n builder cannot share the environment and doctrees with the others
 15 | I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
 16 | 
 17 | .PHONY: help
 18 | help:
 19 | 	@echo "Please use \`make <target>' where <target> is one of"
 20 | 	@echo "  html       to make standalone HTML files"
 21 | 	@echo "  dirhtml    to make HTML files named index.html in directories"
 22 | 	@echo "  singlehtml to make a single large HTML file"
 23 | 	@echo "  pickle     to make pickle files"
 24 | 	@echo "  json       to make JSON files"
 25 | 	@echo "  htmlhelp   to make HTML files and a HTML help project"
 26 | 	@echo "  qthelp     to make HTML files and a qthelp project"
 27 | 	@echo "  applehelp  to make an Apple Help Book"
 28 | 	@echo "  devhelp    to make HTML files and a Devhelp project"
 29 | 	@echo "  epub       to make an epub"
 30 | 	@echo "  epub3      to make an epub3"
 31 | 	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
 32 | 	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
 33 | 	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
 34 | 	@echo "  text       to make text files"
 35 | 	@echo "  man        to make manual pages"
 36 | 	@echo "  texinfo    to make Texinfo files"
 37 | 	@echo "  info       to make Texinfo files and run them through makeinfo"
 38 | 	@echo "  gettext    to make PO message catalogs"
 39 | 	@echo "  changes    to make an overview of all changed/added/deprecated items"
 40 | 	@echo "  xml        to make Docutils-native XML files"
 41 | 	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
 42 | 	@echo "  linkcheck  to check all external links for integrity"
 43 | 	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
 44 | 	@echo "  coverage   to run coverage check of the documentation (if enabled)"
 45 | 	@echo "  dummy      to check syntax errors of document sources"
 46 | 
 47 | .PHONY: clean
 48 | clean:
 49 | 	rm -rf $(BUILDDIR)/*
 50 | 
 51 | .PHONY: cleanhtml
 52 | cleanhtml: # remove only the *html* outputs (html, dirhtml, singlehtml, htmlhelp)
 53 | 	rm -rf $(BUILDDIR)/*html*
 54 | 
 55 | .PHONY: html
 56 | html:
 57 | 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
 58 | 	@echo
 59 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
 60 | 	#xdg-open $(BUILDDIR)/html/index.html
 61 | 
 62 | .PHONY: dirhtml
 63 | dirhtml:
 64 | 	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
 65 | 	@echo
 66 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
 67 | 
 68 | .PHONY: singlehtml
 69 | singlehtml:
 70 | 	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
 71 | 	@echo
 72 | 	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
 73 | 
 74 | .PHONY: pickle
 75 | pickle:
 76 | 	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
 77 | 	@echo
 78 | 	@echo "Build finished; now you can process the pickle files."
 79 | 
 80 | .PHONY: json
 81 | json:
 82 | 	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
 83 | 	@echo
 84 | 	@echo "Build finished; now you can process the JSON files."
 85 | 
 86 | .PHONY: htmlhelp
 87 | htmlhelp:
 88 | 	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
 89 | 	@echo
 90 | 	@echo "Build finished; now you can run HTML Help Workshop with the" \
 91 | 	      ".hhp project file in $(BUILDDIR)/htmlhelp."
 92 | 
 93 | .PHONY: qthelp
 94 | qthelp:
 95 | 	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
 96 | 	@echo
 97 | 	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
 98 | 	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
 99 | 	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/InstaLooter.qhcp"
100 | 	@echo "To view the help file:"
101 | 	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/InstaLooter.qhc"
102 | 
103 | .PHONY: applehelp
104 | applehelp:
105 | 	$(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
106 | 	@echo
107 | 	@echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
108 | 	@echo "N.B. You won't be able to view it unless you put it in" \
109 | 	      "~/Library/Documentation/Help or install it in your application" \
110 | 	      "bundle."
111 | 
112 | .PHONY: devhelp
113 | devhelp:
114 | 	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
115 | 	@echo
116 | 	@echo "Build finished."
117 | 	@echo "To view the help file:"
118 | 	@echo "# mkdir -p $$HOME/.local/share/devhelp/InstaLooter"
119 | 	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/InstaLooter"
120 | 	@echo "# devhelp"
121 | 
122 | .PHONY: epub
123 | epub:
124 | 	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
125 | 	@echo
126 | 	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
127 | 
128 | .PHONY: epub3
129 | epub3:
130 | 	$(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3
131 | 	@echo
132 | 	@echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3."
133 | 
134 | .PHONY: latex
135 | latex:
136 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
137 | 	@echo
138 | 	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
139 | 	@echo "Run \`make' in that directory to run these through (pdf)latex" \
140 | 	      "(use \`make latexpdf' here to do that automatically)."
141 | 
142 | .PHONY: latexpdf
143 | latexpdf:
144 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
145 | 	@echo "Running LaTeX files through pdflatex..."
146 | 	$(MAKE) -C $(BUILDDIR)/latex all-pdf
147 | 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
148 | 
149 | .PHONY: latexpdfja
150 | latexpdfja:
151 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
152 | 	@echo "Running LaTeX files through platex and dvipdfmx..."
153 | 	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
154 | 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
155 | 
156 | .PHONY: text
157 | text:
158 | 	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
159 | 	@echo
160 | 	@echo "Build finished. The text files are in $(BUILDDIR)/text."
161 | 
162 | .PHONY: man
163 | man:
164 | 	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
165 | 	@echo
166 | 	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
167 | 
168 | .PHONY: texinfo
169 | texinfo:
170 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
171 | 	@echo
172 | 	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
173 | 	@echo "Run \`make' in that directory to run these through makeinfo" \
174 | 	      "(use \`make info' here to do that automatically)."
175 | 
176 | .PHONY: info
177 | info: # build Texinfo sources, then run them through makeinfo via a sub-make
178 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
179 | 	@echo "Running Texinfo files through makeinfo..."
180 | 	$(MAKE) -C $(BUILDDIR)/texinfo info
181 | 	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
182 | 
183 | .PHONY: gettext
184 | gettext:
185 | 	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
186 | 	@echo
187 | 	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
188 | 
189 | .PHONY: changes
190 | changes:
191 | 	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
192 | 	@echo
193 | 	@echo "The overview file is in $(BUILDDIR)/changes."
194 | 
195 | .PHONY: linkcheck
196 | linkcheck:
197 | 	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
198 | 	@echo
199 | 	@echo "Link check complete; look for any errors in the above output " \
200 | 	      "or in $(BUILDDIR)/linkcheck/output.txt."
201 | 
202 | .PHONY: doctest
203 | doctest:
204 | 	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
205 | 	@echo "Testing of doctests in the sources finished, look at the " \
206 | 	      "results in $(BUILDDIR)/doctest/output.txt."
207 | 
208 | .PHONY: coverage
209 | coverage:
210 | 	$(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
211 | 	@echo "Testing of coverage in the sources finished, look at the " \
212 | 	      "results in $(BUILDDIR)/coverage/python.txt."
213 | 
214 | .PHONY: xml
215 | xml:
216 | 	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
217 | 	@echo
218 | 	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
219 | 
220 | .PHONY: pseudoxml
221 | pseudoxml:
222 | 	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
223 | 	@echo
224 | 	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
225 | 
226 | .PHONY: dummy
227 | dummy:
228 | 	$(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy
229 | 	@echo
230 | 	@echo "Build finished. Dummy builder generates no files."
231 | 


--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | setuptools >=30.3
2 | Sphinx ~=3.1
3 | sphinx-bootstrap-theme ~=0.7
4 | semantic-version ~=2.8
5 | 


--------------------------------------------------------------------------------
/docs/source/batch.rst:
--------------------------------------------------------------------------------
  1 | Batch mode
  2 | ==========
  3 | 
  4 | ``instaLooter`` supports a batch mode for use cases that are more demanding than
  5 | just downloading from a profile once or twice. To use it, you must pass a
  6 | *batch config file* to the CLI. The file is in the Python configuration format,
  7 | very close to the Windows **INI** format.
  8 | 
  9 | Format
 10 | ------
 11 | A *config file* contains at least one section, but can contain more if needed.
 12 | A section is organised as shown below, with a header and key-value pairs using
 13 | the ``=`` sign:
 14 | 
 15 | .. code-block:: ini
 16 | 
 17 |    [my section header]
 18 |    key = value
 19 |    other_key = other_value
 20 | 
 21 | Specifying targets
 22 | ------------------
 23 | 
 24 | Users can be specified in the *users* parameter of each section, and hashtags
 25 | in the *hashtags* parameter. Those parameters take a ``key: value`` pair per line,
 26 | where *key* is the name of the user or hashtag, and *value* the path to the
 27 | directory where the medias will be downloaded. For instance:
 28 | 
 29 | .. code-block:: ini
 30 | 
 31 |    [Video Games]
 32 |    users =
 33 |        borderlands: /tmp/borderlands
 34 |        ffxv: /tmp/ffxv
 35 |    hashtags =
 36 |        nierautomata: /tmp/nier
 37 | 
 38 |    [Music]
 39 |    users =
 40 |       perm36 : ~/Music/Perm36
 41 | 
 42 | 
 43 | Logging in
 44 | ----------
 45 | 
 46 | Each section can be provided with a ``username`` and a ``password`` parameter:
 47 | 
 48 | * if none are given, the scraping is done anonymously or using the last session
 49 |   you logged with (through ``instaLooter login`` for instance, or the session
 50 |   of the previous section).
 51 | * if only ``username`` is given, ``instaLooter`` will interactively ask for the
 52 |   associated password and then login.
 53 | * if both ``username`` and ``password`` are given, then ``instaLooter`` will
 54 |   logout from any previous session and login quietly.
 55 | 
 56 | 
 57 | Passing parameters
 58 | ------------------
 59 | 
 60 | Each section can be given the same parameters as the command line:
 61 | 
 62 | ``add-metadata``
 63 |   set to *True* to add metadata to the downloaded images
 64 | ``get-videos``
 65 |   set to *True* to download videos as well as images
 66 | ``jobs``
 67 |   the number of threads to use, defaults to ``16``
 68 | ``template``
 69 |   the template to use, without quotes, defaults to ``{id}``
 70 | ``videos-only``
 71 |   set to *True* to download only videos
 72 | ``quiet``
 73 |   set to *True* to hide the progress bar
 74 | ``new``
 75 |   set to *True* to only download new medias
 76 | ``num-to-dl``
 77 |   the number of images to download
 78 | ``dump-json``
 79 |   set to *True* to dump metadata in JSON format
 80 | ``dump-only``
 81 |   set to *True* to only dump metadata, not downloading anything.
 82 | ``extended-dump``
 83 |   set to *True* to fetch additional information when dumping metadata.
 84 | 
 85 | For instance, to download 3 new videos from ``#funny`` and ``#nsfw``:
 86 | 
 87 | .. code-block:: ini
 88 | 
 89 |    [Vids]
 90 |    videos-only = true
 91 |    new = true
 92 |    num-to-dl = 3
 93 |    hashtags =
 94 |        funny: ~/Videos
 95 |        nsfw: ~/Videos
 96 | 
 97 | 
 98 | Running the program
 99 | -------------------
100 | 
101 | Simply run the following command
102 | 
103 | .. code-block:: console
104 | 
105 |   instaLooter batch /path/to/your/batch.ini
106 | 
107 | 
108 | Bugs
109 | ----
110 | 
111 | .. warning::
112 | 
113 |    This feature may not be completely functional yet ! I would say that it is
114 |    still in beta, were the whole ``instaLooter`` program not in beta too **:D**.
115 | 
116 | Please report any bugs caused by this feature to the `Github
117 | issue tracker <https://github.com/althonos/InstaLooter/issues>`_, adding the
118 | configuration file as an attachment!
119 | 


--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
  1 | # coding: utf-8
  2 | #
  3 | # Configuration file for the Sphinx documentation builder.
  4 | #
  5 | # This file does only contain a selection of the most common options. For a
  6 | # full list see the documentation:
  7 | # http://www.sphinx-doc.org/en/stable/config
  8 | 
  9 | 
 10 | # -- Imports -----------------------------------------------------------------
 11 | 
 12 | import os
 13 | import sys
 14 | import shutil
 15 | import collections
 16 | import datetime
 17 | import semantic_version
 18 | import sphinx_bootstrap_theme
 19 | 
 20 | # -- Path setup --------------------------------------------------------------
 21 | 
 22 | # If extensions (or modules to document with autodoc) are in another directory,
 23 | # add these directories to sys.path here. If the directory is relative to the
 24 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 25 | 
 26 | docsrc_dir = os.path.abspath(os.path.join(__file__, '..'))
 27 | project_dir = os.path.abspath(os.path.join(docsrc_dir, '..', '..'))
 28 | 
 29 | sys.path.insert(0, project_dir)
 30 | import instalooter
 31 | 
 32 | # -- Files setup -------------------------------------------------------------
 33 | 
 34 | with open(os.path.join(project_dir, "CHANGELOG.rst"), 'rb') as src:
 35 |     with open(os.path.join(docsrc_dir, "changelog.rst"), 'wb') as dst:
 36 |         dst.write(b":tocdepth: 2\n\n")
 37 |         shutil.copyfileobj(src, dst)
 38 | 
 39 | # -- Project information -----------------------------------------------------
 40 | 
 41 | project = 'InstaLooter'
 42 | author = instalooter.__author__
 43 | copyright = '2016-{}, {}'.format(datetime.date.today().year, author)
 44 | 
 45 | # The full version, including alpha/beta/rc tags
 46 | release = instalooter.__version__
 47 | # Parsed semantic version of the release string
 48 | semver = semantic_version.Version(instalooter.__version__)
 49 | # The short X.Y.Z version (major.minor.patch only, no pre-release/build tags)
 50 | version = "{v.major}.{v.minor}.{v.patch}".format(v=semver)
 51 | 
 52 | # -- General configuration ---------------------------------------------------
 53 | 
 54 | # If your documentation needs a minimal Sphinx version, state it here.
 55 | #
 56 | needs_sphinx = '1.7'
 57 | 
 58 | # Add any Sphinx extension module names here, as strings. They can be
 59 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 60 | # ones.
 61 | extensions = [
 62 |     'sphinx.ext.autodoc',
 63 |     'sphinx.ext.autosummary',
 64 |     'sphinx.ext.doctest',
 65 |     'sphinx.ext.intersphinx',
 66 |     'sphinx.ext.todo',
 67 |     'sphinx.ext.coverage',
 68 |     'sphinx.ext.mathjax',
 69 |     'sphinx.ext.ifconfig',
 70 |     'sphinx.ext.viewcode',
 71 |     'sphinx.ext.githubpages',
 72 |     'sphinx.ext.napoleon',
 73 |     'sphinx_bootstrap_theme',
 74 | ]
 75 | 
 76 | # Add any paths that contain templates here, relative to this directory.
 77 | templates_path = ['_templates']
 78 | 
 79 | # The suffix(es) of source filenames.
 80 | # You can specify multiple suffix as a list of string:
 81 | #
 82 | # source_suffix = ['.rst', '.md']
 83 | source_suffix = '.rst'
 84 | 
 85 | # The master toctree document.
 86 | master_doc = 'index'
 87 | 
 88 | # The language for content autogenerated by Sphinx. Refer to documentation
 89 | # for a list of supported languages.
 90 | #
 91 | # This is also used if you do content translation via gettext catalogs.
 92 | # Usually you set "language" from the command line for these cases.
 93 | language = None
 94 | 
 95 | # List of patterns, relative to source directory, that match files and
 96 | # directories to ignore when looking for source files.
 97 | # This pattern also affects html_static_path and html_extra_path .
 98 | exclude_patterns = []
 99 | 
100 | # The name of the Pygments (syntax highlighting) style to use.
101 | pygments_style = 'sphinx'
102 | 
103 | # The name of the default role for inline references
104 | default_role = "py:obj"
105 | 
106 | # -- Options for HTML output -------------------------------------------------
107 | 
108 | # The theme to use for HTML and HTML Help pages.  See the documentation for
109 | # a list of builtin themes.
110 | #
111 | html_theme = 'bootstrap'
112 | 
113 | # Add any paths that contain custom themes here, relative to this directory.
114 | html_theme_path = sphinx_bootstrap_theme.get_html_theme_path()
115 | 
116 | # Theme options are theme-specific and customize the look and feel of a theme
117 | # further.  For a list of options available for each theme, see the
118 | # documentation.
119 | #
120 | html_theme_options = {
121 | 
122 |     # Bootswatch (http://bootswatch.com/) theme.
123 |     'bootswatch_theme': "simplex",
124 | 
125 |     # Choose Bootstrap version.
126 |     'bootstrap_version': "3",
127 | 
128 |     # Tab name for entire site. (Default: "Site")
129 |     'navbar_site_name': "Documentation",
130 | 
131 |     # HTML navbar class (Default: "navbar") to attach to <div> element.
132 |     # For black navbar, do "navbar navbar-inverse"
133 |     'navbar_class': "navbar navbar-inverse",
134 | 
135 |     # Render the next and previous page links in navbar. (Default: true)
136 |     'navbar_sidebarrel': True,
137 | 
138 |     # Render the current pages TOC in the navbar. (Default: true)
139 |     'navbar_pagenav': False,
140 | 
141 |     # A list of tuples containing pages or urls to link to.
142 |     'navbar_links': [
143 |         ("GitHub", "https://github.com/althonos/InstaLooter", True),
144 |         ("PyPI", "https://pypi.org/project/InstaLooter", True),
145 |     ],
146 | 
147 | }
148 | 
149 | # Add any paths that contain custom static files (such as style sheets) here,
150 | # relative to this directory. They are copied after the builtin static files,
151 | # so a file named "default.css" will overwrite the builtin "default.css".
152 | html_static_path = ['_static']
153 | 
154 | # Custom sidebar templates, must be a dictionary that maps document names
155 | # to template names.
156 | #
157 | # The default sidebars (for documents that don't match any pattern) are
158 | # defined by theme itself.  Builtin themes are using these templates by
159 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
160 | # 'searchbox.html']``.
161 | #
162 | html_sidebars = {
163 |     "*": ['localtoc.html'],
164 |     os.path.join("instalooter", "*"): [],
165 | }
166 | 
167 | # -- Options for HTMLHelp output ---------------------------------------------
168 | 
169 | # Output file base name for HTML help builder.
170 | htmlhelp_basename = 'InstaLooter'
171 | 
172 | # -- Options for LaTeX output ------------------------------------------------
173 | 
174 | latex_elements = {
175 |     # The paper size ('letterpaper' or 'a4paper').
176 |     #
177 |     # 'papersize': 'letterpaper',
178 | 
179 |     # The font size ('10pt', '11pt' or '12pt').
180 |     #
181 |     # 'pointsize': '10pt',
182 | 
183 |     # Additional stuff for the LaTeX preamble.
184 |     #
185 |     # 'preamble': '',
186 | 
187 |     # Latex figure (float) alignment
188 |     #
189 |     # 'figure_align': 'htbp',
190 | }
191 | 
192 | # Grouping the document tree into LaTeX files. List of tuples
193 | # (source start file, target name, title,
194 | #  author, documentclass [howto, manual, or own class]).
195 | latex_documents = [
196 |     (master_doc, 'InstaLooter.tex', 'InstaLooter Documentation',
197 |      'Martin Larralde', 'manual'),
198 | ]
199 | 
200 | # -- Options for manual page output ------------------------------------------
201 | 
202 | # One entry per manual page. List of tuples
203 | # (source start file, name, description, authors, manual section).
204 | man_pages = [
205 |     (master_doc, 'instalooter', 'InstaLooter Documentation',
206 |      [author], 1)
207 | ]
208 | 
209 | # -- Options for Texinfo output ----------------------------------------------
210 | 
211 | # Grouping the document tree into Texinfo files. List of tuples
212 | # (source start file, target name, title, author,
213 | #  dir menu entry, description, category)
214 | texinfo_documents = [
215 |     (master_doc, 'InstaLooter', 'InstaLooter Documentation',
216 |      author, 'InstaLooter', 'One line description of project.',
217 |      'Miscellaneous'),
218 | ]
219 | 
220 | # -- Extension configuration -------------------------------------------------
221 | 
222 | # -- Options for autosummary -------------------------------------------------
223 | 
224 | autosummary_generate = [os.path.join("instalooter", "index")]
225 | 
226 | # -- Options for autodoc -----------------------------------------------------
227 | 
228 | autodoc_member_order = "groupwise"
229 | autoclass_content = "both"
230 | 
231 | # -- Options for intersphinx extension ---------------------------------------
232 | 
233 | # Example configuration for intersphinx: refer to the Python standard library.
234 | intersphinx_mapping = {
235 |     'python': ('https://docs.python.org/3/', None),
236 |     'fs': ('https://docs.pyfilesystem.org/en/latest/', None),
237 |     'requests': ('https://requests.readthedocs.io/en/master/', None),
238 |     'coloredlogs': ('https://coloredlogs.readthedocs.io/en/latest/', None),
239 | }
240 | 
241 | # -- Options for todo extension ----------------------------------------------
242 | 
243 | # If true, `todo` and `todoList` produce output, else they produce nothing.
244 | todo_include_todos = True
245 | 


--------------------------------------------------------------------------------
/docs/source/cron.rst:
--------------------------------------------------------------------------------
  1 | Periodic downloads
  2 | ==================
  3 | 
  4 | ``instaLooter`` may be used to update a local mirror of an instagram account,
  5 | and as such it may be desired to run it periodically, without needing to update
  6 | manually.
  7 | 
  8 | 
  9 | 
 10 | UNIX
 11 | ----
 12 | 
 13 | To support the UNIX philosophy, the program does not implement this feature itself
 14 | but should integrate well with established alternatives. The following examples
 15 | make use of either `Cron <https://en.wikipedia.org/wiki/Cron>`_ or
 16 | `SystemD timers <https://wiki.archlinux.org/index.php/Systemd/Timers>`_.
 17 | 
 18 | 
 19 | Cron
 20 | ^^^^
 21 | First of all, make sure ``Cron`` is installed, and if not, refer to the
 22 | package manager of your distribution (if you're on MacOS, give a try to
 23 | `homebrew <https://brew.sh/>`_ if not using it already !).
 24 | 
 25 | Then, edit ``Cron`` to add a scheduled task:
 26 | 
 27 | .. code-block:: console
 28 | 
 29 |   $ crontab -e
 30 | 
 31 | This will open a file using the **$EDITOR** system variable to find a text
 32 | editor, such as *nano*, *pico*, *vi*, etc. Then, add one line as one of the
 33 | examples below to run instaLooter periodically (you can add more than one line
 34 | if you have more than one goal in mind):
 35 | 
 36 | * Download maximum 3 new ``#funny`` videos to ``~/Videos`` every hour::
 37 | 
 38 |     @hourly /usr/bin/env python -m instaLooter hashtag funny ~/Videos -N -n 3 -V
 39 | 
 40 | * Download new pictures w/ metadata from the ``instagram`` account at every reboot::
 41 | 
 42 |     @reboot /usr/bin/env python -m instaLooter instagram ~/Pictures/instagram -Nm
 43 | 
 44 | * Use a configuration file to download in :doc:`batch` every week on Sunday, 00:00 ::
 45 | 
 46 |     @weekly /usr/bin/env python -m instaLooter batch ~/myLooter.ini
 47 | 
 48 | 
 49 | To disable a scheduled task, simply remove the line associated to that task within
 50 | *crontab*.
 51 | 
 52 | .. seealso::
 53 | 
 54 |     * The `CronHowTo <https://help.ubuntu.com/community/CronHowto>`_ hosted
 55 |       on *help.ubuntu.com* for a complete understanding of the crontab line format.
 56 | 
 57 | SystemD
 58 | ^^^^^^^
 59 | You'll probably use this alternative if your system is already running on top of
 60 | SystemD. If not, you should probably turn to ``Cron``. Simply check for the
 61 | existence of a ``systemctl`` executable (e.g. running ``systemctl --help``) to
 62 | see if you're using SystemD.
 63 | 
 64 | Create a new service file, either in ``/etc/systemd/system/`` for system-wide jobs,
 65 | or in ``~/.config/systemd/user/`` for user-only jobs, named for instance
 66 | ``looter.service`` (you can use any name as long as the file has a *.service*
 67 | extension), with the following content:
 68 | 
 69 | .. code-block:: ini
 70 | 
 71 |   [Unit]
 72 |   Description=my custom periodic instagram looter
 73 | 
 74 |   [Service]
 75 |   Type=oneshot
 76 |   ExecStart=/usr/bin/env python -m instaLooter <the parameters I want>
 77 | 
 78 | Make sure the ``instaLooter`` module is accessible to the ``systemd`` manager,
 79 | i.e. if you're using system-wide jobs that the module was installed in */usr* (not
 80 | with ``pip install --user instaLooter`` but with ``pip install instaLooter``).
 81 | 
 82 | To test your service, run ``systemctl start looter.service`` (using the name of
 83 | your file), or ``systemctl --user start looter.service`` if you want to use
 84 | user-only jobs. There should be no output if everything works fine.
 85 | 
 86 | If a bug occurs check the logs with *journalctl*:
 87 | 
 88 | .. code-block:: console
 89 | 
 90 |     # journalctl looter.service
 91 |     $ journalctl --user --user-unit looter.service
 92 | 
 93 | Once your service works fine, create a timer for your new service, named like
 94 | and located next to your service file, but with a ``.timer`` extension, and
 95 | the following content:
 96 | 
 97 | .. code-block:: ini
 98 | 
 99 |   [Unit]
100 |   Description=run my custom periodic instagram looter hourly
101 | 
102 |   [Timer]
103 |   # Time to wait after booting before we run first time
104 |   OnBootSec=10min
105 |   # Time between running each consecutive time
106 |   OnUnitActiveSec=1h
107 |   Unit=looter.service
108 | 
109 | Finally, enable and start your timer with one of the following commands:
110 | 
111 | .. code-block:: console
112 | 
113 |     # systemctl start looter.timer && systemctl enable looter.timer
114 |     $ systemctl --user start looter.timer && systemctl --user enable looter.timer
115 | 
116 | To disable the timer, use the same command as above, replacing ``start`` with
117 | ``stop`` and ``enable`` by ``disable``, and remove the service and timer files
118 | if you want to completely uninstall the timer.
119 | 
120 | .. seealso::
121 | 
122 |     * The `SystemD/timers <https://wiki.archlinux.org/index.php/Systemd/Timers>`_
123 |       and the whole `SystemD <https://wiki.archlinux.org/index.php/Systemd>`_
124 |       pages on the *Archlinux wiki* for more details about timer and services.
125 |     * The `post on Jason's blog <https://jason.the-graham.com/2013/03/06/how-to-use-systemd-timers/>`_
126 |       that helped shaping this tutorial.
127 | 


--------------------------------------------------------------------------------
/docs/source/examples.rst:
--------------------------------------------------------------------------------
  1 | API Examples
  2 | ============
  3 | 
  4 | .. toctree::
  5 | 
  6 | ``instaLooter`` also provides an :abbr:`API (Application Programming Interface)`
  7 | that can be used to extend the capabilities of ``instaLooter``, to fit your
  8 | needs more tightly or to integrate ``instaLooter`` to your program.
  9 | 
 10 | 
 11 | Download pictures
 12 | -----------------
 13 | 
 14 | Download 50 posts from the `Dream Wife band <https://www.instagram.com/dreamwifetheband/?hl=fr>`_
 15 | account to the ``Pictures`` directory in your home folder (you'd better check out
 16 | their music too, though):
 17 | 
 18 | .. code:: python
 19 | 
 20 |    from instalooter.looters import ProfileLooter
 21 |    looter = ProfileLooter("dreamwifetheband")
 22 |    looter.download('~/Pictures', media_count=50)
 23 | 
 24 | 
 25 | Dump media links
 26 | ----------------
 27 | 
 28 | Create a list with all the links to picture and video files tagged with
 29 | `#ramones <https://www.instagram.com/explore/tags/ramones/>`_ in a file
 30 | named `ramones.txt`:
 31 | 
 32 | .. code:: python
 33 | 
 34 |     def links(media, looter):
 35 |         if media.get('__typename') == "GraphSidecar":
 36 |             media = looter.get_post_info(media['shortcode'])
 37 |             nodes = [e['node'] for e in media['edge_sidecar_to_children']['edges']]
 38 |             return [n.get('video_url') or n.get('display_url') for n in nodes]
 39 |         elif media['is_video']:
 40 |             media = looter.get_post_info(media['shortcode'])
 41 |             return [media['video_url']]
 42 |         else:
 43 |             return [media['display_url']]
 44 | 
 45 |     from instalooter.looters import HashtagLooter
 46 |     looter = HashtagLooter("ramones")
 47 | 
 48 |     with open("ramones.txt", "w") as f:
 49 |         for media in looter.medias():
 50 |             for link in links(media, looter):
 51 |                 f.write("{}\n".format(link))
 52 | 
 53 | 
 54 | Users from comments
 55 | -------------------
 56 | 
 57 | Obtain a subset of users that commented on some of the posts of
 58 | `Franz Ferdinand <https://www.instagram.com/franz_ferdinand>`_.
 59 | 
 60 | .. code:: python
 61 | 
 62 |     from instalooter.looters import ProfileLooter
 63 |     looter = ProfileLooter("franz_ferdinand")
 64 | 
 65 |     users = set()
 66 |     for media in looter.medias():
 67 |        info = looter.get_post_info(media['shortcode'])
 68 |        for comment in post_info['edge_media_to_comment']['edges']:
 69 |            user = comment['node']['owner']['username']
 70 |            users.add(user)
 71 | 
 72 | 
 73 | Users from mentions
 74 | -------------------
 75 | 
 76 | Obtain the users tagged in the posts of `Mando Diao <https://www.instagram.com/mandodiaomusic>`_.
 77 | 
 78 | .. code:: python
 79 | 
 80 |     from instalooter.looters import ProfileLooter
 81 |     looter = ProfileLooter("mandodiaomusic")
 82 | 
 83 |     users = set()
 84 |     for media in looter.medias():
 85 |        info = looter.get_post_info(media['shortcode'])
 86 |        for comment in post_info['edge_media_to_tagged_user']['edges']:
 87 |            user = comment['node']['user']['username']
 88 |            users.add(user)
 89 | 
 90 | 
 91 | Download resized pictures
 92 | -------------------------
 93 | 
 94 | Unfortunately, this is not possible anymore as Instagram added a hash signature
 95 | to prevent messing with their URLs.
 96 | 
 97 | ..
 98 | .. Downloaded pictures will all be resized by IG to be 320 pixels wide
 99 | .. with the same aspect ratio before being downloaded.
100 | ..
101 | .. .. code::
102 | ..
103 | ..     from instaLooter import InstaLooter
104 | ..     from instaLooter.urlgen import resizer
105 | ..
106 | ..     looter = InstaLooter(profile="xxxx", get_videos=True, url_generator=resizer(320))
107 | ..     looter.download()
108 | 
109 | 
110 | .. Download thumbnails
111 | .. -------------------
112 | .. .. code::
113 | ..
114 | ..     from instaLooter import InstaLooter
115 | ..     from instaLooter.urlgen import thumbnail
116 | ..
117 | ..     looter = InstaLooter(profile="xxxx", get_videos=True, url_generator=thumbnail)
118 | ..     looter.download()
119 | 


--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
  1 | InstaLooter
  2 | ===========
  3 | 
  4 | *Not all treasure's silver and gold, mate.*
  5 | 
  6 | |build| |repo| |versions| |format| |coverage| |doc| |requirements| |grade|
  7 | 
  8 | InstaLooter is a program that can download pictures and videos from any profile
  9 | or hashtag on `Instagram <https://instagram.com>`_, without any API token. It is
 10 | even possible to download pictures and videos from a private profile you are
 11 | following using your credentials to log in.
 12 | 
 13 | See more details about one of the following topics:
 14 | 
 15 | .. rubric:: Guides
 16 | 
 17 | .. toctree::
 18 |     :maxdepth: 1
 19 | 
 20 |     Installation <install>
 21 |     Usage <usage>
 22 |     Batch mode <batch>
 23 |     Periodic Downloads <cron>
 24 | 
 25 | 
 26 | .. rubric:: Library
 27 | 
 28 | .. toctree::
 29 |     :maxdepth: 1
 30 | 
 31 |     API Examples <examples>
 32 |     Changelog <changelog>
 33 |     API Reference <instalooter/index>
 34 | 
 35 | 
 36 | License |license|
 37 | -----------------
 38 | 
 39 | InstaLooter is released under the
 40 | `GNU General Public License v3 <https://choosealicense.com/licenses/gpl-3.0/>`_
 41 | *or later*, and is fully open-source. The ``COPYING`` file distributed with
 42 | the software contains the complete license text.
 43 | 
 44 | 
 45 | Issues |issues|
 46 | ---------------
 47 | 
 48 | If you want to request a feature, or report a bug, please file
 49 | an issue on the `issue tracker <https://github.com/althonos/InstaLooter/issues>`_.
 50 | 
 51 | About
 52 | -----
 53 | 
 54 | InstaLooter is maintained by:
 55 |   * `Martin Larralde <https://github.com/althonos>`_
 56 | 
 57 | Special thanks to the following contributors:
 58 |   * `Mohaned Magdy <https://github.com/mohan3d>`_
 59 |   * `Daniel Lee Harple <https://github.com/dlh>`_
 60 |   * `Bryan Massoth <https://github.com/bmass02>`_
 61 |   * `AndCycle <https://github.com/AndCycle>`_
 62 |   * `Pauli Salmenrinne <https://github.com/susundberg>`_
 63 |   * `Georp <https://github.com/gffde3>`_
 64 |   * `Lev Velykoivanenko <https://github.com/AbysmalBiscuit>`_
 65 |   * `Maksymilian Ratajczyk <https://github.com/mratajczyk>`_
 66 |   * `Henning Kowalk <https://github.com/Henning-K>`_
 67 |   * `Daniel M. Capella <https://github.com/polyzen>`_
 68 |   * `tgandor <https://github.com/tgandor>`_
 69 |   * `Denis Emelyanov <https://github.com/St2mZ>`_
 70 |   * `Pavel Sutyrin <https://github.com/pavel64-sutyrin>`_
 71 | 
 72 | Indices and tables
 73 | ------------------
 74 | 
 75 | * :ref:`genindex`
 76 | * :ref:`modindex`
 77 | * :ref:`search`
 78 | 
 79 | 
 80 | .. |repo| image:: https://img.shields.io/badge/source-GitHub-303030.svg?maxAge=3600&style=flat-square
 81 |    :target: https://github.com/althonos/InstaLooter
 82 | 
 83 | .. |versions| image:: https://img.shields.io/pypi/v/instaLooter.svg?maxAge=3600&style=flat-square
 84 |    :target: https://pypi.org/project/instaLooter
 85 | 
 86 | .. |format| image:: https://img.shields.io/pypi/format/instaLooter.svg?maxAge=3600&style=flat-square
 87 |    :target: https://pypi.org/project/instaLooter
 88 | 
 89 | .. |grade| image:: https://img.shields.io/codacy/grade/9b8c7da6887c4195b9e960cb04b59a91/master.svg?maxAge=3600&style=flat-square
 90 |    :target: https://www.codacy.com/app/althonos/InstaLooter/dashboard
 91 | 
 92 | .. |coverage| image:: https://img.shields.io/codecov/c/github/althonos/InstaLooter/master.svg?maxAge=3600&style=flat-square
 93 |    :target: https://codecov.io/gh/althonos/InstaLooter
 94 | 
 95 | .. |build| image:: https://img.shields.io/travis/althonos/InstaLooter/master.svg?label=travis-ci&maxAge=3600&style=flat-square
 96 |    :target: https://travis-ci.org/althonos/InstaLooter/
 97 | 
 98 | .. |doc| image:: https://img.shields.io/readthedocs/instalooter.svg?style=flat-square&maxAge=3600
 99 |    :target: http://instalooter.readthedocs.io/en/latest/?badge=latest
100 | 
101 | .. |requirements| image:: https://img.shields.io/requires/github/althonos/InstaLooter/master.svg?style=flat-square&maxAge=3600
102 |    :target: https://requires.io/github/althonos/InstaLooter/requirements/?branch=master
103 | 
104 | .. |health| image:: https://landscape.io/github/althonos/InstaLooter/master/landscape.svg?style=flat-square&maxAge=3600
105 |    :target: https://landscape.io/github/althonos/InstaLooter/master
106 | 
107 | .. |license| image:: https://img.shields.io/pypi/l/InstaLooter.svg?maxAge=3600&style=flat-square
108 |    :target: https://choosealicense.com/licenses/gpl-3.0/
109 | 
110 | .. |issues| image:: https://img.shields.io/github/issues/althonos/InstaLooter.svg?maxAge=3600&style=flat-square
111 |    :target: https://github.com/althonos/InstaLooter/issues
112 | 


--------------------------------------------------------------------------------
/docs/source/install.rst:
--------------------------------------------------------------------------------
 1 | Installation
 2 | ============
 3 | 
 4 | .. toctree::
 5 | 
 6 | InstaLooter is available from two different sources: either a git repository,
 7 | shared publicly on GitHub, or a Python wheel, available on PyPI. Instructions
 8 | on how to set up each version are available below.
 9 | 
10 | The Python modules ``pip`` and ``setuptools`` are expected to be available
11 | before you start installing InstaLooter. They are not strictly required, but
12 | this guide does not explain how to set up InstaLooter without them.
13 | 
14 | .. hint::
15 | 
16 |    See the `PyPA web page <https://pip.pypa.io/en/stable/installing/>`_
17 |    to install ``pip`` if it is not already installed.
18 | 
19 | .. attention::
20 | 
21 |     Using ``pip`` will install InstaLooter with the default Python version.
22 |     InstaLooter is known to work with Python versions **2.7**, **3.4**
23 |     and **3.5**, but encoding errors have been reported with Python **2.7**. If
24 |     you are not familiar with the default Python version on your system, consider
25 |     enforcing an installation with Python 3 using ``pip3`` instead of ``pip``.
26 | 
27 | PyPI |pypi|
28 | -----------
29 | 
30 | If you have super user rights, open up a terminal and type the following:
31 | 
32 | .. code-block:: console
33 | 
34 |    # pip install instaLooter
35 | 
36 | If you don't have admin rights, then type the following to install only for
37 | the current user instead:
38 | 
39 | 
40 | .. code-block:: console
41 | 
42 |    $ pip install instaLooter --user
43 | 
44 | 
45 | If you want to use the *exif* metadata features, install the ``metadata`` extras
46 | as well:
47 | 
48 | .. code-block:: console
49 | 
50 |    $ pip install instaLooter[metadata] --user
51 | 
52 | 
53 | GitHub |build|
54 | --------------
55 | 
56 | With ``git`` installed, do the following in a directory on your machine to
57 | clone the remote repository and install instaLooter from source:
58 | 
59 | .. code-block:: console
60 | 
61 |    $ git clone https://github.com/althonos/InstaLooter
62 |    $ cd InstaLooter
63 | 
64 | Then use pip to install the local version of the program and all the required
65 | dependencies:
66 | 
67 | .. code-block:: console
68 | 
69 |   # pip install .
70 | 
71 | To install development dependencies (to test the program and/or build the
72 | documentation), use the *test* and/or *doc* extras:
73 | 
74 | .. code-block:: console
75 | 
76 |   $ pip install --user ".[test]"    # install only test dependencies
77 |   $ pip install --user ".[doc]"     # install only doc dependencies
78 |   $ pip install --user ".[dev]"     # install all dev dependencies
79 | 
80 | 
81 | .. |pypi| image:: https://img.shields.io/pypi/v/instaLooter.svg?maxAge=3600&style=flat-square
82 |    :target: https://pypi.org/project/instaLooter
83 | 
84 | .. |build| image:: https://img.shields.io/travis/althonos/InstaLooter/master.svg?label=travis-ci&maxAge=3600&style=flat-square
85 |    :target: https://travis-ci.org/althonos/InstaLooter/
86 | 


--------------------------------------------------------------------------------
/docs/source/instalooter/batch.rst:
--------------------------------------------------------------------------------
 1 | Batch Runner (`instalooter.batch`)
 2 | ==================================
 3 | 
 4 | .. currentmodule:: instalooter.batch
 5 | 
 6 | .. automodule:: instalooter.batch
 7 |    :members:
 8 |    :inherited-members:
 9 |    :show-inheritance:
10 | 


--------------------------------------------------------------------------------
/docs/source/instalooter/cli.rst:
--------------------------------------------------------------------------------
1 | Command Line Interface (`instalooter.cli`)
2 | ==========================================
3 | 
4 | .. currentmodule:: instalooter.cli
5 | 
6 | .. automodule:: instalooter.cli
7 |    :members:
8 | 


--------------------------------------------------------------------------------
/docs/source/instalooter/index.rst:
--------------------------------------------------------------------------------
 1 | API Reference
 2 | ==============
 3 | 
 4 | .. toctree::
 5 |    :hidden:
 6 | 
 7 |    looters
 8 |    cli
 9 |    medias
10 |    pages
11 |    batch
12 |    pbar
13 |    worker
14 | 
15 | 
16 | Main
17 | ----
18 | 
19 | .. rubric:: Looters (`instalooter.looters`)
20 | 
21 | .. currentmodule:: instalooter.looters
22 | 
23 | .. autosummary::
24 |     :nosignatures:
25 | 
26 |     InstaLooter
27 |     HashtagLooter
28 |     ProfileLooter
29 |     PostLooter
30 | 
31 | 
32 | .. rubric:: Command Line Interface  (`instalooter.cli`)
33 | 
34 | .. currentmodule:: instalooter.cli
35 | 
36 | .. autosummary::
37 | 
38 |    main
39 | 
40 | 
41 | .. rubric:: Batch Runner (`instalooter.batch`)
42 | 
43 | .. currentmodule:: instalooter.batch
44 | 
45 | .. autosummary::
46 |    :nosignatures:
47 | 
48 |    BatchRunner
49 | 
50 | 
51 | Iterators
52 | ---------
53 | 
54 | .. rubric:: Medias Iterators (`instalooter.medias`)
55 | 
56 | .. currentmodule:: instalooter.medias
57 | 
58 | .. autosummary::
59 |     :nosignatures:
60 | 
61 |     MediasIterator
62 |     TimedMediasIterator
63 | 
64 | 
65 | .. rubric:: Pages Iterators (`instalooter.pages`)
66 | 
67 | .. currentmodule:: instalooter.pages
68 | 
69 | .. autosummary::
70 |     :nosignatures:
71 | 
72 |     PageIterator
73 |     HashtagIterator
74 |     ProfileIterator
75 | 
76 | 
77 | Miscellaneous
78 | -------------
79 | 
80 | .. rubric:: Progress Bars (`instalooter.pbar`)
81 | 
82 | .. currentmodule:: instalooter.pbar
83 | 
84 | .. autosummary::
85 |     :nosignatures:
86 | 
87 |     ProgressBar
88 |     TqdmProgressBar
89 | 
90 | 
91 | .. rubric:: Background Downloader (`instalooter.worker`)
92 | 
93 | .. currentmodule:: instalooter.worker
94 | 
95 | .. autosummary::
96 |     :nosignatures:
97 | 
98 |     InstaDownloader
99 | 


--------------------------------------------------------------------------------
/docs/source/instalooter/looters.rst:
--------------------------------------------------------------------------------
 1 | Looters (`instalooter.looters`)
 2 | ===============================
 3 | 
 4 | .. currentmodule:: instalooter.looters
 5 | 
 6 | .. automodule:: instalooter.looters
 7 |    :members:
 8 |    :show-inheritance:
 9 |    :inherited-members:
10 | 


--------------------------------------------------------------------------------
/docs/source/instalooter/medias.rst:
--------------------------------------------------------------------------------
 1 | Medias Iterators (`instalooter.medias`)
 2 | =======================================
 3 | 
 4 | .. currentmodule:: instalooter.medias
 5 | 
 6 | .. automodule:: instalooter.medias
 7 |    :members:
 8 |    :special-members: __iter__, __next__, __length_hint__
 9 |    :show-inheritance:
10 |    :inherited-members:
11 | 


--------------------------------------------------------------------------------
/docs/source/instalooter/pages.rst:
--------------------------------------------------------------------------------
 1 | Pages Iterators (`instalooter.pages`)
 2 | =====================================
 3 | 
 4 | .. currentmodule:: instalooter.pages
 5 | 
 6 | .. automodule:: instalooter.pages
 7 |    :members:
 8 |    :special-members: __iter__, __next__, __length_hint__
 9 |    :show-inheritance:
10 |    :inherited-members:
11 | 


--------------------------------------------------------------------------------
/docs/source/instalooter/pbar.rst:
--------------------------------------------------------------------------------
1 | Progress Bars (`instalooter.pbar`)
2 | ==================================
3 | 
4 | .. currentmodule:: instalooter.pbar
5 | 
6 | .. automodule:: instalooter.pbar
7 |    :members:
8 | 


--------------------------------------------------------------------------------
/docs/source/instalooter/worker.rst:
--------------------------------------------------------------------------------
1 | Background Downloader (`instalooter.worker`)
2 | ============================================
3 | 
4 | .. currentmodule:: instalooter.worker
5 | 
6 | .. automodule:: instalooter.worker
7 |    :members:
8 | 


--------------------------------------------------------------------------------
/docs/source/usage.rst:
--------------------------------------------------------------------------------
  1 | Usage
  2 | =====
  3 | 
  4 | .. toctree::
  5 | 
  6 | instaLooter provides a command line interface, that you can call with
  7 | the ``instaLooter`` command.
  8 | 
  9 | .. note::
 10 | 
 11 |    In some cases, the ``instaLooter`` command is not added into
 12 |    the ``$PATH`` after installation. It is possible to perform
 13 |    all the following actions nevertheless by replacing occurrences
 14 |    of ``instaLooter`` with ``python -m instaLooter`` (or
 15 |    ``python3 -m instaLooter``).
 16 | 
 17 | Command Line Interface
 18 | ----------------------
 19 | 
 20 | Download pictures/videos from the profile of a single user:
 21 | 
 22 | .. code-block:: console
 23 | 
 24 |    $ instaLooter user <username> [<directory>] [options]
 25 | 
 26 | 
 27 | Download pictures/videos tagged with a given *#hashtag*:
 28 | 
 29 | .. code-block:: console
 30 | 
 31 |    $ instaLooter hashtag <hashtag> <directory> [options]
 32 | 
 33 | Download pictures/videos from a single post:
 34 | 
 35 | .. code-block:: console
 36 | 
 37 |    $ instaLooter post <post_token> <directory> [options]
 38 | 
 39 | Download pictures/videos in :doc:`batch`:
 40 | 
 41 | .. code-block:: console
 42 | 
 43 |    $ instaLooter batch <batch_file>
 44 | 
 45 | Positional Arguments
 46 | --------------------
 47 | 
 48 | ``username``
 49 |   the username of the Instagram profile to download pictures/videos from.
 50 | 
 51 | ``hashtag``
 52 |   the hashtag to download pictures/videos from.
 53 | 
 54 | ``post_token``
 55 |   the URL or the code of the post to download.
 56 | 
 57 | ``directory``
 58 |   the directory in which to download pictures/videos. Optional for
 59 |   profile download, will then use current directory.
 60 | 
 61 | ``batch_file``
 62 |   the path to the batch file containing batch download instructions
 63 |   (see the :doc:`batch` page for the format specification).
 64 | 
 65 | 
 66 | Options - Credentials
 67 | ---------------------
 68 | 
 69 | ``-u USER, --username USER``
 70 |   The username to connect to Instagram with.
 71 | 
 72 | ``-p PASS, --password PASS``
 73 |   The password to connect to Instagram with (will be asked in the shell
 74 |   if the ``--username`` option was given without the corresponding
 75 |   ``--password``).
 76 | 
 77 | Options - Files
 78 | ---------------
 79 | 
 80 | ``-n NUM, --num-to-dl NUM``
 81 |   Maximum number of new files to download
 82 | 
 83 | ``-j JOBS, --jobs JOBS``
 84 |   Number of parallel threads to use to download files **[default: 16]**
 85 | 
 86 | ``-T TMPL, --template TMPL``
 87 |   A filename template to use to write the files (see :ref:`Template`).
 88 |   **[default: {id}]**
 89 | 
 90 | ``-v, --get-videos``
 91 |   Get videos as well as photos
 92 | 
 93 | ``-V, --videos-only``
 94 |   Get videos only (implies ``--get-videos``)
 95 | 
 96 | ``-N, --new``
 97 |   Only look for files newer than the ones in the destination directory
 98 |   (faster).
 99 | 
100 | ``-t TIME, --time TIME``
101 |   The time limit within which to download pictures and videos
102 |   (see :ref:`Time`)
103 | 
104 | 
105 | Options - Metadata
106 | ------------------
107 | 
108 | ``-d, --dump-json``
109 |   Save metadata to a JSON file next to downloaded videos and pictures.
110 | 
111 | ``-m, --add-metadata``
112 |   Add date and caption metadata to downloaded pictures (requires
113 |   `PIL <http://www.pythonware.com/products/pil/>`_ or
114 |   `Pillow <https://python-pillow.org/>`_ as well as
115 |   `piexif <https://pypi.org/project/piexif>`_).
116 | 
117 | ``-D, --dump-only``
118 |   Save only the metadata and no video / picture.
119 | 
120 | ``-e, --extended-dump``
121 |   Always dump the maximum amount of extractable information, at the cost
122 |   of more time.
123 | 
124 | 
125 | Options - Miscellaneous
126 | -----------------------
127 | 
128 | ``-q, --quiet``
129 |   Do not produce any output
130 | 
131 | ``-h, --help``
132 |   Display the help message
133 | 
134 | ``--version``
135 |   Show program version and quit
136 | 
137 | ``--traceback``
138 |   Print error traceback if any (debug).
139 | 
140 | ``-W WARNINGCTL``
141 |   Change warning behaviour (same as ``python -W``) **[default: default]**
142 | 
143 | 
144 | .. _Template:
145 | 
146 | Template
147 | --------
148 | 
149 | The default filename of the pictures and videos on Instagram doesn't show
150 | anything about the file you just downloaded. But using the ``-T`` argument
151 | allows you to give instaLooter a filename template, using the following
152 | format with brackets-enclosed (``{}``) variable names among:
153 | 
154 | - ``id``\*\² and ``code``\²: the Instagram id and shortcode of the media
155 | - ``ownerid``\*, ``username`` and ``fullname`` of the owner
156 | - ``datetime``\*: the date and time of the post (formatted like ``2016-12-21 14h05m09s0``)
157 | - ``date``\*: the date of the post (YYYY-MM-DD)
158 | - ``width``\* and ``height``\*
159 | - ``likescount``\* and ``commentscount``\*
160 | 
161 | :\*:
162 |    use these only to quicken download, since fetching the others may take
163 |    a tad longer (in particular in hashtag download mode).
164 | 
165 | :\²:
166 |    use at least one of these in your filename to make sure the generated
167 |    filename is unique.
168 | 
169 | Examples of acceptable values:
170 | 
171 | .. code-block:: console
172 | 
173 |     $ instaLooter <profile> -T {username}.{datetime}
174 |     $ instaLooter <profile> -T {username}-{likescount}-{width}x{height}.{id}
175 |     $ instaLooter <profile> -T {username}.{code}.something_constant
176 | 
177 | 
178 | .. _Time:
179 | 
180 | Time
181 | ----
182 | 
183 | The ``--time`` parameter can be given either a combination of start and stop
184 | date in ISO format (e.g. ``2016-12-21:2016-12-18``, ``2015-03-07:``,
185 | ``:2016-08-02``) or a special value among: *thisday*, *thisweek*, *thismonth*,
186 | *thisyear*.
187 | 
188 | Edges are included in the time frame, so if using the following value:
189 | ``--time 2016-05-10:2016-04-03``, then all medias will be downloaded
190 | including the ones posted the 10th of May 2016 and the 3rd of April 2016.
191 | 
192 | .. _Credentials:
193 | 
194 | Credentials
195 | -----------
196 | 
197 | The ``--username`` and ``--password`` parameters can be used to log in to
198 | Instagram. This allows you to download pictures/videos from private profiles
199 | you are following. You can either provide your password directly
200 | or type it in later for privacy purposes.
201 | 
202 | .. code-block:: console
203 | 
204 |    $ instaLooter ... --username USERNAME --password PASSWORD
205 |    $ instaLooter ... --username USERNAME
206 |    Password: # type PASSWORD privately here
207 | 


--------------------------------------------------------------------------------
/instalooter/__init__.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | from __future__ import absolute_import
3 | from __future__ import unicode_literals
4 | 
5 | __author__ = "Martin Larralde"
6 | __author_email__ = "martin.larralde@ens-paris-saclay.fr"
7 | __version__ = "2.4.4"
8 | __license___ = "GPLv3+"
9 | 


--------------------------------------------------------------------------------
/instalooter/__main__.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | from __future__ import absolute_import
3 | 
4 | import sys
5 | from .cli import main
6 | 
7 | sys.exit(main())  # use the value returned by ``main()`` as the process exit status
8 | 


--------------------------------------------------------------------------------
/instalooter/_impl.py:
--------------------------------------------------------------------------------
 1 | # coding: utf-8
 2 | """Backports and alternative library implementations.
 3 | """
 4 | from __future__ import absolute_import
 5 | from __future__ import unicode_literals
 6 | 
 7 | import typing
 8 | 
 9 | try:
10 |     import simplejson as json
11 | except ImportError:
12 |     import json  # type: ignore
13 | 
14 | try:
15 |     import PIL.Image
16 |     import piexif
17 | except ImportError:
18 |     PIL = None
19 |     piexif = None
20 | 
21 | try:  # prefer the standard-library implementation when it is importable
22 |     from operator import length_hint
23 | except ImportError:  # otherwise define an equivalent pure-Python fallback
24 |     def length_hint(obj, default=0):  # type: ignore
25 |         # type: (typing.Any, int) -> int
26 |         """Return an estimate of the number of items in obj.
27 | 
28 |         This is useful for presizing containers when building from an
29 |         iterable.
30 | 
31 |         If the object supports len(), the result will be
32 |         exact. Otherwise, it may over- or under-estimate by an
33 |         arbitrary amount. The result will be an integer >= 0.
34 | 
35 |         See Also:
36 |             `PEP 424 <https://www.python.org/dev/peps/pep-0424/>`_
37 | 
38 |         """
39 |         try:
40 |             return len(obj)
41 |         except TypeError:
42 |             try:
43 |                 get_hint = type(obj).__length_hint__  # looked up on the type, not the instance
44 |             except AttributeError:
45 |                 return default  # the type defines no ``__length_hint__``
46 |             try:
47 |                 hint = get_hint(obj)
48 |             except TypeError:
49 |                 return default
50 |             if hint is NotImplemented:
51 |                 return default  # the hook declined to provide an estimate
52 |             if not isinstance(hint, int):
53 |                 raise TypeError("Length hint must be an integer, not %r" %
54 |                                 type(hint))
55 |             if hint < 0:
56 |                 raise ValueError("__length_hint__() should return >= 0")
57 |             return hint
58 | 
59 | 
60 | __all__ = ["PIL", "piexif", "json", "length_hint"]
61 | 


--------------------------------------------------------------------------------
/instalooter/_uadetect.py:
--------------------------------------------------------------------------------
 1 | # coding: utf-8
 2 | """An HTTP server to detect the local web browser.
 3 | """
 4 | 
 5 | import contextlib
 6 | import socket
 7 | import threading
 8 | import queue
 9 | import webbrowser
10 | 
11 | import six
12 | import pkg_resources
13 | 
14 | class UserAgentRequestHandler(six.moves.BaseHTTPServer.BaseHTTPRequestHandler):
15 |     """Handler that pushes the User-Agent header of each GET/HEAD request to the class-level ``queue``."""
16 |     def do_GET(self):
17 |         """Serve a GET request."""
18 |         self.do_HEAD()  # records the User-Agent, then sends the status line and headers
19 |         template = pkg_resources.resource_string(__name__, "static/splash.html")
20 |         page = template.decode('utf-8').format(self.headers.get("User-Agent"), self.cache)  # fills the template's two positional fields
21 |         self.wfile.write(page.encode('utf-8'))
22 | 
23 |     def do_HEAD(self):
24 |         """Serve a HEAD request."""
25 |         self.queue.put(self.headers.get("User-Agent"))  # ``queue`` is assigned on the class by ``get_user_agent``
26 |         self.send_response(six.moves.BaseHTTPServer.HTTPStatus.OK)
27 |         self.send_header("Location", self.path)
28 |         self.end_headers()
29 | 
30 |     def log_message(self, format, *args):
31 |         pass # silence the server
32 | 
33 | 
34 | def get_free_port():
35 |     with contextlib.closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
36 |         s.bind(('', 0))
37 |         s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
38 |         return s.getsockname()[1]
39 | 
40 | 
41 | def get_user_agent(port=None, cache=None):
42 |     # Setup thread-local request handler
43 |     UserAgentRequestHandler.queue = queue.Queue()
44 |     UserAgentRequestHandler.cache = cache
45 |     # Lock the request handler lock to wait for user agent to be processed.
46 |     # Use the given port or get a free one and create the HTTP server
47 |     port = port or get_free_port()
48 |     server = six.moves.BaseHTTPServer.HTTPServer(
49 |         ("localhost", port),
50 |         UserAgentRequestHandler,
51 |     )
52 |     # Launch the server thread in the background
53 |     server_thread = threading.Thread(target=server.serve_forever)
54 |     server_thread.start()
55 |     # Use webbrowser to connect to the server with the default browser
56 |     webbrowser.open("http://localhost:{}/".format(port))
57 |     # Wait for the request handler to get the request from the browser
58 |     user_agent = UserAgentRequestHandler.queue.get()
59 |     # Close the server
60 |     server.shutdown()
61 |     server.server_close()
62 |     # Return the obtained user agent
63 |     return user_agent
64 | 
65 | 
66 | if __name__ == "__main__":
67 |     print(get_user_agent())
68 | 


--------------------------------------------------------------------------------
/instalooter/_utils.py:
--------------------------------------------------------------------------------
 1 | # coding: utf-8
 2 | """Internal utility classes and functions.
 3 | """
 4 | from __future__ import absolute_import
 5 | from __future__ import unicode_literals
 6 | 
 7 | import datetime
 8 | import hashlib
 9 | import operator
10 | import os
11 | import re
12 | import typing
13 | 
14 | import six
15 | 
16 | from ._impl import json
17 | 
18 | if typing.TYPE_CHECKING:
19 |     from typing import Any, Dict, Mapping, Optional, Text
20 | 
21 | 
22 | class NameGenerator(object):
23 |     """Generator for filenames using a template.
24 |     """
25 | 
26 |     @classmethod
27 |     def _get_info(cls, media):
28 |         # type: (Mapping[Text, Any]) -> Mapping[Text, Any]
29 | 
30 |         info = {
31 |             'id': media['id'],
32 |             'code': media['shortcode'],
33 |             'ownerid': media['owner']['id'],
34 |             'username': media['owner'].get('username'),
35 |             'fullname': media['owner'].get('full_name'),
36 |             'commentscount': media.get('edge_media_to_comment', {}).get('count'),
37 |             'likescount': media.get('edge_media_preview_like', {}).get('count'),
38 |             'width': media.get('dimensions', {}).get('width'),
39 |             'height': media.get('dimensions', {}).get('height'),
40 |         }  # type: Dict[Text, Any]
41 | 
42 |         timestamp = media.get('date') or media.get('taken_at_timestamp')
43 |         if timestamp is not None:
44 |             dt = datetime.datetime.fromtimestamp(timestamp)
45 |             info['datetime'] = ("{0.year}-{0.month:02d}-{0.day:02d} {0.hour:02d}"
46 |                 "h{0.minute:02d}m{0.second:02d}s{0.microsecond}").format(dt)
47 |             info['date'] = datetime.date.fromtimestamp(timestamp)
48 | 
49 |         return dict(six.moves.filter(
50 |             operator.itemgetter(1), six.iteritems(info)))
51 | 
52 |     def __init__(self, template="{id}"):
53 |         # type: (Text) -> None
54 |         self.template = template
55 | 
56 |     def base(self, media):
57 |         # type: (Mapping[Text, Any]) -> Text
58 |         info = self._get_info(media)
59 |         return self.template.format(**info)
60 | 
61 |     def file(self, media, ext=None):
62 |         # type: (Mapping[Text, Any], Optional[Text]) -> Text
63 |         ext = ext or ("mp4" if media['is_video'] else "jpg")
64 |         return os.path.extsep.join([self.base(media), ext])
65 | 
66 |     def needs_extended(self, media):
67 |         # type: (Mapping[Text, Any]) -> bool
68 |         try:
69 |             self.base(media)
70 |             return False
71 |         except KeyError:
72 |             return True
73 | 
74 | 
75 | 
76 | def get_shared_data(html):  # decode the JSON object assigned to ``window._sharedData`` in ``html``
77 |     match = re.search(r'window._sharedData = ({[^\n]*});', html)  # greedy: captures up to the last ``});`` on that line
78 |     return json.loads(match.group(1))  # NOTE(review): ``match`` is None if the pattern is absent, which raises AttributeError here
79 | 
80 | 
81 | def get_additional_data(html):  # decode the JSON object passed to ``window.__additionalDataLoaded('/p/.../', ...)`` in ``html``
82 |     match = re.search(r"window.__additionalDataLoaded\('/p/.*/',({[^\n]*})\);", html)
83 |     return json.loads(match.group(1))  # NOTE(review): raises AttributeError when the pattern is not found (``match`` is None)
84 | 


--------------------------------------------------------------------------------
/instalooter/batch.py:
--------------------------------------------------------------------------------
  1 | # coding: utf-8
  2 | """Run several jobs sharing a session using a configuration file.
  3 | """
  4 | from __future__ import absolute_import
  5 | from __future__ import unicode_literals
  6 | 
  7 | import io
  8 | import getpass
  9 | import logging
 10 | import typing
 11 | 
 12 | import six
 13 | import verboselogs
 14 | from requests import Session
 15 | 
 16 | from .looters import HashtagLooter, ProfileLooter
 17 | from .pbar import TqdmProgressBar
 18 | 
 19 | if typing.TYPE_CHECKING:
 20 |     from typing import Any, Dict, Mapping, Optional, Text, Type, Union
 21 |     from .looter import InstaLooter
 22 | 
 23 | 
 24 | #: The module logger
 25 | logger = verboselogs.VerboseLogger(__name__)
 26 | 
 27 | 
 28 | class BatchRunner(object):
 29 |     """Run ``InstaLooter`` in batch mode, using a configuration file.
 30 |     """
 31 | 
 32 |     _CLS_MAP = {
 33 |         'users': ProfileLooter,
 34 |         'hashtag': HashtagLooter,
 35 |     }  # type: Mapping[Text, Type[InstaLooter]]
 36 | 
 37 |     def __init__(self, handle, args=None):
 38 |         # type: (Any, Optional[Mapping[Text, Any]]) -> None
 39 |         """Parse the batch configuration from ``handle`` (a path, or an object the parser can read)."""
 40 |         close_handle = False
 41 |         if isinstance(handle, six.binary_type):
 42 |             handle = handle.decode('utf-8')  # byte paths are decoded as UTF-8
 43 |         if isinstance(handle, six.text_type):
 44 |             _handle = open(handle)  # type: typing.IO
 45 |             close_handle = True  # opened here, so closed here once parsing is done
 46 |         else:
 47 |             _handle = handle  # caller-provided object: left open
 48 | 
 49 |         try:
 50 |             self.args = args or {}
 51 |             self.parser = six.moves.configparser.ConfigParser()
 52 |             getattr(self.parser, "readfp" if six.PY2 else "read_file")(_handle)  # the method name differs between Python 2 and 3
 53 |         finally:
 54 |             if close_handle:
 55 |                 _handle.close()
 56 | 
 57 |     @typing.overload
 58 |     def _getboolean(self, section_id, key, default):
 59 |         # type: (Text, Text, bool) -> bool
 60 |         pass
 61 | 
 62 |     @typing.overload
 63 |     def _getboolean(self, section_id, key):
 64 |         # type: (Text, Text) -> Optional[bool]
 65 |         pass
 66 | 
 67 |     @typing.overload
 68 |     def _getboolean(self, section_id, key, default):
 69 |         # type: (Text, Text, None) -> Optional[bool]
 70 |         pass
 71 | 
 72 |     def _getboolean(self, section_id, key, default=None):
 73 |         # type: (Text, Text, Optional[bool]) -> Optional[bool]
 74 |         if self.parser.has_option(section_id, key):  # only parse options that are actually set
 75 |             return self.parser.getboolean(section_id, key)
 76 |         return default  # option not set: fall back to the caller's default
 77 | 
 78 |     @typing.overload
 79 |     def _getint(self, section_id, key, default):
 80 |         # type: (Text, Text, None) -> Optional[int]
 81 |         pass
 82 | 
 83 |     @typing.overload
 84 |     def _getint(self, section_id, key):
 85 |         # type: (Text, Text) -> Optional[int]
 86 |         pass
 87 | 
 88 |     @typing.overload
 89 |     def _getint(self, section_id, key, default):
 90 |         # type: (Text, Text, int) -> int
 91 |         pass
 92 | 
 93 |     def _getint(self, section_id, key, default=None):
 94 |         # type: (Text, Text, Optional[int]) -> Optional[int]
 95 |         if self.parser.has_option(section_id, key):  # only parse options that are actually set
 96 |             return self.parser.getint(section_id, key)
 97 |         return default  # option not set: fall back to the caller's default
 98 | 
 99 |     @typing.overload
100 |     def _get(self, section_id, key, default):
101 |         # type: (Text, Text, None) -> Optional[Text]
102 |         pass
103 | 
104 |     @typing.overload
105 |     def _get(self, section_id, key):
106 |         # type: (Text, Text) -> Optional[Text]
107 |         pass
108 | 
109 |     @typing.overload
110 |     def _get(self, section_id, key, default):
111 |         # type: (Text, Text, Text) -> Text
112 |         pass
113 | 
114 |     def _get(self, section_id, key, default=None):
115 |         # type: (Text, Text, Optional[Text]) -> Optional[Text]
116 |         if self.parser.has_option(section_id, key):  # only read options that are actually set
117 |             return self.parser.get(section_id, key)
118 |         return default  # option not set: fall back to the caller's default
119 | 
120 |     def run_all(self):
121 |         # type: () -> None
122 |         """Run all the jobs specified in the configuration file.
123 |         """
124 |         logger.debug("Creating batch session")
125 |         session = Session()  # a single session object is passed to every job
126 | 
127 |         for section_id in self.parser.sections():  # one job per configuration section
128 |             self.run_job(section_id, session=session)
129 | 
130 |     def run_job(self, section_id, session=None):
131 |         # type: (Text, Optional[Session]) -> None
132 |         """Run a job as described in the section named ``section_id``.
133 | 
134 |         Raises:
135 |             KeyError: when the section could not be found.
136 | 
137 |         """
138 |         if not self.parser.has_section(section_id):
139 |             raise KeyError('section not found: {}'.format(section_id))
140 | 
141 |         session = session or Session()
142 | 
143 |         for name, looter_cls in six.iteritems(self._CLS_MAP):
144 | 
145 |                 targets = self.get_targets(self._get(section_id, name))
146 |                 quiet = self._getboolean(
147 |                     section_id, "quiet", self.args.get("--quiet", False))
148 | 
149 |                 if targets:
150 |                     logger.info("Launching {} job for section {}".format(name, section_id))
151 | 
152 |                 for target, directory in six.iteritems(targets):
153 |                     try:
154 |                         logger.info("Downloading {} to {}".format(target, directory))
155 |                         looter = looter_cls(
156 |                             target,
157 |                             add_metadata=self._getboolean(section_id, 'add-metadata', False),
158 |                             get_videos=self._getboolean(section_id, 'get-videos', False),
159 |                             videos_only=self._getboolean(section_id, 'videos-only', False),
160 |                             jobs=self._getint(section_id, 'jobs', 16),
161 |                             template=self._get(section_id, 'template', '{id}'),
162 |                             dump_json=self._getboolean(section_id, 'dump-json', False),
163 |                             dump_only=self._getboolean(section_id, 'dump-only', False),
164 |                             extended_dump=self._getboolean(section_id, 'extended-dump', False),
165 |                             session=session)
166 | 
167 |                         if self.parser.has_option(section_id, 'username'):
168 |                             looter.logout()
169 |                             username = self._get(section_id, 'username')
170 |                             password = self._get(section_id, 'password') or \
171 |                                 getpass.getpass('Password for "{}": '.format(username))
172 |                             looter.login(username, password)
173 | 
174 |                         n = looter.download(
175 |                             directory,
176 |                             media_count=self._getint(section_id, 'num-to-dl'),
177 |                             # FIXME: timeframe=self._get(section_id, 'timeframe'),
178 |                             new_only=self._getboolean(section_id, 'new', False),
179 |                             pgpbar_cls=None if quiet else TqdmProgressBar,
180 |                             dlpbar_cls=None if quiet else TqdmProgressBar)
181 | 
182 |                         logger.success("Downloaded %i medias !", n)
183 | 
184 |                     except Exception as exception:
185 |                         logger.error(six.text_type(exception))
186 | 
187 |     def get_targets(self, raw_string):
188 |         # type: (Optional[Text]) -> Dict[Text, Text]
189 |         """Extract targets from a string in 'key: value' format.
190 |         """
191 |         targets = {}
192 |         if raw_string is not None:
193 |             for line in raw_string.splitlines():
194 |                 if line:
195 |                     target, directory = line.split(':', 1)
196 |                     targets[target.strip()] = directory.strip()
197 |         return targets
198 | 


--------------------------------------------------------------------------------
/instalooter/cli/__init__.py:
--------------------------------------------------------------------------------
  1 | # coding: utf-8
  2 | """Implementation of the main program executable.
  3 | 
  4 | Warning:
  5 |     Only `.cli.main` and `.cli.logger` are guaranteed to be stable, do not
  6 |     rely on any other member from this package !
  7 | """
  8 | from __future__ import absolute_import
  9 | from __future__ import print_function
 10 | from __future__ import unicode_literals
 11 | 
 12 | import functools
 13 | import logging
 14 | import getpass
 15 | import os
 16 | import sys
 17 | import traceback
 18 | import warnings
 19 | 
 20 | import coloredlogs
 21 | import docopt
 22 | import fs
 23 | import six
 24 | import verboselogs
 25 | 
 26 | from .. import __version__
 27 | from ..looters import InstaLooter, HashtagLooter, ProfileLooter, PostLooter
 28 | from ..pbar import TqdmProgressBar
 29 | from ..batch import BatchRunner, logger as batch_logger
 30 | 
 31 | from . import logutils
 32 | from .constants import HELP, USAGE, WARNING_ACTIONS
 33 | from .time import get_times_from_cli
 34 | from .login import login, logger as login_logger
 35 | 
 36 | 
 37 | __all__ = ["main", "logger"]
 38 | 
 39 | 
 40 | #: A `~logging.Logger` instance used within the `.cli` module.
 41 | logger = verboselogs.VerboseLogger(__name__)
 42 | 
 43 | 
 44 | @logutils.wrap_warnings(logger)
 45 | def main(argv=None, stream=None):
 46 |     """Run from the command line interface.
 47 | 
 48 |     Arguments:
 49 |         argv (list): The positional arguments to read. Defaults to
 50 |             `sys.argv` to use CLI arguments.
 51 |         stream (~io.IOBase): A file where to write error messages.
 52 |             Leave to `None` to use the `~coloredlogs.StandardErrorHandler`
 53 |             for logs, and `sys.stderr` for error messages.
 54 | 
 55 |     Returns:
 56 |         int: An error code, or 0 if the program executed successfully.
 57 |     """
 58 | 
 59 |     _print = functools.partial(print, file=stream or sys.stderr)
 60 | 
 61 |     # Parse command line arguments
 62 |     try:
 63 |         args = docopt.docopt(
 64 |             HELP, argv, version='instalooter {}'.format(__version__))
 65 |     except docopt.DocoptExit as de:
 66 |         _print(de)
 67 |         return 1
 68 | 
 69 |     # Print usage and exit if required (docopt does not do this !)
 70 |     if args['--usage']:
 71 |         _print(USAGE)
 72 |         return 0
 73 | 
 74 |     # Set the loggers up with the requested logging level
 75 |     level = "ERROR" if args['--quiet'] else args.get("--loglevel", "INFO")
 76 |     for logger_ in (logger, login_logger, batch_logger):
 77 |         coloredlogs.install(
 78 |             level=int(level) if level.isdigit() else level,
 79 |             stream=stream,
 80 |             logger=logger_)
 81 | 
 82 |     # Check the requested logging level
 83 |     if args['-W'] not in WARNING_ACTIONS:
 84 |         _print("Unknown warning action:", args['-W'])
 85 |         _print("    available actions:", ', '.join(WARNING_ACTIONS))
 86 |         return 1
 87 | 
 88 |     with warnings.catch_warnings():
 89 |         warnings.simplefilter(args['-W'])
 90 | 
 91 |         try:
 92 |             # Run in batch mode
 93 |             if args['batch']:
 94 |                 # Load the batch configuration from the given file
 95 |                 with open(args['<batch_file>']) as batch_file:
 96 |                     batch_runner = BatchRunner(batch_file, args)
 97 |                 # Run the batch
 98 |                 batch_runner.run_all()
 99 |                 return 0
100 | 
101 |             # Login if requested
102 |             if args['login']:
103 |                 try:
104 |                     if not args['--username']:
105 |                         args['--username'] = six.moves.input('Username: ')
106 |                     login(args)
107 |                     return 0
108 |                 except ValueError as ve:
109 |                     logger.error("%s", ve)
110 |                     if args["--traceback"]:
111 |                        traceback.print_exc()
112 |                     return 1
113 | 
114 |             # Logout if requested
115 |             if args['logout']:
116 |                 if InstaLooter._cachefs().exists(InstaLooter._COOKIE_FILE):
117 |                     InstaLooter._logout()
118 |                     logger.success('Logged out.')
119 |                 else:
120 |                     warnings.warn('Cookie file not found.')
121 |                 return 0
122 | 
123 |             # Normal download mode:
124 |             if args['user']:
125 |                 looter_cls = ProfileLooter
126 |                 target = args['<profile>']
127 |             elif args['hashtag']:
128 |                 looter_cls = HashtagLooter
129 |                 target = args['<hashtag>']
130 |             elif args['post']:
131 |                 looter_cls = PostLooter
132 |                 target = args['<post_token>']
133 |             else:
134 |                 raise NotImplementedError("TODO")
135 | 
136 |             # Instantiate the looter
137 |             looter = looter_cls(
138 |                 target,
139 |                 add_metadata=args['--add-metadata'],
140 |                 get_videos=args['--get-videos'],
141 |                 videos_only=args['--videos-only'],
142 |                 jobs=int(args['--jobs']) if args['--jobs'] is not None else 16,
143 |                 template=args['--template'],
144 |                 dump_json=args['--dump-json'],
145 |                 dump_only=args['--dump-only'],
146 |                 extended_dump=args['--extended-dump']
147 |             )
148 | 
149 |             # Attempt to login and extract the timeframe
150 |             if args['--username']:
151 |                 login(args)
152 |             if args['--num-to-dl']:
153 |                 args['--num-to-dl'] = int(args['--num-to-dl'])
154 |             try:
155 |                 if args['--time'] is not None:
156 |                     args['--time'] = get_times_from_cli(args['--time'])
157 |             except ValueError as ve:
158 |                 _print("invalid format for --time parameter:", args["--time"])
159 |                 _print("    (format is [D]:[D] where D is an ISO 8601 date)")
160 |                 return 1
161 | 
162 |             logger.debug("Opening destination filesystem")
163 |             dest_url = args.get('<directory>') or os.getcwd()
164 |             dest_fs = fs.open_fs(dest_url, create=True)
165 | 
166 |             logger.notice("Starting download of `%s`", target)
167 |             n = looter.download(
168 |                 destination=dest_fs,
169 |                 media_count=args['--num-to-dl'],
170 |                 timeframe=args['--time'],
171 |                 new_only=args['--new'],
172 |                 pgpbar_cls=None if args['--quiet'] else TqdmProgressBar,
173 |                 dlpbar_cls=None if args['--quiet'] else TqdmProgressBar)
174 |             if n > 1:
175 |                 logger.success("Downloaded %i posts.", n)
176 |             elif n == 1:
177 |                 logger.success("Downloaded %i post.", n)
178 | 
179 |         except (Exception, KeyboardInterrupt) as e:
180 |             from .threadutils import threads_force_join, threads_count
181 |             # Show error traceback if any
182 |             if not isinstance(e, KeyboardInterrupt):
183 |                 logger.critical("%s", e)
184 |                 if args["--traceback"]:
185 |                     traceback.print_exc()
186 |             else:
187 |                 logger.critical("Interrupted")
188 |             # Close remaining threads spawned by InstaLooter.download
189 |             count = threads_count()
190 |             if count:
191 |                 logger.notice("Terminating %i remaining workers...", count)
192 |                 threads_force_join()
193 |             # Return the error number if any
194 |             errno = e.errno if hasattr(e, "errno") else None
195 |             return errno if errno is not None else 1
196 | 
197 |         else:
198 |             return 0
199 | 
200 |         finally:
201 |             logger.debug("Closing destination filesystem")
202 |             try:
203 |                 dest_fs.close()
204 |             except Exception:
205 |                 pass
206 | 


--------------------------------------------------------------------------------
/instalooter/cli/constants.py:
--------------------------------------------------------------------------------
  1 | # coding: utf-8
  2 | from __future__ import absolute_import
  3 | from __future__ import unicode_literals
  4 | 
  5 | import textwrap
  6 | 
  7 | 
#: The warning actions accepted as a value of the ``-W`` command line option.
WARNING_ACTIONS = {'error', 'ignore', 'always', 'default', 'module', 'once'}
  9 | 
 10 | 
 11 | HELP = textwrap.dedent(
 12 |     """
 13 |     instalooter - Another API-less Instagram media downloader
 14 | 
 15 |     Usage:
 16 |         instalooter (-h | --help | --version | --usage)
 17 |         instalooter batch <batch_file> [<directory>] [options]
 18 |         instalooter hashtag <hashtag> [<directory>] [options]
 19 |         instalooter user <profile> [<directory>] [options]
 20 |         instalooter post <post_token> [<directory>] [options]
 21 |         instalooter logout
 22 |         instalooter login [options]
 23 | 
 24 |     Arguments:
 25 |         <profile>                    The username of the profile to download
 26 |                                      pictures and optionally videos from.
 27 |         <hashtag>                    A hashtag to download pictures and
 28 |                                      optionally videos from.
 29 |         <post_token>                 Either the url or the code of a post to
 30 |                                      download the picture or video from.
 31 |         <directory>                  The directory in which to download files.
 32 |                                      Can actually be a Pyfilesystem2 FS URL
 33 |                                      (see http://pyfilesystem2.rtfd.io).
 34 |         <batch_file>                 The path to the batch file containing
 35 |                                      batch download instructions (see the
 36 |                                      online documentation).
 37 | 
 38 |     Options - Credentials:
 39 |         -u USER, --username USER     The username to connect to Instagram with.
 40 |         -p PASS, --password PASS     The password to connect to Instagram with
 41 |                                      (will be asked in the shell if the
 42 |                                      `--username`  option was given without
 43 |                                      the corresponding `--password`).
 44 | 
 45 |     Options - Files:
 46 |         -n NUM, --num-to-dl NUM      Maximum number of new files to download
 47 |         -j JOBS, --jobs JOBS         Number of parallel threads to use to
 48 |                                      download files. [default: 16]
 49 |         -T TMPL, --template TMPL     A filename template to use to write the
 50 |                                      files (see *Template*). [default: {id}]
 51 |         -v, --get-videos             Get videos as well as photos.
 52 |         -V, --videos-only            Get videos only. Implies `--get-videos`.
 53 |         -N, --new                    Only look for files newer than the ones
 54 |                                      in the destination directory (faster).
 55 |         -t TIME, --time TIME         The time limit within which to download
 56 |                                      pictures and video (see *Time*).
 57 | 
 58 |     Options - Metadata:
 59 |         -m, --add-metadata           Add date and caption metadata to downloaded
 60 |                                      pictures (requires PIL/Pillow and piexif).
 61 |         -d, --dump-json              Save metadata to a JSON file next to
 62 |                                      downloaded videos/pictures.
 63 |         -D, --dump-only              Save only the metadata and no video/picture.
 64 |                                      Implies `--dump-json`.
 65 |         -e, --extended-dump          Always dump the maximum amount of extracted
 66 |                                      information, at the cost of more time.
 67 | 
 68 |     Options - Miscellaneous:
 69 |         -l LEVEL, --loglevel LEVEL   The level of log to produce, as an
 70 |                                      integer or a level name. [default: INFO]
 71 |         -q, --quiet                  Do not display any output or progress
 72 |                                      bar. Implies `--loglevel ERROR`.
 73 |         -h, --help                   Display this message and quit.
 74 |         --version                    Show program version and quit.
 75 |         --traceback                  Print error traceback if any (use when
 76 |                                      reporting an issue on GitHub, please!).
 77 |         -W WARNINGCTL                Change warning behaviour (same as the
 78 |                                      Python `-W` flag). [default: default]
 79 | 
 80 |     Template:
 81 |         The default filename of the pictures and videos on Instagram doesn't
 82 |         show anything about the file you just downloaded. But using the -T
 83 |         argument allows you to give instalooter a filename template, using the
 84 |         the following format with brackets-enclosed ({}) variable names among:
 85 |         - ``id``*² and ``code``*² of the instagram id of the media
 86 |         - ``ownerid``*, ``username`` and ``fullname`` of the owner
 87 |         - ``datetime``*: the date and time of the post (YYYY-MM-DD hh:mm:ss)
 88 |         - ``date``*: the date of the post (YYYY-MM-DD)
 89 |         - ``width``* and ``height``*
 90 |         - ``likescount``* and ``commentscount``*
 91 | 
 92 |         ²: use at least one of these to make sure the generated file name
 93 |         is unique (``datetime`` is not unique anymore since multiple posts).
 94 | 
 95 |         *: use these only to quicken download, since fetching the others may
 96 |         take a tad longer (in particular in hashtag download mode).
 97 | 
 98 |         You are however to make sure that the generated filename is unique,
 99 |         so you should use at least id, code or datetime somewhere.
100 |         Examples of acceptable values:
101 |             - {username}.{datetime}.{code}
102 |             - {username}-{likescount}-{width}x{height}.{id}
103 | 
104 |     Time:
105 |         The --time parameter can be given either a combination of start and stop
106 |         date in ISO format (e.g. 2016-12-21:2016-12-18, 2015-03-07:, :2016-08-02)
107 |         or a special value among: "thisday", "thisweek", "thismonth", "thisyear".
108 | 
109 |         Edges are included in the time frame, so if using the following value:
110 |         `--time 2016-05-10:2016-04-03`, then all medias will be downloaded
111 |         including the ones posted the 10th of May 2016 and the 3rd of April 2016.
112 | 
113 |     See more at http://instalooter.readthedocs.io/en/latest/usage.html
114 | 
115 |     """
116 | )
117 | 
118 | 
#: The usage summary: the first blank-line separated paragraph of `HELP`
#: that starts with ``Usage``.
USAGE = next(s for s in HELP.split("\n\n") if s.startswith("Usage"))
120 | 


--------------------------------------------------------------------------------
/instalooter/cli/login.py:
--------------------------------------------------------------------------------
 1 | # coding: utf-8
 2 | from __future__ import absolute_import
 3 | from __future__ import unicode_literals
 4 | 
 5 | import getpass
 6 | import logging
 7 | 
 8 | import verboselogs
 9 | 
10 | from ..looters import InstaLooter
11 | 
12 | 
#: The logger used by `login` to report the outcome of a login attempt.
logger = verboselogs.VerboseLogger(__name__)
14 | 
15 | 
def login(args):
    """Log in with the credentials found in the parsed CLI arguments.

    Nothing happens when ``args['--username']`` is empty. Otherwise, if
    `InstaLooter._logged_in` reports no active login, the password is read
    from ``args['--password']`` (or asked with `getpass.getpass` when that
    is empty) and passed to `InstaLooter._login`. A success message is
    logged unless ``args['--quiet']`` is set.

    Arguments:
        args (dict): the command line arguments; the ``--username``,
            ``--password`` and ``--quiet`` keys are read.

    """
    username = args['--username']
    if not username:
        return

    if InstaLooter._logged_in():
        message = "Already logged in."
    else:
        password = args['--password'] or getpass.getpass()
        InstaLooter._login(username, password)
        message = 'Logged in.'

    if not args['--quiet']:
        logger.success(message)
26 | 


--------------------------------------------------------------------------------
/instalooter/cli/logutils.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # coding: utf-8
 3 | from __future__ import absolute_import
 4 | from __future__ import print_function
 5 | from __future__ import unicode_literals
 6 | 
 7 | import functools
 8 | import logging
 9 | import warnings
10 | import typing
11 | 
12 | if typing.TYPE_CHECKING:
13 |     from typing import Callable
14 | 
15 | 
def warn_logging(logger):
    # type: (logging.Logger) -> Callable
    """Build a `warnings.showwarning` replacement backed by a logger.

    Arguments:
        logger (~logging.Logger): the logger receiving the warnings.

    Returns:
        function: a callback with the `warnings.showwarning` signature
            that passes the warning message to ``logger.warning``.

    """
    def log_warning(message, category, filename, lineno, file=None, line=None):
        # Only the message is forwarded: the remaining parameters exist to
        # match the callback signature and are not used.
        logger.warning(message)

    return log_warning
31 | 
32 | 
def wrap_warnings(logger):
    """Create a decorator routing warnings to a logger during a call.

    While the decorated function runs, `warnings.showwarning` is replaced
    with the callback built by `warn_logging`; the previous callback is
    put back when the function returns or raises.

    Arguments:
        logger (~logging.logger): the logger to wrap warnings with when
            the decorated function is called.

    Returns:
        `function`: a decorator function.

    """
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # Remember the current callback, then install the logging one
            previous, warnings.showwarning = (
                warnings.showwarning, warn_logging(logger))
            try:
                return func(*args, **kwargs)
            finally:
                warnings.showwarning = previous
        return wrapper
    return decorator
55 | 


--------------------------------------------------------------------------------
/instalooter/cli/threadutils.py:
--------------------------------------------------------------------------------
 1 | # coding: utf-8
 2 | from __future__ import absolute_import
 3 | 
 4 | import threading
 5 | 
 6 | from ..worker import InstaDownloader
 7 | 
 8 | 
 9 | def threads_force_join():
10 |     for t in threading.enumerate():
11 |         if isinstance(t, InstaDownloader):
12 |             t.terminate()
13 |             t.join()
14 | 
15 | 
def threads_count():
    """Count the threads listed by `threading.enumerate` that are
    `InstaDownloader` instances.
    """
    workers = [
        thread for thread in threading.enumerate()
        if isinstance(thread, InstaDownloader)
    ]
    return len(workers)
18 | 


--------------------------------------------------------------------------------
/instalooter/cli/time.py:
--------------------------------------------------------------------------------
 1 | # coding: utf-8
 2 | from __future__ import absolute_import
 3 | from __future__ import unicode_literals
 4 | 
 5 | import datetime
 6 | 
 7 | import dateutil.relativedelta
 8 | 
 9 | 
def date_from_isoformat(isoformat_date):
    """Build a `datetime.date` from a ``YYYY-MM-DD`` string.

    Argument:
        isoformat_date (str): a date in ISO-8601 format (YYYY-MM-DD)

    Returns:
        ~datetime.date: the object corresponding to the given ISO date.

    Raises:
        ValueError: when the string does not have exactly three
            dash-separated fields, when a field is not an integer, or
            when the fields do not form a valid date.

    See Also:
        `ISO-8601 specification <https://en.wikipedia.org/wiki/ISO_8601>`_.
    """
    # Unpacking enforces the three-fields layout before any conversion
    year, month, day = isoformat_date.split('-')
    return datetime.date(*(int(field) for field in (year, month, day)))
27 | 
28 | 
def get_times_from_cli(cli_token):
    """Convert a CLI token to a couple of dates.

    Argument:
        cli_token (str): an isoformat date token ([ISO date]:[ISO date])
            or a special value among:
                * thisday
                * thisweek
                * thismonth
                * thisyear

    Returns:
        tuple: a couple of `datetime.date` objects (or `None`). When both
            dates are given, the most recent one comes first, whatever
            their order in the token. When a single date is given it is
            returned at the opposite position (``A:`` gives ``(None, A)``
            and ``:B`` gives ``(B, None)``), and ``:`` gives
            ``(None, None)``. Special values give today's date followed
            by today's date minus respectively nothing, 7 days, 1 month
            or 1 year.

    Raises:
        ValueError: when the CLI token is not in the right format
            (no colon in the token, not one of the special values, dates
            are not in proper ISO-8601 format.)

    See Also:
        `ISO-8601 specification <https://en.wikipedia.org/wiki/ISO_8601>`_.

    """
    today = datetime.date.today()

    # Special values: a time frame ending today
    if cli_token == "thisday":
        return today, today
    spans = (
        ("thisweek", {"days": 7}),
        ("thismonth", {"months": 1}),
        ("thisyear", {"years": 1}),
    )
    for keyword, span in spans:
        if cli_token == keyword:
            return today, today - dateutil.relativedelta.relativedelta(**span)

    # Explicit time frame: exactly two (possibly empty) dates
    bounds = cli_token.split(':')
    if len(bounds) != 2:
        raise ValueError("--time parameter must contain a colon (:)")
    first, second = bounds
    if not (first or second):  # ':', no start date, no stop date
        return None, None

    try:
        first = date_from_isoformat(first) if first else None
        second = date_from_isoformat(second) if second else None
    except ValueError:
        raise ValueError("--time parameter was not provided ISO formatted dates")

    if first is None or second is None:
        return second, first
    return max(first, second), min(first, second)
81 | 


--------------------------------------------------------------------------------
/instalooter/looters.py:
--------------------------------------------------------------------------------
  1 | # coding: utf-8
  2 | """Instagram looters implementations.
  3 | """
  4 | from __future__ import absolute_import
  5 | from __future__ import unicode_literals
  6 | 
  7 | import abc
  8 | import atexit
  9 | import copy
 10 | import functools
 11 | import random
 12 | import re
 13 | import threading
 14 | import time
 15 | import typing
 16 | import warnings
 17 | 
 18 | import fs
 19 | import six
 20 | from requests import Session
 21 | from six.moves.queue import Queue
 22 | from six.moves.http_cookiejar import FileCookieJar, LWPCookieJar
 23 | 
 24 | from . import __author__, __name__ as __appname__, __version__
 25 | from ._impl import length_hint, json
 26 | from ._uadetect import get_user_agent
 27 | from ._utils import NameGenerator, get_shared_data, get_additional_data
 28 | from .medias import TimedMediasIterator, MediasIterator
 29 | from .pages import ProfileIterator, HashtagIterator
 30 | from .pbar import ProgressBar
 31 | from .worker import InstaDownloader
 32 | 
 33 | if typing.TYPE_CHECKING:
 34 |     from datetime import datetime
 35 |     from typing import (
 36 |         Any, Callable, Dict, Iterator, Iterable, List,
 37 |         Optional, Text, Tuple, Type, Union)
 38 |     from fs.base import FS
 39 |     from six.moves.http_cookiejar import CookieJar
 40 |     _T = typing.TypeVar("_T")
 41 |     _Timeframe = Tuple[Optional[datetime], Optional[datetime]]
 42 | 
 43 | 
 44 | __all__ = [
 45 |     "InstaLooter",
 46 |     "ProfileLooter",
 47 |     "HashtagLooter",
 48 |     "PostLooter",
 49 | ]
 50 | 
 51 | 
 52 | @six.add_metaclass(abc.ABCMeta)
 53 | class InstaLooter(object):
 54 |     """A brutal Instagram looter that raids without API tokens.
 55 |     """
 56 | 
 57 |     @classmethod
 58 |     def _cachefs(cls):
 59 |         # type: () -> FS
 60 |         """Get the a persistent filesystem to store the program cache.
 61 |         """
 62 |         url = "usercache://{}:{}:{}".format(__appname__, __author__, __version__)
 63 |         return fs.open_fs(url, create=True)
 64 | 
 65 |     @classmethod
 66 |     def _user_agent(cls):
 67 |         # type: () -> Text
 68 |         """Get the user agent of the default web browser on the local machine.
 69 |         """
 70 |         cache = cls._cachefs()
 71 |         if not cache.isfile(cls._USERAGENT_FILE):
 72 |             ua = get_user_agent(cache=cache.getsyspath(cls._USERAGENT_FILE))
 73 |             if ua is None:
 74 |                 warnings.warn("Could not detect user agent, using default")
 75 |                 ua = "Mozilla/5.0 (X11; Linux x86_64; rv:66.0) Gecko/20100101 Firefox/66.0"
 76 |             with cache.open("user-agent.txt", "w") as f:
 77 |                 f.write(ua)
 78 |         with cache.open(cls._USERAGENT_FILE) as f:
 79 |             return f.read()
 80 | 
 81 |     # str: The name of the user agent file in the cache filesystem
 82 |     _USERAGENT_FILE = "user-agent.txt"
 83 | 
 84 |     # str: The name of the cookie file in the cache filesystem
 85 |     _COOKIE_FILE = "cookies.txt"
 86 | 
 87 |     @classmethod
 88 |     def _init_session(cls, session=None):
 89 |         # type: (Optional[Session]) -> Session
 90 |         """Initialise the given session and load class cookies to its jar.
 91 | 
 92 |         Arguments:
 93 |             session (~requests.Session, optional): a `requests`
 94 |                 session, or `None` to create a new one.
 95 | 
 96 |         Returns:
 97 |             ~requests.Session: an initialised session instance.
 98 | 
 99 |         """
100 |         session = session or Session()
101 |         # Load cookies
102 |         path = cls._cachefs().getsyspath(cls._COOKIE_FILE)
103 |         session.cookies = LWPCookieJar(path)  # type: ignore
104 |         try:
105 |             typing.cast(FileCookieJar, session.cookies).load()
106 |         except IOError:
107 |             pass
108 |         session.cookies.clear_expired_cookies()  # type: ignore
109 |         return session
110 | 
111 |     @classmethod
112 |     def _login(cls, username, password, session=None):
113 |         # type: (str, str, Optional[Session]) -> None
114 |         """Login with provided credentials and session.
115 | 
116 |         Arguments:
117 |             username (str): the username to log in with.
118 |             password (str): the password to log in with.
119 |             session (~requests.Session, optional): the session to use,
120 |                 or `None` to create a new session.
121 | 
122 |         Note:
123 |             Code taken from LevPasha/instabot.py
124 | 
125 |         """
126 |         session = cls._init_session(session)
127 |         headers = copy.deepcopy(session.headers)
128 |         homepage = "https://www.instagram.com/"
129 |         login_url = "https://www.instagram.com/accounts/login/ajax/"
130 |         enc_password = "#PWD_INSTAGRAM_BROWSER:0:{}:{}".format(time.time(), password)
131 |         data = {'username': username, 'enc_password': enc_password}
132 | 
133 |         try:
134 |             session.headers.update({
135 |                 'Accept-Encoding': 'gzip, deflate',
136 |                 'Accept-Language': 'en-US,en;q=0.8',
137 |                 'Connection': 'keep-alive',
138 |                 'Content-Length': '0',
139 |                 'Host': 'www.instagram.com',
140 |                 'Origin': 'https://www.instagram.com',
141 |                 'Referer': 'https://www.instagram.com',
142 |                 'User-Agent': cls._user_agent(),
143 |                 'X-Instagram-AJAX': '1',
144 |                 'X-Requested-With': 'XMLHttpRequest'
145 |             })
146 | 
147 |             with session.get(homepage) as res:
148 |                 token = get_shared_data(res.text)['config']['csrf_token']
149 |                 session.headers.update({'X-CSRFToken': token})
150 | 
151 |             time.sleep(5 * random.random())  # nosec
152 |             with session.post(login_url, data, allow_redirects=True) as login:
153 |                 token = next(c.value for c in login.cookies if c.name == 'csrftoken')
154 |                 session.headers.update({'X-CSRFToken': token})
155 |                 if not login.ok:
156 |                     raise SystemError("Login error: check your connection")
157 |                 data = json.loads(login.text)
158 |                 if not data.get('authenticated', False):
159 |                     raise ValueError('Login error: check your login data')
160 | 
161 |             time.sleep(5 * random.random())  # nosec
162 |             with session.get(homepage) as res:
163 |                 if res.text.find(username) == -1:
164 |                     raise ValueError('Login error: check your login data')
165 |                 try:
166 |                     typing.cast(FileCookieJar, session.cookies).save()
167 |                 except IOError:
168 |                     pass
169 | 
170 |         finally:
171 |             session.headers = headers
172 | 
173 |     @classmethod
174 |     def _logout(cls, session=None):
175 |         # type: (Optional[Session]) -> None
176 |         """Log out from current session.
177 | 
178 |         Also deletes the eventual cookie file left in the cache directory,
179 |         to prevent new connections from using the old session ID.
180 | 
181 |         Arguments:
182 |             session (~requests.Session): the session to use, or `None`
183 |                 to create a new session.
184 | 
185 |         Note:
186 |             Code taken from LevPasha/instabot.py
187 | 
188 |         """
189 |         session = cls._init_session(session)
190 |         sessionid = cls._sessionid(session)
191 |         if sessionid is not None:
192 |             url = "https://www.instagram.com/accounts/logout/"
193 |             session.post(url, data={"csrfmiddlewaretoken": sessionid})
194 | 
195 |         cache = cls._cachefs()
196 |         if cache.exists(cls._COOKIE_FILE):
197 |             cache.remove(cls._COOKIE_FILE)
198 | 
199 |     @classmethod
200 |     def _logged_in(cls, session=None):
201 |         # type: (Optional[Session]) -> bool
202 |         """Check if there is an open Instagram session.
203 | 
204 |         Arguments:
205 |             session (~requests.Session): the session to use, or `None`
206 |                 to create a new session.
207 | 
208 |         Returns:
209 |             bool: `True` if there's an active session, `False` otherwise.
210 | 
211 |         """
212 |         return cls._sessionid(session) is not None
213 | 
214 |     @classmethod
215 |     def _sessionid(cls, session=None):
216 |         # type: (Optional[Session]) -> Optional[Text]
217 |         """Get the ID of the currently opened Instagram session.
218 | 
219 |         Arguments:
220 |             session (~requests.Session): the session to use, or `None`
221 |                 to create a new session.
222 | 
223 |         Returns:
224 |             str or None: the session ID, if any, or `None`.
225 | 
226 |         """
227 |         _session = cls._init_session(session)
228 |         _cookies = typing.cast(FileCookieJar, _session.cookies)
229 |         return next((ck.value for ck in _cookies
230 |                      if ck.domain == ".instagram.com"
231 |                      and ck.name == "ds_user_id"
232 |                      and ck.path == "/"), None)
233 | 
234 |     def __init__(self,
235 |                  add_metadata=False,    # type: bool
236 |                  get_videos=False,      # type: bool
237 |                  videos_only=False,     # type: bool
238 |                  jobs=16,               # type: int
239 |                  template="{id}",       # type: Text
240 |                  dump_json=False,       # type: bool
241 |                  dump_only=False,       # type: bool
242 |                  extended_dump=False,   # type: bool
243 |                  session=None           # type: Optional[Session]
244 |                  ):
245 |         # type: (...) -> None
246 |         """Create a new looter instance.
247 | 
248 |         Arguments:
249 |             add_metadata (bool): Add date and comment metadata to
250 |                 the downloaded pictures.
251 |             get_videos (bool): Also get the videos from the given target.
252 |             videos_only (bool): Only download videos (implies
253 |                 ``get_videos=True``).
254 |             jobs (bool): the number of parallel threads to use to
255 |                 download media (12 or more is advised to have a true parallel
256 |                 download of media files).
257 |             template (str): a filename format, in Python new-style-formatting
258 |                 format. See the the :ref:`Template` page of the documentation
259 |                 for available keys.
260 |             dump_json (bool): Save each resource metadata to a
261 |                 JSON file next to the actual image/video.
262 |             dump_only (bool): Only save metadata and discard the actual
263 |                 resource.
264 |             extended_dump (bool): Attempt to fetch as much metadata as
265 |                 possible, at the cost of more time. Set to `True` if, for
266 |                 instance, you always want the top comments to be downloaded
267 |                 in the dump.
268 |             session (~requests.Session or None): a `requests` session,
269 |                 or `None` to create a new one.
270 | 
271 |         """
272 |         self.add_metadata = add_metadata
273 |         self.get_videos = get_videos or videos_only
274 |         self.videos_only = videos_only
275 |         self.jobs = jobs
276 |         self.namegen = NameGenerator(template)
277 |         self.dump_only = dump_only
278 |         self.dump_json = dump_json or dump_only
279 |         self.extended_dump = extended_dump
280 |         self.session = self._init_session(session)
281 |         atexit.register(self.session.close)
282 | 
283 |         # Set the default webbrowser user agent
284 |         if self.session.headers['User-Agent'].startswith('python-requests'):
285 |             self.session.headers['User-Agent'] = self._user_agent()
286 | 
287 |         # Get CSRFToken and RHX
288 |         with self.session.get('https://www.instagram.com/') as res:
289 |             token = get_shared_data(res.text)['config']['csrf_token']
290 |             self.session.headers['X-CSRFToken'] = token
291 |             self.rhx = get_shared_data(res.text).get('rhx_gis', '')
292 | 
293 |     @abc.abstractmethod
294 |     def pages(self):
295 |         # type: () -> Iterator[Dict[Text, Any]]
296 |         """Obtain an iterator over Instagram post pages.
297 | 
298 |         Returns:
299 |             PageIterator: an iterator over the instagram post pages.
300 | 
301 |         """
302 |         return NotImplemented
303 | 
304 |     def _medias(self,
305 |                 pages_iterator,     # type: Iterable[Dict[Text, Any]]
306 |                 timeframe=None      # type: Optional[_Timeframe]
307 |                 ):
308 |         # type: (...) -> Iterator[Dict[Text, Any]]
309 |         """Obtain an iterator over the medias of the given pages iterator.
310 | 
311 |         Arguments:
312 |             pages_iterator (Iterator): an iterator over the Instagram
313 |                 pages, returned by `InstaLooter.pages`
314 | 
315 |         Returns:
316 |             MediasIterator: an iterator over the medias in every pages.
317 | 
318 |         """
319 |         if timeframe is not None:
320 |             return TimedMediasIterator(pages_iterator, timeframe)
321 |         return MediasIterator(pages_iterator)
322 | 
323 |     def medias(self, timeframe=None):
324 |         # type: (Optional[_Timeframe]) -> Iterator[Dict[Text, Any]]
325 |         """Obtain an iterator over the Instagram medias.
326 | 
327 |         Wraps the iterator returned by `InstaLooter.pages` to seamlessly
328 |         iterate over the medias of all the pages.
329 | 
330 |         Returns:
331 |             MediasIterator: an iterator over the medias in every pages.
332 | 
333 |         """
334 |         return self._medias(self.pages(), timeframe)
335 | 
336 |     def get_post_info(self, code):
337 |         # type: (str) -> dict
338 |         """Get media information from a given post code.
339 | 
340 |         Arguments:
341 |             code (str): the code of the post (can be obtained either
342 |                 from the ``shortcode`` attribute of media dictionaries, or
343 |                 from a post URL: ``https://www.instagram.com/p/<code>/``)
344 | 
345 |         Returns:
346 |             dict: a media dictionaries, in the format used by Instagram.
347 | 
348 |         """
349 |         url = "https://www.instagram.com/p/{}/".format(code)
350 |         with self.session.get(url) as res:
351 |             data = get_shared_data(res.text)
352 |             if 'graphql' in data['entry_data']['PostPage'][0]:
353 |                 return data['entry_data']['PostPage'][0]['graphql']['shortcode_media']
354 |             data = get_additional_data(res.text)
355 |             return data['graphql']['shortcode_media']
356 | 
357 |     def download_pictures(self,
358 |                           destination,       # type: Union[str, fs.base.FS]
359 |                           media_count=None,  # type: Optional[int]
360 |                           timeframe=None,    # type: Optional[_Timeframe]
361 |                           new_only=False,    # type: bool
362 |                           pgpbar_cls=None,   # type: Optional[Type[ProgressBar]]
363 |                           dlpbar_cls=None    # type: Optional[Type[ProgressBar]]
364 |                           ):
365 |         # type: (...) -> int
366 |         """Download all the pictures to the provided destination.
367 | 
368 |         Actually a shortcut for `.download` with ``condition`` set
369 |         to accept only images.
370 | 
371 |         """
372 |         return self.download(
373 |             destination,
374 |             condition=lambda media: not media["is_video"],
375 |             media_count=media_count,
376 |             timeframe=timeframe,
377 |             new_only=new_only,
378 |             pgpbar_cls=pgpbar_cls,
379 |             dlpbar_cls=dlpbar_cls,
380 |         )
381 | 
382 |     def download_videos(self,
383 |                         destination,       # type: Union[str, fs.base.FS]
384 |                         media_count=None,  # type: Optional[int]
385 |                         timeframe=None,    # type: Optional[_Timeframe]
386 |                         new_only=False,    # type: bool
387 |                         pgpbar_cls=None,   # type: Optional[Type[ProgressBar]]
388 |                         dlpbar_cls=None,   # type: Optional[Type[ProgressBar]]
389 |                         ):
390 |         # type: (...) -> int
391 |         """Download all videos to the provided destination.
392 | 
393 |         Actually a shortcut for `.download` with ``condition`` set
394 |         to accept only videos.
395 | 
396 |         """
397 |         return self.download(
398 |             destination,
399 |             condition=lambda media: media["is_video"],
400 |             media_count=media_count,
401 |             timeframe=timeframe,
402 |             new_only=new_only,
403 |             pgpbar_cls=pgpbar_cls,
404 |             dlpbar_cls=dlpbar_cls,
405 |         )
406 | 
    def download(self,
                 destination,           # type: Union[str, fs.base.FS]
                 condition=None,        # type: Optional[Callable[[dict], bool]]
                 media_count=None,      # type: Optional[int]
                 timeframe=None,        # type: Optional[_Timeframe]
                 new_only=False,        # type: bool
                 pgpbar_cls=None,       # type: Optional[Type[ProgressBar]]
                 dlpbar_cls=None,       # type: Optional[Type[ProgressBar]]
                 ):
        # type: (...) -> int
        """Download all medias passing ``condition`` to destination.

        The medias are discovered in the calling thread and pushed to a
        queue consumed by ``self.jobs`` background workers; this method
        returns once all the workers have been joined.

        Arguments:
            destination (~fs.base.FS or str): the filesystem where to
                store the downloaded files, as a filesystem instance or
                FS URL.
            condition (function): the condition to filter the
                medias with. If `None` is given, a function is created using
                the ``get_videos`` and ``videos_only`` passed at object
                initialisation.
            media_count (int or None): the maximum number of medias
                to download. Leave to ``None`` to download everything from
                the target. *Note that more files can be downloaded, since
                a post with multiple images/videos is considered to be a
                single media*.
            timeframe (tuple or None): a tuple of two `~datetime.datetime`
                objects to enforce a time frame (the first item must be
                more recent). Leave to `None` to ignore times.
            new_only (bool): stop media discovery when already
                downloaded medias are encountered.
            pgpbar_cls (type or None): an optional `~.pbar.ProgressBar`
                subclass to use to display page scraping progress.
            dlpbar_cls (type or None): an optional `~.pbar.ProgressBar`
                subclass to use to display file download progress.

        Returns:
            int: the number of queued medias.

            May not be equal to the number of downloaded medias if some
            errors occurred during background download.

        Raises:
            TypeError: when a progress bar class is not a `ProgressBar`
                subclass, or when ``destination`` is neither a FS URL
                nor a filesystem instance.

        """
        # Open the destination filesystem (it is only closed at the end
        # if it was opened here, i.e. when a FS URL was given)
        destination, close_destination = self._init_destfs(destination)

        # Create an iterator over the pages with an optional progress bar
        # (when `pgpbar_cls` is None, `pgpbar` is the bare pages iterator)
        pages_iterator = self.pages()   # type: Iterable[Dict[Text, Any]]
        pages_iterator = pgpbar = self._init_pbar(pages_iterator, pgpbar_cls)

        # Create an iterator over the medias
        medias_iterator = self._medias(iter(pages_iterator), timeframe)

        # Create the media download bar from a dummy iterator, sized with
        # the length hint of the medias iterator
        dlpbar = self._init_pbar(
            six.moves.range(length_hint(medias_iterator)), dlpbar_cls)

        # Start a group of workers (they only get a progress bar if one
        # was actually requested)
        workers, queue = self._init_workers(
            dlpbar if dlpbar_cls is not None else None, destination)

        # Make sure exiting the main thread will shutdown workers
        # NOTE(review): the handler is registered on every call and never
        # unregistered here, so it keeps a reference to `workers`.
        atexit.register(self._shutdown_workers, workers)

        # Queue all medias
        medias_queued = self._fill_media_queue(
            queue, destination, medias_iterator, media_count,
            new_only, condition)

        # Once queuing the medias is finished, finish the page progress bar
        # and set a new maximum on the download progress bar.
        if pgpbar_cls is not None:
            pgpbar.finish()                         # type: ignore
        if dlpbar_cls is not None:
            dlpbar.set_maximum(medias_queued)       # type: ignore

        # If no medias were queued, issue a warning
        # TODO: refine warning depending on download parameters
        if medias_queued == 0:
            warnings.warn("No medias found.")

        # Add poison pills to the queue and wait for workers to finish
        self._poison_workers(workers, queue)
        self._join_workers(workers, queue)

        # Once downloading is finished, finish the download progress bar
        # and close the destination if needed.
        # NOTE(review): this cleanup is skipped if an exception is raised
        # earlier in this method.
        if dlpbar_cls is not None:
            dlpbar.finish()                        # type: ignore
        if close_destination:
            destination.close()

        return medias_queued
499 | 
500 |     def login(self, username, password):
501 |         # type: (str, str) -> None
502 |         """Log the instance in using the given credentials.
503 | 
504 |         Arguments:
505 |             username (str): the username to log in with.
506 |             password (str): the password to log in with.
507 | 
508 |         """
509 |         self._login(username, password, session=self.session)
510 | 
511 |     def logout(self):
512 |         # type: () -> None
513 |         """Log the instance out from the currently opened session.
514 |         """
515 |         self._logout(session=self.session)
516 | 
517 |     def logged_in(self):
518 |         # type: () -> bool
519 |         """Check if there's an open Instagram session.
520 |         """
521 |         return self._logged_in(self.session)
522 | 
523 |     def _init_pbar(self,
524 |                    it,             # type: Iterable[_T]
525 |                    pbar_cls=None,  # type: Optional[Type[ProgressBar]]
526 |                    ):
527 |         # type: (...) -> Iterable[_T]
528 |         """Wrap an iterable within a `ProgressBar`.
529 | 
530 |         Arguments:
531 |             it (~collections.Iterable): an iterable to wrap.
532 |             pgpbar_cls (type or None): an optional `ProgressBar` subclass
533 |                 to use, or `None` to avoid using a progress bar.
534 | 
535 |         Returns:
536 |             ~collections.Iterable: the wrapped iterable.
537 | 
538 |         """
539 |         if pbar_cls is not None:
540 |             if not issubclass(pbar_cls, ProgressBar):
541 |                 raise TypeError("pbar must implement the ProgressBar interface !")
542 |             maximum = length_hint(it)
543 |             it = pbar = pbar_cls(it)
544 |             pbar.set_maximum(maximum)
545 |             pbar.set_lock(threading.RLock())
546 |         return it
547 | 
548 |     def _init_destfs(self, destination, create=True):
549 |         # type: (Union[str, fs.base.FS], bool) -> Tuple[fs.base.FS, bool]
550 |         """Open a filesystem either from a FS URL or filesystem instance.
551 | 
552 |         Arguments:
553 |             destination (~fs.base.FS or str): the destination filesystem
554 |                 to open, as a filesystem instance or FS URL.
555 |             create (bool): whether or not to create a new
556 |                 filesystem if it does not exist.
557 | 
558 |         Returns:
559 |             (~fs.base.FS, bool): the open FS, and whether to close it.
560 | 
561 |         """
562 |         close_destination = False
563 |         if isinstance(destination, six.binary_type):
564 |             destination = destination.decode('utf-8')
565 |         if isinstance(destination, six.text_type):
566 |             destination = fs.open_fs(destination, create=create)
567 |             close_destination = True
568 |         if not isinstance(destination, fs.base.FS):
569 |             raise TypeError("<destination> must be a FS URL or FS instance.")
570 |         return destination, close_destination
571 | 
572 |     def _fill_media_queue(self,
573 |                           queue,            # type: Queue
574 |                           destination,      # type: fs.base.FS
575 |                           medias_iter,      # type: Iterable[Any]
576 |                           media_count=None,  # type: Optional[int]
577 |                           new_only=False,   # type: bool
578 |                           condition=None,   # type: Optional[Callable[[dict], bool]]
579 |                           ):
580 |         # type: (...) -> int
581 |         """Fill the download queue with medias from the provided iterator.
582 | 
583 |         Arguments:
584 |             queue (~queue.Queue): the download queue to fill.
585 |             destination (~fs.base.FS): the filesystem where to download
586 |                 the files.
587 |             medias_iterator (~collections.Iterable): an iterable over the
588 |                 Instagram medias to download.
589 |             media_count (int or None): the maximum number of new medias to
590 |                 download, or ``None`` to download all discoverable medias.
591 |             new_only (bool): stop media discovery when a media that was
592 |                 already downloaded is encountered.
593 |             condition (function or None): the condition to filter the medias
594 |                 with. If `None` is given, a function is created using the
595 |                 ``get_videos`` and ``videos_only`` passed at object
596 |                 initialisation.
597 | 
598 |         Returns:
599 |             int: the number of queued medias.
600 | 
601 |             May not be equal to the number of downloaded medias if some
602 |             errors occurred during downloads.
603 | 
604 |         """
605 |         # Create a condition from parameters if needed
606 |         if condition is not None:
607 |             _condition = condition       # type: Callable[[dict], bool]
608 |         else:
609 |             if self.videos_only:
610 |                 def _condition(media): return media['is_video']
611 |             elif not self.get_videos:
612 |                 def _condition(media): return not media['is_video']
613 |             else:
614 |                 def _condition(media): return True
615 | 
616 |         # Queue all media filling the condition
617 |         medias_queued = 0
618 |         for media in six.moves.filter(_condition, medias_iter):
619 | 
620 |             # Check if the whole post info is required
621 |             if self.namegen.needs_extended(media) or media["__typename"] != "GraphImage":
622 |                 media = self.get_post_info(media['shortcode'])
623 | 
624 |             # Check that sidecar children fit the condition
625 |             if media['__typename'] == "GraphSidecar":
626 |                 # Check that each node fits the condition
627 |                 for sidecar in media['edge_sidecar_to_children']['edges'][:]:
628 |                     if not _condition(sidecar['node']):
629 |                         media['edge_sidecar_to_children']['edges'].remove(sidecar)
630 | 
631 |                 # Check that the nodelist is not depleted
632 |                 if not media['edge_sidecar_to_children']['edges']:
633 |                     continue
634 | 
635 |             # Check that the file does not exist
636 |             # FIXME: not working well with sidecar
637 |             if new_only and destination.exists(self.namegen.file(media)):
638 |                 break
639 | 
640 |             # Put the medias in the queue
641 |             queue.put(media)
642 |             medias_queued += 1
643 | 
644 |             if media_count is not None and medias_queued >= media_count:
645 |                 break
646 | 
647 |         return medias_queued
648 | 
649 |     # WORKERS UTILS
650 | 
651 |     def _init_workers(self,
652 |                       pbar,         # type: Union[ProgressBar, Iterable, None]
653 |                       destination,  # type: fs.base.FS
654 |                       ):
655 |         # type: (...) -> Tuple[List[InstaDownloader], Queue]
656 | 
657 |         workers = []        # type: List[InstaDownloader]
658 |         queue = Queue()     # type: Queue
659 | 
660 |         for _ in six.moves.range(self.jobs):
661 |             worker = InstaDownloader(
662 |                 queue=queue,
663 |                 destination=destination,
664 |                 namegen=self.namegen,
665 |                 add_metadata=self.add_metadata,
666 |                 dump_json=self.dump_json,
667 |                 dump_only=self.dump_only,
668 |                 pbar=pbar,
669 |                 session=self.session)
670 |             worker.start()
671 |             workers.append(worker)
672 | 
673 |         return workers, queue
674 | 
675 |     def _poison_workers(self, workers, queue):
676 |         # type: (List[InstaDownloader], Queue) -> None
677 |         for worker in workers:
678 |             queue.put(None)
679 | 
680 |     def _join_workers(self, workers, queue):
681 |         # type: (List[InstaDownloader], Queue) -> None
682 |         if any(w.is_alive() for w in workers):
683 |             for worker in workers:
684 |                 worker.join()
685 | 
686 |     def _shutdown_workers(self, workers):
687 |         # type: (List[InstaDownloader]) -> None
688 |         for worker in workers:
689 |             worker.terminate()
690 | 
691 | 
class ProfileLooter(InstaLooter):
    """A looter collecting the medias posted on a user profile.
    """

    def __init__(self, username, **kwargs):
        # type: (str, **Any) -> None
        """Create a new profile looter.

        Arguments:
            username (str): the username of the profile.

        See `InstaLooter.__init__` for more details about accepted
        keyword arguments.

        """
        super(ProfileLooter, self).__init__(**kwargs)
        self._username = username
        # Filled in by the first call to `pages`
        self._owner_id = None

    def pages(self):
        # type: () -> ProfileIterator
        """Obtain an iterator over Instagram post pages.

        The first call builds the iterator from the username and
        remembers the owner ID it exposes; later calls build the
        iterator directly from that ID.

        Returns:
            PageIterator: an iterator over the instagram post pages.

        Raises:
            ValueError: when the requested user does not exist.
            RuntimeError: when the user is a private account
                and there is no logged user (or the logged user
                does not follow that account).

        """
        if self._owner_id is not None:
            return ProfileIterator(self._owner_id, self.session, self.rhx)

        iterator = ProfileIterator.from_username(self._username, self.session)
        self._owner_id = iterator.owner_id
        return iterator
730 | 
731 | 
class HashtagLooter(InstaLooter):
    """A looter collecting the medias tagged with a hashtag.
    """

    def __init__(self, hashtag, **kwargs):
        # type: (str, **Any) -> None
        """Create a new hashtag looter.

        Arguments:
            hashtag (str): the hashtag to search for.

        See `InstaLooter.__init__` for more details about accepted
        keyword arguments.

        """
        super(HashtagLooter, self).__init__(**kwargs)
        self._hashtag = hashtag

    def pages(self):  # noqa: D102
        # type: () -> HashtagIterator
        hashtag, session, rhx = self._hashtag, self.session, self.rhx
        return HashtagIterator(hashtag, session, rhx)
753 | 
754 | 
class PostLooter(InstaLooter):
    """A looter targeting a specific post.
    """

    # Matches a post URL, capturing the post code
    _RX_URL = re.compile(
        r'(?:https?://)?(?:www\.instagram\.com|instagr\.am)/p/([0-9a-zA-Z_\-]{10,11})'
    )

    # Matches a bare post code
    _RX_CODE = re.compile(
        r'^[0-9a-zA-Z_\-]{10,11}$'
    )

    def __init__(self, code, **kwargs):
        # type: (str, **Any) -> None
        """Create a new post looter.

        Arguments:
            code (str): the code of the post to get, or a post URL
                from which the code is extracted.

        Raises:
            ValueError: when ``code`` is neither a post URL nor a
                well-formed post code.

        See `InstaLooter.__init__` for more details about accepted
        keyword arguments.

        """
        super(PostLooter, self).__init__(**kwargs)

        self._info = None   # type: Optional[dict]

        match = self._RX_URL.match(code)
        if match is not None:
            self.code = match.group(1)
        elif self._RX_CODE.match(code) is None:
            raise ValueError("invalid post code: '{}'".format(code))
        else:
            self.code = code

    @property
    def info(self):
        # type: () -> dict
        """dict: the media dictionary of the post, fetched on first access.
        """
        if self._info is None:
            self._info = self.get_post_info(self.code)
        return self._info

    def pages(self):
        # type: () -> Iterator[Dict[Text, Any]]
        """Return a generator that yields a page with only the refered post.

        Yields:
            dict: a page dictionary with only a single media.

        """
        yield {"edge_owner_to_timeline_media": {
            "count": 1,
            "page_info": {
                "has_next_page": False,
                "end_cursor": None,
            },
            "edges": [
                {"node": self.info}
            ],
        }}

    def medias(self, timeframe=None):
        """Return a generator that yields only the refered post.

        Arguments:
            timeframe (tuple or None): an optional timeframe; when given,
                the generator is empty if the post does not fit in it.

        Yields:
            dict: a media dictionary obtained from the given post.

        """
        info = self.info
        if timeframe is not None:
            start, end = TimedMediasIterator.get_times(timeframe)
            # NOTE(review): the "media" fallback key looks suspicious for
            # a timestamp; confirm against `TimedMediasIterator`.
            timestamp = info.get("taken_at_timestamp") or info["media"]
            if not (start >= timestamp >= end):
                # A plain return ends the generator; raising StopIteration
                # here is turned into a RuntimeError by PEP 479.
                return
        yield info

    def download(self,
                 destination,       # type: Union[str, fs.base.FS]
                 condition=None,    # type: Optional[Callable[[dict], bool]]
                 media_count=None,  # type: Optional[int]
                 timeframe=None,    # type: Optional[_Timeframe]
                 new_only=False,    # type: bool
                 pgpbar_cls=None,   # type: Optional[Type[ProgressBar]]
                 dlpbar_cls=None,   # type: Optional[Type[ProgressBar]]
                 ):
        # type: (...) -> int
        """Download the refered post to the destination.

        See `InstaLooter.download` for argument reference. The progress
        bar classes are accepted for compatibility but are not used.

        Returns:
            int: the number of queued medias.

        Note:
            This function, opposed to other *looter* implementations, will
            not spawn new threads, but simply use the main thread to download
            the files.

            Since a worker is in charge of downloading a *media* at a time
            (and not a *file*), there would be no point in spawning more.

        """
        destination, close_destination = self._init_destfs(destination)

        try:
            queue = Queue()  # type: Queue[Optional[Dict]]
            # Forward the timeframe so that it is actually enforced
            medias_queued = self._fill_media_queue(
                queue, destination, iter(self.medias(timeframe)), media_count,
                new_only, condition)
            # The `None` marker ends the run of the worker below
            queue.put(None)

            worker = InstaDownloader(
                queue=queue,
                destination=destination,
                namegen=self.namegen,
                add_metadata=self.add_metadata,
                dump_json=self.dump_json,
                dump_only=self.dump_only,
                pbar=None,
                session=self.session)
            # Run in the current thread instead of starting a new one
            worker.run()
        finally:
            # Only close a filesystem that was opened by this method
            if close_destination:
                destination.close()

        return medias_queued
877 | 


--------------------------------------------------------------------------------
/instalooter/medias.py:
--------------------------------------------------------------------------------
  1 | # coding: utf-8
  2 | """Iterators over Instagram medias.
  3 | 
  4 | Iterators defined in this module wrap `PageIterator` instances to yield
  5 | individual medias defined in each page instead of whole pages.
  6 | """
  7 | from __future__ import absolute_import
  8 | from __future__ import unicode_literals
  9 | 
 10 | import datetime
 11 | import typing
 12 | 
 13 | import six
 14 | 
 15 | from .pages import PageIterator
 16 | 
 17 | if typing.TYPE_CHECKING:
 18 |     from typing import Any, Dict, List, Optional, Iterable, Set, Text
 19 | 
 20 | # Type variable bound to `MediasIterator`, used to annotate `__iter__`.
 21 | _I = typing.TypeVar('_I', bound='MediasIterator')
 22 | 
 23 | # Names exported by ``from instalooter.medias import *``.
 24 | __all__ = [
 25 |     "MediasIterator",
 26 |     "TimedMediasIterator",
 27 | ]
 28 | 
 29 | 
 30 | class MediasIterator(typing.Iterator[typing.Dict[typing.Text, typing.Any]]):
 31 |     """An iterator over the medias obtained from a page iterator.
 32 |     """
 33 | 
 34 |     def __init__(self, page_iterator):
 35 |         # type: (Iterable[Dict[Text, Any]]) -> None
 36 |         self._it = iter(page_iterator)
 37 |         self._seen = set()          # type: Set[Text]
 38 |         self._edges = []            # type: List[Dict[Text, Dict[Text, Any]]]
 39 |         self._finished = False
 40 |         self._total = None          # type: Optional[int]
 41 |         self._done = 0
 42 | 
 43 |     def __iter__(self):
 44 |         # type: (_I) -> _I
 45 |         return self
 46 | 
 47 |     def _next_page(self):
 48 |         # type: () -> Dict[Text, Any]
 49 |         data = next(self._it)
 50 |         section = next(s for s in six.iterkeys(data) if s.endswith('_media'))
 51 |         return data[section]
 52 | 
 53 |     def __next__(self):
 54 |         # type: () -> Dict[Text, Any]
 55 |         if self._finished:
 56 |             raise StopIteration
 57 | 
 58 |         if not self._edges:
 59 |             page = self._next_page()
 60 |             self._total = page['count']
 61 |             self._edges.extend(page['edges'])
 62 |             if not page['edges']:
 63 |                 raise StopIteration
 64 | 
 65 |         media = self._edges.pop(0)
 66 |         self._done += 1
 67 | 
 68 |         if media['node']['id'] in self._seen:
 69 |             self._finished = True
 70 | 
 71 |         self._seen.add(media['node']['id'])
 72 |         return media['node']
 73 | 
 74 |     def __length_hint__(self):
 75 |         if self._total is None:
 76 |             try:
 77 |                 page = self._next_page()
 78 |                 self._total = page['count']
 79 |                 self._edges.extend(page['edges'])
 80 |             except StopIteration:
 81 |                 self._total = 0
 82 |         return self._total - self._done
 83 | 
 84 |     if six.PY2:
 85 |         next = __next__
 86 | 
 87 | 
 88 | class TimedMediasIterator(MediasIterator):
 89 |     """An iterator over the medias within a specific timeframe.
 90 |     """
 91 | 
 92 |     @staticmethod
 93 |     def get_times(timeframe):
 94 |         if timeframe is None:
 95 |             timeframe = (None, None)
 96 |         try:
 97 |             start_time = timeframe[0] or datetime.date.today()
 98 |             end_time = timeframe[1] or datetime.date.fromtimestamp(0)
 99 |         except (IndexError, AttributeError):
100 |             raise TypeError("'timeframe' must be a couple of dates!")
101 |         return start_time, end_time
102 | 
103 |     def __init__(self, page_iterator, timeframe=None):
104 |         super(TimedMediasIterator, self).__init__(page_iterator)
105 |         self.start_time, self.end_time = self.get_times(timeframe)
106 | 
107 |     def __next__(self):
108 |         number_old = 0
109 |         while True:
110 |             media = super(TimedMediasIterator, self).__next__()
111 |             timestamp = media.get('taken_at_timestamp') or media['date']
112 |             media_date = type(self.start_time).fromtimestamp(timestamp)
113 | 
114 |             if self.start_time >= media_date >= self.end_time:
115 |                 return media
116 |             elif media_date < self.end_time:
117 |                 number_old += 1
118 |                 if number_old >= PageIterator.PAGE_SIZE:
119 |                     self._finished = True
120 |                     raise StopIteration
121 | 
122 |     if six.PY2:
123 |         next = __next__
124 | 


--------------------------------------------------------------------------------
/instalooter/pages.py:
--------------------------------------------------------------------------------
  1 | # coding: utf-8
  2 | """Iterators over Instagram media pages.
  3 | """
  4 | from __future__ import absolute_import
  5 | from __future__ import unicode_literals
  6 | 
  7 | import abc
  8 | import hashlib
  9 | import itertools
 10 | import math
 11 | import time
 12 | import typing
 13 | 
 14 | import six
 15 | from requests import Session
 16 | 
 17 | from ._impl import json
 18 | from ._utils import get_shared_data
 19 | 
 20 | if typing.TYPE_CHECKING:
 21 |     from typing import Any, Dict, Iterator, Iterable, Optional, Text
 22 | 
 23 | # Names exported by ``from instalooter.pages import *``.
 24 | __all__ = [
 25 |     "PageIterator",
 26 |     "HashtagIterator",
 27 |     "ProfileIterator",
 28 | ]
 29 | 
 30 | 
 31 | @six.add_metaclass(abc.ABCMeta)
 32 | class PageIterator(typing.Iterator[typing.Dict[typing.Text, typing.Any]]):
 33 |     """An abstract Instagram page iterator.
 34 |     """
 35 | 
 36 |     PAGE_SIZE = 50
 37 |     INTERVAL = 2
 38 | 
 39 |     _BASE_URL = "https://www.instagram.com/graphql/query/"
 40 |     _section_generic = NotImplemented    # type: Text
 41 |     _section_media = NotImplemented      # type: Text
 42 |     _URL = NotImplemented                # type: Text
 43 | 
 44 |     def __init__(self, session, rhx):
 45 |         # type: (Session, Text) -> None
 46 |         self._finished = False
 47 |         self._cursor = None     # type: Optional[Text]
 48 |         self._current_page = 0
 49 |         self._data_it = iter(self._page_loader(session, rhx))
 50 | 
 51 |     @abc.abstractmethod
 52 |     def _getparams(self, cursor):
 53 |         # type: (Optional[Text]) -> Text
 54 |         return NotImplemented
 55 | 
 56 |     def _page_loader(self, session, rhx):
 57 |         # type: (Session, Text) -> Iterable[Dict[Text, Dict[Text, Any]]]
 58 |         while True:
 59 |             # Cache cursor for later
 60 |             cursor = self._cursor
 61 |             # Query data
 62 |             try:
 63 |                 # Prepare the query
 64 |                 params = self._getparams(cursor)
 65 |                 json_params = json.dumps(params, separators=(',', ':'))
 66 |                 magic = "{}:{}".format(rhx, json_params)
 67 |                 session.headers['x-instagram-gis'] = hashlib.md5(magic.encode('utf-8')).hexdigest()
 68 |                 url = self._URL.format(json_params)
 69 |                 # Query the server for data
 70 |                 with session.get(url) as res:
 71 |                     self._last_page = data = res.json()
 72 |                 # Yield that same data until cursor is updated
 73 |                 while self._cursor == cursor:
 74 |                     yield data['data']
 75 |             except KeyError as e:
 76 |                 if data.get('message') == 'rate limited':
 77 |                     raise RuntimeError("Query rate exceeded (wait before next run)")
 78 |                 time.sleep(10)
 79 |             # Sleep before next query
 80 |             time.sleep(self.INTERVAL)
 81 | 
 82 |     def __length_hint__(self):
 83 |         # type: () -> int
 84 |         try:
 85 |             data = next(self._data_it)
 86 |             c = data[self._section_generic][self._section_media]['count']
 87 |             total = int(math.ceil(c / self.PAGE_SIZE))
 88 |         except (StopIteration, TypeError):
 89 |             total = 0
 90 |         return total - self._current_page
 91 | 
 92 |     def __iter__(self):
 93 |         return self
 94 | 
 95 |     def __next__(self):
 96 | 
 97 |         if self._finished:
 98 |             raise StopIteration
 99 | 
100 |         data = next(self._data_it)
101 | 
102 |         try:
103 |             media_info = data[self._section_generic][self._section_media]
104 |         except (TypeError, KeyError):
105 |             self._finished = True
106 |             raise StopIteration
107 | 
108 |         if not media_info['page_info']['has_next_page']:
109 |             self._finished = True
110 |         elif not media_info['edges']:
111 |             self._finished = True
112 |             raise StopIteration
113 |         else:
114 |             self._cursor = media_info['page_info']['end_cursor']
115 |             self._current_page += 1
116 | 
117 |         return data[self._section_generic]
118 | 
119 |     if six.PY2:
120 |         next = __next__
121 | 
122 | 
123 | class HashtagIterator(PageIterator):
124 |     """An iterator over the pages refering to a specific hashtag.
125 |     """
126 | 
127 |     _QUERY_ID = "17882293912014529"
128 |     _URL = "{}?query_id={}&variables={{}}".format(PageIterator._BASE_URL, _QUERY_ID)
129 |     _section_generic = "hashtag"
130 |     _section_media = "edge_hashtag_to_media"
131 | 
132 |     def __init__(self, hashtag, session, rhx):
133 |         super(HashtagIterator, self).__init__(session, rhx)
134 |         self.hashtag = hashtag
135 | 
136 |     def _getparams(self, cursor):
137 |         return {
138 |             "tag_name": self.hashtag,
139 |             "first": self.PAGE_SIZE,
140 |             "after": cursor
141 |         }
142 | 
143 |     def __next__(self):
144 |         item = super(HashtagIterator, self).__next__()
145 |         for media in item[self._section_media].get("edges", []):
146 |             media["node"].setdefault(
147 |                 "__typename",
148 |                 "GraphVideo" if media["node"].get("is_video", False) else "GraphImage"
149 |             )
150 |         return item
151 | 
152 |     if six.PY2:
153 |         next = __next__
154 | 
155 | 
156 | class ProfileIterator(PageIterator):
157 |     """An iterator over the pages of a user profile.
158 |     """
159 | 
160 |     _QUERY_HASH = "42323d64886122307be10013ad2dcc44"
161 |     #_QUERY_HASH = "472f257a40c653c64c666ce877d59d2b"
162 |     _URL = "{}?query_hash={}&variables={{}}".format(PageIterator._BASE_URL, _QUERY_HASH)
163 |     _section_generic = "user"
164 |     _section_media = "edge_owner_to_timeline_media"
165 | 
166 |     @classmethod
167 |     def _user_data(cls, username, session):
168 |         url = "https://www.instagram.com/{}/".format(username)
169 |         try:
170 |             with session.get(url) as res:
171 |                 return get_shared_data(res.text)
172 |         except (ValueError, AttributeError):
173 |             raise ValueError("user not found: '{}'".format(username))
174 | 
175 |     @classmethod
176 |     def from_username(cls, username, session):
177 |         user_data = cls._user_data(username, session)
178 |         if 'ProfilePage' not in user_data['entry_data']:
179 |             raise ValueError("user not found: '{}'".format(username))
180 |         data = user_data['entry_data']['ProfilePage'][0]['graphql']['user']
181 |         if data['is_private'] and not data['followed_by_viewer']:
182 |             con_id = next((c.value for c in session.cookies if c.name == "ds_user_id"), None)
183 |             if con_id != data['id']:
184 |                 raise RuntimeError("user '{}' is private".format(username))
185 |         return cls(data['id'], session, user_data.get('rhx_gis', ''))
186 | 
187 |     def __init__(self, owner_id, session, rhx):
188 |         super(ProfileIterator, self).__init__(session, rhx)
189 |         self.owner_id = owner_id
190 | 
191 |     def _getparams(self, cursor):
192 |         return {
193 |             "id": self.owner_id,
194 |             "first": self.PAGE_SIZE,
195 |             "after": cursor,
196 |         }
197 | 


--------------------------------------------------------------------------------
/instalooter/pbar.py:
--------------------------------------------------------------------------------
 1 | # coding: utf-8
 2 | """Progress bars used to report `InstaLooter.download` progress.
 3 | 
 4 | The module exposes and abstract class that can be derived to implement
 5 | your own progress displayer. The default implementation (which uses the
 6 | `tqdm` library) is used by the CLI.
 7 | """
 8 | from __future__ import absolute_import
 9 | from __future__ import unicode_literals
10 | 
11 | import abc
12 | import typing
13 | 
14 | import six
15 | import tqdm
16 | 
17 | if typing.TYPE_CHECKING:
18 |     from threading import Lock, RLock
19 |     from typing import Union
20 | 
21 | # `_T` is the type of the items a `ProgressBar` yields.
22 | _T = typing.TypeVar('_T', covariant=True)
23 | _L = typing.TypeVar('_L')
24 | 
25 | 
26 | @six.add_metaclass(abc.ABCMeta)
27 | class ProgressBar(typing.Iterator[_T]):
28 |     """An abstract progess bar used to report interal progress.
29 |     """
30 | 
31 |     def __init__(self, it, *args, **kwargs):
32 |         self.it = it
33 |         self.__lock = None  # type: Union[Lock, RLock, None]
34 | 
35 |     def __iter__(self):
36 |         # type: () -> ProgressBar[_T]
37 |         return self
38 | 
39 |     def __next__(self):
40 |         # type: () -> _T
41 |         item = next(self.it)
42 |         self.update()
43 |         return item
44 | 
45 |     if six.PY2:
46 |         next = __next__
47 | 
48 |     @abc.abstractmethod
49 |     def update(self):
50 |         # type: () -> None
51 |         """Update the progress bar by one step.
52 |         """
53 |         return NotImplemented
54 | 
55 |     @abc.abstractmethod
56 |     def set_maximum(self, maximum):
57 |         # type: (int) -> None
58 |         """Set the maximum number of steps of the operation.
59 |         """
60 |         return NotImplemented
61 | 
62 |     def finish(self):
63 |         # type: () -> None
64 |         """Notify the progress bar the operation is finished.
65 |         """
66 |         pass
67 | 
68 |     def set_lock(self, lock):
69 |         # type: (Union[Lock, RLock]) -> None
70 |         """Set a lock to be used by parallel workers.
71 |         """
72 |         self.__lock = lock
73 | 
74 |     def get_lock(self):
75 |         # type: () -> Union[Lock, RLock]
76 |         """Obtain the progress bar lock.
77 |         """
78 |         if self.__lock is None:
79 |             raise RuntimeError("lock was not initialised")
80 |         return self.__lock
81 | 
82 | 
83 | class TqdmProgressBar(tqdm.tqdm, ProgressBar):
84 |     """A progress bar using the `tqdm` library.
85 |     """
86 | 
87 |     def __init__(self, it, *args, **kwargs):  # noqa: D102, D107
88 |         kwargs["leave"] = False
89 |         super(TqdmProgressBar, self).__init__(it, *args, **kwargs)
90 |         ProgressBar.__init__(self, it)
91 | 
92 |     def set_maximum(self, maximum):  # noqa: D102
93 |         self.total = maximum
94 | 
95 |     def finish(self):  # noqa: D102
96 |         self.close()
97 | 


--------------------------------------------------------------------------------
/instalooter/static/splash.html:
--------------------------------------------------------------------------------
 1 | <html lang="en">
 2 | 
 3 | <head>
 4 |     <!-- meta -->
 5 |     <meta charset="utf-8">
 6 |     <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
 7 |     <meta name="author" content="Martin Larralde">
 8 |     <!-- styling -->
 9 |     <!-- <link href="/static/EBI-Icon-fonts/fonts.css" rel="stylesheet" crossorigin="anonymous"> -->
10 |     <link href="/static/bootstrap/dist/css/bootstrap.min.css" rel="stylesheet" crossorigin="anonymous">
11 |     <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css" integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T" crossorigin="anonymous">
12 |     <!-- icon -->
13 |     <link rel="icon" type="image/png" href="https://assets.gitlab-static.net/uploads/-/system/project/avatar/2101509/instaLooter-small.png?width=64"/>
14 |     <title>InstaLooter - UserAgent catcher</title>
15 | </head>
16 | 
17 | <body>
18 |   <main>
19 |     <div style="padding-bottom: 20%; background: url('/static/img/splash.jpg'); background-size: cover; background-repeat: fixed;" class="jumbotron bg-secondary text-white text-center mb-0">
20 |       <div class="container-fluid">
21 |         <img src="https://upload.wikimedia.org/wikipedia/commons/thumb/e/e7/Instagram_logo_2016.svg/480px-Instagram_logo_2016.svg.png" alt="">
22 |         <div class="display-1">Don't Panic</div>
23 |         <hr class="bg-white">
24 |         <p>
25 |             Hi! You recently installed or updated InstaLooter, and this page opened
26 |             because it needs to detect your User Agent. This way, we can trick Instagram
27 |             into thinking that you are using your usual web browser!
28 |             <i>You will not see this page on the next run.</i>
29 |         </p>
30 |         <hr class="bg-white">
31 |         <p>Your User Agent is: {}</p>
32 |         <p>It has been cached in: {}</p>
33 |       </div>
34 |     </div>
35 |   </main>
36 |   <footer class="footer mt-auto py-3">
37 |     <div class="container">
38 |       <div class="row">
39 |         <span class="text-muted">© 2016-2019, Martin Larralde (GPLv3)</span>
40 |       </div>
41 |     </div>
42 |   </footer>
43 |   <script src="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/js/bootstrap.min.js" integrity="sha384-JZR6Spejh4U02d8jOt6vLEHfe/JQGiRRSQQxSfFWpi1MquVdAyjUar5+76PVCmYl" crossorigin="anonymous"></script>
44 | </body>
45 | </html>
46 | 


--------------------------------------------------------------------------------
/instalooter/worker.py:
--------------------------------------------------------------------------------
  1 | # coding: utf-8
  2 | """Background download thread.
  3 | """
  4 | from __future__ import absolute_import
  5 | from __future__ import unicode_literals
  6 | 
  7 | import io
  8 | import operator
  9 | import threading
 10 | import time
 11 | 
 12 | import requests
 13 | import six
 14 | import tenacity
 15 | 
 16 | from ._impl import PIL, piexif, json
 17 | 
 18 | 
 19 | class InstaDownloader(threading.Thread):
 20 |     """The background InstaLooter worker class.
 21 |     """
 22 | 
 23 |     _tenacity_options = {
 24 |         "stop": tenacity.stop_after_attempt(5),
 25 |         "wait": tenacity.wait_exponential(1, 10),
 26 |     }
 27 | 
 28 |     def __init__(self,
 29 |                  queue,
 30 |                  destination,
 31 |                  namegen,
 32 |                  add_metadata=False,
 33 |                  dump_json=False,
 34 |                  dump_only=False,
 35 |                  pbar=None,
 36 |                  session=None):
 37 | 
 38 |         super(InstaDownloader, self).__init__()
 39 | 
 40 |         self.queue = queue
 41 |         self.destination = destination
 42 |         self.namegen = namegen
 43 |         self.session = session or requests.Session()
 44 |         self.pbar = pbar
 45 | 
 46 |         self.dump_only = dump_only
 47 |         self.dump_json = dump_json or dump_only
 48 |         self.add_metadata = add_metadata
 49 | 
 50 |         self._killed = False
 51 |         self._downloading = None
 52 | 
 53 |         retry = tenacity.retry(**self._tenacity_options)
 54 |         self._DOWNLOAD_METHODS = {
 55 |             "GraphImage": retry(self._download_image),
 56 |             "GraphVideo": retry(self._download_video),
 57 |             "GraphSidecar": self._download_sidecar,
 58 |         }
 59 | 
 60 |     def _download_image(self, media):
 61 |         url = media['display_url']
 62 |         filename = self.namegen.file(media)
 63 | 
 64 |         if self.destination.exists(filename):
 65 |             return
 66 | 
 67 |         # FIXME: find a way to remove failed temporary downloads
 68 |         with self.destination.open(filename, "wb") as f:
 69 |             with self.session.get(url) as res:
 70 |                 f.write(res.content)
 71 |         self._set_time(media, filename)
 72 | 
 73 |     def _download_video(self, media):
 74 |         url = media['video_url']
 75 |         filename = self.namegen.file(media)
 76 | 
 77 |         if self.destination.exists(filename):
 78 |             return
 79 | 
 80 |         # FIXME: find a way to remove failed temporary downloads
 81 |         with self.destination.open(filename, "wb") as f:
 82 |             with self.session.get(url) as res:
 83 |                 for chunk in res.iter_content(io.DEFAULT_BUFFER_SIZE):
 84 |                     f.write(chunk)
 85 |         self._set_time(media, filename)
 86 | 
 87 |     def _download_sidecar(self, media):
 88 |         edges = media.pop('edge_sidecar_to_children')['edges']
 89 |         for edge in six.moves.map(operator.itemgetter('node'), edges):
 90 |             for key, value in six.iteritems(media):
 91 |                 edge.setdefault(key, value)
 92 |             self._DOWNLOAD_METHODS[edge['__typename']](edge)
 93 | 
 94 |     def _set_time(self, media, filename):
 95 |         details = {}
 96 |         details["modified"] = details["accessed"] = details["created"] = \
 97 |             media.get('taken_at_timestamp') or media['date']
 98 |         self.destination.setinfo(filename, {"details": details})
 99 | 
100 |     def _dump(self, media):
101 |         basename = self.namegen.base(media)
102 |         filename = "{}.json".format(basename)
103 |         mode = "w" if six.PY3 else "wb"
104 |         with self.destination.open(filename, mode) as dest:
105 |             json.dump(media, dest, indent=4, sort_keys=True)
106 |         self._set_time(media, filename)
107 | 
108 |     def run(self):
109 |         while not self._killed:
110 |             try:
111 |                 media = self.queue.get_nowait()
112 | 
113 |                 # Received a poison pill: break the loop
114 |                 if media is None:
115 |                     self._killed = True
116 | 
117 |                 else:
118 |                     # Download media
119 |                     if not self.dump_only:
120 |                         self._DOWNLOAD_METHODS[media["__typename"]](media)
121 |                     # Dump JSON metadata if needed
122 |                     if self.dump_json:
123 |                         self._dump(media)
124 |                     # Update progress bar if any
125 |                     if self.pbar is not None and not self._killed:
126 |                         with self.pbar.get_lock():
127 |                             self.pbar.update()
128 | 
129 |                 self.queue.task_done()
130 | 
131 |             except six.moves.queue.Empty:
132 |                 time.sleep(1)
133 | 
134 |     def terminate(self):
135 |         self._killed = True
136 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
  1 | [metadata]
  2 | name = instalooter
  3 | version = attr: instalooter.__version__
  4 | author = Martin Larralde
  5 | author-email = martin.larralde@ens-paris-saclay.fr
  6 | home-page = https://github.com/althonos/instalooter
  7 | description = Another API-less Instagram pictures and videos downloader
  8 | long-description = file: README.rst
  9 | license = GPLv3+
 10 | license-file = COPYING
 11 | platform = any
 12 | keywords = instagram, download, web, web scraping, looter
 13 | classifiers =
 14 |     Development Status :: 4 - Beta
 15 |     Intended Audience :: Developers
 16 |     Intended Audience :: End Users/Desktop
 17 |     License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
 18 |     Programming Language :: Python
 19 |     Programming Language :: Python :: 2.7
 20 |     Programming Language :: Python :: 3.3
 21 |     Programming Language :: Python :: 3.4
 22 |     Programming Language :: Python :: 3.5
 23 |     Programming Language :: Python :: 3.6
 24 |     Topic :: Internet
 25 |     Topic :: Software Development :: Libraries :: Python Modules
 26 |     Operating System :: OS Independent
 27 | 
 28 | [options]
 29 | zip_safe = true
 30 | include_package_data = true
 31 | python_requires = >= 2.7, != 3.0.*, != 3.1.*, != 3.2.*, != 3.5.1
 32 | packages = find:
 33 | test_suite = tests
 34 | install_requires =
 35 |     coloredlogs ~=14.0
 36 |     python-dateutil ~=2.1
 37 |     docopt ~=0.4
 38 |     fs ~=2.1
 39 |     requests ~=2.18
 40 |     six ~=1.4
 41 |     tqdm ~=4.19
 42 |     tenacity ~=6.0
 43 |     typing ~=3.6    ; python_version < '3.6'
 44 |     verboselogs ~=1.7
 45 | tests_require =
 46 |     instalooter[test]
 47 | 
 48 | [options.entry_points]
 49 | console_scripts =
 50 |     instalooter = instalooter.cli:main
 51 | 
 52 | [options.packages.find]
 53 | exclude =
 54 |     tests
 55 |     tests.utils
 56 | 
 57 | [options.extras_require]
 58 | # add EXIF metadata to downloaded pictures
 59 | metadata =
 60 |     piexif
 61 |     Pillow
 62 | # all features
 63 | all =
 64 |     %(metadata)s
 65 | # test dependencies
 66 | test =
 67 |     contexter ~=0.1
 68 |     mock ~=2.0 ; python_version < '3.4'
 69 |     parameterized ~=0.6
 70 |     green ~=2.12
 71 |     %(metadata)s
 72 | # coverage dependencies
 73 | coverage =
 74 |     coverage
 75 |     codecov
 76 |     codacy-coverage
 77 | # documentation dependencies
 78 | doc =
 79 |     sphinx ~=1.7
 80 |     sphinx-bootstrap-theme ~=0.6
 81 |     semantic-version ~=2.6
 82 | # development dependencies
 83 | dev =
 84 |     docutils
 85 |     Pygments
 86 |     %(test)s
 87 |     %(coverage)s
 88 | 
 89 | [bdist_wheel]
 90 | universal=1
 91 | 
 92 | [coverage:report]
 93 | exclude_lines =
 94 |     pragma: no cover
 95 |     raise AssertionError
 96 |     raise NotImplementedError
 97 |     return NotImplemented
 98 |     if 0:
 99 |     if __name__ == .__main__.:
100 |     except ImportError
101 |     if six.PY2:
102 |     if six.PY3:
103 |     @typing.overload
104 |     @abc.abstractmethod
105 |     if typing.TYPE_CHECKING:
106 | 
107 | [coverage:run]
108 | branch=True
109 | 
110 | [green]
111 | file-pattern = test_*.py
112 | verbose = 2
113 | no-skip-report = true
114 | quiet-stdout = true
115 | run-coverage = true
116 | processes = 1
117 | 
118 | [pydocstyle]
119 | inherit = false
120 | match-dir = (?!tests)(?!resources)(?!docs)[^\.].*
121 | match = (?!test)(?!setup)[^\._].*\.py
122 | ignore = D200, D203, D213, D406, D407
123 | 
124 | [flake8]
125 | max-line-length = 99
126 | doctests = True
127 | ignore = D200, D203, D213, D406, D407
128 | exclude =
129 |     .git,
130 |     .eggs,
131 |     __pycache__,
132 |     tests/,
133 |     docs/,
134 |     build/,
135 |     dist/,
136 |     setup.py
137 | 
138 | [mypy]
139 | disallow_any_decorated = false
140 | disallow_any_generics = false
141 | disallow_any_unimported = false
142 | disallow_subclassing_any = false
143 | disallow_untyped_calls = false
144 | disallow_untyped_defs = false
145 | ignore_missing_imports = true
146 | warn_unused_ignores = true
147 | warn_return_any = false
148 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # released under the GNU General Public License version 3.0 (GPLv3)
3 | 
4 | from setuptools import setup
5 | setup()  # all the package metadata is declared in setup.cfg
6 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | 
3 | import os
4 | import sys
5 | sys.path.insert(0, os.path.abspath('..'))  # parent of the working directory
6 | 


--------------------------------------------------------------------------------
/tests/test_batch.py:
--------------------------------------------------------------------------------
 1 | # coding: utf-8
 2 | from __future__ import absolute_import
 3 | from __future__ import unicode_literals
 4 | 
 5 | import textwrap
 6 | import unittest
 7 | import warnings
 8 | 
 9 | import fs
10 | import requests
11 | 
12 | from instalooter.cli import main
13 | from instalooter.batch import BatchRunner
14 | from instalooter.looters import InstaLooter
15 | 
16 | 
17 | try:
18 |     CONNECTION_FAILURE = not requests.get("https://instagr.am/instagram").ok
19 | except requests.exceptions.ConnectionError:
20 |     CONNECTION_FAILURE = True
21 | 
22 | 
23 | class TestBatchRunner(unittest.TestCase):
24 | 
25 |     @classmethod
26 |     def setUpClass(cls):
27 |         cls.session = requests.Session()
28 | 
29 |     @classmethod
30 |     def tearDownClass(cls):
31 |         cls.session.close()
32 | 
33 |     def setUp(self):
34 |         self.destfs = fs.open_fs("temp://")
35 |         self.tmpdir = self.destfs.getsyspath("/")
36 | 
37 |     def tearDown(self):
38 |         self.destfs.close()
39 | 
40 |     @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
41 |     def test_cli(self):
42 |         cfg = textwrap.dedent(
43 |             """
44 |             [my job]
45 | 
46 |             num-to-dl = 3
47 |             quiet = true
48 | 
49 |             users:
50 |                 therock: {self.tmpdir}
51 |                 nintendo: {self.tmpdir}
52 |             """
53 |         ).format(self=self)
54 | 
55 |         with self.destfs.open('batch.ini', 'w') as batch_file:
56 |             batch_file.write(cfg)
57 | 
58 |         retcode = main(["batch", self.destfs.getsyspath('batch.ini')])
59 |         self.assertEqual(retcode, 0)
60 |         self.assertGreaterEqual(
61 |             len(list(self.destfs.filterdir("/", ["*.jpg"]))), 6)
62 | 
63 | 
64 | def setUpModule():
65 |    warnings.simplefilter('ignore')
66 | 
67 | 
68 | def tearDownModule():
69 |    warnings.simplefilter(warnings.defaultaction)
70 | 


--------------------------------------------------------------------------------
/tests/test_cli.py:
--------------------------------------------------------------------------------
  1 | # coding: utf-8
  2 | from __future__ import absolute_import
  3 | from __future__ import unicode_literals
  4 | 
  5 | import datetime
  6 | import unittest
  7 | import json
  8 | import os
  9 | import time
 10 | 
 11 | import contexter
 12 | import fs
 13 | import parameterized
 14 | import requests
 15 | import six
 16 | from six.moves.queue import Queue
 17 | 
 18 | from instalooter.cli import main
 19 | from instalooter.cli import time as timeutils
 20 | from instalooter.cli import threadutils
 21 | from instalooter.cli.constants import USAGE
 22 | from instalooter.cli.login import login
 23 | from instalooter.worker import InstaDownloader
 24 | 
 25 | from .utils import mock
 26 | from .utils.method_names import firstparam
 27 | from .utils.ig_mock import MockPages
 28 | 
 29 | 
 30 | try:
 31 |     CONNECTION_FAILURE = not requests.get("https://instagr.am/instagram").ok
 32 | except requests.exceptions.ConnectionError:
 33 |     CONNECTION_FAILURE = True
 34 | 
 35 | 
 36 | class TestCLI(unittest.TestCase):
 37 | 
 38 |     @classmethod
 39 |     def setUpClass(cls):
 40 |         cls.session = requests.Session()
 41 | 
 42 |     @classmethod
 43 |     def tearDownClass(cls):
 44 |         cls.session.close()
 45 | 
 46 |     def setUp(self):
 47 |         self.destfs = fs.open_fs("temp://")
 48 |         self.tmpdir = self.destfs.getsyspath("/")
 49 | 
 50 |     def tearDown(self):
 51 |         self.destfs.close()
 52 |         if os.getenv("CI") == "true":
 53 |             time.sleep(1)
 54 | 
 55 |     @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
 56 |     def test_user(self):
 57 |         with contexter.Contexter() as ctx:
 58 |             ctx << mock.patch('instalooter.cli.ProfileLooter.pages', MockPages('nintendo'))
 59 |             r = main(["user", "nintendo", self.tmpdir, "-q", '-n', '10'])
 60 |         self.assertEqual(r, 0)
 61 |         self.assertEqual(len(self.destfs.listdir('/')), 10)
 62 | 
 63 |     @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
 64 |     def test_single_post(self):
 65 |         r = main(["post", "BFB6znLg5s1", self.tmpdir, "-q"])
 66 |         self.assertEqual(r, 0)
 67 |         self.assertTrue(self.destfs.exists("1243533605591030581.jpg"))
 68 | 
 69 |     @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
 70 |     def test_dump_json(self):
 71 |         r = main(["post", "BIqZ8L8AHmH", self.tmpdir, '-q', '-d'])
 72 |         self.assertEqual(r, 0)
 73 | 
 74 |         self.assertTrue(self.destfs.exists("1308972728853756295.json"))
 75 |         self.assertTrue(self.destfs.exists("1308972728853756295.jpg"))
 76 | 
 77 |         with self.destfs.open("1308972728853756295.json") as fp:
 78 |             json_metadata = json.load(fp)
 79 | 
 80 |         self.assertEqual("1308972728853756295", json_metadata["id"])
 81 |         self.assertEqual("BIqZ8L8AHmH", json_metadata["shortcode"])
 82 | 
 83 |     @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
 84 |     def test_dump_only(self):
 85 |         r = main(["post", "BIqZ8L8AHmH", self.tmpdir, '-q', '-D'])
 86 |         self.assertEqual(r, 0)
 87 | 
 88 |         self.assertTrue(self.destfs.exists("1308972728853756295.json"))
 89 |         self.assertFalse(self.destfs.exists("1308972728853756295.jpg"))
 90 | 
 91 |         with self.destfs.open("1308972728853756295.json") as fp:
 92 |             json_metadata = json.load(fp)
 93 | 
 94 |         self.assertEqual("1308972728853756295", json_metadata["id"])
 95 |         self.assertEqual("BIqZ8L8AHmH", json_metadata["shortcode"])
 96 | 
 97 |     @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
 98 |     def test_usage(self):
 99 |         handle = six.moves.StringIO()
100 |         main(["--usage"], stream=handle)
101 |         self.assertEqual(handle.getvalue().strip(), USAGE.strip())
102 | 
103 |     @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
104 |     def test_single_post_from_url(self):
105 |         url = "https://www.instagram.com/p/BFB6znLg5s1/"
106 |         main(["post", url, self.tmpdir, "-q"])
107 |         self.assertIn("1243533605591030581.jpg", os.listdir(self.tmpdir))
108 | 
109 | 
110 | class TestTimeUtils(unittest.TestCase):  # exercises instalooter.cli.time.get_times_from_cli
111 | 
112 |     @parameterized.parameterized.expand([  # (token, expected result) pairs
113 |         (":", (None, None)),
114 |         ("2017-03-12:", (None, datetime.date(2017, 3, 12))),
115 |         (":2016-08-04", (datetime.date(2016, 8, 4), None)),
116 |         ("2017-03-01:2017-02-01", (datetime.date(2017, 3, 1), datetime.date(2017, 2, 1))),
117 |     ], testcase_func_name=firstparam)
118 |     def test_get_times_from_cli(self, token, expected):
119 |         self.assertEqual(timeutils.get_times_from_cli(token), expected)
120 | 
121 |     @parameterized.parameterized.expand([  # (keyword, min span in days, max span in days)
122 |         ("thisday", 0, 0),
123 |         ("thisweek", 7, 7),
124 |         ("thismonth", 28, 31),
125 |         ("thisyear", 365, 366),
126 |     ], testcase_func_name=firstparam)
127 |     def test_get_times_from_cli_keywords(self, token, inf, sup):
128 |         start, stop = timeutils.get_times_from_cli(token)
129 |         self.assertGreaterEqual(start - stop, datetime.timedelta(inf))  # span is at least `inf` days
130 |         self.assertLessEqual(start - stop, datetime.timedelta(sup))  # ... and at most `sup` days
131 |         self.assertEqual(start, datetime.date.today())  # keyword ranges always start today
132 | 
133 |     @parameterized.parameterized.expand([  # tokens that must be rejected
134 |         ["x"],
135 |         ["x:y"],
136 |         ["x:y:z"],
137 |     ], testcase_func_name=firstparam)
138 |     def test_get_times_from_cli_bad_format(self, token):
139 |         self.assertRaises(ValueError, timeutils.get_times_from_cli, token)
140 | 
140 | 
141 | 
142 | @mock.patch('instalooter.looters.InstaLooter._login')
143 | @mock.patch('getpass.getpass')
144 | class TestLoginUtils(unittest.TestCase):  # assuming .utils.mock is the standard mock library, both patches wrap every test_* method
145 | 
146 |     def test_cli_login_no_username(self, getpass_, login_):  # mocks are injected bottom decorator first
147 |         args = {'--username': None, "--password": None}
148 |         login(args)
149 |         login_.assert_not_called()  # no username: no login attempt expected
150 | 
151 |     @mock.patch('instalooter.looters.InstaLooter._logged_in')
152 |     def test_cli_login_no_password(self, logged_in_, getpass_, login_):  # method-level mock comes before the class-level ones
153 |         args = {'--username': "user", "--password": None, "--quiet": False}
154 |         logged_in_.return_value = False
155 |         getpass_.return_value = "pasw"  # the only source of "pasw", so login must have prompted through getpass
156 |         login(args)
157 |         login_.assert_called_once_with("user", "pasw")
158 | 
159 |     @mock.patch('instalooter.looters.InstaLooter._logged_in')
160 |     def test_cli_login(self, logged_in_, getpass_, login_):
161 |         args = {'--username': "user", "--password": "pasw", "--quiet": False}
162 |         logged_in_.return_value = False
163 |         login(args)
164 |         login_.assert_called_once_with("user", "pasw")  # credentials from the CLI are forwarded as-is
165 | 
166 |     @mock.patch('instalooter.looters.InstaLooter._logged_in')
167 |     def test_cli_already_logged_in(self, logged_in_, getpass_, login_):
168 |         args = {'--username': "user", "--password": "pasw", "--quiet": False}
169 |         logged_in_.return_value = True
170 |         login(args)
171 |         login_.assert_not_called()  # an already logged-in session must not log in again
172 | 
172 | 
173 | 
174 | class TestThreadUtils(unittest.TestCase):  # exercises instalooter.cli.threadutils with real worker threads
175 | 
176 |     def test_threads_count(self):
177 | 
178 |         q = Queue()
179 |         t1 = InstaDownloader(q, None, None)  # workers share one queue that this test never feeds
180 |         t2 = InstaDownloader(q, None, None)
181 | 
182 |         try:
183 |             self.assertEqual(threadutils.threads_count(), 0)  # nothing has been started yet
184 |             t1.start()
185 |             self.assertEqual(threadutils.threads_count(), 1)
186 |             t2.start()
187 |             self.assertEqual(threadutils.threads_count(), 2)
188 |         finally:
189 |             t1.terminate()  # always stop the workers, even after a failed assertion
190 |             t2.terminate()
191 | 
192 |     def test_threads_force_join(self):
193 | 
194 |         q = Queue()
195 |         t1 = InstaDownloader(q, None, None)
196 |         t2 = InstaDownloader(q, None, None)
197 | 
198 |         t1.start()
199 |         t2.start()
200 | 
201 |         self.assertTrue(t1.is_alive())
202 |         self.assertTrue(t2.is_alive())
203 | 
204 |         threadutils.threads_force_join()  # expected to stop and join every running worker
205 | 
206 |         self.assertFalse(t1.is_alive())
207 |         self.assertFalse(t2.is_alive())
208 | 
208 | 


--------------------------------------------------------------------------------
/tests/test_issues.py:
--------------------------------------------------------------------------------
  1 | # coding: utf-8
  2 | from __future__ import absolute_import
  3 | from __future__ import unicode_literals
  4 | 
  5 | import datetime
  6 | import json
  7 | import os
  8 | import textwrap
  9 | import time
 10 | import unittest
 11 | import warnings
 12 | 
 13 | import contexter
 14 | import fs
 15 | import requests
 16 | import six
 17 | 
 18 | from instalooter._impl import length_hint, piexif, PIL
 19 | from instalooter.batch import BatchRunner, logger as batch_logger
 20 | from instalooter.cli import main
 21 | from instalooter.looters import InstaLooter, HashtagLooter, ProfileLooter, PostLooter
 22 | 
 23 | from .utils import mock
 24 | from .utils.ig_mock import MockPages
 25 | 
 26 | 
 27 | try:
 28 |     CONNECTION_FAILURE = not requests.get("https://instagr.am/instagram").ok
 29 | except requests.exceptions.ConnectionError:
 30 |     CONNECTION_FAILURE = True
 31 | 
 32 | 
 33 | class TestResolvedIssues(unittest.TestCase):
 34 | 
 35 |     if six.PY2:
 36 |         assertRegex = unittest.TestCase.assertRegexpMatches
 37 | 
 38 |     @classmethod
 39 |     def setUpClass(cls):
 40 |         cls.session = requests.Session()
 41 |         _user_agent = mock.Mock(return_value=cls.session.headers["User-Agent"])
 42 |         cls.patch = mock.patch.object(InstaLooter, "_user_agent", new=_user_agent)
 43 |         cls.patch.__enter__()
 44 | 
 45 |     @classmethod
 46 |     def tearDownClass(cls):
 47 |         cls.session.close()
 48 |         cls.patch.__exit__(None, None, None)
 49 | 
 50 |     def setUp(self):
 51 |         self.destfs = fs.open_fs("temp://")
 52 |         self.tmpdir = self.destfs.getsyspath("/")
 53 |         warnings._showwarning = warnings.showwarning
 54 | 
 55 |     def tearDown(self):
 56 |         self.destfs.close()
 57 |         warnings.showwarning = warnings._showwarning
 58 |         if os.getenv("CI") == "true":
 59 |             time.sleep(1)
 60 | 
 61 |     @unittest.expectedFailure
 62 |     @unittest.skipUnless(piexif, "piexif required for this test")
 63 |     def test_issue_009(self):
 64 |         """
 65 |         Thanks to @kurtmaia for reporting this bug.
 66 | 
 67 |         Checks that adding metadata to pictures downloaded from a hashtag
 68 |         works as well.
 69 |         """
 70 |         looter = HashtagLooter("fluoxetine", add_metadata=True, session=self.session)
 71 |         with contexter.Contexter() as ctx:
 72 |             ctx << mock.patch.object(looter, 'pages', MockPages('fluoxetine'))
 73 |             looter.download(self.destfs, media_count=10)
 74 |         for f in self.destfs.listdir("/"):
 75 |             exif = piexif.load(self.destfs.getbytes(f))
 76 |             self.assertTrue(exif['Exif'])  # Date & Caption
 77 |             self.assertTrue(exif['0th'])  # Image creator
 78 | 
 79 |     @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
 80 |     def test_issue_012(self):
 81 |         """Feature request by @paramjitrohit.
 82 | 
 83 |         Allows downloading pictures and videos only within a timeframe.
 84 |         """
 85 |         looter = ProfileLooter("nintendo", session=self.session)
 86 |         day = datetime.date(2018, 3, 16)
 87 |         with contexter.Contexter() as ctx:
 88 |             ctx << mock.patch.object(looter, 'pages', MockPages('nintendo'))
 89 |             medias_in_timeframe = list(looter.medias(timeframe=[day, day]))
 90 |         self.assertEqual(len(medias_in_timeframe), 2)
 91 | 
 92 |     @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
 93 |     def test_issue_019(self):
 94 |         """
 95 |         Thanks to @emijawdo for reporting this bug.
 96 | 
 97 |         Checks that instalooter does not crash when not given a destination
 98 |         directory and uses the current directory.
 99 |         """
100 |         initial_dir = os.getcwd()
101 |         os.chdir(self.tmpdir)
102 | 
103 |         try:
104 |             with contexter.Contexter() as ctx:
105 |                 ctx << mock.patch('instalooter.looters.InstaLooter.pages', MockPages('nintendo'))
106 |                 main(["user", "nintendo", "-n", "3", "-q"])
107 |                 self.assertGreaterEqual(len(self.destfs.listdir("/")), 3)
108 |         finally:
109 |             os.chdir(initial_dir)
110 | 
111 |     @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
112 |     def test_issue_014(self):
113 |         """Feature request by @JFLarsen.
114 | 
115 |         Allows customizing filenames using a template following Python
116 |         `.format()` minilanguage.
117 |         """
118 |         looter = ProfileLooter("nintendo", template="{username}.{id}", session=self.session)
119 |         with contexter.Contexter() as ctx:
120 |             ctx << mock.patch.object(looter, 'pages', MockPages('nintendo'))
121 |             looter.download(self.destfs, media_count=5)
122 |         for f in self.destfs.scandir("/"):
123 |             self.assertTrue(f.name.startswith('nintendo.'))
124 | 
125 |     @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
126 |     @unittest.skipIf(os.getenv("IG_USERNAME") is None, "need private user account")
127 |     def test_issue_006(self):
128 |         """
129 |         Checks that instalooter does not iterate forever on a private
130 |         profile.
131 |         """
132 |         with self.assertRaises(RuntimeError):
133 |             username = os.getenv("IG_USERNAME")
134 |             looter = ProfileLooter(username, session=self.session)
135 |             looter.logout()
136 |             next(looter.medias())
137 | 
138 |     @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
139 |     def test_issue_015(self):
140 |         """
141 |         Feature request by @MohamedIM.
142 | 
143 |         Checks that videos are not downloaded several times if present
144 |         already in the destination directory.
145 |         """
146 |         looter = ProfileLooter("nintendo", session=self.session)
147 | 
148 |         with contexter.Contexter() as ctx:
149 |             ctx << mock.patch.object(looter, 'pages', MockPages('nintendo'))
150 |             looter.download_videos(self.destfs, media_count=1)
151 |             video_file = next(self.destfs.filterdir("/", ["*.mp4"]))
152 |             mtime = self.destfs.getdetails(video_file.name).accessed
153 |             looter.download_videos(self.destfs, media_count=1)
154 |             self.assertEqual(mtime, self.destfs.getdetails(video_file.name).accessed)
155 | 
156 |     @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
157 |     def test_issue_022(self):
158 |         """
159 |         Thanks to @kuchenmitsahne for reporting this bug.
160 | 
161 |         Checks that using ``{datetime}`` in the template does not put
162 |         a Windows forbidden character in the filename.
163 |         """
164 |         FORBIDDEN = set('<>:"/\|?*')
165 |         looter = ProfileLooter("nintendo", template="{datetime}", session=self.session)
166 |         with contexter.Contexter() as ctx:
167 |             ctx << mock.patch.object(looter, 'pages', MockPages('nintendo'))
168 |             looter.download(self.destfs, media_count=5)
169 |         for f in self.destfs.scandir("/"):
170 |             self.assertFalse(FORBIDDEN.intersection(f.name))
171 | 
172 |     @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
173 |     @unittest.skipUnless(PIL, "PIL required for this test")
174 |     def test_issue_026(self):
175 |         """
176 |         Feature request by @verafide.
177 | 
178 |         Checks that pictures that are downloaded are not
179 |         resized.
180 |         """
181 |         PostLooter("BO0XpEshejh", session=self.session).download(self.destfs)
182 |         pic = PIL.Image.open(self.destfs.getsyspath("1419863760138791137.jpg"))
183 |         self.assertEqual(pic.size, (525, 612))
184 | 
185 |     @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
186 |     def test_issue_039(self):
187 |         """
188 |         Feature request by @verafide
189 | 
190 |         Checks that all pictures are downloaded from posts
191 |         with more than one picture.
192 |         """
193 |         looter = PostLooter("BRHecUuFhPl", session=self.session)
194 |         looter.download(self.destfs)
195 |         self.assertEqual(
196 |             set(self.destfs.listdir("/")),
197 |             {
198 |                 "1461270165803344956.jpg",
199 |                 "1461270167497776767.jpg",
200 |                 "1461270174435133336.jpg",
201 |                 "1461270172581471925.jpg",
202 |                 "1461270181565655668.jpg",
203 |             }
204 |         )
205 | 
206 |     @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
207 |     def test_issue_042(self):
208 |         """
209 |         Thanks to @MohamedIM for reporting this bug.
210 | 
211 |         Checks that a multipost is successfully downloaded from
212 |         the CLI `post` option.
213 |         """
214 |         looter = PostLooter('BRW-j_dBI6F', get_videos=True, session=self.session)
215 |         looter.download(self.destfs)
216 |         self.assertEqual(
217 |             set(self.destfs.listdir("/")),
218 |             {
219 |                 '1465633492745668095.mp4',
220 |                 '1465633517836005761.mp4',
221 |                 '1465633541559037966.mp4',
222 |                 '1465633561523918792.mp4',
223 |             }
224 |         )
225 | 
226 |     # OUTDATED: warn_windows is not used anymore
227 |     #
228 |     # def test_issue_044(self):
229 |     #     """
230 |     #     Thanks to @Bangaio64 for reporting this bug.
231 |     #
232 |     #     Checks that warn_windows does not trigger an exception.
233 |     #     """
234 |     #     import instalooter.utils
235 |     #     warnings.showwarning = instalooter.utils.warn_windows
236 |     #     looter = instalooter.InstaLooter(
237 |     #         directory=self.tmpdir,
238 |     #         profile="akjhdskjhfkjsdhfkjhdskjhfkjdshkfjhsdkjfdhkjdfshdfskhfd"
239 |     #     )
240 |     #     try:
241 |     #         looter.download()
242 |     #     except Exception:
243 |     #         self.fail()
244 | 
245 |     @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
246 |     def test_issue_041(self):
247 |         """Feature request by @liorlior
248 | 
249 |         Allow downloading only videos.
250 |         """
251 |         looter = ProfileLooter("nintendo", videos_only=True, session=self.session)
252 |         day = datetime.date(2017, 3, 10)
253 |         with contexter.Contexter() as ctx:
254 |             ctx << mock.patch.object(looter, 'pages', MockPages('nintendo'))
255 |             looter.download(self.destfs, timeframe=[day, day])
256 |         self.assertEqual(self.destfs.listdir("/"), ["1467639884243493431.mp4"])
257 | 
258 |     @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
259 |     def test_issue_052(self):
260 |         """Thanks to @cyrusclarke for reporting this bug.
261 | 
262 |         Checks that on hashtags with a lot of posts, the time parameter
263 |         doesn't cause the program to crash without finding any media to
264 |         download.
265 |         """
266 |         main(["hashtag", "happy", self.tmpdir, "-q", "-t", "thisweek", "-n", "5"])
267 |         self.assertGreaterEqual(len(self.destfs.listdir('/')), 5)
268 | 
269 |     # OUTDATED: Sidecar info dicts are not converted anymore but passed
270 |     #           to the workers directly.
271 |     #
272 |     # def test_issue_057(self):
273 |     #     """
274 |     #     Thanks to @VasiliPupkin256 for reporting this bug.
275 |     #
276 |     #     Checks that metadata can successfully extract caption
277 |     #     out of multiposts containing images.
278 |     #     """
279 |     #     looter = ProfileLooter("awwwwshoot_ob", session=self.session)
280 |     #     sidecar = next(m for m in looter.medias() if m['__typename'] == "GraphSidecar")
281 |     #
282 |     #     looter = PostLooter(sidecar['shortcode'], session=self.session)
283 |     #     looter.download(self.destfs)
284 |     #
285 |     #     for key in ('caption', 'code', 'date'):
286 |     #         self.assertIn(key, media)
287 |     #         self.assertIsNotNone(media[key])
288 | 
289 |     @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
290 |     def test_issue_066(self):
291 |         """Thanks to @douglasrizzo for reporting this bug.
292 | 
293 |         Check that likescount and commentscount can be used
294 |         in filename templates without causing the program to
295 |         crash.
296 |         """
297 |         looter = ProfileLooter(
298 |             "nintendo", get_videos=True, add_metadata=True,
299 |             template='{id}-{likescount}-{commentscount}',
300 |             session=self.session)
301 |         with contexter.Contexter() as ctx:
302 |             ctx << mock.patch.object(looter, 'pages', MockPages('nintendo'))
303 |             looter.download(self.destfs, media_count=10)
304 |         for image in self.destfs.listdir("/"):
305 |             self.assertRegex(image, '[a-zA-Z0-9]*-[0-9]*-[0-9]*.(jpg|mp4)')
306 | 
307 |     @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
308 |     def test_issue_076(self):
309 |         """Thanks to @zeshuaro for reporting this bug.
310 | 
311 |         Check that when downloading hashtags, the downloader
312 |         actually stops.
313 |         """
314 |         looter = HashtagLooter("oulianov", session=self.session)
315 | 
316 |         medias_it = looter.medias()
317 |         postcount = length_hint(medias_it)
318 | 
319 |         for i, m in enumerate(medias_it):
320 |             if i > postcount:
321 |                 self.fail("looter.medias() did not stop.")
322 | 
323 |     # OUTDATED: URLs are not modified anymore as Instagram prevents
324 |     #           any modification
325 |     #
326 |     # def test_issue_082(self):
327 |     #     """
328 |     #     Thanks to @MohamedIM for reporting this bug.
329 |     #
330 |     #     Check that urls containing 'h-ak-igx' are not stripped from all
331 |     #     their parameters.
332 |     #     """
333 |     #     looter = instalooter.looter.PostLooter('BWOYSYQDCo5', template='{code}')
334 |     #     info = next(looter.medias())
335 |     #
336 |     #     info['display_url'] = \
337 |     #         'https://ig-s-c-a.akamaihd.net/h-ak-igx/19764472_1586345694718446_4011887281420894208_n.jpg'
338 |     #     looter.get_post_info = lambda code: info
339 |     #
340 |     #     looter.download_post('BWOYSYQDCo5')
341 |     #
342 |     #     with open(os.path.join(self.tmpdir, 'BWOYSYQDCo5.jpg'), 'rb') as f:
343 |     #         self.assertNotIn(b'5xx Server Error', f.read())
344 | 
345 |     @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
346 |     def test_issue_084(self):
347 |         """Thanks to @raphaelbernardino for reporting this bug.
348 | 
349 |         Make sure private profiles with few pictures (less than a page worth)
350 |         raise the private error as expected.
351 |         """
352 |         looter = ProfileLooter("rararudo", session=self.session)
353 |         self.assertRaises(RuntimeError, looter.medias)
354 | 
355 |     @unittest.expectedFailure
356 |     @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
357 |     @unittest.skipUnless(piexif, "piexif required for this test")
358 |     def test_issue_094(self):
359 |         """Thanks to @jeanmarctst for raising this issue.
360 | 
361 |         Make sure caption is properly extracted from images downloaded
362 |         from a post code and written to the metadata.
363 |         """
364 |         looter = PostLooter("BY77tSfBnRm",
365 |                             add_metadata=True, template='{code}', session=self.session)
366 |         looter.download(self.destfs)
367 |         metadata = piexif.load(self.destfs.getbytes("BY77tSfBnRm.jpg"), True)
368 |         self.assertTrue(metadata['Exif']['UserComment'])
369 | 
370 |     def test_issue_125(self):
371 |         """Thanks to @applepanda for reporting this bug.
372 | 
373 |         Make sure colons in path do not cause issue in batch mode.
374 |         """
375 |         configfile = six.StringIO(textwrap.dedent(
376 |             """
377 |             [Family]
378 |             users =
379 |             	instagram: D:\\Instagram\\Profiles\\instagram
380 |             	therock: D:\\Instagram\\Profiles\\therock
381 |             """
382 |         ))
383 |         runner = BatchRunner(configfile)
384 |         self.assertEqual(
385 |             runner.get_targets(runner._get('Family', 'users')),
386 |             {'instagram': 'D:\\Instagram\\Profiles\\instagram',
387 |              'therock': 'D:\\Instagram\\Profiles\\therock'}
388 |         )
389 | 
390 |     @mock.patch('instalooter.looters.InstaLooter.__init__')
391 |     def test_issue_184(self, _):
392 |         """Feature request by @ghost.
393 | 
394 |         Allow downloading a post directly from its URL.
395 |         """
396 |         looter = PostLooter("https://www.instagram.com/p/BJlIB9WhdRn/?taken-by=2k")
397 |         self.assertEqual(looter.code, "BJlIB9WhdRn")
398 | 
399 |     @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
400 |     def test_issue_185(self):
401 |         """Feature request by @JPNYC81.
402 | 
403 |         Make sure an ``instalooter`` batch keeps even if it encounters errors
404 |         on a specific job. This test tries with an non-existing profile.
405 |         """
406 |         configfile = six.StringIO(textwrap.dedent(
407 |             """
408 |             [Family]
409 |             num-to-dl = 3
410 |             users =
411 |                 jdskjhjkfhkdshfkjdhsfjsfdkjhfksdjhf: {tmp}
412 |             	instagram: {tmp}
413 |             	therock: {tmp}
414 |             """
415 |         ).format(tmp=self.tmpdir))
416 |         runner = BatchRunner(configfile)
417 |         with mock.patch('instalooter.batch.logger'):
418 |             runner.run_all()
419 |         self.assertGreaterEqual(len(self.destfs.listdir('/')), 6)
420 | 
421 |     @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
422 |     def test_issue_194(self):
423 |         """Feature request by @raphaelbernardino
424 | 
425 |         When trying to download from an non-existing user, try to display a
426 |         meaningful message instead of a cryptic error.
427 |         """
428 |         username = "jdhfdjkhdlqdhfdhqfqjqlhfhdsdjquryerhdjfhqlkdfhkqhfqkure"
429 |         looter = ProfileLooter(username)
430 |         with self.assertRaises(ValueError) as ctx:
431 |             media = next(looter.medias())
432 |         self.assertEqual(str(ctx.exception), "user not found: '{}'".format(username))
433 | 
434 | 
435 | # @mock.patch('instalooter.looter.requests.Session', lambda: TestPullRequests.session)
436 | class TestPullRequests(unittest.TestCase):
437 | 
438 |     @classmethod
439 |     def setUpClass(cls):
440 |         cls.session = requests.Session()
441 | 
442 |     @classmethod
443 |     def tearDownClass(cls):
444 |         cls.session.close()
445 | 
446 |     def setUp(self):
447 |         self.destfs = fs.open_fs("temp://")
448 |         self.tmpdir = self.destfs.getsyspath("/")
449 | 
450 |     def tearDown(self):
451 |         self.destfs.close()
452 |         if os.getenv("CI") == "true":
453 |             time.sleep(1)
454 | 
455 |     def _pr_122_looter(self):
456 |         return ProfileLooter('nintendo', template='{code}', session=self.session)
457 | 
458 |     @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
459 |     def test_pr_122_download_post(self):
460 |         """Feature implemented by @susundberg.
461 | 
462 |         Set the access time and modification time of a downloaded media
463 |         according to its IG date.
464 |         """
465 |         code = 'BY77tSfBnRm'
466 |         post_looter = PostLooter(code, session=self.session, template='{code}')
467 |         info = post_looter.get_post_info(code)
468 |         post_looter.download(self.destfs)
469 |         stat = self.destfs.getdetails('{}.jpg'.format(code))
470 |         self.assertEqual(stat.raw["details"]["accessed"], info['taken_at_timestamp'])
471 |         self.assertEqual(stat.raw["details"]["modified"], info['taken_at_timestamp'])
472 | 
473 |     @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
474 |     def test_pr_122_download_pictures(self):
475 |         """Feature implemented by @susundberg.
476 | 
477 |         Set the access time and modification time of a downloaded media
478 |         according to its IG date.
479 |         """
480 |         # Test download_pictures
481 |         looter = self._pr_122_looter()
482 |         with contexter.Contexter() as ctx:
483 |             ctx << mock.patch.object(looter, 'pages', MockPages('nintendo'))
484 |             pic = next(m for m in looter.medias() if not m['is_video'])
485 |             looter.download_pictures(self.destfs, media_count=1)
486 |         stat = self.destfs.getdetails('{}.jpg'.format(pic['shortcode']))
487 |         self.assertEqual(stat.raw["details"]["accessed"], pic['taken_at_timestamp'])
488 |         self.assertEqual(stat.raw["details"]["modified"], pic['taken_at_timestamp'])
489 | 
490 |     @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
491 |     def test_pr_122_download_videos(self):
492 |         """Feature implemented by @susundberg.
493 | 
494 |         Set the access time and modification time of a downloaded media
495 |         according to its IG date.
496 |         """
497 |         # Test download_videos
498 |         looter = self._pr_122_looter()
499 |         with contexter.Contexter() as ctx:
500 |             ctx << mock.patch.object(looter, 'pages', MockPages('nintendo'))
501 |             vid = next(m for m in looter.medias() if m['is_video'])
502 |             looter.download_videos(self.destfs, media_count=1)
503 |         stat = self.destfs.getdetails('{}.mp4'.format(vid['shortcode']))
504 |         self.assertEqual(stat.raw["details"]["accessed"], vid['taken_at_timestamp'])
505 |         self.assertEqual(stat.raw["details"]["modified"], vid['taken_at_timestamp'])
506 | 
507 | 
508 | def setUpModule():
509 |    warnings.simplefilter('ignore')  # suppress every warning while this module's tests run
510 | 
511 | 
512 | def tearDownModule():
513 |    warnings.simplefilter(warnings.defaultaction)  # put the default action back in front of the 'ignore' filter
514 | 


--------------------------------------------------------------------------------
/tests/test_login.py:
--------------------------------------------------------------------------------
 1 | # coding: utf-8
 2 | from __future__ import absolute_import
 3 | from __future__ import unicode_literals
 4 | 
 5 | import os
 6 | import unittest
 7 | 
 8 | import requests
 9 | import fs.memoryfs
10 | 
11 | from instalooter.looters import InstaLooter, ProfileLooter
12 | 
13 | 
14 | USERNAME = os.getenv("IG_USERNAME")
15 | PASSWORD = os.getenv("IG_PASSWORD")
16 | 
17 | try:
18 |     CONNECTION_FAILURE = not requests.get("https://instagr.am/instagram").ok
19 | except requests.exceptions.ConnectionError:
20 |     CONNECTION_FAILURE = True
21 | 
22 | 
23 | @unittest.skipIf(os.getenv("CI") == "true", "not supported in CI")
24 | @unittest.skipUnless(USERNAME and PASSWORD, "credentials required")
25 | class TestLogin(unittest.TestCase):
26 | 
27 |     @classmethod
28 |     def setUpClass(cls):
29 |         cls.session = requests.Session()
30 | 
31 |     @classmethod
32 |     def tearDownClass(cls):
33 |         cls.session.close()
34 | 
35 |     def setUp(self):
36 |         self.looter = ProfileLooter(USERNAME, template="test")
37 |         self.destfs = fs.memoryfs.MemoryFS()
38 | 
39 |     def tearDown(self):
40 |         self.destfs.close()
41 | 
42 |     def test_login(self):
43 | 
44 |         self.assertFalse(self.looter.logged_in())
45 |         self.assertRaises(RuntimeError, self.looter.medias)
46 |         self.assertFalse(self.looter._cachefs().exists(self.looter._COOKIE_FILE))
47 | 
48 |         try:
49 |             self.looter.login(USERNAME, PASSWORD)
50 |             self.assertTrue(self.looter.logged_in())
51 |             self.assertTrue(self.looter._cachefs().exists(self.looter._COOKIE_FILE))
52 |             self.assertTrue(next(self.looter.medias()))
53 |         finally:
54 |             self.looter.logout()
55 |             self.assertFalse(self.looter._cachefs().exists(self.looter._COOKIE_FILE))
56 | 
57 |     def test_download(self):
58 |         try:
59 |             self.looter.login(USERNAME, PASSWORD)
60 |             self.looter.download(self.destfs)
61 |             self.assertTrue(self.destfs.exists('test.jpg'))
62 |             self.assertEqual(self.destfs.getbytes('test.jpg')[6:10], b'JFIF')
63 |         finally:
64 |             self.looter.logout()
65 | 


--------------------------------------------------------------------------------
/tests/test_looter.py:
--------------------------------------------------------------------------------
  1 | # coding: utf-8
  2 | from __future__ import absolute_import
  3 | from __future__ import unicode_literals
  4 | 
  5 | import datetime
  6 | import os
  7 | import time
  8 | import unittest
  9 | import warnings
 10 | 
 11 | import fs.memoryfs
 12 | import parameterized
 13 | import requests
 14 | import six
 15 | 
 16 | from instalooter.looters import InstaLooter, ProfileLooter, HashtagLooter, PostLooter
 17 | 
 18 | from .utils import mock
 19 | from .utils.method_names import signature
 20 | 
 21 | 
 22 | try:
 23 |     CONNECTION_FAILURE = not requests.get("https://instagr.am/instagram").ok
 24 | except requests.exceptions.ConnectionError:
 25 |     CONNECTION_FAILURE = True
 26 | 
 27 | 
 28 | class TestInstaLooter(unittest.TestCase):
 29 | 
 30 |     MEDIA_COUNT = 5
 31 | 
 32 |     @classmethod
 33 |     def setUpClass(cls):
 34 |         cls.session = requests.Session()
 35 | 
 36 |     @classmethod
 37 |     def tearDownClass(cls):
 38 |         cls.session.close()
 39 | 
 40 |     def setUp(self):
 41 |         self.destfs = fs.memoryfs.MemoryFS()
 42 | 
 43 |     def tearDown(self):
 44 |         self.destfs.close()
 45 |         if os.getenv("CI") == "true":
 46 |             time.sleep(1)
 47 | 
 48 |     @parameterized.parameterized.expand([
 49 |         parameterized.param("instagram",),
 50 |         parameterized.param("instagram", get_videos=True),
 51 |         # parameterized.param("serotonine",),
 52 |     ], testcase_func_name=signature)
 53 |     @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
 54 |     def test_profile(self, profile, **kwargs):
 55 |         looter = ProfileLooter(profile, session=self.session, **kwargs)
 56 |         looter.download(self.destfs, media_count=self.MEDIA_COUNT)
 57 |         self.assertGreaterEqual(len(self.destfs.listdir("/")), self.MEDIA_COUNT)
 58 | 
 59 |     @parameterized.parameterized.expand([
 60 |         parameterized.param("eggs"),
 61 |         parameterized.param("python", videos_only=True),
 62 |     ], testcase_func_name=signature)
 63 |     @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
 64 |     def test_hashtag(self, hashtag, **kwargs):
 65 |         looter = HashtagLooter(hashtag, session=self.session, **kwargs)
 66 |         looter.download(self.destfs, media_count=self.MEDIA_COUNT)
 67 |         self.assertGreaterEqual(len(self.destfs.listdir("/")), self.MEDIA_COUNT)
 68 | 
 69 |     @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
 70 |     def test_timeframe_datetime(self):
 71 |         looter = HashtagLooter("protein")
 72 |         now = datetime.datetime.now()
 73 |         timeframe = now - datetime.timedelta(5), now - datetime.timedelta(7)
 74 |         media = next(looter.medias(timeframe=timeframe))
 75 | 
 76 |         taken_at = datetime.datetime.fromtimestamp(media["taken_at_timestamp"])
 77 |         self.assertLessEqual(taken_at, max(timeframe))
 78 |         self.assertGreaterEqual(taken_at, min(timeframe))
 79 | 
 80 |     @unittest.skipIf(CONNECTION_FAILURE, "cannot connect to Instagram")
 81 |     def test_timeframe_date(self):
 82 |         looter = HashtagLooter("protein")
 83 |         today = datetime.date.today()
 84 |         timeframe = today - datetime.timedelta(5), today - datetime.timedelta(7)
 85 |         media = next(looter.medias(timeframe=timeframe))
 86 | 
 87 |         taken_at = datetime.datetime.fromtimestamp(media["taken_at_timestamp"])
 88 |         self.assertLessEqual(taken_at.date(), max(timeframe))
 89 |         self.assertGreaterEqual(taken_at.date(), min(timeframe))
 90 | 
 91 | 
 92 | class TestPostLooter(unittest.TestCase):
 93 | 
 94 |     def tearDown(self):
 95 |         if os.getenv("CI") == "true":
 96 |             time.sleep(1)
 97 | 
 98 |     @mock.patch('instalooter.looters.InstaLooter.__init__')
 99 |     def test_post_url(self, _):
100 |         urls = (
101 |             "http://www.instagram.com/p/BJlIB9WhdRn/?taken-by=2k",
102 |             "https://www.instagram.com/p/BJlIB9WhdRn/?taken-by=2k",
103 |             "www.instagram.com/p/BJlIB9WhdRn/?taken-by=2k",
104 |             "http://instagr.am/p/BJlIB9WhdRn/?taken-by=2k",
105 |             "https://instagr.am/p/BJlIB9WhdRn/?taken-by=2k",
106 |             "instagr.am/p/BJlIB9WhdRn/?taken-by=2k",
107 |         )
108 |         for url in urls:
109 |             looter = PostLooter(url)
110 |             self.assertEqual(looter.code, "BJlIB9WhdRn")
111 | 
112 |     @mock.patch('instalooter.looters.InstaLooter.__init__')
113 |     def test_invalid_post_code(self, _):
114 |         with self.assertRaises(ValueError):
115 |             looter = PostLooter("instagram")  # invalid code
116 | 
117 | 
118 | # class TestTemplate(_TempTestCase):
119 | #
120 | #     MEDIA_COUNT = 30
121 | #
122 | #     def test_template_1(self):
123 | #         profile = "therock"
124 | #         looter = instaLooter.InstaLooter(
125 | #             self.tmpdir, profile=profile, get_videos=True,
126 | #             template='{username}-{id}'
127 | #         )
128 | #         looter.download(media_count=self.MEDIA_COUNT, with_pbar=False)
129 | #         for f in os.listdir(self.tmpdir):
130 | #             self.assertTrue(f.startswith(profile))
131 | #
132 | #
133 | # class TestDump(_TempTestCase):
134 | #
135 | #     def assertMediaEqual(self, media, dump):
136 | #         for key in ['__typename', 'date', 'dimensions', 'display_src',
137 | #                     'is_video', 'media_preview']:
138 | #             self.assertEqual(media[key], dump[key])
139 | #
140 | #         self.assertEqual(
141 | #             media.get('code') or media['shortcode'],
142 | #             dump.get('code' or dump['shortcode'])
143 | #         )
144 | #         self.assertEqual(
145 | #             media['owner']['id'],
146 | #             dump['owner']['id']
147 | #         )
148 | #         self.assertIn('likes', dump)
149 | #         self.assertIn('comments', dump)
150 | #
151 | #     def test_dump_json(self):
152 | #         looter = instaLooter.InstaLooter(
153 | #             self.tmpdir,
154 | #             profile="instagram",
155 | #             dump_json=True,
156 | #         )
157 | #         test_medias = list(itertools.islice(
158 | #             (m for m in looter.medias() if not m['is_video']), 3))
159 | #         looter.download(media_count=3)
160 | #
161 | #         # Check all files were downloaded as expected
162 | #         self.assertEqual(
163 | #             sorted(os.listdir(self.tmpdir)),
164 | #             sorted(f for media in test_medias for f in (
165 | #                 str("{}.jpg").format(media['id']),
166 | #                 str("{}.json").format(media['id']),
167 | #             ))
168 | #         )
169 | #
170 | #         # Check the metadata are OK
171 | #         for media in test_medias:
172 | #             with open(os.path.join(self.tmpdir, "{}.json").format(media['id'])) as f:
173 | #                 dump = json.load(f)
174 | #             self.assertMediaEqual(media, dump)
175 | #
176 | #     def test_dump_only(self):
177 | #         looter = instaLooter.InstaLooter(
178 | #             self.tmpdir,
179 | #             profile="instagram",
180 | #             dump_only=True,
181 | #         )
182 | #         test_medias = list(itertools.islice(
183 | #             (m for m in looter.medias() if not m['is_video']), 3))
184 | #         looter.download(media_count=3)
185 | #
186 | #         # Check all files were downloaded as expected
187 | #         self.assertEqual(
188 | #             sorted(os.listdir(self.tmpdir)),
189 | #             sorted(str("{}.json").format(media['id']) for media in test_medias)
190 | #         )
191 | #
192 | #         # Check the metadata are OK
193 | #         for media in test_medias:
194 | #             with open(os.path.join(self.tmpdir, "{}.json").format(media['id'])) as f:
195 | #                 dump = json.load(f)
196 | #             self.assertMediaEqual(media, dump)
197 | #
198 | #     def test_extended_dump(self):
199 | #         looter = instaLooter.InstaLooter(
200 | #             self.tmpdir,
201 | #             profile="instagram",
202 | #             dump_only=True,
203 | #             extended_dump=True,
204 | #         )
205 | #         test_medias = list(itertools.islice(
206 | #             (m for m in looter.medias() if not m['is_video']), 3))
207 | #         looter.download(media_count=3)
208 | #
209 | #         # Check all files were downloaded as expected
210 | #         self.assertEqual(
211 | #             sorted(os.listdir(self.tmpdir)),
212 | #             sorted(str("{}.json").format(media['id']) for media in test_medias)
213 | #         )
214 | #
215 | #         # Check the metadata are OK
216 | #         for media in test_medias:
217 | #             with open(os.path.join(self.tmpdir, "{}.json").format(media['id'])) as f:
218 | #                 dump = json.load(f)
219 | #             self.assertMediaEqual(media, dump)
220 | #
221 | #             # Check the dump was "extended"
222 | #             self.assertIn('edge_media_to_comment', dump)
223 | #             self.assertIn('edge_media_to_caption', dump)
224 | #
225 | #
226 | # class TestUtils(_TempTestCase):
227 | #
228 | #     MEDIA_COUNT = 30
229 | #
230 | #     def setUp(self):
231 | #         super(TestUtils, self).setUp()
232 | #         self.looter = instaLooter.InstaLooter()
233 | #
234 | #     def test_extract_post_code_from_url(self):
235 | #         url = "https://www.instagram.com/p/BFB6znLg5s1/"
236 | #
237 | #         self.assertEqual(
238 | #             self.looter._extract_code_from_url(url),
239 | #             'BFB6znLg5s1',
240 | #         )
241 | #
242 | #         with self.assertRaises(ValueError):
243 | #             self.looter._extract_code_from_url(
244 | #                 'https://www.instagram.com/'
245 | #             )
246 | #
247 | #     def test_get_owner_info(self):
248 | #         therock = self.looter.get_owner_info("BTHqEhWFR4y")
249 | #         self.assertEqual(therock['username'], 'therock')
250 | #         self.assertEqual(therock['id'], '232192182')
251 | #         self.assertFalse(therock['is_private'])
252 | #
253 | #         gearbox = self.looter.get_owner_info("BfMWE3aFsEh")
254 | #         self.assertEqual(gearbox['username'], 'gearboxsoftware')
255 | #         self.assertEqual(gearbox['id'], '1409542965')
256 | #         self.assertFalse(gearbox['is_private'])
257 | #
258 | #     def test_url_generator_nocallable(self):
259 | #         with self.assertRaises(ValueError):
260 | #             self.looter = instaLooter.InstaLooter(
261 | #                 self.tmpdir, profile="instagram", url_generator=1
262 | #             )
263 | #
264 | #     @unittest.skipIf(sys.version_info < (3,4),
265 | #                      "operator.length_hint is a 3.4+ feature.")
266 | #     def test_length_hint_empty(self):
267 | #
268 | #         looter = instaLooter.InstaLooter(profile="jkshksjdhfjkhdkfhk")
269 | #         self.assertEqual(operator.length_hint(looter), 0)
270 | #
271 | #         looter = instaLooter.InstaLooter(hashtag="jkshksjdhfjkhdkfhk")
272 | #         self.assertEqual(operator.length_hint(looter), 0)
273 | #
274 | #     @unittest.skipIf(sys.version_info < (3,4),
275 | #                      "operator.length_hint is a 3.4+ feature.")
276 | #     def test_length_hint(self):
277 | #
278 | #         looter = instaLooter.InstaLooter(self.tmpdir, profile="tide")
279 | #         hint = operator.length_hint(looter)
280 | #
281 | #         # Check the post count is greater than 0
282 | #         self.assertGreater(hint, 0)
283 | #
284 | #         # Download pictures and check if the count
285 | #         # match (at most as many posts downloaded)
286 | #         looter.download()
287 | #         self.assertLessEqual(len(os.listdir(self.tmpdir)), hint)
288 | 
289 | 
290 | # def load_tests(loader, tests, pattern):
291 | #     suite = unittest.TestSuite()
292 | #     TestProfileLooter.register_tests()
293 | #     suite.addTests(loader.loadTestsFromTestCase(TestProfileLooter))
294 | #     # suite.addTests(loader.loadTestsFromTestCase(TestHashtagDownload))
295 | #     # suite.addTests(loader.loadTestsFromTestCase(TestTemplate))
296 | #     return suite
297 | 
298 | 
299 | def setUpModule():
300 |    warnings.simplefilter('ignore')
301 | 
302 | 
303 | def tearDownModule():
304 |    warnings.simplefilter(warnings.defaultaction)
305 | 


--------------------------------------------------------------------------------
/tests/test_pbar.py:
--------------------------------------------------------------------------------
 1 | # coding: utf-8
 2 | from __future__ import absolute_import
 3 | from __future__ import unicode_literals
 4 | 
 5 | import threading
 6 | import unittest
 7 | import warnings
 8 | 
 9 | import six
10 | 
11 | from instalooter.pbar import ProgressBar, TqdmProgressBar
12 | 
13 | 
class TestProgressBar(unittest.TestCase):
    """Checks for a ``ProgressBar`` subclass and for ``TqdmProgressBar``."""

    def test_derived_progress_bar(self):

        class MyProgressBar(ProgressBar):
            # Counters written by the overridden hooks and inspected by the
            # assertions below.
            _test = {"update": 0, "max": None}

            def update(self):
                self._test['update'] += 1

            def set_maximum(self, maximum):
                self._test['max'] = maximum

        bar = MyProgressBar(iter(range(10)))
        recorded = bar._test
        self.assertEqual(recorded['update'], 0)
        self.assertIsNone(recorded['max'])

        first = next(bar)
        self.assertEqual(first, 0)
        self.assertEqual(recorded['update'], 1)

        bar.set_maximum(10)
        self.assertEqual(recorded['max'], 10)

        # Draining the bar yields the remaining items, then it is exhausted.
        remaining = list(bar)
        self.assertEqual(remaining, list(range(1, 10)))
        with self.assertRaises(StopIteration):
            next(bar)
        self.assertEqual(recorded['update'], 10)
        bar.finish()

        # Asking for the lock before one was set is expected to fail.
        with self.assertRaises(RuntimeError):
            bar.get_lock()
        lock = threading.RLock()
        bar.set_lock(lock)
        self.assertIs(bar.get_lock(), lock)

    def test_tqdm_progress_bar(self):

        sink = six.moves.StringIO()
        bar = TqdmProgressBar(iter(range(10)), file=sink)

        self.assertEqual(bar.n, 0)
        self.assertIsNone(bar.total)

        first = next(bar)
        self.assertEqual(first, 0)
        self.assertEqual(bar.n, 1)
        self.assertIsNone(bar.total)

        bar.set_maximum(10)
        self.assertEqual(bar.total, 10)

        remaining = list(bar)
        self.assertEqual(remaining, list(range(1, 10)))
        with self.assertRaises(StopIteration):
            next(bar)
        self.assertEqual(bar.n, 10)
        bar.finish()

        lock = threading.RLock()
        bar.set_lock(lock)
        self.assertIs(bar.get_lock(), lock)
68 | 
69 | 
70 | def setUpModule():
71 |    warnings.simplefilter('ignore')
72 | 
73 | 
74 | def tearDownModule():
75 |    warnings.simplefilter(warnings.defaultaction)
76 | 


--------------------------------------------------------------------------------
/tests/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | # coding: utf-8
 2 | from __future__ import absolute_import
 3 | 
 4 | try:
 5 |     from unittest import mock
 6 | except ImportError:
 7 |     import mock
 8 | 
 9 | from . import ig_mock
10 | from . import method_names
11 | 


--------------------------------------------------------------------------------
/tests/utils/ig_mock.py:
--------------------------------------------------------------------------------
 1 | # coding: utf-8
 2 | from __future__ import absolute_import
 3 | from __future__ import unicode_literals
 4 | 
 5 | import os
 6 | import json
 7 | 
 8 | import fs.path
 9 | import instalooter.looters
10 | 
11 | 
# Absolute path of the directory holding this module, and the filesystem URL
# of the ``ig_mock.tar.gz`` archive located in that directory.
_test_dir = os.path.abspath(os.path.join(__file__, os.pardir))
_url = "tar://{}/ig_mock.tar.gz".format(_test_dir)


def get_mock_fs():
    """Open the archive addressed by ``_url`` and return the filesystem."""
    mock_archive = fs.open_fs(_url)
    return mock_archive
18 | 
19 | 
class MockPages(object):
    """Callable returning an iterator over the pages stored for a profile.

    The pages are read from the ``pages/<profile>`` JSON entry of the
    filesystem returned by ``get_mock_fs``.
    """

    def __init__(self, profile):
        self.profile = profile

    def __call__(self):
        entry = "pages/{}".format(self.profile)
        with get_mock_fs() as archive:
            with archive.open(entry) as handle:
                # The JSON document is decoded in full before the handles
                # close, so the returned iterator does not depend on them.
                stored_pages = json.load(handle)
                return iter(stored_pages)
29 | 
30 | 
if __name__ == "__main__":
    # Running this module as a script rebuilds the archive addressed by
    # ``_url`` from data produced by the looters.

    # Delete a previous archive, if any, before recreating it.
    with fs.open_fs(_test_dir) as test_fs:
        if test_fs.exists(fs.path.basename(_url)):
            test_fs.remove(fs.path.basename(_url))

    with fs.open_fs(_url, create=True) as mockfs:
        mockfs.makedir("pages", recreate=True)
        # Store every page yielded by the "nintendo" profile looter.
        nintendo = instalooter.looters.ProfileLooter("nintendo")
        with mockfs.open("pages/nintendo", "w") as f:
            json.dump(list(nintendo.pages()), f)

        # Store only the first three pages of the "fluoxetine" hashtag.
        # NOTE(review): the profile dump calls ``pages()`` while this one
        # reads a ``pages_it`` attribute -- confirm that HashtagLooter
        # actually exposes it.
        fluoxetine = instalooter.looters.HashtagLooter("fluoxetine")
        with mockfs.open("pages/fluoxetine", "w") as f:
            pages_it = fluoxetine.pages_it
            json.dump([next(pages_it) for _ in range(3)], f)
47 | 


--------------------------------------------------------------------------------
/tests/utils/ig_mock.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/althonos/InstaLooter/468f76caced67560214d5e2e6e745d7ffb2c0674/tests/utils/ig_mock.tar.gz


--------------------------------------------------------------------------------
/tests/utils/method_names.py:
--------------------------------------------------------------------------------
 1 | # coding: utf-8
 2 | from __future__ import absolute_import
 3 | 
 4 | def signature(func, param_num, params):
 5 |     args = ','.join("{!r}".format(a) for a in params.args)
 6 |     kwargs = ','.join("{}={!r}".format(k, v) for k,v in params.kwargs.items())
 7 |     if args and kwargs:
 8 |         return "{}({},{})".format(func.__name__, args, kwargs)
 9 |     else:
10 |         return "{}({})".format(func.__name__, args or kwargs)
11 | 
def firstparam(func, param_num, params):
    """Build a test name from the function name and its first positional argument.

    Returns ``name(repr(first))``; ``param_num`` is unused.
    """
    name = func.__name__
    first = params.args[0]
    return "{}({})".format(name, repr(first))
14 | 
def num(func, param_num, params):
    """Build a test name from the function name and the parameter set index.

    Returns ``name_index``; ``params`` is unused.
    """
    return "_".join((func.__name__, str(param_num)))
17 | 


--------------------------------------------------------------------------------