├── tests ├── __init__.py ├── test_pagination.py ├── test_replay.py ├── test_live.py └── test_media.py ├── .github ├── FUNDING.yml ├── PULL_REQUEST_TEMPLATE.md ├── ISSUE_TEMPLATE │ ├── feature-request.md │ ├── question-others.md │ └── bug-report.md └── ISSUE_TEMPLATE.md ├── requirements.txt ├── requirements-dev.txt ├── instagram_private_api_extensions ├── __init__.py ├── compat.py ├── pagination.py ├── replay.py ├── media.py └── live.py ├── docs ├── api.rst ├── make.bat ├── index.rst ├── usage.rst └── conf.py ├── LICENSE ├── CONTRIBUTING.md ├── .gitignore ├── setup.py ├── .travis.yml └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | custom: ['https://buymeacoffee.com/ping/'] 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | moviepy==1.0.1 2 | Pillow>=4.0.0 3 | requests>=2.9.1 4 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | flake8 2 | responses==0.6.0 3 | Sphinx>=1.5.1 4 | sphinx-rtd-theme>=0.1.9 5 | pylint 6 | -------------------------------------------------------------------------------- /instagram_private_api_extensions/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 https://github.com/ping 2 | # 3 | # This software is released under the MIT License. 
4 | # https://opensource.org/licenses/MIT 5 | 6 | __version__ = '0.3.9' 7 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## What does this PR do? 2 | 3 | (Briefly describe what this PR is about.) 4 | 5 | ## Why was this PR needed? 6 | 7 | (Briefly describe reasons.) 8 | 9 | ## What are the relevant issue numbers? 10 | 11 | (List issue numbers here.) 12 | 13 | ## Does this PR meet the acceptance criteria? 14 | 15 | - [ ] Passes flake8 (refer to ``.travis.yml``) 16 | - [ ] Docs are buildable 17 | - [ ] Branch has no merge conflicts with ``master`` 18 | - [ ] Is covered by a test 19 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | .. _api: 2 | 3 | Developer Interface 4 | =================== 5 | 6 | This page of the documentation will cover all methods and classes available to the developer. 7 | 8 | - `Media`_ 9 | - `Pagination`_ 10 | - `Live`_ 11 | - `Replay`_ 12 | 13 | .. _api_media: 14 | 15 | Media 16 | ----- 17 | 18 | .. automodule:: instagram_private_api_extensions.media 19 | :members: 20 | 21 | .. _api_pagination: 22 | 23 | Pagination 24 | ---------- 25 | 26 | .. automodule:: instagram_private_api_extensions.pagination 27 | :members: 28 | 29 | .. _api_live: 30 | 31 | Live 32 | ---- 33 | 34 | .. automodule:: instagram_private_api_extensions.live 35 | 36 | .. autoclass:: Downloader 37 | :special-members: __init__ 38 | :inherited-members: 39 | 40 | .. _api_replay: 41 | 42 | Replay 43 | ------ 44 | 45 | .. automodule:: instagram_private_api_extensions.replay 46 | 47 | .. 
autoclass:: Downloader 48 | :special-members: __init__ 49 | :inherited-members: 50 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | set SPHINXPROJ=instagram_private_api_extensions 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 20 | echo.installed, then set the SPHINXBUILD environment variable to point 21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 22 | echo.may add the Sphinx directory to PATH. 23 | echo. 24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature Request 3 | about: Request for new functionality 4 | 5 | --- 6 | 7 | ## Please follow the guide below 8 | 9 | - Issues submitted without this template format will be **ignored**. 10 | - Please read the questions **carefully** and answer completely. 11 | - Do not post screenshots of error messages or code. 12 | - Put an `x` into all the boxes [ ] relevant to your issue (like so [x] no spaces). 13 | - Use the *Preview* tab to see how your issue will actually look like. 
14 | 15 | --- 16 | 17 | ### Before submitting an issue make sure you have: 18 | - [ ] Updated to the latest version v0.3.8 19 | - [ ] Read the [README](https://github.com/ping/instagram_private_api_extensions/blob/master/README.md) 20 | - [ ] [Searched](https://github.com/ping/instagram_private_api_extensions/search?type=Issues) the bugtracker for similar issues including **closed** ones 21 | 22 | --- 23 | 24 | ### Describe your Feature Request: 25 | 26 | Please make sure the description is worded well enough to be understood with as much context and examples as possible. 27 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question-others.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Question/Others 3 | about: Not an error or feature request 4 | 5 | --- 6 | 7 | ## Please follow the guide below 8 | 9 | - Issues submitted without this template format will be **ignored**. 10 | - Please read the questions **carefully** and answer completely. 11 | - Do not post screenshots of error messages or code. 12 | - Put an `x` into all the boxes [ ] relevant to your issue (like so [x] no spaces). 13 | - Use the *Preview* tab to see how your issue will actually look like. 14 | 15 | --- 16 | 17 | ### Before submitting an issue make sure you have: 18 | - [ ] Updated to the latest version v0.3.8 19 | - [ ] Read the [README](https://github.com/ping/instagram_private_api_extensions/blob/master/README.md) 20 | - [ ] [Searched](https://github.com/ping/instagram_private_api_extensions/search?type=Issues) the bugtracker for similar issues including **closed** ones 21 | 22 | --- 23 | 24 | ### Describe your Question/Issue: 25 | 26 | Please make sure the description is worded well enough to be understood with as much context and examples as possible.
27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 ping 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Please follow the guide below 2 | 3 | - Issues submitted without this template format will be **ignored**. 4 | - Please read the questions **carefully** and answer completely. 5 | - Do not post screenshots of error messages or code. 6 | - Put an `x` into all the boxes [ ] relevant to your issue (like so [x] no spaces). 7 | - Use the *Preview* tab to see how your issue will actually look like. 
8 | 9 | --- 10 | 11 | ### Before submitting an issue make sure you have: 12 | - [ ] Updated to the latest version v0.3.9 13 | - [ ] Read the [README](https://github.com/ping/instagram_private_api_extensions/blob/master/README.md) 14 | - [ ] [Searched](https://github.com/ping/instagram_private_api_extensions/search?type=Issues) the bugtracker for similar issues including **closed** ones 15 | 16 | ### Purpose of your issue? 17 | - [ ] Bug report (encountered problems/errors) 18 | - [ ] Feature request (request for a new functionality) 19 | - [ ] Question 20 | - [ ] Other 21 | 22 | --- 23 | 24 | ### Describe your issue: 25 | 26 | Please make sure the description is worded well enough to be understood with as much context and examples as possible. 27 | -------------------------------------------------------------------------------- /instagram_private_api_extensions/compat.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # pylint: disable=unused-import 3 | try: 4 | import urllib.request as compat_urllib_request 5 | except ImportError: # Python 2 6 | import urllib2 as compat_urllib_request 7 | 8 | try: 9 | import urllib.error as compat_urllib_error 10 | except ImportError: # Python 2 11 | import urllib2 as compat_urllib_error 12 | 13 | try: 14 | import urllib.parse as compat_urllib_parse 15 | except ImportError: # Python 2 16 | import urllib as compat_urllib_parse 17 | 18 | try: 19 | from urllib.parse import urlparse as compat_urllib_parse_urlparse 20 | except ImportError: # Python 2 21 | from urlparse import urlparse as compat_urllib_parse_urlparse 22 | 23 | try: 24 | import urllib.parse as compat_urlparse 25 | except ImportError: # Python 2 26 | import urlparse as compat_urlparse 27 | 28 | try: 29 | import http.client as compat_http_client 30 | except ImportError: # Python 2 31 | import httplib as compat_http_client 32 | 33 | try: 34 | from urllib.request import urlretrieve as compat_urlretrieve 35 | except
ImportError: # Python 2 36 | from urllib import urlretrieve as compat_urlretrieve 37 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | ## Issues 4 | When submitting an [issue report](https://github.com/ping/instagram_private_api_extensions/issues/new), please make sure to fill in the details as specified in the [issue template](.github/ISSUE_TEMPLATE.md). 5 | 6 | > This is a strict requirement, and failure to do so will get your issue closed without response. 7 | 8 | 9 | ## Pull Requests 10 | Here are a few simple guidelines to follow if you wish to submit a pull request: 11 | 12 | - [**Submit an Issue**](https://github.com/ping/instagram_private_api_extensions/issues/new) (mark as "Other") describing what you intend to implement. Allow me time to provide feedback so that there is less risk of rework or rejection. 13 | - New endpoints should be accompanied by a **relevant test case**. 14 | - Backward compatibility should not be broken without very good reason. 15 | - I try to maintain a **small dependency footprint**. If you intend to add a new dependency, make sure that there is a strong case for it. 16 | - Run ``flake8 --max-line-length=120`` on your changes before pushing. 17 | - **Please do not take a rejection of a PR personally**. I appreciate your contribution but I reserve the right to be the final arbiter for any changes. You're free to fork my work and tailor it for your needs, it's fine! 18 | 19 | Thank you for your interest.
20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .coveragerc 2 | example*.mp4 3 | example*.jpg 4 | media/ 5 | mpdstub.tar.gz 6 | mpdstub/ 7 | output*.mp4 8 | output*/ 9 | remote.jpg 10 | 11 | venv/ 12 | docs/_build 13 | docs/_static 14 | docs/_templates 15 | 16 | .vscode/ 17 | 18 | *.iml 19 | .idea/ 20 | 21 | # OS generated files # 22 | .DS_Store 23 | .DS_Store? 24 | ._* 25 | .Spotlight-V100 26 | .Trashes 27 | ehthumbs.db 28 | Thumbs.db 29 | 30 | # Byte-compiled / optimized / DLL files 31 | __pycache__/ 32 | *.py[cod] 33 | *$py.class 34 | 35 | # C extensions 36 | *.so 37 | 38 | # Distribution / packaging 39 | .Python 40 | env/ 41 | build/ 42 | develop-eggs/ 43 | dist/ 44 | downloads/ 45 | eggs/ 46 | .eggs/ 47 | lib/ 48 | lib64/ 49 | parts/ 50 | sdist/ 51 | var/ 52 | *.egg-info/ 53 | .installed.cfg 54 | *.egg 55 | 56 | # PyInstaller 57 | # Usually these files are written by a python script from a template 58 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
59 | *.manifest 60 | *.spec 61 | 62 | # Installer logs 63 | pip-log.txt 64 | pip-delete-this-directory.txt 65 | 66 | # Unit test / coverage reports 67 | htmlcov/ 68 | .tox/ 69 | .coverage 70 | .coverage.* 71 | .cache 72 | nosetests.xml 73 | coverage.xml 74 | *,cover 75 | .hypothesis/ 76 | 77 | # Translations 78 | *.mo 79 | *.pot 80 | 81 | # Django stuff: 82 | *.log 83 | 84 | # Sphinx documentation 85 | docs/_build/ 86 | 87 | # PyBuilder 88 | target/ 89 | 90 | #Ipython Notebook 91 | .ipynb_checkpoints 92 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from os import path 2 | import io 3 | try: 4 | from setuptools import setup 5 | except ImportError: 6 | from distutils.core import setup 7 | 8 | __author__ = 'ping ' 9 | __version__ = '0.3.9' 10 | 11 | packages = [ 12 | 'instagram_private_api_extensions' 13 | ] 14 | 15 | test_reqs = ['responses>=0.5.1'] 16 | 17 | with io.open(path.join(path.abspath(path.dirname(__file__)), 'README.md'), encoding='utf-8') as f: 18 | long_description = f.read() 19 | 20 | setup( 21 | name='instagram_private_api_extensions', 22 | version=__version__, 23 | author='ping', 24 | author_email='lastmodified@gmail.com', 25 | license='MIT', 26 | url='https://github.com/ping/instagram_private_api_extensions/tree/master', 27 | keywords='instagram private api extensions', 28 | description='An extension module for https://github.com/ping/instagram_private_api', 29 | long_description=long_description, 30 | long_description_content_type='text/markdown', 31 | packages=packages, 32 | install_requires=['moviepy==1.0.1', 'Pillow>=4.0.0', 'requests>=2.9.1'], 33 | test_requires=test_reqs, 34 | platforms=['any'], 35 | classifiers=[ 36 | 'Development Status :: 4 - Beta', 37 | 'Intended Audience :: Developers', 38 | 'License :: OSI Approved :: MIT License', 39 | 'Topic :: Software Development :: Libraries :: Python Modules', 40 | 
'Programming Language :: Python :: 2.7', 41 | 'Programming Language :: Python :: 3.5', 42 | 'Programming Language :: Python :: 3.6', 43 | ] 44 | ) 45 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug Report 3 | about: Report an error or problem 4 | 5 | --- 6 | 7 | ## Please follow the guide below 8 | 9 | - Issues submitted without this template format will be **ignored**. 10 | - Please read the questions **carefully** and answer completely. 11 | - Do not post screenshots of error messages or code. 12 | - Put an `x` into all the boxes [ ] relevant to your issue (like so [x] no spaces). 13 | - Use the *Preview* tab to see how your issue will actually look like. 14 | 15 | --- 16 | 17 | ### Before submitting an issue make sure you have: 18 | - [ ] Updated to the latest version v0.3.8 19 | - [ ] Read the [README](https://github.com/ping/instagram_private_api_extensions/blob/master/README.md) 20 | - [ ] [Searched](https://github.com/ping/instagram_private_api_extensions/search?type=Issues) the bugtracker for similar issues including **closed** ones 21 | 22 | --- 23 | 24 | ### Describe the Bug/Error: 25 | 26 | Please make sure the description is worded well enough to be understood with as much context and examples as possible. 27 | 28 | Code to replicate the error must be provided below.
29 | 30 | --- 31 | 32 | Paste the output of ``python -V`` here: 33 | 34 | Code: 35 | 36 | ```python 37 | # Example code that will produce the error reported 38 | from instagram_web_api import Client 39 | 40 | web_api = Client() 41 | user_feed_info = web_api.user_feed('1234567890', count=10) 42 | ``` 43 | 44 | Error/Debug Log: 45 | 46 | ```python 47 | Traceback (most recent call last): 48 | File "", line 1, in 49 | ZeroDivisionError: integer division or modulo by zero 50 | ``` 51 | -------------------------------------------------------------------------------- /tests/test_pagination.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import sys 3 | import os 4 | 5 | try: 6 | from instagram_private_api_extensions import pagination 7 | except ImportError: 8 | sys.path.append(os.path.join(os.path.dirname(__file__), '..')) 9 | from instagram_private_api_extensions import pagination 10 | 11 | 12 | class TestPagination(unittest.TestCase): 13 | """Tests for pagination related functions.""" 14 | 15 | def test_page(self): 16 | testset = ['a', 'b', 'c', 'd', 'e', 'f', 'h', 'i', 'j', 'k', 'l', 'm', 'n'] 17 | 18 | def paging_stub(start=0): 19 | page_size = 3 20 | result = { 21 | 'items': testset[start:start + page_size] 22 | } 23 | if len(testset) > start + page_size: 24 | result['next_index'] = start + page_size 25 | return result 26 | 27 | resultset = [] 28 | for results in pagination.page( 29 | paging_stub, args={}, 30 | cursor_key='start', 31 | get_cursor=lambda r: r.get('next_index'), 32 | wait=0): 33 | if results.get('items'): 34 | resultset.extend(results['items']) 35 | self.assertEqual(testset, resultset) 36 | 37 | resultset = [] 38 | for results in pagination.page( 39 | paging_stub, args={}, 40 | cursor_key='start', 41 | get_cursor=lambda r: r.get('next_index'), 42 | wait=1): 43 | if results.get('items'): 44 | resultset.extend(results['items']) 45 | self.assertEqual(testset, resultset) 46 | 
-------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. instagram_private_api_extensions documentation master file, created by 2 | sphinx-quickstart on Tue May 2 11:50:20 2017. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | instagram_private_api_extensions 7 | ================================ 8 | 9 | An extension module to `instagram_private_api `_ to help with common tasks such as posting a photo or video. 10 | 11 | Features 12 | -------- 13 | 14 | 1. :ref:`api_media`: Edits a photo/video so that it complies with Instagram's requirements by: 15 | 16 | * Resizing 17 | * Cropping to fit the minimum/maximum aspect ratio 18 | * Generating the video thumbnail image 19 | * Clipping the video duration if it is too long 20 | * Changing the format/encoding 21 | 22 | 2. :ref:`api_pagination`: Page through an api call such as ``api.user_feed()``. 23 | 24 | 3. :ref:`api_live`: Download an ongoing IG live stream. Requires `ffmpeg `_ installed. 25 | 26 | 4. :ref:`api_replay`: Download an IG live replay stream. Requires `ffmpeg `_ installed. 27 | 28 | .. toctree:: 29 | :maxdepth: 2 30 | :caption: Usage 31 | 32 | usage 33 | 34 | .. toctree:: 35 | :maxdepth: 2 36 | :caption: API Documentation 37 | 38 | api 39 | 40 | .. 
toctree:: 41 | :caption: Links 42 | 43 | Repository 44 | Bug Tracker 45 | Tests 46 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: trusty 2 | language: python 3 | cache: pip 4 | python: 5 | - "2.7" 6 | - "3.5" 7 | - "3.6" 8 | 9 | before_install: 10 | - sudo add-apt-repository -y ppa:mc3man/trusty-media 11 | - sudo apt-get -y update 12 | - sudo apt-get install -y ffmpeg 13 | # Download mpd for live testing 14 | - curl -s -L -o mpdstub.tar.gz 'https://drive.google.com/uc?export=download&id=1X3avLfZWzRnjIo5GECjNRqRmAnAv2Gpe' 15 | - mkdir mpdstub && tar -xzf mpdstub.tar.gz -C mpdstub/ 16 | # Download replay files for replay testing 17 | - curl --retry 2 -s -L -o replaystub.tar.gz 'https://drive.google.com/uc?export=download&id=143OZeqaCFG6TU0fEjx3Spf3JhA-YKxk3' 18 | - tar -xzf replaystub.tar.gz -C mpdstub/ 19 | - if [[ ${TRAVIS_PYTHON_VERSION:0:1} == '3' ]]; then ((cd mpdstub && python -m http.server 8000) &); fi 20 | - if [[ ${TRAVIS_PYTHON_VERSION:0:1} == '2' ]]; then ((cd mpdstub && python -m SimpleHTTPServer 8000) &); fi 21 | # Download media files for media testing 22 | - mkdir media 23 | - curl --retry 2 -s -o 'media/test.jpg' 'https://c2.staticflickr.com/6/5267/5669212075_039ed45bff_z.jpg' 24 | - curl --retry 2 -s -o 'media/test.mp4' 'https://raw.githubusercontent.com/johndyer/mediaelement-files/master/big_buck_bunny.mp4' 25 | 26 | install: 27 | - pip install -r requirements.txt 28 | - pip install -r requirements-dev.txt 29 | - pip install coveralls 30 | 31 | script: 32 | - flake8 --max-line-length=120 instagram_private_api_extensions --exclude=./instagram_private_api_extensions/compat.py 33 | - pylint -E instagram_private_api_extensions 34 | - coverage run --source=instagram_private_api_extensions -m unittest discover -s tests -v 35 | 36 | after_success: 37 | coveralls 38 | 39 | branches: 40 | only: 41 | - master 42 | 
-------------------------------------------------------------------------------- /instagram_private_api_extensions/pagination.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 https://github.com/ping 2 | # 3 | # This software is released under the MIT License. 4 | # https://opensource.org/licenses/MIT 5 | 6 | import time 7 | 8 | 9 | def page(fn, args, cursor_key='max_id', get_cursor=lambda r: r.get('next_max_id'), wait=5): 10 | """ 11 | A helper method to page through a feed/listing api call 12 | 13 | .. code-block:: python 14 | 15 | from instagram_private_api import Client 16 | from instagram_web_api import WebClient 17 | from instagram_private_api_extensions.pagination import page 18 | 19 | api = Client('username', 'password') 20 | items = [] 21 | for results in page(api.user_feed, args={'user_id': '2958144170'}): 22 | if results.get('items'): 23 | items.extend(results['items']) 24 | print(len(items)) 25 | 26 | webapi = WebClient(username='username', password='password', authenticate=True) 27 | items = [] 28 | for results in pagination.page( 29 | webapi.user_feed, 30 | args={'user_id': '2958144170', 'extract': False}, 31 | cursor_key='end_cursor', 32 | get_cursor=lambda r: r.get('media', {}).get('page_info', {}).get('end_cursor')): 33 | 34 | if results.get('media', {}).get('nodes', []): 35 | items.extend(results.get('media', {}).get('nodes', [])) 36 | print(len(items)) 37 | 38 | :param fn: function call 39 | :param args: dict of arguments to pass to fn 40 | :param cursor_key: param name for the cursor, e.g. 
'max_id' 41 | :param get_cursor: anonymous function to extract the next cursor value 42 | :param wait: interval in seconds to sleep between api calls 43 | :return: 44 | """ 45 | results = fn(**args) 46 | yield results 47 | 48 | cursor = get_cursor(results) 49 | while cursor: 50 | if wait: 51 | time.sleep(wait) 52 | args[cursor_key] = cursor 53 | results = fn(**args) 54 | yield results 55 | cursor = get_cursor(results) 56 | -------------------------------------------------------------------------------- /docs/usage.rst: -------------------------------------------------------------------------------- 1 | .. _usage: 2 | 3 | Installation 4 | ============ 5 | 6 | Pip 7 | --- 8 | 9 | Install via pip 10 | 11 | .. code-block:: bash 12 | 13 | $ pip install git+ssh://git@github.com/ping/instagram_private_api_extensions.git@0.3.9 14 | 15 | Update your install with the latest release 16 | 17 | .. code-block:: bash 18 | 19 | $ pip install git+ssh://git@github.com/ping/instagram_private_api_extensions.git@0.3.9 --upgrade 20 | 21 | Force an update from source 22 | 23 | .. code-block:: bash 24 | 25 | $ pip install git+ssh://git@github.com/ping/instagram_private_api_extensions.git --upgrade --force-reinstall 26 | 27 | 28 | Source Code 29 | ----------- 30 | 31 | The library is maintained on GitHub. Feel free to clone the repository. 32 | 33 | .. code-block:: bash 34 | 35 | git clone git://github.com/ping/instagram_private_api_extensions.git 36 | 37 | 38 | Usage 39 | ===== 40 | 41 | 42 | :ref:`media ` 43 | ------------------------ 44 | 45 | ..
code-block:: python 46 | 47 | from instagram_private_api import Client, MediaRatios 48 | from instagram_private_api_extensions import media 49 | 50 | api = Client('username', 'password') 51 | 52 | # post a photo 53 | photo_data, photo_size = media.prepare_image( 54 | 'pathto/my_photo.jpg', aspect_ratios=MediaRatios.standard) 55 | api.post_photo(photo_data, photo_size, caption='Hello World!') 56 | 57 | # post a video 58 | vid_data, vid_size, vid_duration, vid_thumbnail = media.prepare_video( 59 | 'pathto/my_video.mp4', aspect_ratios=MediaRatios.standard) 60 | api.post_video(vid_data, vid_size, vid_duration, vid_thumbnail) 61 | 62 | # post a photo story 63 | photo_data, photo_size = media.prepare_image( 64 | 'pathto/my_photo.jpg', aspect_ratios=MediaRatios.reel) 65 | api.post_photo_story(photo_data, photo_size) 66 | 67 | # post a video story 68 | vid_data, vid_size, vid_duration, vid_thumbnail = media.prepare_video( 69 | 'pathto/my_video.mp4', aspect_ratios=MediaRatios.reel) 70 | api.post_video_story(vid_data, vid_size, vid_duration, vid_thumbnail) 71 | 72 | # post a video without reading the whole file into memory 73 | vid_saved_path, vid_size, vid_duration, vid_thumbnail = media.prepare_video( 74 | 'pathto/my_video.mp4', aspect_ratios=MediaRatios.standard, 75 | save_path='pathto/my_saved_video.mp4', save_only=True) 76 | # To use save_only, the file must be saved locally 77 | # by specifying the save_path 78 | with open(vid_saved_path, 'rb') as video_fp: 79 | api.post_video(video_fp, vid_size, vid_duration, vid_thumbnail) 80 | 81 | 82 | :ref:`pagination ` 83 | ---------------------------------- 84 | 85 | .. 
code-block:: python 86 | 87 | from instagram_private_api_extensions import pagination 88 | 89 | # page through a feed 90 | items = [] 91 | for results in pagination.page(api.user_feed, args={'user_id': '123456'}): 92 | if results.get('items'): 93 | items.extend(results['items']) 94 | print(len(items)) 95 | 96 | 97 | :ref:`live ` 98 | ---------------------- 99 | 100 | .. code-block:: python 101 | 102 | from instagram_private_api_extensions import live 103 | 104 | broadcast = api.broadcast_info('1234567890') 105 | 106 | dl = live.Downloader( 107 | mpd=broadcast['dash_playback_url'], 108 | output_dir='output_%s/' % str(broadcast['id']), 109 | user_agent=api.user_agent) 110 | try: 111 | dl.run() 112 | except KeyboardInterrupt: 113 | if not dl.is_aborted: 114 | dl.stop() 115 | finally: 116 | # combine the downloaded files 117 | # Requires ffmpeg installed. If you prefer to use avconv 118 | # for example, omit this step and do it manually 119 | dl.stitch('my_video.mp4') 120 | 121 | :ref:`replay ` 122 | -------------------------- 123 | 124 | .. code-block:: python 125 | 126 | from instagram_private_api_extensions import replay 127 | 128 | user_story_feed = api.user_story_feed('12345') 129 | 130 | broadcasts = user_story_feed.get('post_live_item', {}).get('broadcasts', []) 131 | for broadcast in broadcasts: 132 | dl = replay.Downloader( 133 | mpd=broadcast['dash_manifest'], 134 | output_dir='output_{}/'.format(broadcast['id']), 135 | user_agent=api.user_agent) 136 | # download and save to file 137 | dl.download('output_{}.mp4'.format(broadcast['id'])) 138 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Instagram Private API Extensions 2 | 3 | An extension module to [instagram\_private\_api](https://github.com/ping/instagram_private_api) to help with common tasks such as posting a photo or video. 
4 | 5 | ![](https://img.shields.io/badge/Python-2.7%2C%203.5-3776ab.svg?maxAge=2592000) 6 | [![Release](https://img.shields.io/github/release/ping/instagram_private_api_extensions.svg?colorB=ff7043)](https://github.com/ping/instagram_private_api_extensions/releases) 7 | [![Docs](https://img.shields.io/badge/docs-readthedocs.io-ff4980.svg?maxAge=2592000)](https://instagram-private-api-extensions.readthedocs.io/en/latest/) 8 | [![Build](https://img.shields.io/travis/com/ping/instagram_private_api_extensions.svg)](https://travis-ci.com/ping/instagram_private_api_extensions) 9 | [![Coverage](https://img.shields.io/coveralls/ping/instagram_private_api_extensions.svg)](https://coveralls.io/github/ping/instagram_private_api_extensions) 10 | 11 | ## Features 12 | 13 | 1. [``media``](#media): Edits a photo/video so that it complies with Instagram's requirements by: 14 | * Resizing 15 | * Cropping to fit the minimum/maximum aspect ratio 16 | * Generating the video thumbnail image 17 | * Clipping the video duration if it is too long 18 | * Changing the format/encoding 19 | 20 | 2. [``pagination``](#pagination): Page through an api call such as ``api.user_feed()``. 21 | 22 | 3. [``live``](#live): Download an ongoing IG live stream. Requires ffmpeg installed. 23 | 24 | 4. [``replay``](#replay): Download an IG live replay stream. Requires ffmpeg installed. 
25 | 26 | ## Documentation 27 | 28 | Documentation is available at https://instagram-private-api-extensions.readthedocs.io/en/latest/ 29 | 30 | ## Install 31 | 32 | Install with pip using 33 | 34 | ```bash 35 | pip install git+https://git@github.com/ping/instagram_private_api_extensions.git@0.3.9 36 | ``` 37 | 38 | To update: 39 | 40 | ```bash 41 | pip install git+https://git@github.com/ping/instagram_private_api_extensions.git@0.3.9 --upgrade 42 | ``` 43 | 44 | To update with latest repo code: 45 | 46 | ```bash 47 | pip install git+https://git@github.com/ping/instagram_private_api_extensions.git --upgrade --force-reinstall 48 | ``` 49 | 50 | ## Usage 51 | 52 | ### [Media](instagram_private_api_extensions/media.py) 53 | ```python 54 | from instagram_private_api import Client, MediaRatios 55 | from instagram_private_api_extensions import media 56 | 57 | api = Client('username', 'password') 58 | 59 | # post a photo 60 | photo_data, photo_size = media.prepare_image( 61 | 'pathto/my_photo.jpg', aspect_ratios=MediaRatios.standard) 62 | api.post_photo(photo_data, photo_size, caption='Hello World!') 63 | 64 | # post a video 65 | vid_data, vid_size, vid_duration, vid_thumbnail = media.prepare_video( 66 | 'pathto/my_video.mp4', aspect_ratios=MediaRatios.standard) 67 | api.post_video(vid_data, vid_size, vid_duration, vid_thumbnail) 68 | 69 | # post a photo story 70 | photo_data, photo_size = media.prepare_image( 71 | 'pathto/my_photo.jpg', aspect_ratios=MediaRatios.reel) 72 | api.post_photo_story(photo_data, photo_size) 73 | 74 | # post a video story 75 | vid_data, vid_size, vid_duration, vid_thumbnail = media.prepare_video( 76 | 'pathto/my_video.mp4', aspect_ratios=MediaRatios.reel) 77 | api.post_video_story(vid_data, vid_size, vid_duration, vid_thumbnail) 78 | 79 | # post a video without reading the whole file into memory 80 | vid_saved_path, vid_size, vid_duration, vid_thumbnail = media.prepare_video( 81 | 'pathto/my_video.mp4', aspect_ratios=MediaRatios.standard, 82 | 
save_path='pathto/my_saved_video.mp4', save_only=True) 83 | # To use save_only, the file must be saved locally 84 | # by specifying the save_path 85 | with open(vid_saved_path, 'rb') as video_fp: 86 | api.post_video(video_fp, vid_size, vid_duration, vid_thumbnail) 87 | ``` 88 | 89 | ### [Pagination](instagram_private_api_extensions/pagination.py) 90 | 91 | ```python 92 | from instagram_private_api_extensions import pagination 93 | 94 | # page through a feed 95 | items = [] 96 | for results in pagination.page(api.user_feed, args={'user_id': '123456'}): 97 | if results.get('items'): 98 | items.extend(results['items']) 99 | print(len(items)) 100 | ``` 101 | 102 | ### [Live](instagram_private_api_extensions/live.py) 103 | 104 | ```python 105 | from instagram_private_api_extensions import live 106 | 107 | broadcast = api.broadcast_info('1234567890') 108 | 109 | dl = live.Downloader( 110 | mpd=broadcast['dash_playback_url'], 111 | output_dir='output_{}/'.format(broadcast['id']), 112 | user_agent=api.user_agent) 113 | try: 114 | dl.run() 115 | except KeyboardInterrupt: 116 | if not dl.is_aborted: 117 | dl.stop() 118 | finally: 119 | # combine the downloaded files 120 | # Requires ffmpeg installed. 
If you prefer to use avconv 121 | # for example, omit this step and do it manually 122 | dl.stitch('my_video.mp4') 123 | ``` 124 | 125 | ### [Replay](instagram_private_api_extensions/replay.py) 126 | 127 | ```python 128 | from instagram_private_api_extensions import replay 129 | 130 | user_story_feed = api.user_story_feed('12345') 131 | 132 | broadcasts = user_story_feed.get('post_live_item', {}).get('broadcasts', []) 133 | for broadcast in broadcasts: 134 | dl = replay.Downloader( 135 | mpd=broadcast['dash_manifest'], 136 | output_dir='output_{}/'.format(broadcast['id']), 137 | user_agent=api.user_agent) 138 | # download and save to file 139 | dl.download('output_{}.mp4'.format(broadcast['id'])) 140 | ``` 141 | 142 | ## Support 143 | Make sure to review the [contributing documentation](CONTRIBUTING.md) before submitting an issue report or pull request. 144 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # instagram_private_api_extensions documentation build configuration file, created by 4 | # sphinx-quickstart on Tue May 2 11:50:20 2017. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | # If extensions (or modules to document with autodoc) are in another directory, 16 | # add these directories to sys.path here. If the directory is relative to the 17 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
import os
import sys

# Make the package importable from the docs/ directory, both for autodoc and
# for the version lookup below.
sys.path.insert(0, os.path.abspath('..'))

# Single source of truth for the version: the package itself. Importing the
# package root only evaluates ``__version__`` (no third-party imports).
from instagram_private_api_extensions import __version__  # noqa: E402


# -- General configuration ------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = ['sphinx.ext.autodoc',
              'sphinx.ext.viewcode']

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix(es) of source filenames.
# You can specify multiple suffixes as a list of strings:
#
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'

# The master toctree document.
master_doc = 'index'

# General information about the project.
project = u'instagram_private_api_extensions'
copyright = u'2017, ping'
author = u'ping'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = __version__
# The full version, including alpha/beta/rc tags.
release = __version__

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
# NOTE: ``None`` is rejected with a warning by Sphinx >= 5, so the default
# language is spelled out explicitly.
language = 'en'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# These patterns also affect html_static_path and html_extra_path
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = False


# -- Options for HTML output ----------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'

# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
# documentation.
#
# html_theme_options = {}

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']


# -- Options for HTMLHelp output ------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = 'instagram_private_api_extensionsdoc'


# -- Options for LaTeX output ---------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',

    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, 'instagram_private_api_extensions.tex', u'instagram\\_private\\_api\\_extensions Documentation',
     u'ping', 'manual'),
]


# -- Options for manual page output ---------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    (master_doc, 'instagram_private_api_extensions', u'instagram_private_api_extensions Documentation',
     [author], 1)
]


# -- Options for Texinfo output -------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (master_doc, 'instagram_private_api_extensions', u'instagram_private_api_extensions Documentation',
     author, 'instagram_private_api_extensions', 'One line description of project.',
     'Miscellaneous'),
]
class TestReplay(unittest.TestCase):
    """Tests for replay related functions."""

    # Every test uses ``<name>`` as its download folder and ``<name>.mp4``
    # as the requested output file.
    OUTPUT_NAMES = (
        'output_replay', 'output_replay_cleartempfile',
        'output_replay_skipffmpeg', 'output_replay_badffmpeg',
        'output_replay_multiperiods')

    @classmethod
    def setUpClass(cls):
        """Remove the artifacts of a previous run so the assertions are meaningful."""
        output_files = ['{0!s}.mp4'.format(name) for name in cls.OUTPUT_NAMES]
        # A multi-period manifest yields numbered files instead of the
        # requested file name.
        output_files.extend([
            'output_replay_multiperiods-1.mp4',
            'output_replay_multiperiods-2.mp4'])

        # Generated videos are plain files ...
        for f in output_files:
            if os.path.isfile(f):
                os.remove(f)
        # ... while the download folders are directories.
        for fd in cls.OUTPUT_NAMES:
            if os.path.exists(fd):
                shutil.rmtree(fd, ignore_errors=True)

    def test_downloader(self):
        """A single-period manifest produces the requested output file."""
        dl = replay.Downloader(
            mpd=MPD_CONTENT,
            output_dir='output_replay')

        output_file = 'output_replay.mp4'
        dl.download(output_file)
        self.assertTrue(os.path.isfile(output_file), '{0!s} not generated'.format(output_file))

    def test_downloader_multiperiods(self):
        """A multi-period manifest produces one numbered file per period."""
        dl = replay.Downloader(
            mpd=MPD_CONTENT_MULTIPERIODS,
            output_dir='output_replay_multiperiods')

        output_file = 'output_replay_multiperiods.mp4'
        dl.download(output_file)
        for expected in ('output_replay_multiperiods-1.mp4',
                         'output_replay_multiperiods-2.mp4'):
            self.assertTrue(
                os.path.isfile(expected),
                '{0!s} not generated'.format(expected))

    def test_downloader_cleartempfiles(self):
        """With cleartempfiles=False the downloaded parts are kept."""
        dl = replay.Downloader(
            mpd=MPD_CONTENT,
            output_dir='output_replay_cleartempfile')

        output_file = 'output_replay_cleartempfile.mp4'
        dl.download(output_file, cleartempfiles=False)
        self.assertTrue(
            os.path.isfile('output_replay_cleartempfile/replay_video.mp4'),
            'Temp video file was cleared')
        self.assertTrue(
            os.path.isfile('output_replay_cleartempfile/replay_audio.mp4'),
            'Temp audio file was cleared')
        self.assertTrue(os.path.isfile(output_file), '{0!s} not generated'.format(output_file))

    def test_downloader_skipffmpeg(self):
        """With skipffmpeg=True only the parts are downloaded, nothing is muxed."""
        dl = replay.Downloader(
            mpd=MPD_CONTENT,
            output_dir='output_replay_skipffmpeg')

        output_file = 'output_replay_skipffmpeg.mp4'
        dl.download(output_file, skipffmpeg=True)
        self.assertTrue(
            os.path.isfile('output_replay_skipffmpeg/replay_video.mp4'),
            'Temp video file was cleared')
        self.assertTrue(
            os.path.isfile('output_replay_skipffmpeg/replay_audio.mp4'),
            'Temp audio file was cleared')
        self.assertFalse(os.path.isfile(output_file), '{0!s} generated'.format(output_file))

    def test_downloader_badffmpeg(self):
        """A failing ffmpeg call leaves the parts in place and generates no output."""
        dl = replay.Downloader(
            mpd=MPD_CONTENT,
            output_dir='output_replay_badffmpeg',
            user_agent=None,
            ffmpeg_binary='ffmpegbad')

        output_file = 'output_replay_badffmpeg.mp4'
        dl.download(output_file, cleartempfiles=True)
        self.assertTrue(
            os.path.isfile('output_replay_badffmpeg/replay_video.mp4'),
            'Temp video file was cleared')
        self.assertTrue(
            os.path.isfile('output_replay_badffmpeg/replay_audio.mp4'),
            'Temp audio file was cleared')
        self.assertFalse(os.path.isfile(output_file), '{0!s} generated'.format(output_file))
class TestLive(unittest.TestCase):
    """Tests for live related functions."""

    TEST_MPD_URL = 'http://127.0.01:8000/mpd/17875351285037717.mpd'

    @classmethod
    def setUpClass(cls):
        """Remove output files and download folders left by a previous run."""
        for f in ('output.mp4', 'output_singlethreaded.mp4',
                  'output_httperrors.mp4', 'output_404.mp4', 'output_connerror.mp4',
                  'output_respheaders.mp4', 'output_fragment_connerror.mp4'):
            if os.path.isfile(f):
                os.remove(f)
        for fd in ('output', 'output_singlethreaded', 'output_httperrors', 'output_404',
                   'output_connerror', 'output_respheaders', 'output_fragment_connerror'):
            if os.path.exists(fd):
                shutil.rmtree(fd, ignore_errors=True)

    def test_downloader(self):
        def check_status():
            return True

        dl = live.Downloader(
            mpd=self.TEST_MPD_URL,
            output_dir='output',
            duplicate_etag_retry=10,
            callback_check=check_status)
        dl.run()
        output_file = 'output.mp4'
        dl.stitch(output_file, cleartempfiles=False)
        self.assertTrue(os.path.isfile(output_file), '{0!s} not generated'.format(output_file))

    def test_downloader_404(self):
        def check_status():
            return True

        dl = live.Downloader(
            mpd=self.TEST_MPD_URL + 'x',
            output_dir='output_404',
            max_connection_error_retry=2,
            callback_check=check_status)
        dl.run()
        output_file = 'output_404.mp4'
        with self.assertRaises(Exception):
            dl.stitch(output_file, cleartempfiles=False)
        self.assertFalse(os.path.isfile(output_file), '{0!s} is generated'.format(output_file))

    def test_downloader_single_threaded(self):
        dl = live.Downloader(
            mpd=self.TEST_MPD_URL,
            output_dir='output_singlethreaded',
            duplicate_etag_retry=10,
            singlethreaded=True)
        dl.run()
        output_file = 'output_singlethreaded.mp4'
        dl.stitch(output_file, cleartempfiles=True)
        self.assertTrue(os.path.isfile(output_file), '{0!s} not generated'.format(output_file))

    @responses.activate
    def test_downloader_http_errors(self):
        with responses.RequestsMock(assert_all_requests_are_fired=True) as rsps:
            rsps.add(responses.GET, self.TEST_MPD_URL, status=500)
            rsps.add(responses.GET, self.TEST_MPD_URL, status=404)

            dl = live.Downloader(
                mpd=self.TEST_MPD_URL,
                output_dir='output_httperrors',
                duplicate_etag_retry=2,
                singlethreaded=True)
            dl.run()
            dl.stream_id = '17875351285037717'
            output_file = 'output_httperrors.mp4'
            dl.stitch(output_file, cleartempfiles=True)
            self.assertFalse(os.path.isfile(output_file), '{0!s} is generated'.format(output_file))

    @responses.activate
    def test_downloader_conn_error(self):
        exception = ConnectionError()
        with responses.RequestsMock(assert_all_requests_are_fired=True) as rsps:
            max_retry = 3
            # one initial attempt plus ``max_retry`` retries
            for _ in range(max_retry + 1):
                rsps.add(responses.GET, self.TEST_MPD_URL, body=exception)

            dl = live.Downloader(
                mpd=self.TEST_MPD_URL,
                output_dir='output_connerror',
                duplicate_etag_retry=2,
                singlethreaded=True,
                max_connection_error_retry=max_retry)
            dl.run()
            dl.stream_id = '17875351285037717'
            output_file = 'output_connerror.mp4'
            dl.stitch(output_file, cleartempfiles=True)
            self.assertFalse(os.path.isfile(output_file), '{0!s} is generated'.format(output_file))

    @responses.activate
    def test_downloader_fragment_dl_error(self):
        exception = ConnectionError()
        # The init segment followed by segments 281033, 282033, ... 290033,
        # first for the video track and then for the audio track.
        segment_ids = ['init'] + [str(ts) for ts in range(281033, 290034, 1000)]
        fragments = (
            ['dash-hd1/17875351285037717-{0!s}.m4v'.format(seg) for seg in segment_ids] +
            ['dash-ld/17875351285037717-{0!s}.m4a'.format(seg) for seg in segment_ids])
        with open('mpdstub/mpd/17875351285037717.mpd', 'r') as f:
            mpd_content = f.read()
        with responses.RequestsMock(assert_all_requests_are_fired=True) as rsps:
            rsps.add(responses.GET, self.TEST_MPD_URL, body=mpd_content)
            max_retry = 1

            for fragment in fragments:
                # one initial attempt plus ``max_retry`` retries per fragment
                for _ in range(max_retry + 1):
                    rsps.add(responses.GET, 'http://127.0.01:8000/' + fragment, body=exception)

            dl = live.Downloader(
                mpd=self.TEST_MPD_URL,
                output_dir='output_fragment_connerror',
                duplicate_etag_retry=2,
                singlethreaded=True,
                max_connection_error_retry=max_retry)
            dl.run()
            dl.stream_id = '17875351285037717'
            output_file = 'output_fragment_connerror.mp4'
            dl.stitch(output_file, cleartempfiles=True)
            self.assertFalse(os.path.isfile(output_file), '{0!s} is generated'.format(output_file))

    @responses.activate
    def test_downloader_resp_headers(self):
        with open('mpdstub/mpd/17875351285037717.mpd', 'r') as f:
            mpd_content = f.read()
        with responses.RequestsMock(assert_all_requests_are_fired=True) as rsps:
            rsps.add(responses.GET, self.TEST_MPD_URL, body=mpd_content)
            rsps.add(responses.GET, self.TEST_MPD_URL, body=mpd_content,
                     headers={'Cache-Control': 'max-age=1'})
            rsps.add(responses.GET, self.TEST_MPD_URL, body=mpd_content,
                     headers={'X-FB-Video-Broadcast-Ended': '1'})

            dl = live.Downloader(
                mpd=self.TEST_MPD_URL,
                output_dir='output_respheaders')
            dl.run()

        with responses.RequestsMock(assert_all_requests_are_fired=True) as rsps:
            rsps.add(responses.GET, self.TEST_MPD_URL, body=mpd_content,
                     headers={'Cache-Control': 'max-age=1'})
            rsps.add(responses.GET, self.TEST_MPD_URL, body=mpd_content,
                     headers={'Cache-Control': 'max-age=1000'})

            dl = live.Downloader(
                mpd=self.TEST_MPD_URL,
                output_dir='output_respheaders')
            dl.run()

        # Can't stitch and check for output because responses does not support
        # url pass through, so the segments cannot be downloaded.
class TestMedia(unittest.TestCase):
    """Tests for media related functions."""

    TEST_IMAGE_PATH = 'media/test.jpg'
    TEST_IMAGE_SIZE = (640, 493)
    TEST_VIDEO_PATH = 'media/test.mp4'
    TEST_VIDEO_SIZE = (640, 360)
    TEST_VIDEO_DURATION = 60.0

    def _save_temp_video(self, video_content):
        """Write the video bytes to a temp file and return its path.

        The file is removed again when the test finishes.
        """
        video_output = tempfile.NamedTemporaryFile(prefix='ipae_test_', suffix='.mp4', delete=False)
        self.addCleanup(os.remove, video_output.name)
        video_output.write(video_content)
        video_output.close()
        return video_output.name

    def _assert_outputs_match(self, video_path, size, duration, thumbnail_content):
        """Check that the video file and the thumbnail agree with the reported size/duration."""
        with VideoFileClip(video_path) as vidclip_output:
            self.assertAlmostEqual(duration, vidclip_output.duration, places=0)
            self.assertEqual(size[0], vidclip_output.size[0])
            self.assertEqual(size[1], vidclip_output.size[1])

        im = Image.open(io.BytesIO(thumbnail_content))
        try:
            self.assertEqual(size[0], im.size[0])
            self.assertEqual(size[1], im.size[1])
        finally:
            im.close()

    def test_prepare_image(self):
        _, size = media.prepare_image(
            self.TEST_IMAGE_PATH, max_size=(400, 400), aspect_ratios=0.8)
        self.assertLessEqual(size[0], 400, 'Invalid width.')
        self.assertLessEqual(size[1], 400, 'Invalid height.')
        self.assertEqual(round(1.0 * size[0] / size[1], 2), 0.8, 'Invalid aspect ratio.')

    def test_prepare_image2(self):
        _, size = media.prepare_image(
            self.TEST_IMAGE_PATH, max_size=(400, 350), aspect_ratios=(0.8, 1.2))
        self.assertLessEqual(size[0], 400, 'Invalid width.')
        self.assertLessEqual(size[1], 350, 'Invalid height.')
        ar = 1.0 * size[0] / size[1]
        self.assertLessEqual(round(ar, 2), 1.2)
        self.assertGreaterEqual(round(ar, 2), 0.8)

    def test_prepare_image3(self):
        _, size = media.prepare_image(
            self.TEST_IMAGE_PATH, max_size=(1080, 1350), aspect_ratios=(0.8, 1.2), min_size=(640, 640))
        self.assertLessEqual(size[0], 1080, 'Invalid width (max)')
        self.assertLessEqual(size[1], 1350, 'Invalid height (max).')
        self.assertGreaterEqual(size[0], 640, 'Invalid width (min)')
        self.assertGreaterEqual(size[1], 640, 'Invalid height (min)')
        ar = 1.0 * size[0] / size[1]
        self.assertLessEqual(round(ar, 2), 1.2)
        self.assertGreaterEqual(round(ar, 2), 0.8)

    def test_prepare_image4(self):
        with self.assertRaises(ValueError):
            media.prepare_image(
                self.TEST_IMAGE_PATH, max_size=(1080, 1350), aspect_ratios=(4.0 / 5), min_size=(1081, 640))

    def test_remote_image(self):
        image_url = 'https://c2.staticflickr.com/6/5267/5669212075_039ed45bff_z.jpg'
        image_data, size = media.prepare_image(
            image_url, max_size=(400, 400), save_path='remote.jpg')
        self.assertLessEqual(size[0], 400, 'Invalid width.')
        self.assertLessEqual(size[1], 400, 'Invalid height.')
        self.assertGreater(len(image_data), 0)

    def test_prepare_video(self):
        vid_returned, size, duration, thumbnail_content = media.prepare_video(
            self.TEST_VIDEO_PATH, aspect_ratios=1.0, max_duration=10.0, save_path='media/output.mp4',
            save_only=True)
        self.assertEqual(duration, 10.0, 'Invalid duration.')
        self.assertEqual(size[0], size[1], 'Invalid width/length.')
        self.assertTrue(os.path.isfile('media/output.mp4'), 'Output file not generated.')
        self.assertTrue(os.path.isfile(vid_returned), 'Output file not returned.')

        # save_only without a save_path is rejected
        with self.assertRaises(ValueError) as ve:
            media.prepare_video(
                self.TEST_VIDEO_PATH, aspect_ratios=1.0, max_duration=10, save_only=True)
        self.assertEqual(str(ve.exception), '"save_path" cannot be empty.')
        self.assertGreater(len(thumbnail_content), 0, 'No thumbnail content returned.')

        # Verify attributes of the saved video and the thumbnail content
        self._assert_outputs_match('media/output.mp4', size, duration, thumbnail_content)

    def test_prepare_video2(self):
        video_content, size, duration, thumbnail_content = media.prepare_video(
            self.TEST_VIDEO_PATH, max_size=(480, 480), min_size=(0, 0))
        self.assertEqual(duration, self.TEST_VIDEO_DURATION, 'Duration changed.')
        self.assertLessEqual(size[0], 480, 'Invalid width.')
        self.assertLessEqual(size[1], 480, 'Invalid height.')
        self.assertEqual(
            1.0 * size[0] / size[1],
            1.0 * self.TEST_VIDEO_SIZE[0] / self.TEST_VIDEO_SIZE[1],
            'Aspect ratio changed.')
        self.assertGreater(len(video_content), 0, 'No video content returned.')
        self.assertGreater(len(thumbnail_content), 0, 'No thumbnail content returned.')

        # Save video, thumbnail content and verify attributes
        video_path = self._save_temp_video(video_content)
        self._assert_outputs_match(video_path, size, duration, thumbnail_content)

    def test_prepare_video3(self):
        video_content, size, duration, thumbnail_content = media.prepare_video(
            self.TEST_VIDEO_PATH, max_size=None, max_duration=1000.0,
            skip_reencoding=True, min_size=None)

        self.assertEqual(size[0], self.TEST_VIDEO_SIZE[0], 'Width changed.')
        self.assertEqual(size[1], self.TEST_VIDEO_SIZE[1], 'Height changed.')

        self.assertGreater(len(video_content), 0, 'No video content returned.')
        self.assertGreater(len(thumbnail_content), 0, 'No thumbnail content returned.')

        # Save video, thumbnail content and verify attributes
        video_path = self._save_temp_video(video_content)
        self._assert_outputs_match(video_path, size, duration, thumbnail_content)

        # With re-encoding skipped the returned bytes are the original file
        self.assertEqual(
            os.path.getsize(video_path),
            os.path.getsize(self.TEST_VIDEO_PATH))

    def test_remote_video(self):
        video_url = 'https://raw.githubusercontent.com/johndyer/mediaelement-files/master/big_buck_bunny.mp4'
        video_content, size, duration, thumbnail_content = media.prepare_video(
            video_url, aspect_ratios=1.0, max_duration=10.0)
        self.assertEqual(duration, 10.0, 'Invalid duration.')
        self.assertEqual(size[0], size[1], 'Invalid width/length.')
        self.assertGreater(len(video_content), 0, 'No video content returned.')
        self.assertGreater(len(thumbnail_content), 0, 'No thumbnail content returned.')

        # Save video, thumbnail content and verify attributes
        video_path = self._save_temp_video(video_content)
        self._assert_outputs_match(video_path, size, duration, thumbnail_content)

    def test_helper_methods(self):
        self.assertRaises(ValueError, lambda: media.prepare_video(
            self.TEST_VIDEO_PATH, thumbnail_frame_ts=999.99))
        self.assertRaises(ValueError, lambda: media.prepare_video(
            self.TEST_VIDEO_PATH, save_path='output.mov'))
        self.assertRaises(ValueError, lambda: media.calc_crop((1, 2, 3), (500, 600)))
        box = media.calc_crop((1, 2), (400, 800))
        self.assertEqual(box, (0, 200, 400, 600))
class Downloader(object):
    """Downloads and assembles a given IG live replay stream.

    The MPD manifest is parsed on construction. :meth:`download` then fetches
    the best audio and video representation of every period and, unless told
    otherwise, joins them into an mp4 file with ffmpeg.
    """

    USER_AGENT = 'Instagram 10.26.0 (iPhone8,1; iOS 10_2; en_US; en-US; ' \
                 'scale=2.00; gamut=normal; 750x1334) AppleWebKit/420+'
    DOWNLOAD_TIMEOUT = 15

    # Matches durations of the form "PT<hours>H<minutes>M<seconds.fraction>"
    _DURATION_RE = re.compile(r'PT(?P<hrs>\d+)H(?P<mins>\d+)M(?P<secs>\d+\.\d+)')

    def __init__(self, mpd, output_dir, user_agent=None, **kwargs):
        """

        :param mpd: The MPD manifest as an XML string (it is parsed, not fetched)
        :param output_dir: folder to store the downloaded files, created if missing
        :param user_agent: User-Agent header for the downloads, defaults to ``USER_AGENT``
        :param kwargs:
            - **download_timeout**: request timeout in seconds, defaults to ``DOWNLOAD_TIMEOUT``
            - **ffmpeg_binary**: custom ffmpeg binary path
        :return:
        """
        self.mpd = mpd
        self.output_dir = output_dir
        if not os.path.exists(self.output_dir):
            os.makedirs(self.output_dir)

        self.user_agent = user_agent or self.USER_AGENT
        self.download_timeout = kwargs.pop('download_timeout', None) or self.DOWNLOAD_TIMEOUT

        session = requests.Session()
        adapter = requests.adapters.HTTPAdapter(max_retries=2)
        session.mount('http://', adapter)
        session.mount('https://', adapter)
        self.session = session

        # custom ffmpeg binary path, fallback to ffmpeg_binary path in env if available
        self.ffmpeg_binary = kwargs.pop('ffmpeg_binary', None) or os.getenv('FFMPEG_BINARY', 'ffmpeg')

        xml.etree.ElementTree.register_namespace('', MPD_NAMESPACE['mpd'])
        self.mpd_document = xml.etree.ElementTree.fromstring(self.mpd)

        self.duration = self._parse_duration(
            self.mpd_document.attrib.get('mediaPresentationDuration', ''))

    @classmethod
    def _parse_duration(cls, duration_attribute):
        """Return the duration in whole seconds, or 0 if it cannot be parsed."""
        mobj = cls._DURATION_RE.match(duration_attribute)
        if not mobj:
            logger.warning('Unable to parse duration: {}'.format(duration_attribute))
            return 0
        return int(round(
            int(mobj.group('hrs')) * 60 * 60 +
            int(mobj.group('mins')) * 60 +
            float(mobj.group('secs'))
        ))

    @staticmethod
    def _quality_key(rep):
        """Return a sort key that ranks a Representation element by quality.

        Only numeric attributes are used so that keys are always comparable.
        The free-text ``FBQualityLabel`` is deliberately left out: comparing
        it against numbers raises ``TypeError`` on Python 3.
        """
        def int_attr(name):
            try:
                return int(rep.attrib.get(name, '0'))
            except (TypeError, ValueError):
                return 0

        return (
            int_attr('width') * int_attr('height'),
            int_attr('bandwidth'),
            int_attr('audioSamplingRate'),
        )

    @staticmethod
    def _period_filename(output_filename, period_idx):
        """Return ``output_filename`` with ``-<period_idx + 1>`` inserted before the extension."""
        dir_name = os.path.dirname(output_filename)
        file_name = os.path.basename(output_filename)
        dot_pos = file_name.rfind('.')
        if dot_pos >= 0:
            filename_no_ext = file_name[0:dot_pos]
            ext = file_name[dot_pos:]
        else:
            filename_no_ext = file_name
            ext = ''
        return os.path.join(
            dir_name, '{0!s}-{1:d}{2!s}'.format(filename_no_ext, period_idx + 1, ext))

    def _select_streams(self, period):
        """Return ``(audio_url, video_url)`` of the best representations in a period.

        Either item is ``None`` if no usable representation was found.
        """
        adaptation_sets = period.findall('mpd:AdaptationSet', MPD_NAMESPACE)
        if len(adaptation_sets) != 2:
            logger.warning('Unexpected number of adaptation sets: {}'.format(len(adaptation_sets)))

        audio_stream = None
        video_stream = None
        for adaptation_set in adaptation_sets:
            # sort representations by quality and pick best one
            representations = sorted(
                adaptation_set.findall('mpd:Representation', MPD_NAMESPACE),
                key=self._quality_key, reverse=True)
            if not representations:
                logger.warning('Adaptation set without representations skipped')
                continue
            representation = representations[0]
            representation_id = representation.attrib.get('id', '')
            # mimeType may be declared on the adaptation set instead of on
            # each representation
            mime_type = (
                representation.attrib.get('mimeType') or
                adaptation_set.attrib.get('mimeType', ''))
            logger.debug(
                'Selected representation with mimeType {0!s} id {1!s} out of {2!s}'.format(
                    mime_type,
                    representation_id,
                    ' / '.join([r.attrib.get('id', '') for r in representations])
                ))
            base_url = representation.find('mpd:BaseURL', MPD_NAMESPACE)
            if base_url is None or not base_url.text:
                logger.warning('Representation {0!s} has no BaseURL'.format(representation_id))
                continue
            representation_base_url = base_url.text
            logger.debug(representation_base_url)
            if 'video' in mime_type and not video_stream:
                video_stream = representation_base_url
            elif 'audio' in mime_type and not audio_stream:
                audio_stream = representation_base_url

            if audio_stream and video_stream:
                break

        return audio_stream, video_stream

    def _local_path(self, stream_url):
        """Return the path inside ``output_dir`` that a stream url is saved to."""
        return os.path.join(
            self.output_dir,
            os.path.basename(compat_urllib_parse_urlparse(stream_url).path)
        )

    def _fetch(self, stream_url, file_path):
        """Stream ``stream_url`` into ``file_path``; raises on HTTP errors."""
        logger.debug('Downloading {} as {}'.format(stream_url, file_path))
        with closing(self.session.get(
                stream_url,
                headers={'User-Agent': self.user_agent, 'Accept': '*/*'},
                timeout=self.download_timeout, stream=True)) as res:
            res.raise_for_status()

            with open(file_path, 'wb') as f:
                for chunk in res.iter_content(chunk_size=1024*100):
                    if chunk:
                        f.write(chunk)

    def _mux(self, audio_file, video_file, generated_filename):
        """Join audio and video into ``generated_filename``; return True on success."""
        # isEnabledFor() also honours a level inherited from a parent logger,
        # which comparing ``logger.level`` directly does not
        ffmpeg_loglevel = 'warning' if logger.isEnabledFor(logging.DEBUG) else 'error'

        cmd = [
            self.ffmpeg_binary, '-y',
            '-loglevel', ffmpeg_loglevel,
            '-i', audio_file,
            '-i', video_file,
            '-c:v', 'copy',
            '-c:a', 'copy',
            generated_filename]

        try:
            exit_code = subprocess.call(cmd)
        except Exception as call_err:
            # best effort: a missing/broken ffmpeg must not abort the download
            logger.error('ffmpeg exited with the error: {0!s}'.format(call_err))
            logger.error('Command: {0!s}'.format(' '.join(cmd)))
            return False

        if exit_code:
            logger.error('ffmpeg exited with the code: {0!s}'.format(exit_code))
            logger.error('Command: {0!s}'.format(' '.join(cmd)))
            return False
        return True

    def download(self, output_filename,
                 skipffmpeg=False,
                 cleartempfiles=True):
        """
        Download and saves the generated file with the file name specified.

        If the manifest has more than one period, one file per period is
        generated, named like output-1.mp4, output-2.mp4, etc.

        :param output_filename: Output file path
        :param skipffmpeg: bool flag to not use ffmpeg to join audio and video file into final mp4
        :param cleartempfiles: bool flag to remove downloaded and temp files
        :return: list of the generated file paths
        """

        periods = self.mpd_document.findall('mpd:Period', MPD_NAMESPACE)
        logger.debug('Found {0:d} period(s)'.format(len(periods)))

        generated_files = []

        # According to specs, multiple periods are allowed but IG only sends one usually
        for period_idx, period in enumerate(periods):
            audio_stream, video_stream = self._select_streams(period)
            if not (audio_stream and video_stream):
                logger.error(
                    'Period {0:d} skipped: audio or video stream not found'.format(period_idx + 1))
                continue

            audio_file = self._local_path(audio_stream)
            video_file = self._local_path(video_stream)
            for stream_url, file_path in ((audio_stream, audio_file), (video_stream, video_file)):
                self._fetch(stream_url, file_path)

            if skipffmpeg:
                continue

            if len(periods) > 1:
                generated_filename = self._period_filename(output_filename, period_idx)
            else:
                generated_filename = output_filename

            if not self._mux(audio_file, video_file, generated_filename):
                # keep the downloaded parts so they can be joined manually
                continue

            generated_files.append(generated_filename)
            logger.debug('Generated {}'.format(generated_filename))
            if cleartempfiles:
                for f in (audio_file, video_file):
                    try:
                        os.remove(f)
                    except (IOError, OSError) as ioe:
                        logger.warning('Error removing {0!s}: {1!s}'.format(f, str(ioe)))

        return generated_files
| with open(args.mpd, 'r') as mpd_file: 230 | mpd_contents = mpd_file.read() 231 | dl = Downloader(mpd=mpd_contents, output_dir=args.o) 232 | try: 233 | generated_files = dl.download(args.s, cleartempfiles=args.c) 234 | print('Video Duration: %s' % dl.duration) 235 | print('Generated files: \n%s' % '\n'.join(generated_files)) 236 | except KeyboardInterrupt: 237 | logger.info('Interrupted') 238 | -------------------------------------------------------------------------------- /instagram_private_api_extensions/media.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 https://github.com/ping 2 | # 3 | # This software is released under the MIT License. 4 | # https://opensource.org/licenses/MIT 5 | 6 | import os 7 | import io 8 | import re 9 | import tempfile 10 | import shutil 11 | 12 | from PIL import Image 13 | import requests 14 | 15 | 16 | def calc_resize(max_size, curr_size, min_size=(0, 0)): 17 | """ 18 | Calculate if resize is required based on the max size desired 19 | and the current size 20 | 21 | :param max_size: tuple of (width, height) 22 | :param curr_size: tuple of (width, height) 23 | :param min_size: tuple of (width, height) 24 | :return: 25 | """ 26 | max_width, max_height = max_size or (0, 0) 27 | min_width, min_height = min_size or (0, 0) 28 | 29 | if (max_width and min_width > max_width) or (max_height and min_height > max_height): 30 | raise ValueError('Invalid min / max sizes.') 31 | 32 | orig_width, orig_height = curr_size 33 | if max_width and max_height and (orig_width > max_width or orig_height > max_height): 34 | resize_factor = min( 35 | 1.0 * max_width / orig_width, 36 | 1.0 * max_height / orig_height) 37 | new_width = int(resize_factor * orig_width) 38 | new_height = int(resize_factor * orig_height) 39 | return new_width, new_height 40 | 41 | elif min_width and min_height and (orig_width < min_width or orig_height < min_height): 42 | resize_factor = max( 43 | 1.0 * min_width / 
orig_width, 44 | 1.0 * min_height / orig_height 45 | ) 46 | new_width = int(resize_factor * orig_width) 47 | new_height = int(resize_factor * orig_height) 48 | return new_width, new_height 49 | 50 | 51 | def calc_crop(aspect_ratios, curr_size): 52 | """ 53 | Calculate if cropping is required based on the desired aspect 54 | ratio and the current size. 55 | 56 | :param aspect_ratios: single float value or tuple of (min_ratio, max_ratio) 57 | :param curr_size: tuple of (width, height) 58 | :return: 59 | """ 60 | try: 61 | if len(aspect_ratios) == 2: 62 | min_aspect_ratio = float(aspect_ratios[0]) 63 | max_aspect_ratio = float(aspect_ratios[1]) 64 | else: 65 | raise ValueError('Invalid aspect ratios') 66 | except TypeError: 67 | # not a min-max range 68 | min_aspect_ratio = float(aspect_ratios) 69 | max_aspect_ratio = float(aspect_ratios) 70 | 71 | curr_aspect_ratio = 1.0 * curr_size[0] / curr_size[1] 72 | if not min_aspect_ratio <= curr_aspect_ratio <= max_aspect_ratio: 73 | curr_width = curr_size[0] 74 | curr_height = curr_size[1] 75 | if curr_aspect_ratio > max_aspect_ratio: 76 | # media is too wide 77 | new_height = curr_height 78 | new_width = max_aspect_ratio * new_height 79 | else: 80 | # media is too tall 81 | new_width = curr_width 82 | new_height = new_width / min_aspect_ratio 83 | left = int((curr_width - new_width)/2) 84 | top = int((curr_height - new_height)/2) 85 | right = int((curr_width + new_width)/2) 86 | bottom = int((curr_height + new_height)/2) 87 | return left, top, right, bottom 88 | 89 | 90 | def is_remote(media): 91 | """Detect if media specified is a url""" 92 | if re.match(r'^https?://', media): 93 | return True 94 | return False 95 | 96 | 97 | def prepare_image(img, max_size=(1080, 1350), 98 | aspect_ratios=(4.0 / 5.0, 90.0 / 47.0), 99 | save_path=None, **kwargs): 100 | """ 101 | Prepares an image file for posting. 
102 | Defaults for size and aspect ratio from https://help.instagram.com/1469029763400082 103 | 104 | :param img: file path 105 | :param max_size: tuple of (max_width, max_height) 106 | :param aspect_ratios: single float value or tuple of (min_ratio, max_ratio) 107 | :param save_path: optional output file path 108 | :param kwargs: 109 | - **min_size**: tuple of (min_width, min_height) 110 | :return: 111 | """ 112 | min_size = kwargs.pop('min_size', (320, 167)) 113 | if is_remote(img): 114 | res = requests.get(img) 115 | im = Image.open(io.BytesIO(res.content)) 116 | else: 117 | im = Image.open(img) 118 | 119 | if aspect_ratios: 120 | crop_box = calc_crop(aspect_ratios, im.size) 121 | if crop_box: 122 | im = im.crop(crop_box) 123 | 124 | new_size = calc_resize(max_size, im.size, min_size=min_size) 125 | if new_size: 126 | im = im.resize(new_size) 127 | 128 | if im.mode != 'RGB': 129 | # Removes transparency (alpha) 130 | im = im.convert('RGBA') 131 | im2 = Image.new('RGB', im.size, (255, 255, 255)) 132 | im2.paste(im, (0, 0), im) 133 | im = im2 134 | if save_path: 135 | im.save(save_path) 136 | 137 | b = io.BytesIO() 138 | im.save(b, 'JPEG') 139 | return b.getvalue(), im.size 140 | 141 | 142 | def prepare_video(vid, thumbnail_frame_ts=0.0, 143 | max_size=(1080, 1350), 144 | aspect_ratios=(4.0 / 5.0, 90.0 / 47.0), 145 | max_duration=60.0, 146 | save_path=None, 147 | skip_reencoding=False, 148 | **kwargs): 149 | """ 150 | Prepares a video file for posting. 
151 | Defaults for size and aspect ratio from https://help.instagram.com/1469029763400082 152 | 153 | :param vid: file path 154 | :param thumbnail_frame_ts: the frame of clip corresponding to time t (in seconds) to be used as the thumbnail 155 | :param max_size: tuple of (max_width, max_height) 156 | :param aspect_ratios: single float value or tuple of (min_ratio, max_ratio) 157 | :param max_duration: maximum video duration in seconds 158 | :param save_path: optional output video file path 159 | :param skip_reencoding: if set to True, the file will not be re-encoded 160 | if there are no modifications required. Default: False. 161 | :param kwargs: 162 | - **min_size**: tuple of (min_width, min_height) 163 | - **progress_bar**: bool flag to show/hide progress bar 164 | - **save_only**: bool flag to return only the path to the saved video file. Requires save_path be set. 165 | - **preset**: Sets the time that FFMPEG will spend optimizing the compression. 166 | Choices are: ultrafast, superfast, veryfast, faster, fast, medium, 167 | slow, slower, veryslow, placebo. Note that this does not impact 168 | the quality of the video, only the size of the video file. So 169 | choose ultrafast when you are in a hurry and file size does not matter. 
170 | :return: 171 | """ 172 | from moviepy.video.io.VideoFileClip import VideoFileClip 173 | from moviepy.video.fx.all import resize, crop 174 | 175 | min_size = kwargs.pop('min_size', (612, 320)) 176 | logger = 'bar' if kwargs.pop('progress_bar', None) else None 177 | save_only = kwargs.pop('save_only', False) 178 | preset = kwargs.pop('preset', 'medium') 179 | if save_only and not save_path: 180 | raise ValueError('"save_path" cannot be empty.') 181 | if save_path: 182 | if not save_path.lower().endswith('.mp4'): 183 | raise ValueError('You must specify a .mp4 save path') 184 | 185 | vid_is_modified = False # flag to track if re-encoding can be skipped 186 | 187 | temp_video_file = tempfile.NamedTemporaryFile(prefix='ipae_', suffix='.mp4', delete=False) 188 | 189 | if is_remote(vid): 190 | # Download remote file 191 | res = requests.get(vid) 192 | temp_video_file.write(res.content) 193 | video_src_filename = temp_video_file.name 194 | else: 195 | shutil.copyfile(vid, temp_video_file.name) 196 | video_src_filename = vid 197 | 198 | # Ref: https://github.com/Zulko/moviepy/issues/833#issuecomment-537885162 199 | with VideoFileClip(temp_video_file.name) as vidclip: 200 | 201 | if vidclip.duration < 3 * 1.0: 202 | raise ValueError('Duration is too short') 203 | 204 | if vidclip.duration > max_duration * 1.0: 205 | vidclip = vidclip.subclip(0, max_duration) 206 | vid_is_modified = True 207 | 208 | if thumbnail_frame_ts > vidclip.duration: 209 | raise ValueError('Invalid thumbnail frame') 210 | 211 | if aspect_ratios: 212 | crop_box = calc_crop(aspect_ratios, vidclip.size) 213 | if crop_box: 214 | vidclip = crop(vidclip, x1=crop_box[0], y1=crop_box[1], x2=crop_box[2], y2=crop_box[3]) 215 | vid_is_modified = True 216 | 217 | if max_size or min_size: 218 | new_size = calc_resize(max_size, vidclip.size, min_size=min_size) 219 | if new_size: 220 | vidclip = resize(vidclip, newsize=new_size) 221 | vid_is_modified = True 222 | 223 | temp_vid_output_file = 
tempfile.NamedTemporaryFile(prefix='ipae_', suffix='.mp4', delete=False) 224 | if vid_is_modified or not skip_reencoding: 225 | # write out 226 | vidclip.write_videofile( 227 | temp_vid_output_file.name, codec='libx264', audio=True, audio_codec='aac', 228 | verbose=False, logger=logger, preset=preset, remove_temp=True) 229 | else: 230 | # no reencoding 231 | shutil.copyfile(video_src_filename, temp_vid_output_file.name) 232 | 233 | if save_path: 234 | shutil.copyfile(temp_vid_output_file.name, save_path) 235 | 236 | # Temp thumbnail img filename 237 | temp_thumbnail_file = tempfile.NamedTemporaryFile(prefix='ipae_', suffix='.jpg', delete=False) 238 | vidclip.save_frame(temp_thumbnail_file.name, t=thumbnail_frame_ts) 239 | 240 | video_duration = vidclip.duration 241 | video_size = vidclip.size 242 | 243 | video_thumbnail_content = temp_thumbnail_file.read() 244 | 245 | if not save_only: 246 | video_content_len = os.path.getsize(temp_vid_output_file.name) 247 | video_content = temp_vid_output_file.read() 248 | else: 249 | video_content_len = os.path.getsize(save_path) 250 | video_content = save_path # return the file path instead 251 | 252 | if video_content_len > 50 * 1024 * 1000: 253 | raise ValueError('Video file is too big.') 254 | 255 | return video_content, video_size, video_duration, video_thumbnail_content 256 | 257 | 258 | if __name__ == '__main__': # pragma: no cover 259 | # pylint: disable-all 260 | import argparse 261 | 262 | parser = argparse.ArgumentParser(description='Demo media.py') 263 | parser.add_argument('-i', '--image', dest='image', type=str) 264 | parser.add_argument('-v', '--video', dest='video', type=str) 265 | parser.add_argument('-video-story', dest='videostory', type=str) 266 | 267 | args = parser.parse_args() 268 | 269 | if args.image: 270 | photo_data, size = prepare_image(args.image, max_size=(1000, 800), aspect_ratios=0.9) 271 | print('Image dimensions: {0:d}x{1:d}'.format(size[0], size[1])) 272 | 273 | def print_vid_info(video_data, 
size, duration, thumbnail_data): 274 | print( 275 | 'vid file size: {0:d}, thumbnail file size: {1:d}, , ' 276 | 'vid dimensions: {2:d}x{3:d}, duration: {4:f}'.format( 277 | len(video_data), len(thumbnail_data), size[0], size[1], duration)) 278 | 279 | if args.video: 280 | print('Example 1: Resize video to aspect ratio 1, duration 10s') 281 | video_data, size, duration, thumbnail_data = prepare_video( 282 | args.video, aspect_ratios=1.0, max_duration=10, 283 | save_path='example1.mp4') 284 | print_vid_info(video_data, size, duration, thumbnail_data) 285 | 286 | print('Example 2: Resize video to no greater than 480x480') 287 | video_data, size, duration, thumbnail_data = prepare_video( 288 | args.video, thumbnail_frame_ts=2.0, max_size=(480, 480)) 289 | print_vid_info(video_data, size, duration, thumbnail_data) 290 | 291 | print('Example 3: Leave video intact and speed up retrieval') 292 | video_data, size, duration, thumbnail_data = prepare_video( 293 | args.video, max_size=None, skip_reencoding=True) 294 | print_vid_info(video_data, size, duration, thumbnail_data) 295 | 296 | if args.videostory: 297 | print('Generate a video suitable for posting as a story') 298 | video_data, size, duration, thumbnail_data = prepare_video( 299 | args.videostory, aspect_ratios=(3.0/4), max_duration=14.9, 300 | min_size=(612, 612), max_size=(1080, 1080), save_path='story.mp4') 301 | print_vid_info(video_data, size, duration, thumbnail_data) 302 | -------------------------------------------------------------------------------- /instagram_private_api_extensions/live.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 https://github.com/ping 2 | # 3 | # This software is released under the MIT License. 
# https://opensource.org/licenses/MIT

import argparse
import logging
import os
import time
import re
import hashlib
import xml.etree.ElementTree
import threading
import shutil
import subprocess

import requests
try:
    from .compat import compat_urlparse
except (ImportError, ValueError):
    # pragma: no cover
    # To allow running in terminal. Python 2 raises ValueError for a relative
    # import outside a package, Python 3 raises ImportError.
    from compat import compat_urlparse


logger = logging.getLogger(__file__)


MPD_NAMESPACE = {'mpd': 'urn:mpeg:dash:schema:mpd:2011'}


class Downloader(object):
    """Downloads and assembles a given IG live stream"""

    USER_AGENT = 'Instagram 10.26.0 (iPhone8,1; iOS 10_2; en_US; en-US; ' \
                 'scale=2.00; gamut=normal; 750x1334) AppleWebKit/420+'
    MPD_DOWNLOAD_TIMEOUT = 2
    DOWNLOAD_TIMEOUT = 15
    DUPLICATE_ETAG_RETRY = 30
    MAX_CONNECTION_ERROR_RETRY = 10
    SLEEP_INTERVAL_BEFORE_RETRY = 5

    def __init__(self, mpd, output_dir, callback_check=None, singlethreaded=False, user_agent=None, **kwargs):
        """

        :param mpd: URL to mpd
        :param output_dir: folder to store the downloaded files
        :param callback_check: callback function that can be used to check
            on stream status if the downloader cannot be sure that the stream
            is over
        :param singlethreaded: flag to force single threaded downloads.
            Not advisable since this increases the probability of lost segments.
        :param user_agent: optional User-Agent header value, defaults to ``USER_AGENT``
        :param kwargs:
            - **mpd_download_timeout**: timeout in seconds for mpd and init segment requests
            - **download_timeout**: timeout in seconds for segment requests
            - **duplicate_etag_retry**: number of identical mpd responses tolerated before aborting
            - **max_connection_error_retry**: number of retries after a connection or server error
            - **sleep_interval_before_retry**: seconds to sleep before retrying after a server error
            - **ffmpeg_binary**: path to the ffmpeg binary. Falls back to the
              ``FFMPEG_BINARY`` environment variable, then to ``ffmpeg``.
        :return:
        """
        self.mpd = mpd
        self.output_dir = output_dir
        if not os.path.exists(self.output_dir):
            os.makedirs(self.output_dir)

        self.threads = []
        self.downloaders = {}
        self.last_etag = ''
        self.duplicate_etag_count = 0
        self.callback = callback_check
        self.is_aborted = False
        self.singlethreaded = singlethreaded
        self.stream_id = ''
        self.segment_meta = {}
        self.user_agent = user_agent or self.USER_AGENT
        self.mpd_download_timeout = kwargs.pop('mpd_download_timeout', None) or self.MPD_DOWNLOAD_TIMEOUT
        self.download_timeout = kwargs.pop('download_timeout', None) or self.DOWNLOAD_TIMEOUT
        self.duplicate_etag_retry = kwargs.pop('duplicate_etag_retry', None) or self.DUPLICATE_ETAG_RETRY
        self.max_connection_error_retry = (kwargs.pop('max_connection_error_retry', None)
                                           or self.MAX_CONNECTION_ERROR_RETRY)
        self.sleep_interval_before_retry = (kwargs.pop('sleep_interval_before_retry', None)
                                            or self.SLEEP_INTERVAL_BEFORE_RETRY)

        session = requests.Session()
        adapter = requests.adapters.HTTPAdapter(max_retries=2, pool_maxsize=25)
        session.mount('http://', adapter)
        session.mount('https://', adapter)
        self.session = session

        # to store the duration of the initial buffered segments available
        self.initial_buffered_duration = 0.0

        # custom ffmpeg binary path, fallback to ffmpeg_binary path in env if available
        self.ffmpeg_binary = kwargs.pop('ffmpeg_binary', None) or os.getenv('FFMPEG_BINARY', 'ffmpeg')

    def _store_segment_meta(self, segment, representation):
        """Remember the representation label of a segment, the first label seen wins."""
        if segment not in self.segment_meta:
            self.segment_meta[segment] = representation

    def run(self):
        """Begin downloading"""
        connection_retries_count = 0
        while not self.is_aborted:
            try:
                mpd, wait = self._download_mpd()
                connection_retries_count = 0    # reset count

                if not self.duplicate_etag_count:
                    self._process_mpd(mpd)
                else:
                    logger.debug('Skip mpd processing: {0:d} - {1!s}'.format(
                        self.duplicate_etag_count, self.last_etag))
                if wait:
                    logger.debug('Sleeping for {0:d}s'.format(wait))
                    time.sleep(wait)

            except requests.HTTPError as e:
                err_msg = 'HTTPError downloading {0!s}: {1!s}.'.format(self.mpd, e)
                if e.response is not None and \
                        (e.response.status_code >= 500 or e.response.status_code == 404):
                    # 505 - temporal server problem
                    # 404 - seems to indicate that stream is starting but not ready
                    # 403 - stream is too long gone
                    connection_retries_count += 1
                    if connection_retries_count <= self.max_connection_error_retry:
                        logger.warning(err_msg)
                        time.sleep(self.sleep_interval_before_retry)
                    else:
                        logger.error(err_msg)
                        self.is_aborted = True
                else:
                    logger.error(err_msg)
                    self.is_aborted = True
            except requests.ConnectionError as e:
                # transient error maybe?
                connection_retries_count += 1
                if connection_retries_count <= self.max_connection_error_retry:
                    logger.warning('ConnectionError downloading {0!s}: {1!s}. Retrying...'.format(self.mpd, e))
                else:
                    logger.error('ConnectionError downloading {0!s}: {1!s}.'.format(self.mpd, e))
                    self.is_aborted = True

        self.stop()

    def stop(self):
        """
        This is usually called automatically by the downloader but if the download process is
        interrupted unexpectedly, e.g. KeyboardInterrupt, you should call this method to gracefully
        close off the download.

        :return:
        """
        self.is_aborted = True
        if not self.singlethreaded:
            logger.debug('Stopping download threads...')
            threads = list(self.downloaders.values())
            alive_threads = [t for t in threads if t and t.is_alive()]
            logger.debug('{0:d} of {1:d} threads are alive'.format(
                len(alive_threads), len(threads)))
            # wait for the in-flight segment downloads to finish
            for t in alive_threads:
                t.join()

    def _download_mpd(self):
        """
        Downloads the mpd stream info and returns the xml object.

        Also sets ``is_aborted`` when the response indicates that the stream has ended.

        :return: tuple of (parsed mpd root element, seconds to wait before the next request)
        """
        logger.debug('Requesting {0!s}'.format(self.mpd))
        res = self.session.get(self.mpd, headers={
            'User-Agent': self.user_agent,
            'Accept': '*/*',
        }, timeout=self.mpd_download_timeout)
        res.raise_for_status()

        # IG used to send this header when the broadcast ended.
        # Leaving it in in case it returns.
        broadcast_ended = res.headers.get('X-FB-Video-Broadcast-Ended', '')
        # Use the cache-control header as indicator that stream has ended
        cache_control = res.headers.get('Cache-Control', '')
        mobj = re.match(r'max\-age=(?P<age>[0-9]+)', cache_control)
        if mobj:
            max_age = int(mobj.group('age'))
        else:
            max_age = 0

        # Use ETag to detect if the same mpd is received repeatedly
        # if missing, use contents hash as pseudo etag
        etag = res.headers.get('ETag') or hashlib.md5(res.content).hexdigest()
        if etag != self.last_etag:
            self.last_etag = etag
            self.duplicate_etag_count = 0
        else:
            self.duplicate_etag_count += 1

        if broadcast_ended:
            logger.debug('Found X-FB-Video-Broadcast-Ended header: {0!s}'.format(broadcast_ended))
            logger.info('Stream ended.')
            self.is_aborted = True
        elif max_age > 1:
            logger.info('Stream ended (cache-control: {0!s}).'.format(cache_control))
            self.is_aborted = True
        else:
            # Periodically check callback if duplicate etag is detected
            if self.duplicate_etag_count and (self.duplicate_etag_count % 5 == 0):
                logger.warning('Duplicate etag {0!s} detected {1:d} time(s)'.format(
                    etag, self.duplicate_etag_count))
                if self.callback:
                    callback = self.callback
                    try:
                        abort = callback()
                        if abort:
                            logger.debug('Callback returned True')
                            self.is_aborted = True
                    except Exception as e:      # pylint: disable=broad-except
                        logger.warning('Error from callback: {0!s}'.format(str(e)))
                # Final hard abort
                elif self.duplicate_etag_count >= self.duplicate_etag_retry:
                    logger.info('Stream likely ended (duplicate etag/hash detected).')
                    self.is_aborted = True

        xml.etree.ElementTree.register_namespace('', MPD_NAMESPACE['mpd'])
        mpd = xml.etree.ElementTree.fromstring(res.text)
        minimum_update_period = mpd.attrib.get('minimumUpdatePeriod', '')
        mobj = re.match(r'PT(?P<secs>[0-9]+)S', minimum_update_period)
        if mobj:
            after = int(mobj.group('secs'))
        else:
            after = 1
        return mpd, after

    def _process_mpd(self, mpd):
        """
        Walk the mpd and queue a download for every segment listed in the
        best representation of each adaptation set.

        :param mpd: parsed mpd root element
        """
        periods = mpd.findall('mpd:Period', MPD_NAMESPACE)
        logger.debug('Found {0:d} period(s)'.format(len(periods)))
        # According to specs, multiple periods are allowed but IG only sends one usually
        for period in periods:
            logger.debug('Processing period {0!s}'.format(period.attrib.get('id')))
            for adaptation_set in period.findall('mpd:AdaptationSet', MPD_NAMESPACE):
                representations = adaptation_set.findall('mpd:Representation', MPD_NAMESPACE)
                if not representations:
                    # nothing to pick from, avoid an IndexError below
                    logger.warning('Adaptation set has no representations. Skipping.')
                    continue
                # sort representations by quality and pick best one
                representations = sorted(
                    representations,
                    key=lambda rep: (
                        (int(rep.attrib.get('width', '0')) * int(rep.attrib.get('height', '0'))) or
                        int(rep.attrib.get('bandwidth', '0')) or
                        rep.attrib.get('FBQualityLabel') or
                        int(rep.attrib.get('audioSamplingRate', '0'))),
                    reverse=True)
                representation = representations[0]
                representation_id = representation.attrib.get('id', '')
                logger.debug(
                    'Selected representation with id {0!s} out of {1!s}'.format(
                        representation_id,
                        ' / '.join([r.attrib.get('id', '') for r in representations])
                    ))

                representation_label = ''
                # only store segments meta for video
                if 'video' in representation.attrib.get('mimeType', ''):
                    if representation.attrib.get('FBQualityLabel'):
                        representation_label = representation.attrib.get('FBQualityLabel')
                    elif representation.attrib.get('width') and representation.attrib.get('height'):
                        representation_label = '{0!s}x{1!s}'.format(
                            representation.attrib.get('width'),
                            representation.attrib.get('height'))
                    elif representation_id:
                        representation_label = representation_id

                segment_template = representation.find('mpd:SegmentTemplate', MPD_NAMESPACE)
                if segment_template is None:
                    logger.warning(
                        'Representation {0!s} has no SegmentTemplate. Skipping.'.format(representation_id))
                    continue

                init_segment = segment_template.attrib.get('initialization')
                media_name = segment_template.attrib.get('media')
                timescale = int(segment_template.attrib.get('timescale'))

                # store stream ID
                if not self.stream_id:
                    mobj = re.search(r'\b(?P<id>[0-9_]+)\-init', init_segment)
                    if mobj:
                        self.stream_id = mobj.group('id')

                # download timeline segments
                segment_timeline = segment_template.find('mpd:SegmentTimeline', MPD_NAMESPACE)
                if segment_timeline is None:
                    logger.warning(
                        'Representation {0!s} has no SegmentTimeline. Skipping.'.format(representation_id))
                    continue
                segments = segment_timeline.findall('mpd:S', MPD_NAMESPACE)

                buffered_duration = 0
                for i, seg in enumerate(segments):
                    buffered_duration += int(seg.attrib.get('d'))
                    seg_filename = media_name.replace(
                        '$Time$', seg.attrib.get('t')).replace('$RepresentationID$', representation_id)
                    segment_url = compat_urlparse.urljoin(self.mpd, seg_filename)
                    identifier = os.path.basename(seg_filename)

                    if representation_label:
                        self._store_segment_meta(
                            os.path.basename(compat_urlparse.urlparse(seg_filename).path), representation_label)

                    # Append init chunk to first segment in the timeline for now
                    # Not sure if it's needed for every segment yet
                    init_chunk = None
                    if i == 0 and identifier not in self.downloaders:
                        # download init segment, but only when the segment itself
                        # is not already queued since _extract() would discard it
                        init_segment_url = compat_urlparse.urljoin(self.mpd, init_segment)
                        init_chunk = self._download(
                            init_segment_url, None, timeout=self.mpd_download_timeout)

                    self._extract(
                        identifier,
                        segment_url,
                        os.path.join(
                            self.output_dir,
                            os.path.basename(
                                compat_urlparse.urlparse(seg_filename).path)
                        ),
                        init_chunk=init_chunk)

                if not self.initial_buffered_duration:
                    self.initial_buffered_duration = float(buffered_duration) / timescale
                    logger.debug('Initial buffered duration: {0!s}'.format(self.initial_buffered_duration))

    def _extract(self, identifier, target, output, init_chunk=None):
        """
        Download a segment unless a download with the same identifier was already started.

        :param identifier: key used to detect segments that are already queued
        :param target: segment url
        :param output: file path to write the segment to
        :param init_chunk: optional init segment data to write before the segment
        """
        if identifier in self.downloaders:
            logger.debug('Already downloading {0!s}'.format(identifier))
            return
        logger.debug('Requesting {0!s}'.format(target))
        if self.singlethreaded:
            self._download(target, output, init_chunk=init_chunk)
        else:
            # push each download into its own thread
            t = threading.Thread(
                target=self._download, name=identifier,
                kwargs={'target': target, 'output': output, 'init_chunk': init_chunk})
            t.start()
            self.downloaders[identifier] = t

    def _download(self, target, output, timeout=None, init_chunk=None):
        """
        Download a url, retrying on HTTP and connection errors.

        :param target: url to download
        :param output: file path to write to. If empty, the content is returned instead.
        :param timeout: request timeout in seconds, defaults to ``download_timeout``
        :param init_chunk: optional data to write to the file before the downloaded content
        :return: the downloaded content if no output is given, otherwise None.
            None is also returned when all the attempts have failed.
        """
        retry_attempts = self.max_connection_error_retry + 1
        for i in range(1, retry_attempts + 1):
            try:
                res = self.session.get(target, headers={
                    'User-Agent': self.user_agent,
                    'Accept': '*/*',
                }, timeout=timeout or self.download_timeout)
                res.raise_for_status()

                if not output:
                    return res.content

                with open(output, 'wb') as f:
                    if init_chunk:
                        # prepend init chunk
                        logger.debug('Appended chunk len {0:d} to {1!s}'.format(
                            len(init_chunk), output))
                        f.write(init_chunk)
                    f.write(res.content)
                return
            except (requests.HTTPError, requests.ConnectionError) as e:
                if isinstance(e, requests.HTTPError):
                    err_msg = 'HTTPError {0:d} {1!s}: {2!s}.'.format(e.response.status_code, target, e)
                else:
                    err_msg = 'ConnectionError {0!s}: {1!s}'.format(target, e)
                if i < retry_attempts:
                    logger.warning('{0!s}. Retrying... '.format(err_msg))
                else:
                    logger.error(err_msg)

    @staticmethod
    def _get_file_index(filename):
        """ Extract the numbered index in filename for sorting """
        mobj = re.match(r'.+\-(?P<idx>[0-9]+)\.[a-z]+', filename)
        if mobj:
            return int(mobj.group('idx'))
        return -1

    def stitch(self, output_filename,
               skipffmpeg=False,
               cleartempfiles=True):
        """
        Combines all the downloaded stream segments into the final mp4 file.

        :param output_filename: Output file path
        :param skipffmpeg: bool flag to not use ffmpeg to join audio and video file into final mp4
        :param cleartempfiles: bool flag to remove downloaded and temp files
        :return: list of the file paths generated by ffmpeg
        :raises ValueError: if no stream ID was found while downloading
        """
        if not self.stream_id:
            raise ValueError('No stream ID found.')

        has_ffmpeg_error = False
        files_generated = []

        all_segments = sorted(
            self.segment_meta.keys(),
            key=lambda x: self._get_file_index(x))  # pylint: disable=unnecessary-lambda
        prev_res = ''
        sources = []
        audio_stream_format = 'source_{0}_{1}_m4a.tmp'
        video_stream_format = 'source_{0}_{1}_m4v.tmp'
        video_stream = ''
        audio_stream = ''

        # Iterate through all the segments and generate a pair of source files
        # for each time a resolution change is detected
        for segment in all_segments:

            video_stream = os.path.join(
                self.output_dir, video_stream_format.format(self.stream_id, len(sources)))
            audio_stream = os.path.join(
                self.output_dir, audio_stream_format.format(self.stream_id, len(sources)))

            if not os.path.isfile(os.path.join(self.output_dir, segment)):
                logger.warning('Segment not found: {0!s}'.format(segment))
                continue

            if not os.path.isfile(os.path.join(self.output_dir, segment.replace('.m4v', '.m4a'))):
                logger.warning('Segment not found: {0!s}'.format(segment.replace('.m4v', '.m4a')))
                continue

            if prev_res and prev_res != self.segment_meta[segment]:
                # resolution change detected
                # push current generated file pair into sources
                sources.append({'video': video_stream, 'audio': audio_stream})
                video_stream = os.path.join(
                    self.output_dir, video_stream_format.format(self.stream_id, len(sources)))
                audio_stream = os.path.join(
                    self.output_dir, audio_stream_format.format(self.stream_id, len(sources)))

            prev_res = self.segment_meta[segment]
            file_mode = 'ab' if os.path.exists(video_stream) else 'wb'
            seg_file = os.path.join(self.output_dir, segment)

            with open(video_stream, file_mode) as outfile,\
                    open(seg_file, 'rb') as readfile:
                shutil.copyfileobj(readfile, outfile)
                logger.debug(
                    'Assembling video stream {0!s} => {1!s}'.format(segment, video_stream))

            with open(audio_stream, file_mode) as outfile,\
                    open(seg_file.replace('.m4v', '.m4a'), 'rb') as readfile:
                shutil.copyfileobj(readfile, outfile)
                logger.debug(
                    'Assembling audio stream {0!s} => {1!s}'.format(segment, audio_stream))

        if audio_stream and video_stream:
            # push last pair into source
            sources.append({'video': video_stream, 'audio': audio_stream})

        if len(sources) > 1:
            logger.warning(
                'Stream has sections with different resolutions.\n'
                '{0:d} mp4 files will be generated in total.'.format(len(sources)))

        if not skipffmpeg:
            for n, source in enumerate(sources):

                if len(sources) == 1:
                    # use supplied output filename as-is if it's the only one
                    generated_filename = output_filename
                else:
                    # Generate a new filename by appending n+1
                    # to the original specified output filename
                    # so that it looks like output-1.mp4, output-2.mp4, etc
                    dir_name = os.path.dirname(output_filename)
                    file_name = os.path.basename(output_filename)
                    dot_pos = file_name.rfind('.')
                    if dot_pos >= 0:
                        filename_no_ext = file_name[0:dot_pos]
                        ext = file_name[dot_pos:]
                    else:
                        filename_no_ext = file_name
                        ext = ''
                    generated_filename = os.path.join(
                        dir_name, '{0!s}-{1:d}{2!s}'.format(filename_no_ext, n + 1, ext))

                ffmpeg_loglevel = 'error'
                if logger.level == logging.DEBUG:
                    ffmpeg_loglevel = 'warning'
                cmd = [
                    self.ffmpeg_binary, '-y',
                    '-loglevel', ffmpeg_loglevel,
                    '-i', source['audio'],
                    '-i', source['video'],
                    '-c:v', 'copy',
                    '-c:a', 'copy',
                    generated_filename]
                exit_code = subprocess.call(cmd)

                if exit_code:
                    logger.error('ffmpeg exited with the code: {0!s}'.format(exit_code))
                    logger.error('Command: {0!s}'.format(' '.join(cmd)))
                    has_ffmpeg_error = True
                else:
                    files_generated.append(generated_filename)
                    if cleartempfiles:
                        # Don't del source*.tmp files if not using ffmpeg
                        # so that user can still use the source* files with another
                        # tool such as avconv
                        for f in (source['audio'], source['video']):
                            try:
                                os.remove(f)
                            except (IOError, OSError) as ioe:
                                logger.warning('Error removing {0!s}: {1!s}'.format(f, str(ioe)))

        if cleartempfiles and not has_ffmpeg_error:
            # Specifically only remove this stream's segment files
            for seg in all_segments:
                for f in (seg, seg.replace('.m4v', '.m4a')):
                    try:
                        os.remove(os.path.join(self.output_dir, f))
                    except (IOError, OSError) as ioe:
                        logger.warning('Error removing {0!s}: {1!s}'.format(f, str(ioe)))

        return files_generated


if __name__ == '__main__':      # pragma: no cover
    import json

    # Example of how to init and start the Downloader
    parser = argparse.ArgumentParser()
    parser.add_argument('mpd')
    parser.add_argument('-v', action='store_true', help='Verbose')
    parser.add_argument('-s', metavar='OUTPUT_FILENAME',
                        help='Output filename')
    parser.add_argument('-o', metavar='DOWLOAD_DIR',
                        default='output/', help='Download folder')
    parser.add_argument('-c', action='store_true', help='Clear temp files')
    args = parser.parse_args()

    if args.v:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.INFO)

    logging.basicConfig(level=logger.level)

    dl = Downloader(mpd=args.mpd, output_dir=args.o)
    try:
        dl.run()
    except KeyboardInterrupt:
        logger.info('Interrupted')
        if not dl.is_aborted:
            dl.stop()
    finally:
        if args.s:
            with open('segment_meta.json', 'w') as metafile:
                json.dump(dl.segment_meta, metafile, indent=2)
            output_files = dl.stitch(args.s, cleartempfiles=args.c)
            print('Generated: {0!s}'.format(' '.join(output_files)))

# --------------------------------------------------------------------------------