├── .gitignore
├── LICENSE
├── README.md
├── instagram.py
└── main.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | env/
 12 | build/
 13 | develop-eggs/
 14 | dist/
 15 | downloads/
 16 | eggs/
 17 | .eggs/
 18 | lib/
 19 | lib64/
 20 | parts/
 21 | sdist/
 22 | var/
 23 | wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | 
 49 | # Translations
 50 | *.mo
 51 | *.pot
 52 | 
 53 | # Django stuff:
 54 | *.log
 55 | local_settings.py
 56 | 
 57 | # Flask stuff:
 58 | instance/
 59 | .webassets-cache
 60 | 
 61 | # Scrapy stuff:
 62 | .scrapy
 63 | 
 64 | # Sphinx documentation
 65 | docs/_build/
 66 | 
 67 | # PyBuilder
 68 | target/
 69 | 
 70 | # Jupyter Notebook
 71 | .ipynb_checkpoints
 72 | 
 73 | # pyenv
 74 | .python-version
 75 | 
 76 | # celery beat schedule file
 77 | celerybeat-schedule
 78 | 
 79 | # SageMath parsed files
 80 | *.sage.py
 81 | 
 82 | # dotenv
 83 | .env
 84 | 
 85 | # virtualenv
 86 | .venv
 87 | venv/
 88 | ENV/
 89 | 
 90 | # Spyder project settings
 91 | .spyderproject
 92 | .spyproject
 93 | 
 94 | # Rope project settings
 95 | .ropeproject
 96 | 
 97 | # mkdocs documentation
 98 | /site
 99 | 
100 | # mypy
101 | .mypy_cache/
102 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 dundua
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # IG-Downloader
 2 | Bulk download Instagram stories
 3 | 
  4 | This script will download stories of followed users posted in the past 24 hours. A logged-in Instagram account is required to use it.
 5 | 
 6 | # Usage
 7 |     python3 main.py
 8 | 
 9 | On first run, a config file will be generated. The script will ask for some web browser cookie values of a currently logged in user, which can be obtained from the developer tools of Google Chrome or Firefox.
10 | 
 11 | Note that the cookies may need to be refreshed periodically (at least every 3 months); otherwise this script may not be able to successfully authenticate.
12 | 
13 | To periodically obtain stories from followed users, run this script at least every 24 hours. A Windows Scheduled Task or a Unix cron job is recommended to perform this automatically.
14 | 
15 | # Special Thanks
16 | - https://github.com/CaliAlec/ChromeIGStory
17 | - https://github.com/mgp25/Instagram-API
18 | 
19 | # License
20 | MIT
21 | 
22 | # Legal Disclaimer
23 | This project is in no way affiliated with, authorized, maintained, sponsored or endorsed by Instagram or any of its affiliates or subsidiaries. This is an independent project that utilizes Instagram's unofficial API. Use at your own risk.
24 | 


--------------------------------------------------------------------------------
/instagram.py:
--------------------------------------------------------------------------------
  1 | import requests
  2 | from requests.adapters import HTTPAdapter
  3 | from requests.packages.urllib3.util.retry import Retry
  4 | import logging
  5 | import os
  6 | from datetime import datetime
  7 | try:
  8 |     import defusedxml.minidom as xml
  9 | except ImportError:
 10 |     import xml.dom.minidom as xml
 11 | 
 12 | class instagram:
 13 |     def __init__(self, cookie):
 14 |         """This sets up this class to communicate with Instagram.
 15 | 
 16 |         Args:
 17 |             cookie: A dictionary object with the required cookie values (ds_user_id, sessionid, csrftoken).
 18 |         """
 19 |         self.userid = cookie["ds_user_id"]
 20 |         self.sessionid = cookie["sessionid"]
 21 |         self.csrftoken = cookie["csrftoken"]
 22 |         self.mid = cookie["mid"]
 23 |         self.headers = {
 24 |             "accept"           : "*/*",
 25 |             "accept-encoding"  : "gzip, deflate",
 26 |             "accept-language"  : "en-US",
 27 |             "content_type"     : "application/x-www-form-urlencoded; charset=UTF-8",
 28 |             "cache-control"    : "no-cache",
 29 |             "cookie"           : "ds_user_id=" + self.userid + "; sessionid=" + self.sessionid + "; csrftoken=" + self.csrftoken + "; mid=" + self.mid,
 30 |             "dnt"              : "1",
 31 |             # "pragma" : "no-cache",
 32 |             # "referer" : "https://www.instagram.com/",
 33 |             "user-agent"       : "Instagram 10.26.0 (iPhone7,2; iOS 10_1_1; en_US; en-US; scale=2.00; gamut=normal; 750x1334) AppleWebKit/420+",
 34 |             "x-ig-capabilities": "36oD",
 35 |             # "x-ig-connection-type" : "WIFI",
 36 |             # "x-ig-fb-http-engine" : "Liger"
 37 |         }
 38 |         self.session = requests.Session()
 39 |         max_tries = 3
 40 |         backoff_factor = 0.2
 41 |         status_forcelist = (500, 502, 503, 504)
 42 |         retry = Retry(total=max_tries, read=max_tries, connect=max_tries, backoff_factor=backoff_factor, status_forcelist=status_forcelist)
 43 |         adapter = HTTPAdapter(max_retries=retry)
 44 |         self.session.mount('http://', adapter)
 45 |         self.session.mount('https://', adapter)
 46 |         self.session.headers = self.headers
 47 | 
 48 |     def getReelTray(self):
 49 |         """Get reel tray from API.
 50 | 
 51 |         Returns:
 52 |             Response object with reel tray API response
 53 |         """
 54 |         endpoint = "https://i.instagram.com/api/v1/feed/reels_tray/"
 55 |         response = self.session.get(endpoint, timeout=60)
 56 |         if response.status_code != requests.codes.ok:
 57 |             logging.error("Status Code Error." + str(response.status_code))
 58 |             response.raise_for_status()
 59 |         return response
 60 | 
 61 |     def getReelMedia(self, user):
 62 |         """Get reel media of a user from API.
 63 | 
 64 |         Args:
 65 |             user: User ID
 66 | 
 67 |         Returns:
 68 |             Response object with reel media API response
 69 |         """
 70 |         endpoint = "https://i.instagram.com/api/v1/feed/user/" + str(user) + "/reel_media/"
 71 |         response = self.session.get(endpoint, timeout=60)
 72 |         if response.status_code != requests.codes.ok:
 73 |             logging.error("Status Code Error." + str(response.status_code))
 74 |             response.raise_for_status()
 75 |         return response
 76 | 
 77 |     def getStories(self):
 78 |         return self.getReelTray()
 79 | 
 80 |     def getUserStories(self, user):
 81 |         return self.getReelMedia(user)
 82 | 
 83 |     def getUserIDs(self, json: dict) -> list:
 84 |         """Extract user IDs from reel tray JSON.
 85 | 
 86 |         Args:
 87 |             json: Reel tray response from IG
 88 | 
 89 |         Returns:
 90 |             List of user IDs
 91 |         """
 92 |         users = []
 93 |         for user in json['tray']:
 94 |             users.append(user['user']['pk'])
 95 |         return users
 96 | 
 97 |     def getFile(self, url: str, dest: str):
 98 |         """Download file and save to destination
 99 | 
100 |         Args:
101 |             url: URL of item to download
102 |             dest: File system destination to save item to
103 | 
104 |         Returns:
105 |             None
106 |         """
107 |         logging.debug("URL: %s", url)
108 |         logging.debug("Dest: %s", dest)
109 |         try:
110 |             if os.path.getsize(dest) == 0:
111 |                 logging.info("Empty file exists. Removing.")
112 |                 os.remove(dest)
113 |         except FileNotFoundError:
114 |             pass
115 | 
116 |         try:
117 |             dirpath = os.path.dirname(dest)
118 |             os.makedirs(dirpath, exist_ok=True)
119 |             with open(dest, "xb") as handle:
120 |                 response = self.session.get(url, stream=True, timeout=60)
121 |                 if response.status_code != requests.codes.ok:
122 |                     logging.error("Status Code Error." + str(response.status_code))
123 |                     response.raise_for_status()
124 |                 for data in response.iter_content(chunk_size=4194304):
125 |                     handle.write(data)
126 |                 handle.close()
127 |         except FileExistsError:
128 |             logging.info("File already exists.")
129 | 
130 |         if os.path.getsize(dest) == 0:
131 |             logging.info("Error downloading. Removing.")
132 |             os.remove(dest)
133 | 
134 |     def formatPath(self, user: str, pk: int, timestamp: int, postid: str, mediatype: int) -> str:
135 |         """Format download path to a specific format/template
136 | 
137 |         Args:
138 |             user: User name
139 |             pk: User ID
140 |             timestamp: UTC Unix timestamp
141 |             postid: Post ID
142 |             mediatype: Media type as defined by IG
143 | 
144 |         Returns:
145 |             None
146 |         """
147 |         dirpath = os.path.dirname(__file__)
148 |         utcdatetime = datetime.utcfromtimestamp(timestamp).strftime("%Y-%m-%d-%H-%M-%S")
149 |         if mediatype == 1:
150 |             ext = ".jpg"
151 |             type = "stories"
152 |         elif mediatype == 2:
153 |             ext = ".mp4"
154 |             type = "stories"
155 |         elif mediatype == 3:
156 |             ext = ".mp4"
157 |             type = "livestories"
158 |         else:
159 |             ext = ""
160 |             type = "other"
161 |         path = os.path.join(dirpath, "downloads", user + "_" + str(pk), type, utcdatetime + "_" + str(timestamp) + "_" + postid + ext)
162 |         return path
163 | 
164 |     def downloadReel(self, resp):
165 |         """Download stories of a followed user's tray.
166 | 
167 |         Download the stories of a followed user.
168 | 
169 |         Args:
170 |             resp: JSON dictionary of reel from IG API
171 | 
172 |         Returns:
173 |             None
174 |         """
175 |         try:
176 |             for index, item in enumerate(resp['items']):
177 |                 logging.debug('    ' + str(index))
178 |                 username = item['user']['username']
179 |                 userpk = item['user']['pk']
180 |                 timestamp = item['taken_at']
181 |                 postid = item['id']
182 |                 mediatype = item['media_type']
183 |                 if mediatype == 2: # Video
184 |                     largest = 0
185 |                     for versionindex, video in enumerate(item['video_versions']):
186 |                         itemsize = video['width'] * video['height']
187 |                         largestsize = item['video_versions'][largest]['width'] * \
188 |                                       item['video_versions'][largest]['height']
189 |                         if itemsize > largestsize:
190 |                             largest = versionindex
191 |                     logging.debug('        V' + str(largest))
192 |                     url = item['video_versions'][largest]['url']
193 |                     logging.debug('            ' + url)
194 |                 elif mediatype == 1: # Image
195 |                     largest = 0
196 |                     for versionindex, image in enumerate(item['image_versions2']['candidates']):
197 |                         itemsize = image['width'] * image['height']
198 |                         largestsize = item['image_versions2']['candidates'][largest]['width'] * \
199 |                                       item['image_versions2']['candidates'][largest]['height']
200 |                         if itemsize > largestsize:
201 |                             largest = versionindex
202 |                     logging.debug('        I' + str(largest))
203 |                     url = item['image_versions2']['candidates'][largest]['url']
204 |                     logging.debug('            ' + url)
205 |                 else: # Unknown
206 |                     logging.debug('        E')
207 |                     url = None
208 |                     pass
209 | 
210 |                 path = self.formatPath(username, userpk, timestamp, postid, mediatype)
211 |                 self.getFile(url, path)
212 |         except KeyError: # JSON 'item' key does not exist for later items in tray as of 6/2/2017
213 |             pass
214 | 
215 |     def downloadTray(self, resp):
216 |         """Download stories of logged in user's tray.
217 | 
218 |         Download the stories as available in the tray. The tray contains a list of
219 |         reels, a collection of the stories posted by a followed user.
220 | 
221 |         The tray only contains a small set of reels of the first few users. To download
222 |         the rest, a reel must be obtained for each user in the tray.
223 | 
224 |         Args:
225 |             resp: JSON dictionary of tray from IG API
226 | 
227 |         Returns:
228 |             None
229 |         """
230 |         for reel in resp['tray']:
231 |             self.downloadReel(reel)
232 | 
233 |     def downloadStoryLive(self, resp):
234 |         """Download post-live stories of a followed user's tray.
235 | 
236 |         Download the post-live stories of a followed user.
237 | 
238 |         Args:
239 |             resp: JSON dictionary of reel from IG API
240 | 
241 |         Returns:
242 |             None
243 |         """
244 |         try:
245 |             for index,item in enumerate(resp["post_live"]["post_live_items"]):
246 |                 logging.debug('    ' + str(index))
247 |                 username = item["user"]["username"]
248 |                 userpk = item["user"]["pk"]
249 |                 for bindex,broadcast in enumerate(item["broadcasts"]):
250 |                     logging.debug('        ' + str(bindex))
251 |                     timestamp = broadcast["published_time"]
252 |                     postid = broadcast["media_id"]
253 |                     dash = broadcast["dash_manifest"]
254 |                     dashxml = xml.parseString(dash)
255 |                     elements = dashxml.getElementsByTagName("BaseURL")
256 |                     for eindex,element in enumerate(elements):
257 |                         for node in element.childNodes:
258 |                             if node.nodeType == node.TEXT_NODE:
259 |                                 url = node.data
260 |                                 mediatype = 3
261 |                                 path = self.formatPath(username, userpk, timestamp, postid + "_" + str(eindex), mediatype)
262 |                                 self.getFile(url, path)
263 |         except KeyError: # No "post_live" key
264 |             logging.debug('    ' + 'No live stories.')
265 | 
266 |     def close(self):
267 |         """Close seesion to IG
268 | 
269 |         Returns:
270 |             None
271 |         """
272 |         self.session.close()
273 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import logging
  3 | import sys
  4 | import time
  5 | import os
  6 | import instagram
  7 | import tarfile
  8 | 
  9 | def saveJSON(timestamp: int, type: str, content: dict):
 10 |     """Save JSON file
 11 | 
 12 |     Args:
 13 |         timestamp: Unix timestamp
 14 |         type: Name
 15 |         content: JSON data
 16 | 
 17 |     Returns:
 18 |         None
 19 |     """
 20 |     dirpath = os.getcwd()
 21 |     path = os.path.join(dirpath, "json", str(timestamp) + "_" + type + ".json")
 22 |     dirpath = os.path.dirname(path)
 23 |     os.makedirs(dirpath, exist_ok=True)
 24 |     f = open(path, "tx")
 25 |     json.dump(content, f)
 26 |     f.close()
 27 | 
 28 | def main():
 29 |     """ Main function
 30 | 
 31 |     Returns:
 32 |         None
 33 |     """
 34 |     configfile = "config.json"
 35 |     logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
 36 | 
 37 |     try:
 38 |         logging.info("Opening config file.")
 39 |         f = open(configfile, "tr")
 40 |     except FileNotFoundError:
 41 |         logging.info("File not found.")
 42 |         logging.info("Creating new config file.")
 43 |         f = open(configfile, "tx+")
 44 | 
 45 |     try:
 46 |         logging.info("Loading config file.")
 47 |         config = json.load(f)
 48 |     except json.decoder.JSONDecodeError:
 49 |         logging.info("Config file corrupted.")
 50 |         logging.info("Creating new config file.")
 51 |         f.close()
 52 |         f = open(configfile, "tw+")
 53 |         logging.info("Updating config.")
 54 |         iguserid = input("Enter your IG user ID: ")
 55 |         igsessionid = input("Enter your IG session ID: ")
 56 |         igcsrftoken = input("Enter your IG CSRF token: ")
 57 |         igmid = input("Enter your IG mid: ")
 58 |         config = {
 59 |             "ds_user_id": iguserid,
 60 |             "sessionid" : igsessionid,
 61 |             "csrftoken" : igcsrftoken,
 62 |             "mid" : igmid,
 63 |         }
 64 |         logging.info("Saving config.")
 65 |         json.dump(config, f) # Save config
 66 | 
 67 |     #logging.info("Config settings:")
 68 |     #logging.info("%s", config) # Contains private data
 69 |     f.close()
 70 | 
 71 |     # Insert error checking to see if config is valid and works
 72 | 
 73 |     logging.info("Initialize IG interface.")
 74 |     ig = instagram.instagram(config)
 75 |     logging.info("Get story tray.")
 76 |     traytime = int(time.time())
 77 |     storyresp = ig.getStories()
 78 |     storyjson = storyresp.json()
 79 |     saveJSON(traytime, "tray", storyjson)
 80 | 
 81 |     logging.info("Downloading story tray.")
 82 |     ig.downloadTray(storyjson)
 83 |     users = ig.getUserIDs(storyjson)
 84 |     for user in users:
 85 |         reeltime = int(time.time())
 86 |         uresp = ig.getUserStories(user)
 87 |         ujson = uresp.json()
 88 |         saveJSON(reeltime, "reel_" + str(user), ujson)
 89 |         ig.downloadReel(ujson)
 90 | 
 91 |     logging.info("Downloading post-live stories.")
 92 |     ig.downloadStoryLive(storyjson)
 93 | 
 94 |     logging.info("Collecting list of JSON objects.")
 95 |     jsonlist = []
 96 |     for file in os.listdir("json"):
 97 |         if file.endswith(".json"):
 98 |             jsonlist.append(os.path.join("json", file))
 99 | 
100 |     logging.info("Creating tar.xz file of JSON objects.")
101 |     path = os.path.join(os.getcwd(), "json", str(traytime) + ".tar.xz")
102 |     tar = tarfile.open(path, "x:xz")
103 |     for path in jsonlist:
104 |         tar.add(path)
105 |     tar.close()
106 | 
107 |     logging.info("Removing old JSON objects.")
108 |     for path in jsonlist:
109 |         os.remove(path)
110 | 
111 |     logging.info("Done.")
112 | 
# Run the downloader only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
115 | 


--------------------------------------------------------------------------------