├── .gitignore ├── LICENSE ├── README.md ├── instagram.py └── main.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | 
Copyright (c) 2017 dundua 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # IG-Downloader 2 | Bulk download Instagram stories 3 | 4 | This script will download stories of followed users posted in the past 24 hours. A logged in Instagram account is required to use this. 5 | 6 | # Usage 7 | python3 main.py 8 | 9 | On first run, a config file will be generated. The script will ask for some web browser cookie values of a currently logged in user, which can be obtained from the developer tools of Google Chrome or Firefox. 10 | 11 | Note that the cookies may need to be periodically refreshed at least every 3 months or else this script may not be able to successfully authenticate. 
# README.md (remainder)
#
# To periodically obtain stories from followed users, run this script at least
# every 24 hours. A Windows Scheduled Task or a Unix cron job is recommended to
# perform this automatically.
#
# # Special Thanks
# - https://github.com/CaliAlec/ChromeIGStory
# - https://github.com/mgp25/Instagram-API
#
# # License
# MIT
#
# # Legal Disclaimer
# This project is in no way affiliated with, authorized, maintained, sponsored
# or endorsed by Instagram or any of its affiliates or subsidiaries. This is an
# independent project that utilizes Instagram's unofficial API. Use at your own
# risk.

# --------------------------------------------------------------------------------
# /instagram.py:
# --------------------------------------------------------------------------------
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
import logging
import os
from datetime import datetime, timezone
try:
    import defusedxml.minidom as xml
except ImportError:
    import xml.dom.minidom as xml


class instagram:
    """Small client for the unofficial Instagram story endpoints.

    It fetches the story tray and per-user reels with a browser session's
    cookies and saves the referenced media under a ``downloads`` directory
    next to this module.
    """

    # media_type value -> (file extension, sub-directory under the user folder)
    _MEDIA_LAYOUT = {
        1: (".jpg", "stories"),
        2: (".mp4", "stories"),
        3: (".mp4", "livestories"),
    }
    _REQUEST_TIMEOUT = 60      # seconds, applied to every HTTP request
    _CHUNK_SIZE = 4194304      # bytes per streamed chunk (4 MiB)

    def __init__(self, cookie):
        """Set up the HTTP session used to talk to Instagram.

        Args:
            cookie: A dictionary with the required cookie values
                (ds_user_id, sessionid, csrftoken, mid).

        Raises:
            KeyError: If one of the required cookie values is missing.
        """
        self.userid = cookie["ds_user_id"]
        self.sessionid = cookie["sessionid"]
        self.csrftoken = cookie["csrftoken"]
        self.mid = cookie["mid"]
        # Formatting (rather than "+") keeps this working when a hand-edited
        # config stores a value such as the user ID as a number.
        cookie_header = (
            f"ds_user_id={self.userid}; sessionid={self.sessionid}; "
            f"csrftoken={self.csrftoken}; mid={self.mid}"
        )
        self.headers = {
            "accept": "*/*",
            "accept-encoding": "gzip, deflate",
            "accept-language": "en-US",
            # NOTE(review): "content_type" (underscore) is not the standard
            # "content-type" header name. It is kept as-is because the
            # requests sent are body-less GETs -- confirm before renaming.
            "content_type": "application/x-www-form-urlencoded; charset=UTF-8",
            "cache-control": "no-cache",
            "cookie": cookie_header,
            "dnt": "1",
            # "pragma" : "no-cache",
            # "referer" : "https://www.instagram.com/",
            "user-agent": "Instagram 10.26.0 (iPhone7,2; iOS 10_1_1; en_US; en-US; scale=2.00; gamut=normal; 750x1334) AppleWebKit/420+",
            "x-ig-capabilities": "36oD",
            # "x-ig-connection-type" : "WIFI",
            # "x-ig-fb-http-engine" : "Liger"
        }
        self.session = requests.Session()
        max_tries = 3
        retry = Retry(
            total=max_tries,
            read=max_tries,
            connect=max_tries,
            backoff_factor=0.2,
            status_forcelist=(500, 502, 503, 504),
        )
        adapter = HTTPAdapter(max_retries=retry)
        self.session.mount('http://', adapter)
        self.session.mount('https://', adapter)
        self.session.headers = self.headers

    def _request(self, endpoint):
        """GET an API endpoint, logging and raising on an HTTP error status."""
        response = self.session.get(endpoint, timeout=self._REQUEST_TIMEOUT)
        if response.status_code != requests.codes.ok:
            logging.error("Status Code Error.%s", response.status_code)
        response.raise_for_status()
        return response

    def getReelTray(self):
        """Get reel tray from API.

        Returns:
            Response object with reel tray API response

        Raises:
            requests.HTTPError: If the API answers with a 4xx/5xx status.
        """
        return self._request("https://i.instagram.com/api/v1/feed/reels_tray/")

    def getReelMedia(self, user):
        """Get reel media of a user from API.

        Args:
            user: User ID

        Returns:
            Response object with reel media API response

        Raises:
            requests.HTTPError: If the API answers with a 4xx/5xx status.
        """
        return self._request(
            "https://i.instagram.com/api/v1/feed/user/" + str(user) + "/reel_media/"
        )

    def getStories(self):
        """Alias of getReelTray()."""
        return self.getReelTray()

    def getUserStories(self, user):
        """Alias of getReelMedia()."""
        return self.getReelMedia(user)

    def getUserIDs(self, json: dict) -> list:
        """Extract user IDs from reel tray JSON.

        Args:
            json: Reel tray response from IG

        Returns:
            List of user IDs
        """
        return [reel['user']['pk'] for reel in json['tray']]

    def getFile(self, url: str, dest: str):
        """Download file and save to destination.

        An existing non-empty destination is left untouched and nothing is
        requested. A file that ends up empty, or a download that fails part
        way, is removed so that a later run can fetch it again.

        Args:
            url: URL of item to download
            dest: File system destination to save item to

        Returns:
            None

        Raises:
            requests.RequestException: If the download fails.
        """
        logging.debug("URL: %s", url)
        logging.debug("Dest: %s", dest)
        if not url:
            logging.warning("No URL to download for %s. Skipping.", dest)
            return

        # A zero-byte file is the remains of an earlier failed attempt.
        try:
            if os.path.getsize(dest) == 0:
                logging.info("Empty file exists. Removing.")
                os.remove(dest)
        except FileNotFoundError:
            pass

        dirpath = os.path.dirname(dest)
        if dirpath:  # os.makedirs("") raises for a bare file name
            os.makedirs(dirpath, exist_ok=True)

        # Exclusive creation: an already downloaded file is never overwritten
        # and no request is made for it.
        try:
            handle = open(dest, "xb")
        except FileExistsError:
            logging.info("File already exists.")
            return

        try:
            response = self.session.get(url, stream=True, timeout=self._REQUEST_TIMEOUT)
            try:
                if response.status_code != requests.codes.ok:
                    logging.error("Status Code Error.%s", response.status_code)
                response.raise_for_status()
                with handle:
                    for data in response.iter_content(chunk_size=self._CHUNK_SIZE):
                        handle.write(data)
            finally:
                # A streamed response holds its connection until it is closed.
                response.close()
        except BaseException:
            # Do not leave a partial file behind: the next run would treat it
            # as already downloaded.
            handle.close()
            os.remove(dest)
            raise

        if os.path.getsize(dest) == 0:
            logging.info("Error downloading. Removing.")
            os.remove(dest)

    def formatPath(self, user: str, pk: int, timestamp: int, postid: str, mediatype: int) -> str:
        """Format download path to a specific format/template.

        Args:
            user: User name
            pk: User ID
            timestamp: UTC Unix timestamp
            postid: Post ID
            mediatype: Media type as defined by IG

        Returns:
            Path of the form
            ``<module dir>/downloads/<user>_<pk>/<category>/<utc time>_<timestamp>_<postid><ext>``
        """
        dirpath = os.path.dirname(__file__)
        utcdatetime = datetime.fromtimestamp(timestamp, tz=timezone.utc).strftime("%Y-%m-%d-%H-%M-%S")
        ext, category = self._MEDIA_LAYOUT.get(mediatype, ("", "other"))
        filename = f"{utcdatetime}_{timestamp}_{postid}{ext}"
        return os.path.join(dirpath, "downloads", f"{user}_{pk}", category, filename)

    @staticmethod
    def _largestIndex(versions):
        """Return the index of the version with the most pixels, or None if empty."""
        return max(
            range(len(versions)),
            key=lambda i: versions[i]['width'] * versions[i]['height'],
            default=None,
        )

    def downloadReel(self, resp):
        """Download stories of a followed user's tray.

        Download the stories of a followed user. Items with an unknown media
        type, no media versions or missing fields are skipped.

        Args:
            resp: JSON dictionary of reel from IG API

        Returns:
            None
        """
        # JSON 'items' key does not exist for later items in tray as of 6/2/2017
        items = resp.get('items')
        if not items:
            return

        for index, item in enumerate(items):
            logging.debug(" %s", index)
            try:
                mediatype = item['media_type']
                if mediatype == 2:  # Video
                    versions = item['video_versions']
                    tag = "V"
                elif mediatype == 1:  # Image
                    versions = item['image_versions2']['candidates']
                    tag = "I"
                else:  # Unknown
                    logging.debug(" E")
                    logging.warning("Skipping story item %s: unknown media type %s", index, mediatype)
                    continue

                largest = self._largestIndex(versions)
                if largest is None:
                    logging.warning("Skipping story item %s: no media versions", index)
                    continue
                logging.debug(" %s%s", tag, largest)
                url = versions[largest]['url']
                logging.debug(" %s", url)

                path = self.formatPath(
                    item['user']['username'],
                    item['user']['pk'],
                    item['taken_at'],
                    item['id'],
                    mediatype,
                )
            except KeyError as err:
                # One malformed item must not hide the rest of the reel.
                logging.warning("Skipping story item %s: missing key %s", index, err)
                continue
            self.getFile(url, path)

    def downloadTray(self, resp):
        """Download stories of logged in user's tray.

        Download the stories as available in the tray. The tray contains a list of
        reels, a collection of the stories posted by a followed user.

        The tray only contains a small set of reels of the first few users. To download
        the rest, a reel must be obtained for each user in the tray.

        Args:
            resp: JSON dictionary of tray from IG API

        Returns:
            None
        """
        for reel in resp['tray']:
            self.downloadReel(reel)

    def downloadStoryLive(self, resp):
        """Download post-live stories of a followed user's tray.

        Download the post-live stories of a followed user. Every BaseURL
        element of a broadcast's DASH manifest is downloaded as its own file.

        Args:
            resp: JSON dictionary of reel from IG API

        Returns:
            None
        """
        try:
            live_items = resp["post_live"]["post_live_items"]
        except (KeyError, TypeError):  # No "post_live" key
            logging.debug(" No live stories.")
            return

        for index, item in enumerate(live_items):
            logging.debug(" %s", index)
            try:
                username = item["user"]["username"]
                userpk = item["user"]["pk"]
                broadcasts = item["broadcasts"]
            except KeyError as err:
                logging.warning("Skipping post-live item %s: missing key %s", index, err)
                continue

            for bindex, broadcast in enumerate(broadcasts):
                logging.debug(" %s", bindex)
                try:
                    timestamp = broadcast["published_time"]
                    postid = broadcast["media_id"]
                    dash = broadcast["dash_manifest"]
                except KeyError as err:
                    logging.warning("Skipping broadcast %s: missing key %s", bindex, err)
                    continue

                dashxml = xml.parseString(dash)
                for eindex, element in enumerate(dashxml.getElementsByTagName("BaseURL")):
                    # Build the URL from this element's own text only, so an
                    # empty element can never reuse the previous element's URL.
                    url = "".join(
                        node.data
                        for node in element.childNodes
                        if node.nodeType == node.TEXT_NODE
                    ).strip()
                    if not url:
                        logging.debug(" Empty BaseURL element %s. Skipping.", eindex)
                        continue
                    mediatype = 3
                    path = self.formatPath(username, userpk, timestamp, f"{postid}_{eindex}", mediatype)
                    self.getFile(url, path)

    def close(self):
        """Close session to IG

        Returns:
            None
        """
        self.session.close()


# --------------------------------------------------------------------------------
# /main.py:
# --------------------------------------------------------------------------------
import json
import logging
import sys
import time
import os
import instagram
import tarfile

# Cookie values that must be present in the config file.
_CONFIG_KEYS = ("ds_user_id", "sessionid", "csrftoken", "mid")


def saveJSON(timestamp: int, type: str, content: dict):
    """Save JSON file as ``json/<timestamp>_<type>.json`` under the working directory.

    Args:
        timestamp: Unix timestamp
        type: Name
        content: JSON data

    Returns:
        None

    Raises:
        FileExistsError: If the target file already exists.
    """
    path = os.path.join(os.getcwd(), "json", f"{timestamp}_{type}.json")
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "x", encoding="utf-8") as f:
        json.dump(content, f)


def _loadConfig(configfile):
    """Return the config dictionary, or None if it is missing or unusable."""
    try:
        logging.info("Opening config file.")
        with open(configfile, "r", encoding="utf-8") as f:
            logging.info("Loading config file.")
            config = json.load(f)
    except FileNotFoundError:
        logging.info("File not found.")
        return None
    except json.JSONDecodeError:
        logging.info("Config file corrupted.")
        return None
    # Valid JSON can still lack the cookie values the IG interface needs.
    if not isinstance(config, dict) or any(key not in config for key in _CONFIG_KEYS):
        logging.info("Config file corrupted.")
        return None
    return config


def _createConfig(configfile):
    """Ask the user for the cookie values, save them and return the config."""
    logging.info("Creating new config file.")
    logging.info("Updating config.")
    config = {
        "ds_user_id": input("Enter your IG user ID: "),
        "sessionid": input("Enter your IG session ID: "),
        "csrftoken": input("Enter your IG CSRF token: "),
        "mid": input("Enter your IG mid: "),
    }
    logging.info("Saving config.")
    # Written only after every value has been entered, so an aborted prompt
    # does not leave an empty config file behind.
    with open(configfile, "w", encoding="utf-8") as f:
        json.dump(config, f)  # Save config
    return config


def _archiveJSON(traytime):
    """Pack the saved JSON responses into json/<traytime>.tar.xz and delete them."""
    logging.info("Collecting list of JSON objects.")
    jsonlist = [
        os.path.join("json", name)
        for name in sorted(os.listdir("json"))
        if name.endswith(".json")
    ]

    logging.info("Creating tar.xz file of JSON objects.")
    archive = os.path.join(os.getcwd(), "json", str(traytime) + ".tar.xz")
    with tarfile.open(archive, "x:xz") as tar:
        for member in jsonlist:
            tar.add(member)

    # Reached only when the archive was written and closed successfully.
    logging.info("Removing old JSON objects.")
    for member in jsonlist:
        os.remove(member)


def main():
    """ Main function

    Loads (or interactively creates) the config, downloads the story tray,
    every tray user's reel and the post-live stories, then archives the raw
    JSON responses.

    Returns:
        None
    """
    configfile = "config.json"
    logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)

    config = _loadConfig(configfile)
    if config is None:
        config = _createConfig(configfile)
    # The config is deliberately not logged: it contains private data.

    logging.info("Initialize IG interface.")
    ig = instagram.instagram(config)
    try:
        logging.info("Get story tray.")
        traytime = int(time.time())
        storyjson = ig.getStories().json()
        saveJSON(traytime, "tray", storyjson)

        logging.info("Downloading story tray.")
        ig.downloadTray(storyjson)
        for user in ig.getUserIDs(storyjson):
            reeltime = int(time.time())
            ujson = ig.getUserStories(user).json()
            saveJSON(reeltime, "reel_" + str(user), ujson)
            ig.downloadReel(ujson)

        logging.info("Downloading post-live stories.")
        ig.downloadStoryLive(storyjson)
    finally:
        # Release pooled connections even when a download fails.
        ig.close()

    _archiveJSON(traytime)

    logging.info("Done.")


if __name__ == "__main__":
    main()

# --------------------------------------------------------------------------------