├── requirements.txt ├── .gitignore ├── instagram_downloader ├── __init__.py ├── exceptions.py ├── constants.py ├── util.py ├── context.py └── exporter.py ├── test.py ├── README.md └── LICENSE /requirements.txt: -------------------------------------------------------------------------------- 1 | requests -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.json 2 | __pycache__ 3 | urls.txt 4 | *.raw 5 | my_test.py 6 | -------------------------------------------------------------------------------- /instagram_downloader/__init__.py: -------------------------------------------------------------------------------- 1 | from .exceptions import * 2 | from .context import Context 3 | from .exporter import MediaExporterV2, MediaExporter, MediaItem -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | from instagram_downloader import * 4 | 5 | def build_exporter(ctx: Context): 6 | if ctx.exporter_version == 1: 7 | return MediaExporter(ctx) 8 | elif ctx.exporter_version == 2: 9 | return MediaExporterV2(ctx) 10 | else: 11 | raise ContextCorrupted 12 | 13 | def load_existing_urls(filename: str): 14 | urls = [] 15 | if os.path.exists(filename): 16 | urls = json.load(open(filename, 'r')) 17 | return urls 18 | 19 | def main(ctx: Context, after: str = ""): 20 | output = "urls.json" 21 | urls = load_existing_urls(output) 22 | exporter = build_exporter(ctx) 23 | 24 | while True: 25 | print("New request (current cursor: {}) !".format(after)) 26 | try: 27 | media_item = exporter.export(first=100, after=after) 28 | except InstagramRateLimit as exception: 29 | print(exception) 30 | break 31 | urls.extend(media_item.urls) 32 | json.dump(urls, open(output, "w+")) 33 | if after == media_item.after: 34 | break 35 | if not media_item.has_next: 36 | break 37 | after = media_item.after 38 | if not after: 39 | break 40 | 41 | if __name__ == "__main__": 42 | after = "" # May be used in case the script breaks 43 | target = "TARGET" 44 | ctx = Context(target, 2) 45 | main(ctx, after) 46 | -------------------------------------------------------------------------------- /instagram_downloader/exceptions.py: -------------------------------------------------------------------------------- 1 | class InstagramRateLimit(Exception): 2 | def __init__(self, after: str = ""): 3 | self.__after = after 4 | 5 | @property 6 | def after(self): 7 | return self.__after 8 | 9 | def __str__(self): 10 | return "Instagram ratelimited your IP address. The current cursor : {}".format(self.__after) 11 | 12 | class ContextCorrupted(Exception): 13 | def __str__(self): 14 | return "The context was corrupted. Any kind of modification is prohibited." 15 | 16 | class ContextInvalidExporterVersion(Exception): 17 | def __init__(self, exporter_version: int): 18 | self.__exporter_version = exporter_version 19 | 20 | @property 21 | def exporter_version(self): 22 | return self.__exporter_version 23 | 24 | def __str__(self): 25 | return "Invalid exporter version was provided : {}".format(self.exporter_version) 26 | 27 | class MediaExporterInvalidVersion(Exception): 28 | def __init__(self, awaited_version: int, exporter_version: int): 29 | self.__awaited_version = awaited_version 30 | self.__exporter_version = exporter_version 31 | 32 | @property 33 | def awaited_version(self): 34 | return self.__awaited_version 35 | 36 | @property 37 | def exporter_version(self): 38 | return self.__exporter_version 39 | 40 | @property 41 | def __str__(self): 42 | return "Exporter version is {}. Exporter version found from Context : {}".format(self.awaited_version, self.exporter_version) 43 | 44 | class RegexReworkException(Exception): 45 | def __init__(self, regex_name: str): 46 | self.__regex_name = regex_name 47 | 48 | @property 49 | def regex_name(self): 50 | return self.__regex_name 51 | 52 | def __str__(self): 53 | return "The regex for {} seems to be broken. It needs a rework.".format(self.regex_name) 54 | -------------------------------------------------------------------------------- /instagram_downloader/constants.py: -------------------------------------------------------------------------------- 1 | import string 2 | 3 | 4 | """ 5 | Headers to try to get to make ratelimited more delayed : 6 | X-IG-WWW-Claim : possible value : hmac.AR0AbouJHrmcxB5qmK63cSap8R1sB28wynhILjXkHjfa9rG_ 7 | Cookies : possible value : "csrftoken=SiVOKBnuebNtr3OwQoHG0RnAWmCoMSJw; mid=YjdrngALAAF7q7d152TbJAvkD5m8; ig_did=35AE6F4E-9302-42FE-8179-577C6A0E826A; ds_user_id=34860308922; sessionid=34860308922%3AIB2gtwW5WNM4NY%3A16; datr=L1-gYsKLshDxe64ElQAZaIDg; shbid=\"18935\\05434860308922\\0541687797516:01f79031041c0061075b33a54e3e54101477701729e70153e949cd612d5f74e7bb4f6e33\"; shbts=\"1656261516\\05434860308922\\0541687797516:01f72b2123ffb77811ea145b92c01367746a8bfe48753b0f960a0612c1cb3f23f601efc7"; rur="RVA\05434860308922\0541687799708:01f76230e0f66c113c958d96d9817747e3516bda77bf6d21b42d64a4ef7a5381e14440f3\"" 8 | """ 9 | 10 | 11 | headers = { 12 | "Host": "www.instagram.com", 13 | "Origin": "https://www.instagram.com", 14 | "Referer": "https://www.instagram.com/", 15 | "User-Agent": "TwitterBot/1.0", 16 | "Accept": "*/*", 17 | "Accept-Language": "fr,fr-FR;q=0.8,en-US;q=0.5,en;q=0.3", 18 | "Accept-Encoding": "gzip, deflate, br", 19 | "DNT": "1", 20 | "Connection": "keep-alive", 21 | "Sec-Fetch-Dest": "empty", 22 | "Sec-Fetch-Mode": "cors", 23 | "Sec-Fetch-Site": "same-origin", 24 | "Pragma": "no-cache", 25 | "Cache-Control": "no-cache", 26 | "TE": "trailers", 27 | "X-Requested-With": "XMLHttpRequest", 28 | "X-IG-WWW-Claim": "0", 29 | } 30 | instagram_urls = dict( 31 | main="https://www.instagram.com", 32 | graphql="https://www.instagram.com/graphql/query/?query_hash={}&variables={}", 33 | feed_api="https://i.instagram.com/api/v1/feed/user/{}/username/?count={}", 34 | target="https://www.instagram.com/{}/channel/?hl=fr", 35 | target_json="https://www.instagram.com/{}/?__a=1&__d=dis/" 36 | ) 37 | base_36 = string.digits + string.ascii_letters 38 | consumer_lib_regex = r"(\/static\/bundles\/(?:es6|metro)\/ConsumerLibCommons\.js\/[a-f0-9]+\.js)" 39 | query_kinds_regex = dict( 40 | posts=r"queryId:\"([a-f0-9]{32})\"", 41 | stories=r"(?:const|var) _=\"([a-f0-9]{32})\"" # const = es6, var = metro 42 | ) 43 | x_asbd_id_regex = r"ASBD_ID='(\d+)'" 44 | x_ig_app_id_regex = r"instagramWebDesktopFBAppId='(\d+)'" 45 | window_shared_data_regex = r"^