├── .gitignore
├── geckodriver
├── README.md
├── geckodriver.log
├── bot.py.save
└── bot.py


/.gitignore:
--------------------------------------------------------------------------------
1 | config.ini
2 | images/*
3 | videos/*


--------------------------------------------------------------------------------
/geckodriver:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/someoneonearthwholovestg/telegram-bot-instagram-downloader-picture-videos-python-selenium/HEAD/geckodriver


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # telegram-bot-instagram-downloader-picture-videos-python-selenium
 2 | Telegram bot to download Instagram profile pictures, posts and videos.
 3 | 
 4 | You need to create two folders, `./images` and `./videos`, in the directory you run the bot from.
 5 | You also need to create a file called `config.ini` containing your Telegram bot token, as in the following two lines:
 6 | 
 7 | [Telegram]
 8 | 
 9 | tokenBot = xxxxxxxxxxxxxxxxxxxxxxxxx
10 | 


--------------------------------------------------------------------------------
/geckodriver.log:
--------------------------------------------------------------------------------
 1 | 1614675495333	geckodriver	INFO	Listening on 127.0.0.1:41585
 2 | 1614675495337	mozrunner::runner	INFO	Running command: "/usr/bin/firefox" "--marionette" "-headless" "-foreground" "-no-remote" "-profile" "/tmp/rust_mozprofileoxbAau"
 3 | *** You are running in headless mode.
 4 | console.warn: SearchSettings: "get: No settings file exists, new profile?" (new Error("", "(unknown module)"))
 5 | 1614675496958	Marionette	INFO	Listening on port 37279
 6 | 1614675497063	Marionette	WARN	TLS certificate errors will be ignored for this session
 7 | 1614675497074	Marionette	ERROR	[15] No reply from Marionette:Register
 8 | 1614675505008	Marionette	ERROR	[33] No reply from Marionette:Register
 9 | JavaScript error: resource://gre/actors/PictureInPictureChild.jsm, line 569: TypeError: this.contentWindow is null
10 | JavaScript error: resource://gre/actors/PictureInPictureChild.jsm, line 225: InvalidStateError: JSWindowActorChild.document getter: Cannot access property 'document' after actor 'PictureInPictureToggle' has been destroyed
11 | Exiting due to channel error.
12 | Exiting due to channel error.
13 | Exiting due to channel error.
14 | 


--------------------------------------------------------------------------------
/bot.py.save:
--------------------------------------------------------------------------------
  1 | from selenium import webdriver
  2 | from selenium.webdriver.support import expected_conditions as EC
  3 | from selenium.webdriver.chrome.options import Options
# Browser setup: one module-level Chrome WebDriver is created at import time
# and shared by every call to profile().
options = Options()
# Command-line switches passed to the Chrome binary.
options.add_argument('--headless')
options.add_argument('--disable-gpu')
options.add_argument('--disable-dev-shm-usage')
options.add_argument("--no-sandbox")
# Alternative: a chromedriver binary placed next to the script.
# DRIVER_PATH = './chromedriver'
DRIVER_PATH = '/usr/lib/chromium-browser/chromedriver'
# NOTE(review): the absolute path above is machine-specific - confirm it
# exists on the deployment host.
driver = webdriver.Chrome(executable_path=DRIVER_PATH, options=options)
 12 | 
 13 | import logging
 14 | 
 15 | from telegram.ext import Updater, CommandHandler, MessageHandler, Filters, CallbackQueryHandler
 16 | from telegram import InlineKeyboardButton,InlineKeyboardMarkup,KeyboardButton,ReplyKeyboardMarkup
 17 | 
 18 | import sys
 19 | import urllib
 20 | import re
 21 | import requests
 22 | import datetime
 23 | from datetime import datetime
 24 | from tqdm import tqdm
 25 | 
 26 | import time
 27 | 
 28 | # read config.ini for token
 29 | import os
 30 | try:
 31 |     from configparser import ConfigParser
 32 | except ImportError:
 33 |     from ConfigParser import ConfigParser  # ver. < 3.0
 34 | 
 35 | 
 36 | 
# Enable logging: records at INFO level and above are emitted, each one
# formatted as "<time> - <logger name> - <level> - <message>".
logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                    level=logging.INFO)

# Module-level logger; error() writes its warnings through it.
logger = logging.getLogger(__name__)
 42 | 
 43 | 
 44 | # Define a few command handlers. These usually take the two arguments update and
 45 | # context. Error handlers also receive the raised TelegramError object in error.
 46 | def start(update, context):
 47 |     """Send a message when the command /start is issued."""
 48 |     user = update.message.from_user
 49 |     print(user)
 50 |     send = f"Hi {user.username}, started your bot. \n First name {user.first_name} \n ID:{user.id}"
 51 |     context.bot.send_message(chat_id=update.message.chat_id, text=send)
 52 |     update.message.reply_text('Hi!')
 53 | 
 54 | 
 55 | def help(update, context):
 56 |     """Send a message when the command /help is issued."""
 57 |     update.message.reply_text('Help!')
 58 | 
 59 | 
 60 | # def echo(update, context):
 61 | #     """Echo the user message."""
 62 | #     update.message.reply_text(update.message.text)
 63 | 
 64 | def profile(update, context):
 65 |     """Send profile"""
 66 |     chat_id = update.message.chat_id
 67 |     url = update.message.text.split('?')[0]
 68 | 
 69 |     #PROFILE PICTURE
 70 |     profile_pic = re.match(r'^(https:)[\/][\/](www\.)?instagram.com[\/][a-zA-Z0-9._]*(\/)?$', url)
 71 | 
 72 |     if profile_pic:
 73 |         try:
 74 |             get_content = driver.get(url)
 75 |             print("\nDownloading the profile image...")
 76 |             src = driver.page_source
 77 |             find_pp = re.search(r'profile_pic_url_hd\":\"([^\'\" >]+)', src)
 78 |             pp_link = find_pp.group()
 79 |             pp_final = re.sub('profile_pic_url_hd":"', '', pp_link)
 80 |             file_size_request = requests.get(pp_final.replace('\\u0026','&'), stream=True)
 81 |             file_size = int(file_size_request.headers['Content-Length'])
 82 |             block_size = 1024
 83 |             filename = datetime.strftime(datetime.now(), '%Y-%m-%d-%H-%M-%S')
 84 |             t = tqdm(total=file_size, unit='B',
 85 |                         unit_scale=True, desc=filename, ascii=True)
 86 |             with open('./images/' + filename + '.jpg', 'wb') as f:
 87 |                 for data in file_size_request.iter_content(block_size):
 88 |                     t.update(len(data))
 89 |                     f.write(data)
 90 |             t.close()
 91 |             print("Profile picture downloaded successfully")
 92 |             context.bot.send_photo(chat_id, photo=open('./images/'+filename+'.jpg','rb'))
 93 |         except Exception as e:
 94 |             print(e)
 95 |         
 96 |         return
 97 |     
 98 |     #PHOTOS OR VIDEOS
 99 |     normal_pic_or_video = re.match(r'^(https:)[\/][\/](www\.)?instagram.com[\/]p[\/][a-zA-Z0-9._]*[\/]?$', url)
100 | 
101 |     if normal_pic_or_video:
102 |         request_image = driver.get(url)
103 |         # time.sleep(3)
104 |         try: # try to accept cookie
105 |             driver.find_element_by_css_selector('.bIiDR').click()
106 |             n = 0
107 |         except:
108 |             n = 0
109 |         while True: # to emulate do while with fail_condition
110 |             try:
111 |                 src = driver.page_source
112 |                 check_type = re.search(r'<meta name="medium" content=[\'"]?([^\'" >]+)', src)
113 |                 check_type_f = check_type.group()
114 |                 final = re.sub('<meta name="medium" content="', '', check_type_f)
115 |                 
116 |                 if final == "image":
117 |                     print("\nDownloading the image...")
118 |                     # extract_image_link = re.search(r'meta property="og:image" content=[\'"]?([^\'" >]+)', src)
119 |                     # print(extract_image_link)
120 |                     time.sleep(1)
121 |                     try: # try to find image in carousel, if not
122 |                         if (int(n) == int(0)):
123 |                             image_link = driver.find_element_by_css_selector('article .vi798 li:nth-child(2) .KL4Bh img').get_attribute('src')
124 |                         else:
125 |                             image_link = driver.find_element_by_css_selector('article .vi798 li:nth-child(3) .KL4Bh img').get_attribute('src')
126 |                             # final = re.sub('meta property="og:image" content="', '', image_link).replace('\\u0026','&').replace('&amp;','&')
127 |                     except:
128 |                         image_link = driver.find_element_by_css_selector('article .KL4Bh img').get_attribute('src')
129 |                     final = image_link
130 |                     _response = requests.get(final).content
131 |                     file_size_request = requests.get(final, stream=True)
132 |                     file_size = int(file_size_request.headers['Content-Length'])
133 |                     block_size = 1024 
134 |                     filename = datetime.strftime(datetime.now(), '%Y-%m-%d-%H-%M-%S')
135 |                     t=tqdm(total=file_size, unit='B', unit_scale=True, desc=filename, ascii=True)
136 |                     with open('./images/' +filename + '.jpg', 'wb') as f:
137 |                         for data in file_size_request.iter_content(block_size):
138 |                             t.update(len(data))
139 |                             f.write(data)
140 |                     t.close()
141 |                     context.bot.send_photo(chat_id, photo=open('./images/'+filename+'.jpg','rb'))
142 |                     print("Image downloaded successfully")
143 | 
144 |                 if final == "video": 
145 |                     print("Downloading the video...")
146 |                     extract_video_link = re.search(r'meta property="og:video" content=[\'"]?([^\'" >]+)', src)
147 |                     video_link = extract_video_link.group()
148 |                     final = re.sub('meta property="og:video" content="', '', video_link).replace('\\u0026','&').replace('&amp;','&')
149 |                     _response = requests.get(final).content
150 |                     file_size_request = requests.get(final, stream=True)
151 |                     file_size = int(file_size_request.headers['Content-Length'])
152 |                     block_size = 1024 
153 |                     filename = datetime.strftime(datetime.now(), '%Y-%m-%d-%H-%M-%S')
154 |                     t=tqdm(total=file_size, unit='B', unit_scale=True, desc=filename, ascii=True)
155 |                     with open('./videos/' +filename + '.mp4', 'wb') as f:
156 |                         for data in file_size_request.iter_content(block_size):
157 |                             t.update(len(data))
158 |                             f.write(data)
159 |                     t.close()
160 |                     context.bot.send_video(chat_id=chat_id, video=open('./videos/' + filename + '.mp4', 'rb'))
161 |                     print("Video downloaded successfully")
162 |                 
163 |                 try:
164 |                     driver.find_element_by_css_selector('._6CZji').click()
165 |                     n += 1
166 |                 except:
167 |                     print('uscito')
168 |                     break
169 | 
170 |             except AttributeError:
171 |                 print("Unknown URL")
172 | 
173 | #     update.send_photo(chat_id, photo=open('path', 'rb'))
174 | 
175 |     #STORIES
176 |     # stories = re.match(r'^(https:)[\/][\/](www\.)?instagram.com[\/]stories[\/][a-zA-Z0-9._]*[\/][a-zA-Z0-9._]*[\/]?$', url)
177 | 
178 |     # if stories:
179 |     #     request_image = driver.get(url)
180 |     #     # time.sleep(3)
181 |     #     try: # try to accept cookie
182 |     #         driver.find_element_by_css_selector('.bIiDR').click()
183 |     #         n = 0
184 |     #     except:
185 |     #         n = 0
186 |     #     while True: # to emulate do while with fail_condition
187 |     #         try:
188 | 
189 | 
190 | 
191 | 
192 | 
def error(update, context):
    """Record, at warning level, the exception raised while handling an update."""
    cause = context.error
    logger.warning('Update "%s" caused error "%s"', update, cause)
196 | 
197 | 
def main():
    """Start the bot.

    Reads the bot token from the ``tokenBot`` option in the ``[Telegram]``
    section of ``config.ini`` (looked up in the current working directory),
    registers the command, message and error handlers, then polls Telegram
    until the process is interrupted.
    """
    config = ConfigParser()
    # parse existing file
    config.read('config.ini')
    # The token must come from config.get(): config.read() returns the list of
    # file names it parsed, not option values, so its result is never a token.
    tokenBot = config.get('Telegram', 'tokenBot')

    # Create the Updater and pass it your bot's token.
    # Make sure to set use_context=True to use the new context based callbacks
    # Post version 12 this will no longer be necessary
    updater = Updater(tokenBot, use_context=True)

    # Get the dispatcher to register handlers
    dp = updater.dispatcher

    # on different commands - answer in Telegram
    dp.add_handler(CommandHandler("start", start))
    dp.add_handler(CommandHandler("help", help))

    # every text message is handed to profile(), which treats it as a URL
    dp.add_handler(MessageHandler(Filters.text, profile))

    # log all errors
    dp.add_error_handler(error)

    # Start the Bot
    updater.start_polling()

    # Run the bot until you press Ctrl-C or the process receives SIGINT,
    # SIGTERM or SIGABRT. This should be used most of the time, since
    # start_polling() is non-blocking and will stop the bot gracefully.
    updater.idle()


if __name__ == '__main__':
    main()
239 | 
240 | 
241 | 


--------------------------------------------------------------------------------
/bot.py:
--------------------------------------------------------------------------------
  1 | from selenium import webdriver
  2 | from selenium.webdriver.support import expected_conditions as EC
  3 | # from selenium.webdriver.chrome.options import Options
  4 | from selenium.webdriver.firefox.options import Options
# Browser setup: one module-level Firefox WebDriver is created at import time
# and shared by every call to profile().
options = Options()
# Run Firefox without a visible window.
options.headless = True
# Switches from the earlier Chrome-based setup, kept for reference:
# options.add_argument('--headless')
# options.add_argument('--disable-gpu')

# options.add_argument('--disable-dev-shm-usage')
# options.add_argument("--no-sandbox")
# Chrome driver locations from the earlier setup:
# DRIVER_PATH = './chromedriver'
# DRIVER_PATH = '/usr/lib/chromium-browser/chromedriver'
# Relative path, so it is resolved against the current working directory.
DRIVER_PATH = './geckodriver'
driver = webdriver.Firefox(executable_path=DRIVER_PATH, options=options)
 16 | 
 17 | import logging
 18 | 
 19 | from telegram.ext import Updater, CommandHandler, MessageHandler, Filters, CallbackQueryHandler
 20 | from telegram import InlineKeyboardButton,InlineKeyboardMarkup,KeyboardButton,ReplyKeyboardMarkup
 21 | 
 22 | 
 23 | import sys
 24 | import urllib
 25 | import re
 26 | import requests
 27 | import datetime
 28 | from datetime import datetime
 29 | from tqdm import tqdm
 30 | 
 31 | import time
 32 | 
 33 | # read config.ini for token
 34 | import os
 35 | 
 36 | # read config.ini for token
 37 | try:
 38 |     from configparser import ConfigParser
 39 | except ImportError:
 40 |     from ConfigParser import ConfigParser  # ver. < 3.0
 41 | 
 42 | 
 43 | 
# Enable logging: records at INFO level and above are emitted, each one
# formatted as "<time> - <logger name> - <level> - <message>".
logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                    level=logging.INFO)

# Module-level logger; error() writes its warnings through it.
logger = logging.getLogger(__name__)
 49 | 
 50 | 
 51 | # Define a few command handlers. These usually take the two arguments update and
 52 | # context. Error handlers also receive the raised TelegramError object in error.
 53 | def start(update, context):
 54 |     """Send a message when the command /start is issued."""
 55 |     user = update.message.from_user
 56 |     print(user)
 57 |     send = f"Hi {user.username}, started your bot. \n First name {user.first_name} \n ID:{user.id}"
 58 |     context.bot.send_message(chat_id=update.message.chat_id, text=send)
 59 |     update.message.reply_text('Hi!')
 60 | 
 61 | 
 62 | def help(update, context):
 63 |     """Send a message when the command /help is issued."""
 64 |     update.message.reply_text('Help!')
 65 | 
 66 | 
 67 | # def echo(update, context):
 68 | #     """Echo the user message."""
 69 | #     update.message.reply_text(update.message.text)
 70 | 
 71 | def profile(update, context):
 72 |     """Send profile"""
 73 |     chat_id = update.message.chat_id
 74 |     url = update.message.text.split('?')[0]
 75 | 
 76 |     #PROFILE PICTURE
 77 |     profile_pic = re.match(r'^(https:)[\/][\/](www\.)?instagram.com[\/][a-zA-Z0-9._]*(\/)?$', url)
 78 | 
 79 |     if profile_pic:
 80 |         try:
 81 |             get_content = driver.get(url)
 82 |             print("\nDownloading the profile image...")
 83 |             src = driver.page_source
 84 |             find_pp = re.search(r'profile_pic_url_hd\":\"([^\'\" >]+)', src)
 85 |             pp_link = find_pp.group()
 86 |             pp_final = re.sub('profile_pic_url_hd":"', '', pp_link)
 87 |             file_size_request = requests.get(pp_final.replace('\\u0026','&'), stream=True)
 88 |             file_size = int(file_size_request.headers['Content-Length'])
 89 |             block_size = 1024
 90 |             filename = datetime.strftime(datetime.now(), '%Y-%m-%d-%H-%M-%S')
 91 |             t = tqdm(total=file_size, unit='B',
 92 |                         unit_scale=True, desc=filename, ascii=True)
 93 |             with open('./images/' + filename + '.jpg', 'wb') as f:
 94 |                 for data in file_size_request.iter_content(block_size):
 95 |                     t.update(len(data))
 96 |                     f.write(data)
 97 |             t.close()
 98 |             print("Profile picture downloaded successfully")
 99 |             context.bot.send_photo(chat_id, photo=open('./images/'+filename+'.jpg','rb'))
100 |         except Exception as e:
101 |             print(e)
102 |         
103 |         return
104 |     
105 |     #PHOTOS OR VIDEOS
106 |     normal_pic_or_video = re.match(r'^(https:)[\/][\/](www\.)?instagram.com[\/]p[\/][a-zA-Z0-9._]*[\/]?$', url)
107 | 
108 |     if normal_pic_or_video:
109 |         request_image = driver.get(url)
110 |         # time.sleep(3)
111 |         try: # try to accept cookie
112 |             driver.find_element_by_css_selector('.bIiDR').click()
113 |             n = 0
114 |         except:
115 |             n = 0
116 |         while True: # to emulate do while with fail_condition
117 |             try:
118 |                 src = driver.page_source
119 |                 check_type = re.search(r'<meta name="medium" content=[\'"]?([^\'" >]+)', src)
120 |                 check_type_f = check_type.group()
121 |                 final = re.sub('<meta name="medium" content="', '', check_type_f)
122 |                 
123 |                 if final == "image":
124 |                     print("\nDownloading the image...")
125 |                     # extract_image_link = re.search(r'meta property="og:image" content=[\'"]?([^\'" >]+)', src)
126 |                     # print(extract_image_link)
127 |                     time.sleep(1)
128 |                     try: # try to find image in carousel, if not
129 |                         if (int(n) == int(0)):
130 |                             image_link = driver.find_element_by_css_selector('article .vi798 li:nth-child(2) .KL4Bh img').get_attribute('src')
131 |                         else:
132 |                             image_link = driver.find_element_by_css_selector('article .vi798 li:nth-child(3) .KL4Bh img').get_attribute('src')
133 |                             # final = re.sub('meta property="og:image" content="', '', image_link).replace('\\u0026','&').replace('&amp;','&')
134 |                     except:
135 |                         image_link = driver.find_element_by_css_selector('article .KL4Bh img').get_attribute('src')
136 |                     final = image_link
137 |                     _response = requests.get(final).content
138 |                     file_size_request = requests.get(final, stream=True)
139 |                     file_size = int(file_size_request.headers['Content-Length'])
140 |                     block_size = 1024 
141 |                     filename = datetime.strftime(datetime.now(), '%Y-%m-%d-%H-%M-%S')
142 |                     t=tqdm(total=file_size, unit='B', unit_scale=True, desc=filename, ascii=True)
143 |                     with open('./images/' +filename + '.jpg', 'wb') as f:
144 |                         for data in file_size_request.iter_content(block_size):
145 |                             t.update(len(data))
146 |                             f.write(data)
147 |                     t.close()
148 |                     context.bot.send_photo(chat_id, photo=open('./images/'+filename+'.jpg','rb'))
149 |                     print("Image downloaded successfully")
150 | 
151 |                 if final == "video": 
152 |                     print("Downloading the video...")
153 |                     extract_video_link = re.search(r'meta property="og:video" content=[\'"]?([^\'" >]+)', src)
154 |                     video_link = extract_video_link.group()
155 |                     final = re.sub('meta property="og:video" content="', '', video_link).replace('\\u0026','&').replace('&amp;','&')
156 |                     _response = requests.get(final).content
157 |                     file_size_request = requests.get(final, stream=True)
158 |                     file_size = int(file_size_request.headers['Content-Length'])
159 |                     block_size = 1024 
160 |                     filename = datetime.strftime(datetime.now(), '%Y-%m-%d-%H-%M-%S')
161 |                     t=tqdm(total=file_size, unit='B', unit_scale=True, desc=filename, ascii=True)
162 |                     with open('./videos/' +filename + '.mp4', 'wb') as f:
163 |                         for data in file_size_request.iter_content(block_size):
164 |                             t.update(len(data))
165 |                             f.write(data)
166 |                     t.close()
167 |                     context.bot.send_video(chat_id=chat_id, video=open('./videos/' + filename + '.mp4', 'rb'))
168 |                     print("Video downloaded successfully")
169 |                 
170 |                 try:
171 |                     driver.find_element_by_css_selector('._6CZji').click()
172 |                     time.sleep(1)
173 |                     n += 1
174 |                 except:
175 |                     print('uscito')
176 |                     break
177 |             except AttributeError:
178 |                 print("Unknown URL")
179 | 
180 | #     update.send_photo(chat_id, photo=open('path', 'rb'))
181 | 
182 |     #STORIES
183 |     # stories = re.match(r'^(https:)[\/][\/](www\.)?instagram.com[\/]stories[\/][a-zA-Z0-9._]*[\/][a-zA-Z0-9._]*[\/]?$', url)
184 | 
185 |     # if stories:
186 |     #     request_image = driver.get(url)
187 |     #     # time.sleep(3)
188 |     #     try: # try to accept cookie
189 |     #         driver.find_element_by_css_selector('.bIiDR').click()
190 |     #         n = 0
191 |     #     except:
192 |     #         n = 0
193 |     #     while True: # to emulate do while with fail_condition
194 |     #         try:
195 | 
196 | 
197 | 
198 | 
199 | 
def error(update, context):
    """Record, at warning level, the exception raised while handling an update."""
    cause = context.error
    logger.warning('Update "%s" caused error "%s"', update, cause)
203 | 
204 | 
def main():
    """Run the bot: load the token, wire up the handlers and poll Telegram."""
    # The token is the tokenBot option of the [Telegram] section in config.ini.
    parser = ConfigParser()
    parser.read('config.ini')
    token = parser.get('Telegram', 'tokenBot')

    # use_context=True selects the context-based callback signature
    # (update, context) that the handlers in this file use.
    updater = Updater(token, use_context=True)
    dispatcher = updater.dispatcher

    # Commands first, then the handler that receives text messages.
    handlers = (
        CommandHandler("start", start),
        CommandHandler("help", help),
        MessageHandler(Filters.text, profile),
    )
    for handler in handlers:
        dispatcher.add_handler(handler)

    # Log every error raised while handling an update.
    dispatcher.add_error_handler(error)

    # start_polling() is non-blocking; idle() then keeps the process alive
    # until Ctrl-C / SIGINT, SIGTERM or SIGABRT and stops the bot gracefully.
    updater.start_polling()
    updater.idle()


if __name__ == '__main__':
    main()
246 | 
247 | 
248 | 


--------------------------------------------------------------------------------