├── .gitignore ├── geckodriver ├── README.md ├── geckodriver.log ├── bot.py.save └── bot.py /.gitignore: -------------------------------------------------------------------------------- 1 | config.ini 2 | images/* 3 | videos/* -------------------------------------------------------------------------------- /geckodriver: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/someoneonearthwholovestg/telegram-bot-instagram-downloader-picture-videos-python-selenium/HEAD/geckodriver -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # telegram-bot-instagram-downloader-picture-videos-python-selenium 2 | Telegram bot to download Instagram profile pictures, posts, and videos. 3 | 4 | You need to create two folders, `./images` and `./videos`. 5 | You need to create a file called `config.ini` containing your Telegram bot token (`tokenBot`), as in the following two lines. 6 | 7 | [Telegram] 8 | 9 | tokenBot = xxxxxxxxxxxxxxxxxxxxxxxxx 10 | -------------------------------------------------------------------------------- /geckodriver.log: -------------------------------------------------------------------------------- 1 | 1614675495333 geckodriver INFO Listening on 127.0.0.1:41585 2 | 1614675495337 mozrunner::runner INFO Running command: "/usr/bin/firefox" "--marionette" "-headless" "-foreground" "-no-remote" "-profile" "/tmp/rust_mozprofileoxbAau" 3 | *** You are running in headless mode. 4 | console.warn: SearchSettings: "get: No settings file exists, new profile?" 
(new Error("", "(unknown module)")) 5 | 1614675496958 Marionette INFO Listening on port 37279 6 | 1614675497063 Marionette WARN TLS certificate errors will be ignored for this session 7 | 1614675497074 Marionette ERROR [15] No reply from Marionette:Register 8 | 1614675505008 Marionette ERROR [33] No reply from Marionette:Register 9 | JavaScript error: resource://gre/actors/PictureInPictureChild.jsm, line 569: TypeError: this.contentWindow is null 10 | JavaScript error: resource://gre/actors/PictureInPictureChild.jsm, line 225: InvalidStateError: JSWindowActorChild.document getter: Cannot access property 'document' after actor 'PictureInPictureToggle' has been destroyed 11 | Exiting due to channel error. 12 | Exiting due to channel error. 13 | Exiting due to channel error. 14 | -------------------------------------------------------------------------------- /bot.py.save: -------------------------------------------------------------------------------- 1 | from selenium import webdriver 2 | from selenium.webdriver.support import expected_conditions as EC 3 | from selenium.webdriver.chrome.options import Options 4 | options = Options() 5 | options.add_argument('--headless') 6 | options.add_argument('--disable-gpu') 7 | options.add_argument('--disable-dev-shm-usage') 8 | options.add_argument("--no-sandbox") 9 | # DRIVER_PATH = './chromedriver' 10 | DRIVER_PATH = '/usr/lib/chromium-browser/chromedriver' 11 | driver = webdriver.Chrome(executable_path=DRIVER_PATH, options=options) 12 | 13 | import logging 14 | 15 | from telegram.ext import Updater, CommandHandler, MessageHandler, Filters, CallbackQueryHandler 16 | from telegram import InlineKeyboardButton,InlineKeyboardMarkup,KeyboardButton,ReplyKeyboardMarkup 17 | 18 | import sys 19 | import urllib 20 | import re 21 | import requests 22 | import datetime 23 | from datetime import datetime 24 | from tqdm import tqdm 25 | 26 | import time 27 | 28 | # read config.ini for token 29 | import os 30 | try: 31 | from configparser 
import ConfigParser 32 | except ImportError: 33 | from ConfigParser import ConfigParser # ver. < 3.0 34 | 35 | 36 | 37 | # Enable logging 38 | logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', 39 | level=logging.INFO) 40 | 41 | logger = logging.getLogger(__name__) 42 | 43 | 44 | # Define a few command handlers. These usually take the two arguments update and 45 | # context. Error handlers also receive the raised TelegramError object in error. 46 | def start(update, context): 47 | """Send a message when the command /start is issued.""" 48 | user = update.message.from_user 49 | print(user) 50 | send = f"Hi {user.username}, started your bot. \n First name {user.first_name} \n ID:{user.id}" 51 | context.bot.send_message(chat_id=update.message.chat_id, text=send) 52 | update.message.reply_text('Hi!') 53 | 54 | 55 | def help(update, context): 56 | """Send a message when the command /help is issued.""" 57 | update.message.reply_text('Help!') 58 | 59 | 60 | # def echo(update, context): 61 | # """Echo the user message.""" 62 | # update.message.reply_text(update.message.text) 63 | 64 | def profile(update, context): 65 | """Send profile""" 66 | chat_id = update.message.chat_id 67 | url = update.message.text.split('?')[0] 68 | 69 | #PROFILE PICTURE 70 | profile_pic = re.match(r'^(https:)[\/][\/](www\.)?instagram.com[\/][a-zA-Z0-9._]*(\/)?$', url) 71 | 72 | if profile_pic: 73 | try: 74 | get_content = driver.get(url) 75 | print("\nDownloading the profile image...") 76 | src = driver.page_source 77 | find_pp = re.search(r'profile_pic_url_hd\":\"([^\'\" >]+)', src) 78 | pp_link = find_pp.group() 79 | pp_final = re.sub('profile_pic_url_hd":"', '', pp_link) 80 | file_size_request = requests.get(pp_final.replace('\\u0026','&'), stream=True) 81 | file_size = int(file_size_request.headers['Content-Length']) 82 | block_size = 1024 83 | filename = datetime.strftime(datetime.now(), '%Y-%m-%d-%H-%M-%S') 84 | t = tqdm(total=file_size, unit='B', 85 | 
unit_scale=True, desc=filename, ascii=True) 86 | with open('./images/' + filename + '.jpg', 'wb') as f: 87 | for data in file_size_request.iter_content(block_size): 88 | t.update(len(data)) 89 | f.write(data) 90 | t.close() 91 | print("Profile picture downloaded successfully") 92 | context.bot.send_photo(chat_id, photo=open('./images/'+filename+'.jpg','rb')) 93 | except Exception as e: 94 | print(e) 95 | 96 | return 97 | 98 | #PHOTOS OR VIDEOS 99 | normal_pic_or_video = re.match(r'^(https:)[\/][\/](www\.)?instagram.com[\/]p[\/][a-zA-Z0-9._]*[\/]?$', url) 100 | 101 | if normal_pic_or_video: 102 | request_image = driver.get(url) 103 | # time.sleep(3) 104 | try: # try to accept cookie 105 | driver.find_element_by_css_selector('.bIiDR').click() 106 | n = 0 107 | except: 108 | n = 0 109 | while True: # to emulate do while with fail_condition 110 | try: 111 | src = driver.page_source 112 | check_type = re.search(r']+)', src) 113 | check_type_f = check_type.group() 114 | final = re.sub(']+)', src) 119 | # print(extract_image_link) 120 | time.sleep(1) 121 | try: # try to find image in carousel, if not 122 | if (int(n) == int(0)): 123 | image_link = driver.find_element_by_css_selector('article .vi798 li:nth-child(2) .KL4Bh img').get_attribute('src') 124 | else: 125 | image_link = driver.find_element_by_css_selector('article .vi798 li:nth-child(3) .KL4Bh img').get_attribute('src') 126 | # final = re.sub('meta property="og:image" content="', '', image_link).replace('\\u0026','&').replace('&','&') 127 | except: 128 | image_link = driver.find_element_by_css_selector('article .KL4Bh img').get_attribute('src') 129 | final = image_link 130 | _response = requests.get(final).content 131 | file_size_request = requests.get(final, stream=True) 132 | file_size = int(file_size_request.headers['Content-Length']) 133 | block_size = 1024 134 | filename = datetime.strftime(datetime.now(), '%Y-%m-%d-%H-%M-%S') 135 | t=tqdm(total=file_size, unit='B', unit_scale=True, desc=filename, ascii=True) 
136 | with open('./images/' +filename + '.jpg', 'wb') as f: 137 | for data in file_size_request.iter_content(block_size): 138 | t.update(len(data)) 139 | f.write(data) 140 | t.close() 141 | context.bot.send_photo(chat_id, photo=open('./images/'+filename+'.jpg','rb')) 142 | print("Image downloaded successfully") 143 | 144 | if final == "video": 145 | print("Downloading the video...") 146 | extract_video_link = re.search(r'meta property="og:video" content=[\'"]?([^\'" >]+)', src) 147 | video_link = extract_video_link.group() 148 | final = re.sub('meta property="og:video" content="', '', video_link).replace('\\u0026','&').replace('&','&') 149 | _response = requests.get(final).content 150 | file_size_request = requests.get(final, stream=True) 151 | file_size = int(file_size_request.headers['Content-Length']) 152 | block_size = 1024 153 | filename = datetime.strftime(datetime.now(), '%Y-%m-%d-%H-%M-%S') 154 | t=tqdm(total=file_size, unit='B', unit_scale=True, desc=filename, ascii=True) 155 | with open('./videos/' +filename + '.mp4', 'wb') as f: 156 | for data in file_size_request.iter_content(block_size): 157 | t.update(len(data)) 158 | f.write(data) 159 | t.close() 160 | context.bot.send_video(chat_id=chat_id, video=open('./videos/' + filename + '.mp4', 'rb')) 161 | print("Video downloaded successfully") 162 | 163 | try: 164 | driver.find_element_by_css_selector('._6CZji').click() 165 | n += 1 166 | except: 167 | print('uscito') 168 | break 169 | 170 | except AttributeError: 171 | print("Unknown URL") 172 | 173 | # update.send_photo(chat_id, photo=open('path', 'rb')) 174 | 175 | #STORIES 176 | # stories = re.match(r'^(https:)[\/][\/](www\.)?instagram.com[\/]stories[\/][a-zA-Z0-9._]*[\/][a-zA-Z0-9._]*[\/]?$', url) 177 | 178 | # if stories: 179 | # request_image = driver.get(url) 180 | # # time.sleep(3) 181 | # try: # try to accept cookie 182 | # driver.find_element_by_css_selector('.bIiDR').click() 183 | # n = 0 184 | # except: 185 | # n = 0 186 | # while True: # to 
emulate do while with fail_condition 187 | # try: 188 | 189 | 190 | 191 | 192 | 193 | def error(update, context): 194 | """Log Errors caused by Updates.""" 195 | logger.warning('Update "%s" caused error "%s"', update, context.error) 196 | 197 | 198 | def main(): 199 | """Start the bot.""" 200 | 201 | # instantiate 202 | config = ConfigParser() 203 | # parse existing file 204 | config.read('config.ini') 205 | tokenBot = config.get('Telegram', 'tokenBot') 206 | tokenBot = config.read(os.path.join(os.path.dirname(__file__), 'Telegram', 'tokenBot')) 207 | 208 | # Create the Und pass it your bot's token. 209 | # Make sure to set use_context=True to use the new context based callbacks 210 | # Post version 12 this will no longer be necessary 211 | updater = Updater(tokenBot, use_context=True) 212 | 213 | # Get the dispatcher to register handlers 214 | dp = updater.dispatcher 215 | 216 | # on different commands - answer in Telegram 217 | dp.add_handler(CommandHandler("start", start)) 218 | dp.add_handler(CommandHandler("help", help)) 219 | 220 | # dp.add_handler(CommandHandler("profile", profile)) 221 | 222 | # on noncommand i.e message - echo the message on Telegram 223 | dp.add_handler(MessageHandler(Filters.text, profile)) 224 | 225 | # log all errors 226 | dp.add_error_handler(error) 227 | 228 | # Start the Bot 229 | updater.start_polling() 230 | 231 | # Run the bot until you press Ctrl-C or the process receives SIGINT, 232 | # SIGTERM or SIGABRT. This should be used most of the time, since 233 | # start_polling() is non-blocking and will stop the bot gracefully. 
234 | updater.idle() 235 | 236 | 237 | if __name__ == '__main__': 238 | main() 239 | 240 | 241 | -------------------------------------------------------------------------------- /bot.py: -------------------------------------------------------------------------------- 1 | from selenium import webdriver 2 | from selenium.webdriver.support import expected_conditions as EC 3 | # from selenium.webdriver.chrome.options import Options 4 | from selenium.webdriver.firefox.options import Options 5 | options = Options() 6 | options.headless = True 7 | # options.add_argument('--headless') 8 | # options.add_argument('--disable-gpu') 9 | 10 | # options.add_argument('--disable-dev-shm-usage') 11 | # options.add_argulsment("--no-sandbox") 12 | #DRIVER_PATH = './chromedriver' 13 | # DRIVER_PATH = '/usr/lib/chromium-browser/chromedriver' 14 | DRIVER_PATH = './geckodriver' 15 | driver = webdriver.Firefox(executable_path=DRIVER_PATH, options=options) 16 | 17 | import logging 18 | 19 | from telegram.ext import Updater, CommandHandler, MessageHandler, Filters, CallbackQueryHandler 20 | from telegram import InlineKeyboardButton,InlineKeyboardMarkup,KeyboardButton,ReplyKeyboardMarkup 21 | 22 | 23 | import sys 24 | import urllib 25 | import re 26 | import requests 27 | import datetime 28 | from datetime import datetime 29 | from tqdm import tqdm 30 | 31 | import time 32 | 33 | # read config.ini for token 34 | import os 35 | 36 | # read config.ini for token 37 | try: 38 | from configparser import ConfigParser 39 | except ImportError: 40 | from ConfigParser import ConfigParser # ver. < 3.0 41 | 42 | 43 | 44 | # Enable logging 45 | logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', 46 | level=logging.INFO) 47 | 48 | logger = logging.getLogger(__name__) 49 | 50 | 51 | # Define a few command handlers. These usually take the two arguments update and 52 | # context. Error handlers also receive the raised TelegramError object in error. 
# Seconds to wait for Instagram's servers before abandoning a media download;
# without a timeout a stalled connection would block the handler forever.
_HTTP_TIMEOUT = 30
# Number of bytes written to disk per iteration while streaming a download.
_CHUNK_SIZE = 1024

# Profile page, e.g. https://www.instagram.com/some.user/
_PROFILE_URL = re.compile(r'^https://(www\.)?instagram\.com/[a-zA-Z0-9._]*/?$')
# Single post, e.g. https://www.instagram.com/p/Cabc-12_x/ ('-' is accepted
# in addition to the characters the original pattern allowed).
_POST_URL = re.compile(r'^https://(www\.)?instagram\.com/p/[a-zA-Z0-9._-]*/?$')

_PROFILE_PIC = re.compile(r'profile_pic_url_hd\":\"([^\'\" >]+)')
# NOTE(review): the text of this pattern was truncated in the reviewed copy
# (everything between '<' and '>' had been stripped). It is reconstructed by
# analogy with the og:video pattern below and the `== "video"` comparison that
# consumes its result -- confirm against the repository.
_MEDIUM = re.compile(r'<meta name="medium" content=[\'"]?([^\'" >]+)')
_OG_VIDEO = re.compile(r'meta property="og:video" content=[\'"]?([^\'" >]+)')


# Define a few command handlers. These usually take the two arguments update and
# context. Error handlers also receive the raised TelegramError object in error.
def start(update, context):
    """Send a greeting when the command /start is issued.

    Prints the sender to stdout, sends a message containing the sender's
    username, first name and id to the chat, then replies with 'Hi!'.
    """
    user = update.message.from_user
    print(user)
    send = f"Hi {user.username}, started your bot. \n First name {user.first_name} \n ID:{user.id}"
    context.bot.send_message(chat_id=update.message.chat_id, text=send)
    update.message.reply_text('Hi!')


# The name shadows the built-in ``help``; it is kept because ``main`` registers
# the handler under this name.
def help(update, context):
    """Reply with 'Help!' when the command /help is issued."""
    update.message.reply_text('Help!')


def _download(url, directory, extension):
    """Stream ``url`` into ``directory`` and return the path of the new file.

    The file is named after the current local time (second resolution) plus
    ``extension``. ``directory`` is created when it does not exist yet.

    Raises whatever ``requests`` raises for connection problems, timeouts or
    HTTP error statuses.
    """
    os.makedirs(directory, exist_ok=True)
    filename = datetime.strftime(datetime.now(), '%Y-%m-%d-%H-%M-%S')
    path = os.path.join(directory, filename + extension)
    with requests.get(url, stream=True, timeout=_HTTP_TIMEOUT) as response:
        response.raise_for_status()
        # The header is optional; tqdm accepts total=None for unknown sizes.
        total = int(response.headers.get('Content-Length', 0)) or None
        with open(path, 'wb') as f, tqdm(total=total, unit='B', unit_scale=True,
                                         desc=filename, ascii=True) as bar:
            for data in response.iter_content(_CHUNK_SIZE):
                bar.update(len(data))
                f.write(data)
    return path


def _try_click(selector):
    """Click the first element matching ``selector``; return False on failure."""
    try:
        driver.find_element_by_css_selector(selector).click()
    except Exception:
        # Any failure here (element missing, not clickable, ...) simply means
        # there was nothing to click; callers decide what that implies.
        return False
    return True


def _current_image_src(slide):
    """Return the ``src`` of the image currently shown for the open post.

    ``slide`` is the zero-based number of "next" clicks performed so far. The
    carousel selector uses list item 2 for the first slide and item 3 for
    every later one; when that lookup fails the single-image selector is used.
    """
    item = 2 if slide == 0 else 3
    try:
        return driver.find_element_by_css_selector(
            f'article .vi798 li:nth-child({item}) .KL4Bh img').get_attribute('src')
    except Exception:
        return driver.find_element_by_css_selector(
            'article .KL4Bh img').get_attribute('src')


def _send_profile_picture(context, chat_id, url):
    """Download the HD profile picture of the profile at ``url`` and send it."""
    driver.get(url)
    print("\nDownloading the profile image...")
    found = _PROFILE_PIC.search(driver.page_source)
    if found is None:
        print("Profile picture URL not found in page source")
        return
    # The URL is embedded in JSON, where '&' is escaped as \u0026.
    path = _download(found.group(1).replace('\\u0026', '&'), './images', '.jpg')
    print("Profile picture downloaded successfully")
    with open(path, 'rb') as photo:
        context.bot.send_photo(chat_id, photo=photo)


def _send_post_media(context, chat_id, url):
    """Download and send every image/video of the post at ``url``.

    Walks the post's carousel by clicking the "next" control until that click
    fails, sending one file per slide.
    """
    driver.get(url)
    _try_click('.bIiDR')  # try to accept the cookie banner, if present
    slide = 0
    while True:  # emulates do-while: at least one slide is processed
        src = driver.page_source
        medium_match = _MEDIUM.search(src)
        if medium_match is None:
            # Leaving the loop is essential: the page will not change, so
            # retrying would spin forever.
            print("Unknown URL")
            break
        medium = medium_match.group(1)

        if medium == "image":
            time.sleep(1)
            path = _download(_current_image_src(slide), './images', '.jpg')
            with open(path, 'rb') as photo:
                context.bot.send_photo(chat_id, photo=photo)
            print("Image downloaded successfully")
        elif medium == "video":
            print("Downloading the video...")
            video_match = _OG_VIDEO.search(src)
            if video_match is None:
                print("Unknown URL")
                break
            video_url = (video_match.group(1)
                         .replace('\\u0026', '&')
                         .replace('&amp;', '&'))
            path = _download(video_url, './videos', '.mp4')
            with open(path, 'rb') as video:
                context.bot.send_video(chat_id=chat_id, video=video)
            print("Video downloaded successfully")

        if not _try_click('._6CZji'):
            # No "next" control left: the last slide has been handled.
            print('uscito')
            break
        time.sleep(1)  # give the carousel time to show the next slide
        slide += 1


def profile(update, context):
    """Handle a text message that may contain an Instagram link.

    The query string is dropped from the message text. A profile URL results
    in the profile picture being sent to the chat; a post URL (``/p/<code>``)
    results in each of its images or videos being sent. Any other text is
    ignored. Always returns None.

    Errors while handling a profile URL are printed and swallowed; errors
    while handling a post URL propagate to the caller.
    """
    message = update.message
    if message is None or not message.text:
        # e.g. channel posts arrive without ``update.message``.
        return
    chat_id = message.chat_id
    url = message.text.split('?')[0]

    # PROFILE PICTURE
    if _PROFILE_URL.match(url):
        try:
            _send_profile_picture(context, chat_id, url)
        except Exception as e:
            print(e)
        return

    # PHOTOS OR VIDEOS
    if _POST_URL.match(url):
        _send_post_media(context, chat_id, url)


def error(update, context):
    """Log Errors caused by Updates."""
    logger.warning('Update "%s" caused error "%s"', update, context.error)


def main():
    """Start the bot and block until it is stopped.

    Reads ``tokenBot`` from the ``[Telegram]`` section of ``config.ini``,
    looked up next to this script and in the current working directory (the
    latter wins when both exist). Exits with a message when the token cannot
    be found. The shared browser session is closed when polling ends.
    """
    config = ConfigParser()
    script_dir = os.path.dirname(os.path.abspath(__file__))
    # Later files override earlier ones, so the working directory keeps
    # precedence over the copy next to the script.
    config.read([os.path.join(script_dir, 'config.ini'), 'config.ini'])
    if not config.has_option('Telegram', 'tokenBot'):
        sys.exit("config.ini must contain a [Telegram] section with a tokenBot entry")
    tokenBot = config.get('Telegram', 'tokenBot')

    # Create the Updater and pass it your bot's token.
    # Make sure to set use_context=True to use the new context based callbacks
    # Post version 12 this will no longer be necessary
    updater = Updater(tokenBot, use_context=True)

    # Get the dispatcher to register handlers
    dp = updater.dispatcher

    # on different commands - answer in Telegram
    dp.add_handler(CommandHandler("start", start))
    dp.add_handler(CommandHandler("help", help))

    # on noncommand i.e message - treat the text as a possible Instagram link
    dp.add_handler(MessageHandler(Filters.text, profile))

    # log all errors
    dp.add_error_handler(error)

    try:
        # Start the Bot
        updater.start_polling()

        # Run the bot until you press Ctrl-C or the process receives SIGINT,
        # SIGTERM or SIGABRT. This should be used most of the time, since
        # start_polling() is non-blocking and will stop the bot gracefully.
        updater.idle()
    finally:
        # Do not leave the headless browser running after the bot stops.
        driver.quit()


if __name__ == '__main__':
    main()