├── .gitignore
├── geckodriver
├── README.md
├── geckodriver.log
├── bot.py.save
└── bot.py
/.gitignore:
--------------------------------------------------------------------------------
1 | config.ini
2 | images/*
3 | videos/*
--------------------------------------------------------------------------------
/geckodriver:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/someoneonearthwholovestg/telegram-bot-instagram-downloader-picture-videos-python-selenium/HEAD/geckodriver
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # telegram-bot-instagram-downloader-picture-videos-python-selenium
2 | Telegram bot that downloads Instagram profile pictures, post images and videos.
3 |
4 | You need to create two folders, `./images` and `./videos`.
5 | You also need to create a file called `config.ini` containing your Telegram bot token, as in the following lines:
6 |
7 | [Telegram]
8 |
9 | tokenBot = xxxxxxxxxxxxxxxxxxxxxxxxx
10 |
--------------------------------------------------------------------------------
/geckodriver.log:
--------------------------------------------------------------------------------
1 | 1614675495333 geckodriver INFO Listening on 127.0.0.1:41585
2 | 1614675495337 mozrunner::runner INFO Running command: "/usr/bin/firefox" "--marionette" "-headless" "-foreground" "-no-remote" "-profile" "/tmp/rust_mozprofileoxbAau"
3 | *** You are running in headless mode.
4 | console.warn: SearchSettings: "get: No settings file exists, new profile?" (new Error("", "(unknown module)"))
5 | 1614675496958 Marionette INFO Listening on port 37279
6 | 1614675497063 Marionette WARN TLS certificate errors will be ignored for this session
7 | 1614675497074 Marionette ERROR [15] No reply from Marionette:Register
8 | 1614675505008 Marionette ERROR [33] No reply from Marionette:Register
9 | JavaScript error: resource://gre/actors/PictureInPictureChild.jsm, line 569: TypeError: this.contentWindow is null
10 | JavaScript error: resource://gre/actors/PictureInPictureChild.jsm, line 225: InvalidStateError: JSWindowActorChild.document getter: Cannot access property 'document' after actor 'PictureInPictureToggle' has been destroyed
11 | Exiting due to channel error.
12 | Exiting due to channel error.
13 | Exiting due to channel error.
14 |
--------------------------------------------------------------------------------
/bot.py.save:
--------------------------------------------------------------------------------
1 | from selenium import webdriver
2 | from selenium.webdriver.support import expected_conditions as EC
3 | from selenium.webdriver.chrome.options import Options
# Command-line switches passed to the headless Chrome instance.
_CHROME_FLAGS = (
    '--headless',
    '--disable-gpu',
    '--disable-dev-shm-usage',
    '--no-sandbox',
)

options = Options()
for _flag in _CHROME_FLAGS:
    options.add_argument(_flag)

# DRIVER_PATH = './chromedriver'
DRIVER_PATH = '/usr/lib/chromium-browser/chromedriver'

# A single browser session is created at import time and used by the
# handlers defined further down in this file.
driver = webdriver.Chrome(options=options, executable_path=DRIVER_PATH)
12 |
13 | import logging
14 |
15 | from telegram.ext import Updater, CommandHandler, MessageHandler, Filters, CallbackQueryHandler
16 | from telegram import InlineKeyboardButton,InlineKeyboardMarkup,KeyboardButton,ReplyKeyboardMarkup
17 |
18 | import sys
19 | import urllib
20 | import re
21 | import requests
22 | import datetime
23 | from datetime import datetime
24 | from tqdm import tqdm
25 |
26 | import time
27 |
28 | # read config.ini for token
29 | import os
30 | try:
31 | from configparser import ConfigParser
32 | except ImportError:
33 | from ConfigParser import ConfigParser # ver. < 3.0
34 |
35 |
36 |
37 | # Enable logging
# Enable logging: timestamp, logger name, level and message, INFO and above.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT)

# Module-level logger used by the error handler.
logger = logging.getLogger(__name__)
42 |
43 |
44 | # Define a few command handlers. These usually take the two arguments update and
45 | # context. Error handlers also receive the raised TelegramError object in error.
def start(update, context):
    """Greet the user who issued /start.

    Prints the sender to stdout, sends a message containing the sender's
    username, first name and id to the chat, then replies with 'Hi!'.
    """
    message = update.message
    sender = message.from_user
    print(sender)
    greeting = f"Hi {sender.username}, started your bot. \n First name {sender.first_name} \n ID:{sender.id}"
    context.bot.send_message(chat_id=message.chat_id, text=greeting)
    message.reply_text('Hi!')
53 |
54 |
def help(update, context):
    """Reply with the help text when the /help command is received."""
    incoming = update.message
    incoming.reply_text('Help!')
58 |
59 |
60 | # def echo(update, context):
61 | # """Echo the user message."""
62 | # update.message.reply_text(update.message.text)
63 |
# Links this handler understands; any other text is ignored.
# Post shortcodes may contain '-' in addition to letters, digits, '.' and '_'.
_PROFILE_URL = re.compile(r'^https://(www\.)?instagram\.com/[a-zA-Z0-9._]*/?$')
_POST_URL = re.compile(r'^https://(www\.)?instagram\.com/p/[a-zA-Z0-9._-]*/?$')


def _download_file(url, directory, extension):
    """Stream ``url`` into ``directory`` and return the path of the saved file.

    The file is named after the current local time (second resolution) plus
    ``extension``. A tqdm progress bar is shown while the data is written.

    Raises:
        requests.RequestException: on network errors or a non-2xx response.
        OSError: if the target file cannot be written.
    """
    filename = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    path = directory + filename + extension
    # stream=True keeps the payload out of memory; the context manager
    # releases the connection even if writing fails half-way.
    with requests.get(url, stream=True, timeout=30) as response:
        response.raise_for_status()
        # Content-Length is not guaranteed (e.g. chunked responses); tqdm
        # accepts total=None and then shows a counter without percentage.
        size = response.headers.get('Content-Length')
        total = int(size) if size is not None else None
        with tqdm(total=total, unit='B', unit_scale=True,
                  desc=filename, ascii=True) as progress:
            with open(path, 'wb') as target:
                for data in response.iter_content(1024):
                    progress.update(len(data))
                    target.write(data)
    return path


def _send_profile_picture(context, chat_id, url):
    """Download the HD profile picture of the profile at ``url`` and send it.

    Best effort: any failure is logged and swallowed so the handler returns
    normally, as the original implementation did.
    """
    try:
        driver.get(url)
        print("\nDownloading the profile image...")
        found = re.search(r'profile_pic_url_hd\":\"([^\'\" >]+)', driver.page_source)
        if found is None:
            print("Unknown URL")
            return
        # The link is embedded in JSON, where '&' is written as \u0026.
        picture_url = found.group(1).replace('\\u0026', '&')
        path = _download_file(picture_url, './images/', '.jpg')
        print("Profile picture downloaded successfully")
        with open(path, 'rb') as photo:
            context.bot.send_photo(chat_id, photo=photo)
    except Exception:
        logger.exception('Could not send the profile picture for %s', url)


def _current_image_url(slide):
    """Return the ``src`` of the image for carousel position ``slide``.

    The CSS class names are the ones generated by Instagram when this script
    was written and may need updating.
    """
    # The original selectors use the 2nd list item for the first slide and
    # the 3rd for every later one (presumably how the carousel lays out its
    # neighbouring slides - unverified).
    position = 2 if slide == 0 else 3
    try:
        element = driver.find_element_by_css_selector(
            'article .vi798 li:nth-child(%d) .KL4Bh img' % position)
        return element.get_attribute('src')
    except Exception:
        # No carousel found: fall back to the first image in the article.
        element = driver.find_element_by_css_selector('article .KL4Bh img')
        return element.get_attribute('src')


def _send_post_media(context, chat_id, url):
    """Download every image/video of the post at ``url`` and send each one."""
    driver.get(url)
    try:
        # Accept the cookie banner when it is displayed.
        driver.find_element_by_css_selector('.bIiDR').click()
    except Exception:
        logger.debug('No cookie banner to dismiss')

    slide = 0
    while True:
        src = driver.page_source
        medium = re.search(r'<meta name="medium" content=[\'"]?([^\'" >]+)', src)
        if medium is None:
            # Without the "medium" meta tag the page is not a post we can
            # handle; stop instead of retrying the same page forever.
            print("Unknown URL")
            break
        kind = medium.group(1)

        if kind == "image":
            print("\nDownloading the image...")
            time.sleep(1)  # wait before reading the image element from the page
            path = _download_file(_current_image_url(slide), './images/', '.jpg')
            with open(path, 'rb') as photo:
                context.bot.send_photo(chat_id, photo=photo)
            print("Image downloaded successfully")
        elif kind == "video":
            print("Downloading the video...")
            video = re.search(r'meta property="og:video" content=[\'"]?([^\'" >]+)', src)
            if video is None:
                print("Unknown URL")
                break
            # Undo the JSON and HTML escaping of '&' in the extracted link.
            video_url = video.group(1).replace('\\u0026', '&').replace('&amp;', '&')
            path = _download_file(video_url, './videos/', '.mp4')
            with open(path, 'rb') as clip:
                context.bot.send_video(chat_id=chat_id, video=clip)
            print("Video downloaded successfully")

        try:
            # Presumably the carousel's "next" arrow; when it cannot be
            # clicked there is nothing left to download.
            driver.find_element_by_css_selector('._6CZji').click()
        except Exception:
            print('uscito')
            break
        slide += 1


def profile(update, context):
    """Download the Instagram media behind the received link and send it back.

    The query string of the message text is dropped, then the URL is matched
    against two patterns:

    * a profile URL -> the HD profile picture is sent as a photo;
    * a post URL (``/p/<shortcode>``) -> every image or video of the post is
      sent, advancing through the carousel until no "next" control is left.

    Files are stored under ``./images`` and ``./videos``. Text that matches
    neither pattern is ignored.
    """
    message = update.message
    if message is None:
        # Updates without a regular message carry no link to process.
        return
    chat_id = message.chat_id
    # Drop any query string so the URL patterns can match.
    url = message.text.split('?')[0]

    if _PROFILE_URL.match(url):
        _send_profile_picture(context, chat_id, url)
        return

    if _POST_URL.match(url):
        _send_post_media(context, chat_id, url)
172 |
173 | # update.send_photo(chat_id, photo=open('path', 'rb'))
174 |
175 | #STORIES
176 | # stories = re.match(r'^(https:)[\/][\/](www\.)?instagram.com[\/]stories[\/][a-zA-Z0-9._]*[\/][a-zA-Z0-9._]*[\/]?$', url)
177 |
178 | # if stories:
179 | # request_image = driver.get(url)
180 | # # time.sleep(3)
181 | # try: # try to accept cookie
182 | # driver.find_element_by_css_selector('.bIiDR').click()
183 | # n = 0
184 | # except:
185 | # n = 0
186 | # while True: # to emulate do while with fail_condition
187 | # try:
188 |
189 |
190 |
191 |
192 |
def error(update, context):
    """Log, at WARNING level, the error raised while handling ``update``."""
    failure = context.error
    logger.warning('Update "%s" caused error "%s"', update, failure)
196 |
197 |
def main():
    """Start the bot.

    Reads the Telegram token from the ``tokenBot`` option of the
    ``[Telegram]`` section of ``config.ini`` (looked up in the current
    working directory), registers the handlers and polls for updates until
    the process is interrupted. The shared browser session is closed on exit.

    Raises:
        SystemExit: if ``config.ini`` cannot be read.
    """
    config = ConfigParser()
    # ConfigParser.read() silently skips missing files and returns the list
    # of files it parsed, so an empty result means there is no usable config.
    # Its return value must not be used as the token.
    if not config.read('config.ini'):
        raise SystemExit(
            "config.ini not found: create it with a [Telegram] section "
            "containing tokenBot")
    tokenBot = config.get('Telegram', 'tokenBot')

    # Create the Updater and pass it your bot's token.
    # Make sure to set use_context=True to use the new context based callbacks
    # Post version 12 this will no longer be necessary
    updater = Updater(tokenBot, use_context=True)

    # Get the dispatcher to register handlers
    dp = updater.dispatcher

    # on different commands - answer in Telegram
    dp.add_handler(CommandHandler("start", start))
    dp.add_handler(CommandHandler("help", help))

    # any other text message is handed to profile() as a link to download
    dp.add_handler(MessageHandler(Filters.text, profile))

    # log all errors
    dp.add_error_handler(error)

    # Start the Bot
    updater.start_polling()

    try:
        # Run the bot until you press Ctrl-C or the process receives SIGINT,
        # SIGTERM or SIGABRT. This should be used most of the time, since
        # start_polling() is non-blocking and will stop the bot gracefully.
        updater.idle()
    finally:
        # Shut down the browser started at import time so it is not left
        # running after the bot exits.
        driver.quit()


if __name__ == '__main__':
    main()
239 |
240 |
241 |
--------------------------------------------------------------------------------
/bot.py:
--------------------------------------------------------------------------------
1 | from selenium import webdriver
2 | from selenium.webdriver.support import expected_conditions as EC
3 | # from selenium.webdriver.chrome.options import Options
4 | from selenium.webdriver.firefox.options import Options
# Firefox is driven through the geckodriver binary shipped next to the script.
DRIVER_PATH = './geckodriver'

# Run the browser without opening a window.
options = Options()
options.headless = True

# A single browser session is created at import time and used by the
# handlers defined further down in this file.
driver = webdriver.Firefox(options=options, executable_path=DRIVER_PATH)
16 |
17 | import logging
18 |
19 | from telegram.ext import Updater, CommandHandler, MessageHandler, Filters, CallbackQueryHandler
20 | from telegram import InlineKeyboardButton,InlineKeyboardMarkup,KeyboardButton,ReplyKeyboardMarkup
21 |
22 |
23 | import sys
24 | import urllib
25 | import re
26 | import requests
27 | import datetime
28 | from datetime import datetime
29 | from tqdm import tqdm
30 |
31 | import time
32 |
33 | # read config.ini for token
34 | import os
35 |
36 | # read config.ini for token
37 | try:
38 | from configparser import ConfigParser
39 | except ImportError:
40 | from ConfigParser import ConfigParser # ver. < 3.0
41 |
42 |
43 |
44 | # Enable logging
# Enable logging: timestamp, logger name, level and message, INFO and above.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT)

# Module-level logger used by the error handler.
logger = logging.getLogger(__name__)
49 |
50 |
51 | # Define a few command handlers. These usually take the two arguments update and
52 | # context. Error handlers also receive the raised TelegramError object in error.
def start(update, context):
    """Greet the user who issued /start.

    Prints the sender to stdout, sends a message containing the sender's
    username, first name and id to the chat, then replies with 'Hi!'.
    """
    incoming = update.message
    who = incoming.from_user
    print(who)
    text = f"Hi {who.username}, started your bot. \n First name {who.first_name} \n ID:{who.id}"
    context.bot.send_message(chat_id=incoming.chat_id, text=text)
    incoming.reply_text('Hi!')
60 |
61 |
def help(update, context):
    """Reply with the help text when the /help command is received."""
    incoming = update.message
    incoming.reply_text('Help!')
65 |
66 |
67 | # def echo(update, context):
68 | # """Echo the user message."""
69 | # update.message.reply_text(update.message.text)
70 |
# Links this handler understands; any other text is ignored.
# Post shortcodes may contain '-' in addition to letters, digits, '.' and '_'.
_PROFILE_URL = re.compile(r'^https://(www\.)?instagram\.com/[a-zA-Z0-9._]*/?$')
_POST_URL = re.compile(r'^https://(www\.)?instagram\.com/p/[a-zA-Z0-9._-]*/?$')


def _download_file(url, directory, extension):
    """Stream ``url`` into ``directory`` and return the path of the saved file.

    The file is named after the current local time (second resolution) plus
    ``extension``. A tqdm progress bar is shown while the data is written.

    Raises:
        requests.RequestException: on network errors or a non-2xx response.
        OSError: if the target file cannot be written.
    """
    filename = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    path = directory + filename + extension
    # stream=True keeps the payload out of memory; the context manager
    # releases the connection even if writing fails half-way.
    with requests.get(url, stream=True, timeout=30) as response:
        response.raise_for_status()
        # Content-Length is not guaranteed (e.g. chunked responses); tqdm
        # accepts total=None and then shows a counter without percentage.
        size = response.headers.get('Content-Length')
        total = int(size) if size is not None else None
        with tqdm(total=total, unit='B', unit_scale=True,
                  desc=filename, ascii=True) as progress:
            with open(path, 'wb') as target:
                for data in response.iter_content(1024):
                    progress.update(len(data))
                    target.write(data)
    return path


def _send_profile_picture(context, chat_id, url):
    """Download the HD profile picture of the profile at ``url`` and send it.

    Best effort: any failure is logged and swallowed so the handler returns
    normally, as the original implementation did.
    """
    try:
        driver.get(url)
        print("\nDownloading the profile image...")
        found = re.search(r'profile_pic_url_hd\":\"([^\'\" >]+)', driver.page_source)
        if found is None:
            print("Unknown URL")
            return
        # The link is embedded in JSON, where '&' is written as \u0026.
        picture_url = found.group(1).replace('\\u0026', '&')
        path = _download_file(picture_url, './images/', '.jpg')
        print("Profile picture downloaded successfully")
        with open(path, 'rb') as photo:
            context.bot.send_photo(chat_id, photo=photo)
    except Exception:
        logger.exception('Could not send the profile picture for %s', url)


def _current_image_url(slide):
    """Return the ``src`` of the image for carousel position ``slide``.

    The CSS class names are the ones generated by Instagram when this script
    was written and may need updating.
    """
    # The original selectors use the 2nd list item for the first slide and
    # the 3rd for every later one (presumably how the carousel lays out its
    # neighbouring slides - unverified).
    position = 2 if slide == 0 else 3
    try:
        element = driver.find_element_by_css_selector(
            'article .vi798 li:nth-child(%d) .KL4Bh img' % position)
        return element.get_attribute('src')
    except Exception:
        # No carousel found: fall back to the first image in the article.
        element = driver.find_element_by_css_selector('article .KL4Bh img')
        return element.get_attribute('src')


def _send_post_media(context, chat_id, url):
    """Download every image/video of the post at ``url`` and send each one."""
    driver.get(url)
    try:
        # Accept the cookie banner when it is displayed.
        driver.find_element_by_css_selector('.bIiDR').click()
    except Exception:
        logger.debug('No cookie banner to dismiss')

    slide = 0
    while True:
        src = driver.page_source
        medium = re.search(r'<meta name="medium" content=[\'"]?([^\'" >]+)', src)
        if medium is None:
            # Without the "medium" meta tag the page is not a post we can
            # handle; stop instead of retrying the same page forever.
            print("Unknown URL")
            break
        kind = medium.group(1)

        if kind == "image":
            print("\nDownloading the image...")
            time.sleep(1)  # wait before reading the image element from the page
            path = _download_file(_current_image_url(slide), './images/', '.jpg')
            with open(path, 'rb') as photo:
                context.bot.send_photo(chat_id, photo=photo)
            print("Image downloaded successfully")
        elif kind == "video":
            print("Downloading the video...")
            video = re.search(r'meta property="og:video" content=[\'"]?([^\'" >]+)', src)
            if video is None:
                print("Unknown URL")
                break
            # Undo the JSON and HTML escaping of '&' in the extracted link.
            video_url = video.group(1).replace('\\u0026', '&').replace('&amp;', '&')
            path = _download_file(video_url, './videos/', '.mp4')
            with open(path, 'rb') as clip:
                context.bot.send_video(chat_id=chat_id, video=clip)
            print("Video downloaded successfully")

        try:
            # Presumably the carousel's "next" arrow; when it cannot be
            # clicked there is nothing left to download.
            driver.find_element_by_css_selector('._6CZji').click()
        except Exception:
            print('uscito')
            break
        # Pause after moving to the next slide, as this version of the
        # script did, before reading the page again.
        time.sleep(1)
        slide += 1


def profile(update, context):
    """Download the Instagram media behind the received link and send it back.

    The query string of the message text is dropped, then the URL is matched
    against two patterns:

    * a profile URL -> the HD profile picture is sent as a photo;
    * a post URL (``/p/<shortcode>``) -> every image or video of the post is
      sent, advancing through the carousel until no "next" control is left.

    Files are stored under ``./images`` and ``./videos``. Text that matches
    neither pattern is ignored.
    """
    message = update.message
    if message is None:
        # Updates without a regular message carry no link to process.
        return
    chat_id = message.chat_id
    # Drop any query string so the URL patterns can match.
    url = message.text.split('?')[0]

    if _PROFILE_URL.match(url):
        _send_profile_picture(context, chat_id, url)
        return

    if _POST_URL.match(url):
        _send_post_media(context, chat_id, url)
179 |
180 | # update.send_photo(chat_id, photo=open('path', 'rb'))
181 |
182 | #STORIES
183 | # stories = re.match(r'^(https:)[\/][\/](www\.)?instagram.com[\/]stories[\/][a-zA-Z0-9._]*[\/][a-zA-Z0-9._]*[\/]?$', url)
184 |
185 | # if stories:
186 | # request_image = driver.get(url)
187 | # # time.sleep(3)
188 | # try: # try to accept cookie
189 | # driver.find_element_by_css_selector('.bIiDR').click()
190 | # n = 0
191 | # except:
192 | # n = 0
193 | # while True: # to emulate do while with fail_condition
194 | # try:
195 |
196 |
197 |
198 |
199 |
def error(update, context):
    """Log, at WARNING level, the error raised while handling ``update``."""
    cause = context.error
    logger.warning('Update "%s" caused error "%s"', update, cause)
203 |
204 |
def main():
    """Start the bot.

    Reads the Telegram token from the ``tokenBot`` option of the
    ``[Telegram]`` section of ``config.ini`` (looked up in the current
    working directory), registers the handlers and polls for updates until
    the process is interrupted. The shared browser session is closed on exit.

    Raises:
        SystemExit: if ``config.ini`` cannot be read.
    """
    config = ConfigParser()
    # ConfigParser.read() silently skips missing files and returns the list
    # of files it parsed, so an empty result means there is no usable config.
    if not config.read('config.ini'):
        raise SystemExit(
            "config.ini not found: create it with a [Telegram] section "
            "containing tokenBot")
    tokenBot = config.get('Telegram', 'tokenBot')

    # Create the Updater and pass it your bot's token.
    # Make sure to set use_context=True to use the new context based callbacks
    # Post version 12 this will no longer be necessary
    updater = Updater(tokenBot, use_context=True)

    # Get the dispatcher to register handlers
    dp = updater.dispatcher

    # on different commands - answer in Telegram
    dp.add_handler(CommandHandler("start", start))
    dp.add_handler(CommandHandler("help", help))

    # any other text message is handed to profile() as a link to download
    dp.add_handler(MessageHandler(Filters.text, profile))

    # log all errors
    dp.add_error_handler(error)

    # Start the Bot
    updater.start_polling()

    try:
        # Run the bot until you press Ctrl-C or the process receives SIGINT,
        # SIGTERM or SIGABRT. This should be used most of the time, since
        # start_polling() is non-blocking and will stop the bot gracefully.
        updater.idle()
    finally:
        # Shut down the browser started at import time so it is not left
        # running after the bot exits.
        driver.quit()


if __name__ == '__main__':
    main()
246 |
247 |
248 |
--------------------------------------------------------------------------------