├── uploaders
    ├── __init__.py
    ├── tiktok.py
    ├── facebook.py
    ├── instagram.py
    ├── youtube.py
    └── twitter.py
├── scraping_manager
    ├── __init__.py
    └── automate.py
├── spreadsheet_manager
    ├── __init__.py
    ├── google_ss.py
    └── xlsx.py
├── logo.png
├── requirements.txt
├── globals.py
├── .gitignore
├── LICENSE
├── download.py
├── config.py
├── __main__.py
└── README.md


/uploaders/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/scraping_manager/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/spreadsheet_manager/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/darideveloper/video-post/HEAD/logo.png


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | selenium==3.141.0
2 | webdriver-manager==3.4.1
3 | requests==2.25.1
4 | moviepy==1.0.3
5 | gspread==4.0.1
6 | oauth2client==4.1.3
7 | 


--------------------------------------------------------------------------------
/globals.py:
--------------------------------------------------------------------------------
# Shared runtime state. Other modules do `import globals` and read or assign
# these attributes directly (for example `globals.scraper`).
# Module-level `global` statements have no effect, so plain assignments
# are enough to declare the defaults.

scraper = None          # browser automation instance; assigned by the entry point
current_folder = ""     # folder of the running project; assigned by the entry point
videos_path = ""        # not assigned anywhere in the visible code
download_folder = ""    # folder where downloaded videos are saved
chrome_folder = ""      # chrome user data folder; assigned by the entry point


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | __pycache__
 2 | *__pycache__
 3 | *.json
 4 | *temp.*
 5 | temp.*
 6 | *.log*
 7 | *.log*
 8 | *.zip
 9 | *.png
10 | chrome_data
11 | downloads
12 | done
13 | videos.txt
14 | venv
15 | videos.xlsx
16 | *.mp4


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 Dari Developer
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/uploaders/tiktok.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | import globals
 3 | 
 4 | def upload (file_path:str, title:str, description:str, tags:list): 
 5 |     """ Upload video to tiktok """
 6 | 
 7 |     print ("\tUploading video to Tiktok...")
 8 | 
 9 |     # Open page 
10 |     globals.scraper.set_page ("https://www.tiktok.com/upload?lang=en")
11 |     time.sleep (5)
12 | 
13 |     # Add id to frame
14 |     iframe_selector = "iframe"
15 |     iframe = globals.scraper.get_elem (iframe_selector)
16 |     globals.scraper.driver.execute_script("arguments[0].setAttribute('id', 'iframe');", iframe)
17 |     time.sleep (2)
18 |     globals.scraper.refresh_selenium ()
19 | 
20 |     # Swicth to internal frame
21 |     globals.scraper.switch_to_frame ("iframe")
22 |     time.sleep (2)
23 | 
24 |     # Upload file
25 |     selector_input = 'input[accept="video/*"]'
26 |     globals.scraper.send_data (selector_input, file_path)
27 |     time.sleep (10)
28 | 
29 |     # Video title and description
30 |     selector_details = 'div[aria-autocomplete="list"][role="combobox"]'
31 |     tag_text = ""
32 |     for tag in tags:
33 |         tag_text += f" #{tag}"
34 |     text_formated = f" - {description} - {tag_text}"
35 |     globals.scraper.send_data (selector_details, text_formated)
36 |     time.sleep (1)
37 | 
38 |     # Post video
39 |     selector_post = 'button.tiktok-btn-pc.tiktok-btn-pc-primary'
40 |     globals.scraper.click_js (selector_post)
41 |     time.sleep (15)


--------------------------------------------------------------------------------
/uploaders/facebook.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | import globals
 3 | 
 4 | def upload (facebook_page, file_path:str, title:str, description:str, tags:list): 
 5 |     """ Upload video to facebook page """
 6 | 
 7 |     print ("\tUploading video to Facebook Page...")
 8 | 
 9 |     # Open page 
10 |     globals.scraper.set_page (facebook_page)
11 |     time.sleep (5)
12 | 
13 |     # Start new post
14 |     selector_new_post = 'div[aria-label="Create post"]'
15 |     globals.scraper.click (selector_new_post)
16 |     globals.scraper.refresh_selenium () 
17 |     time.sleep (2)
18 | 
19 | 
20 |     # Select to upload photo or video
21 |     selector_photo_video = '[aria-label="Photo/Video"][role="button"]'
22 |     globals.scraper.click (selector_photo_video)
23 |     time.sleep (2)
24 | 
25 |     # Upload file
26 |     selector_input = 'input[accept="image/*,image/heif,image/heic,video/*,video/mp4,video/x-m4v,video/x-matroska,.mkv"]'
27 |     globals.scraper.send_data (selector_input, file_path)
28 |     time.sleep (10)
29 |     globals.scraper.refresh_selenium ()
30 |     
31 |     # Video title and description
32 |     selector_details = '.k4urcfbm.l9j0dhe7.datstx6m.rq0escxv div[role="textbox"][contenteditable="true"]'
33 |     tag_text = ""
34 |     for tag in tags:
35 |         tag_text += f"\n#{tag}"
36 |     text_formated = f"{title}\n\n{description}\n{tag_text}"
37 |     globals.scraper.click_js (selector_details)
38 |     globals.scraper.send_data (selector_details, text_formated)
39 |     time.sleep (1)
40 |     globals.scraper.refresh_selenium ()
41 | 
42 |     # Post video
43 |     selector_post = 'input[type="submit"]'
44 |     globals.scraper.click_js (selector_post)
45 |     time.sleep (15)


--------------------------------------------------------------------------------
/download.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import time
 3 | import globals
 4 | import requests
 5 | 
 6 | def tiktok (tiktok_url:str, title:str):
 7 |     """Download video from tictok and save it in downloads folder
 8 | 
 9 |     Args:
10 |         globals.scraper (object): web scraping instance
11 |         tiktok_url (str): tiktok link
12 |     """
13 | 
14 |     print ("\tDownloading video...")
15 | 
16 |     # Open browser and go to snaptik
17 |     time.sleep (5)
18 |     globals.scraper.switch_to_tab (0)
19 |     globals.scraper.set_page ("https://snaptik.app/en")
20 | 
21 |     # Paste link and start download
22 |     selector_input = "#url"
23 |     selector_submit = "#submiturl"
24 |     globals.scraper.send_data(selector_input, tiktok_url)
25 |     globals.scraper.click (selector_submit)
26 |     
27 |     # Wait for video load
28 |     download_selector = 'a.abutton.is-success:nth-child(1)'
29 |     
30 |     try:
31 |         globals.scraper.wait_load(download_selector, time_out=60)
32 |     except:
33 |         print ("Error to download, video omitted")
34 |         
35 |         # Close browser and end function
36 |         globals.scraper.kill()
37 |         return None
38 | 
39 |     # get file link and extension
40 |     downlod_link = globals.scraper.get_attrib (download_selector, "href")
41 |     separator = downlod_link.rfind (".")
42 |     extension = downlod_link[separator+1:]
43 | 
44 |     # Download file
45 |     file_path = os.path.join (os.path.dirname (__file__), "downloads", f"{title}.{extension}")
46 |     mp4 (downlod_link, file_path)
47 |     return file_path
48 | 
def mp4 (url:str, file_path:str):
    """ Download the file found in `url` and save it in `file_path`

    Args:
        url (str): direct link of the file
        file_path (str): destination path (overwritten if it exists)

    Raises:
        requests.HTTPError: when the server answers with an error status
    """

    # Stream the response: without stream=True the whole video is loaded in
    # memory before the first chunk is written. The context manager releases
    # the connection even if writing fails.
    with requests.get (url, stream=True) as res:
        res.raise_for_status()
        with open (file_path, "wb") as file:
            for chunk in res.iter_content (chunk_size=8000):
                file.write (chunk)

    # Fixed pause after the download (kept from the original flow)
    time.sleep (5)
58 |     


--------------------------------------------------------------------------------
/uploaders/instagram.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | import globals
 3 | from selenium.webdriver.common.keys import Keys
 4 | 
 5 | def upload (file_path:str, title:str, description:str, tags:list): 
 6 |     """ Upload video to instagram reels """
 7 | 
 8 |     print ("\tUploading video to Instagram Reels...")
 9 | 
10 |     # Open page 
11 |     instagram_url = "https://www.instagram.com/"
12 |     globals.scraper.set_page (instagram_url)
13 |     globals.scraper.set_page (instagram_url)
14 |     globals.scraper.send_data ("body", Keys.CONTROL + Keys.SHIFT + "r")
15 |     time.sleep (5)
16 | 
17 |     # Open reels
18 |     selector_new = "nav.NXc7H.jLuN9 .J5g42 > .XrOey:nth-child(3)"
19 |     globals.scraper.click (selector_new)
20 |     globals.scraper.refresh_selenium ()
21 |     selector_reel = '.CreationPopup.CreationPopup_show > [data-id="reel"]'
22 |     globals.scraper.click (selector_reel)
23 |     globals.scraper.refresh_selenium ()
24 |  
25 |     # Upload file
26 |     selector_input = 'input[accept="video/mp4,video/quicktime"]'
27 |     globals.scraper.send_data (selector_input, file_path)
28 |     time.sleep (10)
29 |     
30 |     # Go to video details
31 |     selector_continue = ".qF0y9.Igw0E.IwRSH.eGOV_._4EzTm.XfCBB.g6RW6"
32 |     globals.scraper.click (selector_continue)
33 |     time.sleep (2)
34 |     globals.scraper.refresh_selenium ()
35 |     globals.scraper.click (selector_continue)
36 |     time.sleep (2)
37 |     globals.scraper.refresh_selenium ()
38 |     
39 |     # Video title and description
40 |     selector_details = 'textarea[aria-label="Write a caption..."]'
41 |     tag_text = ""
42 |     for tag in tags:
43 |         tag_text += f"\n#{tag}"
44 |     text_formated = f"{title}\n\n{description}\n{tag_text}"
45 |     time.sleep (1)
46 |     globals.scraper.send_data (selector_details, text_formated)
47 | 
48 |     # Share video
49 |     selector_share = ".qF0y9.Igw0E.IwRSH.eGOV_._4EzTm.XfCBB.g6RW6"
50 |     globals.scraper.click (selector_share)
51 |     time.sleep (15)
52 | 


--------------------------------------------------------------------------------
/spreadsheet_manager/google_ss.py:
--------------------------------------------------------------------------------
 1 | #! python3
 2 | # Conect to google spreadsheets
 3 | import os
 4 | import gspread
 5 | import time
 6 | import sys
 7 | from oauth2client.service_account import ServiceAccountCredentials
 8 | 
 9 | class SS_manager (): 
10 |     """ Class to conect to google shets and upload data"""
11 | 
12 |     def __init__ (self, google_sheet_link, creds_path, sheet_name=None): 
13 |         """ Construtor of the class"""
14 | 
15 |         # Read credentials
16 |         if not os.path.isfile (creds_path):
17 |             raise FileNotFoundError ("The credential file path is not correct")
18 |         
19 |         scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
20 |         creds = ServiceAccountCredentials.from_json_keyfile_name(creds_path, scope)
21 |         client = gspread.authorize(creds)
22 | 
23 |         # Conect to google sheet
24 |         sheet = client.open_by_url(google_sheet_link)
25 | 
26 |         # Set the sheet 1 as worksheet
27 |         if sheet_name:
28 |             self.worksheet = sheet.worksheet(sheet_name)
29 |         else:
30 |             self.worksheet = sheet.sheet1
31 | 
32 |     def write_cell (self, value, row=1, column=1):
33 |         """ Write data in specific cell 
34 |         """
35 |         self.worksheet.update_cell(row, column, value)
36 | 
37 |     def write_data (self, data, row=1, column=1): 
38 |         """ Write list of data in the worksheet"""
39 |         
40 |         # check if data exist
41 |         if not data: 
42 |             print ("THERE IS NO NEW INFORMATION TO WRITE IN THE FILE.")
43 |         else:
44 |             print ("Writing information on spreadsheet...")
45 | 
46 |             # Loop for each row of data
47 |             for row_data in data: 
48 | 
49 |                 # Set the position of the next row. Omit the header
50 |                 row_index = data.index(row_data) + row
51 |                 
52 |                 for cell in row_data:
53 |                     column_index = row_data.index (cell) + column
54 | 
55 |                     # Write data in gss
56 |                     print (cell, row_index, column_index)
57 |                     self.write_cell (cell, row_index, column_index)
58 | 
59 | 
60 |     def get_data (self): 
61 |         """ Read all records of the sheet"""
62 | 
63 |         records = self.worksheet.get_all_records()
64 |         return records


--------------------------------------------------------------------------------
/uploaders/youtube.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | import globals
 3 | 
 4 | def upload (file_path:str, title:str, description:str, tags:list): 
 5 |     """ Upload video to youtube shorts """
 6 | 
 7 |     print ("\tUploading video to Youtube Shorts...")
 8 | 
 9 |     # Open page 
10 |     youtube_url = "https://studio.youtube.com/"
11 |     globals.scraper.set_page (youtube_url)
12 | 
13 |     # Ignore browser warning
14 |     selector_go_shorts = "body > div > div.buttons > a.button.text-button.black-secondary:only-child"
15 |     try:
16 |         globals.scraper.click (selector_go_shorts)
17 |     except:
18 |         pass
19 |     globals.scraper.refresh_selenium()
20 | 
21 |     # Dimiss button
22 |     selector_continue = "#dismiss-button"
23 |     try:
24 |         globals.scraper.click (selector_continue)
25 |     except:
26 |         pass
27 |     globals.scraper.refresh_selenium()
28 | 
29 |     # Open upload
30 |     selector_upload = "#upload-button"
31 |     selector_upload_icon = "#upload-icon"
32 |     try:
33 |         globals.scraper.click_js (selector_upload)
34 |     except:
35 |         globals.scraper.click_js (selector_upload_icon)
36 |     globals.scraper.refresh_selenium()
37 | 
38 |     # Upload file
39 |     selector_input = 'input[type="file"]'
40 |     globals.scraper.send_data (selector_input, file_path)
41 |     globals.scraper.refresh_selenium()
42 |     time.sleep (10)
43 | 
44 |     # Video title 
45 |     # selector_description = ".input-container.title #textbox"
46 |     # globals.scraper.send_data (selector_description, title)
47 | 
48 |     # Video description 
49 |     selector_description = ".input-container.description #textbox"
50 |     globals.scraper.send_data (selector_description, description)
51 | 
52 |     # No child content
53 |     selector_no_child = "tp-yt-paper-radio-button:nth-child(2)"
54 |     globals.scraper.click (selector_no_child)
55 | 
56 |     # Open more details
57 |     selector_more = "#toggle-button"
58 |     globals.scraper.click (selector_more)
59 |     time.sleep (3)
60 |     globals.scraper.refresh_selenium()
61 | 
62 |     # Tags
63 |     selector_tags = "#tags-container #text-input"
64 |     for tag in tags:
65 |         globals.scraper.send_data (selector_tags, f"{tag}\n")
66 | 
67 |     # Next pages
68 |     selector_next = "#next-button"
69 |     globals.scraper.click (selector_next)
70 |     globals.scraper.refresh_selenium()
71 |     globals.scraper.click (selector_next)
72 |     globals.scraper.refresh_selenium()
73 |     globals.scraper.click (selector_next)
74 |     globals.scraper.refresh_selenium()
75 | 
76 |     # Public type
77 |     selector_public = 'tp-yt-paper-radio-button[name="PUBLIC"]'
78 |     globals.scraper.click (selector_public)
79 | 
80 |     # Publish video
81 |     selector_publish = "#done-button"
82 |     globals.scraper.click (selector_publish)
83 |     time.sleep (15)
84 | 


--------------------------------------------------------------------------------
/uploaders/twitter.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | import download
 3 | import globals
 4 | from selenium.webdriver.common.keys import Keys
 5 | 
 6 | def convert (file_path:str): 
 7 |     """ Upload video to twitter """
 8 | 
 9 |     print ("\tConverting video for twitter...")
10 |     
11 |     # Open converter page
12 |     converter_url = "https://servicios-web.online-convert.com/es/convertir-para-twitter"
13 |     globals.scraper.set_page (converter_url)
14 | 
15 |     # Upload video
16 |     selector_input = "#fileUploadInput"
17 |     globals.scraper.send_data (selector_input, file_path)
18 | 
19 |     # Start conversion
20 |     selector_start = "button.btn.btn-lg.submit-btn.mb-0"
21 |     last_url = globals.scraper.driver.current_url
22 |     while True:
23 |         globals.scraper.click (selector_start)
24 |         time.sleep (2)
25 |         current_url = globals.scraper.driver.current_url
26 |         if current_url != last_url:
27 |             break 
28 | 
29 |     # Get download link
30 |     while True:
31 |         time.sleep (2)
32 |         selector_download = 'a.btn.btn-large.btn-download[title="Descargar tu archivo"]'
33 |         downlod_link = globals.scraper.get_attrib (selector_download, "href")
34 |         if downlod_link and downlod_link != 'https://www.online-convert.com/es':
35 |             time.sleep (10)
36 |             downlod_link = globals.scraper.get_attrib (selector_download, "href")
37 |             break
38 |         else:
39 |             continue
40 | 
41 |     # Download file
42 |     file_converted = file_path.replace(".mp4", " for twitter.mp4")
43 |     download.mp4 (downlod_link, file_converted)
44 |     return file_converted
45 | 
46 | 
def upload(file_path: str, title: str, description: str, tags: list):
    """Publish a video as a tweet from the Twitter home page.

    Args:
        file_path (str): path of the video file sent to the upload input
        title (str): first line of the tweet text
        description (str): text placed after the title
        tags (list): tags; each one is written in its own line as "#tag"
    """

    print("\tUploading video to Twitter...")

    scraper = globals.scraper

    # Load the home page and give it time to render
    scraper.set_page("https://twitter.com/home")
    time.sleep(5)
    scraper.refresh_selenium()

    # Send the video file and wait for the upload
    media_input = 'input[accept="image/jpeg,image/png,image/webp,image/gif,video/mp4,video/quicktime,video/webm"]'
    scraper.send_data(media_input, file_path)
    time.sleep(10)

    # Tweet text: title, blank line, description and one hashtag per line
    hashtags = "".join(f"\n#{tag}" for tag in tags)
    tweet_text = f"{title}\n\n{description}\n{hashtags}"
    scraper.send_data('label[data-testid="tweetTextarea_0_label"] div[role="textbox"]', tweet_text)

    # Post tweet
    scraper.click_js('div[data-testid="tweetButtonInline"]')

    # Poll every 2 seconds until the editor placeholder has text again
    placeholder = ".public-DraftEditorPlaceholder-inner"
    while True:
        time.sleep(2)
        if scraper.get_text(placeholder):
            break
83 | 


--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import json
  3 | 
  4 | current_file = os.path.basename(__file__)
  5 | current_folder = os.path.dirname(__file__)  
  6 | default_config_path = os.path.join(current_folder, "config.json")
  7 | 
  8 | class Config ():
  9 |     def __init__ (self, config_path=default_config_path, utf8=False): 
 10 |         """Contructor of class
 11 | 
 12 |         Args:
 13 |             config_path (str/path, optional): Json file for process credentials. Defaults to config.json file.
 14 |             utf8 (bool, optional): Read or write data in utf8 format. Defaults to False.
 15 |         """
 16 |         self.config_path=config_path
 17 |         self.utf8=utf8
 18 | 
 19 |         config_exist = os.path.isfile(self.config_path)
 20 |         if not config_exist: 
 21 |             print (f"NOT FILE {self.config_path}")
 22 | 
 23 |     def get (self, credential=""): 
 24 |         """
 25 |         Get specific credential from config file
 26 |         """
 27 |         
 28 |         # Read credentials file 
 29 |         if self.utf8: 
 30 |             config_file = open(self.config_path, "r", encoding='utf-8')
 31 |         else: 
 32 |             config_file = open(self.config_path, "r")
 33 |         
 34 |         # Get specific credential
 35 |         try:
 36 |             config_data = json.loads(config_file.read())
 37 |             return (config_data[credential])
 38 |         except Exception as err: 
 39 |             # print (err)
 40 |             return ""
 41 | 
 42 |         # Close file
 43 |         config_file.close()
 44 | 
 45 |     def get_all (self): 
 46 |         """
 47 |         return all crdentials from file
 48 |         """
 49 | 
 50 |         # Read credentials file 
 51 |         if self.utf8: 
 52 |             config_file = open(self.config_path, "r", encoding='utf-8')
 53 |         else: 
 54 |             config_file = open(self.config_path, "r")
 55 |         
 56 |         # Get specific credential
 57 |         try:
 58 |             config_data = json.loads(config_file.read())
 59 |             return (config_data)
 60 |         except Exception as err: 
 61 |             print (err)
 62 |             return ""
 63 | 
 64 |         # Close file
 65 |         config_file.close()
 66 | 
 67 |     def create_config (self, credentials, rewrite=False): 
 68 |         """
 69 |         Create a config file with default credentials
 70 |         """
 71 |         
 72 |         if rewrite: 
 73 |             open_mode = "w"
 74 |         else: 
 75 |             open_mode = "a"
 76 | 
 77 |         with open (self.config_path, open_mode) as config_file:
 78 |             config_file.write(json.dumps(credentials))
 79 |             
 80 | 
 81 |     def update (self, credential="", value=""): 
 82 |         """
 83 |         Update specific credential in config file
 84 |         """
 85 |         
 86 |         with open (self.config_path, "r") as config_file: 
 87 |             config_data = json.loads(config_file.read())
 88 |             config_data[credential] = value
 89 |         
 90 |         with open (self.config_path, "w") as config_file:
 91 |             config_file.write(json.dumps(config_data))
 92 | 
 93 |     def update_all (self, credentials, values): 
 94 |         """
 95 |         Update credentials
 96 |         """
 97 |         
 98 |         for cred_config, cred_gui in credentials.items(): 
 99 |             
100 |             new_credential = values[cred_gui]
101 |             self.update (cred_config, new_credential)


--------------------------------------------------------------------------------
/spreadsheet_manager/xlsx.py:
--------------------------------------------------------------------------------
  1 | import openpyxl
  2 | from openpyxl.utils import get_column_letter
  3 | from openpyxl.styles import Font
  4 | 
  5 | class SS_manager (): 
  6 |     """Manage local spread sheets
  7 |     """
  8 |     
  9 |     def __init__(self, file_name): 
 10 |         
 11 |         self.file_name = file_name
 12 |         self.wb = openpyxl.load_workbook(self.file_name)
 13 |         self.current_sheet = None
 14 |     
 15 |     def get_sheets (self): 
 16 |         """ Return the list of sheets in the current document
 17 |         """
 18 |         
 19 |         return self.wb.sheetnames
 20 |     
 21 |         
 22 |     def clean_workbook (self): 
 23 |         """ Delete all sheets in current workbook
 24 |         """
 25 |         
 26 |         for sheet in self.wb.sheetnames: 
 27 |             sheet_obj = self.wb[sheet]
 28 |             self.wb.remove(sheet_obj)
 29 |             
 30 |     def create_get_sheet (self, sheet_name):
 31 |         """ Create a new sheet with specifici name, and set it as
 32 |         current sheet in class 
 33 |         """
 34 |         
 35 |         self.wb.create_sheet(sheet_name)
 36 |         self.set_sheet(sheet_name)
 37 |     
 38 |     def set_sheet (self, sheet_name): 
 39 |         
 40 |         self.current_sheet = self.wb[sheet_name]
 41 |     
 42 |     def save (self):
 43 |         """Save current workbook
 44 |         """
 45 |         
 46 |         self.wb.save(self.file_name)
 47 | 
 48 |     def write_cell (self, value="", row=1, column=1): 
 49 |         """ Write data in specific cell 
 50 |         """
 51 |         
 52 |         self.current_sheet.cell (row, column).value = value
 53 |     
 54 |     def write_data (self, data=[], start_row=1, start_column=1): 
 55 |         """ Write data list starting in specific cell
 56 |         """
 57 |                 
 58 |         current_row = start_row
 59 |         current_column = start_column
 60 |         
 61 |         for row in data: 
 62 |                         
 63 |             for cell_value in row: 
 64 |                                 
 65 |                 cell_obj = self.current_sheet.cell (current_row, current_column)
 66 |                 cell_obj.value = cell_value
 67 |                 
 68 |                 current_column += 1
 69 |             
 70 |             current_column = start_column
 71 |             current_row += 1
 72 |     
 73 |     def auto_width (self): 
 74 |         """ Set corect width to each coumn in the current sheet
 75 |         """
 76 |     
 77 |         for col in self.current_sheet.columns:
 78 |             max_length = 0
 79 |             column = col[0].column_letter # Get the column name
 80 |             for cell in col:
 81 |                 try: # Necessary to avoid error on empty cells
 82 |                     if len(str(cell.value)) > max_length:
 83 |                         max_length = len(str(cell.value))
 84 |                 except:
 85 |                     pass
 86 |             adjusted_width = (max_length + 2) * 1.2
 87 |             self.current_sheet.column_dimensions[column].width = adjusted_width
 88 |         
 89 |     def format_range (self, start_cell=(1,1), end_cell=(1,1), italic=False, 
 90 |                       bold=False, font_size=8): 
 91 |         
 92 |         # Create font style
 93 |         formated_font = Font(size=font_size, italic=italic, bold=bold)
 94 |         
 95 |         # Apply style
 96 |         current_row = start_cell[0]
 97 |         current_column = start_cell[1]
 98 |         
 99 |         for row in range(start_cell[0], end_cell[0] + 1): 
100 |                         
101 |             for cell_value in range(start_cell[1], end_cell[1] + 1): 
102 |                                 
103 |                 cell_obj = self.current_sheet.cell (current_row, current_column)
104 |                 cell_obj.font = formated_font
105 |                 
106 |                 current_column += 1
107 |             
108 |             current_column = 1
109 |             current_row += 1
110 |         
111 |     def get_data (self): 
112 |         """ Get all data from the current page """
113 | 
114 |         rows = self.current_sheet.max_row
115 |         columns = self.current_sheet.max_column
116 | 
117 |         data = []
118 |         for row in range(1, rows + 1):
119 | 
120 |             row_data = []
121 |             for column in range(1, columns + 1):
122 |                 cell_data = self.current_sheet.cell (row, column).value
123 |                 row_data.append (cell_data)
124 | 
125 |             data.append (row_data)
126 | 
127 |         return data


--------------------------------------------------------------------------------
/__main__.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import time
  4 | import shutil
  5 | import globals
  6 | import datetime
  7 | import download
  8 | from uploaders import instagram, twitter, youtube, facebook, tiktok
  9 | from config import Config
 10 | from moviepy.editor import VideoFileClip
 11 | from spreadsheet_manager.google_ss import SS_manager
 12 | from scraping_manager.automate import Web_scraping
 13 | 
 14 | # Get credentials
 15 | credentials = Config()
 16 | chrome_folder = credentials.get ("chrome_folder")
 17 | facebook_page = credentials.get ("facebook_page")
 18 | api_key = credentials.get ("api_key")
 19 | sheet_url = credentials.get ("sheet_url")
 20 | upload_instagram = credentials.get ("instagram")
 21 | upload_facebook = credentials.get ("facebook")
 22 | upload_twitter = credentials.get ("twitter")
 23 | upload_youtube = credentials.get ("youtube")
 24 | upload_tiktok = credentials.get ("tiktok")
 25 | 
 26 | # Global variables
 27 | globals.current_folder = os.path.dirname (__file__)
 28 | globals.download_folder = os.path.join (globals.current_folder, "downloads")
 29 | globals.chrome_folder = chrome_folder
 30 | 
 31 | 
 32 | def start_scraper ():
 33 |     """ Start selenium with user settings and save as global variable
 34 |     """
 35 | 
 36 | 
 37 |     # Start browser for install extensions
 38 |     globals.scraper = Web_scraping (headless=False, 
 39 |                                     download_folder=globals.download_folder,
 40 |                                     chrome_folder=globals.chrome_folder)   
 41 | 
 42 | def get_video_duration (file_path:str):
 43 |     """Get the duration in seconds from specific video
 44 | 
 45 |     Args:
 46 |         file_path (str): path of the video
 47 | 
 48 |     Returns:
 49 |         float: duation in seconds
 50 |     """
 51 |     clip = VideoFileClip(file_path)
 52 |     clip.close()
 53 |     return clip.duration
 54 | 
 55 | def main (): 
 56 |     """
 57 |     Download videos from tiktok and post in: 
 58 |         * facebook page
 59 |         * youtube shorts
 60 |         * instagram reels
 61 |         * twitter
 62 |         * tiktok
 63 |     """
 64 | 
 65 |     # Get data from file
 66 |     print ("connecting with google sheet...")
 67 |     ss = SS_manager(sheet_url, api_key)
 68 |     videos_data = ss.get_data()
 69 | 
 70 |     # Main loop for each video
 71 |     output_data = [[]]
 72 |     for row in videos_data:
 73 | 
 74 |         # Get data from row
 75 |         date_time_text = row["date time"]
 76 |         video_url_name = row["url or name"]
 77 |         title = row["title"]
 78 |         description = row["description"]
 79 |         tags_text = row["tags"]
 80 |         processed = row["processed"]
 81 |         uploaded_instagram = row["uploaded instagram"]
 82 |         uploaded_facebook = row["uploaded facebook"]
 83 |         uploaded_twitter = row["uploaded twitter"]
 84 |         uploaded_youtube = row["uploaded youtube"]
 85 |         uploaded_tiktok = row["uploaded tiktok"]
 86 | 
 87 |         # Tags to list
 88 |         tags = tags_text.split(",")
 89 |         
 90 |         # Format date time
 91 |         date_time = datetime.datetime.strptime(date_time_text, "%m/%d/%Y %H:%M")
 92 | 
 93 |         # Validate video link
 94 |         if not video_url_name:
 95 |             break
 96 |         else:
 97 | 
 98 |             print (f"\nCurrent video: {title}")
 99 | 
100 |             # Time validation
101 |             now = datetime.datetime.now()
102 |             if now > date_time:
103 |                 print (f"\tVideo skipped. The current time ({now}) is greater than the publication time ({date_time}).")
104 |                 output_data.append ([])
105 |                 continue
106 | 
107 |             # Validate video processed
108 |             if processed.lower().strip() == "yes":
109 |                 print ("\tVideo omitted, already processed")
110 |                 output_data.append ([])
111 |                 continue
112 |             else: 
113 | 
114 |                 start_scraper ()
115 |                 
116 |                 # Wait time
117 |                 wait_time = date_time - now
118 |                 print (f"\tWaiting for the time: {date_time}...")
119 |                 time.sleep (wait_time.total_seconds())
120 |                 
121 |                 # Default values for output uploaded in spreadsheet
122 |                 uploaded_instagram = "no"
123 |                 uploaded_facebook = "no"
124 |                 uploaded_twitter = "no"
125 |                 uploaded_youtube = "no"
126 |                 uploaded_tiktok = "no"
127 |                 processed = "yes"
128 | 
129 |                 # Download video
130 |                 if "www.tiktok.com" in video_url_name:
131 |                     # Download tiktok video
132 |                     file_path = download.tiktok (video_url_name, title)
133 |                 else:
134 |                     # Validate video path
135 |                     file_path = os.path.join (globals.current_folder, "downloads", video_url_name)
136 |                     if not os.path.isfile (file_path):
137 |                         raise FileNotFoundError (file_path)
138 | 
139 |                 duration = get_video_duration (file_path)
140 | 
141 |                 # Validate duration for youtube and instagram
142 |                 if duration <= 60:
143 |                     # Upload video to youtube
144 |                     if upload_youtube:
145 |                         youtube.upload (file_path, title, description, tags)
146 |                         uploaded_youtube = "yes"
147 | 
148 |                     # Upload video to instagram
149 |                     if upload_instagram:
150 |                         instagram.upload (file_path, title, description, tags)
151 |                         uploaded_instagram = "yes"
152 | 
153 |                     # Upload video to tiktok
154 |                     if upload_tiktok:
155 |                         tiktok.upload (file_path, title, description, tags)
156 |                         uploaded_tiktok = "yes"
157 |                 else:
158 |                     print ("\tYoutube, Instagram and Tiktok: video skipped (60 sec it's max time for this pages)")
159 | 
160 |                 # Validate duration for twitter
161 |                 if duration <= 140:
162 |                     if upload_twitter: 
163 |                         # Convert video
164 |                         file_converted = twitter.convert (file_path)
165 |                         globals.scraper.kill ()
166 |                         start_scraper ()
167 | 
168 |                         # Upload video to twitter
169 |                         twitter.upload (file_converted, title, description, tags)
170 |                         uploaded_twitter = "yes"
171 | 
172 |                         # Move twitter file to done folder
173 |                         shutil.move (file_converted, file_converted.replace("downloads", "done"))
174 | 
175 |                 else:
176 |                     print ("\tTwitter: video skipped (2:20 min it's max time for twitter)")
177 |                 
178 |                 # Post in faebook page without time validation
179 |                 if upload_facebook:
180 |                     facebook.upload (facebook_page, file_path, title, description, tags)
181 |                     uploaded_facebook = "yes"
182 | 
183 |                 # End browser
184 |                 globals.scraper.kill()
185 | 
186 |                 # Move file to done folder
187 |                 os.replace (file_path, file_path.replace("downloads", "done"))
188 | 
189 |         # Add row to output data
190 |         output_data.append ([
191 |             date_time_text,
192 |             video_url_name, 
193 |             title, 
194 |             description, 
195 |             tags_text, 
196 |             processed, 
197 |             uploaded_instagram, 
198 |             uploaded_facebook, 
199 |             uploaded_twitter, 
200 |             uploaded_youtube,
201 |             uploaded_tiktok
202 |         ])
203 | 
204 |     # End browser
205 |     try:
206 |         globals.scraper.kill()
207 |     except:
208 |         pass
209 | 
210 |     # Update data in sheet
211 |     ss.worksheet.update (output_data)
212 |     print ("Done")
213 | 
214 | 
215 | if __name__ == "__main__":
216 |     main()


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <div><a href='https://github.com/darideveloper/video-post/blob/master/LICENSE' target='_blank'>
  2 |             <img src='https://img.shields.io/github/license/darideveloper/video-post.svg?style=for-the-badge' alt='MIT License' height='30px'/>
  3 |         </a><a href='https://www.linkedin.com/in/francisco-dari-hernandez-6456b6181/' target='_blank'>
  4 |                 <img src='https://img.shields.io/static/v1?style=for-the-badge&message=LinkedIn&color=0A66C2&logo=LinkedIn&logoColor=FFFFFF&label=' alt='Linkedin' height='30px'/>
  5 |             </a><a href='https://t.me/darideveloper' target='_blank'>
  6 |                 <img src='https://img.shields.io/static/v1?style=for-the-badge&message=Telegram&color=26A5E4&logo=Telegram&logoColor=FFFFFF&label=' alt='Telegram' height='30px'/>
  7 |             </a><a href='https://github.com/darideveloper' target='_blank'>
  8 |                 <img src='https://img.shields.io/static/v1?style=for-the-badge&message=GitHub&color=181717&logo=GitHub&logoColor=FFFFFF&label=' alt='Github' height='30px'/>
  9 |             </a><a href='https://www.fiverr.com/darideveloper?up_rollout=true' target='_blank'>
 10 |                 <img src='https://img.shields.io/static/v1?style=for-the-badge&message=Fiverr&color=222222&logo=Fiverr&logoColor=1DBF73&label=' alt='Fiverr' height='30px'/>
 11 |             </a><a href='https://discord.com/users/992019836811083826' target='_blank'>
 12 |                 <img src='https://img.shields.io/static/v1?style=for-the-badge&message=Discord&color=5865F2&logo=Discord&logoColor=FFFFFF&label=' alt='Discord' height='30px'/>
 13 |             </a><a href='mailto:darideveloper@gmail.com?subject=Hello Dari Developer' target='_blank'>
 14 |                 <img src='https://img.shields.io/static/v1?style=for-the-badge&message=Gmail&color=EA4335&logo=Gmail&logoColor=FFFFFF&label=' alt='Gmail' height='30px'/>
 15 |             </a></div><div align='center'><br><br><img src='https://github.com/darideveloper/video-post/blob/master/logo.png?raw=true' alt='Video Post' height='80px'/>
 16 | 
 17 | # Video Post
 18 | 
 19 | Download videos from tiktok and post in: 
 20 | * facebook page
 21 | * youtube shorts
 22 | * instagram reels
 23 | * twitter
 24 | * tiktok
 25 | 
 26 | Project type: **client**
 27 | 
 28 | </div><br><details>
 29 |             <summary>Table of Contents</summary>
 30 |             <ol>
 31 | <li><a href='#buildwith'>Build With</a></li>
 32 | <li><a href='#media'>Media</a></li>
 33 | <li><a href='#details'>Details</a></li>
 34 | <li><a href='#install'>Install</a></li>
 35 | <li><a href='#settings'>Settings</a></li>
 36 | <li><a href='#run'>Run</a></li></ol>
 37 |         </details><br>
 38 | 
 39 | # Build with
 40 | 
 41 | <div align='center'><a href='https://www.python.org/' target='_blank'> <img src='https://cdn.svgporn.com/logos/python.svg' alt='Python' title='Python' height='50px'/> </a><a href='https://requests.readthedocs.io/en/latest/' target='_blank'> <img src='https://requests.readthedocs.io/en/latest/_static/requests-sidebar.png' alt='Requests' title='Requests' height='50px'/> </a><a href='https://www.selenium.dev/' target='_blank'> <img src='https://cdn.svgporn.com/logos/selenium.svg' alt='Selenium' title='Selenium' height='50px'/> </a><a href='https://sheets.google.com/' target='_blank'> <img src='https://www.gstatic.com/images/branding/product/1x/sheets_2020q4_48dp.png' alt='Google Sheets' title='Google Sheets' height='50px'/> </a></div>
 42 | 
 43 | # Details
 44 | 
 45 | ## Workflow
 46 | 
 47 | 1. Checks if the current time and date match the publication date configured in the google sheet.
 48 | 
 49 | 1. Validate the videos in *downloads* folder with the name or download it from tiktok.
 50 | 
 51 | 2. Validate the duration of the video for each social network, and make the necessary file conversions.
 52 | 
 53 | Note: each social network has a different duration limit for the videos.
 54 | If a video is longer than the page limits, it is automatically skipped.
 55 | 
 56 | * **facebook:** 240 minutes
 57 | * **youtube shorts:** 60 seconds
 58 | * **instagram reels:** 60 seconds
 59 | * **tiktok:** 60 seconds
 60 | * **twitter:** 2:20 minutes
 61 | 
 62 | Additionally, to upload the videos to twitter, they require an extra conversion with the page: https://servicios-web.online-convert.com/es/convertir-para-twitter (this step is done automatically).
 63 | 
 64 | 3. Post the video in each social network, typing the title, description and tags/keywords.
 65 | 
 66 | 4. Move the video to **done** folder.
 67 | 
 68 | After posting all videos, the google sheet is updated.
 69 | 
 70 | ## Warnings
 71 | 
 72 | ### Youtube
 73 | 
 74 | By default, youtube has a **limit of 15 uploads per day**. You can upload more videos with a manual verification.
 75 | If you try to upload more videos without the verification, the program will raise an error.
 76 | 
 77 | ### Instagram
 78 | 
 79 | To upload reels to instagram, we use the extension **INSSIST**.
 80 | At this time, the **extension has a bug** that allows us to **post reels without limits**, but when the developers fix it, **you will need to pay for the extension** to continue using the reels (at the moment, there are no other alternatives).
 81 | 
 82 | ### Google chrome
 83 | 
 84 | While the program is running, you will **not be able to use** your Google Chrome browser.
 85 | Also, make sure that when you start chrome, it **opens a new blank tab** (not the last open tabs), to avoid errors.
 86 | 
 87 | ### Files
 88 | 
 89 | The program **automatically replaces** the files in the "done" and "downloads" folders. Make sure you don't have any important videos with the same names as the ones in the spreadsheet, or you will lose them.
 90 | 
 91 | ### Updates
 92 | 
 93 | This is a web automation project.
 94 | Web automation **depends** entirely on the **structure of the page**, which means that **if any social network is updated** (for example, facebook) and changes the way videos are uploaded (a structural change (html) with or without changes layout (css)), **the project will need to be updated too**.
 95 | 
 96 | # Install
 97 | 
 98 | Install all modules from pip: 
 99 | 
100 | ``` bash
101 | $ pip install -r requirements.txt
102 | ```
103 | 
104 | # Settings
105 | 
106 | ## Programs
107 | 
108 | To run the project, the following programs must be installed:
109 | 
110 | * [Google Chrome](https://www.google.com/intl/es/chrome) last version
111 | 
112 | ## Generate Google API Key
113 | 
114 | You can learn how to generate an API Key for google sheets, in this [tutorial](https://github.com/DariHernandez/tutorials/tree/master/generate%20google%20sheets%20api%20key)
115 | 
116 | ## Create spreadsheet
117 | 
118 | You need to create a spreadsheet in the same google account that generated your API Key.
119 | 
120 | The name of the spreadsheet does not matter, but it is recommended to use "video post" or something similar, to easily identify it
121 | 
122 | ### Columns
123 | 
124 | The names of the columns are the following:
125 | 
126 | *Note: all letters must be in lowercase, without extra spaces or extra characters (the single spaces shown inside the names are part of them)*
127 | 
128 | * date time
129 | * url or name
130 | * title	
131 | * description
132 | * tags
133 | * processed
134 | * uploaded instagram
135 | * uploaded facebook
136 | * uploaded twitter
137 | * uploaded youtube
138 | * uploaded tiktok
139 | 
140 | *Note: more details about the google sheet in the* **Settings > Google sheet** *section*
141 | 
142 | ## Setup chrome
143 | 
144 | This project does not require users and passwords to login, instead it will use the sessions that you already have opened in your browser.
145 | 
146 | Before running the project, you need to do some extra steps to prepare your google chrome.
147 | 
148 | 1. Login to your instagram account.
149 | 2. Login to a facebook account that has access to post in the facebook page.
150 | 3. Login to your twitter account.
151 | 4. Login to your youtube account.
152 | 5. Install the following extensions in chrome: [AdBlock](https://chrome.google.com/webstore/detail/adblock-%E2%80%94-best-ad-blocker/gighmmpiobklfepjocnamgkkbiglidom?hl=es-419) and [INSSIST](https://chrome.google.com/webstore/detail/inssist-web-client-for-in/bcocdbombenodlegijagbhdjbifpiijp?hl=en-US)
153 | 
154 | ## Config file
155 | 
156 | All **configurations** are saved in a **config.json file**, so **you can create and edit it manually**
157 | 
158 | This is the content of the file (copy, paste, and replace with your data):
159 | 
160 | ```json
161 | {
162 |  "chrome_folder": "C:\\Users\\{your user name}\\AppData\\Local\\Google\\Chrome\\User Data",
163 |  "facebook_page": "https://www.facebook.com/your_page_name/?ref=pages_you_manage",
164 |  "api_key": "{project folder}\\video post\\sheets-340407-d8642222c103.json",
165 |  "sheet_url": "https://docs.google.com/spreadsheets/d/1Eh1...iw0M/edit?usp=sharing",
166 |  "instagram": false,
167 |  "facebook": false,
168 |  "twitter": true,
169 |  "youtube": false,
170 |  "tiktok": false
171 | 
172 | }
173 | ```
174 | 
175 | ### chrome_folder
176 | The path of the google chrome data. By default, in windows, it is in: **C:\Users\{your user name}\AppData\Local\Google\Chrome\User Data**
177 | 
178 | ### api_key
179 | Path of your google api key in json format, generated in the **Settings > Generate Google API Key** section
180 | 
181 | ### sheet_url
182 | Link of the google sheet with edit permissions. Here a [tutorial](https://github.com/DariHernandez/tutorials/tree/master/share%20google%20sheet%20with%20edit%20permissions) about how to generate the link
183 | 
184 | ### facebook_page
185 | Link of your facebook page.
186 | 
187 | ### instagram
188 | Post (true) or skip (false) all the videos for instagram
189 | 
190 | ### facebook
191 | Post (true) or skip (false) all the videos for facebook
192 | 
193 | ### twitter
194 | Post (true) or skip (false) all the videos for twitter
195 | 
196 | ### youtube
197 | Post (true) or skip (false) all the videos for youtube
198 | 
199 | ### tiktok
200 | Post (true) or skip (false) all the videos for tiktok
201 | 
202 | ## Google sheet
203 | 
204 | Here are the details about how to use the columns in the google sheet:
205 | 
206 | Column|description|sample
207 | |---|---|---|
208 | date time|Date and time in which the post should be published. Important: **dates must be in chronological order.** Format: mm/dd/yyyy h:m|02/05/2022 18:20
209 | url or name|tiktok video link or name of the file in the "downloads" folder (You can use any of the two and the program will detect it automatically)| https://www.tiktok.com/...8670874256902 or video1.mp4
210 | title|title of the video to post in all social networks| Dancing
211 | description|description / caption for add to the video| Hello, this is my first dancing video
212 | tags|keywords or hashtags, separated by commas| shorts,tiktok,dancing,funny
213 | processed|status of the video: processed by the program or not. By default, **no**| yes
214 | uploaded instagram|if the video is already processed, show if it have been posted in this social network. By default, **no**| no
215 | uploaded facebook|if the video is already processed, show if it have been posted in this social network. By default, **no**| yes
216 | uploaded twitter|if the video is already processed, show if it have been posted in this social network. By default, **no**| no
217 | uploaded youtube|if the video is already processed, show if it have been posted in this social network. By default, **no**| yes
218 | uploaded tiktok|if the video is already processed, show if it have been posted in this social network. By default, **no**| yes
219 | 
220 | # Run
221 | 
222 | After completing the previous steps, you can run the program by executing **__main__.py** or the project folder with your python 3.9 interpreter.
223 | 
224 | Before each run, make sure of the following:
225 | 
226 | ## Before run
227 | 
228 | ### Kill chrome processes
229 | 
230 | **Kill/end** from task manager (windows) or htop (linux), **all google chrome processes**.
231 | 
232 | Here a tutorial about how to [kill google chrome process in windows](https://github.com/DariHernandez/tutorials/tree/master/kill%20google%20chrome%20in%20windows)
233 | 
234 | ### Close google sheet
235 | 
236 | To ensure data integrity, do not edit or make changes to the spreadsheet while the program is running.
237 | 
238 | 


--------------------------------------------------------------------------------
/scraping_manager/automate.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import time
  4 | import logging
  5 | import zipfile
  6 | from selenium import webdriver
  7 | from selenium.webdriver.common.keys import Keys
  8 | from selenium.common.exceptions import TimeoutException
  9 | import webdriver_manager
 10 | from webdriver_manager.chrome import ChromeDriverManager
 11 | from webdriver_manager.utils import ChromeType
 12 | from selenium.webdriver.common.by import By
 13 | from selenium.webdriver.support.ui import WebDriverWait
 14 | from selenium.webdriver.support import expected_conditions as EC
 15 | from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
 16 | 
 17 | current_file = os.path.basename(__file__)
 18 |   
 19 | # Diable web driver manager logs 
 20 | logger_webdriver = logging.getLogger("webdriver_manager")
 21 | logger_webdriver.setLevel(logging.ERROR)
 22 | 
 23 | logger_selenium = logging.getLogger("selenium")
 24 | logger_selenium.setLevel(logging.ERROR)
 25 | 
 26 | class Web_scraping (): 
 27 |     """
 28 |     Class to manage and configure web browser
 29 |     """
 30 |     
 31 |     def __init__ (
 32 |             self, web_page="", headless=False, time_out=0, 
 33 |             proxy_server="", proxy_port="", proxy_user="", proxy_pass="", 
 34 |             chrome_folder="", user_agent=False, capabilities=False,
 35 |             download_folder="", extensions=[], incognito=False): 
 36 |         """
 37 |         Constructor of the class
 38 |         """
 39 |         
 40 |         self.basetime = 1
 41 | 
 42 |         # variables of class 
 43 |         self.__headless = headless
 44 |         self.__current_dir = os.path.dirname (__file__)
 45 |         self.__web_page = web_page
 46 |         self.__proxy_server = proxy_server
 47 |         self.__proxy_port = proxy_port
 48 |         self.__proxy_user = proxy_user
 49 |         self.__proxy_pass = proxy_pass
 50 |         self.__pluginfile = 'proxy_auth_plugin.zip'
 51 |         self.__chrome_folder = chrome_folder
 52 |         self.__user_agent = user_agent
 53 |         self.__capabilities = capabilities
 54 |         self.__download_folder = download_folder
 55 |         self.__extensions = extensions
 56 |         self.__incognito = incognito
 57 |         
 58 |         # Create and instance of the web browser 
 59 |         self.__set_browser_instance()
 60 |         
 61 |         # Get current file name
 62 |         self.current_file = os.path.basename(__file__)
 63 |         
 64 |         # Set time out 
 65 |         if time_out > 0: 
 66 |             self.driver.set_page_load_timeout(30)
 67 | 
 68 |     def __set_browser_instance (self):
 69 |         """
 70 |         Open and configure browser
 71 |         """
 72 |         
 73 |         # Disable logs
 74 |         os.environ['WDM_LOG_LEVEL'] = '0'
 75 |         os.environ['WDM_PRINT_FIRST_LINE'] = 'False'
 76 |         
 77 |         # Configure browser
 78 |         options = webdriver.ChromeOptions()
 79 |         options.add_argument('--no-sandbox')
 80 |         options.add_argument('--start-maximized')
 81 |         options.add_argument('--output=/dev/null')
 82 |         options.add_argument('--log-level=3')
 83 |         options.add_argument("--disable-notifications")
 84 |         options.add_argument("disable-infobars")
 85 | 
 86 |         # Experimentals
 87 |         options.add_experimental_option('excludeSwitches', ['enable-logging', "enable-automation"])
 88 |         options.add_experimental_option('useAutomationExtension', False)
 89 |         
 90 |         if self.__headless:        
 91 |             options.add_argument("--window-size=1920,1080")
 92 |             options.add_argument("--headless")
 93 |         
 94 |         # Set proxy without autentication
 95 |         if (self.__proxy_server and self.__proxy_port 
 96 |             and not self.__proxy_user and not self.__proxy_pass):
 97 |             
 98 |             proxy = f"{self.__proxy_server}:{self.__proxy_port}"
 99 |             options.add_argument(f"--proxy-server={proxy}")
100 |         
101 |         # Set proxy with autentification 
102 |         if (self.__proxy_server and self.__proxy_port 
103 |             and self.__proxy_user and self.__proxy_pass):
104 |             
105 |             self.__create_proxy_extesion()
106 |             options.add_extension(self.__pluginfile)
107 | 
108 |         # Set chrome folder
109 |         if self.__chrome_folder:
110 |             options.add_argument(f"--user-data-dir={self.__chrome_folder}")
111 | 
112 |         # Set default user agent
113 |         if self.__user_agent:
114 |             options.add_argument('--user-agent=Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36')
115 |         
116 |         if self.__capabilities:
117 |             capabilities = DesiredCapabilities.CHROME
118 |             capabilities["goog:loggingPrefs"] = {"performance": "ALL"}
119 |         else: 
120 |             capabilities = None
121 | 
122 |         if self.__download_folder:
123 |             prefs = {"download.default_directory" : f"{self.__download_folder}", 
124 |                     "download.prompt_for_download": True}
125 |             options.add_experimental_option("prefs",prefs)
126 | 
127 |         if self.__extensions:
128 |             for extension in self.__extensions:
129 |                 options.add_extension(extension)
130 | 
131 |         if self.__incognito:
132 |             options.add_argument("--incognito")
133 | 
134 |         options.add_argument("--disable-blink-features=AutomationControlled")
135 | 
136 | 
137 |         
138 |         # Set configuration to  and create instance
139 |         chromedriver = ChromeDriverManager(chrome_type=ChromeType.GOOGLE, 
140 |                                             log_level='0', 
141 |                                             print_first_line=False).install()
142 |         self.driver = webdriver.Chrome(chromedriver, 
143 |                                 options=options, 
144 |                                 service_log_path=None,
145 |                                 desired_capabilities=capabilities)
146 | 
147 |         # Clean terminal
148 |         # os.system('cls||clear')
149 |         
150 |         if self.__web_page: 
151 |             self.driver.get (self.__web_page)
152 | 
153 |             # Wait to load page
154 |             # time.sleep (self.basetime*5)
155 |             
156 |     def __create_proxy_extesion (self): 
157 |         """Create a proxy chrome extension"""
158 |         
159 |         # plugin data
160 |         manifest_json = """
161 |         {
162 |             "version": "1.0.0",
163 |             "manifest_version": 2,
164 |             "name": "Chrome Proxy",
165 |             "permissions": [
166 |                 "proxy",
167 |                 "tabs",
168 |                 "unlimitedStorage",
169 |                 "storage",
170 |                 "<all_urls>",
171 |                 "webRequest",
172 |                 "webRequestBlocking"
173 |             ],
174 |             "background": {
175 |                 "scripts": ["background.js"]
176 |             },
177 |             "minimum_chrome_version":"22.0.0"
178 |         }
179 |         """
180 | 
181 |         background_js = """
182 |         var config = {
183 |                 mode: "fixed_servers",
184 |                 rules: {
185 |                 singleProxy: {
186 |                     scheme: "http",
187 |                     host: "%s",
188 |                     port: parseInt(%s)
189 |                 },
190 |                 bypassList: ["localhost"]
191 |                 }
192 |             };
193 | 
194 |         chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});
195 | 
196 |         function callbackFn(details) {
197 |             return {
198 |                 authCredentials: {
199 |                     username: "%s",
200 |                     password: "%s"
201 |                 }
202 |             };
203 |         }
204 | 
205 |         chrome.webRequest.onAuthRequired.addListener(
206 |                     callbackFn,
207 |                     {urls: ["<all_urls>"]},
208 |                     ['blocking']
209 |         );
210 |         """ % (self.__proxy_server, self.__proxy_port, self.__proxy_user, self.__proxy_pass)
211 | 
212 |         # Compress file
213 |         with zipfile.ZipFile(self.__pluginfile, 'w') as zp:
214 |             zp.writestr("manifest.json", manifest_json)
215 |             zp.writestr("background.js", background_js)
216 |     
217 |     def screenshot (self, base_name):
218 |         """
219 |         Take a sreenshot of the current browser window
220 |         """ 
221 | 
222 |         if str(base_name).endswith(".png"):
223 |             file_name = base_name
224 |         else: 
225 |             file_name = f"{base_name}.png"
226 |             
227 |         self.driver.save_screenshot(file_name)
228 |              
229 |     def get_browser (self): 
230 |         """
231 |         Return the current instance of web browser
232 |         """
233 |         
234 |         return self.driver
235 |     
236 |     
237 |     def end_browser (self): 
238 |         """
239 |         End current instance of web browser
240 |         """    
241 |         
242 |         self.driver.close()
243 |     
244 |     
245 |     def __reload_browser (self): 
246 |         """
247 |         Close the current instance of the web browser and reload in the same page
248 |         """
249 | 
250 |         self.end_browser()
251 |         self.driver = self.get_browser()
252 |         self.driver.get (self.__web_page)
253 | 
254 |     
255 |     def send_data (self, selector, data): 
256 |         """
257 |         Send data to specific input fill
258 |         """
259 |         
260 |         elem = self.driver.find_element_by_css_selector (selector)
261 |         elem.send_keys (data)
262 | 
263 |     
264 |     def click (self, selector): 
265 |         """
266 |         Send click to specific element in the page
267 |         """
268 |         
269 |         elem = self.driver.find_element_by_css_selector (selector)
270 |         elem.click()
271 |     
272 |     
273 |     def wait_load (self, selector, time_out = 10, refresh_back_tab=-1): 
274 |         """
275 |         Wait to page load an element
276 |         """
277 |         
278 |         total_time = 0
279 |         
280 |         while True: 
281 |             if total_time < time_out: 
282 |                 total_time += 1
283 |                 try: 
284 |                     elem = self.driver.find_element_by_css_selector (selector)
285 |                     elem.text
286 |                     break
287 |                 except:
288 |                     
289 |                     # Wait time or refresh page
290 |                     if refresh_back_tab != -1: 
291 |                         self.refresh_selenium(back_tab=refresh_back_tab)
292 |                     else:
293 |                         time.sleep (self.basetime)
294 |                         
295 |                     continue
296 |             else: 
297 |                 raise Exception ("Time out exeded. The element {} is not in the page".format (selector))
298 |     
299 |         
300 |     def wait_die (self, selector, time_out = 10): 
301 |         """
302 |         Wait to page vanish and element
303 |         """
304 |                 
305 |         
306 |         total_time = 0
307 |         
308 |         while True: 
309 |             if total_time < time_out: 
310 |                 total_time += 1
311 |                 try: 
312 |                     elem = self.driver.find_element_by_css_selector (selector)
313 |                     elem.text
314 |                     time.sleep(self.basetime)
315 |                     continue
316 |                 except: 
317 |                     break
318 |             else: 
319 |                 raise Exception ("Time out exeded. The element {} is until in the page".format (selector))    
320 |     
321 |     
322 |     def get_text (self, selector):
323 |         """
324 |         Return text for specific element in the page
325 |         """
326 |         
327 |         try: 
328 |             elem = self.driver.find_element_by_css_selector (selector)
329 |             return elem.text
330 |         except Exception as err: 
331 |             # print (err)
332 |             return None
333 |         
334 |     
335 |     def get_texts (self, selector):
336 |         """
337 |         Return a list of text for specific selector
338 |         """
339 |         
340 |         texts = []
341 |         
342 |         elems = self.driver.find_elements_by_css_selector (selector)
343 |         
344 |         for elem in elems:         
345 |             try: 
346 |                 texts.append(elem.text)
347 |             except:
348 |                 continue
349 |         
350 |         return texts
351 |     
352 |      
353 |     def get_attrib (self, selector, attrib_name): 
354 |         """
355 |         Return the class value from specific element in the page
356 |         """
357 |         
358 |         try: 
359 |             elem = self.driver.find_element_by_css_selector (selector)
360 |             return elem.get_attribute(attrib_name)
361 |         except:
362 |             return None
363 |         
364 |         
365 |     def get_attribs (self, selector, attrib_name, allow_duplicates=True, allow_empty=True): 
366 |         """
367 |         Return the attributes value from specific element in the page
368 |         """
369 |         
370 |         attributes = []
371 |         elems = self.driver.find_elements_by_css_selector (selector)
372 | 
373 |         for elem in elems:
374 | 
375 |             try: 
376 |                 attribute = elem.get_attribute(attrib_name)
377 |                 
378 |                 # Skip duplicates in not duplicate mode
379 |                 if not allow_duplicates and attribute in attributes: 
380 |                     continue
381 |                 
382 |                 # Skip empty results in not ampty mode
383 |                 if not allow_empty and attribute.strip() == "":
384 |                     continue
385 | 
386 |                 attributes.append(attribute)
387 | 
388 |             except: 
389 |                 continue
390 |     
391 |         return attributes
392 |         
393 |     def get_elem (self, selector):
394 |         """
395 |         Return an specific element in the page
396 |         """
397 |         
398 |         elem = self.driver.find_element_by_css_selector (selector)
399 |         return elem
400 |     
401 |     
402 |     def get_elems (self, selector):
403 |         """
404 |         Return a list of specific element in the page
405 |         """
406 |         
407 |         elems = self.driver.find_elements_by_css_selector (selector)
408 |         return elems
409 |     
410 |     
411 |     def set_page_js (self, web_page, new_tab=False): 
412 |         """Open page with js, in current or new tab
413 |         """
414 |         
415 |         self.__web_page = web_page
416 |         
417 |         if new_tab:
418 |             script = f'window.open("{web_page}");'
419 |         else: 
420 |             script = f'window.open("{web_page}").focus();'
421 |         
422 |         print (script)
423 |         
424 |         self.driver.execute_script(script)
425 |     
426 |     def set_page (self, web_page, time_out=0, break_time_out=False):
427 |         """
428 |         Update the web page in browser
429 |         """
430 |         
431 |         try:
432 |             
433 |             self.__web_page = web_page
434 |             
435 |             # Save time out when is greader than 0
436 |             if time_out > 0:  
437 |                 self.driver.set_page_load_timeout(time_out)
438 |             
439 |             self.driver.get(self.__web_page)
440 |             
441 |         # Catch error in load page
442 |         except TimeoutException: 
443 |             
444 |             # Raise error
445 |             if break_time_out: 
446 |                 raise Exception(f"Time out to load page: {web_page}")
447 |         
448 |             # Ignore error
449 |             else: 
450 |                 self.driver.execute_script("window.stop();")
451 | 
452 | 
453 |     
454 |     
455 |     def click_js (self, selector): 
456 |         """
457 |         Send click with js, for hiden elements
458 |         """
459 |         
460 |         elem = self.driver.find_element_by_css_selector (selector)
461 |         self.driver.execute_script("arguments[0].click();", elem)
462 |         
463 |     
464 |     def select_drop_dopwn (self, selector, item_index): 
465 |         """
466 |         Select specific elemet (with number) in a drop down elemet
467 |         """
468 |         
469 |         elem = self.driver.find_element_by_css_selector (selector)
470 |         
471 |         elem.click()
472 |         for _ in range(0, item_index):
473 |             time.sleep(0.1)
474 |             elem.send_keys(Keys.DOWN)
475 |         elem.send_keys(Keys.ENTER)
476 |     
477 |     
478 |     def go_bottom (self): 
479 |         """
480 |         Go to the end of the page, sending keys
481 |         """
482 |         
483 |         elem = self.driver.find_element_by_css_selector ("body")
484 |         elem.send_keys(Keys.CONTROL + Keys.END)
485 |     
486 |     
487 |     def go_top (self): 
488 |         """
489 |         Go to the start of the page, sending keys
490 |         """
491 |         
492 |         elem = self.driver.find_element_by_css_selector ("body")
493 |         elem.send_keys(Keys.CONTROL + Keys.UP)
494 |     
495 |     
496 |     def go_down (self): 
497 |         """
498 |         advance to down, in the page, sending keys
499 |         """
500 |         
501 |         elem = self.driver.find_element_by_css_selector ("body")
502 |         elem.send_keys(Keys.PAGE_DOWN)
503 |     
504 |     
505 |     def go_up (self): 
506 |         """
507 |         Return to up, in page, sending keys
508 |         """
509 |         
510 |         elem = self.driver.find_element_by_css_selector ("body")
511 |         elem.send_keys(Keys.PAGE_UP)
512 |     
513 |     
514 |     def switch_to_main_frame (self): 
515 |         """
516 |         Switch to the main contecnt of the page
517 |         """
518 |         
519 |         self.driver.switch_to_default_content()
520 |     
521 |     
522 |     def switch_to_frame (self, frame_id): 
523 |         """
524 |         Switch to iframe inside the main content
525 |         """
526 | 
527 |         self.driver.switch_to_frame(frame_id)
528 | 
529 |     
530 |     def open_tab (self): 
531 |         """
532 |         Create new empty tab in browser
533 |         """
534 | 
535 |         self.driver.execute_script("window.open('');")
536 | 
537 |     
538 |     def close_tab (self): 
539 |         """
540 |         Clase the current tab in the browser
541 |         """
542 | 
543 |         self.driver.close()
544 | 
545 |     
546 |     def switch_to_tab (self, number): 
547 |         """
548 |         Switch to specific number of tab
549 |         """
550 | 
551 |         windows = self.driver.window_handles
552 |         self.driver.switch_to.window(windows[number])
553 |     
554 |     
555 |     def refresh_selenium (self, time_units=1, back_tab=0): 
556 |         """
557 |         Refresh the selenium data, creating and closing a new tab
558 |         """
559 |         
560 |         # Open new tab and go to it
561 |         self.open_tab()
562 |         self.switch_to_tab(len(self.driver.window_handles)-1)
563 |         
564 |         # Wait time
565 |         time.sleep(self.basetime * time_units)
566 |         
567 |         # Close new tab and return to specific tab
568 |         self.close_tab()
569 |         self.switch_to_tab(back_tab)     
570 |         
571 |         # Wait time
572 |         time.sleep(self.basetime * time_units)   
573 |     
574 |     def save_page(self, file_html): 
575 |         """ Save current page in local file"""
576 |         page_html = self.driver.page_source
577 |         current_folder = os.path.dirname (__file__)
578 |         page_file = open(os.path.join (current_folder, file_html), "w")
579 |         page_file.write(page_html)
580 |         page_file.close()
581 | 
582 |     def zoom (self, percentage=50): 
583 |         """ Custom page zoom with JS"""
584 | 
585 |         script = f"document.body.style.zoom='{percentage}%'"
586 |         self.driver.execute_script (script)
587 | 
588 |     def kill (self):
589 |         """ Detect and close all tabs """
590 |         tabs = self.driver.window_handles
591 |         for _ in tabs:
592 |             self.switch_to_tab(0)
593 |             self.end_browser()
594 | 
595 |     def scroll (self, selector, scroll_x, scroll_y):
596 |         """ Scroll X or Y in specific element of the page """
597 | 
598 |         elem = self.get_elem(selector)
599 |         self.driver.execute_script("arguments[0].scrollTo(arguments[1], arguments[2])", 
600 |                                     elem, 
601 |                                     scroll_x, 
602 |                                     scroll_y) 
603 | 


--------------------------------------------------------------------------------