├── tweetshot ├── tests │ ├── __init__.py │ └── test_take_screenshot.py ├── webdrivers │ ├── __init__.py │ ├── chrome.py │ ├── firefox.py │ ├── set_webdriver.py │ └── driver.py ├── __init__.py ├── cli_scripts.py └── take_screenshot.py ├── examples └── screenshot.png ├── README.md ├── LICENSE ├── setup.py └── .gitignore /tweetshot/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tweetshot/webdrivers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/averdones/tweetshot/HEAD/examples/screenshot.png -------------------------------------------------------------------------------- /tweetshot/__init__.py: -------------------------------------------------------------------------------- 1 | from tweetshot.take_screenshot import ( 2 | take_screenshot, take_screenshot_as_bytes, take_screenshot_as_base64, take_screenshot_as_pil 3 | ) 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Tweetshot 2 | Tweetshot is a python library to take screenshots of tweets. 3 | 4 | # Quick guide 5 | ## Installation 6 | pip install tweetshot 7 | ## Usage 8 | Open a command line, or a terminal, and run: 9 | 10 | tweetshot [TWEET-URL] 11 | 12 | For example, try the following command: 13 | 14 | tweetshot https://twitter.com/hideo_kojima_en/status/1002107372091817984 15 | 16 | You should see a new image file called `screenshot.png` in the directory from where you run the command. 
def get_chrome_driver(options_list=None, executable_path=None):
    """Create a Selenium Chrome driver through the shared ``get_driver`` helper.

    The helper supports three ways of locating chromedriver:
        - let ``webdriver_manager`` download/locate it automatically,
        - pick it up from the PATH,
        - use an explicit path to the driver executable.

    Args:
        options_list (list[str], optional): Command-line options to add to the Chrome options object.
        executable_path (str, optional): Path to the driver executable. Passing 'chromedriver' makes the
            driver be resolved from the PATH.

    Returns:
        Whatever ``get_driver`` returns for ``webdriver.Chrome`` (a Selenium Chrome driver object).

    """
    # webdriver_manager is treated as optional: if it cannot be imported, the reason is printed and
    # ``None`` is handed to ``get_driver`` in place of the manager class.
    manager_cls = None
    try:
        from webdriver_manager.chrome import ChromeDriverManager
    except Exception as import_error:
        print(import_error)
    else:
        manager_cls = ChromeDriverManager

    chrome_options = Options()

    return get_driver(webdriver.Chrome, manager_cls, options_list, executable_path, chrome_options)
def get_driver(type='chrome', executable_path=None):
    """Loads a selenium driver from one of the available ones.

    Supported drivers:
        - chromedriver
        - geckodriver

    The driver is created with the module-level ``OPTIONS_LIST`` and its window is resized to
    1920x1080 before it is returned.

    Args:
        type (str, optional): Type of driver to use. Either 'chrome' or 'firefox', to use chromedriver or
            geckodriver respectively. (The name shadows the builtin but is kept because callers pass it
            by keyword.)
        executable_path (str, optional): Path to the driver executable, forwarded unchanged to the
            browser-specific loader.

    Returns:
        Selenium driver returned by the browser-specific loader.

    Raises:
        ValueError: If ``type`` is neither 'chrome' nor 'firefox'.

    """
    if type == 'chrome':
        driver = get_chrome_driver(options_list=OPTIONS_LIST, executable_path=executable_path)
    elif type == 'firefox':
        driver = get_firefox_driver(options_list=OPTIONS_LIST, executable_path=executable_path)
    else:
        # The original raised a bare string, which Python rejects with an unrelated TypeError
        # ("exceptions must derive from BaseException"); raise a real exception instead.
        raise ValueError("Type must be either 'chrome' or 'firefox', got {!r}.".format(type))

    # Fixed window size for every driver created here.
    driver.set_window_size(1920, 1080)

    return driver
def parse_arguments(argv=None):
    """Parses the command-line arguments of the tweetshot CLI.

    Args:
        argv (list[str], optional): Argument strings to parse. When None (the default), argparse reads
            them from ``sys.argv[1:]``, which is what the original zero-argument call did.

    Returns:
        argparse.Namespace: Parsed arguments with the attributes ``url``, ``driver_type``,
            ``custom_driver``, ``timeout``, ``driver_from_path`` and ``filename``.

    """
    parser = argparse.ArgumentParser(description="Takes a screenshot of a tweet")
    parser.add_argument('url', type=str, help="Url of the tweet to screenshot")
    # Restricting the choices makes an unsupported driver fail at the command line with a usage message.
    parser.add_argument('-d', '--driver_type', type=str, default='chrome', choices=('chrome', 'firefox'),
                        help="Type of driver to use. Either 'chrome' or 'firefox'")
    parser.add_argument('-c', '--custom_driver', type=str, default=None,
                        help="Path to a custom driver to use. This must match the --driver_type argument, "
                             "which by default is 'chrome'")
    # The timeout must be numeric: it was previously parsed as str, so a user-supplied value did not
    # match the integer default and reached Selenium's wait as a string.
    parser.add_argument("-t", "--timeout", type=int, default=10,
                        help="Number of seconds before timing out while taking the screenshot")
    parser.add_argument("--driver-from-path", dest="driver_from_path", action="store_true",
                        help="If present, this option will load the Selenium driver from the PATH, if "
                             "present. The type of driver used is selected with the option --driver_type")
    parser.add_argument("-f", "--filename", type=str,
                        help="Output file of the screenshot taken. If this is just a name, the file will be "
                             "saved in the same directory where the program is located. Otherwise, an "
                             "absolute path can be introduced to save the file in a different directory")

    return parser.parse_args(argv)
test_take_screenshot_as_pil_firefox(): 30 | url = 'https://twitter.com/hideo_kojima_en/status/1002107372091817984' 31 | im = take_screenshot_as_pil(url, type_driver='firefox') 32 | 33 | assert isinstance(im, Image.Image) 34 | 35 | 36 | def test_take_screenshot_as_bytes_chrome(): 37 | url = 'https://twitter.com/hideo_kojima_en/status/1002107372091817984' 38 | im = take_screenshot_as_bytes(url, type_driver='chrome') 39 | 40 | assert isinstance(im, bytes) 41 | 42 | def test_take_screenshot_as_bytes_firefox(): 43 | url = 'https://twitter.com/hideo_kojima_en/status/1002107372091817984' 44 | im = take_screenshot_as_bytes(url, type_driver='firefox') 45 | 46 | assert isinstance(im, bytes) 47 | 48 | 49 | def test_take_screenshot_as_base64_chrome(): 50 | url = 'https://twitter.com/hideo_kojima_en/status/1002107372091817984' 51 | im = take_screenshot_as_base64(url, type_driver='chrome') 52 | 53 | assert isinstance(im, bytes) 54 | 55 | 56 | def test_take_screenshot_as_base64_firefox(): 57 | url = 'https://twitter.com/hideo_kojima_en/status/1002107372091817984' 58 | im = take_screenshot_as_base64(url, type_driver='firefox') 59 | 60 | assert isinstance(im, bytes) 61 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/python,windows,pycharm+all 3 | 4 | ### PyCharm+all ### 5 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 6 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 7 | 8 | # User-specific stuff 9 | .idea/**/workspace.xml 10 | .idea/**/tasks.xml 11 | .idea/**/usage.statistics.xml 12 | .idea/**/dictionaries 13 | .idea/**/shelf 14 | 15 | # Sensitive or high-churn files 16 | .idea/**/dataSources/ 17 | .idea/**/dataSources.ids 18 | .idea/**/dataSources.local.xml 19 | .idea/**/sqlDataSources.xml 20 | 
.idea/**/dynamic.xml 21 | .idea/**/uiDesigner.xml 22 | .idea/**/dbnavigator.xml 23 | 24 | # Gradle 25 | .idea/**/gradle.xml 26 | .idea/**/libraries 27 | 28 | # Gradle and Maven with auto-import 29 | # When using Gradle or Maven with auto-import, you should exclude module files, 30 | # since they will be recreated, and may cause churn. Uncomment if using 31 | # auto-import. 32 | # .idea/modules.xml 33 | # .idea/*.iml 34 | # .idea/modules 35 | 36 | # CMake 37 | cmake-build-*/ 38 | 39 | # Mongo Explorer plugin 40 | .idea/**/mongoSettings.xml 41 | 42 | # File-based project format 43 | *.iws 44 | 45 | # IntelliJ 46 | out/ 47 | 48 | # mpeltonen/sbt-idea plugin 49 | .idea_modules/ 50 | 51 | # JIRA plugin 52 | atlassian-ide-plugin.xml 53 | 54 | # Cursive Clojure plugin 55 | .idea/replstate.xml 56 | 57 | # Crashlytics plugin (for Android Studio and IntelliJ) 58 | com_crashlytics_export_strings.xml 59 | crashlytics.properties 60 | crashlytics-build.properties 61 | fabric.properties 62 | 63 | # Editor-based Rest Client 64 | .idea/httpRequests 65 | 66 | ### PyCharm+all Patch ### 67 | # Ignores the whole .idea folder and all .iml files 68 | # See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360 69 | 70 | .idea/ 71 | 72 | # Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023 73 | 74 | *.iml 75 | modules.xml 76 | .idea/misc.xml 77 | *.ipr 78 | 79 | ### Python ### 80 | # Byte-compiled / optimized / DLL files 81 | __pycache__/ 82 | *.py[cod] 83 | *$py.class 84 | 85 | # C extensions 86 | *.so 87 | 88 | # Distribution / packaging 89 | .Python 90 | build/ 91 | develop-eggs/ 92 | dist/ 93 | downloads/ 94 | eggs/ 95 | .eggs/ 96 | lib/ 97 | lib64/ 98 | parts/ 99 | sdist/ 100 | var/ 101 | wheels/ 102 | *.egg-info/ 103 | .installed.cfg 104 | *.egg 105 | MANIFEST 106 | 107 | # PyInstaller 108 | # Usually these files are written by a python script from a template 109 | # before PyInstaller builds the exe, 
so as to inject date/other infos into it. 110 | *.manifest 111 | *.spec 112 | 113 | # Installer logs 114 | pip-log.txt 115 | pip-delete-this-directory.txt 116 | 117 | # Unit test / coverage reports 118 | htmlcov/ 119 | .tox/ 120 | .coverage 121 | .coverage.* 122 | .cache 123 | nosetests.xml 124 | coverage.xml 125 | *.cover 126 | .hypothesis/ 127 | .pytest_cache/ 128 | 129 | # Translations 130 | *.mo 131 | *.pot 132 | 133 | # Django stuff: 134 | *.log 135 | local_settings.py 136 | db.sqlite3 137 | 138 | # Flask stuff: 139 | instance/ 140 | .webassets-cache 141 | 142 | # Scrapy stuff: 143 | .scrapy 144 | 145 | # Sphinx documentation 146 | docs/_build/ 147 | 148 | # PyBuilder 149 | target/ 150 | 151 | # Jupyter Notebook 152 | .ipynb_checkpoints 153 | 154 | # pyenv 155 | .python-version 156 | 157 | # celery beat schedule file 158 | celerybeat-schedule 159 | 160 | # SageMath parsed files 161 | *.sage.py 162 | 163 | # Environments 164 | .env 165 | .venv 166 | env/ 167 | venv/ 168 | ENV/ 169 | env.bak/ 170 | venv.bak/ 171 | 172 | # Spyder project settings 173 | .spyderproject 174 | .spyproject 175 | 176 | # Rope project settings 177 | .ropeproject 178 | 179 | # mkdocs documentation 180 | /site 181 | 182 | # mypy 183 | .mypy_cache/ 184 | 185 | ### Python Patch ### 186 | .venv/ 187 | 188 | ### Python.VirtualEnv Stack ### 189 | # Virtualenv 190 | # http://iamzed.com/2009/05/07/a-primer-on-virtualenv/ 191 | [Bb]in 192 | [Ii]nclude 193 | [Ll]ib 194 | [Ll]ib64 195 | [Ll]ocal 196 | [Ss]cripts 197 | pyvenv.cfg 198 | pip-selfcheck.json 199 | 200 | ### Windows ### 201 | # Windows thumbnail cache files 202 | Thumbs.db 203 | ehthumbs.db 204 | ehthumbs_vista.db 205 | 206 | # Dump file 207 | *.stackdump 208 | 209 | # Folder config file 210 | [Dd]esktop.ini 211 | 212 | # Recycle Bin used on file shares 213 | $RECYCLE.BIN/ 214 | 215 | # Windows Installer files 216 | *.cab 217 | *.msi 218 | *.msix 219 | *.msm 220 | *.msp 221 | 222 | # Windows shortcuts 223 | *.lnk 224 | 225 | 226 | # End 
class Tweetshot:
    """Class to take a screenshot of a tweet.

    A Selenium driver is opened on the tweet URL, the visible browser window is captured and the
    capture is cropped to the bounding box of the tweet's ``article`` element.

    Attributes:
        url (str): tweet URL.
        type_driver (str): Type of driver to use. Either 'chrome' or 'firefox'.
        executable_path (str, optional): Path to the executable drive.
        timeout (int, optional): Number of seconds before timing out when searching for the tweet element.
        driver: Selenium driver while a screenshot is in progress, otherwise None.

    """

    # Seconds to wait for the optional image inside the tweet before carrying on without it.
    IMAGE_TIMEOUT = 2

    def __init__(self, url, type_driver='chrome', executable_path=None, timeout=30):
        """Initializes instance of class Tweetshot."""
        self.url = url
        self.type_driver = type_driver
        self.executable_path = executable_path
        self.timeout = timeout

        self.driver = None

    def get_tweetshot(self):
        """Gets a tweet screenshot.

        The driver is always shut down before returning, including when loading the page or locating
        the tweet raises, so a failed run does not leave a browser process behind.

        Returns:
            PIL image: tweet screenshot.

        """
        self.set_up()
        try:
            self.go_to_url()
            tweet_element = self.get_tweet_element()
            tweet_location = self.get_tweet_location(tweet_element)
            self.wait_tweet_image_element()
            full_screenshot = self.take_full_screenshot()
        finally:
            self.tear_down()

        return self.crop_tweet_screenshot(full_screenshot, tweet_location)

    def get_tweetshot_as_pil(self):
        """Returns the tweet screenshot as a PIL Image.

        Returns:
            PIL image: tweet screenshot.

        """
        return self.get_tweetshot()

    def get_tweetshot_as_bytes(self):
        """Returns the tweet screenshot as PNG-encoded bytes.

        Returns:
            bytes: tweet screenshot.

        """
        buffered = BytesIO()
        self.get_tweetshot().save(buffered, format="PNG")

        return buffered.getvalue()

    def get_tweetshot_as_base64(self):
        """Returns the tweet screenshot as a base64 image.

        Returns:
            bytes: base64 encoding of the PNG bytes of the tweet screenshot.

        """
        return base64.b64encode(self.get_tweetshot_as_bytes())

    def set_up(self):
        """Sets up the Selenium driver connection."""
        self.driver = get_driver(type=self.type_driver, executable_path=self.executable_path)

    def tear_down(self):
        """Tears down the Selenium driver connection.

        Does nothing when no driver is open, so it is safe to call more than once.
        """
        if self.driver is None:
            return

        try:
            self.driver.quit()
        finally:
            # Drop the reference even if quit() fails so a dead driver is never reused.
            self.driver = None

    def go_to_url(self):
        """Sends the selenium driver to the tweet url."""
        self.driver.get(self.url)

    def take_full_screenshot(self):
        """Takes a screenshot of the full visible screen.

        Returns:
            PIL image: full screen screenshot.

        """
        png = self.driver.get_screenshot_as_png()

        return Image.open(BytesIO(png))

    def get_tweet_element(self):
        """Gets the HTML element of the tweet.

        Waits up to ``self.timeout`` seconds for the element to become visible.

        Returns:
            Tweet element.

        """
        xpath = '/html/body/div/div/div/div[2]/main/div/div/div/div[1]/div/div/div/section/div/div/div/div[1]/div/' \
                'div/div/div/article'

        return WebDriverWait(self.driver, self.timeout).until(EC.visibility_of_element_located((By.XPATH, xpath)))

    def get_tweet_image_element(self):
        """Gets the HTML element of the image in the tweet, if present.

        Waits up to ``IMAGE_TIMEOUT`` seconds for the element to become visible.

        Returns:
            Tweet image element.

        """
        # TODO: deal better with waiting for the tweet to fully load
        xpath = '/html/body/div/div/div/div[2]/main/div/div/div/div[1]/div/div/div/section/div/div/div/div[1]/div/' \
                'div/div/div/article/div/div[3]/div[2]/div/div/div/div/div[2]/div/div[2]/a[2]/div/div/div'

        return WebDriverWait(self.driver, self.IMAGE_TIMEOUT).until(
            EC.visibility_of_element_located((By.XPATH, xpath)))

    def wait_tweet_image_element(self):
        """Waits for the image in the tweet to load, if there is one.

        This is deliberately best-effort: a tweet without an image makes the wait fail, and that
        failure is ignored so the screenshot is still taken.
        """
        # noinspection PyBroadException
        try:
            self.get_tweet_image_element()
        except Exception:
            pass

    @staticmethod
    def get_tweet_location(tweet_element):
        """Gets a 4-tuple defining the left, upper, right, and lower pixel coordinate of the tweet element.

        Args:
            tweet_element (WebElement instance): HTML tweet element. Only its ``location`` ('x', 'y') and
                ``size`` ('width', 'height') mappings are read.

        Returns:
            tuple: 4-tuple with coordinates.

        """
        location = tweet_element.location
        size = tweet_element.size
        left = location['x']
        top = location['y']
        right = left + size['width']
        bottom = top + size['height']

        return left, top, right, bottom

    @staticmethod
    def crop_tweet_screenshot(full_screenshot, tweet_location):
        """Crops the full screen screenshot to get only the tweet.

        Args:
            full_screenshot (PIL Image): full screen screenshot.
            tweet_location (tuple): 4-tuple with coordinates.

        Returns:
            PIL image: tweet screenshot.

        """
        return full_screenshot.crop(tweet_location)

    def save_tweet_screenshot(self, output_filename=None):
        """Saves the tweet screenshot to disk.

        Args:
            output_filename (Path or str, optional): Absolute or relative path where to save the screenshot.
                Defaults to 'screenshot'. The suffix is always set to '.png', replacing any existing one.

        Returns:
            Path: Path to the saved file.

        """
        if output_filename is None:
            output_filename = 'screenshot'

        tweetshot = self.get_tweetshot_as_pil()
        output_filename = Path(output_filename).with_suffix('.png')
        tweetshot.save(output_filename)

        return output_filename
def take_screenshot_as_base64(url, type_driver='chrome', executable_path=None, timeout=10):
    """Take a screenshot of a tweet and hand it back base64-encoded.

    Args:
        url (str): URL of the tweet.
        type_driver (str): Which driver to use, 'chrome' or 'firefox'.
        executable_path (str, optional): Path to the driver executable.
        timeout (int, optional): Seconds to wait for the tweet element before timing out.

    Returns:
        base64: The result of ``Tweetshot.get_tweetshot_as_base64`` for this tweet.

    """
    shooter = Tweetshot(url, type_driver, executable_path, timeout)
    encoded_shot = shooter.get_tweetshot_as_base64()

    return encoded_shot