├── .gitignore ├── LICENSE ├── README.md ├── constants.py ├── crossposter.py ├── element_has_text_value.py ├── entry.py ├── image_size.py ├── insta.py ├── requirements.txt ├── screenshotter.py ├── skeet.py ├── tests ├── test_format_post_title.py └── test_image_size.py ├── threads.py ├── toot.py ├── tweet.py ├── update_entry.py └── webdriver.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | 162 | .DS_Store 163 | .idea/ 164 | out/ 165 | secrets.py 166 | *.secret -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Molly White 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # w3igg-crossposter 2 | 3 | Automate crossposting web3isgoinggreat.com posts to social media. The tool captures a screenshot of the specified post, 4 | splits it into up to three segments if the post is long, and then posts to Twitter, Mastodon, Bluesky, Instagram, 5 | and/or Threads. 6 | 7 | ## Usage 8 | 9 | Install: Clone the repository, then run `pip install -r requirements.txt`. 
10 | 11 | Example call: `crossposter.py 2023-05-01-0` 12 | 13 | Call the script with the numerical ID (`YYYY-MM-DD-INCREMENT`) of the entry to post. The script also takes several 14 | optional flags: 15 | 16 | - `--no-confirm`: Skip the confirmation step, which previews the post text and prompts you to check the screenshot(s) 17 | that will be posted 18 | - `--use-prev`: Use screenshots and post information stored from a past run 19 | - `--debug`: Show more verbose debug messages 20 | 21 | Mutually exclusive optional flags: 22 | 23 | - `--tweet`: Only post to Twitter 24 | - `--toot`: Only post to Mastodon 25 | - `--skeet`: Only post to Bluesky 26 | - `--insta`: Only post to Instagram 27 | - `--threads`: Only post to Threads 28 | 29 | ## Secrets 30 | 31 | The script requires a `secrets.py` file with the format: 32 | 33 | ``` 34 | TWITTER_API_KEY = "" 35 | TWITTER_API_KEY_SECRET = "" 36 | TWITTER_ACCESS_TOKEN = "" 37 | TWITTER_ACCESS_TOKEN_SECRET = "" 38 | 39 | MASTODON_EMAIL = "" 40 | MASTODON_PASSWORD = "" 41 | 42 | BLUESKY_USERNAME = "" 43 | BLUESKY_PASSWORD = "" 44 | 45 | INSTAGRAM_USERNAME = "" 46 | INSTAGRAM_PASSWORD = "" 47 | ``` 48 | 49 | It also requires a `mastodon.secret` file generated via [this 50 | process](https://mastodonpy.readthedocs.io/en/stable/#usage), and a `gcp-secret.secret` GCP Service Account key file for 51 | a Service Account with Cloud Functions Invoker access to the `addSocialPostIds` cloud function. 52 | 53 | Posting on Threads uses the same credentials as Instagram. 
-------------------------------------------------------------------------------- /constants.py: -------------------------------------------------------------------------------- 1 | W3IGG_URL = "https://web3isgoinggreat.com" 2 | CLOUD_FUNCTIONS_URL = "https://us-central1-web3-334501.cloudfunctions.net" 3 | THREADS_URL = "https://www.threads.net/" 4 | USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/119.0" 5 | 6 | # Screenshot constants 7 | OUTPUT_DIR = "out" 8 | FILENAME_ROOT = "entry" 9 | SCALING_FACTOR = 4 10 | MAX_IMAGE_HEIGHT = 700 11 | MARGIN = 40 12 | 13 | # Twitter 14 | TWITTER_ALT_TEXT_LIMIT = 1000 15 | 16 | # Mastodon 17 | MASTODON_ALT_TEXT_LIMIT = 1500 18 | 19 | # Bluesky 20 | BLUESKY_BASE_URL = "https://bsky.social/xrpc" 21 | BLUESKY_ALT_TEXT_LIMIT = 1000 22 | 23 | THREADS_ALT_TEXT_LIMIT = 1000 24 | 25 | SERVICES = ["twitter", "mastodon", "bluesky", "instagram", "threads"] 26 | -------------------------------------------------------------------------------- /crossposter.py: -------------------------------------------------------------------------------- 1 | from constants import * 2 | from entry import get_entry, get_entry_details 3 | from screenshotter import get_screenshot 4 | from update_entry import update_entry_with_social_ids 5 | from webdriver import get_driver 6 | 7 | from toot import send_toot 8 | from tweet import send_tweet 9 | from skeet import send_skeet 10 | from insta import send_instagram 11 | from threads import send_threads 12 | 13 | import argparse 14 | import json 15 | import logging 16 | import os.path 17 | import re 18 | import subprocess 19 | 20 | ANSI = {"GREEN": "\033[92m", "YELLOW": "\033[93m", "ENDC": "\033[0m"} 21 | ZWSP = "\u200B" 22 | URL_REGEX = re.compile( 23 | r"[a-z](\.)[a-z]", flags=re.IGNORECASE 24 | ) # This is a naive regex in that it doesn't check if it's a legit TLD, but it should serve the purpose 25 | 26 | 27 | def cleanup(): 28 | """Clean up output directory before run, 
def format_post_title(post_title):
    """Return *post_title* with a zero-width space before URL-like dots.

    Every "." that sits directly between two letters (as matched by
    ``URL_REGEX``, e.g. the dot in "Crypto.com") gets ``ZWSP`` inserted in
    front of it. Dots between digits ("$1.5 million") are left untouched.
    Presumably this keeps social platforms from auto-linking domain-like
    words in the title -- confirm against the posting services.

    Args:
        post_title: Title text of the entry.

    Returns:
        The title with ``ZWSP`` inserted before each matching dot.
    """
    pieces = []
    cursor = 0
    match = URL_REGEX.search(post_title, cursor)
    while match:
        dot_index = match.start(1)  # Group 1 is the dot itself
        pieces.append(post_title[cursor:dot_index])
        pieces.append(ZWSP)
        # Resume at the dot: a match needs a letter *before* a dot, so this dot
        # can't match again, but the letter after it can still start the next
        # match (handles chains such as "a.b.c").
        cursor = dot_index
        match = URL_REGEX.search(post_title, cursor)
    pieces.append(post_title[cursor:])
    return "".join(pieces)
def crosspost(
    entry_id=None,
    no_confirm=False,
    use_prev=False,
    debug=False,
    tweet=False,
    toot=False,
    skeet=False,
    insta=False,
    threads=False,
):
    """Screenshot an entry and crosspost it to the selected services.

    Args:
        entry_id: ID of the entry to post. Nothing happens if it is None.
        no_confirm: Skip the interactive "Ready to post?" prompt.
        use_prev: Reuse the screenshots and ``entry.json`` left in
            ``OUTPUT_DIR`` by a previous run instead of fetching the entry.
        debug: Log at DEBUG level instead of INFO.
        tweet, toot, skeet, insta, threads: Passed through to ``make_posts``
            to restrict posting to a single service.
    """
    logger = logging.getLogger(__name__)
    level = logging.DEBUG if debug else logging.INFO
    logger.setLevel(level)
    # Reuse the console handler added by an earlier call so that repeated
    # calls don't emit every message several times. Exact type check on
    # purpose: subclasses such as FileHandler are not console handlers.
    handler = next(
        (h for h in logger.handlers if type(h) is logging.StreamHandler), None
    )
    if handler is None:
        handler = logging.StreamHandler()
        logger.addHandler(handler)
    handler.setLevel(level)

    if entry_id is None:
        print("Entry ID required.")
        return

    entry_json_path = os.path.join(OUTPUT_DIR, "entry.json")
    num_screenshots = None
    entry_details = None
    driver = None
    try:
        if use_prev:
            # Use existing stored data and screenshots without fetch
            try:
                with open(entry_json_path, "r", encoding="utf-8") as json_file:
                    stored = json.load(json_file)
            except (OSError, ValueError) as err:
                # Missing or unreadable/corrupt file from the previous run
                logger.error(
                    f"Could not load stored entry data from {entry_json_path}: {err}"
                )
                return
            num_screenshots = stored["num_screenshots"]
            entry_details = stored["entry_details"]
        else:
            # Clear out output directory and fetch new data and screenshots
            cleanup()

            driver = get_driver()
            entry = get_entry(driver, entry_id)

            if entry is not None:
                screenshot_splits = get_screenshot(entry)
                num_screenshots = len(screenshot_splits)
                entry_details = get_entry_details(entry, screenshot_splits)
                # Persist what was fetched so a later --use-prev run can reuse it
                with open(entry_json_path, "w", encoding="utf-8") as json_file:
                    json.dump(
                        {
                            "num_screenshots": num_screenshots,
                            "entry_details": entry_details,
                        },
                        json_file,
                    )

        if not entry_details:
            print(f"Entry with ID {entry_id} not found.")
            return

        post_text = f"{format_post_title(entry_details['title'])}\n\n{entry_details['date']}"

        if no_confirm:
            logger.debug("Skipping confirmation step.")
            confirm = True
        else:
            # Open output directory to confirm images. `open` only exists on
            # macOS; elsewhere just skip the preview instead of aborting.
            try:
                subprocess.call(["open", "-R", OUTPUT_DIR])
            except OSError as err:
                logger.debug(f"Could not open {OUTPUT_DIR} for preview: {err}")
            print("=" * 20 + "\n" + post_text + "\n" + "=" * 20 + "\n\n")
            confirm = input("Ready to post? [y/n] ").strip().lower() == "y"

        if not confirm:
            print("Exiting without posting.")
            return

        post_ids = make_posts(
            post_text,
            entry_details["url"],
            num_screenshots,
            entry_details,
            tweet,
            toot,
            skeet,
            insta,
            threads,
        )
        result = update_entry_with_social_ids(entry_id, post_ids)
        print_results(result)
    finally:
        # Always shut the browser down, including on early return or error
        if driver is not None:
            driver.quit()
class element_has_text_value:
    """Wait condition that succeeds once an element's text equals a value.

    locator - tuple of arguments unpacked into driver.find_element
    text_value - exact text the located element must have
    Calling the instance with a driver returns the located element when its
    text matches, and False otherwise.
    """

    def __init__(self, locator, text_value):
        self.locator, self.text_value = locator, text_value

    def __call__(self, driver):
        candidate = driver.find_element(*self.locator)
        return candidate if candidate.text == self.text_value else False
20 | """ 21 | logger = logging.getLogger(__name__) 22 | driver.get(W3IGG_URL + "/single/" + entry_id) 23 | try: 24 | WebDriverWait(driver, 10).until( 25 | expected_conditions.presence_of_element_located( 26 | (By.CLASS_NAME, "timeline-entry") 27 | ) 28 | ) 29 | except TimeoutException: 30 | logger.error( 31 | "Element with id '{}' not found or page timed out.".format(entry_id) 32 | ) 33 | else: 34 | driver.execute_script("document.body.style.zoom = '200%'") 35 | driver.execute_script( 36 | "document.querySelector('.timeline-icon').style.display = 'none'" 37 | ) 38 | entry = driver.find_element(By.CLASS_NAME, "timeline-description") 39 | return entry 40 | 41 | 42 | def get_entry_details(entry, splits): 43 | """ 44 | Get the details needed to generate the post text and alt text. 45 | 46 | Args: 47 | entry: WebElement corresponding to the entry, or None if the entry can't be found. 48 | splits: Array of split information. Each entry is an object containing the y coordinate and the number of 49 | paragraphs included in the split. 50 | 51 | Returns: 52 | Object containing title, URL, date, and entry text. Entry text is an array, with each element corresponding to 53 | the entry text visible in the screenshot segments. 54 | """ 55 | title = entry.find_element(By.TAG_NAME, "h2").text 56 | date = entry.find_element(By.CLASS_NAME, "timestamp").text 57 | url = entry.find_element( 58 | By.CSS_SELECTOR, "button[title='Permalink']" 59 | ).get_attribute("data-url") 60 | 61 | entry_text = [] 62 | entry_text_element = entry.find_element(By.CLASS_NAME, "timeline-body-text-wrapper") 63 | if len(splits) == 1: 64 | # Image isn't long enough to be segmented, don't need to match alt text to segments 65 | entry_text = [entry_text_element.text] 66 | else: 67 | # This is janky, but the first paragraph of a post is not wrapped in a
<p> tag, so we have to get that text 68 | # somehow. 69 | full_text = entry_text_element.text 70 | paragraphs = entry.find_elements(By.TAG_NAME, "p") 71 | second_paragraph_text = paragraphs[ 72 | 0 73 | ].text # Second paragraph is the first <p>
def calculate_optimal_segments(
    entry_height, possible_splits, num_segments, margin=None
):
    """Return the locations at which to split the image into segments.

    The entry is divided into ``num_segments`` equal target heights. Walking
    the candidate split positions in order, the first pair of neighbouring
    candidates that brackets a target yields one split: whichever of the two
    is closer to the target (the earlier one is never used when it is 0,
    i.e. when no candidate has been passed yet).

    Args:
        entry_height: Total height of entry in px, multiplied by scaling factor
        possible_splits: Possible y coordinates to split the image, in
            ascending order. Each element corresponds to a paragraph, starting
            at the second paragraph in the entry.
        num_segments: Number of segments to target in result
        margin: Offset added to every returned y coordinate. Defaults to the
            module-level ``MARGIN``.

    Returns:
        List of dicts with the y coordinate at which to split the image
        ("y", margin included) and the value of the running paragraph counter
        when the split was chosen ("paragraphs", later used to align alt text
        with the image segments). At most ``num_segments - 1`` items; fewer if
        the candidates don't bracket every target. Empty when fewer than two
        segments are requested.
    """
    if margin is None:
        margin = MARGIN
    if not num_segments or num_segments < 2:
        # Nothing to split (also avoids dividing by zero below)
        return []

    segments = []
    rough_height = entry_height / num_segments  # Approx height of each segment
    target = rough_height  # Y position of the next target split
    last = 0
    paragraph_count = 0
    for current in possible_splits:
        paragraph_count += 1
        # `<=` so that a candidate landing exactly on the target is used; with
        # a strict comparison it was skipped and the target never advanced.
        if last < target <= current:
            # Choose the split that's closest to the target
            if last != 0 and abs(target - last) < abs(target - current):
                split_y = last
            else:
                split_y = current
            segments.append({"y": split_y + margin, "paragraphs": paragraph_count})
            if len(segments) == num_segments - 1:
                break
            paragraph_count = 0
            target += rough_height
        last = current
    return segments
def send_instagram(post_text, num_screenshots, entry_details):
    """
    Create and send the Instagram post for this entry.

    Args:
        post_text: Text to post as the Instagram caption.
        num_screenshots: Number of screenshots to be attached.
        entry_details: Object containing title, url, date, and array of entry text

    Returns:
        String containing the code of the Instagram post that was just posted, or None if the post fails.
    """
    logger = logging.getLogger(__name__)
    try:
        images = convert_images(num_screenshots)
        if not images:
            logger.error("No screenshots available for the Instagram post")
            return None

        client = authenticate()
        # Only the first segment's text is sent as the accessibility caption
        extra_data = {"custom_accessibility_caption": entry_details["entry_text"][0]}

        # Log before uploading so the message reflects what is in progress
        if len(images) == 1:
            logger.info("Sending Instagram post (one image)")
            media = client.photo_upload(images[0], post_text, extra_data=extra_data)
        else:
            logger.info("Sending Instagram post (multiple images)")
            media = client.album_upload(images, post_text, extra_data=extra_data)

        return media.code
    except Exception:
        # Best effort: a failure here must not stop the other services, so
        # record it with the traceback and report failure through None.
        logger.exception("Failed to send Instagram post")
        return None
def get_screenshot(entry):
    """Capture a screenshot of the entry element and save it as one or more PNGs in OUTPUT_DIR.

    If the element is taller than MAX_IMAGE_HEIGHT, the screenshot is split into two or three segments at paragraph
    breaks (three when it is taller than twice MAX_IMAGE_HEIGHT). Output files are named FILENAME_ROOT + index +
    ".png".

    Args:
        entry: WebElement to capture

    Returns:
        Array with split information (y coordinate of split, and number of paragraphs included in each split). The
        last item is always the bottom of the image and only has a "y" key.
    """

    logger = logging.getLogger(__name__)
    image_bottom = None
    splits = []
    num_segments = None
    if entry.size["height"] > MAX_IMAGE_HEIGHT:
        # This is a tall entry that we'll want to split into multiple screenshots
        num_segments = 3 if entry.size["height"] > (MAX_IMAGE_HEIGHT * 2) else 2

        # Avoid splitting through an image if there is one
        try:
            image = entry.find_element(By.CLASS_NAME, "captioned-image")
            if image:
                # Bottom edge of the image relative to the top of the entry, scaled by SCALING_FACTOR
                image_bottom = (
                    image.rect["y"] + image.rect["height"] - entry.location["y"]
                ) * SCALING_FACTOR
        except NoSuchElementException:
            # No captioned image in this entry, so every paragraph top stays a candidate
            pass

        # Get array of possible split coordinates (top of each <p>)
        paragraphs = entry.find_elements(By.TAG_NAME, "p")
        heights = [
            (p.rect["y"] - entry.location["y"]) * SCALING_FACTOR for p in paragraphs
        ]
        if image_bottom:
            # Drop candidates at or above the bottom of the image
            heights = list(filter(lambda x: x > image_bottom, heights))

        # Decide which of the split possibilities to go with
        splits = calculate_optimal_segments(
            entry.size["height"] * SCALING_FACTOR, heights, num_segments
        )

    # Grab screenshot
    logger.debug("Capturing screenshot")
    screenshot_path = os.path.join(OUTPUT_DIR, "screenshot.png")
    entry.screenshot(screenshot_path)

    # Paste the raw screenshot onto a light grey canvas that is MARGIN px larger on every side
    with Image.open(screenshot_path) as image:
        entry_with_margin = Image.new(
            "RGB",
            (image.width + MARGIN * 2, image.height + MARGIN * 2),
            (238, 238, 238),
        )
        entry_with_margin.paste(image, (MARGIN, MARGIN))

    # The bottom of the image closes the final segment
    splits.append({"y": entry_with_margin.height})
    if len(splits) > 1:
        logger.debug(f"Splitting screenshot into target {num_segments} segments.")
        last_crop = 0
        for ind, split in enumerate(splits):
            filename = os.path.join(OUTPUT_DIR, FILENAME_ROOT + str(ind) + ".png")
            cp = entry_with_margin.copy()
            # Each segment runs from the previous split down to this one
            cp = cp.crop((0, last_crop, entry_with_margin.width, split["y"]))
            # Saved at 75% of the captured size
            cp = cp.resize((int(cp.width * 0.75), int(cp.height * 0.75)))
            cp.save(filename)
            last_crop = split["y"]
    else:
        # No split points: save the whole image as the only segment, also at 75% size
        entry_with_margin = entry_with_margin.resize(
            (int(entry_with_margin.width * 0.75), int(entry_with_margin.height * 0.75))
        )
        entry_with_margin.save(os.path.join(OUTPUT_DIR, FILENAME_ROOT + "0.png"))

    os.remove(screenshot_path)  # Clean up intermediate file that's not needed anymore

    return splits
def authenticate():
    """
    Authenticate user with BlueSky identifier and password (password can be an app password).

    Returns:
        Tuple containing the JWT and DID. Note the JWT is short-lived and this script has no refresh functionality --
        that would need to be added if this was a persistent process rather than a one-off call.

    Raises:
        requests.exceptions.RequestException: If the request times out, fails, or is answered with an HTTP error
            status (e.g. bad credentials).
    """
    resp = requests.post(
        BLUESKY_BASE_URL + "/com.atproto.server.createSession",
        json={"identifier": BLUESKY_USERNAME, "password": BLUESKY_PASSWORD},
        timeout=(5, 20),  # requests never times out by default; same limits as the image upload
    )
    # Fail with the real HTTP error instead of a KeyError on a missing "accessJwt"
    resp.raise_for_status()
    resp_data = resp.json()
    return resp_data["accessJwt"], resp_data["did"]
def send_skeet(post_text, num_screenshots, entry_details):
    """
    Create and send the skeet for this entry.

    Args:
        post_text: Text to post as the skeet contents.
        num_screenshots: Number of screenshots to be attached.
        entry_details: Object containing title, url, date, and array of entry text

    Returns:
        String containing ID of the skeet that was just posted, or None if the post fails.
    """
    logger = logging.getLogger(__name__)
    try:
        jwt, did = authenticate()
        headers = {"Authorization": "Bearer " + jwt}

        # Upload screenshots
        blobs = []
        ratios = []
        for ind in range(num_screenshots):
            logger.debug(f"Uploading Bluesky image {ind}")
            blob, ratio = upload_blob(ind, headers)
            if blob is None:
                # The upload response carried no blob; don't post a broken embed
                logger.error(f"Failed to upload Bluesky image {ind}")
                return None
            blobs.append(blob)
            ratios.append(ratio)

        # bsky uses Z format, so swap the +00:00 offset for Z
        iso_timestamp = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")

        # Hydrate screenshot images with alt text
        images = []
        for ind, blob in enumerate(blobs):
            alt_text = entry_details["entry_text"][ind]
            if ind == 0:
                alt_text = entry_details["title"] + "\n" + alt_text
            images.append(
                {
                    "image": blob,
                    "alt": alt_text[:BLUESKY_ALT_TEXT_LIMIT],
                    "aspectRatio": ratios[ind],
                }
            )

        record = {
            "$type": "app.bsky.feed.post",
            "text": post_text,
            "createdAt": iso_timestamp,
            "embed": {"$type": "app.bsky.embed.images", "images": images},
        }

        # Create rich text information to turn the W3IGG URL into a clickable link.
        # Facet offsets are byte offsets, so search the UTF-8 encoded text. Look
        # for the entry URL itself (last occurrence) rather than the first
        # "https://", which could be part of the title.
        post_text_bytes = post_text.encode("utf-8")
        url_bytes = entry_details["url"].encode("utf-8")
        link_start = post_text_bytes.rfind(url_bytes)
        if link_start != -1:
            record["facets"] = [
                {
                    "features": [
                        {
                            "uri": entry_details["url"],
                            "$type": "app.bsky.richtext.facet#link",
                        }
                    ],
                    "index": {
                        "byteStart": link_start,
                        "byteEnd": link_start + len(url_bytes),
                    },
                }
            ]
        else:
            logger.warning("Entry URL not found in skeet text; posting without a link")

        post_data = {
            "repo": did,
            "collection": "app.bsky.feed.post",
            "record": record,
        }

        logger.info("Sending skeet")
        resp = requests.post(
            BLUESKY_BASE_URL + "/com.atproto.repo.createRecord",
            json=post_data,
            headers=headers,
            timeout=(5, 20),  # requests never times out by default
        )

        if resp.status_code != 200:
            logger.error(f"Failed to post skeet: {resp.status_code} {resp.text}")
            return None
        # Grab just the post ID without the full URI
        return resp.json()["uri"].split("/")[-1]

    except Exception:
        # Best effort: a failure here must not stop the other services, so
        # record it with the traceback and report failure through None.
        logger.exception("Failed to post skeet")
        return None
FILE_INPUT_XPATH = "//input[@type='file']"
ALT_TEXT_BUTTONS_XPATH = "//div[@role='button'][.//span[text()='Alt']]"
CONTENT_EDITABLE_DIV_XPATH = "//div[@contenteditable='true']"
LEXICAL_TEXT_SPAN_XPATH = "//span[@data-lexical-text='true']"
POST_BUTTON_XPATH = "//div[@role='button'][.//div[text()='Post']]"
VIEW_BUTTON_XPATH = "//a[text()='View']"


def _attach_screenshot(driver, ind, entry_details, post_button):
    """Attach screenshot number `ind` in the open post modal and fill in its alt text."""
    media_upload_input = driver.find_element(By.XPATH, FILE_INPUT_XPATH)
    # Attach image
    filename = os.path.abspath(
        os.path.join(OUTPUT_DIR, FILENAME_ROOT + str(ind) + ".png")
    )
    media_upload_input.send_keys(filename)

    # Add alt text to the image that was just attached
    alt_text = entry_details["entry_text"][ind]
    if ind == 0:
        alt_text = entry_details["title"] + " " + alt_text
    # Threads doesn't like newlines in alt text
    alt_text = alt_text.replace("\n", " ")
    alt_text = alt_text[:THREADS_ALT_TEXT_LIMIT]

    WebDriverWait(driver, 10).until(
        expected_conditions.presence_of_element_located(
            (By.XPATH, ALT_TEXT_BUTTONS_XPATH)
        )
    )
    # Extra pause kept from the original implementation; presumably the button is
    # not reliably clickable right after it appears.
    sleep(1)
    alt_text_buttons = driver.find_elements(By.XPATH, ALT_TEXT_BUTTONS_XPATH)
    # The last "Alt" button belongs to the image that was attached most recently.
    alt_text_buttons[-1].click()

    # Wait for alt text box to animate
    WebDriverWait(driver, 10).until(
        expected_conditions.presence_of_element_located(
            (
                By.XPATH,
                "//*[contains(text(), 'Describe this for people')]",
            )
        )
    )
    ActionChains(driver).send_keys(alt_text).perform()
    WebDriverWait(driver, 10).until(
        element_has_text_value((By.XPATH, LEXICAL_TEXT_SPAN_XPATH), alt_text)
    )

    driver.find_element(
        By.XPATH, "//div[@role='button'][.//*[text()='Done']]"
    ).click()
    WebDriverWait(driver, 10).until(expected_conditions.visibility_of(post_button))


def send_threads(post_text, num_screenshots, entry_details):
    """
    Create and send the Threads post for this entry.

    A WebDriver is created for the duration of the call and is always shut down before returning,
    whether the post succeeds or fails.

    Args:
        post_text: Text to post as the Threads post contents.
        num_screenshots: Number of screenshots to be attached.
        entry_details: Object containing title, url, date, and array of entry text

    Returns:
        String containing ID of the Threads post that was just posted, or None if the post fails.
    """
    logger = logging.getLogger(__name__)
    driver = get_driver(headless=True, screenshot_resolution=False)
    try:
        driver.get(THREADS_URL)
        try:
            WebDriverWait(driver, 10).until(
                expected_conditions.presence_of_element_located(
                    (By.XPATH, "//img[contains(@alt, 'profile picture')]")
                )
            )
        except TimeoutException:
            logger.error("Threads page didn't load within ten seconds.")
            return None

        # Open post modal
        create_button = driver.find_element(
            By.XPATH, "//div[@role='button'][.//*[name()='svg'][@aria-label='Create']]"
        )
        create_button.click()

        try:
            WebDriverWait(driver, 10).until(
                expected_conditions.presence_of_element_located(
                    (By.XPATH, POST_BUTTON_XPATH)
                )
            )
            WebDriverWait(driver, 10).until(
                expected_conditions.presence_of_element_located(
                    (By.XPATH, FILE_INPUT_XPATH)
                )
            )
        except TimeoutException:
            logger.error("Threads post modal didn't load within ten seconds.")
            return None

        # Grab a reference to the post button to use later
        post_button = driver.find_element(By.XPATH, POST_BUTTON_XPATH)

        # Attach screenshot files
        for ind in range(num_screenshots):
            _attach_screenshot(driver, ind, entry_details, post_button)

        # Enter post text
        driver.find_element(By.XPATH, CONTENT_EDITABLE_DIV_XPATH).click()
        sleep(1)
        ActionChains(driver).send_keys(post_text).perform()

        WebDriverWait(driver, 10).until(
            element_has_text_value((By.XPATH, CONTENT_EDITABLE_DIV_XPATH), post_text)
        )

        # Click post button
        # There is a hidden post button under the modal that is disabled, so we need the additional selector here
        # to avoid trying to click that one
        driver.find_element(
            By.XPATH,
            "//div[@role='dialog']//div[@role='button'][.//div[text()='Post']]",
        ).click()

        # Wait for post to send, then get its ID
        WebDriverWait(driver, 20).until(
            expected_conditions.visibility_of_element_located(
                (By.XPATH, VIEW_BUTTON_XPATH)
            )
        )
        view_button = driver.find_element(By.XPATH, VIEW_BUTTON_XPATH)
        href = view_button.get_attribute("href")
        return href.split("/")[-1]
    except Exception:
        logger.exception("Something else went wrong during Threads post.")
        return None
    finally:
        # Single clean-up point: the browser is closed on every path, including
        # failures while loading the page or opening the post modal.
        driver.quit()
def authenticate():
    """
    Authenticate to Twitter.

    Returns:
        Tuple containing the Client (for posting tweet) and the API (for v1.1 media upload endpoint)
    """
    client = tweepy.Client(
        consumer_key=TWITTER_API_KEY,
        consumer_secret=TWITTER_API_KEY_SECRET,
        access_token=TWITTER_ACCESS_TOKEN,
        access_token_secret=TWITTER_ACCESS_TOKEN_SECRET,
    )
    auth = tweepy.OAuth1UserHandler(
        TWITTER_API_KEY,
        TWITTER_API_KEY_SECRET,
        TWITTER_ACCESS_TOKEN,
        TWITTER_ACCESS_TOKEN_SECRET,
    )
    api = tweepy.API(auth)
    return client, api


def send_tweet(post_text, url, num_screenshots, entry_details):
    """
    Create and send the tweet for this entry.

    The entry URL is posted as a reply to the main tweet. If the main tweet is posted but that
    reply fails, the failure is logged and the main tweet's ID is still returned.

    Args:
        post_text: Text to post as the tweet contents.
        url: W3IGG URL for the entry.
        num_screenshots: Number of screenshots to be attached.
        entry_details: Object containing title, url, date, and array of entry text

    Returns:
        String containing ID of the tweet that was just posted, or None if the post fails.
    """
    logger = logging.getLogger(__name__)
    try:
        client, api = authenticate()

        # Upload screenshots
        media_ids = []
        for ind in range(num_screenshots):
            logger.debug("Uploading Twitter image %s", ind)
            resp = api.media_upload(
                os.path.join(OUTPUT_DIR, FILENAME_ROOT + str(ind) + ".png")
            )

            # Add alt text to the image that was just uploaded
            alt_text = entry_details["entry_text"][ind]
            if ind == 0:
                alt_text = entry_details["title"] + "\n" + alt_text
            api.create_media_metadata(resp.media_id, alt_text[:TWITTER_ALT_TEXT_LIMIT])

            media_ids.append(resp.media_id)

        # Send tweet
        logger.info("Sending tweet")
        tweet = client.create_tweet(text=post_text, user_auth=True, media_ids=media_ids)
        tweet_id = tweet.data["id"]
    except Exception:
        logger.exception("Failed to send tweet")
        return None

    # The main tweet exists from here on, so a failure of the follow-up reply must not
    # discard its ID.
    try:
        client.create_tweet(text=url, user_auth=True, in_reply_to_tweet_id=tweet_id)
    except Exception:
        logger.exception(
            "Tweet %s was posted, but the reply containing the URL failed", tweet_id
        )
    return tweet_id
# Firefox profile used when the caller does not pass one explicitly.
DEFAULT_FIREFOX_PROFILE_PATH = (
    "/Users/molly/Library/Application Support/Firefox/Profiles/9e81e71e.w3igg-archiver"
)


def get_driver(headless=True, screenshot_resolution=True, profile_path=None):
    """Get the driver with requisite options already set.

    Args:
        headless: Whether to start Firefox with the "--headless" argument.
        screenshot_resolution: Whether to apply the screenshot settings (pixel scaling
            preferences and an 800x5000 window).
        profile_path: Path of the Firefox profile to load. Defaults to
            DEFAULT_FIREFOX_PROFILE_PATH when omitted.

    Returns:
        Configured WebDriver instance.
    """
    options = webdriver.FirefoxOptions()
    if headless:
        options.add_argument("--headless")
    options.profile = webdriver.FirefoxProfile(
        profile_path or DEFAULT_FIREFOX_PROFILE_PATH,
    )
    options.set_preference("general.useragent.override", USER_AGENT)
    if screenshot_resolution:
        options.set_preference("layout.css.devPixelsPerPx", str(SCALING_FACTOR))
        # NOTE(review): confirm that this preference belongs inside this branch.
        options.set_preference("ui.textScaleFactor", 100)
    driver = webdriver.Firefox(options=options)
    if screenshot_resolution:
        driver.set_window_size(800, 5000)
    return driver