├── .gitignore
├── LICENSE
├── README.md
├── constants.py
├── crossposter.py
├── element_has_text_value.py
├── entry.py
├── image_size.py
├── insta.py
├── requirements.txt
├── screenshotter.py
├── skeet.py
├── tests
    ├── test_format_post_title.py
    └── test_image_size.py
├── threads.py
├── toot.py
├── tweet.py
├── update_entry.py
└── webdriver.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # poetry
 98 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | #   in version control.
109 | #   https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 | 
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 | 
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 | 
119 | # SageMath parsed files
120 | *.sage.py
121 | 
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 | 
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 | 
135 | # Rope project settings
136 | .ropeproject
137 | 
138 | # mkdocs documentation
139 | /site
140 | 
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 | 
146 | # Pyre type checker
147 | .pyre/
148 | 
149 | # pytype static type analyzer
150 | .pytype/
151 | 
152 | # Cython debug symbols
153 | cython_debug/
154 | 
155 | # PyCharm
156 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
159 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 | 
162 | .DS_Store
163 | .idea/
164 | out/
165 | secrets.py
166 | *.secret


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 Molly White
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # w3igg-crossposter
 2 | 
 3 | Automate crossposting web3isgoinggreat.com posts to social media. The tool captures a screenshot of the specified post,
 4 | splits it into up to three segments if the post is long, and then posts to Twitter, Mastodon, Bluesky, Instagram,
 5 | and/or Threads.
 6 | 
 7 | ## Usage
 8 | 
 9 | Install: Clone the repository, then run `pip install -r requirements.txt`.
10 | 
11 | Example call: `crossposter.py 2023-05-01-0`
12 | 
13 | Call the script with the numerical ID (`YYYY-MM-DD-INCREMENT`) of the entry to post. The script also takes several
14 | optional flags:
15 | 
16 | - `--no-confirm`: Skip the confirmation step, which previews the post text and prompts you to check the screenshot(s)
17 |   that will be posted
18 | - `--use-prev`: Use screenshots and post information stored from a past run
19 | - `--debug`: Show more verbose debug messages
20 | 
21 | Mutually exclusive optional flags:
22 | 
23 | - `--tweet`: Only post to Twitter
24 | - `--toot`: Only post to Mastodon
25 | - `--skeet`: Only post to Bluesky
26 | - `--insta`: Only post to Instagram
27 | - `--threads`: Only post to Threads
28 | 
29 | ## Secrets
30 | 
31 | The script requires a `secrets.py` file with the format:
32 | 
33 | ```
34 | TWITTER_API_KEY = ""
35 | TWITTER_API_KEY_SECRET = ""
36 | TWITTER_ACCESS_TOKEN = ""
37 | TWITTER_ACCESS_TOKEN_SECRET = ""
38 | 
39 | MASTODON_EMAIL = ""
40 | MASTODON_PASSWORD = ""
41 | 
42 | BLUESKY_USERNAME = ""
43 | BLUESKY_PASSWORD = ""
44 | 
45 | INSTAGRAM_USERNAME = ""
46 | INSTAGRAM_PASSWORD = ""
47 | ```
48 | 
49 | It also requires a `mastodon.secret` file generated via [this
50 | process](https://mastodonpy.readthedocs.io/en/stable/#usage), and a `gcp-secret.secret` GCP Service Account key file for
51 | a Service Account with Cloud Functions Invoker access to the `addSocialPostIds` cloud function.
52 | 
53 | Posting on Threads uses the same credentials as Instagram.


--------------------------------------------------------------------------------
/constants.py:
--------------------------------------------------------------------------------
# Base URL of the site; get_entry() appends "/single/<entry_id>" to it
W3IGG_URL = "https://web3isgoinggreat.com"
# Base URL of the project's cloud functions (the README mentions an `addSocialPostIds` function)
CLOUD_FUNCTIONS_URL = "https://us-central1-web3-334501.cloudfunctions.net"
THREADS_URL = "https://www.threads.net/"
# Browser user-agent string (presumably sent by the webdriver/HTTP clients -- confirm against callers)
USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/119.0"

# Screenshot constants
OUTPUT_DIR = "out"  # Directory holding the screenshots and the stored entry.json
FILENAME_ROOT = "entry"  # Per-segment images are named FILENAME_ROOT + index + extension
SCALING_FACTOR = 4  # Multiplier applied to element coordinates when computing split positions
MAX_IMAGE_HEIGHT = 700  # Entries taller than this are split into two segments (three above twice this)
MARGIN = 40  # Padding added around the screenshot and to each split coordinate

# Twitter
TWITTER_ALT_TEXT_LIMIT = 1000

# Mastodon
MASTODON_ALT_TEXT_LIMIT = 1500

# Bluesky
BLUESKY_BASE_URL = "https://bsky.social/xrpc"
BLUESKY_ALT_TEXT_LIMIT = 1000

# Threads
THREADS_ALT_TEXT_LIMIT = 1000

# Service keys, in the order print_results() reports them
SERVICES = ["twitter", "mastodon", "bluesky", "instagram", "threads"]
26 | 


--------------------------------------------------------------------------------
/crossposter.py:
--------------------------------------------------------------------------------
  1 | from constants import *
  2 | from entry import get_entry, get_entry_details
  3 | from screenshotter import get_screenshot
  4 | from update_entry import update_entry_with_social_ids
  5 | from webdriver import get_driver
  6 | 
  7 | from toot import send_toot
  8 | from tweet import send_tweet
  9 | from skeet import send_skeet
 10 | from insta import send_instagram
 11 | from threads import send_threads
 12 | 
 13 | import argparse
 14 | import json
 15 | import logging
 16 | import os.path
 17 | import re
 18 | import subprocess
 19 | 
# ANSI escape sequences for coloured terminal output
ANSI = {"GREEN": "\033[92m", "YELLOW": "\033[93m", "ENDC": "\033[0m"}
# Zero-width space; format_post_title() inserts it in front of dots in post titles
ZWSP = "\u200B"
# Matches a dot sitting directly between two letters; group 1 captures the dot itself
URL_REGEX = re.compile(
    r"[a-z](\.)[a-z]", flags=re.IGNORECASE
)  # This is a naive regex in that it doesn't check if it's a legit TLD, but it should serve the purpose
 25 | 
 26 | 
 27 | def cleanup():
 28 |     """Clean up output directory before run, or create it if it doesn't exist."""
 29 |     if os.path.exists(OUTPUT_DIR):
 30 |         # Erase all files in the output directory from last run
 31 |         for f in os.listdir(OUTPUT_DIR):
 32 |             os.remove(os.path.join(OUTPUT_DIR, f))
 33 |     else:
 34 |         # Create the output directory if it's missing
 35 |         os.mkdir(OUTPUT_DIR)
 36 | 
 37 | 
 38 | def format_post_title(post_title):
 39 |     title_result = post_title
 40 |     match = re.search(URL_REGEX, title_result)
 41 |     while match:
 42 |         title_result = (
 43 |             title_result[: match.regs[1][0]]
 44 |             + ZWSP
 45 |             + title_result[match.regs[1][0] : match.regs[1][1]]
 46 |             + title_result[match.regs[1][1] :]
 47 |         )
 48 |         match = re.search(URL_REGEX, title_result)
 49 |     return title_result
 50 | 
 51 | 
 52 | def make_posts(
 53 |     post_text,
 54 |     url,
 55 |     num_screenshots,
 56 |     entry_details,
 57 |     tweet,
 58 |     toot,
 59 |     skeet,
 60 |     insta,
 61 |     threads,
 62 | ):
 63 |     post_ids = {}
 64 |     post_text_with_url = f"{post_text}\n{url}"
 65 |     if tweet:
 66 |         post_ids["twitter"] = send_tweet(post_text, url, num_screenshots, entry_details)
 67 |     elif toot:
 68 |         post_ids["mastodon"] = send_toot(
 69 |             post_text_with_url, num_screenshots, entry_details
 70 |         )
 71 |     elif skeet:
 72 |         post_ids["bluesky"] = send_skeet(
 73 |             post_text_with_url, num_screenshots, entry_details
 74 |         )
 75 |     elif insta:
 76 |         post_ids["instagram"] = send_instagram(
 77 |             post_text_with_url, num_screenshots, entry_details
 78 |         )
 79 |     elif threads:
 80 |         post_ids["threads"] = send_threads(
 81 |             post_text_with_url, num_screenshots, entry_details
 82 |         )
 83 |     else:
 84 |         post_ids["twitter"] = send_tweet(post_text, url, num_screenshots, entry_details)
 85 |         post_ids["mastodon"] = send_toot(
 86 |             post_text_with_url, num_screenshots, entry_details
 87 |         )
 88 |         post_ids["bluesky"] = send_skeet(
 89 |             post_text_with_url, num_screenshots, entry_details
 90 |         )
 91 |         post_ids["instagram"] = send_instagram(
 92 |             post_text_with_url, num_screenshots, entry_details
 93 |         )
 94 |         post_ids["threads"] = send_threads(
 95 |             post_text_with_url, num_screenshots, entry_details
 96 |         )
 97 | 
 98 |     return post_ids
 99 | 
100 | 
def print_results(results):
    """Log the overall outcome, followed by one line per service present in the results.

    Args:
        results: Mapping with an "error" entry and, for each service posted to, either "Success" or a message.
    """
    log = logging.getLogger(__name__)
    if not results["error"]:
        log.info("✅ Posted without errors:")
    else:
        log.error("⚠️ Posted with errors:")

    for service in SERVICES:
        if service not in results:
            continue
        outcome = results[service]
        if outcome == "Success":
            log.info("✅ " + service)
        else:
            log.error("⚠️" + outcome)
114 | 
115 | 
def crosspost(
    entry_id=None,
    no_confirm=False,
    use_prev=False,
    debug=False,
    tweet=False,
    toot=False,
    skeet=False,
    insta=False,
    threads=False,
):
    """Fetch (or reload) an entry, optionally confirm with the user, then post it and report the results.

    Args:
        entry_id: ID of the entry to post. Nothing is posted when it is None.
        no_confirm: Skip the interactive confirmation prompt.
        use_prev: Reuse the screenshots and entry.json stored in the output directory instead of fetching.
        debug: Log at DEBUG level instead of INFO.
        tweet, toot, skeet, insta, threads: Restrict posting to a single service.
    """
    num_screenshots = None
    entry_details = None
    driver = None

    log_level = logging.DEBUG if debug else logging.INFO
    logger = logging.getLogger(__name__)
    stream_handler = logging.StreamHandler()
    logger.setLevel(log_level)
    stream_handler.setLevel(log_level)
    logger.addHandler(stream_handler)

    if entry_id is None:
        print("Entry ID required.")
        return

    try:
        if use_prev:
            # Use existing stored data and screenshots without fetch
            with open(os.path.join(OUTPUT_DIR, "entry.json"), "r") as json_file:
                stored = json.load(json_file)
            num_screenshots = stored["num_screenshots"]
            entry_details = stored["entry_details"]
        else:
            # Clear out output directory and fetch new data and screenshots
            cleanup()

            driver = get_driver()
            entry = get_entry(driver, entry_id)

            if entry is not None:
                screenshot_splits = get_screenshot(entry)
                num_screenshots = len(screenshot_splits)
                entry_details = get_entry_details(entry, screenshot_splits)
                snapshot = {
                    "num_screenshots": num_screenshots,
                    "entry_details": entry_details,
                }
                # Persist what was fetched so a later --use-prev run can reuse it
                with open(os.path.join(OUTPUT_DIR, "entry.json"), "w+") as json_file:
                    json.dump(snapshot, json_file)

        if not entry_details:
            print(f"Entry with ID {entry_id} not found.")
            return

        post_text = f"{format_post_title(entry_details['title'])}\n\n{entry_details['date']}"

        if no_confirm:
            logger.debug("Skipping confirmation step.")
            confirmed = True
        else:
            # Open output directory to confirm images
            subprocess.call(["open", "-R", OUTPUT_DIR])
            print("=" * 20 + "\n" + post_text + "\n" + "=" * 20 + "\n\n")
            confirmed = input("Ready to post? [y/n] ").lower() == "y"

        if not confirmed:
            print("Exiting without posting.")
            return

        post_ids = make_posts(
            post_text,
            entry_details["url"],
            num_screenshots,
            entry_details,
            tweet,
            toot,
            skeet,
            insta,
            threads,
        )
        print_results(update_entry_with_social_ids(entry_id, post_ids))
    finally:
        # Runs on every exit path above, including the early returns
        if driver is not None:
            driver.quit()
206 | 
207 | 
if __name__ == "__main__":
    # Command-line entry point: parse the arguments and forward them to crosspost() as keyword arguments
    parser = argparse.ArgumentParser(
        description="Crosspost a Web3 is Going Just Great entry to social media."
    )
    parser.add_argument("entry_id", help="ID of the W3IGG entry, in numerical format.")

    boolean_flags = (
        ("--no-confirm", "Send posts without prompting to confirm"),
        (
            "--use-prev",
            "Use screenshots and post information from previous run without re-fetching",
        ),
        ("--debug", "Print verbose debugging information"),
    )
    for flag, help_text in boolean_flags:
        parser.add_argument(flag, action="store_true", help=help_text)

    # Option to only post to one of the services
    service_group = parser.add_mutually_exclusive_group()
    for service_flag in ("--tweet", "--toot", "--skeet", "--insta", "--threads"):
        service_group.add_argument(service_flag, action="store_true")

    cli_args = parser.parse_args()
    crosspost(**vars(cli_args))
237 | 


--------------------------------------------------------------------------------
/element_has_text_value.py:
--------------------------------------------------------------------------------
 1 | class element_has_text_value(object):
 2 |     """An expectation for checking that an element has a particular text value.
 3 | 
 4 |     locator - used to find the element
 5 |     returns the WebElement once it has the specified text value
 6 |     """
 7 | 
 8 |     def __init__(self, locator, text_value):
 9 |         self.locator = locator
10 |         self.text_value = text_value
11 | 
12 |     def __call__(self, driver):
13 |         element = driver.find_element(*self.locator)
14 |         if element.text == self.text_value:
15 |             return element
16 |         else:
17 |             return False
18 | 


--------------------------------------------------------------------------------
/entry.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | from constants import *
 4 | 
 5 | from selenium.webdriver.common.by import By
 6 | from selenium.common.exceptions import TimeoutException
 7 | from selenium.webdriver.support import expected_conditions
 8 | from selenium.webdriver.support.wait import WebDriverWait
 9 | 
10 | 
def get_entry(driver, entry_id):
    """Load the single-entry page for the given ID and return the entry element.

    Args:
        driver: WebDriver instance
        entry_id: ID of the entry to capture (either in human-readable format, or YYYY-MM-DD-INCREMENT format)

    Returns:
        WebElement corresponding to the entry, or None if the entry can't be found.
    """
    logger = logging.getLogger(__name__)
    driver.get(W3IGG_URL + "/single/" + entry_id)

    try:
        # Give the page up to 10 seconds to render the timeline entry
        WebDriverWait(driver, 10).until(
            expected_conditions.presence_of_element_located(
                (By.CLASS_NAME, "timeline-entry")
            )
        )
    except TimeoutException:
        logger.error(f"Element with id '{entry_id}' not found or page timed out.")
        return None

    # Zoom the page and hide the timeline icon before the entry is captured
    driver.execute_script("document.body.style.zoom = '200%'")
    driver.execute_script(
        "document.querySelector('.timeline-icon').style.display = 'none'"
    )
    return driver.find_element(By.CLASS_NAME, "timeline-description")
40 | 
41 | 
def get_entry_details(entry, splits):
    """
    Collect the title, date, URL and per-segment text used for the post text and alt text.

    Args:
        entry: WebElement corresponding to the entry.
        splits: Array of split information. Each entry is an object containing the y coordinate and, except for
            the final one, the number of paragraphs included in the split.

    Returns:
        Object containing title, URL, date, and entry text. Entry text is an array, with each element corresponding to
        the entry text visible in the screenshot segments.
    """
    title = entry.find_element(By.TAG_NAME, "h2").text
    date = entry.find_element(By.CLASS_NAME, "timestamp").text
    permalink_button = entry.find_element(By.CSS_SELECTOR, "button[title='Permalink']")
    url = permalink_button.get_attribute("data-url")

    body = entry.find_element(By.CLASS_NAME, "timeline-body-text-wrapper")
    if len(splits) == 1:
        # Image isn't long enough to be segmented, don't need to match alt text to segments
        segment_texts = [body.text]
    else:
        # This is janky, but the first paragraph of a post is not wrapped in a <p> tag, so we have to get that text
        # somehow: take everything in the body that precedes the text of the first <p> (i.e. the second paragraph).
        full_text = body.text
        p_elements = entry.find_elements(By.TAG_NAME, "p")
        first_p_text = p_elements[0].text
        leading_text = full_text.split(first_p_text)[0]

        segment_texts = [leading_text + " "]
        in_segment = 1  # Paragraphs already placed in the current segment (the untagged first one counts)
        split_index = 0
        for p_element in p_elements:
            current_split = splits[split_index]
            if (
                "paragraphs" in current_split
                and current_split["paragraphs"] == in_segment
            ):
                # This segment is finished, start the next
                in_segment = 0
                split_index += 1
                segment_texts.append("")
            segment_texts[-1] += p_element.text + " "
            in_segment += 1

    return {"title": title, "date": date, "url": url, "entry_text": segment_texts}
93 | 


--------------------------------------------------------------------------------
/image_size.py:
--------------------------------------------------------------------------------
 1 | from constants import MARGIN
 2 | 
 3 | 
 4 | def calculate_optimal_segments(entry_height, possible_splits, num_segments):
 5 |     """Return an array of locations at which to split the image to achieve the target number of segments.
 6 | 
 7 |     Args:
 8 |         entry_height: Total height of entry in px, multiplied by scaling factor
 9 |         possible_splits: Possible y coordinates to split the image. Each element corresponds to a paragraph, starting
10 |             at the second paragraph in the entry.
11 |         num_segments: Number of segments to target in result
12 | 
13 |     Returns:
14 |         Array of objects containing the y coordinate at which to split the image, and the number of paragraphs included
15 |         in the segment (later used to align alt text with the image segments)
16 |     """
17 |     segments = []
18 |     rough_height = entry_height / num_segments  # Approx height of each segment
19 |     target = rough_height  # Y position of the next target split
20 |     last = 0
21 |     paragraph_count = 0
22 |     for current in possible_splits:
23 |         paragraph_count += 1
24 |         if last < target < current:
25 |             # Choose the split that's closest to the target
26 |             if last != 0 and abs(target - last) < abs(target - current):
27 |                 segments.append({"y": last + MARGIN, "paragraphs": paragraph_count})
28 |             else:
29 |                 segments.append({"y": current + MARGIN, "paragraphs": paragraph_count})
30 |             if len(segments) == num_segments - 1:
31 |                 break
32 |             paragraph_count = 0
33 |             target += rough_height
34 |         last = current
35 |     return segments
36 | 


--------------------------------------------------------------------------------
/insta.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import os
  3 | from constants import *
  4 | from instagrapi import Client
  5 | from PIL import Image
  6 | from secrets import *
  7 | 
  8 | 
  9 | def authenticate():
 10 |     """
 11 |     Authenticate user with Instagram username and password. Requires 2FA.
 12 |     Returns:
 13 |         Authenticated Instagram client.
 14 |     """
 15 |     client = Client()
 16 |     mfa = input("Instagram 2FA code: ")
 17 |     client.login(INSTAGRAM_USERNAME, INSTAGRAM_PASSWORD, verification_code=mfa)
 18 |     return client
 19 | 
 20 | 
 21 | def convert_images(num_screenshots):
 22 |     """
 23 |     Convert screenshots to JPGs because Instagram is weirdly picky about file format.
 24 |     Args:
 25 |         num_screenshots: Number of screenshots for this post.
 26 | 
 27 |     Returns:
 28 |         List of screenshot paths.
 29 |     """
 30 |     paths = []
 31 |     force_aspect_ratio = (
 32 |         num_screenshots > 1
 33 |     )  # When there are multiple images, the images will be cropped if the aspect ratios differ
 34 |     tallest_height = 0
 35 | 
 36 |     # Get height of tallest screenshot
 37 |     if force_aspect_ratio:
 38 |         for ind in range(num_screenshots):
 39 |             png_path = os.path.join(OUTPUT_DIR, FILENAME_ROOT + str(ind) + ".png")
 40 |             image = Image.open(png_path)
 41 |             width, height = image.size
 42 |             if height > tallest_height:
 43 |                 tallest_height = height
 44 | 
 45 |     for ind in range(num_screenshots):
 46 |         png_path = os.path.join(OUTPUT_DIR, FILENAME_ROOT + str(ind) + ".png")
 47 |         jpg_path = os.path.join(OUTPUT_DIR, FILENAME_ROOT + str(ind) + ".jpg")
 48 |         image = Image.open(png_path)
 49 |         if force_aspect_ratio:
 50 |             original_width, original_height = image.size
 51 |             if original_height < tallest_height:
 52 |                 canvas = Image.new(
 53 |                     "RGBA", (original_width, tallest_height), (238, 238, 238, 255)
 54 |                 )  # Canvas with light grey background in target size
 55 |                 offset = 0, int(round(tallest_height - original_height) / 2)
 56 |                 canvas.paste(image, offset)
 57 |                 image = canvas
 58 |         rgb = image.convert("RGB")  # Discard transparency
 59 |         rgb.save(jpg_path)
 60 |         paths.append(jpg_path)
 61 |     return paths
 62 | 
 63 | 
 64 | def send_instagram(post_text, num_screenshots, entry_details):
 65 |     """
 66 |     Create and send the Instagram post for this entry.
 67 | 
 68 |     Args:
 69 |         post_text: Text to post as the skeet contents.
 70 |         num_screenshots: Number of screenshots to be attached.
 71 |         entry_details: Object containing title, url, date, and array of entry text
 72 | 
 73 |     Returns:
 74 |         String containing ID of the Instagram post that was just posted, or None if the post fails.
 75 |     """
 76 |     logger = logging.getLogger(__name__)
 77 |     try:
 78 |         images = convert_images(num_screenshots)
 79 | 
 80 |         client = authenticate()
 81 |         if len(images) == 1:
 82 |             path = images[0]
 83 |             media = client.photo_upload(
 84 |                 path,
 85 |                 post_text,
 86 |                 extra_data={
 87 |                     "custom_accessibility_caption": entry_details["entry_text"][0]
 88 |                 },
 89 |             )
 90 |             logger.info("Sending Instagram post (one image)")
 91 | 
 92 |         else:
 93 |             media = client.album_upload(
 94 |                 images,
 95 |                 post_text,
 96 |                 extra_data={
 97 |                     "custom_accessibility_caption": entry_details["entry_text"][0]
 98 |                 },
 99 |             )
100 |             logger.info("Sending Instagram post (multiple images)")
101 | 
102 |         return media.code
103 |     except Exception as e:
104 |         print(e)
105 |         return None
106 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | attrs==23.1.0
 2 | beautifulsoup4==4.12.2
 3 | blurhash==1.1.4
 4 | cachetools==5.3.1
 5 | certifi==2023.7.22
 6 | chardet==5.2.0
 7 | charset-normalizer==3.2.0
 8 | decorator==4.4.2
 9 | exceptiongroup==1.1.3
10 | future==0.18.3
11 | google-auth==2.22.0
12 | h11==0.14.0
13 | huepy==1.2.1
14 | idna==3.4
15 | imageio==2.33.0
16 | imageio-ffmpeg==0.4.9
17 | iniconfig==2.0.0
18 | instagrapi==2.0.0
19 | Mastodon.py==1.8.1
20 | mock==5.1.0
21 | moviepy==1.0.3
22 | numpy==1.26.2
23 | oauthlib==3.2.2
24 | outcome==1.2.0
25 | packaging==23.2
26 | Pillow==10.0.0
27 | pluggy==1.3.0
28 | proglog==0.1.10
29 | pyasn1==0.5.0
30 | pyasn1-modules==0.3.0
31 | pycryptodomex==3.18.0
32 | pydantic==1.10.9
33 | PySocks==1.7.1
34 | pytest==7.4.3
35 | python-dateutil==2.8.2
36 | python-magic==0.4.27
37 | pytz==2023.3.post1
38 | PyYAML==6.0.1
39 | requests==2.31.0
40 | requests-oauthlib==1.3.1
41 | requests-toolbelt==1.0.0
42 | responses==0.24.1
43 | rsa==4.9
44 | schedule==1.2.1
45 | selenium==4.16.0
46 | six==1.16.0
47 | sniffio==1.3.0
48 | sortedcontainers==2.4.0
49 | soupsieve==2.5
50 | tenacity==8.2.3
51 | tomli==2.0.1
52 | tqdm==4.66.1
53 | trio==0.22.2
54 | trio-websocket==0.10.4
55 | tweepy==4.14.0
56 | typing_extensions==4.8.0
57 | urllib3==1.26.16
58 | wsproto==1.2.0
59 | 


--------------------------------------------------------------------------------
/screenshotter.py:
--------------------------------------------------------------------------------
 1 | from constants import *
 2 | from image_size import calculate_optimal_segments
 3 | 
 4 | from selenium.webdriver.common.by import By
 5 | from selenium.common.exceptions import NoSuchElementException
 6 | 
 7 | from PIL import Image
 8 | 
 9 | import logging
10 | import os
11 | 
12 | 
def get_screenshot(entry):
    """Capture a screenshot of an entry, splitting tall entries into several image files.

    The element is screenshotted to an intermediate file, pasted onto a (238, 238, 238) background that adds MARGIN
    pixels on every side, and written to OUTPUT_DIR as one or more PNGs named FILENAME_ROOT + index + ".png", each
    scaled to 75% of its size. Entries taller than MAX_IMAGE_HEIGHT target two segments (three when taller than twice
    that), split at paragraph tops chosen by calculate_optimal_segments.

    Args:
        entry: WebElement to capture

    Returns:
         Array with split information. The items returned by calculate_optimal_segments come first; the final item is
         always {"y": <height of the padded screenshot>}, marking the bottom of the last segment.
    """

    logger = logging.getLogger(__name__)
    output_scale = 0.75  # Saved images are downscaled to 75% of the captured size
    splits = []
    num_segments = None

    entry_height = entry.size["height"]
    if entry_height > MAX_IMAGE_HEIGHT:
        # This is a tall entry that we'll want to split into multiple screenshots
        num_segments = 3 if entry_height > (MAX_IMAGE_HEIGHT * 2) else 2
        entry_top = entry.location["y"]

        # Avoid splitting through an image if there is one
        image_bottom = None
        try:
            captioned_image = entry.find_element(By.CLASS_NAME, "captioned-image")
        except NoSuchElementException:
            pass
        else:
            image_rect = captioned_image.rect
            image_bottom = (
                image_rect["y"] + image_rect["height"] - entry_top
            ) * SCALING_FACTOR

        # Get array of possible split coordinates (top of each <p>)
        heights = [
            (p.rect["y"] - entry_top) * SCALING_FACTOR
            for p in entry.find_elements(By.TAG_NAME, "p")
        ]
        if image_bottom:
            heights = [height for height in heights if height > image_bottom]

        # Decide which of the split possibilities to go with
        splits = calculate_optimal_segments(
            entry_height * SCALING_FACTOR, heights, num_segments
        )

    # Grab screenshot
    logger.debug("Capturing screenshot")
    screenshot_path = os.path.join(OUTPUT_DIR, "screenshot.png")
    entry.screenshot(screenshot_path)

    try:
        with Image.open(screenshot_path) as image:
            entry_with_margin = Image.new(
                "RGB",
                (image.width + MARGIN * 2, image.height + MARGIN * 2),
                (238, 238, 238),
            )
            entry_with_margin.paste(image, (MARGIN, MARGIN))

        # The bottom edge of the padded image always closes the final segment
        splits.append({"y": entry_with_margin.height})
        if len(splits) > 1:
            logger.debug(f"Splitting screenshot into target {num_segments} segments.")
            last_crop = 0
            for ind, split in enumerate(splits):
                filename = os.path.join(OUTPUT_DIR, FILENAME_ROOT + str(ind) + ".png")
                # crop() already returns a new image, so no explicit copy is needed
                segment = entry_with_margin.crop(
                    (0, last_crop, entry_with_margin.width, split["y"])
                )
                segment = segment.resize(
                    (
                        int(segment.width * output_scale),
                        int(segment.height * output_scale),
                    )
                )
                segment.save(filename)
                last_crop = split["y"]
        else:
            entry_with_margin = entry_with_margin.resize(
                (
                    int(entry_with_margin.width * output_scale),
                    int(entry_with_margin.height * output_scale),
                )
            )
            entry_with_margin.save(os.path.join(OUTPUT_DIR, FILENAME_ROOT + "0.png"))
    finally:
        # Clean up the intermediate file even when processing or saving fails
        if os.path.exists(screenshot_path):
            os.remove(screenshot_path)

    return splits
88 | 


--------------------------------------------------------------------------------
/skeet.py:
--------------------------------------------------------------------------------
  1 | from constants import *
  2 | from PIL import Image
  3 | from secrets import *
  4 | from tenacity import retry, stop_after_attempt, retry_if_exception_type
  5 | 
  6 | from datetime import datetime, timezone
  7 | import logging
  8 | import os
  9 | import requests
 10 | 
 11 | 
 12 | def authenticate():
 13 |     """
 14 |     Authenticate user with BlueSky identifier and password (password can be an app password).
 15 | 
 16 |     Returns:
 17 |         Tuple containing the JWT and DID. Note the JWT is short-lived and this script has no refresh functionality --
 18 |         that would need to be added if this was a persistent process rather than a one-off call.
 19 |     """
 20 |     resp = requests.post(
 21 |         BLUESKY_BASE_URL + "/com.atproto.server.createSession",
 22 |         json={"identifier": BLUESKY_USERNAME, "password": BLUESKY_PASSWORD},
 23 |     )
 24 |     resp_data = resp.json()
 25 |     jwt = resp_data["accessJwt"]
 26 |     did = resp_data["did"]
 27 |     return jwt, did
 28 | 
 29 | 
 30 | @retry(
 31 |     stop=stop_after_attempt(3),
 32 |     retry=(
 33 |         retry_if_exception_type(requests.exceptions.Timeout)
 34 |         | retry_if_exception_type(requests.exceptions.ChunkedEncodingError)
 35 |     ),
 36 | )
 37 | def upload_blob(ind, headers):
 38 |     """Try to upload an image. This is prone to errors, so retry a few times if needed.
 39 | 
 40 |     Args:
 41 |         ind: Index of the image to try to upload
 42 |         headers: HTTP headers to include in the request.
 43 | 
 44 |     Returns:
 45 |         Blob to send along with the post to attach the image.
 46 |     """
 47 |     image_path = os.path.join(OUTPUT_DIR, FILENAME_ROOT + str(ind) + ".png")
 48 |     with open(image_path, "rb") as image_file:
 49 |         image = image_file.read()
 50 |         resp = requests.post(
 51 |             BLUESKY_BASE_URL + "/com.atproto.repo.uploadBlob",
 52 |             data=image,
 53 |             headers={**headers, "Content-Type": "image/png"},
 54 |             timeout=(5, 20),
 55 |         )
 56 |         blob = resp.json().get("blob")
 57 | 
 58 |         # Get image aspect ratio
 59 |         with Image.open(image_path) as pil_image:
 60 |             width, height = pil_image.size
 61 |             aspect_ratio = {"width": width, "height": height}
 62 | 
 63 |         return blob, aspect_ratio
 64 | 
 65 | 
 66 | def send_skeet(post_text, num_screenshots, entry_details):
 67 |     """
 68 |     Create and send the skeet for this entry.
 69 | 
 70 |     Args:
 71 |         post_text: Text to post as the skeet contents.
 72 |         num_screenshots: Number of screenshots to be attached.
 73 |         entry_details: Object containing title, url, date, and array of entry text
 74 | 
 75 |     Returns:
 76 |         String containing ID of the skeet that was just posted, or None if the post fails.
 77 |     """
 78 |     logger = logging.getLogger(__name__)
 79 |     try:
 80 |         (jwt, did) = authenticate()
 81 |         headers = {"Authorization": "Bearer " + jwt}
 82 | 
 83 |         # Upload screenshots
 84 |         blobs = []
 85 |         ratios = []
 86 |         for ind in range(num_screenshots):
 87 |             logger.debug(f"Uploading Bluesky image {ind}")
 88 |             blob, ratio = upload_blob(ind, headers)
 89 |             blobs.append(blob)
 90 |             ratios.append(ratio)
 91 | 
 92 |         iso_timestamp = datetime.now(timezone.utc).isoformat()
 93 |         iso_timestamp = (
 94 |             iso_timestamp[:-6] + "Z"
 95 |         )  # bsky uses Z format, so trim off +00:00 and add Z
 96 | 
 97 |         # Hydrate screenshot images with alt text
 98 |         images = []
 99 |         for ind, blob in enumerate(blobs):
100 |             alt_text = entry_details["entry_text"][ind]
101 |             if ind == 0:
102 |                 alt_text = entry_details["title"] + "\n" + alt_text
103 |             images.append(
104 |                 {
105 |                     "image": blob,
106 |                     "alt": alt_text[:BLUESKY_ALT_TEXT_LIMIT],
107 |                     "aspectRatio": ratios[ind],
108 |                 }
109 |             )
110 | 
111 |         # Create rich text information to turn the W3IGG URL into a clickable link
112 |         post_text_bytes = bytes(post_text, "utf-8")
113 |         facets = [
114 |             {
115 |                 "features": [
116 |                     {
117 |                         "uri": entry_details["url"],
118 |                         "$type": "app.bsky.richtext.facet#link",
119 |                     }
120 |                 ],
121 |                 "index": {
122 |                     "byteStart": post_text_bytes.find(bytes("https://", "utf-8")),
123 |                     "byteEnd": len(post_text_bytes),
124 |                 },
125 |             }
126 |         ]
127 | 
128 |         post_data = {
129 |             "repo": did,
130 |             "collection": "app.bsky.feed.post",
131 |             "record": {
132 |                 "$type": "app.bsky.feed.post",
133 |                 "text": post_text,
134 |                 "createdAt": iso_timestamp,
135 |                 "embed": {"$type": "app.bsky.embed.images", "images": images},
136 |                 "facets": facets,
137 |             },
138 |         }
139 | 
140 |         logger.info("Sending skeet")
141 |         resp = requests.post(
142 |             BLUESKY_BASE_URL + "/com.atproto.repo.createRecord",
143 |             json=post_data,
144 |             headers=headers,
145 |         )
146 | 
147 |         if resp.status_code != 200:
148 |             logger.error(f"Failed to post skeet: {resp.status_code} {resp.text}")
149 |             return None
150 |         # Grab just the post ID without the full URI
151 |         return resp.json()["uri"].split("/")[-1]
152 | 
153 |     except Exception as e:
154 |         print(e)
155 |         return None
156 | 


--------------------------------------------------------------------------------
/tests/test_format_post_title.py:
--------------------------------------------------------------------------------
 1 | from crossposter import format_post_title, ZWSP
 2 | 
 3 | 
 4 | def test_escape_url():
 5 |     title = "Crypto.com did something"
 6 |     formatted = format_post_title(title)
 7 |     assert formatted[6] == ZWSP
 8 | 
 9 | 
def test_dont_escape_number():
    """A decimal number such as "$1.5" must not pick up a ZWSP."""
    result = format_post_title("Someone hacked for $1.5 million")
    assert ZWSP not in result
14 | 
15 | 
def test_escape_multiple():
    """Two domain-like tokens in one title produce exactly two ZWSP characters."""
    result = format_post_title("Crypto.com something.net")
    assert result.count(ZWSP) == 2
20 | 


--------------------------------------------------------------------------------
/tests/test_image_size.py:
--------------------------------------------------------------------------------
 1 | from image_size import *
 2 | from constants import MARGIN
 3 | 
 4 | 
 5 | def test_calculate_splits_for_two_segment_post():
 6 |     possible_splits = [100, 200, 340, 450, 500, 600]
 7 |     splits = calculate_optimal_segments(700, possible_splits, 2)
 8 |     assert len(splits) == 1
 9 |     assert splits[0]["y"] == 340 + MARGIN
10 |     assert splits[0]["paragraphs"] == 4
11 | 
12 | 
def test_calculate_splits_for_three_segment_post():
    """With a total height of 700 and a three-segment target, the splits land on the 200 and 500 candidates."""
    candidates = [100, 200, 300, 400, 500, 600]
    result = calculate_optimal_segments(700, candidates, 3)
    assert len(result) == 2
    first, second = result[0], result[1]
    assert first["y"] == 200 + MARGIN
    assert first["paragraphs"] == 3
    assert second["y"] == 500 + MARGIN
    assert second["paragraphs"] == 2
21 | 
22 | 
def test_calculate_splits_for_wonky_post():
    """A three-segment target with only one candidate position still yields just that one split."""
    # This is tall enough that it should get three segments, but there aren't enough options for places to split
    result = calculate_optimal_segments(3200, [3000], 3)
    assert len(result) == 1
    only = result[0]
    assert only["y"] == 3000 + MARGIN
    assert only["paragraphs"] == 1
30 | 
31 | 
def test_calculate_splits_with_no_split_options():
    """With no candidate positions at all, no splits are returned."""
    result = calculate_optimal_segments(900, [], 2)
    assert result == []
35 | 


--------------------------------------------------------------------------------
/threads.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import os
  3 | from time import sleep
  4 | 
  5 | from constants import *
  6 | from element_has_text_value import element_has_text_value
  7 | from webdriver import get_driver
  8 | 
  9 | from selenium.webdriver.common.by import By
 10 | from selenium.webdriver.common.action_chains import ActionChains
 11 | from selenium.common.exceptions import (
 12 |     TimeoutException,
 13 | )
 14 | from selenium.webdriver.support import expected_conditions
 15 | from selenium.webdriver.support.wait import WebDriverWait
 16 | 
 17 | FILE_INPUT_XPATH = "//input[@type='file']"
 18 | ALT_TEXT_BUTTONS_XPATH = "//div[@role='button'][.//span[text()='Alt']]"
 19 | CONTENT_EDITABLE_DIV_XPATH = "//div[@contenteditable='true']"
 20 | LEXICAL_TEXT_SPAN_XPATH = "//span[@data-lexical-text='true']"
 21 | POST_BUTTON_XPATH = "//div[@role='button'][.//div[text()='Post']]"
 22 | VIEW_BUTTON_XPATH = "//a[text()='View']"
 23 | 
 24 | 
 25 | def send_threads(post_text, num_screenshots, entry_details):
 26 |     """
 27 |     Create and send the Threads post for this entry.
 28 | 
 29 |     Args:
 30 |         post_text: Text to post as the tweet contents.
 31 |         num_screenshots: Number of screenshots to be attached.
 32 |         entry_details: Object containing title, url, date, and array of entry text
 33 |         driver: WebDriver instance
 34 | 
 35 |     Returns:
 36 |         String containing ID of the Threads post that was just posted, or None if the post fails.
 37 |     """
 38 |     logger = logging.getLogger(__name__)
 39 |     driver = get_driver(headless=True, screenshot_resolution=False)
 40 |     driver.get(THREADS_URL)
 41 |     try:
 42 |         WebDriverWait(driver, 10).until(
 43 |             expected_conditions.presence_of_element_located(
 44 |                 (By.XPATH, "//img[contains(@alt, 'profile picture')]")
 45 |             )
 46 |         )
 47 |     except TimeoutException:
 48 |         logger.error("Threads page didn't load within ten seconds.")
 49 |         driver.quit()
 50 | 
 51 |     else:
 52 |         # Open post modal
 53 |         create_button = driver.find_element(
 54 |             By.XPATH, "//div[@role='button'][.//*[name()='svg'][@aria-label='Create']]"
 55 |         )
 56 |         create_button.click()
 57 | 
 58 |         try:
 59 |             WebDriverWait(driver, 10).until(
 60 |                 expected_conditions.presence_of_element_located(
 61 |                     (By.XPATH, POST_BUTTON_XPATH)
 62 |                 )
 63 |             )
 64 |             WebDriverWait(driver, 10).until(
 65 |                 expected_conditions.presence_of_element_located(
 66 |                     (By.XPATH, FILE_INPUT_XPATH)
 67 |                 )
 68 |             )
 69 |         except TimeoutException:
 70 |             logger.error("Threads post modal didn't load within ten seconds.")
 71 |             driver.quit()
 72 |         else:
 73 |             try:
 74 |                 # Grab a reference to the post button to use later
 75 |                 post_button = driver.find_element(By.XPATH, POST_BUTTON_XPATH)
 76 | 
 77 |                 # Attach screenshot files
 78 |                 for ind in range(num_screenshots):
 79 |                     media_upload_input = driver.find_element(By.XPATH, FILE_INPUT_XPATH)
 80 |                     # Attach image
 81 |                     filename = os.path.abspath(
 82 |                         os.path.join(OUTPUT_DIR, FILENAME_ROOT + str(ind) + ".png")
 83 |                     )
 84 |                     media_upload_input.send_keys(filename)
 85 | 
 86 |                     # Add alt text to the image that was just attached
 87 |                     alt_text = entry_details["entry_text"][ind]
 88 |                     if ind == 0:
 89 |                         alt_text = entry_details["title"] + "  " + alt_text
 90 |                     alt_text = alt_text.replace(
 91 |                         "\n", " "
 92 |                     )  # Threads doesn't like newlines in alt text
 93 |                     alt_text = alt_text[:THREADS_ALT_TEXT_LIMIT]
 94 | 
 95 |                     WebDriverWait(driver, 10).until(
 96 |                         expected_conditions.presence_of_element_located(
 97 |                             (By.XPATH, ALT_TEXT_BUTTONS_XPATH)
 98 |                         )
 99 |                     )
100 |                     sleep(1)  # Selenium hates me
101 |                     alt_text_buttons = driver.find_elements(
102 |                         By.XPATH, ALT_TEXT_BUTTONS_XPATH
103 |                     )
104 |                     alt_text_buttons[-1].click()
105 | 
106 |                     # Wait for alt text box to animate
107 |                     WebDriverWait(driver, 10).until(
108 |                         expected_conditions.presence_of_element_located(
109 |                             (
110 |                                 By.XPATH,
111 |                                 "//*[contains(text(), 'Describe this for people')]",
112 |                             )
113 |                         )
114 |                     )
115 |                     actions = ActionChains(driver)
116 |                     actions.send_keys(alt_text).perform()
117 |                     WebDriverWait(driver, 10).until(
118 |                         element_has_text_value(
119 |                             (By.XPATH, LEXICAL_TEXT_SPAN_XPATH), alt_text
120 |                         )
121 |                     )
122 | 
123 |                     driver.find_element(
124 |                         By.XPATH, "//div[@role='button'][.//*[text()='Done']]"
125 |                     ).click()
126 |                     WebDriverWait(driver, 10).until(
127 |                         expected_conditions.visibility_of(post_button)
128 |                     )
129 | 
130 |                 # Enter post text
131 |                 driver.find_element(By.XPATH, CONTENT_EDITABLE_DIV_XPATH).click()
132 |                 sleep(1)
133 |                 actions = ActionChains(driver)
134 |                 actions.send_keys(post_text).perform()
135 | 
136 |                 WebDriverWait(driver, 10).until(
137 |                     element_has_text_value(
138 |                         (By.XPATH, CONTENT_EDITABLE_DIV_XPATH), post_text
139 |                     )
140 |                 )
141 | 
142 |                 # Click post button
143 |                 # There is a hidden post button under the modal that is disabled, so we need the additional selector here
144 |                 # to avoid trying to click that one
145 |                 post_button = driver.find_element(
146 |                     By.XPATH,
147 |                     "//div[@role='dialog']//div[@role='button'][.//div[text()='Post']]",
148 |                 )
149 |                 post_button.click()
150 | 
151 |                 # Wait for post to send, then get its ID
152 |                 WebDriverWait(driver, 20).until(
153 |                     expected_conditions.visibility_of_element_located(
154 |                         (By.XPATH, VIEW_BUTTON_XPATH)
155 |                     )
156 |                 )
157 |                 view_button = driver.find_element(By.XPATH, VIEW_BUTTON_XPATH)
158 |                 href = view_button.get_attribute("href")
159 |                 post_id = href.split("/")[-1]
160 | 
161 |                 # Clean up and return post ID
162 |                 driver.quit()
163 |                 return post_id
164 |             except Exception as e:
165 |                 logger.error("Something else went wrong during Threads post.")
166 |                 logger.error(e)
167 |                 driver.quit()
168 | 
169 |     return None
170 | 


--------------------------------------------------------------------------------
/toot.py:
--------------------------------------------------------------------------------
 1 | from constants import *
 2 | from secrets import *
 3 | import logging
 4 | import os
 5 | 
 6 | from mastodon import Mastodon
 7 | 
 8 | 
 9 | def authenticate():
10 |     """
11 |     Authenticate to Mastodon.
12 | 
13 |     Returns:
14 |         Authenticated Mastodon client.
15 |     """
16 |     api = Mastodon(client_id="mastodon.secret")
17 |     api.log_in(MASTODON_EMAIL, MASTODON_PASSWORD, to_file="mastodon_user.secret")
18 |     return api
19 | 
20 | 
def send_toot(post_text, num_screenshots, entry_details):
    """
    Create and send the toot for this entry.

    Uploads each screenshot with alt text taken from the entry text (the title is prepended to the first image's
    alt text), then posts the status with the uploaded media attached. Any exception raised along the way is logged
    and turned into a None return value.

    Args:
        post_text: Text to post as the toot contents.
        num_screenshots: Number of screenshots to be attached.
        entry_details: Object containing title, url, date, and array of entry text

    Returns:
        String containing ID of the toot that was just posted, or None if the post fails.
    """
    logger = logging.getLogger(__name__)
    try:
        api = authenticate()

        # Upload screenshots
        media_ids = []
        for ind in range(num_screenshots):
            logger.debug(f"Uploading Mastodon image {ind}")
            # Get alt text for this image
            alt_text = entry_details["entry_text"][ind]
            if ind == 0:
                alt_text = entry_details["title"] + "\n" + alt_text

            # NOTE(review): Mastodon's focal point guidelines appear to describe y=1.0 as the top edge -- confirm
            # that (0, -1) really produces the intended "center top" crop.
            resp = api.media_post(
                os.path.join(OUTPUT_DIR, FILENAME_ROOT + str(ind) + ".png"),
                description=alt_text[:MASTODON_ALT_TEXT_LIMIT],
                focus=(0, -1),  # Set focus to center top of post
            )
            media_ids.append(resp.id)

        # Send toot
        logger.info("Sending toot.")
        toot = api.status_post(post_text, media_ids=media_ids)
        return str(toot["id"])

    except Exception:
        # Best-effort: a failure here should not abort the caller, but keep the traceback in the log
        logger.exception("Unexpected error while posting toot")
        return None
61 | 


--------------------------------------------------------------------------------
/tweet.py:
--------------------------------------------------------------------------------
 1 | from constants import *
 2 | from secrets import *
 3 | import logging
 4 | import os
 5 | import tweepy
 6 | 
 7 | 
 8 | def authenticate():
 9 |     """
10 |     Authenticate to Twitter.
11 | 
12 |     Returns:
13 |         Tuple containing the Client (for posting tweet) and the API (for v1.1 media upload endpoint)
14 |     """
15 |     client = tweepy.Client(
16 |         consumer_key=TWITTER_API_KEY,
17 |         consumer_secret=TWITTER_API_KEY_SECRET,
18 |         access_token=TWITTER_ACCESS_TOKEN,
19 |         access_token_secret=TWITTER_ACCESS_TOKEN_SECRET,
20 |     )
21 |     auth = tweepy.OAuth1UserHandler(
22 |         TWITTER_API_KEY,
23 |         TWITTER_API_KEY_SECRET,
24 |         TWITTER_ACCESS_TOKEN,
25 |         TWITTER_ACCESS_TOKEN_SECRET,
26 |     )
27 |     api = tweepy.API(auth)
28 |     return client, api
29 | 
30 | 
def send_tweet(post_text, url, num_screenshots, entry_details):
    """
    Create and send the tweet for this entry.

    Uploads each screenshot and sets its alt text (the title is prepended to the first image's alt text), posts the
    tweet, and then posts the URL as a reply to it. The reply is best-effort: if it fails after the main tweet has
    gone out, the failure is logged and the main tweet's ID is still returned.

    Args:
        post_text: Text to post as the tweet contents.
        url: W3IGG URL for the entry.
        num_screenshots: Number of screenshots to be attached.
        entry_details: Object containing title, url, date, and array of entry text

    Returns:
        String containing ID of the tweet that was just posted, or None if the post fails.
    """
    logger = logging.getLogger(__name__)
    try:
        (client, api) = authenticate()

        # Upload screenshots
        media_ids = []
        for ind in range(num_screenshots):
            logger.debug(f"Uploading Twitter image {ind}")
            resp = api.media_upload(
                os.path.join(OUTPUT_DIR, FILENAME_ROOT + str(ind) + ".png")
            )

            # Add alt text to the image that was just uploaded
            alt_text = entry_details["entry_text"][ind]
            if ind == 0:
                alt_text = entry_details["title"] + "\n" + alt_text
            api.create_media_metadata(resp.media_id, alt_text[:TWITTER_ALT_TEXT_LIMIT])

            media_ids.append(resp.media_id)

        # Send tweet
        logger.info("Sending tweet")
        tweet = client.create_tweet(text=post_text, user_auth=True, media_ids=media_ids)
        tweet_id = tweet.data["id"]
    except Exception:
        # Best-effort: a failure here should not abort the caller, but keep the traceback in the log
        logger.exception("Unexpected error while posting tweet")
        return None

    # The main tweet is live at this point, so a failed reply must not discard its ID
    try:
        client.create_tweet(text=url, user_auth=True, in_reply_to_tweet_id=tweet_id)
    except Exception:
        logger.exception(
            "Tweet %s was posted, but the follow-up reply with the URL failed", tweet_id
        )
    return tweet_id
74 | 


--------------------------------------------------------------------------------
/update_entry.py:
--------------------------------------------------------------------------------
 1 | from constants import CLOUD_FUNCTIONS_URL
 2 | from google.oauth2 import service_account
 3 | from google.auth.transport.requests import AuthorizedSession, Request
 4 | 
 5 | 
 6 | def update_entry_with_social_ids(entry_id, post_ids):
 7 |     """Add any post IDs to the W3IGG entry."""
 8 |     function_url = CLOUD_FUNCTIONS_URL + "/addSocialPostIds"
 9 |     credentials = service_account.IDTokenCredentials.from_service_account_file(
10 |         "gcp-secret.secret",
11 |         target_audience=function_url,
12 |     )
13 |     session = AuthorizedSession(credentials)
14 | 
15 |     resp = session.post(
16 |         function_url,
17 |         json={"entryId": entry_id, **post_ids},
18 |     )
19 |     resp.raise_for_status()
20 |     return resp.json()
21 | 


--------------------------------------------------------------------------------
/webdriver.py:
--------------------------------------------------------------------------------
 1 | from constants import *
 2 | from selenium import webdriver
 3 | 
 4 | 
 5 | def get_driver(headless=True, screenshot_resolution=True):
 6 |     """Get the driver with requisite options already set.
 7 | 
 8 |     Returns:
 9 |         Configured WebDriver instance.
10 |     """
11 |     options = webdriver.FirefoxOptions()
12 |     if headless:
13 |         options.add_argument("--headless")
14 |     options.profile = webdriver.FirefoxProfile(
15 |         "/Users/molly/Library/Application Support/Firefox/Profiles/9e81e71e.w3igg-archiver",
16 |     )
17 |     options.set_preference("general.useragent.override", USER_AGENT)
18 |     if screenshot_resolution:
19 |         options.set_preference("layout.css.devPixelsPerPx", str(SCALING_FACTOR))
20 |         options.set_preference("ui.textScaleFactor", 100)
21 |     driver = webdriver.Firefox(options=options)
22 |     if screenshot_resolution:
23 |         driver.set_window_size(800, 5000)
24 |     return driver
25 | 


--------------------------------------------------------------------------------