├── requirements.txt ├── .gitignore ├── app.json ├── .editorconfig ├── Dockerfile ├── gen.py ├── functions.py ├── reply.py ├── README.md ├── main.py └── LICENSE /requirements.txt: -------------------------------------------------------------------------------- 1 | Mastodon.py==1.5.1 2 | markovify==0.8.2 3 | beautifulsoup4==4.9.1 4 | requests==2.24.0 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | clientcred.secret 2 | usercred.secret 3 | run.sh 4 | corpus.txt 5 | meme.jpg 6 | toots.db 7 | toots.db-journal 8 | toots.db-wal 9 | __pycache__/ 10 | .vscode/ 11 | .editorconfig 12 | .*.swp 13 | config.json 14 | -------------------------------------------------------------------------------- /app.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "mstdn-ebooks", 3 | "description": "An ebooks bot for Mastodon (and compatible) users", 4 | "repository": "https://github.com/Lynnesbian/mstdn-ebooks", 5 | "keywords": ["python", "mastodon"], 6 | "website":"https://fedi.lynnesbian.space/@lynnesbian", 7 | "image":"heroku/heroku" 8 | } 9 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # top-most EditorConfig file 2 | root = true 3 | 4 | # Unix-style newlines with a newline ending every file 5 | [*] 6 | end_of_line = lf 7 | insert_final_newline = true 8 | trim_trailing_whitespace = true 9 | indent_style = tab 10 | indent_size = 2 11 | 12 | # Markdown 13 | [*.md] 14 | indent_size = 2 15 | 16 | # Python 17 | [*.py] 18 | indent_style = tab 19 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3-alpine 2 | 3 | ADD requirements.txt /ebooks/ 4 | 5 | WORKDIR /ebooks/ 6 | VOLUME /ebooks/data/ 7 | 8 | RUN apk add --virtual .build-deps gcc musl-dev libffi-dev openssl-dev \ 9 | && pip install -r requirements.txt \ 10 | && apk del --purge .build-deps \ 11 | && ln -s data/config.json . \ 12 | && ln -s data/toots.db . 13 | 14 | ADD *.py /ebooks/ 15 | 16 | RUN (echo "*/30 * * * * cd /ebooks/ && python gen.py"; \ 17 | echo "5 */2 * * * cd /ebooks/ && python main.py"; \ 18 | echo "@reboot cd /ebooks/ && python reply.py") | crontab - 19 | 20 | ENV ebooks_site=https://botsin.space 21 | 22 | CMD (test -f data/config.json || echo "{\"site\":\"${ebooks_site}\"}" > data/config.json) \ 23 | && (test -f data/toots.db || (python main.py && exit)) \ 24 | && exec crond -f -L /dev/stdout 25 | -------------------------------------------------------------------------------- /gen.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this 4 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. 5 | 6 | from mastodon import Mastodon 7 | import argparse, json, re 8 | import functions 9 | 10 | parser = argparse.ArgumentParser(description='Generate and post a toot.') 11 | parser.add_argument('-c', '--cfg', dest='cfg', default='config.json', nargs='?', 12 | help="Specify a custom location for config.json.") 13 | parser.add_argument('-s', '--simulate', dest='simulate', action='store_true', 14 | help="Print the toot without actually posting it. Use this to make sure your bot's actually working.") 15 | 16 | args = parser.parse_args() 17 | 18 | cfg = json.load(open(args.cfg)) 19 | 20 | client = None 21 | 22 | if not args.simulate: 23 | client = Mastodon( 24 | client_id=cfg['client']['id'], 25 | client_secret=cfg['client']['secret'], 26 | access_token=cfg['secret'], 27 | api_base_url=cfg['site']) 28 | 29 | if __name__ == '__main__': 30 | toot = functions.make_toot(cfg) 31 | if cfg['strip_paired_punctuation']: 32 | toot = re.sub(r"[\[\]\(\)\{\}\"“”«»„]", "", toot) 33 | if not args.simulate: 34 | try: 35 | client.status_post(toot, visibility = 'unlisted', spoiler_text = cfg['cw']) 36 | except Exception as err: 37 | toot = "An error occurred while submitting the generated post. Contact lynnesbian@fedi.lynnesbian.space for assistance." 38 | client.status_post(toot, visibility = 'unlisted', spoiler_text = "Error!") 39 | try: 40 | print(toot) 41 | except UnicodeEncodeError: 42 | print(toot.encode("ascii", "ignore")) # encode as ASCII, dropping any non-ASCII characters 43 | -------------------------------------------------------------------------------- /functions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this 4 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. 5 | 6 | import markovify 7 | from bs4 import BeautifulSoup 8 | import re, multiprocessing, sqlite3, shutil, os, html 9 | 10 | def make_sentence(output, cfg): 11 | class nlt_fixed(markovify.NewlineText): #modified version of NewlineText that never rejects sentences 12 | def test_sentence_input(self, sentence): 13 | return True #all sentences are valid <3 14 | 15 | shutil.copyfile("toots.db", "toots-copy.db") #create a copy of the database because reply.py will be using the main one 16 | db = sqlite3.connect("toots-copy.db") 17 | db.text_factory = str 18 | c = db.cursor() 19 | if cfg['learn_from_cw']: 20 | toots = c.execute("SELECT content FROM `toots` ORDER BY RANDOM() LIMIT 10000").fetchall() 21 | else: 22 | toots = c.execute("SELECT content FROM `toots` WHERE cw = 0 ORDER BY RANDOM() LIMIT 10000").fetchall() 23 | 24 | if len(toots) == 0: 25 | output.send("Database is empty! Try running main.py.") 26 | return 27 | 28 | model = nlt_fixed( 29 | "\n".join([toot[0] for toot in toots]) 30 | ) 31 | 32 | db.close() 33 | os.remove("toots-copy.db") 34 | 35 | toots_str = None 36 | 37 | sentence = None 38 | tries = 0 39 | while sentence is None and tries < 10: 40 | sentence = model.make_short_sentence(500, tries=10000) 41 | tries = tries + 1 42 | 43 | # optionally remove mentions 44 | if cfg['mention_handling'] == 1: 45 | sentence = re.sub(r"^\S*@\u200B\S*\s?", "", sentence) 46 | elif cfg['mention_handling'] == 0: 47 | sentence = re.sub(r"\S*@\u200B\S*\s?", "", sentence) 48 | 49 | output.send(sentence) 50 | 51 | def make_toot(cfg): 52 | toot = None 53 | pin, pout = multiprocessing.Pipe(False) 54 | p = multiprocessing.Process(target = make_sentence, args = [pout, cfg]) 55 | p.start() 56 | p.join(5) #wait 5 seconds to get something 57 | if p.is_alive(): #if it's still trying to make a toot after 5 seconds 58 | p.terminate() 59 | p.join() 60 | else: 61 | toot = pin.recv() 62 | 63 | if toot == None: 64 | toot = "Toot generation failed! Contact Lynne (lynnesbian@fedi.lynnesbian.space) for assistance." 65 | return toot 66 | 67 | def extract_toot(toot): 68 | toot = html.unescape(toot) # convert HTML escape codes to text 69 | soup = BeautifulSoup(toot, "html.parser") 70 | for lb in soup.select("br"): # replace
with linebreak 71 | lb.replace_with("\n") 72 | 73 | for p in soup.select("p"): # ditto for

74 | p.replace_with("\n") 75 | 76 | for ht in soup.select("a.hashtag"): # convert hashtags from links to text 77 | ht.unwrap() 78 | 79 | for link in soup.select("a"): #ocnvert = cfg['max_thread_length']: 50 | # stop replying 51 | print("didn't reply (max_thread_length exceeded)") 52 | return 53 | 54 | mention = extract_toot(notification['status']['content']) 55 | if (mention == "pin") or (mention == "unpin"): #check for keywords 56 | print("Found pin/unpin") 57 | #get a list of people the bot is following 58 | validusers = client.account_following(me) 59 | for user in validusers: 60 | if user["id"] == notification["account"]["id"]: #user is #valid 61 | print("User is valid") 62 | visibility = notification['status']['visibility'] 63 | if visibility == "public": 64 | visibility = "unlisted" 65 | if mention == "pin": 66 | print("pin received, pinning") 67 | client.status_pin(pin) 68 | client.status_post("Toot pinned!", post_id, visibility=visibility, spoiler_text = cfg['cw']) 69 | else: 70 | print("unpin received, unpinning") 71 | client.status_post("Toot unpinned!", post_id, visibility=visibility, spoiler_text = cfg['cw']) 72 | client.status_unpin(pin) 73 | else: 74 | print("User is not valid") 75 | else: 76 | toot = functions.make_toot(cfg) #generate a toot 77 | toot = acct + " " + toot #prepend the @ 78 | print(acct + " says " + mention) #logging 79 | visibility = notification['status']['visibility'] 80 | if visibility == "public": 81 | visibility = "unlisted" 82 | client.status_post(toot, post_id, visibility=visibility, spoiler_text = cfg['cw']) #send toost 83 | print("replied with " + toot) #logging 84 | 85 | rl = ReplyListener() 86 | client.stream_user(rl) #go! 87 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mstdn-ebooks 2 | **Lynnear Edition** 3 | 4 | This version makes quite a few changes from [the original](https://github.com/Jess3Jane/mastodon-ebooks), such as: 5 | - Unicode support 6 | - Non-Markov stuff 7 | - Stores toots in a sqlite database rather than a text file 8 | - Doesn't unnecessarily redownload all toots every time 9 | 10 | ## FediBooks 11 | Before you use mstdn-ebooks to create your own ebooks bot, I recommend checking out [FediBooks](https://fedibooks.com). Compared to mstdn-ebooks, FediBooks offers a few advantages: 12 | - Hosted and maintained by someone else - you don't have to worry about updating, keeping the computer on, etc 13 | - No installation required 14 | - A nice UI for managing your bot(s) 15 | - Easy configuration 16 | 17 | However, there are still a few reasons you might want to use mstdn-ebooks instead: 18 | - Your data stays local to your machine 19 | - More customisation potential - you can edit mstdn-ebooks to add functionality 20 | - Replying more (in)frequently than FediBooks allows 21 | 22 | Like mstdn-ebooks, FediBooks is free, both as in free of charge and free to modify, self-host, and more. 23 | 24 | ## Secure Fetch 25 | Secure fetch (aka authorised fetches, authenticated fetches, secure mode...) is *not* supported by mstdn-ebooks, and will fail to download any posts from users on instances with secure fetch enabled. For more information, see [this wiki page](https://github.com/Lynnesbian/mstdn-ebooks/wiki/Secure-fetch). 26 | 27 | ## Install/usage Guide 28 | An installation and usage guide is available [here](https://cloud.lynnesbian.space/s/jozbRi69t4TpD95). It's primarily targeted at Linux, but it should be possible on BSD, macOS, etc. I've also put some effort into providing steps for Windows, but I can't make any guarantees as to its effectiveness. 29 | 30 | ### Docker 31 | While there is a Docker version provided, it is **not guaranteed to work**. I personally don't use Docker and don't know how the Dockerfile works; it was create over a year ago by someone else and hasn't been updated since. It might work for you, it might not. If you'd like to help update the Dockerfile, please get in touch with me on the Fediverse. 32 | 33 | ## Compatibility 34 | | Software | Downloading statuses | Posting | Replying | 35 | |-----------|-------------------------------------------------------------------|---------|-------------------------------------------------------------| 36 | | Mastodon | Yes | Yes | Yes | 37 | | Pleroma | [Somewhat](https://git.pleroma.social/pleroma/pleroma/issues/866) | Yes | [No](https://git.pleroma.social/pleroma/pleroma/issues/416) | 38 | | Misskey | Yes | No | No | 39 | | diaspora* | [No](https://github.com/diaspora/diaspora/issues/7422) | No | No | 40 | | Others | Maybe | No | No | 41 | 42 | *Note: Bots are only supported on Mastodon and Pleroma instances. Bots can learn from users on other instances, but the bot itself must run on either a Mastodon or Pleroma instance.* 43 | 44 | mstdn-ebooks uses ActivityPub to download posts. This means that it is not dependant on any particular server software, and should work with anything that (properly) implements ActivityPub. Any software that does not support ActivityPub (e.g. diaspora*) is not supported, and won't work. 45 | 46 | I recommend that you create your bot's account on a Mastodon instance. Creating a bot on a Pleroma instance means that your bot will be unable to reply, although posting will work just fine. However, even if your bot is on a Mastodon instance, it will be able to learn from any Pleroma or Misskey users just fine. 47 | 48 | ## Configuration 49 | Configuring mstdn-ebooks is accomplished by editing `config.json`. If you want to use a different file for configuration, specify it with the `--cfg` argument. For example, if you want to use `/home/lynne/c.json` instead, you would run `python3 main.py --cfg /home/lynne/c.json` instead of just `python3 main.py` 50 | 51 | | Setting | Default | Meaning | 52 | |--------------------|------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| 53 | | site | https://botsin.space | The instance your bot will log in to and post from. This must start with `https://` or `http://` (preferably the latter) | 54 | | cw | null | The content warning (aka subject) mstdn-ebooks will apply to non-error posts. | 55 | | instance_blacklist | ["bofa.lol", "witches.town", "knzk.me"] | If your bot is following someone from a blacklisted instance, it will skip over them and not download their posts. This is useful for ensuring that mstdn-ebooks doesn't waste time trying to download posts from dead instances, without you having to unfollow the user(s) from them. | 56 | | learn_from_cw | false | If true, mstdn-ebooks will learn from CW'd posts. | 57 | | mention_handling | 1 | 0: Never use mentions. 1: Only generate fake mentions in the middle of posts, never at the start. 2: Use mentions as normal (old behaviour). | 58 | | max_thread_length | 15 | The maximum number of bot posts in a thread before it stops replying. A thread can be 10 or 10000 posts long, but the bot will stop after it has posted `max_thread_length` times. | 59 | | strip_paired_punctuation | false | If true, mstdn-ebooks will remove punctuation that commonly appears in pairs, like " and (). This avoids the issue of posts that open a bracket (or quote) without closing it. | 60 | 61 | ## Donating 62 | Please don't feel obligated to donate at all. 63 | 64 | - [Ko-Fi](https://ko-fi.com/lynnesbian) allows you to make one-off payments in increments of AU$3. These payments are not taxed. 65 | - [PayPal](https://paypal.me/lynnesbian) allows you to make one-off payments of any amount in a range of currencies. These payments may be taxed. 66 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # toot downloader version two!! 3 | # This Source Code Form is subject to the terms of the Mozilla Public 4 | # License, v. 2.0. If a copy of the MPL was not distributed with this 5 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. 6 | 7 | from mastodon import Mastodon, MastodonUnauthorizedError 8 | from os import path 9 | from bs4 import BeautifulSoup 10 | import os, sqlite3, signal, sys, json, re, shutil, argparse 11 | import requests 12 | import functions 13 | 14 | parser = argparse.ArgumentParser(description='Log in and download posts.') 15 | parser.add_argument('-c', '--cfg', dest='cfg', default='config.json', nargs='?', 16 | help="Specify a custom location for config.json.") 17 | 18 | args = parser.parse_args() 19 | 20 | scopes = ["read:statuses", "read:accounts", "read:follows", "write:statuses", "read:notifications", "write:accounts"] 21 | #cfg defaults 22 | 23 | cfg = { 24 | "site": "https://botsin.space", 25 | "cw": None, 26 | "instance_blacklist": ["bofa.lol", "witches.town", "knzk.me"], # rest in piece 27 | "learn_from_cw": False, 28 | "mention_handling": 1, 29 | "max_thread_length": 15, 30 | "strip_paired_punctuation": False 31 | } 32 | 33 | try: 34 | cfg.update(json.load(open(args.cfg, 'r'))) 35 | except FileNotFoundError: 36 | open(args.cfg, "w").write("{}") 37 | 38 | print("Using {} as configuration file".format(args.cfg)) 39 | 40 | if not cfg['site'].startswith("https://") and not cfg['site'].startswith("http://"): 41 | print("Site must begin with 'https://' or 'http://'. Value '{}' is invalid - try 'https://{}' instead.".format(cfg['site'])) 42 | sys.exit(1) 43 | 44 | if "client" not in cfg: 45 | print("No application info -- registering application with {}".format(cfg['site'])) 46 | client_id, client_secret = Mastodon.create_app("mstdn-ebooks", 47 | api_base_url=cfg['site'], 48 | scopes=scopes, 49 | website="https://github.com/Lynnesbian/mstdn-ebooks") 50 | 51 | cfg['client'] = { 52 | "id": client_id, 53 | "secret": client_secret 54 | } 55 | 56 | if "secret" not in cfg: 57 | print("No user credentials -- logging in to {}".format(cfg['site'])) 58 | client = Mastodon(client_id = cfg['client']['id'], 59 | client_secret = cfg['client']['secret'], 60 | api_base_url=cfg['site']) 61 | 62 | print("Open this URL and authenticate to give mstdn-ebooks access to your bot's account: {}".format(client.auth_request_url(scopes=scopes))) 63 | cfg['secret'] = client.log_in(code=input("Secret: "), scopes=scopes) 64 | 65 | json.dump(cfg, open(args.cfg, "w+")) 66 | 67 | def extract_toot(toot): 68 | toot = functions.extract_toot(toot) 69 | toot = toot.replace("@", "@\u200B") #put a zws between @ and username to avoid mentioning 70 | return(toot) 71 | 72 | client = Mastodon( 73 | client_id=cfg['client']['id'], 74 | client_secret = cfg['client']['secret'], 75 | access_token=cfg['secret'], 76 | api_base_url=cfg['site']) 77 | 78 | try: 79 | me = client.account_verify_credentials() 80 | except MastodonUnauthorizedError: 81 | print("The provided access token in {} is invalid. Please delete {} and run main.py again.".format(args.cfg, args.cfg)) 82 | sys.exit(1) 83 | 84 | following = client.account_following(me.id) 85 | 86 | db = sqlite3.connect("toots.db") 87 | db.text_factory=str 88 | c = db.cursor() 89 | c.execute("CREATE TABLE IF NOT EXISTS `toots` (sortid INTEGER UNIQUE PRIMARY KEY AUTOINCREMENT, id VARCHAR NOT NULL, cw INT NOT NULL DEFAULT 0, userid VARCHAR NOT NULL, uri VARCHAR NOT NULL, content VARCHAR NOT NULL)") 90 | db.commit() 91 | 92 | tableinfo = c.execute("PRAGMA table_info(`toots`)").fetchall() 93 | found = False 94 | columns = [] 95 | for entry in tableinfo: 96 | if entry[1] == "sortid": 97 | found = True 98 | break 99 | columns.append(entry[1]) 100 | 101 | if not found: 102 | print("Migrating to new database format. Please wait...") 103 | print("WARNING: If any of the accounts your bot is following are Pleroma users, please delete toots.db and run main.py again to create it anew.") 104 | try: 105 | c.execute("DROP TABLE `toots_temp`") 106 | except: 107 | pass 108 | 109 | c.execute("CREATE TABLE `toots_temp` (sortid INTEGER UNIQUE PRIMARY KEY AUTOINCREMENT, id VARCHAR NOT NULL, cw INT NOT NULL DEFAULT 0, userid VARCHAR NOT NULL, uri VARCHAR NOT NULL, content VARCHAR NOT NULL)") 110 | for f in following: 111 | user_toots = c.execute("SELECT * FROM `toots` WHERE userid LIKE ? ORDER BY id", (f.id,)).fetchall() 112 | if user_toots == None: 113 | continue 114 | 115 | if columns[-1] == "cw": 116 | for toot in user_toots: 117 | c.execute("INSERT INTO `toots_temp` (id, userid, uri, content, cw) VALUES (?, ?, ?, ?, ?)", toot) 118 | else: 119 | for toot in user_toots: 120 | c.execute("INSERT INTO `toots_temp` (id, cw, userid, uri, content) VALUES (?, ?, ?, ?, ?)", toot) 121 | 122 | c.execute("DROP TABLE `toots`") 123 | c.execute("ALTER TABLE `toots_temp` RENAME TO `toots`") 124 | 125 | db.commit() 126 | 127 | def handleCtrlC(signal, frame): 128 | print("\nPREMATURE EVACUATION - Saving chunks") 129 | db.commit() 130 | sys.exit(1) 131 | 132 | signal.signal(signal.SIGINT, handleCtrlC) 133 | 134 | patterns = { 135 | "handle": re.compile(r"^.*@(.+)"), 136 | "url": re.compile(r"https?:\/\/(.*)"), 137 | "uri": re.compile(r'template="([^"]+)"'), 138 | "pid": re.compile(r"[^\/]+$"), 139 | } 140 | 141 | 142 | def insert_toot(oii, acc, post, cursor): # extracted to prevent duplication 143 | pid = patterns["pid"].search(oii['object']['id']).group(0) 144 | cursor.execute("REPLACE INTO toots (id, cw, userid, uri, content) VALUES (?, ?, ?, ?, ?)", ( 145 | pid, 146 | 1 if (oii['object']['summary'] != None and oii['object']['summary'] != "") else 0, 147 | acc.id, 148 | oii['object']['id'], 149 | post 150 | )) 151 | 152 | 153 | for f in following: 154 | last_toot = c.execute("SELECT id FROM `toots` WHERE userid LIKE ? ORDER BY sortid DESC LIMIT 1", (f.id,)).fetchone() 155 | if last_toot != None: 156 | last_toot = last_toot[0] 157 | else: 158 | last_toot = 0 159 | print("Downloading posts for user @{}, starting from {}".format(f.acct, last_toot)) 160 | 161 | #find the user's activitypub outbox 162 | print("WebFingering...") 163 | instance = patterns["handle"].search(f.acct) 164 | if instance == None: 165 | instance = patterns["url"].search(cfg['site']).group(1) 166 | else: 167 | instance = instance.group(1) 168 | 169 | if instance in cfg['instance_blacklist']: 170 | print("skipping blacklisted instance: {}".format(instance)) 171 | continue 172 | 173 | try: 174 | # 1. download host-meta to find webfinger URL 175 | r = requests.get("https://{}/.well-known/host-meta".format(instance), timeout=10) 176 | # 2. use webfinger to find user's info page 177 | uri = patterns["uri"].search(r.text).group(1) 178 | uri = uri.format(uri = "{}@{}".format(f.username, instance)) 179 | r = requests.get(uri, headers={"Accept": "application/json"}, timeout=10) 180 | j = r.json() 181 | found = False 182 | for link in j['links']: 183 | if link['rel'] == 'self': 184 | #this is a link formatted like "https://instan.ce/users/username", which is what we need 185 | uri = link['href'] 186 | found = True 187 | break 188 | if not found: 189 | print("Couldn't find a valid ActivityPub outbox URL.") 190 | 191 | # 3. download first page of outbox 192 | uri = "{}/outbox?page=true".format(uri) 193 | r = requests.get(uri, timeout=15) 194 | j = r.json() 195 | except: 196 | print("oopsy woopsy!! we made a fucky wucky!!!\n(we're probably rate limited, please hang up and try again)") 197 | sys.exit(1) 198 | 199 | pleroma = False 200 | if 'next' not in j and 'prev' not in j: 201 | # there's only one page of results, don't bother doing anything special 202 | pass 203 | elif 'prev' not in j: 204 | print("Using Pleroma compatibility mode") 205 | pleroma = True 206 | if 'first' in j: 207 | # apparently there used to be a 'first' field in pleroma's outbox output, but it's not there any more 208 | # i'll keep this for backwards compatibility with older pleroma instances 209 | # it was removed in pleroma 1.0.7 - https://git.pleroma.social/pleroma/pleroma/-/blob/841e4e4d835b8d1cecb33102356ca045571ef1fc/CHANGELOG.md#107-2019-09-26 210 | j = j['first'] 211 | else: 212 | print("Using standard mode") 213 | uri = "{}&min_id={}".format(uri, last_toot) 214 | r = requests.get(uri) 215 | j = r.json() 216 | 217 | print("Downloading and saving posts", end='', flush=True) 218 | done = False 219 | try: 220 | while not done and len(j['orderedItems']) > 0: 221 | for oi in j['orderedItems']: 222 | if oi['type'] != "Create": 223 | continue #this isn't a toot/post/status/whatever, it's a boost or a follow or some other activitypub thing. ignore 224 | 225 | # its a toost baby 226 | content = oi['object']['content'] 227 | toot = extract_toot(content) 228 | # print(toot) 229 | try: 230 | if pleroma: 231 | if c.execute("SELECT COUNT(*) FROM toots WHERE uri LIKE ?", (oi['object']['id'],)).fetchone()[0] > 0: 232 | #we've caught up to the notices we've already downloaded, so we can stop now 233 | #you might be wondering, "lynne, what if the instance ratelimits you after 40 posts, and they've made 60 since main.py was last run? wouldn't the bot miss 20 posts and never be able to see them?" to which i reply, "i know but i don't know how to fix it" 234 | done = True 235 | continue 236 | if 'lang' in cfg: 237 | try: 238 | if oi['object']['contentMap'][cfg['lang']]: # filter for language 239 | insert_toot(oi, f, toot, c) 240 | except KeyError: 241 | #JSON doesn't have contentMap, just insert the toot irregardlessly 242 | insert_toot(oi, f, toot, c) 243 | else: 244 | insert_toot(oi, f, toot, c) 245 | pass 246 | except: 247 | pass #ignore any toots that don't successfully go into the DB 248 | 249 | # get the next/previous page 250 | try: 251 | if not pleroma: 252 | r = requests.get(j['prev'], timeout=15) 253 | else: 254 | r = requests.get(j['next'], timeout=15) 255 | except requests.Timeout: 256 | print("HTTP timeout, site did not respond within 15 seconds") 257 | except KeyError: 258 | print("Couldn't get next page - we've probably got all the posts") 259 | except: 260 | print("An error occurred while trying to obtain more posts.") 261 | 262 | j = r.json() 263 | print('.', end='', flush=True) 264 | print(" Done!") 265 | db.commit() 266 | except requests.HTTPError as e: 267 | if e.response.status_code == 429: 268 | print("Rate limit exceeded. This means we're downloading too many posts in quick succession. Saving toots to database and moving to next followed account.") 269 | db.commit() 270 | else: 271 | # TODO: remove duplicate code 272 | print("Encountered an error! Saving posts to database and moving to next followed account.") 273 | db.commit() 274 | except: 275 | print("Encountered an error! Saving posts to database and moving to next followed account.") 276 | db.commit() 277 | 278 | print("Done!") 279 | 280 | db.commit() 281 | db.execute("VACUUM") #compact db 282 | db.commit() 283 | db.close() 284 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Mozilla Public License Version 2.0 2 | ================================== 3 | 4 | 1. Definitions 5 | -------------- 6 | 7 | 1.1. "Contributor" 8 | means each individual or legal entity that creates, contributes to 9 | the creation of, or owns Covered Software. 10 | 11 | 1.2. "Contributor Version" 12 | means the combination of the Contributions of others (if any) used 13 | by a Contributor and that particular Contributor's Contribution. 14 | 15 | 1.3. "Contribution" 16 | means Covered Software of a particular Contributor. 17 | 18 | 1.4. "Covered Software" 19 | means Source Code Form to which the initial Contributor has attached 20 | the notice in Exhibit A, the Executable Form of such Source Code 21 | Form, and Modifications of such Source Code Form, in each case 22 | including portions thereof. 23 | 24 | 1.5. "Incompatible With Secondary Licenses" 25 | means 26 | 27 | (a) that the initial Contributor has attached the notice described 28 | in Exhibit B to the Covered Software; or 29 | 30 | (b) that the Covered Software was made available under the terms of 31 | version 1.1 or earlier of the License, but not also under the 32 | terms of a Secondary License. 33 | 34 | 1.6. "Executable Form" 35 | means any form of the work other than Source Code Form. 36 | 37 | 1.7. "Larger Work" 38 | means a work that combines Covered Software with other material, in 39 | a separate file or files, that is not Covered Software. 40 | 41 | 1.8. "License" 42 | means this document. 43 | 44 | 1.9. "Licensable" 45 | means having the right to grant, to the maximum extent possible, 46 | whether at the time of the initial grant or subsequently, any and 47 | all of the rights conveyed by this License. 48 | 49 | 1.10. "Modifications" 50 | means any of the following: 51 | 52 | (a) any file in Source Code Form that results from an addition to, 53 | deletion from, or modification of the contents of Covered 54 | Software; or 55 | 56 | (b) any new file in Source Code Form that contains any Covered 57 | Software. 58 | 59 | 1.11. "Patent Claims" of a Contributor 60 | means any patent claim(s), including without limitation, method, 61 | process, and apparatus claims, in any patent Licensable by such 62 | Contributor that would be infringed, but for the grant of the 63 | License, by the making, using, selling, offering for sale, having 64 | made, import, or transfer of either its Contributions or its 65 | Contributor Version. 66 | 67 | 1.12. "Secondary License" 68 | means either the GNU General Public License, Version 2.0, the GNU 69 | Lesser General Public License, Version 2.1, the GNU Affero General 70 | Public License, Version 3.0, or any later versions of those 71 | licenses. 72 | 73 | 1.13. "Source Code Form" 74 | means the form of the work preferred for making modifications. 75 | 76 | 1.14. "You" (or "Your") 77 | means an individual or a legal entity exercising rights under this 78 | License. For legal entities, "You" includes any entity that 79 | controls, is controlled by, or is under common control with You. For 80 | purposes of this definition, "control" means (a) the power, direct 81 | or indirect, to cause the direction or management of such entity, 82 | whether by contract or otherwise, or (b) ownership of more than 83 | fifty percent (50%) of the outstanding shares or beneficial 84 | ownership of such entity. 85 | 86 | 2. License Grants and Conditions 87 | -------------------------------- 88 | 89 | 2.1. Grants 90 | 91 | Each Contributor hereby grants You a world-wide, royalty-free, 92 | non-exclusive license: 93 | 94 | (a) under intellectual property rights (other than patent or trademark) 95 | Licensable by such Contributor to use, reproduce, make available, 96 | modify, display, perform, distribute, and otherwise exploit its 97 | Contributions, either on an unmodified basis, with Modifications, or 98 | as part of a Larger Work; and 99 | 100 | (b) under Patent Claims of such Contributor to make, use, sell, offer 101 | for sale, have made, import, and otherwise transfer either its 102 | Contributions or its Contributor Version. 103 | 104 | 2.2. Effective Date 105 | 106 | The licenses granted in Section 2.1 with respect to any Contribution 107 | become effective for each Contribution on the date the Contributor first 108 | distributes such Contribution. 109 | 110 | 2.3. Limitations on Grant Scope 111 | 112 | The licenses granted in this Section 2 are the only rights granted under 113 | this License. No additional rights or licenses will be implied from the 114 | distribution or licensing of Covered Software under this License. 115 | Notwithstanding Section 2.1(b) above, no patent license is granted by a 116 | Contributor: 117 | 118 | (a) for any code that a Contributor has removed from Covered Software; 119 | or 120 | 121 | (b) for infringements caused by: (i) Your and any other third party's 122 | modifications of Covered Software, or (ii) the combination of its 123 | Contributions with other software (except as part of its Contributor 124 | Version); or 125 | 126 | (c) under Patent Claims infringed by Covered Software in the absence of 127 | its Contributions. 128 | 129 | This License does not grant any rights in the trademarks, service marks, 130 | or logos of any Contributor (except as may be necessary to comply with 131 | the notice requirements in Section 3.4). 132 | 133 | 2.4. Subsequent Licenses 134 | 135 | No Contributor makes additional grants as a result of Your choice to 136 | distribute the Covered Software under a subsequent version of this 137 | License (see Section 10.2) or under the terms of a Secondary License (if 138 | permitted under the terms of Section 3.3). 139 | 140 | 2.5. Representation 141 | 142 | Each Contributor represents that the Contributor believes its 143 | Contributions are its original creation(s) or it has sufficient rights 144 | to grant the rights to its Contributions conveyed by this License. 145 | 146 | 2.6. Fair Use 147 | 148 | This License is not intended to limit any rights You have under 149 | applicable copyright doctrines of fair use, fair dealing, or other 150 | equivalents. 151 | 152 | 2.7. Conditions 153 | 154 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted 155 | in Section 2.1. 156 | 157 | 3. Responsibilities 158 | ------------------- 159 | 160 | 3.1. Distribution of Source Form 161 | 162 | All distribution of Covered Software in Source Code Form, including any 163 | Modifications that You create or to which You contribute, must be under 164 | the terms of this License. You must inform recipients that the Source 165 | Code Form of the Covered Software is governed by the terms of this 166 | License, and how they can obtain a copy of this License. You may not 167 | attempt to alter or restrict the recipients' rights in the Source Code 168 | Form. 169 | 170 | 3.2. Distribution of Executable Form 171 | 172 | If You distribute Covered Software in Executable Form then: 173 | 174 | (a) such Covered Software must also be made available in Source Code 175 | Form, as described in Section 3.1, and You must inform recipients of 176 | the Executable Form how they can obtain a copy of such Source Code 177 | Form by reasonable means in a timely manner, at a charge no more 178 | than the cost of distribution to the recipient; and 179 | 180 | (b) You may distribute such Executable Form under the terms of this 181 | License, or sublicense it under different terms, provided that the 182 | license for the Executable Form does not attempt to limit or alter 183 | the recipients' rights in the Source Code Form under this License. 184 | 185 | 3.3. Distribution of a Larger Work 186 | 187 | You may create and distribute a Larger Work under terms of Your choice, 188 | provided that You also comply with the requirements of this License for 189 | the Covered Software. If the Larger Work is a combination of Covered 190 | Software with a work governed by one or more Secondary Licenses, and the 191 | Covered Software is not Incompatible With Secondary Licenses, this 192 | License permits You to additionally distribute such Covered Software 193 | under the terms of such Secondary License(s), so that the recipient of 194 | the Larger Work may, at their option, further distribute the Covered 195 | Software under the terms of either this License or such Secondary 196 | License(s). 197 | 198 | 3.4. Notices 199 | 200 | You may not remove or alter the substance of any license notices 201 | (including copyright notices, patent notices, disclaimers of warranty, 202 | or limitations of liability) contained within the Source Code Form of 203 | the Covered Software, except that You may alter any license notices to 204 | the extent required to remedy known factual inaccuracies. 205 | 206 | 3.5. Application of Additional Terms 207 | 208 | You may choose to offer, and to charge a fee for, warranty, support, 209 | indemnity or liability obligations to one or more recipients of Covered 210 | Software. However, You may do so only on Your own behalf, and not on 211 | behalf of any Contributor. You must make it absolutely clear that any 212 | such warranty, support, indemnity, or liability obligation is offered by 213 | You alone, and You hereby agree to indemnify every Contributor for any 214 | liability incurred by such Contributor as a result of warranty, support, 215 | indemnity or liability terms You offer. You may include additional 216 | disclaimers of warranty and limitations of liability specific to any 217 | jurisdiction. 218 | 219 | 4. Inability to Comply Due to Statute or Regulation 220 | --------------------------------------------------- 221 | 222 | If it is impossible for You to comply with any of the terms of this 223 | License with respect to some or all of the Covered Software due to 224 | statute, judicial order, or regulation then You must: (a) comply with 225 | the terms of this License to the maximum extent possible; and (b) 226 | describe the limitations and the code they affect. Such description must 227 | be placed in a text file included with all distributions of the Covered 228 | Software under this License. Except to the extent prohibited by statute 229 | or regulation, such description must be sufficiently detailed for a 230 | recipient of ordinary skill to be able to understand it. 231 | 232 | 5. Termination 233 | -------------- 234 | 235 | 5.1. The rights granted under this License will terminate automatically 236 | if You fail to comply with any of its terms. However, if You become 237 | compliant, then the rights granted under this License from a particular 238 | Contributor are reinstated (a) provisionally, unless and until such 239 | Contributor explicitly and finally terminates Your grants, and (b) on an 240 | ongoing basis, if such Contributor fails to notify You of the 241 | non-compliance by some reasonable means prior to 60 days after You have 242 | come back into compliance. Moreover, Your grants from a particular 243 | Contributor are reinstated on an ongoing basis if such Contributor 244 | notifies You of the non-compliance by some reasonable means, this is the 245 | first time You have received notice of non-compliance with this License 246 | from such Contributor, and You become compliant prior to 30 days after 247 | Your receipt of the notice. 248 | 249 | 5.2. If You initiate litigation against any entity by asserting a patent 250 | infringement claim (excluding declaratory judgment actions, 251 | counter-claims, and cross-claims) alleging that a Contributor Version 252 | directly or indirectly infringes any patent, then the rights granted to 253 | You by any and all Contributors for the Covered Software under Section 254 | 2.1 of this License shall terminate. 255 | 256 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all 257 | end user license agreements (excluding distributors and resellers) which 258 | have been validly granted by You or Your distributors under this License 259 | prior to termination shall survive termination. 260 | 261 | ************************************************************************ 262 | * * 263 | * 6. Disclaimer of Warranty * 264 | * ------------------------- * 265 | * * 266 | * Covered Software is provided under this License on an "as is" * 267 | * basis, without warranty of any kind, either expressed, implied, or * 268 | * statutory, including, without limitation, warranties that the * 269 | * Covered Software is free of defects, merchantable, fit for a * 270 | * particular purpose or non-infringing. The entire risk as to the * 271 | * quality and performance of the Covered Software is with You. * 272 | * Should any Covered Software prove defective in any respect, You * 273 | * (not any Contributor) assume the cost of any necessary servicing, * 274 | * repair, or correction. This disclaimer of warranty constitutes an * 275 | * essential part of this License. No use of any Covered Software is * 276 | * authorized under this License except under this disclaimer. * 277 | * * 278 | ************************************************************************ 279 | 280 | ************************************************************************ 281 | * * 282 | * 7. Limitation of Liability * 283 | * -------------------------- * 284 | * * 285 | * Under no circumstances and under no legal theory, whether tort * 286 | * (including negligence), contract, or otherwise, shall any * 287 | * Contributor, or anyone who distributes Covered Software as * 288 | * permitted above, be liable to You for any direct, indirect, * 289 | * special, incidental, or consequential damages of any character * 290 | * including, without limitation, damages for lost profits, loss of * 291 | * goodwill, work stoppage, computer failure or malfunction, or any * 292 | * and all other commercial damages or losses, even if such party * 293 | * shall have been informed of the possibility of such damages. This * 294 | * limitation of liability shall not apply to liability for death or * 295 | * personal injury resulting from such party's negligence to the * 296 | * extent applicable law prohibits such limitation. Some * 297 | * jurisdictions do not allow the exclusion or limitation of * 298 | * incidental or consequential damages, so this exclusion and * 299 | * limitation may not apply to You. * 300 | * * 301 | ************************************************************************ 302 | 303 | 8. Litigation 304 | ------------- 305 | 306 | Any litigation relating to this License may be brought only in the 307 | courts of a jurisdiction where the defendant maintains its principal 308 | place of business and such litigation shall be governed by laws of that 309 | jurisdiction, without reference to its conflict-of-law provisions. 310 | Nothing in this Section shall prevent a party's ability to bring 311 | cross-claims or counter-claims. 312 | 313 | 9. Miscellaneous 314 | ---------------- 315 | 316 | This License represents the complete agreement concerning the subject 317 | matter hereof. If any provision of this License is held to be 318 | unenforceable, such provision shall be reformed only to the extent 319 | necessary to make it enforceable. Any law or regulation which provides 320 | that the language of a contract shall be construed against the drafter 321 | shall not be used to construe this License against a Contributor. 322 | 323 | 10. Versions of the License 324 | --------------------------- 325 | 326 | 10.1. New Versions 327 | 328 | Mozilla Foundation is the license steward. Except as provided in Section 329 | 10.3, no one other than the license steward has the right to modify or 330 | publish new versions of this License. Each version will be given a 331 | distinguishing version number. 332 | 333 | 10.2. Effect of New Versions 334 | 335 | You may distribute the Covered Software under the terms of the version 336 | of the License under which You originally received the Covered Software, 337 | or under the terms of any subsequent version published by the license 338 | steward. 339 | 340 | 10.3. Modified Versions 341 | 342 | If you create software not governed by this License, and you want to 343 | create a new license for such software, you may create and use a 344 | modified version of this License if you rename the license and remove 345 | any references to the name of the license steward (except to note that 346 | such modified license differs from this License). 347 | 348 | 10.4. Distributing Source Code Form that is Incompatible With Secondary 349 | Licenses 350 | 351 | If You choose to distribute Source Code Form that is Incompatible With 352 | Secondary Licenses under the terms of this version of the License, the 353 | notice described in Exhibit B of this License must be attached. 354 | 355 | Exhibit A - Source Code Form License Notice 356 | ------------------------------------------- 357 | 358 | This Source Code Form is subject to the terms of the Mozilla Public 359 | License, v. 2.0. If a copy of the MPL was not distributed with this 360 | file, You can obtain one at http://mozilla.org/MPL/2.0/. 361 | 362 | If it is not possible or desirable to put the notice in a particular 363 | file, then You may include the notice in a location (such as a LICENSE 364 | file in a relevant directory) where a recipient would be likely to look 365 | for such a notice. 366 | 367 | You may add additional accurate notices of copyright ownership. 368 | 369 | Exhibit B - "Incompatible With Secondary Licenses" Notice 370 | --------------------------------------------------------- 371 | 372 | This Source Code Form is "Incompatible With Secondary Licenses", as 373 | defined by the Mozilla Public License, v. 2.0. 374 | --------------------------------------------------------------------------------