├── .gitignore
├── .github
│   ├── FUNDING.yml
│   └── workflows
│       └── ci.yml
├── lib
│   ├── __init__.py
│   ├── compat.py
│   ├── utils.py
│   ├── stdout.py
│   ├── text.py
│   ├── html.py
│   ├── www.py
│   ├── inst.py
│   ├── cli.py
│   ├── mastodon.py
│   └── bluesky.py
├── doc
│   ├── changelog
│   └── zygolophodon.1.in
├── t
│   ├── common.sh
│   ├── changelog.t
│   ├── bad-addr.t
│   ├── network.urls
│   ├── url-error.t
│   ├── network.t
│   ├── help.t
│   ├── version.t
│   └── man-env.t
├── zygolophodon
├── .pylintrc
├── .perlcriticrc
├── private
│   ├── update-readme
│   └── gen-manpage
├── LICENSE
├── Makefile
└── README

/.gitignore:
--------------------------------------------------------------------------------
  1 | *.py[co]
  2 | /doc/*.1
  3 | 


--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
  1 | custom: https://paypal.me/ijklw
  2 | 


--------------------------------------------------------------------------------
/lib/__init__.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | zygolophodon's private modules
  3 | '''
  4 | 
  5 | type(lambda x, /: x)  # Python >= 3.8 is required
  6 | 


--------------------------------------------------------------------------------
/doc/changelog:
--------------------------------------------------------------------------------
  1 | zygolophodon (0.1) UNRELEASED; urgency=low
  2 | 
  3 |   * Initial release.
  4 | 
  5 |  -- Jakub Wilk  Mon, 14 Apr 2025 20:31:28 +0200
  6 | 


--------------------------------------------------------------------------------
/t/common.sh:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | 
  3 | # Copyright © 2025 Jakub Wilk
  4 | # SPDX-License-Identifier: MIT
  5 | 
  6 | tdir="${0%/*}"
  7 | dir="$tdir/.."
  8 | case " $* " in
  9 |     *' --installed '*)
 10 |         prog='zygolophodon';;
 11 |     *)
 12 |         prog="$dir/zygolophodon";;
 13 | esac
 14 | 
 15 | plan()
 16 | {
 17 |     local extra=''
 18 |     if [ $1 -eq 0 ]
 19 |     then
 20 |         extra=" # SKIP $2"
 21 |     fi
 22 |     printf '1..%d%s\n' "$1" "$extra"
 23 |     printf '# test target = %s\n' "$prog"
 24 | }
 25 | 
 26 | # vim:ts=4 sts=4 sw=4 et ft=sh
 27 | 


--------------------------------------------------------------------------------
/zygolophodon:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # encoding=UTF-8
  3 | 
  4 | # Copyright © 2025 Jakub Wilk
  5 | # SPDX-License-Identifier: MIT
  6 | 
  7 | '''
  8 | anonymous CLI for reading microblogging (chiefly Mastodon) posts
  9 | '''
 10 | 
 11 | import sys
 12 | 
 13 | type(lambda x, /: x)  # Python >= 3.8 is required
 14 | 
 15 | basedir = None
 16 | if basedir is not None:
 17 |     sys.path[:0] = [basedir]
 18 | 
 19 | import lib.cli  # pylint: disable=wrong-import-position
 20 | 
 21 | if __name__ == '__main__':
 22 |     lib.cli.main()
 23 | 
 24 | # vim:ts=4 sts=4 sw=4 et
 25 | 


--------------------------------------------------------------------------------
/t/changelog.t:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | 
  3 | # Copyright © 2025 Jakub Wilk
  4 | # SPDX-License-Identifier: MIT
  5 | 
  6 | set -e -u
  7 | 
  8 | . "${0%/*}/common.sh"
  9 | 
 10 | plan 1
 11 | if ! command -v dpkg-parsechangelog > /dev/null
 12 | then
 13 |     echo 'ok 1 # SKIP missing dpkg-parsechangelog(1)'
 14 |     exit
 15 | fi
 16 | out=$(dpkg-parsechangelog -l"$dir/doc/changelog" --all 2>&1 >/dev/null)
 17 | if [[ -z $out ]]
 18 | then
 19 |     echo ok 1
 20 | else
 21 |     sed -e 's/^/# /' <<< "$out"
 22 |     echo not ok 1
 23 | fi
 24 | 
 25 | # vim:ts=4 sts=4 sw=4 et ft=sh
 26 | 


--------------------------------------------------------------------------------
/t/bad-addr.t:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | 
  3 | # Copyright © 2025 Jakub Wilk
  4 | # SPDX-License-Identifier: MIT
  5 | 
  6 | set -e -u
  7 | 
  8 | . "${0%/*}/common.sh"
  9 | 
 10 | plan 2
 11 | 
 12 | err=$("$prog" moo 2>&1 >/dev/null) || xs=$?
 13 | echo "# exit status $xs"
 14 | tname='exit status'
 15 | case $xs in
 16 |     2) echo "ok 1 $tname";;
 17 |     *) echo "not ok 1 $tname";;
 18 | esac
 19 | sed -e 's/^/# /' <<< $err
 20 | tname='error message'
 21 | case $err in
 22 |     *$'\n''zygolophodon: error: unsupported address')
 23 |         echo "ok 2 $tname";;
 24 |     *)
 25 |         echo "not ok 2 $tname";;
 26 | esac
 27 | 
 28 | # vim:ts=4 sts=4 sw=4 et ft=sh
 29 | 


--------------------------------------------------------------------------------
/.pylintrc:
--------------------------------------------------------------------------------
  1 | [MASTER]
  2 | load-plugins = pylint.extensions.check_elif
  3 | 
  4 | [MESSAGES CONTROL]
  5 | disable =
  6 |     fixme,
  7 |     invalid-name,
  8 |     missing-class-docstring,
  9 |     missing-function-docstring,
 10 |     no-else-return,
 11 |     no-self-use,
 12 |     too-few-public-methods,
 13 |     too-many-locals,
 14 |     use-dict-literal,
 15 |     useless-option-value,
 16 | 
 17 | [REPORTS]
 18 | msg-template = {path}:{line}: {C}: {symbol} [{obj}] {msg}
 19 | reports = no
 20 | score = no
 21 | 
 22 | [FORMAT]
 23 | expected-line-ending-format = LF
 24 | max-line-length = 120
 25 | 
 26 | [DESIGN]
 27 | max-branches = 20
 28 | 
 29 | # vim:ft=dosini ts=4 sts=4 sw=4 et
 30 | 


--------------------------------------------------------------------------------
/lib/compat.py:
--------------------------------------------------------------------------------
  1 | # Copyright © 2022-2025 Jakub Wilk
  2 | # SPDX-License-Identifier: MIT
  3 | 
  4 | '''
  5 | support for old Python versions
  6 | '''
  7 | 
  8 | import datetime
  9 | import functools
 10 | import re
 11 | import sys
 12 | 
 13 | if sys.version_info < (3, 9):
 14 |     functools.cache = functools.lru_cache(maxsize=None)
 15 | 
 16 | if sys.version_info >= (3, 11):
 17 |     datetime_fromisoformat = datetime.datetime.fromisoformat
 18 | else:
 19 |     def datetime_fromisoformat(d):
 20 |         d = re.sub(r'Z\Z', '+00:00', d)
 21 |         return datetime.datetime.fromisoformat(d)
 22 | 
 23 | __all__ = [
 24 |     'datetime_fromisoformat',
 25 | ]
 26 | 
 27 | # vim:ts=4 sts=4 sw=4 et
 28 | 


--------------------------------------------------------------------------------
/.perlcriticrc:
--------------------------------------------------------------------------------
  1 | severity = 1
  2 | verbose = %f:%l: [%p] %m\n
  3 | 
  4 | [-CodeLayout::RequireTidyCode]
  5 | # no, thanks
  6 | 
  7 | [-ErrorHandling::RequireCarping]
  8 | # "die" is good enough
  9 | 
 10 | [InputOutput::RequireCheckedSyscalls]
 11 | functions = :builtins
 12 | exclude_functions = print say
 13 | 
 14 | [-Modules::RequireVersionVar]
 15 | # see https://bugs.debian.org/706266
 16 | 
 17 | [-RegularExpressions::RequireDotMatchAnything]
 18 | [-RegularExpressions::RequireExtendedFormatting]
 19 | [-RegularExpressions::RequireLineBoundaryMatching]
 20 | ## no, thanks
 21 | 
 22 | [-ValuesAndExpressions::ProhibitNoisyQuotes]
 23 | # no, thanks
 24 | 
 25 | [-ValuesAndExpressions::ProhibitVersionStrings]
 26 | # we don't care about Perl << 5.6, which doesn't support version strings
 27 | 
 28 | # vim:ft=dosini
 29 | 


--------------------------------------------------------------------------------
/private/update-readme:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env perl
  2 | 
  3 | # Copyright © 2025 Jakub Wilk
  4 | # SPDX-License-Identifier: MIT
  5 | 
  6 | no lib '.';  # CVE-2016-1238
  7 | 
  8 | use strict;
  9 | use warnings;
 10 | use v5.14;
 11 | 
 12 | use English qw(-no_match_vars);
 13 | use FindBin ();
 14 | use autodie;
 15 | 
 16 | my $base = "$FindBin::Bin/..";
 17 | 
 18 | open my $fh, '<', "$base/README";
 19 | my $readme = do {
 20 |     local $RS = undef;
 21 |     <$fh>;
 22 | };
 23 | close $fh;
 24 | open $fh, '-|', "$base/zygolophodon", '--help';
 25 | my $usage = do {
 26 |     local $RS = undef;
 27 |     <$fh>;
 28 | };
 29 | close $fh;
 30 | $usage =~ s/^(?=.)/ /mg;
 31 | $readme =~ s/\n \$ zygolophodon --help\n\K.*?(?=\n\S)/$usage/s
 32 |     or die;
 33 | open $fh, '>', "$base/README";
 34 | {
 35 |     print {$fh} $readme;
 36 | }
 37 | close $fh;
 38 | 
 39 | # vim:ts=4 sts=4 sw=4 et
 40 | 


--------------------------------------------------------------------------------
/t/network.urls:
--------------------------------------------------------------------------------
  1 | ### Mastodon ###
  2 | 
  3 | https://mastodon.social/@bluecommunity  # with pinned post
  4 | https://infosec.exchange/@lcamtuf/109537142030669006  # with image attachment
  5 | https://mastodon.social/statuses/1  # short (pre-v2.0) ident
  6 | https://fosstodon.org/tags/python
  7 | https://physics.social/tags/python  # Mastodon v3.5
  8 | @Mastodon@mastodon.social@mas.to  # foreign account
  9 | 
 10 | ### Iceshrimp ###
 11 | 
 12 | https://bytes.programming.dev/@Updates
 13 | https://fedia.social/notes/9vs6uhqixwnm3mvf
 14 | https://infosec.town/tags/python
 15 | 
 16 | ### Pleroma ###
 17 | 
 18 | https://social.sfconservancy.org/users/conservancy  # with pinned post
 19 | https://pleroma.envs.net/notice/AsxefyNDCyqGkfRABs
 20 | https://pleroma.debian.social/tag/python
 21 | 
 22 | ### Snac (unsupported) ###
 23 | 
 24 | grunfink@comam.es
 25 | 
 26 | ### Bluesky ###
 27 | 
 28 | https://bsky.app/profile/bsky.app  # with pinned post
 29 | https://bsky.app/profile/toronto.ca/post/3llr2e6rotc2d
 30 | https://bsky.app/hashtag/python
 31 | 


--------------------------------------------------------------------------------
/t/url-error.t:
--------------------------------------------------------------------------------
  1 | #!/bin/sh
  2 | 
  3 | # Copyright © 2025 Jakub Wilk
  4 | # SPDX-License-Identifier: MIT
  5 | 
  6 | set -e -u
  7 | 
  8 | . "${0%/*}/common.sh"
  9 | 
 10 | plan 2
 11 | unshare_net()
 12 | {
 13 |     unshare --user --net "$@"
 14 | }
 15 | if ! unshare_net true 2>/dev/null
 16 | then
 17 |     unshare_net()
 18 |     {
 19 |         "$@"
 20 |     }
 21 | fi
 22 | export http_proxy='http://127.0.0.1:9/'
 23 | export https_proxy="$http_proxy"
 24 | export RES_OPTIONS=attempts:0
 25 | xs=0
 26 | base_url=https://mastodon.social
 27 | url="$base_url/@Mastodon"
 28 | echo "# $url"
 29 | err=$(unshare_net "$prog" "$url" 2>&1 >/dev/null) || xs=$?
 30 | echo "# exit status $xs"
 31 | tname='exit status'
 32 | case $xs in
 33 |     1) echo "ok 1 $tname";;
 34 |     *) echo "not ok 1 $tname";;
 35 | esac
 36 | echo "# $err"
 37 | tname='error message'
 38 | case $err in
 39 |     "zygolophodon: <$base_url/api/v1/instance>: [E"[A-Z]*'] '*)
 40 |         echo "ok 2 $tname";;
 41 |     *)
 42 |         echo "not ok 2 $tname";;
 43 | esac
 44 | 
 45 | # vim:ts=4 sts=4 sw=4 et ft=sh
 46 | 


--------------------------------------------------------------------------------
/t/network.t:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | 
  3 | # Copyright © 2025 Jakub Wilk
  4 | # SPDX-License-Identifier: MIT
  5 | 
  6 | set -e -u
  7 | 
  8 | . "${0%/*}/common.sh"
  9 | 
 10 | case " $* " in
 11 |     *' --network '*)
 12 |         ;;
 13 |     *)
 14 |         plan 0 'use --network to opt in to network testing'
 15 |         exit 0
 16 |         ;;
 17 | esac
 18 | 
 19 | normspace()
 20 | {
 21 |     # shellcheck disable=SC2048,SC2086
 22 |     s=$(set -f; printf '%s ' $*)
 23 |     printf '%s' "${s% }"
 24 | }
 25 | 
 26 | urls=()
 27 | while read -r line
 28 | do
 29 |     line=${line%%#*}
 30 |     line=$(normspace "$line")
 31 |     [[ -n $line ]] || continue
 32 |     urls+=("$line")
 33 | done < "$tdir/network.urls"
 34 | 
 35 | echo "1..${#urls[@]}"
 36 | declare -i n=1
 37 | for url in "${urls[@]}"
 38 | do
 39 |     rc=0
 40 |     out=$("$prog" --limit=2 "$url") || rc=$?
 41 |     sed -e 's/^/# /' <<< "$out"
 42 |     if [[ $rc = 0 ]]
 43 |     then
 44 |         echo ok $n "$url"
 45 |     else
 46 |         echo not ok $n "$url"
 47 |     fi
 48 |     n+=1
 49 | done
 50 | 
 51 | # vim:ts=4 sts=4 sw=4 et ft=sh
 52 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | Copyright © 2022-2025 Jakub Wilk
  2 | 
  3 | Permission is hereby granted, free of charge, to any person obtaining a copy
  4 | of this software and associated documentation files (the “Software”), to deal
  5 | in the Software without restriction, including without limitation the rights
  6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  7 | copies of the Software, and to permit persons to whom the Software is
  8 | furnished to do so, subject to the following conditions:
  9 | 
 10 | The above copyright notice and this permission notice shall be included in
 11 | all copies or substantial portions of the Software.
 12 | 
 13 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 19 | SOFTWARE.
 20 | 


--------------------------------------------------------------------------------
/lib/utils.py:
--------------------------------------------------------------------------------
  1 | # Copyright © 2022-2025 Jakub Wilk
  2 | # SPDX-License-Identifier: MIT
  3 | 
  4 | '''
  5 | misc stuff
  6 | '''
  7 | 
  8 | import abc
  9 | import functools
 10 | import re
 11 | 
 12 | class Dict(dict):
 13 |     __getattr__ = dict.__getitem__
 14 | 
 15 | class InternalError(RuntimeError):
 16 |     pass
 17 | 
 18 | def expand_template(template, **subst):
 19 |     def repl(match):
 20 |         key = match.group()
 21 |         lkey = key.lower()
 22 |         try:
 23 |             return subst[lkey]
 24 |         except KeyError:
 25 |             msg = f'cannot expand {key} in template {template!r}'
 26 |             raise InternalError(msg) from None
 27 |     return re.sub('[A-Z]+', repl, template)
 28 | 
 29 | def abstractattribute():
 30 |     return abc.abstractmethod(lambda: None)
 31 | 
 32 | def compose(f):
 33 |     def eff(g):
 34 |         @functools.wraps(g)
 35 |         def f_g(*args, **kwargs):
 36 |             return f(g(*args, **kwargs))
 37 |         return f_g
 38 |     return eff
 39 | 
 40 | __all__ = [
 41 |     'Dict',
 42 |     'expand_template',
 43 |     'abstractattribute',
 44 |     'compose',
 45 | ]
 46 | 
 47 | # vim:ts=4 sts=4 sw=4 et
 48 | 


--------------------------------------------------------------------------------
/t/help.t:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | 
  3 | # Copyright © 2022-2025 Jakub Wilk
  4 | # SPDX-License-Identifier: MIT
  5 | 
  6 | set -e -u
  7 | 
  8 | . "${0%/*}/common.sh"
  9 | 
 10 | plan 2
 11 | xout=$(< "$dir/README")
 12 | xout=${xout#*$'\n $ zygolophodon --help\n '}
 13 | xout=${xout%%$'\n\n'[^ ]*}
 14 | xout=${xout//$'\n '/$'\n'}
 15 | out=$("$prog" --help)
 16 | if [[ "$out" = "$xout" ]]
 17 | then
 18 |     echo 'ok 1'
 19 | else
 20 |     diff -u <(cat <<< "$xout") <(cat <<< "$out") | sed -e 's/^/# /'
 21 |     echo 'not ok 1'
 22 | fi
 23 | # chop off the part that's auto-generated in the man page anyway:
 24 | out=$(sed -e '/^  ADDRESS /,/^$/d' <<< "$out")
 25 | xsum=$(sha256sum <<< "$out")
 26 | xsum=${xsum%% *}
 27 | var='SHA-256(help)'
 28 | echo "# $var = $xsum"
 29 | declare -i n=2
 30 | t_sync()
 31 | {
 32 |     path="$1"
 33 |     line=$(grep -F " $var = " < "$path")
 34 |     sum=${line##*" $var = "}
 35 |     if [ "$sum" = "$xsum" ]
 36 |     then
 37 |         echo ok $n "$path"
 38 |     else
 39 |         echo not ok $n "$path"
 40 |     fi
 41 |     n+=1
 42 | }
 43 | if [[ $prog = zygolophodon ]]
 44 | then
 45 |     man_target=$(man -w $prog)
 46 | else
 47 |     man_target="$dir/doc/zygolophodon.1.in"
 48 | fi
 49 | t_sync "$man_target"
 50 | 
 51 | # vim:ts=4 sts=4 sw=4 et ft=sh
 52 | 


--------------------------------------------------------------------------------
/t/version.t:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | 
  3 | # Copyright © 2024-2025 Jakub Wilk
  4 | # SPDX-License-Identifier: MIT
  5 | 
  6 | set -e -u
  7 | 
  8 | . "${0%/*}/common.sh"
  9 | 
 10 | plan 4
 11 | IFS='(); ' read -r _ changelog_version changelog_dist _ < "$dir/doc/changelog"
 12 | echo "# changelog version = $changelog_version"
 13 | echo "# changelog dist = $changelog_dist"
 14 | if out=$("$prog" --version)
 15 | then
 16 |     echo ok 1
 17 |     sed -e 's/^/# /' <<< "$out"
 18 |     case $out in
 19 |         $"zygolophodon $changelog_version"$'\n'*)
 20 |             echo ok 2;;
 21 |         *)
 22 |             echo not ok 2;;
 23 |     esac
 24 | else
 25 |     echo not ok 1
 26 |     echo not ok 2
 27 | fi
 28 | if [ -d "$dir/.git" ]
 29 | then
 30 |     echo 'ok 3 # SKIP git checkout'
 31 | elif [ "$changelog_dist" = UNRELEASED ]
 32 | then
 33 |     echo 'not ok 3'
 34 | else
 35 |     echo 'ok 3'
 36 | fi
 37 | if [[ $prog = zygolophodon ]]
 38 | then
 39 |     man_target=$prog
 40 | else
 41 |     man_target="$dir/doc/zygolophodon.1"
 42 |     if [[ -f $man_target ]]
 43 |     then
 44 |         man_target=''
 45 |     fi
 46 | fi
 47 | echo "# man page target = $man_target"
 48 | if [[ -n $man_target ]]
 49 | then
 50 |     line=$(MANWIDTH=80 man "$man_target" | tail -n 1)
 51 |     IFS=' "' read -r _ man_version _ <<< "$line"
 52 |     echo "# man page version = $man_version"
 53 |     if [ "$man_version" = "$changelog_version" ]
 54 |     then
 55 |         echo ok 4
 56 |     else
 57 |         echo not ok 4
 58 |     fi
 59 | else
 60 |     echo 'ok 4 # SKIP missing man page'
 61 | fi
 62 | 
 63 | # vim:ts=4 sts=4 sw=4 et ft=sh
 64 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
  1 | # Copyright © 2024-2025 Jakub Wilk
  2 | # SPDX-License-Identifier: MIT
  3 | 
  4 | PYTHON = python3
  5 | 
  6 | PREFIX = /usr/local
  7 | DESTDIR =
  8 | 
  9 | bindir = $(PREFIX)/bin
 10 | basedir = $(PREFIX)/share/zygolophodon
 11 | mandir = $(PREFIX)/share/man
 12 | 
 13 | .PHONY: all
 14 | all: doc/zygolophodon.1
 15 | 
 16 | %.1: %.1.in README private/gen-manpage
 17 | 	private/gen-manpage < $(<) > $(@).tmp
 18 | 	mv $(@).tmp $(@)
 19 | 
 20 | .PHONY: install
 21 | install: zygolophodon all
 22 | 	$(PYTHON) - < lib/__init__.py  # Python version check
 23 | # executable:
 24 | 	install -d $(DESTDIR)$(bindir)
 25 | 	python_exe=$$($(PYTHON) -c 'import sys; print(sys.executable)') && \
 26 | 	sed \
 27 | 		-e "1 s@^#!.*@#!$$python_exe@" \
 28 | 		-e "s#^basedir = .*#basedir = '$(basedir)/'#" \
 29 | 		$(<) > $(<).tmp
 30 | 	install $(<).tmp $(DESTDIR)$(bindir)/$(<)
 31 | 	rm $(<).tmp
 32 | # library:
 33 | 	install -d $(DESTDIR)$(basedir)/lib
 34 | 	install -p -m644 lib/*.py $(DESTDIR)$(basedir)/lib/
 35 | ifeq "$(DESTDIR)" ""
 36 | 	umask 022 && $(PYTHON) -m compileall -q $(basedir)/lib/
 37 | endif
 38 | # manual page:
 39 | 	install -d $(DESTDIR)$(mandir)/man1
 40 | 	install -p -m644 doc/$(<).1 $(DESTDIR)$(mandir)/man1/
 41 | 
 42 | .PHONY: test
 43 | test: verbose=
 44 | test: zygolophodon all
 45 | 	prove $(and $(verbose),-v)
 46 | 
 47 | .PHONY: test-installed
 48 | test-installed: verbose=
 49 | test-installed: $(or $(shell command -v zygolophodon;),$(bindir)/zygolophodon)
 50 | 	prove $(and $(verbose),-v) :: --installed
 51 | 
 52 | .PHONY: clean
 53 | clean:
 54 | 	rm -f *.tmp doc/*.1 doc/*.tmp
 55 | 	find . -type f -name '*.py[co]' -delete
 56 | 	find . -type d -name '__pycache__' -delete
 57 | 
 58 | .error = GNU make is required
 59 | 
 60 | # vim:ts=4 sts=4 sw=4 noet
 61 | 


--------------------------------------------------------------------------------
/doc/zygolophodon.1.in:
--------------------------------------------------------------------------------
  1 | .\" Copyright © 2025 Jakub Wilk
  2 | .\" SPDX-License-Identifier: MIT
  3 | 
  4 | .\" # SHA-256(help) = 6f4f12611723167e0d90775679acdf55b7a68f3db0c72fe8bfc6583b661bd29c
  5 | 
  6 | .TH ZYGOLOPHODON 1 2025-02-01 "zygolophodon {{VERSION}}"
  7 | .SH NAME
  8 | zygolophodon \- CLI for reading Mastodon posts
  9 | .SH SYNOPSIS
 10 | .SY zygolophodon
 11 | .RB [ \-\-limit " \fIN\fP]"
 12 | .RB [ \-\-with\-ancestors ]
 13 | .I ADDRESS
 14 | .SH DESCRIPTION
 15 | .B Zygolophodon
 16 | is an anonymous command-line interface
 17 | for reading microblogging posts,
 18 | chiefly for Mastodon.
 19 | .SS Supported servers
 20 | .IP \(bu 2
 21 | Mastodon (\(>= 3.4)
 22 | .IP \(bu 2
 23 | Iceshrimp, Catodon
 24 | .IP \(bu 2
 25 | Pleroma (\(>= 2.5), Akkoma (\(>= 2.5)
 26 | .IP \(bu 2
 27 | Bluesky
 28 | .SS Supported addresses
 29 | The following address schemes are supported:
 30 | {{ADDRESS-LIST}}
 31 | .SH OPTIONS
 32 | .TP
 33 | .BI "\-\-limit " N
 34 | Request at most
 35 | .I N
 36 | posts.
 37 | The default is 40.
 38 | .TP
 39 | .B \-\-with\-ancestors
 40 | Show also ancestors of the post.
 41 | .TP
 42 | .BR \-h ", " \-\-help
 43 | Show help message and exit.
 44 | .TP
 45 | .B \-\-version
 46 | Show version information and exit.
 47 | .ig
 48 | .SH OUTPUT FORMAT
 49 | TODO
 50 | ..
 51 | .SH ENVIRONMENT
 52 | .TP
 53 | .B PAGER
 54 | If stdout is a terminal, zygolophodon pipes the output through
 55 | .BR $PAGER .
 56 | The default is
 57 | .B pager
 58 | (if it exists)
 59 | or
 60 | .BR more .
 61 | Setting
 62 | .B PAGER
 63 | to the empty string
 64 | or the value
 65 | .B cat
 66 | disables the use of the pager.
 67 | .\" TODO: ZYGOLOPHODON_COLUMNS
 68 | .\" TODO: ZYGOLOPHODON_LINK_SYMBOL
 69 | .\" TODO: ZYGOLOPHODON_PAPERCLIP
 70 | .TP
 71 | .B LESS
 72 | If this variable is unset,
 73 | zygolophodon sets it to
 74 | .B \-FXK
 75 | (which is equivalent to
 76 | .BR "\-\-quit\-if\-one\-screen \-\-no\-init \-\-quit\-on\-intr" ).
 77 | .SH EXAMPLE
 78 | .EX
 79 | {{EXAMPLE}}
 80 | .EE
 81 | 


--------------------------------------------------------------------------------
/private/gen-manpage:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | # Copyright © 2025 Jakub Wilk
  4 | # SPDX-License-Identifier: MIT
  5 | 
  6 | # pylint: disable=missing-module-docstring
  7 | 
  8 | import pathlib
  9 | import re
 10 | import sys
 11 | import types
 12 | import textwrap
 13 | 
 14 | int(0_0)  # Python >= 3.6 is required
 15 | 
 16 | here = pathlib.Path(__file__).parent
 17 | base = here.parent
 18 | 
 19 | def rextract(regexp, text, flags=0):
 20 |     regexp = re.compile(regexp, flags=flags)
 21 |     match = re.search(regexp, text)
 22 |     if match is None:
 23 |         raise LookupError(f'{regexp} not found')
 24 |     return match.group(1)
 25 | 
 26 | def addr_to_roff(addr, itype):
 27 |     out = re.sub('([A-Z]+)', r'\\f(BI\1\\fP', addr)
 28 |     out = f'.B {out}'
 29 |     if itype:
 30 |         out += f' \\fR({itype})'
 31 |     out = f'.IP \\(bu 2\n{out}'
 32 |     return out
 33 | 
 34 | charmap = r'''
 35 | ' \(aq
 36 | ` \`
 37 | “ \(lq
 38 | ” \(rq
 39 | - \-
 40 | ä \[:a]
 41 | ⋮ \&...
 42 | '''
 43 | charmap = {
 44 |     ord(key): value
 45 |     for line in charmap.strip().splitlines()
 46 |     for key, value in [line.split()]
 47 | }
 48 | 
 49 | def main():
 50 |     chunks = types.SimpleNamespace()
 51 |     code = (base / 'lib/cli.py').read_text(encoding='UTF-8')
 52 |     chunks.version = rextract(r"\b__version__ = '(.+)'", code)
 53 |     readme = (base / 'README').read_text(encoding='UTF-8')
 54 |     addrs = rextract(r'^ +ADDRESS(( +(\S.*\n))+)', readme, flags=re.M)
 55 |     roff_addrs = []
 56 |     for match in re.finditer(r'^ +(\S+)(?: [(](\S+)[)])?$', addrs, flags=re.M):
 57 |         addr, itype = match.groups()
 58 |         roff_addrs += [addr_to_roff(addr, itype)]
 59 |     chunks.address_list = str.join('\n', roff_addrs)
 60 |     try:
 61 |         example = rextract(r'(^ +[$] zygolophodon https:.+\n(( .*)?\n)+)', readme, flags=re.M)
 62 |     except LookupError:
 63 |         pass
 64 |     else:
 65 |         example = textwrap.dedent(example)
 66 |         example = re.sub('^([$] )(.*)', r'.RB "\1" "\2"', example)
 67 |         example = example.translate(charmap)
 68 |         example.encode('ASCII')
 69 |         chunks.example = example
 70 |     text = sys.stdin.read()
 71 |     def repl(match):
 72 |         name = match.group(1)
 73 |         name = name.lower().replace('-', '_')
 74 |         return getattr(chunks, name)
 75 |     text = re.sub(r'[{][{]([^\s}]+)[}][}]', repl, text)
 76 |     print(text)
 77 | 
 78 | if __name__ == '__main__':
 79 |     main()
 80 | 
 81 | # vim:ts=4 sts=4 sw=4 et
 82 | 


--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
  1 | Overview
  2 | ========
  3 | 
  4 | **Zygolophodon** is an anonymous command-line interface
  5 | for reading microblogging posts, chiefly for Mastodon.
  6 | 
  7 | The following servers are supported:
  8 | 
  9 | * Mastodon (≥ 3.4)
 10 | * Iceshrimp, Catodon
 11 | * Pleroma (≥ 2.5), Akkoma (≥ 2.5)
 12 | * Bluesky
 13 | 
 14 | Usage
 15 | =====
 16 | 
 17 | .. code:: console
 18 | 
 19 |  $ zygolophodon --help
 20 |  usage: zygolophodon [-h] [--version] [--limit N] [--with-ancestors] ADDRESS
 21 | 
 22 |  positional arguments:
 23 |    ADDRESS           @USER@DOMAIN
 24 |                      USER@DOMAIN
 25 |                      https://DOMAIN/@USER
 26 |                      https://DOMAIN/@USER/media
 27 |                      https://DOMAIN/@USER/with_replies
 28 |                      https://DOMAIN/@USER/NNNNNN
 29 |                      https://DOMAIN/@USER/NNNNNN/embed
 30 |                      https://DOMAIN/tags/TAG
 31 |                      https://DOMAIN/statuses/NNNNNN
 32 |                      https://DOMAIN/redirect/statuses/NNNNNN
 33 |                      https://DOMAIN/users/USER
 34 |                      https://DOMAIN/users/USER/statuses/NNNNNN
 35 |                      https://DOMAIN/notes/IDENT (Iceshrimp)
 36 |                      https://DOMAIN/notice/IDENT (Pleroma)
 37 |                      https://DOMAIN/tag/TAG (Pleroma)
 38 |                      https://bsky.app/profile/USER (Bluesky)
 39 |                      https://bsky.app/profile/USER/post/IDENT (Bluesky)
 40 |                      https://bsky.app/hashtag/TAG (Bluesky)
 41 | 
 42 |  options:
 43 |    -h, --help        show this help message and exit
 44 |    --version         show version information and exit
 45 |    --limit N         request at most N posts (default: 40)
 46 |    --with-ancestors  show also post ancestors
 47 | 
 48 | Example
 49 | =======
 50 | 
 51 | .. code:: console
 52 | 
 53 |  $ zygolophodon https://mastodon.example.org/@cicero/114574908484913091
 54 |  Location: https://mastodon.example.org/@cicero/114574908484913091
 55 |  From: Marcus Tullius Cicero
 56 |  Date: 2025-05-26 15:46:00Z
 57 |  Language: la
 58 | 
 59 |  #LoremIpsum, quia dolor sit, amet, consectetur, adipisci velit, sed
 60 |  quia non numquam eius modi tempora incidunt, ut labore et dolore magnam
 61 |  aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrum
 62 |  exercitationem ullam corporis suscipit laboriosam, nisi ut aliquid ex ea
 63 |  commodi consequatur?
 64 | 
 65 |  * #LoremIpsum: https://mastodon.example.org/tags/LoremIpsum
 66 | 
 67 | Requirements
 68 | ============
 69 | 
 70 | * Python ≥ 3.8
 71 | 
 72 | .. vim:ft=rst ts=3 sts=3 sw=3 et
 73 | 


--------------------------------------------------------------------------------
/t/man-env.t:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # encoding=UTF-8
  3 | 
  4 | # Copyright © 2025 Jakub Wilk
  5 | # SPDX-License-Identifier: MIT
  6 | 
  7 | # pylint: disable=missing-module-docstring
  8 | 
  9 | import ast
 10 | import pathlib
 11 | import re
 12 | import sys
 13 | import types
 14 | 
 15 | basedir = pathlib.Path(__file__).parent.parent
 16 | sys.path[:0] = [str(basedir)]
 17 | 
 18 | # pylint: disable-next=wrong-import-position
 19 | from lib.utils import compose
 20 | 
 21 | def _extract_src_vars(path):
 22 |     with open(path, encoding='UTF-8') as file:
 23 |         src = file.read()
 24 |     code = compile(src, path, 'exec')
 25 |     mod = types.ModuleType('_')
 26 |     exec(code, mod.__dict__)  # pylint: disable=exec-used
 27 |     mod_node = ast.parse(src, path)
 28 |     for node in ast.walk(mod_node):
 29 |         if not isinstance(node, ast.Call):
 30 |             continue
 31 |         func = node.func
 32 |         if len(node.args) < 1:
 33 |             continue
 34 |         if not isinstance(node.args[0], ast.Constant):
 35 |             continue
 36 |         arg = node.args[0].value
 37 |         if isinstance(func, ast.Name) and func.id == 'Symbol':
 38 |             yield mod.Symbol.get_var(arg)  # pylint: disable=no-member
 39 |             continue
 40 |         if isinstance(func, ast.Attribute) and func.attr == 'getenv':
 41 |             yield arg
 42 |             continue
 43 | 
 44 | @compose(set)
 45 | def extract_src_vars():
 46 |     libdir = basedir / 'lib'
 47 |     for path in libdir.glob('*.py'):
 48 |         yield from _extract_src_vars(path)
 49 | 
 50 | def _extract_man_vars_section():
 51 |     path = basedir / 'doc/zygolophodon.1.in'
 52 |     with open(path, encoding='UTF-8') as file:
 53 |         src = file.read()
 54 |     match = re.search(r'\n[.]SH ENVIRONMENT\n(.+?\n)[.]SH ', src, re.DOTALL)
 55 |     [src] = match.groups()
 56 |     return src
 57 | 
 58 | @compose(set)
 59 | def _extract_man_vars(regexp):
 60 |     src = _extract_man_vars_section()
 61 |     for match in re.finditer(regexp, src):
 62 |         [var] = match.groups()
 63 |         yield var
 64 | 
 65 | def extract_man_vars():
 66 |     regexp = re.compile(r'^[.]TP\n[.]B (\S+)$', re.MULTILINE)
 67 |     return _extract_man_vars(regexp)
 68 | 
 69 | def extract_man_todo_vars():
 70 |     regexp = re.compile(r'.\" TODO: (ZYGOLOPHODON_[A-Z_]+)$', re.MULTILINE)
 71 |     return _extract_man_vars(regexp)
 72 | 
 73 | def ok(cond, name, todo=False):
 74 |     status = ['not ok', 'ok'][cond]
 75 |     todo = ['# TODO'] * (todo and not cond)
 76 |     print(status, '-', name, *todo)
 77 | 
 78 | def main():
 79 |     if '--installed' in sys.argv:
 80 |         print('1..0 # SKIP post-install testing not supported')
 81 |         return
 82 |     src_vars = extract_src_vars()
 83 |     man_vars = extract_man_vars()
 84 |     man_todo_vars = extract_man_todo_vars()
 85 |     m = len(src_vars) + len(man_vars)
 86 |     print(f'1..{m}')
 87 |     for var in src_vars:
 88 |         todo = var in man_todo_vars
 89 |         ok(var in man_vars, f'{var} in man', todo=todo)
 90 |     for var in man_vars:
 91 |         ok(var in src_vars, f'{var} in src')
 92 | 
 93 | if __name__ == '__main__':
 94 |     main()
 95 | 
 96 | # vim:ts=4 sts=4 sw=4 et ft=python
 97 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
  1 | name: CI
  2 | permissions: {}
  3 | on:
  4 | - push
  5 | - pull_request
  6 | jobs:
  7 | 
  8 |   main:
  9 |     runs-on: ${{matrix.os}}
 10 |     strategy:
 11 |       matrix:
 12 |         include:
 13 |         - python: '3.8'
 14 |           os: ubuntu-22.04
 15 |         - python: '3.9'
 16 |           os: ubuntu-22.04
 17 |         - python: '3.10'
 18 |           os: ubuntu-22.04
 19 |         - python: '3.11'
 20 |           os: ubuntu-22.04
 21 |         - python: '3.12'
 22 |           os: ubuntu-22.04
 23 |         - python: '3.13'
 24 |           os: ubuntu-24.04
 25 |         - python: '3.14'
 26 |           os: ubuntu-24.04
 27 |     steps:
 28 |     - uses: actions/checkout@v4
 29 |     - name: set up Python ${{matrix.python}}
 30 |       uses: actions/setup-python@v5
 31 |       with:
 32 |         python-version: ${{matrix.python}}
 33 |     - name: run tests
 34 |       run: |
 35 |         make test verbose=1
 36 |     - name: run network tests
 37 |       run: |
 38 |         prove -v t/network.t '::' --network
 39 |     - name: install
 40 |       run: |
 41 |         make install PREFIX=~/.local
 42 |     - name: post-install smoke-test
 43 |       run: |
 44 |         cd /
 45 |         zygolophodon --version
 46 |     - name: post-install man page check
 47 |       env:
 48 |         MANPATH: /home/runner/.local/share/man
 49 |         MANWIDTH: 80
 50 |       run: |
 51 |         cd /
 52 |         man 1 zygolophodon | grep -A 10 -w ZYGOLOPHODON
 53 |     - name: run post-install tests
 54 |       run: |
 55 |         rm zygolophodon doc/*.1 doc/*.1.in
 56 |         make test-installed verbose=1
 57 |         git restore .
 58 |     - name: run pydiatra
 59 |       run: |
 60 |         python3 -m pip install pydiatra
 61 |         python3 -m pydiatra -v .
 62 |     - name: run pyflakes
 63 |       run: |
 64 |         python3 -m pip install pyflakes
 65 |         python3 -m pyflakes .
 66 |     - name: run pylint
 67 |       run: |
 68 |         python3 -m pip install pylint
 69 |         python3 -m pylint $(grep -rl '/env python3$' .) lib/*.py
 70 |     - name: check README syntax
 71 |       run: |
 72 |         python3 -m pip install restructuredtext-lint pygments
 73 |         rst-lint --level=info --encoding=UTF-8 README
 74 | 
 75 |   nonpython:
 76 |     strategy:
 77 |       matrix:
 78 |         os:
 79 |         - ubuntu-22.04
 80 |         - ubuntu-24.04
 81 |     runs-on: ${{matrix.os}}
 82 |     steps:
 83 |     - uses: actions/checkout@v4
 84 |     - name: set up APT
 85 |       run: |
 86 |         printf 'Apt::Install-Recommends "false";\n' | sudo tee -a /etc/apt/apt.conf
 87 |         sudo apt-get update
 88 |     - name: install perlcritic
 89 |       run: |
 90 |         sudo apt-get install libperl-critic-perl
 91 |     - name: run perlcritic
 92 |       run: |
 93 |         perlcritic $(grep -rl '/env perl$' .)
 94 |     - name: regenerate README
 95 |       run: |
 96 |         private/update-readme
 97 |         git diff --exit-code
 98 |     - name: install mandoc
 99 |       run: |
100 |         sudo apt-get install mandoc
101 |     - name: build man page
102 |       run: |
103 |         make doc/zygolophodon.1
104 |     - name: run mandoc lint
105 |       run: |
106 |         mandoc -T lint doc/*.1
107 | 
108 | # vim:ts=2 sts=2 sw=2 et
109 | 


--------------------------------------------------------------------------------
/lib/stdout.py:
--------------------------------------------------------------------------------
  1 | # Copyright © 2022-2025 Jakub Wilk
  2 | # SPDX-License-Identifier: MIT
  3 | 
  4 | '''
  5 | sys.stdout wrapper
  6 | '''
  7 | 
  8 | import contextlib
  9 | import http
 10 | import inspect
 11 | import io
 12 | import os
 13 | import shutil
 14 | import subprocess
 15 | import sys
 16 | 
 17 | def find_command(command):
 18 |     if shutil.which(command):
 19 |         return command
 20 |     return None
 21 | 
 22 | class StdOut(io.TextIOBase):
 23 | 
 24 |     def _install_pager(self):
 25 |         if not sys.__stdout__.isatty():
 26 |             return
 27 |         cmdline = os.getenv('PAGER')
 28 |         if cmdline in {'', 'cat'}:
 29 |             return
 30 |         cmdline = (cmdline
 31 |             or find_command('pager')  # Debian:
 32 |             # https://www.debian.org/doc/debian-policy/ch-customized-programs.html#editors-and-pagers
 33 |             or 'more'  # POSIX:
 34 |             # https://pubs.opengroup.org/onlinepubs/007904975/utilities/man.html#tag_04_85_08
 35 |         )
 36 |         env = None
 37 |         if os.getenv('LESS') is None:
 38 |             env = dict(env or os.environ, LESS='-FXK')
 39 |         self._pager = subprocess.Popen(cmdline, shell=True, stdin=subprocess.PIPE, env=env)  # pylint: disable=consider-using-with
 40 |         self._stdout = io.TextIOWrapper(self._pager.stdin,
 41 |             encoding=sys.__stdout__.encoding,
 42 |             errors=sys.__stdout__.errors,
 43 |             line_buffering=True,
 44 |         )
 45 | 
 46 |     def __init__(self):
 47 |         super().__init__()
 48 |         self._newlines = 0
 49 |         self._pager = None
 50 |         self._stdout = sys.__stdout__
 51 |         self._install_pager()
 52 | 
 53 |     def _get_fp(self):
 54 |         if http.client.HTTPConnection.debuglevel:
 55 |             # Eww, FIXME in Python?
 56 |             # http.client prints debug messages to stdout.
 57 |             # Let's redirect them to stderr:
 58 |             for frameinfo in inspect.stack(context=0):
 59 |                 if frameinfo.filename == http.client.__file__:
 60 |                     return sys.__stderr__
 61 |         return self._stdout
 62 | 
 63 |     def write(self, s):
 64 |         fp = self._get_fp()
 65 |         if fp is self._stdout:
 66 |             if s == '':
 67 |                 return
 68 |             if s == '\n':
 69 |                 if self._newlines == 2:
 70 |                     return
 71 |                 self._newlines += 1
 72 |             else:
 73 |                 self._newlines = int(s[-1] == '\n')
 74 |         fp.write(s)
 75 | 
 76 |     def flush(self):
 77 |         self._get_fp().flush()
 78 | 
 79 |     def isatty(self):
 80 |         return sys.__stdout__.isatty()
 81 | 
 82 |     def __exit__(self, exc_type, exc_value, traceback):
 83 |         ret = super().__exit__(exc_type, exc_value, traceback)
 84 |         if self._pager:
 85 |             self._pager.__exit__(exc_type, exc_value, traceback)
 86 |             if exc_type is None and self._pager.returncode != 0:
 87 |                 msg = 'pager failed'
 88 |                 raise RuntimeError(msg)
 89 |         self._pager = None
 90 |         self._stdout = None
 91 |         return ret
 92 | 
 93 | @contextlib.contextmanager
 94 | def install():
 95 |     assert sys.stdout is sys.__stdout__
 96 |     try:
 97 |         with StdOut() as sys.stdout:
 98 |             yield
 99 |     finally:
100 |         sys.stdout = sys.__stdout__
101 | 
102 | __all__ = ['install']
103 | 
104 | # vim:ts=4 sts=4 sw=4 et
105 | 


--------------------------------------------------------------------------------
/lib/text.py:
--------------------------------------------------------------------------------
  1 | # Copyright © 2022-2025 Jakub Wilk
  2 | # SPDX-License-Identifier: MIT
  3 | 
  4 | '''
  5 | text support
  6 | '''
  7 | 
  8 | import os
  9 | import re
 10 | import textwrap
 11 | import unicodedata
 12 | 
 13 | columns = int(os.getenv('ZYGOLOPHODON_COLUMNS', '78'))
 14 | 
 15 | def wcwidth(ch):
 16 |     # poor man's wcwidth(3)
 17 |     wd = unicodedata.east_asian_width(ch)
 18 |     return 1 + (wd in {'F', 'W'})
 19 | 
 20 | def wcswidth(s):
 21 |     # poor man's wcswidth(3)
 22 |     return sum(map(wcwidth, s))
 23 | 
 24 | class Symbol:
 25 | 
 26 |     @classmethod
 27 |     def get_var(cls, name):
 28 |         name = name.upper().replace(' ', '_')
 29 |         return f'ZYGOLOPHODON_{name}'
 30 | 
 31 |     def __init__(self, name):
 32 |         var = self.get_var(name)
 33 |         text = os.getenv(var, '*')
 34 |         if match := re.fullmatch('(.*):([0-9]+)', text):
 35 |             (text, width) = match.groups()
 36 |             width = int(width)
 37 |         else:
 38 |             width = wcswidth(text)
 39 |         self._text = text
 40 |         self.width = width
 41 | 
 42 |     def __str__(self):
 43 |         return self._text
 44 | 
 45 | class symbols:
 46 |     link = Symbol('link symbol')
 47 |     paperclip = Symbol('paperclip')
 48 | 
 49 | def isolate_bidi(text):
 50 |     '''
 51 |     * If there are any explicit BDI formatting characters in the text
 52 |       (except PDF, which is harmless by itself),
 53 |       wrap the text with FSI + PDI.
 54 |     * Remove any excess PDIs.
 55 |     * Append PDIs to close any stray isolate initiators.
 56 |     '''
 57 |     #
 58 |     # Documentation: https://unicode.org/reports/tr9/
 59 |     # ("Unicode Bidirectional Algorithm")
 60 |     #
 61 |     n = None  # the number of unclosed isolate initiators,
 62 |               # or None if the text doesn't need any BiDi treatment
 63 |     def repl(match):
 64 |         nonlocal n
 65 |         if n is None:
 66 |             n = 0
 67 |         s = match.group()
 68 |         if s in '\N{LRI}\N{RLI}\N{FSI}':
 69 |             n += 1
 70 |         elif s == '\N{PDI}':
 71 |             if n == 0:
 72 |                 return ''
 73 |             n -= 1
 74 |         return s
 75 |     s = re.sub('[\N{LRE}\N{RLE}\N{LRO}\N{RLO}\N{LRI}\N{RLI}\N{FSI}\N{PDI}]', repl, text)
 76 |     if n is not None:
 77 |         pdi = (n + 1) * '\N{PDI}'
 78 |         s = f'\N{FSI}{s}{pdi}'
 79 |     return s
 80 | 
 81 | def wrap_text(text, indent='', protect=None):
 82 |     # FIXME? BiDi-aware terminals consider newlines as paragraph separators,
 83 |     # so line-wrapping may disrupt BiDi.
 84 |     text = text.splitlines()
 85 |     for line in text:
 86 |         yield wrap_line(line, indent=indent, protect=protect)
 87 | 
 88 | def wrap_line(line, indent='', protect=None):
 89 |     tokens = []
 90 |     if protect:
 91 |         [prot_start, prot_end] = protect
 92 |         assert prot_start
 93 |         assert prot_end
 94 |         assert '\N{SUB}' not in (prot_start + prot_end)
 95 |         prot_re = re.compile(
 96 |             '\N{SUB}+|'
 97 |             + re.escape(prot_start)
 98 |             + '(.*?)'
 99 |             + re.escape(prot_end)
100 |         )
101 |         def subst(match):
102 |             nonlocal tokens
103 |             token = match.group()
104 |             tokens += [token]
105 |             n = len(token)
106 |             if match.group(1) is not None:
107 |                 n -= 2
108 |             return '\N{SUB}' * n
109 |         line = re.sub(prot_re, subst, line)
110 |     lines = textwrap.wrap(line,
111 |         width=columns,
112 |         initial_indent=indent,
113 |         subsequent_indent=indent,
114 |         break_long_words=False,
115 |     )
116 |     lines = str.join('\n', lines)
117 |     if tokens:
118 |         tokens.reverse()
119 |         def unsubst(match):
120 |             del match
121 |             return tokens.pop()
122 |         lines = re.sub('\N{SUB}+', unsubst, lines)
123 |     assert not tokens
124 |     return lines
125 | 
126 | __all__ = [
127 |     'Symbol',
128 |     'columns',
129 |     'isolate_bidi',
130 |     'symbols',
131 |     'wrap_text',
132 | ]
133 | 
134 | # vim:ts=4 sts=4 sw=4 et
135 | 


--------------------------------------------------------------------------------
/lib/html.py:
--------------------------------------------------------------------------------
  1 | # Copyright © 2022-2025 Jakub Wilk
  2 | # SPDX-License-Identifier: MIT
  3 | 
  4 | '''
  5 | HTML parsing
  6 | '''
  7 | 
  8 | import html.parser
  9 | import re
 10 | import sys
 11 | 
 12 | import lib.text
 13 | 
 14 | class HTMLParser(html.parser.HTMLParser):
 15 | 
 16 |     def __init__(self):
 17 |         super().__init__()
 18 |         class state:
 19 |             paras = []
 20 |             text = ''
 21 |             a_text = ''
 22 |             a_href = None
 23 |             a_depth = 0
 24 |             footnotes = {}
 25 |         self.z_state = state
 26 | 
 27 |     # FIXME: Add proper support for:
 28 |     # * <ol>, <ul>, <li>
 29 |     # * <blockquote>
 30 |     # * <pre>
 31 | 
       32 |     def handle_starttag(self, tag, attrs):
       33 |         st = self.z_state
       34 |         if tag in {'p', 'ol', 'ul', 'blockquote', 'pre'}:
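            # Block-level tags start a new paragraph: force-close any open
            # <a> first, then flush the text collected so far into st.paras.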
       35 |             while st.a_depth > 0:
       36 |                 self.handle_endtag('a')
       37 |             if st.text:
       38 |                 st.paras += [st.text]
       39 |                 st.text = ''
       40 |             return
       41 |         if tag == 'br':
       42 |             if st.a_depth > 0:
       43 |                 st.a_text += ' '
       44 |             else:
       45 |                 st.text += '\n'
       46 |             return
       47 |         if tag == 'a':
       48 |             if st.a_depth == 0:
       49 |                 href = dict(attrs).get('href', '')
       50 |                 # Let's normalize the URL somewhat,
       51 |                 # as per .
       52 |                 href = re.sub(r'\A[\0-\40]+|[\0-\40]+\Z|[\n\t]+', '', href)
       53 |                 st.a_href = href
       54 |             st.a_depth += 1
       55 |             return
       56 | 
       57 |     def handle_endtag(self, tag):
       58 |         st = self.z_state
       59 |         if tag == 'a':
       60 |             if st.a_depth > 0:
       61 |                 st.a_depth -= 1
       62 |             if st.a_depth == 0:
       63 |                 text = st.a_text
       64 |                 href = st.a_href
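                # Hashtag (#foo) and mention (@user@host) links are kept inline
                # and their targets are recorded in st.footnotes, to be listed
                # after the post text; any other link becomes [text]<url>, or a
                # bare <url> when the link text is the URL itself.  The STX/ETX
                # markers keep these spans intact during word wrapping.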
       65 |                 if re.fullmatch(r'#[\w_]+|@[\w_.-]+(@[\w.-]+)?', text) and st.footnotes.get(text, href) == href:
       66 |                     # The above should be close enough to Mastodon's own regexps:
       67 |                     # + HASHTAG_RE in ;
       68 |                     # + MENTION_RE in .
       69 |                     assert '\n' not in text
       70 |                     st.text += f'\N{STX}{text}\N{ETX}'
       71 |                     st.footnotes[text] = href
       72 |                 else:
       73 |                     if href in {text, f'http://{text}', f'https://{text}'}:
       74 |                         text = ''
       75 |                     else:
       76 |                         text = f'[{text}]'
       77 |                     assert '\n' not in text
       78 |                     st.text += f'{text}<\N{STX}{href}\N{ETX}>'
       79 |                 st.a_href = ''
       80 |                 st.a_text = ''
       81 |             return
       82 |         if tag == 'li':
       83 |             if st.a_depth > 0:
       84 |                 st.a_text += ' '
       85 |             else:
       86 |                 st.text += '\n'
       87 |             return
       88 | 
       89 |     def handle_data(self, data):
       90 |         st = self.z_state
       91 |         data = re.sub('[\N{STX}\N{ETX}]', '\N{REPLACEMENT CHARACTER}', data)
       92 |         data = re.sub(r'[^\S\N{NBSP}\N{NARROW NO-BREAK SPACE}]+', ' ', data)
       93 |         if st.a_depth > 0:
       94 |             st.a_text += data
       95 |         else:
       96 |             st.text += data
       97 | 
       98 |     def close(self):
       99 |         super().close()
      100 |         self.handle_starttag('p', {})
      101 | 
      102 |     if sys.version_info < (3, 10):
      103 |         def error(self, message):
      104 |             # hopefully not reachable
      105 |             raise RuntimeError(message)
      106 | 
      107 | def fmt_html(data, *, fmt_url=str):
      108 |     parser = HTMLParser()
      109 |     parser.feed(data)
      110 |     parser.close()
      111 |     lines = []
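    # Wrap each paragraph to the configured width (ZYGOLOPHODON_COLUMNS,
    # 78 columns by default); spans delimited by STX/ETX are protected, so
    # URLs, hashtags and mentions never get broken across lines.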
      112 |     for para in parser.z_state.paras:
      113 |         lines += lib.text.wrap_text(para, protect='\N{STX}\N{ETX}')
      114 |         lines += ['']
      115 |     text = str.join('\n', lines)
      116 |     def repl(match):
      117 |         [url] = match.groups()
      118 |         return fmt_url(url)
      119 |     text = re.sub('\N{STX}(.*?)\N{ETX}', repl, text, flags=re.DOTALL)
      120 |     lines = [text]
      121 |     link_symbol = lib.text.symbols.link
      122 |     if parser.z_state.footnotes:
      123 |         for footnote, url in parser.z_state.footnotes.items():
      124 |             url = fmt_url(url)
      125 |             lines += [f'{link_symbol} {footnote}: {url}']
      126 |     return str.join('\n', lines)
      127 | 
      128 | __all__ = ['fmt_html']
      129 | 
      130 | # vim:ts=4 sts=4 sw=4 et
      131 | 
      
      
      --------------------------------------------------------------------------------
      /lib/www.py:
      --------------------------------------------------------------------------------
        1 | # Copyright © 2022-2025 Jakub Wilk 
        2 | # SPDX-License-Identifier: MIT
        3 | 
        4 | '''
        5 | HTTP client
        6 | '''
        7 | 
        8 | import errno
        9 | import functools
       10 | import gzip
       11 | import http.client
       12 | import json
       13 | import re
       14 | import socket
       15 | import ssl
       16 | import sys
       17 | import urllib.parse
       18 | import urllib.request
       19 | 
       20 | from lib.utils import (
       21 |     Dict,
       22 | )
       23 | 
       24 | def _fmt_url_error(exc):
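    # Boil a urllib error down to a single line, preferring symbolic error
    # names, e.g. '[EAI_NONAME] ...' for DNS failures or '[ECONNREFUSED] ...'
    # for refused connections.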
       25 |     if isinstance(exc, urllib.error.HTTPError):
       26 |         return str(exc)
       27 |     exc = exc.reason
       28 |     if isinstance(exc, socket.gaierror):
       29 |         for key, value in vars(socket).items():
       30 |             if key[:4] == 'EAI_' and value == exc.errno:
       31 |                 return f'[{key}] {exc.strerror}'
       32 |     if isinstance(exc, ssl.SSLError):
       33 |         pass
       34 |     elif isinstance(exc, OSError):
       35 |         try:
       36 |             ec = errno.errorcode[exc.errno]
       37 |         except LookupError:
       38 |             pass
       39 |         else:
       40 |             return f'[{ec}] {exc.strerror}'
       41 |     return str(exc)
       42 | 
       43 | class URLError(RuntimeError):
       44 | 
       45 |     def __init__(self, url, reason):
       46 |         self.url = url
       47 |         self.reason = reason
       48 | 
       49 |     def __str__(self):
       50 |         reason = self.reason
       51 |         if isinstance(reason, Exception):
       52 |             reason = _fmt_url_error(reason)
       53 |         return reason
       54 | 
       55 | class UserAgent:
       56 | 
       57 |     headers = {
       58 |         'User-Agent': 'zygolophodon (https://github.com/jwilk/zygolophodon)',
       59 |         'Accept-Encoding': 'gzip',
       60 |     }
       61 | 
       62 |     @classmethod
       63 |     def _build_opener(cls):
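        # urllib's default opener would advertise its own User-Agent;
        # replace the default headers wholesale with the ones declared
        # above (custom User-Agent + gzip support).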
       64 |         handlers = ()
       65 |         if sys.version_info < (3, 13):
       66 |             # Work-around for 
       67 |             # ("urllib.request.urlopen() no longer respects the
       68 |             # http.client.HTTPConnection.debuglevel").
       69 |             handlers = [
       70 |                 Handler(debuglevel=http.client.HTTPConnection.debuglevel)
       71 |                 for Handler in [urllib.request.HTTPHandler, urllib.request.HTTPSHandler]
       72 |             ]
       73 |         opener = urllib.request.build_opener(*handlers)
       74 |         opener.addheaders[:] = cls.headers.items()
       75 |         return opener
       76 | 
       77 |     @classmethod
       78 |     def get(cls, url):
       79 |         request = urllib.request.Request(url)
       80 |         opener = cls._build_opener()
       81 |         try:
       82 |             response = opener.open(request)
       83 |         except urllib.error.HTTPError as exc:
       84 |             if Response.is_json(exc):
       85 |                 response = Response(exc, url=url)
       86 |                 try:
       87 |                     data = json.loads(response.data, object_hook=Dict)
       88 |                 except (json.JSONDecodeError, UnicodeError):
       89 |                     pass
       90 |                 else:
       91 |                     cls.handle_json_error(exc, data)
       92 |             raise URLError(url, exc) from exc
       93 |         except urllib.error.URLError as exc:
       94 |             raise URLError(url, exc) from exc
       95 |         return Response(response, url=url)
       96 | 
       97 |     @classmethod
       98 |     def handle_json_error(cls, exc, data):
       99 |         del exc, data
      100 | 
      101 | class Response:
      102 | 
      103 |     def __init__(self, response, *, url):
      104 |         with response:
      105 |             content_encoding = response.getheader('Content-Encoding', 'identity')
      106 |             data = response.read()
      107 |         if content_encoding == 'gzip':
      108 |             data = gzip.decompress(data)
      109 |         elif content_encoding == 'identity':
      110 |             pass
      111 |         else:
      112 |             msg = f'unexpected Content-Encoding: {content_encoding!r}'
      113 |             raise URLError(url, msg)
      114 |         self.data = data
      115 |         self.headers = response.headers
      116 |         self.url = url
      117 | 
      118 |     def is_json(self):
      119 |         ct = self.headers.get('Content-Type', '')
      120 |         match = re.match(r'application/json(;|\Z)', ct)
      121 |         return bool(match)
      122 | 
      123 |     @property
      124 |     def json(self):
      125 |         if not self.is_json():
      126 |             msg = 'error: non-JSON content'
      127 |             raise URLError(self.url, msg)
      128 |         try:
      129 |             data = json.loads(self.data, object_hook=Dict)
      130 |         except (json.JSONDecodeError, UnicodeError) as exc:
      131 |             msg = f'JSON decoding error: {exc}'
      132 |             raise URLError(self.url, msg) from exc
      133 |         return data
      134 | 
      135 |     @property
      136 |     def links(self):
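        # Parse the Link response header, e.g.
        #   <https://example.net/api/...?max_id=42>; rel="next", <...>; rel="prev"
        # into a {rel: URL} mapping.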
      137 |         s = self.headers.get('Link', '')
      138 |         data = {}
      139 |         regexp = re.compile(r'<([^>]+)>; rel="(\w+)"(?:, |\Z)')
      140 |         i = 0
      141 |         while i < len(s):
      142 |             match = regexp.match(s, i)
      143 |             if not match:
      144 |                 raise URLError(self.url, f'cannot parse Link header field: {s!r}')
      145 |             (value, key) = match.groups()
      146 |             data[key] = value
      147 |             i = match.end()
      148 |         return data
      149 | 
      150 | urlquote = functools.partial(urllib.parse.quote, safe='')
      151 | 
      152 | __all__ = [
      153 |     'URLError',
      154 |     'UserAgent',
      155 |     'Response',
      156 |     'urlquote',
      157 | ]
      158 | 
      159 | # vim:ts=4 sts=4 sw=4 et
      160 | 
      
      
      --------------------------------------------------------------------------------
      /lib/inst.py:
      --------------------------------------------------------------------------------
        1 | # Copyright © 2022-2025 Jakub Wilk 
        2 | # SPDX-License-Identifier: MIT
        3 | 
        4 | '''
        5 | microblogging instances
        6 | '''
        7 | 
        8 | import abc
        9 | import re
       10 | import types
       11 | import urllib.parse
       12 | 
       13 | from lib.utils import (
       14 |     Dict,
       15 |     abstractattribute,
       16 |     expand_template,
       17 | )
       18 | 
       19 | from lib.www import urlquote
       20 | 
       21 | class Instance(abc.ABC):
       22 | 
       23 |     types = []
       24 | 
       25 |     tag_url_template = abstractattribute()
       26 | 
       27 |     post_id_regexp = abstractattribute()
       28 | 
       29 |     addr_parser = abstractattribute()
       30 | 
       31 |     def __init__(self, url, data=None):
       32 |         self.url = url
       33 |         self.data = data
       34 | 
       35 |     @classmethod
       36 |     def parse_addr(cls, addr):
       37 |         match = cls.addr_parser.parse(addr)  # pylint: disable=no-member
       38 |         if not match:
       39 |             return None
       40 |         if match.user:
       41 |             match.user = urllib.parse.unquote(match.user)
       42 |         if match.tag:
       43 |             match.tag = urllib.parse.unquote(match.tag)
       44 |         match.url = f'https://{match.domain}'
       45 |         del match.domain
       46 |         return match
       47 | 
       48 |     @classmethod
       49 |     def connect(cls, url):
       50 |         return cls(url)
       51 | 
       52 |     @abc.abstractmethod
       53 |     def fetch_user_by_name(self, name):
       54 |         pass
       55 | 
       56 |     @abc.abstractmethod
       57 |     def fetch_user_posts(self, user, *, limit, **params):
       58 |         pass
       59 | 
       60 |     @abc.abstractmethod
       61 |     def fetch_tag_posts(self, tag_name, *, limit, **params):
       62 |         pass
       63 | 
       64 |     @abc.abstractmethod
       65 |     def fetch_post(self, post_id):
       66 |         pass
       67 | 
       68 |     @abc.abstractmethod
       69 |     def fetch_post_context(self, post_id, *, ancestors=True, descendants=True):
       70 |         pass
       71 | 
       72 |     def get_tag_url(self, tag_name):
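        # Build the web URL of a hashtag page by expanding the per-instance
        # tag_url_template (with the tag name percent-encoded); returns None
        # for instance types that have no such page.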
       73 |         template = self.tag_url_template
       74 |         if template is None:
       75 |             return None
       76 |         q_tag = urlquote(tag_name)
       77 |         path = expand_template(template, tag=q_tag)
       78 |         return f'{self.url}{path}'
       79 | 
       80 |     def fetch_tag_info(self, tag_name):
       81 |         return Dict(
       82 |             url=self.get_tag_url(tag_name),
       83 |             history=None,
       84 |         )
       85 | 
       86 |     @classmethod
       87 |     def register(cls, instance_type):
       88 |         cls.types += [instance_type]
       89 |         return instance_type
       90 | 
       91 | class AddrParser:
       92 | 
       93 |     _groups = set()
       94 | 
       95 |     def __init__(self, *templates, discard_prefixes=()):
       96 |         self._discard_prefixes = discard_prefixes
       97 |         self._raw_templates = templates
       98 |         # These are set later by __set_name__(),
       99 |         # when assigned to an Instance subclass:
      100 |         self.templates = ...
      101 |         self._post_id_regexp = ...
      102 |         self._regexps = ...
      103 | 
      104 |     def __set_name__(self, inst_type, _attr_name):
      105 |         self._post_id_regexp = inst_type.post_id_regexp
      106 |         self._regexps = []
      107 |         self.templates = []
      108 |         for template in self._raw_templates:
      109 |             if template[0] == '/':
      110 |                 template = f'https://DOMAIN{template}'
      111 |             self._add_template(template)
      112 |         del self._discard_prefixes
      113 |         del self._raw_templates
      114 | 
      115 |     def _add_template(self, template):
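        # Translate a human-readable address template such as
        # https://DOMAIN/@USER/NNNNNN into a regexp: the uppercase
        # placeholders become named groups (NNNNNN is stored as IDENT),
        # everything else has to match literally.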
      116 |         self.templates += [template]
      117 |         group2regexp = dict(
      118 |             domain=r'[^@/?#\0-\40]+',
      119 |             user=r'[^/?#\0-\40]+',
       120 |             # FIXME? This is much more lax than USERNAME_RE in 
      121 |             tag=r'[^/?#\0-\40]+',
      122 |             ident=self._post_id_regexp,
      123 |         )
      124 |         discard = self._discard_prefixes
      125 |         def repl_punct(match):
      126 |             s = match.group()
      127 |             try:
      128 |                 if s != '.' and re.fullmatch(s, s):
      129 |                     return s
      130 |             except re.error:
      131 |                 pass
      132 |             return re.escape(s)
      133 |         template = re.sub(r'\W', repl_punct, template)
      134 |         if discard:
      135 |             discard_re = str.join('|', map(re.escape, discard))
      136 |             discard_re = f'(?:{discard_re})'
      137 |             template = template.replace('/DOMAIN/', f'/DOMAIN/(?:{discard_re}/)*')
      138 |         def repl_ident(match):
      139 |             s = match.group()
      140 |             if match.start() == 0 and s == 'https':
      141 |                 return s
      142 |             if s.isupper():
      143 |                 group = s
      144 |                 if group == 'NNNNNN':
      145 |                     group = 'IDENT'
      146 |                 regexp = group2regexp[group.lower()]
      147 |             else:
      148 |                 group = s
      149 |                 regexp = re.escape(s)
      150 |             self._groups.add(group.lower())
      151 |             return f'(?P<{group}>{regexp})'
       152 |         regexp = re.sub(r'(?


--------------------------------------------------------------------------------
/lib/cli.py:
--------------------------------------------------------------------------------
        2 | # SPDX-License-Identifier: MIT
        3 | 
        4 | '''
        5 | zygolophodon CLI
        6 | '''
        7 | 
        8 | import argparse
        9 | import functools
       10 | import http.client
       11 | import os
       12 | import re
       13 | import signal
       14 | import sys
       15 | import types
       16 | import urllib.parse
       17 | 
       18 | import lib.compat
       19 | import lib.html
       20 | import lib.inst
       21 | import lib.stdout
       22 | import lib.text
       23 | import lib.utils
       24 | import lib.www
       25 | 
       26 | import lib.mastodon
       27 | import lib.bluesky
       28 | 
       29 | __version__ = '0.1'
       30 | 
       31 | prog = argparse.ArgumentParser().prog
       32 | 
       33 | def fatal(msg):
       34 |     print(f'{prog}: {msg}', file=sys.stderr)
       35 |     sys.exit(1)
       36 | 
       37 | def fmt_url(url):
       38 |     if sys.stdout.isatty():
       39 |         return re.sub('(.)', r'_\b\1', url)
       40 |     return url
       41 | 
       42 | def fmt_user(account):
       43 |     name = lib.text.isolate_bidi(account.display_name)
       44 |     return f'{name} <{fmt_url(account.url)}>'.lstrip()
       45 | 
       46 | def fmt_date(d):
       47 |     d = lib.compat.datetime_fromisoformat(d)
       48 |     d = d.replace(microsecond=0)
       49 |     d = str(d)
       50 |     d = re.sub('[+]00:00$', 'Z', d)
       51 |     return d
       52 | 
       53 | fmt_html = functools.partial(lib.html.fmt_html, fmt_url=fmt_url)
       54 | 
       55 | class VersionAction(argparse.Action):
       56 |     '''
       57 |     argparse --version action
       58 |     '''
       59 | 
       60 |     def __init__(self, option_strings, dest=argparse.SUPPRESS):
       61 |         super().__init__(
       62 |             option_strings=option_strings,
       63 |             dest=dest,
       64 |             nargs=0,
       65 |             help='show version information and exit'
       66 |         )
       67 | 
       68 |     def __call__(self, parser, namespace, values, option_string=None):
       69 |         del namespace, values, option_string
       70 |         print(f'{parser.prog} {__version__}')
       71 |         print('+ Python {0}.{1}.{2}'.format(*sys.version_info))  # pylint: disable=consider-using-f-string
       72 |         parser.exit()
       73 | 
       74 | def pint(s):
       75 |     n = int(s)
       76 |     if n > 0:
       77 |         return n
       78 |     raise ValueError
       79 | pint.__name__ = 'positive int'
       80 | 
       81 | def xmain():
       82 |     ap = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
       83 |     if sys.version_info < (3, 10):
       84 |         # https://bugs.python.org/issue9694
       85 |         ap._optionals.title = 'options'  # pylint: disable=protected-access
       86 |     ap.add_argument('--version', action=VersionAction)
       87 |     default_limit = 40
       88 |     ap.add_argument('--limit', metavar='N', type=pint, default=default_limit,
       89 |         help=f'request at most N posts (default: {default_limit})'
       90 |     )
       91 |     ap.add_argument('--with-ancestors', action='store_true',
       92 |         help='show also post ancestors'
       93 |     )
       94 |     ap.add_argument('--debug-http', action='store_true', help=argparse.SUPPRESS)
       95 |     addr_help = []
       96 |     for instance_type in lib.inst.Instance.types:
       97 |         for template in instance_type.addr_parser.templates:
       98 |             line = template
       99 |             if instance_type is not lib.mastodon.Mastodon:
      100 |                 line += f' ({instance_type.__name__})'
      101 |             addr_help += [line]
      102 |     addr_help = str.join('\n', addr_help)
      103 |     ap.add_argument('addr', metavar='ADDRESS', help=addr_help)
      104 |     opts = ap.parse_args()
      105 |     if opts.debug_http:
      106 |         http.client.HTTPConnection.debuglevel = 1
      107 |     addr = opts.addr
      108 |     if '/' in addr:
      109 |         # strip URL fragment
      110 |         addr, _ = urllib.parse.urldefrag(addr)
      111 |     if not (match := lib.inst.parse_addr(addr)):
      112 |         ap.error('unsupported address')
      113 |     sys.stdout.flush()
      114 |     with lib.stdout.install():
      115 |         instance = match.instance_type.connect(match.url)
      116 |         if match.tag:
      117 |             process_tag(instance, match.tag,
      118 |                 limit=opts.limit,
      119 |             )
      120 |         elif not match.ident:
      121 |             process_user(instance, match.user,
      122 |                 replies=bool(match.with_replies),
      123 |                 media=bool(match.media),
      124 |                 limit=opts.limit,
      125 |             )
      126 |         else:
      127 |             with_context = opts.limit > 1 and not match.embed
      128 |             process_post(instance, post_id=match.ident,
      129 |                 with_replies=with_context,
      130 |                 with_ancestors=(with_context and opts.with_ancestors),
      131 |             )
      132 | 
      133 | def plural(i, noun):
      134 |     if i != 1:
      135 |         noun += 's'
      136 |     return f'{i} {noun}'
      137 | 
      138 | def process_tag(instance, tag_name, *, limit):
      139 |     info = instance.fetch_tag_info(tag_name)
      140 |     if info.url:
      141 |         print('Location:', fmt_url(info.url))
      142 |     else:
      143 |         print('Location:', f'(cannot generate URL for tag {tag_name!r})')
      144 |     history = info.history
      145 |     if history:
      146 |         n_posts = sum(int(entry.uses) for entry in history)
      147 |         n_users = sum(int(entry.accounts) for entry in history)
      148 |         n_posts_today = history[0].uses
      149 |         n_days = len(history)
      150 |         s_days = plural(n_days, 'day')
      151 |         print('Statistics:', f'(last {s_days})')
      152 |         print(' ', plural(n_posts, 'post'))
      153 |         if n_users > 0:
      154 |             print(' ', plural(n_users, 'user'))
      155 |         if n_posts > 0:
      156 |             print(' ', plural(n_posts_today, 'post'), 'today')
      157 |     posts = instance.fetch_tag_posts(tag_name, limit=limit)
      158 |     print_posts(posts, separators='=- ')
      159 | 
      160 | def process_user(instance, username, *, replies=False, media=False, limit):
      161 |     user = instance.fetch_user_by_name(username)
      162 |     print('User:', fmt_user(user))
      163 |     if user.note:
      164 |         print()
      165 |         print(fmt_html(user.note))
      166 |     seen = set()
      167 |     if not (media or replies):
      168 |         posts = instance.fetch_user_posts(user, limit=limit, pinned=True)
      169 |         def gen_posts():
      170 |             for post in posts:
      171 |                 if not post.pinned:
       172 |                     # Snac's Mastodon API yields all posts even when we ask only for pinned ones:
      173 |                     # https://codeberg.org/grunfink/snac2/issues/335
      174 |                     # Let's filter out non-pinned posts.
      175 |                     continue
      176 |                 yield post
      177 |                 seen.add(post.id)
      178 |         n = print_posts(gen_posts(), separators='=- ')
      179 |         if n >= limit:
      180 |             limit = 0
      181 |     params = types.SimpleNamespace()
      182 |     if media:
       183 |         params.only_media = True
      184 |     else:
      185 |         params.exclude_replies = not replies
      186 |     posts = instance.fetch_user_posts(user, limit=limit, **vars(params))
      187 |     # Filter out posts that were already printed as pinned:
      188 |     posts = (post for post in posts if post.id not in seen)
      189 |     print_posts(posts, separators='=- ')
      190 | 
      191 | def process_post(instance, post_id, *, with_replies=True, with_ancestors=False):
      192 |     post = instance.fetch_post(post_id)
      193 |     @functools.cache
      194 |     def get_context():
      195 |         return instance.fetch_post_context(post_id,
      196 |             ancestors=with_ancestors,
      197 |             descendants=with_replies,
      198 |         )
      199 |     if with_ancestors:
      200 |         context = get_context()
      201 |         print_posts(context.ancestors, hide_in_reply_to=True, separators=' -=')
      202 |     print_post(post, hide_in_reply_to=with_ancestors)
      203 |     if with_replies:
      204 |         context = get_context()
      205 |         print_posts(context.descendants, hide_in_reply_to=True, separators='=- ')
      206 | 
      207 | def print_separator(ch):
      208 |     print()
      209 |     print(ch * lib.text.columns)
      210 |     print()
      211 | 
      212 | def print_posts(posts, *, hide_in_reply_to=False, separators='-- '):
      213 |     def print_sep(i):
      214 |         ch = separators[i]
      215 |         if ch.isspace():
      216 |             return
      217 |         print_separator(ch)
      218 |     n = 0
      219 |     for n, post in enumerate(posts, start=1):
      220 |         print_sep(n > 1)
      221 |         print_post(post, hide_in_reply_to=hide_in_reply_to)
      222 |     if n > 0:
      223 |         print_sep(-1)
      224 |     return n
      225 | 
      226 | def normalize_lang(lang):
      227 |     if lang is None:
      228 |         return 'en'
      229 |     if lang.startswith('en-'):
      230 |         return 'en'
      231 |     return lang
      232 | 
      233 | def print_post(post, *, hide_in_reply_to=False):
      234 |     if post.location:
      235 |         print('Location:', fmt_url(post.location))
      236 |     url = post.url or post.uri
      237 |     if url and url != post.location:
      238 |         print('Origin:', fmt_url(url))
      239 |     if post.in_reply_to_id and not hide_in_reply_to:
      240 |         if post.in_reply_to_url:
      241 |             print('In-Reply-To:', fmt_url(post.in_reply_to_url))
      242 |         else:
      243 |             print('In-Reply-To:', f'(cannot generate URL for post id {post.in_reply_to_id})')
      244 |     if post.pinned:
      245 |         pinned = post.pinned
      246 |         pin_comment = []
      247 |         if isinstance(pinned, str):
      248 |             pin_comment = fmt_date(pinned)
      249 |             pin_comment = [f'({pin_comment})']
      250 |         print('Pinned: yes', *pin_comment)
      251 |     if post.account:
      252 |         # FIXME in Pleroma?
      253 |         # Why is the account information missing
      254 |         # for some reblogged posts?
      255 |         print('From:', fmt_user(post.account))
      256 |     date_comment = []
      257 |     if post.edited_at:
      258 |         date_comment = 'edited ' + fmt_date(post.edited_at)
      259 |         date_comment = [f'({date_comment})']
      260 |     print('Date:', fmt_date(post.created_at), *date_comment)
      261 |     if normalize_lang(post.language) != 'en':
      262 |         print('Language:', post.language)
      263 |     if post.reblog:
      264 |         print('Reblog: yes')
      265 |     print()
      266 |     if post.reblog:
      267 |         print_post(post.reblog)
      268 |     else:
      269 |         text = fmt_html(post.content)
      270 |         print(text)
      271 |     print()
      272 |     paperclip = lib.text.symbols.paperclip
      273 |     for att in post.media_attachments or ():
      274 |         # TODO? Render the images with chafa?
      275 |         print(paperclip, fmt_url(att.url))
      276 |         print()
      277 |         text = att.description or ''
      278 |         indent = ' ' * (1 + paperclip.width)
      279 |         text = lib.text.wrap_text(text, indent=indent)
      280 |         for line in text:
      281 |             print(line)
      282 |         print()
      283 | 
      284 | def main():
      285 |     try:
      286 |         xmain()
      287 |     except lib.www.URLError as exc:
      288 |         fatal(f'<{exc.url}>: {exc}')
      289 |     except BrokenPipeError:
      290 |         signal.signal(signal.SIGPIPE, signal.SIG_DFL)
      291 |         os.kill(os.getpid(), signal.SIGPIPE)
      292 |         raise
      293 | 
      294 | __all__ = ['main']
      295 | 
      296 | # vim:ts=4 sts=4 sw=4 et
      297 | 
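
Aside on fmt_url() above: when stdout is a terminal, it prefixes every character of the URL with an underscore and a backspace, the classic overstrike sequence that pagers such as less(1) and the ul(1) filter render as underlined text. A minimal standalone sketch of the same trick (the function name is made up; this is not part of zygolophodon):

    import re
    import sys

    def underline(s):
        # "_<BS>c" is the old overstrike convention for an underlined "c";
        # emit it only when writing to a terminal, as fmt_url() does.
        if sys.stdout.isatty():
            return re.sub('(.)', r'_\b\1', s)
        return s

    print(underline('https://example.org'))  # view through less(1) or ul(1) to see the underlining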
      
      
      --------------------------------------------------------------------------------
      /lib/mastodon.py:
      --------------------------------------------------------------------------------
        1 | # Copyright © 2022-2025 Jakub Wilk 
        2 | # SPDX-License-Identifier: MIT
        3 | 
        4 | '''
        5 | Mastodon (and Mastodon-like) instances
        6 | '''
        7 | 
        8 | import abc
        9 | import functools
       10 | import re
       11 | import urllib.parse
       12 | 
       13 | import lib.www
       14 | 
       15 | from lib.inst import (
       16 |     AddrParser,
       17 |     Instance,
       18 | )
       19 | 
       20 | from lib.utils import (
       21 |     Dict,
       22 |     abstractattribute,
       23 |     expand_template,
       24 | )
       25 | 
       26 | urlquote = lib.www.urlquote
       27 | 
       28 | class UserAgent(lib.www.UserAgent):
       29 | 
       30 |     @classmethod
       31 |     def handle_json_error(cls, exc, data):
       32 |         try:
       33 |             msg = data.error
       34 |         except KeyError:
       35 |             return
       36 |         assert exc.msg
       37 |         exc.msg = msg
       38 | 
       39 | class Mastodonoid(Instance):
       40 | 
       41 |     post_url_template = abstractattribute()
       42 | 
       43 |     @classmethod
       44 |     @abc.abstractmethod
       45 |     def identify(cls, data):
       46 |         pass
       47 | 
       48 |     @functools.cached_property
       49 |     def api_version(self):
       50 |         match = re.match('([0-9]+([.][0-9]+)*)', self.data.version)
       51 |         version = match.group()
       52 |         version = version.split('.')
       53 |         return tuple(int(x) for x in version)
       54 | 
       55 |     @classmethod
       56 |     def connect(cls, url):
       57 |         # https://docs.joinmastodon.org/methods/instance/#v1
       58 |         # available since Mastodon v1.1
       59 |         #
       60 |         # FIXME? v1 is deprecated, but OTOH Mastodon before v4.0
       61 |         # and some non-Mastodon instances don't support v2.
       62 |         #
       63 |         # TODO? Use NodeInfo 
       64 |         # for identification?
       65 |         # But it's only available since Mastodon 3.0.
       66 |         instance = Mastodon(url, None)
       67 |         data = instance._fetch('instance')  # pylint: disable=protected-access
       68 |         inst_types = [
       69 |             inst for inst in Instance.types
       70 |             if issubclass(inst, Mastodonoid)
       71 |         ]
       72 |         inst_types.sort(
       73 |             key=(lambda t: t.identify(data)),
       74 |             reverse=True,
       75 |         )
       76 |         inst_type = inst_types[0]
       77 |         return inst_type(url, data)
       78 | 
       79 |     def _api_url(self, url):
       80 |         return f'{self.url}/api/v1/{url}'
       81 | 
       82 |     def _fetch(self, url):
       83 |         url = self._api_url(url)
       84 |         return UserAgent.get(url).json
       85 | 
       86 |     def fetch_user_by_name(self, name):
       87 |         # https://docs.joinmastodon.org/methods/accounts/#lookup
       88 |         # available since:
       89 |         # - Mastodon v3.4
       90 |         # - Pleroma v2.5
       91 |         # - Akkoma v2.5
       92 |         q_name = urlquote(name)
       93 |         return self._fetch(f'accounts/lookup?acct={q_name}')
       94 | 
       95 |     def _fetch_posts(self, url, *, limit, **params):
       96 |         url = self._api_url(url)
       97 |         page_limit = 40  # maximum allowed
       98 |         pinned = params.get('pinned', False)
       99 |         params['limit'] = min(limit, page_limit)
      100 |         q_params = urllib.parse.urlencode(params).lower()
      101 |         url += f'?{q_params}'
      102 |         while limit > 0:
      103 |             response = UserAgent.get(url)
      104 |             posts = response.json
      105 |             self.fix_posts(posts)
      106 |             for post in posts:
      107 |                 if post.pinned is None:
      108 |                     post.pinned = pinned
      109 |             yield from posts
      110 |             limit -= len(posts)
      111 |             next_url = response.links.get('next')
      112 |             if next_url is None:
      113 |                 break
       114 |             if not next_url.startswith(self._api_url('')):
      115 |                 msg = f'suspicious Link URL: {next_url!r}'
      116 |                 raise RuntimeError(msg)
      117 |             url = re.sub(
      118 |                 r'(?<=[?&]limit=)\d+(?=&|\Z)',
      119 |                 str(min(limit, page_limit)),
      120 |                 next_url
      121 |             )
      122 | 
      123 |     def fetch_user_posts(self, user, *, limit, **params):
      124 |         # https://docs.joinmastodon.org/methods/accounts/#statuses
      125 |         # available since Mastodon v2.7
      126 |         url = f'accounts/{user.id}/statuses'
      127 |         return self._fetch_posts(url, limit=limit, **params)
      128 | 
      129 |     def fetch_tag_posts(self, tag_name, *, limit, **params):
      130 |         # https://docs.joinmastodon.org/methods/timelines/#tag
      131 |         # available since Mastodon v0.1
      132 |         q_tag = urlquote(tag_name)
      133 |         url = f'timelines/tag/{q_tag}'
      134 |         return self._fetch_posts(url, limit=limit, **params)
      135 | 
      136 |     def fetch_post(self, post_id):
      137 |         # https://docs.joinmastodon.org/methods/statuses/#get
      138 |         # available since Mastodon v2.7
      139 |         post = self._fetch(f'statuses/{post_id}')
      140 |         self.fix_post(post)
      141 |         return post
      142 | 
      143 |     def fetch_post_context(self, post_id, *, ancestors=True, descendants=True):
      144 |         # https://docs.joinmastodon.org/methods/statuses/#context
      145 |         # available since Mastodon v0.1
      146 |         if not (ancestors or descendants):
      147 |             # shortcut:
      148 |             return Dict(ancestors=None, descendants=None)
      149 |         context = self._fetch(f'statuses/{post_id}/context')
      150 |         if ancestors:
      151 |             self.fix_posts(context.ancestors)
      152 |         else:
      153 |             context.ancestors = None
      154 |         if descendants:
      155 |             self.fix_posts(context.descendants)
      156 |         else:
      157 |             context.descendants = None
      158 |         return context
      159 | 
      160 |     def get_post_url(self, *, post_id):
      161 |         template = self.post_url_template
      162 |         if template is None:
      163 |             return None
      164 |         path = expand_template(template, ident=post_id)
      165 |         return f'{self.url}{path}'
      166 | 
      167 |     def get_fixed_post_url(self, url):
      168 |         return url
      169 | 
      170 |     def fix_post(self, post):
      171 |         irt_url = None
      172 |         if post.in_reply_to_id:
      173 |             irt_url = self.get_post_url(post_id=post.in_reply_to_id)
      174 |         post.in_reply_to_url = irt_url
      175 |         try:
      176 |             post.edited_at
      177 |         except KeyError:
      178 |             # * In Mastodon, the attribute is available only since v3.5.0.
      179 |             # * FIXME in Pleroma?
      180 |             #   Why is the attribute missing for reblogs?
      181 |             post.edited_at = None
      182 |         if post.reblog:
      183 |             self.fix_post(post.reblog)
      184 |             if post.url == post.reblog.uri:
      185 |                 # FIXME in Pleroma?
      186 |                 # Why is the URL unhelpful?
      187 |                 post.url = self.get_post_url(post_id=post.id)
      188 |             if post.uri == post.reblog.uri:
      189 |                 post.uri = None
      190 |         post.url = self.get_fixed_post_url(post.url)
      191 |         try:
      192 |             post.pinned
      193 |         except KeyError:
      194 |             post.pinned = None
      195 |         if post.url and post.url.startswith(f'{self.url}/'):
      196 |             post.location = post.url
      197 |         else:
      198 |             post.location = self.get_post_url(post_id=post.id)
      199 | 
      200 |     def fix_posts(self, posts):
      201 |         for post in posts:
      202 |             self.fix_post(post)
      203 | 
      204 |     def fetch_tag_info(self, tag_name):
      205 |         # https://docs.joinmastodon.org/methods/tags/#get
      206 |         # available since Mastodon v4.0
      207 |         if self.api_version < (4, 0):
      208 |             return Instance.fetch_tag_info(self, tag_name)
      209 |         q_tag = urlquote(tag_name)
      210 |         url = f'tags/{q_tag}'
      211 |         return self._fetch(url)
      212 | 
      213 | @Instance.register
      214 | class Mastodon(Mastodonoid):
      215 | 
      216 |     # Codebase: https://github.com/mastodon/mastodon
      217 | 
      218 |     tag_url_template = '/tags/TAG'
      219 | 
      220 |     post_url_template = '/statuses/IDENT'
      221 | 
      222 |     post_id_regexp = '[0-9]{1,18}'
      223 |     # Source: lib/mastodon/snowflake.rb
      224 |     #
      225 |     # Identifiers are decimal integers:
      226 |     #
      227 |     #    n = (t << 16) + r
      228 |     #
      229 |     # where
      230 |     #
      231 |     #    t is milliseconds since 1970;
      232 |     #    r are randomish lower bits.
      233 |     #
      234 |     # In practice, it's always
      235 |     # either 17 digits (until 2018)
      236 |     #     or 18 digits (2018-2453).
      237 |     #
      238 |     # $ export TZ=UTC0
      239 |     # $ qalc -t '"1970-01-01" + ((10 ** 16) >> 16) ms'
      240 |     # "1974-11-02T01:31:27"
      241 |     # $ qalc -t '"1970-01-01" + ((10 ** 17) >> 16) ms'
      242 |     # "2018-05-09T15:14:39"
      243 |     # $ qalc -t '"1970-01-01" + ((10 ** 18) >> 16) ms'
      244 |     # "2453-07-13T08:30:35"
      245 |     #
      246 |     # However, before Mastodon v2.0,
      247 |     # identifiers were sequential 64-bit(?) integers:
      248 |     # https://github.com/mastodon/mastodon/commit/468523f4ad85f99d
      249 | 
      250 |     addr_parser = AddrParser(
      251 |         # mail-like
      252 |         '@USER@DOMAIN',
      253 |         'USER@DOMAIN',
      254 |         # user
      255 |         '/@USER',
      256 |         '/@USER/media',
      257 |         '/@USER/with_replies',
      258 |         # post
      259 |         '/@USER/NNNNNN',
      260 |         '/@USER/NNNNNN/embed',
      261 |         # tag
      262 |         '/tags/TAG',
      263 |         # legacy user-less post
      264 |         '/statuses/NNNNNN',
      265 |         # offsite redirect pages
      266 |         '/redirect/statuses/NNNNNN',
      267 |         # URI->URL redirects
      268 |         '/users/USER',
      269 |         '/users/USER/statuses/NNNNNN',
      270 |         #
      271 |         discard_prefixes={'deck', 'web'},
      272 |     )
      273 | 
      274 |     @classmethod
      275 |     def identify(cls, data):
      276 |         del data
      277 |         return 0
      278 | 
      279 |     def get_fixed_post_url(self, url):
      280 |         q_base_url = re.escape(self.url)
      281 |         match = re.fullmatch(q_base_url + '/users/([^/]+)/statuses/([0-9]+)/activity', url or '')
      282 |         if match:
      283 |             # https://github.com/mastodon/mastodon/issues/34433
      284 |             # ("reblogs have wrong url")
      285 |             (user, post_id) = match.groups()
      286 |             url = f'{self.url}/@{user}/{post_id}'
      287 |         return url
      288 | 
      289 | @Instance.register
      290 | class UntamedMastodonoid(Mastodonoid):
      291 | 
      292 |     # fallback for unknown (but known to be unsupported) instance types
      293 | 
      294 |     tag_url_template = None
      295 | 
      296 |     post_url_template = None
      297 | 
      298 |     post_id_regexp = None
      299 | 
      300 |     addr_parser = AddrParser()  # dummy
      301 | 
      302 |     @classmethod
      303 |     def identify(cls, data):
      304 |         if re.search(r'\b(compatible|really)\b', data.version):
      305 |             return 0.1
      306 |         match = re.match('^([0-9]+)[.]', data.version)
      307 |         if match is None:
      308 |             return 0.1
      309 |         [major] = match.groups()
      310 |         major = int(major)
      311 |         if major < 1:
      312 |             # /api/v1/instance was added only in Mastodon 1.1
      313 |             # 
      314 |             # so this version is clearly a lie.
      315 |             return 0.1
      316 |         return -1
      317 | 
      318 | @Instance.register
      319 | class Iceshrimp(Mastodonoid):
      320 | 
      321 |     # Codebase: https://iceshrimp.dev/
      322 |     # Forks: https://codeberg.org/catodon/catodon
      323 | 
      324 |     tag_url_template = '/tags/TAG'
      325 | 
      326 |     post_url_template = '/notes/IDENT'
      327 | 
      328 |     post_id_regexp = '[0-9a-z]{16,24}'
      329 |     # Source: packages/backend/src/misc/gen-id.ts
      330 |     #
      331 |     # Identifiers are in the form:
      332 |     #
      333 |     #    t || r
      334 |     #
      335 |     # where
      336 |     #
      337 |     #    t is milliseconds since 2000;
      338 |     #    r is randomish, configurable length 8-16.
      339 |     #
      340 |     # Both are in base-36.
      341 |     #
      342 |     # The docs say the timestamp is 8 chars long
      343 |     # (and the code indeed ensures it's _at least_ 8 chars),
      344 |     # but that'll only suffice until 2089.
      345 |     #
      346 |     # $ export TZ=UTC0
      347 |     # $ qalc -t '"2000-01-01" + (36 ** 8) ms'
      348 |     # "2089-05-24T17:38:22"
      349 | 
      350 |     addr_parser = AddrParser(
      351 |         '/notes/IDENT',
      352 |     )
      353 | 
      354 |     @classmethod
      355 |     def identify(cls, data):
      356 |         if re.search(r'\b(Iceshrimp|Catodon)\b', data.version):
      357 |             return 1
      358 |         # FIXME? Should Iceshrimp.NET be considered supported?
      359 |         return -1
      360 | 
      361 |     def fetch_tag_info(self, tag_name):
      362 |         # FIXME in Iceshrimp?
      363 |         # The API is not available,
      364 |         # despite claimed version 4.2 or so.
      365 |         return Instance.fetch_tag_info(self, tag_name)
      366 | 
      367 | @Instance.register
      368 | class Pleroma(Mastodonoid):
      369 | 
      370 |     # Codebase: https://git.pleroma.social/pleroma/pleroma
      371 | 
      372 |     tag_url_template = '/tag/TAG'
      373 | 
      374 |     post_url_template = '/notice/IDENT'
      375 | 
      376 |     post_id_regexp = '[0-9a-zA-Z]{18}'
      377 |     # Source: https://git.pleroma.social/pleroma/flake_id
      378 |     #
      379 |     # Identifiers are base-62 integers:
      380 |     #
      381 |     #    n = (t << 64) + r
      382 |     #
      383 |     # where
      384 |     #
      385 |     #    t is milliseconds since 1970;
      386 |     #    r are randomish lower bits.
      387 |     #
      388 |     # In practice, it's always 18 digits (until 2284).
      389 |     #
      390 |     # $ export TZ=UTC0
      391 |     # $ qalc -t '"1970-01-01" + ((62 ** 17) >> 64) ms'
      392 |     # "1975-01-29T11:50:12"
      393 |     # $ qalc -t '"1970-01-01" + ((62 ** 18) >> 64) ms'
      394 |     # "2284-10-19T13:56:44"
      395 | 
      396 |     addr_parser = AddrParser(
      397 |         '/notice/IDENT',
      398 |         '/tag/TAG',
      399 |         # TODO? '/USER'?
      400 |         # But eww, that's awfully generic.
       401 |         # In the meantime, /users/USER works already.
      402 |     )
      403 | 
      404 |     @classmethod
      405 |     def identify(cls, data):
      406 |         try:
      407 |             data.pleroma
      408 |         except KeyError:
      409 |             return -1
      410 |         return 1
      411 | 
      412 |     def fix_post(self, post):
      413 |         super().fix_post(post)
      414 |         try:
      415 |             pinned_at = post.pleroma.pinned_at
      416 |         except KeyError:
      417 |             # available only since Pleroma v2.4
      418 |             pass
      419 |         else:
      420 |             post.pinned = pinned_at
      421 | 
      422 | __all__ = [
      423 |     'Iceshrimp',
      424 |     'Mastodon',
      425 |     'Mastodonoid',
      426 |     'Pleroma',
      427 |     'UntamedMastodonoid',
      428 | ]
      429 | 
      430 | # vim:ts=4 sts=4 sw=4 et
      431 | 
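
Aside on the snowflake comment in the Mastodon class above: a post ID carries its creation time in the high bits (n = (t << 16) + r, with t in milliseconds since the Unix epoch), so the timestamp can be recovered offline. A minimal standalone sketch, not part of the library (the function name is made up, and it ignores pre-2.0 sequential IDs and the leap-second handling that the qalc estimates above imply):

    import datetime

    def mastodon_id_to_datetime(post_id):
        # n = (t << 16) + r  =>  t = n >> 16, milliseconds since 1970
        ms = int(post_id) >> 16
        epoch = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
        return epoch + datetime.timedelta(milliseconds=ms)

    # e.g. mastodon_id_to_datetime('100000000000000000') falls in May 2018,
    # in line with the qalc estimate for 17-digit identifiers above.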
      
      
      --------------------------------------------------------------------------------
      /lib/bluesky.py:
      --------------------------------------------------------------------------------
        1 | # Copyright © 2025 Jakub Wilk 
        2 | # SPDX-License-Identifier: MIT
        3 | 
        4 | '''
        5 | Bluesky
        6 | '''
        7 | 
        8 | import html
        9 | import re
       10 | 
       11 | import lib.www
       12 | 
       13 | from lib.inst import (
       14 |     AddrParser,
       15 |     Instance,
       16 | )
       17 | 
       18 | from lib.utils import (
       19 |     Dict,
       20 |     compose,
       21 | )
       22 | 
       23 | urlquote = lib.www.urlquote
       24 | 
       25 | def qre(pattern, flags=0):
       26 |     r'''
       27 |     re.compile() with additional support for \q<...> escape,
       28 |     which is like \Q...\E in Perl.
       29 |     '''
       30 |     def repl(match):
       31 |         (s, esc) = match.groups()
       32 |         if esc:
       33 |             return esc
       34 |         return re.escape(s)
       35 |     pattern = re.sub(r'\\q<(.*?)>|(\.)', repl, pattern)
       36 |     return re.compile(pattern, flags=flags)
       37 | 
       38 | def text2html(s):
       39 |     s = html.escape(s)
        40 |     s = s.replace('\n', '<br>')
        41 |     return s
        42 | 
        43 | def decamel(s):
        44 |     def subst(match):
        45 |         return '_' + match.group().lower()
        46 |     s = re.sub('[A-Z]', subst, s)
        47 |     return s
        48 | 
        49 | class UserAgent(lib.www.UserAgent):
        50 | 
        51 |     @classmethod
        52 |     def handle_json_error(cls, exc, data):
        53 |         try:
        54 |             code = data.error
        55 |             msg = data.message
        56 |         except KeyError:
        57 |             return
        58 |         assert exc.msg
        59 |         exc.msg = f'[{code}] {msg}'
        60 | 
        61 | @Instance.register
        62 | class Bluesky(Instance):
        63 | 
        64 |     # Codebase: https://github.com/bluesky-social/atproto
        65 | 
        66 |     tag_url_template = '/hashtag/TAG'
        67 | 
        68 |     post_id_regexp = '[2-7a-z]{13}'
        69 |     # Source: https://atproto.com/specs/tid
        70 |     #
        71 |     # Identifiers are base-32 integers:
        72 |     #
        73 |     #    n = (t << 10) + r
        74 |     #
        75 |     # where
        76 |     #
        77 |     #    t is microseconds since 1970;
        78 |     #    r are randomish lower bits.
        79 |     #
        80 |     # In practice, it's always 13 characters.
        81 |     #
        82 |     # $ export TZ=UTC0
        83 |     # $ qalc -t '"1970-01-01" + ((32 ** 12) >> 10) us'
        84 |     # "2005-09-05T05:58:04"
        85 |     # $ qalc -t '"1970-01-01" + ((32 ** 13) >> 10) us'
        86 |     # "3111-09-16T23:09:51"
        87 | 
        88 |     addr_parser = AddrParser(
        89 |         'https://bsky.app/profile/USER',
        90 |         'https://bsky.app/profile/USER/post/IDENT',
        91 |         'https://bsky.app/hashtag/TAG',
        92 |         # TODO? @USER, or maybe only @USER.bsky.social
        93 |     )
        94 | 
        95 |     def __init__(self, url):
        96 |         super().__init__(url)
        97 |         self._did_to_handle = {}
        98 | 
        99 |     def _remember_user(self, user):
       100 |         try:
       101 |             handle = user.handle
       102 |         except KeyError:
       103 |             return
       104 |         self._did_to_handle[user.did] = handle
       105 | 
       106 |     @classmethod
       107 |     def parse_addr(cls, addr):
       108 |         match = super().parse_addr(addr)
       109 |         if not match:
       110 |             return None
       111 |         if match.ident:
       112 |             ident = f'at://{match.user}/app.bsky.feed.post/{match.ident}'
       113 |             match.ident = ident
       114 |         match.url = 'https://bsky.app'
       115 |         return match
       116 | 
       117 |     def _api_url(self, url, *, public=True):
       118 |         domain = 'api.bsky.app'
       119 |         if public:
       120 |             domain = f'public.{domain}'
       121 |         return f'https://{domain}/xrpc/{url}'
       122 | 
       123 |     def _fetch(self, url, *, public=True):
       124 |         url = self._api_url(url, public=public)
       125 |         return UserAgent.get(url).json
       126 | 
       127 |     @compose(''.join)
       128 |     def _mastodonize_text(self, text, *, facets=()):
       129 |         # FIXME: We convert text to HTML, only to convert HTML to text later on.
       130 |         btext = text.encode(errors='surrogatepass')
       131 |         def tslice(start, stop=None):
       132 |             return btext[start:stop].decode(errors='replace')
       133 |         i = 0
       134 |         for facet in facets:
       135 |             for feature in facet.features:
       136 |                 tp = feature['$type']
       137 |                 match = qre(r'\q<app.bsky.richtext.facet>#(\w+)').fullmatch(tp)
       138 |                 if not match:
       139 |                     continue
       140 |                 [tp] = match.groups()
       141 |                 fn = getattr(self, f'_mastodonize_text_facet_{tp}', None)
       142 |                 if not fn:
       143 |                     continue
       144 |                 j = facet.index.byteStart
       145 |                 k = facet.index.byteEnd
       146 |                 if i <= j < k:
       147 |                     yield text2html(tslice(i, j))
       148 |                     yield fn(tslice(j, k), feature)  # pylint: disable=not-callable
       149 |                     i = k
       150 |         yield text2html(tslice(i))
       151 | 
       152 |     def _mastodonize_user(self, user):
       153 |         class muser:
       154 |             at_did = user.did
       155 |             url = f'https://bsky.app/profile/{user.handle}'
       156 |             try:
       157 |                 display_name = user.displayName
       158 |             except KeyError:
       159 |                 display_name = ''
       160 |             try:
       161 |                 note = user.description
       162 |             except KeyError:
       163 |                 note = None
       164 |             else:
       165 |                 note = self._mastodonize_text(note)
       166 |         return muser
       167 | 
       168 |     def _mastodonize_text_facet_link(self, text, feature):
       169 |         if match := qre(r'([^/]+/\S{10,})\q<...>').fullmatch(text):
       170 |             [prefix] = match.groups()
       171 |             q_prefix = re.escape(prefix)
       172 |             if re.fullmatch(fr'https?://{q_prefix}\S+', feature.uri):
       173 |                 text = feature.uri
       174 |         q_url = html.escape(feature.uri)
       175 |         q_text = text2html(text)
       176 |         return f'<a href="{q_url}">{q_text}</a>'
       177 | 
       178 |     def _mastodonize_text_facet_mention(self, text, feature):
       179 |         did = feature.did
       180 |         user = self._did_to_handle.get(did, did)
       181 |         url = f'https://bsky.app/profile/{user}'  # FIXME: duplicate code
       182 |         return self._mastodonize_text_facet_link(text, Dict(uri=url))
       183 | 
       184 |     def _mastodonize_text_facet_tag(self, text, feature):
       185 |         url = self.get_tag_url(feature.tag)
       186 |         return self._mastodonize_text_facet_link(text, Dict(uri=url))
       187 | 
       188 |     def fetch_user_by_name(self, name):
       189 |         # https://docs.bsky.app/docs/api/app-bsky-actor-get-profile
       190 |         qname = urlquote(name)
       191 |         url = f'app.bsky.actor.getProfile?actor={qname}'
       192 |         user = self._fetch(url)
       193 |         return self._mastodonize_user(user)
       194 | 
       195 |     def _get_post_url(self, uri):
       196 |         uri_regexp = qre(fr'at://([^%@/?#\0-\40]+)/\q<app.bsky.feed.post>/({self.post_id_regexp})')
       197 |         match = uri_regexp.fullmatch(uri)
       198 |         if not match:
       199 |             # FIXME?
       200 |             return uri
       201 |         (user, post_id) = match.groups()
       202 |         user = self._did_to_handle.get(user, user)
       203 |         return f'https://bsky.app/profile/{user}/post/{post_id}'
       204 | 
       205 |     def _mastodonize_embed(self, embed):
       206 |         if embed is None:
       207 |             return
       208 |         tp = embed['$type']
       209 |         match = qre(r'\q<app.bsky.embed.>(\w+)#view').fullmatch(tp)
       210 |         class bad_att:
       211 |             url = 'about:invalid'  # FIXME?
       212 |             description = f'(unknown embed type: {tp})'
       213 |         if not match:
       214 |             yield bad_att
       215 |             return
       216 |         [tp] = match.groups()
       217 |         tp = decamel(tp)
       218 |         try:
       219 |             fn = getattr(self, f'_mastodonize_embed_{tp}')
       220 |         except AttributeError:
       221 |             yield bad_att
       222 |             return
       223 |         yield from fn(embed)
       224 | 
       225 |     def _mastodonize_embed_images(self, embed):
       226 |         for image in embed.images:
       227 |             class att:
       228 |                 url = image.fullsize
       229 |                 description = image.alt
       230 |             yield att
       231 | 
       232 |     def _mastodonize_embed_video(self, embed):
       233 |         class att:
       234 |             url = embed.playlist
       235 |             description = None
       236 |         yield att
       237 | 
       238 |     def _mastodonize_embed_record(self, embed):
       239 |         # FIXME?
       240 |         record = embed.record
       241 |         try:
       242 |             author = record.author
       243 |         except KeyError:
       244 |             pass
       245 |         else:
       246 |             self._remember_user(author)
       247 |         try:
       248 |             descr = record.value.text
       249 |         except KeyError:
       250 |             descr = None
       251 |         class att:
       252 |             url = self._get_post_url(record.uri)
       253 |             description = descr
       254 |         yield att
       255 | 
       256 |     def _mastodonize_embed_record_with_media(self, embed):
       257 |         # FIXME?
       258 |         yield from self._mastodonize_embed(embed.media)
       259 |         yield from self._mastodonize_embed_record(embed.record)
       260 | 
       261 |     def _mastodonize_embed_external(self, embed):
       262 |         # FIXME?
       263 |         ext = embed.external
       264 |         class att:
       265 |             url = ext.uri
       266 |             description = f'{ext.title}\n\n{ext.description}'
       267 |         yield att
       268 | 
       269 |     def _mastodonize_post(self, post, *, reason=None):
       270 |         record = post.record
       271 |         self._remember_user(post.author)
       272 |         try:
       273 |             embed = post.embed
       274 |         except KeyError:
       275 |             embed = None
       276 |         try:
       277 |             in_reply_to_uri = record.reply.parent.uri
       278 |         except KeyError:
       279 |             _in_reply_to_url = in_reply_to_uri = None
       280 |         else:
       281 |             _in_reply_to_url = self._get_post_url(in_reply_to_uri)
       282 |         _pinned = False
       283 |         if reason and reason['$type'] == 'app.bsky.feed.defs#reasonPin':
       284 |             _pinned = True
       285 |         try:
       286 |             facets = record.facets
       287 |         except KeyError:
       288 |             facets = ()
       289 |         class mpost:
       290 |             id = url = location = self._get_post_url(post.uri)
       291 |             in_reply_to_id = in_reply_to_url = _in_reply_to_url
       292 |             account = self._mastodonize_user(post.author)
       293 |             # Editing posts is not supported yet:
       294 |             # https://github.com/bluesky-social/social-app/issues/673
       295 |             # ("Allow editing posts")
       296 |             edited_at = None
       297 |             created_at = record.createdAt
       298 |             try:
       299 |                 language = record.langs
       300 |             except KeyError:
       301 |                 language = None
       302 |             else:
       303 |                 language = str.join(', ', language)
       304 |             reblog = None
       305 |             content = self._mastodonize_text(record.text, facets=facets)
       306 |             media_attachments = list(self._mastodonize_embed(embed))
       307 |             pinned = _pinned
       308 |         if reason and reason['$type'] == 'app.bsky.feed.defs#reasonRepost':
       309 |             self._remember_user(reason.by)
       310 |             class mrepost:
       311 |                 id = url = uri = location = None
       312 |                 in_reply_to_id = in_reply_to_url = None
       313 |                 account = self._mastodonize_user(reason.by)
       314 |                 edited_at = None
       315 |                 created_at = reason.indexedAt
       316 |                 language = None
       317 |                 reblog = mpost
       318 |                 content = None
       319 |                 media_attachments = None
       320 |                 pinned = None
       321 |             return mrepost
       322 |         else:
       323 |             return mpost
       324 | 
       325 |     def fetch_user_posts(self, user, *, limit, pinned=False, **params):
       326 |         # https://docs.bsky.app/docs/api/app-bsky-feed-get-author-feed
       327 |         if pinned:
       328 |             # It's easier to fetch pinned posts together with non-pinned ones.
       329 |             return
       330 |         del params
       331 |         page_limit = 100  # maximum allowed
       332 |         url = f'app.bsky.feed.getAuthorFeed?actor={user.at_did}&filter=posts_and_author_threads&includePins=true'
       333 |         rlimit = min(limit, page_limit)
       334 |         page_url = f'{url}&limit={rlimit}'
       335 |         while limit > 0:
       336 |             response = self._fetch(page_url)
       337 |             for item in response.feed:
       338 |                 try:
       339 |                     reason = item.reason
       340 |                 except KeyError:
       341 |                     reason = None
       342 |                 yield self._mastodonize_post(item.post, reason=reason)
       343 |             try:
       344 |                 cursor = response.cursor
       345 |             except KeyError:
       346 |                 break
       347 |             limit -= len(response.feed)
       348 |             rlimit = min(limit, page_limit)
       349 |             next_url = f'{url}&limit={rlimit}&cursor={cursor}'
       350 |             assert next_url != page_url
       351 |             page_url = next_url
       352 | 
       353 |     def fetch_tag_posts(self, tag_name, *, limit, **params):
       354 |         # https://docs.bsky.app/docs/api/app-bsky-feed-search-posts
       355 |         #
       356 |         # FIXME? app.bsky.feed.searchPosts doesn't seem to support paging properly:
       357 |         # https://github.com/bluesky-social/atproto/issues/2838
       358 |         # ("Calling AppView's searchPosts with a cursor returns a 403 error")
       359 |         () = params
       360 |         q_tag = urlquote('#' + tag_name)
       361 |         url = f'app.bsky.feed.searchPosts?q={q_tag}&limit={limit}&sort=top'
       362 |         response = self._fetch(url, public=False)
       363 |         for post in response.posts:
       364 |             yield self._mastodonize_post(post)
       365 | 
       366 |     def _get_post_fetch_url(self, post_id, depth=None, parent_height=None):
       367 |         # https://docs.bsky.app/docs/api/app-bsky-feed-get-post-thread
       368 |         q_post_id = urlquote(post_id)
       369 |         url = f'app.bsky.feed.getPostThread?uri={q_post_id}'
       370 |         if depth is not None:
       371 |             url += f'&depth={depth}'
       372 |         if parent_height is not None:
       373 |             url += f'&parentHeight={parent_height}'
       374 |         return url
       375 | 
       376 |     def fetch_post(self, post_id):
       377 |         url = self._get_post_fetch_url(post_id, depth=0, parent_height=0)
       378 |         thread = self._fetch(url).thread
       379 |         return self._mastodonize_post(thread.post)
       380 | 
       381 |     def fetch_post_context(self, post_id, *, ancestors=True, descendants=True):
       382 |         # FIXME? This duplicates some of the work of fetch_post().
       383 |         context = Dict(ancestors=[], descendants=[])
       384 |         if not (ancestors or descendants):
       385 |             # shortcut:
       386 |             return context
       387 |         kwargs = {}
       388 |         if not ancestors:
       389 |             kwargs.update(parent_height=0)
       390 |         if not descendants:
       391 |             kwargs.update(depth=0)
       392 |         url = self._get_post_fetch_url(post_id, **kwargs)
       393 |         thread = self._fetch(url).thread
       394 |         if ancestors:
       395 |             # pylint: disable=no-member
       396 |             parent = thread
       397 |             while True:
       398 |                 try:
       399 |                     parent = parent.parent
       400 |                 except KeyError:
       401 |                     break
       402 |                 context.ancestors += [self._mastodonize_post(parent.post)]
       403 |             context.ancestors.reverse()
       404 |         if descendants:
       405 |             # pylint: disable=no-member
       406 |             def add_descendants(thread):
       407 |                 context.descendants += [self._mastodonize_post(thread.post)]
       408 |                 try:
       409 |                     replies = thread.replies
       410 |                 except KeyError:
       411 |                     return
       412 |                 for reply in replies:
       413 |                     add_descendants(reply)
       414 |             for reply in thread.replies:
       415 |                 add_descendants(reply)
       416 |         return context
       417 | 
       418 | __all__ = [
       419 |     'Bluesky',
       420 | ]
       421 | 
       422 | # vim:ts=4 sts=4 sw=4 et
       423 | 


--------------------------------------------------------------------------------
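
Aside on the TID comment in /lib/bluesky.py above: the 13-character post identifier encodes its creation time (n = (t << 10) + r, with t in microseconds since the Unix epoch), so it can be decoded offline. A minimal standalone sketch, assuming the "sortable" base-32 alphabet '234567abcdefghijklmnopqrstuvwxyz' from the atproto TID specification (the function name and the sample TID are made up; this is not part of the library):

    import datetime

    S32 = '234567abcdefghijklmnopqrstuvwxyz'  # assumed TID alphabet, '2' = 0

    def tid_to_datetime(tid):
        # n = (t << 10) + r  =>  t = n >> 10, microseconds since 1970
        n = 0
        for ch in tid:
            n = n * 32 + S32.index(ch)
        epoch = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
        return epoch + datetime.timedelta(microseconds=n >> 10)

    print(tid_to_datetime('3jzfcijpj2z2a'))  # hypothetical 13-character TID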