├── .gitignore
├── .github
│   ├── FUNDING.yml
│   └── workflows
│       └── ci.yml
├── lib
│   ├── __init__.py
│   ├── compat.py
│   ├── utils.py
│   ├── stdout.py
│   ├── text.py
│   ├── html.py
│   ├── www.py
│   ├── inst.py
│   ├── cli.py
│   ├── mastodon.py
│   └── bluesky.py
├── doc
│   ├── changelog
│   └── zygolophodon.1.in
├── t
│   ├── common.sh
│   ├── changelog.t
│   ├── bad-addr.t
│   ├── network.urls
│   ├── url-error.t
│   ├── network.t
│   ├── help.t
│   ├── version.t
│   └── man-env.t
├── zygolophodon
├── .pylintrc
├── .perlcriticrc
├── private
│   ├── update-readme
│   └── gen-manpage
├── LICENSE
├── Makefile
└── README

/.gitignore:
--------------------------------------------------------------------------------
  1 | *.py[co]
  2 | /doc/*.1
  3 | 


--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
  1 | custom: https://paypal.me/ijklw
  2 | 


--------------------------------------------------------------------------------
/lib/__init__.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | zygolophodon's private modules
  3 | '''
  4 | 
  5 | type(lambda x, /: x)  # Python >= 3.8 is required
  6 | 


--------------------------------------------------------------------------------
/doc/changelog:
--------------------------------------------------------------------------------
  1 | zygolophodon (0.1) UNRELEASED; urgency=low
  2 | 
  3 |   * Initial release.
  4 | 
  5 |  -- Jakub Wilk  Mon, 14 Apr 2025 20:31:28 +0200
  6 | 


--------------------------------------------------------------------------------
/t/common.sh:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | 
  3 | # Copyright © 2025 Jakub Wilk
  4 | # SPDX-License-Identifier: MIT
  5 | 
  6 | tdir="${0%/*}"
  7 | dir="$tdir/.."
  8 | case " $* " in
  9 |     *' --installed '*)
 10 |         prog='zygolophodon';;
 11 |     *)
 12 |         prog="$dir/zygolophodon";;
 13 | esac
 14 | 
 15 | plan()
 16 | {
 17 |     local extra=''
 18 |     if [ $1 -eq 0 ]
 19 |     then
 20 |         extra=" # SKIP $2"
 21 |     fi
 22 |     printf '1..%d%s\n' "$1" "$extra"
 23 |     printf '# test target = %s\n' "$prog"
 24 | }
 25 | 
 26 | # vim:ts=4 sts=4 sw=4 et ft=sh
 27 | 


--------------------------------------------------------------------------------
/zygolophodon:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # encoding=UTF-8
  3 | 
  4 | # Copyright © 2025 Jakub Wilk
  5 | # SPDX-License-Identifier: MIT
  6 | 
  7 | '''
  8 | anonymous CLI for reading microblogging (chiefly Mastodon) posts
  9 | '''
 10 | 
 11 | import sys
 12 | 
 13 | type(lambda x, /: x)  # Python >= 3.8 is required
 14 | 
 15 | basedir = None
 16 | if basedir is not None:
 17 |     sys.path[:0] = [basedir]
 18 | 
 19 | import lib.cli  # pylint: disable=wrong-import-position
 20 | 
 21 | if __name__ == '__main__':
 22 |     lib.cli.main()
 23 | 
 24 | # vim:ts=4 sts=4 sw=4 et
 25 | 


--------------------------------------------------------------------------------
/t/changelog.t:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | 
  3 | # Copyright © 2025 Jakub Wilk
  4 | # SPDX-License-Identifier: MIT
  5 | 
  6 | set -e -u
  7 | 
  8 | . "${0%/*}/common.sh"
  9 | 
 10 | plan 1
 11 | if ! command -v dpkg-parsechangelog > /dev/null
 12 | then
 13 |     echo 'ok 1 # SKIP missing dpkg-parsechangelog(1)'
 14 |     exit
 15 | fi
 16 | out=$(dpkg-parsechangelog -l"$dir/doc/changelog" --all 2>&1 >/dev/null)
 17 | if [[ -z $out ]]
 18 | then
 19 |     echo ok 1
 20 | else
 21 |     sed -e 's/^/# /' <<< "$out"
 22 |     echo not ok 1
 23 | fi
 24 | 
 25 | # vim:ts=4 sts=4 sw=4 et ft=sh
 26 | 


--------------------------------------------------------------------------------
/t/bad-addr.t:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | 
  3 | # Copyright © 2025 Jakub Wilk
  4 | # SPDX-License-Identifier: MIT
  5 | 
  6 | set -e -u
  7 | 
  8 | . "${0%/*}/common.sh"
  9 | 
 10 | plan 2
 11 | 
 12 | err=$("$prog" moo 2>&1 >/dev/null) || xs=$?
 13 | echo "# exit status $xs"
 14 | tname='exit status'
 15 | case $xs in
 16 |     2) echo "ok 1 $tname";;
 17 |     *) echo "not ok 1 $tname";;
 18 | esac
 19 | sed -e 's/^/# /' <<< $err
 20 | tname='error message'
 21 | case $err in
 22 |     *$'\n''zygolophodon: error: unsupported address')
 23 |         echo "ok 2 $tname";;
 24 |     *)
 25 |         echo "not ok 2 $tname";;
 26 | esac
 27 | 
 28 | # vim:ts=4 sts=4 sw=4 et ft=sh
 29 | 


--------------------------------------------------------------------------------
/.pylintrc:
--------------------------------------------------------------------------------
  1 | [MASTER]
  2 | load-plugins = pylint.extensions.check_elif
  3 | 
  4 | [MESSAGES CONTROL]
  5 | disable =
  6 |     fixme,
  7 |     invalid-name,
  8 |     missing-class-docstring,
  9 |     missing-function-docstring,
 10 |     no-else-return,
 11 |     no-self-use,
 12 |     too-few-public-methods,
 13 |     too-many-locals,
 14 |     use-dict-literal,
 15 |     useless-option-value,
 16 | 
 17 | [REPORTS]
 18 | msg-template = {path}:{line}: {C}: {symbol} [{obj}] {msg}
 19 | reports = no
 20 | score = no
 21 | 
 22 | [FORMAT]
 23 | expected-line-ending-format = LF
 24 | max-line-length = 120
 25 | 
 26 | [DESIGN]
 27 | max-branches = 20
 28 | 
 29 | # vim:ft=dosini ts=4 sts=4 sw=4 et
 30 | 


--------------------------------------------------------------------------------
/lib/compat.py:
--------------------------------------------------------------------------------
  1 | # Copyright © 2022-2025 Jakub Wilk
  2 | # SPDX-License-Identifier: MIT
  3 | 
  4 | '''
  5 | support for old Python versions
  6 | '''
  7 | 
  8 | import datetime
  9 | import functools
 10 | import re
 11 | import sys
 12 | 
 13 | if sys.version_info < (3, 9):
 14 |     functools.cache = functools.lru_cache(maxsize=None)
 15 | 
 16 | if sys.version_info >= (3, 11):
 17 |     datetime_fromisoformat = datetime.datetime.fromisoformat
 18 | else:
 19 |     def datetime_fromisoformat(d):
 20 |         d = re.sub(r'Z\Z', '+00:00', d)
 21 |         return datetime.datetime.fromisoformat(d)
 22 | 
 23 | __all__ = [
 24 |     'datetime_fromisoformat',
 25 | ]
 26 | 
 27 | # vim:ts=4 sts=4 sw=4 et
 28 | 


--------------------------------------------------------------------------------
/.perlcriticrc:
--------------------------------------------------------------------------------
  1 | severity = 1
  2 | verbose = %f:%l: [%p] %m\n
  3 | 
  4 | [-CodeLayout::RequireTidyCode]
  5 | # no, thanks
  6 | 
  7 | [-ErrorHandling::RequireCarping]
  8 | # "die" is good enough
  9 | 
 10 | [InputOutput::RequireCheckedSyscalls]
 11 | functions = :builtins
 12 | exclude_functions = print say
 13 | 
 14 | [-Modules::RequireVersionVar]
 15 | # see https://bugs.debian.org/706266
 16 | 
 17 | [-RegularExpressions::RequireDotMatchAnything]
 18 | [-RegularExpressions::RequireExtendedFormatting]
 19 | [-RegularExpressions::RequireLineBoundaryMatching]
 20 | ## no, thanks
 21 | 
 22 | [-ValuesAndExpressions::ProhibitNoisyQuotes]
 23 | # no, thanks
 24 | 
 25 | [-ValuesAndExpressions::ProhibitVersionStrings]
 26 | # we don't care about Perl << 5.6, which doesn't support version strings
 27 | 
 28 | # vim:ft=dosini
 29 | 


--------------------------------------------------------------------------------
/private/update-readme:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env perl
  2 | 
  3 | # Copyright © 2025 Jakub Wilk
  4 | # SPDX-License-Identifier: MIT
  5 | 
  6 | no lib '.';  # CVE-2016-1238
  7 | 
  8 | use strict;
  9 | use warnings;
 10 | use v5.14;
 11 | 
 12 | use English qw(-no_match_vars);
 13 | use FindBin ();
 14 | use autodie;
 15 | 
 16 | my $base = "$FindBin::Bin/..";
 17 | 
 18 | open my $fh, '<', "$base/README";
 19 | my $readme = do {
 20 |     local $RS = undef;
 21 |     <$fh>;
 22 | };
 23 | close $fh;
 24 | open $fh, '-|', "$base/zygolophodon", '--help';
 25 | my $usage = do {
 26 |     local $RS = undef;
 27 |     <$fh>;
 28 | };
 29 | close $fh;
 30 | $usage =~ s/^(?=.)/ /mg;
 31 | $readme =~ s/\n \$ zygolophodon --help\n\K.*?(?=\n\S)/$usage/s
 32 |     or die;
 33 | open $fh, '>', "$base/README";
 34 | {
 35 |     print {$fh} $readme;
 36 | }
 37 | close $fh;
 38 | 
 39 | # vim:ts=4 sts=4 sw=4 et
 40 | 


--------------------------------------------------------------------------------
/t/network.urls:
--------------------------------------------------------------------------------
  1 | ### Mastodon ###
  2 | 
  3 | https://mastodon.social/@bluecommunity  # with pinned post
  4 | https://infosec.exchange/@lcamtuf/109537142030669006  # with image attachment
  5 | https://mastodon.social/statuses/1  # short (pre-v2.0) ident
  6 | https://fosstodon.org/tags/python
  7 | https://physics.social/tags/python  # Mastodon v3.5
  8 | @Mastodon@mastodon.social@mas.to  # foreign account
  9 | 
 10 | ### Iceshrimp ###
 11 | 
 12 | https://bytes.programming.dev/@Updates
 13 | https://fedia.social/notes/9vs6uhqixwnm3mvf
 14 | https://infosec.town/tags/python
 15 | 
 16 | ### Pleroma ###
 17 | 
 18 | https://social.sfconservancy.org/users/conservancy  # with pinned post
 19 | https://pleroma.envs.net/notice/AsxefyNDCyqGkfRABs
 20 | https://pleroma.debian.social/tag/python
 21 | 
 22 | ### Snac (unsupported) ###
 23 | 
 24 | grunfink@comam.es
 25 | 
 26 | ### Bluesky ###
 27 | 
 28 | https://bsky.app/profile/bsky.app  # with pinned post
 29 | https://bsky.app/profile/toronto.ca/post/3llr2e6rotc2d
 30 | https://bsky.app/hashtag/python
 31 | 


--------------------------------------------------------------------------------
/t/url-error.t:
--------------------------------------------------------------------------------
  1 | #!/bin/sh
  2 | 
  3 | # Copyright © 2025 Jakub Wilk
  4 | # SPDX-License-Identifier: MIT
  5 | 
  6 | set -e -u
  7 | 
  8 | . "${0%/*}/common.sh"
  9 | 
 10 | plan 2
 11 | unshare_net()
 12 | {
 13 |     unshare --user --net "$@"
 14 | }
 15 | if ! unshare_net true 2>/dev/null
 16 | then
 17 |     unshare_net()
 18 |     {
 19 |         "$@"
 20 |     }
 21 | fi
 22 | export http_proxy='http://127.0.0.1:9/'
 23 | export https_proxy="$http_proxy"
 24 | export RES_OPTIONS=attempts:0
 25 | xs=0
 26 | base_url=https://mastodon.social
 27 | url="$base_url/@Mastodon"
 28 | echo "# $url"
 29 | err=$(unshare_net "$prog" "$url" 2>&1 >/dev/null) || xs=$?
 30 | echo "# exit status $xs"
 31 | tname='exit status'
 32 | case $xs in
 33 |     1) echo "ok 1 $tname";;
 34 |     *) echo "not ok 1 $tname";;
 35 | esac
 36 | echo "# $err"
 37 | tname='error message'
 38 | case $err in
 39 |     "zygolophodon: <$base_url/api/v1/instance>: [E"[A-Z]*'] '*)
 40 |         echo "ok 2 $tname";;
 41 |     *)
 42 |         echo "not ok 2 $tname";;
 43 | esac
 44 | 
 45 | # vim:ts=4 sts=4 sw=4 et ft=sh
 46 | 


--------------------------------------------------------------------------------
/t/network.t:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | 
  3 | # Copyright © 2025 Jakub Wilk
  4 | # SPDX-License-Identifier: MIT
  5 | 
  6 | set -e -u
  7 | 
  8 | . "${0%/*}/common.sh"
  9 | 
 10 | case " $* " in
 11 |     *' --network '*)
 12 |         ;;
 13 |     *)
 14 |         plan 0 'use --network to opt in to network testing'
 15 |         exit 0
 16 |         ;;
 17 | esac
 18 | 
 19 | normspace()
 20 | {
 21 |     # shellcheck disable=SC2048,SC2086
 22 |     s=$(set -f; printf '%s ' $*)
 23 |     printf '%s' "${s% }"
 24 | }
 25 | 
 26 | urls=()
 27 | while read -r line
 28 | do
 29 |     line=${line%%#*}
 30 |     line=$(normspace "$line")
 31 |     [[ -n $line ]] || continue
 32 |     urls+=("$line")
 33 | done < "$tdir/network.urls"
 34 | 
 35 | echo "1..${#urls[@]}"
 36 | declare -i n=1
 37 | for url in "${urls[@]}"
 38 | do
 39 |     rc=0
 40 |     out=$("$prog" --limit=2 "$url") || rc=$?
 41 |     sed -e 's/^/# /' <<< "$out"
 42 |     if [[ $rc = 0 ]]
 43 |     then
 44 |         echo ok $n "$url"
 45 |     else
 46 |         echo not ok $n "$url"
 47 |     fi
 48 |     n+=1
 49 | done
 50 | 
 51 | # vim:ts=4 sts=4 sw=4 et ft=sh
 52 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | Copyright © 2022-2025 Jakub Wilk
  2 | 
  3 | Permission is hereby granted, free of charge, to any person obtaining a copy
  4 | of this software and associated documentation files (the “Software”), to deal
  5 | in the Software without restriction, including without limitation the rights
  6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  7 | copies of the Software, and to permit persons to whom the Software is
  8 | furnished to do so, subject to the following conditions:
  9 | 
 10 | The above copyright notice and this permission notice shall be included in
 11 | all copies or substantial portions of the Software.
 12 | 
 13 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 19 | SOFTWARE.
 20 | 


--------------------------------------------------------------------------------
/lib/utils.py:
--------------------------------------------------------------------------------
  1 | # Copyright © 2022-2025 Jakub Wilk
  2 | # SPDX-License-Identifier: MIT
  3 | 
  4 | '''
  5 | misc stuff
  6 | '''
  7 | 
  8 | import abc
  9 | import functools
 10 | import re
 11 | 
 12 | class Dict(dict):
 13 |     __getattr__ = dict.__getitem__
 14 | 
 15 | class InternalError(RuntimeError):
 16 |     pass
 17 | 
 18 | def expand_template(template, **subst):
 19 |     def repl(match):
 20 |         key = match.group()
 21 |         lkey = key.lower()
 22 |         try:
 23 |             return subst[lkey]
 24 |         except KeyError:
 25 |             msg = f'cannot expand {key} in template {template!r}'
 26 |             raise InternalError(msg) from None
 27 |     return re.sub('[A-Z]+', repl, template)
 28 | 
 29 | def abstractattribute():
 30 |     return abc.abstractmethod(lambda: None)
 31 | 
 32 | def compose(f):
 33 |     def eff(g):
 34 |         @functools.wraps(g)
 35 |         def f_g(*args, **kwargs):
 36 |             return f(g(*args, **kwargs))
 37 |         return f_g
 38 |     return eff
 39 | 
 40 | __all__ = [
 41 |     'Dict',
 42 |     'expand_template',
 43 |     'abstractattribute',
 44 |     'compose',
 45 | ]
 46 | 
 47 | # vim:ts=4 sts=4 sw=4 et
 48 | 


--------------------------------------------------------------------------------
/t/help.t:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | 
  3 | # Copyright © 2022-2025 Jakub Wilk
  4 | # SPDX-License-Identifier: MIT
  5 | 
  6 | set -e -u
  7 | 
  8 | . "${0%/*}/common.sh"
  9 | 
 10 | plan 2
 11 | xout=$(< "$dir/README")
 12 | xout=${xout#*$'\n $ zygolophodon --help\n '}
 13 | xout=${xout%%$'\n\n'[^ ]*}
 14 | xout=${xout//$'\n '/$'\n'}
 15 | out=$("$prog" --help)
 16 | if [[ "$out" = "$xout" ]]
 17 | then
 18 |     echo 'ok 1'
 19 | else
 20 |     diff -u <(cat <<< "$xout") <(cat <<< "$out") | sed -e 's/^/# /'
 21 |     echo 'not ok 1'
 22 | fi
 23 | # chop off the part that's auto-generated in the man page anyway:
 24 | out=$(sed -e '/^  ADDRESS /,/^$/d' <<< "$out")
 25 | xsum=$(sha256sum <<< "$out")
 26 | xsum=${xsum%% *}
 27 | var='SHA-256(help)'
 28 | echo "# $var = $xsum"
 29 | declare -i n=2
 30 | t_sync()
 31 | {
 32 |     path="$1"
 33 |     line=$(grep -F " $var = " < "$path")
 34 |     sum=${line##*" $var = "}
 35 |     if [ "$sum" = "$xsum" ]
 36 |     then
 37 |         echo ok $n "$path"
 38 |     else
 39 |         echo not ok $n "$path"
 40 |     fi
 41 |     n+=1
 42 | }
 43 | if [[ $prog = zygolophodon ]]
 44 | then
 45 |     man_target=$(man -w $prog)
 46 | else
 47 |     man_target="$dir/doc/zygolophodon.1.in"
 48 | fi
 49 | t_sync "$man_target"
 50 | 
 51 | # vim:ts=4 sts=4 sw=4 et ft=sh
 52 | 


--------------------------------------------------------------------------------
/t/version.t:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | 
  3 | # Copyright © 2024-2025 Jakub Wilk
  4 | # SPDX-License-Identifier: MIT
  5 | 
  6 | set -e -u
  7 | 
  8 | . "${0%/*}/common.sh"
  9 | 
 10 | plan 4
 11 | IFS='(); ' read -r _ changelog_version changelog_dist _ < "$dir/doc/changelog"
 12 | echo "# changelog version = $changelog_version"
 13 | echo "# changelog dist = $changelog_dist"
 14 | if out=$("$prog" --version)
 15 | then
 16 |     echo ok 1
 17 |     sed -e 's/^/# /' <<< "$out"
 18 |     case $out in
 19 |         $"zygolophodon $changelog_version"$'\n'*)
 20 |             echo ok 2;;
 21 |         *)
 22 |             echo not ok 2;;
 23 |     esac
 24 | else
 25 |     echo not ok 1
 26 |     echo not ok 2
 27 | fi
 28 | if [ -d "$dir/.git" ]
 29 | then
 30 |     echo 'ok 3 # SKIP git checkout'
 31 | elif [ "$changelog_dist" = UNRELEASED ]
 32 | then
 33 |     echo 'not ok 3'
 34 | else
 35 |     echo 'ok 3'
 36 | fi
 37 | if [[ $prog = zygolophodon ]]
 38 | then
 39 |     man_target=$prog
 40 | else
 41 |     man_target="$dir/doc/zygolophodon.1"
 42 |     if [[ -f $man_target ]]
 43 |     then
 44 |         man_target=''
 45 |     fi
 46 | fi
 47 | echo "# man page target = $man_target"
 48 | if [[ -n $man_target ]]
 49 | then
 50 |     line=$(MANWIDTH=80 man "$man_target" | tail -n 1)
 51 |     IFS=' "' read -r _ man_version _ <<< "$line"
 52 |     echo "# man page version = $man_version"
 53 |     if [ "$man_version" = "$changelog_version" ]
 54 |     then
 55 |         echo ok 4
 56 |     else
 57 |         echo not ok 4
 58 |     fi
 59 | else
 60 |     echo 'ok 4 # SKIP missing man page'
 61 | fi
 62 | 
 63 | # vim:ts=4 sts=4 sw=4 et ft=sh
 64 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
  1 | # Copyright © 2024-2025 Jakub Wilk
  2 | # SPDX-License-Identifier: MIT
  3 | 
  4 | PYTHON = python3
  5 | 
  6 | PREFIX = /usr/local
  7 | DESTDIR =
  8 | 
  9 | bindir = $(PREFIX)/bin
 10 | basedir = $(PREFIX)/share/zygolophodon
 11 | mandir = $(PREFIX)/share/man
 12 | 
 13 | .PHONY: all
 14 | all: doc/zygolophodon.1
 15 | 
 16 | %.1: %.1.in README private/gen-manpage
 17 | 	private/gen-manpage < $(<) > $(@).tmp
 18 | 	mv $(@).tmp $(@)
 19 | 
 20 | .PHONY: install
 21 | install: zygolophodon all
 22 | 	$(PYTHON) - < lib/__init__.py  # Python version check
 23 | # executable:
 24 | 	install -d $(DESTDIR)$(bindir)
 25 | 	python_exe=$$($(PYTHON) -c 'import sys; print(sys.executable)') && \
 26 | 	sed \
 27 | 		-e "1 s@^#!.*@#!$$python_exe@" \
 28 | 		-e "s#^basedir = .*#basedir = '$(basedir)/'#" \
 29 | 		$(<) > $(<).tmp
 30 | 	install $(<).tmp $(DESTDIR)$(bindir)/$(<)
 31 | 	rm $(<).tmp
 32 | # library:
 33 | 	install -d $(DESTDIR)$(basedir)/lib
 34 | 	install -p -m644 lib/*.py $(DESTDIR)$(basedir)/lib/
 35 | ifeq "$(DESTDIR)" ""
 36 | 	umask 022 && $(PYTHON) -m compileall -q $(basedir)/lib/
 37 | endif
 38 | # manual page:
 39 | 	install -d $(DESTDIR)$(mandir)/man1
 40 | 	install -p -m644 doc/$(<).1 $(DESTDIR)$(mandir)/man1/
 41 | 
 42 | .PHONY: test
 43 | test: verbose=
 44 | test: zygolophodon all
 45 | 	prove $(and $(verbose),-v)
 46 | 
 47 | .PHONY: test-installed
 48 | test-installed: verbose=
 49 | test-installed: $(or $(shell command -v zygolophodon;),$(bindir)/zygolophodon)
 50 | 	prove $(and $(verbose),-v) :: --installed
 51 | 
 52 | .PHONY: clean
 53 | clean:
 54 | 	rm -f *.tmp doc/*.1 doc/*.tmp
 55 | 	find . -type f -name '*.py[co]' -delete
 56 | 	find . -type d -name '__pycache__' -delete
 57 | 
 58 | .error = GNU make is required
 59 | 
 60 | # vim:ts=4 sts=4 sw=4 noet
 61 | 


--------------------------------------------------------------------------------
/doc/zygolophodon.1.in:
--------------------------------------------------------------------------------
  1 | .\" Copyright © 2025 Jakub Wilk
  2 | .\" SPDX-License-Identifier: MIT
  3 | 
  4 | .\" # SHA-256(help) = 6f4f12611723167e0d90775679acdf55b7a68f3db0c72fe8bfc6583b661bd29c
  5 | 
  6 | .TH ZYGOLOPHODON 1 2025-02-01 "zygolophodon {{VERSION}}"
  7 | .SH NAME
  8 | zygolophodon \- CLI for reading Mastodon posts
  9 | .SH SYNOPSIS
 10 | .SY zygolophodon
 11 | .RB [ \-\-limit " \fIN\fP]"
 12 | .RB [ \-\-with\-ancestors ]
 13 | .I ADDRESS
 14 | .SH DESCRIPTION
 15 | .B Zygolophodon
 16 | is an anonymous command-line interface
 17 | for reading microblogging posts,
 18 | chiefly for Mastodon.
 19 | .SS Supported servers
 20 | .IP \(bu 2
 21 | Mastodon (\(>= 3.4)
 22 | .IP \(bu 2
 23 | Iceshrimp, Catodon
 24 | .IP \(bu 2
 25 | Pleroma (\(>= 2.5), Akkoma (\(>= 2.5)
 26 | .IP \(bu 2
 27 | Bluesky
 28 | .SS Supported addresses
 29 | The following address schemes are supported:
 30 | {{ADDRESS-LIST}}
 31 | .SH OPTIONS
 32 | .TP
 33 | .BI "\-\-limit " N
 34 | Request at most
 35 | .I N
 36 | posts.
 37 | The default is 40.
 38 | .TP
 39 | .B \-\-with\-ancestors
 40 | Show also ancestors of the post.
 41 | .TP
 42 | .BR \-h ", " \-\-help
 43 | Show help message and exit.
 44 | .TP
 45 | .B \-\-version
 46 | Show version information and exit.
 47 | .ig
 48 | .SH OUTPUT FORMAT
 49 | TODO
 50 | ..
 51 | .SH ENVIRONMENT
 52 | .TP
 53 | .B PAGER
 54 | If stdout is a terminal, zygolophodon pipes the output through
 55 | .BR $PAGER .
 56 | The default is
 57 | .B pager
 58 | (if it exists)
 59 | or
 60 | .BR more .
 61 | Setting
 62 | .B PAGER
 63 | to the empty string
 64 | or the value
 65 | .B cat
 66 | disables the use of the pager.
 67 | .\" TODO: ZYGOLOPHODON_COLUMNS
 68 | .\" TODO: ZYGOLOPHODON_LINK_SYMBOL
 69 | .\" TODO: ZYGOLOPHODON_PAPERCLIP
 70 | .TP
 71 | .B LESS
 72 | If this variable is unset,
 73 | zygolophodon sets it to
 74 | .B \-FXK
 75 | (which is equivalent to
 76 | .BR "\-\-quit\-if\-one\-screen \-\-no\-init \-\-quit\-on\-intr" ).
 77 | .SH EXAMPLE
 78 | .EX
 79 | {{EXAMPLE}}
 80 | .EE
 81 | 


--------------------------------------------------------------------------------
/private/gen-manpage:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | # Copyright © 2025 Jakub Wilk
  4 | # SPDX-License-Identifier: MIT
  5 | 
  6 | # pylint: disable=missing-module-docstring
  7 | 
  8 | import pathlib
  9 | import re
 10 | import sys
 11 | import types
 12 | import textwrap
 13 | 
 14 | int(0_0)  # Python >= 3.6 is required
 15 | 
 16 | here = pathlib.Path(__file__).parent
 17 | base = here.parent
 18 | 
 19 | def rextract(regexp, text, flags=0):
 20 |     regexp = re.compile(regexp, flags=flags)
 21 |     match = re.search(regexp, text)
 22 |     if match is None:
 23 |         raise LookupError(f'{regexp} not found')
 24 |     return match.group(1)
 25 | 
 26 | def addr_to_roff(addr, itype):
 27 |     out = re.sub('([A-Z]+)', r'\\f(BI\1\\fP', addr)
 28 |     out = f'.B {out}'
 29 |     if itype:
 30 |         out += f' \\fR({itype})'
 31 |     out = f'.IP \\(bu 2\n{out}'
 32 |     return out
 33 | 
 34 | charmap = r'''
 35 | ' \(aq
 36 | ` \`
 37 | “ \(lq
 38 | ” \(rq
 39 | - \-
 40 | ä \[:a]
 41 | ⋮ \&...
 42 | '''
 43 | charmap = {
 44 |     ord(key): value
 45 |     for line in charmap.strip().splitlines()
 46 |     for key, value in [line.split()]
 47 | }
 48 | 
 49 | def main():
 50 |     chunks = types.SimpleNamespace()
 51 |     code = (base / 'lib/cli.py').read_text(encoding='UTF-8')
 52 |     chunks.version = rextract(r"\b__version__ = '(.+)'", code)
 53 |     readme = (base / 'README').read_text(encoding='UTF-8')
 54 |     addrs = rextract(r'^ +ADDRESS(( +(\S.*\n))+)', readme, flags=re.M)
 55 |     roff_addrs = []
 56 |     for match in re.finditer(r'^ +(\S+)(?: [(](\S+)[)])?$', addrs, flags=re.M):
 57 |         addr, itype = match.groups()
 58 |         roff_addrs += [addr_to_roff(addr, itype)]
 59 |     chunks.address_list = str.join('\n', roff_addrs)
 60 |     try:
 61 |         example = rextract(r'(^ +[$] zygolophodon https:.+\n(( .*)?\n)+)', readme, flags=re.M)
 62 |     except LookupError:
 63 |         pass
 64 |     else:
 65 |         example = textwrap.dedent(example)
 66 |         example = re.sub('^([$] )(.*)', r'.RB "\1" "\2"', example)
 67 |         example = example.translate(charmap)
 68 |         example.encode('ASCII')
 69 |         chunks.example = example
 70 |     text = sys.stdin.read()
 71 |     def repl(match):
 72 |         name = match.group(1)
 73 |         name = name.lower().replace('-', '_')
 74 |         return getattr(chunks, name)
 75 |     text = re.sub(r'[{][{]([^\s}]+)[}][}]', repl, text)
 76 |     print(text)
 77 | 
 78 | if __name__ == '__main__':
 79 |     main()
 80 | 
 81 | # vim:ts=4 sts=4 sw=4 et
 82 | 


--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
  1 | Overview
  2 | ========
  3 | 
  4 | **Zygolophodon** is an anonymous command-line interface
  5 | for reading microblogging posts, chiefly for Mastodon.
  6 | 
  7 | The following servers are supported:
  8 | 
  9 | * Mastodon (≥ 3.4)
 10 | * Iceshrimp, Catodon
 11 | * Pleroma (≥ 2.5), Akkoma (≥ 2.5)
 12 | * Bluesky
 13 | 
 14 | Usage
 15 | =====
 16 | 
 17 | .. code:: console
 18 | 
 19 |  $ zygolophodon --help
 20 |  usage: zygolophodon [-h] [--version] [--limit N] [--with-ancestors] ADDRESS
 21 | 
 22 |  positional arguments:
 23 |    ADDRESS           @USER@DOMAIN
 24 |                      USER@DOMAIN
 25 |                      https://DOMAIN/@USER
 26 |                      https://DOMAIN/@USER/media
 27 |                      https://DOMAIN/@USER/with_replies
 28 |                      https://DOMAIN/@USER/NNNNNN
 29 |                      https://DOMAIN/@USER/NNNNNN/embed
 30 |                      https://DOMAIN/tags/TAG
 31 |                      https://DOMAIN/statuses/NNNNNN
 32 |                      https://DOMAIN/redirect/statuses/NNNNNN
 33 |                      https://DOMAIN/users/USER
 34 |                      https://DOMAIN/users/USER/statuses/NNNNNN
 35 |                      https://DOMAIN/notes/IDENT (Iceshrimp)
 36 |                      https://DOMAIN/notice/IDENT (Pleroma)
 37 |                      https://DOMAIN/tag/TAG (Pleroma)
 38 |                      https://bsky.app/profile/USER (Bluesky)
 39 |                      https://bsky.app/profile/USER/post/IDENT (Bluesky)
 40 |                      https://bsky.app/hashtag/TAG (Bluesky)
 41 | 
 42 |  options:
 43 |    -h, --help        show this help message and exit
 44 |    --version         show version information and exit
 45 |    --limit N         request at most N posts (default: 40)
 46 |    --with-ancestors  show also post ancestors
 47 | 
 48 | Example
 49 | =======
 50 | 
 51 | .. code:: console
 52 | 
 53 |  $ zygolophodon https://mastodon.example.org/@cicero/114574908484913091
 54 |  Location: https://mastodon.example.org/@cicero/114574908484913091
 55 |  From: Marcus Tullius Cicero
 56 |  Date: 2025-05-26 15:46:00Z
 57 |  Language: la
 58 | 
 59 |  #LoremIpsum, quia dolor sit, amet, consectetur, adipisci velit, sed
 60 |  quia non numquam eius modi tempora incidunt, ut labore et dolore magnam
 61 |  aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrum
 62 |  exercitationem ullam corporis suscipit laboriosam, nisi ut aliquid ex ea
 63 |  commodi consequatur?
 64 | 
 65 |  * #LoremIpsum: https://mastodon.example.org/tags/LoremIpsum
 66 | 
 67 | Requirements
 68 | ============
 69 | 
 70 | * Python ≥ 3.8
 71 | 
 72 | .. vim:ft=rst ts=3 sts=3 sw=3 et
 73 | 


--------------------------------------------------------------------------------
/t/man-env.t:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # encoding=UTF-8
  3 | 
  4 | # Copyright © 2025 Jakub Wilk
  5 | # SPDX-License-Identifier: MIT
  6 | 
  7 | # pylint: disable=missing-module-docstring
  8 | 
  9 | import ast
 10 | import pathlib
 11 | import re
 12 | import sys
 13 | import types
 14 | 
 15 | basedir = pathlib.Path(__file__).parent.parent
 16 | sys.path[:0] = [str(basedir)]
 17 | 
 18 | # pylint: disable-next=wrong-import-position
 19 | from lib.utils import compose
 20 | 
 21 | def _extract_src_vars(path):
 22 |     with open(path, encoding='UTF-8') as file:
 23 |         src = file.read()
 24 |     code = compile(src, path, 'exec')
 25 |     mod = types.ModuleType('_')
 26 |     exec(code, mod.__dict__)  # pylint: disable=exec-used
 27 |     mod_node = ast.parse(src, path)
 28 |     for node in ast.walk(mod_node):
 29 |         if not isinstance(node, ast.Call):
 30 |             continue
 31 |         func = node.func
 32 |         if len(node.args) < 1:
 33 |             continue
 34 |         if not isinstance(node.args[0], ast.Constant):
 35 |             continue
 36 |         arg = node.args[0].value
 37 |         if isinstance(func, ast.Name) and func.id == 'Symbol':
 38 |             yield mod.Symbol.get_var(arg)  # pylint: disable=no-member
 39 |             continue
 40 |         if isinstance(func, ast.Attribute) and func.attr == 'getenv':
 41 |             yield arg
 42 |             continue
 43 | 
 44 | @compose(set)
 45 | def extract_src_vars():
 46 |     libdir = basedir / 'lib'
 47 |     for path in libdir.glob('*.py'):
 48 |         yield from _extract_src_vars(path)
 49 | 
 50 | def _extract_man_vars_section():
 51 |     path = basedir / 'doc/zygolophodon.1.in'
 52 |     with open(path, encoding='UTF-8') as file:
 53 |         src = file.read()
 54 |     match = re.search(r'\n[.]SH ENVIRONMENT\n(.+?\n)[.]SH ', src, re.DOTALL)
 55 |     [src] = match.groups()
 56 |     return src
 57 | 
 58 | @compose(set)
 59 | def _extract_man_vars(regexp):
 60 |     src = _extract_man_vars_section()
 61 |     for match in re.finditer(regexp, src):
 62 |         [var] = match.groups()
 63 |         yield var
 64 | 
 65 | def extract_man_vars():
 66 |     regexp = re.compile(r'^[.]TP\n[.]B (\S+)$', re.MULTILINE)
 67 |     return _extract_man_vars(regexp)
 68 | 
 69 | def extract_man_todo_vars():
 70 |     regexp = re.compile(r'.\" TODO: (ZYGOLOPHODON_[A-Z_]+)$', re.MULTILINE)
 71 |     return _extract_man_vars(regexp)
 72 | 
 73 | def ok(cond, name, todo=False):
 74 |     status = ['not ok', 'ok'][cond]
 75 |     todo = ['# TODO'] * (todo and not cond)
 76 |     print(status, '-', name, *todo)
 77 | 
 78 | def main():
 79 |     if '--installed' in sys.argv:
 80 |         print('1..0 # SKIP post-install testing not supported')
 81 |         return
 82 |     src_vars = extract_src_vars()
 83 |     man_vars = extract_man_vars()
 84 |     man_todo_vars = extract_man_todo_vars()
 85 |     m = len(src_vars) + len(man_vars)
 86 |     print(f'1..{m}')
 87 |     for var in src_vars:
 88 |         todo = var in man_todo_vars
 89 |         ok(var in man_vars, f'{var} in man', todo=todo)
 90 |     for var in man_vars:
 91 |         ok(var in src_vars, f'{var} in src')
 92 | 
 93 | if __name__ == '__main__':
 94 |     main()
 95 | 
 96 | # vim:ts=4 sts=4 sw=4 et ft=python
 97 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
  1 | name: CI
  2 | permissions: {}
  3 | on:
  4 | - push
  5 | - pull_request
  6 | jobs:
  7 | 
  8 |   main:
  9 |     runs-on: ${{matrix.os}}
 10 |     strategy:
 11 |       matrix:
 12 |         include:
 13 |         - python: '3.8'
 14 |           os: ubuntu-22.04
 15 |         - python: '3.9'
 16 |           os: ubuntu-22.04
 17 |         - python: '3.10'
 18 |           os: ubuntu-22.04
 19 |         - python: '3.11'
 20 |           os: ubuntu-22.04
 21 |         - python: '3.12'
 22 |           os: ubuntu-22.04
 23 |         - python: '3.13'
 24 |           os: ubuntu-24.04
 25 |         - python: '3.14'
 26 |           os: ubuntu-24.04
 27 |     steps:
 28 |     - uses: actions/checkout@v4
 29 |     - name: set up Python ${{matrix.python}}
 30 |       uses: actions/setup-python@v5
 31 |       with:
 32 |         python-version: ${{matrix.python}}
 33 |     - name: run tests
 34 |       run: |
 35 |         make test verbose=1
 36 |     - name: run network tests
 37 |       run: |
 38 |         prove -v t/network.t '::' --network
 39 |     - name: install
 40 |       run: |
 41 |         make install PREFIX=~/.local
 42 |     - name: post-install smoke-test
 43 |       run: |
 44 |         cd /
 45 |         zygolophodon --version
 46 |     - name: post-install man page check
 47 |       env:
 48 |         MANPATH: /home/runner/.local/share/man
 49 |         MANWIDTH: 80
 50 |       run: |
 51 |         cd /
 52 |         man 1 zygolophodon | grep -A 10 -w ZYGOLOPHODON
 53 |     - name: run post-install tests
 54 |       run: |
 55 |         rm zygolophodon doc/*.1 doc/*.1.in
 56 |         make test-installed verbose=1
 57 |         git restore .
 58 |     - name: run pydiatra
 59 |       run: |
 60 |         python3 -m pip install pydiatra
 61 |         python3 -m pydiatra -v .
 62 |     - name: run pyflakes
 63 |       run: |
 64 |         python3 -m pip install pyflakes
 65 |         python3 -m pyflakes .
 66 |     - name: run pylint
 67 |       run: |
 68 |         python3 -m pip install pylint
 69 |         python3 -m pylint $(grep -rl '/env python3$' .) lib/*.py
 70 |     - name: check README syntax
 71 |       run: |
 72 |         python3 -m pip install restructuredtext-lint pygments
 73 |         rst-lint --level=info --encoding=UTF-8 README
 74 | 
 75 |   nonpython:
 76 |     strategy:
 77 |       matrix:
 78 |         os:
 79 |         - ubuntu-22.04
 80 |         - ubuntu-24.04
 81 |     runs-on: ${{matrix.os}}
 82 |     steps:
 83 |     - uses: actions/checkout@v4
 84 |     - name: set up APT
 85 |       run: |
 86 |         printf 'Apt::Install-Recommends "false";\n' | sudo tee -a /etc/apt/apt.conf
 87 |         sudo apt-get update
 88 |     - name: install perlcritic
 89 |       run: |
 90 |         sudo apt-get install libperl-critic-perl
 91 |     - name: run perlcritic
 92 |       run: |
 93 |         perlcritic $(grep -rl '/env perl$' .)
 94 |     - name: regenerate README
 95 |       run: |
 96 |         private/update-readme
 97 |         git diff --exit-code
 98 |     - name: install mandoc
 99 |       run: |
100 |         sudo apt-get install mandoc
101 |     - name: build man page
102 |       run: |
103 |         make doc/zygolophodon.1
104 |     - name: run mandoc lint
105 |       run: |
106 |         mandoc -T lint doc/*.1
107 | 
108 | # vim:ts=2 sts=2 sw=2 et
109 | 


--------------------------------------------------------------------------------
/lib/stdout.py:
--------------------------------------------------------------------------------
  1 | # Copyright © 2022-2025 Jakub Wilk
  2 | # SPDX-License-Identifier: MIT
  3 | 
  4 | '''
  5 | sys.stdout wrapper
  6 | '''
  7 | 
  8 | import contextlib
  9 | import http
 10 | import inspect
 11 | import io
 12 | import os
 13 | import shutil
 14 | import subprocess
 15 | import sys
 16 | 
 17 | def find_command(command):
 18 |     if shutil.which(command):
 19 |         return command
 20 |     return None
 21 | 
 22 | class StdOut(io.TextIOBase):
 23 | 
 24 |     def _install_pager(self):
 25 |         if not sys.__stdout__.isatty():
 26 |             return
 27 |         cmdline = os.getenv('PAGER')
 28 |         if cmdline in {'', 'cat'}:
 29 |             return
 30 |         cmdline = (cmdline
 31 |             or find_command('pager')  # Debian:
 32 |             # https://www.debian.org/doc/debian-policy/ch-customized-programs.html#editors-and-pagers
 33 |             or 'more'  # POSIX:
 34 |             # https://pubs.opengroup.org/onlinepubs/007904975/utilities/man.html#tag_04_85_08
 35 |         )
 36 |         env = None
 37 |         if os.getenv('LESS') is None:
 38 |             env = dict(env or os.environ, LESS='-FXK')
 39 |         self._pager = subprocess.Popen(cmdline, shell=True, stdin=subprocess.PIPE, env=env)  # pylint: disable=consider-using-with
 40 |         self._stdout = io.TextIOWrapper(self._pager.stdin,
 41 |             encoding=sys.__stdout__.encoding,
 42 |             errors=sys.__stdout__.errors,
 43 |             line_buffering=True,
 44 |         )
 45 | 
 46 |     def __init__(self):
 47 |         super().__init__()
 48 |         self._newlines = 0
 49 |         self._pager = None
 50 |         self._stdout = sys.__stdout__
 51 |         self._install_pager()
 52 | 
 53 |     def _get_fp(self):
 54 |         if http.client.HTTPConnection.debuglevel:
 55 |             # Eww, FIXME in Python?
 56 |             # http.client prints debug messages to stdout.
 57 |             # Let's redirect them to stderr:
 58 |             for frameinfo in inspect.stack(context=0):
 59 |                 if frameinfo.filename == http.client.__file__:
 60 |                     return sys.__stderr__
 61 |         return self._stdout
 62 | 
 63 |     def write(self, s):
 64 |         fp = self._get_fp()
 65 |         if fp is self._stdout:
 66 |             if s == '':
 67 |                 return
 68 |             if s == '\n':
 69 |                 if self._newlines == 2:
 70 |                     return
 71 |                 self._newlines += 1
 72 |             else:
 73 |                 self._newlines = int(s[-1] == '\n')
 74 |         fp.write(s)
 75 | 
 76 |     def flush(self):
 77 |         self._get_fp().flush()
 78 | 
 79 |     def isatty(self):
 80 |         return sys.__stdout__.isatty()
 81 | 
 82 |     def __exit__(self, exc_type, exc_value, traceback):
 83 |         ret = super().__exit__(exc_type, exc_value, traceback)
 84 |         if self._pager:
 85 |             self._pager.__exit__(exc_type, exc_value, traceback)
 86 |             if exc_type is None and self._pager.returncode != 0:
 87 |                 msg = 'pager failed'
 88 |                 raise RuntimeError(msg)
 89 |         self._pager = None
 90 |         self._stdout = None
 91 |         return ret
 92 | 
 93 | @contextlib.contextmanager
 94 | def install():
 95 |     assert sys.stdout is sys.__stdout__
 96 |     try:
 97 |         with StdOut() as sys.stdout:
 98 |             yield
 99 |     finally:
100 |         sys.stdout = sys.__stdout__
101 | 
102 | __all__ = ['install']
103 | 
104 | # vim:ts=4 sts=4 sw=4 et
105 | 


--------------------------------------------------------------------------------
/lib/text.py:
--------------------------------------------------------------------------------
  1 | # Copyright © 2022-2025 Jakub Wilk
  2 | # SPDX-License-Identifier: MIT
  3 | 
  4 | '''
  5 | text support
  6 | '''
  7 | 
  8 | import os
  9 | import re
 10 | import textwrap
 11 | import unicodedata
 12 | 
 13 | columns = int(os.getenv('ZYGOLOPHODON_COLUMNS', '78'))
 14 | 
 15 | def wcwidth(ch):
 16 |     # poor man's wcwidth(3)
 17 |     wd = unicodedata.east_asian_width(ch)
 18 |     return 1 + (wd in {'F', 'W'})
 19 | 
 20 | def wcswidth(s):
 21 |     # poor man's wcswidth(3)
 22 |     return sum(map(wcwidth, s))
 23 | 
 24 | class Symbol:
 25 | 
 26 |     @classmethod
 27 |     def get_var(cls, name):
 28 |         name = name.upper().replace(' ', '_')
 29 |         return f'ZYGOLOPHODON_{name}'
 30 | 
 31 |     def __init__(self, name):
 32 |         var = self.get_var(name)
 33 |         text = os.getenv(var, '*')
 34 |         if match := re.fullmatch('(.*):([0-9]+)', text):
 35 |             (text, width) = match.groups()
 36 |             width = int(width)
 37 |         else:
 38 |             width = wcswidth(text)
 39 |         self._text = text
 40 |         self.width = width
 41 | 
 42 |     def __str__(self):
 43 |         return self._text
 44 | 
 45 | class symbols:
 46 |     link = Symbol('link symbol')
 47 |     paperclip = Symbol('paperclip')
 48 | 
 49 | def isolate_bidi(text):
 50 |     '''
 51 |     * If there are any explicit BDI formatting characters in the text
 52 |       (except PDF, which is harmless by itself),
 53 |       wrap the text with FSI + PDI.
 54 |     * Remove any excess PDIs.
 55 |     * Append PDIs to close any stray isolate initiators.
 56 |     '''
 57 |     #
 58 |     # Documentation: https://unicode.org/reports/tr9/
 59 |     # ("Unicode Bidirectional Algorithm")
 60 |     #
 61 |     n = None  # the number of unclosed isolate initiators,
 62 |               # or None if the text doesn't need any BiDi treatment
 63 |     def repl(match):
 64 |         nonlocal n
 65 |         if n is None:
 66 |             n = 0
 67 |         s = match.group()
 68 |         if s in '\N{LRI}\N{RLI}\N{FSI}':
 69 |             n += 1
 70 |         elif s == '\N{PDI}':
 71 |             if n == 0:
 72 |                 return ''
 73 |             n -= 1
 74 |         return s
 75 |     s = re.sub('[\N{LRE}\N{RLE}\N{LRO}\N{RLO}\N{LRI}\N{RLI}\N{FSI}\N{PDI}]', repl, text)
 76 |     if n is not None:
 77 |         pdi = (n + 1) * '\N{PDI}'
 78 |         s = f'\N{FSI}{s}{pdi}'
 79 |     return s
 80 | 
 81 | def wrap_text(text, indent='', protect=None):
 82 |     # FIXME? BiDi-aware terminals consider newlines as paragraph separators,
 83 |     # so line-wrapping may disrupt BiDi.
 84 |     text = text.splitlines()
 85 |     for line in text:
 86 |         yield wrap_line(line, indent=indent, protect=protect)
 87 | 
 88 | def wrap_line(line, indent='', protect=None):
 89 |     tokens = []
 90 |     if protect:
 91 |         [prot_start, prot_end] = protect
 92 |         assert prot_start
 93 |         assert prot_end
 94 |         assert '\N{SUB}' not in (prot_start + prot_end)
 95 |         prot_re = re.compile(
 96 |             '\N{SUB}+|'
 97 |             + re.escape(prot_start)
 98 |             + '(.*?)'
 99 |             + re.escape(prot_end)
100 |         )
101 |         def subst(match):
102 |             nonlocal tokens
103 |             token = match.group()
104 |             tokens += [token]
105 |             n = len(token)
106 |             if match.group(1) is not None:
107 |                 n -= 2
108 |             return '\N{SUB}' * n
109 |         line = re.sub(prot_re, subst, line)
110 |     lines = textwrap.wrap(line,
111 |         width=columns,
112 |         initial_indent=indent,
113 |         subsequent_indent=indent,
114 |         break_long_words=False,
115 |     )
116 |     lines = str.join('\n', lines)
117 |     if tokens:
118 |         tokens.reverse()
119 |         def unsubst(match):
120 |             del match
121 |             return tokens.pop()
122 |         lines = re.sub('\N{SUB}+', unsubst, lines)
123 |     assert not tokens
124 |     return lines
125 | 
126 | __all__ = [
127 |     'Symbol',
128 |     'columns',
129 |     'isolate_bidi',
130 |     'symbols',
131 |     'wrap_text',
132 | ]
133 | 
134 | # vim:ts=4 sts=4 sw=4 et
135 | 


--------------------------------------------------------------------------------
/lib/html.py:
--------------------------------------------------------------------------------
  1 | # Copyright © 2022-2025 Jakub Wilk
  2 | # SPDX-License-Identifier: MIT
  3 | 
  4 | '''
  5 | HTML parsing
  6 | '''
  7 | 
  8 | import html.parser
  9 | import re
 10 | import sys
 11 | 
 12 | import lib.text
 13 | 
 14 | class HTMLParser(html.parser.HTMLParser):
 15 | 
 16 |     def __init__(self):
 17 |         super().__init__()
 18 |         class state:
 19 |             paras = []
 20 |             text = ''
 21 |             a_text = ''
 22 |             a_href = None
 23 |             a_depth = 0
 24 |             footnotes = {}
 25 |         self.z_state = state
 26 | 
 27 |     # FIXME: Add proper support for:
 28 |     # * <ol>, <ul>, <li>
 29 |     # * <blockquote>
 30 |     # * <pre>
 31 | 
       32 |     def handle_starttag(self, tag, attrs):
       33 |         st = self.z_state
       34 |         if tag in {'p', 'ol', 'ul', 'blockquote', 'pre'}:
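            # Block-level tags start a new paragraph: force-close any open
            # <a> first, then flush the text collected so far into st.paras.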
       35 |             while st.a_depth > 0:
       36 |                 self.handle_endtag('a')
       37 |             if st.text:
       38 |                 st.paras += [st.text]
       39 |                 st.text = ''
       40 |             return
       41 |         if tag == 'br':
       42 |             if st.a_depth > 0:
       43 |                 st.a_text += ' '
       44 |             else:
       45 |                 st.text += '\n'
       46 |             return
       47 |         if tag == 'a':
       48 |             if st.a_depth == 0:
       49 |                 href = dict(attrs).get('href', '')
       50 |                 # Let's normalize the URL somewhat,
       51 |                 # as per .
       52 |                 href = re.sub(r'\A[\0-\40]+|[\0-\40]+\Z|[\n\t]+', '', href)
       53 |                 st.a_href = href
       54 |             st.a_depth += 1
       55 |             return
       56 | 
       57 |     def handle_endtag(self, tag):
       58 |         st = self.z_state
       59 |         if tag == 'a':
       60 |             if st.a_depth > 0:
       61 |                 st.a_depth -= 1
       62 |             if st.a_depth == 0:
       63 |                 text = st.a_text
       64 |                 href = st.a_href
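                # Hashtag (#foo) and mention (@user@host) links are kept inline
                # and their targets are recorded in st.footnotes, to be listed
                # after the post text; any other link becomes [text]<url>, or a
                # bare <url> when the link text is the URL itself.  The STX/ETX
                # markers keep these spans intact during word wrapping.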
       65 |                 if re.fullmatch(r'#[\w_]+|@[\w_.-]+(@[\w.-]+)?', text) and st.footnotes.get(text, href) == href:
       66 |                     # The above should be close enough to Mastodon's own regexps:
       67 |                     # + HASHTAG_RE in ;
       68 |                     # + MENTION_RE in .
       69 |                     assert '\n' not in text
       70 |                     st.text += f'\N{STX}{text}\N{ETX}'
       71 |                     st.footnotes[text] = href
       72 |                 else:
       73 |                     if href in {text, f'http://{text}', f'https://{text}'}:
       74 |                         text = ''
       75 |                     else:
       76 |                         text = f'[{text}]'
       77 |                     assert '\n' not in text
       78 |                     st.text += f'{text}<\N{STX}{href}\N{ETX}>'
       79 |                 st.a_href = ''
       80 |                 st.a_text = ''
       81 |             return
       82 |         if tag == 'li':
       83 |             if st.a_depth > 0:
       84 |                 st.a_text += ' '
       85 |             else:
       86 |                 st.text += '\n'
       87 |             return
       88 | 
       89 |     def handle_data(self, data):
       90 |         st = self.z_state
       91 |         data = re.sub('[\N{STX}\N{ETX}]', '\N{REPLACEMENT CHARACTER}', data)
       92 |         data = re.sub(r'[^\S\N{NBSP}\N{NARROW NO-BREAK SPACE}]+', ' ', data)
       93 |         if st.a_depth > 0:
       94 |             st.a_text += data
       95 |         else:
       96 |             st.text += data
       97 | 
       98 |     def close(self):
       99 |         super().close()
      100 |         self.handle_starttag('p', {})
      101 | 
      102 |     if sys.version_info < (3, 10):
      103 |         def error(self, message):
      104 |             # hopefully not reachable
      105 |             raise RuntimeError(message)
      106 | 
      107 | def fmt_html(data, *, fmt_url=str):
      108 |     parser = HTMLParser()
      109 |     parser.feed(data)
      110 |     parser.close()
      111 |     lines = []
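    # Wrap each paragraph to the configured width (ZYGOLOPHODON_COLUMNS,
    # 78 columns by default); spans delimited by STX/ETX are protected, so
    # URLs, hashtags and mentions never get broken across lines.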
      112 |     for para in parser.z_state.paras:
      113 |         lines += lib.text.wrap_text(para, protect='\N{STX}\N{ETX}')
      114 |         lines += ['']
      115 |     text = str.join('\n', lines)
      116 |     def repl(match):
      117 |         [url] = match.groups()
      118 |         return fmt_url(url)
      119 |     text = re.sub('\N{STX}(.*?)\N{ETX}', repl, text, flags=re.DOTALL)
      120 |     lines = [text]
      121 |     link_symbol = lib.text.symbols.link
      122 |     if parser.z_state.footnotes:
      123 |         for footnote, url in parser.z_state.footnotes.items():
      124 |             url = fmt_url(url)
      125 |             lines += [f'{link_symbol} {footnote}: {url}']
      126 |     return str.join('\n', lines)
      127 | 
      128 | __all__ = ['fmt_html']
      129 | 
      130 | # vim:ts=4 sts=4 sw=4 et
      131 | 
      
      
      --------------------------------------------------------------------------------
      /lib/www.py:
      --------------------------------------------------------------------------------
        1 | # Copyright © 2022-2025 Jakub Wilk 
        2 | # SPDX-License-Identifier: MIT
        3 | 
        4 | '''
        5 | HTTP client
        6 | '''
        7 | 
        8 | import errno
        9 | import functools
       10 | import gzip
       11 | import http.client
       12 | import json
       13 | import re
       14 | import socket
       15 | import ssl
       16 | import sys
       17 | import urllib.parse
       18 | import urllib.request
       19 | 
       20 | from lib.utils import (
       21 |     Dict,
       22 | )
       23 | 
       24 | def _fmt_url_error(exc):
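    # Boil a urllib error down to a single line, preferring symbolic error
    # names, e.g. '[EAI_NONAME] ...' for DNS failures or '[ECONNREFUSED] ...'
    # for refused connections.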
       25 |     if isinstance(exc, urllib.error.HTTPError):
       26 |         return str(exc)
       27 |     exc = exc.reason
       28 |     if isinstance(exc, socket.gaierror):
       29 |         for key, value in vars(socket).items():
       30 |             if key[:4] == 'EAI_' and value == exc.errno:
       31 |                 return f'[{key}] {exc.strerror}'
       32 |     if isinstance(exc, ssl.SSLError):
       33 |         pass
       34 |     elif isinstance(exc, OSError):
       35 |         try:
       36 |             ec = errno.errorcode[exc.errno]
       37 |         except LookupError:
       38 |             pass
       39 |         else:
       40 |             return f'[{ec}] {exc.strerror}'
       41 |     return str(exc)
       42 | 
       43 | class URLError(RuntimeError):
       44 | 
       45 |     def __init__(self, url, reason):
       46 |         self.url = url
       47 |         self.reason = reason
       48 | 
       49 |     def __str__(self):
       50 |         reason = self.reason
       51 |         if isinstance(reason, Exception):
       52 |             reason = _fmt_url_error(reason)
       53 |         return reason
       54 | 
       55 | class UserAgent:
       56 | 
       57 |     headers = {
       58 |         'User-Agent': 'zygolophodon (https://github.com/jwilk/zygolophodon)',
       59 |         'Accept-Encoding': 'gzip',
       60 |     }
       61 | 
       62 |     @classmethod
       63 |     def _build_opener(cls):
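        # urllib's default opener would advertise its own User-Agent;
        # replace the default headers wholesale with the ones declared
        # above (custom User-Agent + gzip support).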
       64 |         handlers = ()
       65 |         if sys.version_info < (3, 13):
       66 |             # Work-around for 
       67 |             # ("urllib.request.urlopen() no longer respects the
       68 |             # http.client.HTTPConnection.debuglevel").
       69 |             handlers = [
       70 |                 Handler(debuglevel=http.client.HTTPConnection.debuglevel)
       71 |                 for Handler in [urllib.request.HTTPHandler, urllib.request.HTTPSHandler]
       72 |             ]
       73 |         opener = urllib.request.build_opener(*handlers)
       74 |         opener.addheaders[:] = cls.headers.items()
       75 |         return opener
       76 | 
       77 |     @classmethod
       78 |     def get(cls, url):
       79 |         request = urllib.request.Request(url)
       80 |         opener = cls._build_opener()
       81 |         try:
       82 |             response = opener.open(request)
       83 |         except urllib.error.HTTPError as exc:
       84 |             if Response.is_json(exc):
       85 |                 response = Response(exc, url=url)
       86 |                 try:
       87 |                     data = json.loads(response.data, object_hook=Dict)
       88 |                 except (json.JSONDecodeError, UnicodeError):
       89 |                     pass
       90 |                 else:
       91 |                     cls.handle_json_error(exc, data)
       92 |             raise URLError(url, exc) from exc
       93 |         except urllib.error.URLError as exc:
       94 |             raise URLError(url, exc) from exc
       95 |         return Response(response, url=url)
       96 | 
       97 |     @classmethod
       98 |     def handle_json_error(cls, exc, data):
       99 |         del exc, data
      100 | 
      101 | class Response:
      102 | 
      103 |     def __init__(self, response, *, url):
      104 |         with response:
      105 |             content_encoding = response.getheader('Content-Encoding', 'identity')
      106 |             data = response.read()
      107 |         if content_encoding == 'gzip':
      108 |             data = gzip.decompress(data)
      109 |         elif content_encoding == 'identity':
      110 |             pass
      111 |         else:
      112 |             msg = f'unexpected Content-Encoding: {content_encoding!r}'
      113 |             raise URLError(url, msg)
      114 |         self.data = data
      115 |         self.headers = response.headers
      116 |         self.url = url
      117 | 
      118 |     def is_json(self):
      119 |         ct = self.headers.get('Content-Type', '')
      120 |         match = re.match(r'application/json(;|\Z)', ct)
      121 |         return bool(match)
      122 | 
      123 |     @property
      124 |     def json(self):
      125 |         if not self.is_json():
      126 |             msg = 'error: non-JSON content'
      127 |             raise URLError(self.url, msg)
      128 |         try:
      129 |             data = json.loads(self.data, object_hook=Dict)
      130 |         except (json.JSONDecodeError, UnicodeError) as exc:
      131 |             msg = f'JSON decoding error: {exc}'
      132 |             raise URLError(self.url, msg) from exc
      133 |         return data
      134 | 
      135 |     @property
      136 |     def links(self):
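        # Parse the Link response header, e.g.
        #   <https://example.net/api/...?max_id=42>; rel="next", <...>; rel="prev"
        # into a {rel: URL} mapping.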
      137 |         s = self.headers.get('Link', '')
      138 |         data = {}
      139 |         regexp = re.compile(r'<([^>]+)>; rel="(\w+)"(?:, |\Z)')
      140 |         i = 0
      141 |         while i < len(s):
      142 |             match = regexp.match(s, i)
      143 |             if not match:
      144 |                 raise URLError(self.url, f'cannot parse Link header field: {s!r}')
      145 |             (value, key) = match.groups()
      146 |             data[key] = value
      147 |             i = match.end()
      148 |         return data
      149 | 
      150 | urlquote = functools.partial(urllib.parse.quote, safe='')
      151 | 
      152 | __all__ = [
      153 |     'URLError',
      154 |     'UserAgent',
      155 |     'Response',
      156 |     'urlquote',
      157 | ]
      158 | 
      159 | # vim:ts=4 sts=4 sw=4 et
      160 | 
      
      
      --------------------------------------------------------------------------------
      /lib/inst.py:
      --------------------------------------------------------------------------------
        1 | # Copyright © 2022-2025 Jakub Wilk 
        2 | # SPDX-License-Identifier: MIT
        3 | 
        4 | '''
        5 | microblogging instances
        6 | '''
        7 | 
        8 | import abc
        9 | import re
       10 | import types
       11 | import urllib.parse
       12 | 
       13 | from lib.utils import (
       14 |     Dict,
       15 |     abstractattribute,
       16 |     expand_template,
       17 | )
       18 | 
       19 | from lib.www import urlquote
       20 | 
       21 | class Instance(abc.ABC):
       22 | 
       23 |     types = []
       24 | 
       25 |     tag_url_template = abstractattribute()
       26 | 
       27 |     post_id_regexp = abstractattribute()
       28 | 
       29 |     addr_parser = abstractattribute()
       30 | 
       31 |     def __init__(self, url, data=None):
       32 |         self.url = url
       33 |         self.data = data
       34 | 
       35 |     @classmethod
       36 |     def parse_addr(cls, addr):
       37 |         match = cls.addr_parser.parse(addr)  # pylint: disable=no-member
       38 |         if not match:
       39 |             return None
       40 |         if match.user:
       41 |             match.user = urllib.parse.unquote(match.user)
       42 |         if match.tag:
       43 |             match.tag = urllib.parse.unquote(match.tag)
       44 |         match.url = f'https://{match.domain}'
       45 |         del match.domain
       46 |         return match
       47 | 
       48 |     @classmethod
       49 |     def connect(cls, url):
       50 |         return cls(url)
       51 | 
       52 |     @abc.abstractmethod
       53 |     def fetch_user_by_name(self, name):
       54 |         pass
       55 | 
       56 |     @abc.abstractmethod
       57 |     def fetch_user_posts(self, user, *, limit, **params):
       58 |         pass
       59 | 
       60 |     @abc.abstractmethod
       61 |     def fetch_tag_posts(self, tag_name, *, limit, **params):
       62 |         pass
       63 | 
       64 |     @abc.abstractmethod
       65 |     def fetch_post(self, post_id):
       66 |         pass
       67 | 
       68 |     @abc.abstractmethod
       69 |     def fetch_post_context(self, post_id, *, ancestors=True, descendants=True):
       70 |         pass
       71 | 
       72 |     def get_tag_url(self, tag_name):
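        # Build the web URL of a hashtag page by expanding the per-instance
        # tag_url_template (with the tag name percent-encoded); returns None
        # for instance types that have no such page.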
       73 |         template = self.tag_url_template
       74 |         if template is None:
       75 |             return None
       76 |         q_tag = urlquote(tag_name)
       77 |         path = expand_template(template, tag=q_tag)
       78 |         return f'{self.url}{path}'
       79 | 
       80 |     def fetch_tag_info(self, tag_name):
       81 |         return Dict(
       82 |             url=self.get_tag_url(tag_name),
       83 |             history=None,
       84 |         )
       85 | 
       86 |     @classmethod
       87 |     def register(cls, instance_type):
       88 |         cls.types += [instance_type]
       89 |         return instance_type
       90 | 
       91 | class AddrParser:
       92 | 
       93 |     _groups = set()
       94 | 
       95 |     def __init__(self, *templates, discard_prefixes=()):
       96 |         self._discard_prefixes = discard_prefixes
       97 |         self._raw_templates = templates
       98 |         # These are set later by __set_name__(),
       99 |         # when assigned to an Instance subclass:
      100 |         self.templates = ...
      101 |         self._post_id_regexp = ...
      102 |         self._regexps = ...
      103 | 
      104 |     def __set_name__(self, inst_type, _attr_name):
      105 |         self._post_id_regexp = inst_type.post_id_regexp
      106 |         self._regexps = []
      107 |         self.templates = []
      108 |         for template in self._raw_templates:
      109 |             if template[0] == '/':
      110 |                 template = f'https://DOMAIN{template}'
      111 |             self._add_template(template)
      112 |         del self._discard_prefixes
      113 |         del self._raw_templates
      114 | 
      115 |     def _add_template(self, template):
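        # Translate a human-readable address template such as
        # https://DOMAIN/@USER/NNNNNN into a regexp: the uppercase
        # placeholders become named groups (NNNNNN is stored as IDENT),
        # everything else has to match literally.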
      116 |         self.templates += [template]
      117 |         group2regexp = dict(
      118 |             domain=r'[^@/?#\0-\40]+',
      119 |             user=r'[^/?#\0-\40]+',
       120 |             # FIXME? This is much more lax than USERNAME_RE in 
      121 |             tag=r'[^/?#\0-\40]+',
      122 |             ident=self._post_id_regexp,
      123 |         )
      124 |         discard = self._discard_prefixes
      125 |         def repl_punct(match):
      126 |             s = match.group()
      127 |             try:
      128 |                 if s != '.' and re.fullmatch(s, s):
      129 |                     return s
      130 |             except re.error:
      131 |                 pass
      132 |             return re.escape(s)
      133 |         template = re.sub(r'\W', repl_punct, template)
      134 |         if discard:
      135 |             discard_re = str.join('|', map(re.escape, discard))
      136 |             discard_re = f'(?:{discard_re})'
      137 |             template = template.replace('/DOMAIN/', f'/DOMAIN/(?:{discard_re}/)*')
      138 |         def repl_ident(match):
      139 |             s = match.group()
      140 |             if match.start() == 0 and s == 'https':
      141 |                 return s
      142 |             if s.isupper():
      143 |                 group = s
      144 |                 if group == 'NNNNNN':
      145 |                     group = 'IDENT'
      146 |                 regexp = group2regexp[group.lower()]
      147 |             else:
      148 |                 group = s
      149 |                 regexp = re.escape(s)
      150 |             self._groups.add(group.lower())
      151 |             return f'(?P<{group}>{regexp})'
       152 |         regexp = re.sub(r'(?


--------------------------------------------------------------------------------
/lib/cli.py:
--------------------------------------------------------------------------------
        2 | # SPDX-License-Identifier: MIT
        3 | 
        4 | '''
        5 | zygolophodon CLI
        6 | '''
        7 | 
        8 | import argparse
        9 | import functools
       10 | import http.client
       11 | import os
       12 | import re
       13 | import signal
       14 | import sys
       15 | import types
       16 | import urllib.parse
       17 | 
       18 | import lib.compat
       19 | import lib.html
       20 | import lib.inst
       21 | import lib.stdout
       22 | import lib.text
       23 | import lib.utils
       24 | import lib.www
       25 | 
       26 | import lib.mastodon
       27 | import lib.bluesky
       28 | 
       29 | __version__ = '0.1'
       30 | 
       31 | prog = argparse.ArgumentParser().prog
       32 | 
       33 | def fatal(msg):
       34 |     print(f'{prog}: {msg}', file=sys.stderr)
       35 |     sys.exit(1)
       36 | 
       37 | def fmt_url(url):
       38 |     if sys.stdout.isatty():
       39 |         return re.sub('(.)', r'_\b\1', url)
       40 |     return url
       41 | 
       42 | def fmt_user(account):
       43 |     name = lib.text.isolate_bidi(account.display_name)
       44 |     return f'{name} <{fmt_url(account.url)}>'.lstrip()
       45 | 
       46 | def fmt_date(d):
       47 |     d = lib.compat.datetime_fromisoformat(d)
       48 |     d = d.replace(microsecond=0)
       49 |     d = str(d)
       50 |     d = re.sub('[+]00:00$', 'Z', d)
       51 |     return d
       52 | 
       53 | fmt_html = functools.partial(lib.html.fmt_html, fmt_url=fmt_url)
       54 | 
       55 | class VersionAction(argparse.Action):
       56 |     '''
       57 |     argparse --version action
       58 |     '''
       59 | 
       60 |     def __init__(self, option_strings, dest=argparse.SUPPRESS):
       61 |         super().__init__(
       62 |             option_strings=option_strings,
       63 |             dest=dest,
       64 |             nargs=0,
       65 |             help='show version information and exit'
       66 |         )
       67 | 
       68 |     def __call__(self, parser, namespace, values, option_string=None):
       69 |         del namespace, values, option_string
       70 |         print(f'{parser.prog} {__version__}')
       71 |         print('+ Python {0}.{1}.{2}'.format(*sys.version_info))  # pylint: disable=consider-using-f-string
       72 |         parser.exit()
       73 | 
       74 | def pint(s):
       75 |     n = int(s)
       76 |     if n > 0:
       77 |         return n
       78 |     raise ValueError
       79 | pint.__name__ = 'positive int'
       80 | 
       81 | def xmain():
       82 |     ap = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
       83 |     if sys.version_info < (3, 10):
       84 |         # https://bugs.python.org/issue9694
       85 |         ap._optionals.title = 'options'  # pylint: disable=protected-access
       86 |     ap.add_argument('--version', action=VersionAction)
       87 |     default_limit = 40
       88 |     ap.add_argument('--limit', metavar='N', type=pint, default=default_limit,
       89 |         help=f'request at most N posts (default: {default_limit})'
       90 |     )
       91 |     ap.add_argument('--with-ancestors', action='store_true',
       92 |         help='show also post ancestors'
       93 |     )
       94 |     ap.add_argument('--debug-http', action='store_true', help=argparse.SUPPRESS)
       95 |     addr_help = []
       96 |     for instance_type in lib.inst.Instance.types:
       97 |         for template in instance_type.addr_parser.templates:
       98 |             line = template
       99 |             if instance_type is not lib.mastodon.Mastodon:
      100 |                 line += f' ({instance_type.__name__})'
      101 |             addr_help += [line]
      102 |     addr_help = str.join('\n', addr_help)
      103 |     ap.add_argument('addr', metavar='ADDRESS', help=addr_help)
      104 |     opts = ap.parse_args()
      105 |     if opts.debug_http:
      106 |         http.client.HTTPConnection.debuglevel = 1
      107 |     addr = opts.addr
      108 |     if '/' in addr:
      109 |         # strip URL fragment
      110 |         addr, _ = urllib.parse.urldefrag(addr)
      111 |     if not (match := lib.inst.parse_addr(addr)):
      112 |         ap.error('unsupported address')
      113 |     sys.stdout.flush()
      114 |     with lib.stdout.install():
      115 |         instance = match.instance_type.connect(match.url)
      116 |         if match.tag:
      117 |             process_tag(instance, match.tag,
      118 |                 limit=opts.limit,
      119 |             )
      120 |         elif not match.ident:
      121 |             process_user(instance, match.user,
      122 |                 replies=bool(match.with_replies),
      123 |                 media=bool(match.media),
      124 |                 limit=opts.limit,
      125 |             )
      126 |         else:
      127 |             with_context = opts.limit > 1 and not match.embed
      128 |             process_post(instance, post_id=match.ident,
      129 |                 with_replies=with_context,
      130 |                 with_ancestors=(with_context and opts.with_ancestors),
      131 |             )
      132 | 
      133 | def plural(i, noun):
      134 |     if i != 1:
      135 |         noun += 's'
      136 |     return f'{i} {noun}'
      137 | 
      138 | def process_tag(instance, tag_name, *, limit):
      139 |     info = instance.fetch_tag_info(tag_name)
      140 |     if info.url:
      141 |         print('Location:', fmt_url(info.url))
      142 |     else:
      143 |         print('Location:', f'(cannot generate URL for tag {tag_name!r})')
      144 |     history = info.history
      145 |     if history:
      146 |         n_posts = sum(int(entry.uses) for entry in history)
      147 |         n_users = sum(int(entry.accounts) for entry in history)
      148 |         n_posts_today = history[0].uses
      149 |         n_days = len(history)
      150 |         s_days = plural(n_days, 'day')
      151 |         print('Statistics:', f'(last {s_days})')
      152 |         print(' ', plural(n_posts, 'post'))
      153 |         if n_users > 0:
      154 |             print(' ', plural(n_users, 'user'))
      155 |         if n_posts > 0:
      156 |             print(' ', plural(n_posts_today, 'post'), 'today')
      157 |     posts = instance.fetch_tag_posts(tag_name, limit=limit)
      158 |     print_posts(posts, separators='=- ')
      159 | 
      160 | def process_user(instance, username, *, replies=False, media=False, limit):
      161 |     user = instance.fetch_user_by_name(username)
      162 |     print('User:', fmt_user(user))
      163 |     if user.note:
      164 |         print()
      165 |         print(fmt_html(user.note))
      166 |     seen = set()
      167 |     if not (media or replies):
      168 |         posts = instance.fetch_user_posts(user, limit=limit, pinned=True)
      169 |         def gen_posts():
      170 |             for post in posts:
      171 |                 if not post.pinned:
       172 |                     # Snac's Mastodon API yields all posts even when we ask only for pinned ones:
      173 |                     # https://codeberg.org/grunfink/snac2/issues/335
      174 |                     # Let's filter out non-pinned posts.
      175 |                     continue
      176 |                 yield post
      177 |                 seen.add(post.id)
      178 |         n = print_posts(gen_posts(), separators='=- ')
      179 |         if n >= limit:
      180 |             limit = 0
      181 |     params = types.SimpleNamespace()
      182 |     if media:
       183 |         params.only_media = True
      184 |     else:
      185 |         params.exclude_replies = not replies
      186 |     posts = instance.fetch_user_posts(user, limit=limit, **vars(params))
      187 |     # Filter out posts that were already printed as pinned:
      188 |     posts = (post for post in posts if post.id not in seen)
      189 |     print_posts(posts, separators='=- ')
      190 | 
      191 | def process_post(instance, post_id, *, with_replies=True, with_ancestors=False):
      192 |     post = instance.fetch_post(post_id)
      193 |     @functools.cache
      194 |     def get_context():
      195 |         return instance.fetch_post_context(post_id,
      196 |             ancestors=with_ancestors,
      197 |             descendants=with_replies,
      198 |         )
      199 |     if with_ancestors:
      200 |         context = get_context()
      201 |         print_posts(context.ancestors, hide_in_reply_to=True, separators=' -=')
      202 |     print_post(post, hide_in_reply_to=with_ancestors)
      203 |     if with_replies:
      204 |         context = get_context()
      205 |         print_posts(context.descendants, hide_in_reply_to=True, separators='=- ')
      206 | 
      207 | def print_separator(ch):
      208 |     print()
      209 |     print(ch * lib.text.columns)
      210 |     print()
      211 | 
      212 | def print_posts(posts, *, hide_in_reply_to=False, separators='-- '):
      213 |     def print_sep(i):
      214 |         ch = separators[i]
      215 |         if ch.isspace():
      216 |             return
      217 |         print_separator(ch)
      218 |     n = 0
      219 |     for n, post in enumerate(posts, start=1):
      220 |         print_sep(n > 1)
      221 |         print_post(post, hide_in_reply_to=hide_in_reply_to)
      222 |     if n > 0:
      223 |         print_sep(-1)
      224 |     return n
      225 | 
      226 | def normalize_lang(lang):
      227 |     if lang is None:
      228 |         return 'en'
      229 |     if lang.startswith('en-'):
      230 |         return 'en'
      231 |     return lang
      232 | 
      233 | def print_post(post, *, hide_in_reply_to=False):
      234 |     if post.location:
      235 |         print('Location:', fmt_url(post.location))
      236 |     url = post.url or post.uri
      237 |     if url and url != post.location:
      238 |         print('Origin:', fmt_url(url))
      239 |     if post.in_reply_to_id and not hide_in_reply_to:
      240 |         if post.in_reply_to_url:
      241 |             print('In-Reply-To:', fmt_url(post.in_reply_to_url))
      242 |         else:
      243 |             print('In-Reply-To:', f'(cannot generate URL for post id {post.in_reply_to_id})')
      244 |     if post.pinned:
      245 |         pinned = post.pinned
      246 |         pin_comment = []
      247 |         if isinstance(pinned, str):
      248 |             pin_comment = fmt_date(pinned)
      249 |             pin_comment = [f'({pin_comment})']
      250 |         print('Pinned: yes', *pin_comment)
      251 |     if post.account:
      252 |         # FIXME in Pleroma?
      253 |         # Why is the account information missing
      254 |         # for some reblogged posts?
      255 |         print('From:', fmt_user(post.account))
      256 |     date_comment = []
      257 |     if post.edited_at:
      258 |         date_comment = 'edited ' + fmt_date(post.edited_at)
      259 |         date_comment = [f'({date_comment})']
      260 |     print('Date:', fmt_date(post.created_at), *date_comment)
      261 |     if normalize_lang(post.language) != 'en':
      262 |         print('Language:', post.language)
      263 |     if post.reblog:
      264 |         print('Reblog: yes')
      265 |     print()
      266 |     if post.reblog:
      267 |         print_post(post.reblog)
      268 |     else:
      269 |         text = fmt_html(post.content)
      270 |         print(text)
      271 |     print()
      272 |     paperclip = lib.text.symbols.paperclip
      273 |     for att in post.media_attachments or ():
      274 |         # TODO? Render the images with chafa?
      275 |         print(paperclip, fmt_url(att.url))
      276 |         print()
      277 |         text = att.description or ''
      278 |         indent = ' ' * (1 + paperclip.width)
      279 |         text = lib.text.wrap_text(text, indent=indent)
      280 |         for line in text:
      281 |             print(line)
      282 |         print()
      283 | 
      284 | def main():
      285 |     try:
      286 |         xmain()
      287 |     except lib.www.URLError as exc:
      288 |         fatal(f'<{exc.url}>: {exc}')
      289 |     except BrokenPipeError:
      290 |         signal.signal(signal.SIGPIPE, signal.SIG_DFL)
      291 |         os.kill(os.getpid(), signal.SIGPIPE)
      292 |         raise
      293 | 
      294 | __all__ = ['main']
      295 | 
      296 | # vim:ts=4 sts=4 sw=4 et
      297 | 
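
Aside on fmt_url() above: when stdout is a terminal, it prefixes every character of the URL with an underscore and a backspace, the classic overstrike sequence that pagers such as less(1) and the ul(1) filter render as underlined text. A minimal standalone sketch of the same trick (the function name is made up; this is not part of zygolophodon):

    import re
    import sys

    def underline(s):
        # "_<BS>c" is the old overstrike convention for an underlined "c";
        # emit it only when writing to a terminal, as fmt_url() does.
        if sys.stdout.isatty():
            return re.sub('(.)', r'_\b\1', s)
        return s

    print(underline('https://example.org'))  # view through less(1) or ul(1) to see the underlining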
      
      
      --------------------------------------------------------------------------------
      /lib/mastodon.py:
      --------------------------------------------------------------------------------
        1 | # Copyright © 2022-2025 Jakub Wilk 
        2 | # SPDX-License-Identifier: MIT
        3 | 
        4 | '''
        5 | Mastodon (and Mastodon-like) instances
        6 | '''
        7 | 
        8 | import abc
        9 | import functools
       10 | import re
       11 | import urllib.parse
       12 | 
       13 | import lib.www
       14 | 
       15 | from lib.inst import (
       16 |     AddrParser,
       17 |     Instance,
       18 | )
       19 | 
       20 | from lib.utils import (
       21 |     Dict,
       22 |     abstractattribute,
       23 |     expand_template,
       24 | )
       25 | 
       26 | urlquote = lib.www.urlquote
       27 | 
       28 | class UserAgent(lib.www.UserAgent):
       29 | 
       30 |     @classmethod
       31 |     def handle_json_error(cls, exc, data):
       32 |         try:
       33 |             msg = data.error
       34 |         except KeyError:
       35 |             return
       36 |         assert exc.msg
       37 |         exc.msg = msg
       38 | 
       39 | class Mastodonoid(Instance):
       40 | 
       41 |     post_url_template = abstractattribute()
       42 | 
       43 |     @classmethod
       44 |     @abc.abstractmethod
       45 |     def identify(cls, data):
       46 |         pass
       47 | 
       48 |     @functools.cached_property
       49 |     def api_version(self):
       50 |         match = re.match('([0-9]+([.][0-9]+)*)', self.data.version)
       51 |         version = match.group()
       52 |         version = version.split('.')
       53 |         return tuple(int(x) for x in version)
       54 | 
       55 |     @classmethod
       56 |     def connect(cls, url):
       57 |         # https://docs.joinmastodon.org/methods/instance/#v1
       58 |         # available since Mastodon v1.1
       59 |         #
       60 |         # FIXME? v1 is deprecated, but OTOH Mastodon before v4.0
       61 |         # and some non-Mastodon instances don't support v2.
       62 |         #
       63 |         # TODO? Use NodeInfo 
       64 |         # for identification?
       65 |         # But it's only available since Mastodon 3.0.
       66 |         instance = Mastodon(url, None)
       67 |         data = instance._fetch('instance')  # pylint: disable=protected-access
       68 |         inst_types = [
       69 |             inst for inst in Instance.types
       70 |             if issubclass(inst, Mastodonoid)
       71 |         ]
       72 |         inst_types.sort(
       73 |             key=(lambda t: t.identify(data)),
       74 |             reverse=True,
       75 |         )
       76 |         inst_type = inst_types[0]
       77 |         return inst_type(url, data)
       78 | 
       79 |     def _api_url(self, url):
       80 |         return f'{self.url}/api/v1/{url}'
       81 | 
       82 |     def _fetch(self, url):
       83 |         url = self._api_url(url)
       84 |         return UserAgent.get(url).json
       85 | 
       86 |     def fetch_user_by_name(self, name):
       87 |         # https://docs.joinmastodon.org/methods/accounts/#lookup
       88 |         # available since:
       89 |         # - Mastodon v3.4
       90 |         # - Pleroma v2.5
       91 |         # - Akkoma v2.5
       92 |         q_name = urlquote(name)
       93 |         return self._fetch(f'accounts/lookup?acct={q_name}')
       94 | 
       95 |     def _fetch_posts(self, url, *, limit, **params):
       96 |         url = self._api_url(url)
       97 |         page_limit = 40  # maximum allowed
       98 |         pinned = params.get('pinned', False)
       99 |         params['limit'] = min(limit, page_limit)
      100 |         q_params = urllib.parse.urlencode(params).lower()
      101 |         url += f'?{q_params}'
      102 |         while limit > 0:
      103 |             response = UserAgent.get(url)
      104 |             posts = response.json
      105 |             self.fix_posts(posts)
      106 |             for post in posts:
      107 |                 if post.pinned is None:
      108 |                     post.pinned = pinned
      109 |             yield from posts
      110 |             limit -= len(posts)
      111 |             next_url = response.links.get('next')
      112 |             if next_url is None:
      113 |                 break
       114 |             if not next_url.startswith(self._api_url('')):
      115 |                 msg = f'suspicious Link URL: {next_url!r}'
      116 |                 raise RuntimeError(msg)
      117 |             url = re.sub(
      118 |                 r'(?<=[?&]limit=)\d+(?=&|\Z)',
      119 |                 str(min(limit, page_limit)),
      120 |                 next_url
      121 |             )
      122 | 
      123 |     def fetch_user_posts(self, user, *, limit, **params):
      124 |         # https://docs.joinmastodon.org/methods/accounts/#statuses
      125 |         # available since Mastodon v2.7
      126 |         url = f'accounts/{user.id}/statuses'
      127 |         return self._fetch_posts(url, limit=limit, **params)
      128 | 
      129 |     def fetch_tag_posts(self, tag_name, *, limit, **params):
      130 |         # https://docs.joinmastodon.org/methods/timelines/#tag
      131 |         # available since Mastodon v0.1
      132 |         q_tag = urlquote(tag_name)
      133 |         url = f'timelines/tag/{q_tag}'
      134 |         return self._fetch_posts(url, limit=limit, **params)
      135 | 
      136 |     def fetch_post(self, post_id):
      137 |         # https://docs.joinmastodon.org/methods/statuses/#get
      138 |         # available since Mastodon v2.7
      139 |         post = self._fetch(f'statuses/{post_id}')
      140 |         self.fix_post(post)
      141 |         return post
      142 | 
      143 |     def fetch_post_context(self, post_id, *, ancestors=True, descendants=True):
      144 |         # https://docs.joinmastodon.org/methods/statuses/#context
      145 |         # available since Mastodon v0.1
      146 |         if not (ancestors or descendants):
      147 |             # shortcut:
      148 |             return Dict(ancestors=None, descendants=None)
      149 |         context = self._fetch(f'statuses/{post_id}/context')
      150 |         if ancestors:
      151 |             self.fix_posts(context.ancestors)
      152 |         else:
      153 |             context.ancestors = None
      154 |         if descendants:
      155 |             self.fix_posts(context.descendants)
      156 |         else:
      157 |             context.descendants = None
      158 |         return context
      159 | 
      160 |     def get_post_url(self, *, post_id):
      161 |         template = self.post_url_template
      162 |         if template is None:
      163 |             return None
      164 |         path = expand_template(template, ident=post_id)
      165 |         return f'{self.url}{path}'
      166 | 
      167 |     def get_fixed_post_url(self, url):
      168 |         return url
      169 | 
      170 |     def fix_post(self, post):
      171 |         irt_url = None
      172 |         if post.in_reply_to_id:
      173 |             irt_url = self.get_post_url(post_id=post.in_reply_to_id)
      174 |         post.in_reply_to_url = irt_url
      175 |         try:
      176 |             post.edited_at
      177 |         except KeyError:
      178 |             # * In Mastodon, the attribute is available only since v3.5.0.
      179 |             # * FIXME in Pleroma?
      180 |             #   Why is the attribute missing for reblogs?
      181 |             post.edited_at = None
      182 |         if post.reblog:
      183 |             self.fix_post(post.reblog)
      184 |             if post.url == post.reblog.uri:
      185 |                 # FIXME in Pleroma?
      186 |                 # Why is the URL unhelpful?
      187 |                 post.url = self.get_post_url(post_id=post.id)
      188 |             if post.uri == post.reblog.uri:
      189 |                 post.uri = None
      190 |         post.url = self.get_fixed_post_url(post.url)
      191 |         try:
      192 |             post.pinned
      193 |         except KeyError:
      194 |             post.pinned = None
      195 |         if post.url and post.url.startswith(f'{self.url}/'):
      196 |             post.location = post.url
      197 |         else:
      198 |             post.location = self.get_post_url(post_id=post.id)
      199 | 
      200 |     def fix_posts(self, posts):
      201 |         for post in posts:
      202 |             self.fix_post(post)
      203 | 
      204 |     def fetch_tag_info(self, tag_name):
      205 |         # https://docs.joinmastodon.org/methods/tags/#get
      206 |         # available since Mastodon v4.0
      207 |         if self.api_version < (4, 0):
      208 |             return Instance.fetch_tag_info(self, tag_name)
      209 |         q_tag = urlquote(tag_name)
      210 |         url = f'tags/{q_tag}'
      211 |         return self._fetch(url)
      212 | 
      213 | @Instance.register
      214 | class Mastodon(Mastodonoid):
      215 | 
      216 |     # Codebase: https://github.com/mastodon/mastodon
      217 | 
      218 |     tag_url_template = '/tags/TAG'
      219 | 
      220 |     post_url_template = '/statuses/IDENT'
      221 | 
      222 |     post_id_regexp = '[0-9]{1,18}'
      223 |     # Source: lib/mastodon/snowflake.rb
      224 |     #
      225 |     # Identifiers are decimal integers:
      226 |     #
      227 |     #    n = (t << 16) + r
      228 |     #
      229 |     # where
      230 |     #
      231 |     #    t is milliseconds since 1970;
      232 |     #    r are randomish lower bits.
      233 |     #
      234 |     # In practice, it's always
      235 |     # either 17 digits (until 2018)
      236 |     #     or 18 digits (2018-2453).
      237 |     #
      238 |     # $ export TZ=UTC0
      239 |     # $ qalc -t '"1970-01-01" + ((10 ** 16) >> 16) ms'
      240 |     # "1974-11-02T01:31:27"
      241 |     # $ qalc -t '"1970-01-01" + ((10 ** 17) >> 16) ms'
      242 |     # "2018-05-09T15:14:39"
      243 |     # $ qalc -t '"1970-01-01" + ((10 ** 18) >> 16) ms'
      244 |     # "2453-07-13T08:30:35"
      245 |     #
      246 |     # However, before Mastodon v2.0,
      247 |     # identifiers were sequential 64-bit(?) integers:
      248 |     # https://github.com/mastodon/mastodon/commit/468523f4ad85f99d
      249 | 
      250 |     addr_parser = AddrParser(
      251 |         # mail-like
      252 |         '@USER@DOMAIN',
      253 |         'USER@DOMAIN',
      254 |         # user
      255 |         '/@USER',
      256 |         '/@USER/media',
      257 |         '/@USER/with_replies',
      258 |         # post
      259 |         '/@USER/NNNNNN',
      260 |         '/@USER/NNNNNN/embed',
      261 |         # tag
      262 |         '/tags/TAG',
      263 |         # legacy user-less post
      264 |         '/statuses/NNNNNN',
      265 |         # offsite redirect pages
      266 |         '/redirect/statuses/NNNNNN',
      267 |         # URI->URL redirects
      268 |         '/users/USER',
      269 |         '/users/USER/statuses/NNNNNN',
      270 |         #
      271 |         discard_prefixes={'deck', 'web'},
      272 |     )
      273 | 
      274 |     @classmethod
      275 |     def identify(cls, data):
      276 |         del data
      277 |         return 0
      278 | 
      279 |     def get_fixed_post_url(self, url):
      280 |         q_base_url = re.escape(self.url)
      281 |         match = re.fullmatch(q_base_url + '/users/([^/]+)/statuses/([0-9]+)/activity', url or '')
      282 |         if match:
      283 |             # https://github.com/mastodon/mastodon/issues/34433
      284 |             # ("reblogs have wrong url")
      285 |             (user, post_id) = match.groups()
      286 |             url = f'{self.url}/@{user}/{post_id}'
      287 |         return url
      288 | 
      289 | @Instance.register
      290 | class UntamedMastodonoid(Mastodonoid):
      291 | 
      292 |     # fallback for unknown (but known to be unsupported) instance types
      293 | 
      294 |     tag_url_template = None
      295 | 
      296 |     post_url_template = None
      297 | 
      298 |     post_id_regexp = None
      299 | 
      300 |     addr_parser = AddrParser()  # dummy
      301 | 
      302 |     @classmethod
      303 |     def identify(cls, data):
      304 |         if re.search(r'\b(compatible|really)\b', data.version):
      305 |             return 0.1
      306 |         match = re.match('^([0-9]+)[.]', data.version)
      307 |         if match is None:
      308 |             return 0.1
      309 |         [major] = match.groups()
      310 |         major = int(major)
      311 |         if major < 1:
      312 |             # /api/v1/instance was added only in Mastodon 1.1
      313 |             # 
      314 |             # so this version is clearly a lie.
      315 |             return 0.1
      316 |         return -1
      317 | 
      318 | @Instance.register
      319 | class Iceshrimp(Mastodonoid):
      320 | 
      321 |     # Codebase: https://iceshrimp.dev/
      322 |     # Forks: https://codeberg.org/catodon/catodon
      323 | 
      324 |     tag_url_template = '/tags/TAG'
      325 | 
      326 |     post_url_template = '/notes/IDENT'
      327 | 
      328 |     post_id_regexp = '[0-9a-z]{16,24}'
      329 |     # Source: packages/backend/src/misc/gen-id.ts
      330 |     #
      331 |     # Identifiers are in the form:
      332 |     #
      333 |     #    t || r
      334 |     #
      335 |     # where
      336 |     #
      337 |     #    t is milliseconds since 2000;
      338 |     #    r is randomish, configurable length 8-16.
      339 |     #
      340 |     # Both are in base-36.
      341 |     #
      342 |     # The docs say the timestamp is 8 chars long
      343 |     # (and the code indeed ensures it's _at least_ 8 chars),
      344 |     # but that'll only suffice until 2089.
      345 |     #
      346 |     # $ export TZ=UTC0
      347 |     # $ qalc -t '"2000-01-01" + (36 ** 8) ms'
      348 |     # "2089-05-24T17:38:22"
      349 | 
      350 |     addr_parser = AddrParser(
      351 |         '/notes/IDENT',
      352 |     )
      353 | 
      354 |     @classmethod
      355 |     def identify(cls, data):
      356 |         if re.search(r'\b(Iceshrimp|Catodon)\b', data.version):
      357 |             return 1
      358 |         # FIXME? Should Iceshrimp.NET be considered supported?
      359 |         return -1
      360 | 
      361 |     def fetch_tag_info(self, tag_name):
      362 |         # FIXME in Iceshrimp?
      363 |         # The API is not available,
      364 |         # despite claimed version 4.2 or so.
      365 |         return Instance.fetch_tag_info(self, tag_name)
      366 | 
      367 | @Instance.register
      368 | class Pleroma(Mastodonoid):
      369 | 
      370 |     # Codebase: https://git.pleroma.social/pleroma/pleroma
      371 | 
      372 |     tag_url_template = '/tag/TAG'
      373 | 
      374 |     post_url_template = '/notice/IDENT'
      375 | 
      376 |     post_id_regexp = '[0-9a-zA-Z]{18}'
      377 |     # Source: https://git.pleroma.social/pleroma/flake_id
      378 |     #
      379 |     # Identifiers are base-62 integers:
      380 |     #
      381 |     #    n = (t << 64) + r
      382 |     #
      383 |     # where
      384 |     #
      385 |     #    t is milliseconds since 1970;
      386 |     #    r are randomish lower bits.
      387 |     #
      388 |     # In practice, it's always 18 digits (until 2284).
      389 |     #
      390 |     # $ export TZ=UTC0
      391 |     # $ qalc -t '"1970-01-01" + ((62 ** 17) >> 64) ms'
      392 |     # "1975-01-29T11:50:12"
      393 |     # $ qalc -t '"1970-01-01" + ((62 ** 18) >> 64) ms'
      394 |     # "2284-10-19T13:56:44"
      395 | 
      396 |     addr_parser = AddrParser(
      397 |         '/notice/IDENT',
      398 |         '/tag/TAG',
      399 |         # TODO? '/USER'?
      400 |         # But eww, that's awfully generic.
       401 |         # In the meantime, /users/USER works already.
      402 |     )
      403 | 
      404 |     @classmethod
      405 |     def identify(cls, data):
      406 |         try:
      407 |             data.pleroma
      408 |         except KeyError:
      409 |             return -1
      410 |         return 1
      411 | 
      412 |     def fix_post(self, post):
      413 |         super().fix_post(post)
      414 |         try:
      415 |             pinned_at = post.pleroma.pinned_at
      416 |         except KeyError:
      417 |             # available only since Pleroma v2.4
      418 |             pass
      419 |         else:
      420 |             post.pinned = pinned_at
      421 | 
      422 | __all__ = [
      423 |     'Iceshrimp',
      424 |     'Mastodon',
      425 |     'Mastodonoid',
      426 |     'Pleroma',
      427 |     'UntamedMastodonoid',
      428 | ]
      429 | 
      430 | # vim:ts=4 sts=4 sw=4 et
      431 | 
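
Aside on the snowflake comment in the Mastodon class above: a post ID carries its creation time in the high bits (n = (t << 16) + r, with t in milliseconds since the Unix epoch), so the timestamp can be recovered offline. A minimal standalone sketch, not part of the library (the function name is made up, and it ignores pre-2.0 sequential IDs and the leap-second handling that the qalc estimates above imply):

    import datetime

    def mastodon_id_to_datetime(post_id):
        # n = (t << 16) + r  =>  t = n >> 16, milliseconds since 1970
        ms = int(post_id) >> 16
        epoch = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
        return epoch + datetime.timedelta(milliseconds=ms)

    # e.g. mastodon_id_to_datetime('100000000000000000') falls in May 2018,
    # in line with the qalc estimate for 17-digit identifiers above.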
      
      
      --------------------------------------------------------------------------------
      /lib/bluesky.py:
      --------------------------------------------------------------------------------
        1 | # Copyright © 2025 Jakub Wilk 
        2 | # SPDX-License-Identifier: MIT
        3 | 
        4 | '''
        5 | Bluesky
        6 | '''
        7 | 
        8 | import html
        9 | import re
       10 | 
       11 | import lib.www
       12 | 
       13 | from lib.inst import (
       14 |     AddrParser,
       15 |     Instance,
       16 | )
       17 | 
       18 | from lib.utils import (
       19 |     Dict,
       20 |     compose,
       21 | )
       22 | 
       23 | urlquote = lib.www.urlquote
       24 | 
       25 | def qre(pattern, flags=0):
       26 |     r'''
       27 |     re.compile() with additional support for \q<...> escape,
       28 |     which is like \Q...\E in Perl.
       29 |     '''
       30 |     def repl(match):
       31 |         (s, esc) = match.groups()
       32 |         if esc:
       33 |             return esc
       34 |         return re.escape(s)
       35 |     pattern = re.sub(r'\\q<(.*?)>|(\.)', repl, pattern)
       36 |     return re.compile(pattern, flags=flags)
       37 | 
       38 | def text2html(s):
       39 |     s = html.escape(s)
        40 |     s = s.replace('\n', '<br>')
        41 |     return s
        42 | 
        43 | def decamel(s):
        44 |     def subst(match):
        45 |         return '_' + match.group().lower()
        46 |     s = re.sub('[A-Z]', subst, s)
        47 |     return s
        48 | 
        49 | class UserAgent(lib.www.UserAgent):
        50 | 
        51 |     @classmethod
        52 |     def handle_json_error(cls, exc, data):
        53 |         try:
        54 |             code = data.error
        55 |             msg = data.message
        56 |         except KeyError:
        57 |             return
        58 |         assert exc.msg
        59 |         exc.msg = f'[{code}] {msg}'
        60 | 
        61 | @Instance.register
        62 | class Bluesky(Instance):
        63 | 
        64 |     # Codebase: https://github.com/bluesky-social/atproto
        65 | 
        66 |     tag_url_template = '/hashtag/TAG'
        67 | 
        68 |     post_id_regexp = '[2-7a-z]{13}'
        69 |     # Source: https://atproto.com/specs/tid
        70 |     #
        71 |     # Identifiers are base-32 integers:
        72 |     #
        73 |     #    n = (t << 10) + r
        74 |     #
        75 |     # where
        76 |     #
        77 |     #    t is microseconds since 1970;
        78 |     #    r are randomish lower bits.
        79 |     #
        80 |     # In practice, it's always 13 characters.
        81 |     #
        82 |     # $ export TZ=UTC0
        83 |     # $ qalc -t '"1970-01-01" + ((32 ** 12) >> 10) us'
        84 |     # "2005-09-05T05:58:04"
        85 |     # $ qalc -t '"1970-01-01" + ((32 ** 13) >> 10) us'
        86 |     # "3111-09-16T23:09:51"
        87 | 
        88 |     addr_parser = AddrParser(
        89 |         'https://bsky.app/profile/USER',
        90 |         'https://bsky.app/profile/USER/post/IDENT',
        91 |         'https://bsky.app/hashtag/TAG',
        92 |         # TODO? @USER, or maybe only @USER.bsky.social
        93 |     )
        94 | 
        95 |     def __init__(self, url):
        96 |         super().__init__(url)
        97 |         self._did_to_handle = {}
        98 | 
        99 |     def _remember_user(self, user):
       100 |         try:
       101 |             handle = user.handle
       102 |         except KeyError:
       103 |             return
       104 |         self._did_to_handle[user.did] = handle
       105 | 
       106 |     @classmethod
       107 |     def parse_addr(cls, addr):
       108 |         match = super().parse_addr(addr)
       109 |         if not match:
       110 |             return None
       111 |         if match.ident:
       112 |             ident = f'at://{match.user}/app.bsky.feed.post/{match.ident}'
       113 |             match.ident = ident
       114 |         match.url = 'https://bsky.app'
       115 |         return match
       116 | 
       117 |     def _api_url(self, url, *, public=True):
       118 |         domain = 'api.bsky.app'
       119 |         if public:
       120 |             domain = f'public.{domain}'
       121 |         return f'https://{domain}/xrpc/{url}'
       122 | 
       123 |     def _fetch(self, url, *, public=True):
       124 |         url = self._api_url(url, public=public)
       125 |         return UserAgent.get(url).json
       126 | 
       127 |     @compose(''.join)
       128 |     def _mastodonize_text(self, text, *, facets=()):
       129 |         # FIXME: We convert text to HTML, only to convert HTML to text later on.
       130 |         btext = text.encode(errors='surrogatepass')
       131 |         def tslice(start, stop=None):
       132 |             return btext[start:stop].decode(errors='replace')
       133 |         i = 0
       134 |         for facet in facets:
       135 |             for feature in facet.features:
       136 |                 tp = feature['$type']
       137 |                 match = qre(r'\q<app.bsky.richtext.facet>#(\w+)').fullmatch(tp)
       138 |                 if not match:
       139 |                     continue
       140 |                 [tp] = match.groups()
       141 |                 fn = getattr(self, f'_mastodonize_text_facet_{tp}', None)
       142 |                 if not fn:
       143 |                     continue
       144 |                 j = facet.index.byteStart
       145 |                 k = facet.index.byteEnd
       146 |                 if i <= j < k:
       147 |                     yield text2html(tslice(i, j))
       148 |                     yield fn(tslice(j, k), feature)  # pylint: disable=not-callable
       149 |                     i = k
       150 |         yield text2html(tslice(i))
       151 | 
       152 |     def _mastodonize_user(self, user):
       153 |         class muser:
       154 |             at_did = user.did
       155 |             url = f'https://bsky.app/profile/{user.handle}'
       156 |             try:
       157 |                 display_name = user.displayName
       158 |             except KeyError:
       159 |                 display_name = ''
       160 |             try:
       161 |                 note = user.description
       162 |             except KeyError:
       163 |                 note = None
       164 |             else:
       165 |                 note = self._mastodonize_text(note)
       166 |         return muser
       167 | 
       168 |     def _mastodonize_text_facet_link(self, text, feature):
       169 |         if match := qre(r'([^/]+/\S{10,})\q<...>').fullmatch(text):
       170 |             [prefix] = match.groups()
       171 |             q_prefix = re.escape(prefix)
       172 |             if re.fullmatch(fr'https?://{q_prefix}\S+', feature.uri):
       173 |                 text = feature.uri
       174 |         q_url = html.escape(feature.uri)
       175 |         q_text = text2html(text)
       176 |         return f'<a href="{q_url}">{q_text}</a>'
       177 | 
       178 |     def _mastodonize_text_facet_mention(self, text, feature):
       179 |         did = feature.did
       180 |         user = self._did_to_handle.get(did, did)
       181 |         url = f'https://bsky.app/profile/{user}'  # FIXME: duplicate code
       182 |         return self._mastodonize_text_facet_link(text, Dict(uri=url))
       183 | 
       184 |     def _mastodonize_text_facet_tag(self, text, feature):
       185 |         url = self.get_tag_url(feature.tag)
       186 |         return self._mastodonize_text_facet_link(text, Dict(uri=url))
       187 | 
       188 |     def fetch_user_by_name(self, name):
       189 |         # https://docs.bsky.app/docs/api/app-bsky-actor-get-profile
       190 |         qname = urlquote(name)
       191 |         url = f'app.bsky.actor.getProfile?actor={qname}'
       192 |         user = self._fetch(url)
       193 |         return self._mastodonize_user(user)
       194 | 
       195 |     def _get_post_url(self, uri):
       196 |         uri_regexp = qre(fr'at://([^%@/?#\0-\40]+)/\q<app.bsky.feed.post>/({self.post_id_regexp})')
       197 |         match = uri_regexp.fullmatch(uri)
       198 |         if not match:
       199 |             # FIXME?
       200 |             return uri
       201 |         (user, post_id) = match.groups()
       202 |         user = self._did_to_handle.get(user, user)
       203 |         return f'https://bsky.app/profile/{user}/post/{post_id}'
       204 | 
       205 |     def _mastodonize_embed(self, embed):
       206 |         if embed is None:
       207 |             return
       208 |         tp = embed['$type']
       209 |         match = qre(r'\q<app.bsky.embed.>(\w+)#view').fullmatch(tp)
       210 |         class bad_att:
       211 |             url = 'about:invalid'  # FIXME?
       212 |             description = f'(unknown embed type: {tp})'
       213 |         if not match:
       214 |             yield bad_att
       215 |             return
       216 |         [tp] = match.groups()
       217 |         tp = decamel(tp)
       218 |         try:
       219 |             fn = getattr(self, f'_mastodonize_embed_{tp}')
       220 |         except AttributeError:
       221 |             yield bad_att
       222 |             return
       223 |         yield from fn(embed)
       224 | 
       225 |     def _mastodonize_embed_images(self, embed):
       226 |         for image in embed.images:
       227 |             class att:
       228 |                 url = image.fullsize
       229 |                 description = image.alt
       230 |             yield att
       231 | 
       232 |     def _mastodonize_embed_video(self, embed):
       233 |         class att:
       234 |             url = embed.playlist
       235 |             description = None
       236 |         yield att
       237 | 
       238 |     def _mastodonize_embed_record(self, embed):
       239 |         # FIXME?
       240 |         record = embed.record
       241 |         try:
       242 |             author = record.author
       243 |         except KeyError:
       244 |             pass
       245 |         else:
       246 |             self._remember_user(author)
       247 |         try:
       248 |             descr = record.value.text
       249 |         except KeyError:
       250 |             descr = None
       251 |         class att:
       252 |             url = self._get_post_url(record.uri)
       253 |             description = descr
       254 |         yield att
       255 | 
       256 |     def _mastodonize_embed_record_with_media(self, embed):
       257 |         # FIXME?
       258 |         yield from self._mastodonize_embed(embed.media)
       259 |         yield from self._mastodonize_embed_record(embed.record)
       260 | 
       261 |     def _mastodonize_embed_external(self, embed):
       262 |         # FIXME?
       263 |         ext = embed.external
       264 |         class att:
       265 |             url = ext.uri
       266 |             description = f'{ext.title}\n\n{ext.description}'
       267 |         yield att
       268 | 
       269 |     def _mastodonize_post(self, post, *, reason=None):
       270 |         record = post.record
       271 |         self._remember_user(post.author)
       272 |         try:
       273 |             embed = post.embed
       274 |         except KeyError:
       275 |             embed = None
       276 |         try:
       277 |             in_reply_to_uri = record.reply.parent.uri
       278 |         except KeyError:
       279 |             _in_reply_to_url = in_reply_to_uri = None
       280 |         else:
       281 |             _in_reply_to_url = self._get_post_url(in_reply_to_uri)
       282 |         _pinned = False
       283 |         if reason and reason['$type'] == 'app.bsky.feed.defs#reasonPin':
       284 |             _pinned = True
       285 |         try:
       286 |             facets = record.facets
       287 |         except KeyError:
       288 |             facets = ()
       289 |         class mpost:
       290 |             id = url = location = self._get_post_url(post.uri)
       291 |             in_reply_to_id = in_reply_to_url = _in_reply_to_url
       292 |             account = self._mastodonize_user(post.author)
       293 |             # Editing posts is not supported yet:
       294 |             # https://github.com/bluesky-social/social-app/issues/673
       295 |             # ("Allow editing posts")
       296 |             edited_at = None
       297 |             created_at = record.createdAt
       298 |             try:
       299 |                 language = record.langs
       300 |             except KeyError:
       301 |                 language = None
       302 |             else:
       303 |                 language = str.join(', ', language)
       304 |             reblog = None
       305 |             content = self._mastodonize_text(record.text, facets=facets)
       306 |             media_attachments = list(self._mastodonize_embed(embed))
       307 |             pinned = _pinned
       308 |         if reason and reason['$type'] == 'app.bsky.feed.defs#reasonRepost':
       309 |             self._remember_user(reason.by)
       310 |             class mrepost:
       311 |                 id = url = uri = location = None
       312 |                 in_reply_to_id = in_reply_to_url = None
       313 |                 account = self._mastodonize_user(reason.by)
       314 |                 edited_at = None
       315 |                 created_at = reason.indexedAt
       316 |                 language = None
       317 |                 reblog = mpost
       318 |                 content = None
       319 |                 media_attachments = None
       320 |                 pinned = None
       321 |             return mrepost
       322 |         else:
       323 |             return mpost
       324 | 
       325 |     def fetch_user_posts(self, user, *, limit, pinned=False, **params):
       326 |         # https://docs.bsky.app/docs/api/app-bsky-feed-get-author-feed
       327 |         if pinned:
       328 |             # It's easier to fetch pinned posts together with non-pinned ones.
       329 |             return
       330 |         del params
       331 |         page_limit = 100  # maximum allowed
       332 |         url = f'app.bsky.feed.getAuthorFeed?actor={user.at_did}&filter=posts_and_author_threads&includePins=true'
       333 |         rlimit = min(limit, page_limit)
       334 |         page_url = f'{url}&limit={rlimit}'
       335 |         while limit > 0:
       336 |             response = self._fetch(page_url)
       337 |             for item in response.feed:
       338 |                 try:
       339 |                     reason = item.reason
       340 |                 except KeyError:
       341 |                     reason = None
       342 |                 yield self._mastodonize_post(item.post, reason=reason)
       343 |             try:
       344 |                 cursor = response.cursor
       345 |             except KeyError:
       346 |                 break
       347 |             limit -= len(response.feed)
       348 |             rlimit = min(limit, page_limit)
       349 |             next_url = f'{url}&limit={rlimit}&cursor={cursor}'
       350 |             assert next_url != page_url
       351 |             page_url = next_url
       352 | 
       353 |     def fetch_tag_posts(self, tag_name, *, limit, **params):
       354 |         # https://docs.bsky.app/docs/api/app-bsky-feed-search-posts
       355 |         #
       356 |         # FIXME? app.bsky.feed.searchPosts doesn't seem to support paging properly:
       357 |         # https://github.com/bluesky-social/atproto/issues/2838
       358 |         # ("Calling AppView's searchPosts with a cursor returns a 403 error")
       359 |         () = params
       360 |         q_tag = urlquote('#' + tag_name)
       361 |         url = f'app.bsky.feed.searchPosts?q={q_tag}&limit={limit}&sort=top'
       362 |         response = self._fetch(url, public=False)
       363 |         for post in response.posts:
       364 |             yield self._mastodonize_post(post)
       365 | 
       366 |     def _get_post_fetch_url(self, post_id, depth=None, parent_height=None):
       367 |         # https://docs.bsky.app/docs/api/app-bsky-feed-get-post-thread
       368 |         q_post_id = urlquote(post_id)
       369 |         url = f'app.bsky.feed.getPostThread?uri={q_post_id}'
       370 |         if depth is not None:
       371 |             url += f'&depth={depth}'
       372 |         if parent_height is not None:
       373 |             url += f'&parentHeight={parent_height}'
       374 |         return url
       375 | 
       376 |     def fetch_post(self, post_id):
       377 |         url = self._get_post_fetch_url(post_id, depth=0, parent_height=0)
       378 |         thread = self._fetch(url).thread
       379 |         return self._mastodonize_post(thread.post)
       380 | 
       381 |     def fetch_post_context(self, post_id, *, ancestors=True, descendants=True):
       382 |         # FIXME? This duplicates some of the work of fetch_post().
       383 |         context = Dict(ancestors=[], descendants=[])
       384 |         if not (ancestors or descendants):
       385 |             # shortcut:
       386 |             return context
       387 |         kwargs = {}
       388 |         if not ancestors:
       389 |             kwargs.update(parent_height=0)
       390 |         if not descendants:
       391 |             kwargs.update(depth=0)
       392 |         url = self._get_post_fetch_url(post_id, **kwargs)
       393 |         thread = self._fetch(url).thread
       394 |         if ancestors:
       395 |             # pylint: disable=no-member
       396 |             parent = thread
       397 |             while True:
       398 |                 try:
       399 |                     parent = parent.parent
       400 |                 except KeyError:
       401 |                     break
       402 |                 context.ancestors += [self._mastodonize_post(parent.post)]
       403 |             context.ancestors.reverse()
       404 |         if descendants:
       405 |             # pylint: disable=no-member
       406 |             def add_descendants(thread):
       407 |                 context.descendants += [self._mastodonize_post(thread.post)]
       408 |                 try:
       409 |                     replies = thread.replies
       410 |                 except KeyError:
       411 |                     return
       412 |                 for reply in replies:
       413 |                     add_descendants(reply)
       414 |             for reply in thread.replies:
       415 |                 add_descendants(reply)
       416 |         return context
       417 | 
       418 | __all__ = [
       419 |     'Bluesky',
       420 | ]
       421 | 
       422 | # vim:ts=4 sts=4 sw=4 et
       423 | 


--------------------------------------------------------------------------------
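
Aside on the TID comment in /lib/bluesky.py above: the 13-character post identifier encodes its creation time (n = (t << 10) + r, with t in microseconds since the Unix epoch), so it can be decoded offline. A minimal standalone sketch, assuming the "sortable" base-32 alphabet '234567abcdefghijklmnopqrstuvwxyz' from the atproto TID specification (the function name and the sample TID are made up; this is not part of the library):

    import datetime

    S32 = '234567abcdefghijklmnopqrstuvwxyz'  # assumed TID alphabet, '2' = 0

    def tid_to_datetime(tid):
        # n = (t << 10) + r  =>  t = n >> 10, microseconds since 1970
        n = 0
        for ch in tid:
            n = n * 32 + S32.index(ch)
        epoch = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
        return epoch + datetime.timedelta(microseconds=n >> 10)

    print(tid_to_datetime('3jzfcijpj2z2a'))  # hypothetical 13-character TID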