├── .gitignore ├── LICENSE ├── README.md ├── setup.py └── stale.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[co] 2 | 3 | dist 4 | build 5 | *.egg-info 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2010 - present Jon Parise 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Stale 2 | 3 | Stale identifies and deletes stale [Pinboard](http://pinboard.in/) links. 4 | 5 | You can grab the latest code package by cloning this repository: 6 | 7 | $ git clone https://github.com/jparise/stale.git 8 | 9 | ... or by downloading the [latest tarball][]. 
10 | 11 | 12 | ## Usage 13 | 14 | ``` 15 | usage: stale.py [-h] [-t TOKEN] [--ignore REGEX [REGEX ...]] [-d] [-e] [--timeout TIMEOUT] [-v] [--debug] [--version] 16 | 17 | Identify (and optionally delete) stale Pinboard links. 18 | 19 | options: 20 | -h, --help show this help message and exit 21 | -t TOKEN, --token TOKEN 22 | your Pinboard API token ('username:hex-values') (default: None) 23 | --ignore REGEX [REGEX ...] 24 | ignore links from these hosts (default: None) 25 | -d, --delete delete stale links (default: False) 26 | -e equate errors with staleness (default: False) 27 | --timeout TIMEOUT HTTP connection timeout (in seconds) (default: 5) 28 | -v, --verbose enable verbose output (default: False) 29 | --debug enable debugging output (default: False) 30 | --version show program's version number and exit 31 | ``` 32 | 33 | You can find your personal Pinboard API token in your [Settings][]. It will 34 | look like `<username>:<hex-values>`. 35 | 36 | ### SSL Certificates 37 | 38 | Stale visits each link to verify that it is still active. Because most hosts 39 | use SSL, it's important for your Python environment to have a current set of 40 | SSL certificates. Otherwise, the connection attempt might fail with an error 41 | like `[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed`. 
42 | 43 | For macOS, you can update your certificates by running this command: 44 | 45 | /Applications/Python\ 3.7/Install\ Certificates.command 46 | 47 | [latest tarball]: https://github.com/jparise/stale/tarball/master 48 | [Settings]: https://pinboard.in/settings/password 49 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import find_packages, setup 4 | from stale import __version__ 5 | 6 | setup( 7 | name="stale", 8 | version=__version__, 9 | description="Identifies (and optionally removes) stale Pinboard links", 10 | author="Jon Parise", 11 | author_email="jon@indelible.org", 12 | keywords="pinboard", 13 | url="https://github.com/jparise/stale", 14 | license="MIT License", 15 | classifiers=[ 16 | "Environment :: Console", 17 | "Intended Audience :: End Users/Desktop", 18 | "License :: OSI Approved :: MIT License", 19 | "Operating System :: OS Independent", 20 | "Programming Language :: Python :: 3", 21 | "Topic :: Utilities", 22 | ], 23 | packages=find_packages(), 24 | entry_points={"console_scripts": ["stale = stale:main"]}, 25 | zip_safe=True, 26 | ) 27 | -------------------------------------------------------------------------------- /stale.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Copyright (c) 2010 - present Jon Parise 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # 
The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | # THE SOFTWARE. 22 | 23 | """Identify (and optionally delete) stale Pinboard links.""" 24 | 25 | import enum 26 | import getpass 27 | import json 28 | import os 29 | import re 30 | import sys 31 | 32 | from http.client import HTTPResponse 33 | from typing import Optional 34 | from urllib.error import HTTPError 35 | from urllib.parse import urldefrag, urlencode, urlparse, urljoin 36 | from urllib.request import ( 37 | HTTPHandler, 38 | HTTPSHandler, 39 | OpenerDirector, 40 | Request, 41 | build_opener, 42 | urlopen, 43 | ) 44 | 45 | __author__ = "Jon Parise " 46 | __version__ = "2.0-dev" 47 | 48 | PINBOARD_API_BASE = "https://api.pinboard.in/v1/" 49 | USER_AGENT = ( 50 | f"Mozilla/5.0 (compatible; stale/{__version__}; +https://github.com/jparise/stale)" 51 | ) 52 | 53 | 54 | class Color(enum.StrEnum): 55 | normal = "\033[0m" 56 | red = "\033[31m" 57 | green = "\033[32m" 58 | yellow = "\033[33m" 59 | purple = "\033[35m" 60 | cyan = "\033[36m" 61 | 62 | 63 | def pinboard_call(path, token, **kwargs): 64 | """Make a Pinboard API request and return a JSON-parsed response.""" 65 | params = kwargs.copy() 66 | params["auth_token"] = token 67 | params["format"] = "json" 68 | 69 | url = urljoin(PINBOARD_API_BASE, path) 70 | url += "?" 
+ urlencode(params) 71 | 72 | request = Request(url, headers={"User-Agent": USER_AGENT}) 73 | response = urlopen(request) 74 | 75 | return json.load(response) 76 | 77 | 78 | def check_url( 79 | opener: OpenerDirector, url: str, timeout: Optional[float] = None 80 | ) -> HTTPResponse: 81 | """Check the given URL by issuring a HEAD request.""" 82 | # We don't want to include a fragment in our request. 83 | url, _fragment = urldefrag(url) 84 | 85 | # Attempt to open the target URL using a HEAD request. 86 | request = Request(url, headers={"User-Agent": USER_AGENT}, method="HEAD") 87 | 88 | return opener.open(request, timeout=timeout) 89 | 90 | 91 | def supports_color(): 92 | # Windows only supports colors if ANSICON is defined. 93 | if sys.platform == "win32" and "ANSICON" not in os.environ: 94 | return False 95 | 96 | # Otherwise, we assume all TTYs support ANSI color. 97 | return sys.stdout.isatty() 98 | 99 | 100 | def main(): 101 | import argparse 102 | 103 | parser = argparse.ArgumentParser( 104 | description=__doc__, 105 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, 106 | ) 107 | parser.add_argument( 108 | "-t", "--token", help="your Pinboard API token ('username:hex-values')" 109 | ) 110 | parser.add_argument( 111 | "--ignore", 112 | nargs="+", 113 | type=re.compile, 114 | help="ignore links from these hosts", 115 | metavar="REGEX", 116 | ) 117 | parser.add_argument( 118 | "-d", "--delete", action="store_true", help="delete stale links" 119 | ) 120 | parser.add_argument( 121 | "-e", action="store_true", dest="errors", help="equate errors with staleness" 122 | ) 123 | parser.add_argument( 124 | "--timeout", type=float, default=5, help="HTTP connection timeout (in seconds)" 125 | ) 126 | parser.add_argument( 127 | "-v", "--verbose", action="store_true", help="enable verbose output" 128 | ) 129 | parser.add_argument("--debug", action="store_true", help="enable debugging output") 130 | parser.add_argument("--version", action="version", 
version=__version__) 131 | 132 | args = parser.parse_args() 133 | 134 | if not args.token: 135 | try: 136 | args.token = getpass.getpass("API Token: ") 137 | except KeyboardInterrupt: 138 | return 0 139 | 140 | try: 141 | posts = pinboard_call("posts/all", args.token) 142 | except Exception as e: 143 | print("Failed to retrieve posts:", e) 144 | return 1 145 | 146 | if not posts: 147 | print("No posts were retrieved.") 148 | return 1 149 | 150 | if args.verbose: 151 | print(f"Checking {len(posts)} posts ...") 152 | 153 | def report(color: Color, code: str, url: str, colorize=supports_color()): 154 | print( 155 | f"{color if colorize else ''}[{code}] " 156 | f"{Color.normal if colorize else ''}{url}" 157 | ) 158 | 159 | opener = build_opener( 160 | HTTPHandler(debuglevel=int(args.debug)), 161 | HTTPSHandler(debuglevel=int(args.debug)), 162 | ) 163 | 164 | # The set of HTTP status codes that we consider indicators of "staleness" 165 | # includes all client errors (4xx) except for: 166 | # 167 | # 403: we lack support for sending credentials with our requests for 168 | # sites that require authorization 169 | stale_codes = frozenset(range(400, 499)) - {403} 170 | 171 | for post in posts: 172 | url = post["href"] 173 | stale = False 174 | 175 | # If we have some hostnames to ignore, parse the URL and check if it 176 | # matches one of the patterns. 177 | if args.ignore: 178 | parsed = urlparse(url) 179 | for pattern in args.ignore: 180 | if pattern.match(parsed.hostname): 181 | report(Color.cyan, "Skip", url) 182 | continue 183 | 184 | try: 185 | result = check_url(opener, url, timeout=args.timeout) 186 | except KeyboardInterrupt: 187 | break 188 | except HTTPError as e: 189 | stale = e.code in stale_codes 190 | report(Color.red if stale else Color.purple, str(e.code), url) 191 | except OSError as e: 192 | # Timeouts are considered transient (non-fatal) errors. 
193 | if isinstance(getattr(e, "reason", e), TimeoutError): 194 | report(Color.yellow, "Timeout", url) 195 | continue 196 | 197 | # All other errors are considered request failures. 198 | report(Color.red, "!!", url) 199 | print("> " + str(e).replace("\n", "\n> ")) 200 | if args.errors: 201 | stale = True 202 | else: 203 | code = result.getcode() 204 | if code in stale_codes: 205 | stale = True 206 | report(Color.red, str(code), url) 207 | elif args.verbose: 208 | report(Color.green, "OK", url) 209 | 210 | if stale and args.delete: 211 | print(f" Deleting {url}") 212 | try: 213 | pinboard_call("posts/delete", args.token, url=url) 214 | except Exception as e: 215 | print("> " + str(e)) 216 | 217 | 218 | if __name__ == "__main__": 219 | sys.exit(main()) 220 | --------------------------------------------------------------------------------