├── canto_next ├── __init__.py ├── encoding.py ├── locks.py ├── hooks.py ├── client.py ├── format.py ├── tag.py ├── storage.py ├── rwlock.py ├── server.py ├── plugins.py ├── fetch.py ├── transform.py ├── protocol.py ├── feed.py ├── config.py ├── remote.py └── canto_backend.py ├── bin ├── canto-remote └── canto-daemon ├── systemd └── user │ └── canto-daemon.service ├── README.md ├── tests ├── test-feed-password.py ├── base.py ├── test-hooks.py └── test-feed-index.py ├── plugins ├── script.py ├── reddit.py ├── sync-rsync.py └── sync-inoreader.py ├── setup.py ├── man ├── canto-daemon.1 └── canto-remote.1 └── COPYING /canto_next/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /bin/canto-remote: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from canto_next.remote import CantoRemote 4 | import sys 5 | 6 | if __name__ == "__main__" : 7 | c = CantoRemote() 8 | sys.exit(0) 9 | -------------------------------------------------------------------------------- /bin/canto-daemon: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from canto_next.canto_backend import CantoBackend 4 | import sys 5 | 6 | if __name__ == "__main__" : 7 | c = CantoBackend() 8 | sys.exit(0) 9 | -------------------------------------------------------------------------------- /systemd/user/canto-daemon.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Canto RSS Daemon 3 | After=network.target 4 | 5 | [Service] 6 | ExecStart=/usr/bin/canto-daemon 7 | TimeoutStopSec=10 8 | 9 | [Install] 10 | WantedBy=default.target 11 | -------------------------------------------------------------------------------- /canto_next/encoding.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | #Canto - RSS reader backend 4 | # Copyright (C) 2016 Jack Miller 5 | # 6 | # This program is free software; you can redistribute it and/or modify 7 | # it under the terms of the GNU General Public License version 2 as 8 | # published by the Free Software Foundation. 9 | 10 | import locale 11 | 12 | locale_enc = locale.getpreferredencoding() 13 | 14 | # These are basically just wrappers to close the encoding 15 | # options into their scopes so we don't have to pass the 16 | # encoding around or call the above a million times. 17 | 18 | def get_encoder(errors = "replace", encoding = None): 19 | if not encoding: 20 | encoding = locale_enc 21 | 22 | def encoder(s): 23 | return s.encode(encoding, errors) 24 | return encoder 25 | 26 | encoder = get_encoder() 27 | -------------------------------------------------------------------------------- /canto_next/locks.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | #Canto - RSS reader backend 3 | # Copyright (C) 2016 Jack Miller 4 | # 5 | # This program is free software; you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License version 2 as 7 | # published by the Free Software Foundation. 8 | 9 | from .rwlock import RWLock 10 | 11 | # NOTE: feed_lock and tag_lock only protect the existence of Feed() and Tag() 12 | # objects, and their configuration. The Tag() objects have their own locks the 13 | # protect their content. 14 | 15 | feed_lock = RWLock('feed_lock') 16 | tag_lock = RWLock('tag_lock') 17 | 18 | # NOTE: if config_lock is held writable, feed_lock and tag_lock must also be 19 | # held writable. 20 | 21 | config_lock = RWLock('config_lock') 22 | 23 | # The rest of these are independent. 
24 | watch_lock = RWLock('watch_lock') 25 | attr_lock = RWLock('attr_lock') 26 | socktran_lock = RWLock('socktran_lock') 27 | hook_lock = RWLock('hook_look') 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Canto Daemon 2 | 3 | This is the RSS backend for Canto clients. 4 | 5 | Canto-curses is the default client at: 6 | 7 | http://github.com/themoken/canto-curses 8 | 9 | ## Requirements 10 | 11 | Debian / Ubuntu 12 | - python3 13 | - python3-feedparser 14 | 15 | Arch (Py3 is default) 16 | - python 17 | - python-feedparser 18 | 19 | ## Install 20 | 21 | From this directory, run 22 | 23 | ```sh 24 | $ sudo python3 setup.py install 25 | ``` 26 | 27 | 28 | If you're a systemd user you can easily run it on startup 29 | 30 | ```sh 31 | $ systemctl --user enable canto-daemon 32 | ``` 33 | 34 | Or start it manually with 35 | 36 | ```sh 37 | $ systemctl --user start canto-daemon 38 | ``` 39 | 40 | By default, user sessions start on login, and end on logoff stopping daemons. 41 | This is good behavior, but if you don't want canto-daemon to stop when you 42 | log out (to keep gathering news) enable "lingering" for your account. 43 | 44 | ``` 45 | $ loginctl enable-linger 46 | ``` 47 | 48 | This will start one session for you on boot that will last until shutdown. 
49 | -------------------------------------------------------------------------------- /tests/test-feed-password.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from base import * 5 | 6 | from canto_next.feed import CantoFeed 7 | from canto_next.fetch import CantoFetchThread 8 | 9 | import feedparser 10 | import urllib.parse 11 | import urllib.request 12 | 13 | TEST_URL="http://codezen.org/password-feed/canto.xml" 14 | USER="test" 15 | PASS="tester" 16 | 17 | class TestFeedPassword(Test): 18 | def check(self): 19 | # First make sure that feedparser hasn't been broken 20 | 21 | domain = urllib.parse.urlparse(TEST_URL)[1] 22 | man = urllib.request.HTTPPasswordMgrWithDefaultRealm() 23 | auth = urllib.request.HTTPBasicAuthHandler(man) 24 | auth.handler_order = 490 # Workaround feedparser issue #283 25 | auth.add_password(None, domain, USER, PASS) 26 | 27 | f = feedparser.parse(TEST_URL, handlers=[auth]) 28 | 29 | if f["bozo"] == 1: 30 | raise Exception("feedparser is broken!") 31 | 32 | test_shelf = {} 33 | test_feed = CantoFeed(test_shelf, "Passworded Feed", TEST_URL, 10, 86400, False, 34 | password=PASS, username=USER) 35 | 36 | thread = CantoFetchThread(test_feed, False) 37 | thread.start() 38 | thread.join() 39 | 40 | if TEST_URL not in test_shelf: 41 | raise Exception("Canto failed to get passworded feed") 42 | 43 | return True 44 | 45 | TestFeedPassword("feed password") 46 | -------------------------------------------------------------------------------- /tests/base.py: -------------------------------------------------------------------------------- 1 | from canto_next.remote import access_dict 2 | 3 | import traceback 4 | import logging 5 | import json 6 | 7 | logging.basicConfig( 8 | format = "%(message)s", 9 | level = logging.DEBUG 10 | ) 11 | 12 | import time 13 | 14 | class Test(object): 15 | def __init__(self, name): 16 | self.name = name 17 | self.run() 18 | 
19 | def compare_flags(self, value): 20 | if self.flags != value: 21 | raise Exception("Expected flags %d - got %d" % (value, self.flags)) 22 | 23 | def compare_config(self, config, var, evalue): 24 | ok, got = access_dict(config, var) 25 | if not ok: 26 | raise Exception("Couldn't get %s?" % var) 27 | if got != evalue: 28 | raise Exception("Expected %s == %s - got %s" % (var, evalue, got)) 29 | 30 | def compare_var(self, var, evalue): 31 | if hasattr(self, var): 32 | val = getattr(self, var) 33 | if val != evalue: 34 | raise Exception("Expected self.%s == %s - got %s" % (var, evalue, val)) 35 | else: 36 | raise Exception("Couldn't get self.%s?" % var) 37 | 38 | def banner(self, text): 39 | print("*" * 25) 40 | print(text) 41 | print("*" * 25) 42 | 43 | def run(self): 44 | print("STARTING %s\n" % self.name) 45 | 46 | try: 47 | r = self.check() 48 | except Exception as e: 49 | print("\n%s - FAILED ON EXCEPTION" % self.name) 50 | print(traceback.format_exc()) 51 | return 1 52 | 53 | if r == True: 54 | print("\n%s - PASSED\n" % self.name) 55 | return 0 56 | 57 | print("\n%s - FAILED\n" % self.name) 58 | return 1 59 | 60 | def check(self): 61 | pass 62 | -------------------------------------------------------------------------------- /canto_next/hooks.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | #Canto - RSS reader backend 4 | # Copyright (C) 2016 Jack Miller 5 | # 6 | # This program is free software; you can redistribute it and/or modify 7 | # it under the terms of the GNU General Public License version 2 as 8 | # published by the Free Software Foundation. 
9 | 10 | import traceback 11 | import logging 12 | 13 | log = logging.getLogger("HOOKS") 14 | 15 | hooks = {} 16 | 17 | def on_hook(hook, func, key=None): 18 | if key != None and type(key) != str: 19 | key = str(key) 20 | 21 | if hook in hooks: 22 | if key in hooks[hook]: 23 | hooks[hook][key].append(func) 24 | else: 25 | hooks[hook][key] = [ func ] 26 | else: 27 | hooks[hook] = { key : [ func ] } 28 | 29 | def remove_hook(hook, func): 30 | for key in list(hooks[hook].keys()): 31 | while func in hooks[hook][key]: 32 | hooks[hook][key].remove(func) 33 | if hooks[hook][key] == []: 34 | del hooks[hook][key] 35 | if hooks[hook] == {}: 36 | del hooks[hook] 37 | 38 | def unhook_all(key): 39 | if key != None and type(key) != str: 40 | key = str(key) 41 | for hook in list(hooks.keys()): 42 | if key in hooks[hook]: 43 | del hooks[hook][key] 44 | if hooks[hook] == {}: 45 | del hooks[hook] 46 | 47 | def call_hook(hook, args): 48 | if hook in hooks: 49 | for key in list(hooks[hook].keys()): 50 | try: 51 | for func in hooks[hook][key][:]: 52 | try: 53 | func(*args) 54 | except: 55 | log.error("Error calling hook %s (func: %s args: %s)" % (hook, func, args)) 56 | log.error(traceback.format_exc()) 57 | except: 58 | pass 59 | -------------------------------------------------------------------------------- /plugins/script.py: -------------------------------------------------------------------------------- 1 | # Canto Script Plugin 2 | # by Jack Miller 3 | # v1.0 4 | 5 | # With this plugin you can add a feed with a URL starting with "script:" and 6 | # ending with a simple script invocation. You must specify a name for the feed. 7 | # 8 | # For example: 9 | # canto-remote addfeed "script:~/bin/feed.sh" name="Script Feed" 10 | # 11 | # Scripts must have executable permissions. 12 | # 13 | # Scripts can be passed constant arguments, but are not executed in a shell 14 | # environment. 
15 | # 16 | # The script path should be absolute, or relative to home (~/), relative path 17 | # behavior is undefined. 18 | # 19 | # Scripts are expected to output parsable RSS/Atom XML to stdout. 20 | 21 | from canto_next.plugins import check_program 22 | 23 | check_program("canto-daemon") 24 | 25 | from canto_next.fetch import DaemonFetchThreadPlugin 26 | from canto_next.feed import DaemonFeedPlugin 27 | 28 | import feedparser 29 | import subprocess 30 | import logging 31 | import shlex 32 | import os 33 | 34 | log = logging.getLogger("SCRIPT") 35 | 36 | def debug(message): 37 | if EXTRA_LOG_OUTPUT: 38 | log.debug(message) 39 | 40 | class ScriptFetch(DaemonFetchThreadPlugin): 41 | def __init__(self, fetch_thread): 42 | self.plugin_attrs = { 43 | "fetch_script" : self.fetch_script, 44 | } 45 | 46 | def fetch_script(self, **kwargs): 47 | if not kwargs["feed"].URL.startswith("script:"): 48 | return 49 | 50 | path = os.path.expanduser(kwargs["feed"].URL[7:]) 51 | 52 | log.debug("path: %s", path) 53 | 54 | path = shlex.split(path) 55 | 56 | log.debug("split: %s", path) 57 | 58 | output = subprocess.check_output(path) 59 | 60 | log.debug("output: %s", output) 61 | 62 | contents = kwargs["newcontent"] 63 | contents.clear() 64 | 65 | feed = feedparser.parse(output) 66 | 67 | for key in feed: 68 | contents[key] = feed[key] 69 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from distutils.command.install_data import install_data 4 | from distutils.command.build_py import build_py 5 | from distutils.core import setup 6 | import subprocess 7 | import glob 8 | import os 9 | 10 | string_version = "0.9.8" 11 | 12 | changes = ['canto_backend.py','remote.py'] 13 | 14 | class canto_next_build_py(build_py): 15 | def run(self): 16 | for source in changes: 17 | os.utime("canto_next/" + source, None) 18 | build_py.run(self) 
19 | 20 | class canto_next_install_data(install_data): 21 | def run(self): 22 | try: 23 | git_hash = subprocess.check_output(["git", "describe"]).decode("UTF-8")[-9:-1] 24 | except Exception as e: 25 | print(e) 26 | git_hash = "" 27 | 28 | install_data.run(self) 29 | 30 | install_cmd = self.get_finalized_command('install') 31 | libdir = install_cmd.install_lib 32 | 33 | for source in changes: 34 | with open(libdir + '/canto_next/' + source, 'r+') as f: 35 | d = f.read().replace("REPLACE_VERSION", "\"" + string_version + "\"") 36 | d = d.replace("GIT_HASH", "\"" + git_hash + "\"") 37 | f.truncate(0) 38 | f.seek(0) 39 | f.write(d) 40 | 41 | setup(name='Canto', 42 | version=string_version, 43 | description='Next-gen console RSS/Atom reader', 44 | author='Jack Miller', 45 | author_email='jack@codezen.org', 46 | license='GPLv2', 47 | url='https://codezen.org/canto-ng', 48 | download_url='https://codezen.org/static/canto-daemon-' + string_version + '.tar.gz', 49 | packages=['canto_next'], 50 | install_requires=['feedparser'], 51 | scripts=['bin/canto-daemon','bin/canto-remote'], 52 | data_files = [("share/man/man1/", ["man/canto-daemon.1", "man/canto-remote.1"]), 53 | ("lib/systemd/user", ["systemd/user/canto-daemon.service"]), 54 | ("lib/canto/plugins", glob.glob("plugins/*.py"))], 55 | cmdclass = { 'install_data' : canto_next_install_data, 56 | 'build_py' : canto_next_build_py }, 57 | ) 58 | -------------------------------------------------------------------------------- /tests/test-hooks.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import canto_next.hooks as hooks 5 | 6 | from base import * 7 | 8 | class TestHooks(Test): 9 | def __init__(self, name): 10 | self.test_set = "" 11 | self.test_args = [] 12 | Test.__init__(self, name) 13 | 14 | def hook_a(self): 15 | self.test_set += "a" 16 | 17 | def hook_b(self): 18 | self.test_set += "b" 19 | 20 | def hook_c(self): 21 | 
self.test_set += "c" 22 | 23 | def hook_args(self, *args): 24 | self.test_args = args 25 | 26 | def check(self): 27 | hooks.on_hook("test", self.hook_a) # No key 28 | hooks.on_hook("test", self.hook_b, "first_remove") 29 | hooks.on_hook("test", self.hook_c, "first_remove") 30 | hooks.on_hook("test2", self.hook_a, "second_remove") 31 | 32 | hooks.call_hook("test", []) 33 | 34 | if self.test_set != "abc": 35 | raise Exception("Basic hook test failed: %s" % self.test_set) 36 | 37 | self.test_set = "" 38 | hooks.call_hook("test2", []) 39 | 40 | if self.test_set != "a": 41 | raise Exception("Basic hook test2 failed: %s" % self.test_set) 42 | 43 | self.test_set = "" 44 | hooks.unhook_all("first_remove") 45 | hooks.call_hook("test", []) 46 | 47 | if self.test_set != "a": 48 | raise Exception("unhook_all failed: %s" % self.test_set) 49 | 50 | self.test_set = "" 51 | hooks.remove_hook("test", self.hook_a) 52 | hooks.call_hook("test", []) 53 | 54 | if self.test_set != "": 55 | raise Exception("remove_hook failed: %s" % self.test_set) 56 | 57 | hooks.call_hook("test2", []) 58 | 59 | if self.test_set != "a": 60 | raise Exception("improper hook removed: %s" % self.test_set) 61 | 62 | hooks.unhook_all("second_remove") 63 | 64 | if hooks.hooks != {}: 65 | raise Exception("hooks.hooks should be empty! %s" % hooks.hooks) 66 | 67 | hooks.on_hook("argtest", self.hook_args) 68 | 69 | for args in [ [], ["abc"], [1, 2, 3] ]: 70 | self.test_args = [] 71 | hooks.call_hook("argtest", args) 72 | if self.test_args != tuple(args): 73 | raise Exception("hook arguments failed in %s out %s" % (args, self.test_args)) 74 | 75 | return True 76 | 77 | TestHooks("hooks") 78 | -------------------------------------------------------------------------------- /man/canto-daemon.1: -------------------------------------------------------------------------------- 1 | .TH Canto-daemon 1 "Canto-daemon" 2 | 3 | .SH NAME 4 | Canto-daemon \- An extensible RSS aggregation daemon. 
5 | 6 | .SH COMMAND LINE USAGE 7 | 8 | canto-daemon (options) 9 | 10 | .SH OPTIONS 11 | 12 | .TP 13 | \-V/--version 14 | Print version info and exit. 15 | 16 | .TP 17 | \-h/--help 18 | Print usage help 19 | 20 | .TP 21 | \-v 22 | Enable verbose logging (debug) 23 | 24 | .TP 25 | \-D/--dir [directory] 26 | Change base directory for canto-daemon (default: $XDG_CONFIG_HOME/canto) 27 | 28 | .TP 29 | \-n/--nofetch 30 | Do not fetch new content while running (debug). 31 | 32 | .TP 33 | \-\-noplugins 34 | Disable all plugins 35 | 36 | .TP 37 | \-\-enableplugins 'plugin1 plugin2...' 38 | Enable these plugins (overrides --noplugins) 39 | 40 | .TP 41 | \-\-disableplugins 'plugin1 plugin2...' 42 | Disable these plugins 43 | 44 | .TP 45 | \-a/--address [IP] 46 | Listen on address in addition to local Unix socket. Use "" for all interfaces. 47 | Defaults to loopback (127.0.0.1) 48 | 49 | .TP 50 | \-p/--port [port] 51 | Use port when listening with -a 52 | 53 | .SH CONFIGURATION 54 | 55 | This daemon manages its own configuration, and should be manipulated through 56 | canto-remote or a client, like canto-curses. 57 | 58 | .SH PLUGINS 59 | 60 | Plugins are packaged in /canto/plugins (i.e. /usr/lib/canto/plugins) 61 | and can be enabled by copying them into your configuration directory (i.e. 62 | $XDG_CONFIG_HOME/canto/plugins). Many of them have configuration in them, so 63 | you may want to open the files to make changes. Here are a few example plugins 64 | included with the canto-daemon source. 65 | 66 | .TP 67 | reddit.py 68 | Add content to reddit items. Includes a sort 69 | .B "reddit_score_sort" 70 | that will automatically order items by upvotes. 71 | 72 | .TP 73 | sync-rsync.py 74 | An rsync based sync plugin. See header for configuration, can be used to sync 75 | with a remote server via SSH, or with another local filesystem (for 76 | NFS/sshfs/Dropbox/gdrive etc.) 
77 | 78 | .TP 79 | sync-inoreader.py 80 | Synchronize (most) content with inoreader.com, a service with support for RSS 81 | on the web as well as various mobile devices. 82 | 83 | .SH FILES 84 | 85 | .TP 86 | .I $XDG_CONFIG_HOME/canto/daemon-log 87 | 88 | Canto-daemon log file. 89 | 90 | .TP 91 | .I $XDG_CONFIG_HOME/canto/plugins/ 92 | 93 | Canto-daemon plugins directory 94 | 95 | .TP 96 | .I /usr/lib/canto/plugins 97 | Default packaged plugin directory. 98 | 99 | .SH HOMEPAGE 100 | http://codezen.org/canto-ng 101 | 102 | .SH AUTHOR 103 | Jack Miller 104 | 105 | .SH SEE ALSO 106 | .BR canto-remote (1) 107 | -------------------------------------------------------------------------------- /canto_next/client.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | #Canto - RSS reader backend 4 | # Copyright (C) 2016 Jack Miller 5 | # 6 | # This program is free software; you can redistribute it and/or modify 7 | # it under the terms of the GNU General Public License version 2 as 8 | # published by the Free Software Foundation. 9 | 10 | from .protocol import CantoSocket 11 | from .hooks import call_hook 12 | 13 | import logging 14 | import fcntl 15 | import errno 16 | import time 17 | import sys 18 | import os 19 | 20 | log = logging.getLogger("CLIENT") 21 | 22 | class CantoClient(CantoSocket): 23 | def __init__(self, socket_name, **kwargs): 24 | kwargs["server"] = False 25 | CantoSocket.__init__(self, socket_name, **kwargs) 26 | 27 | def connect(self): 28 | conn = CantoSocket.connect(self) 29 | call_hook("client_new_socket", [conn]) 30 | return conn 31 | 32 | 33 | # Test whether we can lock the pidfile, and if we can, fork the daemon 34 | # with the proper arguments. 
35 | 36 | def start_daemon(self): 37 | pidfile = self.conf_dir + "/pid" 38 | if os.path.exists(pidfile) and os.path.isfile(pidfile): 39 | try: 40 | pf = open(pidfile, "a+") 41 | fcntl.flock(pf.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB) 42 | if os.path.exists(self.socket_path): 43 | os.unlink(self.socket_path) 44 | fcntl.flock(pf.fileno(), fcntl.LOCK_UN) 45 | pf.close() 46 | except IOError as e: 47 | if e.errno == errno.EAGAIN: 48 | # If we failed to get a lock, then the daemon is running 49 | # and we're done. 50 | return 51 | 52 | pid = os.fork() 53 | if not pid: 54 | # Shutup any log output before canto-daemon sets up it's log 55 | # (particularly the error that one is already running) 56 | 57 | fd = os.open("/dev/null", os.O_RDWR) 58 | os.dup2(fd, sys.stderr.fileno()) 59 | 60 | cmd = "canto-daemon -D " + self.conf_dir 61 | if self.verbosity > 0: 62 | cmd += " -" + ("v" * self.verbosity) 63 | 64 | os.setpgid(os.getpid(), os.getpid()) 65 | os.execve("/bin/sh", ["/bin/sh", "-c", cmd], os.environ) 66 | 67 | # Should never get here, but just in case. 68 | sys.exit(-1) 69 | 70 | while not os.path.exists(self.socket_path): 71 | time.sleep(0.1) 72 | 73 | return pid 74 | 75 | # Write a (cmd, args) 76 | def write(self, cmd, args, conn=0): 77 | return self.do_write(self.sockets[conn], cmd, args) 78 | 79 | # Read a (cmd, args) 80 | def read(self, timeout=None, conn=0): 81 | return self.do_read(self.sockets[conn], timeout) 82 | -------------------------------------------------------------------------------- /canto_next/format.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | #Canto - RSS reader backend 4 | # Copyright (C) 2016 Jack Miller 5 | # 6 | # This program is free software; you can redistribute it and/or modify 7 | # it under the terms of the GNU General Public License version 2 as 8 | # published by the Free Software Foundation. 
9 | 10 | import logging 11 | 12 | log = logging.getLogger("FORMAT") 13 | 14 | def get_formatter(fmt, keys): 15 | l = len(fmt) 16 | def formatter(dct): 17 | s = "" 18 | i = 0 19 | while i < l: 20 | if fmt[i] == '%': 21 | i += 1 22 | code = fmt[i] 23 | if code not in keys: 24 | i += 1 25 | continue 26 | key = keys[code] 27 | if key not in dct: 28 | i += 1 29 | continue 30 | s += repr(dct[key]) 31 | elif fmt[i] == '\\': 32 | s += fmt[i + 1] 33 | i += 1 34 | else: 35 | s += fmt[i] 36 | i += 1 37 | return s 38 | return formatter 39 | 40 | def escsplit(arg, delim, maxsplit=0, minsplit=0, escapeterms=False): 41 | r = [] 42 | acc = "" 43 | escaped = False 44 | skipchars = 0 45 | 46 | for i, c in enumerate(arg): 47 | if skipchars > 0: 48 | skipchars -= 1 49 | continue 50 | 51 | if escaped: 52 | escaped = False 53 | acc += c 54 | 55 | # We append the escape character because we just want to intelligently 56 | # split, not unescape the components 57 | 58 | elif c == '\\': 59 | escaped = True 60 | 61 | # Don't unescape things that may need to be split again. Most 62 | # notably canto-remote splitting on = and then on . 63 | 64 | if not escapeterms: 65 | acc += c 66 | 67 | elif c == delim[0]: 68 | 69 | # If this is a multi-char delimiter that doesn't match 70 | # keep the character and move on. 71 | 72 | if len(delim) > 1 and\ 73 | arg[i : i+len(delim)] != delim: 74 | acc += c 75 | continue 76 | 77 | # If we have matched a 1 or multi-char delimiter we need 78 | # to skip the remaining characters. 79 | 80 | else: 81 | skipchars = len(delim) - 1 82 | 83 | r.append(acc) 84 | acc = "" 85 | 86 | # Last split? 87 | if maxsplit == 1: 88 | r.append(arg[i + 1:]) 89 | break 90 | elif maxsplit > 1: 91 | maxsplit -= 1 92 | else: 93 | acc += c 94 | else: 95 | # Get last frag, if we didn't maxout. 
96 | r.append(acc) 97 | 98 | if minsplit > 0 and len(r) < (minsplit + 1): 99 | r += [ None ] * ((minsplit + 1) - len(r)) 100 | 101 | return r 102 | -------------------------------------------------------------------------------- /man/canto-remote.1: -------------------------------------------------------------------------------- 1 | .TH Canto-remote 1 "Canto-remote" 2 | 3 | .SH NAME 4 | Canto-remote \- A basic remote for canto-daemon 5 | 6 | This provides some capability to configure and query canto-daemon without a full client. Useful for scripting or interfacing with other programs. 7 | 8 | .SH COMMAND LINE USAGE 9 | 10 | canto-remote (options) [command] (arguments) 11 | 12 | .SH OPTIONS 13 | 14 | .TP 15 | \-V 16 | Print version info and exit. 17 | 18 | .TP 19 | \-h/--help 20 | Print usage help 21 | 22 | .TP 23 | \-D/--dir [directory] 24 | Change base directory for canto-daemon (default: $XDG_CONFIG_HOME/canto) 25 | 26 | .TP 27 | \-\-noplugins 28 | Disable all plugins 29 | 30 | .TP 31 | \-\-enableplugins 'plugin1 plugin2...' 32 | Enable these plugins (overrides --noplugins) 33 | 34 | .TP 35 | \-\-disableplugins 'plugin1 plugin2...' 36 | Disable these plugins 37 | 38 | .TP 39 | \-a/--address [IP] 40 | Attempt to connect to address instead of local Unix socket, defaults to 41 | loopback (127.0.0.1) 42 | 43 | .TP 44 | \-p/--port [port] 45 | Use port when connecting with -a 46 | 47 | .SH COMMANDS 48 | 49 | Canto-remote supports various commands to configure canto-daemon, although 50 | using a real client is recommended. 51 | 52 | .TP 53 | .B help [command] 54 | Get help on a specific command 55 | 56 | .TP 57 | .B addfeed [URL] (username=user password=secret) (name="Some name") 58 | Subscribe to a feed, optionally with special arguments. If no name is 59 | specified, the feed's title will be used. 
60 | 61 | .TP 62 | .B listfeeds 63 | List all subscribed feeds 64 | 65 | .TP 66 | .B delfeed [name or listfeeds index] 67 | Unsubscribe from a feed 68 | 69 | .TP 70 | .B status (--tag=tag) (--read|--total|--tags) 71 | Print the number of unread items, optionally restricted to a specific tag, 72 | --read prints read items, --total prints total items, --tags prints the value 73 | for all tags. 74 | 75 | NOTE: This is still subject to filters, so if you're filtering all read items, 76 | --read will never return anything but 0. 77 | 78 | .TP 79 | .B force-update 80 | Refetch all feeds, regardless of timestamps 81 | 82 | .TP 83 | .B config (="value") 84 | Change a configuration variable 85 | .br 86 | .B (You probably don't want to do this by hand, use a client) 87 | 88 | .TP 89 | .B one-config (= value) 90 | Change a single configuration variable, assuming the entire line is one setting. 91 | .br 92 | .B (You probably don't want to do this by hand, use a client) 93 | 94 | .TP 95 | .B export 96 | .br 97 | Write an OPML formatted list of feeds to stdout 98 | .br 99 | (i.e. canto-remote export > feedlist.opml) 100 | 101 | .TP 102 | .B import 103 | .br 104 | Import an OPML formatted list of feeds from stdin 105 | .br 106 | (i.e. canto-remote import < feedlist.opml) 107 | 108 | .TP 109 | .B kill 110 | .br 111 | Kill the daemon you're connecting to 112 | 113 | .TP 114 | .B script 115 | .br 116 | Run a script (testing code) 117 | 118 | .TP 119 | NOTE: Plugins can also add remote commands, like 'sync', which will also be 120 | listed with --help (or no arguments). 
121 | 122 | .SH HOMEPAGE 123 | http://codezen.org/canto-ng 124 | 125 | .SH AUTHOR 126 | Jack Miller 127 | 128 | .SH SEE ALSO 129 | .BR canto-daemon (1) 130 | -------------------------------------------------------------------------------- /canto_next/tag.py: -------------------------------------------------------------------------------- 1 | #Canto - RSS reader backend 2 | # Copyright (C) 2016 Jack Miller 3 | # 4 | # This program is free software; you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License version 2 as 6 | # published by the Free Software Foundation. 7 | 8 | from .hooks import on_hook, call_hook 9 | from .rwlock import read_lock, write_lock 10 | from .locks import * 11 | 12 | import logging 13 | 14 | log = logging.getLogger("TAG") 15 | 16 | class CantoTags(): 17 | def __init__(self): 18 | self.tags = {} 19 | self.changed_tags = [] 20 | 21 | # Per-tag transforms 22 | self.tag_transforms = {} 23 | 24 | # Extra tag map 25 | # This allows tags to be defined as parts of larger tags. For example, 26 | # Penny Arcade and xkcd could both have the extra "comic" tag which 27 | # could then be used in a filter to implement categories. 
28 | 29 | self.extra_tags = {} 30 | 31 | def items_to_tags(self, ids): 32 | tags = [] 33 | for id in ids: 34 | for tag in self.tags: 35 | if id in self.tags[tag] and tag not in tags: 36 | tags.append(tag) 37 | return tags 38 | 39 | def tag_changed(self, tag): 40 | if tag not in self.changed_tags: 41 | self.changed_tags.append(tag) 42 | 43 | def get_tag(self, tag): 44 | if tag in list(self.tags.keys()): 45 | return self.tags[tag] 46 | return [] 47 | 48 | def get_tags(self): 49 | return list(self.tags.keys()) 50 | 51 | def tag_transform(self, tag, transform): 52 | self.tag_transforms[tag] = transform 53 | 54 | def set_extra_tags(self, tag, extra_tags): 55 | self.extra_tags[tag] = extra_tags 56 | 57 | def clear_tags(self): 58 | self.tags = {} 59 | 60 | def reset(self): 61 | self.tag_transforms = {} 62 | self.extra_tags = {} 63 | 64 | # Don't set tag_changed here, because we don't want to alert when a tag 65 | # will probably just be re-populated with identical content. 66 | 67 | # It it isn't, then the add or remove will set it for us. 68 | 69 | self.clear_tags() 70 | 71 | # 72 | # Following must be called with tag_lock held with write 73 | # 74 | 75 | def add_tag(self, id, name): 76 | if name in self.extra_tags: 77 | extras = self.extra_tags[name] 78 | else: 79 | extras = [] 80 | 81 | alladded = [ name ] + extras 82 | 83 | for name in alladded: 84 | # Create tag if no tag exists 85 | if name not in self.tags: 86 | self.tags[name] = [] 87 | call_hook("daemon_new_tag", [[ name ]]) 88 | 89 | # Add to tag. 
90 | if id not in self.tags[name]: 91 | self.tags[name].append(id) 92 | self.tag_changed(name) 93 | 94 | def remove_tag(self, id, name): 95 | if name in self.tags and id in self.tags[name]: 96 | self.tags[name].remove(id) 97 | self.tag_changed(name) 98 | 99 | def remove_id(self, id): 100 | for tag in self.tags: 101 | if id in self.tags[tag]: 102 | self.tags[tag].remove(id) 103 | self.tag_changed(tag) 104 | 105 | def apply_transforms(self, tag, tagobj): 106 | from .config import config 107 | # Global transform 108 | if config.global_transform: 109 | tagobj = config.global_transform(tagobj) 110 | 111 | # Tag level transform 112 | if tag in self.tag_transforms and\ 113 | self.tag_transforms[tag]: 114 | tagobj = self.tag_transforms[tag](tagobj) 115 | 116 | return tagobj 117 | 118 | def do_tag_changes(self): 119 | for tag in self.changed_tags: 120 | tagobj = self.get_tag(tag) 121 | 122 | try: 123 | tagobj = self.apply_transforms(tag, tagobj) 124 | except Exception as e: 125 | log.error("Exception applying transforms: %s" % e) 126 | 127 | self.tags[tag] = tagobj 128 | call_hook("daemon_tag_change", [ tag ]) 129 | self.changed_tags = [] 130 | 131 | alltags = CantoTags() 132 | -------------------------------------------------------------------------------- /canto_next/storage.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | #Canto - RSS reader backend 4 | # Copyright (C) 2016 Jack Miller 5 | # 6 | # This program is free software; you can redistribute it and/or modify 7 | # it under the terms of the GNU General Public License version 2 as 8 | # published by the Free Software Foundation. 
9 | 10 | from .feed import wlock_feeds 11 | from .hooks import call_hook 12 | 13 | import tempfile 14 | import logging 15 | import shutil 16 | import json 17 | import gzip 18 | import time 19 | import os 20 | 21 | log = logging.getLogger("SHELF") 22 | 23 | class CantoShelf(): 24 | def __init__(self, filename): 25 | self.filename = filename 26 | 27 | self.cache = {} 28 | 29 | self.open() 30 | 31 | def check_control_data(self): 32 | if "control" not in self.cache: 33 | self.cache["control"] = {} 34 | 35 | for ctrl_field in ["canto-modified","canto-user-modified"]: 36 | if ctrl_field not in self.cache["control"]: 37 | self.cache["control"][ctrl_field] = 0 38 | 39 | @wlock_feeds 40 | def open(self): 41 | call_hook("daemon_db_open", [self.filename]) 42 | 43 | if not os.path.exists(self.filename): 44 | fp = gzip.open(self.filename, "wt", 9, "UTF-8") 45 | json.dump(self.cache, fp) 46 | fp.close() 47 | else: 48 | fp = gzip.open(self.filename, "rt", 9, "UTF-8") 49 | try: 50 | self.cache = json.load(fp) 51 | except: 52 | log.info("Failed to JSON load, old shelf?") 53 | try: 54 | import shelve 55 | s = shelve.open(self.filename, "r") 56 | for key in s: 57 | self.cache[key] = s[key] 58 | except Exception as e: 59 | log.error("Failed to migrate old shelf: %s", e) 60 | try: 61 | f = open(self.filename) 62 | data = f.read() 63 | f.close() 64 | log.error("BAD DATA: [%s]" % data) 65 | except Exception as e: 66 | log.error("Couldn't even read data? 
%s" % e) 67 | pass 68 | log.error("Carrying on with empty shelf") 69 | self.cache = {} 70 | else: 71 | log.info("Migrated old shelf") 72 | finally: 73 | fp.close() 74 | 75 | self.check_control_data() 76 | 77 | def __setitem__(self, name, value): 78 | self.cache[name] = value 79 | self.update_mod() 80 | 81 | def __getitem__(self, name): 82 | return self.cache[name] 83 | 84 | def __contains__(self, name): 85 | return name in self.cache 86 | 87 | def __delitem__(self, name): 88 | if name in self.cache: 89 | del self.cache[name] 90 | self.update_mod() 91 | 92 | def update_umod(self): 93 | if "control" not in self.cache: 94 | self.cache["control"] = self.cache['control'] 95 | 96 | ts = int(time.mktime(time.gmtime())) 97 | self.cache["control"]["canto-user-modified"] = ts 98 | self.cache["control"]["canto-modified"] = ts 99 | 100 | def update_mod(self): 101 | if "control" not in self.cache: 102 | self.cache["control"] = self.cache['control'] 103 | 104 | ts = int(time.mktime(time.gmtime())) 105 | self.cache["control"]["canto-modified"] = ts 106 | 107 | @wlock_feeds 108 | def sync(self): 109 | 110 | # If we get a sync after we're closed, or before we're open 111 | # just ignore it. 
112 | 113 | if self.cache == {}: 114 | return 115 | 116 | f, tmpname = tempfile.mkstemp("", "feeds", os.path.dirname(self.filename)) 117 | os.close(f) 118 | 119 | fp = gzip.open(tmpname, "wt", 9, "UTF-8") 120 | json.dump(self.cache, fp, indent=4, sort_keys=True) 121 | fp.close() 122 | 123 | log.debug("Written tempfile.") 124 | 125 | shutil.move(tmpname, self.filename) 126 | 127 | log.debug("Synced.") 128 | 129 | def close(self): 130 | log.debug("Closing.") 131 | self.sync() 132 | self.cache = {} 133 | call_hook("daemon_db_close", [self.filename]) 134 | -------------------------------------------------------------------------------- /canto_next/rwlock.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | #Canto - RSS reader backend 3 | # Copyright (C) 2016 Jack Miller 4 | # 5 | # This program is free software; you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License version 2 as 7 | # published by the Free Software Foundation. 8 | 9 | # Seriously python? No RWlock? 10 | 11 | from threading import RLock, current_thread 12 | import traceback 13 | import time 14 | 15 | import logging 16 | log = logging.getLogger("RWLOCK") 17 | 18 | alllocks = [] 19 | 20 | class RWLock(object): 21 | def __init__(self, name=""): 22 | self.name = name 23 | self.readers = 0 24 | self.reader_stacks = [] 25 | self.lock = RLock() 26 | self.reader_lock = RLock() 27 | 28 | self.writer_stacks = [] 29 | self.writer_id = 0 30 | 31 | alllocks.append(self) 32 | 33 | def acquire_read(self, block=True): 34 | 35 | # Hold reader_lock to see if we've already actually got this lock. 
36 | 37 | r = self.reader_lock.acquire(block) 38 | if not r: 39 | return r 40 | 41 | cti = current_thread().ident 42 | if cti == self.writer_id or cti in [ x[0] for x in self.reader_stacks ]: 43 | self.readers += 1 44 | self.reader_stacks.append((current_thread().ident, traceback.format_stack())) 45 | self.reader_lock.release() 46 | return True 47 | 48 | # Release the lock so that if we block on getting the main lock, other 49 | # threads can still perform the above check and release_read(). 50 | 51 | self.reader_lock.release() 52 | 53 | # Get full lock so writers can keep us from getting a lock we don't 54 | # already hold. 55 | 56 | r = self.lock.acquire(block) 57 | if not r: 58 | return r 59 | 60 | # Re-acquire reader_lock so we can manipulate the vars. 61 | 62 | self.reader_lock.acquire() 63 | 64 | self.readers += 1 65 | self.reader_stacks.append((current_thread().ident, traceback.format_stack())) 66 | 67 | # Release everything. 68 | 69 | self.reader_lock.release() 70 | self.lock.release() 71 | return True 72 | 73 | def release_read(self): 74 | last = False 75 | 76 | self.reader_lock.acquire() 77 | self.readers -= 1 78 | 79 | for tup in reversed(self.reader_stacks[:]): 80 | if tup[0] == current_thread().ident: 81 | self.reader_stacks.remove(tup) 82 | break 83 | 84 | if self.readers == 0: 85 | last = True 86 | 87 | self.reader_lock.release() 88 | return last 89 | 90 | def acquire_write(self, block=True): 91 | r = self.lock.acquire(block) 92 | 93 | if not r: 94 | return r 95 | 96 | self.writer_stacks.append(traceback.format_stack()) 97 | self.writer_id = current_thread().ident; 98 | 99 | warned = False 100 | 101 | while self.readers > 0: 102 | if current_thread().ident in [ x[0] for x in self.reader_stacks ]: 103 | if not warned: 104 | log.debug("WARN: %s holds read, trying to get write on %s", 105 | current_thread().ident, self.name) 106 | warned = True 107 | 108 | # Break the deadlock if we're the last reader 109 | if len(self.reader_stacks) == 1: 110 | break 
111 | 112 | time.sleep(0.1) 113 | return True 114 | 115 | def release_write(self): 116 | last = False 117 | 118 | self.writer_stacks = self.writer_stacks[0:-1] 119 | if self.writer_stacks == []: 120 | self.writer_id = 0 121 | last = True 122 | 123 | self.lock.release() 124 | 125 | return last 126 | 127 | def read_lock(lock): 128 | def _rlock_fn(fn): 129 | def _rlock(*args, **kwargs): 130 | lock.acquire_read() 131 | try: 132 | return fn(*args, **kwargs) 133 | finally: 134 | lock.release_read() 135 | return _rlock 136 | return _rlock_fn 137 | 138 | def write_lock(lock): 139 | def _wlock_fn(fn): 140 | def _wlock(*args, **kwargs): 141 | lock.acquire_write() 142 | try: 143 | return fn(*args, **kwargs) 144 | finally: 145 | lock.release_write() 146 | return _wlock 147 | return _wlock_fn 148 | -------------------------------------------------------------------------------- /canto_next/server.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | #Canto - RSS reader backend 4 | # Copyright (C) 2016 Jack Miller 5 | # 6 | # This program is free software; you can redistribute it and/or modify 7 | # it under the terms of the GNU General Public License version 2 as 8 | # published by the Free Software Foundation. 9 | 10 | from .protocol import CantoSocket 11 | from .hooks import call_hook 12 | 13 | from socket import SHUT_RDWR 14 | from threading import Thread, Lock 15 | import traceback 16 | import logging 17 | import select 18 | 19 | log = logging.getLogger("SERVER") 20 | 21 | class CantoServer(CantoSocket): 22 | def __init__(self, socket_name, dispatch, **kwargs): 23 | kwargs["server"] = True 24 | CantoSocket.__init__(self, socket_name, **kwargs) 25 | self.dispatch = dispatch 26 | self.conn_thread = None 27 | 28 | self.connections_lock = Lock() 29 | self.connections = [] # (socket, thread) tuples 30 | self.alive = True 31 | 32 | self.start_conn_loop() 33 | 34 | # Endlessly consume data from the connection. 
If there's enough data 35 | # for a complete command, toss it on the shared Queue.Queue 36 | 37 | def read_loop(self, conn): 38 | try: 39 | while self.alive: 40 | d = self.do_read(conn) 41 | if d: 42 | if d == select.POLLHUP: 43 | log.info("Connection ended.") 44 | return 45 | self.dispatch(conn, d) 46 | except Exception as e: 47 | tb = traceback.format_exc() 48 | log.error("Response thread dead on exception:") 49 | log.error("\n" + "".join(tb)) 50 | return 51 | 52 | # Sit and select for connections on sockets: 53 | 54 | def conn_loop(self, sockets): 55 | while self.alive: 56 | try: 57 | # select with a timeout so we can check we're still alive 58 | r, w, x = select.select(sockets, [], sockets, 1) 59 | for s in sockets: 60 | # If socket is readable, it's got a pending connection. 61 | if s in r: 62 | conn = s.accept() 63 | log.info("conn %s from sock %s" % (conn, s)) 64 | self.accept_conn(conn[0]) 65 | except Exception as e: 66 | tb = traceback.format_exc() 67 | log.error("Connection monitor exception:") 68 | log.error("\n" + "".join(tb)) 69 | log.error("Attempting to continue.") 70 | 71 | def start_conn_loop(self): 72 | self.conn_thread = Thread(target = self.conn_loop, 73 | args = (self.sockets,), name = "Connection Monitor") 74 | self.conn_thread.daemon = True 75 | self.conn_thread.start() 76 | log.debug("Spawned connection monitor thread.") 77 | 78 | # Remove dead connection threads. 79 | 80 | def no_dead_conns(self): 81 | self.connections_lock.acquire() 82 | for c, t in self.connections[:]: 83 | if not t.is_alive(): 84 | call_hook("server_kill_socket", [c]) 85 | t.join() 86 | c.close() 87 | self.connections.remove((c, t)) 88 | if self.connections == []: 89 | call_hook("server_no_connections", []) 90 | self.connections_lock.release() 91 | 92 | def accept_conn(self, conn): 93 | self.read_locks[conn] = Lock() 94 | self.write_locks[conn] = Lock() 95 | self.write_frags[conn] = None 96 | 97 | # Notify watchers about new socket. 
98 | call_hook("server_new_socket", [conn]) 99 | 100 | self.connections_lock.acquire() 101 | 102 | self.connections.append((conn,\ 103 | Thread(target = self.read_loop,\ 104 | args = (conn,), name="Connection #%s" %\ 105 | (len(self.connections))) 106 | )) 107 | 108 | self.connections[-1][1].daemon = True 109 | self.connections[-1][1].start() 110 | 111 | if len(self.connections) == 1: 112 | call_hook("server_first_connection", []) 113 | 114 | self.connections_lock.release() 115 | 116 | log.debug("Spawned new thread.") 117 | 118 | # Write a (cmd, args) to a single connection. 119 | def write(self, conn, cmd, args): 120 | if not conn: 121 | return None 122 | return self.do_write(conn, cmd, args) 123 | 124 | # Write a (cmd, args) to every connection. 125 | def write_all(self, cmd, args): 126 | self.no_dead_conns() 127 | 128 | self.connections_lock.acquire() 129 | for conn, t in self.connections: 130 | self.do_write(conn, cmd, args) 131 | self.connections_lock.release() 132 | 133 | def exit(self): 134 | self.alive = False 135 | self.conn_thread.join() 136 | 137 | # No locking, as we should already be single-threaded 138 | 139 | for conn, t in self.connections: 140 | conn.shutdown(SHUT_RDWR) 141 | conn.close() 142 | -------------------------------------------------------------------------------- /canto_next/plugins.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | #Canto - RSS reader backend 4 | # Copyright (C) 2016 Jack Miller 5 | # 6 | # This program is free software; you can redistribute it and/or modify 7 | # it under the terms of the GNU General Public License version 2 as 8 | # published by the Free Software Foundation. 
import traceback
import logging
import sys
import os

log = logging.getLogger("PLUGINS")

class CantoWrongProgramException(Exception):
    """Raised by check_program() when a plugin is loaded by the wrong binary."""
    pass

PROGRAM="unset"

def set_program(program_name):
    """Record which program (canto-daemon / canto-remote) is loading plugins."""
    global PROGRAM
    PROGRAM = program_name

def check_program(*args):
    """Plugins call this at import time to bail out of the wrong program."""
    if PROGRAM not in args:
        raise CantoWrongProgramException

def try_plugins(topdir, plugin_default=True, disabled_plugins=None, enabled_plugins=None):
    """Import every .py under topdir/plugins, honoring enable/disable lists.

    Returns the concatenated tracebacks of any plugins that failed to
    import, or None if everything loaded (or loading was aborted early).
    """
    # FIX: mutable default arguments ([]) replaced with the None sentinel.
    if disabled_plugins is None:
        disabled_plugins = []
    if enabled_plugins is None:
        enabled_plugins = []

    p = topdir + "/plugins"
    pinit = p + "/__init__.py"

    if not os.path.exists(p):
        log.info("Creating plugins directory.")
        try:
            os.mkdir(p)
        except Exception:
            log.error("Exception creating plugin directory")
            log.error("\n" + traceback.format_exc())
            return
    elif not os.path.isdir(p):
        # FIX: log.warn is a deprecated alias of log.warning.
        log.warning("Plugins file is not directory.")
        return

    if not os.path.exists(pinit):
        log.info("Creating plugin __init__.py")
        try:
            f = open(pinit, "w")
            f.close()
        except Exception:
            log.error("Exception creating plugin __init__.py")
            log.error("\n" + traceback.format_exc())
            return

    # Add plugin path to front of Python path.
    sys.path.insert(0, topdir)

    all_errors = ""

    # Go ahead and import all .py
    for fname in sorted(os.listdir(p)):
        if fname.endswith(".py") and fname != "__init__.py":
            try:
                proper = fname[:-3]

                if plugin_default:
                    # Opt-out mode: load everything not explicitly disabled.
                    if proper in disabled_plugins:
                        log.info("[plugin] %s - DISABLED" % proper)
                    else:
                        __import__("plugins." + proper)
                        log.info("[plugin] %s" % proper)
                else:
                    # Opt-in mode: load only explicitly enabled plugins.
                    if proper in enabled_plugins:
                        __import__("plugins." + proper)
                        log.info("[plugin] %s - ENABLED" % proper)
                    else:
                        log.info("[plugin] %s - DISABLED" % proper)
            except CantoWrongProgramException:
                # Plugin belongs to the other binary; silently skip.
                pass
            except Exception:
                log.error("Exception importing file %s" % fname)
                nice = traceback.format_exc()
                all_errors += nice
                log.error(nice)

    if all_errors != "":
        return all_errors

class PluginHandler(object):
    """Mixin that lets Plugin subclasses override attributes on an object."""

    def __init__(self):
        self.plugin_attrs = {}

    def update_plugin_lookups(self):
        # Populate a dict of overridden attributes

        self.plugin_attrs = {}

        self.plugin_class_instances =\
            [ c(self) for c in self.plugin_class.__subclasses__() ]

        for iclass in self.plugin_class_instances[:]:
            try:
                # Warn if we're overriding a previously defined plugin attr
                for iclass_attr in list(iclass.plugin_attrs.keys()):
                    if iclass_attr in self.plugin_attrs:
                        log.warning("Multiply defined plugin attribute!: %s" %\
                                iclass_attr)

                self.plugin_attrs.update(iclass.plugin_attrs)
            except Exception:
                log.error("Error initializing plugins:")
                log.error(traceback.format_exc())

                # Malformed plugins removed from instances
                self.plugin_class_instances.remove(iclass)
                continue

    def __getattribute__(self, name):
        # Route attribute access through plugin overrides when present.
        if name == "plugin_attrs" or name not in self.plugin_attrs:
            return object.__getattribute__(self, name)
        return self.plugin_attrs[name]

# Plugin is the base class for all of the separate plugin classes for each Gui
# object.
# There are two reasons to pin plugins to an empty class:
#
# - 'object' in the hierarchy via PluginHandler means we can use
#   __subclasses__, the cornerstone of the plugins system
#
# - This allows the plugins to have a hard distinction between self (the
#   instantiated class object) and obj (the instantiated main object that's
#   being overridden). This means that plugins don't have to worry about
#   clobbering anything.
#
# As a side effect, using the separate plugin architecture, we also can
# enable/disable pluggability on a class basis. For example, if TagList
# didn't specify a plugin_class, then it could not be overridden or hooked.

class Plugin(object):
    """Empty base class all per-object plugin classes derive from."""
    pass

# /plugins/reddit.py

# Canto Reddit Plugin
# by Jack Miller
# v1.2
#
# If this is placed in the plugins directory, it will add a new sort:
# reddit_score_sort, and will add "score [subreddit]" to the beginning of
# every relevant feed item.

# PREPEND_SCORE, if true will add the score to the entry title. Note, this
# doesn't affect the sort.

PREPEND_SCORE = True

# PREPEND_SUBREDDIT, if true will add the [subreddit] to the entry title.

PREPEND_SUBREDDIT = True

# EXTRA_LOG_OUTPUT, if true will log non-error stuff with -v.

EXTRA_LOG_OUTPUT = False

# You shouldn't have to change anything beyond this line.
from canto_next.plugins import check_program

# Abort the import immediately when loaded by anything but the daemon.
check_program("canto-daemon")

from canto_next.fetch import DaemonFetchThreadPlugin
from canto_next.feed import DaemonFeedPlugin
from canto_next.transform import transform_locals, CantoTransform

import urllib.request, urllib.error, urllib.parse
import logging
import time
import json
import re

log = logging.getLogger("REDDIT")

def debug(message):
    # Chatty logging, gated on the EXTRA_LOG_OUTPUT config knob.
    if EXTRA_LOG_OUTPUT:
        log.debug(message)

# The only reddit JSON attributes we persist per entry.
keep_attrs = [ "score", "subreddit" ]

class RedditFetchJSON(DaemonFetchThreadPlugin):
    def __init__(self, fetch_thread):
        self.plugin_attrs = {
            "fetch_redditJSON" : self.fetch_redditJSON,
        }

        # FIX: regex patterns as raw strings so backslashes are never
        # interpreted as string escapes.
        self.comment_id_regex = re.compile(r".*comments/([^/]*)/.*")
        self.tb_id_regex = re.compile(r".*tb/([^/]*)")

    def fetch_redditJSON(self, **kwargs):
        """Annotate freshly fetched entries with their reddit JSON data."""
        if "reddit.com" not in kwargs["feed"].URL:
            return

        # Get the feed's JSON
        try:
            json_url = kwargs["feed"].URL.replace("/.rss","/.json")
            req = urllib.request.Request(json_url, headers = { "User-Agent" : "Canto-Reddit-Plugin"})
            response = urllib.request.urlopen(req, None, 10)
            reddit_json = json.loads(response.read().decode())
        except Exception as e:
            log.error("Error fetching Reddit JSON: %s" % e)
            return

        for entry in kwargs["newcontent"]["entries"]:
            m = self.comment_id_regex.match(entry["link"])
            if not m:
                m = self.tb_id_regex.match(entry["link"])
            if not m:
                debug("Couldn't find ID in %s ?!" % entry["link"])
                continue

            # reddit "things" are named t3_<id> for links.
            m = "t3_" + m.groups()[0]

            for rj in reddit_json["data"]["children"]:
                if rj["data"]["name"] == m:
                    debug("Found m=%s" % m)

                    # Keep only the attributes we care about.
                    d = { "data" : {}}
                    for attr in keep_attrs:
                        if attr in rj["data"]:
                            d["data"][attr] = rj["data"][attr]

                    entry["reddit-json"] = d
                    break
            else:
                debug("Couldn't find m= %s" % m)

class RedditScoreSort(CantoTransform):
    def __init__(self):
        self.name = "Reddit Score Sort"

    def needed_attributes(self, tag):
        return [ "reddit-score" ]

    def transform(self, items, attrs):
        scored = []
        unscored = []

        for item in items:
            if "reddit-score" in attrs[item]:

                # For some reason, reddit-score has been parsed as a string
                # some times. Attempt to coerce.

                if not type(attrs[item]["reddit-score"]) == int:
                    try:
                        attrs[item]["reddit-score"] =\
                            int(attrs[item]["reddit-score"])
                    except (TypeError, ValueError):
                        unscored.append(item)
                    else:
                        scored.append(item)
                else:
                    scored.append(item)
            else:
                unscored.append(item)

        # Highest score first; unscored items trail in original order.
        scored = [ (attrs[x]["reddit-score"], x) for x in scored ]
        scored.sort()
        scored.reverse()
        scored = [ x for (s, x) in scored ]

        return scored + unscored

class RedditAnnotate(DaemonFeedPlugin):
    def __init__(self, daemon_feed):

        self.plugin_attrs = {
            "edit_reddit" : self.edit_reddit,
        }

    def edit_reddit(self, feed, newcontent, tags_to_add, tags_to_remove, remove_items):
        """Prepend score / [subreddit] to titles from the cached JSON."""
        for entry in newcontent["entries"]:
            if "reddit-json" not in entry:
                debug("NO JSON, bailing")
                continue

            rj = entry["reddit-json"]
            if not rj:
                debug("JSON empty, bailing")
                continue

            if "subreddit" not in entry:
                entry["subreddit"] = rj["data"]["subreddit"]
                if PREPEND_SUBREDDIT:
                    entry["title"] =\
                        "[" + entry["subreddit"] + "] " + entry["title"]

            if PREPEND_SCORE:
                score = rj["data"]["score"]
                if "reddit-score" in entry:
                    # Strip the previously prepended score before updating.
                    # FIX: raw string -- "\d" in a plain literal is an
                    # invalid escape (SyntaxWarning on modern Python).
                    entry["title"] = re.sub(r"^\d+ ", "", entry["title"])

                entry["reddit-score"] = score
                entry["title"] =\
                    ("%d " % entry["reddit-score"]) + entry["title"]

        return (tags_to_add, tags_to_remove, remove_items)

transform_locals["reddit_score_sort"] = RedditScoreSort()

# /canto_next/fetch.py
# -*- coding: utf-8 -*-
#Canto - RSS reader backend
# Copyright (C) 2016 Jack Miller
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

from .plugins import PluginHandler, Plugin
from .feed import allfeeds
from .hooks import call_hook

from multiprocessing import cpu_count
from threading import Thread

import feedparser
import traceback
import urllib.parse
import urllib.request
import urllib.error
import logging
import socket
import json
import time

log = logging.getLogger("CANTO-FETCH")

# Function to pass to json.dumps to strip non-serializable data
def json_ignore(x):
    return None

class DaemonFetchThreadPlugin(Plugin):
    pass

# This is the first time I've ever had a need for multiple inheritance.
# I'm not sure if that's a good thing or not =)

class CantoFetchThread(PluginHandler, Thread):
    """One thread per feed: fetch/parse with feedparser, then index."""

    def __init__(self, feed, fromdisk):
        PluginHandler.__init__(self)
        Thread.__init__(self, name="Fetch: %s" % feed.URL)
        self.daemon = True

        self.plugin_class = DaemonFetchThreadPlugin
        self.update_plugin_lookups()

        # feedparser honors this value, want to avoid hung feeds when the
        # internet connection is flaky

        socket.setdefaulttimeout(30)

        self.feed = feed
        self.fromdisk = fromdisk

    def run(self):

        # Initial load, just feed.index grab from disk.

        if self.fromdisk:
            self.feed.index({"entries" : []})
            return

        self.feed.last_update = time.time()

        # Otherwise, actually try to get an update.

        extra_headers = { 'User-Agent' :\
                'Canto/0.9.0 + http://codezen.org/canto-ng'}

        try:
            result = None
            # Passworded Feed
            if self.feed.username or self.feed.password:
                domain = urllib.parse.urlparse(self.feed.URL)[1]
                man = urllib.request.HTTPPasswordMgrWithDefaultRealm()
                auth = urllib.request.HTTPBasicAuthHandler(man)
                auth.handler_order = 490
                auth.add_password(None, domain, self.feed.username,
                        self.feed.password)

                try:
                    result = feedparser.parse(self.feed.URL, handlers=[auth],
                            request_headers = extra_headers)
                except Exception:
                    # And, failing that, Digest Authentication
                    man = urllib.request.HTTPPasswordMgrWithDefaultRealm()
                    auth = urllib.request.HTTPDigestAuthHandler(man)
                    auth.handler_order = 490
                    auth.add_password(None, domain, self.feed.username,
                            self.feed.password)
                    result = feedparser.parse(self.feed.URL, handlers=[auth],
                            request_headers = extra_headers)

            # No password
            else:
                result = feedparser.parse(self.feed.URL,
                        request_headers = extra_headers)

            update_contents = result
        except Exception as e:
            log.error("ERROR: try to parse %s, got %s" % (self.feed.URL, e))
            return

        # Allow DaemonFetchThreadPlugins to do any sort of fetch stuff. Doing
        # this before any other processing allows us to have plugins that
        # totally override the standard fetch.

        for attr in list(self.plugin_attrs.keys()):
            if not attr.startswith("fetch_"):
                continue

            try:
                a = getattr(self, attr)
                a(feed = self.feed, newcontent = update_contents)
            except Exception:
                log.error("Error running fetch thread plugin")
                log.error(traceback.format_exc())

        log.debug("Plugins complete.")

        # Interpret feedparser's bozo_exception, if there was an
        # error that resulted in no content, it's the same as
        # any other broken feed.

        if "bozo_exception" in update_contents:
            # FIX: the original compared the exception *instance* to the
            # URLError *class* with ==, which can never be true, so network
            # errors were misreported through the generic branch.
            if isinstance(update_contents["bozo_exception"], urllib.error.URLError):
                log.error("ERROR: couldn't grab %s : %s" %\
                        (self.feed.URL,\
                        update_contents["bozo_exception"].reason))
                return
            elif len(update_contents["entries"]) == 0:
                log.error("No content in %s: %s" %\
                        (self.feed.URL,\
                        update_contents["bozo_exception"]))
                return

            # Replace it if we ignore it, since exceptions
            # are not pickle-able.

            update_contents["bozo_exception"] = None

        # Update timestamp
        update_contents["canto_update"] = self.feed.last_update

        # Round-trip through JSON to strip non-serializable values.
        update_contents = json.loads(json.dumps(update_contents, default=json_ignore))

        log.debug("Parsed %s", self.feed.URL)

        # This handles its own locking
        self.feed.index(update_contents)

class CantoFetch():
    """Schedules fetch threads, bounded by the machine's CPU count."""

    def __init__(self, shelf):
        self.shelf = shelf
        self.deferred = []
        self.threads = []
        self.thread_limit = cpu_count()
        log.debug("Thread Limit: %s", self.thread_limit)

    def needs_update(self, feed):
        # True once feed.rate (minutes) has elapsed since its last update.
        passed = time.time() - feed.last_update
        if passed < feed.rate * 60:
            return False
        return True

    def still_working(self, URL):
        for thread, workingURL in self.threads:
            if URL == workingURL:
                return True
        return False

    def _start_one(self, feed, fromdisk):
        # Returns False when we're at the thread limit (caller defers).
        if len(self.threads) >= self.thread_limit:
            return False

        # If feed is stopped/dead, pretend like we did the work but don't
        # resurrect tags

        if feed.stopped:
            return True

        thread = CantoFetchThread(feed, fromdisk)
        thread.start()
        log.debug("Started thread for feed %s", feed)
        self.threads.append((thread, feed.URL))
        return True

    def fetch(self, force, fromdisk):
        # Drain previously deferred feeds first, in order.
        for feed, fd in self.deferred[:]:
            if self._start_one(feed, fd):
                log.debug("No longer deferred")
                self.deferred = self.deferred[1:]
            else:
                return

        for feed in allfeeds.get_feeds():
            if not force and not self.needs_update(feed):
                continue

            if self.still_working(feed.URL):
                continue

            if not self._start_one(feed, fromdisk):
                log.debug("Deferring %s %s", feed, fromdisk)
                self.deferred.append((feed, fromdisk))

    def reap(self, force=False):
        work_done = False
        newthreads
= [] 210 | 211 | for thread, URL in self.threads: 212 | if not force and thread.is_alive(): 213 | newthreads.append((thread, URL)) 214 | continue 215 | work_done = True 216 | thread.join() 217 | 218 | self.threads = newthreads 219 | 220 | if work_done and self.threads == []: 221 | self.shelf.sync() 222 | -------------------------------------------------------------------------------- /tests/test-feed-index.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from base import * 5 | 6 | from canto_next.feed import CantoFeed, dict_id, allfeeds 7 | from canto_next.tag import alltags 8 | import time 9 | 10 | TEST_URL = "http://example.com/" 11 | DEF_KEEP_TIME = 86400 12 | 13 | class TestFeedIndex(Test): 14 | 15 | # Make sure all items in the feeds have all of their tags... 16 | 17 | def compare_feed_and_tags(self, shelf): 18 | for feed in allfeeds.get_feeds(): 19 | maintag = "maintag:" + feed.name 20 | for entry in shelf[feed.URL]["entries"]: 21 | full_id = feed._item_id(entry) 22 | if maintag not in alltags.items_to_tags([full_id]): 23 | raise Exception("Item %s didn't make it into tag %s" % (entry, maintag)) 24 | 25 | if "canto-tags" in entry: 26 | for tag in entry["canto-tags"]: 27 | if tag not in alltags.items_to_tags([full_id]): 28 | raise Exception("Item %s didn't make it into user tag %s" % (entry, tag)) 29 | 30 | self.compare_tags_and_feeds(shelf) 31 | 32 | # ... 
and make sure that all tags have a real source in the feeds 33 | 34 | def compare_tags_and_feeds(self, shelf): 35 | for tag in alltags.get_tags(): 36 | for item in alltags.tags[tag]: 37 | URL = dict_id(item)["URL"] 38 | id = dict_id(item)["ID"] 39 | 40 | feed = allfeeds.get_feed(dict_id(item)["URL"]) 41 | if tag.startswith("maintag:") and tag[8:] == feed.name: 42 | continue 43 | 44 | for entry in shelf[feed.URL]["entries"]: 45 | if id == entry["id"] and tag not in entry["canto-tags"]: 46 | raise Exception("Tag %s has no source: %s / %s!" % (tag, feed.name, entry)) 47 | 48 | def generate_update_contents(self, num_items, item_content_template, update_time): 49 | entries = [] 50 | 51 | for i in range(num_items): 52 | c = eval(repr(item_content_template)) 53 | for key in c: 54 | if type(c[key]) == str and "%d" in c[key]: 55 | c[key] = c[key] % i 56 | entries.append(c) 57 | 58 | return { "canto_update" : update_time, "entries" : entries } 59 | 60 | # Generate a feed and shelf 61 | 62 | def generate_baseline(self, feed_name, feed_url, num_items, item_content_template, update_time): 63 | alltags.reset() 64 | allfeeds.reset() 65 | 66 | test_shelf = {} 67 | test_feed = CantoFeed(test_shelf, feed_name, feed_url, 10, DEF_KEEP_TIME, False) 68 | update = self.generate_update_contents(num_items, item_content_template, time.time()) 69 | 70 | test_feed.index(update) 71 | 72 | self.compare_feed_and_tags(test_shelf) 73 | 74 | if "maintag:Test Feed" not in alltags.tags: 75 | raise Exception("Failed to populate maintag") 76 | 77 | if len(alltags.tags["maintag:Test Feed"]) != 100: 78 | raise Exception("Failed to put items in maintag") 79 | 80 | if feed_url not in test_shelf: 81 | raise Exception("Failed to write to shelf") 82 | 83 | if "entries" not in test_shelf[feed_url]: 84 | raise Exception("Failed to get any entries") 85 | 86 | if len(test_shelf[feed_url]["entries"]) != 100: 87 | raise Exception("Failed to record all items") 88 | 89 | test_shelf["canto_update"] = update_time 90 | 
91 | for i, entry in enumerate(test_shelf[feed_url]["entries"]): 92 | if "id" not in entry: 93 | raise Exception("Failed to id item %d" % i) 94 | if "canto_update" not in entry: 95 | raise Exception("Failed to record update time on item %d" % i) 96 | 97 | entry["canto_update"] = update_time 98 | 99 | return test_feed, test_shelf, update 100 | 101 | def check(self): 102 | content = { 103 | "title" : "Title %d", 104 | "link" : TEST_URL + "%d/", 105 | } 106 | 107 | update_content = { 108 | "title" : "Title %d updated", 109 | "link" : TEST_URL + "%d/updated", 110 | } 111 | 112 | self.banner("sanity") 113 | 114 | # Index basic sanity checks (should write to shelf, should populate tags) 115 | # All internal to the baseline generator. 116 | 117 | f, s, u = self.generate_baseline("Test Feed", "http://example.com", 100, content, time.time()) 118 | 119 | self.banner("discard") 120 | 121 | now = time.time() 122 | test_feed, test_shelf, first_update = self.generate_baseline("Test Feed", TEST_URL, 100, content, now - (DEF_KEEP_TIME + 1)) 123 | 124 | second_update = self.generate_update_contents(100, update_content, now) 125 | 126 | # Keep some items from the first update 127 | 128 | second_update["entries"].extend(first_update["entries"][:5]) 129 | 130 | test_feed.index(second_update) 131 | 132 | self.compare_feed_and_tags(test_shelf) 133 | 134 | tag = alltags.tags["maintag:Test Feed"] 135 | nitems = len(tag) 136 | 137 | if nitems != 105: 138 | raise Exception("Wrong number of items in tag! 
%d - %s" % (nitems, tag)) 139 | if dict_id(tag[100])["ID"] != "http://example.com/0/": 140 | raise Exception("Failed to keep order got id = %s" % dict_id(tag[100])["ID"]) 141 | 142 | self.banner("keep_time") 143 | 144 | test_feed, test_shelf, first_update = self.generate_baseline("Test Feed", TEST_URL, 100, content, now - 300) 145 | 146 | test_feed.index(second_update) 147 | 148 | self.compare_feed_and_tags(test_shelf) 149 | 150 | tag = alltags.tags["maintag:Test Feed"] 151 | nitems = len(tag) 152 | if nitems != 200: 153 | raise Exception("Wrong number of items in tag! %d - %s" % (nitems, tag)) 154 | if dict_id(tag[0])["ID"] != "http://example.com/0/updated": 155 | raise Exception("Failed to keep order got id = %s" % dict_id(tag[0])["ID"]) 156 | if dict_id(tag[100])["ID"] != "http://example.com/0/": 157 | raise Exception("Failed to keep order got id = %s" % dict_id(tag[100])["ID"]) 158 | if dict_id(tag[199])["ID"] != "http://example.com/99/": 159 | raise Exception("Failed to keep order got id = %s" % dict_id(tag[199])["ID"]) 160 | 161 | self.banner("keep_unread") 162 | 163 | now = time.time() 164 | test_feed, test_shelf, first_update = self.generate_baseline("Test Feed", TEST_URL, 100, content, now - (DEF_KEEP_TIME + 1)) 165 | 166 | test_feed.keep_unread = True 167 | 168 | # Mark five that aren't keep_unread protected, but are too young to discard 169 | for i in range(5): 170 | test_shelf[TEST_URL]["entries"][i]["canto_update"] = now - 300 171 | test_shelf[TEST_URL]["entries"][i]["canto-state"] = [ "read" ] 172 | 173 | # Mark 25 that should be discarded 174 | for i in range(25, 50): 175 | test_shelf[TEST_URL]["entries"][i]["canto-state"] = [ "read" ] 176 | 177 | second_update = self.generate_update_contents(100, update_content, now) 178 | 179 | test_feed.index(second_update) 180 | 181 | self.compare_feed_and_tags(test_shelf) 182 | 183 | tag = alltags.tags["maintag:Test Feed"] 184 | nitems = len(tag) 185 | 186 | if nitems != 175: 187 | raise Exception("Wrong number 
of items in tag! %d - %s" % (nitems, tag)) 188 | 189 | self.banner("save all items on empty new content") 190 | 191 | test_feed, test_shelf, first_update = self.generate_baseline("Test Feed", TEST_URL, 100, content, now - (DEF_KEEP_TIME + 1)) 192 | 193 | test_feed.index(self.generate_update_contents(0, update_content, now)) 194 | 195 | tag = alltags.tags["maintag:Test Feed"] 196 | nitems = len(tag) 197 | 198 | if nitems != 100: 199 | raise Exception("Wrong number of items in tag! %d - %s" % (nitems, tag)) 200 | 201 | return True 202 | 203 | TestFeedIndex("feed index") 204 | -------------------------------------------------------------------------------- /canto_next/transform.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | #Canto - RSS reader backend 3 | # Copyright (C) 2016 Jack Miller 4 | # 5 | # This program is free software; you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License version 2 as 7 | # published by the Free Software Foundation. 8 | 9 | from .feed import allfeeds 10 | from .tag import alltags 11 | 12 | import logging 13 | import re 14 | 15 | log = logging.getLogger("TRANSFORM") 16 | 17 | transform_locals = { } 18 | 19 | # A Transform is generically any form of manipulation of the number of items 20 | # (filter) or order of those items (sort) based on some criteria. 21 | 22 | # The CantoTransform class serves as the base of all Transforms. It takes the 23 | # elements returned by a class' `needed_attributes()`, populates a dict of 24 | # these elements from cache/disk, and then gives them to the `transform()` call. 25 | 26 | class CantoTransform(): 27 | def __init__(self, name): 28 | self.name = name 29 | 30 | def __str__(self): 31 | return self.name 32 | 33 | # This is called with the feeds already read locked. 
34 | 35 | def __call__(self, tag): 36 | a = {} 37 | f = allfeeds.items_to_feeds(tag) 38 | needed = self.needed_attributes(tag) 39 | 40 | for feed in f: 41 | attrs = {} 42 | for i in f[feed]: 43 | attrs[i] = needed 44 | a.update(feed.get_attributes(f[feed], attrs)) 45 | 46 | for item in tag[:]: 47 | if item not in a.keys(): 48 | log.warn("Missing attributes for %s" % item) 49 | tag.remove(item) 50 | 51 | return self.transform(tag, a) 52 | 53 | def needed_attributes(self, tag): 54 | return [] 55 | 56 | def transform(self, items, attrs): 57 | return items 58 | 59 | # A StateFilter will filter out items that match a particular state. Supports 60 | # using "-tag" to indicate to filter out those missing the tag. 61 | 62 | class StateFilter(CantoTransform): 63 | def __init__(self, state): 64 | CantoTransform.__init__(self, "Filter state: %s" % state) 65 | self.state = state 66 | 67 | def needed_attributes(self, tag): 68 | return ["canto-state"] 69 | 70 | def transform(self, items, attrs): 71 | if self.state[0] == "-": 72 | state = self.state[1:] 73 | keep = True 74 | else: 75 | state = self.state 76 | keep = False 77 | 78 | return [ i for i in items if \ 79 | (state in attrs[i]["canto-state"]) == keep] 80 | 81 | # Filter out items whose [attribute] content matches an arbitrary regex. 
82 | 83 | class ContentFilterRegex(CantoTransform): 84 | def __init__(self, attribute, regex): 85 | CantoTransform.__init__(self, "Filter %s in %s" % (attribute, regex)) 86 | self.attribute = attribute 87 | try: 88 | self.match = re.compile(regex) 89 | except: 90 | self.match = None 91 | log.error("Couldn't compile regex: %s" % regex) 92 | 93 | def needed_attributes(self, tag): 94 | if not self.match: 95 | return [] 96 | return [ self.attribute ] 97 | 98 | def transform(self, items, attrs): 99 | if not self.match: 100 | return items 101 | 102 | r = [] 103 | for item in items: 104 | a = attrs[item] 105 | if self.attribute not in a: 106 | r.append(item) 107 | continue 108 | if type(a[self.attribute]) != str: 109 | log.error("Can't match non-string!") 110 | continue 111 | 112 | if not self.match.match(a[self.attribute]): 113 | r.append(item) 114 | return r 115 | 116 | # Simple basic-string abstraction of the above. 117 | 118 | class ContentFilter(ContentFilterRegex): 119 | def __init__(self, attribute, string): 120 | string = ".*" + re.escape(string) + ".*" 121 | ContentFilterRegex.__init__(self, attribute, string) 122 | 123 | class SortTransform(CantoTransform): 124 | def __init__(self, name, attr): 125 | CantoTransform.__init__(self, name) 126 | self.attr = attr 127 | 128 | def needed_attributes(self, tag): 129 | return [ self.attr ] 130 | 131 | def transform(self, items, attrs): 132 | r = [ ( attrs[item][self.attr], item ) for item in items ] 133 | r.sort() 134 | return [ item[1] for item in r ] 135 | 136 | # Meta-filter for AND 137 | class AllTransform(CantoTransform): 138 | def __init__(self, *args): 139 | name = "(" 140 | for i, t in enumerate(args): 141 | if i > 0: 142 | name += " AND " 143 | if hasattr(t, "name"): 144 | name += t.name 145 | else: 146 | name += "Unknown" 147 | 148 | name += ")" 149 | CantoTransform.__init__(self, name) 150 | self.transforms = args 151 | 152 | def needed_attributes(self, tag): 153 | needed = [] 154 | for t in self.transforms: 
155 | for a in t.needed_attributes(tag): 156 | if a not in needed: 157 | needed.append(a) 158 | return needed 159 | 160 | def transform(self, items, attrs): 161 | good_items = items[:] 162 | for t in self.transforms: 163 | good_items = t.transform(good_items, attrs) 164 | if not good_items: 165 | break 166 | return good_items 167 | 168 | class AnyTransform(CantoTransform): 169 | def __init__(self, *args): 170 | name = "(" 171 | for i, t in enumerate(args): 172 | if i > 0: 173 | name += " OR " 174 | if hasattr(t, "name"): 175 | name += t.name 176 | else: 177 | name += "Unknown" 178 | name += ")" 179 | CantoTransform.__init__(self, name) 180 | self.transforms = args 181 | 182 | def needed_attributes(self, tag): 183 | needed = [] 184 | for t in self.transforms: 185 | for a in t.needed_attributes(tag): 186 | if a not in needed: 187 | needed.append(a) 188 | return needed 189 | 190 | def transform(self, items, attrs): 191 | good_items = [] 192 | per_transform = [] 193 | 194 | for t in self.transforms: 195 | per_transform.append(t.transform(items, attrs)) 196 | 197 | for pt in per_transform: 198 | for item in pt: 199 | if item not in good_items: 200 | good_items.append(item) 201 | return good_items 202 | 203 | class InTags(CantoTransform): 204 | def __init__(self, *args): 205 | name = "in tags: %s" % (args,) 206 | 207 | CantoTransform.__init__(self, name) 208 | self.tags = args 209 | 210 | def needed_attributes(self, tag): 211 | return [] 212 | 213 | def transform(self, items, attrs): 214 | good = [] 215 | 216 | for item in items: 217 | for itag in alltags.items_to_tags([item]): 218 | if itag in self.tags: 219 | good.append(item) 220 | break 221 | 222 | return good 223 | 224 | class ItemLimit(CantoTransform): 225 | def __init__(self, num): 226 | if type(num) != int: 227 | log.error("ItemLimit must be called with a numerical argument") 228 | self.limit = 0 229 | return 230 | else: 231 | self.limit = num 232 | 233 | self.name="Limit %d items" % self.limit 234 | 235 | def 
transform(self, items, attrs): 236 | # Shortcut if failed init 237 | if self.limit == 0: 238 | return items 239 | 240 | return items[:self.limit] 241 | 242 | # Transform_locals is a list of elements that we pass to the eval() call when 243 | # evaluating a transform line from the config. Passing these into the local 244 | # scope allows simple filters to be created on the fly. 245 | 246 | transform_locals["StateFilter"] = StateFilter 247 | transform_locals["ContentFilterRegex"] = ContentFilterRegex 248 | transform_locals["ContentFilter"] = ContentFilter 249 | transform_locals["All"] = AllTransform 250 | transform_locals["Any"] = AnyTransform 251 | transform_locals["InTags"] = InTags 252 | transform_locals["ItemLimit"] = ItemLimit 253 | 254 | transform_locals["filter_read"] = StateFilter("read") 255 | transform_locals["sort_alphabetical"] =\ 256 | SortTransform("Sort Alphabetical", "title") 257 | 258 | # So now lines line `global_transform = ContentFilter('title', 'AMA')` can be 259 | # simply, safely, parsed with the Python interpreter. As well as supporting the 260 | # simple syntax `global_transform = filter_read` etc. 261 | 262 | # This code will throw an exception if it's invalid, so calling code must be 263 | # prepared. 264 | 265 | def eval_transform(transform_name): 266 | return eval(transform_name, {}, transform_locals) 267 | -------------------------------------------------------------------------------- /plugins/sync-rsync.py: -------------------------------------------------------------------------------- 1 | # Canto rsync Plugin 2 | # by Jack Miller 3 | # v1.1 4 | 5 | # This implements a lightweight remote sync based around rsync to a remote 6 | # server, or copying to mounted filesystem, etc. 7 | 8 | ENABLED = False 9 | #ENABLED = True 10 | 11 | # SSH 12 | # For ssh based rsync (remote hosts) you should have key authentication setup 13 | # so it runs without prompting for a password. 
#SYNC_LOCATION = "user@host:"

# Dropbox, assuming you have dropbox running
#SYNC_LOCATION = "~/Dropbox/"

# Mount / NFS / sshfs etc.
#SYNC_LOCATION = "/mnt/wherever/"

# Synchronization interval in seconds
INTERVAL = 5 * 60

# How long, in seconds, we should wait for the initial sync. Setting to 0 will
# cause a sync to occur before any other items can be read from disk, which
# ensures you won't see any old items, but also means a full sync has to occur
# before any items make it to the client and causes a long delay on startup.

INITIAL_SYNC = 30

#============================================
# Probably won't need to change these.

# rsync
# -a (archive mode) to preserve times / perms
# -v (verbose) to output interesting log info
# -z (compress) to save bandwidth

CMD = [ "rsync", "-avz"]

targets = { "db" : ".cantofeeds",
        "conf" : ".cantoconf"
}

from canto_next.plugins import check_program

check_program("canto-daemon", "canto-remote")

if not ENABLED:
    raise Exception("Plugin disabled.")

from canto_next.hooks import on_hook, call_hook
from canto_next.canto_backend import DaemonBackendPlugin
from canto_next.remote import DaemonRemotePlugin

from canto_next.config import parse_locks, parse_unlocks, config
from canto_next.locks import config_lock, feed_lock
from canto_next.feed import wlock_all, wunlock_all, rlock_all, runlock_all, allfeeds
from canto_next.tag import alltags

from tempfile import mkstemp
import subprocess
import logging
import shutil
import gzip
import json
import time
import os

log = logging.getLogger("SYNC-RSYNC")

class CantoFileSync(DaemonBackendPlugin):
    """Daemon-side plugin: mirrors the feed db and config to/from a
    remote location via the daemon_syncto / daemon_syncfrom hooks."""

    def __init__(self, backend):
        self.plugin_attrs = {
            "cmd_sync" : self.cmd_sync,
            "cmd_syncto" : self.cmd_syncto
        }

        self.backend = backend

        # Plugin __init__ happens extremely early so that plugin types can be
        # used in validating configuration, etc. We use the daemon_serving hook
        # to do our work after the config and storage is setup.

        on_hook("daemon_serving", self.setup)

    def setup(self):
        """Register runtime hooks and schedule the initial sync."""

        # Use setattributes and setconfigs commands to determine that we are
        # the fresh copy that should be synchronized.

        on_hook("daemon_end_loop", self.loop)
        on_hook("daemon_pre_setconfigs", self.pre_setconfigs)
        on_hook("daemon_pre_setattributes", self.pre_setattributes)
        on_hook("daemon_exit", self.cmd_syncto)

        self.reset()

        # sync will grab files, check the timediff on the file if the file is
        # actually newer (like we failed to sync last time) then it will set
        # fresh_config and do a syncto.

        self.sync_ts = 0
        if INITIAL_SYNC == 0:
            self.cmd_sync()
        elif INITIAL_SYNC < INTERVAL:
            # Backdate the timestamp so the first loop() fires early.
            self.sync_ts = time.time() - (INTERVAL - INITIAL_SYNC)

    def reset(self):
        # fresh_* : local state changed since last syncto
        # sent_*  : already pushed this cycle, skip syncfrom compare
        self.fresh_config = False
        self.sent_config = False

        self.fresh_content = False
        self.sent_content = False

    # Use hooks to determine when we need to copy stuff.

    def pre_setattributes(self, socket, args):
        self.fresh_content = True

    def pre_setconfigs(self, socket, args):
        self.fresh_config = True

    def time_diff(self, path):
        """Open the synced shelf at path and compare its user-modified
        timestamp against ours.

        Returns > 0 if remote is newer, 0 if equal, < 0 if we are newer
        (or the remote copy is unreadable / unstamped)."""
        log.debug("Checking if %s is older than our shelf.", path)

        try:
            fp = gzip.open(path, "rt", 9, "UTF-8")
            s = json.load(fp)
            fp.close()
        except Exception:
            # If something messed up, assume that the sync failed and
            # pretend that we're newer anyway.  (Narrowed from bare except.)
            return -1

        if "control" in s and "canto-user-modified" in s["control"]:
            remote_stamp = s["control"]["canto-user-modified"]
        else:
            log.debug("Remote has no timestamp")
            return -1

        rlock_all()
        if "control" in self.backend.shelf and "canto-user-modified" in self.backend.shelf["control"]:
            local_stamp = self.backend.shelf["control"]["canto-user-modified"]
            runlock_all()
        else:
            log.debug("We have no timestamp")
            runlock_all()
            return 1

        if remote_stamp > local_stamp:
            log.debug("db: We are older")
        elif remote_stamp == local_stamp:
            log.debug("db: We are equal")
        else:
            log.debug("db: We are newer")

        return remote_stamp - local_stamp

    def cmd_syncto(self, socket = None, args = None):
        """Push db and/or config to the remote if they changed locally."""
        if self.fresh_content:
            f, fname = mkstemp()
            os.close(f)

            # Lock feeds to make sure nothing's in flight
            wlock_all()

            # Sync the shelf so it's all on disk
            self.backend.shelf.sync()

            shutil.copyfile(self.backend.feed_path, fname)

            # Let everything else continue
            wunlock_all()

            call_hook("daemon_syncto", [ "db", fname ])

            # Cleanup temp file
            os.unlink(fname)

            self.fresh_content = False
            self.sent_content = True

        if self.fresh_config:
            f, fname = mkstemp()
            os.close(f)

            config_lock.acquire_read()
            shutil.copyfile(self.backend.conf_path, fname)
            config_lock.release_read()

            call_hook("daemon_syncto", [ "conf", fname ])

            os.unlink(fname)

            self.fresh_config = False
            self.sent_config = True

    def cmd_sync(self, socket = None, args = None):
        """Pull remote copies and adopt whichever side is newer; push
        back afterwards if we turned out to be the newer side."""
        needs_syncto = False

        if not self.sent_config:
            f, fname = mkstemp()
            os.close(f)

            call_hook("daemon_syncfrom", [ "conf", fname ])

            conf_stat = os.stat(self.backend.conf_path)
            sync_stat = os.stat(fname)

            log.debug('conf: %s sync: %s' % (conf_stat.st_mtime, sync_stat.st_mtime))

            diff = sync_stat.st_mtime - conf_stat.st_mtime

            # Will be empty tempfile if syncfrom failed.

            if sync_stat.st_size != 0:
                if diff > 0:
                    log.debug("conf: We are older")
                    parse_locks()
                    shutil.move(fname, self.backend.conf_path)
                    config.parse()
                    parse_unlocks()

                    # Echo these changes to all connected sockets that care.
                    # (Renamed loop var: `socket` shadowed the parameter.)
                    for watched_socket in self.backend.watches["config"]:
                        self.backend.in_configs({}, watched_socket)

                elif diff == 0:
                    log.debug("conf: We are equal")
                    os.unlink(fname)
                else:
                    log.debug("conf: We are newer")
                    os.unlink(fname)
                    self.fresh_config = True
                    needs_syncto = True
            else:
                os.unlink(fname)

        if not self.sent_content:
            f, fname = mkstemp()
            os.close(f)

            call_hook("daemon_syncfrom", [ "db", fname ])

            diff = self.time_diff(fname)

            if diff > 0:
                # Lock feeds to make sure nothing's in flight
                wlock_all()

                # Close the file so we can replace it.
                self.backend.shelf.close()

                shutil.move(fname, self.backend.feed_path)

                self.backend.shelf.open()

                # Clear out all of the currently tagged items. Usually on
                # update, we're able to discard items that we have in old
                # content, but aren't in new. But since we just replaced all of
                # our old content with a totally fresh copy, we might not know
                # they exist. Can't use reset() because we don't want to lose
                # configuration.

                alltags.clear_tags()

                # First half of wunlock_all, release these locks so
                # fetch threads can get locks

                for feed in sorted(allfeeds.feeds.keys()):
                    allfeeds.feeds[feed].lock.release_write()

                # Complete wunlock_all()
                feed_lock.release_write()

                # Force feeds to be repopulated from disk, which will handle
                # communicating changes to connections

                self.backend.fetch.fetch(True, True)
                self.backend.fetch.reap(True)

            # Equal, just clear it up

            elif diff == 0:
                os.unlink(fname)

            # If we're actually newer on a syncfrom then make syncto happen
            # next time. This can happen on init.

            else:
                os.unlink(fname)
                self.fresh_content = True
                needs_syncto = True

        if needs_syncto:
            self.cmd_syncto()

        self.reset()

    def loop(self):
        # Runs every daemon loop; only syncs once INTERVAL has elapsed.
        ts = time.time()
        if ts - self.sync_ts >= INTERVAL:
            self.cmd_sync()
            self.sync_ts = ts

class RemoteSync(DaemonRemotePlugin):
    """canto-remote side: adds the `sync` command."""

    def __init__(self, remote):
        self.plugin_attrs = { "cmd_sync" : self.cmd_sync }
        self.remote = remote

        on_hook("remote_print_commands", self.print_sync_commands)

    def print_sync_commands(self):
        print("\nSync Plugin")
        print("\tsync - sync the daemon")

    def cmd_sync(self):
        """USAGE: canto-remote sync
    Synchronize this daemon with a remote daemon"""
        self.remote.write("SYNC", {})

# Each of these gets called with a "target" (i.e. a type of file we want to
# sync) and a temporary filename to either copy to somewhere else or overwrite.

# NOTE: The logic for whether this file actually gets used is in sync.py.
For 333 | # the feeds database, it takes the last user modification into account because 334 | # any db with a running daemon is going to be modified often by new feed info, 335 | # making mtime worthless. For the config, however, it's only written when a 336 | # change has been made, so mtime should be sufficient. This is why we use -a to 337 | # rsync. 338 | 339 | def rsync_to(target, fname): 340 | 341 | if target in targets: 342 | cmd = CMD + [ fname, SYNC_LOCATION + targets[target] ] 343 | else: 344 | log.warn("Unknown file to sync: %s" % target) 345 | return 346 | 347 | log.debug("Syncto cmd: %s", cmd) 348 | 349 | try: 350 | out = subprocess.check_output(cmd) 351 | except Exception as e: 352 | log.warn("Command %s : %s" % (cmd, e)) 353 | else: 354 | log.debug("Syncto output: %s", out) 355 | 356 | def rsync_from(target, fname): 357 | if target in targets: 358 | cmd = CMD + [ SYNC_LOCATION + targets[target], fname ] 359 | else: 360 | log.warn("Unknown file to sync: %s" % target) 361 | return 362 | 363 | log.debug("Syncfrom cmd: %s", cmd) 364 | 365 | try: 366 | out = subprocess.check_output(cmd) 367 | except Exception as e: 368 | log.warn("Command %s : %s" % (cmd, e)) 369 | else: 370 | log.debug("Syncfrom output: %s", out) 371 | 372 | on_hook("daemon_syncfrom", rsync_from) 373 | on_hook("daemon_syncto", rsync_to) 374 | -------------------------------------------------------------------------------- /canto_next/protocol.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | #Canto - RSS reader backend 4 | # Copyright (C) 2016 Jack Miller 5 | # 6 | # This program is free software; you can redistribute it and/or modify 7 | # it under the terms of the GNU General Public License version 2 as 8 | # published by the Free Software Foundation. 
from threading import Lock
import logging
import socket
import select
import errno
import getopt
import struct
import shlex
import json
import time
import sys
import os

log = logging.getLogger('SOCKET')

class CantoSocket:
    """Length-prefixed JSON message transport over a Unix or INET socket,
    shared by the daemon (server) and clients."""

    def __init__(self, socket_name, **kwargs):

        self.socket_name = socket_name

        # kwargs.get() collapses the old "in kwargs" dances; defaults
        # are unchanged.
        self.server = bool(kwargs.get("server", False))
        self.port = kwargs.get("port", 0)
        self.interface = kwargs.get("interface", '')
        self.address = kwargs.get("address", None)

        self.sockets = []
        self.read_locks = {}
        self.write_locks = {}
        self.write_frags = {}

        self.connect()

    # Handle options common to all servers and clients

    def common_args(self, extrashort = "", extralong = [], version = ""):
        """Parse shared command line options; returns the getopt optlist
        or -1 on error.  Also computes self.conf_dir and
        self.location_args for spawning related processes."""
        self.verbosity = 0
        self.port = -1
        self.addr = None
        self.disabled_plugins = []
        self.enabled_plugins = []
        self.plugin_default = True

        try:
            # NOTE: deliberately rebinds sys.argv to the leftover args.
            optlist, sys.argv =\
                    getopt.getopt(sys.argv[1:], 'D:p:a:vV' + extrashort, ["dir=",
                        "port=", "address=","version", "noplugins","enableplugins=",
                        "disableplugins="] + extralong)

        except getopt.GetoptError as e:
            log.error("Error: %s" % e.msg)
            return -1

        # Prefer the legacy ~/.canto-ng dir if it exists, otherwise XDG.
        old_path = os.path.expanduser("~/.canto-ng")

        if os.path.exists(old_path):
            self.conf_dir = old_path
        else:
            if "XDG_CONFIG_HOME" in os.environ:
                xdg_path = os.environ["XDG_CONFIG_HOME"]
            else:
                xdg_path = "~/.config"

            xdg_path = os.path.expanduser(xdg_path)
            self.conf_dir = xdg_path + "/canto"

        self.location_args = []

        for opt, arg in optlist:
            if opt in [ "-D", "--dir"]:
                self.conf_dir = os.path.expanduser(arg)
                self.conf_dir = os.path.realpath(self.conf_dir)
                self.location_args += [ opt, arg ]

            elif opt in ["-V", "--version"]:
                print(version)
                sys.exit(0)

            elif opt in ["-v"]:
                self.verbosity += 1

            elif opt in [ "-p", "--port"]:
                try:
                    self.port = int(arg)
                    if self.port < 0:
                        raise Exception
                except Exception:
                    # NOTE(review): message says ">0" but 0 is accepted
                    # by the check above — confirm intended bound.
                    log.error("Error: Port must be >0 integer.")
                    return -1

                # Assume loopback if address hasn't been set yet.
                if self.addr is None:
                    self.addr = "127.0.0.1"

                self.location_args += [ opt, arg ]

            elif opt in [ "-a", "--address"]:
                self.addr = arg
                self.location_args += [ opt, arg ]

            elif opt in ['--noplugins']:
                self.plugin_default = False

            elif opt in ['--disableplugins']:
                self.disabled_plugins = shlex.split(arg)

            elif opt in ['--enableplugins']:
                self.enabled_plugins = shlex.split(arg)

        self.socket_path = self.conf_dir + "/.canto_socket"

        return optlist

    # Server setup, potentially both unix and inet sockets.
    def connect(self):
        """Create and register the socket(s).  Servers may listen on
        both a Unix and an INET socket; clients use exactly one."""
        if self.server:
            if self.socket_name:
                # Remove old unix socket.
                if os.path.exists(self.socket_name):
                    os.remove(self.socket_name)

                # Setup new socket.
                sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
                sock.setblocking(False)
                sock.bind(self.socket_name)
                sock.listen(5)

            # Net socket setup.
            if self.port > 0:
                sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                sock.setblocking(False)

                sock.bind((self.interface, self.port))
                sock.listen(5)

        # Client setup, can only do unix or inet, not both.

        else:
            if self.address and self.port > 0:
                sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                addr = (self.address, self.port)
            else:
                sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
                addr = self.socket_name

            # Retry for up to ~10s while the daemon comes up.
            tries = 10

            while tries > 0:
                try:
                    sock.connect(addr)
                    break
                except Exception as e:
                    if e.args[0] != errno.ECONNREFUSED or tries == 1:
                        raise
                    time.sleep(1)
                    tries -= 1

        self.sockets.append(sock)
        self.read_locks[sock] = Lock()
        self.write_locks[sock] = Lock()
        self.write_frags[sock] = None
        return sock

    # Setup poll.poll() object to watch for read status on conn.
    def read_mode(self, poll, conn):
        poll.register(conn.fileno(),\
                select.POLLIN | select.POLLHUP | select.POLLERR |\
                select.POLLPRI)

    # Setup poll.poll() object to watch for write status on conn.
    def write_mode(self, poll, conn):
        poll.register(conn.fileno(),\
                select.POLLOUT | select.POLLHUP | select.POLLERR |\
                select.POLLNVAL)

    # Take raw data, return (cmd, args) tuple or None if not enough data.
200 | def parse(self, conn, data): 201 | try: 202 | cmd, args = eval(repr(json.loads(data)), {}, {}) 203 | except: 204 | log.error("Failed to parse message: %s" % data) 205 | else: 206 | log.debug("\n\nRead:\n%s", json.dumps((cmd, args), indent=4, sort_keys=True)) 207 | return (cmd, args) 208 | 209 | def do_read(self, conn, timeout=None): 210 | while True: 211 | to = timeout 212 | if self.write_frags[conn] != None: 213 | if to == None: 214 | to = 500 215 | self.do_write(conn, None, None) 216 | 217 | self.read_locks[conn].acquire() 218 | r = self._do_read(conn, to) 219 | self.read_locks[conn].release() 220 | 221 | if r == select.POLLHUP: 222 | self.disconnected(conn) 223 | elif r == None and timeout == None: 224 | continue 225 | return r 226 | 227 | def _do_read(self, conn, timeout): 228 | poll = select.poll() 229 | 230 | try: 231 | self.read_mode(poll, conn) 232 | except: 233 | log.error("Error putting conn in read mode.") 234 | log.error("Interpreting as HUP") 235 | return select.POLLHUP 236 | 237 | # We only care about the first (only) descriptor's event 238 | try: 239 | p = poll.poll(timeout) 240 | except select.error as e: 241 | if e.args[0] == errno.EINTR: 242 | return 243 | log.debug("Raising error: %s", e[1]) 244 | raise 245 | 246 | if timeout and not p: 247 | return 248 | 249 | e = p[0][1] 250 | 251 | log.debug("E: %d", e) 252 | if e & select.POLLERR: 253 | log.debug("Read ERR") 254 | return select.POLLHUP 255 | if e & (select.POLLIN | select.POLLPRI): 256 | message = b"" 257 | 258 | try: 259 | size_bytes = conn.recv(8) 260 | if not size_bytes: 261 | log.debug("No bytes - HUP") 262 | return select.POLLHUP 263 | except: 264 | log.debug("Couldn't get size, interpreting as HUP\n") 265 | return select.POLLHUP 266 | 267 | size = struct.unpack('!q', size_bytes)[0] 268 | 269 | while size: 270 | try: 271 | frag = conn.recv(min((4096, size))) 272 | size -= len(frag) 273 | message += frag 274 | except Exception as e: 275 | if e.args[0] == errno.EINTR: 276 | continue 
277 | 278 | log.error("Error receiving: %s" % e) 279 | log.error("Interpreting as HUP") 280 | return select.POLLHUP 281 | 282 | # Never get POLLRDHUP on INET sockets, so 283 | # use POLLIN with no data as POLLHUP 284 | 285 | if not message: 286 | log.debug("Read POLLIN with no data") 287 | return select.POLLHUP 288 | 289 | return self.parse(conn, message.decode()) 290 | 291 | # Parse POLLHUP last so if we still got POLLIN, any data 292 | # is still retrieved from the socket. 293 | if e & select.POLLHUP: 294 | log.debug("Read HUP") 295 | return select.POLLHUP 296 | 297 | # Non-empty, but not anything we're interested in? 298 | log.debug("Unknown poll.poll() return") 299 | return select.POLLHUP 300 | 301 | # Writes a (cmd, args) to a single connection, returns: 302 | # 1) None if the write completed. 303 | # 2) select.POLLHUP is the connection is dead. 304 | 305 | def do_write(self, conn, cmd, args): 306 | 307 | # conn could be missing when the connection monitor thread has already 308 | # cleaned up a connection (i.e. saw it close) before the response 309 | # thread finishes sending it's response. So, instead of having response 310 | # threads lock connections (ouch), or deferring the processing of the 311 | # POLLHUP response, just detect when it's using stale data and ignore 312 | # it. 
313 | 314 | try: 315 | wlock = self.write_locks[conn] 316 | except KeyError as e: 317 | log.debug("conn not in write_locks %s" % e) 318 | return 319 | 320 | # If we're just flushing data, we shouldn't hang on these: 321 | 322 | if cmd == None: 323 | if not wlock.acquire(False): 324 | return 325 | else: 326 | wlock.acquire() 327 | 328 | r, frag = self._do_write(conn, cmd, args, self.write_frags[conn]) 329 | wlock.release() 330 | 331 | if r == select.POLLHUP: 332 | self.disconnected(conn) 333 | elif r == errno.EINTR: 334 | self.write_frags[conn] = frag 335 | else: 336 | self.write_frags[conn] = None 337 | 338 | return r 339 | 340 | def _do_write(self, conn, cmd, args, frag): 341 | log.debug("\n\nWrite:\n%s\n", json.dumps((cmd, args), indent=4, sort_keys=True)) 342 | 343 | tosend = b"" 344 | 345 | if cmd: 346 | message = json.dumps((cmd, args)).encode("UTF-8") 347 | size = struct.pack("!q", len(message)) 348 | tosend = size + message 349 | 350 | if frag: 351 | tosend = frag + tosend 352 | 353 | while tosend: 354 | poll = select.poll() 355 | 356 | try: 357 | self.write_mode(poll, conn) 358 | except: 359 | log.error("Error putting conn in write mode.") 360 | log.error("Interpreting as HUP") 361 | return (select.POLLHUP, 0) 362 | 363 | try: 364 | p = poll.poll(1) 365 | except select.error as e: 366 | if e.args[0] == errno.EINTR: 367 | return (errno.EINTR, tosend) 368 | log.error("Raising error: %s" % e[1]) 369 | raise 370 | 371 | if p == []: 372 | log.debug("poll timed out") 373 | return (errno.EINTR, tosend) 374 | 375 | e = p[0][1] 376 | 377 | if e & select.POLLHUP: 378 | log.debug("Write HUP") 379 | return (select.POLLHUP, 0) 380 | if e & select.POLLNVAL: 381 | log.debug("Write NVAL") 382 | return (select.POLLHUP, 0) 383 | if e & select.POLLERR: 384 | log.debug("Write ERR") 385 | return (select.POLLHUP, 0) 386 | if e & select.POLLOUT: 387 | try: 388 | sent = conn.send(tosend) 389 | except Exception as e: 390 | if e.args[0] == errno.EINTR: 391 | return (errno.EINTR, 
class CantoFeeds():
    """Global registry mapping feed URLs to live CantoFeed objects.

    Feeds that disappear across a config reset are parked in dead_feeds
    until all_parsed() confirms the new configuration, at which point
    they are destroyed for good.
    """

    def __init__(self):
        self.order = []
        self.feeds = {}
        self.dead_feeds = {}

    @write_lock(feed_lock)
    def add_feed(self, URL, feed):
        """Register feed under URL, rescuing it from dead_feeds if present."""
        self.order.append(URL)
        self.feeds[URL] = feed

        # A feed re-added before all_parsed() runs must not be destroyed.
        self.dead_feeds.pop(URL, None)

    @read_lock(feed_lock)
    def get_feed(self, URL):
        """Return the live (or dying) feed for URL, or None if unknown."""
        for pool in (self.feeds, self.dead_feeds):
            if URL in pool:
                return pool[URL]
        return None

    @read_lock(feed_lock)
    def get_feeds(self):
        """Return all feeds in configuration order."""
        return [self.get_feed(url) for url in self.order]

    @read_lock(feed_lock)
    def items_to_feeds(self, items):
        """Group full item ids by the live feed object they belong to."""
        grouped = {}
        for item in items:
            url = dict_id(item)["URL"]

            if url not in self.feeds:
                raise Exception("Can't find feed: %s" % url)

            grouped.setdefault(self.feeds[url], []).append(item)
        return grouped

    def all_parsed(self):
        """Destroy feeds that did not survive the latest config parse."""
        for url, feed in self.dead_feeds.items():
            call_hook("daemon_del_tag", [[ "maintag:" + feed.name ]])
            feed.destroy()
        self.dead_feeds = {}

    @write_lock(feed_lock)
    def reset(self):
        """Park every current feed in dead_feeds pending reconfiguration."""
        self.dead_feeds = self.feeds
        self.feeds = {}
        self.order = []

allfeeds = CantoFeeds()
def stop_feeds():
    # Flag every feed as stopped so in-flight updates are discarded
    # instead of being committed to disk during shutdown.
    for feed in allfeeds.feeds:
        allfeeds.feeds[feed].stopped = True

class DaemonFeedPlugin(Plugin):
    pass

class CantoFeed(PluginHandler):
    """A single configured feed: owns its on-disk entries and their tags."""

    def __init__(self, shelf, name, URL, rate, keep_time, keep_unread, **kwargs):
        PluginHandler.__init__(self)

        self.plugin_class = DaemonFeedPlugin
        self.update_plugin_lookups()

        self.shelf = shelf
        self.name = name
        self.URL = URL
        self.rate = rate
        self.keep_time = keep_time
        self.keep_unread = keep_unread
        self.stopped = False

        self.last_update = 0

        # This is held by the update thread, as well as any get / set attribute
        # threads

        self.lock = RWLock()

        self.username = None
        if "username" in kwargs:
            self.username = kwargs["username"]

        self.password = None
        if "password" in kwargs:
            self.password = kwargs["password"]

        allfeeds.add_feed(URL, self)

    def __str__(self):
        return "CantoFeed: %s" % self.name

    # Return { id : { attribute : value .. } .. }

    def get_attributes(self, items, attributes):
        """Fetch requested attributes for items from the on-disk entries.

        items - list of full item ids (JSON strings including our URL)
        attributes - dict mapping each full id to the attribute names wanted

        Unknown items get empty attributes plus a "???" title so clients
        still render something; "description" is aliased to feedparser's
        "summary" key.
        """
        r = {}

        d = self.shelf[self.URL]

        # Sort both the requests and the stored entries by feed-unique id
        # so they can be matched in a single merge-join style pass.

        args = [ (dict_id(item)["ID"], item, attributes[item]) for item in items ]
        args.sort()

        got = [ (item["id"], item) for item in d["entries"] ]
        got.sort()

        for item, full_id, needed_attrs in args:
            # Discard stored entries that precede the requested id.

            while got and item > got[0][0]:
                got.pop(0)

            if got and got[0][0] == item:
                attrs = {}
                for a in needed_attrs:
                    if a == "description":
                        real = "summary"
                    else:
                        real = a

                    if real in got[0][1]:
                        attrs[a] = got[0][1][real]
                    else:
                        attrs[a] = ""
                r[full_id] = attrs
                got.pop(0)
            else:
                # log.warn() is deprecated; use warning().
                log.warning("item not found: %s" % item)
                r[full_id] = {}
                for a in needed_attrs:
                    r[full_id][a] = ""
                r[full_id]["title"] = "???"
        return r

    # Given an ID and a dict of attributes, update the disk.
    def set_attributes(self, items, attributes):
        """Write the given attributes into the matching on-disk entries,
        then retag the changed items."""

        self.lock.acquire_write()

        d = self.shelf[self.URL]

        items_to_remove = []
        tags_to_add = []

        for item in items:
            d_id = dict_id(item)["ID"]

            for d_item in d["entries"]:
                if d_id != d_item["id"]:
                    continue
                for a in attributes[item]:
                    d_item[a] = attributes[item][a]

                items_to_remove.append(d_item)
                tags_to_add += self._tag([d_item])

        self.shelf[self.URL] = d
        self.shelf.update_umod()

        self.lock.release_write()

        self._retag(items_to_remove, tags_to_add, [])

    def _item_id(self, item):
        """Return the globally-unique id: feed URL + feed-unique item id."""
        return json.dumps({ "URL" : self.URL, "ID" : item["id"] })

    def _tag(self, items):
        """Collect (item, tag) pairs: the feed's maintag plus any user tags."""
        tags_to_add = []

        for item in items:
            tags_to_add.append((item, "maintag:" + self.name))
            if "canto-tags" in item:
                for user_tag in item["canto-tags"]:
                    log.debug("index adding user tag: %s - %s", user_tag, item["id"])
                    tags_to_add.append((item, user_tag))

        return tags_to_add

    def _retag(self, items_to_remove, tags_to_add, tags_to_remove):
        """Apply queued tag changes under feed_lock(read) + tag_lock(write)."""
        feed_lock.acquire_read()
        tag_lock.acquire_write()

        for item in items_to_remove:
            alltags.remove_id(self._item_id(item))

        for item, tag in tags_to_add:
            alltags.add_tag(self._item_id(item), tag)

        for item, tag in tags_to_remove:
            alltags.remove_tag(self._item_id(item), tag)

        alltags.do_tag_changes()

        tag_lock.release_write()
        feed_lock.release_read()

    def _keep_olditem(self, olditem):
        """Return True if an item absent from the latest fetch should be kept
        (still within keep_time, or unread with keep_unread set)."""
        ref_time = time.time()

        if "canto_update" not in olditem:
            olditem["canto_update"] = ref_time

        item_time = olditem["canto_update"]

        if "canto-state" in olditem:
            item_state = olditem["canto-state"]
        else:
            item_state = []

        if (ref_time - item_time) < self.keep_time:
            log.debug("Item not over keep_time (%d): %s",
                    self.keep_time, olditem["id"])
        elif self.keep_unread and "read" not in item_state:
            # (fixed stray trailing newline in the log format string)
            log.debug("Keeping unread item: %s", olditem["id"])
        else:
            log.debug("Discarding: %s", olditem["id"])
            return False
        return True

    # Re-index contents
    # If we have update_contents, use that
    # If not, at least populate self.items from disk.

    # MUST GUARANTEE self.items is in same order as entries on disk.

    def index(self, update_contents):
        """Merge freshly fetched entries with the on-disk state and commit.

        New entries inherit canto-* attributes (state, user tags) from their
        old counterparts; old entries missing from the fetch are kept or
        discarded per _keep_olditem. Plugins get additems_*/edit_* passes
        before the result is written to the shelf and retagged.
        """

        # If the daemon is shutting down, discard this update.

        if self.stopped:
            return

        self.lock.acquire_write()

        if self.URL not in self.shelf:
            # Stub empty feed
            log.debug("Previous content not found for %s.", self.URL)
            old_contents = {"entries" : []}
        else:
            old_contents = self.shelf[self.URL]
            log.debug("Fetched previous content for %s.", self.URL)

        new_entries = []

        for i, item in enumerate(update_contents["entries"]):

            # Update canto_update only for freshly seen items.
            item["canto_update"] = update_contents["canto_update"]

            # Attempt to isolate a feed unique ID
            if "id" not in item:
                if "link" in item:
                    item["id"] = item["link"]
                elif "title" in item:
                    item["id"] = item["title"]
                else:
                    log.error("Unable to uniquely ID item: %s" % item)
                    continue

            new_entries.append((i, item["id"], item))

        # Sort by string id
        new_entries.sort(key=lambda x: x[1])

        # Remove duplicates
        last_id = ""
        for x in new_entries[:]:
            if x[1] == last_id:
                new_entries.remove(x)
            else:
                last_id = x[1]

        old_entries = [ (i, item["id"], item) for (i, item) in enumerate(old_contents["entries"])]

        old_entries.sort(key=lambda x: x[1])

        # An empty fetch (e.g. transient feed error) keeps everything.
        keep_all = new_entries == []

        kept_entries = []

        for x in new_entries:

            # old_entry is really old, see if we should keep or discard

            while old_entries and x[1] > old_entries[0][1]:
                if keep_all or self._keep_olditem(old_entries[0][2]):
                    kept_entries.append(old_entries.pop(0))
                else:
                    old_entries.pop(0)

            # new entry and old entry match, move content over

            if old_entries and x[1] == old_entries[0][1]:
                olditem = old_entries.pop(0)[2]
                for key in olditem:
                    if key == "canto_update":
                        continue
                    elif key.startswith("canto"):
                        x[2][key] = olditem[key]

            # new entry is really new, tell everyone

            else:
                call_hook("daemon_new_item", [self, x[2]])

        # Resort lists by place, instead of string id
        new_entries.sort()
        old_entries.sort()

        if keep_all:
            kept_entries += old_entries
        else:
            for x in old_entries:
                if self._keep_olditem(x[2]):
                    kept_entries.append(x)

        kept_entries.sort()
        new_entries += kept_entries

        update_contents["entries"] = [ x[2] for x in new_entries ]

        tags_to_add = self._tag(update_contents["entries"])
        tags_to_remove = []
        remove_items = []

        # Allow plugins to add items prior to running the editing functions
        # so that the editing functions are guaranteed the full list.

        for attr in list(self.plugin_attrs.keys()):
            if not attr.startswith("additems_"):
                continue

            try:
                a = getattr(self, attr)
                tags_to_add, tags_to_remove, remove_items = a(self, update_contents, tags_to_add, tags_to_remove, remove_items)
            except Exception:
                # Narrowed from a bare except so SystemExit/KeyboardInterrupt
                # still propagate; plugin failures are logged, not fatal.
                log.error("Error running feed item adding plugin")
                log.error(traceback.format_exc())

        # Allow plugins DaemonFeedPlugins defining edit_* functions to have a
        # crack at the contents before we commit to disk.

        for attr in list(self.plugin_attrs.keys()):
            if not attr.startswith("edit_"):
                continue

            try:
                a = getattr(self, attr)
                tags_to_add, tags_to_remove, remove_items = a(self, update_contents, tags_to_add, tags_to_remove, remove_items)
            except Exception:
                log.error("Error running feed editing plugin")
                log.error(traceback.format_exc())

        if not self.stopped:
            # Commit the updates to disk.

            self.shelf[self.URL] = update_contents

            self.lock.release_write()

            self._retag(old_contents["entries"] + remove_items, tags_to_add, tags_to_remove)
        else:
            self.lock.release_write()

    def destroy(self):
        # Check for existence in case of delete quickly
        # after add.

        self.stopped = True
        if self.URL in self.shelf:
            del self.shelf[self.URL]
class CantoConfig():
    """Parse, validate, and instantiate the daemon's JSON configuration."""

    # NOTE(review): deliberately not __init__ -- the module-level `config`
    # singleton is created empty and initialized explicitly later,
    # presumably by daemon startup code. Confirm against canto_backend.

    def init(self, filename, shelf):
        self.filename = filename
        self.shelf = shelf
        self.json = {}

        # Validator triples: (option name regex, validator, required)

        self.defaults_validators = [
            ("rate", self.validate_int, False),
            ("keep_time", self.validate_int, False),
            ("keep_unread", self.validate_bool, False),
            ("global_transform", self.validate_set_transform, False),
        ]

        self.defaults_defaults = {
            "rate" : 10,
            "keep_time" : 86400,
            "keep_unread" : False,
            "global_transform" : "None",
        }

        self.feed_validators = [
            ("name", self.validate_unique_feed_name, True),
            ("url", self.validate_unique_url, True),
            ("rate", self.validate_int, False),
            ("keep_time", self.validate_int, False),
            ("keep_unread", self.validate_bool, False),
            ("username", self.validate_string, False),
            ("password", self.validate_string, False),
        ]

        self.feed_defaults = {}

        self.tag_validators = [
            ("transform", self.validate_set_transform, False),
            ("extra_tags", self.validate_string_list, False),
        ]

        self.tag_defaults = {}

    def reset(self):
        """Discard validated state and all live feed/tag objects."""
        allfeeds.reset()
        alltags.reset()

        self.errors = {}
        self.final = {}

        # Accumulators for verifying uniqueness
        self.urls = []
        self.feed_names = []

    def parse(self, fromfile=True, changes=None):
        """Re-validate and re-instantiate the configuration.

        fromfile - re-read self.filename from disk first.
        changes - dict of changed config (from a client); used to decide
            whether the change actually requires a full reparse.
        """
        # `changes` was a mutable default argument ({}); those are shared
        # across calls, so take None and substitute.
        if changes is None:
            changes = {}

        parse_locks()

        # Since we host client config too, check if
        # we should do a reparse.

        we_care = False
        for header in [ "feeds", "tags", "defaults" ]:
            if header in changes:
                if header == "tags":
                    # Tag changes matter unless they only toggle "collapsed",
                    # which is client-side display state.
                    for tag in changes["tags"]:
                        if list(changes["tags"][tag].keys()) != [ "collapsed" ]:
                            we_care = True
                    if we_care:
                        break
                else:
                    we_care = True
                    break

        if fromfile or we_care:
            self.reset()
            if fromfile:
                self.read_config()
            if self.validate():
                self.instantiate()
                if not fromfile:
                    self.write()
        elif not fromfile:
            self.write()

        parse_unlocks()

    def read_config(self):
        """Load self.json from disk, writing the default config if absent."""
        if not os.path.exists(self.filename):
            log.info("No config found, writing default.")
            self.json = default_config.copy()
            self.write()

        # Context manager ensures the handle is closed even if json.load
        # raises (the old open/close pair leaked it on parse errors).
        with codecs.open(self.filename, "rb", locale_enc) as c:
            self.json = json.load(c)

        log.info("Read %s" % self.filename)
        log.debug("Parsed into: %s", self.json)

    def error(self, ident, val, error):
        """Record a validation error for ident."""
        if ident in self.errors:
            self.errors[ident].append((val, error))
        else:
            self.errors[ident] = [(val, error)]

    def _validate_unique(self, ident, value, accumulator, desc):
        """Validate value as a string that hasn't been seen in accumulator."""
        if not self.validate_string(ident, value):
            return False

        if value in accumulator:
            self.error(ident, value, "%s already used!" % (desc,))
            return False

        accumulator.append(value)
        return (True, value)

    def validate_unique_url(self, ident, value):
        return self._validate_unique(ident, value, self.urls, "URL")

    def validate_unique_feed_name(self, ident, value):
        return self._validate_unique(ident, value, self.feed_names, "Feed name")

    def validate_bool(self, ident, value):
        if type(value) != bool:
            self.error(ident, value, "Not boolean!")
            return False
        return (True, value)

    def validate_int(self, ident, value):
        if type(value) != int:
            self.error(ident, value, "Not integer!")
            return False
        return (True, value)

    def validate_string(self, ident, value):
        if type(value) != str:
            self.error(ident, value, "Not unicode!")
            return False
        return (True, value)

    def validate_string_list(self, ident, value):
        if type(value) != list:
            self.error(ident, value, "Not list!")
            return False

        for idx, item in enumerate(value):
            item_ident = ident + ("[%d]" % idx)
            if not self.validate_string(item_ident, item):
                return False

        return (True, value)

    # Unfortunately, this must return the value, so that the JSON doesn't get
    # tainted with non-serializable values.

    def validate_set_transform(self, ident, value):
        """Check value compiles as a transform; fall back to "None" if not."""
        try:
            eval_transform(value)
        except Exception:
            tb = traceback.format_exc()
            self.error(ident, value, "Invalid transform" + "\n" + tb)
            return (True, "None")

        return (True, value)

    def validate_dict(self, ident_prefix, d, validators):
        """Run (regex, validator, required) triples against dict d in place.

        Invalid optional values are deleted; an invalid or missing required
        value invalidates the whole section. Returns True if the section
        survives.
        """
        section_invalidated = False
        for rgx, validator, required in validators:
            r = re.compile(rgx)

            found = False

            for opt in list(d.keys()):
                match = r.match(opt)
                if not match:
                    continue

                found = True
                ident = ident_prefix + ("[%s]" % opt)

                ret = validator(ident, d[opt])
                if not ret:
                    if required:
                        self.error(ident, d[opt],\
                                "Set but invalid and required!")
                        section_invalidated = True
                    else:
                        self.error(ident, d[opt], "Set but invalid!")
                        del d[opt]
                else:
                    # NOTE: we're ignoring the first tuple, it should
                    # always be True. If it wasn't for the fact that (val,)
                    # looks terrible that could also be returned from the
                    # validators.

                    d[opt] = ret[1]

            if not found and required:
                ident = ident_prefix + "[%s]" % rgx
                self.error(ident, None,\
                        "No matching value found on required option!")
                section_invalidated = True

            if section_invalidated:
                break

        return not section_invalidated

    # Validate validates only what exists in self.final, it does not make
    # substitutions for defaults. That's done on instantiation.

    def validate(self):
        """Validate self.json into self.final. Returns True on a clean config."""

        # Deep copy so validator substitutions never taint self.json.
        # (Replaces the old eval(repr(...)) trick; local import keeps the
        # module's top-level import block untouched.)
        import copy
        self.final = copy.deepcopy(self.json)

        if "defaults" in self.final:
            good = self.validate_dict("[defaults]", self.final["defaults"],
                    self.defaults_validators)
            if not good:
                del self.final["defaults"]

        if "tags" in self.final and not self.errors:
            for tag in list(self.final["tags"].keys()):
                good = self.validate_dict("[tags][" + tag + "]", self.final["tags"][tag],
                        self.tag_validators)
                if not good:
                    del self.final["tags"][tag]

        if "feeds" in self.final and not self.errors:
            for i, feed in enumerate(self.final["feeds"][:]):
                good = self.validate_dict("[feeds][%s]" % i, feed,
                        self.feed_validators)
                if not good:
                    self.final["feeds"].remove(feed)

        if self.errors:
            log.error("ERRORS:")
            for key in list(self.errors.keys()):
                log.error("%s:" % key)
                for value, error in self.errors[key]:
                    log.error("\t%s -> %s" % (value, error))
            return False

        log.info("Validated: %s" % self.final)
        return True

    # Create Tag and Feed objects based on final validated config

    def instantiate(self):
        """Create Tag and Feed objects from the validated config."""

        if "defaults" in self.final:
            for k in self.defaults_defaults.keys():
                if k not in self.final["defaults"]:
                    self.final["defaults"][k] = self.defaults_defaults[k]
        else:
            self.final["defaults"] = self.defaults_defaults.copy()

        if "tags" in self.final:
            for tag in self.final["tags"]:
                defs = self.final["tags"][tag]

                if "transform" not in defs:
                    defs["transform"] = "None"

                defs["transform"] = eval_transform(defs["transform"])

                if "extra_tags" not in defs:
                    defs["extra_tags"] = []

                alltags.tag_transform(tag, defs["transform"])
                alltags.set_extra_tags(tag, defs["extra_tags"])

        # Feeds must be instantiated *after* tags, so tag settings like
        # extra_tags can rely on getting an add_tag for each item after all
        # tag settings have been handled.

        if "feeds" in self.final:
            for feed in self.final["feeds"]:

                # Mandatory arguments to CantoFeed
                for k in [ "rate", "keep_time", "keep_unread" ]:
                    if k not in feed:
                        feed[k] = self.final["defaults"][k]

                # Optional arguments in kwargs
                kws = {}
                for k in ["password", "username"]:
                    if k in feed:
                        kws[k] = feed[k]

                feed = CantoFeed(self.shelf, feed["name"],\
                        feed["url"], feed["rate"], feed["keep_time"], feed["keep_unread"], **kws)

        # Set global transform.

        self.global_transform = eval_transform(\
                self.final["defaults"]["global_transform"])

    # Delete settings from the JSON. Any key equal to "DELETE" will be removed,
    # keys that are lists will have items removed if specified.

    def _delete(self, deletions, current):
        for key in list(deletions.keys()):

            # Nothing to do.

            if key not in current:
                continue

            # Delete surface fields.

            if deletions[key] == "DELETE":
                del current[key]

            # Delete potential fields in deeper dicts.

            elif type(deletions[key]) == dict:
                self._delete(deletions[key], current[key])

            # If we've specified a list and are operating on a list,
            # then eliminate those items.

            elif type(deletions[key]) == list and\
                    type(current[key]) == list:

                log.debug("Deleting items from list lists:")
                log.debug("\\%s", deletions[key])
                log.debug("\\%s", current[key])

                for item in deletions[key]:
                    if item in current[key]:
                        current[key].remove(item)

    def delete(self, deletions):
        self._delete(deletions, self.json)

    def _merge(self, change, current):
        for key in list(change.keys()):
            # Move over missing keys

            if key not in current:
                current[key] = change[key]

            # Merge subsequent dicts, or overwrite if wrong types
            # (in a well-behaved merge that shouldn't happen)

            elif type(change[key]) == dict:
                if type(current[key]) != dict:
                    # log.warn() is deprecated; use warning().
                    log.warning("Type change! Old value of ['%s'] not dict." %
                            (key,))
                    current[key] = change[key]
                else:
                    self._merge(change[key], current[key])

            # Merge lists (append change items not present in current and
            # potentially change their order based on the contents in change).

            elif type(change[key]) == list:
                if type(current[key]) != list:
                    log.warning("Type change! Old value of ['%s'] not list." %
                            (key, ))
                    current[key] = change[key]
                else:
                    log.debug("Merging lists:")
                    log.debug("\\%s", change[key])
                    log.debug("\\%s", current[key])

                    # Any items not in change are prepended. This allows the
                    # simple n-item append to work as expected, it allows the
                    # sort case to work as expected, and gives consistent
                    # behavior in the case of items unaccounted for in change.

                    current[key] = [ i for i in current[key] if i not in change[key] ] +\
                            change[key]

            # Move over present

            else:
                current[key] = change[key]

    def merge(self, newconfigs):
        self._merge(newconfigs, self.json)

    def write(self):
        """Serialize self.json back to the config file.

        Uses a context manager: the old try/finally referenced `f` in the
        finally clause even when codecs.open() itself raised, producing a
        NameError that masked the real failure.
        """
        with codecs.open(self.filename, "wb", locale_enc) as f:
            json.dump(self.json, f, ensure_ascii=False, sort_keys=True, indent=4)

config = CantoConfig()
Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 
55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. 
You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. 
But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. 
(This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. 
These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 
214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. 
If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | 294 | Copyright (C) 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 
305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | , 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 
340 | -------------------------------------------------------------------------------- /plugins/sync-inoreader.py: -------------------------------------------------------------------------------- 1 | # Canto Inoreader Plugin 2 | # by Jack Miller 3 | # v0.4 4 | 5 | # DEPENDENCIES 6 | 7 | # This plugin requires the 'requests' module, which can usually be found in 8 | # your distro's package manager as python3-requests (or python-requests on 9 | # Arch). 10 | 11 | # IMPORTANT NOTES 12 | 13 | # - When this plugin is enabled, canto will synchronize your subscribed feeds 14 | # with Inoreader. If you've been using canto, you should export your feeds 15 | # (canto-remote export > feeds.opml) and import them into Inoreader if you 16 | # don't want to lose your feeds because Inoreader's info is assumed to be more 17 | # correct than ours. 18 | # 19 | # - Feed subscriptions are only synchronized *from Inoreader* on startup, so if 20 | # you add / remove feeds with Inoreader, you will have to restart the daemon to 21 | # pickup the changes. Adding or removing feeds with canto works properly all 22 | # the time. 23 | # 24 | # - You must have a standard Inoreader account, not an OAuth (Google/Facebook 25 | # login). 26 | 27 | # CONFIGURATION 28 | 29 | # Inoreader credentials 30 | 31 | EMAIL="somebody@somewhere.com" 32 | PASSWORD="passw0rd" 33 | 34 | # You don't *have* to change these, but the API is rate limited. So if you want 35 | # to avoid rate limit issues, register your own application Preferences -> 36 | # Developer options on the Inoreader site and replace these. 37 | 38 | APP_ID="1000001299" 39 | APP_KEY="i0UOUtLQjj2WTre8WA3a9GWt_cgDhpkO" 40 | 41 | BASE_URL="https://www.inoreader.com/reader/" 42 | 43 | # === You shouldn't have to change anything past this line. 
=== 44 | 45 | from canto_next.plugins import check_program 46 | 47 | check_program("canto-daemon") 48 | 49 | from canto_next.fetch import DaemonFetchThreadPlugin 50 | from canto_next.feed import DaemonFeedPlugin, allfeeds 51 | from canto_next.hooks import call_hook, on_hook 52 | from canto_next.config import config 53 | 54 | from urllib.parse import urlencode, quote 55 | import traceback 56 | import requests 57 | import logging 58 | import time 59 | import json 60 | 61 | log = logging.getLogger("SYNC-INOREADER") 62 | 63 | class InoreaderReqFailed(Exception): 64 | pass 65 | 66 | class InoreaderAuthFailed(Exception): 67 | pass 68 | 69 | class CantoInoreaderAPI(): 70 | def __init__(self): 71 | self.extra_headers = { 72 | "User-Agent" : "Canto/0.9.0 + http://codezen.org/canto-ng", 73 | "AppKey" : APP_KEY, 74 | "AppID" : APP_ID, 75 | } 76 | 77 | try: 78 | self.authorization = self.auth() 79 | except: 80 | self.authorization = None 81 | 82 | self.dead = False 83 | 84 | self.add_tags_queued = {} 85 | self.del_tags_queued = {} 86 | 87 | def auth(self): 88 | headers = self.extra_headers.copy() 89 | headers['Email'] = EMAIL 90 | headers['Passwd'] = PASSWORD 91 | 92 | try: 93 | r = requests.get("https://www.inoreader.com/accounts/ClientLogin", headers, timeout=1) 94 | except Exception as e: 95 | raise InoreaderReqFailed(str(e)) 96 | 97 | if r.status_code != 200: 98 | raise InoreaderAuthFailed("Failed to authorize: [%s] %s" % (r.status_code, r.text)) 99 | 100 | for line in r.text.splitlines(): 101 | if line.startswith("Auth="): 102 | log.debug("authorization: %s", line[5:]) 103 | return line[5:] 104 | 105 | raise InoreaderAuthFailed("Failed to find Auth= in auth response") 106 | 107 | def inoreader_req(self, path, query = {}): 108 | tries = 3 109 | r = {} 110 | 111 | while tries and not self.dead: 112 | tries -= 1 113 | if not self.authorization: 114 | try: 115 | self.authorization = self.auth() 116 | except InoreaderReqFailed as e: 117 | log.debug("Auth request failed: %s", 
e) 118 | continue 119 | except InoreaderAuthFailed: 120 | log.error("Inoreader authorization failed, please check your credentials in sync-inoreader.py") 121 | self.dead = True 122 | raise 123 | 124 | headers = self.extra_headers.copy() 125 | headers["Authorization"] = "GoogleLogin auth=" + self.authorization 126 | 127 | try: 128 | r = requests.get(BASE_URL + path, params=query, headers=headers, timeout=1) 129 | except requests.exceptions.Timeout: 130 | raise InoreaderReqFailed 131 | 132 | if r.status_code != 200: 133 | log.debug("STATUS %s", r.status_code) 134 | log.debug(r.headers) 135 | log.debug(r.text) 136 | else: 137 | return r 138 | 139 | # No authorization, attempt to get another code on the next try. 140 | 141 | if r.status_code == 401: 142 | self.authorization = None 143 | elif r.status_code == 429: 144 | log.error("Inoreader rate limit reached.") 145 | self.dead = True 146 | elif r.status_code == 503: 147 | log.error("Inoreader appears down, state may be lost") 148 | 149 | raise InoreaderReqFailed 150 | 151 | # Convert special tags into /state/com.google/tag and others into 152 | # /label/tag, useful when matching without knowing the user. 153 | 154 | def full_ino_tag_suffix(self, tag): 155 | if tag in ["read", "starred", "fresh"]: 156 | return "/state/com.google/" + tag 157 | return "/label/" + tag 158 | 159 | # Add the user/- prefix to go upstream to Inoreader. 160 | 161 | def full_ino_tag(self, tag): 162 | return "user/-" + self.full_ino_tag_suffix(tag) 163 | 164 | # Do the opposite, convert an Inoreader tag into a natural name. (i.e.) 
165 | # /user/whatever/state/com.google/read -> read 166 | 167 | def strip_ino_tag(self, tag): 168 | tag = tag.split("/", 3) 169 | if tag[2] == "state": 170 | return tag[3].split("/", 1)[1] 171 | return tag[3] 172 | 173 | # Return whether Inoreader data includes this natural tag 174 | 175 | def has_tag(self, item, tag): 176 | if "canto_inoreader_categories" not in item: 177 | return False 178 | 179 | suff = self.full_ino_tag_suffix(tag) 180 | for category in item["canto_inoreader_categories"]: 181 | if category.endswith(suff): 182 | return True 183 | return False 184 | 185 | def add_tag(self, item, tag): 186 | ino_id = item["canto_inoreader_id"] 187 | if not self.has_tag(item, tag): 188 | if tag in self.add_tags_queued: 189 | self.add_tags_queued[tag].append(ino_id) 190 | else: 191 | self.add_tags_queued[tag] = [ino_id] 192 | 193 | def remove_tag(self, item, tag): 194 | ino_id = item["canto_inoreader_id"] 195 | if self.has_tag(item, tag): 196 | if tag in self.del_tags_queued: 197 | self.del_tags_queued[tag].append(ino_id) 198 | else: 199 | self.del_tags_queued[tag] = [ino_id] 200 | 201 | def _urllimit(self, prefix, ino_ids): 202 | t = prefix 203 | l = len(t) 204 | 205 | for i, ino_id in enumerate(ino_ids): 206 | if l + len(ino_id) > 2048: 207 | self.inoreader_req(t) 208 | return ino_ids[i:] 209 | t += ino_id 210 | l += len(ino_id) 211 | 212 | self.inoreader_req(t) 213 | return [] 214 | 215 | def flush_changes(self): 216 | for key in self.add_tags_queued: 217 | to_add = [ "&i=" + quote(x) for x in self.add_tags_queued[key]] 218 | while to_add: 219 | to_add = self._urllimit("api/0/edit-tag?a=" + quote(self.full_ino_tag(key)), to_add) 220 | 221 | for key in self.del_tags_queued: 222 | to_del = [ "&i=" + quote(x) for x in self.del_tags_queued[key]] 223 | while to_del: 224 | to_del = self._urllimit("api/0/edit-tag?r=" + quote(self.full_ino_tag(key)), to_del) 225 | 226 | self.add_tags_queued = {} 227 | self.del_tags_queued = {} 228 | 229 | def get_subs(self): 230 | 
return self.inoreader_req("api/0/subscription/list").json()["subscriptions"] 231 | 232 | def add_sub(self, feed_url, title): 233 | query = { 234 | "ac" : "subscribe", 235 | "s" : "feed/" + feed_url, 236 | "t" : title 237 | } 238 | 239 | self.inoreader_req("api/0/subscription/edit", query) 240 | 241 | def del_sub(self, feed_url): 242 | query = { 243 | "ac" : "unsubscribe", 244 | "s" : "feed/" + feed_url 245 | } 246 | 247 | self.inoreader_req("api/0/subscription/edit", query) 248 | 249 | api = CantoInoreaderAPI() 250 | 251 | # Given a change set, and the current attributes of a canto item, tell 252 | # Inoreader about it. 253 | 254 | def sync_state_to(changes, attrs, add_only = False): 255 | if "canto-state" in changes: 256 | if "read" in changes["canto-state"]: 257 | api.add_tag(attrs, "read") 258 | elif not add_only: 259 | if api.has_tag(attrs, "read"): 260 | api.remove_tag(attrs, "read") 261 | 262 | if "canto-tags" in changes: 263 | for tag in changes["canto-tags"]: 264 | tag = tag.split(":", 1)[1] # strip user: or category: prefix 265 | if not api.has_tag(attrs, tag): 266 | api.add_tag(attrs, tag) 267 | 268 | if add_only: 269 | return 270 | 271 | for tag in attrs["canto_inoreader_categories"]: 272 | tag = api.strip_ino_tag(tag) 273 | if "user:" + tag not in changes["canto-tags"]: 274 | api.remove_tag(attrs, tag) 275 | 276 | class CantoFeedInoReader(DaemonFeedPlugin): 277 | def __init__(self, feed): 278 | self.plugin_attrs = { "edit_inoreader_sync" : self.edit_inoreader_sync, 279 | "additems_inoreader" : self.additems_inoreader } 280 | self.feed = feed 281 | self.ino_data = None 282 | 283 | def _list_add(self, item, attr, new): 284 | if attr not in item: 285 | item[attr] = [ new ] 286 | elif new not in item[attr]: 287 | item[attr].append(new) 288 | 289 | def add_utag(self, item, tags_to_add, tag): 290 | self._list_add(item, "canto-tags", "user:" + tag) 291 | tags_to_add.append((item, "user:" + tag)) 292 | 293 | def add_state(self, item, state): 294 | 
self._list_add(item, "canto-state", state) 295 | 296 | def additems_inoreader(self, feed, newcontent, tags_to_add, tags_to_remove, remove_items): 297 | stream_id = quote("feed/" + feed.URL, []) 298 | 299 | query = { "n" : 1000 } 300 | 301 | # Collect all of the items 302 | 303 | self.ino_data = [] 304 | 305 | content_path = "api/0/stream/contents/" + stream_id 306 | 307 | try: 308 | r = api.inoreader_req(content_path, query).json() 309 | self.ino_data.extend(r["items"]) 310 | except (InoreaderAuthFailed, InoreaderReqFailed): 311 | return (tags_to_add, tags_to_remove, remove_items) 312 | except Exception as e: 313 | log.debug("EXCEPT: %s", traceback.format_exc()) 314 | raise 315 | 316 | # Find items that were inserted last time, and remove them, potentially 317 | # adding them to our fresh Inoreader data. 318 | 319 | # This keeps us from getting dupes when Inoreader finds an item, we 320 | # insert it, and then a real copy comes to canto but canto doesn't 321 | # detect the dupe since the ids are different. 322 | 323 | for canto_entry in newcontent["entries"][:]: 324 | if "canto-from-inoreader" not in canto_entry: 325 | continue 326 | 327 | remove_items.append(canto_entry) 328 | tags_to_add = [ x for x in tags_to_add if x[0] != canto_entry] 329 | 330 | newcontent["entries"].remove(canto_entry) 331 | 332 | for ino_entry in self.ino_data[:]: 333 | if canto_entry["id"] == ino_entry["id"]: 334 | break 335 | else: 336 | self.ino_data.append(canto_entry) 337 | 338 | # Now insert (or re-insert) items that aren't already in our data. 339 | 340 | # NOTE: It's okay if re-inserted items are also in remove_ids, since 341 | # that's processed first, and will be cancelled out by adding the tags 342 | # afterwards. 
343 | 344 | for ino_entry in self.ino_data: 345 | for canto_entry in newcontent["entries"]: 346 | if ino_entry["canonical"][0]["href"] != canto_entry["link"]: 347 | continue 348 | if ino_entry["id"] == canto_entry["id"]: 349 | canto_entry["canto-from-inoreader"] = True 350 | break 351 | else: 352 | if "canto-from-inoreader" not in ino_entry: 353 | # feedparser compatibility 354 | ino_entry["summary"] = ino_entry["summary"]["content"] 355 | ino_entry["link"] = ino_entry["canonical"][0]["href"] 356 | 357 | # mark this item as from inoreader (missing from feed) 358 | ino_entry["canto-from-inoreader"] = True 359 | 360 | newcontent["entries"].append(ino_entry) 361 | tags_to_add.append((ino_entry, "maintag:" + feed.name )) 362 | 363 | return (tags_to_add, tags_to_remove, remove_items) 364 | 365 | def edit_inoreader_sync(self, feed, newcontent, tags_to_add, tags_to_remove, remove_items): 366 | 367 | # Add inoreader_id/categories information to the items 368 | 369 | # This is very similar to the loop in additems_inoreader, but needs to 370 | # be separate in case other plugins add items that inoreader might 371 | # track. 372 | 373 | for ino_entry in self.ino_data: 374 | for canto_entry in newcontent["entries"][:]: 375 | if ino_entry["canonical"][0]["href"] != canto_entry["link"]: 376 | continue 377 | canto_entry["canto_inoreader_id"] = ino_entry["id"] 378 | canto_entry["canto_inoreader_categories"] = ino_entry["categories"] 379 | break 380 | 381 | # Drop the data. 382 | self.ino_data = None 383 | 384 | for entry in newcontent["entries"]: 385 | # If we didn't get an id for this item, skip it 386 | 387 | if "canto_inoreader_id" not in entry: 388 | continue 389 | 390 | for category in entry["canto_inoreader_categories"]: 391 | if category.endswith("/state/com.google/read"): 392 | self.add_state(entry, "read") 393 | continue 394 | 395 | cat = category.split("/", 3) 396 | if len(cat) < 4: 397 | log.debug("Weird category? 
%s", cat) 398 | continue 399 | 400 | if cat[2] == "state": 401 | if cat[3] == "com.google/starred": 402 | self.add_utag(entry, tags_to_add, "starred") 403 | elif cat[2] == "label": 404 | self.add_utag(entry, tags_to_add, cat[3]) 405 | 406 | # If this is the first time we've paired an item up with its 407 | # Inoreader data, our state is better, so sync it to Inoreader, and 408 | # then skip the remainder of the logic to remove canto state/tags 409 | 410 | if "canto-inoreader-sync" not in entry: 411 | sync_state_to(entry, entry, True) 412 | entry["canto-inoreader-sync"] = True 413 | continue 414 | 415 | if "canto-state" not in entry or type(entry["canto-state"]) != list: 416 | continue 417 | 418 | # It appears that if an item is "fresh" it will resist all attempts 419 | # to set it as read? 420 | 421 | if "read" in entry["canto-state"] and not\ 422 | (api.has_tag(entry, "read") or api.has_tag(entry, "fresh")): 423 | log.debug("Marking unread from Inoreader") 424 | entry["canto-state"].remove("read") 425 | 426 | if "canto-tags" not in entry or type(entry["canto-tags"]) != list: 427 | continue 428 | 429 | for tag in entry["canto-tags"][:]: 430 | if not api.has_tag(entry, tag.split(":", 1)[1]): 431 | entry["canto-tags"].remove(tag) 432 | tags_to_remove.append((entry, tag)) 433 | 434 | api.flush_changes() 435 | return (tags_to_add, tags_to_remove, remove_items) 436 | 437 | # For canto communicating to Inoreader, we tap into the relevant hooks to 438 | # pickup state / tag changes, and convert that into Inoreader API calls. 

def post_setattributes(socket, args):
    """Hook: push attribute changes (state/tags) up to Inoreader.

    socket is supplied by the hook machinery but unused here; args maps
    item ids to the attribute changes that were just set."""
    for item_id in args.keys():
        dict_id = json.loads(item_id)

        feed = allfeeds.get_feed(dict_id["URL"])

        attrs = feed.get_attributes([item_id], { item_id :\
                ["canto_inoreader_id", "canto_inoreader_categories", "canto-state", "canto-tags"] })
        attrs = attrs[item_id]

        # If the canto_inoreader_id isn't right (likely empty since get_attributes
        # will sub in "") then skip synchronizing this item.

        ino_id = attrs["canto_inoreader_id"]
        if not ino_id.startswith("tag:google.com,2005:reader/item/"):
            continue

        sync_state_to(args[item_id], attrs)

    api.flush_changes()

on_hook("daemon_post_setattributes", post_setattributes)

def post_setconfigs(socket, args):
    # Hook: mirror newly added canto feeds as Inoreader subscriptions.
    if "feeds" in args:
        for feed in args["feeds"]:
            api.add_sub(feed["url"], feed["name"])

on_hook("daemon_post_setconfigs", post_setconfigs)

def post_delconfigs(socket, args):
    # Hook: mirror canto feed deletions as Inoreader unsubscribes.
    if "feeds" in args:
        for feed in args["feeds"]:
            api.del_sub(feed["url"])

on_hook("daemon_post_delconfigs", post_delconfigs)

# Do the initial feed synchronization. This only occurs once per run, and
# assumes Inoreader knows everything.

def on_daemon_serving():
    """One-shot startup sync: make canto's feed list match Inoreader's."""
    log.debug("Synchronizing subscriptions.")
    ino_subs = api.get_subs()

    # Drop canto feeds Inoreader doesn't know about (for/else: no match).
    for c_feed in config.json["feeds"]:
        url = c_feed["url"]

        for sub in ino_subs:
            if sub["url"] == url:
                break
        else:
            log.debug("Old feed: %s", url)
            call_hook("daemon_del_configs", [ None, { "feeds" : [ c_feed ] } ] )

    # Add Inoreader subscriptions canto doesn't have yet.
    for sub in ino_subs:
        url = sub["url"]
        name = sub["title"]

        for c_feed in config.json["feeds"]:
            if c_feed["url"] == url:
                break
            if c_feed["name"] == name:
                log.info("Found feed with same name, but not URL? Skipping.")
                break
        else:
            log.debug("New feed: %s", url)
            call_hook("daemon_set_configs", [ None, { "feeds" : [ { "name" : name, "url" : url } ] } ])


on_hook("daemon_serving", on_daemon_serving)

# ---------------------------------------------------------------------------
# (file boundary in this dump: canto_next/remote.py)
# ---------------------------------------------------------------------------
# -*- coding: utf-8 -*-
#Canto - RSS reader backend
# Copyright (C) 2016 Jack Miller
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

from .plugins import PluginHandler, Plugin, try_plugins, set_program
from .client import CantoClient
from .format import escsplit
from .hooks import call_hook

from xml.sax.saxutils import escape as xml_escape
import xml.parsers.expat
import feedparser
import traceback
import pprint
import time
import sys

import logging

def assign_to_dict(d, var, val):
    """Assign val into nested dict d at dotted path var (e.g. "a.b.c").

    Intermediate dicts are created as needed.  Returns (True, val) on
    success, or (False, error-string) if a path component exists but is
    not a dict."""
    terms = escsplit(var, '.', 0, 0, True)
    cur = d

    for term in terms[:-1]:
        if term not in cur:
            cur[term] = {}
        elif type(cur[term]) != dict:
            return (False, "Term %s is not dict" % term)
        cur = cur[term]

    cur[terms[-1]] = val
    return (True, val)

def access_dict(d, var):
    """Look up dotted path var in nested dict d.

    Returns (True, value) if found, (False, False) otherwise.
    NOTE(review): the `type(cur) != dict` guard runs *after* `term not in
    cur`, so a non-dict intermediate value can raise TypeError on the
    membership test before the guard fires — confirm intended ordering
    (compare assign_to_dict, which checks cur[term])."""
    terms = escsplit(var, '.', 0, 0, True)
    cur = d

    for term in terms[:-1]:
        if term not in cur:
            return (False, False)
        elif type(cur) != dict:
            return (False, False)
        cur = cur[term]

    if terms[-1] not in cur:
        return (False, False)
    return (True, cur[terms[-1]])

class DaemonRemotePlugin(Plugin):
    # Marker base class for plugins that extend canto-remote.
    pass

class CantoRemote(PluginHandler, CantoClient):
    """Command line client: parses args, connects to the daemon, runs one
    cmd_* command per invocation."""

    def __init__(self):
        self.plugin_attrs = {}

        # By default this will log to stderr.
        logging.basicConfig(
                format = "%(asctime)s : %(name)s -> %(message)s",
                datefmt = "%H:%M:%S",
                level = logging.ERROR
        )

        # NOTE(review): REPLACE_VERSION and GIT_HASH are unbound names in
        # the source tree — presumably substituted at install/build time;
        # confirm against setup.py before running from a raw checkout.
        version = "canto-remote " + REPLACE_VERSION + " " + GIT_HASH
        optl = self.common_args("h", ["help"], version)
        if optl == -1:
            sys.exit(-1)

        if self.args(optl) == -1:
            sys.exit(-1)

        set_program("canto-remote")
        try_plugins(self.conf_dir, self.plugin_default, self.disabled_plugins,
                self.enabled_plugins)

        PluginHandler.__init__(self)
        self.plugin_class = DaemonRemotePlugin
        self.update_plugin_lookups()

        # port < 0 means local: spawn/attach the daemon over a unix socket;
        # otherwise connect over TCP to addr:port.
        try:
            if self.port < 0:
                self.start_daemon()
                CantoClient.__init__(self, self.socket_path)
            else:
                CantoClient.__init__(self, None,\
                        port = self.port, address = self.addr)
        except Exception as e:
            print("Error: %s" % e)
            print(self.socket_path)
            sys.exit(-1)

        self.handle_args()

    def args(self, optlist):
        # Handle remote-specific options; common ones were consumed by
        # common_args().  Returns 0 on success (callers treat -1 as error).
        for opt, arg in optlist:
            if opt in [ "-h", "--help" ]:
                self.print_help()
                sys.exit(0)
        return 0

    def print_help(self):
        print("USAGE: canto-remote [options] ([command] [command-args])\n")
        self.print_commands()
        print("\n\t-h/--help\tThis help")
        print("\t-V/--version\tPrint version")
        print("\t-v/\t\tVerbose logging (for debug)")
        print("\t-D/--dir \tSet configuration directory.")
        print("\nPlugin control\n")
        print("\t--noplugins\t\t\t\tDisable plugins")
        print("\t--enableplugins 'plugin1 plugin2...'\tEnable single plugins (overrides --noplugins)")
        print("\t--disableplugins 'plugin1 plugin2...'\tDisable single plugins")
        print("\nNetwork control\n")
        print("NOTE: These should be used in conjunction with SSH port forwarding to be secure\n")
        print("\t-a/--address \tConnect to this address")
        print("\t-p/--port \tConnect to this port")

    def print_commands(self):
        print("COMMANDS")
        print("\thelp - get help on a command")
        print("\taddfeed - subscribe to a new feed")
        print("\tlistfeeds - list all subscribed feeds")
        print("\tdelfeed - unsubscribe from a feed")
        print("\tstatus - print item counts")
        print("\tforce-update - refetch all feeds")
        print("\tconfig - change / query configuration variables")
        print("\tone-config - change / query one configuration variable")
        print("\texport - export feed list as OPML")
        print("\timport - import feed list from OPML")
        print("\tkill - cleanly kill the daemon")
        print("\tscript - run script")
        # Let plugins advertise their own commands.
        call_hook("remote_print_commands", [])

    def _wait_response(self, cmd):
        """Block reading daemon responses until one tagged cmd arrives.

        Returns that response's payload (or the whole (cmd, payload) tuple
        when cmd is falsy).  Integer reads are error codes from the socket
        layer (16 appears to mean hangup); ERRORS responses are printed and
        skipped.  Returns None on error or unknown response."""
        r = None
        while True:
            r = self.read()
            if type(r) == int:
                if r == 16:
                    print("Server hung up.")
                else:
                    print("Got code: %d" % r)
                    print("Please check daemon-log for exception.")
                return
            elif type(r) == tuple:
                if not cmd:
                    return r
                if r[0] == cmd:
                    return r[1]
                elif r[0] == "ERRORS":
                    print("ERRORS!")
                    for key in list(r[1].keys()):
                        for val, err in r[1][key]:
                            print("%s -> %s: %s" % (key, val, err))
            elif r:
                print("Unknown return: %s" % r)
                break
        return None

    def _autoname(self, URL):
        """Fetch URL with feedparser and return its title, or None."""
        extra_headers = { 'User-Agent' :\
                'Canto/0.9.0 + http://codezen.org/canto-ng' }
        try:
            content = feedparser.parse(URL, request_headers = extra_headers)
        except Exception as e:
            print("ERROR: Couldn't determine name: %s" % e)
            return None

        if "title" in content["feed"]:
            return content["feed"]["title"]
        else:
            print("Couldn't find title in feed!")

        return None

    def _get_feeds(self):
        # Ask the daemon for the current feed configuration.
        self.write("CONFIGS", [ "feeds" ])
        c = self._wait_response("CONFIGS")

        return c["feeds"]

    def _addfeed(self, attrs):
        """Subscribe to a feed described by attrs (needs "url"; "name" is
        auto-detected when missing).  Returns True on success."""
        # Fill out
        if "name" not in attrs or not attrs["name"]:
            attrs["name"] = self._autoname(attrs["url"])
            if not attrs["name"]:
                print("Failed to autoname, please specify!")
                return False

        print("Adding feed %s - %s" % (attrs["url"], attrs["name"]))

        # SET merges the config options, so f will be appended to the
        # current value of "feeds", rather than overwriting.

        self.write("SETCONFIGS", { "feeds" : [ attrs ] } )
        # PING/PONG appears to act as a barrier so the daemon has processed
        # the SETCONFIGS before we return.
        self.write("PING", [])
        self._wait_response("PONG")

        return True

    # The cmd_* docstrings below double as the runtime CLI help text
    # (shown by the "help" command), so they are kept verbatim.

    def cmd_addfeed(self):
        """USAGE: canto-remote addfeed [URL] (option=value) ...
    Where URL is the feed's URL. You can also specify options for the feed:

        name = Feed name (if not specified remote will attempt to lookup)
        rate = Rate, in minutes, at which this feed should be fetched.

        username = Username (if necessary) for password protected feeds.
        password = Password for password protected feeds."""

        if len(sys.argv) < 2:
            return False

        feed = { "url" : sys.argv[1] }
        # NOTE(review): `name` is assigned but never used below.
        name = None

        # Grab any feedopts from the commandline.

        for arg in sys.argv[2:]:
            opt, val = escsplit(arg, "=", 1, 1, True)
            if not opt or not val:
                print("ERROR: can't parse '%s' as x=y setting." % arg)
                continue
            feed[opt] = val

        return self._addfeed(feed)

    def cmd_listfeeds(self):
        """USAGE: canto-remote listfeeds
    Lists all tracked feeds."""

        if len(sys.argv) > 1:
            return False

        for idx, f in enumerate(self._get_feeds()):
            s = ("%d. " % idx) + f["name"] + " "

            if "alias" in f:
                s += "(" + f["alias"] + ")"

            s += "\n" + f["url"] + "\n"
            print(s)

    def cmd_delfeed(self):
        """USAGE: canto-remote delfeed [URL|name|alias]
    Unsubscribe from a feed."""
        if len(sys.argv) != 2:
            return False

        term = sys.argv[1]

        # A feed can be identified by URL, name, index, or alias.
        for idx, f in enumerate(self._get_feeds()):
            matches = [ f["url"], f["name"], "%s" % idx]
            if "alias" in f:
                matches.append(f["alias"])

            if term in matches:
                print("Unsubscribing from %s" % f["url"])
                self.write("DELCONFIGS", { "feeds" : [ f ] })

    def _config(self, args, evaled=False):
        """Set and/or read config variables given as "a.b.c" or "a.b.c=val".

        With evaled=True, values are eval'd (trusted local input only).
        Every mentioned variable is read back and printed after any sets."""
        sets = {}
        gets = []

        for arg in args:

            if "=" not in arg:
                gets.append(arg)
                continue

            var, val = escsplit(arg, "=", 1, 1)
            var = var.lstrip().rstrip()

            # We'll want to read back any value, regardless
            gets.append(var)

            if evaled:
                try:
                    val = eval(val, {},{})
                    val_ok = True
                except Exception as e:
                    print("Unable to eval value: %s - %s" % (val, e))
                    val_ok = False
            else:
                val_ok = True

            if val_ok:
                val_ok, ret = assign_to_dict(sets, var, val)
                if not val_ok:
                    print(ret)

        if sets:
            self.write("SETCONFIGS", sets)

        self.write("CONFIGS", [])
        c = self._wait_response("CONFIGS")

        for var in gets:
            valid, value = access_dict(c, var)
            if valid:
                print("%s = %s" % (var, value))
            else:
                print("Couldn't get %s!" % var)

        return True

    def cmd_one_config(self):
        """USAGE: canto-remote one-config [--eval] [option] ( = value)
    Where option is a full variable declaration like 'section.variable' and
    value is any string. If value is omitted, the current value will be printed.
313 | 314 | This differs from config as only one option can be set/got at a time, but 315 | it allows lax argument parsing (i.e. one-config CantoCurses.browser = 316 | firefox will work as expected, without quoting.) 317 | 318 | If the value you're setting is a type other than string, you must specify 319 | --eval and the value will be eval'd into a proper type. 320 | 321 | NOTE: validation is done by the client that uses the variable, canto-remote 322 | will let you give bad values, or even set values to non-existent 323 | variables.""" 324 | 325 | if len(sys.argv) < 2: 326 | return False 327 | 328 | if sys.argv[1] == "--eval": 329 | return self._config([" ".join(sys.argv[2:])], True) 330 | return self._config([" ".join(sys.argv[1:])], False) 331 | 332 | def cmd_config(self): 333 | """USAGE: canto-remote config [--eval] [option](=value) ... 334 | Where option is a full variable declaration like 'section.variable' and 335 | value is any string. If value is omitted, the current value will be printed. 336 | 337 | This differs from one-config as multiple sets/gets can be done, but it is 338 | more strict in terms of argument parsing. 339 | 340 | If the value you're setting is a type other than string, you must specify 341 | --eval and the value will be eval'd into a proper type. 
342 | 343 | NOTE: validation is done by the client that uses the variable, canto-remote 344 | will let you give bad values, or even set values to non-existent 345 | variables.""" 346 | 347 | if len(sys.argv) < 2: 348 | return False 349 | 350 | if sys.argv[1] == "--eval": 351 | return self._config(sys.argv[2:], True) 352 | return self._config(sys.argv[1:], False) 353 | 354 | def cmd_export(self): 355 | """USAGE: canto-remote export 356 | 357 | This will print an OPML file to standard output.""" 358 | 359 | print("""""") 360 | print("""\t""") 361 | for f in self._get_feeds(): 362 | print("""\t\t""" %\ 363 | (xml_escape(f["name"].replace("\"","\\\"")), 364 | xml_escape(f["url"]))) 365 | 366 | print("""\t""") 367 | print("""""") 368 | 369 | def cmd_import(self): 370 | """USAGE: canto-remote import [OPML file] 371 | 372 | This will automatically import feeds from an OPML file, which can be 373 | generated by many different feed readers and other programs.""" 374 | 375 | if len(sys.argv) != 2: 376 | return False 377 | 378 | opmlpath = sys.argv[1] 379 | 380 | try: 381 | data = open(opmlpath, "r").read() 382 | except Exception as e: 383 | print("Couldn't read OPML file:") 384 | traceback.print_exc() 385 | return 386 | 387 | feeds = [] 388 | def parse_opml(name, attrs): 389 | # Skip elements we don't care about. 390 | if name != "outline": 391 | return 392 | 393 | # Skip outline elements with unknown type. 
394 | if "type" in attrs and attrs["type"] not in ["pie","rss"]: 395 | return 396 | 397 | # Skip outline elements with type, but no URL 398 | if "xmlUrl" not in attrs: 399 | return 400 | 401 | f = { "url" : attrs["xmlUrl"], "name" : None } 402 | if "text" in attrs: 403 | f["name"] = attrs["text"] 404 | 405 | feeds.append(f) 406 | 407 | parser = xml.parsers.expat.ParserCreate() 408 | parser.StartElementHandler = parse_opml 409 | parser.Parse(data.encode("UTF-8"), 1) 410 | 411 | for feed in feeds: 412 | self._addfeed(feed) 413 | 414 | def cmd_script(self): 415 | """USAGE canto-remote script (scriptfile) 416 | 417 | Run script from scriptfile or stdin. 418 | 419 | Note: This is intended for testing and does not gracefully handle errors.""" 420 | 421 | if len(sys.argv) not in [1, 2]: 422 | return False 423 | 424 | if len(sys.argv) == 1: 425 | lines = sys.stdin.readlines() 426 | else: 427 | f = open(sys.argv[1], "r") 428 | lines = f.readlines() 429 | f.close() 430 | 431 | pp = pprint.PrettyPrinter() 432 | 433 | for line in lines: 434 | line = line[:-1].lstrip() 435 | print(line) 436 | sys.__stdout__.flush() 437 | 438 | # Wait for n responses. 439 | 440 | if line.startswith("REMOTE_WAIT "): 441 | num = int(line.split(" ", 1)[-1]) 442 | for i in range(num): 443 | r = self._wait_response(None) 444 | print(pp.pformat(r)) 445 | sys.__stdout__.flush() 446 | 447 | elif line.startswith("REMOTE_IGNORE "): 448 | num = int(line.split(" ", 1)[-1]) 449 | for i in range(num): 450 | self._wait_response(None) 451 | 452 | # Hang with socket open so that the daemon thinks 453 | # we're using any data we've requested. Script runners 454 | # must be smart enough to signal-kill this remote. 
455 | 456 | elif line.startswith("REMOTE_HANG"): 457 | while True: 458 | time.sleep(1000) 459 | 460 | # Skip comments / blank 461 | 462 | elif line == '' or line.startswith("#"): 463 | continue 464 | 465 | else: 466 | cmd, arg = line.split(' ', 1) 467 | self.write(cmd, eval(arg)) 468 | 469 | def cmd_kill(self): 470 | """USAGE: canto-remote kill 471 | 472 | Cleanly kill the connected daemon.""" 473 | 474 | self.write("DIE", {}) 475 | 476 | def cmd_force_update(self): 477 | """USAGE: canto-remote force-update 478 | 479 | Force fetch of all feeds.""" 480 | 481 | self.write("FORCEUPDATE", {}) 482 | 483 | def _numstate(self, tag, state): 484 | self.write("AUTOATTR", [ "canto-state" ]) 485 | self.write("ITEMS", [ tag ]) 486 | 487 | items = [] 488 | 489 | cmd, r = self._wait_response(None) 490 | while cmd != "ITEMSDONE": 491 | if cmd == "ITEMS": 492 | items.extend(r[tag]) 493 | cmd, r = self._wait_response(None) 494 | 495 | if len(items) == 0: 496 | return 0 497 | 498 | attrs = self._wait_response("ATTRIBUTES") 499 | 500 | if state == "unread": 501 | return len([ x for x in items if "read" not in attrs[x]["canto-state"]]) 502 | elif state == "read": 503 | return len([ x for x in items if "read" in attrs[x]["canto-state"]]) 504 | else: 505 | return len(items) 506 | 507 | def cmd_status(self): 508 | """USAGE: canto-remote status (--tag=tag) (--read|--total|--tags) 509 | 510 | Print the number of unread items tracked by the daemon. 511 | 512 | --tags can be given to print a summary for all existing tags 513 | 514 | --tag can be given to print the number of items in that particular tag 515 | (i.e. 
"maintag:Slashdot", "user:cool") 516 | 517 | --read can be given to print the number of read items 518 | --total can be given to print the total number of items 519 | 520 | NOTE: This is still subject to defaults.global_transform, and defined tag 521 | transforms, so if you're using filter_read, for example, it will never 522 | return a single read item (so --total will have no effect and --read will 523 | always return 0).""" 524 | 525 | state = "unread" 526 | if "--read" in sys.argv: 527 | state = "read" 528 | if "--total" in sys.argv: 529 | state = "all" 530 | 531 | self.write("LISTTAGS","") 532 | t = self._wait_response("LISTTAGS") 533 | 534 | if "--tags" in sys.argv: 535 | for tag in t: 536 | print("%s : %s" % (tag, self._numstate(tag, state))) 537 | elif "--tag" in sys.argv: 538 | if "--tag" == sys.argv[-1]: 539 | print("--tag must be followed by a tag name") 540 | sys.exit(-1) 541 | 542 | tag = sys.argv[sys.argv.index("--tag") + 1] 543 | if tag not in t: 544 | print("Unknown tag %s - use --tags to list known tags" % tag) 545 | sys.exit(-1) 546 | 547 | print("%s : %s" % (tag, self._numstate(tag, state))) 548 | else: 549 | print("%s" % sum([self._numstate(tag, state) for tag in t if tag.startswith("maintag:")])) 550 | 551 | def cmd_help(self): 552 | """USAGE: canto-remote help [command]""" 553 | if len(sys.argv) < 2: 554 | return False 555 | 556 | command = "cmd_" + sys.argv[1].replace("-","_") 557 | 558 | if hasattr(self, command): 559 | print(getattr(self, command).__doc__) 560 | else: 561 | print(self.cmd_help.__doc__) 562 | self.print_commands() 563 | 564 | def handle_args(self): 565 | if len(sys.argv) < 1: 566 | self.print_help() 567 | return 568 | 569 | command = "cmd_" + sys.argv[0].replace("-","_") 570 | 571 | if hasattr(self, command): 572 | func = getattr(self, command) 573 | r = func() 574 | if r == False: 575 | print(func.__doc__) 576 | else: 577 | self.print_help() 578 | 
-------------------------------------------------------------------------------- /canto_next/canto_backend.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | #Canto - RSS reader backend 3 | # Copyright (C) 2016 Jack Miller 4 | # 5 | # This program is free software; you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License version 2 as 7 | # published by the Free Software Foundation. 8 | 9 | # This Backend class is the core of the daemon's specific protocol. 10 | 11 | CANTO_PROTOCOL_VERSION = 0.9 12 | 13 | from .feed import allfeeds, wlock_all, stop_feeds, rlock_feed_objs, runlock_feed_objs 14 | from .encoding import encoder 15 | from .server import CantoServer 16 | from .config import config, parse_locks, parse_unlocks 17 | from .storage import CantoShelf 18 | from .fetch import CantoFetch 19 | from .hooks import on_hook, call_hook 20 | from .tag import alltags 21 | from .transform import eval_transform 22 | from .plugins import PluginHandler, Plugin, try_plugins, set_program 23 | from .rwlock import alllocks, write_lock, read_lock 24 | from .locks import * 25 | 26 | import traceback 27 | import logging 28 | import signal 29 | import fcntl 30 | import errno 31 | import time 32 | import sys 33 | import os 34 | 35 | # By default this will log to stderr. 36 | logging.basicConfig( 37 | format = "%(asctime)s : %(name)s -> %(message)s", 38 | datefmt = "%H:%M:%S", 39 | level = logging.INFO 40 | ) 41 | 42 | log = logging.getLogger("CANTO-DAEMON") 43 | 44 | class DaemonBackendPlugin(Plugin): 45 | pass 46 | 47 | # Index threads and the main thread no longer take multiple locks at once. The 48 | # cmd_* functions in CantoBackend only need to worry about deadlocking with 49 | # each other. 50 | 51 | class CantoBackend(PluginHandler, CantoServer): 52 | def __init__(self): 53 | 54 | # Nothing referenced before try_plugins should be 55 | # pluggable. 
56 | 57 | self.plugin_attrs = {} 58 | 59 | # Shelf for feeds: 60 | self.fetch = None 61 | self.fetch_manual = False 62 | self.fetch_force = False 63 | 64 | # Whether fetching is inhibited. 65 | self.no_fetch = False 66 | 67 | self.watches = { "new_tags" : [], 68 | "del_tags" : [], 69 | "config" : [], 70 | "tags" : {} } 71 | 72 | self.autoattr = {} 73 | 74 | # Per socket transforms. 75 | self.socket_transforms = {} 76 | 77 | self.shelf = None 78 | 79 | # No bad arguments. 80 | version = "canto-daemon " + REPLACE_VERSION + " " + GIT_HASH 81 | optl = self.common_args("nhc:",["nofetch","help","cache="], version) 82 | if optl == -1: 83 | sys.exit(-1) 84 | 85 | if self.args(optl): 86 | sys.exit(-1) 87 | 88 | # No invalid paths. 89 | if self.ensure_paths(): 90 | sys.exit(-1) 91 | 92 | # Get pid lock. 93 | if self.pid_lock(): 94 | sys.exit(-1) 95 | 96 | # Previous to this line, all output is just error messages to stderr. 97 | self.set_log() 98 | 99 | # Initial log chatter. 100 | log.info(version) 101 | 102 | if self.verbosity: 103 | rootlog = logging.getLogger() 104 | rootlog.setLevel(max(rootlog.level - 10 * self.verbosity,0)) 105 | log.info("verbosity = %d" % self.verbosity) 106 | 107 | log.info("conf_dir = %s" % self.conf_dir) 108 | 109 | # Evaluate any plugins 110 | set_program("canto-daemon") 111 | try_plugins(self.conf_dir, self.plugin_default, self.disabled_plugins, 112 | self.enabled_plugins) 113 | 114 | PluginHandler.__init__(self) 115 | 116 | self.plugin_class = DaemonBackendPlugin 117 | self.update_plugin_lookups() 118 | 119 | if self.no_fetch: 120 | log.info("NOFETCH, will not be automatically updating.") 121 | 122 | # Actual start. 
123 | self.get_storage() 124 | 125 | self.get_config() 126 | 127 | self.get_fetch() 128 | 129 | self.setup_hooks() 130 | 131 | self.sfile = self.conf_dir + "/.canto_socket" 132 | log.info("Listening on unix socket: %s" % self.sfile) 133 | 134 | try: 135 | if self.port < 0: 136 | CantoServer.__init__(self, self.sfile, self.socket_command) 137 | else: 138 | log.info("Listening on interface %s:%d" %\ 139 | (self.addr, self.port)) 140 | CantoServer.__init__(self, self.sfile, self.socket_command,\ 141 | port = self.port, interface = self.addr) 142 | except Exception as e: 143 | err = "Error: %s" % e 144 | print(err) 145 | log.error(err) 146 | call_hook("daemon_exit", []) 147 | sys.exit(-1) 148 | 149 | # Signal handlers kickoff after everything else is init'd 150 | 151 | self.interrupted = 0 152 | 153 | signal.signal(signal.SIGINT, self.sig_int) 154 | signal.signal(signal.SIGTERM, self.sig_int) 155 | signal.signal(signal.SIGUSR1, self.sig_usr) 156 | 157 | self.start() 158 | 159 | def on_config_change(self, change, originating_socket): 160 | 161 | config.parse(False, change) 162 | 163 | log.debug("config.errors = %s", config.errors) 164 | 165 | if config.errors: 166 | self.write(originating_socket, "ERRORS", config.errors) 167 | config.parse() 168 | 169 | # No changes actually realized, bail 170 | return 171 | else: 172 | config.write() 173 | 174 | # Kill feeds that haven't been re-instantiated. 175 | allfeeds.all_parsed() 176 | 177 | # Force check of fetching. This automatically starts the fetch. For new 178 | # feeds, but also takes any new settings (like rates) into account. 179 | 180 | self.fetch_force = True 181 | 182 | # Pretend that the sockets *other* than the ones that made the change 183 | # issued a CONFIGS for each of the root keys. 184 | 185 | for socket in self.watches["config"]: 186 | # Don't echo changes back to socket that made them. 
187 | if socket != originating_socket: 188 | self.in_configs(list(change.keys()), socket) 189 | 190 | # Notify clients of new tags. 191 | 192 | @read_lock(watch_lock) 193 | def on_new_tag(self, tags): 194 | for socket in self.watches["new_tags"]: 195 | self.write(socket, "NEWTAGS", tags) 196 | 197 | # Propagate tag changes to watching sockets. 198 | 199 | @read_lock(watch_lock) 200 | def on_tag_change(self, tag): 201 | if tag in self.watches["tags"]: 202 | for socket in self.watches["tags"][tag]: 203 | self.write(socket, "TAGCHANGE", tag) 204 | 205 | # Notify clients of dead tags: 206 | 207 | @read_lock(watch_lock) 208 | def on_del_tag(self, tags): 209 | for socket in self.watches["del_tags"]: 210 | self.write(socket, "DELTAGS", tags) 211 | 212 | # If a socket dies, it's no longer watching any events. 213 | 214 | @write_lock(socktran_lock) 215 | @write_lock(watch_lock) 216 | def on_kill_socket(self, socket): 217 | while socket in self.watches["config"]: 218 | self.watches["config"].remove(socket) 219 | 220 | while socket in self.watches["new_tags"]: 221 | self.watches["new_tags"].remove(socket) 222 | 223 | while socket in self.watches["del_tags"]: 224 | self.watches["del_tags"].remove(socket) 225 | 226 | for tag in self.watches["tags"]: 227 | while socket in self.watches["tags"][tag]: 228 | self.watches["tags"][tag].remove(socket) 229 | 230 | if socket in list(self.socket_transforms.keys()): 231 | del self.socket_transforms[socket] 232 | 233 | # We need to be alerted on certain events, ensure 234 | # we get notified about them. 
235 | 236 | def setup_hooks(self): 237 | on_hook("daemon_new_tag", self.on_new_tag) 238 | on_hook("daemon_del_tag", self.on_del_tag) 239 | on_hook("daemon_config_change", self.on_config_change) 240 | on_hook("daemon_tag_change", self.on_tag_change) 241 | on_hook("server_kill_socket", self.on_kill_socket) 242 | 243 | # For plugins 244 | on_hook("daemon_set_configs", lambda x, y : self.internal_command(x, self.in_setconfigs, y)) 245 | on_hook("daemon_del_configs", lambda x, y : self.internal_command(x, self.in_delconfigs, y)) 246 | on_hook("daemon_get_configs", lambda x, y : self.internal_command(x, self.in_configs, y)) 247 | 248 | # VERSION -> X.Y 249 | 250 | def cmd_version(self, socket, args): 251 | self.write(socket, "VERSION", CANTO_PROTOCOL_VERSION) 252 | 253 | # PING -> PONG 254 | 255 | def cmd_ping(self, socket, args): 256 | self.write(socket, "PONG", "") 257 | 258 | # LISTTAGS -> [ "tag1", "tag2", .. ] 259 | # This makes no guarantee on order *other* than the fact that 260 | # maintag tags will be first, and in feed order. Following tags 261 | # are in whatever order the dict gives them in. 262 | 263 | @read_lock(feed_lock) 264 | @read_lock(tag_lock) 265 | def cmd_listtags(self, socket, args): 266 | r = [] 267 | for feed in allfeeds.get_feeds(): 268 | r.append("maintag:" + feed.name) 269 | for tag in alltags.get_tags(): 270 | if tag not in r: 271 | r.append(tag) 272 | 273 | self.write(socket, "LISTTAGS", r) 274 | 275 | # LISTTRANSFORMS -> [ { "name" : " " } for all defined filters ] 276 | 277 | @read_lock(config_lock) 278 | def cmd_listtransforms(self, socket, args): 279 | transforms = [] 280 | for transform in config.transforms: 281 | transforms.append({"name" : transform["name"]}) 282 | self.write(socket, "LISTTRANSFORMS", transforms) 283 | 284 | # TRANSFORM {} -> return current socket transform, with names instead of 285 | # actual filt objects. 
286 | # TRANSFORM {"string":"transform"} -> set a socket transform 287 | # TRANSFORM {"string": None } -> un set a socket transform 288 | 289 | @write_lock(socktran_lock) 290 | def cmd_transform(self, socket, args): 291 | if not args: 292 | if socket in self.socket_transforms: 293 | str_dict = {} 294 | for filt in self.socket_transforms[socket]: 295 | str_dict[filt] = str(self.socket_transforms[socket][filt]) 296 | self.write(socket, "TRANSFORM", str_dict) 297 | else: 298 | self.write(socket, "TRANSFORM", {}) 299 | return 300 | 301 | if socket not in self.socket_transforms: 302 | self.socket_transforms[socket] = {} 303 | 304 | for key in args: 305 | # Unset beforehand means query. 306 | if not args[key]: 307 | if key in self.socket_transforms[socket]: 308 | self.write(socket, "TRANSFORM", { key : str(self.socket_transforms[socket][key])}) 309 | else: 310 | self.write(socket, "TRANSFORM", { key : "None" }) 311 | continue 312 | 313 | filt = None 314 | try: 315 | filt = eval_transform(args[key]) 316 | except Exception as e: 317 | self.write(socket, "EXCEPT",\ 318 | "Couldn't parse transform: %s\n%s" % (args[key], e)) 319 | continue 320 | 321 | if filt == None: 322 | if key in self.socket_transforms[socket]: 323 | log.debug("Unsetting socket transform %s:%s", socket, key) 324 | del self.socket_transforms[socket][key] 325 | continue 326 | 327 | log.debug("Setting socket transform: %s:%s = %s", socket, key, filt) 328 | self.socket_transforms[socket][key] = filt 329 | 330 | # AUTOATTR [ attrs ... ] -> Follow up each items request with 331 | # an attributes request for attrs. 332 | 333 | # This command is intended to reduce round trip time and allow 334 | # clients to become informative quickly by making the individual 335 | # story IDs unnecessary to request information about them. 
336 | 337 | # Hold attr_lock just to keep cmd_item from trying to use autoattr 338 | @write_lock(attr_lock) 339 | def cmd_autoattr(self, socket, args): 340 | self.autoattr[socket] = args 341 | 342 | # ITEMS [tags] -> { tag : [ ids ], tag2 : ... } 343 | 344 | @read_lock(attr_lock) 345 | @read_lock(feed_lock) 346 | def _apply_socktrans(self, socket, tag): 347 | feeds = allfeeds.items_to_feeds(tag) 348 | rlock_feed_objs(feeds) 349 | socktran_lock.acquire_read() 350 | try: 351 | 352 | for filt in self.socket_transforms[socket]: 353 | tag = self.socket_transforms[socket][filt](tag) 354 | finally: 355 | socktran_lock.release_read() 356 | runlock_feed_objs(feeds) 357 | return tag 358 | 359 | def cmd_items(self, socket, args): 360 | ids = [] 361 | response = {} 362 | 363 | for tag in args: 364 | items = alltags.get_tag(tag) 365 | 366 | if socket in self.socket_transforms: 367 | items = self._apply_socktrans(socket, items) 368 | 369 | attr_list = [] 370 | 371 | if len(items) == 0: 372 | self.write(socket, "ITEMS", { tag : [] }) 373 | else: 374 | attr_req = {} 375 | if socket in self.autoattr: 376 | for id in items: 377 | attr_req[id] = self.autoattr[socket][:] 378 | attr_list.append(attr_req) 379 | 380 | self.write(socket, "ITEMS", { tag : items }) 381 | 382 | self.write(socket, "ITEMSDONE", {}) 383 | 384 | for attr_req in attr_list: 385 | self.cmd_attributes(socket, attr_req) 386 | 387 | # ATTRIBUTES { id : [ attribs .. ] .. } -> 388 | # { id : { attribute : value } ... } 389 | 390 | # Hold feed_lock so that get_attributes won't fail on a missing feed, but 391 | # items_to_feeds can still throw an exception if attributes requests come 392 | # in for items from removed feeds. 
393 | 394 | @read_lock(feed_lock) 395 | def cmd_attributes(self, socket, args): 396 | ret = {} 397 | feeds = allfeeds.items_to_feeds(list(args.keys())) 398 | for f in feeds: 399 | ret.update(f.get_attributes(feeds[f], args)) 400 | 401 | self.write(socket, "ATTRIBUTES", ret) 402 | 403 | # SETATTRIBUTES { id : { attribute : value } ... } -> None 404 | 405 | @read_lock(feed_lock) 406 | @write_lock(tag_lock) 407 | def cmd_setattributes(self, socket, args): 408 | 409 | feeds = allfeeds.items_to_feeds(list(args.keys())) 410 | for f in feeds: 411 | f.set_attributes(feeds[f], args) 412 | 413 | tags = alltags.items_to_tags(list(args.keys())) 414 | for t in tags: 415 | call_hook("daemon_tag_change", [ t ]) 416 | 417 | # CONFIGS [ "top_sec", ... ] -> { "top_sec" : full_value } 418 | 419 | # Internally, called only by functions that hold read or write on 420 | # config_lock 421 | 422 | def in_configs(self, args, socket=None): 423 | if args: 424 | ret = {} 425 | for topsec in args: 426 | if topsec in config.json: 427 | ret[topsec] = config.json[topsec] 428 | else: 429 | ret = config.json 430 | 431 | if socket: 432 | self.write(socket, "CONFIGS", ret) 433 | return ret 434 | 435 | # External, needs to grab lock. 
436 | 437 | @read_lock(config_lock) 438 | def cmd_configs(self, socket, args): 439 | ret = self.in_configs(args, socket) 440 | 441 | # SETCONFIGS { "key" : "value", ...} 442 | 443 | def in_setconfigs(self, args): 444 | self.cmd_setconfigs(None, args) 445 | return config.json 446 | 447 | def cmd_setconfigs(self, socket, args): 448 | parse_locks() 449 | 450 | config.merge(args.copy()) 451 | 452 | # config_change handles it's own locking 453 | call_hook("daemon_config_change", [args, socket]) 454 | 455 | parse_unlocks() 456 | 457 | # DELCONFIGS { "key" : "DELETE", ...} 458 | 459 | def in_delconfigs(self, args): 460 | self.cmd_delconfigs(None, args) 461 | return config.json 462 | 463 | def cmd_delconfigs(self, socket, args): 464 | parse_locks() 465 | 466 | config.delete(args.copy()) 467 | 468 | # config_change handles it's own locking 469 | call_hook("daemon_config_change", [args, socket]) 470 | 471 | parse_unlocks() 472 | 473 | # WATCHCONFIGS 474 | 475 | @write_lock(watch_lock) 476 | def cmd_watchconfigs(self, socket, args): 477 | if socket not in self.watches["config"]: 478 | self.watches["config"].append(socket) 479 | 480 | # WATCHNEWTAGS 481 | 482 | @write_lock(watch_lock) 483 | def cmd_watchnewtags(self, socket, args): 484 | if socket not in self.watches["new_tags"]: 485 | self.watches["new_tags"].append(socket) 486 | 487 | # WATCHDELTAGS 488 | 489 | @write_lock(watch_lock) 490 | def cmd_watchdeltags(self, socket, args): 491 | if socket not in self.watches["del_tags"]: 492 | self.watches["del_tags"].append(socket) 493 | 494 | # WATCHTAGS [ "tag", ... 
] 495 | 496 | @write_lock(watch_lock) 497 | def cmd_watchtags(self, socket, args): 498 | for tag in args: 499 | log.debug("socket %s watching tag %s", socket, tag) 500 | if tag in self.watches["tags"]: 501 | if socket not in self.watches["tags"][tag]: 502 | self.watches["tags"][tag].append(socket) 503 | else: 504 | self.watches["tags"][tag] = [socket] 505 | 506 | # UPDATE {} 507 | 508 | # Note that this is intended to allow clients to take manual 509 | # control when canto is started with --nofetch and doesn't 510 | # override rates or any other factors in updating. 511 | 512 | def cmd_update(self, socket, args): 513 | self.fetch_manual = True 514 | self.fetch_force = False 515 | 516 | # FORCEUPDATE {} 517 | 518 | # This command, on the other hand, *will* force the timers. 519 | 520 | def cmd_forceupdate(self, socket, args): 521 | self.fetch_manual = True 522 | self.fetch_force = True 523 | 524 | # The workhorse that maps all requests to their handlers. 525 | 526 | def socket_command(self, socket, data): 527 | cmd, args = data 528 | 529 | if cmd == "DIE": 530 | log.info("Received DIE.") 531 | self.interrupted = True 532 | else: 533 | cmdf = "cmd_" + cmd.lower() 534 | if hasattr(self, cmdf): 535 | func = getattr(self, cmdf) 536 | 537 | call_hook("daemon_pre_" + cmd.lower(), [socket, args]) 538 | 539 | try: 540 | func(socket, args) 541 | except Exception as e: 542 | tb = "".join(traceback.format_exc()) 543 | self.write(socket, "EXCEPT", tb) 544 | log.error("Protocol exception:") 545 | log.error("\n" + tb) 546 | 547 | call_hook("daemon_post_" + cmd.lower(), [socket, args]) 548 | else: 549 | log.info("Got unknown command: %s" % (cmd)) 550 | 551 | def internal_command(self, cb, func, args): 552 | r = func(args) 553 | if cb: 554 | cb(r) 555 | 556 | def run(self): 557 | 558 | # Start fetch threads to load from disk. 
No need for locking as we 559 | # haven't started any threads yet 560 | self.fetch.fetch(True, True) 561 | 562 | log.debug("Beginning to serve...") 563 | call_hook("daemon_serving", []) 564 | while 1: 565 | if self.interrupted: 566 | log.info("Interrupted. Exiting.") 567 | return 568 | 569 | # Clean up any dead connection threads. 570 | self.no_dead_conns() 571 | 572 | # Clean up any threads done updating. 573 | self.fetch.reap() 574 | 575 | # Check whether feeds need to be updated and fetch 576 | # them if necessary. 577 | 578 | if (not self.no_fetch or self.fetch_manual): 579 | self.fetch.fetch(self.fetch_force, False) 580 | 581 | self.fetch_manual = False 582 | self.fetch_force = False 583 | 584 | call_hook("daemon_end_loop", []) 585 | 586 | time.sleep(1) 587 | 588 | # Shutdown cleanly 589 | 590 | def cleanup(self): 591 | # Stop feeds, will cause feed.index() threads to bail without 592 | # Messing with the disk. 593 | 594 | stop_feeds() 595 | 596 | # Grab locks to keep any other write usage from happening. 597 | 598 | wlock_all() 599 | 600 | self.shelf.close() 601 | 602 | call_hook("daemon_exit", []) 603 | 604 | # The rest of this is bonus, the important part is to protect the disk. 605 | log.debug("DB shutdown.") 606 | 607 | # Force all connection threads to end. 608 | 609 | self.exit() 610 | 611 | # Wait for all fetches to end. 612 | 613 | self.fetch.reap(True) 614 | 615 | # Delete the socket file, so it can only be used when we're actually 616 | # listening. 617 | 618 | self.remove_socketfile() 619 | 620 | # Unlock the pidfile so another daemon could take over. Probably don't 621 | # have to do this since we're about to sys.exit anyway, but why not. 
622 | 623 | self.pid_unlock() 624 | 625 | log.info("Exiting cleanly.") 626 | 627 | def print_help(self): 628 | print("USAGE: canto-daemon [options]") 629 | print("\t-h/--help\tThis help") 630 | print("\t-V/--version\tPrint version") 631 | print("\t-v/\t\tVerbose logging (for debug)") 632 | print("\t-D/--dir \tSet configuration directory.") 633 | print("\t-n/--nofetch\tJust serve content, don't fetch new content.") 634 | print("\n\nPlugin control\n") 635 | print("\t--noplugins\t\t\t\tDisable plugins") 636 | print("\t--enableplugins 'plugin1 plugin2...'\tEnable single plugins (overrides --noplugins)") 637 | print("\t--disableplugins 'plugin1 plugin2...'\tDisable single plugins") 638 | print("\nNetwork control\n") 639 | print("NOTE: These should be used in conjunction with SSH port forwarding to be secure\n") 640 | print("\t-a/--address \tBind to interface with this address") 641 | print("\t-p/--port \tBind to this port") 642 | 643 | # This function parses and validates all of the command line arguments. 
644 | def args(self, optlist): 645 | for opt, arg in optlist: 646 | if opt in ["-n", "--nofetch"]: 647 | self.no_fetch = True 648 | elif opt in ['-h', '--help']: 649 | self.print_help() 650 | sys.exit(0) 651 | return 0 652 | 653 | def sig_int(self, a, b): 654 | log.info("Received INT") 655 | self.interrupted = 1 656 | 657 | def sig_usr(self, a, b): 658 | import threading 659 | import gc 660 | 661 | held_locks = {} 662 | code = {} 663 | curthreads = threading.enumerate() 664 | 665 | for threadId, stack in sys._current_frames().items(): 666 | name = str(threadId) 667 | for ct in curthreads: 668 | if ct.ident == threadId: 669 | name = ct.name 670 | 671 | code[name] = ["NAME: %s" % name] 672 | for filename, lineno, fname, line in traceback.extract_stack(stack): 673 | code[name].append('FILE: "%s", line %d, in %s' % (filename, lineno, fname)) 674 | if line: 675 | code[name].append(" %s" % (line.strip())) 676 | 677 | held_locks[name] = "" 678 | for lock in alllocks: 679 | if lock.writer_id == threadId: 680 | held_locks[name] += ("%s(w)" % lock.name) 681 | continue 682 | for reader_id, reader_stack in lock.reader_stacks: 683 | if reader_id == threadId: 684 | held_locks[name] += ("%s(r)" % lock.name) 685 | 686 | for k in code: 687 | log.info('\n\nLOCKS: %s \n%s' % (held_locks[k], '\n'.join(code[k]))) 688 | 689 | log.info("\n\nSTACKS:") 690 | for lock in alllocks: 691 | for (reader_id, reader_stack) in lock.reader_stacks: 692 | log.info("Lock %s (%s readers)" % (lock.name, lock.readers)) 693 | log.info("Lock reader (thread %s):" % (reader_id,)) 694 | log.info(''.join(reader_stack)) 695 | 696 | for writer_stack in lock.writer_stacks: 697 | log.info("Lock %s (%s readers)" % (lock.name, lock.readers)) 698 | log.info("Lock writer (thread %s):" % (lock.writer_id,)) 699 | log.info(''.join(writer_stack)) 700 | 701 | self.shelf.sync() 702 | gc.collect() 703 | 704 | # If we've got pympler installed, output a summary of memory usage. 
705 | 706 | try: 707 | from pympler import summary, muppy 708 | summary.print_(summary.summarize(muppy.get_objects())) 709 | except: 710 | pass 711 | 712 | # This function makes sure that the configuration paths are all R/W or 713 | # creatable. 714 | 715 | def ensure_paths(self): 716 | if os.path.exists(self.conf_dir): 717 | if not os.path.isdir(self.conf_dir): 718 | log.error("Error: %s is not a directory." % self.conf_dir) 719 | return -1 720 | if not os.access(self.conf_dir, os.R_OK): 721 | log.error("Error: %s is not readable." % self.conf_dir) 722 | return -1 723 | if not os.access(self.conf_dir, os.W_OK): 724 | log.error("Error: %s is not writable." % self.conf_dir) 725 | return -1 726 | else: 727 | try: 728 | os.makedirs(self.conf_dir) 729 | except Exception as e: 730 | log.error("Exception making %s : %s" % (self.conf_dir, e)) 731 | return -1 732 | return self.ensure_files() 733 | 734 | def ensure_files(self): 735 | for f in [ "feeds", "conf", "daemon-log", "pid"]: 736 | p = self.conf_dir + "/" + f 737 | if os.path.exists(p): 738 | if not os.path.isfile(p): 739 | log.error("Error: %s is not a file." % p) 740 | return -1 741 | if not os.access(p, os.R_OK): 742 | log.error("Error: %s is not readable." % p) 743 | return -1 744 | if not os.access(p, os.W_OK): 745 | log.error("Error: %s is not writable." % p) 746 | return -1 747 | 748 | # These paths are now guaranteed to read/writable. 
749 | 750 | self.feed_path = self.conf_dir + "/feeds" 751 | self.pid_path = self.conf_dir + "/pid" 752 | self.log_path = self.conf_dir + "/daemon-log" 753 | self.conf_path = self.conf_dir + "/conf" 754 | 755 | return None 756 | 757 | def pid_lock(self): 758 | self.pidfile = open(self.pid_path, "a+") 759 | try: 760 | fcntl.flock(self.pidfile.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB) 761 | self.pidfile.seek(0, 0) 762 | self.pidfile.truncate() 763 | self.pidfile.write("%d" % os.getpid()) 764 | self.pidfile.flush() 765 | except IOError as e: 766 | if e.errno == errno.EAGAIN: 767 | log.error("Error: Another canto-daemon is running here.") 768 | return -1 769 | raise 770 | return None 771 | 772 | def pid_unlock(self): 773 | log.debug("Unlocking pidfile.") 774 | fcntl.flock(self.pidfile.fileno(), fcntl.LOCK_UN) 775 | self.pidfile.close() 776 | log.debug("Unlocked.") 777 | 778 | # Reset basic log info to log to the right file. 779 | def set_log(self): 780 | f = open(self.log_path, "w") 781 | os.dup2(f.fileno(), sys.stderr.fileno()) 782 | 783 | # Bring up storage, the only errors possible at this point are 784 | # fatal and handled lower in CantoShelf. 785 | 786 | def get_storage(self): 787 | self.shelf = CantoShelf(self.feed_path) 788 | 789 | # Bring up config, the only errors possible at this point will 790 | # be fatal and handled lower in CantoConfig. 791 | 792 | def get_config(self): 793 | config.init(self.conf_path, self.shelf) 794 | config.parse() 795 | if config.errors: 796 | print("ERRORS:") 797 | for key in list(config.errors.keys()): 798 | for value, error in config.errors[key]: 799 | s = "\t%s -> %s: %s" % (key, value, error) 800 | print(encoder(s)) 801 | 802 | sys.exit(-1) 803 | 804 | def get_fetch(self): 805 | self.fetch = CantoFetch(self.shelf) 806 | 807 | def remove_socketfile(self): 808 | os.unlink(self.sfile) 809 | 810 | def start(self): 811 | try: 812 | self.run() 813 | 814 | # Cleanly shutdown on ^C. 
815 | except KeyboardInterrupt: 816 | pass 817 | 818 | # Pretty print any non-Keyboard exceptions. 819 | except Exception as e: 820 | tb = traceback.format_exc() 821 | log.error("Exiting on exception:") 822 | log.error("\n" + "".join(tb)) 823 | 824 | self.cleanup() 825 | sys.exit(0) 826 | --------------------------------------------------------------------------------