├── .gitignore ├── COPYING ├── Makefile ├── README.md ├── doc ├── ggs ├── ggs.html ├── ggs.md ├── maildir-put ├── maildir-put.html ├── maildir-put.md ├── rss2json ├── rss2json.html ├── rss2json.md ├── ua ├── ua-inline ├── ua-inline.html ├── ua-inline.md ├── ua-proxify ├── ua-proxify.html ├── ua-proxify.md ├── ua-scrapers ├── ua-scrapers.html ├── ua-scrapers.md ├── ua.html └── ua.md ├── ggs ├── README.md └── ggs.go ├── ggsrc.example ├── go.mod ├── go.sum ├── maildir-put ├── README.md ├── cache.go └── maildir-put.go ├── rss2json ├── README.md └── rss2json.go ├── scrapers ├── README.md └── ua-scraper-mal ├── ua-inline ├── README.md └── ua-inline.go ├── ua-proxify ├── README.md ├── get.php └── ua-proxify.go └── weboobmsg2json └── weboobmsg2json /.gitignore: -------------------------------------------------------------------------------- 1 | ggs/ggs 2 | maildir-put/maildir-put 3 | rss2json/rss2json 4 | ua-inline/ua-inline 5 | ua-proxify/ua-proxify 6 | scrapers/ua-scraper-torrent9 7 | tmp-go 8 | *.pyc 9 | node_modules 10 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 
15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to 25 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PREFIX=/usr/local 2 | DESTDIR= 3 | 4 | BINDIR=$(DESTDIR)$(PREFIX)/bin 5 | DOCDIR=$(DESTDIR)$(PREFIX)/share/doc/ua 6 | MANDIR=$(DESTDIR)$(PREFIX)/share/man 7 | 8 | GODIRS=ggs rss2json maildir-put ua-inline ua-proxify 9 | SCRAPERS=mal 10 | 11 | export GOPATH ?= $(PWD)/tmp-go 12 | 13 | .PHONY: all clean doc 14 | 15 | all: ggs/ggs rss2json/rss2json maildir-put/maildir-put ua-inline/ua-inline ua-proxify/ua-proxify 16 | 17 | doc: 18 | test -d doc || mkdir doc 19 | test -f doc/ua.md || ln -s ../README.md doc/ua.md 20 | test -f doc/ua-scrapers.md || ln -s ../scrapers/README.md doc/ua-scrapers.md 21 | for d in $(GODIRS) ; do test -f doc/$$d.md || ln -s ../$$d/README.md doc/$$d.md ; done 22 | cd doc ; for f in *.md ; do ronn $$f ; done 23 | 24 | ggs/ggs: ggs/ggs.go $(GOPATH) 25 | cd ggs; go build 26 | 27 | rss2json/rss2json: rss2json/rss2json.go $(GOPATH) 28 | cd rss2json; go build 29 | 30 | maildir-put/maildir-put: maildir-put/maildir-put.go maildir-put/cache.go $(GOPATH) 31 | cd maildir-put; go build 32 | 33 | ua-inline/ua-inline: ua-inline/ua-inline.go $(GOPATH) 34 | cd ua-inline; go build 35 | 36 | ua-proxify/ua-proxify: ua-proxify/ua-proxify.go $(GOPATH) 37 | cd ua-proxify; go build 38 | 39 | $(GOPATH): 40 | mkdir $(GOPATH) 41 | mkdir $(GOPATH)/bin 42 | mkdir $(GOPATH)/src 
43 | mkdir $(GOPATH)/pkg 44 | 45 | install: all 46 | install -d $(BINDIR) 47 | for f in $(GODIRS) ; do install $$f/$$f $(BINDIR)/ ; done 48 | for s in $(SCRAPERS) ; do install scrapers/ua-scraper-$$s $(BINDIR)/ ; done 49 | install weboobmsg2json/weboobmsg2json $(BINDIR)/ 50 | 51 | install -d $(DOCDIR) 52 | install -d $(MANDIR)/man1/ 53 | install ggsrc.example $(DOCDIR) 54 | for f in doc/*.md doc/*.html ; do install $$f $(DOCDIR)/ ; done 55 | for f in $(GODIRS) ; do gzip < doc/$$f > $(MANDIR)/man1/$$f.1.gz ; done 56 | 57 | clean: 58 | for f in $(GODIRS) ; do rm -f $$f/$$f ; done 59 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # The Universal Aggregator 2 | 3 | This is a set of tools to aggregate all your information into your 4 | maildir. Each tool can be used separately ; you can find a more complete 5 | description in their respective folder. 6 | 7 | * `ggs` is a software which runs commands periodically 8 | * `maildir-put` reads a set of messages from its standard input and puts 9 | them in a maildir 10 | * `rss2json` transforms any RSS/Atom feed into a set of messages that 11 | `maildir-put` can process 12 | * You can write your own producers (scrapers) for maildir-put ; some are 13 | already provided in the `scrapers/` directory. 14 | * You can also put filters, like `ua-inline` or `ua-proxify`. 15 | 16 | ## Usage 17 | 18 | ggs [path-to-configuration-file] 19 | 20 | ## Dependencies 21 | 22 | * Go 23 | * libxml 24 | * [jq](https://stedolan.github.io/jq/) 25 | * For additional scrapers: scrapy, python 3 and nodejs 26 | 27 | ## Installation 28 | 29 | make && sudo make install 30 | 31 | ## Configuration 32 | 33 | See the `ggs` documentation for more information. 
Here is an sample 34 | configuration file, which puts some feeds into `Fun` and `Geek` folders, 35 | some new chapters notification from mangareader into `Entertainment`, 36 | and my Github personal feed into inbox: 37 | 38 | default_timeout=30 39 | 40 | rss() { 41 | command 2000 "rss2json \"$1\" | ua-inline | maildir-put -root $HOME/Maildir-feeds -folder \"$2\"" 42 | } 43 | 44 | mangareader() { 45 | command 2000 "ua-scraper-mangareader -a name=$1 | "\ 46 | "maildir-put -root $HOME/Maildir-feeds -folder Entertainment" 47 | } 48 | 49 | rss http://xkcd.com/atom.xml Fun 50 | rss http://feeds.feedburner.com/smbc-comics/PvLb Fun 51 | rss http://syndication.thedailywtf.com/TheDailyWtf Fun 52 | 53 | rss http://www.reddit.com/r/science/top/.rss Geek 54 | rss http://www.phoronix.com/rss.php Geek 55 | 56 | mangareader naruto 57 | mangareader bleach 58 | mangareader gantz 59 | 60 | rss https://github.com/sloonz.private.atom?token=HIDDEN "" 61 | 62 | ## Weboob compatibility 63 | 64 | You can use [weboob](http://weboob.org/) modules used by 65 | [boobmsg](http://weboob.org/applications/boobmsg) to generate 66 | messages. Configure the modules using `boobmsg`, and use `weboobmsg2json 67 | [module-name]` to generate messages. `[module-name]` can be found in 68 | `~/.config/weboob/backends`. 69 | -------------------------------------------------------------------------------- /doc/ggs: -------------------------------------------------------------------------------- 1 | .\" generated with Ronn/v0.7.3 2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3 3 | . 4 | .TH "GGS" "" "February 2017" "" "" 5 | \fBGGS\fR (Grey Goo Spawner) is a simple software that runs jobs periodically\. It is similar with cron, but with some differences : 6 | . 7 | .IP "\(bu" 4 8 | Whereas \fBcron\fR launches jobs at specific times, \fBggs\fR is mainly interested in intervals\. It will run all jobs at its startup and then will re\-run each job after a certain delay has passed\. 9 | . 
10 | .IP "\(bu" 4 11 | \fBggs\fR has a system of \fBworkers\fR, similar to many servers (like nginx or Apache with MPM Workers) to limit ressource concurrency between your jobs \. 12 | . 13 | .IP "\(bu" 4 14 | You can define a timeout for your jobs, too\. 15 | . 16 | .IP "" 0 17 | . 18 | .SH "Usage" 19 | \fBggs [options] [configuration file]\fR 20 | . 21 | .P 22 | If no configuration file is provided, \fBggs\fR will use \fB~/\.config/ggsrc\fR by default\. 23 | . 24 | .SH "Requirements" 25 | . 26 | .IP "\(bu" 4 27 | jq \fIhttps://stedolan\.github\.io/jq/\fR 28 | . 29 | .IP "" 0 30 | . 31 | .SH "Installation" 32 | \fBgo build ggs\.go && cp ggs /usr/local/bin\fR 33 | . 34 | .SH "Configuration" 35 | Configuration file is a shell script, so same rule as \fBsh\fR applies\. 36 | . 37 | .P 38 | You create a job with the \fBcommand\fR function, which takes two arguments: the delay between launches, and the command to run\. You can specify a timeout (in seconds) by setting the \fBtimeout\fR environnement variable (optional, default: 0 no timeout)\. 39 | . 40 | .IP "" 4 41 | . 42 | .nf 43 | 44 | timeout=30 command 300 "uptime | mail admin@example\.com" 45 | command 5 \'ping \-c 1 github\.com || sudo halt \-p\' 46 | . 47 | .fi 48 | . 49 | .IP "" 0 50 | . 51 | .P 52 | You can also set the number of workers (maximum number of jobs that can run simultaneously): 53 | . 54 | .IP "" 4 55 | . 56 | .nf 57 | 58 | workers=5 #Warning: dont do "workers = 5", spaces matters here ! 59 | . 60 | .fi 61 | . 62 | .IP "" 0 63 | . 64 | .SH "Advanced configuration" 65 | The configuration file is just a shell script which produces a JSON document which maches the structure of the \fBConfig\fR structure\. You can do \fBexec my_script\fR to produce the same JSON with a script in your favorite language\. You can also use variables, functions, execute external commands, and so on\.\.\. 
66 | -------------------------------------------------------------------------------- /doc/ggs.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | GGS 7 | 44 | 45 | 52 | 53 |
54 | 55 | 62 | 63 |
    64 |
  1. ggs
  2. 65 |
  3. 66 |
  4. ggs
  5. 67 |
68 | 69 |

GGS

70 |

GGS (Grey Goo Spawner) is a simple program that runs jobs 71 | periodically. It is similar to cron, but with some differences:

72 | 73 | 82 | 83 | 84 |

Usage

85 | 86 |

ggs [options] [configuration file]

87 | 88 |

If no configuration file is provided, ggs will use ~/.config/ggsrc 89 | by default.

90 | 91 |

Requirements

92 | 93 | 96 | 97 | 98 |

Installation

99 | 100 |

go build ggs.go && cp ggs /usr/local/bin

101 | 102 |

Configuration

103 | 104 |

Configuration file is a shell script, so same rule as sh applies.

105 | 106 |

You create a job with the command function, which takes two arguments: 107 | the delay between launches, and the command to run. You can specify a 108 | timeout (in seconds) by setting the timeout environment variable 109 | (optional, default: 0, meaning no timeout).

110 | 111 |
timeout=30 command 300 "uptime | mail admin@example.com"
112 | command 5 'ping -c 1 github.com || sudo halt -p'
113 | 
114 | 115 |

You can also set the number of workers (maximum number of jobs that can 116 | run simultaneously):

117 | 118 |
workers=5 #Warning: don't write "workers = 5", spaces matter here!
119 | 
120 | 121 |

Advanced configuration

122 | 123 |

The configuration file is just a shell script which produces a JSON 124 | document which matches the structure of the Config structure. You can do 125 | exec my_script to produce the same JSON with a script in your favorite 126 | language. You can also use variables, functions, execute external 127 | commands, and so on...

128 | 129 | 130 |
    131 |
  1. 132 |
  2. February 2017
  3. 133 |
  4. ggs
  5. 134 |
135 | 136 |
137 | 138 | 139 | -------------------------------------------------------------------------------- /doc/ggs.md: -------------------------------------------------------------------------------- 1 | ../ggs/README.md -------------------------------------------------------------------------------- /doc/maildir-put: -------------------------------------------------------------------------------- 1 | .\" generated with Ronn/v0.7.3 2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3 3 | . 4 | .TH "MAILDIR\-PUT" "" "February 2016" "" "" 5 | \fBmaildir\-put\fR is a tool to put messages in a predefined JSON format inside a maildir\. It also try to detect duplicates and drop them\. 6 | . 7 | .SH "Usage" 8 | . 9 | .nf 10 | 11 | message\-producer | maildir\-put [arguments] 12 | . 13 | .fi 14 | . 15 | .P 16 | Available arguments: 17 | . 18 | .IP "\(bu" 4 19 | \fB\-cache\fR: path to a cache file used to store message IDs for duplicate detection 20 | . 21 | .IP "\(bu" 4 22 | \fB\-root\fR: path to the root maildir directory\. Defaults to ~/Maildir\. 23 | . 24 | .IP "\(bu" 4 25 | \fB\-folder\fR: maildir folder to put messages\. Defaults to "", the inbox\. The folder separator is "/"\. 26 | . 27 | .IP "\(bu" 4 28 | \fB\-redis\fR: specify this flag to use redis for message IDs cache\. If both \fB\-redis\fR and \fB\-cache\fR are specified, the given cache will be migrated to redis 29 | . 30 | .IP "\(bu" 4 31 | \fB\-redis\-db\fR, \fB\-redis\-addr\fR, \fB\-redis\-password\fR: redis connection settings\. 32 | . 33 | .IP "" 0 34 | . 35 | .SH "Installation" 36 | . 37 | .nf 38 | 39 | go build && cp maildir\-put /usr/local/bin 40 | . 41 | .fi 42 | . 43 | .SH "Input format" 44 | As its input, \fBmaildir\-put\fR takes a stream of JSON dictionaries (not a list of dictionaries)\. Each dictionary represents a message\. Available keys are: 45 | . 46 | .IP "\(bu" 4 47 | \fIbody\fR: the body of the message, in HTML\. Mandatory\. 48 | . 
49 | .IP "\(bu" 4 50 | \fItitle\fR: the subject of the message, in text\. Mandatory\. 51 | . 52 | .IP "\(bu" 4 53 | \fIdate\fR: the date of the message\. Optional, defaults to current time\. If provided, must be RFC 2822 compliant\. 54 | . 55 | .IP "\(bu" 4 56 | \fIauthor\fR: the name of the author, in text\. Optional\. 57 | . 58 | .IP "\(bu" 4 59 | \fIauthorEmail\fR: the email address of the author\. Optional\. 60 | . 61 | .IP "\(bu" 4 62 | \fIid\fR: a unique identifier for the message\. It will be used for the creation of the Message\-Id header, as well as in duplicates detection\. It should include three parts: a unique identifier for the application (for example: \fBrss2json\fR), a unique identifier for the parameters (for example: the feed URL) and a unique identifier for the message (for example: an article ID)\. The identifier for the parameters may be omitted if you provide a \fIhost\fR key and the host is sufficient to identify the parameters\. Mandatory for threaded discussions handling and duplicates detection, optional otherwise\. 63 | . 64 | .IP "\(bu" 4 65 | \fIhost\fR: the domain name of the producer of the message (in general, the hostname of the server from which you fetched the information)\. Used in \fBMessage\-Id\fR and \fBReferences\fR headers construction, as well as in duplicates detection\. Optional, but strongly encouraged for threaded discussions handling and duplicates detection\. 66 | . 67 | .IP "\(bu" 4 68 | \fIreferences\fR: for threaded discussions, \fIid\fR of the parent messages\. Note that \fIhost\fR must match in the two messages\. 69 | . 70 | .IP "\(bu" 4 71 | \fIurl\fR: URL of the message\. Used by \fBua\-inline\fR to resolve relative references\. 72 | . 73 | .IP "" 0 74 | . 75 | .P 76 | All strings must be encoded in UTF\-8\. 
77 | -------------------------------------------------------------------------------- /doc/maildir-put.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | maildir-put 7 | 44 | 45 | 52 | 53 |
54 | 55 | 60 | 61 |
    62 |
  1. maildir-put
  2. 63 |
  3. 64 |
  4. maildir-put
  5. 65 |
66 | 67 |

maildir-put

68 |

maildir-put is a tool to put messages in a predefined JSON format 69 | inside a maildir. It also tries to detect duplicates and drops them.

70 | 71 |

Usage

72 | 73 |
message-producer | maildir-put [arguments]
 74 | 
75 | 76 |

Available arguments:

77 | 78 | 89 | 90 | 91 |

Installation

92 | 93 |
go build && cp maildir-put /usr/local/bin
 94 | 
95 | 96 |

Input format

97 | 98 |

As its input, maildir-put takes a stream of JSON dictionaries (not a 99 | list of dictionaries). Each dictionary represents a message. Available 100 | keys are:

101 | 102 | 128 | 129 | 130 |

All strings must be encoded in UTF-8.

131 | 132 | 133 |
    134 |
  1. 135 |
  2. February 2016
  3. 136 |
  4. maildir-put
  5. 137 |
138 | 139 |
140 | 141 | 142 | -------------------------------------------------------------------------------- /doc/maildir-put.md: -------------------------------------------------------------------------------- 1 | ../maildir-put/README.md -------------------------------------------------------------------------------- /doc/rss2json: -------------------------------------------------------------------------------- 1 | .\" generated with Ronn/v0.7.3 2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3 3 | . 4 | .TH "RSS2JSON" "" "March 2014" "" "" 5 | \fBrss2json\fR is a simple tool intended to be used with \fBmaildir\-put\fR and \fBggs\fR\. It is used to convert any RSS or Atom feed into a stream of messages usable by \fBmaildir\-put\fR\. 6 | . 7 | .SH "Usage" 8 | . 9 | .nf 10 | 11 | rss2json feed\-url 12 | . 13 | .fi 14 | . 15 | .SH "Dependencies" 16 | . 17 | .IP "\(bu" 4 18 | libxml 19 | . 20 | .IP "\(bu" 4 21 | Optional: python and feedparser for parsing of ill\-formed feeds 22 | . 23 | .IP "" 0 24 | . 25 | .SH "Installation" 26 | . 27 | .nf 28 | 29 | go build && cp rss2json /usr/local/bin 30 | . 31 | .fi 32 | 33 | -------------------------------------------------------------------------------- /doc/rss2json.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | rss2json 7 | 44 | 45 | 52 | 53 |
54 | 55 | 60 | 61 |
    62 |
  1. rss2json
  2. 63 |
  3. 64 |
  4. rss2json
  5. 65 |
66 | 67 |

rss2json

68 |

rss2json is a simple tool intended to be used with maildir-put and ggs. It is used to convert any RSS or Atom feed into a stream of messages usable by maildir-put.

69 | 70 |

Usage

71 | 72 |
rss2json feed-url
73 | 
74 | 75 |

Dependencies

76 | 77 | 81 | 82 | 83 |

Installation

84 | 85 |
go build && cp rss2json /usr/local/bin
86 | 
87 | 88 | 89 |
    90 |
  1. 91 |
  2. March 2014
  3. 92 |
  4. rss2json
  5. 93 |
94 | 95 |
96 | 97 | 98 | -------------------------------------------------------------------------------- /doc/rss2json.md: -------------------------------------------------------------------------------- 1 | ../rss2json/README.md -------------------------------------------------------------------------------- /doc/ua: -------------------------------------------------------------------------------- 1 | .\" generated with Ronn/v0.7.3 2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3 3 | . 4 | .TH "UA" "" "July 2017" "" "" 5 | This is a set of tools to aggregate all your information into your maildir\. Each tool can be used separately ; you can find a more complete description in their respective folder\. 6 | . 7 | .IP "\(bu" 4 8 | \fBggs\fR is a software which runs commands periodically 9 | . 10 | .IP "\(bu" 4 11 | \fBmaildir\-put\fR reads a set of messages from its standard input and puts them in a maildir 12 | . 13 | .IP "\(bu" 4 14 | \fBrss2json\fR transforms any RSS/Atom feed into a set of messages that \fBmaildir\-put\fR can process 15 | . 16 | .IP "\(bu" 4 17 | You can write your own producers (scrapers) for maildir\-put ; some are already provided in the \fBscrapers/\fR directory\. 18 | . 19 | .IP "\(bu" 4 20 | You can also put filters, like \fBua\-inline\fR or \fBua\-proxify\fR\. 21 | . 22 | .IP "" 0 23 | . 24 | .SH "Usage" 25 | . 26 | .nf 27 | 28 | ggs [path\-to\-configuration\-file] 29 | . 30 | .fi 31 | . 32 | .SH "Dependencies" 33 | . 34 | .IP "\(bu" 4 35 | Go 36 | . 37 | .IP "\(bu" 4 38 | libxml 39 | . 40 | .IP "\(bu" 4 41 | jq \fIhttps://stedolan\.github\.io/jq/\fR 42 | . 43 | .IP "\(bu" 4 44 | For additional scrapers: scrapy, python 3 and nodejs 45 | . 46 | .IP "" 0 47 | . 48 | .SH "Installation" 49 | . 50 | .nf 51 | 52 | make && sudo make install 53 | . 54 | .fi 55 | . 56 | .SH "Configuration" 57 | See the \fBggs\fR documentation for more information\. 
Here is an sample configuration file, which puts some feeds into \fBFun\fR and \fBGeek\fR folders, some new chapters notification from mangareader into \fBEntertainment\fR, and my Github personal feed into inbox: 58 | . 59 | .IP "" 4 60 | . 61 | .nf 62 | 63 | default_timeout=30 64 | 65 | rss() { 66 | command 2000 "rss2json \e"$1\e" | ua\-inline | maildir\-put \-root $HOME/Maildir\-feeds \-folder \e"$2\e"" 67 | } 68 | 69 | mangareader() { 70 | command 2000 "ua\-scraper\-mangareader \-a name=$1 | "\e 71 | "maildir\-put \-root $HOME/Maildir\-feeds \-folder Entertainment" 72 | } 73 | 74 | rss http://xkcd\.com/atom\.xml Fun 75 | rss http://feeds\.feedburner\.com/smbc\-comics/PvLb Fun 76 | rss http://syndication\.thedailywtf\.com/TheDailyWtf Fun 77 | 78 | rss http://www\.reddit\.com/r/science/top/\.rss Geek 79 | rss http://www\.phoronix\.com/rss\.php Geek 80 | 81 | mangareader naruto 82 | mangareader bleach 83 | mangareader gantz 84 | 85 | rss https://github\.com/sloonz\.private\.atom?token=HIDDEN "" 86 | . 87 | .fi 88 | . 89 | .IP "" 0 90 | . 91 | .SH "Weboob compatibility" 92 | You can use weboob \fIhttp://weboob\.org/\fR modules used by boobmsg \fIhttp://weboob\.org/applications/boobmsg\fR to generate messages\. Configure the modules using \fBboobmsg\fR, and use \fBweboobmsg2json [module\-name]\fR to generate messages\. \fB[module\-name]\fR can be found in \fB~/\.config/weboob/backends\fR\. 93 | -------------------------------------------------------------------------------- /doc/ua-inline: -------------------------------------------------------------------------------- 1 | .\" generated with Ronn/v0.7.3 2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3 3 | . 4 | .TH "UA\-INLINE" "" "March 2014" "" "" 5 | . 6 | .SH "NAME" 7 | \fBua\-inline\fR \- Inline HTML resources 8 | . 9 | .P 10 | This is a simple filter intended to be used before \fBmaildir\-put\fR\. It replaces external images inside the body of the message by their content (using \fBdata:\fR scheme)\. 11 | . 
12 | .P 13 | If the body contains relative references, it tries to resolve them using the \fBurl\fR key of the message\. If that’s not possible, no inlining is done\. 14 | . 15 | .SH "Example usage, in ggsrc" 16 | . 17 | .nf 18 | 19 | command 2000 "rss2json feed\-url | ua\-inline | maildir\-put" 20 | . 21 | .fi 22 | . 23 | .SH "Installation" 24 | . 25 | .nf 26 | 27 | go build && cp ua\-inline /usr/local/bin 28 | . 29 | .fi 30 | 31 | -------------------------------------------------------------------------------- /doc/ua-inline.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Inline HTML resources 7 | 44 | 45 | 52 | 53 |
54 | 55 | 60 | 61 |
    62 |
  1. ua-inline
  2. 63 |
  3. 64 |
  4. ua-inline
  5. 65 |
66 | 67 |

NAME

68 |

69 | ua-inline - Inline HTML resources 70 |

71 | 72 |

This is a simple filter intended to be used before maildir-put. It 73 | replaces external images inside the body of the message by their content 74 | (using data: scheme).

75 | 76 |

If the body contains relative references, it tries to resolve them using 77 | the url key of the message. If that’s not possible, no inlining 78 | is done.

79 | 80 |

Example usage, in ggsrc

81 | 82 |
command 2000 "rss2json feed-url | ua-inline | maildir-put"
 83 | 
84 | 85 |

Installation

86 | 87 |
go build && cp ua-inline /usr/local/bin
 88 | 
89 | 90 | 91 |
    92 |
  1. 93 |
  2. March 2014
  3. 94 |
  4. ua-inline
  5. 95 |
96 | 97 |
98 | 99 | 100 | -------------------------------------------------------------------------------- /doc/ua-inline.md: -------------------------------------------------------------------------------- 1 | ../ua-inline/README.md -------------------------------------------------------------------------------- /doc/ua-proxify: -------------------------------------------------------------------------------- 1 | .\" generated with Ronn/v0.7.3 2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3 3 | . 4 | .TH "UA\-PROXIFY" "" "March 2016" "" "" 5 | . 6 | .SH "NAME" 7 | \fBua\-proxify\fR \- Transform external URLs in a message 8 | . 9 | .P 10 | This is a simple filter intended to be used before \fBmaildir\-put\fR\. It changes the URL of external resources (CSS, images)\. 11 | . 12 | .P 13 | If the body contains relative references, it tries to resolve them using the \fBurl\fR key of the message\. If that’s not possible, no change is done\. 14 | . 15 | .SH "Example usage, in ggsrc" 16 | \fBget\.php\fR is a simple example script provided with \fBua\-proxify\fR\. It can be used that way: 17 | . 18 | .IP "" 4 19 | . 20 | .nf 21 | 22 | command 2000 "rss2json feed\-url | \e 23 | ua\-proxify "http://example\.com/get?url={{\.URL|urlquery}}&sig={{\.URL|HMAC \e"$HMAC_KEY\e"}}" | \e 24 | maildir\-put" 25 | . 26 | .fi 27 | . 28 | .IP "" 0 29 | . 30 | .P 31 | \fB$HMAC_KEY\fR can be generated with \fBopenssl rand \-base64 32\fR and must be set in the top of \fBget\.php\fR\. 32 | . 33 | .SH "Installation" 34 | . 35 | .nf 36 | 37 | go build && cp ua\-proxify /usr/local/bin 38 | . 39 | .fi 40 | 41 | -------------------------------------------------------------------------------- /doc/ua-proxify.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Transform external URLs in a message 7 | 44 | 45 | 52 | 53 |
54 | 55 | 60 | 61 |
    62 |
  1. ua-proxify
  2. 63 |
  3. 64 |
  4. ua-proxify
  5. 65 |
66 | 67 |

NAME

68 |

69 | ua-proxify - Transform external URLs in a message 70 |

71 | 72 |

This is a simple filter intended to be used before maildir-put. It 73 | changes the URL of external resources (CSS, images).

74 | 75 |

If the body contains relative references, it tries to resolve them using 76 | the url key of the message. If that’s not possible, no change 77 | is done.

78 | 79 |

Example usage, in ggsrc

80 | 81 |

get.php is a simple example script provided with ua-proxify. It can be used 82 | this way:

83 | 84 |
command 2000 "rss2json feed-url | \
 85 |     ua-proxify "http://example.com/get?url={{.URL|urlquery}}&sig={{.URL|HMAC \"$HMAC_KEY\"}}" | \
 86 |     maildir-put"
 87 | 
88 | 89 |

$HMAC_KEY can be generated with openssl rand -base64 32 and must be set at 90 | the top of get.php.

91 | 92 |

Installation

93 | 94 |
go build && cp ua-proxify /usr/local/bin
 95 | 
96 | 97 | 98 |
    99 |
  1. 100 |
  2. March 2016
  3. 101 |
  4. ua-proxify
  5. 102 |
103 | 104 |
105 | 106 | 107 | -------------------------------------------------------------------------------- /doc/ua-proxify.md: -------------------------------------------------------------------------------- 1 | ../ua-proxify/README.md -------------------------------------------------------------------------------- /doc/ua-scrapers: -------------------------------------------------------------------------------- 1 | .\" generated with Ronn/v0.7.3 2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3 3 | . 4 | .TH "UA\-SCRAPERS" "" "July 2017" "" "" 5 | List all courses on EdX \fIhttps://www\.edx\.org/\fR\.List new comics on Lyon public library \fIhttps://www\.bm\-lyon\.fr/\fR\.List season animes from myanimelist \fIhttps://myanimelist\.net/anime/season\fR\.List latest chapters for a given manga on mangareader \fIhttp://www\.mangareader\.net/\fR\. 6 | . 7 | .P 8 | Usage: \fBua\-scraper\-mangareader \-a name=[manga\-title]\fR\. \fB[manga\-title]\fR is the path of the manga on mangareader, for example \fBnatsume\-yuujinchou\fR for http://www\.mangareader\.net/natsume\-yuujinchou\.List latest torrents on torrent9 \fIhttp://www\.torrent9\.cc/\fR\. 9 | . 10 | .P 11 | Usage: 12 | . 13 | .IP "\(bu" 4 14 | All categories: \fBua\-scraper\-torrent9\fR 15 | . 16 | .IP "\(bu" 4 17 | Specific categories: \fBua\-scraper\-torrent9 "category1 category2\.\.\."\fR 18 | . 19 | .IP "" 0 20 | . 21 | .P 22 | Categories references the anchor in the URL (for example \fBebook\fR for http://www\.torrent9\.cc/#ebook)\.List lastest torrents on yggtorrent \fIhttps://yggtorrent\.com/\fR\. 23 | . 24 | .P 25 | Usage: 26 | . 27 | .IP "\(bu" 4 28 | All categories: \fBua\-scraper\-yggtorrent\fR 29 | . 30 | .IP "\(bu" 4 31 | Specific category: \fBua\-scraper\-yggtorrent [url]\fR\. 32 | . 
33 | .IP "" 0 34 | 35 | -------------------------------------------------------------------------------- /doc/ua-scrapers.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | <p>This contains additional scrapers. You can take those as examples to 7 | write your own.</p> 8 | 9 | ua-scraper-exdcourses 10 | 47 | 48 | 55 | 56 |
57 | 58 | 60 | 61 |
    62 |
  1. ua-scrapers
  2. 63 |
  3. 64 |
  4. ua-scrapers
  5. 65 |
66 | 67 |

This contains additional scrapers. You can take those as examples to 68 | write your own.

69 | 70 | ua-scraper-exdcourses

71 |

List all courses on EdX.

72 | 73 |

ua-scraper-lyon-bm-bd

74 | 75 |

List new comics on Lyon public library.

76 | 77 |

ua-scraper-mal

78 | 79 |

List season animes from myanimelist.

80 | 81 |

ua-scraper-mangareader

82 | 83 |

List latest chapters for a given manga on mangareader.

84 | 85 |

Usage: ua-scraper-mangareader -a name=[manga-title]. [manga-title] 86 | is the path of the manga on mangareader, for example natsume-yuujinchou 87 | for http://www.mangareader.net/natsume-yuujinchou.

88 | 89 |

ua-scraper-torrent9

90 | 91 |

List latest torrents on torrent9.

92 | 93 |

Usage:

94 | 95 | 99 | 100 | 101 |

Categories refer to the anchor in the URL (for example ebook for 102 | http://www.torrent9.cc/#ebook).

103 | 104 |

ua-scraper-yggtorrent

105 | 106 |

List latest torrents on yggtorrent.

107 | 108 |

Usage:

109 | 110 | 114 | 115 | 116 | 117 |
    118 |
  1. 119 |
  2. July 2017
  3. 120 |
  4. ua-scrapers
  5. 121 |
122 | 123 |
124 | 125 | 126 | -------------------------------------------------------------------------------- /doc/ua-scrapers.md: -------------------------------------------------------------------------------- 1 | ../scrapers/README.md -------------------------------------------------------------------------------- /doc/ua.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | The Universal Aggregator 7 | 44 | 45 | 52 | 53 |
54 | 55 | 62 | 63 |
    64 |
  1. ua
  2. 65 |
  3. 66 |
  4. ua
  5. 67 |
68 | 69 |

The Universal Aggregator

70 |

This is a set of tools to aggregate all your information into your 71 | maildir. Each tool can be used separately ; you can find a more complete 72 | description in their respective folder.

73 | 74 | 84 | 85 | 86 |

Usage

87 | 88 |
ggs [path-to-configuration-file]
 89 | 
90 | 91 |

Dependencies

92 | 93 | 99 | 100 | 101 |

Installation

102 | 103 |
make && sudo make install
104 | 
105 | 106 |

Configuration

107 | 108 |

See the ggs documentation for more information. Here is a sample 109 | configuration file, which puts some feeds into Fun and Geek folders, 110 | some new-chapter notifications from mangareader into Entertainment, 111 | and my GitHub personal feed into the inbox:

112 | 113 |
default_timeout=30
114 | 
115 | rss() {
116 |     command 2000 "rss2json \"$1\" | ua-inline | maildir-put -root $HOME/Maildir-feeds -folder \"$2\""
117 | }
118 | 
119 | mangareader() {
120 |     command 2000 "ua-scraper-mangareader -a name=$1 | "\
121 |         "maildir-put -root $HOME/Maildir-feeds -folder Entertainment"
122 | }
123 | 
124 | rss http://xkcd.com/atom.xml Fun
125 | rss http://feeds.feedburner.com/smbc-comics/PvLb Fun
126 | rss http://syndication.thedailywtf.com/TheDailyWtf Fun
127 | 
128 | rss http://www.reddit.com/r/science/top/.rss Geek
129 | rss http://www.phoronix.com/rss.php Geek
130 | 
131 | mangareader naruto
132 | mangareader bleach
133 | mangareader gantz
134 | 
135 | rss https://github.com/sloonz.private.atom?token=HIDDEN ""
136 | 
137 | 138 |

Weboob compatibility

139 | 140 |

You can use weboob modules used by 141 | boobmsg to generate 142 | messages. Configure the modules using boobmsg, and use weboobmsg2json 143 | [module-name] to generate messages. [module-name] can be found in 144 | ~/.config/weboob/backends.

145 | 146 | 147 |
    148 |
  1. 149 |
  2. July 2017
  3. 150 |
  4. ua
  5. 151 |
152 | 153 |
154 | 155 | 156 | -------------------------------------------------------------------------------- /doc/ua.md: -------------------------------------------------------------------------------- 1 | ../README.md -------------------------------------------------------------------------------- /ggs/README.md: -------------------------------------------------------------------------------- 1 | # GGS 2 | 3 | `GGS` (Grey Goo Spawner) is a simple program that runs jobs 4 | periodically. It is similar to cron, but with some differences: 5 | 6 | * Whereas `cron` launches jobs at specific times, `ggs` is mainly 7 | interested in intervals. It will run all jobs at its startup and then 8 | will re-run each job after a certain delay has passed. 9 | 10 | * `ggs` has a system of `workers`, similar to many servers (like nginx 11 | or Apache with MPM Workers) to limit resource concurrency between your 12 | jobs. 13 | 14 | * You can define a timeout for your jobs, too. 15 | 16 | ## Usage 17 | 18 | `ggs [options] [configuration file]` 19 | 20 | If no configuration file is provided, `ggs` will use `~/.config/ggsrc` 21 | by default. 22 | 23 | ## Requirements 24 | 25 | * [jq](https://stedolan.github.io/jq/) 26 | 27 | ## Installation 28 | 29 | `go build ggs.go && cp ggs /usr/local/bin` 30 | 31 | ## Configuration 32 | 33 | Configuration file is a shell script, so the same rules as `sh` apply. 34 | 35 | You create a job with the `command` function, which takes two arguments: 36 | the delay between launches, and the command to run. You can specify a 37 | timeout (in seconds) by setting the `timeout` environment variable 38 | (optional, default: 0 no timeout). 39 | 40 | timeout=30 command 300 "uptime | mail admin@example.com" 41 | command 5 'ping -c 1 github.com || sudo halt -p' 42 | 43 | You can also set the number of workers (maximum number of jobs that can 44 | run simultaneously): 45 | 46 | workers=5 #Warning: don't do "workers = 5", spaces matter here ! 
47 | 48 | ## Advanced configuration 49 | 50 | The configuration file is just a shell script which produces a JSON 51 | document which maches the structure of the `Config` structure. You can do 52 | `exec my_script` to produce the same JSON with a script in your favorite 53 | language. You can also use variables, functions, execute external 54 | commands, and so on... 55 | -------------------------------------------------------------------------------- /ggs/ggs.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "flag" 7 | "fmt" 8 | "log" 9 | "os" 10 | "os/exec" 11 | "os/signal" 12 | "sync" 13 | "syscall" 14 | "time" 15 | ) 16 | 17 | type Command struct { 18 | Delay int 19 | Timeout int 20 | Command string 21 | } 22 | 23 | type Config struct { 24 | Workers int 25 | Commands []*Command 26 | disabled bool 27 | } 28 | 29 | const CONFIG_WRAPPER = ` 30 | workers=5 31 | default_timeout=0 32 | commands=$(jq -n '[]') 33 | 34 | command() { 35 | delay=$1; shift 36 | commands=$(echo "$commands" | \ 37 | jq --arg delay "$delay" --arg cmd "$*" \ 38 | --arg timeout "${timeout:-$default_timeout}" \ 39 | '. + [{Timeout: ($timeout|tonumber), Delay: ($delay|tonumber), Command: $cmd}]') 40 | timeout= 41 | } 42 | 43 | . %s 44 | 45 | echo "$commands" | jq --arg workers "$workers" '{Workers: ($workers|tonumber), Commands: .}' 46 | ` 47 | 48 | type loggerWriter struct { 49 | log *log.Logger 50 | cmd *exec.Cmd 51 | buf []byte 52 | } 53 | 54 | func (w *loggerWriter) Write(data []byte) (int, error) { 55 | sz := len(data) 56 | data = append(w.buf, data...) 
57 | lines := bytes.Split(data, []byte("\n")) 58 | if len(lines[len(lines)-1]) == 0 { 59 | w.buf = nil 60 | } else { 61 | w.buf = lines[len(lines)-1] 62 | } 63 | lines = lines[:len(lines)-1] 64 | for _, line := range lines { 65 | w.log.Printf("[%d] %s", w.cmd.Process.Pid, string(line)) 66 | } 67 | return sz, nil 68 | } 69 | 70 | func (w *loggerWriter) Close() { 71 | if w.buf != nil { 72 | w.log.Printf("[%d] %s", w.cmd.Process.Pid, string(w.buf)) 73 | w.buf = nil 74 | } 75 | } 76 | 77 | func readConfig(cfgFile string) (cfg *Config, err error) { 78 | sp := exec.Command("sh") 79 | sp.Stderr = os.Stderr 80 | sp.Stdin = bytes.NewBuffer([]byte(fmt.Sprintf(CONFIG_WRAPPER, cfgFile))) 81 | out, err := sp.Output() 82 | if err != nil { 83 | return nil, err 84 | } 85 | 86 | cfg = new(Config) 87 | err = json.Unmarshal(out, cfg) 88 | if err != nil { 89 | return nil, err 90 | } 91 | 92 | return cfg, nil 93 | } 94 | 95 | func process(cmd *Command) { 96 | var timer *time.Timer 97 | var err error 98 | 99 | sp := exec.Command("sh", "-c", cmd.Command) 100 | stdout := &loggerWriter{log: log.Default(), cmd: sp} 101 | stderr := &loggerWriter{log: log.Default(), cmd: sp} 102 | sp.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} 103 | sp.Stdout = stdout 104 | sp.Stderr = stderr 105 | 106 | if err = sp.Start(); err != nil { 107 | log.Printf("%s failed: %s", cmd.Command, err.Error()) 108 | return 109 | } 110 | log.Printf("[%d] %s", sp.Process.Pid, cmd.Command) 111 | 112 | if cmd.Timeout > 0 { 113 | timer = time.AfterFunc(time.Duration(cmd.Timeout)*time.Second, func() { 114 | if sp.ProcessState == nil { 115 | syscall.Kill(-sp.Process.Pid, syscall.SIGTERM) 116 | } 117 | }) 118 | } 119 | 120 | err = sp.Wait() 121 | stdout.Close() 122 | stderr.Close() 123 | 124 | if err != nil { 125 | log.Printf("[%d] %s failed: %s", sp.Process.Pid, cmd.Command, err.Error()) 126 | } else { 127 | log.Printf("[%d] done", sp.Process.Pid) 128 | } 129 | 130 | timer.Stop() 131 | } 132 | 133 | func reload(cfgFile 
string, oldConfig *Config, runOnce bool) (config *Config, err error) { 134 | // loopGroup is the number of (pending) writers on the command channel. 135 | // After disabling a configuration, we have to wait for it to fall to 0 before 136 | // closing the channel (otherwise, they will write to the closed channel). 137 | // 138 | // onceGroup is the number of unprocessed commands in the initial batch. 139 | var loopGroup, onceGroup sync.WaitGroup 140 | 141 | var closeChannel sync.Once 142 | 143 | config, err = readConfig(cfgFile) 144 | if err != nil { 145 | return nil, err 146 | } 147 | 148 | ch := make(chan *Command, len(config.Commands)) 149 | 150 | for i := 0; i < config.Workers; i++ { 151 | go func() { 152 | for !config.disabled { 153 | var cmd *Command 154 | if cmd = <-ch; cmd == nil { 155 | continue 156 | } 157 | 158 | process(cmd) 159 | 160 | if runOnce { 161 | onceGroup.Done() 162 | } else { 163 | loopGroup.Add(1) 164 | time.AfterFunc(time.Duration(cmd.Delay)*time.Second, func() { 165 | if !config.disabled { 166 | ch <- cmd 167 | } 168 | loopGroup.Done() 169 | }) 170 | } 171 | } 172 | 173 | loopGroup.Wait() 174 | closeChannel.Do(func() { close(ch) }) 175 | }() 176 | } 177 | 178 | for _, cmd := range config.Commands { 179 | ch <- cmd 180 | if runOnce { 181 | onceGroup.Add(1) 182 | } 183 | } 184 | 185 | if runOnce { 186 | onceGroup.Wait() 187 | os.Exit(0) 188 | } 189 | 190 | if oldConfig != nil { 191 | oldConfig.disabled = true 192 | } 193 | 194 | return config, nil 195 | } 196 | 197 | func main() { 198 | var runOnce bool 199 | var cfgFile string 200 | 201 | flag.BoolVar(&runOnce, "once", false, "Process commands once, and then exit") 202 | flag.Parse() 203 | 204 | if cfgFile = flag.Arg(0); cfgFile == "" { 205 | cfgFile = os.ExpandEnv("$HOME/.config/ggsrc") 206 | } 207 | 208 | config, err := reload(cfgFile, nil, runOnce) 209 | if err != nil { 210 | fmt.Fprintf(os.Stderr, "Error while reading configuration: %s", err) 211 | os.Exit(1) 212 | } 213 | 214 | // wait 
for signals (interrupt, reload) 215 | sigChan := make(chan os.Signal, 2) 216 | signal.Notify(sigChan, syscall.SIGINT, syscall.SIGUSR1) 217 | for sig := range sigChan { 218 | switch sig { 219 | case syscall.SIGINT: 220 | return 221 | case syscall.SIGUSR1: 222 | config, err = reload(cfgFile, config, runOnce) 223 | if err != nil { 224 | fmt.Fprintf(os.Stderr, "Error while reloading configuration: %s", err) 225 | } 226 | } 227 | } 228 | } 229 | -------------------------------------------------------------------------------- /ggsrc.example: -------------------------------------------------------------------------------- 1 | default_timeout=30 2 | 3 | rss() { 4 | command 2000 "rss2json \"$1\" | maildir-put -root $HOME/Maildir-feeds -folder \"$2\"" 5 | } 6 | 7 | mangareader() { 8 | command 2000 "mangareader2json http://mangareader.net/$1 | "\ 9 | "maildir-put -root $HOME/Maildir-feeds -folder Entertainment" 10 | } 11 | 12 | rss http://xkcd.com/atom.xml Fun 13 | rss http://feeds.feedburner.com/smbc-comics/PvLb Fun 14 | rss http://syndication.thedailywtf.com/TheDailyWtf Fun 15 | 16 | rss http://www.reddit.com/r/science/top/.rss Geek 17 | rss http://www.phoronix.com/rss.php Geek 18 | 19 | mangareader naruto 20 | mangareader bleach 21 | mangareader gantz 22 | 23 | rss https://github.com/sloonz.private.atom?token=HIDDEN "" 24 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/sloonz/ua 2 | 3 | go 1.16 4 | 5 | require ( 6 | github.com/garyburd/redigo v1.6.2 // indirect 7 | github.com/onsi/ginkgo v1.16.1 // indirect 8 | github.com/onsi/gomega v1.11.0 // indirect 9 | github.com/sloonz/cfeedparser v0.0.0-20210430180901-ecf35ac90d83 10 | github.com/sloonz/go-maildir v0.0.0-20210417175458-ec35083290ab 11 | github.com/sloonz/go-mime-message v0.0.0-20210417175330-cb2e834a9b3b 12 | github.com/sloonz/go-qprintable 
v0.0.0-20210417175225-715103f9e6eb 13 | gopkg.in/bsm/ratelimit.v1 v1.0.0-20160220154919-db14e161995a // indirect 14 | gopkg.in/redis.v3 v3.6.4 15 | ) 16 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= 4 | github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4= 5 | github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= 6 | github.com/garyburd/redigo v1.6.2 h1:yE/pwKCrbLpLpQICzYTeZ7JsTA/C53wFTJHaEtRqniM= 7 | github.com/garyburd/redigo v1.6.2/go.mod h1:NR3MbYisc3/PwhQ00EMzDiPmrwpPxAn5GI05/YaO1SY= 8 | github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE= 9 | github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 10 | github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= 11 | github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= 12 | github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= 13 | github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= 14 | github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= 15 | github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= 16 | github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= 17 | github.com/google/go-cmp v0.3.0/go.mod 
h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= 18 | github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= 19 | github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 20 | github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= 21 | github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= 22 | github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= 23 | github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= 24 | github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= 25 | github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= 26 | github.com/onsi/ginkgo v1.16.1 h1:foqVmeWDD6yYpK+Yz3fHyNIxFYNxswxqNFjSKe+vI54= 27 | github.com/onsi/ginkgo v1.16.1/go.mod h1:CObGmKUOKaSC0RjmoAK7tKyn4Azo5P2IWuoMnvwxz1E= 28 | github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= 29 | github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= 30 | github.com/onsi/gomega v1.11.0 h1:+CqWgvj0OZycCaqclBD1pxKHAU+tOkHmQIWvDHq2aug= 31 | github.com/onsi/gomega v1.11.0/go.mod h1:azGKhqFUon9Vuj0YmTfLSmx0FUwqXYSTl5re8lQLTUg= 32 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 33 | github.com/sloonz/cfeedparser v0.0.0-20160203163450-a220b181f09f h1:pa1Gxag3FaJ68Mz2LaB9KYyzXG3pCnDIjP4A6M3XQqo= 34 | github.com/sloonz/cfeedparser v0.0.0-20160203163450-a220b181f09f/go.mod h1:pSLBLXl/QPOmNbplyTkt/TdU/0gWf/BidRMVcyfQIKk= 35 | github.com/sloonz/cfeedparser v0.0.0-20210430180901-ecf35ac90d83 h1:hzwGBD4Vb4D/wMzSK8k/8aRZySfykyMx3sYJSfIH0jw= 36 | github.com/sloonz/cfeedparser v0.0.0-20210430180901-ecf35ac90d83/go.mod h1:0G5ru/AAfpMT2UNCjXytFSpOyTxOZW4+3EFa8uSF2OU= 37 | github.com/sloonz/go-maildir v0.0.0-20210417175458-ec35083290ab h1:H8W5t9eJbVOltrNUQBPWGgpqFszJifXdcjJ0nhVREQw= 38 | 
github.com/sloonz/go-maildir v0.0.0-20210417175458-ec35083290ab/go.mod h1:DtE1Xilsk4k8SzX2J52IgP9+bTpxKC8ZdTsbqq9QJJw= 39 | github.com/sloonz/go-mime-message v0.0.0-20210417175330-cb2e834a9b3b h1:yzAB0kQ/6jGaAOMmyylzH2SZp7g7vomILqkxAs4ghqw= 40 | github.com/sloonz/go-mime-message v0.0.0-20210417175330-cb2e834a9b3b/go.mod h1:G3uvxOtJJl6pu9Bl9l0JkHIsn/9qBVSBd5iEQZVI7ic= 41 | github.com/sloonz/go-qprintable v0.0.0-20160203160305-775b3a4592d5/go.mod h1:rvsMTVl5yyd7liGH3cxu5eRjfNcC1WkSKe4HBSZ3ZA4= 42 | github.com/sloonz/go-qprintable v0.0.0-20210417175225-715103f9e6eb h1:T+USeSgAg9MysHPeOQ2W3KAuBQHVZzG0XMHyfHN88Yg= 43 | github.com/sloonz/go-qprintable v0.0.0-20210417175225-715103f9e6eb/go.mod h1:WKd1iQMtoZdaS9rlKDPprxWJoan2hkQA9BcGt+oxezs= 44 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 45 | github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= 46 | github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= 47 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 48 | golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 49 | golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= 50 | golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= 51 | golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 52 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 53 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 54 | golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= 55 | golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod 
h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= 56 | golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb h1:eBmm0M9fYhWpKZLjQUUKka/LtIxf46G4fxeEz5KJr9U= 57 | golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= 58 | golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 59 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 60 | golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 61 | golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 62 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 63 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 64 | golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 65 | golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 66 | golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 67 | golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 68 | golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 69 | golang.org/x/sys v0.0.0-20210112080510-489259a85091 h1:DMyOG0U+gKfu8JZzg2UQe9MeaC1X+xQWlAKcRnjxjCw= 70 | golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 71 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 72 | golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k= 73 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 74 | golang.org/x/tools 
v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 75 | golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 76 | golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= 77 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 78 | golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 79 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 80 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 81 | google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= 82 | google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= 83 | google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= 84 | google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= 85 | google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= 86 | google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= 87 | gopkg.in/bsm/ratelimit.v1 v1.0.0-20160220154919-db14e161995a h1:stTHdEoWg1pQ8riaP5ROrjS6zy6wewH/Q2iwnLCQUXY= 88 | gopkg.in/bsm/ratelimit.v1 v1.0.0-20160220154919-db14e161995a/go.mod h1:KF9sEfUPAXdG8Oev9e99iLGnl2uJMjc5B+4y3O7x610= 89 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 90 | gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= 91 | gopkg.in/redis.v3 v3.6.4 h1:u7XgPH1rWwsdZnR+azldXC6x9qDU2luydOIeU/l52fE= 92 | gopkg.in/redis.v3 
v3.6.4/go.mod h1:6XeGv/CrsUFDU9aVbUdNykN7k1zVmoeg83KC9RbQfiU= 93 | gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= 94 | gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= 95 | gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 96 | gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 97 | gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 98 | gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= 99 | gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= 100 | -------------------------------------------------------------------------------- /maildir-put/README.md: -------------------------------------------------------------------------------- 1 | # maildir-put 2 | 3 | `maildir-put` is a tool to put messages in a predefined JSON format 4 | inside a maildir. It also try to detect duplicates and drop them. 5 | 6 | ## Usage 7 | 8 | message-producer | maildir-put [arguments] 9 | 10 | Available arguments: 11 | 12 | * `-cache`: path to a cache file used to store message IDs for duplicate 13 | detection 14 | * `-root`: path to the root maildir directory. Defaults to ~/Maildir. 15 | * `-folder`: maildir folder to put messages. Defaults to "", the inbox. 16 | The folder separator is "/". 17 | * `-redis`: specify this flag to use redis for message IDs cache. If both 18 | `-redis` and `-cache` are specified, the given cache will be migrated to 19 | redis 20 | * `-redis-db`, `-redis-addr`, `-redis-password`: redis connection settings. 21 | 22 | ## Installation 23 | 24 | go build && cp maildir-put /usr/local/bin 25 | 26 | ## Input format 27 | 28 | As its input, `maildir-put` takes a stream of JSON dictionaries (not a 29 | list of dictionaries). Each dictionary represents a message. 
Available 30 | keys are: 31 | 32 | * *body*: the body of the message, in HTML. Mandatory. 33 | * *title*: the subject of the message, in text. Mandatory. 34 | * *date*: the date of the message. Optional, defaults to current time. If 35 | provided, must be RFC 2822 compliant. 36 | * *author*: the name of the author, in text. Optional. 37 | * *authorEmail*: the email address of the author. Optional. 38 | * *id*: a unique identifier for the message. It will be used for the 39 | creation of the Message-Id header, as well as in duplicate detection. It 40 | should include three parts: a unique identifier for the application 41 | (for example: `rss2json`), a unique identifier for the parameters 42 | (for example: the feed URL) and a unique identifier for the message 43 | (for example: an article ID). The identifier for the parameters may be 44 | omitted if you provide a *host* key and the host is sufficient to 45 | identify the parameters. Mandatory for threaded discussions handling and 46 | duplicate detection, optional otherwise. 47 | * *host*: the domain name of the producer of the message (in general, 48 | the hostname of the server from which you fetched the information). Used 49 | in `Message-Id` and `References` headers construction, as well as in 50 | duplicate detection. Optional, but strongly encouraged for threaded 51 | discussions handling and duplicate detection. 52 | * *references*: for threaded discussions, *id* of the parent messages. Note 53 | that *host* must match in the two messages. 54 | * *url*: URL of the message. Used by `ua-inline` to resolve relative 55 | references. 56 | 57 | All strings must be encoded in UTF-8. 
58 | -------------------------------------------------------------------------------- /maildir-put/cache.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "encoding/binary" 6 | "gopkg.in/redis.v3" 7 | "io" 8 | "log" 9 | "os" 10 | "syscall" 11 | "time" 12 | "bytes" 13 | ) 14 | 15 | type Cache struct { 16 | data map[string]bool 17 | newData map[string]bool 18 | ts []byte 19 | path string 20 | useRedis bool 21 | redisClient *redis.Client 22 | redisOptions redis.Options 23 | } 24 | 25 | func (c *Cache) OpenCache() (err error) { 26 | var key string 27 | 28 | tsBuf := bytes.NewBuffer(nil) 29 | binary.Write(tsBuf, binary.BigEndian, time.Now().Unix()) 30 | 31 | c.data = make(map[string]bool) 32 | c.newData = make(map[string]bool) 33 | c.ts = tsBuf.Bytes() 34 | 35 | if c.useRedis { 36 | c.redisClient = redis.NewClient(&c.redisOptions) 37 | } 38 | 39 | cacheFile, err := os.Open(c.path) 40 | if err != nil && !os.IsNotExist(err) { 41 | return err 42 | } else if os.IsNotExist(err) { 43 | return nil 44 | } 45 | 46 | reader := bufio.NewReader(cacheFile) 47 | for err != io.EOF { 48 | if key, err = reader.ReadString('\n'); err != nil && err != io.EOF { 49 | return err 50 | } 51 | if key != "" && key != "" { 52 | key = key[:len(key)-1] 53 | c.data[key] = true 54 | } 55 | } 56 | 57 | return nil 58 | } 59 | 60 | func (c *Cache) Getset(id, host, msgId string) bool { 61 | if c.useRedis { 62 | res := c.redisClient.HExists("ua:"+host, id) 63 | if res.Err() != nil && res.Err() != redis.Nil { 64 | log.Fatalf("Error using redis cache: %s", res.Err()) 65 | } 66 | 67 | present := res.Val() 68 | 69 | res = c.redisClient.HSet("ua:"+host, id, string(c.ts)) 70 | if res.Err() != nil && res.Err() != redis.Nil { 71 | log.Fatalf("Error using redis cache: %s", res.Err()) 72 | } 73 | 74 | return present 75 | } else { 76 | if _, has := c.data[msgId]; has { 77 | return true 78 | } 79 | if _, has := c.newData[msgId]; has { 
80 | return true 81 | } 82 | c.newData[msgId] = true 83 | } 84 | return false 85 | } 86 | 87 | func (c *Cache) Dump() error { 88 | if c.useRedis { 89 | return nil 90 | } 91 | 92 | cacheFile, err := os.OpenFile(c.path, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0660) 93 | if err != nil { 94 | return err 95 | } 96 | defer cacheFile.Close() 97 | 98 | if err = syscall.Flock(int(cacheFile.Fd()), syscall.LOCK_EX); err != nil { 99 | return err 100 | } 101 | 102 | writer := bufio.NewWriter(cacheFile) 103 | for key, _ := range c.newData { 104 | if _, err = writer.WriteString(key); err != nil { 105 | return err 106 | } 107 | if _, err = writer.WriteString("\n"); err != nil { 108 | return err 109 | } 110 | } 111 | if err = writer.Flush(); err != nil { 112 | return err 113 | } 114 | 115 | return nil 116 | } 117 | -------------------------------------------------------------------------------- /maildir-put/maildir-put.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "errors" 7 | "flag" 8 | "fmt" 9 | "github.com/sloonz/go-maildir" 10 | "github.com/sloonz/go-mime-message" 11 | "github.com/sloonz/go-qprintable" 12 | "io" 13 | "log" 14 | "os" 15 | "strings" 16 | "time" 17 | ) 18 | 19 | var hostname string 20 | var cache Cache 21 | 22 | type Attachment struct { 23 | CID string `json:"cid"` 24 | MimeType string `json:"mimeType"` 25 | Data []byte `json:"data"` 26 | Filename string `json:"filename"` 27 | } 28 | 29 | type Message struct { 30 | Id string `json:"id"` 31 | Body string `json:"body"` 32 | Title string `json:"title"` 33 | Author string `json:"author"` 34 | AuthorEmail string `json:"authorEmail"` 35 | Date string `json:"date"` 36 | References []string `json:"references"` 37 | Host string `json:"host"` 38 | Attachments []Attachment `json:"attachments"` 39 | } 40 | 41 | func isAtomText(s string, allowDot bool) bool { 42 | if s == "" { 43 | return false 44 | } 45 | 46 | 
pointAllowed := false 47 | for i := 0; i < len(s); i++ { 48 | c := s[i] 49 | 50 | // "." is allowed, but not in first position 51 | // ".." is not allowed 52 | if c == '.' && pointAllowed && allowDot { 53 | pointAllowed = false 54 | continue 55 | } else { 56 | pointAllowed = true 57 | } 58 | 59 | if c >= 'a' && c <= 'z' { 60 | continue 61 | } 62 | if c >= 'A' && c <= 'Z' { 63 | continue 64 | } 65 | if c >= '0' && c <= '9' { 66 | continue 67 | } 68 | if c == '!' || c == '#' || c == '$' || c == '%' || c == '&' || 69 | c == '\'' || c == '*' || c == '+' || c == '-' || c == '/' || 70 | c == '=' || c == '?' || c == '^' || c == '_' || c == '`' || 71 | c == '{' || c == '|' || c == '}' || c == '~' { 72 | continue 73 | } 74 | 75 | return false 76 | } 77 | 78 | return true 79 | } 80 | 81 | // allowDot=true is for no-fold-quote ; allowDot=fales is for quoted-string 82 | func encNoFoldQuote(s string, buf *bytes.Buffer, allowDot bool) { 83 | if isAtomText(s, allowDot) { 84 | buf.WriteString(s) 85 | } else { 86 | // Encode left part as no-fold-quote 87 | // ASCII 9 (\t), 32 (space), 34 (dquote), 92 (backslash) are escaped with a backslash 88 | // Non-ASCII and ASCII 0, 10 (\n), 13 (\r) are dropped 89 | // Other characters are transmitted as-is 90 | buf.WriteByte('"') 91 | for i := 0; i < len(s); i++ { 92 | if s[i] == 0 || s[i] == '\r' || s[i] == '\n' || s[i] > 127 { 93 | // Drop it 94 | } else if s[i] == '\t' || s[i] == ' ' || s[i] == '"' || s[i] == '\\' { 95 | buf.Write([]byte{'\\', s[i]}) 96 | } else { 97 | buf.WriteByte(s[i]) 98 | } 99 | } 100 | buf.WriteByte('"') 101 | } 102 | } 103 | 104 | func encNoFoldLiteral(s string, buf *bytes.Buffer) { 105 | if isAtomText(s, true) { 106 | buf.WriteString(s) 107 | } else { 108 | // Encode right part as no-fold-literal 109 | // ASCII 9 (\t), 32 (space), 91 ([), 92 (backslash) and 93 (]) are escaped with a backslash 110 | // Non-ASCII and ASCII 0, 10 (\n), 13 (\r) are dropped 111 | // Other characters are transmitted as-is 112 | 
buf.WriteByte('[') 113 | for i := 0; i < len(s); i++ { 114 | if s[i] == 0 || s[i] == '\r' || s[i] == '\n' || s[i] > 127 { 115 | // Drop it 116 | } else if s[i] == '\t' || s[i] == ' ' || s[i] == '[' || s[i] == '\\' || s[i] == ']' { 117 | buf.Write([]byte{'\\', s[i]}) 118 | } else { 119 | buf.WriteByte(s[i]) 120 | } 121 | } 122 | buf.WriteByte(']') 123 | } 124 | } 125 | 126 | func formatDate(date string) string { 127 | parsedDate, err := time.Parse(time.RFC3339, date) 128 | if err != nil { 129 | return date 130 | } 131 | 132 | return parsedDate.Format(time.RFC1123Z) 133 | } 134 | 135 | func MessageId(id, host string) string { 136 | // According to RFC 2822: 137 | // msg-id = [CFWS] "<" id-left "@" id-right ">" [CFWS] 138 | // id-left = dot-atom-text / no-fold-quote 139 | // id-right = dot-atom-text / no-fold-literal 140 | idBuf := bytes.NewBufferString("<") 141 | encNoFoldQuote(id, idBuf, true) 142 | idBuf.WriteByte('@') 143 | encNoFoldLiteral(host, idBuf) 144 | idBuf.WriteByte('>') 145 | 146 | return idBuf.String() 147 | } 148 | 149 | func (m *Message) Process(md *maildir.Maildir) error { 150 | var id string 151 | var mail *message.Message 152 | 153 | if m.Body == "" || m.Title == "" { 154 | return errors.New("Missing mandatory field") 155 | } 156 | 157 | if m.Host == "" { 158 | m.Host = hostname 159 | } 160 | 161 | if m.AuthorEmail == "" { 162 | m.AuthorEmail = "noreply@" + m.Host 163 | } 164 | 165 | if m.Date == "" { 166 | m.Date = time.Now().UTC().Format(time.RFC1123Z) 167 | } 168 | 169 | if m.Id != "" { 170 | id = MessageId(m.Id, m.Host) 171 | if cache.Getset(m.Id, m.Host, id) { 172 | return nil 173 | } 174 | } 175 | 176 | rootContentType := "text/html; charset=\"UTF-8\"" 177 | 178 | bodyPart := message.NewTextMessage(qprintable.UnixTextEncoding, bytes.NewBufferString(m.Body)) 179 | bodyPart.SetHeader("Content-Type", rootContentType) 180 | 181 | if m.Attachments == nil { 182 | mail = bodyPart 183 | } else { 184 | ctBuf := bytes.NewBufferString("") 185 | 
encNoFoldQuote(rootContentType, ctBuf, false) 186 | rootPart := message.NewMultipartMessageParams("related", "", 187 | map[string]string{"type": ctBuf.String()}) 188 | 189 | rootPart.AddPart(bodyPart) 190 | for _, attachment := range m.Attachments { 191 | attPart := message.NewBinaryMessage(bytes.NewBuffer(attachment.Data)) 192 | attPart.SetHeader("Content-ID", fmt.Sprintf("<%s>", attachment.CID)) 193 | attPart.SetHeader("Content-Type", attachment.MimeType) 194 | if attachment.Filename == "" { 195 | attPart.SetHeader("Content-Disposition", "inline") 196 | } else { 197 | fnBuf := bytes.NewBufferString("") 198 | encNoFoldQuote(attachment.Filename, fnBuf, false) 199 | attPart.SetHeader("Content-Description", attachment.Filename) 200 | attPart.SetHeader("Content-Disposition", fmt.Sprintf("inline; filename=%s", fnBuf.String())) 201 | } 202 | rootPart.AddPart(attPart) 203 | } 204 | 205 | mail = &rootPart.Message 206 | } 207 | 208 | // In a maildir, mails are expected to end with LF line endings. Most softwares are 209 | // just fine with CRLF line endings, but some (for example Mutt) don’t. 
210 | mail.EOL = "\n" 211 | mail.SetHeader("Date", formatDate(m.Date)) 212 | mail.SetHeader("Subject", message.EncodeWord(m.Title)) 213 | mail.SetHeader("From", message.EncodeWord(m.Author)+" <"+m.AuthorEmail+">") 214 | if id != "" { 215 | mail.SetHeader("Message-Id", id) 216 | } 217 | if len(m.References) > 0 { 218 | refs := "" 219 | for _, r := range m.References { 220 | refs += " " + MessageId(r, m.Host) 221 | } 222 | mail.SetHeader("References", refs) 223 | } 224 | 225 | md.CreateMail(mail) 226 | 227 | return nil 228 | } 229 | 230 | func main() { 231 | var rootDir, folder string 232 | var err error 233 | 234 | flag.StringVar(&rootDir, "root", os.ExpandEnv("$HOME/Maildir"), "path to maildir") 235 | flag.StringVar(&folder, "folder", "", "maildir folder name to put email (empty for inbox)") 236 | flag.StringVar(&cache.path, "cache", os.ExpandEnv("$HOME/.cache/maildir-put.cache"), 237 | "path to store message-ids to drop duplicate messages") 238 | flag.BoolVar(&cache.useRedis, "redis", false, "use redis for cache storage") 239 | flag.StringVar(&cache.redisOptions.Addr, "redis-addr", "127.0.0.1:6379", "redis address") 240 | flag.Int64Var(&cache.redisOptions.DB, "redis-db", 0, "redis base") 241 | flag.StringVar(&cache.redisOptions.Password, "redis-password", "", "redis password") 242 | 243 | if flag.Parse(); !flag.Parsed() { 244 | flag.PrintDefaults() 245 | os.Exit(1) 246 | } 247 | 248 | if err = cache.OpenCache(); err != nil { 249 | log.Fatalf("Can't open cache: %s", err.Error()) 250 | } 251 | 252 | if hostname, err = os.Hostname(); err != nil { 253 | log.Fatalf("Can't get hostname: %s", err.Error()) 254 | } 255 | 256 | md, err := maildir.New(rootDir, true) 257 | if err != nil { 258 | log.Fatalf("Can't open maildir: %s", err.Error()) 259 | } 260 | 261 | for _, subfolder := range strings.Split(folder, "/") { 262 | if subfolder != "" { 263 | md, err = md.Child(subfolder, true) 264 | if err != nil { 265 | log.Fatalf("Can't open maildir: %s", err.Error()) 266 | } 267 | 
} 268 | } 269 | 270 | dec := json.NewDecoder(os.Stdin) 271 | for { 272 | msg := new(Message) 273 | err = dec.Decode(msg) 274 | if err == nil { 275 | err = msg.Process(md) 276 | } 277 | 278 | if err == io.EOF { 279 | break 280 | } else if err != nil { 281 | log.Printf("Cannot read input message: %s", err.Error()) 282 | } 283 | } 284 | 285 | if err = cache.Dump(); err != nil { 286 | log.Printf("warning: can't dump cache: %s", err.Error()) 287 | } 288 | } 289 | -------------------------------------------------------------------------------- /rss2json/README.md: -------------------------------------------------------------------------------- 1 | # rss2json 2 | 3 | `rss2json` is a simple tool intended to be used with `maildir-put` and `ggs`. It is used to convert any RSS or Atom feed into a stream of messages usable by `maildir-put`. 4 | 5 | ## Usage 6 | 7 | rss2json feed-url 8 | 9 | rss2json -url=feed-url < feed-from-stdin 10 | 11 | ## Dependencies 12 | 13 | * libxml 14 | * Optional: python and feedparser for parsing of ill-formed feeds 15 | 16 | ## Installation 17 | 18 | go build && cp rss2json /usr/local/bin 19 | -------------------------------------------------------------------------------- /rss2json/rss2json.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "flag" 6 | "fmt" 7 | "io" 8 | "net/url" 9 | "os" 10 | "regexp" 11 | "strings" 12 | "time" 13 | 14 | "github.com/sloonz/cfeedparser" 15 | ) 16 | 17 | func firstNonEmpty(s ...string) string { 18 | var val string 19 | for _, val = range s { 20 | if val != "" { 21 | break 22 | } 23 | } 24 | return val 25 | } 26 | 27 | func getDate(e *feedparser.Entry) string { 28 | emptyTime := time.Time{} 29 | if e.PublicationDateParsed != emptyTime { 30 | return e.PublicationDateParsed.Format(time.RFC3339) 31 | } 32 | if e.ModificationDateParsed != emptyTime { 33 | return e.ModificationDateParsed.Format(time.RFC3339) 34 | } 35 | if 
e.PublicationDate != "" { 36 | return e.PublicationDate 37 | } 38 | if e.ModificationDate != "" { 39 | return e.ModificationDate 40 | } 41 | return time.Now().UTC().Format(time.RFC3339) 42 | } 43 | 44 | var convertEOLReg = regexp.MustCompile("\r\n?") 45 | 46 | func convertEOL(s string) string { 47 | return convertEOLReg.ReplaceAllString(s, "\n") 48 | } 49 | 50 | func process(rawFeedUrl, rawBaseUrl string) error { 51 | feedUrl, err := url.Parse(rawFeedUrl) 52 | if err != nil { 53 | return err 54 | } 55 | 56 | baseUrl, err := url.Parse(rawBaseUrl) 57 | if err != nil { 58 | return err 59 | } 60 | 61 | var feed *feedparser.Feed 62 | if feedUrl.Scheme != "stdin" { 63 | feed, err = feedparser.ParseURL(feedUrl) 64 | } else { 65 | data, err := io.ReadAll(os.Stdin) 66 | if err != nil { 67 | return err 68 | } 69 | feed, err = feedparser.ParseString(string(data)) 70 | } 71 | 72 | if err != nil { 73 | return err 74 | } 75 | 76 | for _, entry := range feed.Entries { 77 | body := convertEOL(firstNonEmpty(entry.Content, entry.Summary)) 78 | body += "\n

View post

\n" 79 | 80 | linkUrl, err := url.Parse(entry.Link) 81 | linkHost := "" 82 | if err == nil { 83 | linkHost = linkUrl.Host 84 | } 85 | 86 | jsonEntry := make(map[string]string) 87 | jsonEntry["id"] = firstNonEmpty(entry.Id, entry.Link, entry.PublicationDate+":"+entry.Title) + ":" + rawBaseUrl 88 | jsonEntry["title"] = strings.TrimSpace(entry.Title) 89 | jsonEntry["body"] = body 90 | jsonEntry["author"] = strings.TrimSpace(firstNonEmpty(entry.Author.Name, entry.Author.Uri, entry.Author.Text)) 91 | jsonEntry["authorAddress"] = strings.TrimSpace(entry.Author.Email) 92 | jsonEntry["date"] = getDate(&entry) 93 | jsonEntry["host"] = firstNonEmpty(baseUrl.Host, linkHost) 94 | if entry.Link == "" { 95 | jsonEntry["url"] = baseUrl.String() 96 | } else { 97 | jsonEntry["url"] = entry.Link 98 | } 99 | 100 | encodedEntry, err := json.Marshal(jsonEntry) 101 | if err != nil { 102 | return err 103 | } 104 | 105 | fmt.Printf("%s\n", string(encodedEntry)) 106 | } 107 | 108 | return nil 109 | } 110 | 111 | func main() { 112 | baseUrlFlag := flag.String("url", "", "override feed url, useful for feeds given on stdin") 113 | flag.Parse() 114 | 115 | feedUrl := "stdin:" 116 | if flag.NArg() > 0 { 117 | feedUrl = flag.Args()[0] 118 | } 119 | 120 | baseUrl := feedUrl 121 | if *baseUrlFlag != "" { 122 | baseUrl = *baseUrlFlag 123 | } 124 | 125 | err := process(feedUrl, baseUrl) 126 | if err != nil { 127 | fmt.Fprintf(os.Stderr, "Can't process feed: %s\n", err.Error()) 128 | os.Exit(1) 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /scrapers/README.md: -------------------------------------------------------------------------------- 1 | This contains example scrapers to show how you can write your own. 2 | 3 | # ua-scraper-mal 4 | 5 | List season animes from [myanimelist](https://myanimelist.net/anime/season). 
6 | -------------------------------------------------------------------------------- /scrapers/ua-scraper-mal: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import json 4 | import re 5 | 6 | import lxml.etree 7 | import requests 8 | 9 | html = lxml.etree.HTML(requests.get("http://myanimelist.net/anime/season").text) 10 | for item in html.cssselect(".seasonal-anime"): 11 | title = item.cssselect('.link-title')[0] 12 | genres = item.cssselect('.genres')[0] 13 | desc = item.cssselect('.synopsis')[0] 14 | 15 | link = title.get("href") 16 | img = item.cssselect(".image img")[0] 17 | img_tag = '' % (img.get("src") or img.get("data-src")) 18 | 19 | print((json.dumps({ 20 | 'url': link, 21 | 'id': link, 22 | 'title': title.text, 23 | 'body': '

%s

%s

%s

%s

// Message is a decoded JSON message, keyed by field name.
type Message map[string]interface{}

// CacheDir is the directory in which fetch stores downloaded resources.
var CacheDir string

// hash returns the lowercase hexadecimal SHA-256 digest of name.
func hash(name string) string {
	return fmt.Sprintf("%x", sha256.Sum256([]byte(name)))
}

// fetch returns the content and the content type of the resource designated
// by resUrlString, resolved against baseUrl when it is relative.
//
// Resources are cached in CacheDir: a cached copy is returned without any
// network access, and a successful download is written back to the cache.
// Failures are logged rather than returned; fetch yields (nil, "") when the
// URL cannot be parsed, when it is relative and baseUrl is nil, and when the
// download fails or answers with a non-2xx status.
func fetch(resUrlString string, baseUrl *url.URL) (data []byte, contentType string) {
	// Resolve relative url
	resUrl, err := url.Parse(resUrlString)
	if err != nil || resUrl == nil {
		return nil, ""
	}
	if !resUrl.IsAbs() {
		if baseUrl == nil {
			return nil, ""
		}
		resUrl = baseUrl.ResolveReference(resUrl)
	}
	target := resUrl.String()

	// Test cache.
	// The digest is already a hexadecimal string and %x encodes it a second
	// time; this is kept as-is so that existing cache files are still found.
	h := hash(target)
	dataCacheFile := fmt.Sprintf("%s/data-%x@%s", CacheDir, h, resUrl.Host)
	typeCacheFile := fmt.Sprintf("%s/type-%x@%s", CacheDir, h, resUrl.Host)
	data, err = ioutil.ReadFile(dataCacheFile)
	if err == nil {
		var bContentType []byte
		bContentType, err = ioutil.ReadFile(typeCacheFile)
		contentType = string(bContentType)
	}
	if err == nil {
		return data, contentType
	}
	if !os.IsNotExist(err) {
		log.Printf("Can't read cache file %s or %s: %s", dataCacheFile, typeCacheFile, err.Error())
	}

	// Cache miss
	resp, err := http.Get(target)
	if err != nil {
		log.Printf("Error downloading %s: %s", target, err.Error())
		return nil, ""
	}
	// Close the body on every path, including error statuses, so that the
	// underlying connection is released.
	defer resp.Body.Close()

	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		log.Printf("Error downloading %s: %s", target, resp.Status)
		return nil, ""
	}

	data, err = ioutil.ReadAll(resp.Body)
	if err != nil {
		log.Printf("Error downloading %s: %s", target, err.Error())
		return nil, ""
	}

	// Get type: trust the server when it names one, sniff the data otherwise.
	contentType = resp.Header.Get("Content-Type")
	if contentType == "" {
		contentType = http.DetectContentType(data)
	}

	// Write to cache. A failure here only costs a later download.
	if err = ioutil.WriteFile(dataCacheFile, data, os.FileMode(0644)); err != nil {
		log.Printf("Can't write cache file %s: %s", dataCacheFile, err.Error())
	}
	if err = ioutil.WriteFile(typeCacheFile, []byte(contentType), os.FileMode(0644)); err != nil {
		log.Printf("Can't write cache file %s: %s", typeCacheFile, err.Error())
	}

	return data, contentType
}
109 | } 110 | 111 | var msgUrl *url.URL 112 | if _, ok = msg["url"]; ok { 113 | if _, ok = msg["url"].(string); ok { 114 | msgUrl, _ = url.Parse(msg["url"].(string)) 115 | } 116 | } 117 | 118 | var attachments []map[string]string 119 | attrRe := "\\s*[\"']?\\s*([^\\s\"'>]+)\\s*[\"']?" 120 | 121 | // Inline as attachment 122 | body = regexp.MustCompile("]+>").ReplaceAllStringFunc(body, func(img string) string { 123 | src := regexp.MustCompile("src="+attrRe).FindStringSubmatch(img) 124 | if len(src) > 1 && !strings.HasPrefix(src[1], "data:") { 125 | cid := hash(src[1]) 126 | filename := regexp.MustCompile("/([^/?]+)(\\?|$)").FindStringSubmatch(src[1]) 127 | data, mimeType := fetch(html.UnescapeString(src[1]), msgUrl) 128 | if data != nil { 129 | attachment := map[string]string { 130 | "cid": cid, 131 | "mimeType": mimeType, 132 | "data": base64.StdEncoding.EncodeToString(data)} 133 | if filename != nil { 134 | attachment["filename"] = filename[1] 135 | } 136 | attachments = append(attachments, attachment) 137 | return strings.Replace(img, src[0], fmt.Sprintf("src=\"cid:%s\"", cid), 1) 138 | } 139 | } 140 | return img 141 | }) 142 | 143 | // Inline