├── .gitignore
├── COPYING
├── Makefile
├── README.md
├── doc
├── ggs
├── ggs.html
├── ggs.md
├── maildir-put
├── maildir-put.html
├── maildir-put.md
├── rss2json
├── rss2json.html
├── rss2json.md
├── ua
├── ua-inline
├── ua-inline.html
├── ua-inline.md
├── ua-proxify
├── ua-proxify.html
├── ua-proxify.md
├── ua-scrapers
├── ua-scrapers.html
├── ua-scrapers.md
├── ua.html
└── ua.md
├── ggs
├── README.md
└── ggs.go
├── ggsrc.example
├── go.mod
├── go.sum
├── maildir-put
├── README.md
├── cache.go
└── maildir-put.go
├── rss2json
├── README.md
└── rss2json.go
├── scrapers
├── README.md
└── ua-scraper-mal
├── ua-inline
├── README.md
└── ua-inline.go
├── ua-proxify
├── README.md
├── get.php
└── ua-proxify.go
└── weboobmsg2json
└── weboobmsg2json
/.gitignore:
--------------------------------------------------------------------------------
1 | ggs/ggs
2 | maildir-put/maildir-put
3 | rss2json/rss2json
4 | ua-inline/ua-inline
5 | ua-proxify/ua-proxify
6 | scrapers/ua-scraper-torrent9
7 | tmp-go
8 | *.pyc
9 | node_modules
10 |
--------------------------------------------------------------------------------
/COPYING:
--------------------------------------------------------------------------------
1 | This is free and unencumbered software released into the public domain.
2 |
3 | Anyone is free to copy, modify, publish, use, compile, sell, or
4 | distribute this software, either in source code form or as a compiled
5 | binary, for any purpose, commercial or non-commercial, and by any
6 | means.
7 |
8 | In jurisdictions that recognize copyright laws, the author or authors
9 | of this software dedicate any and all copyright interest in the
10 | software to the public domain. We make this dedication for the benefit
11 | of the public at large and to the detriment of our heirs and
12 | successors. We intend this dedication to be an overt act of
13 | relinquishment in perpetuity of all present and future rights to this
14 | software under copyright law.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | OTHER DEALINGS IN THE SOFTWARE.
23 |
24 | For more information, please refer to <http://unlicense.org/>
25 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | PREFIX=/usr/local
2 | DESTDIR=
3 |
4 | BINDIR=$(DESTDIR)$(PREFIX)/bin
5 | DOCDIR=$(DESTDIR)$(PREFIX)/share/doc/ua
6 | MANDIR=$(DESTDIR)$(PREFIX)/share/man
7 |
8 | GODIRS=ggs rss2json maildir-put ua-inline ua-proxify
9 | SCRAPERS=mal
10 |
11 | export GOPATH ?= $(PWD)/tmp-go
12 |
13 | .PHONY: all clean doc
14 |
15 | all: ggs/ggs rss2json/rss2json maildir-put/maildir-put ua-inline/ua-inline ua-proxify/ua-proxify
16 |
17 | doc:
18 | test -d doc || mkdir doc
19 | test -f doc/ua.md || ln -s ../README.md doc/ua.md
20 | test -f doc/ua-scrapers.md || ln -s ../scrapers/README.md doc/ua-scrapers.md
21 | for d in $(GODIRS) ; do test -f doc/$$d.md || ln -s ../$$d/README.md doc/$$d.md ; done
22 | cd doc ; for f in *.md ; do ronn $$f ; done
23 |
24 | ggs/ggs: ggs/ggs.go $(GOPATH)
25 | cd ggs; go build
26 |
27 | rss2json/rss2json: rss2json/rss2json.go $(GOPATH)
28 | cd rss2json; go build
29 |
30 | maildir-put/maildir-put: maildir-put/maildir-put.go maildir-put/cache.go $(GOPATH)
31 | cd maildir-put; go build
32 |
33 | ua-inline/ua-inline: ua-inline/ua-inline.go $(GOPATH)
34 | cd ua-inline; go build
35 |
36 | ua-proxify/ua-proxify: ua-proxify/ua-proxify.go $(GOPATH)
37 | cd ua-proxify; go build
38 |
39 | $(GOPATH):
40 | mkdir $(GOPATH)
41 | mkdir $(GOPATH)/bin
42 | mkdir $(GOPATH)/src
43 | mkdir $(GOPATH)/pkg
44 |
45 | install: all
46 | install -d $(BINDIR)
47 | for f in $(GODIRS) ; do install $$f/$$f $(BINDIR)/ ; done
48 | for s in $(SCRAPERS) ; do install scrapers/ua-scraper-$$s $(BINDIR)/ ; done
49 | install weboobmsg2json/weboobmsg2json $(BINDIR)/
50 |
51 | install -d $(DOCDIR)
52 | install -d $(MANDIR)/man1/
53 | install ggsrc.example $(DOCDIR)
54 | for f in doc/*.md doc/*.html ; do install $$f $(DOCDIR)/ ; done
55 | for f in $(GODIRS) ; do gzip < doc/$$f > $(MANDIR)/man1/$$f.1.gz ; done
56 |
57 | clean:
58 | for f in $(GODIRS) ; do rm -f $$f/$$f ; done
59 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # The Universal Aggregator
2 |
3 | This is a set of tools to aggregate all your information into your
4 | maildir. Each tool can be used separately; you can find a more complete
5 | description in its respective folder.
6 |
7 | * `ggs` is a program which runs commands periodically
8 | * `maildir-put` reads a set of messages from its standard input and puts
9 | them in a maildir
10 | * `rss2json` transforms any RSS/Atom feed into a set of messages that
11 | `maildir-put` can process
12 | * You can write your own producers (scrapers) for maildir-put ; some are
13 | already provided in the `scrapers/` directory.
14 | * You can also put filters, like `ua-inline` or `ua-proxify`.
15 |
16 | ## Usage
17 |
18 | ggs [path-to-configuration-file]
19 |
20 | ## Dependencies
21 |
22 | * Go
23 | * libxml
24 | * [jq](https://stedolan.github.io/jq/)
25 | * For additional scrapers: scrapy, python 3 and nodejs
26 |
27 | ## Installation
28 |
29 | make && sudo make install
30 |
31 | ## Configuration
32 |
33 | See the `ggs` documentation for more information. Here is a sample
34 | configuration file, which puts some feeds into `Fun` and `Geek` folders,
35 | some new chapters notification from mangareader into `Entertainment`,
36 | and my Github personal feed into inbox:
37 |
38 | default_timeout=30
39 |
40 | rss() {
41 | command 2000 "rss2json \"$1\" | ua-inline | maildir-put -root $HOME/Maildir-feeds -folder \"$2\""
42 | }
43 |
44 | mangareader() {
45 | command 2000 "ua-scraper-mangareader -a name=$1 | "\
46 | "maildir-put -root $HOME/Maildir-feeds -folder Entertainment"
47 | }
48 |
49 | rss http://xkcd.com/atom.xml Fun
50 | rss http://feeds.feedburner.com/smbc-comics/PvLb Fun
51 | rss http://syndication.thedailywtf.com/TheDailyWtf Fun
52 |
53 | rss http://www.reddit.com/r/science/top/.rss Geek
54 | rss http://www.phoronix.com/rss.php Geek
55 |
56 | mangareader naruto
57 | mangareader bleach
58 | mangareader gantz
59 |
60 | rss https://github.com/sloonz.private.atom?token=HIDDEN ""
61 |
62 | ## Weboob compatibility
63 |
64 | You can use [weboob](http://weboob.org/) modules used by
65 | [boobmsg](http://weboob.org/applications/boobmsg) to generate
66 | messages. Configure the modules using `boobmsg`, and use `weboobmsg2json
67 | [module-name]` to generate messages. `[module-name]` can be found in
68 | `~/.config/weboob/backends`.
69 |
--------------------------------------------------------------------------------
/doc/ggs:
--------------------------------------------------------------------------------
1 | .\" generated with Ronn/v0.7.3
2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3
3 | .
4 | .TH "GGS" "" "February 2017" "" ""
5 | \fBGGS\fR (Grey Goo Spawner) is a simple program that runs jobs periodically\. It is similar to cron, but with some differences:
6 | .
7 | .IP "\(bu" 4
8 | Whereas \fBcron\fR launches jobs at specific times, \fBggs\fR is mainly interested in intervals\. It will run all jobs at its startup and then will re\-run each job after a certain delay has passed\.
9 | .
10 | .IP "\(bu" 4
11 | \fBggs\fR has a system of \fBworkers\fR, similar to many servers (like nginx or Apache with MPM Workers) to limit resource concurrency between your jobs\.
12 | .
13 | .IP "\(bu" 4
14 | You can define a timeout for your jobs, too\.
15 | .
16 | .IP "" 0
17 | .
18 | .SH "Usage"
19 | \fBggs [options] [configuration file]\fR
20 | .
21 | .P
22 | If no configuration file is provided, \fBggs\fR will use \fB~/\.config/ggsrc\fR by default\.
23 | .
24 | .SH "Requirements"
25 | .
26 | .IP "\(bu" 4
27 | jq \fIhttps://stedolan\.github\.io/jq/\fR
28 | .
29 | .IP "" 0
30 | .
31 | .SH "Installation"
32 | \fBgo build ggs\.go && cp ggs /usr/local/bin\fR
33 | .
34 | .SH "Configuration"
35 | The configuration file is a shell script, so the same rules as \fBsh\fR apply\.
36 | .
37 | .P
38 | You create a job with the \fBcommand\fR function, which takes two arguments: the delay between launches, and the command to run\. You can specify a timeout (in seconds) by setting the \fBtimeout\fR environment variable (optional, default: 0, meaning no timeout)\.
39 | .
40 | .IP "" 4
41 | .
42 | .nf
43 |
44 | timeout=30 command 300 "uptime | mail admin@example\.com"
45 | command 5 \'ping \-c 1 github\.com || sudo halt \-p\'
46 | .
47 | .fi
48 | .
49 | .IP "" 0
50 | .
51 | .P
52 | You can also set the number of workers (maximum number of jobs that can run simultaneously):
53 | .
54 | .IP "" 4
55 | .
56 | .nf
57 |
58 | workers=5 #Warning: don\'t do "workers = 5", spaces matter here!
59 | .
60 | .fi
61 | .
62 | .IP "" 0
63 | .
64 | .SH "Advanced configuration"
65 | The configuration file is just a shell script which produces a JSON document which matches the structure of the \fBConfig\fR structure\. You can do \fBexec my_script\fR to produce the same JSON with a script in your favorite language\. You can also use variables, functions, execute external commands, and so on\.\.\.
66 |
--------------------------------------------------------------------------------
/doc/ggs.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | GGS
7 |
44 |
45 |
52 |
53 |
54 |
55 |
62 |
63 |
64 | - ggs
65 |
66 | - ggs
67 |
68 |
69 |
GGS
70 |
GGS
(Grey Goo Spawner) is a simple program that runs jobs
71 | periodically. It is similar to cron, but with some differences:
72 |
73 |
74 | Whereas cron
launches jobs at specific times, ggs
is mainly
75 | interested in intervals. It will run all jobs at its startup and then
76 | will re-run each job after a certain delay has passed.
77 | ggs
has a system of workers
, similar to many servers (like nginx
78 | or Apache with MPM Workers) to limit resource concurrency between your
79 | jobs.
80 | You can define a timeout for your jobs, too.
81 |
82 |
83 |
84 |
Usage
85 |
86 |
ggs [options] [configuration file]
87 |
88 |
If no configuration file is provided, ggs
will use ~/.config/ggsrc
89 | by default.
90 |
91 |
Requirements
92 |
93 |
96 |
97 |
98 |
Installation
99 |
100 |
go build ggs.go && cp ggs /usr/local/bin
101 |
102 |
Configuration
103 |
104 |
Configuration file is a shell script, so same rule as sh
applies.
105 |
106 |
You create a job with the command
function, which takes two arguments:
107 | the delay between launches, and the command to run. You can specify a
108 | timeout (in seconds) by setting the timeout
environment variable
109 | (optional, default: 0 no timeout).
110 |
111 |
timeout=30 command 300 "uptime | mail admin@example.com"
112 | command 5 'ping -c 1 github.com || sudo halt -p'
113 |
114 |
115 |
You can also set the number of workers (maximum number of jobs that can
116 | run simultaneously):
117 |
118 |
workers=5 #Warning: don't do "workers = 5", spaces matter here!
119 |
120 |
121 |
Advanced configuration
122 |
123 |
The configuration file is just a shell script which produces a JSON
124 | document which matches the structure of the Config
structure. You can do
125 | exec my_script
to produce the same JSON with a script in your favorite
126 | language. You can also use variables, functions, execute external
127 | commands, and so on...
128 |
129 |
130 |
135 |
136 |
137 |
138 |
139 |
--------------------------------------------------------------------------------
/doc/ggs.md:
--------------------------------------------------------------------------------
1 | ../ggs/README.md
--------------------------------------------------------------------------------
/doc/maildir-put:
--------------------------------------------------------------------------------
1 | .\" generated with Ronn/v0.7.3
2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3
3 | .
4 | .TH "MAILDIR\-PUT" "" "February 2016" "" ""
5 | \fBmaildir\-put\fR is a tool to put messages in a predefined JSON format inside a maildir\. It also tries to detect duplicates and drop them\.
6 | .
7 | .SH "Usage"
8 | .
9 | .nf
10 |
11 | message\-producer | maildir\-put [arguments]
12 | .
13 | .fi
14 | .
15 | .P
16 | Available arguments:
17 | .
18 | .IP "\(bu" 4
19 | \fB\-cache\fR: path to a cache file used to store message IDs for duplicate detection
20 | .
21 | .IP "\(bu" 4
22 | \fB\-root\fR: path to the root maildir directory\. Defaults to ~/Maildir\.
23 | .
24 | .IP "\(bu" 4
25 | \fB\-folder\fR: maildir folder to put messages\. Defaults to "", the inbox\. The folder separator is "/"\.
26 | .
27 | .IP "\(bu" 4
28 | \fB\-redis\fR: specify this flag to use redis for message IDs cache\. If both \fB\-redis\fR and \fB\-cache\fR are specified, the given cache will be migrated to redis
29 | .
30 | .IP "\(bu" 4
31 | \fB\-redis\-db\fR, \fB\-redis\-addr\fR, \fB\-redis\-password\fR: redis connection settings\.
32 | .
33 | .IP "" 0
34 | .
35 | .SH "Installation"
36 | .
37 | .nf
38 |
39 | go build && cp maildir\-put /usr/local/bin
40 | .
41 | .fi
42 | .
43 | .SH "Input format"
44 | As its input, \fBmaildir\-put\fR takes a stream of JSON dictionaries (not a list of dictionaries)\. Each dictionary represents a message\. Available keys are:
45 | .
46 | .IP "\(bu" 4
47 | \fIbody\fR: the body of the message, in HTML\. Mandatory\.
48 | .
49 | .IP "\(bu" 4
50 | \fItitle\fR: the subject of the message, in text\. Mandatory\.
51 | .
52 | .IP "\(bu" 4
53 | \fIdate\fR: the date of the message\. Optional, defaults to current time\. If provided, must be RFC 2822 compliant\.
54 | .
55 | .IP "\(bu" 4
56 | \fIauthor\fR: the name of the author, in text\. Optional\.
57 | .
58 | .IP "\(bu" 4
59 | \fIauthorEmail\fR: the email address of the author\. Optional\.
60 | .
61 | .IP "\(bu" 4
62 | \fIid\fR: a unique identifier for the message\. It will be used for the creation of the Message\-Id header, as well as in duplicates detection\. It should include three parts: a unique identifier for the application (for example: \fBrss2json\fR), a unique identifier for the parameters (for example: the feed URL) and a unique identifier for the message (for example: an article ID)\. The identifier for the parameters may be omitted if you provide a \fIhost\fR key and the host is sufficient to identify the parameters\. Mandatory for threaded discussions handling and duplicates detection, optional otherwise\.
63 | .
64 | .IP "\(bu" 4
65 | \fIhost\fR: the domain name of the producer of the message (in general, the hostname of the server from which you fetched the information)\. Used in \fBMessage\-Id\fR and \fBReferences\fR headers construction, as well as in duplicates detection\. Optional, but strongly encouraged for threaded discussions handling and duplicates detection\.
66 | .
67 | .IP "\(bu" 4
68 | \fIreferences\fR: for threaded discussions, \fIid\fR of the parent messages\. Note that \fIhost\fR must match in the two messages\.
69 | .
70 | .IP "\(bu" 4
71 | \fIurl\fR: URL of the message\. Used by \fBua\-inline\fR to resolve relative references\.
72 | .
73 | .IP "" 0
74 | .
75 | .P
76 | All strings must be encoded in UTF\-8\.
77 |
--------------------------------------------------------------------------------
/doc/maildir-put.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | maildir-put
7 |
44 |
45 |
52 |
53 |
54 |
55 |
60 |
61 |
62 | - maildir-put
63 |
64 | - maildir-put
65 |
66 |
67 |
maildir-put
68 |
maildir-put
is a tool to put messages in a predefined JSON format
69 | inside a maildir. It also tries to detect duplicates and drop them.
70 |
71 |
Usage
72 |
73 |
message-producer | maildir-put [arguments]
74 |
75 |
76 |
Available arguments:
77 |
78 |
79 | -cache
: path to a cache file used to store message IDs for duplicate
80 | detection
81 | -root
: path to the root maildir directory. Defaults to ~/Maildir.
82 | -folder
: maildir folder to put messages. Defaults to "", the inbox.
83 | The folder separator is "/".
84 | -redis
: specify this flag to use redis for message IDs cache. If both
85 | -redis
and -cache
are specified, the given cache will be migrated to
86 | redis
87 | -redis-db
, -redis-addr
, -redis-password
: redis connection settings.
88 |
89 |
90 |
91 |
Installation
92 |
93 |
go build && cp maildir-put /usr/local/bin
94 |
95 |
96 |
97 |
98 |
As its input, maildir-put
takes a stream of JSON dictionaries (not a
99 | list of dictionaries). Each dictionary represents a message. Available
100 | keys are:
101 |
102 |
103 | - body: the body of the message, in HTML. Mandatory.
104 | - title: the subject of the message, in text. Mandatory.
105 | - date: the date of the message. Optional, defaults to current time. If
106 | provided, must be RFC 2822 compliant.
107 | - author: the name of the author, in text. Optional.
108 | - authorEmail: the email address of the author. Optional.
109 | - id: an unique identifier for the message. It will be used for the
110 | creation of the Message-Id header, as well as in duplicates detection. It
111 | should include three parts: an unique identifier for the application
112 | (for example:
rss2json
), a unique identifier for the parameters
113 | (for example: the feed URL) and an unique identifier for the message
114 | (for example: an article ID). The identifier for the parameters may be
115 | omitted if you provide a host key and that the host is sufficient to
116 | identify the parameters. Mandatory for threaded discussions handling and
117 | duplicates detection, optional else.
118 | - host: the domain name of the producer of the message (in general,
119 | the hostname of the server from which you fetched the information). Used
120 | in
Message-Id
and References
headers construction, as well as in
121 | duplicates detection. Optional, but strongly encouraged for threaded
122 | discussions handling and duplicates detection.
123 | - references: for threaded discussions, id of the parent messages. Note
124 | that host must match in the two messages.
125 | - url: URL of the message. Used by
ua-inline
to resolve relative
126 | references.
127 |
128 |
129 |
130 |
All strings must be encoded in UTF-8.
131 |
132 |
133 |
138 |
139 |
140 |
141 |
142 |
--------------------------------------------------------------------------------
/doc/maildir-put.md:
--------------------------------------------------------------------------------
1 | ../maildir-put/README.md
--------------------------------------------------------------------------------
/doc/rss2json:
--------------------------------------------------------------------------------
1 | .\" generated with Ronn/v0.7.3
2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3
3 | .
4 | .TH "RSS2JSON" "" "March 2014" "" ""
5 | \fBrss2json\fR is a simple tool intended to be used with \fBmaildir\-put\fR and \fBggs\fR\. It is used to convert any RSS or Atom feed into a stream of messages usable by \fBmaildir\-put\fR\.
6 | .
7 | .SH "Usage"
8 | .
9 | .nf
10 |
11 | rss2json feed\-url
12 | .
13 | .fi
14 | .
15 | .SH "Dependencies"
16 | .
17 | .IP "\(bu" 4
18 | libxml
19 | .
20 | .IP "\(bu" 4
21 | Optional: python and feedparser for parsing of ill\-formed feeds
22 | .
23 | .IP "" 0
24 | .
25 | .SH "Installation"
26 | .
27 | .nf
28 |
29 | go build && cp rss2json /usr/local/bin
30 | .
31 | .fi
32 |
33 |
--------------------------------------------------------------------------------
/doc/rss2json.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | rss2json
7 |
44 |
45 |
52 |
53 |
54 |
55 |
60 |
61 |
62 | - rss2json
63 |
64 | - rss2json
65 |
66 |
67 |
rss2json
68 |
rss2json
is a simple tool intended to be used with maildir-put
and ggs
. It is used to convert any RSS or Atom feed into a stream of messages usable by maildir-put
.
69 |
70 |
Usage
71 |
72 |
rss2json feed-url
73 |
74 |
75 |
Dependencies
76 |
77 |
78 | - libxml
79 | - Optional: python and feedparser for parsing of ill-formed feeds
80 |
81 |
82 |
83 |
Installation
84 |
85 |
go build && cp rss2json /usr/local/bin
86 |
87 |
88 |
89 |
94 |
95 |
96 |
97 |
98 |
--------------------------------------------------------------------------------
/doc/rss2json.md:
--------------------------------------------------------------------------------
1 | ../rss2json/README.md
--------------------------------------------------------------------------------
/doc/ua:
--------------------------------------------------------------------------------
1 | .\" generated with Ronn/v0.7.3
2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3
3 | .
4 | .TH "UA" "" "July 2017" "" ""
5 | This is a set of tools to aggregate all your information into your maildir\. Each tool can be used separately ; you can find a more complete description in their respective folder\.
6 | .
7 | .IP "\(bu" 4
8 | \fBggs\fR is a software which runs commands periodically
9 | .
10 | .IP "\(bu" 4
11 | \fBmaildir\-put\fR reads a set of messages from its standard input and puts them in a maildir
12 | .
13 | .IP "\(bu" 4
14 | \fBrss2json\fR transforms any RSS/Atom feed into a set of messages that \fBmaildir\-put\fR can process
15 | .
16 | .IP "\(bu" 4
17 | You can write your own producers (scrapers) for maildir\-put ; some are already provided in the \fBscrapers/\fR directory\.
18 | .
19 | .IP "\(bu" 4
20 | You can also put filters, like \fBua\-inline\fR or \fBua\-proxify\fR\.
21 | .
22 | .IP "" 0
23 | .
24 | .SH "Usage"
25 | .
26 | .nf
27 |
28 | ggs [path\-to\-configuration\-file]
29 | .
30 | .fi
31 | .
32 | .SH "Dependencies"
33 | .
34 | .IP "\(bu" 4
35 | Go
36 | .
37 | .IP "\(bu" 4
38 | libxml
39 | .
40 | .IP "\(bu" 4
41 | jq \fIhttps://stedolan\.github\.io/jq/\fR
42 | .
43 | .IP "\(bu" 4
44 | For additional scrapers: scrapy, python 3 and nodejs
45 | .
46 | .IP "" 0
47 | .
48 | .SH "Installation"
49 | .
50 | .nf
51 |
52 | make && sudo make install
53 | .
54 | .fi
55 | .
56 | .SH "Configuration"
57 | See the \fBggs\fR documentation for more information\. Here is a sample configuration file, which puts some feeds into \fBFun\fR and \fBGeek\fR folders, some new chapters notification from mangareader into \fBEntertainment\fR, and my Github personal feed into inbox:
58 | .
59 | .IP "" 4
60 | .
61 | .nf
62 |
63 | default_timeout=30
64 |
65 | rss() {
66 | command 2000 "rss2json \e"$1\e" | ua\-inline | maildir\-put \-root $HOME/Maildir\-feeds \-folder \e"$2\e""
67 | }
68 |
69 | mangareader() {
70 | command 2000 "ua\-scraper\-mangareader \-a name=$1 | "\e
71 | "maildir\-put \-root $HOME/Maildir\-feeds \-folder Entertainment"
72 | }
73 |
74 | rss http://xkcd\.com/atom\.xml Fun
75 | rss http://feeds\.feedburner\.com/smbc\-comics/PvLb Fun
76 | rss http://syndication\.thedailywtf\.com/TheDailyWtf Fun
77 |
78 | rss http://www\.reddit\.com/r/science/top/\.rss Geek
79 | rss http://www\.phoronix\.com/rss\.php Geek
80 |
81 | mangareader naruto
82 | mangareader bleach
83 | mangareader gantz
84 |
85 | rss https://github\.com/sloonz\.private\.atom?token=HIDDEN ""
86 | .
87 | .fi
88 | .
89 | .IP "" 0
90 | .
91 | .SH "Weboob compatibility"
92 | You can use weboob \fIhttp://weboob\.org/\fR modules used by boobmsg \fIhttp://weboob\.org/applications/boobmsg\fR to generate messages\. Configure the modules using \fBboobmsg\fR, and use \fBweboobmsg2json [module\-name]\fR to generate messages\. \fB[module\-name]\fR can be found in \fB~/\.config/weboob/backends\fR\.
93 |
--------------------------------------------------------------------------------
/doc/ua-inline:
--------------------------------------------------------------------------------
1 | .\" generated with Ronn/v0.7.3
2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3
3 | .
4 | .TH "UA\-INLINE" "" "March 2014" "" ""
5 | .
6 | .SH "NAME"
7 | \fBua\-inline\fR \- Inline HTML resources
8 | .
9 | .P
10 | This is a simple filter intended to be used before \fBmaildir\-put\fR\. It replaces external images inside the body of the message by their content (using \fBdata:\fR scheme)\.
11 | .
12 | .P
13 | If the body contains relative references, it tries to resolve them using the \fBurl\fR key of the message\. If that’s not possible, no inlining is done\.
14 | .
15 | .SH "Example usage, in ggsrc
"
16 | .
17 | .nf
18 |
19 | command 2000 "rss2json feed\-url | ua\-inline | maildir\-put"
20 | .
21 | .fi
22 | .
23 | .SH "Installation"
24 | .
25 | .nf
26 |
27 | go build && cp ua\-inline /usr/local/bin
28 | .
29 | .fi
30 |
31 |
--------------------------------------------------------------------------------
/doc/ua-inline.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | Inline HTML resources
7 |
44 |
45 |
52 |
53 |
54 |
55 |
60 |
61 |
62 | - ua-inline
63 |
64 | - ua-inline
65 |
66 |
67 |
NAME
68 |
69 | ua-inline
- Inline HTML resources
70 |
71 |
72 |
This is a simple filter intended to be used before maildir-put
. It
73 | replaces external images inside the body of the message by their content
74 | (using data:
scheme).
75 |
76 |
If the body contains relative references, it tries to resolve them using
77 | the url
key of the message. If that’s not possible, no inlining
78 | is done.
79 |
80 |
Example usage, in ggsrc
81 |
82 |
command 2000 "rss2json feed-url | ua-inline | maildir-put"
83 |
84 |
85 |
Installation
86 |
87 |
go build && cp ua-inline /usr/local/bin
88 |
89 |
90 |
91 |
96 |
97 |
98 |
99 |
100 |
--------------------------------------------------------------------------------
/doc/ua-inline.md:
--------------------------------------------------------------------------------
1 | ../ua-inline/README.md
--------------------------------------------------------------------------------
/doc/ua-proxify:
--------------------------------------------------------------------------------
1 | .\" generated with Ronn/v0.7.3
2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3
3 | .
4 | .TH "UA\-PROXIFY" "" "March 2016" "" ""
5 | .
6 | .SH "NAME"
7 | \fBua\-proxify\fR \- Transform external URLs in a message
8 | .
9 | .P
10 | This is a simple filter intended to be used before \fBmaildir\-put\fR\. It changes the URL of external resources (CSS, images)\.
11 | .
12 | .P
13 | If the body contains relative references, it tries to resolve them using the \fBurl\fR key of the message\. If that’s not possible, no change is done\.
14 | .
15 | .SH "Example usage, in ggsrc
"
16 | \fBget\.php\fR is a simple example script provided with \fBua\-proxify\fR\. It can be used that way:
17 | .
18 | .IP "" 4
19 | .
20 | .nf
21 |
22 | command 2000 "rss2json feed\-url | \e
23 | ua\-proxify "http://example\.com/get?url={{\.URL|urlquery}}&sig={{\.URL|HMAC \e"$HMAC_KEY\e"}}" | \e
24 | maildir\-put"
25 | .
26 | .fi
27 | .
28 | .IP "" 0
29 | .
30 | .P
31 | \fB$HMAC_KEY\fR can be generated with \fBopenssl rand \-base64 32\fR and must be set in the top of \fBget\.php\fR\.
32 | .
33 | .SH "Installation"
34 | .
35 | .nf
36 |
37 | go build && cp ua\-proxify /usr/local/bin
38 | .
39 | .fi
40 |
41 |
--------------------------------------------------------------------------------
/doc/ua-proxify.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | Transform external URLs in a message
7 |
44 |
45 |
52 |
53 |
54 |
55 |
60 |
61 |
62 | - ua-proxify
63 |
64 | - ua-proxify
65 |
66 |
67 |
NAME
68 |
69 | ua-proxify
- Transform external URLs in a message
70 |
71 |
72 |
This is a simple filter intended to be used before maildir-put
. It
73 | changes the URL of external resources (CSS, images).
74 |
75 |
If the body contains relative references, it tries to resolve them using
76 | the url
key of the message. If that’s not possible, no change
77 | is done.
78 |
79 |
Example usage, in ggsrc
80 |
81 |
get.php
is a simple example script provided with ua-proxify
. It can be used
82 | that way:
83 |
84 |
command 2000 "rss2json feed-url | \
85 | ua-proxify "http://example.com/get?url={{.URL|urlquery}}&sig={{.URL|HMAC \"$HMAC_KEY\"}}" | \
86 | maildir-put"
87 |
88 |
89 |
$HMAC_KEY
can be generated with openssl rand -base64 32
and must be set in
90 | the top of get.php
.
91 |
92 |
Installation
93 |
94 |
go build && cp ua-proxify /usr/local/bin
95 |
96 |
97 |
98 |
103 |
104 |
105 |
106 |
107 |
--------------------------------------------------------------------------------
/doc/ua-proxify.md:
--------------------------------------------------------------------------------
1 | ../ua-proxify/README.md
--------------------------------------------------------------------------------
/doc/ua-scrapers:
--------------------------------------------------------------------------------
1 | .\" generated with Ronn/v0.7.3
2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3
3 | .
4 | .TH "UA\-SCRAPERS" "" "July 2017" "" ""
5 | List all courses on EdX \fIhttps://www\.edx\.org/\fR\.List new comics on Lyon public library \fIhttps://www\.bm\-lyon\.fr/\fR\.List season animes from myanimelist \fIhttps://myanimelist\.net/anime/season\fR\.List latest chapters for a given manga on mangareader \fIhttp://www\.mangareader\.net/\fR\.
6 | .
7 | .P
8 | Usage: \fBua\-scraper\-mangareader \-a name=[manga\-title]\fR\. \fB[manga\-title]\fR is the path of the manga on mangareader, for example \fBnatsume\-yuujinchou\fR for http://www\.mangareader\.net/natsume\-yuujinchou\.List latest torrents on torrent9 \fIhttp://www\.torrent9\.cc/\fR\.
9 | .
10 | .P
11 | Usage:
12 | .
13 | .IP "\(bu" 4
14 | All categories: \fBua\-scraper\-torrent9\fR
15 | .
16 | .IP "\(bu" 4
17 | Specific categories: \fBua\-scraper\-torrent9 "category1 category2\.\.\."\fR
18 | .
19 | .IP "" 0
20 | .
21 | .P
22 | Categories reference the anchor in the URL (for example \fBebook\fR for http://www\.torrent9\.cc/#ebook)\. List latest torrents on yggtorrent \fIhttps://yggtorrent\.com/\fR\.
23 | .
24 | .P
25 | Usage:
26 | .
27 | .IP "\(bu" 4
28 | All categories: \fBua\-scraper\-yggtorrent\fR
29 | .
30 | .IP "\(bu" 4
31 | Specific category: \fBua\-scraper\-yggtorrent [url]\fR\.
32 | .
33 | .IP "" 0
34 |
35 |
--------------------------------------------------------------------------------
/doc/ua-scrapers.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | <p>This contains additional scrapers. You can take those as examples to
7 | write your own.</p>
8 |
9 | ua-scraper-exdcourses
10 |
47 |
48 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 | - ua-scrapers
63 |
64 | - ua-scrapers
65 |
66 |
67 |
This contains additional scrapers. You can take those as examples to
68 | write your own.
69 |
70 | ua-scraper-exdcourses
71 |
List all courses on EdX.
72 |
73 |
ua-scraper-lyon-bm-bd
74 |
75 |
List new comics on Lyon public library.
76 |
77 |
ua-scraper-mal
78 |
79 |
List season animes from myanimelist.
80 |
81 |
ua-scraper-mangareader
82 |
83 |
List latest chapters for a given manga on mangareader.
84 |
85 |
Usage: ua-scraper-mangareader -a name=[manga-title]
. [manga-title]
86 | is the path of the manga on mangareader, for example natsume-yuujinchou
87 | for http://www.mangareader.net/natsume-yuujinchou.
88 |
89 |
ua-scraper-torrent9
90 |
91 |
List latest torrents on torrent9.
92 |
93 |
Usage:
94 |
95 |
96 | - All categories:
ua-scraper-torrent9
97 | - Specific categories:
ua-scraper-torrent9 "category1 category2..."
98 |
99 |
100 |
101 |
Categories references the anchor in the URL (for example ebook
for
102 | http://www.torrent9.cc/#ebook).
103 |
104 |
ua-scraper-yggtorrent
105 |
106 |
List latest torrents on yggtorrent.
107 |
108 |
Usage:
109 |
110 |
111 | - All categories:
ua-scraper-yggtorrent
112 | - Specific category:
ua-scraper-yggtorrent [url]
.
113 |
114 |
115 |
116 |
117 |
122 |
123 |
124 |
125 |
126 |
--------------------------------------------------------------------------------
/doc/ua-scrapers.md:
--------------------------------------------------------------------------------
1 | ../scrapers/README.md
--------------------------------------------------------------------------------
/doc/ua.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | The Universal Aggregator
7 |
44 |
45 |
52 |
53 |
54 |
55 |
62 |
63 |
64 | - ua
65 |
66 | - ua
67 |
68 |
69 |
The Universal Aggregator
70 |
This is a set of tools to aggregate all your information into your
71 | maildir. Each tool can be used separately ; you can find a more complete
72 | description in their respective folder.
73 |
74 |
75 | ggs
is a software which runs commands periodically
76 | maildir-put
reads a set of messages from its standard input and puts
77 | them in a maildir
78 | rss2json
transforms any RSS/Atom feed into a set of messages that
79 | maildir-put
can process
80 | - You can write your own producers (scrapers) for maildir-put ; some are
81 | already provided in the
scrapers/
directory.
82 | - You can also put filters, like
ua-inline
or ua-proxify
.
83 |
84 |
85 |
86 |
Usage
87 |
88 |
ggs [path-to-configuration-file]
89 |
90 |
91 |
Dependencies
92 |
93 |
94 | - Go
95 | - libxml
96 | - jq
97 | - For additional scrapers: scrapy, python 3 and nodejs
98 |
99 |
100 |
101 |
Installation
102 |
103 |
make && sudo make install
104 |
105 |
106 |
Configuration
107 |
108 |
See the ggs
documentation for more information. Here is a sample
109 | configuration file, which puts some feeds into Fun
and Geek
folders,
110 | some new chapters notification from mangareader into Entertainment
,
111 | and my Github personal feed into inbox:
112 |
113 |
default_timeout=30
114 |
115 | rss() {
116 | command 2000 "rss2json \"$1\" | ua-inline | maildir-put -root $HOME/Maildir-feeds -folder \"$2\""
117 | }
118 |
119 | mangareader() {
120 | command 2000 "ua-scraper-mangareader -a name=$1 | "\
121 | "maildir-put -root $HOME/Maildir-feeds -folder Entertainment"
122 | }
123 |
124 | rss http://xkcd.com/atom.xml Fun
125 | rss http://feeds.feedburner.com/smbc-comics/PvLb Fun
126 | rss http://syndication.thedailywtf.com/TheDailyWtf Fun
127 |
128 | rss http://www.reddit.com/r/science/top/.rss Geek
129 | rss http://www.phoronix.com/rss.php Geek
130 |
131 | mangareader naruto
132 | mangareader bleach
133 | mangareader gantz
134 |
135 | rss https://github.com/sloonz.private.atom?token=HIDDEN ""
136 |
137 |
138 |
Weboob compatibility
139 |
140 |
You can use weboob modules used by
141 | boobmsg to generate
142 | messages. Configure the modules using boobmsg
, and use weboobmsg2json
143 | [module-name]
to generate messages. [module-name]
can be found in
144 | ~/.config/weboob/backends
.
145 |
146 |
147 |
152 |
153 |
154 |
155 |
156 |
--------------------------------------------------------------------------------
/doc/ua.md:
--------------------------------------------------------------------------------
1 | ../README.md
--------------------------------------------------------------------------------
/ggs/README.md:
--------------------------------------------------------------------------------
1 | # GGS
2 |
3 | `GGS` (Grey Goo Spawner) is a simple program that runs jobs
4 | periodically. It is similar to cron, but with some differences:
5 |
6 | * Whereas `cron` launches jobs at specific times, `ggs` is mainly
7 | interested in intervals. It will run all jobs at its startup and then
8 | will re-run each job after a certain delay has passed.
9 |
10 | * `ggs` has a system of `workers`, similar to many servers (like nginx
11 | or Apache with MPM Workers) to limit resource contention between your
12 | jobs.
13 |
14 | * You can define a timeout for your jobs, too.
15 |
16 | ## Usage
17 |
18 | `ggs [options] [configuration file]`
19 |
20 | If no configuration file is provided, `ggs` will use `~/.config/ggsrc`
21 | by default.
22 |
23 | ## Requirements
24 |
25 | * [jq](https://stedolan.github.io/jq/)
26 |
27 | ## Installation
28 |
29 | `go build ggs.go && cp ggs /usr/local/bin`
30 |
31 | ## Configuration
32 |
33 | Configuration file is a shell script, so same rule as `sh` applies.
34 |
35 | You create a job with the `command` function, which takes two arguments:
36 | the delay between launches, and the command to run. You can specify a
37 | timeout (in seconds) by setting the `timeout` environment variable
38 | (optional, default: 0 no timeout).
39 |
40 | timeout=30 command 300 "uptime | mail admin@example.com"
41 | command 5 'ping -c 1 github.com || sudo halt -p'
42 |
43 | You can also set the number of workers (maximum number of jobs that can
44 | run simultaneously):
45 |
46 | workers=5 #Warning: don't write "workers = 5", spaces matter here!
47 |
48 | ## Advanced configuration
49 |
50 | The configuration file is just a shell script which produces a JSON
51 | document which matches the structure of the `Config` structure. You can do
52 | `exec my_script` to produce the same JSON with a script in your favorite
53 | language. You can also use variables, functions, execute external
54 | commands, and so on...
55 |
--------------------------------------------------------------------------------
/ggs/ggs.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "bytes"
5 | "encoding/json"
6 | "flag"
7 | "fmt"
8 | "log"
9 | "os"
10 | "os/exec"
11 | "os/signal"
12 | "sync"
13 | "syscall"
14 | "time"
15 | )
16 |
// Command describes one job emitted by the configuration script.
type Command struct {
	// Delay is the number of seconds to wait, once a run has finished,
	// before the job is queued again.
	Delay int
	// Timeout is the number of seconds after which a running job receives
	// SIGTERM; a value of zero or less disables the timeout.
	Timeout int
	// Command is the command line handed to "sh -c".
	Command string
}

// Config is the JSON document printed by the configuration script.
type Config struct {
	// Workers is the number of goroutines executing jobs concurrently.
	Workers int
	// Commands lists the jobs to schedule.
	Commands []*Command
	// disabled is set once a newer configuration has replaced this one;
	// being unexported, it is never read from or written to JSON.
	disabled bool
}
28 |
29 | const CONFIG_WRAPPER = `
30 | workers=5
31 | default_timeout=0
32 | commands=$(jq -n '[]')
33 |
34 | command() {
35 | delay=$1; shift
36 | commands=$(echo "$commands" | \
37 | jq --arg delay "$delay" --arg cmd "$*" \
38 | --arg timeout "${timeout:-$default_timeout}" \
39 | '. + [{Timeout: ($timeout|tonumber), Delay: ($delay|tonumber), Command: $cmd}]')
40 | timeout=
41 | }
42 |
43 | . %s
44 |
45 | echo "$commands" | jq --arg workers "$workers" '{Workers: ($workers|tonumber), Commands: .}'
46 | `
47 |
// loggerWriter is a writer that forwards whatever it receives to a logger,
// one line at a time, prefixing every line with the PID of cmd.
type loggerWriter struct {
	log *log.Logger // destination of the formatted lines
	cmd *exec.Cmd   // command whose Process.Pid tags each line
	buf []byte      // trailing data not yet terminated by a newline
}

// Write logs every complete line contained in the pending data followed by
// data, and keeps the unterminated remainder for the next call. It always
// reports len(data) bytes written and a nil error.
func (w *loggerWriter) Write(data []byte) (int, error) {
	written := len(data)
	pending := append(w.buf, data...)
	for {
		nl := bytes.IndexByte(pending, '\n')
		if nl < 0 {
			break
		}
		w.log.Printf("[%d] %s", w.cmd.Process.Pid, pending[:nl])
		pending = pending[nl+1:]
	}
	if len(pending) == 0 {
		w.buf = nil
	} else {
		w.buf = pending
	}
	return written, nil
}

// Close logs the pending unterminated line, if any, and forgets it.
func (w *loggerWriter) Close() {
	if w.buf == nil {
		return
	}
	w.log.Printf("[%d] %s", w.cmd.Process.Pid, w.buf)
	w.buf = nil
}
76 |
77 | func readConfig(cfgFile string) (cfg *Config, err error) {
78 | sp := exec.Command("sh")
79 | sp.Stderr = os.Stderr
80 | sp.Stdin = bytes.NewBuffer([]byte(fmt.Sprintf(CONFIG_WRAPPER, cfgFile)))
81 | out, err := sp.Output()
82 | if err != nil {
83 | return nil, err
84 | }
85 |
86 | cfg = new(Config)
87 | err = json.Unmarshal(out, cfg)
88 | if err != nil {
89 | return nil, err
90 | }
91 |
92 | return cfg, nil
93 | }
94 |
95 | func process(cmd *Command) {
96 | var timer *time.Timer
97 | var err error
98 |
99 | sp := exec.Command("sh", "-c", cmd.Command)
100 | stdout := &loggerWriter{log: log.Default(), cmd: sp}
101 | stderr := &loggerWriter{log: log.Default(), cmd: sp}
102 | sp.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
103 | sp.Stdout = stdout
104 | sp.Stderr = stderr
105 |
106 | if err = sp.Start(); err != nil {
107 | log.Printf("%s failed: %s", cmd.Command, err.Error())
108 | return
109 | }
110 | log.Printf("[%d] %s", sp.Process.Pid, cmd.Command)
111 |
112 | if cmd.Timeout > 0 {
113 | timer = time.AfterFunc(time.Duration(cmd.Timeout)*time.Second, func() {
114 | if sp.ProcessState == nil {
115 | syscall.Kill(-sp.Process.Pid, syscall.SIGTERM)
116 | }
117 | })
118 | }
119 |
120 | err = sp.Wait()
121 | stdout.Close()
122 | stderr.Close()
123 |
124 | if err != nil {
125 | log.Printf("[%d] %s failed: %s", sp.Process.Pid, cmd.Command, err.Error())
126 | } else {
127 | log.Printf("[%d] done", sp.Process.Pid)
128 | }
129 |
130 | timer.Stop()
131 | }
132 |
133 | func reload(cfgFile string, oldConfig *Config, runOnce bool) (config *Config, err error) {
134 | // loopGroup is the number of (pending) writers on the command channel.
135 | // After disabling a configuration, we have to wait for it to fall to 0 before
136 | // closing the channel (otherwise, they will write to the closed channel).
137 | //
138 | // onceGroup is the number of unprocessed commands in the initial batch.
139 | var loopGroup, onceGroup sync.WaitGroup
140 |
141 | var closeChannel sync.Once
142 |
143 | config, err = readConfig(cfgFile)
144 | if err != nil {
145 | return nil, err
146 | }
147 |
148 | ch := make(chan *Command, len(config.Commands))
149 |
150 | for i := 0; i < config.Workers; i++ {
151 | go func() {
152 | for !config.disabled {
153 | var cmd *Command
154 | if cmd = <-ch; cmd == nil {
155 | continue
156 | }
157 |
158 | process(cmd)
159 |
160 | if runOnce {
161 | onceGroup.Done()
162 | } else {
163 | loopGroup.Add(1)
164 | time.AfterFunc(time.Duration(cmd.Delay)*time.Second, func() {
165 | if !config.disabled {
166 | ch <- cmd
167 | }
168 | loopGroup.Done()
169 | })
170 | }
171 | }
172 |
173 | loopGroup.Wait()
174 | closeChannel.Do(func() { close(ch) })
175 | }()
176 | }
177 |
178 | for _, cmd := range config.Commands {
179 | ch <- cmd
180 | if runOnce {
181 | onceGroup.Add(1)
182 | }
183 | }
184 |
185 | if runOnce {
186 | onceGroup.Wait()
187 | os.Exit(0)
188 | }
189 |
190 | if oldConfig != nil {
191 | oldConfig.disabled = true
192 | }
193 |
194 | return config, nil
195 | }
196 |
197 | func main() {
198 | var runOnce bool
199 | var cfgFile string
200 |
201 | flag.BoolVar(&runOnce, "once", false, "Process commands once, and then exit")
202 | flag.Parse()
203 |
204 | if cfgFile = flag.Arg(0); cfgFile == "" {
205 | cfgFile = os.ExpandEnv("$HOME/.config/ggsrc")
206 | }
207 |
208 | config, err := reload(cfgFile, nil, runOnce)
209 | if err != nil {
210 | fmt.Fprintf(os.Stderr, "Error while reading configuration: %s", err)
211 | os.Exit(1)
212 | }
213 |
214 | // wait for signals (interrupt, reload)
215 | sigChan := make(chan os.Signal, 2)
216 | signal.Notify(sigChan, syscall.SIGINT, syscall.SIGUSR1)
217 | for sig := range sigChan {
218 | switch sig {
219 | case syscall.SIGINT:
220 | return
221 | case syscall.SIGUSR1:
222 | config, err = reload(cfgFile, config, runOnce)
223 | if err != nil {
224 | fmt.Fprintf(os.Stderr, "Error while reloading configuration: %s", err)
225 | }
226 | }
227 | }
228 | }
229 |
--------------------------------------------------------------------------------
/ggsrc.example:
--------------------------------------------------------------------------------
1 | default_timeout=30
2 |
3 | rss() {
4 | command 2000 "rss2json \"$1\" | maildir-put -root $HOME/Maildir-feeds -folder \"$2\""
5 | }
6 |
7 | mangareader() {
8 | command 2000 "mangareader2json http://mangareader.net/$1 | "\
9 | "maildir-put -root $HOME/Maildir-feeds -folder Entertainment"
10 | }
11 |
12 | rss http://xkcd.com/atom.xml Fun
13 | rss http://feeds.feedburner.com/smbc-comics/PvLb Fun
14 | rss http://syndication.thedailywtf.com/TheDailyWtf Fun
15 |
16 | rss http://www.reddit.com/r/science/top/.rss Geek
17 | rss http://www.phoronix.com/rss.php Geek
18 |
19 | mangareader naruto
20 | mangareader bleach
21 | mangareader gantz
22 |
23 | rss https://github.com/sloonz.private.atom?token=HIDDEN ""
24 |
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/sloonz/ua
2 |
3 | go 1.16
4 |
5 | require (
6 | github.com/garyburd/redigo v1.6.2 // indirect
7 | github.com/onsi/ginkgo v1.16.1 // indirect
8 | github.com/onsi/gomega v1.11.0 // indirect
9 | github.com/sloonz/cfeedparser v0.0.0-20210430180901-ecf35ac90d83
10 | github.com/sloonz/go-maildir v0.0.0-20210417175458-ec35083290ab
11 | github.com/sloonz/go-mime-message v0.0.0-20210417175330-cb2e834a9b3b
12 | github.com/sloonz/go-qprintable v0.0.0-20210417175225-715103f9e6eb
13 | gopkg.in/bsm/ratelimit.v1 v1.0.0-20160220154919-db14e161995a // indirect
14 | gopkg.in/redis.v3 v3.6.4
15 | )
16 |
--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
1 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
3 | github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
4 | github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4=
5 | github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
6 | github.com/garyburd/redigo v1.6.2 h1:yE/pwKCrbLpLpQICzYTeZ7JsTA/C53wFTJHaEtRqniM=
7 | github.com/garyburd/redigo v1.6.2/go.mod h1:NR3MbYisc3/PwhQ00EMzDiPmrwpPxAn5GI05/YaO1SY=
8 | github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE=
9 | github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
10 | github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
11 | github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
12 | github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
13 | github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
14 | github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
15 | github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
16 | github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
17 | github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
18 | github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
19 | github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
20 | github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
21 | github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A=
22 | github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE=
23 | github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU=
24 | github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
25 | github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk=
26 | github.com/onsi/ginkgo v1.16.1 h1:foqVmeWDD6yYpK+Yz3fHyNIxFYNxswxqNFjSKe+vI54=
27 | github.com/onsi/ginkgo v1.16.1/go.mod h1:CObGmKUOKaSC0RjmoAK7tKyn4Azo5P2IWuoMnvwxz1E=
28 | github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY=
29 | github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo=
30 | github.com/onsi/gomega v1.11.0 h1:+CqWgvj0OZycCaqclBD1pxKHAU+tOkHmQIWvDHq2aug=
31 | github.com/onsi/gomega v1.11.0/go.mod h1:azGKhqFUon9Vuj0YmTfLSmx0FUwqXYSTl5re8lQLTUg=
32 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
33 | github.com/sloonz/cfeedparser v0.0.0-20160203163450-a220b181f09f h1:pa1Gxag3FaJ68Mz2LaB9KYyzXG3pCnDIjP4A6M3XQqo=
34 | github.com/sloonz/cfeedparser v0.0.0-20160203163450-a220b181f09f/go.mod h1:pSLBLXl/QPOmNbplyTkt/TdU/0gWf/BidRMVcyfQIKk=
35 | github.com/sloonz/cfeedparser v0.0.0-20210430180901-ecf35ac90d83 h1:hzwGBD4Vb4D/wMzSK8k/8aRZySfykyMx3sYJSfIH0jw=
36 | github.com/sloonz/cfeedparser v0.0.0-20210430180901-ecf35ac90d83/go.mod h1:0G5ru/AAfpMT2UNCjXytFSpOyTxOZW4+3EFa8uSF2OU=
37 | github.com/sloonz/go-maildir v0.0.0-20210417175458-ec35083290ab h1:H8W5t9eJbVOltrNUQBPWGgpqFszJifXdcjJ0nhVREQw=
38 | github.com/sloonz/go-maildir v0.0.0-20210417175458-ec35083290ab/go.mod h1:DtE1Xilsk4k8SzX2J52IgP9+bTpxKC8ZdTsbqq9QJJw=
39 | github.com/sloonz/go-mime-message v0.0.0-20210417175330-cb2e834a9b3b h1:yzAB0kQ/6jGaAOMmyylzH2SZp7g7vomILqkxAs4ghqw=
40 | github.com/sloonz/go-mime-message v0.0.0-20210417175330-cb2e834a9b3b/go.mod h1:G3uvxOtJJl6pu9Bl9l0JkHIsn/9qBVSBd5iEQZVI7ic=
41 | github.com/sloonz/go-qprintable v0.0.0-20160203160305-775b3a4592d5/go.mod h1:rvsMTVl5yyd7liGH3cxu5eRjfNcC1WkSKe4HBSZ3ZA4=
42 | github.com/sloonz/go-qprintable v0.0.0-20210417175225-715103f9e6eb h1:T+USeSgAg9MysHPeOQ2W3KAuBQHVZzG0XMHyfHN88Yg=
43 | github.com/sloonz/go-qprintable v0.0.0-20210417175225-715103f9e6eb/go.mod h1:WKd1iQMtoZdaS9rlKDPprxWJoan2hkQA9BcGt+oxezs=
44 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
45 | github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
46 | github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
47 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
48 | golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
49 | golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
50 | golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
51 | golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
52 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
53 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
54 | golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
55 | golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
56 | golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb h1:eBmm0M9fYhWpKZLjQUUKka/LtIxf46G4fxeEz5KJr9U=
57 | golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
58 | golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
59 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
60 | golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
61 | golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
62 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
63 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
64 | golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
65 | golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
66 | golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
67 | golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
68 | golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
69 | golang.org/x/sys v0.0.0-20210112080510-489259a85091 h1:DMyOG0U+gKfu8JZzg2UQe9MeaC1X+xQWlAKcRnjxjCw=
70 | golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
71 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
72 | golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=
73 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
74 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
75 | golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
76 | golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
77 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
78 | golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
79 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
80 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
81 | google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
82 | google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
83 | google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
84 | google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
85 | google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
86 | google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
87 | gopkg.in/bsm/ratelimit.v1 v1.0.0-20160220154919-db14e161995a h1:stTHdEoWg1pQ8riaP5ROrjS6zy6wewH/Q2iwnLCQUXY=
88 | gopkg.in/bsm/ratelimit.v1 v1.0.0-20160220154919-db14e161995a/go.mod h1:KF9sEfUPAXdG8Oev9e99iLGnl2uJMjc5B+4y3O7x610=
89 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
90 | gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
91 | gopkg.in/redis.v3 v3.6.4 h1:u7XgPH1rWwsdZnR+azldXC6x9qDU2luydOIeU/l52fE=
92 | gopkg.in/redis.v3 v3.6.4/go.mod h1:6XeGv/CrsUFDU9aVbUdNykN7k1zVmoeg83KC9RbQfiU=
93 | gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
94 | gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
95 | gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
96 | gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
97 | gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
98 | gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
99 | gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
100 |
--------------------------------------------------------------------------------
/maildir-put/README.md:
--------------------------------------------------------------------------------
1 | # maildir-put
2 |
3 | `maildir-put` is a tool to put messages in a predefined JSON format
4 | inside a maildir. It also tries to detect duplicates and drop them.
5 |
6 | ## Usage
7 |
8 | message-producer | maildir-put [arguments]
9 |
10 | Available arguments:
11 |
12 | * `-cache`: path to a cache file used to store message IDs for duplicate
13 | detection
14 | * `-root`: path to the root maildir directory. Defaults to ~/Maildir.
15 | * `-folder`: maildir folder to put messages. Defaults to "", the inbox.
16 | The folder separator is "/".
17 | * `-redis`: specify this flag to use redis for message IDs cache. If both
18 | `-redis` and `-cache` are specified, the given cache will be migrated to
19 | redis
20 | * `-redis-db`, `-redis-addr`, `-redis-password`: redis connection settings.
21 |
22 | ## Installation
23 |
24 | go build && cp maildir-put /usr/local/bin
25 |
26 | ## Input format
27 |
28 | As its input, `maildir-put` takes a stream of JSON dictionaries (not a
29 | list of dictionaries). Each dictionary represents a message. Available
30 | keys are:
31 |
32 | * *body*: the body of the message, in HTML. Mandatory.
33 | * *title*: the subject of the message, in text. Mandatory.
34 | * *date*: the date of the message. Optional, defaults to current time. If
35 | provided, must be RFC 2822 compliant.
36 | * *author*: the name of the author, in text. Optional.
37 | * *authorEmail*: the email address of the author. Optional.
38 | * *id*: an unique identifier for the message. It will be used for the
39 | creation of the Message-Id header, as well as in duplicates detection. It
40 | should include three parts: an unique identifier for the application
41 | (for example: `rss2json`), an unique identifier for the parameters
42 | (for example: the feed URL) and an unique identifier for the message
43 | (for example: an article ID). The identifier for the parameters may be
44 | omitted if you provide a *host* key and that the host is sufficient to
45 | identify the parameters. Mandatory for threaded discussions handling and
46 | duplicates detection, optional otherwise.
47 | * *host*: the domain name of the producer of the message (in general,
48 | the hostname of the server from which you fetched the information). Used
49 | in `Message-Id` and `References` headers construction, as well as in
50 | duplicates detection. Optional, but strongly encouraged for threaded
51 | discussions handling and duplicates detection.
52 | * *references*: for threaded discussions, *id* of the parent messages. Note
53 | that *host* must match in the two messages.
54 | * *url*: URL of the message. Used by `ua-inline` to resolve relative
55 | references.
56 |
57 | All strings must be encoded in UTF-8.
58 |
--------------------------------------------------------------------------------
/maildir-put/cache.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "bufio"
5 | "encoding/binary"
6 | "gopkg.in/redis.v3"
7 | "io"
8 | "log"
9 | "os"
10 | "syscall"
11 | "time"
12 | "bytes"
13 | )
14 |
15 | type Cache struct {
16 | data map[string]bool
17 | newData map[string]bool
18 | ts []byte
19 | path string
20 | useRedis bool
21 | redisClient *redis.Client
22 | redisOptions redis.Options
23 | }
24 |
25 | func (c *Cache) OpenCache() (err error) {
26 | var key string
27 |
28 | tsBuf := bytes.NewBuffer(nil)
29 | binary.Write(tsBuf, binary.BigEndian, time.Now().Unix())
30 |
31 | c.data = make(map[string]bool)
32 | c.newData = make(map[string]bool)
33 | c.ts = tsBuf.Bytes()
34 |
35 | if c.useRedis {
36 | c.redisClient = redis.NewClient(&c.redisOptions)
37 | }
38 |
39 | cacheFile, err := os.Open(c.path)
40 | if err != nil && !os.IsNotExist(err) {
41 | return err
42 | } else if os.IsNotExist(err) {
43 | return nil
44 | }
45 |
46 | reader := bufio.NewReader(cacheFile)
47 | for err != io.EOF {
48 | if key, err = reader.ReadString('\n'); err != nil && err != io.EOF {
49 | return err
50 | }
51 | if key != "" && key != "" {
52 | key = key[:len(key)-1]
53 | c.data[key] = true
54 | }
55 | }
56 |
57 | return nil
58 | }
59 |
60 | func (c *Cache) Getset(id, host, msgId string) bool {
61 | if c.useRedis {
62 | res := c.redisClient.HExists("ua:"+host, id)
63 | if res.Err() != nil && res.Err() != redis.Nil {
64 | log.Fatalf("Error using redis cache: %s", res.Err())
65 | }
66 |
67 | present := res.Val()
68 |
69 | res = c.redisClient.HSet("ua:"+host, id, string(c.ts))
70 | if res.Err() != nil && res.Err() != redis.Nil {
71 | log.Fatalf("Error using redis cache: %s", res.Err())
72 | }
73 |
74 | return present
75 | } else {
76 | if _, has := c.data[msgId]; has {
77 | return true
78 | }
79 | if _, has := c.newData[msgId]; has {
80 | return true
81 | }
82 | c.newData[msgId] = true
83 | }
84 | return false
85 | }
86 |
87 | func (c *Cache) Dump() error {
88 | if c.useRedis {
89 | return nil
90 | }
91 |
92 | cacheFile, err := os.OpenFile(c.path, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0660)
93 | if err != nil {
94 | return err
95 | }
96 | defer cacheFile.Close()
97 |
98 | if err = syscall.Flock(int(cacheFile.Fd()), syscall.LOCK_EX); err != nil {
99 | return err
100 | }
101 |
102 | writer := bufio.NewWriter(cacheFile)
103 | for key, _ := range c.newData {
104 | if _, err = writer.WriteString(key); err != nil {
105 | return err
106 | }
107 | if _, err = writer.WriteString("\n"); err != nil {
108 | return err
109 | }
110 | }
111 | if err = writer.Flush(); err != nil {
112 | return err
113 | }
114 |
115 | return nil
116 | }
117 |
--------------------------------------------------------------------------------
/maildir-put/maildir-put.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "bytes"
5 | "encoding/json"
6 | "errors"
7 | "flag"
8 | "fmt"
9 | "github.com/sloonz/go-maildir"
10 | "github.com/sloonz/go-mime-message"
11 | "github.com/sloonz/go-qprintable"
12 | "io"
13 | "log"
14 | "os"
15 | "strings"
16 | "time"
17 | )
18 |
19 | var hostname string
20 | var cache Cache
21 |
// Attachment is one entry of a message's "attachments" JSON array.
type Attachment struct {
	// CID is read from the "cid" key.
	// NOTE(review): presumably the Content-ID used to reference the
	// attachment from the body — confirm against Process.
	CID string `json:"cid"`
	// MimeType is read from the "mimeType" key.
	MimeType string `json:"mimeType"`
	// Data is the attachment content; encoding/json decodes a []byte from
	// a base64 string.
	Data []byte `json:"data"`
	// Filename is read from the "filename" key.
	Filename string `json:"filename"`
}

// Message is one JSON dictionary of the input stream.
type Message struct {
	// Id is the unique identifier used for the Message-Id header and for
	// duplicate detection.
	Id string `json:"id"`
	// Body is the HTML body; mandatory.
	Body string `json:"body"`
	// Title is the subject; mandatory.
	Title string `json:"title"`
	// Author is the author's name.
	Author string `json:"author"`
	// AuthorEmail is the author's address; it defaults to noreply@Host.
	AuthorEmail string `json:"authorEmail"`
	// Date is the message date; it defaults to the current time.
	Date string `json:"date"`
	// References lists the ids of the parent messages.
	References []string `json:"references"`
	// Host is the domain of the producer; it defaults to the package-level
	// hostname.
	Host string `json:"host"`
	// Attachments is nil when the message has no "attachments" key.
	Attachments []Attachment `json:"attachments"`
}
40 |
// isAtomText reports whether s can be written verbatim as an RFC 2822 atom
// (allowDot false) or dot-atom-text (allowDot true).
//
// Every byte must be an "atext" character: an ASCII letter, a digit or one of
// !#$%&'*+-/=?^_`{|}~. With allowDot, single dots may separate non-empty runs
// of atext, which rules out a leading dot, a trailing dot and two consecutive
// dots. The empty string is never valid.
//
// Note: a trailing dot used to be accepted; such strings are now reported as
// invalid, so callers fall back to their quoted encoding for them.
func isAtomText(s string, allowDot bool) bool {
	if s == "" {
		return false
	}

	// prevIsAtext tells whether the previous byte was an atext character,
	// which is the only position where a dot is acceptable.
	prevIsAtext := false
	for i := 0; i < len(s); i++ {
		c := s[i]

		if c == '.' {
			if !allowDot || !prevIsAtext {
				return false
			}
			prevIsAtext = false
			continue
		}

		switch {
		case c >= 'a' && c <= 'z':
		case c >= 'A' && c <= 'Z':
		case c >= '0' && c <= '9':
		case strings.IndexByte("!#$%&'*+-/=?^_`{|}~", c) >= 0:
		default:
			return false
		}
		prevIsAtext = true
	}

	// False exactly when the last byte was a dot.
	return prevIsAtext
}
80 |
81 | // allowDot=true is for no-fold-quote ; allowDot=fales is for quoted-string
82 | func encNoFoldQuote(s string, buf *bytes.Buffer, allowDot bool) {
83 | if isAtomText(s, allowDot) {
84 | buf.WriteString(s)
85 | } else {
86 | // Encode left part as no-fold-quote
87 | // ASCII 9 (\t), 32 (space), 34 (dquote), 92 (backslash) are escaped with a backslash
88 | // Non-ASCII and ASCII 0, 10 (\n), 13 (\r) are dropped
89 | // Other characters are transmitted as-is
90 | buf.WriteByte('"')
91 | for i := 0; i < len(s); i++ {
92 | if s[i] == 0 || s[i] == '\r' || s[i] == '\n' || s[i] > 127 {
93 | // Drop it
94 | } else if s[i] == '\t' || s[i] == ' ' || s[i] == '"' || s[i] == '\\' {
95 | buf.Write([]byte{'\\', s[i]})
96 | } else {
97 | buf.WriteByte(s[i])
98 | }
99 | }
100 | buf.WriteByte('"')
101 | }
102 | }
103 |
104 | func encNoFoldLiteral(s string, buf *bytes.Buffer) {
105 | if isAtomText(s, true) {
106 | buf.WriteString(s)
107 | } else {
108 | // Encode right part as no-fold-literal
109 | // ASCII 9 (\t), 32 (space), 91 ([), 92 (backslash) and 93 (]) are escaped with a backslash
110 | // Non-ASCII and ASCII 0, 10 (\n), 13 (\r) are dropped
111 | // Other characters are transmitted as-is
112 | buf.WriteByte('[')
113 | for i := 0; i < len(s); i++ {
114 | if s[i] == 0 || s[i] == '\r' || s[i] == '\n' || s[i] > 127 {
115 | // Drop it
116 | } else if s[i] == '\t' || s[i] == ' ' || s[i] == '[' || s[i] == '\\' || s[i] == ']' {
117 | buf.Write([]byte{'\\', s[i]})
118 | } else {
119 | buf.WriteByte(s[i])
120 | }
121 | }
122 | buf.WriteByte(']')
123 | }
124 | }
125 |
// formatDate converts an RFC 3339 timestamp into the RFC 1123 form with a
// numeric zone. Input that does not parse as RFC 3339 is returned unchanged.
func formatDate(date string) string {
	if t, err := time.Parse(time.RFC3339, date); err == nil {
		return t.Format(time.RFC1123Z)
	}
	return date
}
134 |
135 | func MessageId(id, host string) string {
136 | // According to RFC 2822:
137 | // msg-id = [CFWS] "<" id-left "@" id-right ">" [CFWS]
138 | // id-left = dot-atom-text / no-fold-quote
139 | // id-right = dot-atom-text / no-fold-literal
140 | idBuf := bytes.NewBufferString("<")
141 | encNoFoldQuote(id, idBuf, true)
142 | idBuf.WriteByte('@')
143 | encNoFoldLiteral(host, idBuf)
144 | idBuf.WriteByte('>')
145 |
146 | return idBuf.String()
147 | }
148 |
149 | func (m *Message) Process(md *maildir.Maildir) error {
150 | var id string
151 | var mail *message.Message
152 |
153 | if m.Body == "" || m.Title == "" {
154 | return errors.New("Missing mandatory field")
155 | }
156 |
157 | if m.Host == "" {
158 | m.Host = hostname
159 | }
160 |
161 | if m.AuthorEmail == "" {
162 | m.AuthorEmail = "noreply@" + m.Host
163 | }
164 |
165 | if m.Date == "" {
166 | m.Date = time.Now().UTC().Format(time.RFC1123Z)
167 | }
168 |
169 | if m.Id != "" {
170 | id = MessageId(m.Id, m.Host)
171 | if cache.Getset(m.Id, m.Host, id) {
172 | return nil
173 | }
174 | }
175 |
176 | rootContentType := "text/html; charset=\"UTF-8\""
177 |
178 | bodyPart := message.NewTextMessage(qprintable.UnixTextEncoding, bytes.NewBufferString(m.Body))
179 | bodyPart.SetHeader("Content-Type", rootContentType)
180 |
181 | if m.Attachments == nil {
182 | mail = bodyPart
183 | } else {
184 | ctBuf := bytes.NewBufferString("")
185 | encNoFoldQuote(rootContentType, ctBuf, false)
186 | rootPart := message.NewMultipartMessageParams("related", "",
187 | map[string]string{"type": ctBuf.String()})
188 |
189 | rootPart.AddPart(bodyPart)
190 | for _, attachment := range m.Attachments {
191 | attPart := message.NewBinaryMessage(bytes.NewBuffer(attachment.Data))
192 | attPart.SetHeader("Content-ID", fmt.Sprintf("<%s>", attachment.CID))
193 | attPart.SetHeader("Content-Type", attachment.MimeType)
194 | if attachment.Filename == "" {
195 | attPart.SetHeader("Content-Disposition", "inline")
196 | } else {
197 | fnBuf := bytes.NewBufferString("")
198 | encNoFoldQuote(attachment.Filename, fnBuf, false)
199 | attPart.SetHeader("Content-Description", attachment.Filename)
200 | attPart.SetHeader("Content-Disposition", fmt.Sprintf("inline; filename=%s", fnBuf.String()))
201 | }
202 | rootPart.AddPart(attPart)
203 | }
204 |
205 | mail = &rootPart.Message
206 | }
207 |
208 | // In a maildir, mails are expected to end with LF line endings. Most softwares are
209 | // just fine with CRLF line endings, but some (for example Mutt) don’t.
210 | mail.EOL = "\n"
211 | mail.SetHeader("Date", formatDate(m.Date))
212 | mail.SetHeader("Subject", message.EncodeWord(m.Title))
213 | mail.SetHeader("From", message.EncodeWord(m.Author)+" <"+m.AuthorEmail+">")
214 | if id != "" {
215 | mail.SetHeader("Message-Id", id)
216 | }
217 | if len(m.References) > 0 {
218 | refs := ""
219 | for _, r := range m.References {
220 | refs += " " + MessageId(r, m.Host)
221 | }
222 | mail.SetHeader("References", refs)
223 | }
224 |
225 | md.CreateMail(mail)
226 |
227 | return nil
228 | }
229 |
230 | func main() {
231 | var rootDir, folder string
232 | var err error
233 |
234 | flag.StringVar(&rootDir, "root", os.ExpandEnv("$HOME/Maildir"), "path to maildir")
235 | flag.StringVar(&folder, "folder", "", "maildir folder name to put email (empty for inbox)")
236 | flag.StringVar(&cache.path, "cache", os.ExpandEnv("$HOME/.cache/maildir-put.cache"),
237 | "path to store message-ids to drop duplicate messages")
238 | flag.BoolVar(&cache.useRedis, "redis", false, "use redis for cache storage")
239 | flag.StringVar(&cache.redisOptions.Addr, "redis-addr", "127.0.0.1:6379", "redis address")
240 | flag.Int64Var(&cache.redisOptions.DB, "redis-db", 0, "redis base")
241 | flag.StringVar(&cache.redisOptions.Password, "redis-password", "", "redis password")
242 |
243 | if flag.Parse(); !flag.Parsed() {
244 | flag.PrintDefaults()
245 | os.Exit(1)
246 | }
247 |
248 | if err = cache.OpenCache(); err != nil {
249 | log.Fatalf("Can't open cache: %s", err.Error())
250 | }
251 |
252 | if hostname, err = os.Hostname(); err != nil {
253 | log.Fatalf("Can't get hostname: %s", err.Error())
254 | }
255 |
256 | md, err := maildir.New(rootDir, true)
257 | if err != nil {
258 | log.Fatalf("Can't open maildir: %s", err.Error())
259 | }
260 |
261 | for _, subfolder := range strings.Split(folder, "/") {
262 | if subfolder != "" {
263 | md, err = md.Child(subfolder, true)
264 | if err != nil {
265 | log.Fatalf("Can't open maildir: %s", err.Error())
266 | }
267 | }
268 | }
269 |
270 | dec := json.NewDecoder(os.Stdin)
271 | for {
272 | msg := new(Message)
273 | err = dec.Decode(msg)
274 | if err == nil {
275 | err = msg.Process(md)
276 | }
277 |
278 | if err == io.EOF {
279 | break
280 | } else if err != nil {
281 | log.Printf("Cannot read input message: %s", err.Error())
282 | }
283 | }
284 |
285 | if err = cache.Dump(); err != nil {
286 | log.Printf("warning: can't dump cache: %s", err.Error())
287 | }
288 | }
289 |
--------------------------------------------------------------------------------
/rss2json/README.md:
--------------------------------------------------------------------------------
1 | # rss2json
2 |
3 | `rss2json` is a simple tool intended to be used with `maildir-put` and `ggs`. It is used to convert any RSS or Atom feed into a stream of messages usable by `maildir-put`.
4 |
5 | ## Usage
6 |
7 | rss2json feed-url
8 |
9 | rss2json -url=feed-url < feed-from-stdin
10 |
11 | ## Dependencies
12 |
13 | * libxml
14 | * Optional: python and feedparser for parsing of ill-formed feeds
15 |
16 | ## Installation
17 |
18 | go build && cp rss2json /usr/local/bin
19 |
--------------------------------------------------------------------------------
/rss2json/rss2json.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "encoding/json"
5 | "flag"
6 | "fmt"
7 | "io"
8 | "net/url"
9 | "os"
10 | "regexp"
11 | "strings"
12 | "time"
13 |
14 | "github.com/sloonz/cfeedparser"
15 | )
16 |
// firstNonEmpty returns the first argument that is not the empty string, or
// "" when there is none (including when called without arguments).
func firstNonEmpty(s ...string) string {
	for i := range s {
		if s[i] != "" {
			return s[i]
		}
	}
	return ""
}
26 |
27 | func getDate(e *feedparser.Entry) string {
28 | emptyTime := time.Time{}
29 | if e.PublicationDateParsed != emptyTime {
30 | return e.PublicationDateParsed.Format(time.RFC3339)
31 | }
32 | if e.ModificationDateParsed != emptyTime {
33 | return e.ModificationDateParsed.Format(time.RFC3339)
34 | }
35 | if e.PublicationDate != "" {
36 | return e.PublicationDate
37 | }
38 | if e.ModificationDate != "" {
39 | return e.ModificationDate
40 | }
41 | return time.Now().UTC().Format(time.RFC3339)
42 | }
43 |
// convertEOLReg matches a carriage return optionally followed by a line feed.
var convertEOLReg = regexp.MustCompile("\r\n?")

// convertEOL rewrites every CRLF pair and every lone CR in s to a single LF.
func convertEOL(s string) string {
	const lf = "\n"
	return convertEOLReg.ReplaceAllLiteralString(s, lf)
}
49 |
50 | func process(rawFeedUrl, rawBaseUrl string) error {
51 | feedUrl, err := url.Parse(rawFeedUrl)
52 | if err != nil {
53 | return err
54 | }
55 |
56 | baseUrl, err := url.Parse(rawBaseUrl)
57 | if err != nil {
58 | return err
59 | }
60 |
61 | var feed *feedparser.Feed
62 | if feedUrl.Scheme != "stdin" {
63 | feed, err = feedparser.ParseURL(feedUrl)
64 | } else {
65 | data, err := io.ReadAll(os.Stdin)
66 | if err != nil {
67 | return err
68 | }
69 | feed, err = feedparser.ParseString(string(data))
70 | }
71 |
72 | if err != nil {
73 | return err
74 | }
75 |
76 | for _, entry := range feed.Entries {
77 | body := convertEOL(firstNonEmpty(entry.Content, entry.Summary))
78 | body += "\nView post
\n"
79 |
80 | linkUrl, err := url.Parse(entry.Link)
81 | linkHost := ""
82 | if err == nil {
83 | linkHost = linkUrl.Host
84 | }
85 |
86 | jsonEntry := make(map[string]string)
87 | jsonEntry["id"] = firstNonEmpty(entry.Id, entry.Link, entry.PublicationDate+":"+entry.Title) + ":" + rawBaseUrl
88 | jsonEntry["title"] = strings.TrimSpace(entry.Title)
89 | jsonEntry["body"] = body
90 | jsonEntry["author"] = strings.TrimSpace(firstNonEmpty(entry.Author.Name, entry.Author.Uri, entry.Author.Text))
91 | jsonEntry["authorAddress"] = strings.TrimSpace(entry.Author.Email)
92 | jsonEntry["date"] = getDate(&entry)
93 | jsonEntry["host"] = firstNonEmpty(baseUrl.Host, linkHost)
94 | if entry.Link == "" {
95 | jsonEntry["url"] = baseUrl.String()
96 | } else {
97 | jsonEntry["url"] = entry.Link
98 | }
99 |
100 | encodedEntry, err := json.Marshal(jsonEntry)
101 | if err != nil {
102 | return err
103 | }
104 |
105 | fmt.Printf("%s\n", string(encodedEntry))
106 | }
107 |
108 | return nil
109 | }
110 |
111 | func main() {
112 | baseUrlFlag := flag.String("url", "", "override feed url, useful for feeds given on stdin")
113 | flag.Parse()
114 |
115 | feedUrl := "stdin:"
116 | if flag.NArg() > 0 {
117 | feedUrl = flag.Args()[0]
118 | }
119 |
120 | baseUrl := feedUrl
121 | if *baseUrlFlag != "" {
122 | baseUrl = *baseUrlFlag
123 | }
124 |
125 | err := process(feedUrl, baseUrl)
126 | if err != nil {
127 | fmt.Fprintf(os.Stderr, "Can't process feed: %s\n", err.Error())
128 | os.Exit(1)
129 | }
130 | }
131 |
--------------------------------------------------------------------------------
/scrapers/README.md:
--------------------------------------------------------------------------------
1 | This contains example scrapers to show how you can write your own.
2 |
3 | # ua-scraper-mal
4 |
Lists the anime of the current season from [MyAnimeList](https://myanimelist.net/anime/season).
6 |
--------------------------------------------------------------------------------
/scrapers/ua-scraper-mal:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | import json
4 | import re
5 |
6 | import lxml.etree
7 | import requests
8 |
# Download the seasonal anime page and parse the response text as HTML.
html = lxml.etree.HTML(requests.get("http://myanimelist.net/anime/season").text)
# One JSON message is printed per element matching ".seasonal-anime".
for item in html.cssselect(".seasonal-anime"):
    # Each lookup takes the first match; an item lacking one of these
    # elements raises IndexError.
    title = item.cssselect('.link-title')[0]
    genres = item.cssselect('.genres')[0]
    desc = item.cssselect('.synopsis')[0]

    link = title.get("href")
    img = item.cssselect(".image img")[0]
    # Use the "src" attribute when present, "data-src" otherwise.
    # NOTE(review): the string literals below are reproduced exactly as they
    # appear in this copy of the file; their markup looks lost — restore it
    # from the repository before running.
    img_tag = '
' % (img.get("src") or img.get("data-src"))

    # The entry link serves both as message URL and as message id.
    print((json.dumps({
        'url': link,
        'id': link,
        'title': title.text,
        'body': '%s
%s
%s
%s
' % (lxml.etree.tostring(title).decode(), img_tag, lxml.etree.tostring(desc).decode(), lxml.etree.tostring(genres).decode()),
        'host': 'myanimelist.net'
    })))
26 |
--------------------------------------------------------------------------------
/ua-inline/README.md:
--------------------------------------------------------------------------------
1 | # ua-inline -- Inline HTML resources
2 |
3 | This is a simple filter intended to be used before `maildir-put`. It
4 | replaces external images inside the body of the message by their content
5 | (using `data:` scheme).
6 |
7 | If the body contains relative references, it tries to resolve them using
8 | the `url` key of the message. If that’s not possible, no inlining
9 | is done.
10 |
11 | ## Example usage, in `ggsrc`
12 |
13 | command 2000 "rss2json feed-url | ua-inline | maildir-put"
14 |
15 | ## Installation
16 |
17 | go build && cp ua-inline /usr/local/bin
18 |
--------------------------------------------------------------------------------
/ua-inline/ua-inline.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | // TODO:
4 | // Parallelize
5 | // Manage cache entries lifetime
6 |
7 | import (
8 | "crypto/sha256"
9 | "encoding/base64"
10 | "encoding/json"
11 | "fmt"
12 | "html"
13 | "io"
14 | "io/ioutil"
15 | "log"
16 | "net/http"
17 | "net/url"
18 | "os"
19 | "os/user"
20 | "regexp"
21 | "strings"
22 | )
23 |
// Message is a JSON object with arbitrary values keyed by field name; the
// code in this file reads its "body" and "url" entries.
type Message map[string]interface{}

// CacheDir is the directory under which fetch stores downloaded resources
// and their content types.
var CacheDir string
27 |
// hash returns the SHA-256 digest of name as a lowercase hexadecimal string.
func hash(name string) string {
	digest := sha256.Sum256([]byte(name))
	return fmt.Sprintf("%x", digest[:])
}
33 |
34 | func fetch(resUrlString string, baseUrl *url.URL) (data []byte, contentType string) {
35 | var err error
36 |
37 | // Resolve relative url
38 | resUrl, _ := url.Parse(resUrlString)
39 | if resUrl == nil || (baseUrl == nil && !resUrl.IsAbs()) {
40 | return nil, ""
41 | }
42 |
43 | if !resUrl.IsAbs() {
44 | resUrl = baseUrl.ResolveReference(resUrl)
45 | }
46 |
47 | // Test cache
48 | h := hash(resUrl.String())
49 | dataCacheFile := fmt.Sprintf("%s/data-%x@%s", CacheDir, h, resUrl.Host)
50 | typeCacheFile := fmt.Sprintf("%s/type-%x@%s", CacheDir, h, resUrl.Host)
51 | data, err = ioutil.ReadFile(dataCacheFile)
52 | if err == nil {
53 | var bContentType []byte
54 | bContentType, err = ioutil.ReadFile(typeCacheFile)
55 | contentType = string(bContentType)
56 | }
57 | if err == nil {
58 | return
59 | } else if !os.IsNotExist(err) {
60 | log.Printf("Can't read cache file %s or %s: %s", dataCacheFile, typeCacheFile, err.Error())
61 | }
62 |
63 | // Cache miss
64 | resp, err := http.Get(resUrl.String())
65 | if err != nil || resp.StatusCode < 200 || resp.StatusCode >= 300 {
66 | if err != nil {
67 | log.Printf("Error downloading %s: %s", resUrl.String(), err.Error())
68 | } else {
69 | log.Printf("Error downloading %s: %s", resUrl.String(), resp.Status)
70 | }
71 | return nil, ""
72 | }
73 |
74 | data, err = ioutil.ReadAll(resp.Body)
75 | resp.Body.Close()
76 | if err != nil {
77 | log.Printf("Error downloading %s: %s", resUrl.String(), err.Error())
78 | return nil, ""
79 | }
80 |
81 | // Get type
82 | if _, ok := resp.Header["Content-Type"]; ok {
83 | contentType = resp.Header["Content-Type"][0]
84 | } else {
85 | contentType = http.DetectContentType(data)
86 | }
87 |
88 | // Write to cache
89 | if err = ioutil.WriteFile(dataCacheFile, data, os.FileMode(0644)); err != nil {
90 | log.Printf("Can't write cache file %s: %s", dataCacheFile, err.Error())
91 | }
92 | if err = ioutil.WriteFile(typeCacheFile, []byte(contentType), os.FileMode(0644)); err != nil {
93 | log.Printf("Can't write cache file %s: %s", typeCacheFile, err.Error())
94 | }
95 |
96 | return
97 | }
98 |
99 | func ProcessMessage(msg Message, ch chan Message) {
100 | if _, ok := msg["body"]; !ok {
101 | ch <- msg
102 | return
103 | }
104 |
105 | body, ok := msg["body"].(string)
106 | if !ok {
107 | ch <- msg
108 | return
109 | }
110 |
111 | var msgUrl *url.URL
112 | if _, ok = msg["url"]; ok {
113 | if _, ok = msg["url"].(string); ok {
114 | msgUrl, _ = url.Parse(msg["url"].(string))
115 | }
116 | }
117 |
118 | var attachments []map[string]string
119 | attrRe := "\\s*[\"']?\\s*([^\\s\"'>]+)\\s*[\"']?"
120 |
121 | // Inline
as attachment
122 | body = regexp.MustCompile("
]+>").ReplaceAllStringFunc(body, func(img string) string {
123 | src := regexp.MustCompile("src="+attrRe).FindStringSubmatch(img)
124 | if len(src) > 1 && !strings.HasPrefix(src[1], "data:") {
125 | cid := hash(src[1])
126 | filename := regexp.MustCompile("/([^/?]+)(\\?|$)").FindStringSubmatch(src[1])
127 | data, mimeType := fetch(html.UnescapeString(src[1]), msgUrl)
128 | if data != nil {
129 | attachment := map[string]string {
130 | "cid": cid,
131 | "mimeType": mimeType,
132 | "data": base64.StdEncoding.EncodeToString(data)}
133 | if filename != nil {
134 | attachment["filename"] = filename[1]
135 | }
136 | attachments = append(attachments, attachment)
137 | return strings.Replace(img, src[0], fmt.Sprintf("src=\"cid:%s\"", cid), 1)
138 | }
139 | }
140 | return img
141 | })
142 |
143 | // Inline