├── .gitignore
├── README.md
├── add-cron-job.sh
├── check-cron-job.sh
├── eprint-iacr-notifier.py
├── install-deps.sh
├── os.sh
└── restart-cron-job.sh
/.gitignore:
--------------------------------------------------------------------------------
1 | README.html
2 | ei-notif.conf
3 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | eprint-iacr-notifier
2 | ====================
3 |
4 | Daily email reminders with the newest [Cryptology ePrint Archive](https://eprint.iacr.org/eprint-bin/search.pl?last=365&title=1) papers.
5 |
6 | ## TODO
7 |
8 | - OS X seems to delete /var/log/eprint after every reboot and so the script launch fails after reboot
9 | - https://eprint.iacr.org/2019/1172 has a ``<`` sign in the abstract that throws off our parser
10 | - For now, the script uses a throwaway Gmail account to send emails, because it was the easiest way to get this running. This should be made more flexible.
11 | - Better logging library
12 |
13 | ## Installation (as a cron job)
14 |
15 | This works both in Ubuntu Linux (via `/etc/cron.d/`) and on OS X (via `launchctl`).
16 |
17 | First, install dependencies:
18 |
19 | ./install-deps.sh
20 |
21 | Then, [create a dummy Gmail account](https://gmail.com) that you do not care about.
22 | Importantly, activate ["less secure apps" mode](https://myaccount.google.com/lesssecureapps), so it can be used by the script to send emails.
23 | **WARNING:** The password of this account will show up in your syslog, so don't use your actual Gmail account.
24 |
25 | Second, run the following:
26 |
27 | ./add-cron-job.sh
28 |
29 | The script will ask you which paper you've read last, what email address you want to be notified at and the credentials of the dummy Gmail account used to send the notifications.
30 |
31 | ### Checking status and restarting the service
32 |
33 | To see if the service is running:
34 |
35 | ./check-cron-job.sh
36 |
37 | To restart the service:
38 |
39 | ./restart-cron-job.sh
40 |
41 | ## Pull requests
42 |
43 | ...that fix bugs are very welcome.
44 |
45 | ## Other IACR ePrint tools
46 |
47 | Check out my lab mate Zack Newman's [`iacr-dl` Python tool](https://github.com/znewman01/iacr-dl), which provides a nice Python API for downloading ePrint papers.
48 |
--------------------------------------------------------------------------------
/add-cron-job.sh:
--------------------------------------------------------------------------------
#!/bin/sh
#
# Installs eprint-iacr-notifier.py as a periodic job. This first part symlinks
# the script into /usr/local/bin, creates the conf file and the log file, and
# asks for the e-mail settings that the job definition further down needs.

set -e
#set -x

scriptdir=$(cd "$(dirname "$0")" && pwd -P)

. "$scriptdir/os.sh"

name=eprint-iacr-notifier.py
bin=/usr/local/bin/$name
conf=/etc/eprint-iacr-notifier.conf
logdir=/var/log/eprint
logfile=$logdir/out+err.log
me=$(whoami)

if [ "$OS" = "OSX" ]; then
    user_group=staff
    root_group=wheel
    cronfile=$HOME/Library/LaunchAgents/org.iacr.eprint.notifier.plist
else
    # use the user's primary group instead of assuming a group named after the user
    user_group=$(id -gn)
    root_group=root
    crondir=/etc/cron.d
    cronfile=$crondir/eprint-iacr-notifier
fi

# symlink script in /usr/local/bin
sudo ln -sf "$scriptdir/$name" "$bin"
sudo chmod +x "$bin"

if [ ! -f "$conf" ]; then
    # `read -p` is a bashism that dash (Ubuntu's /bin/sh) rejects,
    # so print the prompt ourselves and use plain `read -r`.
    printf '%s' "What is the last paper ID you have read? "
    read -r ID

    # the notifier parses this value with int(), so refuse anything else up front
    case "$ID" in
        '' | *[!0-9]*)
            echo "ERROR: The paper ID must be a number, got: '$ID'" >&2
            exit 1
            ;;
    esac

    # write year and latest paper ID to conf file
    conf_tmp=$(mktemp)
    year=$(date +%Y)
    printf '%s\n%s\n' "$year" "$ID" >"$conf_tmp"
    sudo mv "$conf_tmp" "$conf"
else
    echo "WARNING: Already detected config file at: $conf. Leaving intact."
fi

# create log file and dir
if [ ! -d "$logdir" ]; then
    sudo mkdir "$logdir"
fi

if [ ! -f "$logfile" ]; then
    sudo touch "$logfile"
else
    echo "WARNING: Already detected log file at: $logfile. Leaving intact."
fi

# the job runs as the current user, who must be able to append to the log
sudo chown "$me:$user_group" "$logdir"
sudo chown "$me:$user_group" "$logfile"

if [ ! -f "$cronfile" ]; then
    # get details about the Gmail account used to send emails
    printf '%s' "What is the email address that should receive new paper notifications? "
    read -r NOTIF_EMAIL
    printf '%s' "Gmail account username that should send out the emails: "
    read -r GMAIL_USER
    printf '%s' "Gmail account password: "
    read -r GMAIL_PASSWD # -r keeps backslashes in the password intact
    GMAIL_USER=${GMAIL_USER%"@gmail.com"} # in case user gives extra @gmail.com
    GMAIL_ADDR="$GMAIL_USER@gmail.com"
fi
65 |
66 | if [ "$OS" = "OSX" ]; then
67 | if [ ! -f "$cronfile" ]; then
68 | # copy .plist file to ~/Library/LaunchAgents
69 | cat >$cronfile << EOF
70 |
71 |
72 |
73 |
74 | Label
75 | org.iacr.eprint.notifier
76 | ProgramArguments
77 |
78 | /usr/local/bin/python3
79 | /usr/local/bin/eprint-iacr-notifier.py
80 | $NOTIF_EMAIL
81 | $GMAIL_ADDR
82 | $GMAIL_PASSWD
83 | /etc/eprint-iacr-notifier.conf
84 |
86 |
87 | KeepAlive
88 |
89 | SuccessfulExit
90 |
91 |
92 | StandardErrorPath
93 | $logfile
94 | StandardOutPath
95 | $logfile
96 | StartInterval
97 | 3600
98 |
99 |
100 | EOF
101 | else
102 | echo "WARNING: Already detected plist file at: $cronfile. Leaving intact."
103 | fi
104 |
105 | launchctl unload $cronfile || :
106 | launchctl load -w $cronfile
107 |
108 | echo "launchd job status: "
109 | launchctl list | grep org.iacr.eprint.notifier
110 |
111 | elif [ "$OS" = "Linux" ]; then
112 | if [ ! -f "$cronfile" ]; then
113 | # configure cron job in /etc/cron.d/
114 | # For help, see: https://crontab.guru/#0_5_*_*_1-7
115 | time="5" # the hour of the day to run the script: from 0 to 23 (military format)
116 | days="1-7" # the days of the week to run the script: any range from 1 to 7
117 | cron_tmp=`mktemp`
118 | echo "0 $time * * $days `whoami` $bin $NOTIF_EMAIL $GMAIL_ADDR $GMAIL_PASSWD $conf >>$logfile 2>&1" >$cron_tmp
119 | # if you want to test the cron job is installing correctly, use this 'every 1-minute' template instead
120 | #echo "* * * * * `whoami` $bin $NOTIF_EMAIL $GMAIL_ADDR $GMAIL_PASSWD $conf >>$logfile 2>&1" >$cron_tmp
121 | sudo mkdir -p $crondir
122 | sudo mv $cron_tmp $cronfile
123 | sudo chmod 0644 $cronfile
124 | sudo chown root:$root_group $cronfile
125 | else
126 | echo "WARNING: Already detected cron file at: $cronfile. Leaving intact."
127 | fi
128 | fi
129 |
--------------------------------------------------------------------------------
/check-cron-job.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Shows the state of the eprint notifier job on OS X: the launchd .plist,
# the output of the most recent run found in the log file, and the job's
# entry in `launchctl list`. Does nothing on other systems.

set -e
#set -x

scriptdir=$(cd "$(dirname "$0")" && pwd -P)

. "$scriptdir/os.sh"

logdir=/var/log/eprint
logfile=$logdir/out+err.log

if [ "$OS" = "OSX" ]; then
    name=org.iacr.eprint.notifier
    cronfile=$HOME/Library/LaunchAgents/$name.plist

    echo "Cronfile at '$cronfile':"
    echo "========================"
    if [ -f "$cronfile" ]; then
        cat "$cronfile"
    else
        echo "ERROR: No launchd .plist file at '$cronfile'"
    fi
    echo

    if [ -d "$logdir" ]; then
        echo "Last run in logfile at '$logfile':"
        echo "=================================="
        if [ -f "$logfile" ]; then
            # The notifier prints a "Time:" line when it starts, so the last
            # such line marks where the most recent run begins.
            last_run_line=$(grep -n '^Time:' "$logfile" | tail -n 1 | cut -f 1 -d:)

            if [ -n "$last_run_line" ]; then
                tail -n "+$last_run_line" "$logfile"
            fi
        else
            echo "ERROR: No $logfile in $logdir. List of files in $logdir is:"
            ls -l "$logdir"
        fi
        echo
    else
        echo "ERROR: No logdir in $logdir"
    fi

    echo "Status in 'launchctl list':"
    echo "==========================="
    list=$(launchctl list)
    echo "$list" | head -n 1
    # grep exits non-zero when the job is not listed; without the fallback,
    # `set -e` would stop the script here without saying why.
    echo "$list" | grep -F "$name" || echo "ERROR: '$name' is not loaded in launchd"
    echo
    echo "(If status is 0, then all went well the last time the script was run!)"
fi
52 |
--------------------------------------------------------------------------------
/eprint-iacr-notifier.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | from bs4 import BeautifulSoup
4 | from email.mime.multipart import MIMEMultipart
5 | from email.mime.text import MIMEText
6 | from pprint import pprint
7 |
8 | import bs4
9 | import datetime
10 | import smtplib
11 | import sys
12 | import urllib.request, urllib.error, urllib.parse
13 |
def get_url(url):
    """Download `url` and return the response body as bytes.

    Progress messages are printed to stdout before and after the download.
    Errors raised by urllib are not caught here and propagate to the caller.
    """
    print("Downloading page at", url, "...")
    # The context manager closes the connection even if read() raises.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    print(" Done.")
    return html
20 |
def dedup_spaces(string):
    """Collapse each run of whitespace in `string` to one space and trim both ends."""
    words = string.split()
    return " ".join(words).strip()
23 |
# Paper IDs that appear in URLs are zero-padded to three digits,
# e.g., 1 becomes 001, while 100 and above are used as they are.
def format_paper_id(paper_id):
    needs_padding = paper_id < 100
    return "{:03d}".format(paper_id) if needs_padding else str(paper_id)
31 |
def process_paper(base_url, paper_id, parser):
    """Download one paper's page and extract its details.

    Args:
        base_url: URL prefix that the zero-padded paper ID is appended to.
        paper_id: the paper's integer ID.
        parser: name of the parser handed to BeautifulSoup.

    Returns:
        A (title, authors, abstract, pdflink) tuple of strings.

    Exits the process with status 1 if the page runs out of siblings, or hits
    a node that is neither a Tag nor a NavigableString, before the
    "Category / Keywords:" marker is found.
    """
    url = base_url + format_paper_id(paper_id)
    paper_html = get_url(url)

    soup = BeautifulSoup(paper_html, parser)

    # The first <b> holds the title and the first <i> holds the authors.
    # bs[1] is presumably the "Abstract:" label -- verify against the page layout.
    bs = soup.find_all('b')
    title = dedup_spaces(bs[0].text)
    authors = dedup_spaces(soup.find('i').text)

    # For some papers the abstract itself contains child tags
    # (see https://eprint.iacr.org/2019/868 for example).
    # This means bs[1].next_sibling won't get the full abstract, so we have to keep iterating.
    # bs[1] is a Tag object and bs[1].next_sibling is a NavigableString
    first_paragraph = bs[1].next_sibling
    assert(type(first_paragraph) is bs4.element.NavigableString)
    abstract = dedup_spaces(first_paragraph)

    # Walk the following siblings until the "Category / Keywords" marker.
    # The type dispatch below is the only check needed on each node: the former
    # in-loop assert tested first_paragraph, which never changes, so it verified nothing.
    curr_paragraph = first_paragraph.next_sibling
    while True:
        if type(curr_paragraph) is bs4.element.Tag:
            par = dedup_spaces(curr_paragraph.get_text())
        elif type(curr_paragraph) is bs4.element.NavigableString:
            par = dedup_spaces(curr_paragraph)
        else:
            # also reached when next_sibling is None, i.e. the marker was never found
            print("ERROR: I need to better understand BeautifoulSoup")
            sys.exit(1)

        if par == "Category / Keywords:":
            break

        # skip whitespace-only nodes; everything else is one more paragraph
        if len(par) > 0:
            abstract += "\n\n" + par

        curr_paragraph = curr_paragraph.next_sibling

    pdflink = url + ".pdf"

    return title, authors, abstract, pdflink
84 |
def test_gmail(username, passwd):
    """Check that the Gmail credentials work by logging in to smtp.gmail.com over SSL.

    Args:
        username: Gmail user name, without the "@gmail.com" suffix.
        passwd: the account's password.

    Exceptions raised by smtplib on a failed connection or login are not
    caught here and propagate to the caller.
    """
    # Never print the password: the cron/launchd job redirects stdout to the log file.
    print("Testing " + username + "@gmail.com")
    # The context manager sends QUIT and closes the socket even if login() raises.
    with smtplib.SMTP_SSL('smtp.gmail.com', 465) as server:
        server.login(username, passwd)
90 |
91 | # remove the script's name from the argument list
92 | script_name = sys.argv[0]
93 | del sys.argv[0]
94 |
95 | if len(sys.argv) < 4:
96 | print("Usage:", script_name, " [] []")
97 | sys.exit(1)
98 |
99 | notified_email = sys.argv[0]
100 | del sys.argv[0]
101 | sender_gmail_addr = sys.argv[0]
102 | sender_gmail_username = sender_gmail_addr.split("@")[0]
103 | del sys.argv[0]
104 | sender_gmail_passw = sys.argv[0]
105 | del sys.argv[0]
106 | conf_file = sys.argv[0]
107 | del sys.argv[0]
108 |
109 | now = datetime.datetime.now()
110 | print("Time:", now.strftime("%Y-%m-%d %H:%M"))
111 | print("Notified email(s):", notified_email)
112 |
113 | # please set to true when you are debugging and you don't want to send emails
114 | simulate_email=False
115 | # please set to true when you are debugging and you don't want to update the conf file with the latest paper ID
116 | simulate_conf_update=False
117 |
118 | if len(sys.argv) > 0:
119 | simulate_email=(sys.argv[0].lower() == "true" or sys.argv[0] == "1")
120 | del sys.argv[0]
121 | print("Simulate email sending?", simulate_email)
122 |
123 | if len(sys.argv) > 0:
124 | simulate_conf_update=(sys.argv[0].lower() == "true" or sys.argv[0] == "1")
125 | del sys.argv[0]
126 | print("Do NOT update conf?", simulate_conf_update)
127 |
128 | print()
129 |
130 | if simulate_email == False:
131 | test_gmail(sender_gmail_username, sender_gmail_passw)
132 |
133 | parser = "lxml"
134 |
135 | # open file for both reading and writing
136 | f = open(conf_file, "r+")
137 | # read year from file
138 | year = int(f.readline())
139 | # read last paper's ID from file
140 | last_paper_id = f.readline()
141 | url = "http://eprint.iacr.org/" + str(year) + "/"
142 |
143 | if last_paper_id == '':
144 | print("ERROR: '" + conf_file + "' file is empty. Please type the paper ID that you want to start from.")
145 | sys.exit(1)
146 |
147 | last_paper_id = int(last_paper_id)
148 | print("Last paper ID:", last_paper_id)
149 |
150 | # download eprint index
151 | index_html = get_url(url)
152 |
153 | soup = BeautifulSoup(index_html, parser)
154 |
155 | # parse papers
156 | #print "New paper IDs:"
157 | skipped = [] # keep track of skipped links, for debugging purposes
158 | new_last_paper_id = last_paper_id
159 | email_html="Hey there,
\nNew papers have been published on the Cryptology ePrint Archive!
\n"
160 | email_text="Hey there,\n\nNew papers have been published on the Cryptology ePrint Archive!\n\n"
161 | firstPaper = True
162 | for link in reversed(soup.find_all('a')):
163 | link = link.get('href')
164 | # links will be of the form // and //.pdf
165 | split = link.split(".")
166 |
167 | # if it's a PDF link, skip
168 | if len(split) == 2:
169 | skipped.append(link)
170 | continue
171 | assert len(split) == 1
172 |
173 | # now split into [ empty string, , ]
174 | split = split[0]
175 | paper_id = split.split("/")
176 | if len(paper_id) != 3:
177 | #print "Skipped non-paper link: " + link
178 | skipped.append(link)
179 | continue;
180 |
181 | paper_id = int(paper_id[2])
182 | if paper_id > last_paper_id:
183 | #print
184 | #print paper_id
185 | title, authors, abstract, pdflink = process_paper(url, paper_id, parser)
186 | new_last_paper_id = max(paper_id, new_last_paper_id)
187 |
188 | email_html += "\n"
189 | if firstPaper == False:
190 | email_html += "
\n"
191 | email_html += "Title: " + title + " (" + format_paper_id(paper_id) + " PDF)
\n"
192 | email_html += "Authors: " + authors + "
\n"
193 | email_html += "Abstract: " + abstract.replace("\n\n", "
\n\n") + "
\n"
194 |
195 | email_text += "\n"
196 | email_text += "Title: " + title + "\n"
197 | email_text += "Authors: " + authors + "\n"
198 | email_text += "Link: " + pdflink + "\n"
199 | email_text += "Abstract: " + abstract + "\n\n"
200 | email_text += "-----------------"
201 | email_text += "\n\n"
202 |
203 | firstPaper = False
204 |
205 |
206 | # if there were new papers, email them to the right person
207 | if new_last_paper_id > last_paper_id:
208 | email_text += "Cheers,\nThe Crypto eprint whisperer\nhttps://github.com/alinush/eprint-iacr-notifier\n\nMay Alice and Bob forever talk securely."
209 |
210 | email_html += "\n"
211 | email_html += "Cheers,
\nThe Crypto eprint whisperer
\n"
212 | email_html += "GitHub: https://github.com/alinush/eprint-iacr-notifier
\n"
213 | email_html += "May the hardness of discrete log forever be with you."
214 |
215 | email_html = email_html.strip()
216 | email_text = email_text.strip()
217 |
218 | print()
219 | print("Emailing " + notified_email + ":")
220 | print()
221 | # Must encode to UTF8 before printing. Otherwise some Unicode strings will print fine to stdout, but not when redirected to a file.
222 | print(email_html.encode('utf-8'))
223 | print()
224 |
225 | # craft MIME email
226 | mime_email = MIMEMultipart('alternative')
227 | subj = 'New Crypto ePrints: ' + str(year) + '/' + format_paper_id(last_paper_id + 1)
228 | # WARNING: Updating mime_email['Subject'] with += or setting it twice with = results in two different subjects for the same email
229 | # when we notify two email addresses. This is why we only set it once with an if statement.
230 | if new_last_paper_id > last_paper_id + 1:
231 | mime_email['Subject'] = subj + ' to ' + str(year) + '/' + format_paper_id(new_last_paper_id)
232 | else:
233 | mime_email['Subject'] = subj
234 | mime_email['From'] = sender_gmail_addr;
235 | mime_email['To'] = notified_email;
236 | mime_email.attach(MIMEText(email_text, 'plain', 'utf-8'))
237 | mime_email.attach(MIMEText(email_html, 'html', 'utf-8'))
238 |
239 | # connect to Gmail and send
240 | if simulate_email == False:
241 | server = smtplib.SMTP_SSL('smtp.gmail.com', 465)
242 | server.login(sender_gmail_username, sender_gmail_passw);
243 | server.sendmail(
244 | sender_gmail_addr,
245 | notified_email.split(","),
246 | mime_email.as_string())
247 | server.quit()
248 |
249 | print("Sent email with title '" + mime_email['Subject'] + "' successfully to:", notified_email)
250 | else:
251 | print("Simulating, so no email was sent.")
252 |
253 | if simulate_conf_update == False:
254 | # write the new last paper ID in the conf file (and rewrite the same year)
255 | f.truncate(0)
256 | f.seek(0)
257 | f.write(str(year) + '\n')
258 | f.write(str(new_last_paper_id) + '\n')
259 | f.close()
260 | print("Wrote last ID to conf file:", new_last_paper_id)
261 | else:
262 | print("Simulating so conf file was not updated")
263 |
--------------------------------------------------------------------------------
/install-deps.sh:
--------------------------------------------------------------------------------
#!/bin/sh
#
# Installs the Python 3 libraries that eprint-iacr-notifier.py needs:
# BeautifulSoup and the lxml parser the script passes to it.

set -e

scriptdir=$(cd "$(dirname "$0")" && pwd -P)

. "$scriptdir/os.sh"

if [ "$OS" = "Linux" ]; then
    # The notifier runs under python3, so the Python 2 `python-bs4` package
    # does not provide what it imports; lxml is required as its parser.
    sudo apt-get install python3-bs4 python3-lxml
elif [ "$OS" = "OSX" ]; then
    # `python3 -m pip` installs for the python3 interpreter itself, whereas a
    # bare `pip` may belong to a different Python installation.
    python3 -m pip install lxml beautifulsoup4
fi
13 |
--------------------------------------------------------------------------------
/os.sh:
--------------------------------------------------------------------------------
# Detects the host operating system. Meant to be sourced; it sets:
#   OS        - "OSX", "Linux", or "Unknown" for anything else
#   OS_FLAVOR - "Fedora" or "Ubuntu" when /etc/issue mentions one of them
#               on Linux, otherwise "Unknown"
#   NUM_CPUS  - CPU count reported by the system; may be empty or 0 when it
#               could not be determined

OS="Unknown"
OS_FLAVOR="Unknown"
NUM_CPUS=

case "$(uname -s)" in
    Darwin)
        OS="OSX"
        # `|| :` keeps a failed lookup from aborting callers that run under `set -e`
        NUM_CPUS=$(sysctl -n hw.ncpu) || :
        ;;
    Linux)
        OS="Linux"
        # `grep -c` exits 1 when it counts zero lines, hence the `|| :`
        NUM_CPUS=$(grep -c '^processor' /proc/cpuinfo) || :
        if [ -f /etc/issue ]; then
            if grep Fedora /etc/issue >/dev/null; then
                OS_FLAVOR="Fedora"
            elif grep Ubuntu /etc/issue >/dev/null; then
                OS_FLAVOR="Ubuntu"
            fi
        fi
        ;;
esac

#echo "OS: $OS"
#echo "OS Flavor: $OS_FLAVOR"
21 |
--------------------------------------------------------------------------------
/restart-cron-job.sh:
--------------------------------------------------------------------------------
#!/bin/sh
#
# Re-creates the log directory and log file if they are missing, then
# reloads the launchd job. Only OS X is supported; on other systems the
# script exits with an error after preparing the log files.

set -e

scriptdir=$(cd "$(dirname "$0")" && pwd -P)

. "$scriptdir/os.sh"

# Group that should own the log files. os.sh does not define it, so set it
# here: "staff" on OS X (as in add-cron-job.sh), otherwise the user's
# primary group.
if [ "$OS" = "OSX" ]; then
    user_group=staff
else
    user_group=$(id -gn)
fi

# Create log dir if it doesn't exist
logdir=/var/log/eprint
logfile=$logdir/out+err.log
me=$(whoami)

sudo mkdir -p "$logdir"
sudo chown "$me:$user_group" "$logdir"
sudo touch "$logfile"
sudo chown "$me:$user_group" "$logfile"

if [ "$OS" = "OSX" ]; then
    cronfile=$HOME/Library/LaunchAgents/org.iacr.eprint.notifier.plist

    if [ -f "$cronfile" ]; then
        echo "Unloading $cronfile..."
        # don't let a failed unload stop us from loading the job again
        # (add-cron-job.sh guards its unload the same way)
        launchctl unload "$cronfile" || :

        echo "Realoading..."
        launchctl load -w "$cronfile"
    else
        echo "$cronfile does not exist."
        echo "Did you add it using $scriptdir/add-cron-job.sh?"
        exit 1
    fi
else
    echo "ERROR: Not implemented yet."
    exit 1
fi
36 |
--------------------------------------------------------------------------------