├── .gitignore ├── README.md ├── add-cron-job.sh ├── check-cron-job.sh ├── eprint-iacr-notifier.py ├── install-deps.sh ├── os.sh └── restart-cron-job.sh /.gitignore: -------------------------------------------------------------------------------- 1 | README.html 2 | ei-notif.conf 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | eprint-iacr-notifier 2 | ==================== 3 | 4 | Daily email reminders with the newest [Cryptology ePrint Archive](https://eprint.iacr.org/eprint-bin/search.pl?last=365&title=1) papers. 5 | 6 | ## TODO 7 | 8 | - OS X seems to delete /var/log/eprint after every reboot and so the script launch fails after reboot 9 | - https://eprint.iacr.org/2019/1172 has a ``<`` sign in the abstract that throws off our parser 10 | - For now, the script uses a throwaway Gmail account to send emails, because it was the easiest way to get this running. This should be made more flexible. 11 | - Better logging library 12 | 13 | ## Installation (as a cron job) 14 | 15 | This works on both Ubuntu Linux (via `/etc/cron.d/`) and OS X (via `launchctl`). 16 | 17 | First, install dependencies: 18 | 19 | ./install-deps.sh 20 | 21 | Then, [create a dummy Gmail account](https://gmail.com) that you do not care about. 22 | Importantly, activate ["less secure apps" mode](https://myaccount.google.com/lesssecureapps), so it can be used by the script to send emails. 23 | **WARNING:** The password of this account will show up in your syslog, so don't use your actual Gmail account. 24 | 25 | Finally, run the following: 26 | 27 | ./add-cron-job.sh 28 | 29 | The script will ask you which paper you've read last, what email address you want to be notified at, and the credentials of the dummy Gmail account used to send the notifications. 
### Checking status and restarting the service

To see if the service is running:

    ./check-cron-job.sh

To restart the service:

    ./restart-cron-job.sh

## Pull requests

...that fix bugs are very welcome.

## Other IACR ePrint tools

Check out my lab mate Zack Newman's [`iacr-dl` Python tool](https://github.com/znewman01/iacr-dl), which provides a nice Python API for downloading ePrint papers.

--------------------------------------------------------------------------------
/add-cron-job.sh:
--------------------------------------------------------------------------------
#!/bin/sh
#
# add-cron-job.sh: install eprint-iacr-notifier.py as a periodic job.
#
# Steps, in order:
#   1. symlink the notifier into /usr/local/bin
#   2. create /etc/eprint-iacr-notifier.conf (year + last-read paper ID)
#   3. create the log directory/file and hand them to the invoking user
#   4. register the job: a launchd agent on OS X, an /etc/cron.d entry on Linux
#
# Existing conf, log and job files are detected and left intact.
# Uses sudo for everything outside $HOME.

set -e
#set -x

scriptdir=$(cd "$(dirname "$0")" && pwd -P)

. "$scriptdir/os.sh"

name=eprint-iacr-notifier.py
bin=/usr/local/bin/$name
conf=/etc/eprint-iacr-notifier.conf
logdir=/var/log/eprint
logfile=$logdir/out+err.log

if [ "$OS" = "OSX" ]; then
  user_group=staff
  root_group=wheel
  cronfile=$HOME/Library/LaunchAgents/org.iacr.eprint.notifier.plist
else
  # Use the caller's real primary group; a group named after the user is not
  # guaranteed to exist.
  user_group=$(id -gn)
  root_group=root
  crondir=/etc/cron.d
  cronfile=$crondir/eprint-iacr-notifier
fi

# symlink script in /usr/local/bin
sudo ln -sf "$scriptdir/$name" "$bin"
sudo chmod +x "$bin"

if [ ! -f "$conf" ]; then
  # write year and latest paper ID to conf file
  conf_tmp=$(mktemp)
  year=$(date +%Y)
  # printf + read -r instead of 'read -p': -p is not POSIX sh, and -r keeps
  # backslashes in the answer literal.
  printf '%s' "What is the last paper ID you have read? "
  read -r ID
  printf '%s\n' "$year" >"$conf_tmp"
  printf '%s\n' "$ID" >>"$conf_tmp"
  sudo mv "$conf_tmp" "$conf"
else
  echo "WARNING: Already detected config file at: $conf. Leaving intact."
fi

# create log file and dir
if [ ! -d "$logdir" ]; then
  sudo mkdir "$logdir"
fi

if [ ! -f "$logfile" ]; then
  sudo touch "$logfile"
else
  echo "WARNING: Already detected log file at: $logfile. Leaving intact."
fi

sudo chown "$(whoami):$user_group" "$logdir"
sudo chown "$(whoami):$user_group" "$logfile"

if [ ! -f "$cronfile" ]; then
  # get details about the Gmail account used to send emails
  printf '%s' "What is the email address that should receive new paper notifications? "
  read -r NOTIF_EMAIL
  printf '%s' "Gmail account username that should send out the emails: "
  read -r GMAIL_USER
  printf '%s' "Gmail account password: "
  read -r GMAIL_PASSWD
  GMAIL_USER=${GMAIL_USER%"@gmail.com"} # in case user gives extra @gmail.com
  GMAIL_ADDR="$GMAIL_USER@gmail.com"
fi

if [ "$OS" = "OSX" ]; then
  if [ ! -f "$cronfile" ]; then
    # write the launchd .plist file to ~/Library/LaunchAgents
    #
    # NOTE(review): the XML element names below were reconstructed from the
    # standard launchd.plist layout; only the keys and values were available.
    # In particular, confirm the SuccessfulExit value (<false/> assumed).
    # NOTE(review): the values are interpolated verbatim, so a password
    # containing '&' or '<' would produce an invalid plist.
    mkdir -p "$(dirname "$cronfile")"
    cat >"$cronfile" <<EOF
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
    <key>Label</key>
    <string>org.iacr.eprint.notifier</string>
    <key>ProgramArguments</key>
    <array>
        <string>/usr/local/bin/python3</string>
        <string>/usr/local/bin/eprint-iacr-notifier.py</string>
        <string>$NOTIF_EMAIL</string>
        <string>$GMAIL_ADDR</string>
        <string>$GMAIL_PASSWD</string>
        <string>/etc/eprint-iacr-notifier.conf</string>
    </array>
    <key>KeepAlive</key>
    <dict>
        <key>SuccessfulExit</key>
        <false/>
    </dict>
    <key>StandardErrorPath</key>
    <string>$logfile</string>
    <key>StandardOutPath</key>
    <string>$logfile</string>
    <key>StartInterval</key>
    <integer>3600</integer>
</dict>
</plist>
EOF
    # the plist embeds the Gmail password: keep it private to the user
    chmod 600 "$cronfile"
  else
    echo "WARNING: Already detected plist file at: $cronfile. Leaving intact."
  fi

  # unload may fail when the job was never loaded; that is fine
  launchctl unload "$cronfile" || :
  launchctl load -w "$cronfile"

  echo "launchd job status: "
  launchctl list | grep org.iacr.eprint.notifier

elif [ "$OS" = "Linux" ]; then
  if [ ! -f "$cronfile" ]; then
    # configure cron job in /etc/cron.d/
    # For help, see: https://crontab.guru/#0_5_*_*_1-7
    time="5"   # the hour of the day to run the script: from 0 to 23 (military format)
    days="1-7" # the days of the week to run the script: any range from 1 to 7
    cron_tmp=$(mktemp)
    # NOTE(review): the password is pasted into a crontab command line, so a
    # '%' or shell metacharacter in it would be interpreted by cron / the shell.
    echo "0 $time * * $days $(whoami) $bin $NOTIF_EMAIL $GMAIL_ADDR $GMAIL_PASSWD $conf >>$logfile 2>&1" >"$cron_tmp"
    # if you want to test the cron job is installing correctly, use this 'every 1-minute' template instead
    #echo "* * * * * $(whoami) $bin $NOTIF_EMAIL $GMAIL_ADDR $GMAIL_PASSWD $conf >>$logfile 2>&1" >"$cron_tmp"
    sudo mkdir -p "$crondir"
    sudo mv "$cron_tmp" "$cronfile"
    # the entry contains the Gmail password, so do not leave it world-readable;
    # cron only needs the file to be root-owned and readable by root
    sudo chmod 0600 "$cronfile"
    sudo chown "root:$root_group" "$cronfile"
  else
    echo "WARNING: Already detected cron file at: $cronfile. Leaving intact."
  fi
fi
--------------------------------------------------------------------------------
/check-cron-job.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# check-cron-job.sh: show the installed launchd job, the output of its most
# recent run, and its 'launchctl list' status. Only does anything on OS X.

set -e
#set -x

scriptdir=$(cd "$(dirname "$0")" && pwd -P)

. "$scriptdir/os.sh"

logdir=/var/log/eprint
logfile=$logdir/out+err.log

if [ "$OS" = "OSX" ]; then
  name=org.iacr.eprint.notifier
  cronfile=$HOME/Library/LaunchAgents/$name.plist

  echo "Cronfile at '$cronfile':"
  echo "========================"
  if [ -f "$cronfile" ]; then
    cat "$cronfile"
  else
    echo "ERROR: No launchd .plist file at '$cronfile'"
  fi
  echo

  if [ -d "$logdir" ]; then
    echo "Last run in logfile at '$logfile':"
    echo "=================================="
    if [ -f "$logfile" ]; then
      # every run of the notifier starts with a "Time:" line; print the log
      # from the last such line onwards
      last_run_line=$(grep -n '^Time:' "$logfile" | tail -n 1 | cut -f 1 -d:)

      if [ -n "$last_run_line" ]; then
        tail -n "+$last_run_line" "$logfile"
      fi
    else
      echo "ERROR: No $logfile in $logdir. List of files in $logdir is:"
      ls -l "$logdir"
    fi
    echo
  else
    echo "ERROR: No logdir in $logdir"
  fi

  echo "Status in 'launchctl list':"
  echo "==========================="
  list=$(launchctl list)
  echo "$list" | head -n 1
  # grep exits non-zero when the job is not loaded; report that instead of
  # letting 'set -e' end the script silently
  echo "$list" | grep "$name" || echo "ERROR: '$name' is not listed by launchctl"
  echo
  echo "(If status is 0, then all went well the last time the script was run!)"
fi
--------------------------------------------------------------------------------
/eprint-iacr-notifier.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3

from bs4 import BeautifulSoup
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from pprint import pprint

import bs4
import datetime
import smtplib
import sys
import urllib.request, urllib.error, urllib.parse

# Downloads the page at 'url' and returns its raw body (bytes).
def get_url(url):
    print("Downloading page at", url, "...")
    # 'with' closes the HTTP response even if read() raises
    with urllib.request.urlopen(url) as response:
        html = response.read()
    print(" Done.")
    return html

# Collapses every run of whitespace (including newlines) into a single space
# and drops leading/trailing whitespace.
def dedup_spaces(string):
    return " ".join(string.split())

# paper IDs in URLs need to be padded with zeros 25 | # e.g., 1 should be 001 26 | def format_paper_id(paper_id): 27 | if paper_id >= 100: 28 | return str(paper_id) 29 | else: 30 | return "{:03d}".format(paper_id) 31 | 32 | def process_paper(base_url, paper_id, parser): 33 | url = base_url + format_paper_id(paper_id) 34 | paper_html = get_url(url) 35 | 36 | soup = BeautifulSoup(paper_html, parser) 37 | 38 | bs = soup.find_all('b') 39 | title = bs[0].text 40 | authors = soup.find('i') 41 | 42 | title = dedup_spaces(title) 43 | authors = dedup_spaces(authors.text) 44 | 45 | # For some papers, after Abstract: there could be tags like

in the abstract itself (see https://eprint.iacr.org/2019/868 for example). 46 | # This means bs[1].next_sibling won't get the full abstract, so we have to keep iterating. 47 | first_paragraph = bs[1].next_sibling 48 | # first_paragraph.parent is the tag 49 | # bs[1] is a Tag object and bs[1].next_sibling is a NavigableString 50 | 51 | assert(type(first_paragraph) is bs4.element.NavigableString) 52 | abstract = dedup_spaces(first_paragraph) 53 | 54 | curr_paragraph = first_paragraph.next_sibling 55 | while True: # the next tag is for "Category / Keywords" 56 | assert(type(first_paragraph) is bs4.element.NavigableString or type(first_paragraph) is bs4.element.Tag) 57 | 58 | if type(curr_paragraph) is bs4.element.Tag: 59 | par = dedup_spaces(curr_paragraph.get_text()) 60 | elif type(curr_paragraph) is bs4.element.NavigableString: 61 | par = dedup_spaces(curr_paragraph) 62 | else: 63 | print("ERROR: I need to better understand BeautifoulSoup") 64 | sys.exit(1) 65 | 66 | if par == "Category / Keywords:": 67 | break 68 | 69 | if len(par) > 0: 70 | assert(type(par) is str) 71 | abstract += "\n\n" + par 72 | 73 | curr_paragraph = curr_paragraph.next_sibling 74 | 75 | pdflink = url + ".pdf" 76 | 77 | #print " * Title:", title 78 | #print " * Authors:", authors 79 | #print " * Abstract:", abstract 80 | #print " * PDF: ", pdflink 81 | #print 82 | 83 | return title, authors, abstract, pdflink 84 | 85 | def test_gmail(username, passwd): 86 | print("Testing " + username + "@gmail.com with password '" + passwd + "'") 87 | server = smtplib.SMTP_SSL('smtp.gmail.com', 465) 88 | server.login(username, passwd); 89 | server.quit() 90 | 91 | # delete's the script's name 92 | script_name = sys.argv[0] 93 | del sys.argv[0] 94 | 95 | if len(sys.argv) < 4: 96 | print("Usage:", script_name, " [] []") 97 | sys.exit(1) 98 | 99 | notified_email = sys.argv[0] 100 | del sys.argv[0] 101 | sender_gmail_addr = sys.argv[0] 102 | sender_gmail_username = sender_gmail_addr.split("@")[0] 103 | del 
sys.argv[0] 104 | sender_gmail_passw = sys.argv[0] 105 | del sys.argv[0] 106 | conf_file = sys.argv[0] 107 | del sys.argv[0] 108 | 109 | now = datetime.datetime.now() 110 | print("Time:", now.strftime("%Y-%m-%d %H:%M")) 111 | print("Notified email(s):", notified_email) 112 | 113 | # please set to true when you are debugging and you don't want to send emails 114 | simulate_email=False 115 | # please set to true when you are debugging and you don't want to update the conf file with the latest paper ID 116 | simulate_conf_update=False 117 | 118 | if len(sys.argv) > 0: 119 | simulate_email=(sys.argv[0].lower() == "true" or sys.argv[0] == "1") 120 | del sys.argv[0] 121 | print("Simulate email sending?", simulate_email) 122 | 123 | if len(sys.argv) > 0: 124 | simulate_conf_update=(sys.argv[0].lower() == "true" or sys.argv[0] == "1") 125 | del sys.argv[0] 126 | print("Do NOT update conf?", simulate_conf_update) 127 | 128 | print() 129 | 130 | if simulate_email == False: 131 | test_gmail(sender_gmail_username, sender_gmail_passw) 132 | 133 | parser = "lxml" 134 | 135 | # open file for both reading and writing 136 | f = open(conf_file, "r+") 137 | # read year from file 138 | year = int(f.readline()) 139 | # read last paper's ID from file 140 | last_paper_id = f.readline() 141 | url = "http://eprint.iacr.org/" + str(year) + "/" 142 | 143 | if last_paper_id == '': 144 | print("ERROR: '" + conf_file + "' file is empty. Please type the paper ID that you want to start from.") 145 | sys.exit(1) 146 | 147 | last_paper_id = int(last_paper_id) 148 | print("Last paper ID:", last_paper_id) 149 | 150 | # download eprint index 151 | index_html = get_url(url) 152 | 153 | soup = BeautifulSoup(index_html, parser) 154 | 155 | # parse papers 156 | #print "New paper IDs:" 157 | skipped = [] # keep track of skipped links, for debugging purposes 158 | new_last_paper_id = last_paper_id 159 | email_html="Hey there,

\nNew papers have been published on the Cryptology ePrint Archive!

\n" 160 | email_text="Hey there,\n\nNew papers have been published on the Cryptology ePrint Archive!\n\n" 161 | firstPaper = True 162 | for link in reversed(soup.find_all('a')): 163 | link = link.get('href') 164 | # links will be of the form // and //.pdf 165 | split = link.split(".") 166 | 167 | # if it's a PDF link, skip 168 | if len(split) == 2: 169 | skipped.append(link) 170 | continue 171 | assert len(split) == 1 172 | 173 | # now split into [ empty string, , ] 174 | split = split[0] 175 | paper_id = split.split("/") 176 | if len(paper_id) != 3: 177 | #print "Skipped non-paper link: " + link 178 | skipped.append(link) 179 | continue; 180 | 181 | paper_id = int(paper_id[2]) 182 | if paper_id > last_paper_id: 183 | #print 184 | #print paper_id 185 | title, authors, abstract, pdflink = process_paper(url, paper_id, parser) 186 | new_last_paper_id = max(paper_id, new_last_paper_id) 187 | 188 | email_html += "\n" 189 | if firstPaper == False: 190 | email_html += "



\n" 191 | email_html += "Title: " + title + " (" + format_paper_id(paper_id) + " PDF)
\n" 192 | email_html += "Authors: " + authors + "
\n" 193 | email_html += "Abstract: " + abstract.replace("\n\n", "

\n\n") + "

\n" 194 | 195 | email_text += "\n" 196 | email_text += "Title: " + title + "\n" 197 | email_text += "Authors: " + authors + "\n" 198 | email_text += "Link: " + pdflink + "\n" 199 | email_text += "Abstract: " + abstract + "\n\n" 200 | email_text += "-----------------" 201 | email_text += "\n\n" 202 | 203 | firstPaper = False 204 | 205 | 206 | # if there were new papers, email them to the right person 207 | if new_last_paper_id > last_paper_id: 208 | email_text += "Cheers,\nThe Crypto eprint whisperer\nhttps://github.com/alinush/eprint-iacr-notifier\n\nMay Alice and Bob forever talk securely." 209 | 210 | email_html += "\n" 211 | email_html += "Cheers,
\nThe Crypto eprint whisperer
\n" 212 | email_html += "GitHub: https://github.com/alinush/eprint-iacr-notifier

\n" 213 | email_html += "May the hardness of discrete log forever be with you." 214 | 215 | email_html = email_html.strip() 216 | email_text = email_text.strip() 217 | 218 | print() 219 | print("Emailing " + notified_email + ":") 220 | print() 221 | # Must encode to UTF8 before printing. Otherwise some Unicode strings will print fine to stdout, but not when redirected to a file. 222 | print(email_html.encode('utf-8')) 223 | print() 224 | 225 | # craft MIME email 226 | mime_email = MIMEMultipart('alternative') 227 | subj = 'New Crypto ePrints: ' + str(year) + '/' + format_paper_id(last_paper_id + 1) 228 | # WARNING: Updating mime_email['Subject'] with += or setting it twice with = results in two different subjects for the same email 229 | # when we notify two email addresses. This is why we only set it once with an if statement. 230 | if new_last_paper_id > last_paper_id + 1: 231 | mime_email['Subject'] = subj + ' to ' + str(year) + '/' + format_paper_id(new_last_paper_id) 232 | else: 233 | mime_email['Subject'] = subj 234 | mime_email['From'] = sender_gmail_addr; 235 | mime_email['To'] = notified_email; 236 | mime_email.attach(MIMEText(email_text, 'plain', 'utf-8')) 237 | mime_email.attach(MIMEText(email_html, 'html', 'utf-8')) 238 | 239 | # connect to Gmail and send 240 | if simulate_email == False: 241 | server = smtplib.SMTP_SSL('smtp.gmail.com', 465) 242 | server.login(sender_gmail_username, sender_gmail_passw); 243 | server.sendmail( 244 | sender_gmail_addr, 245 | notified_email.split(","), 246 | mime_email.as_string()) 247 | server.quit() 248 | 249 | print("Sent email with title '" + mime_email['Subject'] + "' successfully to:", notified_email) 250 | else: 251 | print("Simulating, so no email was sent.") 252 | 253 | if simulate_conf_update == False: 254 | # write the new last paper ID in the conf file (and rewrite the same year) 255 | f.truncate(0) 256 | f.seek(0) 257 | f.write(str(year) + '\n') 258 | f.write(str(new_last_paper_id) + '\n') 259 | f.close() 260 
| print("Wrote last ID to conf file:", new_last_paper_id) 261 | else: 262 | print("Simulating so conf file was not updated") 263 | -------------------------------------------------------------------------------- /install-deps.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | scriptdir=$(cd $(dirname $0); pwd -P) 4 | 5 | . $scriptdir/os.sh 6 | 7 | if [ "$OS" = "Linux" ]; then 8 | sudo apt-get install python-bs4 9 | elif [ "$OS" = "OSX" ]; then 10 | pip install lxml 11 | pip install beautifulsoup4 12 | fi 13 | -------------------------------------------------------------------------------- /os.sh: -------------------------------------------------------------------------------- 1 | OS_FLAVOR="Unknown" 2 | NUM_CPUS= 3 | 4 | if [ "$(uname -s)" = "Darwin" ]; then 5 | OS="OSX" 6 | NUM_CPUS=`sysctl -n hw.ncpu` 7 | elif [ "$(uname -s)" = "Linux" ]; then 8 | OS="Linux" 9 | NUM_CPUS=`grep -c ^processor /proc/cpuinfo` 10 | if [ -f /etc/issue ]; then 11 | if grep Fedora /etc/issue >/dev/null; then 12 | OS_FLAVOR="Fedora" 13 | elif grep Ubuntu /etc/issue >/dev/null; then 14 | OS_FLAVOR="Ubuntu" 15 | fi 16 | fi 17 | fi 18 | 19 | #echo "OS: $OS" 20 | #echo "OS Flavor: $OS_FLAVOR" 21 | -------------------------------------------------------------------------------- /restart-cron-job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | scriptdir=$(cd $(dirname $0); pwd -P) 6 | 7 | . $scriptdir/os.sh 8 | 9 | # Create log dir if it doesn't exist 10 | logdir=/var/log/eprint 11 | logfile=$logdir/out+err.log 12 | 13 | sudo mkdir -p $logdir 14 | sudo chown `whoami`:$user_group $logdir 15 | sudo touch "$logfile" 16 | sudo chown `whoami`:$user_group "$logfile" 17 | 18 | if [ "$OS" = "OSX" ]; then 19 | cronfile=$HOME/Library/LaunchAgents/org.iacr.eprint.notifier.plist 20 | 21 | if [ -f $cronfile ]; then 22 | echo "Unloading $cronfile..." 
23 | launchctl unload $cronfile 24 | 25 | echo "Realoading..." 26 | launchctl load -w $cronfile 27 | else 28 | echo "$cronfile does not exist." 29 | echo "Did you add it using $scriptdir/add-cron-job.sh?" 30 | exit 1 31 | fi 32 | else 33 | echo "ERROR: Not implemented yet." 34 | exit 1 35 | fi 36 | --------------------------------------------------------------------------------