├── 2023-06-29_09-11.png
├── README.md
└── arxiv.sh
/2023-06-29_09-11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/victoriastuart/arxiv-sh/HEAD/2023-06-29_09-11.png
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | **arxiv.sh**: automated daily query for new arXiv.org articles in selected subjects, extracted from the arXiv listing pages fetched with curl
2 |
3 | I schedule the script to run daily (via cron, see below) and to email the results to me; I read them in Claws Mail (screenshot attached; links open in a web browser).
4 |
5 | ```
6 | # /etc/crontab
7 | # "At 6:00 am daily" [http://crontab.guru/]:
8 | 0 6 * * * victoria nice -n 19 /mnt/Vancouver/programming/scripts/arxiv.sh
9 | ```
10 |
11 | Because of the way arXiv.org presents the data, each day's output also contains some older results. These are clearly delineated in the script output, so they can either be skipped (scroll past them) or used as a reference to earlier results.
12 |
--------------------------------------------------------------------------------
/arxiv.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # vim: set filetype=sh :
3 | # vim: syntax=sh autoindent tabstop=4 shiftwidth=4 expandtab softtabstop=4 textwidth=220
4 | export LANG=C.UTF-8
5 | # Purpose: download the listing pages of five arXiv categories with curl, reduce each page to titles and abstract links, and mail the combined report.
6 | # file: /mnt/Vancouver/programming/scripts/arxiv.sh
7 | # created: 2023-06-15
8 | # version: 02 [2023-06-29]
9 | # last modified: 2023-06-29 08:53:03 -0700 (PDT)
10 | #
11 | # versions: * v02 [2023-06-29] Added extra "=====..." lines to major sections for easier
12 | # visual reference, and split the printf statements into individual lines
13 | # for easier coding / readability / debugging / future extensions.
14 | # * v01 [2023-06-15] inaugural; replaces /mnt/Vancouver/apps/arxiv/arxiv-rss.sh
15 | # which appears to have been deprecated (upstream) as of 2023-06-15?
16 | # =============================================================================
17 | # The report is assembled in /tmp/arxiv.txt; each section below appends to it.
18 | # Start the report with the current date. This first redirect > overwrites "/tmp/arxiv.txt" (if it exists):
19 | printf '%s\n' $(date +'%Y-%m-%d') > /tmp/arxiv.txt
20 |
21 | # ==============================================================================
22 | # COMPUTER SCIENCE - ARTIFICIAL INTELLIGENCE
23 | # Steps: download the listing page, keep only the lines of interest, rewrite them as plain text, append to the report.
24 | # Earlier variant (default listing): curl https://arxiv.org/list/cs.AI/recent > /tmp/awi2eing.txt
25 | # NOTE: a URL that carries query parameters must be quoted (single quotes are used here) so the shell does not interpret the & character:
26 | # curl 'https://arxiv.org/list/cs.AI/pastweek?skip=0&show=100' > /tmp/awi2eing.txt
27 | # Active request: the pastweek listing with skip=0 and show=200, saved to a scratch file.
28 | curl 'https://arxiv.org/list/cs.AI/pastweek?skip=0&show=200' > /tmp/awi2eing.txt
29 | # NOTE(review): the grep alternation below has empty branches and the first sed command has an empty regex; literal HTML tag text (e.g. an opening h3 tag) appears to have been lost from these patterns - confirm against the original script.
30 | cat /tmp/awi2eing.txt | grep -E '
||^Title: |href="/abs/' > /tmp/ai.txt
31 | # First pass (extended regex, in place): put ===== rulers around headings, replace the Title label with a bullet, expand /abs/ hrefs into https://arxiv.org URLs.
32 | sed -r -i '
33 | s//\n\n========================================\n/
34 | s/<\/h3>/\n========================================/
35 | s/Title:<\/span> /• /g
36 | s/.*"(\/abs.*)" .*$/ https:\/\/arxiv.org\1/
37 | ' /tmp/ai.txt
38 | # Second pass (in place): cut each line at its first double-quote character, then blank out any line that still begins with <.
39 | sed -i '
40 | s/".*$//
41 | s/^<.*//g
42 | ' /tmp/ai.txt
43 |
44 | # Section banner. These redirects >> APPEND the content to the report file (a single > would overwrite it):
45 | printf "\n===============================================================================" >> /tmp/arxiv.txt
46 | printf "\n===============================================================================" >> /tmp/arxiv.txt
47 | printf "\n[1/5] ARTIFICIAL INTELLIGENCE [$(date +'%Y-%m-%d')]" >> /tmp/arxiv.txt
48 | printf "\n===============================================================================" >> /tmp/arxiv.txt
49 | printf "\n===============================================================================\n" >> /tmp/arxiv.txt
50 | # tac writes the cleaned listing to the report with its line order reversed.
51 | tac /tmp/ai.txt >> /tmp/arxiv.txt
52 |
53 | # ==============================================================================
54 | # COMPUTER SCIENCE - COMPUTATION and LANGUAGE
55 | # Download the cs.CL pastweek listing (skip=0, show=100) to a scratch file.
56 | curl 'https://arxiv.org/list/cs.CL/pastweek?skip=0&show=100' > /tmp/gei6sihu.txt
57 | # NOTE(review): the grep alternation below starts with an empty branch and the first sed command has an empty regex; literal HTML tag text (e.g. an opening h3 tag) appears to have been lost from these patterns - confirm against the original script.
58 | cat /tmp/gei6sihu.txt | grep -E '|^Title: |href="/abs/' > /tmp/cl.txt
59 | # First pass (extended regex, in place): put ===== rulers around headings, replace the Title label with a bullet, expand /abs/ hrefs into https://arxiv.org URLs.
60 | sed -r -i '
61 | s//\n========================================\n/
62 | s/<\/h3>/\n========================================/
63 | s/Title:<\/span> /• /g
64 | s/.*"(\/abs.*)" .*$/ https:\/\/arxiv.org\1/
65 | ' /tmp/cl.txt
66 | # Second pass (in place): cut each line at its first double-quote character, then blank out any line that still begins with <.
67 | sed -i '
68 | s/".*$//
69 | s/^<.*//g
70 | ' /tmp/cl.txt
71 | # Section banner, appended (>>) to the report file.
72 | printf "\n===============================================================================" >> /tmp/arxiv.txt
73 | printf "\n===============================================================================" >> /tmp/arxiv.txt
74 | printf "\n[2/5] COMPUTATION and LANGUAGE [$(date +'%Y-%m-%d')]" >> /tmp/arxiv.txt
75 | printf "\n===============================================================================" >> /tmp/arxiv.txt
76 | printf "\n===============================================================================\n" >> /tmp/arxiv.txt
77 | # tac writes the cleaned listing to the report with its line order reversed.
78 | tac /tmp/cl.txt >> /tmp/arxiv.txt
79 |
80 | # ==============================================================================
81 | # COMPUTER SCIENCE - INFORMATION RETRIEVAL
82 | # Steps: download the listing page, keep only the lines of interest, rewrite them as plain text, append to the report.
83 | # curl 'https://arxiv.org/list/cs.IR/pastweek?skip=0&show=100' > /tmp/ahpahn4v.txt
84 | # Fewer submissions in this category, so the default recent listing is fetched instead:
85 | curl 'https://arxiv.org/list/cs.IR/recent' > /tmp/ahpahn4v.txt
86 | # NOTE(review): the grep alternation below starts with an empty branch and the first sed command has an empty regex; literal HTML tag text (e.g. an opening h3 tag) appears to have been lost from these patterns - confirm against the original script.
87 | cat /tmp/ahpahn4v.txt | grep -E '|^Title: |href="/abs/' > /tmp/ir.txt
88 | # First pass (extended regex, in place): put ===== rulers around headings, replace the Title label with a bullet, expand /abs/ hrefs into https://arxiv.org URLs.
89 | sed -r -i '
90 | s//\n========================================\n/
91 | s/<\/h3>/\n========================================/
92 | s/Title:<\/span> /• /g
93 | s/.*"(\/abs.*)" .*$/ https:\/\/arxiv.org\1/
94 | ' /tmp/ir.txt
95 | # Second pass (in place): cut each line at its first double-quote character, then blank out any line that still begins with <.
96 | sed -i '
97 | s/".*$//
98 | s/^<.*//g
99 | ' /tmp/ir.txt
100 | # Section banner, appended (>>) to the report file. The label names this section's category (cs.IR), not the previous one.
101 | printf "\n===============================================================================" >> /tmp/arxiv.txt
102 | printf "\n===============================================================================" >> /tmp/arxiv.txt
103 | printf "\n[3/5] INFORMATION RETRIEVAL [$(date +'%Y-%m-%d')]" >> /tmp/arxiv.txt
104 | printf "\n===============================================================================" >> /tmp/arxiv.txt
105 | printf "\n===============================================================================\n" >> /tmp/arxiv.txt
106 | # tac writes the cleaned listing to the report with its line order reversed.
107 | tac /tmp/ir.txt >> /tmp/arxiv.txt
108 |
109 | # ==============================================================================
110 | # COMPUTER SCIENCE - MACHINE LEARNING
111 | # Download the cs.LG pastweek listing (skip=0, show=150) to a scratch file.
112 | curl 'https://arxiv.org/list/cs.LG/pastweek?skip=0&show=150' > /tmp/ohgai5ni.txt
113 | # NOTE(review): the grep alternation below starts with an empty branch and the first sed command has an empty regex; literal HTML tag text (e.g. an opening h3 tag) appears to have been lost from these patterns - confirm against the original script.
114 | cat /tmp/ohgai5ni.txt | grep -E '|^Title: |href="/abs/' > /tmp/ml.txt
115 | # First pass (extended regex, in place): put ===== rulers around headings, replace the Title label with a bullet, expand /abs/ hrefs into https://arxiv.org URLs.
116 | sed -r -i '
117 | s//\n========================================\n/
118 | s/<\/h3>/\n========================================/
119 | s/Title:<\/span> /• /g
120 | s/.*"(\/abs.*)" .*$/ https:\/\/arxiv.org\1/
121 | ' /tmp/ml.txt
122 | # Second pass (in place): cut each line at its first double-quote character, then blank out any line that still begins with <.
123 | sed -i '
124 | s/".*$//
125 | s/^<.*//g
126 | ' /tmp/ml.txt
127 | # Section banner, appended (>>) to the report file.
128 | printf "\n===============================================================================" >> /tmp/arxiv.txt
129 | printf "\n===============================================================================" >> /tmp/arxiv.txt
130 | printf "\n[4/5] COMPUTER SCIENCE - MACHINE LEARNING [$(date +'%Y-%m-%d')]" >> /tmp/arxiv.txt
131 | printf "\n===============================================================================" >> /tmp/arxiv.txt
132 | printf "\n===============================================================================\n" >> /tmp/arxiv.txt
133 | # tac writes the cleaned listing to the report with its line order reversed.
134 | tac /tmp/ml.txt >> /tmp/arxiv.txt
135 |
136 | # ==============================================================================
137 | # Statistics - Machine Learning
138 | # Steps: download the listing page, keep only the lines of interest, rewrite them as plain text, append to the report.
139 | # Fewer submissions in this category, so a smaller page (show=50) is requested:
140 | curl 'https://arxiv.org/list/stat.ML/pastweek?skip=0&show=50' > /tmp/eeb8lae7.txt
141 | # NOTE(review): the grep alternation below starts with an empty branch and the first sed command has an empty regex; literal HTML tag text (e.g. an opening h3 tag) appears to have been lost from these patterns - confirm against the original script.
142 | cat /tmp/eeb8lae7.txt | grep -E '|^Title: |href="/abs/' > /tmp/statml.txt
143 | # First pass (extended regex, in place): put ===== rulers around headings, replace the Title label with a bullet, expand /abs/ hrefs into https://arxiv.org URLs.
144 | sed -r -i '
145 | s//\n========================================\n/
146 | s/<\/h3>/\n========================================/
147 | s/Title:<\/span> /• /g
148 | s/.*"(\/abs.*)" .*$/ https:\/\/arxiv.org\1/
149 | ' /tmp/statml.txt
150 | # Second pass (in place): cut each line at its first double-quote character, then blank out any line that still begins with <.
151 | sed -i '
152 | s/".*$//
153 | s/^<.*//g
154 | ' /tmp/statml.txt
155 | # Section banner, appended (>>) to the report file.
156 | printf "\n===============================================================================" >> /tmp/arxiv.txt
157 | printf "\n===============================================================================" >> /tmp/arxiv.txt
158 | printf "\n[5/5] STATISTICS - MACHINE LEARNING [$(date +'%Y-%m-%d')]" >> /tmp/arxiv.txt
159 | printf "\n===============================================================================" >> /tmp/arxiv.txt
160 | printf "\n===============================================================================\n" >> /tmp/arxiv.txt
161 | # tac writes the cleaned listing to the report with its line order reversed.
162 | tac /tmp/statml.txt >> /tmp/arxiv.txt
163 |
164 | # ==============================================================================
165 | # DELIVERY
166 | # Send the assembled report (/tmp/arxiv.txt, read on stdin) as the body of a message with the subject arxiv.
167 | mail -s 'arxiv' mail@VictoriasJourney.com < /tmp/arxiv.txt
168 |
169 | # ==============================================================================
170 | # [end of file]
171 | # ==============================================================================
172 |
--------------------------------------------------------------------------------