├── doc
├── img1.png
├── img2.png
├── img3.png
└── img4.png
├── scholar_server.py
├── README.md
└── scholar_to_rss.py
/doc/img1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osnsyc/Scholar-to-RSS/HEAD/doc/img1.png
--------------------------------------------------------------------------------
/doc/img2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osnsyc/Scholar-to-RSS/HEAD/doc/img2.png
--------------------------------------------------------------------------------
/doc/img3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osnsyc/Scholar-to-RSS/HEAD/doc/img3.png
--------------------------------------------------------------------------------
/doc/img4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/osnsyc/Scholar-to-RSS/HEAD/doc/img4.png
--------------------------------------------------------------------------------
/scholar_server.py:
--------------------------------------------------------------------------------
"""Serve the generated Scholar RSS feed over HTTP.

Only the feed file is exposed. The working directory also holds the
Microsoft Graph token cache (``ms_graph_api_token.json``) and ``config.ini``,
so serving the whole directory would let any client download credentials.
"""
import http.server
import socketserver
from urllib.parse import urlsplit

port = 9278

# The single resource clients are allowed to fetch.
FEED_PATH = "/scholar.xml"


class Handler(http.server.SimpleHTTPRequestHandler):
    """Request handler that serves the RSS feed and nothing else."""

    @staticmethod
    def is_feed_path(path):
        """Return True when the request target *path* addresses the feed file.

        The query string and fragment are ignored so feed readers that append
        cache-busting parameters still get the feed.
        """
        return urlsplit(path).path == FEED_PATH

    def send_head(self):
        """Answer GET/HEAD for the feed; reply 404 to every other path.

        ``send_head`` is the common entry point for both ``do_GET`` and
        ``do_HEAD``, so rejecting here also disables directory listings.
        """
        if not self.is_feed_path(self.path):
            self.send_error(404, "File not found")
            return None
        return super().send_head()


class FeedServer(socketserver.TCPServer):
    """TCP server that can rebind its port immediately after a restart."""

    # Without this a quick restart fails with "Address already in use".
    allow_reuse_address = True


if __name__ == "__main__":
    with FeedServer(("", port), Handler) as httpd:
        print(f"Serving at port {port}")
        httpd.serve_forever()
11 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Description
2 |
3 | Use Outlook email to receive Google Scholar Alert emails and parse them into an RSS feed.
4 |
5 | ## Changelog
6 |
7 | - `2024-12-30`: You have to register for an [Azure](https://azure.microsoft.com/) account (free) to get started with Microsoft Entra ID.
8 |
9 | ## Create an Azure account
10 |
11 | - https://azure.microsoft.com/
12 |
13 | ## MS_GRAPH token (Outlook)
14 |
15 | Create an app: https://entra.microsoft.com/#home
16 |
17 | 
18 | 
19 | 
20 | 
21 |
22 | ## Git clone
23 |
24 | ```shell
25 | git clone https://github.com/osnsyc/Scholar-to-RSS.git
26 | cd Scholar-to-RSS
27 | ```
28 |
29 | ```shell
30 | pip install beautifulsoup4 msal
31 | ```
32 |
33 | ## Config
34 |
35 | ```ini
36 | # config.ini
37 | [Outlook]
38 | APP_ID = 12345678-1234-1234-1234-1234567890
39 |
40 | ```
41 |
42 | ## Mail settings
43 |
44 | Set your Outlook address as the alert email in Google Scholar
45 |
46 | **or**
47 |
48 | Set your Outlook address as the forwarding address in Gmail
49 |
50 | ## Run
51 |
52 | ```shell
53 | python scholar_to_rss.py
54 | ```
55 | On the first run, authorize the app through Microsoft Graph: open https://microsoft.com/devicelogin and enter the user_code printed in the terminal.
56 |
57 | ```shell
58 | python scholar_server.py
59 | ```
60 |
61 | ## RSS Subscription
62 |
63 | `http://YOUR_HOST:9278/scholar.xml`
--------------------------------------------------------------------------------
/scholar_to_rss.py:
--------------------------------------------------------------------------------
1 | #!/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | import os
4 | import time
5 | import json
6 | import requests
7 | import configparser
8 | from datetime import datetime, timedelta
9 | from bs4 import BeautifulSoup
10 | import msal
11 |
12 | class Scholar2RSS:
13 |
14 | def __init__(self, APP_ID):
15 | self.APP_ID = APP_ID
16 | self.GRAPH_ENDPOINT = 'https://graph.microsoft.com/v1.0'
17 | self.SCOPES = ['Mail.ReadWrite']
18 | self.MS_API_TOKEN = './ms_graph_api_token.json'
19 | self.XML_PATH = './scholar.xml'
20 |
def convert_to_timestamp(self, date_string):
    """Parse a date such as ``"Mon, 30 Dec 2024 12:00:00 +0000"`` into a POSIX timestamp.

    Args:
        date_string: text in the form weekday, day, month name, year,
            time and numeric UTC offset.

    Returns:
        Seconds since the epoch as a float.

    Raises:
        ValueError: if *date_string* does not match the expected layout.
    """
    return datetime.strptime(date_string, "%a, %d %b %Y %H:%M:%S %z").timestamp()
24 |
def generate_access_token(self):
    """Return an MSAL token response, signing in through the device flow if needed.

    The token cache is loaded from ``self.MS_API_TOKEN`` when that file
    exists. If the cache holds an account, a silent acquisition is tried
    first; when there is no account, or the silent attempt yields nothing,
    the device-code flow is started and the user code is printed. The cache
    is written back to ``self.MS_API_TOKEN`` whenever MSAL reports a change.

    Returns:
        The dict returned by MSAL. It contains ``access_token`` on success
        and error details otherwise.

    Raises:
        RuntimeError: if the device flow could not be initiated.
    """
    access_token_cache = msal.SerializableTokenCache()

    # Restore the cache written by a previous run; the context manager
    # ensures the file handle is closed.
    if os.path.exists(self.MS_API_TOKEN):
        with open(self.MS_API_TOKEN, "r") as cache_file:
            access_token_cache.deserialize(cache_file.read())

    # Assign the SerializableTokenCache object to the client instance.
    client = msal.PublicClientApplication(client_id=self.APP_ID, token_cache=access_token_cache)

    token_response = None
    accounts = client.get_accounts()
    if accounts:
        # Reuse the cached session; MSAL returns None when it cannot.
        token_response = client.acquire_token_silent(self.SCOPES, accounts[0])

    if not token_response:
        # No cached account, or the cached credentials are no longer usable:
        # authenticate interactively.
        flow = client.initiate_device_flow(scopes=self.SCOPES)
        if 'user_code' not in flow:
            raise RuntimeError('Failed to create device flow: ' + json.dumps(flow, indent=4))
        print('Open https://microsoft.com/devicelogin, user_code: ' + flow['user_code'])
        token_response = client.acquire_token_by_device_flow(flow)

    # Persist only when MSAL actually modified the cache.
    if access_token_cache.has_state_changed:
        with open(self.MS_API_TOKEN, 'w') as _f:
            _f.write(access_token_cache.serialize())

    return token_response
56 |
def get_mail(self):
    """Fetch unread Google Scholar alert messages from ``/me/messages``.

    Requests the ``sender``, ``subject`` and ``body`` fields of messages
    that are unread and sent by ``scholaralerts-noreply@google.com``.

    Returns:
        The list found under ``value`` in the response, or ``None`` when
        no token could be obtained, the request fails, the status is not
        200, or no message matches. Failures are printed, not raised.
    """
    endpoint = self.GRAPH_ENDPOINT + '/me/messages'
    access_token = self.generate_access_token()
    # Token acquisition can fail and yield None or an error dict.
    if not access_token or 'access_token' not in access_token:
        print('Failed to acquire access token: ' + str(access_token))
        return None

    headers = {'Authorization': 'Bearer ' + access_token['access_token']}
    # OData system query options carry the "$" prefix; the un-prefixed
    # form is only accepted by a subset of v1.0 APIs.
    query = {
        '$select': 'sender, subject, body',
        '$filter': 'isRead eq false and from/emailAddress/address eq \'scholaralerts-noreply@google.com\'',
    }

    try:
        # A timeout keeps an unattended run from hanging on a stalled connection.
        response = requests.get(endpoint, headers=headers, params=query, timeout=30)
    except requests.RequestException as err:
        print(err)
        return None

    if response.status_code != 200:
        print(response.text)
        return None

    messages = response.json().get('value')
    if not messages:
        print('No new mail.')
        return None
    return messages
77 |
def mark_mail_as_read(self, id):
    """Set ``isRead`` to true on the message with the given id.

    Sends a PATCH to ``/me/messages/<id>``. Failures (no token, network
    error, non-200 status) are printed and not raised.

    Args:
        id: identifier of the message, appended to the endpoint URL.
    """
    access_token = self.generate_access_token()
    # Token acquisition can fail and yield None or an error dict.
    if not access_token or 'access_token' not in access_token:
        print('Failed to acquire access token: ' + str(access_token))
        return

    headers = {
        'Authorization': 'Bearer ' + access_token['access_token'],
        'Content-Type': 'application/json',
    }
    request_body = {'isRead': True}
    endpoint = self.GRAPH_ENDPOINT + '/me/messages/' + id

    try:
        # A timeout keeps an unattended run from hanging on a stalled connection.
        response = requests.patch(endpoint, headers=headers, data=json.dumps(request_body), timeout=30)
    except requests.RequestException as err:
        print(err)
        return

    if response.status_code != 200:
        print(response.text)
90 |
91 | def update_xml_file(self, mail):
92 | # Create xml file
93 | if not os.path.exists(self.XML_PATH):
94 | content = '
" + "Abstract:" + abstracts[index] + "
" + ']]>