├── src ├── __init__.py ├── startup.sh ├── startup4Pubmed.sh ├── startup4RaspberryPi.sh ├── startup4RaspberryPi4Pubmed.sh ├── cron.conf ├── sendTwitterMessage.py ├── argparser_utils.py ├── logging_utils.py ├── setup.sh ├── HTMLParser_utils.py ├── sendSlackMessage.py ├── checkBioRxivRSS.py ├── test.yaml ├── checkPubmedRSS.py ├── altmetric_utils.py ├── main.py ├── main4Pubmed.py ├── checkAltmetrics.py └── access_sqlite3.py ├── log └── README.md ├── db └── README.md ├── LICENSE ├── .gitignore └── README.md /src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /log/README.md: -------------------------------------------------------------------------------- 1 | ### Save Batch log files. 2 | -------------------------------------------------------------------------------- /db/README.md: -------------------------------------------------------------------------------- 1 | ### Store sqlite3 database files automatically. 
2 | -------------------------------------------------------------------------------- /src/startup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python ./main.py --yaml_setting_file ./production.yaml 4 | -------------------------------------------------------------------------------- /src/startup4Pubmed.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python ./main4Pubmed.py --yaml_setting_file ./production.yaml 4 | -------------------------------------------------------------------------------- /src/startup4RaspberryPi.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cd /home/pi/Desktop/BioRxivCurator/src 3 | /home/pi/miniconda3/bin/python ./main.py --yaml_setting_file ./production.yaml 4 | -------------------------------------------------------------------------------- /src/startup4RaspberryPi4Pubmed.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cd /home/pi/Desktop/BioRxivCurator/src 3 | /home/pi/miniconda3/bin/python ./main4Pubmed.py --yaml_setting_file ./production.yaml 4 | -------------------------------------------------------------------------------- /src/cron.conf: -------------------------------------------------------------------------------- 1 | 0 11 * * * bash /home/pi/Desktop/BioRxivCurator/src/startup4RaspberryPi.sh > /home/pi/Desktop/error.txt 2>&1 2 | 0 21 * * * bash /home/pi/Desktop/BioRxivCurator/src/startup4RaspberryPi4Pubmed.sh > /home/pi/Desktop/error4PubMed.txt 2>&1 3 | -------------------------------------------------------------------------------- /src/sendTwitterMessage.py: -------------------------------------------------------------------------------- 1 | import tweepy 2 | 3 | 4 | def send_twitter_message(consumer_key, consumer_secret, access_token, access_secret, message): 5 | """ 6 | Simple wrapper for 
# --- src/sendTwitterMessage.py ---------------------------------------------
def send_twitter_message(consumer_key, consumer_secret, access_token, access_secret, message):
    """
    Simple wrapper for sending a Twitter message via tweepy.

    :param consumer_key: Twitter app consumer key
    :param consumer_secret: Twitter app consumer secret
    :param access_token: OAuth access token
    :param access_secret: OAuth access token secret
    :param message: text of the status to post
    """
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_secret)
    api = tweepy.API(auth)
    api.update_status(message)


# --- src/argparser_utils.py ------------------------------------------------
from argparse import ArgumentParser

try:
    import yaml
except ImportError:  # third-party (PyYAML); only required when get_argument runs
    yaml = None


def get_argument(argv=None):
    """
    Parse command-line arguments and load the YAML settings file.

    :param argv: optional argument list; defaults to sys.argv[1:].
        New, backward-compatible parameter (makes the function testable).
    :return: object (normally a dict) parsed from the YAML settings file
    """
    argparser = ArgumentParser()
    argparser.add_argument('--yaml_setting_file', type=str,
                           help="yaml setting file", required=True)
    args = argparser.parse_args(argv)

    with open(args.yaml_setting_file) as setting_file:
        # BUG FIX: yaml.load() without an explicit Loader is deprecated and
        # can instantiate arbitrary Python objects from the config file.
        # safe_load builds plain Python types only.
        obj = yaml.safe_load(setting_file)

    return obj


# --- src/logging_utils.py --------------------------------------------------
# (unused "import os" from the original module removed)
import logging
from datetime import datetime


def logger(moduleName):
    """
    Return a logger named *moduleName*, configuring root logging on first use.

    NOTE: logging.basicConfig is a no-op after the first call in a process,
    so only the first timestamped ../log/log_*.txt filename takes effect.
    """
    log = logging.getLogger(moduleName)  # renamed local: no longer shadows this function
    logging.basicConfig(level=logging.DEBUG,
                        filename="../log/log_" +
                        datetime.now().strftime("%Y%m%d%H%M%S") + ".txt",
                        format="%(asctime)s : %(levelname)s : %(module)s : %(funcName)s : %(message)s")
    return log
install tweepy 13 | 14 | # Install sqlite3 client app 15 | sudo apt-get install sqlitebrowser -------------------------------------------------------------------------------- /src/HTMLParser_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import feedparser 3 | from HTMLParser import HTMLParser 4 | import re 5 | import requests 6 | 7 | 8 | class parser(HTMLParser): 9 | def __init__(self): 10 | HTMLParser.__init__(self) 11 | self.flag = False 12 | self.link = "" 13 | 14 | def handle_starttag(self, tag, attrs): 15 | attrs = dict(attrs) 16 | if tag == "a" and re.match("^//doi.org", attrs["href"]): 17 | self.flag = True 18 | self.link = attrs["href"].replace("//doi.org/", "") 19 | 20 | def handle_data(self, data): 21 | if self.flag: 22 | self.flag = False 23 | 24 | 25 | def getDOI(link): 26 | r = requests.get(link) 27 | test = parser() 28 | test.feed(r.text) 29 | return test.link 30 | -------------------------------------------------------------------------------- /src/sendSlackMessage.py: -------------------------------------------------------------------------------- 1 | from slackclient import SlackClient 2 | from logging_utils import logger 3 | from time import sleep 4 | 5 | 6 | def send_slack_message(slack_token, channel, message): 7 | """ 8 | Simple wrapper for sending a Slack message. 9 | """ 10 | sc = SlackClient(slack_token) 11 | response = sc.api_call( 12 | "chat.postMessage", 13 | channel=channel, 14 | text=message 15 | ) 16 | 17 | # Check to see if the message sent successfully 18 | if response["ok"]: 19 | logger(__name__).info( 20 | "Message posted successfully: " + response["message"]["ts"]) 21 | 22 | # If the message failed, check for rate limit headers in the response 23 | elif response["ok"] is False and response["headers"]["Retry-After"]: 24 | delay = int(response["headers"]["Retry-After"]) 25 | logger(__name__).warning( 26 | "Rate limited. 
def send_slack_message(slack_token, channel, message):
    """
    Send *message* to *channel* via the Slack Web API.

    Retries once when rate limited (Retry-After header present).

    :param slack_token: Slack API token
    :param channel: channel name or user id (e.g. '@user')
    :param message: text to post
    """
    sc = SlackClient(slack_token)
    response = sc.api_call(
        "chat.postMessage",
        channel=channel,
        text=message
    )

    # Check to see if the message sent successfully
    if response["ok"]:
        logger(__name__).info(
            "Message posted successfully: " + response["message"]["ts"])

    # If the message failed, check for rate limit headers in the response.
    # BUG FIX: the original response["headers"]["Retry-After"] raised KeyError
    # whenever the header was absent; use .get() chains instead.
    elif response.get("headers", {}).get("Retry-After"):
        delay = int(response["headers"]["Retry-After"])
        logger(__name__).warning(
            "Rate limited. Retrying in " + str(delay) + " seconds")
        sleep(delay)
        response = sc.api_call(
            "chat.postMessage",
            channel=channel,
            text=message
        )
        # BUG FIX: the retry result was never checked in the original.
        if response["ok"]:
            logger(__name__).info(
                "Message posted successfully: " + response["message"]["ts"])
        else:
            logger(__name__).error(
                "Retry failed: " + str(response.get("error")))

    # BUG FIX: other failures were previously ignored silently.
    else:
        logger(__name__).error(
            "Failed to post message: " + str(response.get("error")))
# --- src/checkBioRxivRSS.py -------------------------------------------------
from __future__ import print_function

try:
    import feedparser
except ImportError:  # third-party; only required when check_RSS runs
    feedparser = None

try:
    from logging_utils import logger
except ImportError:  # stdlib fallback so the module stays importable standalone
    import logging
    logger = logging.getLogger


def check_RSS(subjects):
    """
    Check the RSS feed of BioRxiv.

    :param subjects: iterable of subject categories (e.g. ['genomics'])
    :return: list of RSS_data objects (empty on failure)
    """
    # Get & Parse RSS
    feed = feedparser.parse(
        "http://connect.biorxiv.org/biorxiv_xml.php?subject={0}".format("+".join(subjects)))

    rss_data_list = []  # RSS data list object
    if feed.bozo == 1 and not feed.get("items"):
        # Hard failure: nothing could be parsed at all.
        logger(__name__).error(feed.bozo_exception)
        logger(__name__).error("Failed to reach the feed.")
    else:
        if feed.bozo == 1:
            # IMPROVEMENT: feedparser sets bozo for recoverable problems too;
            # the original dropped entries that had actually been parsed.
            logger(__name__).warning(feed.bozo_exception)
        for pub in feed["items"]:
            rss_data_list.append(
                RSS_data(doi=pub["dc_identifier"],
                         title=pub["title"],
                         url=pub["link"].split('?')[0],  # strip tracking query string
                         date=pub["updated"]))
    return rss_data_list


class RSS_data(object):
    """Value object for one RSS entry (doi, title, url, date)."""

    def __init__(self, doi, title, url, date):
        self.doi = doi
        self.title = title
        self.url = url
        self.date = date
molecular_cell: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/erss.cgi?rss_guid=18ervbTh5APRvfjolw6T8mvS9kKue9zImyx16MWRfKe1t-HABw 8 | elife: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/erss.cgi?rss_guid=1J9m0S5PiOKnPlEQbwldXX7FyOKGNPzWaxHS3P13W6BUPLH1Ya 9 | genome_research: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/erss.cgi?rss_guid=1Jau6enPigEWTTCNUZHwEQVsdJQTOhhfBVTFz--6RM6BvBk5J0 10 | genes_and_development: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/erss.cgi?rss_guid=1FSk0ACuTgzTVKNDrRt684lNVMdL43mTkuWkT0YqSwWU1v5kzC 11 | slack_token: 'xxxxxxxx' 12 | slack_channel: '@xxxxxxx' 13 | twitter_consumer_key: xxxxxxxx 14 | twitter_consumer_secret: xxxxxxxx 15 | twitter_access_token: xxxxxxxx 16 | twitter_access_token_secret: xxxxxxxx 17 | twitter_consumer_key_pubmed: xxxxxxxx 18 | twitter_consumer_secret_pubmed: xxxxxxxx 19 | twitter_access_token_pubmed: xxxxxxxx 20 | twitter_access_token_secret_pubmed: xxxxxxxx -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
# --- src/checkPubmedRSS.py --------------------------------------------------
from __future__ import print_function
from datetime import datetime
from time import sleep

try:
    import feedparser
except ImportError:  # third-party; only required when check_RSS runs
    feedparser = None

try:
    from logging_utils import logger
    from HTMLParser_utils import getDOI
except ImportError:  # stdlib fallback so the module stays importable standalone
    import logging
    logger = logging.getLogger
    getDOI = None


# Example feed (journal filter against eutils erss.cgi):
# https://eutils.ncbi.nlm.nih.gov/entrez/eutils/erss.cgi?rss_guid=1ROYcHRjBNrxpwoceMUwxUyyF6uHjjPfuA44ekGfuKxDQTKIQE
def check_RSS(url):
    """
    Check one PubMed journal RSS feed, e.g.:
    Nature biotechnology / methods / genetics, Molecular cell, eLife,
    PLoS biology, Genome research, Genes & development, Nature cell biology.

    :param url: PubMed eutils RSS feed URL
        (DOC FIX: was documented as ":param subjects", which does not exist)
    :return: list of RSS_data objects (empty on failure)
    """
    # Get & Parse RSS
    feed = feedparser.parse(url)

    rss_data_list = []  # RSS data list object
    if feed.bozo == 1:
        logger(__name__).error(feed.bozo_exception)
        logger(__name__).error("Failed to reach the feed.")
    else:
        for pub in feed["items"]:
            link = pub["link"].split('?')[0]  # strip tracking query string
            doi = getDOI(link)                # scrape the DOI from the article page
            rss_data_list.append(
                RSS_data(doi=doi,
                         title=pub["title"],
                         url="https://doi.org/{0}".format(doi),
                         date=datetime.now().strftime("%Y-%m-%d")))
            sleep(0.5)  # be polite to the PubMed servers
    return rss_data_list


class RSS_data(object):
    """Value object for one RSS entry (doi, title, url, date)."""

    def __init__(self, doi, title, url, date):
        self.doi = doi
        self.title = title
        self.url = url
        self.date = date
# --- src/altmetric_utils.py --------------------------------------------------
# Altmetric API Wrapper
# Source: https://github.com/lnielsen/python-altmetric

# json is in the standard library on every supported Python; the old
# try/except simplejson fallback (a Python <=2.5 relic) has been removed.
import json

try:
    import requests
except ImportError:  # third-party; only required when call() runs
    requests = None


class AltmetricException(Exception):
    """Base class for all Altmetric API errors."""
    pass


class AltmetricHTTPException(AltmetricException):
    """Raised for any HTTP response other than 200 or a 'Not Found' 404."""

    def __init__(self, status_code, msg):
        self.status_code = status_code
        self.msg = msg


class ParseException(AltmetricException):
    """Raised when a 200 response body is not valid JSON."""
    pass


class Altmetric(object):
    def __init__(self, apikey='', apiver='v1'):
        """
        Cache API key and address.

        :param apikey: optional Altmetric API key
        :param apiver: API version segment of the URL (default 'v1')
        """
        self.apikey = apikey
        self.apiver = apiver
        self.default_params = {'key': apikey} if apikey else {}
        self.api_url = "https://api.altmetric.com/{0}".format(self.apiver)

    def __repr__(self):
        # BUG FIX: the original returned ''.format(...) — i.e. always the
        # empty string (the format template was lost). Provide a real repr.
        if self.apikey:
            return '<Altmetric {0}: {1}>'.format(self.apiver, self.apikey)
        return '<Altmetric {0}>'.format(self.apiver)

    def call(self, method, *args, **kwargs):
        """
        Perform a GET against /<apiver>/<method>/<args...>.

        :return: decoded JSON on 200, None on a 'Not Found' 404
        :raises ParseException: on invalid JSON in a 200 response
        :raises AltmetricHTTPException: on any other status code
        """
        url = "{0}/{1}/{2}".format(self.api_url,
                                   method, "/".join([a for a in args]))
        # Parameters: caller kwargs plus the cached API key (if any)
        params = kwargs or {}
        params.update(self.default_params)

        headers = {}

        # GET request
        req = requests.get(url, params=params, headers=headers)

        # Success
        if req.status_code == 200:
            try:
                return json.loads(req.text)
            except ValueError as e:
                # BUG FIX: e.message does not exist on Python 3; use str(e).
                raise ParseException(str(e))
        elif req.status_code == 404 and req.text == "Not Found":
            return None
        else:
            raise AltmetricHTTPException(req.status_code, req.text)

    def __getattr__(self, method_name):
        # Any unknown attribute becomes an API method bound to this instance,
        # e.g. altmetric.doi("10.1101/123456") -> call("doi", "10.1101/123456")
        def get(self, *args, **kwargs):
            return self.call(method_name, *args, **kwargs)
        return get.__get__(self)
10 | https://twitter.com/BioRxivCurator 11 | 12 | ## Requirements and Installation 13 | 14 | ### Raspberry Pi 15 | 16 | I recommend using the `setup.sh` script to set up your environment on Raspbian OS. 17 | This script automatically creates a python environment and installs the sqlite3 client app. 18 | 19 | ```bash 20 | $ sudo bash ./src/setup.sh 21 | ``` 22 | 23 | I tested this script on Raspberry Pi 3 ModelB (Raspbian Stretch with Desktop). 24 | The following shows what this script does. 25 | 26 | #### Installing python modules 27 | 28 | ```bash 29 | $ pip install feedparser 30 | $ pip install pyyaml 31 | $ pip install slackclient 32 | $ pip install tweepy 33 | ``` 34 | 35 | #### Installing the sqlite3 client app (Optional) 36 | 37 | If you want to see the stored data from a GUI, I recommend installing the sqlite3 client app named DB Browser for SQLite. 38 | 39 | ```bash 40 | $ sudo apt-get install sqlitebrowser 41 | ``` 42 | 43 | ## Preparation of slack and twitter access tokens 44 | 45 | `./src/production.yaml` is needed to run BioRxivCurator. 46 | rss_categories is set to several categories. Check the available categories for the BioRxiv RSS feed. 47 | https://www.biorxiv.org/alertsrss 48 | 49 | The values of slack_token, slack_channel, twitter_consumer_key, twitter_consumer_secret, twitter_access_token and twitter_access_token_secret must be replaced with yours. 50 | 51 | ``` 52 | rss_categories: ['genomics', 'bioinformatics'] 53 | slack_token: 'xxxxxxxx' 54 | slack_channel: '@xxxxxxx' 55 | twitter_consumer_key: xxxxxxxx 56 | twitter_consumer_secret: xxxxxxxx 57 | twitter_access_token: xxxxxxxx 58 | twitter_access_token_secret: xxxxxxxx 59 | ``` 60 | 61 | ## Basic Usage 62 | 63 | Run BioRxivCurator with the main script.
# --- src/main.py -------------------------------------------------------------
from __future__ import print_function
from logging_utils import logger
from argparser_utils import get_argument

import access_sqlite3
from checkBioRxivRSS import check_RSS
from checkAltmetrics import check_altmetrics
from sendSlackMessage import send_slack_message
from sendTwitterMessage import send_twitter_message


def main():
    """Fetch BioRxiv RSS entries, score them via Altmetric, and post top articles to Slack/Twitter."""
    # Get setting file
    setting_dict = get_argument()
    logger(__name__).info(setting_dict)

    # Parse RSS feed
    logger(__name__).info("Start Parsing RSS feed...")
    RSS_data_list = check_RSS(setting_dict['rss_categories'])

    # Create sqlite3 database if not exists
    sqlite3_file = "../db/storeAltmetrics.sqlite3"
    if not access_sqlite3.create_tables(sqlite3_file):
        return

    # Insert new target articles into sqlite3 db
    logger(__name__).info("Insert new target articles into sqlite3 db.")
    if not access_sqlite3.insert_new_doi(sqlite3_file, RSS_data_list):
        return

    # Get all target articles for checking altmetrics score
    logger(__name__).info(
        "Get all target articles for checking altmetrics score.")
    target_doi_list = access_sqlite3.select_target_doi(sqlite3_file)

    # Get altmetric score for each article
    for doi_info in target_doi_list:
        logger(__name__).info("Get altmetric score for " + doi_info.doi)
        altmetrics_data = check_altmetrics(doi_info)
        if altmetrics_data is None:  # idiom fix: was "== None"
            continue

        # Insert scores into sqlite3 db
        logger(__name__).info("Insert scores into sqlite3 db.")
        access_sqlite3.insert_altmetric_score(
            sqlite3_file, doi_info.doi, altmetrics_data)

        # Send a message to SNS when flagged as a top article
        if altmetrics_data.flg == 1:
            try:
                message = """{0}\n{1}\n""".format(doi_info.title, doi_info.url)
                send_slack_message(
                    setting_dict['slack_token'],
                    setting_dict['slack_channel'],
                    message)

                # Tweet message
                send_twitter_message(
                    setting_dict['twitter_consumer_key'],
                    setting_dict['twitter_consumer_secret'],
                    setting_dict['twitter_access_token'],
                    setting_dict['twitter_access_token_secret'],
                    message)
            except Exception:
                # CONSISTENCY FIX: main4Pubmed.py already guards SNS delivery;
                # without this, one Slack/Twitter failure aborted the scoring
                # of all remaining articles.
                logger(__name__).error(
                    "Fail to send a message to SNS " + doi_info.doi)

    logger(__name__).info("Successfully finished.")


if __name__ == '__main__':
    main()
# --- src/main4Pubmed.py ------------------------------------------------------
from __future__ import print_function
from logging_utils import logger
from argparser_utils import get_argument

import access_sqlite3
from checkPubmedRSS import check_RSS
from checkAltmetrics import check_altmetrics
from sendSlackMessage import send_slack_message
from sendTwitterMessage import send_twitter_message


def main():
    """Fetch every configured PubMed journal RSS feed, score via Altmetric, and post top articles."""
    # Get setting file
    setting_dict = get_argument()
    logger(__name__).info(setting_dict)

    # Parse every configured PubMed journal feed
    logger(__name__).info("Start Parsing RSS feed...")
    RSS_data_list = []
    for link in setting_dict['pubmed_rss_link'].values():
        RSS_data_list.extend(check_RSS(link))

    # Create sqlite3 database if not exists
    sqlite3_file = "../db/storeAltmetrics4PubMed.sqlite3"
    if not access_sqlite3.create_tables(sqlite3_file):
        return

    # Insert new target articles into sqlite3 db
    logger(__name__).info("Insert new target articles into sqlite3 db.")
    if not access_sqlite3.insert_new_doi(sqlite3_file, RSS_data_list):
        return

    # Get all target articles for checking altmetrics score
    logger(__name__).info(
        "Get all target articles for checking altmetrics score.")
    target_doi_list = access_sqlite3.select_target_doi(sqlite3_file)

    # Get altmetric score for each article
    for doi_info in target_doi_list:
        logger(__name__).info("Get altmetric score for " + doi_info.doi)
        altmetrics_data = check_altmetrics(doi_info)
        if altmetrics_data is None:  # idiom fix: was "== None"
            continue

        # Insert scores into sqlite3 db
        logger(__name__).info("Insert scores into sqlite3 db.")
        access_sqlite3.insert_altmetric_score(
            sqlite3_file, doi_info.doi, altmetrics_data)

        # Send a message to SNS when flagged as a top article
        if altmetrics_data.flg == 1:
            try:
                message = """{0}\n{1}\n""".format(doi_info.title, doi_info.url)
                send_slack_message(
                    setting_dict['slack_token'],
                    setting_dict['slack_channel'],
                    message)

                # Tweet message
                send_twitter_message(
                    setting_dict['twitter_consumer_key_pubmed'],
                    setting_dict['twitter_consumer_secret_pubmed'],
                    setting_dict['twitter_access_token_pubmed'],
                    setting_dict['twitter_access_token_secret_pubmed'],
                    message)
            except Exception:
                # BUG FIX: was a bare "except:", which also swallows
                # SystemExit and KeyboardInterrupt.
                logger(__name__).error(
                    "Fail to send a message to SNS " + doi_info.doi)

    logger(__name__).info("Successfully finished.")


if __name__ == '__main__':
    main()
# --- src/checkAltmetrics.py --------------------------------------------------
from __future__ import print_function
from time import sleep
from datetime import datetime

try:
    import altmetric_utils
    from logging_utils import logger
except ImportError:  # stdlib fallback so the module stays importable standalone
    import logging
    altmetric_utils = None
    logger = logging.getLogger


def check_altmetrics(doi_info):
    """
    Look up the Altmetric record for one article.

    :param doi_info: object with .doi and .date ("YYYY-MM-DD") attributes
    :return: altmetrics_data, or None when the Altmetric API call fails.
        flg semantics: 1 = top-percentile article (journal pct >= 90),
        0 = keep watching, -1 = stop tracking (older than 30 days).
    """
    try:
        sleep(1)  # Escaping hammer altmetric server
        doi = doi_info.doi
        altmetric_api = altmetric_utils.Altmetric()
        response = altmetric_api.doi(doi)

        if not response:
            logger(__name__).error(
                "Fail to getting altmetrics score for " + doi)
            return altmetrics_data(altmetric_score=0, pct=0, flg=0)

        logger(__name__).info("Get altmetrics score for " + doi)

        # Check altmetric percentile within the journal (flag when pct >= 90).
        # The duplicated dict lookup from the original is folded into one.
        try:
            pct = response["context"]['journal']['pct']
            flg = 1 if pct >= 90 else 0
        except KeyError:
            flg = 0
            pct = 0
            logger(__name__).error(
                "Fail to getting PCT for " + doi)

        # Stop tracking articles older than 30 days (typo fix: "elasped")
        date = str(doi_info.date).split("-")
        updated_date = datetime(int(date[0]), int(date[1]), int(date[2]))
        elapsed_days = (datetime.now() - updated_date).days
        if elapsed_days > 30:
            flg = -1

        try:
            altmetric_score = response["score"]
        except KeyError:
            # BUG FIX: was a bare "except:"; only the missing key is expected.
            altmetric_score = 0
            logger(__name__).error(
                "Fail to getting altmetrics score for " + doi)

        return altmetrics_data(altmetric_score=altmetric_score,
                               pct=pct,
                               flg=flg)

    except altmetric_utils.AltmetricHTTPException as e:
        # Map the documented Altmetric HTTP error codes to log messages.
        if e.status_code == 403:
            logger(__name__).error(
                "You aren't authorized for this call.")
        elif e.status_code == 420:
            logger(__name__).error(
                "You are being rate limited.")
        elif e.status_code == 502:
            logger(__name__).error(
                "The API version you are using is currently down for maintenance.")
        elif e.status_code == 404:
            logger(__name__).error(
                "Altmetric doesn't have any details for the article or set of articles you requested.")
        # IMPROVEMENT: unknown status codes were previously dropped silently;
        # the raw message is now always logged.
        logger(__name__).error(e.msg)
        return None


class altmetrics_data(object):
    """Value object for one Altmetric result (altmetric_score, pct, flg)."""

    def __init__(self, altmetric_score, pct, flg):
        self.altmetric_score = altmetric_score
        self.pct = pct
        self.flg = flg
# --- src/access_sqlite3.py ---------------------------------------------------
import sqlite3
from contextlib import closing

try:
    from logging_utils import logger
except ImportError:  # stdlib fallback so the module stays importable standalone
    import logging
    logger = logging.getLogger


def create_tables(sqlite3_file):
    """
    Try to create new tables if not exists.
    :param sqlite3_file: sqlite3 database file
    :return: Boolean
    """
    try:
        # LEAK FIX (all functions below too): "with sqlite3.connect(...)"
        # only manages the transaction — it never closes the connection.
        # contextlib.closing guarantees conn.close() on every path.
        with closing(sqlite3.connect(sqlite3_file)) as conn:
            c = conn.cursor()

            # Create biorxiv_altmetrics_log table
            sql = """CREATE TABLE IF NOT EXISTS biorxiv_altmetrics_log
                     (doi TEXT,
                      title TEXT,
                      link TEXT,
                      update_date TEXT,
                      altmetric_score INTEGER,
                      altmetric_pct INTEGER,
                      altmetric_flg INTEGER,
                      PRIMARY KEY(doi)
                     )"""
            c.execute(sql)
            conn.commit()

        return True

    except sqlite3.Error as e:
        logger(__name__).error(e)
        return False


def insert_new_doi(sqlite3_file, RSS_data_list):
    """
    Try to insert new doi into sqlite3 database.
    :param sqlite3_file: sqlite3 database file
    :param RSS_data_list: RSS data list (objects with doi/title/url/date)
    :return: boolean
    """
    try:
        with closing(sqlite3.connect(sqlite3_file)) as conn:
            c = conn.cursor()

            # Insert article info into biorxiv_altmetrics_log if not already exists
            sql = """INSERT OR IGNORE INTO biorxiv_altmetrics_log
                     VALUES(?,?,?,?,?,?,?)"""
            # Score columns start at 0; altmetric_flg = 0 marks "still tracked"
            doi_info = [(p.doi, p.title, p.url, p.date, 0, 0, 0)
                        for p in RSS_data_list]
            c.executemany(sql, doi_info)
            conn.commit()

        return True

    except sqlite3.Error as e:
        logger(__name__).error(e)
        return False


def select_target_doi(sqlite3_file):
    """
    Try to select target doi from biorxiv_altmetrics_log.
    :param sqlite3_file: sqlite3 database file
    :return: target doi list (empty on error)
    """
    try:
        with closing(sqlite3.connect(sqlite3_file)) as conn:
            c = conn.cursor()

            # Only articles still being tracked (altmetric_flg = 0)
            sql = """SELECT doi, title, link, update_date from biorxiv_altmetrics_log
                     WHERE altmetric_flg = 0"""
            c.execute(sql)

            # Store doi data as target_doi_data objects
            target_doi_list = [
                target_doi_data(doi=row[0], title=row[1],
                                url=row[2], date=row[3])
                for row in c.fetchall()]

        return target_doi_list

    except sqlite3.Error as e:
        logger(__name__).error(e)
        return []


def insert_altmetric_score(sqlite3_file, doi, altmetrics_data):
    """
    Try to insert altmetric score into biorxiv_altmetrics_log.
    :param sqlite3_file: sqlite3 database file
    :param doi: article DOI (primary key)
    :param altmetrics_data: object with altmetric_score/pct/flg attributes
    :return: boolean
    """
    try:
        with closing(sqlite3.connect(sqlite3_file)) as conn:
            c = conn.cursor()

            # Update the score columns for one article
            sql = """UPDATE biorxiv_altmetrics_log
                     SET altmetric_score = ?,
                         altmetric_pct = ?,
                         altmetric_flg = ?
                     WHERE doi = ?"""
            c.execute(sql, (altmetrics_data.altmetric_score,
                            altmetrics_data.pct, altmetrics_data.flg,
                            doi))
            conn.commit()

        return True

    except sqlite3.Error as e:
        logger(__name__).error(e)
        return False


class target_doi_data(object):
    """Value object for one tracked article (doi, title, url, date)."""

    def __init__(self, doi, title, url, date):
        self.doi = doi
        self.title = title
        self.url = url
        self.date = date