├── src
    ├── __init__.py
    ├── startup.sh
    ├── startup4Pubmed.sh
    ├── startup4RaspberryPi.sh
    ├── startup4RaspberryPi4Pubmed.sh
    ├── cron.conf
    ├── sendTwitterMessage.py
    ├── argparser_utils.py
    ├── logging_utils.py
    ├── setup.sh
    ├── HTMLParser_utils.py
    ├── sendSlackMessage.py
    ├── checkBioRxivRSS.py
    ├── test.yaml
    ├── checkPubmedRSS.py
    ├── altmetric_utils.py
    ├── main.py
    ├── main4Pubmed.py
    ├── checkAltmetrics.py
    └── access_sqlite3.py
├── log
    └── README.md
├── db
    └── README.md
├── LICENSE
├── .gitignore
└── README.md


/src/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/log/README.md:
--------------------------------------------------------------------------------
1 | ### Save Batch log files.
2 | 


--------------------------------------------------------------------------------
/db/README.md:
--------------------------------------------------------------------------------
1 | ### Store sqlite3 database files automatically.
2 | 


--------------------------------------------------------------------------------
/src/startup.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | python ./main.py --yaml_setting_file ./production.yaml
4 | 


--------------------------------------------------------------------------------
/src/startup4Pubmed.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | python ./main4Pubmed.py --yaml_setting_file ./production.yaml
4 | 


--------------------------------------------------------------------------------
/src/startup4RaspberryPi.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | cd /home/pi/Desktop/BioRxivCurator/src
3 | /home/pi/miniconda3/bin/python ./main.py --yaml_setting_file ./production.yaml
4 | 


--------------------------------------------------------------------------------
/src/startup4RaspberryPi4Pubmed.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | cd /home/pi/Desktop/BioRxivCurator/src
3 | /home/pi/miniconda3/bin/python ./main4Pubmed.py --yaml_setting_file ./production.yaml
4 | 


--------------------------------------------------------------------------------
/src/cron.conf:
--------------------------------------------------------------------------------
1 | 0 11 * * * bash /home/pi/Desktop/BioRxivCurator/src/startup4RaspberryPi.sh > /home/pi/Desktop/error.txt 2>&1
2 | 0 21 * * * bash /home/pi/Desktop/BioRxivCurator/src/startup4RaspberryPi4Pubmed.sh > /home/pi/Desktop/error4PubMed.txt 2>&1
3 | 


--------------------------------------------------------------------------------
/src/sendTwitterMessage.py:
--------------------------------------------------------------------------------
 1 | import tweepy
 2 | 
 3 | 
 4 | def send_twitter_message(consumer_key, consumer_secret, access_token, access_secret, message):
 5 |     """
 6 |     Simple wrapper for sending a Twitter message.
 7 |     """
 8 |     auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
 9 |     auth.set_access_token(access_token, access_secret)
10 |     api = tweepy.API(auth)
11 |     api.update_status(message)
12 | 


--------------------------------------------------------------------------------
/src/argparser_utils.py:
--------------------------------------------------------------------------------
 1 | from argparse import ArgumentParser
 2 | import yaml
 3 | 
 4 | 
 5 | def get_argument():
 6 |     argparser = ArgumentParser()
 7 |     argparser.add_argument('--yaml_setting_file', type=str,
 8 |                            help="yaml setting file", required=True)
 9 |     args = argparser.parse_args()
10 | 
11 |     with open(args.yaml_setting_file) as file:
12 |         obj = yaml.load(file)
13 | 
14 |     return obj
15 | 


--------------------------------------------------------------------------------
/src/logging_utils.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import os
 3 | from datetime import datetime
 4 | 
 5 | 
 6 | def logger(moduleName):
 7 |     """
 8 |     Setup a log
 9 |     """
10 |     logger = logging.getLogger(moduleName)
11 |     logging.basicConfig(level=logging.DEBUG,
12 |                         filename="../log/log_" +
13 |                         datetime.now().strftime("%Y%m%d%H%M%S") + ".txt",
14 |                         format="%(asctime)s : %(levelname)s : %(module)s : %(funcName)s : %(message)s")
15 |     return logger
16 | 


--------------------------------------------------------------------------------
/src/setup.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | sudo curl -sSL https://repo.continuum.io/miniconda/Miniconda-3.16.0-Linux-armv7l.sh -o /tmp/miniconda.sh
 3 | bash /tmp/miniconda.sh -bfp /home/pi/miniconda3
 4 | sudo rm -rf /tmp/miniconda.sh
 5 | echo 'export PATH="/home/pi/miniconda3/bin:$PATH"' >> ~/.bashrc
 6 | source ~/.bashrc
 7 | conda install -y python=2
 8 | conda update conda
 9 | pip install feedparser
10 | pip install pyyaml
11 | pip install slackclient
12 | pip install tweepy
13 | 
14 | # Install sqlite3 client app
15 | sudo apt-get install sqlitebrowser


--------------------------------------------------------------------------------
/src/HTMLParser_utils.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | import feedparser
 3 | from HTMLParser import HTMLParser
 4 | import re
 5 | import requests
 6 | 
 7 | 
 8 | class parser(HTMLParser):
 9 |     def __init__(self):
10 |         HTMLParser.__init__(self)
11 |         self.flag = False
12 |         self.link = ""
13 | 
14 |     def handle_starttag(self, tag, attrs):
15 |         attrs = dict(attrs)
16 |         if tag == "a" and re.match("^//doi.org", attrs["href"]):
17 |             self.flag = True
18 |             self.link = attrs["href"].replace("//doi.org/", "")
19 | 
20 |     def handle_data(self, data):
21 |         if self.flag:
22 |             self.flag = False
23 | 
24 | 
25 | def getDOI(link):
26 |     r = requests.get(link)
27 |     test = parser()
28 |     test.feed(r.text)
29 |     return test.link
30 | 


--------------------------------------------------------------------------------
/src/sendSlackMessage.py:
--------------------------------------------------------------------------------
 1 | from slackclient import SlackClient
 2 | from logging_utils import logger
 3 | from time import sleep
 4 | 
 5 | 
 6 | def send_slack_message(slack_token, channel, message):
 7 |     """
 8 |     Simple wrapper for sending a Slack message.
 9 |     """
10 |     sc = SlackClient(slack_token)
11 |     response = sc.api_call(
12 |         "chat.postMessage",
13 |         channel=channel,
14 |         text=message
15 |     )
16 | 
17 |     # Check to see if the message sent successfully
18 |     if response["ok"]:
19 |         logger(__name__).info(
20 |             "Message posted successfully: " + response["message"]["ts"])
21 | 
22 |     # If the message failed, check for rate limit headers in the response
23 |     elif response["ok"] is False and response["headers"]["Retry-After"]:
24 |         delay = int(response["headers"]["Retry-After"])
25 |         logger(__name__).warning(
26 |             "Rate limited. Retrying in " + str(delay) + " seconds")
27 |         sleep(delay)
28 |         response = sc.api_call(
29 |             "chat.postMessage",
30 |             channel=channel,
31 |             text=message
32 |         )
33 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 Naoto Imamachi
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/src/checkBioRxivRSS.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | import feedparser
 3 | from logging_utils import logger
 4 | 
 5 | 
 6 | def check_RSS(subjects):
 7 |     """
 8 |     Check the RSS feed of BioRxiv
 9 |     :param subjects: subject categories
10 |     :return: RSS data list
11 |     """
12 |     # Get & Parse RSS
13 |     feed = feedparser.parse(
14 |         "http://connect.biorxiv.org/biorxiv_xml.php?subject={0}".format("+".join(subjects)))
15 | 
16 |     rss_data_list = []    # RSS data list object
17 |     if feed.bozo == 1:
18 |         logger(__name__).error(feed.bozo_exception)
19 |         logger(__name__).error("Failed to reach the feed.")
20 |     else:
21 |         for pub in feed["items"]:
22 |             rss_data_list.append(
23 |                 RSS_data(doi=pub["dc_identifier"],
24 |                          title=pub["title"],
25 |                          url=pub["link"].split('?')[0],
26 |                          date=pub["updated"]))
27 |     return rss_data_list
28 | 
29 | 
30 | class RSS_data(object):
31 |     def __init__(self, doi, title, url, date):
32 |         self.doi = doi
33 |         self.title = title
34 |         self.url = url
35 |         self.date = date
36 | 


--------------------------------------------------------------------------------
/src/test.yaml:
--------------------------------------------------------------------------------
 1 | rss_categories: ['genomics', 'bioinformatics']
 2 | pubmed_rss_link:
 3 |   nature_communications: 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/erss.cgi?rss_guid=1VSjW0JqT_vUo3cx35p0JWrF-tg9pTvR_cXhuez1oIpZtSX9y1'
 4 |   nature_biotechnology: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/erss.cgi?rss_guid=1N5mTdq-XQ44UN5eRYLyumWS-YatO6PiWd4YMZN_9xSIi7DQpO
 5 |   nature_methods: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/erss.cgi?rss_guid=1loZZVJsOLJ-gJdcPOEy3dMRCwYVwe8Ddp5bzKnwIX-ZEjLEjc
 6 |   nature_genetics: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/erss.cgi?rss_guid=1LmeR7JfcIZ8ekn37Saorp-EvwSleIgr-kuPrH5u_qvUf7_54b
 7 |   molecular_cell: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/erss.cgi?rss_guid=18ervbTh5APRvfjolw6T8mvS9kKue9zImyx16MWRfKe1t-HABw
 8 |   elife: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/erss.cgi?rss_guid=1J9m0S5PiOKnPlEQbwldXX7FyOKGNPzWaxHS3P13W6BUPLH1Ya
 9 |   genome_research: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/erss.cgi?rss_guid=1Jau6enPigEWTTCNUZHwEQVsdJQTOhhfBVTFz--6RM6BvBk5J0
10 |   genes_and_development: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/erss.cgi?rss_guid=1FSk0ACuTgzTVKNDrRt684lNVMdL43mTkuWkT0YqSwWU1v5kzC
11 | slack_token: 'xxxxxxxx'
12 | slack_channel: '@xxxxxxx'
13 | twitter_consumer_key: xxxxxxxx
14 | twitter_consumer_secret: xxxxxxxx
15 | twitter_access_token: xxxxxxxx
16 | twitter_access_token_secret: xxxxxxxx
17 | twitter_consumer_key_pubmed: xxxxxxxx
18 | twitter_consumer_secret_pubmed: xxxxxxxx
19 | twitter_access_token_pubmed: xxxxxxxx
20 | twitter_access_token_secret_pubmed: xxxxxxxx


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | env/
 12 | build/
 13 | develop-eggs/
 14 | dist/
 15 | downloads/
 16 | eggs/
 17 | .eggs/
 18 | lib/
 19 | lib64/
 20 | parts/
 21 | sdist/
 22 | var/
 23 | wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | 
 49 | # Translations
 50 | *.mo
 51 | *.pot
 52 | 
 53 | # Django stuff:
 54 | *.log
 55 | local_settings.py
 56 | 
 57 | # Flask stuff:
 58 | instance/
 59 | .webassets-cache
 60 | 
 61 | # Scrapy stuff:
 62 | .scrapy
 63 | 
 64 | # Sphinx documentation
 65 | docs/_build/
 66 | 
 67 | # PyBuilder
 68 | target/
 69 | 
 70 | # Jupyter Notebook
 71 | .ipynb_checkpoints
 72 | 
 73 | # pyenv
 74 | .python-version
 75 | 
 76 | # celery beat schedule file
 77 | celerybeat-schedule
 78 | 
 79 | # SageMath parsed files
 80 | *.sage.py
 81 | 
 82 | # dotenv
 83 | .env
 84 | 
 85 | # virtualenv
 86 | .venv
 87 | venv/
 88 | ENV/
 89 | 
 90 | # Spyder project settings
 91 | .spyderproject
 92 | .spyproject
 93 | 
 94 | # Rope project settings
 95 | .ropeproject
 96 | 
 97 | # mkdocs documentation
 98 | /site
 99 | 
100 | # mypy
101 | .mypy_cache/
102 | 
103 | # sqlite3 db
104 | *.sqlite3
105 | 
106 | # SNS token files
107 | *.tkn
108 | 
109 | # log files
110 | log_*.txt
111 | 
112 | # yaml file
113 | production.yaml


--------------------------------------------------------------------------------
/src/checkPubmedRSS.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | import feedparser
 3 | from logging_utils import logger
 4 | from HTMLParser_utils import getDOI
 5 | from datetime import datetime
 6 | from time import sleep
 7 | 
 8 | 
 9 | # https://eutils.ncbi.nlm.nih.gov/entrez/eutils/erss.cgi?rss_guid=1ROYcHRjBNrxpwoceMUwxUyyF6uHjjPfuA44ekGfuKxDQTKIQE
10 | # (((((((("Nature biotechnology"[Journal]) OR "Nature methods"[Journal]) OR "Nature genetics"[Journal]) OR "Molecular cell"[Journal]) OR "eLife"[Journal]) OR "PLoS biology"[Journal]) OR "Genome research"[Journal]) OR "Genes & development"[Journal]) OR "Nature cell biology"[Journal]
11 | def check_RSS(url):
12 |     """
13 |     Check the RSS feed of PubMed.
14 |     - Nature biotechnology
15 |     - Nature methods
16 |     - Nature genetics
17 |     - Molecular cell
18 |     - eLife
19 |     - PLoS biology
20 |     - Genome research
21 |     - Genes & development
22 |     - Nature cell biology
23 | 
24 |     :param subjects: subject categories
25 |     :return: RSS data list
26 |     """
27 |     # Get & Parse RSS
28 |     feed = feedparser.parse(url)
29 | 
30 |     rss_data_list = []    # RSS data list object
31 |     if feed.bozo == 1:
32 |         logger(__name__).error(feed.bozo_exception)
33 |         logger(__name__).error("Failed to reach the feed.")
34 |     else:
35 |         for pub in feed["items"]:
36 |             link = pub["link"].split('?')[0]
37 |             doi = getDOI(link)
38 |             rss_data_list.append(
39 |                 RSS_data(doi=doi,
40 |                          title=pub["title"],
41 |                          url="https://doi.org/{0}".format(doi),
42 |                          date=datetime.now().strftime("%Y-%m-%d")))
43 |             sleep(0.5)
44 |     return rss_data_list
45 | 
46 | 
47 | class RSS_data(object):
48 |     def __init__(self, doi, title, url, date):
49 |         self.doi = doi
50 |         self.title = title
51 |         self.url = url
52 |         self.date = date
53 | 


--------------------------------------------------------------------------------
/src/altmetric_utils.py:
--------------------------------------------------------------------------------
 1 | # Altmetric API Wrapper
 2 | # Source: https://github.com/lnielsen/python-altmetric
 3 | 
 4 | import requests
 5 | 
 6 | try:
 7 |     import json
 8 | except ImportError:
 9 |     import simplejson as json
10 | 
11 | 
12 | class AltmetricException(Exception):
13 |     pass
14 | 
15 | 
16 | class AltmetricHTTPException(AltmetricException):
17 |     def __init__(self, status_code, msg):
18 |         self.status_code = status_code
19 |         self.msg = msg
20 | 
21 | 
22 | class ParseException(AltmetricException):
23 |     pass
24 | 
25 | 
26 | class Altmetric(object):
27 |     def __init__(self, apikey='', apiver='v1'):
28 |         """
29 |         Cache API key and address.
30 |         """
31 |         self.apikey = apikey
32 |         self.apiver = apiver
33 |         self.default_params = {}
34 | 
35 |         if self.apikey:
36 |             self.default_params = {'key': apikey}
37 | 
38 |         self.api_url = "https://api.altmetric.com/{0}".format(self.apiver)
39 | 
40 |     def __repr__(self):
41 |         if self.apikey:
42 |             return '<Altmetric {0}: {1}>'.format(self.apiver, self.apikey)
43 |         else:
44 |             return '<Altmetric {0}>'.format(self.apiver)
45 | 
46 |     def call(self, method, *args, **kwargs):
47 |         url = "{0}/{1}/{2}".format(self.api_url,
48 |                                    method, "/".join([a for a in args]))
49 |         # Parameter
50 |         params = kwargs or {}
51 |         params.update(self.default_params)
52 | 
53 |         headers = {}
54 | 
55 |         # GET request
56 |         req = requests.get(url, params=params, headers=headers)
57 | 
58 |         # Success
59 |         if req.status_code == 200:
60 |             try:
61 |                 return json.loads(req.text)
62 |             except ValueError as e:
63 |                 raise ParseException(e.message)
64 |         elif req.status_code == 404 and req.text == "Not Found":
65 |             return None
66 |         else:
67 |             raise AltmetricHTTPException(req.status_code, req.text)
68 | 
69 |     def __getattr__(self, method_name):
70 |         def get(self, *args, **kwargs):
71 |             return self.call(method_name, *args, **kwargs)
72 |         return get.__get__(self)
73 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # BioRxivCurator
 2 | 
 3 | BioRxiv article curation batch scripts using Altmetrics data.  
 4 | Altmetrics data is provided by Altmetric.com, a research metrics company who track and collect the online conversations around millions of scholarly outputs.
 5 | Further information about how the Altmetric Attention Score is calculated is available [here](https://www.altmetric.com/about-altmetrics/the-donut-and-score/).
 6 | 
 7 | ## Sample Twitter bot
 8 | 
 9 | The following twitter account is a sample bot for tweeting curated BioRxiv articles using Altmetrics data.  
10 | https://twitter.com/BioRxivCurator
11 | 
12 | ## Requirements and Installation
13 | 
14 | ### Raspberry Pi
15 | 
16 | I recommend using `setup.sh` script to set up your environment on Raspbian OS.
17 | This script automatically create python environment and install sqlite3 client app.
18 | 
19 | ```bash
20 | $ sudo bash ./src/setup.py
21 | ```
22 | 
23 | I tested this script on Raspberry Pi 3 ModelB (Raspbian Stretch with Desktop).
24 | The following show what to do in this script.
25 | 
26 | #### Installing python modules
27 | 
28 | ```bash
29 | $ pip install feedparser
30 | $ pip install pyyaml
31 | $ pip install slackclient
32 | $ pip install tweepy
33 | ```
34 | 
35 | #### Installing sqlite3 client app(Option)
36 | 
37 | If you want to see stored data from a GUI, I recommend you to install sqlite3 client app named DB Browser for SQLite.
38 | 
39 | ```bash
40 | $ sudo apt-get install sqlitebrowser
41 | ```
42 | 
43 | ## Preparation of slack and twitter access token
44 | 
45 | `./src/production.yaml` are needed to run BioRxivCurator.  
46 | rss_categories is set to several categories. Check adaptive categories for BioRxiv RSS feed.  
47 | https://www.biorxiv.org/alertsrss
48 | 
49 | The values of slack_token, slack_channel, twitter_consumer_key, twitter_consumer_secret, twitter_access_token and twitter_access_token_secret are replaced with yours.
50 | 
51 | ```
52 | rss_categories: ['genomics', 'bioinformatics']
53 | slack_token: 'xxxxxxxx'
54 | slack_channel: '@xxxxxxx'
55 | twitter_consumer_key: xxxxxxxx
56 | twitter_consumer_secret: xxxxxxxx
57 | twitter_access_token: xxxxxxxx
58 | twitter_access_token_secret: xxxxxxxx
59 | ```
60 | 
61 | ## Basic Usage
62 | 
63 | Run BioRxivCurator following the main script.
64 | 
65 | ```
66 | $ python ./src/startup.sh
67 | ```
68 | 


--------------------------------------------------------------------------------
/src/main.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | from logging_utils import logger
 3 | from argparser_utils import get_argument
 4 | 
 5 | import access_sqlite3
 6 | from checkBioRxivRSS import check_RSS
 7 | from checkAltmetrics import check_altmetrics
 8 | from sendSlackMessage import send_slack_message
 9 | from sendTwitterMessage import send_twitter_message
10 | 
11 | 
12 | def main():
13 |     # Get setting file
14 |     setting_dict = get_argument()
15 |     logger(__name__).info(setting_dict)
16 | 
17 |     # Parse RSS feed
18 |     logger(__name__).info("Start Parsing RSS feed...")
19 |     RSS_data_list = check_RSS(setting_dict['rss_categories'])
20 | 
21 |     # Create sqlite3 database if not exists
22 |     sqlite3_file = "../db/storeAltmetrics.sqlite3"
23 |     if not access_sqlite3.create_tables(sqlite3_file):
24 |         return
25 | 
26 |     # Insert new target articles into sqlite3 db
27 |     logger(__name__).info("Insert new target articles into sqlite3 db.")
28 |     if not access_sqlite3.insert_new_doi(sqlite3_file, RSS_data_list):
29 |         return
30 | 
31 |     # Get all target articles for checking altmetrics score
32 |     logger(__name__).info(
33 |         "Get all target articles for checking altmetrics score.")
34 |     target_doi_list = access_sqlite3.select_target_doi(sqlite3_file)
35 | 
36 |     # Get altmetric score for each article
37 |     for doi_info in target_doi_list:
38 |         logger(__name__).info("Get altmetric score for " + doi_info.doi)
39 |         altmetrics_data = check_altmetrics(doi_info)
40 |         if altmetrics_data == None:
41 |             continue
42 | 
43 |         # Insert scores into sqlite3 db
44 |         logger(__name__).info("Insert scores into sqlite3 db.")
45 |         access_sqlite3.insert_altmetric_score(
46 |             sqlite3_file, doi_info.doi, altmetrics_data)
47 | 
48 |         # Send a message to SNS
49 |         if altmetrics_data.flg == 1:
50 |             message = """{0}\n{1}\n""".format(doi_info.title, doi_info.url)
51 |             send_slack_message(
52 |                 setting_dict['slack_token'],
53 |                 setting_dict['slack_channel'],
54 |                 message)
55 | 
56 |             # Tweet message
57 |             send_twitter_message(
58 |                 setting_dict['twitter_consumer_key'],
59 |                 setting_dict['twitter_consumer_secret'],
60 |                 setting_dict['twitter_access_token'],
61 |                 setting_dict['twitter_access_token_secret'],
62 |                 message)
63 | 
64 |     logger(__name__).info("Successfully finished.")
65 | 
66 | 
67 | if __name__ == '__main__':
68 |     main()
69 | 


--------------------------------------------------------------------------------
/src/main4Pubmed.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | from logging_utils import logger
 3 | from argparser_utils import get_argument
 4 | 
 5 | import access_sqlite3
 6 | from checkPubmedRSS import check_RSS
 7 | from checkAltmetrics import check_altmetrics
 8 | from sendSlackMessage import send_slack_message
 9 | from sendTwitterMessage import send_twitter_message
10 | 
11 | 
12 | def main():
13 |     # Get setting file
14 |     setting_dict = get_argument()
15 |     logger(__name__).info(setting_dict)
16 | 
17 |     # Parse RSS feed
18 |     logger(__name__).info("Start Parsing RSS feed...")
19 |     RSS_data_list = []
20 |     for link in setting_dict['pubmed_rss_link'].values():
21 |         RSS_data_list.extend(check_RSS(link))
22 | 
23 |     # Create sqlite3 database if not exists
24 |     sqlite3_file = "../db/storeAltmetrics4PubMed.sqlite3"
25 |     if not access_sqlite3.create_tables(sqlite3_file):
26 |         return
27 | 
28 |     # Insert new target articles into sqlite3 db
29 |     logger(__name__).info("Insert new target articles into sqlite3 db.")
30 |     if not access_sqlite3.insert_new_doi(sqlite3_file, RSS_data_list):
31 |         return
32 | 
33 |     # Get all target articles for checking altmetrics score
34 |     logger(__name__).info(
35 |         "Get all target articles for checking altmetrics score.")
36 |     target_doi_list = access_sqlite3.select_target_doi(sqlite3_file)
37 | 
38 |     # Get altmetric score for each article
39 |     for doi_info in target_doi_list:
40 |         logger(__name__).info("Get altmetric score for " + doi_info.doi)
41 |         altmetrics_data = check_altmetrics(doi_info)
42 |         if altmetrics_data == None:
43 |             continue
44 | 
45 |         # Insert scores into sqlite3 db
46 |         logger(__name__).info("Insert scores into sqlite3 db.")
47 |         access_sqlite3.insert_altmetric_score(
48 |             sqlite3_file, doi_info.doi, altmetrics_data)
49 | 
50 |         # Send a message to SNS
51 |         if altmetrics_data.flg == 1:
52 |             try:
53 |                 message = """{0}\n{1}\n""".format(doi_info.title, doi_info.url)
54 |                 send_slack_message(
55 |                     setting_dict['slack_token'],
56 |                     setting_dict['slack_channel'],
57 |                     message)
58 | 
59 |                 # Tweet message
60 |                 send_twitter_message(
61 |                     setting_dict['twitter_consumer_key_pubmed'],
62 |                     setting_dict['twitter_consumer_secret_pubmed'],
63 |                     setting_dict['twitter_access_token_pubmed'],
64 |                     setting_dict['twitter_access_token_secret_pubmed'],
65 |                     message)
66 |             except:
67 |                 logger(__name__).error(
68 |                 "Fail to send a message to SNS " + doi_info.doi)
69 | 
70 |     logger(__name__).info("Successfully finished.")
71 | 
72 | 
73 | if __name__ == '__main__':
74 |     main()
75 | 


--------------------------------------------------------------------------------
/src/checkAltmetrics.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | import altmetric_utils
 3 | from logging_utils import logger
 4 | from time import sleep
 5 | from datetime import datetime
 6 | 
 7 | 
 8 | def check_altmetrics(doi_info):
 9 |     try:
10 |         # Get altmetric score
11 |         sleep(1)    # Escaping hammer altmetric server
12 |         doi = doi_info.doi
13 |         altmetric_api = altmetric_utils.Altmetric()
14 |         response = altmetric_api.doi(doi)
15 | 
16 |         if response:
17 |             logger(__name__).info("Get altmetrics score for " + doi)
18 |             # Check altmetric score (pct: >=90)
19 |             try:
20 |                 if response["context"]['journal']['pct'] >= 90:
21 |                     flg = 1
22 |                 else:
23 |                     flg = 0
24 | 
25 |                 pct = response["context"]['journal']['pct']
26 |             except KeyError:
27 |                 flg = 0
28 |                 pct = 0
29 |                 logger(__name__).error(
30 |                 "Fail to getting PCT for " + doi)
31 | 
32 |             # Check elasped date
33 |             date = str(doi_info.date).split("-")
34 |             updated_date = datetime(int(date[0]), int(date[1]), int(date[2]))
35 |             elasped_date = (datetime.now() - updated_date).days
36 |             if elasped_date > 30:
37 |                 flg = -1
38 | 
39 |             try:
40 |                 altmetric_score = response["score"]
41 |             except:
42 |                 altmetric_score = 0
43 |                 logger(__name__).error(
44 |                 "Fail to getting altmetrics score for " + doi)
45 | 
46 |             return altmetrics_data(altmetric_score=altmetric_score,
47 |                                    pct=pct,
48 |                                    flg=flg)
49 |         else:
50 |             logger(__name__).error(
51 |                 "Fail to getting altmetrics score for " + doi)
52 |             return altmetrics_data(altmetric_score=0, pct=0, flg=0)
53 | 
54 |     except altmetric_utils.AltmetricHTTPException as e:
55 |         if e.status_code == 403:
56 |             logger(__name__).error(
57 |                 "You aren't authorized for this call.")
58 |             logger(__name__).error(e.msg)
59 |         elif e.status_code == 420:
60 |             logger(__name__).error(
61 |                 "You are being rate limited.")
62 |             logger(__name__).error(e.msg)
63 |         elif e.status_code == 502:
64 |             logger(__name__).error(
65 |                 "The API version you are using is currently down for maintenance.")
66 |             logger(__name__).error(e.msg)
67 |         elif e.status_code == 404:
68 |             logger(__name__).error(
69 |                 "Altmetric doesn't have any details for the article or set of articles you requested.")
70 |             logger(__name__).error(e.msg)
71 |         return None
72 | 
73 | 
74 | class altmetrics_data(object):
75 |     def __init__(self, altmetric_score, pct, flg):
76 |         self.altmetric_score = altmetric_score
77 |         self.pct = pct
78 |         self.flg = flg
79 | 


--------------------------------------------------------------------------------
/src/access_sqlite3.py:
--------------------------------------------------------------------------------
  1 | import sqlite3
  2 | from logging_utils import logger
  3 | 
  4 | 
  5 | def create_tables(sqlite3_file):
  6 |     """
  7 |     Try to create new tables if not exists.
  8 |     :param sqlite3_file: sqlite3 database file
  9 |     :return: Boolean
 10 |     """
 11 |     try:
 12 |         with sqlite3.connect(sqlite3_file) as conn:
 13 |             c = conn.cursor()
 14 | 
 15 |             # Create biorxiv_altmetrics_log table
 16 |             sql = """CREATE TABLE IF NOT EXISTS biorxiv_altmetrics_log
 17 |                     (doi TEXT,
 18 |                      title TEXT,
 19 |                      link TEXT,
 20 |                      update_date TEXT,
 21 |                      altmetric_score INTEGER,
 22 |                      altmetric_pct INTEGER,
 23 |                      altmetric_flg INTEGER,
 24 |                      PRIMARY KEY(doi)
 25 |                     )"""
 26 |             c.execute(sql)
 27 |             conn.commit()
 28 | 
 29 |         return True
 30 | 
 31 |     except sqlite3.Error as e:
 32 |         logger(__name__).error(e)
 33 |         return False
 34 | 
 35 | 
 36 | def insert_new_doi(sqlite3_file, RSS_data_list):
 37 |     """
 38 |     Try to insert new doi into sqlite3 database.
 39 |     :param sqlite3_file: sqlite3 database file
 40 |     :param RSS_data_list: RSS data list
 41 |     :return: boolean
 42 |     """
 43 |     try:
 44 |         with sqlite3.connect(sqlite3_file) as conn:
 45 |             c = conn.cursor()
 46 | 
 47 |             # Insert article info into biorxiv_altmetrics_log if not already exists
 48 |             sql = """INSERT OR IGNORE INTO biorxiv_altmetrics_log
 49 |                      VALUES(?,?,?,?,?,?,?)"""
 50 |             doi_info = [tuple([p.doi, p.title, p.url, p.date, 0, 0, 0])
 51 |                         for p in RSS_data_list]
 52 |             c.executemany(sql, doi_info)
 53 |             conn.commit()
 54 | 
 55 |         return True
 56 | 
 57 |     except sqlite3.Error as e:
 58 |         logger(__name__).error(e)
 59 |         return False
 60 | 
 61 | 
 62 | def select_target_doi(sqlite3_file):
 63 |     """
 64 |     try to select target doi from biorxiv_altmetrics_log.
 65 |     :param sqlite3_file: sqlite3 database file
 66 |     :return: target doi list
 67 |     """
 68 |     try:
 69 |         with sqlite3.connect(sqlite3_file) as conn:
 70 |             c = conn.cursor()
 71 | 
 72 |             # Select target doi from biorxiv_altmetrics_log
 73 |             sql = """SELECT doi, title, link, update_date from biorxiv_altmetrics_log
 74 |                      WHERE altmetric_flg = 0"""
 75 |             c.execute(sql)
 76 | 
 77 |             # Store doi data as target_doi_data object
 78 |             target_doi_list = []
 79 |             for doi_info in c.fetchall():
 80 |                 target_doi_list.append(target_doi_data(
 81 |                     doi=doi_info[0], title=doi_info[1], url=doi_info[2], date=doi_info[3]))
 82 | 
 83 |         return target_doi_list
 84 | 
 85 |     except sqlite3.Error as e:
 86 |         logger(__name__).error(e)
 87 |         return []
 88 | 
 89 | 
 90 | def insert_altmetric_score(sqlite3_file, doi, altmetrics_data):
 91 |     """
 92 |     Try to insert altmetric score into biorxiv_altmetrics_log
 93 |     :param sqlite3_file: sqlite3 database file
 94 |     :return: boolean
 95 |     """
 96 |     try:
 97 |         with sqlite3.connect(sqlite3_file) as conn:
 98 |             c = conn.cursor()
 99 | 
100 |             # insert altmetric score into biorxiv_altmetrics_log
101 |             sql = """UPDATE biorxiv_altmetrics_log
102 |                      SET altmetric_score = ?,
103 |                           altmetric_pct = ?,
104 |                           altmetric_flg = ?
105 |                      WHERE doi = ?"""
106 |             c.execute(sql, tuple([altmetrics_data.altmetric_score,
107 |                                   altmetrics_data.pct, altmetrics_data.flg,
108 |                                   doi]))
109 |             conn.commit()
110 | 
111 |         return True
112 | 
113 |     except sqlite3.Error as e:
114 |         logger(__name__).error(e)
115 |         return False
116 | 
117 | 
118 | class target_doi_data(object):
119 |     def __init__(self, doi, title, url, date):
120 |         self.doi = doi
121 |         self.title = title
122 |         self.url = url
123 |         self.date = date
124 | 


--------------------------------------------------------------------------------