├── .gitignore ├── .readthedocs.yml ├── .travis.yml ├── ISSUE_TEMPLATE.md ├── LICENSE ├── README.rst ├── docs ├── Makefile ├── conf.py ├── documentation │ ├── client.rst │ └── utils.rst ├── index.rst └── make.bat ├── scripts └── wos ├── setup.py └── wos ├── __init__.py ├── client.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | 55 | # Sphinx documentation 56 | docs/_build/ 57 | 58 | # PyBuilder 59 | target/ 60 | 61 | #Ipython Notebook 62 | .ipynb_checkpoints 63 | 64 | # configuration file 65 | config.py 66 | 67 | # virtualenv 68 | /venv 69 | /venv3 70 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | 3 | # Required 4 | version: 2 5 | 6 | # Build documentation in the docs/ directory with Sphinx 7 | sphinx: 8 | builder: html 9 | configuration: docs/conf.py 10 | 11 | # Build the docs in additional formats such as PDF and ePub. 
12 | formats: all 13 | 14 | # Install the requirements. 15 | python: 16 | version: 3.7 17 | install: 18 | - method: setuptools 19 | path: . 20 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - '2.7' 4 | - '3.6' 5 | install: python setup.py install 6 | script: true 7 | before_deploy: 8 | - pip install restructuredtext_lint 9 | - pip install pygments 10 | - rst-lint README.rst 11 | deploy: 12 | provider: pypi 13 | skip_existing: true 14 | user: enrico.bacis 15 | password: 16 | secure: zad1tqcRlyNu3XniMlY/QjDROmQwVlNV5ABHqifpRBevaxJbvEl2Kax+p3NmWSg3D/j2luB6Gc4kQ97/HobQ4mfrVqk0TPoSbKyjPglsgyLIFmPCxlEYsOpi3gRq5kAbEL8OhQYIR+Dpzj80b0V95PZyeAVBlzBAakRmdyHOSbgt4jA/KNNcCGQgKKYpNFLWp5YGGnW+2yXerhApYzRdM0iO4FMYmzZyl7695ndnhUmSUcCJB1l+nA6fBdn4tiAFq3+IrM4l2BsfUyTZuR+X2NiGm1O23q5i/NY15QGnN53HgcaKgenYPSXrKzjAQYBk4QCvVOqi6H/BYx9edEiPHYIiLk1ULVVfWsyZTOS5OI6d1l1gTfvZJxmlBmZRP+xT9injnb3tQD/e4Est4tbzKjrzm9yDAmJuAN9S5SHOT8vYhhi1+xMCLNTsjKsCMRYNEodJjL8N5r7+jlnWhd6ep152f7Ddcg4BdIC/zQi/CEs2asAKsOCGWbCkjTOc+dbcDM12wiKgzYyV6YvXFeClfdN4weLPcS0R37b8rnH6IkvoImTSUkAhTirUQ9LS7yRwUdA8cb9vs/UgK5x0kV7OqdoAhQPIK9BmvEULY9NzHpnKZ0TnpvoCr4ix+9sXfZZskr9PeNa7aC9Jxbpwvt16E9R6PEpAuacyEtaputWYTnY= 17 | on: 18 | tags: true 19 | branch: master 20 | -------------------------------------------------------------------------------- /ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ### Prerequisites 2 | 3 | Make sure these boxes are checked before filling an issue 4 | 5 | * [ ] This issue is not the `No matches returned for Username or IP` 6 | connection problem detailed [here](https://github.com/enricobacis/wos#faq-i-cannot-connect-) 7 | * [ ] I can reproduce the problem 8 | * [ ] I am running the latest version 9 | 10 | ### Version 11 | 12 | [You can get this information from executing `pip show 
wos`] 13 | 14 | ### Description 15 | 16 | [Description of the bug or feature] 17 | 18 | ### Steps to Reproduce 19 | 20 | 1. [First Step] 21 | 2. [Second Step] 22 | 3. [and so on...] 23 | 24 | **Expected behavior:** [What you expected to happen] 25 | 26 | **Actual behavior:** [What actually happened] 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Enrico Bacis 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | wos 2 | === 3 | 4 | *SOAP Client for querying the Web of Science database* 5 | 6 | Description 7 | ----------- 8 | 9 | |travis| |readthedocs| |license| |version| |downloads| |stars| 10 | 11 | .. |travis| image:: https://travis-ci.org/enricobacis/wos.svg?branch=master 12 | :target: https://travis-ci.org/enricobacis/wos 13 | .. |readthedocs| image:: https://readthedocs.org/projects/wos/badge/ 14 | :target: https://wos.readthedocs.io/ 15 | .. |license| image:: https://img.shields.io/github/license/enricobacis/wos 16 | :target: https://github.com/enricobacis/wos/blob/master/LICENSE 17 | .. |version| image:: https://img.shields.io/pypi/v/wos?color=blue 18 | :target: https://pypi.org/project/wos/ 19 | .. |downloads| image:: https://img.shields.io/pypi/dm/wos 20 | :target: https://pypi.org/project/wos/ 21 | .. |stars| image:: https://img.shields.io/github/stars/enricobacis/wos?style=social 22 | :target: https://github.com/enricobacis/wos 23 | 24 | Web of Science (previously Web of Knowledge) is an online subscription-based 25 | scientific citation indexing service maintained by Clarivate. 26 | 27 | ``wos`` is a python SOAP Client (both API and command-line tool) to query the 28 | WOS database in order to get XML data from a query using the WWS access. 29 | 30 | Installation 31 | ------------ 32 | 33 | The package has been uploaded to `PyPI`_, so you can 34 | install the package using pip: 35 | 36 | pip install wos 37 | 38 | Documentation 39 | ------------- 40 | 41 | This README and the documentation for the classes and methods can be accessed 42 | on `ReadTheDocs`_. 43 | 44 | Usage 45 | ----- 46 | 47 | You can use the ``wos`` command to query the Web of Science API. 
If you want to 48 | access data that needs to be accessed using the premium API, you also have to 49 | authenticate using your username and password. 50 | 51 | 52 | usage: wos [-h] [--close] [-l] [-u USER] [-p PASSWORD] [-s SID] 53 | {query,doi,connect} ... 54 | 55 | Query the Web of Science. 56 | 57 | positional arguments: 58 | {query,doi,connect} sub-command help 59 | query query the Web of Science. 60 | doi get the WOS ID from the DOI. 61 | connect connect and get an SID. 62 | 63 | optional arguments: 64 | -h, --help show this help message and exit 65 | --close Close session. 66 | --proxy PROXY HTTP proxy 67 | --timeout TIMEOUT API timeout 68 | -l, --lite Wos Lite 69 | -v, --verbose Verbose 70 | 71 | authentication: 72 | API credentials for premium access. 73 | 74 | -u USER, --user USER 75 | -p PASSWORD, --password PASSWORD 76 | -s SID, --sid SID 77 | 78 | You can use the WOS Lite API using the ``--lite`` parameter (for each query). 79 | 80 | You can also authenticate using the session id (SID). In fact the sessions are 81 | not closed by the command line utility. Example: 82 | 83 | .. code:: 84 | 85 | $ wos --user JohnDoe --password 12345 connect 86 | Authenticated using SID: ABCDEFGHIJKLM 87 | 88 | $ wos --sid ABCDEFGHIJKLM query 'AU=Knuth Donald' -c1 89 | Authenticated using SID: ABCDEFGHIJKLM 90 | 91 | 92 | 93 | WOS:000287850200007 94 | 95 | 96 | 97 | 98 | 99 | 100 | 103 | 33-45 104 | 105 | 106 | MATHEMATICAL INTELLIGENCER 107 | .... 108 | 109 | $ wos --sid ABCDEFGHIJKLM doi '10.1007/s00283-010-9170-7' 110 | 10.1007/s00283-010-9170-7 111 | 112 | Check the `user_query`_ documentation to understand how to create query strings. 113 | 114 | Example 115 | ------- 116 | 117 | Obviously you can also use the python client programmatically: 118 | 119 | .. 
code:: python 120 | 121 | from wos import WosClient 122 | import wos.utils 123 | 124 | with WosClient('JohnDoe', '12345') as client: 125 | print(wos.utils.query(client, 'AU=Knuth Donald')) 126 | 127 | APIs 128 | ---- 129 | 130 | In ``wos`` 0.1.11+, the ``WosClient`` class can access the following APIs. 131 | 132 | - ``retrieve`` [`lite `__ / `premium `__] 133 | 134 | - ``retrieveById`` [`lite `__ / `premium `__] 135 | 136 | - ``search`` [`lite `__ / `premium `__] 137 | 138 | - ``citedReferences`` [`premium `__] 139 | 140 | - ``citedReferencesRetrieve`` [`premium `__] 141 | 142 | - ``citingArticles`` [`premium `__] 143 | 144 | - ``relatedRecords`` [`premium `__] 145 | 146 | [FAQ] I cannot connect ... 147 | -------------------------- 148 | 149 | I am not affiliated with Clarivate. The library leverages the Web of Science `WWS`_ API (Web Services Premium or Lite), which is a paid service offered by Clarivate. This means that your institution has to pay for the Web of Science Core Collection access. The simple registration to Web of Knowledge / Web of Science does not entitle you to access the WWS API service. 150 | 151 | So if you receive errors like ``No matches returned for Username`` or ``No matches returned for IP``, these errors are thrown directly by the WWS API server. This means that the library is correctly communicating with the server, but you do not have access to the Web Services API. I do understand that you can access the WOS website from your network, but the website access and the API access (used in this project) are two separate products, and the website access does not imply the API access, since Clarivate bills them separately. This project does not scrape the website (which would violate the terms of use) but invokes the WWS APIs offered by Clarivate. Thus there is nothing this project can do to help you. 
152 | 153 | **If you think this is an error and you should be entitled to access the services, please contact Clarivate support first and verify whether you have WWS access. Please open an issue ONLY when you have (1) verified with Clarivate support that you have WWS access; (2) verified that you are connected from the correct network.** 154 | 155 | Disclaimer 156 | ---------- 157 | 158 | All product names, trademarks, and registered trademarks are the property of their respective owners. All company, product, and service names used in this document are for identification purposes only. The use of these names, trademarks, and brands does not constitute an endorsement or recommendation by the companies. 159 | 160 | 161 | .. _ReadTheDocs: https://wos.readthedocs.io/ 162 | .. _PyPI: https://pypi.org/project/wos/ 163 | .. _user_query: https://help.incites.clarivate.com/wosWebServicesLite/WebServiceOperationsGroup/WebServiceOperations/g2/user_query.html 164 | .. _WWS: https://clarivate.com/webofsciencegroup/solutions/xml-and-apis/ 165 | 166 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | import os 8 | import sys 9 | import datetime 10 | 11 | # -- Path setup -------------------------------------------------------------- 12 | 13 | # If extensions (or modules to document with autodoc) are in another directory, 14 | # add these directories to sys.path here. If the directory is relative to the 15 | # documentation root, use os.path.abspath to make it absolute, like shown here. 16 | 17 | sys.path.insert(0, os.path.abspath('..')) 18 | master_doc = 'index' 19 | 20 | # -- Project information ----------------------------------------------------- 21 | 22 | version = '0.2.7' 23 | 24 | project = 'wos' 25 | copyright = 'Product names and trademarks are the property of their respective owners.' 26 | author = 'Enrico Bacis' 27 | 28 | # The full version, including alpha/beta/rc tags 29 | release = version 30 | 31 | 32 | # -- General configuration --------------------------------------------------- 33 | 34 | # Add any Sphinx extension module names here, as strings. They can be 35 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 36 | # ones. 37 | extensions = [ 38 | 'sphinx.ext.autodoc', 39 | 'sphinx.ext.coverage', 40 | 'sphinx.ext.napoleon' 41 | ] 42 | 43 | # Add any paths that contain templates here, relative to this directory. 44 | templates_path = ['_templates'] 45 | 46 | # Static paths. 
47 | html_static_path = [] 48 | 49 | # List of patterns, relative to source directory, that match files and 50 | # directories to ignore when looking for source files. 51 | # This pattern also affects html_static_path and html_extra_path. 52 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 53 | 54 | 55 | # -- Options for HTML output ------------------------------------------------- 56 | 57 | # The theme to use for HTML and HTML Help pages. See the documentation for 58 | # a list of builtin themes. 59 | # 60 | html_theme = 'alabaster' 61 | 62 | # Add any paths that contain custom static files (such as style sheets) here, 63 | # relative to this directory. They are copied after the builtin static files, 64 | # so a file named "default.css" will overwrite the builtin "default.css". 65 | html_static_path = ['_static'] 66 | -------------------------------------------------------------------------------- /docs/documentation/client.rst: -------------------------------------------------------------------------------- 1 | wos.client.WosClient 2 | ==================== 3 | 4 | Here is the documentation for the methods in the `wos.client.WosClient` class. 5 | 6 | -------------------------------------------------- 7 | 8 | .. automodule:: wos.client 9 | :members: 10 | -------------------------------------------------------------------------------- /docs/documentation/utils.rst: -------------------------------------------------------------------------------- 1 | wos.utils 2 | ========= 3 | 4 | Here is the documentation for the methods in the `wos.utils` package. 5 | 6 | -------------------------------------------------- 7 | 8 | .. automodule:: wos.utils 9 | :members: 10 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. 
include:: ../README.rst 2 | 3 | -------------------------------------------------- 4 | 5 | Indices and tables 6 | ================== 7 | 8 | * :ref:`genindex` 9 | * :ref:`modindex` 10 | * :ref:`search` 11 | 12 | -------------------------------------------------- 13 | 14 | .. toctree:: 15 | :maxdepth: 2 16 | :caption: Contents: 17 | 18 | documentation/client 19 | documentation/utils 20 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /scripts/wos: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from argparse import ArgumentParser 4 | from wos.utils import query, doi_to_wos 5 | from wos import WosClient 6 | 7 | import traceback 8 | import logging 9 | import suds 10 | import sys 11 | 12 | logging.getLogger('suds.client').setLevel(logging.CRITICAL) 13 | 14 | def pprint(data): 15 | """Print unicode string in a compatible way.""" 16 | print(data.encode('utf-8') if sys.version_info[0] < 3 else data) 17 | 18 | def main(): 19 | """Main method.""" 20 | parser = ArgumentParser(description='Query the Web of Science.') 21 | parser.add_argument('--close', action='store_true', help="Close session.") 22 | parser.add_argument('--proxy', type=str, default=None, help='HTTP proxy') 23 | parser.add_argument('--timeout', type=int, default=600, help='API timeout') 24 | parser.add_argument('-l', '--lite', action='store_true', help='Wos Lite') 25 | parser.add_argument('-v', '--verbose', action='store_true', help='Verbose') 26 | subparsers = parser.add_subparsers(help='sub-command help') 27 | 28 | g_auth = parser.add_argument_group('authentication', 29 | 'API credentials for premium access.') 30 | g_auth.add_argument('-u', '--user', type=str, default=None) 31 | g_auth.add_argument('-p', '--password', type=str, default=None) 32 | g_auth.add_argument('-s', '--sid', type=str, default=None) 33 | 34 | s_query = subparsers.add_parser('query', help='query the Web of Science.') 35 | s_query.add_argument('QUERY', help='search query') 36 | s_query.add_argument('-c', '--count', 
type=int, default=5) 37 | s_query.add_argument('-o', '--offset', type=int, default=1) 38 | s_query.add_argument('-m', '--max', type=int, help='entries', default=100) 39 | 40 | s_doi = subparsers.add_parser('doi', help='get the WOS ID from the DOI.') 41 | s_doi.add_argument('DOI', help='Document Object Identifier') 42 | 43 | subparsers.add_parser('connect', help='connect and get an SID.') 44 | 45 | args = parser.parse_args() 46 | 47 | try: 48 | with WosClient(args.user, args.password, args.sid, args.close, 49 | args.lite, args.proxy, args.timeout) as wc: 50 | if 'QUERY' in args: 51 | pprint(query(wc, args.QUERY, '', args.count, args.offset, 52 | args.max)) 53 | if 'DOI' in args: 54 | pprint(doi_to_wos(wc, args.DOI)) 55 | 56 | except suds.WebFault as wf: 57 | if args.verbose: 58 | traceback.print_exc() 59 | else: 60 | pprint(('ERROR: %s' % wf.fault.faultstring)) 61 | 62 | except Exception as e: 63 | if args.verbose: 64 | traceback.print_exc() 65 | else: 66 | pprint(('ERROR: %s' % str(e))) 67 | 68 | 69 | if __name__ == '__main__': 70 | main() 71 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from sys import version_info 3 | 4 | with open('README.rst') as README: 5 | long_description = README.read() 6 | long_description = long_description[long_description.index('Description'):] 7 | 8 | suds_install_requires = ['suds'] if version_info < (3, 0) else ['suds-py3'] 9 | 10 | setup(name='wos', 11 | version='0.2.7', 12 | description='Web of Science client using API v3.', 13 | long_description=long_description, 14 | install_requires=['limit'] + suds_install_requires, 15 | url='http://github.com/enricobacis/wos', 16 | author='Enrico Bacis', 17 | author_email='enrico.bacis@gmail.com', 18 | license='MIT', 19 | packages=['wos'], 20 | scripts=['scripts/wos'], 21 | keywords='wos isi web of science knowledge api client') 
22 | -------------------------------------------------------------------------------- /wos/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['WosClient', 'utils'] 4 | 5 | from .client import WosClient 6 | from . import utils 7 | -------------------------------------------------------------------------------- /wos/client.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['WosClient'] 4 | 5 | import suds as _suds 6 | import functools as _functools 7 | from base64 import b64encode as _b64encode 8 | from collections import OrderedDict as _OrderedDict 9 | from sys import version_info as _version_info 10 | from limit import limit as _limit 11 | 12 | 13 | class WosClient(): 14 | """Query the Web of Science. 15 | You must provide user and password only to use the premium WWS service. 16 | 17 | with WosClient() as wos: 18 | results = wos.search(...)""" 19 | 20 | base_url = 'http://search.webofknowledge.com' 21 | auth_url = base_url + '/esti/wokmws/ws/WOKMWSAuthenticate?wsdl' 22 | search_url = base_url + '/esti/wokmws/ws/WokSearch?wsdl' 23 | searchlite_url = base_url + '/esti/wokmws/ws/WokSearchLite?wsdl' 24 | 25 | def __init__(self, user=None, password=None, SID=None, close_on_exit=True, 26 | lite=False, proxy=None, timeout=600, throttle=(2, 1)): 27 | """Create the SOAP clients. 
user and password for premium access.""" 28 | 29 | self._SID = SID 30 | self._lite = lite 31 | self._close_on_exit = close_on_exit 32 | proxy = {'http': proxy} if proxy else None 33 | options = {'proxy': proxy, 'timeout': timeout} 34 | search_wsdl = self.searchlite_url if lite else self.search_url 35 | self._auth = _suds.client.Client(self.auth_url, **options) 36 | self._search = _suds.client.Client(search_wsdl, **options) 37 | self._throttle_wait = _limit(*throttle)(lambda: True) 38 | 39 | if user and password: 40 | auth = '%s:%s' % (user, password) 41 | auth = _b64encode(auth.encode('utf-8')).decode('utf-8') 42 | headers = {'Authorization': ('Basic %s' % auth).strip()} 43 | self._auth.set_options(headers=headers) 44 | 45 | def __enter__(self): 46 | """Automatically connect when used with 'with' statements.""" 47 | self.connect() 48 | return self 49 | 50 | def __exit__(self, exc_type, exc_value, traceback): 51 | """Close connection after closing the 'with' statement.""" 52 | if self._close_on_exit: 53 | self.close() 54 | 55 | def __del__(self): 56 | """Close connection when deleting the object.""" 57 | if self._close_on_exit: 58 | self.close() 59 | 60 | def is_lite(self): 61 | """Returns True if the client is for WOS lite""" 62 | return self._lite 63 | 64 | def _api(fn): 65 | """API decorator for common tests (sessions open, etc.) and throttle 66 | limitation (calls per second).""" 67 | @_functools.wraps(fn) 68 | def _fn(self, *args, **kwargs): 69 | self._throttle_wait() 70 | if not self._SID: 71 | raise RuntimeError('Session closed. 
Invoke connect() before.') 72 | resp = fn(self, *args, **kwargs) 73 | return (self._search.last_received().str() if self.is_lite() 74 | else resp) 75 | return _fn 76 | 77 | def _premium(fn): 78 | """Premium decorator for APIs that require premium access level.""" 79 | @_functools.wraps(fn) 80 | def _fn(self, *args, **kwargs): 81 | if self.is_lite(): 82 | raise RuntimeError('Premium API not available in lite access.') 83 | return fn(self, *args, **kwargs) 84 | return _fn 85 | 86 | @staticmethod 87 | def make_retrieveParameters(offset=1, count=100, name='RS', sort='D'): 88 | """Create retrieve parameters dictionary to be used with APIs. 89 | 90 | :count: Number of records to display in the result. Cannot be less than 91 | 0 and cannot be greater than 100. If count is 0 then only the 92 | summary information will be returned. 93 | 94 | :offset: First record in results to return. Must be greater than zero 95 | 96 | :name: Name of the field to order by. Use a two-character abbreviation 97 | to specify the field ('AU': Author, 'CF': Conference Title, 98 | 'CG': Page, 'CW': Source, 'CV': Volume, 'LC': Local Times Cited, 99 | 'LD': Load Date, 'PG': Page, 'PY': Publication Year, 'RS': 100 | Relevance, 'SO': Source, 'TC': Times Cited, 'VL': Volume) 101 | 102 | :sort: Must be A (ascending) or D (descending). The sort parameter can 103 | only be D for Relevance and TimesCited. 
104 | """ 105 | return _OrderedDict([ 106 | ('firstRecord', offset), 107 | ('count', count), 108 | ('sortField', _OrderedDict([('name', name), ('sort', sort)])) 109 | ]) 110 | 111 | def connect(self): 112 | """Authenticate to WOS and set the SID cookie.""" 113 | if not self._SID: 114 | self._SID = self._auth.service.authenticate() 115 | print(('Authenticated (SID: %s)' % self._SID).encode('utf-8')) 116 | 117 | self._search.set_options(headers={'Cookie': 'SID="%s"' % self._SID}) 118 | self._auth.options.headers.update({'Cookie': 'SID="%s"' % self._SID}) 119 | return self._SID 120 | 121 | def close(self): 122 | """The close operation loads the session if it is valid and then closes 123 | it and releases the session seat. All the session data are deleted and 124 | become invalid after the request is processed. The session ID can no 125 | longer be used in subsequent requests.""" 126 | if self._SID: 127 | self._auth.service.closeSession() 128 | self._SID = None 129 | 130 | @_api 131 | def search(self, query, count=5, offset=1, editions=None, 132 | symbolicTimeSpan=None, timeSpan=None, retrieveParameters=None): 133 | """The search operation submits a search query to the specified 134 | database edition and retrieves data. This operation returns a query ID 135 | that can be used in subsequent operations to retrieve more records. 136 | 137 | :query: User query for requesting data. The query parser will return 138 | errors for invalid queries 139 | 140 | :count: Number of records to display in the result. Cannot be less than 141 | 0 and cannot be greater than 100. If count is 0 then only the 142 | summary information will be returned. 143 | 144 | :offset: First record in results to return. Must be greater than zero 145 | 146 | :editions: List of editions to be searched. If None, user permissions 147 | will be substituted. 
148 | 149 | Fields: 150 | collection - Name of the collection 151 | edition - Name of the edition 152 | 153 | :symbolicTimeSpan: This element defines a range of load dates. The load 154 | date is the date when a record was added to a 155 | database. If symbolicTimeSpan is specified, the 156 | timeSpan parameter must be omitted. If timeSpan and 157 | symbolicTimeSpan are both omitted, then the maximum 158 | publication date time span will be inferred from the 159 | editions data. 160 | 161 | Valid values: 162 | '1week' - Specifies to use the end date as today and 163 | the begin date as 1 week prior to today. 164 | '2week' - Specifies to use the end date as today and 165 | the begin date as 2 weeks prior to today. 166 | '4week' - Specifies to use the end date as today and 167 | the begin date as 4 weeks prior to today. 168 | 169 | :timeSpan: This element specifies a range of publication dates. 170 | If timeSpan is used, the symbolicTimeSpan parameter must be 171 | omitted. If timeSpan and symbolicTimeSpan are both omitted, 172 | then the maximum time span will be inferred from the 173 | editions data. 174 | 175 | Fields: 176 | begin - Beginning date for this search. Format: YYYY-MM-DD 177 | end - Ending date for this search. Format: YYYY-MM-DD 178 | 179 | :retrieveParameters: Retrieve parameters. If omitted the result of 180 | make_retrieveParameters(offset, count, 'RS', 'D') 181 | is used. 
182 | """ 183 | query = query.decode('utf-8') if _version_info[0] < 3 else query 184 | return self._search.service.search( 185 | queryParameters=_OrderedDict([ 186 | ('databaseId', 'WOS'), 187 | ('userQuery', query), 188 | ('editions', editions), 189 | ('symbolicTimeSpan', symbolicTimeSpan), 190 | ('timeSpan', timeSpan), 191 | ('queryLanguage', 'en') 192 | ]), 193 | retrieveParameters=(retrieveParameters or 194 | self.make_retrieveParameters(offset, count)) 195 | ) 196 | 197 | @_api 198 | def retrieve(self, queryId, count=100, offset=1, retrieveParameters=None): 199 | """The retrieve operation submits a query returned by a previous 200 | search, citingArticles, relatedRecords, or retrieveById operation. 201 | However, different retrieval parameters may be used to modify the 202 | output. For example, if a search operation returns five records sorted 203 | by times cited, a subsequent retrieve operation could run the same 204 | search against the same database and edition but return 10 records 205 | sorted by relevance. 206 | 207 | This operation is also useful for overcoming the retrieval limit of 100 208 | records per query. For example, a search operation may find 220 209 | records, as revealed by the content of the recordsFound element, but it 210 | returns only records 1-100. A subsequent retrieve operation could 211 | return records 101-200 and a third retrieve operation the remaining 20. 212 | 213 | :queryId: The query ID from a previous search 214 | 215 | :count: Number of records to display in the result. Cannot be less than 216 | 0 and cannot be greater than 100. If count is 0 then only the 217 | summary information will be returned. 218 | 219 | :offset: First record in results to return. Must be greater than zero 220 | 221 | :retrieveParameters: Retrieve parameters. If omitted the result of 222 | make_retrieveParameters(offset, count, 'RS', 'D') 223 | is used. 
224 | """ 225 | return self._search.service.retrieve( 226 | queryId=queryId, 227 | retrieveParameters=(retrieveParameters or 228 | self.make_retrieveParameters(offset, count)) 229 | ) 230 | 231 | @_api 232 | def retrieveById(self, uid, count=100, offset=1, retrieveParameters=None): 233 | """The retrieveById operation returns records identified by unique 234 | identifiers. The identifiers are specific to each database. 235 | 236 | :uid: Web of Science unique record identifier 237 | 238 | :count: Number of records to display in the result. Cannot be less than 239 | 0 and cannot be greater than 100. If count is 0 then only the 240 | summary information will be returned. 241 | 242 | :offset: First record in results to return. Must be greater than zero 243 | 244 | :retrieveParameters: Retrieve parameters. If omitted the result of 245 | make_retrieveParameters(offset, count, 'RS', 'D') 246 | is used. 247 | """ 248 | return self._search.service.retrieveById( 249 | databaseId='WOS', 250 | uid=uid, 251 | queryLanguage='en', 252 | retrieveParameters=(retrieveParameters or 253 | self.make_retrieveParameters(offset, count)) 254 | ) 255 | 256 | @_api 257 | @_premium 258 | def citedReferences(self, uid, count=100, offset=1, 259 | retrieveParameters=None): 260 | """The citedReferences operation returns references cited by an article 261 | identified by a unique identifier. You may specify only one identifier 262 | per request. 263 | 264 | :uid: Web of Science unique record identifier 265 | 266 | :count: Number of records to display in the result. Cannot be less than 267 | 0 and cannot be greater than 100. If count is 0 then only the 268 | summary information will be returned. 269 | 270 | :offset: First record in results to return. Must be greater than zero 271 | 272 | :retrieveParameters: Retrieve parameters. If omitted the result of 273 | make_retrieveParameters(offset, count, 'RS', 'D') 274 | is used. 
275 | """ 276 | return self._search.service.citedReferences( 277 | databaseId='WOS', 278 | uid=uid, 279 | queryLanguage='en', 280 | retrieveParameters=(retrieveParameters or 281 | self.make_retrieveParameters(offset, count)) 282 | ) 283 | 284 | @_api 285 | @_premium 286 | def citedReferencesRetrieve(self, queryId, count=100, offset=1, 287 | retrieveParameters=None): 288 | """The citedReferencesRetrieve operation submits a query returned by a 289 | previous citedReferences operation. 290 | 291 | This operation is useful for overcoming the retrieval limit of 100 292 | records per query. For example, a citedReferences operation may find 293 | 106 cited references, as revealed by the content of the recordsFound 294 | element, but it returns only records 1-100. You could perform a 295 | subsequent citedReferencesretrieve operation to obtain records 101-106. 296 | 297 | :queryId: The query ID from a previous citedReferences operation 298 | 299 | :count: Number of records to display in the result. Cannot be less than 300 | 0 and cannot be greater than 100. If count is 0 then only the 301 | summary information will be returned. 302 | 303 | :offset: First record in results to return. Must be greater than zero 304 | 305 | :retrieveParameters: Retrieve parameters. If omitted the result of 306 | make_retrieveParameters(offset, count, 'RS', 'D') 307 | is used. 308 | """ 309 | return self._search.service.citedReferencesRetrieve( 310 | queryId=queryId, 311 | retrieveParameters=(retrieveParameters or 312 | self.make_retrieveParameters(offset, count)) 313 | ) 314 | 315 | @_api 316 | @_premium 317 | def citingArticles(self, uid, count=100, offset=1, editions=None, 318 | timeSpan=None, retrieveParameters=None): 319 | """The citingArticles operation finds citing articles for the article 320 | specified by unique identifier. You may specify only one identifier per 321 | request. Web of Science Core Collection (WOS) is the only valid 322 | database for this operation. 
323 | 324 | :uid: A unique item identifier. It cannot be None or empty string. 325 | 326 | :count: Number of records to display in the result. Cannot be less than 327 | 0 and cannot be greater than 100. If count is 0 then only the 328 | summary information will be returned. 329 | 330 | :offset: First record in results to return. Must be greater than zero 331 | 332 | :editions: List of editions to be searched. If None, user permissions 333 | will be substituted. 334 | 335 | Fields: 336 | collection - Name of the collection 337 | edition - Name of the edition 338 | 339 | :timeSpan: This element defines specifies a range of publication dates. 340 | If timeSpan is null, then the maximum time span will be 341 | inferred from the editions data. 342 | 343 | Fields: 344 | begin - Beginning date for this search. Format: YYYY-MM-DD 345 | end - Ending date for this search. Format: YYYY-MM-DD 346 | 347 | :retrieveParameters: Retrieve parameters. If omitted the result of 348 | make_retrieveParameters(offset, count, 'RS', 'D') 349 | is used. 350 | """ 351 | return self._search.service.citingArticles( 352 | databaseId='WOS', 353 | uid=uid, 354 | editions=editions, 355 | timeSpan=timeSpan, 356 | queryLanguage='en', 357 | retrieveParameters=(retrieveParameters or 358 | self.make_retrieveParameters(offset, count)) 359 | ) 360 | 361 | @_api 362 | @_premium 363 | def relatedRecords(self, uid, count=100, offset=1, editions=None, 364 | timeSpan=None, retrieveParameters=None): 365 | """The relatedRecords operation finds Related Records for the article 366 | specified by unique identifier. Related Records share cited references 367 | with the specified record. The operation returns the parent record 368 | along with the Related Records. The total number of Related Records for 369 | the parent record is shown at the end of the response. Use the retrieve 370 | parameter count to limit the number of Related Records returned. 371 | 372 | :uid: A unique item identifier. 
It cannot be None or empty string. 373 | 374 | :count: Number of records to display in the result. Cannot be less than 375 | 0 and cannot be greater than 100. If count is 0 then only the 376 | summary information will be returned. 377 | 378 | :offset: First record in results to return. Must be greater than zero 379 | 380 | :editions: List of editions to be searched. If None, user permissions 381 | will be substituted. 382 | 383 | Fields: 384 | collection - Name of the collection 385 | edition - Name of the edition 386 | 387 | :timeSpan: This element defines specifies a range of publication dates. 388 | If timeSpan is null, then the maximum time span will be 389 | inferred from the editions data. 390 | 391 | Fields: 392 | begin - Beginning date for this search. Format: YYYY-MM-DD 393 | end - Ending date for this search. Format: YYYY-MM-DD 394 | 395 | :retrieveParameters: Retrieve parameters. If omitted the result of 396 | make_retrieveParameters(offset, count, 'RS', 'D') 397 | is used. 398 | """ 399 | return self._search.service.relatedRecords( 400 | databaseId='WOS', 401 | uid=uid, 402 | editions=editions, 403 | timeSpan=timeSpan, 404 | queryLanguage='en', 405 | retrieveParameters=(retrieveParameters or 406 | self.make_retrieveParameters(offset, count)) 407 | ) 408 | -------------------------------------------------------------------------------- /wos/utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['doi_to_wos', 'query', 'single'] 4 | 5 | from xml.etree import ElementTree as _ET 6 | from xml.dom import minidom as _minidom 7 | import re as _re 8 | 9 | 10 | def _get_records(wosclient, wos_query, count=5, offset=1): 11 | """Get the XML records for both WOS lite and premium.""" 12 | if wosclient.is_lite(): 13 | result = wosclient.search(wos_query, count, offset) 14 | pattern = r'<{0}>.*?'.format('return') 15 | return _re.search(pattern, result, _re.S).group(0) 16 | else: 17 | return 
wosclient.search(wos_query, count, offset).records 18 | 19 | 20 | def prettify(xml): 21 | xml = _minidom.parseString(xml).toprettyxml(indent=' '*4) 22 | return '\n'.join([line for line in xml.split('\n') if line.strip()]) 23 | 24 | 25 | def single(wosclient, wos_query, xml_query=None, count=5, offset=1): 26 | """Perform a single Web of Science query and then XML query the results.""" 27 | records = _get_records(wosclient, wos_query, count, offset) 28 | xml = _re.sub(' xmlns="[^"]+"', '', records, count=1).encode('utf-8') 29 | if not xml_query: 30 | return prettify(xml) 31 | xml = _ET.fromstring(xml) 32 | return [el.text for el in xml.findall(xml_query)] 33 | 34 | 35 | def query(wosclient, wos_query, xml_query=None, count=5, offset=1, limit=100): 36 | """Query Web of Science and XML query results with multiple requests.""" 37 | results = [single(wosclient, wos_query, xml_query, min(limit, count-x+1), x) 38 | for x in range(offset, count+1, limit)] 39 | if xml_query: 40 | return [el for res in results for el in res] 41 | 42 | if wosclient.is_lite(): 43 | pattern = _re.compile(r'.*?|.*', _re.DOTALL) 44 | res_string = '\n%s' 45 | else: 46 | pattern = _re.compile(r'^<\?xml.*?\n\n|\n$.*') 47 | res_string = '\n%s' 48 | return res_string % '\n'.join(pattern.sub('', res) for res in results) 49 | 50 | 51 | def doi_to_wos(wosclient, doi): 52 | """Convert DOI to WOS identifier.""" 53 | if wosclient.is_lite(): 54 | raise NotImplementedError('Not implemented for WOS Lite') 55 | 56 | results = query(wosclient, 'DO="%s"' % doi, './REC/UID', count=1) 57 | return results[0].lstrip('WOS:') if results else None 58 | --------------------------------------------------------------------------------