├── src └── arcas │ ├── IEEE │ ├── __init__.py │ ├── api_key.py │ └── main.py │ ├── PLOS │ ├── __init__.py │ └── main.py │ ├── arXiv │ ├── __init__.py │ └── main.py │ ├── nature │ ├── __init__.py │ └── main.py │ ├── Springer │ ├── __init__.py │ ├── api_key.py │ └── main.py │ ├── version.py │ ├── __init__.py │ └── tools.py ├── logo.jpg ├── docs ├── Reference │ ├── index.rst │ ├── Apis │ │ ├── index.rst │ │ ├── plos.rst │ │ ├── arxiv.rst │ │ ├── nature.rst │ │ ├── springer.rst │ │ └── ieee.rst │ ├── results_set.rst │ └── search_fields.rst ├── Guides │ ├── index.rst │ ├── category.rst │ ├── journal.rst │ ├── year.rst │ ├── abstract.rst │ ├── command_line.rst │ ├── plos.rst │ ├── title.rst │ ├── springer.rst │ └── api_key.rst ├── installation.rst ├── Tutorial │ ├── index.rst │ ├── tutorial_ii.rst │ ├── tutorial_iii.rst │ └── tutorial_i.rst ├── index.rst ├── Makefile └── conf.py ├── .travis.yml ├── .gitignore ├── tests ├── contributions.rts ├── test_tools.py ├── test_springer.py ├── test_arxiv.py ├── test_plos.py ├── test_nature.py └── test_ieee.py ├── LICENSE.txt ├── setup.py ├── README.rst └── bin └── arcas_scrape /src/arcas/IEEE/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/arcas/PLOS/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/arcas/arXiv/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/arcas/nature/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/arcas/Springer/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/arcas/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "1.0.0" 2 | -------------------------------------------------------------------------------- /src/arcas/IEEE/api_key.py: -------------------------------------------------------------------------------- 1 | api_key = 'Your key here' -------------------------------------------------------------------------------- /src/arcas/Springer/api_key.py: -------------------------------------------------------------------------------- 1 | api_key = 'Your key here' 2 | -------------------------------------------------------------------------------- /logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ArcasProject/Arcas/HEAD/logo.jpg -------------------------------------------------------------------------------- /docs/Reference/index.rst: -------------------------------------------------------------------------------- 1 | Reference 2 | ========= 3 | 4 | Contents: 5 | 6 | .. toctree:: 7 | :maxdepth: 2 8 | 9 | search_fields.rst 10 | results_set.rst 11 | Apis/index.rst 12 | -------------------------------------------------------------------------------- /src/arcas/__init__.py: -------------------------------------------------------------------------------- 1 | from .IEEE.main import Ieee 2 | from .arXiv.main import Arxiv 3 | from .nature.main import Nature 4 | from .Springer.main import Springer 5 | from .PLOS.main import Plos 6 | from .version import __version__ 7 | -------------------------------------------------------------------------------- /docs/Guides/index.rst: -------------------------------------------------------------------------------- 1 | Guides 2 | ======= 3 | 4 | Contents: 5 | 6 | .. 
toctree:: 7 | :maxdepth: 2 8 | 9 | command_line.rst 10 | api_key.rst 11 | title.rst 12 | abstract.rst 13 | year.rst 14 | journal.rst 15 | category.rst -------------------------------------------------------------------------------- /docs/Reference/Apis/index.rst: -------------------------------------------------------------------------------- 1 | List of available APIS 2 | ===================== 3 | A list of the APIs you can ping with Arcas. 4 | Contents: 5 | 6 | .. toctree:: 7 | :maxdepth: 2 8 | 9 | arxiv.rst 10 | ieee.rst 11 | nature.rst 12 | springer.rst 13 | plos.rst -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - 3.4 4 | - 3.5 5 | - 3.6 6 | before_install: 7 | - export DISPLAY=:99.0 8 | - sh -e /etc/init.d/xvfb start 9 | install: 10 | - python setup.py develop 11 | 12 | script: 13 | - pytest tests 14 | - py.test --cov=arcas tests/ 15 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | ================ 2 | Installing Arcas 3 | ================ 4 | 5 | From PyPi:: 6 | 7 | $ pip install arcas 8 | 9 | From GitHub:: 10 | 11 | $ git clone https://github.com/Nikoleta-v3/Arcas.git 12 | $ cd Arcas 13 | $ pip install -r requirements.txt 14 | $ python setup.py install 15 | 16 | Arcas is supported by Python 3.5. -------------------------------------------------------------------------------- /docs/Tutorial/index.rst: -------------------------------------------------------------------------------- 1 | Tutorials 2 | ========= 3 | 4 | Arcas' tutorials cover the basic usage of the library. 
5 | 6 | These include retrieving metadata of a single article from a single API, retrieving 7 | the same article from various APIs and finally retrieving a large amount of metadata 8 | from different APIs. 9 | 10 | Contents: 11 | 12 | .. toctree:: 13 | :maxdepth: 2 14 | 15 | tutorial_i.rst 16 | tutorial_ii.rst 17 | tutorial_iii.rst 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # pycharm 2 | .idea 3 | 4 | # pycache 5 | *__pycache__/ 6 | 7 | # several other files 8 | status_report 9 | result.json 10 | *arcas.egg-info* 11 | 12 | # docs 13 | build/ 14 | dist/ 15 | docs/_build 16 | 17 | #hypothesis 18 | .hypothesis/ 19 | 20 | # notebooks 21 | Notes.ipynb 22 | .ipynb_checkpoints 23 | Structures.ipynb 24 | 25 | # logo files 26 | *.tex 27 | *.log 28 | *.aux 29 | *.nav 30 | *.out 31 | *.snm 32 | *.toc 33 | beamercolorthemesolarized.sty 34 | 35 | # tests 36 | .pytest_cache/ 37 | -------------------------------------------------------------------------------- /docs/Guides/category.rst: -------------------------------------------------------------------------------- 1 | .. _category: 2 | 3 | How to: Collect articles' based on `category` 4 | ============================================ 5 | 6 | Subject terms are often given to articles either by the authors or the journals 7 | themselves. Arcas allows the user to search for articles that satisfy a given subject 8 | term using the :code:`category` argument. 
9 | 10 | For example the query for a game theoretic article in Nature would be the following:: 11 | 12 | >>> import arcas 13 | >>> api = arcas.Nature() 14 | >>> parameters = api.parameters_fix(category='Game Theory') 15 | >>> url = api.create_url_search(parameters) 16 | 'http://www.nature.com/opensearch/request?&query=dc.subject adj Game Theory' 17 | -------------------------------------------------------------------------------- /docs/Reference/Apis/plos.rst: -------------------------------------------------------------------------------- 1 | PLOS Search API 2 | =============== 3 | 4 | Query content from the seven open-access peer-reviewed journals from the 5 | Public Library of Science using any of the twenty-three terms in the PLOS Search. 6 | 7 | For more information on PLOS Search API visit the official site: 8 | http://api.plos.org/ under Documentation. 9 | 10 | :code:`Plos` class supports the following arguments as search fields: 11 | 12 | - :code:`author` 13 | - :code:`title` 14 | - :code:`abstract` 15 | - :code:`category` 16 | - :code:`journal` 17 | - :code:`year` 18 | - :code:`records` 19 | - :code:`start` 20 | 21 | The most recent check of compatibility between Arcas and the PLOS Search 22 | API was done on the 27th of August 2018. -------------------------------------------------------------------------------- /docs/Reference/Apis/arxiv.rst: -------------------------------------------------------------------------------- 1 | arXiv API 2 | ========= 3 | 4 | The arXiv API is hosted at arXiv.org, a document submission and retrieval system 5 | that is heavily used by the physics, mathematics and computer science 6 | communities. 7 | 8 | arXiv is set as the default API for Arcas. For more information on interacting with the 9 | api visit the official site for the user's manual: https://arxiv.org/help/api/user-manual. 
10 | 11 | The :code:`Arxiv` class supports the following arguments as search fields: 12 | 13 | - :code:`author` 14 | - :code:`title` 15 | - :code:`abstract` 16 | - :code:`category` 17 | - :code:`journal` 18 | - :code:`records` 19 | - :code:`start` 20 | 21 | The most recent check of compatibility between Arcas and the arXiv API was done 22 | on the 27th of August 2018. -------------------------------------------------------------------------------- /docs/Guides/journal.rst: -------------------------------------------------------------------------------- 1 | .. _journal: 2 | 3 | How to: Collect articles' based on `journal` 4 | ========================================== 5 | 6 | Articles can also be retrieved using the full journal name/publication title. 7 | 8 | Thus sometime we might not be specifying only the publisher but the 9 | exact journal as well. This can be done using the argument `journal`. 10 | 11 | >>> import arcas 12 | >>> api = arcas.Nature() 13 | 14 | Assume that we would like to fetch an article from Nature's Blood Cancer Journal. 15 | The query message will be the following:: 16 | 17 | >>> parameters = api.parameters_fix(journal='Blood Cancer Journal') 18 | >>> url = api.create_url_search(parameters) 19 | 'http://www.nature.com/opensearch/request?&query=prism.publicationName=Blood Cancer Journal' 20 | -------------------------------------------------------------------------------- /docs/Reference/Apis/nature.rst: -------------------------------------------------------------------------------- 1 | nature.com OpenSearch API 2 | ====== 3 | 4 | The nature.com OpenSearch API provides an open, bibliographic search service 5 | for content hosted on nature.com, comprising around half a million news and 6 | research articles and citations 7 | 8 | For more information on interacting with the nature.com OpenSearch API visit the 9 | official site: https://www.nature.com/opensearch/. 
10 | 11 | The :code:`Nature` class supports the following arguments as search fields: 12 | 13 | - :code:`author` 14 | - :code:`title` 15 | - :code:`abstract` 16 | - :code:`category` 17 | - :code:`journal` 18 | - :code:`year` 19 | - :code:`records` 20 | - :code:`start` 21 | 22 | The most recent check of compatibility between Arcas and the nature.com OpenSearch 23 | API was done on the 27th of August 2018. -------------------------------------------------------------------------------- /tests/contributions.rts: -------------------------------------------------------------------------------- 1 | Contributions 2 | ============= 3 | 4 | Contributions from anyone are more than welcome! This may include opening 5 | `issues `_, communicating ideas, 6 | implementing new features such as expanding the search fields and the results set. 7 | We would love to receive new pull requests from you. 8 | 9 | Here's a guide to starting a contribution: 10 | 11 | Fork, then clone the repo:: 12 | 13 | git clone git@github.com:your-username/Arcas.git 14 | 15 | Make sure the tests pass:: 16 | 17 | pytest tests 18 | 19 | We encourage the use of coverage, ensuring all aspects of the code are tested:: 20 | 21 | py.test --cov=arcas tests/ 22 | 23 | Add tests for your change. Make your change and make the tests pass. 24 | Please update the documentation too, and ensure doctests pass. Push to your fork 25 | and submit a pull request. 26 | 27 | We look forward to your contributions! -------------------------------------------------------------------------------- /docs/Reference/Apis/springer.rst: -------------------------------------------------------------------------------- 1 | Springer Open Access API 2 | ======================== 3 | 4 | Springer Open Access API - Provides metadata and full-text content for more than 5 | 370,000 online documents from Springer open access xml, including BioMed Central 6 | and SpringerOpen journals. 
7 | 8 | Information on the Springer Open Access API can be found on the official 9 | site: https://dev.springer.com/restfuloperations. In order to use the 10 | Springer Open Access API a user must register an application and generate 11 | an application key which is used in the query message for access. Guidelines 12 | on using your API key with Arcas can be found under :ref:`api_key`. 13 | 14 | :code:`Springer` class supports the following arguments as search fields: 15 | 16 | - :code:`author` 17 | - :code:`title` 18 | - :code:`journal` 19 | - :code:`category` 20 | - :code:`records` 21 | - :code:`start` 22 | 23 | The most recent check of compatibility between Arcas and the Springer Open Access 24 | API was done on the 27th of August 2018. -------------------------------------------------------------------------------- /docs/Reference/Apis/ieee.rst: -------------------------------------------------------------------------------- 1 | IEEE Xplore 2 | ============ 3 | 4 | Query the Institute of Electrical and Electronics Engineers content 5 | repository and retrieve results for manipulation and presentation on local 6 | web interfaces. 7 | 8 | Information on the IEEE Xplore can be found on the official 9 | site: https://developer.ieee.org/docs. 10 | 11 | IEEE Xplore API requires a user to register their application in order to use the 12 | API. Once the application has been registered and an API key has been generated, a user 13 | can use Arcas to collect articles. 14 | 15 | Guidelines on using your API key with Arcas can be found under :ref:`api_key`. 16 | 17 | The :code:`Ieee` class supports the following arguments as search fields: 18 | 19 | - :code:`author` 20 | - :code:`title` 21 | - :code:`abstract` 22 | - :code:`category` 23 | - :code:`journal` 24 | - :code:`year` 25 | - :code:`records` 26 | - :code:`start` 27 | 28 | The most recent check of compatibility between Arcas and the IEEE Xplore 29 | API was done on the 27th of August 2018. 
-------------------------------------------------------------------------------- /docs/Guides/year.rst: -------------------------------------------------------------------------------- 1 | .. _year: 2 | 3 | How to: Collect articles' based on `year` 4 | ========================================= 5 | 6 | Publication date of an article is another search field available with Arcas. 7 | Consider an example where we are interested in articles that have been published 8 | in a specific year. 9 | 10 | Let us assume that we are interested in the first article that is returned 11 | by Plos that has been published in 1993:: 12 | 13 | >>> import arcas 14 | >>> api = arcas.Plos() 15 | 16 | Now all that is needed is to specify in the parameters that we want `year=1993`:: 17 | 18 | >>> parameters = api.parameters_fix(year=1993) 19 | >>> url = api.create_url_search(parameters) 20 | 21 | The url can be used to retrieve the response which is then passed to a data 22 | frame:: 23 | 24 | >>> request = api.make_request(url) 25 | >>> root = api.get_root(request) 26 | >>> raw_article = api.parse(root) 27 | >>> article = api.to_dataframe(raw_article[0]) 28 | 29 | The same example can be used to collect the article using the command line:: 30 | 31 | $ arcas_scrape -p plos -y 1993 32 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright 2017 Nikoleta Glynatsi 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | 
The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to Arcas' documentation! 2 | ================================= 3 | Arcas is a Python library which allows users to collect academic articles' metadata. 4 | 5 | A large number of scholarly databases and collections offer some form of API access. 6 | An API is an online tool to access data straight from the databases. Arcas is a tool 7 | designed to help communicate with (ping) several of these APIs. 8 | 9 | Arcas offers access to metadata of articles from the following journals 10 | and pre-prints: 11 | 12 | - IEEE 13 | - PLOS 14 | - Nature 15 | - Springer 16 | - arXiv 17 | 18 | Note some journals might require you to register and generate an application 19 | `key`. Currently, the following journals require you to register your application: 20 | 21 | - IEEE 22 | - Springer 23 | 24 | Guidelines for adding your key to the right place can be found under :ref:`api_key`. 25 | 26 | Table of Contents 27 | ================= 28 | 29 | .. toctree:: 30 | :maxdepth: 2 31 | 32 | installation.rst 33 | Tutorial/index.rst 34 | Guides/index.rst 35 | Reference/index.rst 36 | 37 | 38 | Indices and tables 39 | ================== 40 | 41 | .. * :ref:`genindex` 42 | .. 
* :ref:`modindex` 43 | * :ref:`search` 44 | 45 | -------------------------------------------------------------------------------- /docs/Guides/abstract.rst: -------------------------------------------------------------------------------- 1 | .. _abstract: 2 | 3 | How to: Collect articles' based on `abstract` 4 | ============================================ 5 | 6 | Often we might search articles based on words that can be found within 7 | the abstract of the article. For example one might be interested in an article's 8 | metadata for which the word eigenvalues is within the abstract. 9 | 10 | For this example we are going to be using the API of Nature:: 11 | 12 | >>> import arcas 13 | >>> api = arcas.Nature() 14 | 15 | Now all that is needed is to specify in the parameters that we want `abstract='eigenvalues'`:: 16 | 17 | >>> parameters = api.parameters_fix(abstract='eigenvalues') 18 | >>> url = api.create_url_search(parameters) 19 | 20 | The query will be used to ping the API and afterwards we parse the response 21 | that has been retrieved:: 22 | 23 | >>> request = api.make_request(url) 24 | >>> root = api.get_root(request) 25 | >>> raw_article = api.parse(root) 26 | >>> article = api.to_dataframe(raw_article[0]) 27 | 28 | Note that Arcas can be used from the command line as well. 
To reproduce the query 29 | in the command line we would type the following:: 30 | 31 | $ arcas_scrape -p nature -b "eigenvalues" 32 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | import unittest 4 | import doctest 5 | 6 | # Read in the version number 7 | exec(open('src/arcas/version.py', 'r').read()) 8 | 9 | requirements = ["requests>=2.12.1", 10 | "requests_mock>=1.2.0", 11 | "ratelimit==1.4.1", 12 | "docopt", 13 | "pytz", 14 | "pandas", 15 | "pytest", 16 | "pytest-cov"] 17 | 18 | with open('README.rst') as readme_file: 19 | readme = readme_file.read() 20 | 21 | def test_suite(): 22 | """Discover all tests in the tests dir""" 23 | test_loader = unittest.TestLoader() 24 | # Read in unit tests 25 | test_suite = test_loader.discover('tests') 26 | 27 | # Read in doctests from README 28 | test_suite.addTests(doctest.DocFileSuite('README.md', 29 | optionflags=doctest.ELLIPSIS)) 30 | return test_suite 31 | 32 | setup( 33 | name='arcas', 34 | version=__version__, 35 | install_requires=requirements, 36 | author='Nikoleta Glynatsi', 37 | author_email=('glynatsine@cardiff.ac.uk'), 38 | packages=find_packages('src'), 39 | package_dir={"": "src"}, 40 | scripts=['bin/arcas_scrape'], 41 | test_suite='setup.test_suite', 42 | url='', 43 | license='The MIT License (MIT)', 44 | description='A library to gather data from academic apis', 45 | ) 46 | -------------------------------------------------------------------------------- /docs/Guides/command_line.rst: -------------------------------------------------------------------------------- 1 | .. _command_line: 2 | 3 | How to use Arcas from the command line 4 | ====================================== 5 | 6 | Arcas is a tool which can be used from the command line as well. 
7 | 8 | To get information on the arguments we can pass we type the following command 9 | in a command prompt:: 10 | 11 | $ arcas_scrape --h 12 | Arcas. A library to facilitate scraping of APIs for scholarly resources. 13 | 14 | Usage: 15 | arcas_scrape [-h] [-p API] [-a AUTHOR] [-t TITLE] [-b ABSTRACT] [-y YEAR] 16 | [-r RECORDS] [-s START] [-v VALIDATE] [-f FILENAME] 17 | arcas_scrape --version 18 | 19 | 20 | Options: 21 | -h --help Show this 22 | --version Show version. 23 | -p API The online API, from a given list, to parse [default: arxiv] 24 | -a AUTHOR Terms to search for in Author 25 | -t TITLE Terms to search for in Title 26 | -b ABSTRACT Terms to search for in the Abstract 27 | -y YEAR Terms to search for in Year 28 | -r RECORDS Number of records to fetch [default: 1] 29 | -s START Sequence number of first record to fetch [default: 1] 30 | -v VALIDATE Checks if query returned with arguments asked [default: False] 31 | -f FILENAME Name of json file [default: results.json] 32 | -------------------------------------------------------------------------------- /docs/Tutorial/tutorial_ii.rst: -------------------------------------------------------------------------------- 1 | .. _tutorial-ii: 2 | 3 | =================================================== 4 | Tutorial II: Retrieve an article from various APIs 5 | =================================================== 6 | 7 | In this tutorial we are aiming to make a similar query, to that in 8 | :ref:`tutorial I `, from different APIs. 9 | 10 | To achieve that we will use a :code:`for` loop, to loop over a list of given 11 | APIs classes. For each instance then repeat the following procedure:: 12 | 13 | >>> for p in [arcas.Ieee, arcas.Plos, arcas.Arxiv, arcas.Springer, arcas.Nature]: 14 | 15 | ... api = p() 16 | ... parameters = api.parameters_fix(title='Game', abstract='Game', records=1) 17 | ... url = api.create_url_search(parameters) 18 | ... request = api.make_request(url) 19 | ... 
root = api.get_root(request) 20 | ... raw_article = api.parse(root) 21 | 22 | ... for art in raw_article: 23 | ... article = api.to_dataframe(art) 24 | ... api.export(article, 'results_{}.json'.format(api.__class__.__name__)) 25 | 26 | 27 | The :code:`export` function, is a function that writes the results to a `json 28 | `_ file. Here the results of each API are stored to 29 | a different file named after which API they come from. 30 | 31 | Note that you need to require a :code:`key` before being able to use :code:`arcas.Ieee` 32 | and :code:`arcas.Springer`. -------------------------------------------------------------------------------- /tests/test_tools.py: -------------------------------------------------------------------------------- 1 | from xml.etree.ElementTree import Element 2 | 3 | import pandas as pd 4 | 5 | import requests_mock 6 | from arcas.tools import Api 7 | 8 | standard = 'http:/Search;' 9 | 10 | def test_api(): 11 | api = Api(standard) 12 | assert api.standard == standard 13 | 14 | def test_create_url(): 15 | parameters = ['title=game', 'year=2010', 'author=N Glynatsi'] 16 | api = Api(standard) 17 | 18 | url = api.create_url_search(parameters) 19 | assert isinstance(url, str) 20 | for parameter in parameters: 21 | assert parameter in url 22 | 23 | def test_requests(): 24 | with requests_mock.mock() as m: 25 | url = 'http://example.com' 26 | m.register_uri('GET', url, text='Example text') 27 | api = Api(url) 28 | response = api.make_request(url) 29 | 30 | assert response.status_code == 200 31 | assert response.text == 'Example text' 32 | 33 | def test_xml_to_dict(): 34 | root = Element('top') 35 | children = [Element('child')] 36 | root.extend(children) 37 | 38 | api = Api(standard) 39 | 40 | dummy_dict = api.xml_to_dict(root) 41 | assert isinstance(dummy_dict, dict) 42 | 43 | def test_create_keys(): 44 | article = {'title': 'A Title', 'abstract': 'The Abstract', 'date': 2000, 45 | 'author': ['Author']} 46 | api = Api(standard) 47 | key, 
unique_key = api.create_keys(article) 48 | 49 | assert key == 'Author2000' 50 | assert len(unique_key) == 32 51 | assert isinstance(unique_key, str) 52 | -------------------------------------------------------------------------------- /docs/Guides/plos.rst: -------------------------------------------------------------------------------- 1 | How to ping Plos 2 | ================ 3 | 4 | For more information on PLOS Search API visit the official site: http://api.plos.org/solr/faq/. 5 | 6 | Plos supports the following arguments as search fields: 7 | 8 | - :code:`author` 9 | - :code:`title` 10 | - :code:`abstract` 11 | - :code:`category` 12 | - :code:`journal` 13 | - :code:`year` 14 | - :code:`records` 15 | - :code:`start` 16 | 17 | Let us consider an example where we would like to retrieve the metadata of single article 18 | with the word "Game" in the :code:`title` which belongs in the :code:`category` 19 | "game theory" and it was published on PLOS ONE. 20 | 21 | Initially, we import Arcas and make an :code:`Plos()` instance:: 22 | 23 | >>> import arcas 24 | >>> api = arcas.Plos() 25 | 26 | Secondly we create the parameters list will be used to generate our message to the 27 | API:: 28 | 29 | >>> parameters = api.parameters_fix(title='Game', category='game theory', records=1) 30 | >>> url = api.create_url_search(parameters) 31 | >>> url 32 | 'http://api.plos.org/search?q=title:"Game"+AND+subject:"game theory"&rows=1' 33 | 34 | The url then is used to obtain a relevant article:: 35 | 36 | >>> request = api.make_request(url) 37 | >>> root = api.get_root(request) 38 | >>> raw_article = api.parse(root) 39 | >>> article = api.to_dataframe(*raw_article) 40 | 41 | The :code:`Plos()` class returns the following results:: 42 | 43 | >>> article.columns 44 | Index(['url', 'key', 'unique_key', 'title', 'author', 'abstract', 'doi', 45 | 'date', 'journal', 'provenance', 'score'], 46 | dtype='object') -------------------------------------------------------------------------------- 
/docs/Guides/title.rst: -------------------------------------------------------------------------------- 1 | .. _title: 2 | 3 | How to: Collect articles' based on `title` 4 | ========================================== 5 | 6 | Academic articles are published with a given title by their authors. Sometimes 7 | we find ourselves in search of articles relevant to our field and we do not 8 | know where to start. The most common approach is to search articles where a word 9 | describing our topic of interest is included in the article's title. 10 | 11 | For example a mathematician might be interested in looking for articles in which 12 | the word eigenvalues appears in the title. 13 | 14 | Initially we need to choose a publisher, for this example we assume that 15 | we are interested in the articles published by Nature:: 16 | 17 | >>> import arcas 18 | >>> api = arcas.Nature() 19 | 20 | Now all that is needed is to specify in the parameters that we want `title='eigenvalues'`:: 21 | 22 | >>> parameters = api.parameters_fix(title='eigenvalues') 23 | >>> url = api.create_url_search(parameters) 24 | 25 | The query will be used to ping the API and afterwards we parse the response 26 | that has been retrieved:: 27 | 28 | >>> request = api.make_request(url) 29 | >>> root = api.get_root(request) 30 | >>> raw_article = api.parse(root) 31 | >>> article = api.to_dataframe(raw_article[0]) 32 | 33 | We can perform a sanity check and make sure that :code:`'eigenvalues'` is indeed 34 | within the title: 35 | 36 | >>> 'eigenvalues' in article['title'].unique()[0] 37 | True 38 | 39 | Note that Arcas can be used from the command line as well. If we wanted to 40 | reproduce the same example the command would be:: 41 | 42 | $ arcas_scrape -p nature -t "eigenvalues" 43 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Arcas 2 | ===== 3 | 4 | .. 
image:: https://api.travis-ci.org/ArcasProject/Arcas.svg?branch=master 5 | :target: https://travis-ci.org/ArcasProject/Arcas 6 | 7 | .. image:: https://img.shields.io/pypi/v/arcas.svg 8 | :target: https://pypi.python.org/pypi/arcas 9 | 10 | .. image:: https://badges.gitter.im/Join%20Chat.svg 11 | :target: https://gitter.im/ArcasProject/Lobby/?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge 12 | 13 | .. image:: https://img.shields.io/badge/PRs-welcome-brightgreen.svg?style=flat-square 14 | :target: http://makeapullrequest.com 15 | 16 | Arcas is a python tool designed to help with collecting academic articles 17 | from various APIs. 18 | 19 | Features 20 | -------- 21 | 22 | Arcas allows you: 23 | 24 | - access meta data from different academic APIs. 25 | - a collection of examples of analysing such meta data. 26 | 27 | Installation 28 | ------------- 29 | 30 | The easiest way to install it is: 31 | 32 | ``` 33 | $ pip install arcas 34 | ``` 35 | 36 | To install from source: 37 | 38 | ``` 39 | $ git clone https://github.com/Nikoleta-v3/Arcas.git 40 | $ cd Arcas 41 | $ python setup.py install 42 | ``` 43 | 44 | Usage 45 | ----- 46 | 47 | Arcas uses `docopt` to pass a list of arguments. 48 | 49 | For example: 50 | 51 | ``` 52 | $ arcas_scrape -p arxiv -t "Prisoner's Dilemma" -y 2000 -r 1 53 | ``` 54 | 55 | This query pings the arXiv api and asks for 1 record with the title containing 56 | the words Prisoner's Dilemma and published year 2000. 57 | 58 | Documentation 59 | ------------- 60 | The full documentation can be found here: http://arcas.readthedocs.io/en/latest/index.html. 61 | 62 | 63 | Examples 64 | -------- 65 | 66 | A repository that contains a set of example: https://github.com/ArcasProject/ArcasExamples -------------------------------------------------------------------------------- /docs/Reference/results_set.rst: -------------------------------------------------------------------------------- 1 | .. 
_results-set: 2 | 3 | Results set 4 | =========== 5 | 6 | Each response of the API returns a list of metadata for a given article. 7 | This list differs for each API. Arcas is designed to return a similar set of 8 | metadata for any given API. Thus the json results of Arcas have the following 9 | list of metadata: 10 | 11 | - :code:`key` 12 | - A generated key containing an author's name and publication year (e.g. Glynatsi2017) 13 | - :code:`unique_key` 14 | - A unique key generated using the `hashlib <https://docs.python.org/3/library/hashlib.html>`_ 15 | python library. The hashable string is created by: [author name, title, 16 | year, abstract] 17 | - :code:`title` 18 | - Title of article 19 | - :code:`author` 20 | - A single entity of an author from the list of authors of the respective article 21 | - :code:`abstract` 22 | - The abstract of the article 23 | - :code:`date` 24 | - Date of publication 25 | - :code:`doi` 26 | - Article's doi 27 | - :code:`url` 28 | - Article's url 29 | - :code:`journal` 30 | - Journal of publication 31 | - :code:`pages` 32 | - Pages of publication 33 | - :code:`key_word` 34 | - A single entity of a keyword assigned to the article by the given journal 35 | - :code:`provenance` 36 | - Scholarly database from which the article was collected 37 | - :code:`category` 38 | - A list of subjects given to the article by the authors 39 | - :code:`score` 40 | - Score given to article by the given journal 41 | - :code:`open_access` 42 | - A boolean describing whether the article is open access or not 43 | 44 | Note that if a specific result is not available from an API, not because it is missing 45 | but because it is not implemented, Arcas returns :code:`'Not available'` for the 46 | value of that column. -------------------------------------------------------------------------------- /docs/Tutorial/tutorial_iii.rst: -------------------------------------------------------------------------------- 1 | .. 
_tutorial-iii: 2 | 3 | ==================================================== 4 | Tutorial III: Retrieving a large number of articles 5 | ==================================================== 6 | 7 | Now that we have learned to ping several APIs for a single article, we will 8 | repeat the procedure for a large number of articles. In this example the 9 | number of articles we would like to retrieve is 20 from each API. 10 | 11 | Often, we are looking for hundreds of articles. Rather than asking the API 12 | for all the results at once, the APIs offer a paging mechanism through 13 | :code:`start` and :code:`records`. That way we can receive the result set 14 | one chunk at a time. :code:`start` defines the index of the first returned 15 | article and :code:`records` the number of articles returned by the query. 16 | 17 | >>> for p in [arcas.Ieee, arcas.Plos, arcas.Arxiv, arcas.Springer, arcas.Nature]: 18 | ... for start in range(2): 19 | ... 20 | ... api = p() 21 | ... parameters = api.parameters_fix(title='Game', abstract='Game', 22 | ... records=10, start=(start * 10)) 23 | ... url = api.create_url_search(parameters) 24 | ... request = api.make_request(url) 25 | ... root = api.get_root(request) 26 | ... raw_article = api.parse(root) 27 | ... 28 | ... for art in raw_article: 29 | ... article = api.to_dataframe(art) 30 | ... api.export(article, 'results_{}.json'.format(api.__class__.__name__)) 31 | 32 | In our example this might not seem like an important difference. But assume you 33 | were asking for a hundred articles. Some APIs limit the number of 34 | articles that can be returned, thus using this practice we avoid overloading 35 | the API. 36 | 37 | Note that you need to acquire a :code:`key` before being able to use :code:`arcas.Ieee` 38 | and :code:`arcas.Springer`. 
-------------------------------------------------------------------------------- /docs/Guides/springer.rst: -------------------------------------------------------------------------------- 1 | How to ping Springer 2 | ==================== 3 | 4 | For more information on interacting with the Springer api visit the official 5 | site for the user's manual: https://dev.springernature.com/. 6 | 7 | In order to use the Springer api you will need to sign up and create an application 8 | key. You can sign up at https://dev.springernature.com/. Once 9 | you have done that you will need to copy the key into the `api_key.py` file. This 10 | is located in the folder `src/arcas/Springer`. 11 | 12 | Once this is done you are all set to interact with the api. Springer supports 13 | the following arguments as search fields: 14 | 15 | - :code:`author` 16 | - :code:`title` 17 | - :code:`journal` 18 | - :code:`category` 19 | - :code:`records` 20 | - :code:`start` 21 | 22 | Note that `abstract` is not supported. Let us consider an example where 23 | we would like to retrieve the metadata of a single article with the word "Game" in the 24 | :code:`title` published in :code:`year` 2010. 
#!/usr/bin/env python
"""Arcas. A library to facilitate scraping of APIs for scholarly resources.

Usage:
    arcas_scrape [-h] [-p API] [-a AUTHOR] [-t TITLE] [-b ABSTRACT] [-y YEAR]
                 [-r RECORDS] [-s START] [-v VALIDATE] [-f FILENAME]
    arcas_scrape --version


Options:
    -h --help    Show this
    --version    Show version.
    -p API       The online API, from a given list, to parse [default: arxiv]
    -a AUTHOR    Terms to search for in Author
    -t TITLE     Terms to search for in Title
    -b ABSTRACT  Terms to search for in the Abstract
    -y YEAR      Terms to search for in Year
    -r RECORDS   Number of records to fetch [default: 1]
    -s START     Sequence number of first record to fetch [default: 1]
    -v VALIDATE  Checks if query returned with arguments asked [default: False]
    -f FILENAME  Name of json file [default: results.json]
"""

import ast
import sys

from docopt import docopt

from arcas import Arxiv, Ieee, Nature, Plos, Springer, __version__

# Maps the value accepted by ``-p`` to the class implementing that API.
APIS = {"ieee": Ieee, "arxiv": Arxiv, "nature": Nature, "springer": Springer,
        "plos": Plos}


def _parse_validate(raw):
    """Turn the ``-v`` command line value into a Python literal.

    The value is evaluated with ``ast.literal_eval`` exactly as before, so
    ``True``/``False`` keep working. When that fails the capitalised form is
    tried as well, which lets ``true``/``false`` through instead of crashing
    with a traceback. Anything else ends the program with a readable message.
    """
    for candidate in (raw, raw.capitalize()):
        try:
            return ast.literal_eval(candidate)
        except (ValueError, SyntaxError):
            continue
    sys.exit("Invalid value for -v: {!r} (expected True or False)".format(raw))


def main():
    """Build the query from the command line arguments and run it."""
    # The version shown by --version follows the package version instead of
    # a second hard-coded copy that could drift out of sync.
    arguments = docopt(__doc__, version='Arcas {}'.format(__version__))

    # create instance of selected api class; look the name up first so that a
    # typo produces a clear message rather than a bare KeyError
    api_name = arguments['-p'].lower()
    api_class = APIS.get(api_name)
    if api_class is None:
        sys.exit("Unknown API {!r}. Choose one of: {}".format(
            arguments['-p'], ', '.join(sorted(APIS))))
    api = api_class()

    # pass validate argument
    validate = _parse_validate(arguments['-v'])

    # generate the parameters
    parameters = api.parameters_fix(author=arguments['-a'], title=arguments['-t'],
                                    abstract=arguments['-b'], year=arguments['-y'],
                                    records=arguments['-r'], start=arguments['-s'])
    # generate url
    url = api.create_url_search(parameters)
    print(url)

    # generate the formalized json and export it
    api.run(url, arguments, validate)


if __name__ == '__main__':
    main()
9 | 10 | Initially, let us import Arcas:: 11 | 12 | >>> import arcas 13 | 14 | The APIs are implemented as classes. Here we make an instance of the 15 | arXiv API:: 16 | 17 | >>> api = arcas.Arxiv() 18 | 19 | We will now create the query to which arXiv listens. :code:`records` is the 20 | number of records we are requesting:: 21 | 22 | >>> parameters = api.parameters_fix(title='Game', abstract='Game', records=1) 23 | >>> url = api.create_url_search(parameters) 24 | 25 | The query will be used to ping the API and afterwards we parse the xml file 26 | that has been retrieved:: 27 | 28 | >>> request = api.make_request(url) 29 | >>> root = api.get_root(request) 30 | >>> raw_article = api.parse(root) 31 | >>> article = api.to_dataframe(raw_article[0]) 32 | 33 | Note that we are using the library `pandas <https://pandas.pydata.org/>`_ to 34 | store the results. The data frame contains the metadata of an article as it 35 | is recorded in the journal arXiv. Here we can type the following to see the 36 | columns of the data frame:: 37 | 38 | >>> article.columns 39 | Index(['url', 'key', 'unique_key', 'title', 'author', 'abstract', 'doi', 40 | 'date', 'journal', 'provenance', 'primary_category', 'category', 41 | 'score', 'open_access'], 42 | dtype='object') 43 | 44 | and we can ask for the title:: 45 | 46 | >>> article.title.unique() 47 | array([ 'A New Approach to Solve a Class of Continuous-Time Nonlinear 48 | Quadratic Zero-Sum Game Using ADP'], dtype=object) 49 | 50 | Note that you might be getting a different title than me. That is fine; it's just 51 | that new articles have been added to the API's database. 52 | 53 | The structure of the results is discussed in depth in :ref:`results-set`. -------------------------------------------------------------------------------- /docs/Guides/api_key.rst: -------------------------------------------------------------------------------- 1 | .. 
_api_key: 2 | 3 | How to: Register Application and use `api_key` 4 | ============================================== 5 | 6 | Open APIs exist to allow users to access academic meta data easily. Some of those 7 | APIs may require a user to register their application in order to do so. 8 | 9 | Currently, the following APIs implemented within the library will require you to 10 | register: 11 | 12 | - IEEE Xplore; registration link: https://developer.ieee.org/member/register 13 | - Springer Open Access API; registration link: https://dev.springernature.com/login 14 | 15 | Once you have registered as a user and have registered your application, it will be 16 | given an application key. 17 | 18 | In order to be able to use the APIs listed here via Arcas you will have to add 19 | this key to the `api_key.py` file under the API's respective folder. 20 | 21 | Firstly, you will have to clone the repository from GitHub using the following 22 | command:: 23 | 24 | $ git clone https://github.com/Nikoleta-v3/Arcas.git 25 | 26 | 27 | Once you have a copy of the repository you can see that there is a folder for each 28 | API located at `src/arcas`. We can see this by typing the following commands:: 29 | 30 | $ cd Arcas/src/arcas 31 | $ ls 32 | arXiv IEEE __init__.py nature PLOS __pycache__ Springer tools.py version.py 33 | 34 | Both the `IEEE` and the `Springer` folders have an `api_key.py` file which is where 35 | we need to add your application key. 36 | 37 | For example let us consider `IEEE`. Using the following command we list the files 38 | within the folder:: 39 | 40 | $ ls IEEE 41 | api_key.py __init__.py main.py 42 | 43 | We can also see what's in the `api_key.py` file:: 44 | 45 | $ cat IEEE/api_key.py 46 | api_key = 'Your key here' 47 | 48 | All we need to do is replace :code:`'Your key here'` with our key and save. 49 | 50 | Once this is done all you have to do is go ahead and install the library. We need 51 | to navigate to the top of the repository:: 52 | 53 | $ cd ../..
54 | 55 | and then just use the following command to install the package:: 56 | 57 | $ python setup.py install 58 | 59 | We will need to add your Springer key in :code:`src/arcas/Springer/api_key.py` as 60 | well so we can use both APIs. 61 | 62 | Once we have done this we should be ready to use all the APIs available to us 63 | via Arcas. -------------------------------------------------------------------------------- /docs/Reference/search_fields.rst: -------------------------------------------------------------------------------- 1 | .. _search-fields: 2 | 3 | Search Parameters 4 | ================= 5 | 6 | The table below outlines the parameters that can be passed to the query interface: 7 | 8 | 9 | +-----------------+----------------------------------------------------------------------------------------------+ 10 | | Parameter | Description | 11 | +=================+==============================================================================================+ 12 | | :code:`author` | Searches both first name and last name. | 13 | +-----------------+----------------------------------------------------------------------------------------------+ 14 | | :code:`title` | Locate documents containing a word or phrase in the "article title" element. | 15 | +-----------------+----------------------------------------------------------------------------------------------+ 16 | | :code:`abstract`| Locate documents containing a word or phrase in the "abstract" element. | 17 | +-----------------+----------------------------------------------------------------------------------------------+ 18 | | :code:`year` | The value for publication year. | 19 | +-----------------+----------------------------------------------------------------------------------------------+ 20 | | :code:`category`| Allows users to search by the keywords given to an article. 
from arcas.tools import Api
from xml.etree import ElementTree


class Arxiv(Api):
    """Query builder and result normaliser for the arXiv export API.

    URL assembly, key generation, XML flattening and data frame creation are
    not defined here; they are used through ``self`` and come from ``Api``.
    """

    def __init__(self):
        # Prefix every search url starts with; the parameters are appended.
        self.standard = 'http://export.arxiv.org/api/query?search_query='

    @staticmethod
    def keys():
        """
        Fields we are keeping from arXiv results.
        """
        return ['url', 'key', 'unique_key', 'title', 'author', 'abstract',
                'doi', 'date', 'journal', 'provenance', 'primary_category',
                'category', 'score', 'open_access']

    def to_dataframe(self, raw_article):
        """Standardise one arXiv record and return it as a dataframe.

        ``raw_article`` is a dictionary with the structure of an arXiv
        result. It is updated in place with the standardised fields and then
        handed to ``self.dict_to_dataframe``.
        """
        raw_article['url'] = raw_article.get('id', None)

        # Both fields arrive as one comma separated string (or not at all)
        # and are stored as lists.
        for target, source in (('author', 'name'), ('category', 'category')):
            joined = raw_article.get(source, None)
            raw_article[target] = None if joined is None else joined.split(',')

        raw_article['abstract'] = raw_article.get('summary', None)
        # Only the year, i.e. the text before the first '-', is kept.
        published = raw_article.get('published', '0')
        raw_article['date'] = int(published.split('-')[0])

        journal = raw_article.get('journal_ref', None)
        # Records without a journal reference are attributed to arXiv itself.
        raw_article['journal'] = "arXiv" if journal is None else journal

        raw_article['provenance'] = 'arXiv'
        raw_article['title'] = raw_article.get('title', None)
        raw_article['doi'] = raw_article.get('doi', None)
        raw_article['key'], raw_article['unique_key'] = self.create_keys(raw_article)

        raw_article['open_access'] = True
        raw_article['score'] = 'Not available'
        return self.dict_to_dataframe(raw_article)

    def parse(self, root):
        """Keep only the ``entry`` branches of the response.

        Returns a list with one dictionary per entry, or ``False`` when the
        response holds no entry.
        """
        # Iterating the element yields its direct children. The previous
        # ``root.getchildren()`` no longer exists from Python 3.9 onwards.
        raw_articles = [self.xml_to_dict(record) for record in root
                        if 'entry' in record.tag]
        return raw_articles or False

    @staticmethod
    def parameters_fix(author=None, title=None, abstract=None, year=None,
                       records=None, start=None, category=None, journal=None,
                       keyword=None):
        """Translate the search arguments into arXiv query fragments.

        Arguments left as ``None`` are skipped. ``year`` is not translated;
        a notice is printed instead.
        """
        # The order of this table is the order of the returned fragments.
        fragments = (('au:{}', author),
                     ('ti:{}', title),
                     ('abs:{}', abstract),
                     ('cat:{}', category),
                     ('jr:{}', journal),
                     ('all:{}', keyword),
                     ('max_results={}', records),
                     ('start={}', start))
        parameters = [template.format(value) for template, value in fragments
                      if value is not None]
        if year is not None:
            print('ArXiv does not support argument year.')  # TODO: Add url to documentation

        return parameters

    @staticmethod
    def get_root(response):
        """Parse the text of ``response`` as XML and return the root element."""
        return ElementTree.fromstring(response.text)
arcas.Springer() 40 | parameters = api.parameters_fix(journal='Springer') 41 | assert parameters == ['pub:Springer'] 42 | 43 | url = api.create_url_search(parameters) 44 | assert url == 'http://api.springer.com/metadata/pam?q=pub:Springer&api_key=Your key here' 45 | 46 | def test_parameters_and_url_record(): 47 | api = arcas.Springer() 48 | parameters = api.parameters_fix(records=1) 49 | assert parameters == ['p=1'] 50 | 51 | url = api.create_url_search(parameters) 52 | assert url == 'http://api.springer.com/metadata/pam?q=p=1&api_key=Your key here' 53 | 54 | def test_parameters_and_url_start(): 55 | api = arcas.Springer() 56 | parameters = api.parameters_fix(start=1) 57 | assert parameters == ['s=1'] 58 | 59 | url = api.create_url_search(parameters) 60 | assert url == 'http://api.springer.com/metadata/pam?q=s=1&api_key=Your key here' 61 | 62 | def test_create_url_search(): 63 | api = arcas.Springer() 64 | parameters = api.parameters_fix(title='Nash', journal='Spinger', records=2, start=5) 65 | url = api.create_url_search(parameters) 66 | assert url == 'http://api.springer.com/metadata/pam?q=title:Nash+AND+pub:Spinger&p=2&s=5&api_key=Your key here' 67 | 68 | def test_to_dataframe(): 69 | dummy_article = {'identifier': 'doi:10.1000/', 'title': 'Title', 70 | 'creator': 'E Glynatsi, V Knight', 'publicationName': 71 | 'Awesome Journal', 'genre': 'ReviewPaper', 'openAccess': 'false', 72 | 'h1': 'Abstract', 'p': 'Abstract', 73 | 'doi': '10.1000/', 'publisher': 'Springer', 74 | 'publicationDate': '2021-01-01', 'url': 'http://dx.doi.org/10.1000/', 75 | 'openAccess': 'false',} 76 | 77 | api = arcas.Springer() 78 | article = api.to_dataframe(dummy_article) 79 | 80 | assert isinstance(article, pandas.core.frame.DataFrame) 81 | assert list(article.columns) == api.keys() 82 | assert len(article['url']) == 2 83 | 84 | assert article['url'].unique()[0] == 'http://dx.doi.org/10.1000/' 85 | assert article['key'].unique()[0] == 'Glynatsi2021' 86 | assert article['title'].unique()[0] 
from arcas.tools import Api
import xml.etree.ElementTree as etree
from xml.etree import ElementTree


class Plos(Api):
    """Query builder and result normaliser for the PLOS search API.

    Key generation and data frame creation are not defined here; they are
    used through ``self`` and come from ``Api``.
    """

    def __init__(self):
        # Prefix every search url starts with; the parameters are appended.
        self.standard = 'http://api.plos.org/search?q='

    def create_url_search(self, parameters):
        """Creates the search url, combining the standard url and various
        search parameters.

        The first parameter directly follows the standard url. Of the
        remaining ones, paging parameters (``rows=``/``start=``) are attached
        with ``&`` and search terms are chained with ``+AND+``.
        """
        pieces = [self.standard, parameters[0]]
        for parameter in parameters[1:]:
            is_paging = 'rows=' in parameter or 'start=' in parameter
            separator = '&' if is_paging else '+AND+'
            pieces.append(separator + parameter)
        return ''.join(pieces)

    def to_dataframe(self, raw_article):
        """Standardise one PLOS record and return it as a dataframe.

        ``raw_article`` is a dictionary with the structure of a PLOS result.
        It is updated in place with the standardised fields and then handed
        to ``self.dict_to_dataframe``.
        """
        raw_article['author'] = raw_article.get('author_display', None)
        raw_article['abstract'] = raw_article.get('abstract', [None])

        # Only the year, i.e. the text before the first '-', is kept.
        published = raw_article.get('publication_date', '0')
        raw_article['date'] = int(published.split('-')[0])
        raw_article['journal'] = raw_article.get('journal', None)
        raw_article['provenance'] = 'PLOS'

        score = raw_article.get('score', None)
        if score is not None:
            # Going through float first also accepts decimal strings such as
            # '6.93', which int() alone rejects.
            score = int(float(score))
        raw_article['score'] = score

        doi = raw_article.get('id', None)
        raw_article['doi'] = doi
        # A record without an id has no resolvable url either; previously
        # this raised KeyError although the doi lookup tolerated it.
        raw_article['url'] = None if doi is None else 'https://doi.org/' + doi
        raw_article['title'] = raw_article.get('title_display', None)
        raw_article['key'], raw_article['unique_key'] = self.create_keys(raw_article)

        raw_article['category'] = 'Not available'
        raw_article['open_access'] = 'Not available'
        return self.dict_to_dataframe(raw_article)

    @staticmethod
    def xml_to_dict(record):
        """Collect ``(key, value)`` pairs of a record into a dictionary.

        A key with a value is stored as is. A key without a value opens an
        empty list, and the values of the following keyless pairs are
        appended to the most recently opened list.
        """
        d = {}
        # NOTE(review): the collapsed source does not show whether the
        # original tracked every key or only list-valued ones; only
        # list-valued keys can receive appended values, so those are tracked.
        current_key = None
        for key, value in record:
            if key is not None:
                if value is not None:
                    d[key] = value
                else:
                    d[key] = []
                    current_key = key
            elif value is not None and current_key is not None:
                # Without an open list there is nowhere to put the value.
                d[current_key].append(value)
        return d

    def parse(self, root):
        """Extract the list of documents from the decoded JSON response.

        Returns ``False`` when the response reports zero hits.
        """
        response = root['response']
        if response['numFound'] == 0:
            return False
        return response['docs']

    @staticmethod
    def parameters_fix(author=None, title=None, abstract=None, year=None,
                       records=None, start=None, category=None, journal=None,
                       keyword=None):
        """Translate the search arguments into PLOS query fragments.

        Arguments left as ``None`` are skipped.
        """
        # The order of this table is the order of the returned fragments.
        # The year range runs to 31 December; it used to stop at the 30th
        # and silently dropped articles published on the last day.
        fragments = (('author:"{}"', author),
                     ('title:"{}"', title),
                     ('abstract:"{}"', abstract),
                     ('publication_date:[{0}-01-01T00:00:00Z TO '
                      '{0}-12-31T23:59:59Z]', year),
                     ('journal:"{}"', journal),
                     ('subject:"{}"', category),
                     ('everything:"{}"', keyword),
                     ('rows={}', records),
                     ('start={}', start))
        return [template.format(value) for template, value in fragments
                if value is not None]

    @staticmethod
    def get_root(response):
        """Decode the body of ``response`` as JSON and return it."""
        return response.json()
'http://export.arxiv.org/api/query?search_query=abs:Game' 37 | 38 | def test_parameters_and_url_category(): 39 | api = arcas.Arxiv() 40 | parameters = api.parameters_fix(category='game theory') 41 | assert parameters == ['cat:game theory'] 42 | 43 | url = api.create_url_search(parameters) 44 | assert url == 'http://export.arxiv.org/api/query?search_query=cat:game theory' 45 | 46 | def test_parameters_and_url_journal(): 47 | api = arcas.Arxiv() 48 | parameters = api.parameters_fix(journal='arxiv') 49 | assert parameters == ['jr:arxiv'] 50 | 51 | url = api.create_url_search(parameters) 52 | assert url == 'http://export.arxiv.org/api/query?search_query=jr:arxiv' 53 | 54 | def test_parameters_and_url_record(): 55 | api = arcas.Arxiv() 56 | parameters = api.parameters_fix(records=1) 57 | assert parameters == ['max_results=1'] 58 | 59 | url = api.create_url_search(parameters) 60 | assert url == 'http://export.arxiv.org/api/query?search_query=max_results=1' 61 | 62 | def test_parameters_and_url_start(): 63 | api = arcas.Arxiv() 64 | parameters = api.parameters_fix(start=1) 65 | assert parameters == ['start=1'] 66 | 67 | url = api.create_url_search(parameters) 68 | assert url == 'http://export.arxiv.org/api/query?search_query=start=1' 69 | 70 | def test_create_url_search(): 71 | api = arcas.Arxiv() 72 | parameters = api.parameters_fix(title='Nash', abstract='mixed', records=2, start=5) 73 | url = api.create_url_search(parameters) 74 | assert url == 'http://export.arxiv.org/api/query?search_query=ti:Nash&abs:mixed&max_results=2&start=5' 75 | 76 | def test_to_dataframe(): 77 | dummy_article = {'entry': '\n', 'id': 'http://arxiv.org/abs/0000', 78 | 'updated': '2011', 'published': '2010', 'title': 'Title', 79 | 'summary': "Abstract", 'author': '\n', 'name': 'E Glynatsi, V Knight', 80 | 'doi': '10.0000', 'comment': 'This is a comment.', 81 | 'journal_ref': 'Awesome Journal', 'primary_category': 'Dummy', 82 | 'category': None} 83 | api = arcas.Arxiv() 84 | article = 
api.to_dataframe(dummy_article) 85 | 86 | assert isinstance(article, pandas.core.frame.DataFrame) 87 | assert list(article.columns) == api.keys() 88 | assert len(article['url']) == 2 89 | 90 | assert article['url'].unique()[0] == 'http://arxiv.org/abs/0000' 91 | assert article['key'].unique()[0] == 'Glynatsi2010' 92 | assert article['title'].unique()[0] == 'Title' 93 | assert article['abstract'].unique()[0] == 'Abstract' 94 | assert article['journal'].unique()[0] == 'Awesome Journal' 95 | assert article['primary_category'].unique()[0] == 'Dummy' 96 | assert article['category'].unique()[0] == None 97 | assert article['score'].unique()[0] == 'Not available' 98 | assert article['open_access'].unique()[0] == True -------------------------------------------------------------------------------- /tests/test_plos.py: -------------------------------------------------------------------------------- 1 | import arcas 2 | import pandas 3 | 4 | def test_setup(): 5 | api = arcas.Plos() 6 | assert api.standard == 'http://api.plos.org/search?q=' 7 | 8 | def test_keys(): 9 | api = arcas.Plos() 10 | assert api.keys() == ['url', 'key', 'unique_key', 'title', 'author', 'abstract', 11 | 'doi', 'date', 'journal', 'provenance', 'category', 'score', 12 | 'open_access'] 13 | 14 | def test_parameters_and_url_author(): 15 | api = arcas.Plos() 16 | parameters = api.parameters_fix(author='Glynatsi') 17 | assert parameters == ['author:"Glynatsi"'] 18 | 19 | url = api.create_url_search(parameters) 20 | assert url == 'http://api.plos.org/search?q=author:"Glynatsi"' 21 | 22 | def test_parameters_and_url_title(): 23 | api = arcas.Plos() 24 | parameters = api.parameters_fix(title='Game') 25 | assert parameters == ['title:"Game"'] 26 | 27 | url = api.create_url_search(parameters) 28 | assert url == 'http://api.plos.org/search?q=title:"Game"' 29 | 30 | def test_parameters_and_url_abstract(): 31 | api = arcas.Plos() 32 | parameters = api.parameters_fix(abstract='Game') 33 | assert parameters == 
['abstract:"Game"'] 34 | 35 | url = api.create_url_search(parameters) 36 | assert url == 'http://api.plos.org/search?q=abstract:"Game"' 37 | 38 | def test_parameters_and_url_category(): 39 | api = arcas.Plos() 40 | parameters = api.parameters_fix(category='game theory') 41 | assert parameters == ['subject:"game theory"'] 42 | 43 | url = api.create_url_search(parameters) 44 | assert url == 'http://api.plos.org/search?q=subject:"game theory"' 45 | 46 | def test_parameters_and_url_journal(): 47 | api = arcas.Plos() 48 | parameters = api.parameters_fix(journal='PLOS ONE') 49 | assert parameters == ['journal:"PLOS ONE"'] 50 | 51 | url = api.create_url_search(parameters) 52 | assert url == 'http://api.plos.org/search?q=journal:"PLOS ONE"' 53 | 54 | def test_parameters_and_url_record(): 55 | api = arcas.Plos() 56 | parameters = api.parameters_fix(records=1) 57 | assert parameters == ['rows=1'] 58 | 59 | url = api.create_url_search(parameters) 60 | assert url == 'http://api.plos.org/search?q=rows=1' 61 | 62 | def test_parameters_and_url_start(): 63 | api = arcas.Plos() 64 | parameters = api.parameters_fix(start=1) 65 | assert parameters == ['start=1'] 66 | 67 | url = api.create_url_search(parameters) 68 | assert url == 'http://api.plos.org/search?q=start=1' 69 | 70 | def test_create_url_search(): 71 | api = arcas.Plos() 72 | parameters = api.parameters_fix(title='Nash', abstract='mixed', records=2, start=5) 73 | url = api.create_url_search(parameters) 74 | assert url == 'http://api.plos.org/search?q=title:"Nash"+AND+abstract:"mixed"&rows=2&start=5' 75 | 76 | def test_to_dataframe(): 77 | dummy_article = {'response': [], 78 | 'id': '10.0000/journal.pone.00000', 79 | 'journal': 'PLOS ONE', 80 | 'publication_date': '2010-12-12T00:00:00Z', 81 | 'article_type': 'Research Article', 82 | 'author_display': ['E Glynatsi', 'V Knight'], 83 | 'abstract': "Abstract", 84 | 'title_display': "Title", 85 | 'score': '10'} 86 | api = arcas.Plos() 87 | article = 
api.to_dataframe(dummy_article) 88 | 89 | assert isinstance(article, pandas.core.frame.DataFrame) 90 | assert list(article.columns) == api.keys() 91 | assert len(article['url']) == 2 92 | 93 | assert article['url'].unique()[0] == 'https://doi.org/' + dummy_article['id'] 94 | assert article['key'].unique()[0] == 'Glynatsi2010' 95 | assert article['title'].unique()[0] == 'Title' 96 | assert article['abstract'].unique()[0] == 'Abstract' 97 | assert article['journal'].unique()[0] == 'PLOS ONE' 98 | assert article['date'].unique()[0] == 2010 99 | assert article['doi'].unique()[0] == dummy_article['id'] 100 | assert article['open_access'].unique()[0] == 'Not available' 101 | assert article['score'].unique()[0] == 10 102 | -------------------------------------------------------------------------------- /src/arcas/IEEE/main.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | import ratelimit 4 | from arcas.tools import Api 5 | 6 | from .api_key import api_key 7 | from arcas.tools import APIError 8 | 9 | class Ieee(Api): 10 | """ 11 | API argument is 'ieee'. 12 | """ 13 | def __init__(self): 14 | self.standard = 'https://ieeexploreapi.ieee.org/api/v1/search/articles?' 
15 | self.key_api = api_key 16 | 17 | def create_url_search(self, parameters): 18 | """Creates the search url, combining the standard url and various 19 | search parameters.""" 20 | url = self.standard 21 | url += parameters[0] 22 | for i in parameters[1:]: 23 | url += '&{}'.format(i) 24 | url += '&apikey={}'.format(self.key_api) 25 | return url 26 | 27 | @staticmethod 28 | @ratelimit.rate_limited(3) 29 | def make_request(url): 30 | """Request from an API and returns response.""" 31 | response = requests.get(url, stream=True, verify=False) 32 | if response.status_code != 200: 33 | raise APIError(response.status_code) 34 | return response 35 | 36 | def to_dataframe(self, raw_article): 37 | """A function which takes a dictionary with structure of the IEEE 38 | results and transform it to a standardized format. 39 | """ 40 | raw_article['url'] = raw_article.get('html_url', None) 41 | try: 42 | raw_article['author'] = [author['full_name'] for author in raw_article['authors']['authors']] 43 | except KeyError: 44 | raw_article['author'] = ['No authors found for this document.'] 45 | raw_article['abstract'] = raw_article.get('abstract', None) 46 | if raw_article['content_type'] == 'Conferences': 47 | date = raw_article.get('conference_dates', None) 48 | else: 49 | date = raw_article.get('publication_date', None) 50 | if date is not None: 51 | date = int(date.split(' ')[-1]) 52 | raw_article['date'] = date 53 | 54 | category = raw_article.get('index_terms', None) 55 | if category is not None: 56 | try: 57 | category = category['author_terms']['terms'] 58 | except KeyError: 59 | try: 60 | category = category['ieee_terms']['terms'] 61 | except KeyError: 62 | category = None 63 | raw_article['doi'] = raw_article.get('doi', None) 64 | raw_article['category'] = category 65 | 66 | raw_article['journal'] = raw_article.get('publication_title', None) 67 | raw_article['provenance'] = 'IEEE' 68 | raw_article['key'], raw_article['unique_key'] = self.create_keys(raw_article) 69 | 70 | 
raw_article['open_access'] = raw_article['access_type'] == 'OPEN_ACCESS' 71 | raw_article['score'] = 'Not available' 72 | return self.dict_to_dataframe(raw_article) 73 | 74 | def parse(self, root): 75 | """Parsing the xml file""" 76 | if root['total_records'] == 0: 77 | return False 78 | return root['articles'] 79 | 80 | @staticmethod 81 | def parameters_fix(author=None, title=None, abstract=None, year=None, 82 | records=None, start=None, category=None, journal=None, 83 | keyword=None): 84 | parameters = [] 85 | if author is not None: 86 | parameters.append('author={}'.format(author)) 87 | if title is not None: 88 | parameters.append('article_title={}'.format(title)) 89 | if abstract is not None: 90 | parameters.append('abstract={}'.format(abstract)) 91 | if year is not None: 92 | parameters.append('publication_year={}'.format(year)) 93 | if category is not None: 94 | parameters.append('index_terms={}'.format(category)) 95 | if journal is not None: 96 | parameters.append('publication_title={}'.format(journal)) 97 | if keyword is not None: 98 | parameters.append('querytext={}'.format(keyword)) 99 | if records is not None: 100 | parameters.append('max_records={}'.format(records)) 101 | if start is not None: 102 | parameters.append('start_record={}'.format(start)) 103 | 104 | return parameters 105 | 106 | @staticmethod 107 | def get_root(response): 108 | root = response.json() 109 | return root 110 | -------------------------------------------------------------------------------- /src/arcas/Springer/main.py: -------------------------------------------------------------------------------- 1 | from arcas.tools import Api 2 | from .api_key import api_key 3 | from xml.etree import ElementTree 4 | 5 | 6 | class Springer(Api): 7 | def __init__(self): 8 | self.standard = 'http://api.springer.com/metadata/pam?q=' 9 | self.key_api = api_key 10 | 11 | def create_url_search(self, parameters): 12 | """Creates the search url, combining the standard url and various 13 | search 
parameters.""" 14 | url = self.standard 15 | url += parameters[0] 16 | for i in parameters[1:]: 17 | if 's=' in i or 'p=' in i: 18 | url += '&{}'.format(i) 19 | else: 20 | url += '+AND+{}'.format(i) 21 | url += '&api_key={}'.format(self.key_api) 22 | return url 23 | 24 | def to_dataframe(self, raw_article): 25 | """A function which takes a dictionary with structure of the Springer 26 | results and transform it to a standardized format. 27 | """ 28 | raw_article['url'] = raw_article.get('url', None) 29 | raw_article['author'] = raw_article.get('creator', None) 30 | if raw_article['author'] is not None: 31 | raw_article['author'] = raw_article['author'].split(',') 32 | else: 33 | raw_article['author'] = ['No authors found for this document.'] 34 | 35 | raw_article['abstract'] = raw_article.get('p', None) 36 | raw_article['date'] = int(raw_article.get('publicationDate', '0').split('-')[0]) 37 | raw_article['journal'] = raw_article.get('publicationName', None) 38 | raw_article['provenance'] = 'Springer' 39 | raw_article['title'] = raw_article.get('title', None) 40 | raw_article['doi'] = raw_article.get('doi', None) 41 | raw_article['key'], raw_article['unique_key'] = self.create_keys(raw_article) 42 | 43 | raw_article['open_access'] = raw_article['openAccess'] == 'true' 44 | raw_article['score'] = 'Not available' 45 | raw_article['category'] = 'Not available' 46 | return self.dict_to_dataframe(raw_article) 47 | 48 | @staticmethod 49 | def xml_to_dict(raw_article): 50 | """Xml response with information on article to dictionary""" 51 | d = {} 52 | for key, value in raw_article: 53 | if key not in d: 54 | if value is not None: 55 | value = value.replace(',', ' ') 56 | d[key] = value 57 | else: 58 | if value is not None: 59 | value = value.replace(',', ' ') 60 | d[key] += ',' + value 61 | return d 62 | 63 | def parse(self, root): 64 | """Parsing the xml file""" 65 | branches = [branch for branch in root.getchildren() if branch.tag == 'records'] 66 | if not branches: 67 | 
return False 68 | else: 69 | raw_articles = [[]] 70 | for at in branches[0].iter(): 71 | key = at.tag.split('}')[-1] 72 | if key == 'article': 73 | raw_articles.append([]) 74 | else: 75 | raw_articles[-1].append((key, at.text)) 76 | raw_articles.remove(raw_articles[0]) 77 | while [] in raw_articles: 78 | raw_articles.remove([]) 79 | return [self.xml_to_dict(raw_article) for raw_article in raw_articles] 80 | 81 | @staticmethod 82 | def parameters_fix(author=None, title=None, abstract=None, year=None, 83 | records=None, start=None, category=None, journal=None, 84 | keyword=None): 85 | parameters = [] 86 | if author is not None: 87 | parameters.append('name:{}'.format(author)) 88 | if title is not None: 89 | parameters.append('title:{}'.format(title)) 90 | if year is not None: 91 | parameters.append('year:{}'.format(year)) 92 | if category is not None: 93 | parameters.append('subject:{}'.format(category)) 94 | if journal is not None: 95 | parameters.append('pub:{}'.format(journal)) 96 | if keyword is not None: 97 | parameters.append('keyword:{}'.format(keyword)) 98 | if records is not None: 99 | parameters.append('p={}'.format(records)) 100 | if start is not None: 101 | parameters.append('s={}'.format(start)) 102 | if abstract is not None: 103 | print('Springer does not support argument abstract.') 104 | print() 105 | 106 | return parameters 107 | 108 | @staticmethod 109 | def get_root(response): 110 | root = ElementTree.fromstring(response.text) 111 | return root 112 | 113 | 114 | -------------------------------------------------------------------------------- /src/arcas/nature/main.py: -------------------------------------------------------------------------------- 1 | from arcas.tools import Api 2 | from xml.etree import ElementTree 3 | 4 | 5 | class Nature(Api): 6 | def __init__(self): 7 | self.standard = 'http://www.nature.com/opensearch/request?&query=' 8 | 9 | def create_url_search(self, parameters): 10 | """Creates the search url, combining the standard url 
and various 11 | search parameters.""" 12 | url = self.standard 13 | url += parameters[0] 14 | for i in parameters[1:]: 15 | if 'maximumRecords=' in i or 'startRecord=' in i: 16 | url += '&{}'.format(i) 17 | else: 18 | url += '+AND+{}'.format(i) 19 | return url 20 | 21 | @staticmethod 22 | def xml_to_dict(records): 23 | """Xml response with information on article to dictionary""" 24 | d = {} 25 | for key, value in records: 26 | if key not in d: 27 | d[key] = value 28 | else: 29 | value = value.replace(',', ' ') 30 | d[key] += ',' + value 31 | return d 32 | 33 | def parse(self, root): 34 | """Parsing the xml file""" 35 | parents = root.getchildren() 36 | diagnostics = parents[3].tag.split('}')[-1] 37 | number_of_records = parents[0].text 38 | if (diagnostics == 'diagnostics') or (number_of_records == '0'): 39 | return False 40 | else: 41 | parents = parents[2] 42 | raw_articles = [[]] 43 | for at in parents.iter(): 44 | key = at.tag.split('}')[-1] 45 | if key == 'recordPosition': 46 | raw_articles.append([]) 47 | else: 48 | raw_articles[-1].append((key, at.text)) 49 | 50 | while [] in raw_articles: 51 | raw_articles.remove([]) 52 | 53 | return [self.xml_to_dict(raw_article) for raw_article in raw_articles] 54 | 55 | def to_dataframe(self, raw_article): 56 | """A function which takes a dictionary with structure of the nature 57 | results and transform it to a standardized format. 
58 | """ 59 | raw_article['url'] = raw_article.get('url', None) 60 | for key_one, key_two in [['author', 'creator'], ['category', 'subject']]: 61 | raw_article[key_one] = raw_article.get(key_two, None) 62 | if raw_article[key_one] is not None: 63 | raw_article[key_one] = raw_article[key_one].split(',') 64 | if raw_article['author'] is None: 65 | raw_article['author'] = ['No authors found for this document.'] 66 | 67 | raw_article['abstract'] = raw_article.get('description', None) 68 | raw_article['date'] = int(raw_article.get('publicationDate', '0').split('-')[0]) 69 | raw_article['journal'] = raw_article.get('publicationName', None) 70 | 71 | raw_article['category'] = raw_article.get('subject', None) 72 | if raw_article['category'] is not None: 73 | raw_article['category'] = raw_article['category'].split(',') 74 | 75 | raw_article['provenance'] = 'Nature' 76 | raw_article['title'] = raw_article.get('title', None) 77 | raw_article['doi'] = raw_article.get('doi', None) 78 | raw_article['key'], raw_article['unique_key'] = self.create_keys(raw_article) 79 | 80 | raw_article['open_access'] = 'Not available' 81 | raw_article['score'] = 'Not available' 82 | return self.dict_to_dataframe(raw_article) 83 | 84 | @staticmethod 85 | def parameters_fix(author=None, title=None, abstract=None, year=None, 86 | records=None, start=None, category=None, journal=None, 87 | keyword=None): 88 | parameters = [] 89 | if author is not None: 90 | parameters.append('dc.creator={}'.format(author)) 91 | if title is not None: 92 | parameters.append('dc.title adj {}'.format(title)) 93 | if abstract is not None: 94 | parameters.append('dc.description adj {}'.format(abstract)) 95 | if year is not None: 96 | parameters.append('prism.publicationDate={}'.format(year)) 97 | if journal is not None: 98 | parameters.append('prism.publicationName={}'.format(journal)) 99 | if category is not None: 100 | parameters.append('dc.subject adj {}'.format(category)) 101 | if keyword is not None: 102 | 
parameters.append('cql.keywords={}'.format(keyword)) 103 | if records is not None: 104 | parameters.append('maximumRecords={}'.format(records)) 105 | if start is not None: 106 | parameters.append('startRecord={}'.format(start)) 107 | 108 | return parameters 109 | 110 | @staticmethod 111 | def get_root(response): 112 | root = ElementTree.fromstring(response.text) 113 | return root -------------------------------------------------------------------------------- /tests/test_nature.py: -------------------------------------------------------------------------------- 1 | import arcas 2 | import pandas 3 | 4 | def test_setup(): 5 | api = arcas.Nature() 6 | assert api.standard == 'http://www.nature.com/opensearch/request?&query=' 7 | 8 | def test_keys(): 9 | api = arcas.Nature() 10 | assert api.keys() == ['url', 'key', 'unique_key', 'title', 'author', 'abstract', 11 | 'doi', 'date', 'journal', 'provenance', 'category', 'score', 12 | 'open_access'] 13 | 14 | def test_parameters_and_url_author(): 15 | api = arcas.Nature() 16 | parameters = api.parameters_fix(author='Glynatsi') 17 | assert parameters == ['dc.creator=Glynatsi'] 18 | 19 | url = api.create_url_search(parameters) 20 | assert url == 'http://www.nature.com/opensearch/request?&query=dc.creator=Glynatsi' 21 | 22 | def test_parameters_and_url_title(): 23 | api = arcas.Nature() 24 | parameters = api.parameters_fix(title='Game') 25 | assert parameters == ['dc.title adj Game'] 26 | 27 | url = api.create_url_search(parameters) 28 | assert url == 'http://www.nature.com/opensearch/request?&query=dc.title adj Game' 29 | 30 | def test_parameters_and_url_abstract(): 31 | api = arcas.Nature() 32 | parameters = api.parameters_fix(abstract='Game') 33 | assert parameters == ['dc.description adj Game'] 34 | 35 | url = api.create_url_search(parameters) 36 | assert url == 'http://www.nature.com/opensearch/request?&query=dc.description adj Game' 37 | 38 | def test_parameters_and_url_year(): 39 | api = arcas.Nature() 40 | parameters = 
api.parameters_fix(year=2010) 41 | assert parameters == ['prism.publicationDate=2010'] 42 | 43 | url = api.create_url_search(parameters) 44 | assert url == 'http://www.nature.com/opensearch/request?&query=prism.publicationDate=2010' 45 | 46 | def test_parameters_and_url_category(): 47 | api = arcas.Nature() 48 | parameters = api.parameters_fix(category='game theory') 49 | assert parameters == ['dc.subject adj game theory'] 50 | 51 | url = api.create_url_search(parameters) 52 | assert url == 'http://www.nature.com/opensearch/request?&query=dc.subject adj game theory' 53 | 54 | def test_parameters_and_url_journal(): 55 | api = arcas.Nature() 56 | parameters = api.parameters_fix(journal='Nature') 57 | assert parameters == ['prism.publicationName=Nature'] 58 | 59 | url = api.create_url_search(parameters) 60 | assert url == 'http://www.nature.com/opensearch/request?&query=prism.publicationName=Nature' 61 | 62 | def test_parameters_and_url_record(): 63 | api = arcas.Nature() 64 | parameters = api.parameters_fix(records=1) 65 | assert parameters == ['maximumRecords=1'] 66 | 67 | url = api.create_url_search(parameters) 68 | assert url == 'http://www.nature.com/opensearch/request?&query=maximumRecords=1' 69 | 70 | def test_parameters_and_url_start(): 71 | api = arcas.Nature() 72 | parameters = api.parameters_fix(start=1) 73 | assert parameters == ['startRecord=1'] 74 | 75 | url = api.create_url_search(parameters) 76 | assert url == 'http://www.nature.com/opensearch/request?&query=startRecord=1' 77 | 78 | def test_create_url_search(): 79 | api = arcas.Nature() 80 | parameters = api.parameters_fix(title='Nash', abstract='mixed', records=2, start=5) 81 | url = api.create_url_search(parameters) 82 | assert url == 'http://www.nature.com/opensearch/request?&query=dc.title adj Nash+AND+dc.description adj mixed&maximumRecords=2&startRecord=5' 83 | 84 | def test_to_dataframe(): 85 | dummy_article = {'records': None, 'record': None, 'recordSchema': 'info:srw/schema/11/pam-v2.1', 86 | 
'recordPacking': 'packed', 'recordData': None, 'message': None, 87 | 'article': None, 'head': None, 'identifier': 'doi:10.1000', 88 | 'title': 'Title', 'creator': 'E Glynatsi, V Knight', 89 | 'publicationName': 'Journal', 'doi': '10.1000', 'publicationDate': '2010', 90 | 'description': 'Abstract', 91 | 'volume': '48', 'number': '4', 'startingPage': '423', 92 | 'endingPage': '432', 'url': 'http://nature.org/abs/0000'} 93 | 94 | api = arcas.Nature() 95 | article = api.to_dataframe(dummy_article) 96 | 97 | assert isinstance(article, pandas.core.frame.DataFrame) 98 | assert list(article.columns) == api.keys() 99 | assert len(article['url']) == 2 100 | 101 | assert article['url'].unique()[0] == 'http://nature.org/abs/0000' 102 | assert article['key'].unique()[0] == 'Glynatsi2010' 103 | assert article['title'].unique()[0] == 'Title' 104 | assert article['abstract'].unique()[0] == 'Abstract' 105 | assert article['journal'].unique()[0] == 'Journal' 106 | assert article['doi'].unique()[0] == '10.1000' 107 | assert article['category'].unique()[0] == None 108 | assert article['score'].unique()[0] == 'Not available' 109 | assert article['open_access'].unique()[0] == 'Not available' -------------------------------------------------------------------------------- /src/arcas/tools.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import itertools 3 | from xml.etree import ElementTree 4 | 5 | import pandas as pd 6 | import requests 7 | 8 | import ratelimit 9 | 10 | 11 | class APIError(Exception): 12 | """An API Error Exception.""" 13 | 14 | def __init__(self, status): 15 | self.status = status 16 | 17 | def __str__(self): 18 | return "APIError: status={}".format(self.status) 19 | 20 | 21 | class Api(): 22 | 23 | def __init__(self, standard): 24 | """Initializations""" 25 | self.standard = standard 26 | 27 | def create_url_search(self, parameters): 28 | """Creates the search url, combining the standard url and various 29 | 
search parameters.""" 30 | url = self.standard 31 | url += parameters[0] 32 | for i in parameters[1:]: 33 | url += '&{}'.format(i) 34 | return url 35 | 36 | @staticmethod 37 | def keys(): 38 | """ 39 | Fields we are keeping from arXiv results. 40 | """ 41 | keys = ['url', 'key', 'unique_key', 'title', 'author', 'abstract', 'doi', 42 | 'date', 'journal', 'provenance', 'category', 'score', 'open_access'] 43 | return keys 44 | 45 | @staticmethod 46 | @ratelimit.rate_limited(3) 47 | def make_request(url): 48 | """Request from an API and returns response.""" 49 | response = requests.get(url, stream=True) 50 | if response.status_code != 200: 51 | raise APIError(response.status_code) 52 | return response 53 | 54 | @staticmethod 55 | def xml_to_dict(record): 56 | """Xml response with information on article to dictionary""" 57 | d = {} 58 | for at in record.iter(): 59 | key = at.tag.split('}')[-1] 60 | if key in d and at.text is not None: 61 | d[key] += ', {}'.format(at.text) 62 | else: 63 | d.update({key: at.text}) 64 | return d 65 | 66 | @staticmethod 67 | def to_dataframe(raw_article): 68 | pass 69 | 70 | @staticmethod 71 | def parse(root): 72 | pass 73 | 74 | @staticmethod 75 | def parameters_fix(author=None, title=None, abstract=None, year=None, 76 | records=None, start=None, category=None, journal=None): 77 | pass 78 | 79 | @staticmethod 80 | def get_root(response): 81 | root = ElementTree.parse(response.raw).getroot() 82 | return root 83 | 84 | @staticmethod 85 | def lower_case(post): 86 | post = dict((k.lower() if isinstance(k, str) else k, 87 | v.lower() if isinstance(v, str) else v) for k, v in 88 | post.items()) 89 | return post 90 | 91 | @staticmethod 92 | def create_keys(raw_article): 93 | """ 94 | Returns public key 'AuthorYear' and 95 | unique key hash('Author''Title''Year''Abstract') 96 | """ 97 | try: 98 | full_name = raw_article['author'][0].split(' ') 99 | except (TypeError, IndexError) as e: 100 | full_name = [None] 101 | year = raw_article['date'] 102 | 
string = '{}{}{}{}'.format(full_name[-1], raw_article['title'], year, 103 | raw_article['abstract']) 104 | 105 | hash_object = hashlib.md5(string.encode('utf-8')) 106 | 107 | key = '{}{}'.format(full_name[-1], year) 108 | unique_key = hash_object.hexdigest() 109 | 110 | return key, unique_key 111 | 112 | def dict_to_dataframe(self, raw_article): 113 | """ 114 | Takes a dictionary and returns a dataframe 115 | """ 116 | values = [] 117 | for key in self.keys(): 118 | if type(raw_article[key]) is not list: 119 | values.append([raw_article[key]]) 120 | else: 121 | values.append(raw_article[key]) 122 | data = [] 123 | for row in itertools.product(*values): 124 | data.append(row) 125 | df = pd.DataFrame(data, columns=self.keys()) 126 | return df 127 | 128 | @staticmethod 129 | def export(df, filename): 130 | """ Write the results to a json file 131 | """ 132 | df.to_json(filename) 133 | 134 | def run(self, url, arguments, validate): 135 | """Putting everything together. Makes the request, 136 | transforms from xml to dict to a standardized format and output to 137 | json file. 138 | """ 139 | response = self.make_request(url) 140 | root = self.get_root(response) 141 | raw_articles = self.parse(root) 142 | if not raw_articles: 143 | raise ValueError('Empty results at {}'.format(url)) 144 | else: 145 | dfs = [] 146 | for raw_article in raw_articles: 147 | df = self.to_dataframe(raw_article) 148 | dfs.append(df) 149 | df = pd.concat(dfs, ignore_index=True) 150 | 151 | self.export(df, filename=arguments['-f']) 152 | -------------------------------------------------------------------------------- /tests/test_ieee.py: -------------------------------------------------------------------------------- 1 | import arcas 2 | import pandas 3 | 4 | def test_setup(): 5 | api = arcas.Ieee() 6 | assert api.standard == 'https://ieeexploreapi.ieee.org/api/v1/search/articles?' 
7 | 8 | def test_keys(): 9 | api = arcas.Ieee() 10 | assert api.keys() == ['url', 'key', 'unique_key', 'title', 'author', 'abstract', 11 | 'doi', 'date', 'journal', 'provenance', 'category', 'score', 12 | 'open_access'] 13 | 14 | def test_parameters_and_url_author(): 15 | api = arcas.Ieee() 16 | parameters = api.parameters_fix(author='Glynatsi') 17 | assert parameters == ['author=Glynatsi'] 18 | 19 | url = api.create_url_search(parameters) 20 | assert url == 'https://ieeexploreapi.ieee.org/api/v1/search/articles?author=Glynatsi&apikey=Your key here' 21 | 22 | def test_parameters_and_url_title(): 23 | api = arcas.Ieee() 24 | parameters = api.parameters_fix(title='Game') 25 | assert parameters == ['article_title=Game'] 26 | 27 | url = api.create_url_search(parameters) 28 | assert url == 'https://ieeexploreapi.ieee.org/api/v1/search/articles?article_title=Game&apikey=Your key here' 29 | 30 | def test_parameters_and_url_abstract(): 31 | api = arcas.Ieee() 32 | parameters = api.parameters_fix(abstract='Game') 33 | assert parameters == ['abstract=Game'] 34 | 35 | url = api.create_url_search(parameters) 36 | assert url == 'https://ieeexploreapi.ieee.org/api/v1/search/articles?abstract=Game&apikey=Your key here' 37 | 38 | def test_parameters_and_url_year(): 39 | api = arcas.Ieee() 40 | parameters = api.parameters_fix(year=2010) 41 | assert parameters == ['publication_year=2010'] 42 | 43 | url = api.create_url_search(parameters) 44 | assert url == 'https://ieeexploreapi.ieee.org/api/v1/search/articles?publication_year=2010&apikey=Your key here' 45 | 46 | 47 | def test_parameters_and_url_category(): 48 | api = arcas.Ieee() 49 | parameters = api.parameters_fix(category='game theory') 50 | assert parameters == ['index_terms=game theory'] 51 | 52 | url = api.create_url_search(parameters) 53 | assert url == 'https://ieeexploreapi.ieee.org/api/v1/search/articles?index_terms=game theory&apikey=Your key here' 54 | 55 | def test_parameters_and_url_journal(): 56 | api = arcas.Ieee() 
57 | parameters = api.parameters_fix(journal='Ieee') 58 | assert parameters == ['publication_title=Ieee'] 59 | 60 | url = api.create_url_search(parameters) 61 | assert url == 'https://ieeexploreapi.ieee.org/api/v1/search/articles?publication_title=Ieee&apikey=Your key here' 62 | 63 | def test_parameters_and_url_record(): 64 | api = arcas.Ieee() 65 | parameters = api.parameters_fix(records=1) 66 | assert parameters == ['max_records=1'] 67 | 68 | url = api.create_url_search(parameters) 69 | assert url == 'https://ieeexploreapi.ieee.org/api/v1/search/articles?max_records=1&apikey=Your key here' 70 | 71 | def test_parameters_and_url_start(): 72 | api = arcas.Ieee() 73 | parameters = api.parameters_fix(start=1) 74 | assert parameters == ['start_record=1'] 75 | 76 | url = api.create_url_search(parameters) 77 | assert url == 'https://ieeexploreapi.ieee.org/api/v1/search/articles?start_record=1&apikey=Your key here' 78 | 79 | def test_create_url_search(): 80 | api = arcas.Ieee() 81 | parameters = api.parameters_fix(title='Nash', journal='Spinger', records=2, start=5) 82 | url = api.create_url_search(parameters) 83 | assert url == 'https://ieeexploreapi.ieee.org/api/v1/search/articles?article_title=Nash&publication_title=Spinger&max_records=2&start_record=5&apikey=Your key here' 84 | 85 | def test_to_dataframe(): 86 | dummy_article = {'rank': 1, 'access_type': 'LOCKED', 'content_type': 'Journals', 87 | 'article_number': '000000', 'doi': '10.1000/', 88 | 'title': 'Title', 'publication_number': 0, 'publication_title': 'IEEE/Journal', 89 | 'volume': '22', 'issn': '1063-6692', 'publisher': 'IEEE', 90 | 'citing_paper_count': 4, 'publication_date': 'May. 
2010', 91 | 'index_terms': {'author_terms': {'terms': ['something else', 92 | 'something']}}, 'pdf_url': 'https://ieeexplore.ieee.org/stamp/0000', 93 | 'abstract_url': 'https://ieeexplore.ieee.org/xpl/0000', 94 | 'html_url': 'https://ieeexplore.ieee.org/xpls/0000', 95 | 'authors': {'authors': [{'full_name': 'N Glynatsi'}, 96 | {'full_name': 'V Knight',}]}, 'abstract': "Abstract", 97 | 'access_type': 'LOCKED'} 98 | 99 | api = arcas.Ieee() 100 | article = api.to_dataframe(dummy_article) 101 | 102 | assert isinstance(article, pandas.core.frame.DataFrame) 103 | assert list(article.columns) == api.keys() 104 | assert len(article['url']) == 4 105 | 106 | assert article['url'].unique()[0] == 'https://ieeexplore.ieee.org/xpls/0000' 107 | assert article['key'].unique()[0] == 'Glynatsi2010' 108 | assert list(article['author'].unique()) == ['N Glynatsi', 'V Knight'] 109 | assert article['title'].unique()[0] == 'Title' 110 | assert article['abstract'].unique()[0] == 'Abstract' 111 | assert article['journal'].unique()[0] == 'IEEE/Journal' 112 | assert article['date'].unique()[0] == 2010 113 | assert article['open_access'].unique()[0] == False 114 | assert article['score'].unique()[0] == 'Not available' -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
16 | 17 | .PHONY: help 18 | help: 19 | @echo "Please use \`make ' where is one of" 20 | @echo " html to make standalone HTML files" 21 | @echo " dirhtml to make HTML files named index.html in directories" 22 | @echo " singlehtml to make a single large HTML file" 23 | @echo " pickle to make pickle files" 24 | @echo " json to make JSON files" 25 | @echo " htmlhelp to make HTML files and a HTML help project" 26 | @echo " qthelp to make HTML files and a qthelp project" 27 | @echo " applehelp to make an Apple Help Book" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " epub3 to make an epub3" 31 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 32 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 33 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 34 | @echo " text to make text files" 35 | @echo " man to make manual pages" 36 | @echo " texinfo to make Texinfo files" 37 | @echo " info to make Texinfo files and run them through makeinfo" 38 | @echo " gettext to make PO message catalogs" 39 | @echo " changes to make an overview of all changed/added/deprecated items" 40 | @echo " xml to make Docutils-native XML files" 41 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 42 | @echo " linkcheck to check all external links for integrity" 43 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 44 | @echo " coverage to run coverage check of the documentation (if enabled)" 45 | @echo " dummy to check syntax errors of document sources" 46 | 47 | .PHONY: clean 48 | clean: 49 | rm -rf $(BUILDDIR)/* 50 | 51 | .PHONY: html 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 
56 | 57 | .PHONY: dirhtml 58 | dirhtml: 59 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 60 | @echo 61 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 62 | 63 | .PHONY: singlehtml 64 | singlehtml: 65 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 66 | @echo 67 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 68 | 69 | .PHONY: pickle 70 | pickle: 71 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 72 | @echo 73 | @echo "Build finished; now you can process the pickle files." 74 | 75 | .PHONY: json 76 | json: 77 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 78 | @echo 79 | @echo "Build finished; now you can process the JSON files." 80 | 81 | .PHONY: htmlhelp 82 | htmlhelp: 83 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 84 | @echo 85 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 86 | ".hhp project file in $(BUILDDIR)/htmlhelp." 87 | 88 | .PHONY: qthelp 89 | qthelp: 90 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 91 | @echo 92 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 93 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 94 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Arcas.qhcp" 95 | @echo "To view the help file:" 96 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Arcas.qhc" 97 | 98 | .PHONY: applehelp 99 | applehelp: 100 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 101 | @echo 102 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 103 | @echo "N.B. You won't be able to view it unless you put it in" \ 104 | "~/Library/Documentation/Help or install it in your application" \ 105 | "bundle." 106 | 107 | .PHONY: devhelp 108 | devhelp: 109 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 110 | @echo 111 | @echo "Build finished." 
112 | @echo "To view the help file:" 113 | @echo "# mkdir -p $$HOME/.local/share/devhelp/Arcas" 114 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Arcas" 115 | @echo "# devhelp" 116 | 117 | .PHONY: epub 118 | epub: 119 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 120 | @echo 121 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 122 | 123 | .PHONY: epub3 124 | epub3: 125 | $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 126 | @echo 127 | @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." 128 | 129 | .PHONY: latex 130 | latex: 131 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 132 | @echo 133 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 134 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 135 | "(use \`make latexpdf' here to do that automatically)." 136 | 137 | .PHONY: latexpdf 138 | latexpdf: 139 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 140 | @echo "Running LaTeX files through pdflatex..." 141 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 142 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 143 | 144 | .PHONY: latexpdfja 145 | latexpdfja: 146 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 147 | @echo "Running LaTeX files through platex and dvipdfmx..." 148 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 149 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 150 | 151 | .PHONY: text 152 | text: 153 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 154 | @echo 155 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 156 | 157 | .PHONY: man 158 | man: 159 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 160 | @echo 161 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 162 | 163 | .PHONY: texinfo 164 | texinfo: 165 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 166 | @echo 167 | @echo "Build finished. 
The Texinfo files are in $(BUILDDIR)/texinfo." 168 | @echo "Run \`make' in that directory to run these through makeinfo" \ 169 | "(use \`make info' here to do that automatically)." 170 | 171 | .PHONY: info 172 | info: 173 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 174 | @echo "Running Texinfo files through makeinfo..." 175 | $(MAKE) -C $(BUILDDIR)/texinfo info 176 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 177 | 178 | .PHONY: gettext 179 | gettext: 180 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 181 | @echo 182 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 183 | 184 | .PHONY: changes 185 | changes: 186 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 187 | @echo 188 | @echo "The overview file is in $(BUILDDIR)/changes." 189 | 190 | .PHONY: linkcheck 191 | linkcheck: 192 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 193 | @echo 194 | @echo "Link check complete; look for any errors in the above output " \ 195 | "or in $(BUILDDIR)/linkcheck/output.txt." 196 | 197 | .PHONY: doctest 198 | doctest: 199 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 200 | @echo "Testing of doctests in the sources finished, look at the " \ 201 | "results in $(BUILDDIR)/doctest/output.txt." 202 | 203 | .PHONY: coverage 204 | coverage: 205 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 206 | @echo "Testing of coverage in the sources finished, look at the " \ 207 | "results in $(BUILDDIR)/coverage/python.txt." 208 | 209 | .PHONY: xml 210 | xml: 211 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 212 | @echo 213 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 214 | 215 | .PHONY: pseudoxml 216 | pseudoxml: 217 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 218 | @echo 219 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
220 | 221 | .PHONY: dummy 222 | dummy: 223 | $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy 224 | @echo 225 | @echo "Build finished. Dummy builder generates no files." 226 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Arcas documentation build configuration file, created by 5 | # sphinx-quickstart on Thu Dec 1 18:02:13 2016. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | # 20 | import os 21 | # import sys 22 | # sys.path.insert(0, os.path.abspath('.')) 23 | 24 | # -- General configuration ------------------------------------------------ 25 | 26 | # If your documentation needs a minimal Sphinx version, state it here. 27 | # 28 | # needs_sphinx = '1.0' 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 33 | extensions = [] 34 | 35 | # Add any paths that contain templates here, relative to this directory. 36 | templates_path = ['_templates'] 37 | 38 | # The suffix(es) of source filenames. 39 | # You can specify multiple suffix as a list of string: 40 | # 41 | # source_suffix = ['.rst', '.md'] 42 | source_suffix = '.rst' 43 | 44 | # The encoding of source files. 
45 | # 46 | # source_encoding = 'utf-8-sig' 47 | 48 | # The master toctree document. 49 | master_doc = 'index' 50 | 51 | # General information about the project. 52 | project = 'Arcas' 53 | copyright = '2016, Nikoleta Glynatsi' 54 | author = 'Nikoleta Glynatsi' 55 | 56 | # The version info for the project you're documenting, acts as replacement for 57 | # |version| and |release|, also used in various other places throughout the 58 | # built documents. 59 | # 60 | # The short X.Y version. 61 | version = '1.0.0' 62 | # The full version, including alpha/beta/rc tags. 63 | release = '1.0.0' 64 | 65 | # The language for content autogenerated by Sphinx. Refer to documentation 66 | # for a list of supported languages. 67 | # 68 | # This is also used if you do content translation via gettext catalogs. 69 | # Usually you set "language" from the command line for these cases. 70 | language = None 71 | 72 | # There are two options for replacing |today|: either, you set today to some 73 | # non-false value, then it is used: 74 | # 75 | # today = '' 76 | # 77 | # Else, today_fmt is used as the format for a strftime call. 78 | # 79 | # today_fmt = '%B %d, %Y' 80 | 81 | # List of patterns, relative to source directory, that match files and 82 | # directories to ignore when looking for source files. 83 | # These patterns also affect html_static_path and html_extra_path. 84 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 85 | 86 | # The reST default role (used for this markup: `text`) to use for all 87 | # documents. 88 | # 89 | # default_role = None 90 | 91 | # If true, '()' will be appended to :func: etc. cross-reference text. 92 | # 93 | # add_function_parentheses = True 94 | 95 | # If true, the current module name will be prepended to all description 96 | # unit titles (such as .. function::). 97 | # 98 | # add_module_names = True 99 | 100 | # If true, sectionauthor and moduleauthor directives will be shown in the 101 | # output. They are ignored by default.
102 | # 103 | # show_authors = False 104 | 105 | # The name of the Pygments (syntax highlighting) style to use. 106 | pygments_style = 'sphinx' 107 | 108 | # A list of ignored prefixes for module index sorting. 109 | # modindex_common_prefix = [] 110 | 111 | # If true, keep warnings as "system message" paragraphs in the built documents. 112 | # keep_warnings = False 113 | 114 | # If true, `todo` and `todoList` produce output, else they produce nothing. 115 | todo_include_todos = False 116 | 117 | 118 | # -- Options for HTML output ---------------------------------------------- 119 | 120 | # The theme to use for HTML and HTML Help pages. See the documentation for 121 | # a list of builtin themes. 122 | # 123 | on_rtd = os.environ.get('READTHEDOCS', None) == 'True' 124 | 125 | if not on_rtd: # only import and set the theme if we're building docs locally 126 | import sphinx_rtd_theme 127 | html_theme = 'sphinx_rtd_theme' 128 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 129 | 130 | # Theme options are theme-specific and customize the look and feel of a theme 131 | # further. For a list of options available for each theme, see the 132 | # documentation. 133 | # 134 | # html_theme_options = {} 135 | 136 | # Add any paths that contain custom themes here, relative to this directory. 137 | # html_theme_path = [] 138 | 139 | # The name for this set of Sphinx documents. 140 | # "{project} v{release} documentation" by default. 141 | # 142 | # html_title = 'Arcas v0.0.1' 143 | 144 | # A shorter title for the navigation bar. Default is the same as html_title. 145 | # 146 | # html_short_title = None 147 | 148 | # The name of an image file (relative to this directory) to place at the top 149 | # of the sidebar. 150 | # 151 | # html_logo = None 152 | 153 | # The name of an image file (relative to this directory) to use as a favicon of 154 | # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 155 | # pixels large.
156 | # 157 | # html_favicon = None 158 | 159 | # Add any paths that contain custom static files (such as style sheets) here, 160 | # relative to this directory. They are copied after the builtin static files, 161 | # so a file named "default.css" will overwrite the builtin "default.css". 162 | html_static_path = ['_static'] 163 | 164 | # Add any extra paths that contain custom files (such as robots.txt or 165 | # .htaccess) here, relative to this directory. These files are copied 166 | # directly to the root of the documentation. 167 | # 168 | # html_extra_path = [] 169 | 170 | # If not None, a 'Last updated on:' timestamp is inserted at every page 171 | # bottom, using the given strftime format. 172 | # The empty string is equivalent to '%b %d, %Y'. 173 | # 174 | # html_last_updated_fmt = None 175 | 176 | # If true, SmartyPants will be used to convert quotes and dashes to 177 | # typographically correct entities. 178 | # 179 | # html_use_smartypants = True 180 | 181 | # Custom sidebar templates, maps document names to template names. 182 | # 183 | # html_sidebars = {} 184 | 185 | # Additional templates that should be rendered to pages, maps page names to 186 | # template names. 187 | # 188 | # html_additional_pages = {} 189 | 190 | # If false, no module index is generated. 191 | # 192 | # html_domain_indices = True 193 | 194 | # If false, no index is generated. 195 | # 196 | # html_use_index = True 197 | 198 | # If true, the index is split into individual pages for each letter. 199 | # 200 | # html_split_index = False 201 | 202 | # If true, links to the reST sources are added to the pages. 203 | # 204 | # html_show_sourcelink = True 205 | 206 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 207 | # 208 | # html_show_sphinx = True 209 | 210 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 
211 | # 212 | # html_show_copyright = True 213 | 214 | # If true, an OpenSearch description file will be output, and all pages will 215 | # contain an HTML link tag referring to it. The value of this option must be the 216 | # base URL from which the finished HTML is served. 217 | # 218 | # html_use_opensearch = '' 219 | 220 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 221 | # html_file_suffix = None 222 | 223 | # Language to be used for generating the HTML full-text search index. 224 | # Sphinx supports the following languages: 225 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' 226 | # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh' 227 | # 228 | # html_search_language = 'en' 229 | 230 | # A dictionary with options for the search language support, empty by default. 231 | # 'ja' uses this config value. 232 | # 'zh' users can customize the `jieba` dictionary path. 233 | # 234 | # html_search_options = {'type': 'default'} 235 | 236 | # The name of a javascript file (relative to the configuration directory) that 237 | # implements a search results scorer. If empty, the default will be used. 238 | # 239 | # html_search_scorer = 'scorer.js' 240 | 241 | # Output file base name for HTML help builder. 242 | htmlhelp_basename = 'Arcasdoc' 243 | 244 | # -- Options for LaTeX output --------------------------------------------- 245 | 246 | latex_elements = { 247 | # The paper size ('letterpaper' or 'a4paper'). 248 | # 249 | # 'papersize': 'letterpaper', 250 | 251 | # The font size ('10pt', '11pt' or '12pt'). 252 | # 253 | # 'pointsize': '10pt', 254 | 255 | # Additional stuff for the LaTeX preamble. 256 | # 257 | # 'preamble': '', 258 | 259 | # Latex figure (float) alignment 260 | # 261 | # 'figure_align': 'htbp', 262 | } 263 | 264 | # Grouping the document tree into LaTeX files. List of tuples 265 | # (source start file, target name, title, 266 | # author, documentclass [howto, manual, or own class]).
267 | latex_documents = [ 268 | (master_doc, 'Arcas.tex', 'Arcas Documentation', 269 | 'Nikoleta Glynatsi', 'manual'), 270 | ] 271 | 272 | # The name of an image file (relative to this directory) to place at the top of 273 | # the title page. 274 | # 275 | # latex_logo = None 276 | 277 | # For "manual" documents, if this is true, then toplevel headings are parts, 278 | # not chapters. 279 | # 280 | # latex_use_parts = False 281 | 282 | # If true, show page references after internal links. 283 | # 284 | # latex_show_pagerefs = False 285 | 286 | # If true, show URL addresses after external links. 287 | # 288 | # latex_show_urls = False 289 | 290 | # Documents to append as an appendix to all manuals. 291 | # 292 | # latex_appendices = [] 293 | 294 | # If false, will not define \strong, \code, \titleref, \crossref ... but only 295 | # \sphinxstrong, ..., \sphinxtitleref, ... To help avoid clash with user added 296 | # packages. 297 | # 298 | # latex_keep_old_macro_names = True 299 | 300 | # If false, no module index is generated. 301 | # 302 | # latex_domain_indices = True 303 | 304 | 305 | # -- Options for manual page output --------------------------------------- 306 | 307 | # One entry per manual page. List of tuples 308 | # (source start file, name, description, authors, manual section). 309 | man_pages = [ 310 | (master_doc, 'arcas', 'Arcas Documentation', 311 | [author], 1) 312 | ] 313 | 314 | # If true, show URL addresses after external links. 315 | # 316 | # man_show_urls = False 317 | 318 | 319 | # -- Options for Texinfo output ------------------------------------------- 320 | 321 | # Grouping the document tree into Texinfo files.
List of tuples 322 | # (source start file, target name, title, author, 323 | # dir menu entry, description, category) 324 | texinfo_documents = [ 325 | (master_doc, 'Arcas', 'Arcas Documentation', 326 | author, 'Arcas', 'One line description of project.', 327 | 'Miscellaneous'), 328 | ] 329 | 330 | # Documents to append as an appendix to all manuals. 331 | # 332 | # texinfo_appendices = [] 333 | 334 | # If false, no module index is generated. 335 | # 336 | # texinfo_domain_indices = True 337 | 338 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 339 | # 340 | # texinfo_show_urls = 'footnote' 341 | 342 | # If true, do not generate a @detailmenu in the "Top" node's menu. 343 | # 344 | # texinfo_no_detailmenu = False 345 | --------------------------------------------------------------------------------