├── .editorconfig ├── .github └── workflows │ ├── pythonpackage.yml │ └── pythonpublish.yml ├── .gitignore ├── .zenodo.json ├── AUTHORS.md ├── CONTRIBUTING.md ├── HISTORY.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── docs └── examples │ ├── bit-pattern-savedrecs.txt │ ├── scopus.ris │ └── single-article.txt ├── requirements_dev.txt ├── setup.cfg ├── setup.py ├── tests ├── conftest.py ├── features │ ├── article.feature │ └── cached.feature ├── test_article.py ├── test_collection_cached.py └── test_fields.py └── wostools ├── __init__.py ├── _testutils.py ├── article.py ├── base.py ├── cached.py ├── cli.py ├── exceptions.py ├── fields.py └── sources ├── __init__.py ├── isi.py └── scopus.py /.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org 2 | 3 | root = true 4 | 5 | [*] 6 | indent_style = space 7 | indent_size = 4 8 | trim_trailing_whitespace = true 9 | insert_final_newline = true 10 | charset = utf-8 11 | end_of_line = lf 12 | 13 | [*.bat] 14 | indent_style = tab 15 | end_of_line = crlf 16 | 17 | [LICENSE] 18 | insert_final_newline = false 19 | 20 | [Makefile] 21 | indent_style = tab 22 | -------------------------------------------------------------------------------- /.github/workflows/pythonpackage.yml: -------------------------------------------------------------------------------- 1 | name: Python package 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | 8 | runs-on: ubuntu-latest 9 | strategy: 10 | max-parallel: 4 11 | matrix: 12 | python-version: ["3.8", "3.9", "3.10", "3.11"] 13 | 14 | steps: 15 | - uses: actions/checkout@v1 16 | - name: Set up Python ${{ matrix.python-version }} 17 | uses: actions/setup-python@v1 18 | with: 19 | python-version: ${{ matrix.python-version }} 20 | - name: Install dependencies 21 | run: | 22 | python -m pip install --upgrade pip 23 | pip install -r requirements_dev.txt 24 | - name: Lint with flake8 25 | run: | 26 | pip install flake8 27 | # 
stop the build if there are Python syntax errors or undefined names 28 | flake8 wostools tests --show-source --statistics 29 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 30 | flake8 wostools tests --count --exit-zero --statistics 31 | - name: Test with pytest 32 | run: python setup.py test 33 | -------------------------------------------------------------------------------- /.github/workflows/pythonpublish.yml: -------------------------------------------------------------------------------- 1 | name: Upload Python Package 2 | 3 | on: 4 | release: 5 | types: [created] 6 | 7 | jobs: 8 | deploy: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v1 12 | - name: Set up Python 13 | uses: actions/setup-python@v1 14 | with: 15 | python-version: '3.x' 16 | 17 | - name: Install dependencies 18 | run: | 19 | python -m pip install --upgrade pip 20 | pip install -r requirements_dev.txt 21 | pip install setuptools wheel twine 22 | 23 | - name: Test with pytest 24 | run: python setup.py test 25 | 26 | - name: Build and publish 27 | env: 28 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 29 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 30 | run: | 31 | python setup.py sdist bdist_wheel 32 | twine upload dist/* 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos 
into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # Jupyter Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # SageMath parsed files 81 | *.sage.py 82 | 83 | # dotenv 84 | .env 85 | 86 | # virtualenv 87 | .venv 88 | venv/ 89 | ENV/ 90 | 91 | # Spyder project settings 92 | .spyderproject 93 | .spyproject 94 | 95 | # Rope project settings 96 | .ropeproject 97 | 98 | # mkdocs documentation 99 | /site 100 | 101 | # mypy 102 | .mypy_cache/ 103 | 104 | # scratch yrself 105 | scratch/ 106 | 107 | # pipenv files 108 | Pipfile 109 | Pipfile.lock 110 | 111 | # data folder 112 | data/ 113 | 114 | # editors 115 | .vscode 116 | .idea 117 | *.iml 118 | 119 | # xml files 120 | *.xml 121 | 122 | # json files 123 | *.json 124 | .testmondata 125 | -------------------------------------------------------------------------------- /.zenodo.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Translates isi web of knowledge files into python objects.", 3 | "license": "MIT", 4 | "title": "coreofscience/python-wostools", 5 | "version": "v3.0.2", 6 | "upload_type": "software", 7 | "publication_date": "2018-08-13", 8 | "creators": [ 9 | { 10 | "orcid": "0000-0002-3097-9438", 11 | "affiliation": "Core of science", 12 | "name": "Oscar David Arbeláez E."
13 | }, 14 | { 15 | "orcid": "0000-0002-1249-7128", 16 | "affiliation": "Core of science", 17 | "name": "Juan David Alzate Cardona" 18 | }, 19 | { 20 | "name": "Daniel Stiven Valencia Hernandez", 21 | "affiliation": "Core of science" 22 | } 23 | ], 24 | "access_right": "open", 25 | "related_identifiers": [ 26 | { 27 | "scheme": "url", 28 | "identifier": "https://github.com/coreofscience/python-wostools/tree/v3.0.2", 29 | "relation": "isSupplementTo" 30 | }, 31 | { 32 | "scheme": "doi", 33 | "identifier": "10.5281/zenodo.1344260", 34 | "relation": "isNewVersionOf" 35 | } 36 | ] 37 | } 38 | -------------------------------------------------------------------------------- /AUTHORS.md: -------------------------------------------------------------------------------- 1 | # Credits 2 | 3 | ## Development Lead 4 | 5 | - Core of Science \<\> 6 | 7 | ## Contributors 8 | 9 | - Oscar Arbeláez \<\> 10 | - Juan David Alzate Cardona \<\> 11 | - Daniel Valencia \<\> 12 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | Contributions are welcome, and they are greatly appreciated! Every 4 | little bit helps, and credit will always be given. 5 | 6 | You can contribute in many ways: 7 | 8 | ## Types of Contributions 9 | 10 | ### Report Bugs 11 | 12 | Report bugs at 13 | . 14 | 15 | If you are reporting a bug, please include: 16 | 17 | - Your operating system name and version. 18 | - Any details about your local setup that might be helpful in 19 | troubleshooting. 20 | - Detailed steps to reproduce the bug. 21 | 22 | ### Fix Bugs 23 | 24 | Look through the GitHub issues for bugs. Anything tagged with \"bug\" 25 | and \"help wanted\" is open to whoever wants to implement it. 26 | 27 | ### Implement Features 28 | 29 | Look through the GitHub issues for features. 
Anything tagged with 30 | \"enhancement\" and \"help wanted\" is open to whoever wants to 31 | implement it. 32 | 33 | ### Write Documentation 34 | 35 | Python WoS tools could always use more documentation, whether as part of 36 | the official Python WoS tools docs, in docstrings, or even on the web in 37 | blog posts, articles, and such. 38 | 39 | ### Submit Feedback 40 | 41 | The best way to send feedback is to file an issue at 42 | . 43 | 44 | If you are proposing a feature: 45 | 46 | - Explain in detail how it would work. 47 | - Keep the scope as narrow as possible, to make it easier to 48 | implement. 49 | - Remember that this is a volunteer-driven project, and that 50 | contributions are welcome :) 51 | 52 | ## Get Started! 53 | 54 | Ready to contribute? Here\'s how to set up [wostools]{.title-ref} for 55 | local development. 56 | 57 | 1. Fork the [wostools]{.title-ref} repo on GitHub. 58 | 59 | 2. Clone your fork locally: 60 | 61 | ```bash 62 | $ git clone git@github.com:your_name_here/python-wostools.git 63 | ``` 64 | 65 | 3. Install your local copy into a virtualenv. Assuming you have 66 | virtualenvwrapper installed, this is how you set up your fork for 67 | local development: 68 | 69 | ```bash 70 | $ mkvirtualenv wostools 71 | $ cd wostools/ 72 | $ python setup.py develop 73 | ``` 74 | 75 | 4. Create a branch for local development: 76 | 77 | ```bash 78 | $ git checkout -b name-of-your-bugfix-or-feature 79 | ``` 80 | 81 | Now you can make your changes locally. 82 | 83 | 5. When you\'re done making changes, check that your changes pass 84 | flake8 and the tests, including testing other Python versions with 85 | tox: 86 | 87 | ```bash 88 | $ flake8 wostools tests 89 | $ python setup.py test or py.test 90 | $ tox 91 | ``` 92 | 93 | To get flake8 and tox, just pip install them into your virtualenv. 94 | 95 | 6. Commit your changes and push your branch to GitHub: 96 | 97 | ```bash 98 | $ git add . 
99 | $ git commit -m "Your detailed description of your changes." 100 | $ git push origin name-of-your-bugfix-or-feature 101 | ``` 102 | 103 | 7. Submit a pull request through the GitHub website. 104 | 105 | ## Pull Request Guidelines 106 | 107 | Before you submit a pull request, check that it meets these guidelines: 108 | 109 | 1. The pull request should include tests. 110 | 2. If the pull request adds functionality, the docs should be updated. 111 | Put your new functionality into a function with a docstring, and add 112 | the feature to the list in README.md. 113 | 114 | ## Tips 115 | 116 | To run a subset of tests: 117 | 118 | ```bash 119 | $ py.test tests.test_wostools 120 | ``` 121 | 122 | ## Deploying 123 | 124 | A reminder for the maintainers on how to deploy. Make sure all your 125 | changes are committed (including an entry in HISTORY.md). Then run: 126 | 127 | ```bash 128 | $ bumpversion patch # possible: major / minor / patch 129 | $ git push 130 | $ git push --tags 131 | ``` 132 | 133 | Travis will then deploy to PyPI if tests pass. 134 | -------------------------------------------------------------------------------- /HISTORY.md: -------------------------------------------------------------------------------- 1 | # History 2 | 3 | ## 3.0.2 (2020-10-15) 4 | 5 | - Fix packaging accident to allow package to install correctly. 6 | 7 | ## 3.0.1 (2020-10-15) 8 | 9 | - Clear logging error channel. 10 | 11 | ## 3.0.0 (2020-10-15) 12 | 13 | - (!) Adds scopus RIS format support. 14 | - Drops support for `LazyCollection`. 15 | - Adds documented support for Python 3.8 and 3.9. 16 | - Drops documented support for Python 3.6. 17 | - Improves article matching in collections. 18 | 19 | ## 2.0.7 (2020-08-23) 20 | 21 | - Remove from the collection those documents whose label is unknown or conflicting. 22 | 23 | ## 2.0.6 (2020-08-21) 24 | 25 | - Accommodate for unknown fields in ISI WOS files.
26 | 27 | ## 2.0.5 (2020-08-15) 28 | 29 | - Fix and prevent distribution accidents. 30 | 31 | ## 2.0.4 (2020-08-15) 32 | 33 | - Add issue to the articles plain dict output. 34 | - Fix some bugs with issues. 35 | 36 | ## 2.0.3 (2020-08-15) 37 | 38 | - Add issue to the articles top level. 39 | 40 | ## 2.0.2 (2020-08-09) 41 | 42 | - Fix bug with first author merging articles. 43 | - Remove instances where we inherit from `object`. 44 | 45 | ## 2.0.1 (2020-08-09) 46 | 47 | - Fix error with wos files that have an invisible character before the field 48 | key. 49 | 50 | ## 2.0.0 (2020-08-08) 51 | 52 | - Make the article class more concrete 53 | - Make collections iterable 54 | - Add cached and lazy collections for different use cases 55 | 56 | ## 0.2.0 (2018-08-12) 57 | 58 | - Add support for all WOS fields. 59 | - Add graph building support. 60 | - Add a little cli for common tasks. 61 | 62 | ## 0.1.1 (2018-05-10) 63 | 64 | - First release on PyPI. 65 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018-2020, Core of Science 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include AUTHORS.md 2 | include CONTRIBUTING.md 3 | include HISTORY.md 4 | include LICENSE 5 | include README.md 6 | 7 | recursive-include tests * 8 | recursive-exclude * __pycache__ 9 | recursive-exclude * *.py[co] 10 | 11 | recursive-include docs *.txt 12 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean clean-test clean-pyc clean-build docs help, test-watch 2 | .DEFAULT_GOAL := help 3 | 4 | NOTIFY_FILE := /tmp/pytest-$$(pwd | md5sum | cut -d " " -f 1) 5 | 6 | define BROWSER_PYSCRIPT 7 | import os, webbrowser, sys 8 | 9 | try: 10 | from urllib import pathname2url 11 | except: 12 | from urllib.request import pathname2url 13 | 14 | webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1]))) 15 | endef 16 | export BROWSER_PYSCRIPT 17 | 18 | define PRINT_HELP_PYSCRIPT 19 | import re, sys 20 | 21 | for line in sys.stdin: 22 | match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) 23 | if match: 24 | target, help = match.groups() 25 | print("%-20s %s" % (target, help)) 26 | endef 27 | export PRINT_HELP_PYSCRIPT 28 | 29 | BROWSER := python -c "$$BROWSER_PYSCRIPT" 30 | 31 | help: 32 | @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) 33 | 
34 | clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts 35 | 36 | clean-build: ## remove build artifacts 37 | rm -fr build/ 38 | rm -fr dist/ 39 | rm -fr .eggs/ 40 | find . -name '*.egg-info' -exec rm -fr {} + 41 | find . -name '*.egg' -exec rm -f {} + 42 | 43 | clean-pyc: ## remove Python file artifacts 44 | find . -name '*.pyc' -exec rm -f {} + 45 | find . -name '*.pyo' -exec rm -f {} + 46 | find . -name '*~' -exec rm -f {} + 47 | find . -name '__pycache__' -exec rm -fr {} + 48 | 49 | clean-test: ## remove test and coverage artifacts 50 | rm -fr .tox/ 51 | rm -f .coverage 52 | rm -fr htmlcov/ 53 | rm -fr .pytest_cache 54 | 55 | lint: ## check style with flake8 56 | flake8 wostools tests 57 | 58 | test: ## run tests quickly with the default Python 59 | python -m pytest 60 | 61 | test-watch: 62 | @ptw \ 63 | --ext "py,feature" \ 64 | --onpass "coverage report --skip-empty --skip-covered -m" \ 65 | --onfail "notify-send.sh -R $(NOTIFY_FILE) -i face-worried --hint int:transient:1 'Test failed' 'Ooops we have a problem, not all tests passed'" \ 66 | --onexit "notify-send.sh -R $(NOTIFY_FILE) -i media-playback-stop --hint int:transient:1 'Test runner stopped' 'Just so you know, the test runner stopped'" \ 67 | --runner "coverage run --source wostools -m pytest" \ 68 | 69 | coverage: ## check code coverage quickly with the default Python 70 | coverage run --source wostools -m pytest 71 | coverage report -m 72 | coverage html 73 | $(BROWSER) htmlcov/index.html 74 | 75 | release: dist ## package and upload a release 76 | twine upload dist/* 77 | 78 | dist: clean ## builds source and wheel package 79 | python setup.py sdist 80 | python setup.py bdist_wheel 81 | ls -l dist 82 | 83 | install: clean ## install the package to the active Python's site-packages 84 | python setup.py install 85 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # Python WoS tools 2 | 3 | This project is archived, please use: https://github.com/coreofscience/python-bibx. 4 | -------------------------------------------------------------------------------- /docs/examples/single-article.txt: -------------------------------------------------------------------------------- 1 | PT J 2 | AU Wodarz, S 3 | Hasegawa, T 4 | Ishio, S 5 | Homma, T 6 | AF Wodarz, Siggi 7 | Hasegawa, Takashi 8 | Ishio, Shunji 9 | Homma, Takayuki 10 | TI Structural control of ultra-fine CoPt nanodot arrays via 11 | electrodeposition process 12 | SO JOURNAL OF MAGNETISM AND MAGNETIC MATERIALS 13 | LA English 14 | DT Article 15 | DE Electrodeposition; Structural control; Nanodot array; Bit-patterned 16 | media; CoPt alloy 17 | ID BIT-PATTERNED MEDIA; ELECTRON-BEAM LITHOGRAPHY; RECORDING MEDIA; 18 | MAGNETIC MEDIA; DENSITY; FILMS; ANISOTROPY; STORAGE 19 | AB CoPt nanodot arrays were fabricated by combining electrodeposition and electron beam lithography (EBL) for the use of bit-patterned media (BPM). To achieve precise control of deposition uniformity and coercivity of the CoPt nanodot arrays, their crystal structure and magnetic properties were controlled by controlling the diffusion state of metal ions from the initial deposition stage with the application of bath agitation. Following bath agitation, the composition gradient of the CoPt alloy with thickness was mitigated to have a near-ideal alloy composition of Co:Pt =80:20, which induces epitaxial-like growth from Ru substrate, thus resulting in the improvement of the crystal orientation of the hcp (002) structure from its initial deposition stages. 
Furthermore, the cross-sectional transmission electron microscope (TEM) analysis of the nanodots deposited with bath agitation showed CoPt growth along its c-axis oriented in the perpendicular direction, having uniform lattice fringes on the hcp (002) plane from the Ru underlayer interface, which is a significant factor to induce perpendicular magnetic anisotropy. Magnetic characterization of the CoPt nanodot arrays showed increase in the perpendicular coercivity and squareness of the hysteresis loops from 2.0 kOe and 0.64 (without agitation) to 4.0 kOe and 0.87 with bath agitation. Based on the detailed characterization of nanodot arrays, the precise crystal structure control of the nanodot arrays with ultra-high recording density by electrochemical process was successfully demonstrated. 20 | C1 [Wodarz, Siggi; Homma, Takayuki] Waseda Univ, Dept Appl Chem, Shinjuku Ku, Tokyo 1698555, Japan. 21 | [Hasegawa, Takashi; Ishio, Shunji] Akita Univ, Dept Mat Sci, Akita 0108502, Japan. 22 | RP Homma, T (reprint author), Waseda Univ, Dept Appl Chem, Shinjuku Ku, Tokyo 1698555, Japan. 23 | EM t.homma@waseda.jp 24 | OI Hasegawa, Takashi/0000-0002-8178-4980 25 | FU JSPS KAKENHI Grant [25249104] 26 | FX This work was supported in part by JSPS KAKENHI Grant Number 25249104. 
27 | CR Albrecht TR, 2013, IEEE T MAGN, V49, P773, DOI 10.1109/TMAG.2012.2227303 28 | BUSCHOW KHJ, 1983, J MAGN MAGN MATER, V38, P1, DOI 10.1016/0304-8853(83)90097-5 29 | Gapin AI, 2006, J APPL PHYS, V99, DOI 10.1063/1.2163289 30 | Homma Takayuki, 2015, ECS Transactions, V64, P1, DOI 10.1149/06431.0001ecst 31 | Kryder MH, 2008, P IEEE, V96, P1810, DOI 10.1109/JPROC.2008.2004315 32 | Kubo T, 2005, J APPL PHYS, V97, DOI 10.1063/1.1855572 33 | Lodder JC, 2004, J MAGN MAGN MATER, V272, P1692, DOI 10.1016/j.jmmm.2003.12.259 34 | Mitsuzuka K, 2007, IEEE T MAGN, V43, P2160, DOI 10.1109/TMAG.2007.893129 35 | Ouchi T, 2010, ELECTROCHIM ACTA, V55, P8081, DOI 10.1016/j.electacta.2010.02.073 36 | Pattanaik G, 2006, J APPL PHYS, V99, DOI 10.1063/1.2150805 37 | Pattanaik G, 2007, ELECTROCHIM ACTA, V52, P2755, DOI 10.1016/j.electacta.2006.07.062 38 | Piramanayagam SN, 2009, J MAGN MAGN MATER, V321, P485, DOI 10.1016/j.jmmm.2008.05.007 39 | Ross CA, 2008, MRS BULL, V33, P838, DOI 10.1557/mrs2008.179 40 | Shiroishi Y, 2009, IEEE T MAGN, V45, P3816, DOI 10.1109/TMAG.2009.2024879 41 | Sirtori V, 2011, ACS APPL MATER INTER, V3, P1800, DOI 10.1021/am200267u 42 | Sohn JS, 2009, NANOTECHNOLOGY, V20, DOI 10.1088/0957-4484/20/2/025302 43 | Sun SH, 2000, SCIENCE, V287, P1989, DOI 10.1126/science.287.5460.1989 44 | Terris BD, 2007, MICROSYST TECHNOL, V13, P189, DOI 10.1007/s00542-006-0144-9 45 | Wang JP, 2008, P IEEE, V96, P1847, DOI 10.1109/JPROC.2008.2004318 46 | Weller D, 1999, IEEE T MAGN, V35, P4423, DOI 10.1109/20.809134 47 | Weller D, 2000, IEEE T MAGN, V36, P10, DOI 10.1109/20.824418 48 | Wodarz S, 2016, ELECTROCHIM ACTA, V197, P330, DOI 10.1016/j.electacta.2015.11.136 49 | Xu X, 2012, J ELECTROCHEM SOC, V159, pD240, DOI 10.1149/2.090204jes 50 | Yang X, 2007, J VAC SCI TECHNOL B, V25, P2202, DOI 10.1116/1.2798711 51 | Yang XM, 2009, ACS NANO, V3, P1844, DOI 10.1021/nn900073r 52 | Yasui N, 2003, APPL PHYS LETT, V83, P3347, DOI 10.1063/1.1622787 53 | Yua H., 2009, J APPL PHYS, V105 54 
| Zhu JG, 2008, IEEE T MAGN, V44, P125, DOI 10.1109/TMAG.2007.911031 55 | NR 28 56 | TC 0 57 | Z9 0 58 | U1 21 59 | U2 21 60 | PU ELSEVIER SCIENCE BV 61 | PI AMSTERDAM 62 | PA PO BOX 211, 1000 AE AMSTERDAM, NETHERLANDS 63 | SN 0304-8853 64 | EI 1873-4766 65 | J9 J MAGN MAGN MATER 66 | JI J. Magn. Magn. Mater. 67 | PD MAY 15 68 | PY 2017 69 | VL 430 70 | BP 52 71 | EP 58 72 | DI 10.1016/j.jmmm.2017.01.061 73 | PG 7 74 | WC Materials Science, Multidisciplinary; Physics, Condensed Matter 75 | SC Materials Science; Physics 76 | GA EP2GP 77 | UT WOS:000397201600008 78 | ER -------------------------------------------------------------------------------- /requirements_dev.txt: -------------------------------------------------------------------------------- 1 | flake8==3.8.4 2 | coverage==5.3 3 | 4 | pytest==6.1.2 5 | pytest-runner==5.2 6 | pytest-watch==4.2.0 7 | pytest-bdd==4.0.1 8 | 9 | dataclasses==0.8; python_version < "3.7" 10 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal = 1 3 | 4 | [flake8] 5 | exclude = docs 6 | ignore = E203, E266, E501, W503 7 | max-line-length = 89 8 | max-complexity = 18 9 | select = B,C,E,F,W,T4,B9 10 | 11 | [aliases] 12 | test = pytest 13 | 14 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """The setup script.""" 4 | 5 | from setuptools import find_packages, setup 6 | 7 | with open("README.md") as readme_file: 8 | readme = readme_file.read() 9 | 10 | with open("HISTORY.md") as history_file: 11 | history = history_file.read() 12 | 13 | requirements = ["Click>=7.0,<8"] 14 | 15 | setup_requirements = ["pytest-runner"] 16 | 17 | 18 | test_requirements = ["pytest", "pytest-bdd", 'dataclasses; python_version<"3.7"'] 19 | 20 |
setup( 21 | author="Core of Science", 22 | author_email="dev@coreofscience.com", 23 | classifiers=[ 24 | "Development Status :: 2 - Pre-Alpha", 25 | "Intended Audience :: Developers", 26 | "License :: OSI Approved :: MIT License", 27 | "Natural Language :: English", 28 | "Programming Language :: Python :: 3", 29 | "Programming Language :: Python :: 3.8", 30 | "Programming Language :: Python :: 3.9", 31 | "Programming Language :: Python :: 3.10", 32 | "Programming Language :: Python :: 3.11", 33 | ], 34 | entry_points={"console_scripts": ["wostools=wostools.cli:main"]}, 35 | description="Translates isi web of knowledge files into python objects.", 36 | install_requires=requirements, 37 | license="MIT license", 38 | long_description=readme + "\n\n" + history, 39 | include_package_data=True, 40 | keywords="wostools", 41 | name="wostools", 42 | packages=find_packages(include=["wostools", "wostools.*"]), 43 | setup_requires=setup_requirements, 44 | test_suite="tests", 45 | tests_require=test_requirements, 46 | url="https://github.com/coreofscience/python-wostools", 47 | version="3.0.2", 48 | zip_safe=False, 49 | long_description_content_type="text/markdown", 50 | ) 51 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | """ 2 | Configuration file for python-wostools tests. 
3 | """ 4 | 5 | from wostools import Article 6 | 7 | import pytest 8 | import io 9 | 10 | 11 | @pytest.fixture 12 | def article(): 13 | file = io.StringIO( 14 | "PT J\n" 15 | "AU Wodarz, S\n" 16 | " Hasegawa, T\n" 17 | " Ishio, S\n" 18 | " Homma, T\n" 19 | "AF Wodarz, Siggi\n" 20 | " Hasegawa, Takashi\n" 21 | " Ishio, Shunji\n" 22 | " Homma, Takayuki\n" 23 | "TI Structural control of ultra-fine CoPt nanodot arrays via\n" 24 | " electrodeposition process\n" 25 | "SO JOURNAL OF MAGNETISM AND MAGNETIC MATERIALS\n" 26 | "LA English\n" 27 | "DT Article\n" 28 | "DE Electrodeposition; Structural control; Nanodot array; Bit-patterned\n" 29 | " media; CoPt alloy\n" 30 | "ID BIT-PATTERNED MEDIA; ELECTRON-BEAM LITHOGRAPHY; RECORDING MEDIA;\n" 31 | " MAGNETIC MEDIA; DENSITY; FILMS; ANISOTROPY; STORAGE\n" 32 | "AB CoPt nanodot arrays were fabricated by combining electrodeposition and electron beam lithography (EBL) for the use of bit-patterned media (BPM). To achieve precise control of deposition uniformity and coercivity of the CoPt nanodot arrays, their crystal structure and magnetic properties were controlled by controlling the diffusion state of metal ions from the initial deposition stage with the application of bath agitation. Following bath agitation, the composition gradient of the CoPt alloy with thickness was mitigated to have a near-ideal alloy composition of Co:Pt =80:20, which induces epitaxial-like growth from Ru substrate, thus resulting in the improvement of the crystal orientation of the hcp (002) structure from its initial deposition stages. Furthermore, the cross-sectional transmission electron microscope (TEM) analysis of the nanodots deposited with bath agitation showed CoPt growth along its c-axis oriented in the perpendicular direction, having uniform lattice fringes on the hcp (002) plane from the Ru underlayer interface, which is a significant factor to induce perpendicular magnetic anisotropy. 
Magnetic characterization of the CoPt nanodot arrays showed increase in the perpendicular coercivity and squareness of the hysteresis loops from 2.0 kOe and 0.64 (without agitation) to 4.0 kOe and 0.87 with bath agitation. Based on the detailed characterization of nanodot arrays, the precise crystal structure control of the nanodot arrays with ultra-high recording density by electrochemical process was successfully demonstrated.\n" 33 | "C1 [Wodarz, Siggi; Homma, Takayuki] Waseda Univ, Dept Appl Chem, Shinjuku Ku, Tokyo 1698555, Japan.\n" 34 | " [Hasegawa, Takashi; Ishio, Shunji] Akita Univ, Dept Mat Sci, Akita 0108502, Japan.\n" 35 | "RP Homma, T (reprint author), Waseda Univ, Dept Appl Chem, Shinjuku Ku, Tokyo 1698555, Japan.\n" 36 | "EM t.homma@waseda.jp\n" 37 | "OI Hasegawa, Takashi/0000-0002-8178-4980\n" 38 | "FU JSPS KAKENHI Grant [25249104]\n" 39 | "FX This work was supported in part by JSPS KAKENHI Grant Number 25249104.\n" 40 | "CR Albrecht TR, 2013, IEEE T MAGN, V49, P773, DOI 10.1109/TMAG.2012.2227303\n" 41 | " BUSCHOW KHJ, 1983, J MAGN MAGN MATER, V38, P1, DOI 10.1016/0304-8853(83)90097-5\n" 42 | " Gapin AI, 2006, J APPL PHYS, V99, DOI 10.1063/1.2163289\n" 43 | " Homma Takayuki, 2015, ECS Transactions, V64, P1, DOI 10.1149/06431.0001ecst\n" 44 | " Kryder MH, 2008, P IEEE, V96, P1810, DOI 10.1109/JPROC.2008.2004315\n" 45 | " Kubo T, 2005, J APPL PHYS, V97, DOI 10.1063/1.1855572\n" 46 | " Lodder JC, 2004, J MAGN MAGN MATER, V272, P1692, DOI 10.1016/j.jmmm.2003.12.259\n" 47 | " Mitsuzuka K, 2007, IEEE T MAGN, V43, P2160, DOI 10.1109/TMAG.2007.893129\n" 48 | " Ouchi T, 2010, ELECTROCHIM ACTA, V55, P8081, DOI 10.1016/j.electacta.2010.02.073\n" 49 | " Pattanaik G, 2006, J APPL PHYS, V99, DOI 10.1063/1.2150805\n" 50 | " Pattanaik G, 2007, ELECTROCHIM ACTA, V52, P2755, DOI 10.1016/j.electacta.2006.07.062\n" 51 | " Piramanayagam SN, 2009, J MAGN MAGN MATER, V321, P485, DOI 10.1016/j.jmmm.2008.05.007\n" 52 | " Ross CA, 2008, MRS BULL, V33, P838, DOI 
10.1557/mrs2008.179\n" 53 | " Shiroishi Y, 2009, IEEE T MAGN, V45, P3816, DOI 10.1109/TMAG.2009.2024879\n" 54 | " Sirtori V, 2011, ACS APPL MATER INTER, V3, P1800, DOI 10.1021/am200267u\n" 55 | " Sohn JS, 2009, NANOTECHNOLOGY, V20, DOI 10.1088/0957-4484/20/2/025302\n" 56 | " Sun SH, 2000, SCIENCE, V287, P1989, DOI 10.1126/science.287.5460.1989\n" 57 | " Terris BD, 2007, MICROSYST TECHNOL, V13, P189, DOI 10.1007/s00542-006-0144-9\n" 58 | " Wang JP, 2008, P IEEE, V96, P1847, DOI 10.1109/JPROC.2008.2004318\n" 59 | " Weller D, 1999, IEEE T MAGN, V35, P4423, DOI 10.1109/20.809134\n" 60 | " Weller D, 2000, IEEE T MAGN, V36, P10, DOI 10.1109/20.824418\n" 61 | " Wodarz S, 2016, ELECTROCHIM ACTA, V197, P330, DOI 10.1016/j.electacta.2015.11.136\n" 62 | " Xu X, 2012, J ELECTROCHEM SOC, V159, pD240, DOI 10.1149/2.090204jes\n" 63 | " Yang X, 2007, J VAC SCI TECHNOL B, V25, P2202, DOI 10.1116/1.2798711\n" 64 | " Yang XM, 2009, ACS NANO, V3, P1844, DOI 10.1021/nn900073r\n" 65 | " Yasui N, 2003, APPL PHYS LETT, V83, P3347, DOI 10.1063/1.1622787\n" 66 | " Yua H., 2009, J APPL PHYS, V105\n" 67 | " Zhu JG, 2008, IEEE T MAGN, V44, P125, DOI 10.1109/TMAG.2007.911031\n" 68 | "NR 28\n" 69 | "TC 0\n" 70 | "Z9 0\n" 71 | "U1 21\n" 72 | "U2 21\n" 73 | "PU ELSEVIER SCIENCE BV\n" 74 | "PI AMSTERDAM\n" 75 | "PA PO BOX 211, 1000 AE AMSTERDAM, NETHERLANDS\n" 76 | "SN 0304-8853\n" 77 | "EI 1873-4766\n" 78 | "J9 J MAGN MAGN MATER\n" 79 | "JI J. Magn. Magn. 
Mater.\n" 80 | "PD MAY 15\n" 81 | "PY 2017\n" 82 | "VL 430\n" 83 | "BP 52\n" 84 | "EP 58\n" 85 | "DI 10.1016/j.jmmm.2017.01.061\n" 86 | "PG 7\n" 87 | "WC Materials Science, Multidisciplinary; Physics, Condensed Matter\n" 88 | "SC Materials Science; Physics\n" 89 | "GA EP2GP\n" 90 | "UT WOS:000397201600008\n" 91 | "ER" 92 | ) 93 | article_text = file.read() 94 | return Article.from_isi_text(article_text) 95 | 96 | 97 | @pytest.fixture 98 | def filename_single_document(): 99 | return "docs/examples/single-article.txt" 100 | 101 | 102 | @pytest.fixture 103 | def filename_many_documents(): 104 | return "docs/examples/bit-pattern-savedrecs.txt" 105 | -------------------------------------------------------------------------------- /tests/features/article.feature: -------------------------------------------------------------------------------- 1 | Feature: Article manager class 2 | 3 | Allows the user to parse and sort of dump articles 4 | 5 | Scenario: Computing an article's label 6 | Given an article with authors, year and journal 7 | When I compute the label for the article 8 | Then the label is a proper string 9 | 10 | Scenario Outline: Fail to compute a label 11 | Given a complete article missing 12 | When I try to compute the label for the article 13 | Then There's an error computing the label 14 | 15 | Examples: 16 | | field | 17 | | year | 18 | | authors | 19 | | journal | 20 | 21 | Scenario: Merge two articles 22 | Given a complete article 23 | And theres a similar article that includes a doi 24 | 25 | When I merge the two articles 26 | And I try to compute the label for the article 27 | 28 | Then the article's doi matches the other 29 | And there's no error computing the label 30 | And the label contains the doi of the other 31 | 32 | Scenario: Parse article from isi text 33 | Given some valid isi text 34 | When I create an article from the isi text 35 | Then the values in the isi text are part of the article 36 | And the isi text itself is part of the articles 
sources 37 | 38 | Scenario: Parse article from invalid isi text 39 | Given some isi text with invalid lines 40 | When I create an article from the isi text 41 | Then an invalid line error is risen 42 | 43 | Scenario: Turn an article to dict 44 | Given a reference article 45 | When I turn the article into a dict 46 | Then I get a reference dict of values 47 | 48 | Scenario: Parse article from citation 49 | Given some valid isi citation 50 | When I create an article from the citation 51 | Then the values of the citation are part of the article 52 | And the citation itself is part of the articles sources 53 | 54 | Scenario: Parse article from an invalid citation 55 | Given some invalid isi citation 56 | When I create an article from the citation 57 | Then an invalid reference error is risen -------------------------------------------------------------------------------- /tests/features/cached.feature: -------------------------------------------------------------------------------- 1 | Feature: cached collection 2 | 3 | We want this kind of collection to avoid duplication at all costs 4 | 5 | Scenario: preheat cache 6 | 7 | Given some valid isi text 8 | When I create a collection from that text 9 | Then the collection's cache is preheated 10 | 11 | Scenario: collection list articles and references 12 | 13 | Given a valid collection 14 | When I iterate over the collection 15 | Then all articles and references are present 16 | 17 | Scenario: list authors 18 | 19 | Given a valid collection 20 | When I iterate over the collection authors 21 | Then all authors are included 22 | And the author list include duplicates 23 | 24 | Scenario: list coauthors 25 | 26 | Given a valid collection 27 | When I iterate over the collection coauthors 28 | Then all coauthor pairs are included 29 | And the coauthor list include duplicates 30 | 31 | Scenario: duplicated articles are removed 32 | 33 | Given some valid isi text 34 | When I create a collection from that text 35 | And I create a 
collection from that text 36 | Then both collections have the same number of articles 37 | 38 | Scenario: citation pairs 39 | 40 | Given a valid collection 41 | When I list the collection's citation pairs 42 | Then all citation pairs are included 43 | 44 | Scenario: citation pairs include complete info from references 45 | 46 | Given some valid isi text 47 | And a different isi record that references the former 48 | When I create a collection from that text 49 | And I list the collection's citation pairs 50 | Then the citation always include all the available data 51 | -------------------------------------------------------------------------------- /tests/test_article.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | from dataclasses import dataclass 3 | from typing import Dict, Optional 4 | 5 | from pytest import fixture 6 | from pytest_bdd import given, parsers, scenarios, then, when 7 | 8 | from wostools.article import Article 9 | from wostools.exceptions import InvalidIsiLine, InvalidReference, MissingLabelFields 10 | 11 | from wostools._testutils import Context 12 | 13 | ISI_TEMPLATE = """ 14 | PT J 15 | AU {author} 16 | {second_author} 17 | AF {author} 18 | {second_author} 19 | TI {title} 20 | SO JOURNAL OF MAGNETISM AND MAGNETIC MATERIALS 21 | LA English 22 | DT Article 23 | DE Electrodeposition; Structural control; Nanodot array; Bit-patterned 24 | media; CoPt alloy 25 | ID BIT-PATTERNED MEDIA; ELECTRON-BEAM LITHOGRAPHY; RECORDING MEDIA; 26 | MAGNETIC MEDIA; DENSITY; FILMS; ANISOTROPY; STORAGE 27 | AB CoPt nanodot arrays were fabricated by combining electrodeposition and electron beam lithography (EBL) for the use of bit-patterned media (BPM). 
To achieve precise control of deposition uniformity and coercivity of the CoPt nanodot arrays, their crystal structure and magnetic properties were controlled by controlling the diffusion state of metal ions from the initial deposition stage with the application of bath agitation. Following bath agitation, the composition gradient of the CoPt alloy with thickness was mitigated to have a near-ideal alloy composition of Co:Pt =80:20, which induces epitaxial-like growth from Ru substrate, thus resulting in the improvement of the crystal orientation of the hcp (002) structure from its initial deposition stages. Furthermore, the cross-sectional transmission electron microscope (TEM) analysis of the nanodots deposited with bath agitation showed CoPt growth along its c-axis oriented in the perpendicular direction, having uniform lattice fringes on the hcp (002) plane from the Ru underlayer interface, which is a significant factor to induce perpendicular magnetic anisotropy. Magnetic characterization of the CoPt nanodot arrays showed increase in the perpendicular coercivity and squareness of the hysteresis loops from 2.0 kOe and 0.64 (without agitation) to 4.0 kOe and 0.87 with bath agitation. Based on the detailed characterization of nanodot arrays, the precise crystal structure control of the nanodot arrays with ultra-high recording density by electrochemical process was successfully demonstrated. 28 | C1 [Wodarz, Siggi; Homma, Takayuki] Waseda Univ, Dept Appl Chem, Shinjuku Ku, Tokyo 1698555, Japan. 29 | [Hasegawa, Takashi; Ishio, Shunji] Akita Univ, Dept Mat Sci, Akita 0108502, Japan. 30 | RP Homma, T (reprint author), Waseda Univ, Dept Appl Chem, Shinjuku Ku, Tokyo 1698555, Japan. 31 | EM t.homma@waseda.jp 32 | OI Hasegawa, Takashi/0000-0002-8178-4980 33 | FU JSPS KAKENHI Grant [25249104] 34 | FX This work was supported in part by JSPS KAKENHI Grant Number 25249104. 
35 | CR Albrecht TR, 2013, IEEE T MAGN, V49, P773, DOI 10.1109/TMAG.2012.2227303 36 | BUSCHOW KHJ, 1983, J MAGN MAGN MATER, V38, P1, DOI 10.1016/0304-8853(83)90097-5 37 | Gapin AI, 2006, J APPL PHYS, V99, DOI 10.1063/1.2163289 38 | Homma Takayuki, 2015, ECS Transactions, V64, P1, DOI 10.1149/06431.0001ecst 39 | Kryder MH, 2008, P IEEE, V96, P1810, DOI 10.1109/JPROC.2008.2004315 40 | Kubo T, 2005, J APPL PHYS, V97, DOI 10.1063/1.1855572 41 | Lodder JC, 2004, J MAGN MAGN MATER, V272, P1692, DOI 10.1016/j.jmmm.2003.12.259 42 | Mitsuzuka K, 2007, IEEE T MAGN, V43, P2160, DOI 10.1109/TMAG.2007.893129 43 | Ouchi T, 2010, ELECTROCHIM ACTA, V55, P8081, DOI 10.1016/j.electacta.2010.02.073 44 | Pattanaik G, 2006, J APPL PHYS, V99, DOI 10.1063/1.2150805 45 | Pattanaik G, 2007, ELECTROCHIM ACTA, V52, P2755, DOI 10.1016/j.electacta.2006.07.062 46 | Piramanayagam SN, 2009, J MAGN MAGN MATER, V321, P485, DOI 10.1016/j.jmmm.2008.05.007 47 | Ross CA, 2008, MRS BULL, V33, P838, DOI 10.1557/mrs2008.179 48 | Shiroishi Y, 2009, IEEE T MAGN, V45, P3816, DOI 10.1109/TMAG.2009.2024879 49 | Sirtori V, 2011, ACS APPL MATER INTER, V3, P1800, DOI 10.1021/am200267u 50 | Sohn JS, 2009, NANOTECHNOLOGY, V20, DOI 10.1088/0957-4484/20/2/025302 51 | Sun SH, 2000, SCIENCE, V287, P1989, DOI 10.1126/science.287.5460.1989 52 | Terris BD, 2007, MICROSYST TECHNOL, V13, P189, DOI 10.1007/s00542-006-0144-9 53 | Wang JP, 2008, P IEEE, V96, P1847, DOI 10.1109/JPROC.2008.2004318 54 | Weller D, 1999, IEEE T MAGN, V35, P4423, DOI 10.1109/20.809134 55 | Weller D, 2000, IEEE T MAGN, V36, P10, DOI 10.1109/20.824418 56 | Wodarz S, 2016, ELECTROCHIM ACTA, V197, P330, DOI 10.1016/j.electacta.2015.11.136 57 | Xu X, 2012, J ELECTROCHEM SOC, V159, pD240, DOI 10.1149/2.090204jes 58 | Yang X, 2007, J VAC SCI TECHNOL B, V25, P2202, DOI 10.1116/1.2798711 59 | Yang XM, 2009, ACS NANO, V3, P1844, DOI 10.1021/nn900073r 60 | Yasui N, 2003, APPL PHYS LETT, V83, P3347, DOI 10.1063/1.1622787 61 | Yua H., 2009, J APPL PHYS, V105 62 
| Zhu JG, 2008, IEEE T MAGN, V44, P125, DOI 10.1109/TMAG.2007.911031 63 | NR 28 64 | TC 0 65 | Z9 0 66 | U1 21 67 | U2 21 68 | PU ELSEVIER SCIENCE BV 69 | PI AMSTERDAM 70 | PA PO BOX 211, 1000 AE AMSTERDAM, NETHERLANDS 71 | SN 0304-8853 72 | EI 1873-4766 73 | J9 {journal} 74 | JI J. Magn. Magn. Mater. 75 | PD MAY 15 76 | PY {year} 77 | VL {volume} 78 | IS {issue} 79 | BP {page} 80 | EP 58 81 | DI {doi} 82 | PG 7 83 | WC Materials Science, Multidisciplinary; Physics, Condensed Matter 84 | SC Materials Science; Physics 85 | GA EP2GP 86 | UT WOS:000397201600008 87 | ER 88 | """.strip() 89 | 90 | 91 | @dataclass 92 | class ArticleWrapper: 93 | article: Optional[Article] 94 | label: Optional[str] = None 95 | 96 | 97 | scenarios("features/article.feature") 98 | 99 | 100 | @fixture 101 | def attributes(): 102 | return { 103 | "title": "some title", 104 | "author": "John Doe", 105 | "second_author": "Jane Doe", 106 | "authors": ["John Doe", "Jane Doe"], 107 | "year": 1994, 108 | "page": "1330-5", 109 | "journal": "J MAGN MAGN MATER", 110 | "volume": "1000", 111 | "issue": "2", 112 | "doi": "10.1016/j.jmmm.2017.01.061", 113 | } 114 | 115 | 116 | @fixture 117 | def citation_attributes(): 118 | # Kryder MH, 2008, P IEEE, V96, P1810, DOI 10.1109/JPROC.2008.2004315 119 | return { 120 | "author": "L Antuan", 121 | "year": "2008", 122 | "journal": "P IEEE", 123 | "volume": "69", 124 | "page": "1810", 125 | "doi": "DOI 10.1109/JPROC.2008.2004315", 126 | } 127 | 128 | 129 | @fixture 130 | def label_context() -> Context[str]: 131 | return Context() 132 | 133 | 134 | @fixture 135 | def parse_context() -> Context[Article]: 136 | return Context() 137 | 138 | 139 | @fixture 140 | def citation_parse_context() -> Context[Article]: 141 | return Context() 142 | 143 | 144 | @fixture 145 | def to_dict_context() -> Context[Dict]: 146 | return Context() 147 | 148 | 149 | @given("a complete article missing ", target_fixture="wrapper") 150 | def article_missing(field: str): 151 | article = 
Article( 152 | title=None, authors=["L, Robertson"], year=1999, journal="Science" 153 | ) 154 | setattr(article, field, None) 155 | return ArticleWrapper(article=article) 156 | 157 | 158 | @given("a complete article", target_fixture="wrapper") 159 | @given("an article with authors, year and journal", target_fixture="wrapper") 160 | def article_with_authors_year_and_journal(): 161 | return ArticleWrapper( 162 | article=Article( 163 | title=None, authors=["L, Robertson"], year=1999, journal="Science" 164 | ), 165 | label="L Robertson, 1999, Science", 166 | ) 167 | 168 | 169 | @given("theres a similar article that includes a doi", target_fixture="other") 170 | def similar_article_with_doi(wrapper: ArticleWrapper): 171 | assert wrapper.article, "missing article to copy" 172 | article = deepcopy(wrapper.article) 173 | article.doi = "somedoi/123" 174 | if wrapper.label: 175 | return ArticleWrapper( 176 | article=article, label=", ".join([wrapper.label, article.doi]), 177 | ) 178 | return ArticleWrapper(article=article) 179 | 180 | 181 | @given("some valid isi text", target_fixture="isi_text") 182 | def valid_isi_text(attributes): 183 | return ISI_TEMPLATE.format(**attributes) 184 | 185 | 186 | @given("some isi text with invalid lines", target_fixture="isi_text") 187 | def invalid_lines_in_isi_text(attributes): 188 | return """ 189 | INVALIDKEY This value is going to die 190 | """.strip() 191 | 192 | 193 | @given("some invalid isi citation", target_fixture="isi_citation") 194 | def invalid_isi_citation(): 195 | return "Da Lambert, Hello" 196 | 197 | 198 | @given("some valid isi citation", target_fixture="isi_citation") 199 | def valid_isi_citation(citation_attributes): 200 | return "{author}, {year}, {journal}, V{volume}, P{page}, DOI {doi}".format( 201 | **citation_attributes 202 | ) 203 | 204 | 205 | @given("a reference article", target_fixture="wrapper") 206 | def reference_article(attributes): 207 | return ArticleWrapper( 208 | article=Article( 209 | 
title=attributes.get("title"), 210 | authors=attributes.get("authors"), 211 | year=attributes.get("year"), 212 | journal=attributes.get("journal"), 213 | volume=attributes.get("volume"), 214 | issue=attributes.get("issue"), 215 | page=attributes.get("page"), 216 | doi=attributes.get("doi"), 217 | references=attributes.get("references"), 218 | keywords=attributes.get("keywords"), 219 | sources=attributes.get("sources"), 220 | extra=attributes.get("extra"), 221 | ) 222 | ) 223 | 224 | 225 | @when("I merge the two articles") 226 | def merge_articles(wrapper: ArticleWrapper, other: ArticleWrapper): 227 | assert wrapper.article, "Missing article for this step" 228 | assert other.article, "Missing other article for this step" 229 | wrapper.article = wrapper.article.merge(other.article) 230 | wrapper.label = None 231 | 232 | 233 | @when("I try to compute the label for the article") 234 | @when("I compute the label for the article") 235 | def try_to_compute_label(label_context: Context[str], wrapper: ArticleWrapper): 236 | assert wrapper.article, "Missing article for this step" 237 | with label_context.capture(): 238 | label_context.push(wrapper.article.label) 239 | 240 | 241 | @when("I turn the article into a dict") 242 | def try_to_go_to_dict(wrapper: ArticleWrapper, to_dict_context: Context[Dict]): 243 | assert wrapper.article, "Missing article for this step" 244 | with to_dict_context.capture(): 245 | to_dict_context.push(wrapper.article.to_dict()) 246 | 247 | 248 | @when("I create an article from the isi text") 249 | def create_article_from_isi_text(isi_text, parse_context: Context[Article]): 250 | assert isi_text, "define some isi text to parse" 251 | with parse_context.capture(): 252 | parse_context.push(Article.from_isi_text(isi_text)) 253 | 254 | 255 | @when("I create an article from the citation") 256 | def create_article_from_citation( 257 | isi_citation, citation_parse_context: Context[Article] 258 | ): 259 | assert isi_citation, "define some isi citation to 
parse" 260 | with citation_parse_context.capture(): 261 | citation_parse_context.push(Article.from_isi_citation(isi_citation)) 262 | 263 | 264 | @then("the label is a proper string") 265 | def then_label_is_a_proper_string(label_context: Context[str], wrapper: ArticleWrapper): 266 | with label_context.assert_data() as label: 267 | assert label == wrapper.label 268 | 269 | 270 | @then("the label contains the doi of the other") 271 | def label_matches_other(label_context: Context[str], other: ArticleWrapper): 272 | with label_context.assert_data() as label: 273 | assert ( 274 | other.article and other.article.doi 275 | ), "There's no doi in the other article" 276 | assert other.article.doi in label 277 | 278 | 279 | @then("There's no error computing the label") 280 | @then("there's no error computing the label") 281 | def no_error_computing_label(label_context: Context[str]): 282 | with label_context.assert_data(): 283 | pass 284 | 285 | 286 | @then("There's an error computing the label") 287 | def error_computing_label(label_context: Context[str]): 288 | with label_context.assert_error() as error: 289 | assert isinstance(error, MissingLabelFields) 290 | 291 | 292 | @then(parsers.parse("the article matches the {field:w} of the other")) 293 | @then(parsers.parse("the article's {field:w} matches the other")) 294 | def contais_others_field(wrapper: ArticleWrapper, other: ArticleWrapper, field: str): 295 | assert wrapper.article 296 | assert other.article 297 | assert getattr(wrapper.article, field) == getattr(other.article, field) 298 | 299 | 300 | @then("the values in the isi text are part of the article") 301 | def values_make_it_to_the_article(parse_context: Context[Article], attributes: dict): 302 | with parse_context.assert_data() as article: 303 | for field in [ 304 | "title", 305 | "authors", 306 | "year", 307 | "page", 308 | "journal", 309 | "volume", 310 | "doi", 311 | ]: 312 | assert getattr(article, field) 313 | assert getattr(article, field) == 
attributes[field] 314 | 315 | 316 | @then("the values of the citation are part of the article") 317 | def citation_values_make_it_to_article( 318 | citation_parse_context: Context[Article], citation_attributes: dict 319 | ): 320 | with citation_parse_context.assert_data() as article: 321 | assert article.authors == [citation_attributes["author"]] 322 | for field in ["year", "journal", "page", "volume", "doi"]: 323 | assert str(getattr(article, field)) == citation_attributes[field] 324 | 325 | 326 | @then("the isi text itself is part of the articles sources") 327 | def isi_text_in_sources(parse_context: Context[Article], isi_text: str): 328 | assert parse_context.data, "no article parsed yet" 329 | assert isi_text in parse_context.data.sources 330 | 331 | 332 | @then("the citation itself is part of the articles sources") 333 | def citation_in_sources(citation_parse_context: Context[Article], isi_citation: str): 334 | with citation_parse_context.assert_data() as article: 335 | assert isi_citation in article.sources 336 | 337 | 338 | @then("an invalid line error is risen") 339 | def invalid_isi_line_risen(parse_context: Context[Article]): 340 | with parse_context.assert_error() as error: 341 | assert isinstance(error, InvalidIsiLine) 342 | 343 | 344 | @then("an invalid reference error is risen") 345 | def invalid_reference_risen(citation_parse_context: Context[Article]): 346 | with citation_parse_context.assert_error() as error: 347 | assert isinstance(error, InvalidReference) 348 | 349 | 350 | @then("I get a reference dict of values") 351 | def get_a_reference_dict(to_dict_context: Context[Dict], attributes: Dict): 352 | with to_dict_context.assert_data() as article_dict: 353 | assert any(article_dict.values()), "your dict has no values son" 354 | for key, value in article_dict.items(): 355 | assert not value or key in attributes 356 | assert not value or value == attributes[key] 357 | -------------------------------------------------------------------------------- 
/tests/test_collection_cached.py: -------------------------------------------------------------------------------- 1 | import io 2 | from typing import List, Dict, Tuple 3 | 4 | from pytest import fixture 5 | from pytest_bdd import scenarios, given, when, then 6 | 7 | from wostools import CachedCollection, Article 8 | from wostools._testutils import Context 9 | 10 | ISI_TEXT = """ 11 | FN Thomson Reuters Web of Science™ 12 | VR 1.0 13 | PT J 14 | AU Sun, ZW 15 | Russell, TP 16 | AF Sun, Zhiwei 17 | Russell, Thomas P. 18 | TI In situ grazing incidence small-angle X-ray scattering study of solvent 19 | vapor annealing in lamellae-forming block copolymer thin films: 20 | Trade-off of defects in deswelling 21 | SO JOURNAL OF POLYMER SCIENCE PART B-POLYMER PHYSICS 22 | LA English 23 | DT Article 24 | DE annealing; block copolymers; self-assembly; thin films; X-ray 25 | ID BIT-PATTERNED MEDIA; LITHOGRAPHY; GRAPHENE; ARRAYS; ORIENTATION; 26 | NANOWIRES; PARALLEL; BEHAVIOR; INPLANE; DENSITY 27 | AB Solvent vapor annealing (SVA) is one route to prepare block copolymer (BCP) thin films with long-range lateral ordering. The lattice defects in the spin-coated BCP thin film can be effectively and rapidly reduced using SVA. The solvent evaporation after annealing was shown to have a significant impact on the in-plane ordering of BCP microdomains. However, the effect of solvent evaporation on the out-of-plane defects in BCPs has not been considered. Using grazing-incidence x-ray scattering, the morphology evolution of lamellae-forming poly(2-vinlypyridine)-b-polystyrene-b-poly(2vinylpyridine) triblock copolymers, having lamellar microdomains oriented normal to substrate surface during SVA, was studied in this work. A micelle to lamellae transformation was observed during solvent uptake. The influence of solvent swelling ratio and solvent removal rate on both the in-plane and out-of-plane defect density was studied. 
It shows that there is a trade-off between the in-plane and out-of-plane defect densities during solvent evaporation. (c) 2017 Wiley Periodicals, Inc. J. Polym. Sci., Part B: Polym. Phys. 2017, 55, 980-989 28 | C1 [Sun, Zhiwei; Russell, Thomas P.] Univ Massachusetts Amherst, Dept Polymer Sci & Engn, Amherst, MA 01003 USA. 29 | [Russell, Thomas P.] Lawrence Berkeley Natl Lab, Div Mat Sci, Berkeley, CA 94720 USA. 30 | [Russell, Thomas P.] Beijing Univ Chem Technol, Beijing Adv Innovat Ctr Soft Matter Sci & Engn, Beijing, Peoples R China. 31 | RP Russell, TP (reprint author), Univ Massachusetts Amherst, Dept Polymer Sci & Engn, Amherst, MA 01003 USA.; Russell, TP (reprint author), Lawrence Berkeley Natl Lab, Div Mat Sci, Berkeley, CA 94720 USA.; Russell, TP (reprint author), Beijing Univ Chem Technol, Beijing Adv Innovat Ctr Soft Matter Sci & Engn, Beijing, Peoples R China. 32 | EM russell@mail.pse.umass.edu 33 | FU U.S. Department of Energy BES [BES-DE-FG02-96ER45612]; Director of the 34 | Office of Science, Office of Basic Energy Sciences, of the U.S. 35 | Department of Energy [DE-AC02-05CH11231]; Office of Science, Office of 36 | Basic Energy Sciences, of the U.S. Department of Energy 37 | [DE-AC02-05CH11231] 38 | FX The authors acknowledge the facility support in Advanced Light Source 39 | and Molecular Foundry in Lawrence Berkeley National Laboratory. This 40 | work was supported by the U.S. Department of Energy BES under contract 41 | BES-DE-FG02-96ER45612. The GISAXS characterization in beamline 7.3.3 of 42 | the Advanced Light Source is supported by the Director of the Office of 43 | Science, Office of Basic Energy Sciences, of the U.S. Department of 44 | Energy under contract no. DE-AC02-05CH11231. The SEM and AFM 45 | characterization in the Molecular Foundry was supported by the Office of 46 | Science, Office of Basic Energy Sciences, of the U.S. Department of 47 | Energy under contract no. DE-AC02-05CH11231. 
48 | CR Bai W, 2015, MACROMOLECULES, V48, P8574, DOI 10.1021/acs.macromol.5b02174 49 | Bosworth JK, 2011, MACROMOLECULES, V44, P9196, DOI 10.1021/ma201967a 50 | Bosworth JK, 2010, J PHOTOPOLYM SCI TEC, V23, P145, DOI 10.2494/photopolymer.23.145 51 | Chai J, 2008, ACS NANO, V2, P489, DOI 10.1021/nn700341s 52 | Chai J, 2007, NAT NANOTECHNOL, V2, P500, DOI 10.1038/nnano.2007.227 53 | Choi S, 2012, SOFT MATTER, V8, P3463, DOI 10.1039/c2sm07297a 54 | Di ZY, 2012, MACROMOLECULES, V45, P5185, DOI 10.1021/ma3004136 55 | Farrell RA, 2012, NANOSCALE, V4, P3228, DOI 10.1039/c2nr00018k 56 | Gowd E. B., 2010, IOP C SER MAT SCI EN, V14 57 | Gu XD, 2014, ADV MATER, V26, P273, DOI 10.1002/adma.201302562 58 | Gunkel I, 2016, J POLYM SCI POL PHYS, V54, P331, DOI 10.1002/polb.23933 59 | Ilavsky J, 2012, J APPL CRYSTALLOGR, V45, P324, DOI 10.1107/S0021889812004037 60 | Jeong SJ, 2010, NANO LETT, V10, P3500, DOI 10.1021/nl101637f 61 | Ji S, 2008, MACROMOLECULES, V41, P9098, DOI 10.1021/ma801861h 62 | Khaira GS, 2014, ACS MACRO LETT, V3, P747, DOI 10.1021/mz5002349 63 | Kikitsu A, 2013, IEEE T MAGN, V49, P693, DOI 10.1109/TMAG.2012.2226566 64 | Kim BH, 2011, ADV MATER, V23, P5618, DOI 10.1002/adma.201103650 65 | Kim BH, 2010, ACS NANO, V4, P5464, DOI 10.1021/nn101491g 66 | Kurihara M, 2013, JPN J APPL PHYS, V52, DOI 10.7567/JJAP.52.086201 67 | Liu GX, 2012, ACS NANO, V6, P6786, DOI 10.1021/nn301515a 68 | Mahadevapuram N, 2016, J POLYM SCI POL PHYS, V54, P339, DOI 10.1002/polb.23937 69 | Paik MY, 2010, MACROMOLECULES, V43, P4253, DOI 10.1021/ma902646t 70 | Sinturel C, 2014, ACS APPL MATER INTER, V6, P12146, DOI 10.1021/am504086x 71 | Sun ZW, 2015, ADV MATER, V27, P4364, DOI 10.1002/adma.201501585 72 | Vu T, 2011, MACROMOLECULES, V44, P6121, DOI 10.1021/ma2009222 73 | Thurn-Albrecht T, 2000, SCIENCE, V290, P2126, DOI 10.1126/science.290.5499.2126 74 | Wan L., 2012, MOEMS, V11, P31405 75 | Wang JY, 2008, LANGMUIR, V24, P3545, DOI 10.1021/la703559q 76 | Xiao S., 2013, MOEMS, V12 77 | Xiao 
SG, 2014, ACS NANO, V8, P11854, DOI 10.1021/nn505630t 78 | Xiao SG, 2014, J POLYM SCI POL PHYS, V52, P361, DOI 10.1002/polb.23433 79 | Yamamoto R, 2014, IEEE T MAGN, V50, DOI 10.1109/TMAG.2013.2284474 80 | Yang X., 2014, MOEMS, V13 81 | Yang X., 2013, J MATER RES, V2013, P1 82 | Yang XM, 2014, NANOTECHNOLOGY, V25, DOI 10.1088/0957-4484/25/39/395301 83 | Yang XM, 2009, ACS NANO, V3, P1844, DOI 10.1021/nn900073r 84 | Zhang JQ, 2014, MACROMOLECULES, V47, P5711, DOI 10.1021/ma500633b 85 | NR 37 86 | TC 0 87 | Z9 0 88 | U1 1 89 | U2 1 90 | PU WILEY 91 | PI HOBOKEN 92 | PA 111 RIVER ST, HOBOKEN 07030-5774, NJ USA 93 | SN 0887-6266 94 | EI 1099-0488 95 | J9 J POLYM SCI POL PHYS 96 | JI J. Polym. Sci. Pt. B-Polym. Phys. 97 | PD JUL 1 98 | PY 2017 99 | VL 55 100 | IS 13 101 | BP 980 102 | EP 989 103 | DI 10.1002/polb.24346 104 | PG 10 105 | WC Polymer Science 106 | SC Polymer Science 107 | GA EU7BQ 108 | UT WOS:000401190100002 109 | ER 110 | 111 | EF 112 | """.strip() 113 | 114 | ISI_TEXT_DIFFERENT_RECORD = """ 115 | FN Thomson Reuters Web of Science™ 116 | VR 1.0 117 | PT J 118 | AU Bosworth, JK 119 | Dobisz, EA 120 | Hellwig, O 121 | Ruiz, R 122 | AF Bosworth, Joan K. 123 | Dobisz, Elizabeth A. 124 | Hellwig, Olav 125 | Ruiz, Ricardo 126 | TI Impact of Out-of-Plane Translational Order in Block Copolymer 127 | Lithography 128 | SO MACROMOLECULES 129 | LA English 130 | DT Article 131 | ID BIT-PATTERNED MEDIA; DENSITY MULTIPLICATION; TERNARY BLENDS; THIN-FILMS; 132 | DIMENSIONS; ROUGHNESS; DOMAINS; SHAPES 133 | AB In block copolymer lithography, subtle distortions in the self-assembled domains, such as tilting or bending, have a strong impact on the quality of the lithographic features upon pattern transfer. We compared the feature size distribution observed at the top-surface of block copolymer thin films with the size distribution that the self-assembled structures project at the substrate interface, i.e., the lithographic image. 
We performed the comparison for films of perpendicularly oriented cylindrical block copolymer domains with various degrees of lateral order. We found that the size distribution of the projected image does not mimic the well-known Gaussian distribution observed at the top surface. Instead, the lithographic features display a skewed distribution with a long tail toward smaller feature dimensions, a shift of the median and a reduced number of transferred features. The distortions are more pronounced for films with shorter correlation lengths. We propose a simplified model that explains the observed shifts in the size distribution of the projected image by considering the tilting that cylinders undergo in the vicinity of dislocations. The presence of defects disrupting the in-plane orientational order riot only impacts the size distribution of the self-assembled features, but also induces nearby cylinder tilting and some general loss of out-of-plane translational order which, upon pattern transfer, is responsible for the observed distortions on the feature size distribution, 134 | C1 [Bosworth, Joan K.; Dobisz, Elizabeth A.; Hellwig, Olav; Ruiz, Ricardo] Hitachi Global Storage Technol, San Jose Res Ctr, San Jose, CA 95135 USA. 135 | RP Ruiz, R (reprint author), Hitachi Global Storage Technol, San Jose Res Ctr, 3403 Yerba Buena Rd, San Jose, CA 95135 USA. 
136 | EM ricardo.ruiz@hitachigst.com 137 | OI Ruiz, Ricardo/0000-0002-1698-4281 138 | CR ALBRECHT T, 2009, NANOSCALE MAGNETIC M 139 | BATES FS, 1990, ANNU REV PHYS CHEM, V41, P525, DOI 10.1146/annurev.pc.41.100190.002521 140 | Black CT, 2007, IBM J RES DEV, V51, P605 141 | Cheng JY, 2008, ADV MATER, V20, P3155, DOI 10.1002/adma.200800826 142 | Cheng JY, 2010, ACS NANO, V4, P4815, DOI 10.1021/nn100686v 143 | Detcheverry FA, 2010, MACROMOLECULES, V43, P3446, DOI 10.1021/ma902332h 144 | Edwards EW, 2007, MACROMOLECULES, V40, P90, DOI 10.1021/ma0607564 145 | Guarini KW, 2002, ADV MATER, V14, P1290, DOI 10.1002/1521-4095(20020916)14:18<1290::AID-ADMA1290>3.0.CO;2-N 146 | Hammond MR, 2003, MACROMOLECULES, V36, P8712, DOI 10.1021/ma026001o 147 | Harrison C, 2004, EUROPHYS LETT, V67, P800, DOI 10.1209/epl/i2004-10126-5 148 | Harrison C, 2002, PHYS REV E, V66, DOI 10.1103/PhysRevE.66.011706 149 | Hellwig O, 2010, APPL PHYS LETT, V96, DOI 10.1063/1.3293301 150 | HO CS, 1983, IEEE T PATTERN ANAL, V5, P593 151 | *INTRS, LITH 152 | Ji SX, 2011, MACROMOLECULES, V44, P4291, DOI 10.1021/ma2005734 153 | Kleman M., 2003, SOFT MATTER PHYS INT 154 | LIU CC, 2010, J VAC SCI TECHNOL B, V34 155 | Liu G, 2010, J VAC SCI TECHNOL B, V28 156 | Nagpal U, 2011, ACS NANO, V5, P5673, DOI 10.1021/nn201335v 157 | Ruiz R, 2008, PHYS REV B, V77, DOI 10.1103/PhysRevB.77.054204 158 | Ruiz R, 2008, SCIENCE, V321, P936, DOI 10.1126/science.1157626 159 | Segalman RA, 2005, MAT SCI ENG R, V48, P191, DOI 10.1016/j.mser.2004.12.003 160 | Segalman RA, 2003, PHYS REV LETT, V91, DOI 10.1103/PhysRevLett.91.196101 161 | Segalman RA, 2003, MACROMOLECULES, V36, P3272, DOI 10.1021/ma021367m 162 | Stipe BC, 2010, NAT PHOTONICS, V4, P484, DOI 10.1038/nphoton.2010.90 163 | Stoykovich MP, 2010, MACROMOLECULES, V43, P2334, DOI 10.1021/ma902494v 164 | Stuen KO, 2009, MACROMOLECULES, V42, P5139, DOI 10.1021/ma900520v 165 | Tada Y, 2009, POLYMER, V50, P4250, DOI 10.1016/j.polymer.2009.06.039 166 | Welander AM, 2008, 
MACROMOLECULES, V41, P2759, DOI 10.1021/ma800056s 167 | Welander AM, 2008, J VAC SCI TECHNOL B, V26, P2484, DOI 10.1116/1.2987963 168 | Xiao SG, 2007, J VAC SCI TECHNOL B, V25, P1953, DOI 10.1116/1.2801860 169 | Yang XM, 2009, ACS NANO, V3, P1844, DOI 10.1021/nn900073r 170 | NR 32 171 | TC 11 172 | Z9 11 173 | U1 4 174 | U2 22 175 | PU AMER CHEMICAL SOC 176 | PI WASHINGTON 177 | PA 1155 16TH ST, NW, WASHINGTON, DC 20036 USA 178 | SN 0024-9297 179 | J9 MACROMOLECULES 180 | JI Macromolecules 181 | PD DEC 13 182 | PY 2011 183 | VL 44 184 | IS 23 185 | BP 9196 186 | EP 9204 187 | DI 10.1021/ma201967a 188 | PG 9 189 | WC Polymer Science 190 | SC Polymer Science 191 | GA 855ZG 192 | UT WOS:000297604200016 193 | ER 194 | 195 | EF 196 | """.strip() 197 | 198 | scenarios("features/cached.feature") 199 | 200 | 201 | @fixture 202 | def collection_context() -> Context[CachedCollection]: 203 | return Context() 204 | 205 | 206 | @fixture 207 | def iterate_collection_context() -> Context[List[Article]]: 208 | return Context() 209 | 210 | 211 | @fixture 212 | def iterate_authors_collection_context() -> Context[List[str]]: 213 | return Context() 214 | 215 | 216 | @fixture 217 | def iterate_coauthors_collection_context() -> Context[List[Tuple[str, str]]]: 218 | return Context() 219 | 220 | 221 | @fixture 222 | def iterate_citation_pairs_collection_context() -> Context[ 223 | List[Tuple[Article, Article]] 224 | ]: 225 | return Context() 226 | 227 | 228 | @given("some valid isi text", target_fixture="isi_text") 229 | def valid_isi_text(): 230 | return [ISI_TEXT] 231 | 232 | 233 | @given("a different isi record that references the former", target_fixture="isi_text") 234 | def isi_text_different_record(isi_text): 235 | return [*isi_text, ISI_TEXT_DIFFERENT_RECORD] 236 | 237 | 238 | @when("I create a collection from that text") 239 | def create_collection(isi_text, collection_context: Context[CachedCollection]): 240 | with collection_context.capture(): 241 | collection = 
CachedCollection(*(io.StringIO(doc) for doc in isi_text)) 242 | collection_context.push(collection) 243 | return collection_context 244 | 245 | 246 | @given("a valid collection") 247 | def context_valid_collection(collection_context): 248 | collection = CachedCollection(io.StringIO(ISI_TEXT)) 249 | collection_context.push(collection) 250 | 251 | 252 | @then("the collection's cache is preheated") 253 | def the_collection_cache_is_preheated(collection_context: Context[CachedCollection]): 254 | with collection_context.assert_data() as collection: 255 | assert collection._cache 256 | 257 | 258 | @when("I iterate over the collection") 259 | def iterate_over_collection( 260 | collection_context: Context[CachedCollection], 261 | iterate_collection_context: Context[List[Article]], 262 | ): 263 | with collection_context.assert_data() as collection: 264 | with iterate_collection_context.capture(): 265 | iterate_collection_context.push(list(collection)) 266 | 267 | 268 | @then("all articles and references are present") 269 | def all_articles_and_references_are_present( 270 | iterate_collection_context: Context[List[Article]], 271 | ): 272 | with iterate_collection_context.assert_data() as articles: 273 | assert len(articles) == 38 274 | for article in articles: 275 | assert article 276 | assert article.label 277 | 278 | 279 | @when("I iterate over the collection authors") 280 | def iterate_over_collection_authors( 281 | collection_context: Context[CachedCollection], 282 | iterate_authors_collection_context: Context[List[str]], 283 | ): 284 | with collection_context.assert_data() as collection: 285 | with iterate_authors_collection_context.capture(): 286 | iterate_authors_collection_context.push(list(collection.authors)) 287 | 288 | 289 | @then("all authors are included") 290 | @then("the author list include duplicates") 291 | def all_authors_included_even_duplicates( 292 | iterate_authors_collection_context: Context[List[str]], 293 | ): 294 | with 
iterate_authors_collection_context.assert_data() as authors: 295 | assert authors 296 | 297 | authors_count: Dict[str, int] = {} 298 | for author in authors: 299 | authors_count[author] = authors_count.get(author, 0) + 1 300 | assert author 301 | 302 | for author, count in authors_count.items(): 303 | assert author in ISI_TEXT 304 | assert count >= 1 305 | 306 | 307 | @when("I iterate over the collection coauthors") 308 | def iterate_over_collection_coauthors( 309 | collection_context: Context[CachedCollection], 310 | iterate_coauthors_collection_context: Context[List[Tuple[str, str]]], 311 | ): 312 | with collection_context.assert_data() as collection: 313 | with iterate_coauthors_collection_context.capture(): 314 | iterate_coauthors_collection_context.push(list(collection.coauthors)) 315 | 316 | 317 | @then("all coauthor pairs are included") 318 | @then("the coauthor list include duplicates") 319 | def all_coauthors_pairs_included_even_duplicates( 320 | iterate_coauthors_collection_context: Context[List[Tuple[str, str]]], 321 | ): 322 | with iterate_coauthors_collection_context.assert_data() as coauthors: 323 | assert coauthors 324 | 325 | coauthors_count: Dict[Tuple[str, str], int] = {} 326 | for pair in coauthors: 327 | coauthors_count[pair] = coauthors_count.get(pair, 0) + 1 328 | 329 | author, coauthor = pair 330 | assert author 331 | assert coauthor 332 | 333 | for pair, count in coauthors_count.items(): 334 | author, coauthor = pair 335 | assert author in ISI_TEXT 336 | assert coauthor in ISI_TEXT 337 | assert count >= 1 338 | 339 | 340 | @then("both collections have the same number of articles") 341 | def same_number_of_articles(collection_context: Context[CachedCollection]): 342 | 343 | with collection_context.assert_data() as collection: 344 | with collection_context.assert_history(1) as latest: 345 | print(latest) 346 | assert len(collection) == len(latest[0]) 347 | 348 | 349 | @when("I list the collection's citation pairs") 350 | def 
list_collection_citation_pairs( 351 | collection_context: Context[CachedCollection], 352 | iterate_citation_pairs_collection_context: Context[List[Tuple[Article, Article]]], 353 | ): 354 | with collection_context.assert_data() as collection: 355 | with iterate_citation_pairs_collection_context.capture(): 356 | iterate_citation_pairs_collection_context.push( 357 | list(collection.citation_pairs()) 358 | ) 359 | 360 | 361 | @then("all citation pairs are included") 362 | def all_citation_pairs_are_included( 363 | iterate_citation_pairs_collection_context: Context[List[Tuple[Article, Article]]] 364 | ): 365 | with iterate_citation_pairs_collection_context.assert_data() as citation_pairs: 366 | assert len(citation_pairs) == 37 367 | for article, reference in citation_pairs: 368 | assert isinstance(article, Article) 369 | assert isinstance(reference, Article) 370 | 371 | 372 | @then("the citation always include all the available data") 373 | def iterate_over_citation_pairs_two_isi_files( 374 | iterate_citation_pairs_collection_context: Context[List[Tuple[Article, Article]]] 375 | ): 376 | with iterate_citation_pairs_collection_context.assert_data() as citation_pairs: 377 | assert len(citation_pairs) == 68 378 | 379 | having_keywords = False 380 | for article, reference in citation_pairs: 381 | assert isinstance(article, Article) 382 | assert isinstance(reference, Article) 383 | 384 | if ( 385 | article.to_dict()["doi"] == "10.1002/polb.24346" 386 | and reference.to_dict()["doi"] == "10.1021/ma201967a" 387 | ): 388 | having_keywords = bool(article.keywords and reference.keywords) 389 | 390 | assert having_keywords 391 | -------------------------------------------------------------------------------- /tests/test_fields.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from wostools.fields import joined, delimited, integer, parse 4 | 5 | 6 | def test_joined_joins_sequences(): 7 | assert joined(["hello", "world"]) 
== "hello world" 8 | 9 | 10 | def test_delimited_split_strings(): 11 | assert delimited(["key; word;", "more; words"]) == ["key", "word", "more", "words"] 12 | 13 | 14 | def test_delimited_split_strings_no_semi_at_the_end(): 15 | assert delimited(["key; word", "more; words"]) == ["key", "word", "more", "words"] 16 | 17 | 18 | def test_integer_integer_makes_an_integer(): 19 | assert integer(["1"]) == 1 20 | 21 | 22 | def test_integer_raises_if_more_than_one_value_is_passed(): 23 | with pytest.raises(ValueError): 24 | integer(["", ""]) 25 | 26 | 27 | @pytest.mark.parametrize("header", ["VR", "FN"]) 28 | def test_parse_ignores_headers(header): 29 | assert parse(header, ["value", "value"]) == {} 30 | 31 | 32 | def test_parse_includes_on_unknown_fields(): 33 | assert parse("FG", ["value", "value"]) == {"FG": ["value", "value"]} 34 | 35 | 36 | def test_parse_raises_on_invalid_values(): 37 | with pytest.raises(ValueError): 38 | assert parse("PY", ["1994b"]) == {} 39 | -------------------------------------------------------------------------------- /wostools/__init__.py: -------------------------------------------------------------------------------- 1 | """Top-level package for Python WoS tools.""" 2 | 3 | __author__ = """Core of Science""" 4 | __email__ = "dev@coreofscience.com" 5 | __version__ = "3.0.2" 6 | 7 | from wostools.article import Article 8 | from wostools.cached import CachedCollection 9 | from wostools.cached import CachedCollection as Collection 10 | 11 | __all__ = ["CachedCollection", "Collection", "Article"] 12 | -------------------------------------------------------------------------------- /wostools/_testutils.py: -------------------------------------------------------------------------------- 1 | from contextlib import contextmanager 2 | from dataclasses import dataclass 3 | from typing import Generic, Iterator, List, Optional, TypeVar 4 | 5 | T = TypeVar("T") 6 | 7 | 8 | @dataclass 9 | class Context(Generic[T]): 10 | history: Optional[List[T]] = None 
11 | error: Optional[Exception] = None 12 | data: Optional[T] = None 13 | 14 | def push(self, data: Optional[T], error: Optional[Exception] = None): 15 | if self.history is None: 16 | self.history = [] 17 | if self.data: 18 | self.history.append(self.data) 19 | self.data = data 20 | self.error = error 21 | 22 | @contextmanager 23 | def capture(self): 24 | try: 25 | yield 26 | except Exception as e: 27 | self.push(None, error=e) 28 | 29 | @contextmanager 30 | def assert_data(self, name=None) -> Iterator[T]: 31 | if name is None: 32 | name = "data" 33 | assert self.data, f"No {name} computed yet" 34 | yield self.data 35 | 36 | @contextmanager 37 | def assert_error(self) -> Iterator[Exception]: 38 | assert self.error, "Expected an error and found none" 39 | yield self.error 40 | 41 | @contextmanager 42 | def assert_history(self, count): 43 | assert len(self.history) >= count 44 | yield self.history[-count:] 45 | -------------------------------------------------------------------------------- /wostools/article.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import logging 3 | import re 4 | from typing import Any, List, Mapping, Optional, Set 5 | 6 | from wostools.exceptions import InvalidIsiLine, InvalidReference, MissingLabelFields 7 | from wostools.fields import parse_all 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | # The null part accounts for an ISI wok bug 12 | ISI_LINE_PATTERN = re.compile( 13 | r"^(null|.)?((?P[A-Z0-9]{2})| )( (?P.*))?$" 14 | ) 15 | 16 | ISI_CITATION_PATTERN = re.compile( 17 | r"""^(?P[^,]+),[ ] # First author 18 | (?P\d{4}),[ ] # Publication year 19 | (?P[^,]+) # Journal 20 | (,[ ]V(?P[\w\d-]+))? # Volume 21 | (,[ ][Pp](?P\w+))? # Start page 22 | (,[ ]DOI[ ](?P.+))? 
class Article:
    """A single bibliographic record, possibly merged from several sources.

    Instances are built either directly, from a full ISI record
    (`from_isi_text`) or from a one-line ISI citation (`from_isi_citation`).
    """

    def __init__(
        self,
        title: Optional[str],
        authors: List[str],
        year: Optional[int],
        journal: Optional[str],
        volume: Optional[str] = None,
        issue: Optional[str] = None,
        page: Optional[str] = None,
        doi: Optional[str] = None,
        references: Optional[List[str]] = None,
        keywords: Optional[List[str]] = None,
        sources: Optional[Set[str]] = None,
        extra: Optional[Mapping] = None,
    ):
        self.title: Optional[str] = title
        self.authors: List[str] = authors
        self.keywords: List[str] = keywords or []
        self.year: Optional[int] = year
        self.journal: Optional[str] = journal
        self.volume: Optional[str] = volume
        self.issue: Optional[str] = issue
        self.page: Optional[str] = page
        self.doi: Optional[str] = doi
        self.references: List[str] = references or []
        self.sources: Set[str] = sources or set()
        self.extra: Mapping[str, Any] = extra or {}

    @property
    def label(self) -> str:
        """Primary identity of the article: the DOI when known, else an ISI-style label."""
        if self.doi:
            return self.doi
        return self._label()

    def _label(self, exclude_doi=False, lower_p=False) -> str:
        """Build an ISI-style citation label from the available fields.

        Args:
            exclude_doi: leave the trailing `DOI ...` part out of the label.
            lower_p: use a lowercase `p` page prefix (some sources do).

        Raises:
            MissingLabelFields: if authors, year or journal are missing.
        """
        if not (self.authors and self.year and self.journal):
            raise MissingLabelFields(self)
        page_prefix = "p" if lower_p else "P"
        pieces = {
            "AU": self.authors[0].replace(",", ""),
            "PY": str(self.year),
            "J9": str(self.journal),
            "VL": f"V{self.volume}" if self.volume else None,
            "BP": f"{page_prefix}{self.page}" if self.page else None,
            # BUG FIX: `exclude_doi` was previously ignored, so the
            # "DOI-free" aliases in `labels` still carried the DOI and never
            # matched citations written without one.
            "DI": f"DOI {self.doi}" if self.doi and not exclude_doi else None,
        }
        return ", ".join(value for value in pieces.values() if value)

    @property
    def labels(self) -> Set[str]:
        """All the aliases this article may be referenced by."""
        if not self.doi:
            return {self.label, self._label(lower_p=True)}
        return {
            self.doi,
            self.label,
            self._label(exclude_doi=True),
            self._label(lower_p=True),
            self._label(exclude_doi=True, lower_p=True),
        }

    def to_dict(self, simplified=True):
        """
        Transform the article into some key value pairs for easy transportation.
        """
        # The heavier fields (raw references, raw source text, extras) are
        # only included when the caller asks for the full payload.
        extra = (
            {
                "references": self.references,
                "extra": self.extra,
                "sources": list(self.sources),
            }
            if not simplified
            else {}
        )
        return {
            "title": self.title,
            "authors": self.authors,
            "keywords": self.keywords,
            "year": self.year,
            "journal": self.journal,
            "volume": self.volume,
            "issue": self.issue,
            "page": self.page,
            "doi": self.doi,
            **extra,
        }

    def merge(self, other: "Article") -> "Article":
        """Combine two records of the same article, preferring `self`'s fields."""
        if other.label not in self.labels:
            logger.warning(
                "\n".join(
                    [
                        "Mixing articles with different labels might result in tragedy",
                        f"  mine:   {self.labels}",
                        f"  others: {other.label}",
                    ]
                )
            )
        return Article(
            title=self.title or other.title,
            authors=(
                self.authors
                + [author for author in other.authors if author not in self.authors]
            ),
            year=self.year or other.year,
            journal=self.journal or other.journal,
            volume=self.volume or other.volume,
            issue=self.issue or other.issue,
            page=self.page or other.page,
            doi=self.doi or other.doi,
            sources={*self.sources, *other.sources},
            extra={**self.extra, **other.extra},
            references=list({*self.references, *other.references}),
            keywords=list({*self.keywords, *other.keywords}),
        )

    @classmethod
    def from_isi_text(cls, raw: str) -> "Article":
        """Parse a full ISI plain-text record into an Article.

        Raises:
            InvalidIsiLine: if a line does not match the ISI line format.
        """
        data = collections.defaultdict(list)
        field = None
        for line in raw.split("\n"):
            match = ISI_LINE_PATTERN.match(line)
            if not match:
                raise InvalidIsiLine(line)
            parsed = match.groupdict()
            # Continuation lines have no field tag: keep appending to the
            # last tag seen.
            field = parsed.get("field") or field
            if not field or "value" not in parsed or parsed["value"] is None:
                continue
            data[field].append(parsed["value"])
        processed = parse_all(dict(data))
        return cls(
            title=processed.get("title"),
            authors=processed.get("authors", []),
            year=processed.get("year"),
            journal=processed.get("source_abbreviation"),
            volume=processed.get("volume"),
            issue=processed.get("issue"),
            page=processed.get("beginning_page"),
            doi=processed.get("DOI"),
            references=processed.get("references"),
            keywords=processed.get("keywords"),
            extra=processed,
            sources={raw},
        )

    @classmethod
    def from_isi_citation(cls, reference: str) -> "Article":
        """Parse a one-line ISI citation (a `CR` entry) into a partial Article.

        Raises:
            InvalidReference: if the line does not look like an ISI citation.
        """
        match = ISI_CITATION_PATTERN.match(reference)
        if not match:
            raise InvalidReference(reference)
        data = {key: [value] for key, value in match.groupdict().items() if value}
        processed = parse_all(data)
        return cls(
            title=processed.get("title"),
            authors=processed.get("authors", []),
            year=processed.get("year"),
            journal=processed.get("source_abbreviation"),
            volume=processed.get("volume"),
            page=processed.get("beginning_page"),
            doi=processed.get("DOI"),
            extra=processed,
            sources={reference},
        )
19 | """ 20 | 21 | def __init__(self, *files): 22 | self._files = files 23 | for file in self._files: 24 | file.seek(0) 25 | 26 | @classmethod 27 | def from_glob(cls, pattern): 28 | """Creates a new collection from a pattern using glob. 29 | 30 | Args: 31 | pattern (str): String with the pattern to be passed to glob. 32 | 33 | Returns: 34 | BaseCollection: Collection with the articles by using the pattern. 35 | """ 36 | return cls.from_filenames(*glob.glob(pattern)) 37 | 38 | @classmethod 39 | def from_filenames(cls, *filenames): 40 | """Creates a new collection from a list of filenames. 41 | 42 | Args: 43 | filenames (str): String with the filename. 44 | 45 | Returns: 46 | BaseCollection: Collection with the articles by reading the 47 | filenames. 48 | """ 49 | files = [open(filename, encoding="utf-8-sig") for filename in filenames] 50 | return cls(*files) 51 | 52 | @property 53 | def _iter_files(self) -> Iterable[TextIO]: 54 | """Iterates over all the single article texts in the collection. 55 | 56 | Returns: 57 | generator: A generator of strings with the text articles. 58 | """ 59 | for filehandle in self._files: 60 | filehandle.seek(0) 61 | yield filehandle 62 | filehandle.seek(0) 63 | 64 | def _articles(self) -> Iterable[Article]: 65 | for file in self._iter_files: 66 | try: 67 | yield from isi.parse_file(file) 68 | except WosToolsError: 69 | yield from scopus.parse_file(file) 70 | 71 | def __iter__(self) -> Iterator[Article]: 72 | """ 73 | Should iterate over all articles known in the collection. 
74 | """ 75 | for article in self._articles(): 76 | yield article 77 | for reference in article.references: 78 | try: 79 | yield Article.from_isi_citation(reference) 80 | except InvalidReference: 81 | logger.info( 82 | f"Ignoring malformed reference '{reference}' from '{article.label}'" 83 | ) 84 | 85 | def __len__(self): 86 | return sum(1 for _ in self) 87 | 88 | @property 89 | def authors(self) -> Iterable[str]: 90 | """Iterates over all article authors, including duplicates 91 | 92 | Returns: 93 | generator: A generator with the authors (one by one) of the 94 | articles in the collection. 95 | """ 96 | raise NotImplementedError("Sub classes should know how to iterate over authors") 97 | 98 | @property 99 | def coauthors(self) -> Iterable[Tuple[str, str]]: 100 | """Iterates over coauthor pairs. 101 | 102 | Returns: 103 | generator: A generator with the pair of coauthors of the articles 104 | in the collections. 105 | """ 106 | raise NotImplementedError( 107 | "Sub classes should know how to iterate over coauthors" 108 | ) 109 | 110 | @property 111 | def citation_pairs(self) -> Iterable[Tuple[Article, Article]]: 112 | """ 113 | Computes the citation pairs for the articles in the collection. 114 | 115 | Returns: 116 | genertator: A generator with the citation links: pairs of article 117 | labesl, where the firts element is the article which cites the 118 | second element. 119 | """ 120 | raise NotImplementedError( 121 | "Sub classes should know how to iterate over citation pairs" 122 | ) 123 | -------------------------------------------------------------------------------- /wostools/cached.py: -------------------------------------------------------------------------------- 1 | """ 2 | Collection with a nice cache. 
3 | """ 4 | 5 | import itertools 6 | import logging 7 | from contextlib import suppress 8 | from typing import Dict, Iterable, Iterator, Set, Tuple 9 | 10 | from wostools.article import Article 11 | from wostools.base import BaseCollection 12 | from wostools.exceptions import InvalidReference, MissingLabelFields 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | 17 | class CachedCollection(BaseCollection): 18 | """ 19 | A collection of WOS text files. 20 | """ 21 | 22 | def __init__(self, *files): 23 | super().__init__(*files) 24 | self._cache_key = None 25 | self._cache: Dict[str, Article] = {} 26 | self._labels: Dict[str, Set[str]] = {} 27 | self._refs: Dict[str, str] = {} 28 | self._preheat() 29 | 30 | def _add_article(self, article: Article): 31 | existing_labels = { 32 | alias 33 | for label in article.labels 34 | for alias in self._labels.get(label, set()) 35 | } 36 | all_labels = existing_labels | article.labels 37 | existing_refs = { 38 | self._refs[label] for label in all_labels if label in self._refs 39 | } 40 | for ref in existing_refs: 41 | other = self._cache.pop(ref, None) 42 | if other is not None: 43 | article = article.merge(other) 44 | 45 | self._cache[article.label] = article 46 | for label in all_labels: 47 | self._labels[label] = all_labels 48 | self._refs[label] = article.label 49 | 50 | def _preheat(self): 51 | # Preheat our cache 52 | key = ":".join(str(id(file) for file in self._files)) 53 | if key == self._cache_key: 54 | return 55 | for article in self._articles(): 56 | with suppress(MissingLabelFields): 57 | self._add_article(article) 58 | for reference in article.references: 59 | try: 60 | self._add_article(Article.from_isi_citation(reference)) 61 | except InvalidReference: 62 | logger.info( 63 | f"Ignoring malformed reference '{reference}' from '{article.label}'" 64 | ) 65 | self._cache_key = key 66 | 67 | def __iter__(self) -> Iterator[Article]: 68 | """Iterates over all articles. 
69 | 70 | Returns: 71 | generator: A generator of Articles according to the text articles. 72 | """ 73 | self._preheat() 74 | yield from self._cache.values() 75 | 76 | @property 77 | def authors(self) -> Iterable[str]: 78 | """Iterates over all article authors, including duplicates 79 | 80 | Returns: 81 | generator: A generator with the authors (one by one) of the 82 | articles in the collection. 83 | """ 84 | for article in self: 85 | yield from article.authors 86 | 87 | @property 88 | def coauthors(self) -> Iterable[Tuple[str, str]]: 89 | """Iterates over coauthor pairs. 90 | 91 | Returns: 92 | generator: A generator with the pair of coauthors of the articles 93 | in the collections. 94 | """ 95 | for article in self._articles(): 96 | yield from ( 97 | (source, target) 98 | for source, target in itertools.combinations(sorted(article.authors), 2) 99 | ) 100 | 101 | def citation_pairs(self) -> Iterable[Tuple[Article, Article]]: 102 | """Computes the citation pairs for the articles in the collection. 103 | 104 | Returns: 105 | generator: A generator with the citation links: pairs of article 106 | labels, where the firsts element is the article which cites the 107 | second element. 108 | """ 109 | for article in self: 110 | for reference in article.references: 111 | if reference in self._refs: 112 | label = self._refs[reference] 113 | yield (article, self._cache[label]) 114 | -------------------------------------------------------------------------------- /wostools/cli.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | 4 | import click 5 | 6 | from wostools import CachedCollection 7 | 8 | 9 | @click.group() 10 | def main(): 11 | """ 12 | A little cli for wos tools. 
13 | """ 14 | logger = logging.getLogger("wostools") 15 | logger.setLevel(logging.ERROR) 16 | 17 | 18 | @main.command("citation-pairs") 19 | @click.argument("sources", type=click.File("r"), nargs=-1) 20 | @click.option( 21 | "--output", 22 | type=click.File("w"), 23 | show_default=True, 24 | default="-", 25 | help="File to save json output.", 26 | ) 27 | def citation_pairs(sources, output): 28 | """ 29 | Build a collection by using the sources and print the citation pairs in json 30 | format or dumps them in the `output`. 31 | """ 32 | if not len(sources) > 0: 33 | click.secho("You should give at least a file with documents.", fg="red") 34 | return 35 | 36 | collection = CachedCollection.from_filenames(*[f.name for f in sources]) 37 | pairs = [ 38 | (source.label, target.label) for source, target in collection.citation_pairs() 39 | ] 40 | 41 | json.dump(pairs, output, indent=2) 42 | 43 | 44 | @main.command("to-json") 45 | @click.argument("sources", type=click.File("r"), nargs=-1) 46 | @click.option( 47 | "--output", 48 | type=click.File("w"), 49 | show_default=True, 50 | default="-", 51 | help="File to save json output.", 52 | ) 53 | @click.option( 54 | "-m", 55 | "--more", 56 | is_flag=True, 57 | show_default=True, 58 | default=False, 59 | help="Add extra info to the output", 60 | ) 61 | def to_json(sources, output, more): 62 | """ 63 | Build a collection by using the sources and print the citation pairs in json 64 | format or dumps them in the `output`. 
65 | """ 66 | if not len(sources) > 0: 67 | click.secho("You should give at least a file with documents.", fg="red") 68 | return 69 | 70 | collection = CachedCollection.from_filenames(*[f.name for f in sources]) 71 | json.dump( 72 | [article.to_dict(simplified=not more) for article in collection], 73 | output, 74 | indent=2, 75 | ) 76 | -------------------------------------------------------------------------------- /wostools/exceptions.py: -------------------------------------------------------------------------------- 1 | class WosToolsError(Exception): 2 | """ 3 | Any exception known by wostools. 4 | """ 5 | 6 | 7 | class InvalidReference(WosToolsError, ValueError): 8 | """ 9 | Raised when we try to create an article out of an invalid reference. 10 | """ 11 | 12 | def __init__(self, reference: str): 13 | super().__init__(f"{reference} does not look like an ISI citation") 14 | 15 | 16 | class InvalidScopusFile(WosToolsError, ValueError): 17 | def __init__(self): 18 | super().__init__("The file does not look like a valid bib file") 19 | 20 | 21 | class InvalidIsiLine(WosToolsError, ValueError): 22 | """ 23 | Raised when we encounter an invalid line when processing an ISI file. 24 | """ 25 | 26 | def __init__(self, line: str): 27 | super().__init__(f"'{line}' is not a valid ISI file line") 28 | 29 | 30 | class MissingLabelFields(WosToolsError, ValueError): 31 | """ 32 | Raised when we don't have any of the required fields for an ISI reference. 33 | """ 34 | 35 | def __init__(self, article, message: str = None): 36 | self.article = article 37 | super().__init__(message or "Missing required fields for label") 38 | -------------------------------------------------------------------------------- /wostools/fields.py: -------------------------------------------------------------------------------- 1 | """ 2 | The wos fields definitions. 
3 | """ 4 | 5 | import collections 6 | import functools 7 | import logging 8 | from typing import Any, Dict, List, Mapping 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | IsiField = collections.namedtuple( 14 | "IsiField", ["key", "description", "parse", "aliases"] 15 | ) 16 | 17 | 18 | def joined(seq, sep=" "): 19 | return sep.join(s.strip() for s in seq) 20 | 21 | 22 | def ident(seq): 23 | return [s.strip() for s in seq] 24 | 25 | 26 | def delimited(seq, delimiter="; "): 27 | return [ 28 | word.replace(delimiter.strip(), "") 29 | for words in seq 30 | for word in words.split(delimiter) 31 | if word 32 | ] 33 | 34 | 35 | def integer(seq): 36 | if len(seq) > 1: 37 | raise ValueError(f"Expected no more than one item and got {seq}") 38 | (first,) = seq 39 | return int(first.strip()) 40 | 41 | 42 | FIELDS = { 43 | "AB": IsiField("AB", "Abstract", joined, ["abstract"]), 44 | "AF": IsiField("AF", "Author Full Names", ident, ["author_full_names"]), 45 | "AR": IsiField("AR", "Article Number", joined, ["article_number"]), 46 | "AU": IsiField("AU", "Authors", ident, ["authors"]), 47 | "BA": IsiField("BA", "Book Authors", ident, ["book_authors"]), 48 | "BE": IsiField("BE", "Editors", ident, ["editors"]), 49 | "BF": IsiField("BF", "Book Authors Full Name", ident, ["book_authors_full_name"]), 50 | "BN": IsiField( 51 | "BN", 52 | "International Standard Book Number (ISBN)", 53 | joined, 54 | ["international_standard_book_number"], 55 | ), 56 | "BP": IsiField("BP", "Beginning Page", joined, ["beginning_page"]), 57 | "BS": IsiField("BS", "Book Series Subtitle", joined, ["book_series_subtitle"]), 58 | "C1": IsiField("C1", "Author Address", ident, ["author_address"]), 59 | "CA": IsiField("CA", "Group Authors", ident, ["group_authors"]), 60 | "CL": IsiField("CL", "Conference Location", joined, ["conference_location"]), 61 | "CR": IsiField( 62 | "CR", "Cited References", ident, ["cited_references", "references", "citations"] 63 | ), 64 | "CT": IsiField( 65 | "CT", 66 | 
"Conference Title", 67 | functools.partial(joined, sep="\n"), 68 | ["conference_title"], 69 | ), 70 | "CY": IsiField("CY", "Conference Date", joined, ["conference_date"]), 71 | "DE": IsiField("DE", "Author Keywords", delimited, ["author_keywords"]), 72 | "DI": IsiField( 73 | "DI", 74 | "Digital Object Identifier (DOI)", 75 | joined, 76 | ["digital_object_identifier", "DOI"], 77 | ), 78 | "DT": IsiField("DT", "Document Type", joined, ["document_type"]), 79 | "D2": IsiField( 80 | "D2", 81 | "Book Digital Object Identifier (DOI)", 82 | joined, 83 | ["book_digital_object_identifier"], 84 | ), 85 | "ED": IsiField("ED", "Editors", ident, ["editors"]), 86 | "EM": IsiField("EM", "E-mail Address", ident, ["email_address"]), 87 | "EI": IsiField( 88 | "EI", 89 | "Electronic International Standard Serial Number (eISSN)", 90 | joined, 91 | ["eissn"], 92 | ), 93 | "EP": IsiField("EP", "Ending Page", joined, ["ending_page"]), 94 | "FU": IsiField( 95 | "FU", 96 | "Funding Agency and Grant Number", 97 | delimited, 98 | ["funding_agency_and_grant_number"], 99 | ), 100 | "FX": IsiField("FX", "Funding Text", joined, ["funding_text"]), 101 | "GA": IsiField( 102 | "GA", "Document Delivery Number", joined, ["document_delivery_number"] 103 | ), 104 | "GP": IsiField("GP", "Book Group Authors", ident, ["book_group_authors"]), 105 | "HO": IsiField("HO", "Conference Host", joined, ["conference_host"]), 106 | "ID": IsiField("ID", "Keywords Plus", delimited, ["keywords_plus", "keywords"]), 107 | "IS": IsiField("IS", "Issue", joined, ["issue"]), 108 | "J9": IsiField( 109 | "J9", "29-Character Source Abbreviation", joined, ["source_abbreviation"] 110 | ), 111 | "JI": IsiField( 112 | "JI", "ISO Source Abbreviation", joined, ["iso_source_abbreviation"] 113 | ), 114 | "LA": IsiField("LA", "Language", joined, ["language"]), 115 | "MA": IsiField("MA", "Meeting Abstract", joined, ["meeting_abstract"]), 116 | "NR": IsiField("NR", "Cited Reference Count", integer, ["cited_reference_count"]), 117 | "OI": 
IsiField( 118 | "OI", 119 | "ORCID Identifier (Open Researcher and Contributor ID)", 120 | delimited, 121 | ["orcid_identifier"], 122 | ), 123 | "P2": IsiField( 124 | "P2", "Chapter count (Book Citation Index)", integer, ["chapter_count"] 125 | ), 126 | "PA": IsiField( 127 | "PA", 128 | "Publisher Address", 129 | functools.partial(joined, sep="\n"), 130 | ["publisher_address"], 131 | ), 132 | "PD": IsiField("PD", "Publication Date", joined, ["publication_date"]), 133 | "PG": IsiField("PG", "Page Count", integer, ["page_count"]), 134 | "PI": IsiField("PI", "Publisher City", joined, ["publisher_city"]), 135 | "PM": IsiField("PM", "PubMed ID", joined, ["pubmed_id"]), 136 | "PN": IsiField("PN", "Part Number", joined, ["part_number"]), 137 | "PT": IsiField( 138 | "PT", 139 | "Publication Type (J=Journal; B=Book; S=Series; P=Patent)", 140 | joined, 141 | ["publication_type"], 142 | ), 143 | "PU": IsiField("PU", "Publisher", joined, ["publisher"]), 144 | "PY": IsiField( 145 | "PY", "Year Published", integer, ["year_published", "year", "publication_year"] 146 | ), 147 | "RI": IsiField("RI", "ResearcherID Number", delimited, ["researcherid_number"]), 148 | "RP": IsiField("RP", "Reprint Address", joined, ["reprint_address"]), 149 | "SC": IsiField("SC", "Research Areas", delimited, ["research_areas"]), 150 | "SE": IsiField("SE", "Book Series Title", joined, ["book_series_title"]), 151 | "SI": IsiField("SI", "Special Issue", joined, ["special_issue"]), 152 | "SN": IsiField( 153 | "SN", "International Standard Serial Number (ISSN)", joined, ["issn"] 154 | ), 155 | "SO": IsiField("SO", "Publication Name", joined, ["publication_name"]), 156 | "SP": IsiField( 157 | "SP", 158 | "Conference Sponsors", 159 | functools.partial(delimited, delimiter=", "), 160 | ["conference_sponsors"], 161 | ), 162 | "SU": IsiField("SU", "Supplement", joined, ["supplement"]), 163 | "TC": IsiField( 164 | "TC", 165 | "Web of Science Core Collection Times Cited Count", 166 | integer, 167 | 
["wos_times_cited_count", "wos_times_cited"], 168 | ), 169 | "TI": IsiField("TI", "Document Title", joined, ["title"]), 170 | "U1": IsiField("U1", "Usage Count (Last 180 Days)", integer, ["usage_count"]), 171 | "U2": IsiField("U2", "Usage Count (Since 2013)", integer, ["usage_count"]), 172 | "UT": IsiField( 173 | "UT", "Unique Article Identifier", joined, ["unique_article_identifier"] 174 | ), 175 | "VL": IsiField("VL", "Volume", joined, ["volume"]), 176 | "WC": IsiField( 177 | "WC", "Web of Science Categories", delimited, ["web_of_science_categories"] 178 | ), 179 | "Z9": IsiField( 180 | "Z9", 181 | "Total Times Cited Count (WoS Core, BCI, and CSCD)", 182 | integer, 183 | ["total_times_cited_count", "times_cited"], 184 | ), 185 | } 186 | 187 | 188 | def parse(key: str, value: List) -> Dict: 189 | if key in {"FN", "VR", "ER"}: 190 | # This disregards headers 191 | return {} 192 | if key in FIELDS: 193 | field = FIELDS[key] 194 | parsed = field.parse(value) 195 | return {k: parsed for k in [key, *field.aliases]} 196 | logger.info(f"Found an unknown field with key {key} and value {value}") 197 | return {key: ident(value)} 198 | 199 | 200 | def parse_all(raw_dict: Dict[str, List[str]]) -> Mapping[str, Any]: 201 | """Preprocesses a dictionary, with information about WoS field tags and its 202 | value according to an article, with some parser functions that depends on 203 | the field tag. If there is no a CR field, it adds one to the output with 204 | an empty list as value. Finally, the field aliases are also appended as 205 | keys. 206 | 207 | http://wos-resources.roblib.upei.ca/WOK46/help/WOK/hft_wos.html 208 | 209 | Args: 210 | raw_dict (dict): Dictionary where the keys are WoS field tags and the 211 | values are those corresponding to that field tag. 212 | 213 | Returns: 214 | dict: A dict with the same structure of the raw_input but the values are 215 | preprocessed according to some functions that depend on the field 216 | tag. 
Those functions were designed based on the field tad value 217 | structure. 218 | """ 219 | processed_data = {} 220 | raw_dict.setdefault("CR", []) 221 | for key, seq in raw_dict.items(): 222 | processed_data.update(parse(key, seq)) 223 | return processed_data 224 | -------------------------------------------------------------------------------- /wostools/sources/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coreofscience/python-wostools/ab38b6dd0c4bc742c9082d5e297ed7e3b53f6c12/wostools/sources/__init__.py -------------------------------------------------------------------------------- /wostools/sources/isi.py: -------------------------------------------------------------------------------- 1 | from typing import Iterable, TextIO 2 | from wostools.article import Article 3 | 4 | 5 | def _split(file) -> Iterable[str]: 6 | parts = file.read().split("\n\n") 7 | for part in parts: 8 | if part != "ER": 9 | yield part 10 | 11 | 12 | def parse_file(file: TextIO) -> Iterable[Article]: 13 | for raw in _split(file): 14 | yield Article.from_isi_text(raw) 15 | -------------------------------------------------------------------------------- /wostools/sources/scopus.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | import logging 3 | import re 4 | from typing import Dict, Iterable, List, Optional, TextIO, Tuple 5 | 6 | from wostools.article import Article 7 | from wostools.exceptions import InvalidScopusFile 8 | 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | def _size(file) -> int: 14 | file.seek(0, 2) 15 | size = file.tell() 16 | file.seek(0) 17 | return size 18 | 19 | 20 | def _int_or_nothing(raw: Optional[List[str]]) -> Optional[int]: 21 | if not raw: 22 | return None 23 | try: 24 | return int(raw[0]) 25 | except TypeError: 26 | return None 27 | 28 | 29 | def _joined(raw: Optional[List[str]]) -> 
Optional[str]: 30 | if not raw: 31 | return None 32 | return " ".join(raw) 33 | 34 | 35 | def _find_volume_info(ref: str) -> Tuple[Dict[str, str], str]: 36 | volume_pattern = re.compile(r"(?P\d+)( \((?P.+?)\))?") 37 | page_pattern = re.compile(r"(pp?\. (?P\w+)(-[^,\s]+)?)") 38 | page = page_pattern.search(ref) 39 | last_index = 0 40 | if page: 41 | last_index = page.lastindex or 0 42 | first, *_ = ref.split(page.group()) 43 | volume = volume_pattern.search(first) 44 | else: 45 | volume = volume_pattern.search(ref) 46 | if volume: 47 | last_index = volume.lastindex or 0 48 | 49 | if not page and not volume: 50 | return {}, ref 51 | 52 | data = {} 53 | if page: 54 | data.update(page.groupdict()) 55 | if volume: 56 | data.update(volume.groupdict()) 57 | 58 | if "volume" in data and data["volume"]: 59 | data["volume"] = f"V{data['volume']}" 60 | if "page" in data and data["page"]: 61 | data["page"] = f"P{data['page']}" 62 | 63 | return data, ref[last_index:] 64 | 65 | 66 | def _find_doi(ref: str) -> Tuple[Optional[str], str]: 67 | pattern = re.compile( 68 | r"((doi.org\/)|(aps.org\/doi\/)|(DOI:?)) ?(?P[^\s,;:]{5,})", re.I 69 | ) 70 | result = re.search(pattern, ref) 71 | if result is None or "doi" not in result.groupdict(): 72 | return None, ref 73 | doi = result.groupdict()["doi"] 74 | return f"DOI {doi}", ref[result.lastindex :] 75 | 76 | 77 | def _scopus_ref_to_isi(scopusref: str) -> str: 78 | authors, year, rest = re.split(r"(\(\d{4}\))", scopusref, maxsplit=1) 79 | first_name, last_name, *_ = authors.split(", ") 80 | year = year[1:-1] 81 | journal, rest = rest.split(", ", 1) 82 | volume_info, rest = _find_volume_info(rest) 83 | doi, _ = _find_doi(scopusref) 84 | parts = { 85 | "author": f"{first_name} {last_name.replace(' ', '').replace('.', '')}", 86 | "year": year, 87 | "journal": journal.strip().replace(".", "").upper() 88 | if not journal.isspace() 89 | else None, 90 | "volume": volume_info.get("volume"), 91 | "page": volume_info.get("page"), 92 | "doi": doi, 
93 | } 94 | return ", ".join(val for val in parts.values() if val is not None) 95 | 96 | 97 | def parse_references(refs: List[str]) -> List[str]: 98 | if not refs: 99 | return [] 100 | result = [] 101 | for ref in refs: 102 | try: 103 | result.append(_scopus_ref_to_isi(ref)) 104 | except (KeyError, IndexError, TypeError, ValueError): 105 | logging.debug(f"Ignoring invalid reference {ref}") 106 | return result 107 | 108 | 109 | def ris_to_dict(record: str) -> Dict[str, List[str]]: 110 | RIS_PATTERN = re.compile(r"^(((?P[A-Z0-9]{2}))[ ]{2}-[ ]{1})?(?P(.*))$") 111 | parsed = defaultdict(list) 112 | current = None 113 | 114 | for line in record.split("\n"): 115 | match = RIS_PATTERN.match(line) 116 | if not match: 117 | raise InvalidScopusFile() 118 | data = match.groupdict() 119 | key = data.get("key") 120 | value = data.get("value") 121 | if "ER" in data: 122 | break 123 | if key: 124 | if key == "N1" and value and ":" in value: 125 | label, value = value.split(":", 1) 126 | value = value.strip() 127 | current = f"{key}:{label.strip()}" 128 | else: 129 | current = data["key"] 130 | if value and current: 131 | parsed[current].append(data.get("value", "")) 132 | return dict(parsed) 133 | 134 | 135 | def parse_record(record: str) -> Article: 136 | data = ris_to_dict(record) 137 | return Article( 138 | title=_joined(data.get("TI")), 139 | authors=data.get("AU", []), 140 | year=_int_or_nothing(data.get("PY")), 141 | journal=_joined(data.get("J2")), 142 | volume=_joined(data.get("VL")), 143 | issue=_joined(data.get("IS")), 144 | page=_joined(data.get("SP")), 145 | doi=_joined(data.get("DO")), 146 | keywords=data.get("KW"), 147 | references=parse_references(data.get("N1:References", [])), 148 | sources={"scopus"}, 149 | extra=data, 150 | ) 151 | 152 | 153 | def parse_file(file: TextIO) -> Iterable[Article]: 154 | if not _size(file): 155 | return [] 156 | for item in file.read().split("\n\n"): 157 | if item.isspace(): 158 | continue 159 | article = parse_record(item.strip()) 
160 | yield article 161 | --------------------------------------------------------------------------------