├── .github └── workflows │ ├── main.yml │ └── publish.yml ├── .gitignore ├── HISTORY.md ├── LICENSE ├── README.md ├── pyproject.toml ├── rispy ├── __init__.py ├── config.py ├── parser.py ├── utils.py └── writer.py └── tests ├── data ├── example_basic.ris ├── example_bom.ris ├── example_custom_list_tags.ris ├── example_empty_tag.ris ├── example_extraneous_data.ris ├── example_full.ris ├── example_full_without_whitespace.ris ├── example_full_write.ris ├── example_multi_unknown_tags.ris ├── example_multiline.ris ├── example_single_unknown_tag.ris ├── example_starting_newlines.ris ├── example_urls.ris ├── example_utf_chars.ris └── example_wos.ris ├── test_benchmark.py ├── test_parser.py ├── test_utils.py └── test_writer.py /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: 7 | - main 8 | tags: 9 | - '*' 10 | workflow_dispatch: 11 | 12 | jobs: 13 | 14 | test: 15 | name: test 16 | runs-on: ubuntu-latest 17 | strategy: 18 | max-parallel: 5 19 | matrix: 20 | python-version: [ "3.9", "3.10", "3.11", "3.12" , "3.13" ] 21 | steps: 22 | - uses: actions/checkout@v4 23 | - uses: actions/setup-python@v5 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | - name: Install uv 27 | uses: astral-sh/setup-uv@v5 28 | with: 29 | enable-cache: true 30 | cache-dependency-glob: "**/pyproject.toml" 31 | - name: Install dependencies 32 | run: uv sync --all-extras 33 | - name: Test with pytest 34 | run: uv run poe test 35 | 36 | coverage: 37 | name: test + lint + coverage 38 | runs-on: ubuntu-latest 39 | steps: 40 | - uses: actions/checkout@v4 41 | - uses: actions/setup-python@v5 42 | with: 43 | python-version: "3.13" 44 | - name: Install uv 45 | uses: astral-sh/setup-uv@v5 46 | with: 47 | enable-cache: true 48 | cache-dependency-glob: "**/pyproject.toml" 49 | - name: Install dependencies 50 | run: uv sync --all-extras 51 | - name: Check linting 52 | run: 
uv run poe lint 53 | - name: Test with pytest 54 | run: uv run coverage run -m pytest --benchmark-skip 55 | - name: Generate coverage report 56 | run: | 57 | echo "# Coverage Report" >> $GITHUB_STEP_SUMMARY 58 | uv run coverage report --format=markdown >> $GITHUB_STEP_SUMMARY || true 59 | uv run coverage json -q # will cause pipeline failure if coverage < minimum 60 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Upload Python Package 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v*' 7 | 8 | jobs: 9 | pypi-publish: 10 | runs-on: ubuntu-latest 11 | permissions: 12 | id-token: write 13 | steps: 14 | - uses: actions/checkout@v4 15 | - uses: actions/setup-python@v5 16 | with: 17 | python-version: "3.13" 18 | - name: Install uv 19 | uses: astral-sh/setup-uv@v5 20 | with: 21 | enable-cache: true 22 | cache-dependency-glob: "**/pyproject.toml" 23 | - name: Install dependencies 24 | run: uv sync --all-extras 25 | - name: Build wheel and tar.gz 26 | run: uv run poe build 27 | - name: Publish Package 28 | uses: pypa/gh-action-pypi-publish@release/v1 29 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # python/editors 2 | *.codeintel 3 | *.egg 4 | *.eggs 5 | *.egg-info/ 6 | *.mo 7 | *.nja 8 | *.py[co] 9 | .benchmarks 10 | .cache 11 | .coverage 12 | .idea 13 | .mypy_cache 14 | .ruff_cache 15 | .ropeproject 16 | .tmp 17 | /.Python 18 | /.installed.cfg 19 | /.mr.developer.cfg 20 | /.project 21 | /.pydevproject 22 | build 23 | htmlcov 24 | dist 25 | venv 26 | 27 | # created from tests 28 | export.ris 29 | -------------------------------------------------------------------------------- /HISTORY.md: -------------------------------------------------------------------------------- 1 | # History 2 | 3 | ## 
v0.10.0 (2025-05-23) 4 | 5 | * Improve performance of rispy's parser and refactor parser 6 | * Add support for Python 3.13 and remove Python 3.8 7 | * Revert stripping of the UTF-8 BOM 8 | * (dev) Add benchmark for rispy 9 | * (dev) Switch from Makefile to poethepoet 10 | * (dev) increase test coverage to 99.5% 11 | * (dev) GitHub publish package to PyPI 12 | 13 | ## v0.9.0 (2024-01-17) 14 | 15 | * Improve performance to yield from file objects instead of loading into memory at once (@scott-8 #57) 16 | * Support Python 3.12 17 | * (dev) Remove black; use ruff format instead 18 | * (dev) Rewrite restructured text docs to markdown 19 | 20 | ## v0.8.1 (2023-07-17) 21 | 22 | * Update RIS exporter to optionally write list tags or delimited single tags (@scott-8 #55) 23 | 24 | ## v0.8.0 (2023-07-13) 25 | 26 | Breaking changes: 27 | 28 | * Update minimum python version from 3.6 to 3.8 29 | * Improve URL parsing to be more robust and consistent with the spec; saved as a plural "urls" dictionary key instead of the singular "url" (@scott-8/shapiromatron #52) 30 | * Throw a `rispy.parser.ParseError` instead of an IOError for invalid parsing (@shapiromatron #54) 31 | 32 | Additional updates: 33 | 34 | * Write RIS unknown tags (@simon-20 #50) 35 | 36 | Tooling updates: 37 | 38 | * Support and test python 3.8 through 3.11 39 | * Update black 40 | * Switch to ruff from flake8 + isort 41 | * Switch to flit 42 | * Add basic coverage reports to github actions 43 | 44 | ## v0.7.1 (2021-06-01) 45 | 46 | * README.rst formatting fixes 47 | 48 | ## v0.7.0 (2021-06-01) 49 | 50 | New features: 51 | 52 | * Allow for subclassing of readers and writers for custom implementations and greater flexibility; these custom classes can be used in all high-level commands (load/loads/dump/dumps) (@scott-8 #36) 53 | * Add encoding param to rispy.load if custom file encoding is needed (@scott-8 #36) 54 | * Add convenience method to pretty-print reference type (@scott-8 #37) 55 | * Updated setup.py and build
tooling to use setup.cfg; use wheel for testing in github actions (@KOLANICH #34) 56 | * Relicense to MIT (@shapiromatron #43) 57 | * Support python versions 3.6, 3.7, 3.8, and 3.9 (@shapiromatron #44) 58 | * Changed primary branch from `master` to `main` 59 | 60 | ## v0.6.0 (2020-11-04) 61 | 62 | New features: 63 | 64 | * Add new optional `strict=True` parameter to rispy.load/loads to allow parsing of RIS files with comments or additional metadata which aren't allowed/forbidden in spec (@ShreyRavi) 65 | * Allow pathlib.Path objects in rispy.load in addition to file objects 66 | * Enable multiple python environments in github test matrix (python 3.6, 3.7, and 3.8) 67 | 68 | ## v0.5.1 (2020-09-29) 69 | 70 | New features: 71 | 72 | * Strip BOM before processing records 73 | * Accept ER tag without trailing whitespace 74 | 75 | ## v0.5 (2020-02-21) 76 | 77 | New features: 78 | 79 | * Rename the package from `RISpy` to `rispy` (PEP8 https://www.python.org/dev/peps/pep-0008/#package-and-module-names) 80 | * Added the ability to write RIS files (via `dump`) in addition to read (@J535D165) 81 | * Code formatting rules via black and flake8 82 | * All methods by default return an evaluated list of references, not a generator (to be consistent w/ load/dump behavior) 83 | * Github actions - code formatting check and unit-tests 84 | 85 | Breaking changes: 86 | 87 | * Rename package from `RISparser` to `rispy` 88 | * Revise API for reading RIS files to mirror python APIs (like `json`, `pickle`) 89 | * `SE` RIS key mapped to `section` instead of `version` (per [wikipedia](https://en.wikipedia.org/wiki/RIS_(file_format))) 90 | * `NV` RIS key mapped to `number_of_volumes` instead of `number_of_Volumes` 91 | * `N2` RIS key mapped to `notes_abstract` instead of `abstract` 92 | * Python ≥ 3.6 required 93 | 94 | ## v0.4.3 (2018-04-10) 95 | 96 | * Allow for blank lines at beginning of input file [fixes #3] 97 | 98 | ## v0.4.2 (2017-05-29) 99 | 100 | * parser saves unknown tags into an 
`unknown_tag` key in dict 101 | * python2/3 compatible 102 | * Notes (N1) is now a ListType 103 | * Documented testing with pytest 104 | * Remove unused dependency peppercorn 105 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 rispy authors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # rispy - an RIS file parser/writer for Python 2 | 3 | [![PyPI Version](https://badge.fury.io/py/rispy.svg)](https://pypi.org/project/rispy/) 4 | [![PyPI Downloads](https://img.shields.io/pypi/dm/rispy)](https://pypistats.org/packages/rispy) 5 | 6 | A Python reader/writer of [RIS](https://en.wikipedia.org/wiki/RIS_(file_format)) reference files. 7 | 8 | *Pronunciation* - `rispee` - like "crispy", but without the c. 9 | 10 | ## Usage 11 | 12 | Parsing: 13 | 14 | ```python 15 | >>> import rispy 16 | >>> filepath = 'tests/data/example_full.ris' 17 | >>> with open(filepath, 'r') as bibliography_file: 18 | ... entries = rispy.load(bibliography_file) 19 | ... for entry in entries: 20 | ... print(entry['id']) 21 | ... print(entry['first_authors']) 22 | 12345 23 | ['Marx, Karl', 'Lindgren, Astrid'] 24 | 12345 25 | ['Marxus, Karlus', 'Lindgren, Astrid'] 26 | 27 | ``` 28 | 29 | A file path can also be used to read RIS files. If an encoding is not specified in ``load``, the default system encoding 30 | will be used. 31 | 32 | ```python 33 | >>> from pathlib import Path 34 | >>> import rispy 35 | >>> p = Path('tests', 'data', 'example_utf_chars.ris') 36 | >>> entries = rispy.load(p, encoding='utf-8-sig') 37 | >>> for entry in entries: 38 | ... print(entry['authors'][0]) 39 | Dobrokhotova, Yu E. 40 | 41 | ``` 42 | 43 | Writing: 44 | 45 | ```python 46 | >>> import rispy 47 | >>> entries = [ 48 | ... {'type_of_reference': 'JOUR', 49 | ... 'id': '42', 50 | ... 'primary_title': 'The title of the reference', 51 | ... 'first_authors': ['Marxus, Karlus', 'Lindgren, Astrid'] 52 | ... },{ 53 | ... 'type_of_reference': 'JOUR', 54 | ... 'id': '43', 55 | ... 'primary_title': 'Reference 43', 56 | ... 'abstract': 'Lorem ipsum' 57 | ... 
}] 58 | >>> filepath = 'export.ris' 59 | >>> with open(filepath, 'w') as bibliography_file: 60 | ... rispy.dump(entries, bibliography_file) 61 | 62 | ``` 63 | 64 | ## Example RIS entry 65 | 66 | ```text 67 | 1. 68 | TY - JOUR 69 | ID - 12345 70 | T1 - Title of reference 71 | A1 - Marx, Karl 72 | A1 - Lindgren, Astrid 73 | A2 - Glattauer, Daniel 74 | Y1 - 2014// 75 | N2 - BACKGROUND: Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. RESULTS: Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. CONCLUSIONS: Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. 76 | KW - Pippi 77 | KW - Nordwind 78 | KW - Piraten 79 | JF - Lorem 80 | JA - lorem 81 | VL - 9 82 | IS - 3 83 | SP - e0815 84 | CY - United States 85 | PB - Fun Factory 86 | PB - Fun Factory USA 87 | SN - 1932-6208 88 | M1 - 1008150341 89 | L2 - http://example.com 90 | ER - 91 | ``` 92 | 93 | ## TAG_KEY_MAPPING 94 | 95 | Most fields contain string values, but some like first_authors (A1) are parsed into lists. 
The default mapping is 96 | created from specifications scattered around the web, but to our knowledge there is not one single source of RIS truth, 97 | so these may need to be modified for specific export systems: 98 | 99 | - [Wikipedia](https://en.wikipedia.org/wiki/RIS_(file_format)) 100 | - [ResearcherId](https://web.archive.org/web/20170707033254/http://www.researcherid.com/resources/html/help_upload.htm) 101 | - [Refman](https://web.archive.org/web/20110930172154/http://www.refman.com/support/risformat_intro.asp) 102 | - [Refman (RIS format)](https://web.archive.org/web/20110930172154/http://www.refman.com/support/risformat_intro.asp) 103 | - [Zotero](https://github.com/zotero/translators/blob/master/RIS.js) 104 | 105 | ### Complete list of ListType tags 106 | 107 | ```python 108 | >>> from rispy import LIST_TYPE_TAGS 109 | >>> print(LIST_TYPE_TAGS) 110 | ['A1', 'A2', 'A3', 'A4', 'AU', 'KW', 'N1', 'UR'] 111 | 112 | ``` 113 | 114 | ### Complete default mapping 115 | 116 | ```python 117 | >>> from rispy import TAG_KEY_MAPPING 118 | >>> from pprint import pprint 119 | >>> pprint(TAG_KEY_MAPPING) 120 | {'A1': 'first_authors', 121 | 'A2': 'secondary_authors', 122 | 'A3': 'tertiary_authors', 123 | 'A4': 'subsidiary_authors', 124 | 'AB': 'abstract', 125 | 'AD': 'author_address', 126 | 'AN': 'accession_number', 127 | 'AU': 'authors', 128 | 'C1': 'custom1', 129 | 'C2': 'custom2', 130 | 'C3': 'custom3', 131 | 'C4': 'custom4', 132 | 'C5': 'custom5', 133 | 'C6': 'custom6', 134 | 'C7': 'custom7', 135 | 'C8': 'custom8', 136 | 'CA': 'caption', 137 | 'CN': 'call_number', 138 | 'CY': 'place_published', 139 | 'DA': 'date', 140 | 'DB': 'name_of_database', 141 | 'DO': 'doi', 142 | 'DP': 'database_provider', 143 | 'EP': 'end_page', 144 | 'ER': 'end_of_reference', 145 | 'ET': 'edition', 146 | 'ID': 'id', 147 | 'IS': 'number', 148 | 'J2': 'alternate_title1', 149 | 'JA': 'alternate_title2', 150 | 'JF': 'alternate_title3', 151 | 'JO': 'journal_name', 152 | 'KW': 'keywords', 153 | 
'L1': 'file_attachments1', 154 | 'L2': 'file_attachments2', 155 | 'L4': 'figure', 156 | 'LA': 'language', 157 | 'LB': 'label', 158 | 'M1': 'note', 159 | 'M3': 'type_of_work', 160 | 'N1': 'notes', 161 | 'N2': 'notes_abstract', 162 | 'NV': 'number_of_volumes', 163 | 'OP': 'original_publication', 164 | 'PB': 'publisher', 165 | 'PY': 'year', 166 | 'RI': 'reviewed_item', 167 | 'RN': 'research_notes', 168 | 'RP': 'reprint_edition', 169 | 'SE': 'section', 170 | 'SN': 'issn', 171 | 'SP': 'start_page', 172 | 'ST': 'short_title', 173 | 'T1': 'primary_title', 174 | 'T2': 'secondary_title', 175 | 'T3': 'tertiary_title', 176 | 'TA': 'translated_author', 177 | 'TI': 'title', 178 | 'TT': 'translated_title', 179 | 'TY': 'type_of_reference', 180 | 'UK': 'unknown_tag', 181 | 'UR': 'urls', 182 | 'VL': 'volume', 183 | 'Y1': 'publication_year', 184 | 'Y2': 'access_date'} 185 | 186 | ``` 187 | 188 | ### Override key mapping 189 | 190 | The parser uses a `TAG_KEY_MAPPING`, which one can override by calling `rispy.load()` with the `mapping` parameter. 191 | 192 | ```python 193 | >>> from copy import deepcopy 194 | >>> import rispy 195 | >>> from pprint import pprint 196 | 197 | >>> filepath = 'tests/data/example_full.ris' 198 | >>> mapping = deepcopy(rispy.TAG_KEY_MAPPING) 199 | >>> mapping["SP"] = "pages_this_is_my_fun" 200 | >>> with open(filepath, 'r') as bibliography_file: 201 | ... entries = rispy.load(bibliography_file, mapping=mapping) 202 | ... 
pprint(sorted(entries[0].keys())) 203 | ['alternate_title2', 204 | 'alternate_title3', 205 | 'file_attachments2', 206 | 'first_authors', 207 | 'id', 208 | 'issn', 209 | 'keywords', 210 | 'note', 211 | 'notes_abstract', 212 | 'number', 213 | 'pages_this_is_my_fun', 214 | 'place_published', 215 | 'primary_title', 216 | 'publication_year', 217 | 'publisher', 218 | 'secondary_authors', 219 | 'type_of_reference', 220 | 'urls', 221 | 'volume'] 222 | 223 | ``` 224 | 225 | List tags can be customized in the same way, by passing a list to the `list_tags` parameter. 226 | 227 | ### Changing rispy behavior 228 | 229 | There are a few flags that can be passed to `rispy.load()` and `rispy.dump()` that change how `rispy` deals with tags. 230 | For example, setting `skip_unknown_tags` to `True` will cause `rispy` to neither read nor write tags that are not in the tag map. More 231 | can be found in the docstrings for each class. If more customization is necessary, a custom implementation can be 232 | created (see next section). 233 | 234 | ## Using custom implementations 235 | 236 | Not all RIS files follow the same formatting guidelines. There is an interface for creating custom implementations for 237 | reading and writing such files. An implementation contains the methods and parameters used to work with RIS files, and 238 | should be passed to `rispy.load()` or `rispy.dump()`. 239 | 240 | ### Customizing implementations 241 | 242 | Creating a custom implementation involves creating a class that inherits a base class, and overriding the necessary 243 | variables and methods. One of the existing parsers can also be inherited. Inheriting an existing class is advantageous 244 | if only minor changes need to be made. The sections below document what is available to be overridden, along with a few 245 | examples. 246 | 247 | #### Parsing 248 | 249 | Custom parsers can inherit `RisParser` (the default parser). Various parameters and methods can be overridden when creating a new parser.
250 | 251 | Examples: 252 | 253 | ```python 254 | class WokParser(RisParser): 255 | """Subclass of Base for reading Wok RIS files.""" 256 | 257 | START_TAG = "PT" 258 | IGNORE = ["FN", "VR", "EF"] 259 | PATTERN = r"^[A-Z][A-Z0-9] |^ER\s?|^EF\s?" 260 | DEFAULT_MAPPING = WOK_TAG_KEY_MAPPING 261 | DEFAULT_LIST_TAGS = WOK_LIST_TYPE_TAGS 262 | 263 | def get_content(self, line): 264 | return line[2:].strip() 265 | 266 | def is_header(self, line): 267 | return True 268 | 269 | ``` 270 | 271 | ### Writing 272 | 273 | Writing is very similar to parsing. A custom writer class can inherit `BaseWriter` or one of its subclasses, such as 274 | `RisWriter`. 275 | 276 | Examples: 277 | 278 | ```python 279 | class RisWriter(BaseWriter): 280 | """Subclass of BaseWriter for writing RIS files.""" 281 | 282 | START_TAG = "TY" 283 | PATTERN = "{tag} - {value}" 284 | DEFAULT_MAPPING = TAG_KEY_MAPPING 285 | DEFAULT_LIST_TAGS = LIST_TYPE_TAGS 286 | 287 | def set_header(self, count): 288 | return "{i}.".format(i=count) 289 | 290 | ``` 291 | 292 | ## Other functionality 293 | 294 | Other various utilities included in `rispy` are documented below. 295 | 296 | ### Reference type conversion 297 | 298 | A method is available to convert common RIS reference types into more readable terms. It takes a list of references and 299 | returns a copy of that list with modified reference types. The map for this conversion is located in ``config.py``. 300 | 301 | ```python 302 | >>> from rispy.utils import convert_reference_types 303 | >>> refs = [{"type_of_reference": "JOUR"}] 304 | >>> print(convert_reference_types(refs)) 305 | [{'type_of_reference': 'Journal'}] 306 | 307 | ``` 308 | 309 | ## Software for other RIS-like formats 310 | 311 | Some RIS-like formats contain rich citation data, for example lists and nested attributes, that `rispy` does not 312 | support.
Software specializing in these formats includes: 313 | 314 | * [nbib](https://pypi.org/project/nbib/) - parses the "PubMed" or "MEDLINE" format 315 | 316 | ## Developer instructions 317 | 318 | Install [uv](https://docs.astral.sh/uv/) and make it available and on your path. Then: 319 | 320 | ```bash 321 | # setup environment 322 | uv venv --python=3.13 323 | source .venv/bin/activate # On Windows: .venv\Scripts\activate 324 | uv pip install -e ".[dev]" 325 | 326 | # list available tasks 327 | poe 328 | 329 | # check if code format changes are required 330 | poe lint 331 | 332 | # reformat code 333 | poe format 334 | 335 | # run tests 336 | poe test 337 | 338 | # run benchmark tests 339 | poe bench 340 | ``` 341 | 342 | If you'd prefer not to use `uv`, that's fine too; this is a standard Python package so feel free to use your 343 | preferred workflow. 344 | 345 | Github Actions are currently enabled to run `lint` and `test` when submitting a pull-request. 346 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "rispy" 3 | license = { file = "LICENSE" } 4 | authors = [ 5 | { name = "Maik Derstappen", email = "md@derico.de" } 6 | ] 7 | maintainers = [ 8 | { name = "Andy Shapiro", email = "shapiromatron@gmail.com" } 9 | ] 10 | readme = "README.md" 11 | dynamic = ["version", "description"] 12 | keywords = ["RIS", "parser", "bibliograph"] 13 | classifiers = [ 14 | "Intended Audience :: Developers", 15 | "License :: OSI Approved :: MIT License", 16 | "Programming Language :: Python :: 3", 17 | "Programming Language :: Python :: 3.9", 18 | "Programming Language :: Python :: 3.10", 19 | "Programming Language :: Python :: 3.11", 20 | "Programming Language :: Python :: 3.12", 21 | "Programming Language :: Python :: 3.13", 22 | ] 23 | requires-python = ">=3.9" 24 | 25 | [project.urls] 26 | "Source" = 
"https://github.com/mrtango/rispy" 27 | "Changes" = "https://github.com/MrTango/rispy/blob/main/HISTORY.md" 28 | "Issue Tracker" = "https://github.com/MrTango/rispy/issues" 29 | "Download" = "https://pypi.org/project/rispy/" 30 | 31 | [project.optional-dependencies] 32 | dev = [ 33 | "poethepoet ~= 0.34.0", 34 | "pytest ~=8.3.5", 35 | "pytest-benchmark ~= 5.1.0", 36 | "flit ~= 3.12.0", 37 | "ruff ~= 0.11.10", 38 | "coverage ~= 7.8.0", 39 | ] 40 | 41 | [build-system] 42 | requires = ["flit_core >=3.2,<4"] 43 | build-backend = "flit_core.buildapi" 44 | 45 | [tool.coverage.run] 46 | omit = [ 47 | "tests/*", 48 | ] 49 | 50 | [tool.coverage.report] 51 | fail_under=99.5 52 | precision=1 53 | exclude_also = [ 54 | "@abstractmethod", 55 | ] 56 | 57 | [tool.flit.sdist] 58 | exclude = [".github", "tests"] 59 | 60 | [tool.ruff] 61 | line-length = 100 62 | target-version = "py39" 63 | 64 | [tool.ruff.lint] 65 | select = ["F", "E", "W", "I", "UP", "S", "B", "T20", "RUF"] 66 | 67 | [tool.ruff.lint.per-file-ignores] 68 | "test_*.py" = ["S101"] 69 | 70 | [tool.pytest.ini_options] 71 | addopts = "--doctest-glob='*.md'" 72 | 73 | [tool.poe.tasks.lint] 74 | help = "Check for formatting issues" 75 | sequence = [ 76 | {cmd = "ruff format . --check"}, 77 | {cmd = "ruff check ."}, 78 | ] 79 | 80 | [tool.poe.tasks.format] 81 | help = "Fix formatting issues (where possible)" 82 | sequence = [ 83 | {cmd = "ruff format ."}, 84 | {cmd = "ruff check . 
--fix --show-fixes"}, 85 | ] 86 | 87 | [tool.poe.tasks.test] 88 | help = "Run tests" 89 | cmd = "pytest --benchmark-skip" 90 | 91 | [tool.poe.tasks.bench] 92 | help = "Run benchmark tests" 93 | cmd = "pytest --benchmark-only" 94 | 95 | [tool.poe.tasks.coverage] 96 | help = "Generate test coverage report" 97 | sequence = [ 98 | {cmd = "coverage run -m pytest --benchmark-skip"}, 99 | {cmd = "coverage html"}, 100 | ] 101 | 102 | [tool.poe.tasks.build] 103 | help = "Build wheel package" 104 | cmd = "uv build" 105 | -------------------------------------------------------------------------------- /rispy/__init__.py: -------------------------------------------------------------------------------- 1 | """A Python reader/writer of RIS reference files""" 2 | 3 | from .config import LIST_TYPE_TAGS, TAG_KEY_MAPPING, TYPE_OF_REFERENCE_MAPPING 4 | from .parser import RisParser, WokParser, load, loads 5 | from .writer import BaseWriter, RisWriter, dump, dumps 6 | 7 | __version__ = "0.10.0" 8 | 9 | __all__ = [ 10 | "LIST_TYPE_TAGS", 11 | "TAG_KEY_MAPPING", 12 | "TYPE_OF_REFERENCE_MAPPING", 13 | "BaseWriter", 14 | "RisParser", 15 | "RisWriter", 16 | "WokParser", 17 | "__version__", 18 | "dump", 19 | "dumps", 20 | "load", 21 | "loads", 22 | ] 23 | -------------------------------------------------------------------------------- /rispy/config.py: -------------------------------------------------------------------------------- 1 | """Define default mappings.""" 2 | 3 | LIST_TYPE_TAGS = [ 4 | "A1", 5 | "A2", 6 | "A3", 7 | "A4", 8 | "AU", 9 | "KW", 10 | "N1", 11 | "UR", 12 | ] 13 | 14 | DELIMITED_TAG_MAPPING = { 15 | "UR": ";", 16 | } 17 | 18 | TAG_KEY_MAPPING = { 19 | "TY": "type_of_reference", 20 | "A1": "first_authors", # ListType 21 | "A2": "secondary_authors", # ListType 22 | "A3": "tertiary_authors", # ListType 23 | "A4": "subsidiary_authors", # ListType 24 | "AB": "abstract", 25 | "AD": "author_address", 26 | "AN": "accession_number", 27 | "AU": "authors", # ListType 28 | "C1": 
"custom1", 29 | "C2": "custom2", 30 | "C3": "custom3", 31 | "C4": "custom4", 32 | "C5": "custom5", 33 | "C6": "custom6", 34 | "C7": "custom7", 35 | "C8": "custom8", 36 | "CA": "caption", 37 | "CN": "call_number", 38 | "CY": "place_published", 39 | "DA": "date", 40 | "DB": "name_of_database", 41 | "DO": "doi", 42 | "DP": "database_provider", 43 | "ET": "edition", 44 | "EP": "end_page", 45 | "ID": "id", 46 | "IS": "number", 47 | "J2": "alternate_title1", 48 | "JA": "alternate_title2", 49 | "JF": "alternate_title3", 50 | "JO": "journal_name", 51 | "KW": "keywords", # ListType 52 | "L1": "file_attachments1", 53 | "L2": "file_attachments2", 54 | "L4": "figure", 55 | "LA": "language", 56 | "LB": "label", 57 | "M1": "note", 58 | "M3": "type_of_work", 59 | "N1": "notes", # ListType 60 | "N2": "notes_abstract", 61 | "NV": "number_of_volumes", 62 | "OP": "original_publication", 63 | "PB": "publisher", 64 | "PY": "year", 65 | "RI": "reviewed_item", 66 | "RN": "research_notes", 67 | "RP": "reprint_edition", 68 | "SE": "section", 69 | "SN": "issn", 70 | "SP": "start_page", 71 | "ST": "short_title", 72 | "T1": "primary_title", 73 | "T2": "secondary_title", 74 | "T3": "tertiary_title", 75 | "TA": "translated_author", 76 | "TI": "title", 77 | "TT": "translated_title", 78 | "UR": "urls", # ListType 79 | "VL": "volume", 80 | "Y1": "publication_year", 81 | "Y2": "access_date", 82 | "ER": "end_of_reference", 83 | "UK": "unknown_tag", 84 | } 85 | 86 | TYPE_OF_REFERENCE_MAPPING = { 87 | "ABST": "Abstract", 88 | "ADVS": "Audiovisual material", 89 | "AGGR": "Aggregated Database", 90 | "ANCIENT": "Ancient Text", 91 | "ART": "Art Work", 92 | "BILL": "Bill", 93 | "BLOG": "Blog", 94 | "BOOK": "Whole book", 95 | "CASE": "Case", 96 | "CHAP": "Book chapter", 97 | "CHART": "Chart", 98 | "CLSWK": "Classical Work", 99 | "COMP": "Computer program", 100 | "CONF": "Conference proceeding", 101 | "CPAPER": "Conference paper", 102 | "CTLG": "Catalog", 103 | "DATA": "Data file", 104 | "DBASE": "Online 
# Maps two-character Web of Knowledge (WoK) field tags to the dictionary keys
# used for parsed records. Entries marked "# ListType" are the tags that also
# appear in WOK_LIST_TYPE_TAGS in this module; keep the two in sync.
WOK_TAG_KEY_MAPPING = {
    "FN": "file_name",
    "VR": "version_number",
    "PT": "publication_type",
    "AU": "authors",  # ListType
    "AF": "author_full_name",  # ListType
    "BA": "book_authors",  # ListType
    "BF": "book_authors_full_name",  # ListType
    "CA": "group_authors",  # ListType
    "GP": "book_group_authors",  # ListType
    # NOTE(review): was marked ListType, but "BE" is not in WOK_LIST_TYPE_TAGS - confirm intent
    "BE": "editors",
    "TI": "document_title",
    "SO": "publication_name",
    "SE": "book_series_title",
    "BS": "book_series_subtitle",
    "LA": "language",
    "DT": "document_type",
    "CT": "conference_title",
    "CY": "conference_date",
    "CL": "conference_location",
    "SP": "conference_sponsors",
    "HO": "conference_host",
    "DE": "author_keywords",
    "ID": "keywords_plus",
    "AB": "abstract",
    "C1": "author_address",
    "RP": "reprint_address",
    "EM": "email_address",
    "RI": "researcher_id",  # ListType
    "OI": "orcid_id",
    "FU": "funding_agency_and_grant_number",
    "FX": "funding_text",
    "CR": "cited_references",  # ListType
    "NR": "cited_reference_count",
    "TC": "wos_core_collection_cited_count",
    "Z9": "total_times_cited_count",
    "U1": "usage_count_180",
    "U2": "usage_count_2013",
    "PU": "publisher",
    "PI": "publisher_city",
    "PA": "publisher_address",
    "SN": "issn",
    "EI": "eissn",
    "BN": "isbn",
    "J9": "source_abbreviation_29c",
    "JI": "iso_source_abbreviation",
    "PD": "publication_date",
    "PY": "publication_year",
    "VL": "volume",
    "IS": "issue",
    "SI": "special_issue",
    "PN": "part_number",
    "SU": "supplement",
    "MA": "meeting_abstract",
    "BP": "beginning_page",
    "EP": "ending_page",
    "AR": "article_number",
    "DI": "doi",
    "D2": "book_doi",
    "EA": "early_access_date",
    "EY": "early_access_year",
    "PG": "page_count",
    "P2": "chapter_count",
    # NOTE(review): "WC" and "SC" were marked ListType, but neither is in
    # WOK_LIST_TYPE_TAGS - confirm intent
    "WC": "wos_categories",
    "SC": "research_areas",
    "GA": "document_delivery_number",
    "PM": "pubmed_id",
    "UT": "accession_number",
    "OA": "open_access_indicator",
    "HP": "esi_hot_paper",
    "HC": "esi_highly_cited_paper",
    "DA": "date_generated",
    "ER": "end_of_record",
    "EF": "end_of_file",
}
class NextLine(Exception):
    """Plain ``Exception`` subclass defined alongside the parser.

    NOTE(review): presumably an internal control-flow signal for skipping to
    the next input line (guess from the name only) - confirm against the rest
    of the module before relying on it.
    """


class ParseError(Exception):
    """Error type for RIS content that cannot be parsed."""
73 | ignore (list, optional): List of tags to ignore. 74 | skip_unknown_tags (bool, optional): Bool to skip tags that are not in 75 | `TAG_KEY_MAPPING`. If unknown tags 76 | are not skipped, they will be added 77 | to the `unknown_tag` key. 78 | Defaults to `False`. 79 | enforce_list_tags (bool, optional): Bool for choosing whether to 80 | strictly enforce list type tags. 81 | If this is `False`, tags that 82 | occur multiple times in a reference 83 | will be converted to a list instead 84 | of being overridden. Values set to 85 | be list tags will still be read as 86 | list tags. Defaults to `True`. 87 | newline (str, optional): Line separator. 88 | 89 | """ 90 | self.mapping = mapping if mapping is not None else self.DEFAULT_MAPPING 91 | self.list_tags = list_tags if list_tags is not None else self.DEFAULT_LIST_TAGS 92 | self.delimiter_map = ( 93 | delimiter_tags_mapping 94 | if delimiter_tags_mapping is not None 95 | else self.DEFAULT_DELIMITER_MAPPING 96 | ) 97 | self.ignore = ignore if ignore is not None else self.DEFAULT_IGNORE 98 | self.skip_unknown_tags = skip_unknown_tags 99 | self.enforce_list_tags = enforce_list_tags 100 | self.newline = newline if newline is not None else self.DEFAULT_NEWLINE 101 | 102 | def _iter_till_start(self, lines) -> dict: 103 | while True: 104 | line = next(lines) 105 | if line.startswith(self.START_TAG): 106 | return {self.mapping[self.START_TAG]: self.parse_line(line)[1]} 107 | 108 | def parse(self, text: str) -> list[dict]: 109 | """Parse RIS string.""" 110 | line_gen = (line for line in text.split(self.newline)) 111 | return self.parse_lines(line_gen) 112 | 113 | def parse_lines(self, lines: Union[TextIO, list[str]]) -> list[dict]: 114 | """Parse RIS file line by line.""" 115 | 116 | result = [] 117 | last_tag = None 118 | 119 | try: 120 | record = self._iter_till_start(lines) 121 | 122 | while True: 123 | tag, content = self.parse_line(next(lines)) 124 | 125 | if tag is None: 126 | self._add_tag(record, last_tag, content, 
extend_multiline=True) 127 | continue 128 | 129 | if tag in self.ignore: 130 | continue 131 | 132 | if tag == self.END_TAG: 133 | result.append(record) 134 | 135 | record = self._iter_till_start(lines) 136 | continue 137 | 138 | self._add_tag(record, tag, content) 139 | last_tag = tag 140 | 141 | except StopIteration: 142 | return result 143 | 144 | def parse_line(self, line: str) -> Union[tuple[str, str], tuple[None, str]]: 145 | """Parse line of RIS file. 146 | 147 | This method parses a line between the start and end tag. 148 | It returns the tag and the content of the line. Typically, 149 | the first 2 characters are the tag, followed by a seperator, 150 | and the rest of the line is the content. 151 | 152 | Custom parsers can override this method to change the way 153 | lines are parsed. For example, a very basic RIS parser would 154 | return the first 2 characters as the tag and the rest of the 155 | line as the content of the tag. `(line[0:2], line[6:].strip())` 156 | 157 | Parameters 158 | ---------- 159 | line : str 160 | Line of RIS file between start and end tag. 161 | 162 | Returns 163 | ------- 164 | tuple 165 | Tuple containing the tag and the content of the tag. 166 | """ 167 | if line[2:5] == " -" and line[:2].isupper() and line[0:1].isalpha(): 168 | return (line[0:2], line[6:].strip()) 169 | else: 170 | return (None, line.strip()) 171 | 172 | def _add_single_value( 173 | self, record: dict, name: str, value: Union[str, list[str]], is_multi: bool = False 174 | ) -> None: 175 | """Process a single line. 176 | 177 | This method is only run on tags where repeated tags are not expected. 178 | The output for a tag can be a list when a delimiter is specified, 179 | even if it is not a list tag. 
180 | """ 181 | if not is_multi: 182 | if self.enforce_list_tags or name not in record: 183 | ignore_this_if_has_one = value 184 | record.setdefault(name, ignore_this_if_has_one) 185 | else: 186 | self._add_list_value(record, name, value) 187 | else: 188 | value_must_exist_or_is_bug = record[name] 189 | if isinstance(value, list): 190 | record[name].extend(value) 191 | else: 192 | record[name] = " ".join((value_must_exist_or_is_bug, value)) 193 | 194 | def _add_list_value(self, record: dict, name: str, value: Union[str, list[str]]) -> None: 195 | """Process tags with multiple values.""" 196 | value_list = value if isinstance(value, list) else [value] 197 | try: 198 | record[name].extend(value_list) 199 | except KeyError: 200 | record[name] = value_list 201 | except AttributeError: 202 | must_exist = record[name] 203 | record[name] = [must_exist, *value_list] 204 | 205 | def _add_tag( 206 | self, record: dict, tag: str, content: str, extend_multiline: bool = False 207 | ) -> None: 208 | try: 209 | name = self.mapping[tag] 210 | except KeyError: 211 | if self.skip_unknown_tags: 212 | return 213 | 214 | # handle unknown tag 215 | name = self.mapping[self.UNKNOWN_TAG] 216 | if name not in record: 217 | record[name] = defaultdict(list) 218 | record[name][tag].append(content) 219 | 220 | else: 221 | if delimiter := self.delimiter_map.get(tag): 222 | content = [i.strip() for i in content.split(delimiter)] 223 | 224 | if tag in self.list_tags: 225 | self._add_list_value(record, name, content) 226 | else: 227 | self._add_single_value(record, name, content, is_multi=extend_multiline) 228 | 229 | 230 | class WokParser(RisParser): 231 | """Subclass of Base for reading Wok RIS files.""" 232 | 233 | START_TAG = "PT" 234 | DEFAULT_IGNORE: ClassVar[list[str]] = ["FN", "VR", "EF"] 235 | DEFAULT_MAPPING = WOK_TAG_KEY_MAPPING 236 | DEFAULT_LIST_TAGS = WOK_LIST_TYPE_TAGS 237 | DEFAULT_DELIMITER_MAPPING: ClassVar[dict] = {} 238 | 239 | def parse_line(self, line: str) -> 
def load(
    file: Union[TextIO, Path],
    *,
    encoding: Optional[str] = None,
    newline: Optional[str] = None,
    implementation: type[RisParser] = RisParser,
    **kw,
) -> list[dict]:
    """Read RIS data from a path or file-like object.

    A `Path` is opened in text mode and parsed line by line. An object with
    a `readline` attribute is parsed line by line as-is. An object that only
    offers `read` is read completely and handed to `loads`.

    Args:
        file (Union[TextIO, Path]): File handle or path of RIS data.
        encoding (str, optional): File encoding, only used when a Path is
            supplied. As in the standard library, `None` selects the
            default system encoding.
        newline (str, optional): File line separator.
        implementation (RisParser): RIS implementation; RisParser by default.
        **kw: Further keyword arguments for the `implementation` constructor.

    Returns:
        list: Returns list of RIS entries, one dict per entry.

    Raises:
        ValueError: If `file` is neither a Path nor a file-like object.
    """
    if not isinstance(file, Path):
        if hasattr(file, "readline"):
            parser = implementation(newline=newline, **kw)
            return parser.parse_lines(file)
        if hasattr(file, "read"):
            text = file.read()
            return loads(text, implementation=implementation, newline=newline, **kw)
        raise ValueError("File must be a file-like object or a Path object")

    with file.open(mode="r", newline=newline, encoding=encoding) as handle:
        parser = implementation(**kw)
        return parser.parse_lines(handle)
def invert_dictionary(mapping: dict) -> dict:
    """Return a new dictionary that maps each value of `mapping` to its key.

    Raises:
        ValueError: If two keys share a value, so the inverse would lose
            entries.
    """
    inverted = dict(zip(mapping.values(), mapping.keys()))
    if len(mapping) != len(inverted):
        raise ValueError("Dictionary cannot be inverted; some values were not unique")
    return inverted
class BaseWriter(ABC):
    """Base writer class. Create a subclass to use.

    When creating a new implementation class, some variables and methods
    need to be overridden. This docstring documents how to override these
    parameters when creating a subclass.

    Class variables:
        START_TAG (str): Start tag, required.
        END_TAG (str): End tag. Defaults to 'ER'.
        UNKNOWN_TAG (str): Tag whose value holds unknown tags as a dict of
            lists. Defaults to 'UK'.
        PATTERN (str): String containing a format for a line
            (e.g. ``"{tag}  - {value}"``). Should contain `tag` and
            `value` in curly brackets. Required.
        DEFAULT_IGNORE (list, optional): List of tags to ignore.
            Defaults to [].
        DEFAULT_MAPPING (dict): Default mapping for this class. Required.
        DEFAULT_LIST_TAGS (list): Default list tags for this class. Required.
        DEFAULT_DELIMITER_MAPPING (dict): Default map of tags to the
            delimiter their values are joined with. Required.
        DEFAULT_REFERENCE_TYPE (str): Default reference type, used if a
            reference does not have a type.
        REFERENCE_TYPE_KEY (str): Key under which a reference stores its type.
        SEPARATOR (str, optional): Line emitted between two references.
            Defaults to an empty string, i.e. a blank line; `None` disables
            the separator.
        NEWLINE (str): Line terminator. Defaults to ``"\\n"``.

    Methods to implement:
        set_header: Create a header for each reference. Has the reference
            number as a parameter.

    """

    START_TAG: str
    END_TAG: str = "ER"
    UNKNOWN_TAG: str = "UK"
    PATTERN: str
    DEFAULT_IGNORE: ClassVar[list[str]] = []
    DEFAULT_MAPPING: dict
    DEFAULT_LIST_TAGS: list[str]
    DEFAULT_DELIMITER_MAPPING: dict
    DEFAULT_REFERENCE_TYPE: str = "JOUR"
    REFERENCE_TYPE_KEY: str = "type_of_reference"
    SEPARATOR: Optional[str] = ""
    NEWLINE: str = "\n"

    def __init__(
        self,
        *,
        mapping: Optional[dict] = None,
        list_tags: Optional[list[str]] = None,
        delimiter_tags_mapping: Optional[dict] = None,
        ignore: Optional[list[str]] = None,
        skip_unknown_tags: bool = False,
        enforce_list_tags: bool = True,
    ):
        """Override default tag map and list tags in instance.

        Args:
            mapping (dict, optional): Map tags to tag names.
            list_tags (list, optional): List of list-type tags.
            delimiter_tags_mapping (dict, optional): Map of tags to the
                delimiter their values are joined with.
            ignore (list, optional): List of tags to ignore.
            skip_unknown_tags (bool, optional): If `True`, unknown tags are
                not written to the file. Defaults to `False`.
            enforce_list_tags (bool, optional): If `True` tags that are not
                set as list tags will be written into one line. Defaults to
                `True`.

        Raises:
            ValueError: If `mapping` cannot be inverted because two tags
                share a name.

        """
        self.mapping = mapping if mapping is not None else self.DEFAULT_MAPPING
        self.list_tags = list_tags if list_tags is not None else self.DEFAULT_LIST_TAGS
        self.delimiter_map = (
            delimiter_tags_mapping
            if delimiter_tags_mapping is not None
            else self.DEFAULT_DELIMITER_MAPPING
        )
        self.ignore = ignore if ignore is not None else self.DEFAULT_IGNORE
        self._rev_mapping = invert_dictionary(self.mapping)
        self.skip_unknown_tags = skip_unknown_tags
        self.enforce_list_tags = enforce_list_tags

    @staticmethod
    def _as_list(value):
        """Wrap a bare string in a list; return any other value unchanged."""
        return [value] if isinstance(value, str) else value

    def _get_reference_type(self, ref):
        """Return the reference's type, or the default when it has none."""
        if self.REFERENCE_TYPE_KEY in ref:
            return ref[self.REFERENCE_TYPE_KEY]
        return self.DEFAULT_REFERENCE_TYPE

    def _format_line(self, tag, value=""):
        """Format a RIS line."""
        return self.PATTERN.format(tag=tag, value=value)

    def _format_reference(self, ref, count, n):
        """Yield the output lines of one reference.

        Args:
            ref (dict): Reference to write.
            count (int): 1-based position of the reference.
            n (int): Total number of references; no separator is emitted
                after the last one.
        """
        if header := self.set_header(count):
            yield header
        yield self._format_line(self.START_TAG, self._get_reference_type(ref))

        # The start tag has just been written and must not be repeated.
        tags_to_skip = [self.START_TAG, *self.ignore]
        if self.skip_unknown_tags:
            tags_to_skip.append(self.UNKNOWN_TAG)

        for label, value in ref.items():
            # not available
            try:
                tag = self._rev_mapping[label.lower()]
            except KeyError:
                warnings.warn(UserWarning(f"label `{label}` not exported"), stacklevel=2)
                continue

            # ignore
            if tag in tags_to_skip:
                continue

            # list tag; a bare string is one value, not a sequence of
            # characters
            if tag in self.list_tags or (not self.enforce_list_tags and isinstance(value, list)):
                for val_i in self._as_list(value):
                    yield self._format_line(tag, val_i)

            # unknown tag(s), which are lists held in a defaultdict
            elif tag == self.UNKNOWN_TAG:
                for unknown_tag, unknown_values in value.items():
                    for val_i in unknown_values:
                        yield self._format_line(unknown_tag, val_i)

            # write delimited tags; a bare string is written unchanged
            elif tag in self.delimiter_map:
                combined_val = self.delimiter_map[tag].join(self._as_list(value))
                yield self._format_line(tag, combined_val)

            # all non-list tags
            else:
                yield self._format_line(tag, value)

        yield self._format_line(self.END_TAG)

        if self.SEPARATOR is not None and count < n:
            yield self.SEPARATOR

    def _yield_lines(self, references, extra_line=False):
        """Yield the lines of all references; optionally end with an empty line."""
        n = len(references)
        for i, ref in enumerate(references):
            yield from self._format_reference(ref, count=i + 1, n=n)
        if extra_line:
            yield ""

    def format_lines(self, file, references):
        """Write references to a file."""
        for line in self._yield_lines(references):
            file.write(f"{line}{self.NEWLINE}")

    def formats(self, references: list[dict]) -> str:
        """Format a list of references into an RIS string."""
        # The extra empty line makes the joined string end with NEWLINE.
        lines = self._yield_lines(references, extra_line=True)
        return self.NEWLINE.join(lines)

    @abstractmethod
    def set_header(self, count: int) -> str:
        """Create the header for each reference; if empty string, unused."""
        ...
def dumps(references: list[dict], *, implementation: type[BaseWriter] = RisWriter, **kw) -> str:
    """Serialize references to an RIS formatted string.

    Each reference is a dictionary keyed by tag name. The string is built by
    an `implementation` instance created from the remaining keyword
    arguments.

    Args:
        references (list[dict]): List of references.
        implementation (BaseWriter): RIS implementation; RisWriter by default.
        **kw: Keyword arguments for the `implementation` constructor.

    Returns:
        str: The formatted references.
    """
    writer = implementation(**kw)
    return writer.formats(references)
3 | PY - 1948/07// 4 | TI - A Mathematical Theory of Communication 5 | JF - Bell System Technical Journal 6 | SP - 379 7 | EP - 423 8 | VL - 27 9 | ER - 10 | -------------------------------------------------------------------------------- /tests/data/example_bom.ris: -------------------------------------------------------------------------------- 1 | TY - JOUR 2 | DO - 10.1186/s40981-020-0316-0 3 | ER - 4 | 5 | -------------------------------------------------------------------------------- /tests/data/example_custom_list_tags.ris: -------------------------------------------------------------------------------- 1 | 1. 2 | TY - JOUR 3 | AU - Marx, Karl 4 | AU - Marxus, Karlus 5 | SN - 12345 6 | SN - ABCDEFG 7 | SN - 666666 8 | ER - 9 | -------------------------------------------------------------------------------- /tests/data/example_empty_tag.ris: -------------------------------------------------------------------------------- 1 | TY - JOUR 2 | ID - 2006713348 3 | T1 - Outcome Measures After Shoulder Stabilization in the Athletic Population: A Systematic Review of Clinical and Patient-Reported Metrics 4 | A1 - Fanning E. 5 | Y1 - 2020// 6 | N2 - Background: Athletic endeavor can require the "athletic shoulder" to tolerate significant load through supraphysiological range and often under considerable repetition. 7 | Outcome measures are valuable when determining an athlete's safe return to sport... 8 | KW - *athlete 9 | KW - biomechanics 10 | KW - bone remodeling 11 | JF - Orthopaedic Journal of Sports Medicine 12 | JA - Orthop. J. Sports Med. 
13 | VL - 8 14 | IS - 9 15 | SP - 16 | PB - SAGE Publications Ltd (E-mail: info@sagepub.co.uk) 17 | SN - 2325-9671 (electronic) 18 | DO - http://dx.doi.org/10.1177/2325967120950040 19 | ER - 20 | -------------------------------------------------------------------------------- /tests/data/example_extraneous_data.ris: -------------------------------------------------------------------------------- 1 | Record #1 of 2 2 | Provider: Provider 3 | Content: text/plain; charset="UTF-8" 4 | 1. 5 | TY - JOUR 6 | ID - 12345 7 | T1 - Title of reference 8 | A1 - Marx, Karl 9 | A1 - Lindgren, Astrid 10 | A2 - Glattauer, Daniel 11 | Y1 - 2014// 12 | N2 - BACKGROUND: Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. RESULTS: Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. CONCLUSIONS: Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. 13 | KW - Pippi 14 | KW - Nordwind 15 | KW - Piraten 16 | JF - Lorem 17 | JA - lorem 18 | VL - 9 19 | IS - 3 20 | SP - e0815 21 | CY - United States 22 | PB - Fun Factory 23 | SN - 1932-6208 24 | M1 - 1008150341 25 | L2 - http://example.com 26 | UR - http://example_url.com 27 | ER - 28 | 29 | Record #2 of 2 30 | Provider: Provider 31 | Content: text/plain; charset="UTF-8" 32 | 2. 33 | TY - JOUR 34 | ID - 12345 35 | T1 - The title of the reference 36 | A1 - Marxus, Karlus 37 | A1 - Lindgren, Astrid 38 | A2 - Glattauer, Daniel 39 | Y1 - 2006// 40 | N2 - BACKGROUND: Lorem dammed ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. RESULTS: Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. 
Nulla consequat massa quis enim. CONCLUSIONS: Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. 41 | KW - Pippi Langstrumpf 42 | KW - Nordwind 43 | KW - Piraten 44 | JF - Lorem 45 | JA - lorem 46 | VL - 6 47 | IS - 3 48 | SP - e0815341 49 | CY - Germany 50 | PB - Dark Factory 51 | SN - 1732-4208 52 | M1 - 1228150341 53 | L2 - http://example2.com 54 | UR - http://example_url.com 55 | ER - 56 | -------------------------------------------------------------------------------- /tests/data/example_full.ris: -------------------------------------------------------------------------------- 1 | 1. 2 | TY - JOUR 3 | ID - 12345 4 | T1 - Title of reference 5 | A1 - Marx, Karl 6 | A1 - Lindgren, Astrid 7 | A2 - Glattauer, Daniel 8 | Y1 - 2014// 9 | N2 - BACKGROUND: Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. RESULTS: Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. CONCLUSIONS: Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. 10 | KW - Pippi 11 | KW - Nordwind 12 | KW - Piraten 13 | JF - Lorem 14 | JA - lorem 15 | VL - 9 16 | IS - 3 17 | SP - e0815 18 | CY - United States 19 | PB - Fun Factory 20 | SN - 1932-6208 21 | M1 - 1008150341 22 | L2 - http://example.com 23 | UR - http://example_url.com 24 | ER - 25 | 26 | 2. 27 | TY - JOUR 28 | ID - 12345 29 | T1 - The title of the reference 30 | A1 - Marxus, Karlus 31 | A1 - Lindgren, Astrid 32 | A2 - Glattauer, Daniel 33 | Y1 - 2006// 34 | N2 - BACKGROUND: Lorem dammed ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. 
Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. RESULTS: Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. CONCLUSIONS: Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. 35 | KW - Pippi Langstrumpf 36 | KW - Nordwind 37 | KW - Piraten 38 | JF - Lorem 39 | JA - lorem 40 | VL - 6 41 | IS - 3 42 | SP - e0815341 43 | CY - Germany 44 | PB - Dark Factory 45 | SN - 1732-4208 46 | M1 - 1228150341 47 | L2 - http://example2.com 48 | UR - http://example_url.com 49 | ER - 50 | -------------------------------------------------------------------------------- /tests/data/example_full_without_whitespace.ris: -------------------------------------------------------------------------------- 1 | 1. 2 | TY - JOUR 3 | ID - 12345 4 | T1 - Title of reference 5 | A1 - Marx, Karl 6 | A1 - Lindgren, Astrid 7 | A2 - Glattauer, Daniel 8 | Y1 - 2014// 9 | N2 - BACKGROUND: Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. RESULTS: Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. CONCLUSIONS: Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. 10 | KW - Pippi 11 | KW - Nordwind 12 | KW - Piraten 13 | JF - Lorem 14 | JA - lorem 15 | VL - 9 16 | IS - 3 17 | SP - e0815 18 | CY - United States 19 | PB - Fun Factory 20 | SN - 1932-6208 21 | M1 - 1008150341 22 | L2 - http://example.com 23 | UR - http://example_url.com 24 | ER - 25 | 26 | 2. 
27 | TY - JOUR 28 | ID - 12345 29 | T1 - The title of the reference 30 | A1 - Marxus, Karlus 31 | A1 - Lindgren, Astrid 32 | A2 - Glattauer, Daniel 33 | Y1 - 2006// 34 | N2 - BACKGROUND: Lorem dammed ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. RESULTS: Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. CONCLUSIONS: Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. 35 | KW - Pippi Langstrumpf 36 | KW - Nordwind 37 | KW - Piraten 38 | JF - Lorem 39 | JA - lorem 40 | VL - 6 41 | IS - 3 42 | SP - e0815341 43 | CY - Germany 44 | PB - Dark Factory 45 | SN - 1732-4208 46 | M1 - 1228150341 47 | L2 - http://example2.com 48 | UR - http://example_url.com 49 | ER - 50 | -------------------------------------------------------------------------------- /tests/data/example_full_write.ris: -------------------------------------------------------------------------------- 1 | 1. 2 | TY - JOUR 3 | T1 - Title of reference 4 | A1 - Marx, Karl 5 | A1 - Lindgren, Astrid 6 | A2 - Glattauer, Daniel 7 | Y1 - 2014// 8 | N2 - BACKGROUND: Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. RESULTS: Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. CONCLUSIONS: Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. 
9 | JA - lorem 10 | VL - 9 11 | IS - 3 12 | SP - e0815 13 | CY - United States 14 | PB - Fun Factory 15 | SN - 1932-6208 16 | M1 - 1008150341 17 | L2 - http://example.com 18 | UR - http://example_url.com 19 | ER - 20 | 21 | 2. 22 | TY - JOUR 23 | T1 - The title of the reference 24 | A1 - Marxus, Karlus 25 | A1 - Lindgren, Astrid 26 | A2 - Glattauer, Daniel 27 | Y1 - 2006// 28 | N2 - BACKGROUND: Lorem dammed ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. RESULTS: Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. CONCLUSIONS: Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. 29 | JA - lorem 30 | VL - 6 31 | IS - 3 32 | SP - e0815341 33 | CY - Germany 34 | PB - Dark Factory 35 | SN - 1732-4208 36 | M1 - 1228150341 37 | L2 - http://example2.com 38 | UR - http://example_url.com 39 | ER - 40 | -------------------------------------------------------------------------------- /tests/data/example_multi_unknown_tags.ris: -------------------------------------------------------------------------------- 1 | TY - JOUR 2 | AU - Shannon,Claude E. 
3 | PY - 1948/07// 4 | TI - A Mathematical Theory of Communication 5 | JF - Bell System Technical Journal 6 | EP - 423 7 | VL - 27 8 | JP - CRISPR 9 | DC - Direct Current 10 | ER - 11 | -------------------------------------------------------------------------------- /tests/data/example_multiline.ris: -------------------------------------------------------------------------------- 1 | TY - JOUR 2 | N2 - first line, ER then second line and at the end the last line 3 | N1 - first line 4 | * second line 5 | * last line 6 | ER - 7 | 8 | TY - JOUR 9 | N2 - first line, 10 | ER then second line and at the end 11 | the last line 12 | N1 - first line 13 | * second line 14 | * last line 15 | ER - 16 | 17 | TY - JOUR 18 | N2 - first line, 19 | ER then second line and at the end 20 | the last line 21 | N1 - first line 22 | * second line 23 | * last line 24 | ER - 25 | -------------------------------------------------------------------------------- /tests/data/example_single_unknown_tag.ris: -------------------------------------------------------------------------------- 1 | TY - JOUR 2 | AU - Shannon,Claude E. 3 | PY - 1948/07// 4 | TI - A Mathematical Theory of Communication 5 | JF - Bell System Technical Journal 6 | SP - 379 7 | EP - 423 8 | VL - 27 9 | JP - CRISPR 10 | JP - Direct Current 11 | ER - 12 | -------------------------------------------------------------------------------- /tests/data/example_starting_newlines.ris: -------------------------------------------------------------------------------- 1 | 2 | 3 | TY - JOUR 4 | AU - Shannon,Claude E. 5 | PY - 1948/07// 6 | TI - A Mathematical Theory of Communication 7 | JF - Bell System Technical Journal 8 | SP - 379 9 | EP - 423 10 | VL - 27 11 | ER - 12 | -------------------------------------------------------------------------------- /tests/data/example_urls.ris: -------------------------------------------------------------------------------- 1 | TY - JOUR 2 | AU - Shannon,Claude E. 
3 | PY - 1948/07// 4 | TI - A Mathematical Theory of Communication 5 | JF - Bell System Technical Journal 6 | SP - 379 7 | EP - 423 8 | VL - 27 9 | UR - http://example.com 10 | ER - 11 | 12 | TY - JOUR 13 | AU - Shannon,Claude E. 14 | PY - 1948/07// 15 | TI - A Mathematical Theory of Communication 16 | JF - Bell System Technical Journal 17 | SP - 379 18 | EP - 423 19 | VL - 27 20 | UR - http://example.com 21 | UR - http://www.example.com 22 | ER - 23 | 24 | TY - JOUR 25 | AU - Shannon,Claude E. 26 | PY - 1948/07// 27 | TI - A Mathematical Theory of Communication 28 | JF - Bell System Technical Journal 29 | SP - 379 30 | EP - 423 31 | VL - 27 32 | UR - http://example.com; http://www.example.com 33 | ER - 34 | 35 | TY - JOUR 36 | AU - Shannon,Claude E. 37 | PY - 1948/07// 38 | TI - A Mathematical Theory of Communication 39 | JF - Bell System Technical Journal 40 | SP - 379 41 | EP - 423 42 | VL - 27 43 | UR - http://example.com;http://www.example.com 44 | ER - 45 | -------------------------------------------------------------------------------- /tests/data/example_utf_chars.ris: -------------------------------------------------------------------------------- 1 | TY - Journal Article 2 | AU - Dobrokhotova, Yu E. 3 | AU - Yusupova, R. O. 4 | AU - Ozerova, R. I. 5 | AU - Fayzullin, L. Z. 6 | AU - Karnaukhov, V. N. 
7 | PY - 2009 8 | AB - Состояние рецепторного аппарата эндометрия и метаболизм эстрогенов при гиперплазиях эндометрия в позднем репродуктивном периоде 9 | The state of the receptor apparatus of the endometrium and estrogen metabolism in endometrial hyperplasia in the late reproductive period 10 | JO - Gynecology, Obstetrics and Perinatology 11 | VO - 8 12 | IS - 3 13 | SP - 52 14 | EP - 57 15 | DA - 2009 16 | N1 - Ä,Ö,Ü,ä,ö,ü,ß 17 | ST - Состояние рецепторного аппарата эндометрия и метаболизм эстрогенов при гиперплазиях эндометрия в позднем репродуктивном периоде 18 | ER - 19 | -------------------------------------------------------------------------------- /tests/data/example_wos.ris: -------------------------------------------------------------------------------- 1 | FN Clarivate Analytics Web of Science 2 | VR 1.0 3 | PT J 4 | AU Parkes-Loach, PS 5 | Majeed, AP 6 | Law, CJ 7 | Loach, PA 8 | AF Parkes-Loach, PS 9 | Majeed, AP 10 | Law, CJ 11 | Loach, PA 12 | TI Interactions stabilizing the structure of the core light-harvesting 13 | complex (LHl) of photosynthetic bacteria and its subunit (B820) 14 | SO BIOCHEMISTRY 15 | LA English 16 | DT Article 17 | ID CHEMICALLY SYNTHESIZED POLYPEPTIDES; IN-VITRO RECONSTITUTION; 18 | AMINO-ACID-SEQUENCE; RHODOSPIRILLUM-RUBRUM; RHODOBACTER-SPHAEROIDES; 19 | RHODOPSEUDOMONAS-VIRIDIS; BACTERIOCHLOROPHYLL-A; ALPHA-POLYPEPTIDE; 20 | CRYSTAL-STRUCTURE; ANTENNA COMPLEX 21 | AB Reconstitution experiments with a chemically synthesized core light-harvesting (LH1) beta-polypeptide analogue having 3-methylhistidine instead of histidine in the position that normally donates the coordinating ligand to bacteriochlorophyll (Bchl) have provided the experimental data needed to assign to B820 one of the two possible alphabeta.2Bchl pairs that are observed in the crystal structure of LH2 from Phaeospirillum (formerly Rhodospirillum) molischianum, the one with rings III and V of Bchl overlapping. 
Consistent with the assigned structure, experimental evidence is provided to show that significant stabilizing interactions for both the subunit complex (B820) and LH1 occur between the N-terminal regions of the alpha- and beta-polypeptides. On the basis of the results with the chemically synthesized polypeptides used in this study, along with earlier results with protease-modified polypeptides, mutants, and chemically synthesized polypeptides, the importance of a stretch of 9-13 amino acids at the N-terminal end of the alpha- and beta-polypeptides is underscored. A progressive loss of interaction with the LH1 beta-potypeptide was found as the first three N-terminal amino acids of the LH1 alpha-polypeptide were removed. The absence of the N-terminal formylmethionine (fMet), or conversion of the sulfur in this fMet to the sulfoxide, resulted in a decrease in LH1 formation. In addition to the removal of fMet, removal of the next two amino acids also resulted in a decrease in K-assoc for B820 formation and nearly eliminated the ability to form LH1. It is suggested that the first three amino acids (fMetTrpArg) of the LH1 alpha-polypeptide of Rhodospirillum rubrum form a cluster that is most likely involved in close interaction with the side chain of His -18 (see Figure 1 for numbering of amino acids) of the beta-polypeptide. The results provide evidence that the folding motif of the alpha- and beta-polypeptides in the N-terminal region observed in crystal structures of LH2 is also present in LH1 and contributes significantly to stabilizing the complex. 22 | C1 Northwestern Univ, Dept Biochem Mol Biol & Cell Biol, Evanston, IL 60208 USA. 23 | RP Loach, PA (reprint author), Northwestern Univ, Dept Biochem Mol Biol & Cell Biol, Hogan Hall,Room 2-100, Evanston, IL 60208 USA. 
24 | EM p-loach@northwestern.edu 25 | RI Law, Christopher/E-7174-2011 26 | CR ALLEN JP, 1986, P NATL ACAD SCI USA, V83, P8589, DOI 10.1073/pnas.83.22.8589 27 | Arluison W, 2004, BIOCHEMISTRY-US, V43, P1276, DOI 10.1021/bi030205v 28 | BARRICK D, 1994, BIOCHEMISTRY-US, V33, P6546, DOI 10.1021/bi00187a023 29 | BERGER G, 1987, J LIQ CHROMATOGR, V10, P1519, DOI 10.1080/01483918708066784 30 | BRUNISHOLZ RA, 1984, H-S Z PHYSIOL CHEM, V365, P675, DOI 10.1515/bchm2.1984.365.2.675 31 | BRUNISHOLZ RA, 1981, FEBS LETT, V129, P150, DOI 10.1016/0014-5793(81)80778-8 32 | CHANG CH, 1986, FEBS LETT, V205, P82, DOI 10.1016/0014-5793(86)80870-5 33 | CHANG MC, 1990, PHOTOCHEM PHOTOBIOL, V52, P873, DOI 10.1111/j.1751-1097.1990.tb08696.x 34 | CHANG MC, 1990, BIOCHEMISTRY-US, V29, P421, DOI 10.1021/bi00454a017 35 | DAVIS CM, 1995, J BIOL CHEM, V270, P5793, DOI 10.1074/jbc.270.11.5793 36 | Davis CM, 1997, BIOCHEMISTRY-US, V36, P3671, DOI 10.1021/bi962386p 37 | DEISENHOFER J, 1984, J MOL BIOL, V180, P385, DOI 10.1016/S0022-2836(84)80011-X 38 | DERFELD CA, 1994, BIOCHIM BIOPHYS ACTA, V1185, P193 39 | Francia F, 1999, BIOCHEMISTRY-US, V38, P6834, DOI 10.1021/bi982891h 40 | Francia F, 2002, EUR J BIOCHEM, V269, P1877, DOI 10.1046/j.1432-1033.2002.02834.x 41 | Frese RN, 2000, P NATL ACAD SCI USA, V97, P5197, DOI 10.1073/pnas.090083797 42 | Goldsmith JO, 1996, BIOCHEMISTRY-US, V35, P2421, DOI 10.1021/bi9523365 43 | HELLER BA, 1990, PHOTOCHEM PHOTOBIOL, V51, P621, DOI 10.1111/j.1751-1097.1990.tb01975.x 44 | Hu XC, 2002, Q REV BIOPHYS, V35, P1, DOI 10.1017/S0033583501003754 45 | Jamieson SJ, 2002, EMBO J, V21, P3927, DOI 10.1093/emboj/cdf410 46 | JIRSAKOVA V, 1993, BIOCHIM BIOPHYS ACTA, V1183, P301, DOI 10.1016/0005-2728(93)90231-4 47 | Jungas C, 1999, EMBO J, V18, P534, DOI 10.1093/emboj/18.3.534 48 | KARRASCH S, 1995, EMBO J, V14, P631, DOI 10.1002/j.1460-2075.1995.tb07041.x 49 | Kehoe JW, 1998, BIOCHEMISTRY-US, V37, P3418, DOI 10.1021/bi9722709 50 | Koepke J, 1996, STRUCTURE, V4, P581, DOI 
10.1016/S0969-2126(96)00063-9 51 | Law CJ, 2003, PHOTOSYNTH RES, V75, P193, DOI 10.1023/A:1023982327748 52 | LEE JK, 1989, J BACTERIOL, V171, P3391, DOI 10.1128/jb.171.6.3391-3405.1989 53 | LILBURN TG, 1995, J BACTERIOL, V177, P4593, DOI 10.1128/jb.177.16.4593-4600.1995 54 | LOACH PA, 1990, FEMS SYMP, V53, P235 55 | LOACH PA, 1994, PHOTOSYNTH RES, V40, P231, DOI 10.1007/BF00034773 56 | LOACH PA, 1995, ANOXYGENIC PHOTOSYNT, P437 57 | LOACH PA, 1985, MOL BIOL PHOTOSYNTHE, P197 58 | MCDERMOTT G, 1995, NATURE, V374, P517, DOI 10.1038/374517a0 59 | McGlynn P, 1996, J BIOL CHEM, V271, P3285, DOI 10.1074/jbc.271.6.3285 60 | Meadows KA, 1998, BIOCHEMISTRY-US, V37, P3411, DOI 10.1021/bi972269+ 61 | MEADOWS KA, 1995, BIOCHEMISTRY-US, V34, P1559, DOI 10.1021/bi00005a012 62 | MECKENSTOCK RU, 1992, FEBS LETT, V311, P128, DOI 10.1016/0014-5793(92)81383-W 63 | MICHALSKI TJ, 1988, J AM CHEM SOC, V110, P5888, DOI 10.1021/ja00225a047 64 | MILLER JF, 1987, BIOCHEMISTRY-US, V26, P5055, DOI 10.1021/bi00390a026 65 | Papiz MZ, 1996, TRENDS PLANT SCI, V1, P198, DOI 10.1016/1360-1385(96)20005-6 66 | Parkes-Loach PS, 2001, BIOCHEMISTRY-US, V40, P5593, DOI 10.1021/bi002580i 67 | PARKESLOACH PS, 1994, PHOTOSYNTH RES, V40, P247, DOI 10.1007/BF00034774 68 | PARKESLOACH PS, 1988, BIOCHEMISTRY-US, V27, P2718, DOI 10.1021/bi00408a011 69 | Pond AE, 2000, INORG CHEM, V39, P6061, DOI 10.1021/ic0007198 70 | Roszak AW, 2003, SCIENCE, V302, P1969, DOI 10.1126/science.1088892 71 | Scheuring S, 2003, P NATL ACAD SCI USA, V100, P1690, DOI 10.1073/pnas.0437992100 72 | THEILER R, 1984, H-S Z PHYSIOL CHEM, V365, P703, DOI 10.1515/bchm2.1984.365.2.703 73 | Todd JB, 1998, BIOCHEMISTRY-US, V37, P17458, DOI 10.1021/bi981114e 74 | Todd JB, 1999, PHOTOSYNTH RES, V62, P85, DOI 10.1023/A:1006337827672 75 | TONN SJ, 1977, BIOCHEMISTRY-US, V16, P877, DOI 10.1021/bi00624a011 76 | VANGRONDELLE R, 1994, BBA-BIOENERGETICS, V1187, P1, DOI 10.1016/0005-2728(94)90166-X 77 | VANMOURIK F, 1991, BIOCHIM BIOPHYS ACTA, V1059, 
P111, DOI 10.1016/S0005-2728(05)80193-8 78 | VISSCHERS RW, 1991, BIOCHEMISTRY-US, V30, P5734, DOI 10.1021/bi00237a015 79 | VISSCHERS RW, 1993, BIOCHIM BIOPHYS ACTA, V1183, P369, DOI 10.1016/0005-2728(93)90241-7 80 | Walz T, 1997, J MOL BIOL, V265, P107, DOI 10.1006/jmbi.1996.0714 81 | Wang ZY, 2002, J AM CHEM SOC, V124, P1072, DOI 10.1021/ja0112994 82 | Wang ZY, 2001, EUR J BIOCHEM, V268, P3375, DOI 10.1046/j.1432-1327.2001.02234.x 83 | Westerhuis WHJ, 1998, BBA-BIOENERGETICS, V1366, P317, DOI 10.1016/S0005-2728(98)00132-7 84 | ZUBER H, 1995, ANOXYGENIC PHOTOSYNT, P315 85 | NR 59 86 | TC 23 87 | Z9 25 88 | U1 0 89 | U2 5 90 | PU AMER CHEMICAL SOC 91 | PI WASHINGTON 92 | PA 1155 16TH ST, NW, WASHINGTON, DC 20036 USA 93 | SN 0006-2960 94 | J9 BIOCHEMISTRY-US 95 | JI Biochemistry 96 | PD JUN 8 97 | PY 2004 98 | VL 43 99 | IS 22 100 | BP 7003 101 | EP 7016 102 | DI 10.1021/bi049798f 103 | PG 14 104 | WC Biochemistry & Molecular Biology 105 | SC Biochemistry & Molecular Biology 106 | GA 826CV 107 | UT WOS:000221807500019 108 | PM 15170338 109 | DA 2019-03-18 110 | ER 111 | 112 | PT J 113 | AU Cao, WX 114 | Ye, X 115 | Georgiev, GY 116 | Berezhna, S 117 | Sjodin, T 118 | Demidov, AA 119 | Wang, W 120 | Sage, JT 121 | Champion, PM 122 | AF Cao, WX 123 | Ye, X 124 | Georgiev, GY 125 | Berezhna, S 126 | Sjodin, T 127 | Demidov, AA 128 | Wang, W 129 | Sage, JT 130 | Champion, PM 131 | TI Proximal and distal influences on ligand binding kinetics in 132 | microperoxidase and heme model compounds 133 | SO BIOCHEMISTRY 134 | LA English 135 | DT Article 136 | ID SPERM-WHALE MYOGLOBIN; RESONANCE RAMAN-SCATTERING; CARBON-MONOXIDE 137 | BINDING; POCKET DOCKING SITE; T-STATE HEMOGLOBIN; CYTOCHROME-C; GEMINATE 138 | RECOMBINATION; LOW PH; VIBRATIONAL-RELAXATION; QUATERNARY STRUCTURE 139 | AB We use laser flash photolysis and time-resolved Raman spectroscopy of CO-bound heme complexes to study proximal and distal influences on ligand rebinding kinetics. 
We report kinetics of CO rebinding to microperoxidase (MP) and 2-methylimidazole ligated Fe protoporphyrin IX in the 10 ns to 10 ms time window. We also report CO rebinding kinetics of MP in the 150 fs to 140 ps time window. For dilute, micelle-encapsulated (monodisperse) samples of MP, we do not observe the large amplitude geminate decay at similar to100 ps previously reported in time-resolved IR measurements on highly concentrated samples [Lim, M., Jackson, T. A., and Anfinrud, P. A. (1997) J. Biol. Inorg. Chem. 2, 531-536]. However, for high concentration aggregated samples, we do observe the large amplitude picosecond CO geminate rebinding and find that it is correlated with the absence of the iron-histidine vibrational mode in the time-resolved Raman spectrum. On the basis of these results, the energetic significance of a putative distal pocket CO docking site proposed by Lim et al. may need to be reconsidered. Finally, when high concentration samples of native myoglobin (Mb) were studied as a control, an analogous increase in the geminate rebinding kinetics was not observed. This verifies that studies of Mb under dilute conditions are applicable to the more concentrated regime found in the cellular milieu. 140 | C1 Northeastern Univ, Dept Phys, Boston, MA 02115 USA. 141 | Northeastern Univ, Ctr Interdisciplinary Res Complex Syst, Boston, MA 02115 USA. 142 | RP Champion, PM (reprint author), Northeastern Univ, Dept Phys, Boston, MA 02115 USA. 143 | EM jtsage@neu.edu; p.champion@neu.edu 144 | FU NIGMS NIH HHS [GM-52002]; NIDDK NIH HHS [DK035090] 145 | CR Adams P. 
A., 1996, CYTOCHROME C MULTIDI, P635 146 | ANFINRUD PA, 1994, P SOC PHOTO-OPT INS, V2138, P107, DOI 10.1117/12.181348 147 | Antonini E., 1971, HEMOGLOBIN MYOGLOBIN 148 | AUSTIN RH, 1975, BIOCHEMISTRY-US, V14, P5355, DOI 10.1021/bi00695a021 149 | BANGCHAROENPAURPONG O, 1984, J AM CHEM SOC, V106, P5688, DOI 10.1021/ja00331a045 150 | Barrick D, 1997, NAT STRUCT BIOL, V4, P78, DOI 10.1038/nsb0197-78 151 | BLAUER G, 1993, BIOCHEMISTRY-US, V32, P6674, DOI 10.1021/bi00077a021 152 | Brunori M, 2000, P NATL ACAD SCI USA, V97, P2058, DOI 10.1073/pnas.040459697 153 | Brunori M, 2000, BIOPHYS CHEM, V86, P221, DOI 10.1016/S0301-4622(00)00142-3 154 | CAO W, 2003, THESIS NE U BOSTON 155 | Cao WX, 2001, BIOPHYS J, V80, p283A 156 | Cao WX, 2001, BIOCHEMISTRY-US, V40, P5728, DOI 10.1021/bi010067e 157 | CARRAWAY AD, 1995, J INORG BIOCHEM, V60, P267, DOI 10.1016/0162-0134(95)00026-7 158 | CARVER TE, 1990, J BIOL CHEM, V265, P20007 159 | CHANCE B, 1966, J MOL BIOL, V17, P525, DOI 10.1016/S0022-2836(66)80162-6 160 | CHANG CK, 1973, J AM CHEM SOC, V95, P8477, DOI 10.1021/ja00806a062 161 | Christian JF, 1997, BIOCHEMISTRY-US, V36, P11198, DOI 10.1021/bi9710075 162 | Chu K, 2000, NATURE, V403, P921 163 | DUPRAT AF, 1995, BIOCHEMISTRY-US, V34, P2634, DOI 10.1021/bi00008a030 164 | ELBER R, 1990, J AM CHEM SOC, V112, P9161, DOI 10.1021/ja00181a020 165 | Franzen S, 2001, BIOCHEMISTRY-US, V40, P5299, DOI 10.1021/bi0023403 166 | GEIBEL J, 1978, J AM CHEM SOC, V100, P3575, DOI 10.1021/ja00479a047 167 | Harvey JN, 2000, J AM CHEM SOC, V122, P12401, DOI 10.1021/ja005543n 168 | HASINOFF BB, 1981, ARCH BIOCHEM BIOPHYS, V211, P396, DOI 10.1016/0003-9861(81)90470-7 169 | HENRY ER, 1983, J MOL BIOL, V166, P443, DOI 10.1016/S0022-2836(83)80094-1 170 | HORI H, 1980, J AM CHEM SOC, V102, P3608, DOI 10.1021/ja00530a049 171 | HUANG Y, 1991, J AM CHEM SOC, V113, P9141, DOI 10.1021/ja00024a018 172 | KINCAID J, 1979, P NATL ACAD SCI USA, V76, P549, DOI 10.1073/pnas.76.2.549 173 | Kumazaki S, 2000, J BIOL CHEM, 
V275, P38378, DOI 10.1074/jbc.M005533200 174 | Kundu S, 2002, PROTEINS, V46, P268, DOI 10.1002/prot.10048 175 | Laberge M, 1998, J BIOMOL STRUCT DYN, V15, P1039, DOI 10.1080/07391102.1998.10508999 176 | LI XY, 1988, J AM CHEM SOC, V110, P6024, DOI 10.1021/ja00226a017 177 | LIM M, 1995, SCIENCE, V269, P962, DOI 10.1126/science.7638619 178 | LIM M, 2001, ULTRAFAST INFRARED R, P191 179 | Lim MH, 1997, J BIOL INORG CHEM, V2, P531, DOI 10.1007/s007750050167 180 | LIM MH, 1995, J CHEM PHYS, V102, P4355, DOI 10.1063/1.469484 181 | Lim MH, 1997, NAT STRUCT BIOL, V4, P209, DOI 10.1038/nsb0397-209 182 | Linke W. F., 1940, SOLUBILITIES INORGAN 183 | MAZUMDAR S, 1991, INORG CHEM, V30, P700, DOI 10.1021/ic00004a020 184 | McMahon BH, 2000, J CHEM PHYS, V113, P6831, DOI 10.1063/1.1309524 185 | MIERS JB, 1991, J CHEM PHYS, V94, P1825, DOI 10.1063/1.459957 186 | NAGAI K, 1980, J MOL BIOL, V136, P271, DOI 10.1016/0022-2836(80)90374-5 187 | Negrerie M, 2001, J BIOL CHEM, V276, P46815, DOI 10.1074/jbc.M102224200 188 | Olson JS, 1997, J BIOL INORG CHEM, V2, P544, DOI 10.1007/s007750050169 189 | Olson JS, 1996, J BIOL CHEM, V271, P17593, DOI 10.1074/jbc.271.30.17593 190 | OLSON JS, 1988, NATURE, V336, P265, DOI 10.1038/336265a0 191 | Ostermann A, 2000, NATURE, V404, P205, DOI 10.1038/35004622 192 | OTHMAN S, 1993, BIOCHEMISTRY-US, V32, P9781, DOI 10.1021/bi00088a033 193 | PERUTZ MF, 1970, NATURE, V228, P726, DOI 10.1038/228726a0 194 | PERUTZ MF, 1966, J MOL BIOL, V21, P199, DOI 10.1016/0022-2836(66)90088-X 195 | Peterson ES, 1998, BIOCHEMISTRY-US, V37, P4346, DOI 10.1021/bi9708693 196 | PHILLIPS SEV, 1980, J MOL BIOL, V142, P531, DOI 10.1016/0022-2836(80)90262-4 197 | QUILLIN ML, 1993, J MOL BIOL, V234, P140, DOI 10.1006/jmbi.1993.1569 198 | RAY GB, 1994, J AM CHEM SOC, V116, P162, DOI 10.1021/ja00080a019 199 | RINGE D, 1984, BIOCHEMISTRY-US, V23, P2, DOI 10.1021/bi00296a001 200 | ROUSSEAU DL, 1988, RESONANCE RAMAN SPEC, P133 201 | SAGE JT, 1991, BIOCHEMISTRY-US, V30, P1227, DOI 
10.1021/bi00219a010 202 | SAGE JT, 1991, BIOCHEMISTRY-US, V30, P1237, DOI 10.1021/bi00219a011 203 | SAGE JT, 1996, COMPREHENSIVE SUPRAM, V5, P171 204 | SAGE JT, 2004, ENCY SUPRAMOLECULAR 205 | SALMEEN I, 1978, BIOCHEMISTRY-US, V17, P800, DOI 10.1021/bi00598a008 206 | Scott EE, 1997, BIOCHEMISTRY-US, V36, P11909, DOI 10.1021/bi970719s 207 | Scott EE, 2001, J BIOL CHEM, V276, P5177, DOI 10.1074/jbc.M008282200 208 | SHARMA VS, 1975, BIOCHEM BIOPH RES CO, V66, P1301, DOI 10.1016/0006-291X(75)90501-X 209 | Sigfridsson E, 2002, J INORG BIOCHEM, V91, P101, DOI 10.1016/S0162-0134(02)00426-9 210 | Spiro TG, 2001, ACCOUNTS CHEM RES, V34, P137, DOI 10.1021/ar0001108j 211 | SRAJER V, 1988, J AM CHEM SOC, V110, P6656, DOI 10.1021/ja00228a009 212 | Sugimoto T, 1998, BIOPHYS J, V75, P2188, DOI 10.1016/S0006-3495(98)77662-3 213 | TERAOKA J, 1981, J BIOL CHEM, V256, P3969 214 | Tian WD, 1996, BIOCHEMISTRY-US, V35, P3487, DOI 10.1021/bi952474u 215 | TRAYLOR TG, 1981, ACCOUNTS CHEM RES, V14, P102, DOI 10.1021/ar00064a002 216 | TRAYLOR TG, 1990, J AM CHEM SOC, V112, P6875, DOI 10.1021/ja00175a022 217 | TRAYLOR TG, 1992, J AM CHEM SOC, V114, P417, DOI 10.1021/ja00028a005 218 | Unno M, 1998, J AM CHEM SOC, V120, P2670, DOI 10.1021/ja973293d 219 | URRY DW, 1967, J AM CHEM SOC, V89, P5276, DOI 10.1021/ja00996a034 220 | URRY DW, 1967, J AM CHEM SOC, V89, P4190, DOI 10.1021/ja00992a601 221 | Vogel KM, 1999, J AM CHEM SOC, V121, P9915, DOI 10.1021/ja990042r 222 | WANG JS, 1989, J PHYS CHEM-US, V93, P7925, DOI 10.1021/j100360a038 223 | Wang W, 2000, J PHYS CHEM B, V104, P10789, DOI 10.1021/jp0008602 224 | WEI YZ, 1994, J PHYS CHEM-US, V98, P6644, DOI 10.1021/j100077a034 225 | WHITE DK, 1979, J AM CHEM SOC, V101, P2443, DOI 10.1021/ja00503a034 226 | Yang F, 1996, J MOL BIOL, V256, P762, DOI 10.1006/jmbi.1996.0123 227 | Ye X, 2002, J AM CHEM SOC, V124, P5914, DOI 10.1021/ja017359n 228 | YE X, 2003, THESIS NE U BOSTON 229 | Ye XO, 2003, J PHYS CHEM A, V107, P8156, DOI 10.1021/jp0276799 230 | ZHU 
L, 1992, J MOL BIOL, V224, P207, DOI 10.1016/0022-2836(92)90584-7 231 | NR 86 232 | TC 31 233 | Z9 31 234 | U1 1 235 | U2 13 236 | PU AMER CHEMICAL SOC 237 | PI WASHINGTON 238 | PA 1155 16TH ST, NW, WASHINGTON, DC 20036 USA 239 | SN 0006-2960 240 | J9 BIOCHEMISTRY-US 241 | JI Biochemistry 242 | PD JUN 8 243 | PY 2004 244 | VL 43 245 | IS 22 246 | BP 7017 247 | EP 7027 248 | DI 10.1021/bi0497291 249 | PG 11 250 | WC Biochemistry & Molecular Biology 251 | SC Biochemistry & Molecular Biology 252 | GA 826CV 253 | UT WOS:000221807500020 254 | PM 15170339 255 | DA 2019-03-18 256 | ER 257 | 258 | EF 259 | -------------------------------------------------------------------------------- /tests/test_benchmark.py: -------------------------------------------------------------------------------- 1 | import rispy 2 | 3 | EXAMPLE_RECORD = """ 4 | 42. 5 | TY - JOUR 6 | ID - 12345 7 | T1 - The title of the reference 8 | A1 - Marxus, Karlus 9 | A1 - Lindgren, Astrid 10 | A2 - Glattauer, Daniel 11 | Y1 - 2006// 12 | N2 - BACKGROUND: Lorem dammed ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. RESULTS: Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. CONCLUSIONS: Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. 
13 | KW - Pippi Langstrumpf 14 | KW - Nordwind 15 | KW - Piraten 16 | KW - Seeräuber 17 | KW - Kinderbuch 18 | KW - Astrid Lindgren 19 | JF - Lorem ipsum dolor sit amet 20 | JA - lorem ipsum dolor sit amet 21 | VL - 6 22 | IS - 3 23 | SP - e0815341 24 | CY - Germany 25 | PB - Dark Factory 26 | SN - 1732-4208 27 | M1 - 1228150341 28 | L2 - http://example2.com 29 | UR - http://example.com/1 30 | UR - http://example.com/2 31 | UR - http://example.com/3 32 | DO - 10.1371/journal.pone.0081534 33 | ER - 34 | 35 | """ # noqa 36 | 37 | 38 | EXAMPLE_RECORD_MULTILINE = """ 39 | 42. 40 | TY - JOUR 41 | ID - 12345 42 | T1 - The title of the reference 43 | A1 - Marxus, Karlus 44 | A1 - Lindgren, Astrid 45 | A2 - Glattauer, Daniel 46 | Y1 - 2006// 47 | N2 - BACKGROUND: Lorem dammed ipsum dolor sit amet, 48 | consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. 49 | - Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus 50 | - mus. RESULTS: Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. 51 | Nulla consequat massa quis enim. CONCLUSIONS: Donec pede justo, fringilla vel, aliquet 52 | nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam 53 | dictum felis eu pede mollis pretium. 
54 | KW - Pippi Langstrumpf 55 | KW - Nordwind 56 | KW - Piraten 57 | KW - Seeräuber 58 | KW - Kinderbuch 59 | KW - Astrid Lindgren 60 | JF - Lorem ipsum dolor sit amet 61 | JA - lorem ipsum dolor sit amet 62 | VL - 6 63 | IS - 3 64 | SP - e0815341 65 | CY - Germany 66 | PB - Dark Factory 67 | SN - 1732-4208 68 | M1 - 1228150341 69 | L2 - http://example2.com 70 | UR - http://example.com/1 71 | UR - http://example.com/2 72 | UR - http://example.com/3 73 | DO - 10.1371/journal.pone.0081534 74 | ER - 75 | 76 | """ 77 | 78 | 79 | def test_benchmark_rispy_large(benchmark): 80 | benchmark_dataset = EXAMPLE_RECORD * 10000 81 | 82 | benchmark(rispy.loads, benchmark_dataset) 83 | 84 | 85 | def test_benchmark_rispy_large_multiline(benchmark): 86 | benchmark_dataset = EXAMPLE_RECORD_MULTILINE * 10000 87 | 88 | benchmark(rispy.loads, benchmark_dataset) 89 | -------------------------------------------------------------------------------- /tests/test_parser.py: -------------------------------------------------------------------------------- 1 | from io import StringIO 2 | from pathlib import Path 3 | 4 | import pytest 5 | 6 | import rispy 7 | 8 | DATA_DIR = Path(__file__).parent.resolve() / "data" 9 | 10 | 11 | @pytest.fixture 12 | def example_basic(): 13 | # expected output from `example_basic.ris` 14 | return [ 15 | { 16 | "type_of_reference": "JOUR", 17 | "authors": ["Shannon,Claude E."], 18 | "year": "1948/07//", 19 | "title": "A Mathematical Theory of Communication", 20 | "alternate_title3": "Bell System Technical Journal", 21 | "start_page": "379", 22 | "end_page": "423", 23 | "volume": "27", 24 | } 25 | ] 26 | 27 | 28 | def test_load_file(example_basic): 29 | # test with file object 30 | filepath = DATA_DIR / "example_basic.ris" 31 | with open(filepath) as f: 32 | entries = rispy.load(f) 33 | assert example_basic == entries 34 | 35 | 36 | def test_load_file_noreadline(example_basic): 37 | # test with file object that has no readline 38 | 39 | class NoReadline(StringIO): 
40 | @property 41 | def readline(self): # type: ignore 42 | raise AttributeError("Not found") 43 | 44 | filepath = DATA_DIR / "example_basic.ris" 45 | f = NoReadline(filepath.read_text()) 46 | assert not hasattr(f, "readline") 47 | entries = rispy.load(f) 48 | assert example_basic == entries 49 | 50 | 51 | def test_load_path(example_basic): 52 | # test with Path object 53 | filepath = DATA_DIR / "example_basic.ris" 54 | p = Path(filepath) 55 | entries = rispy.load(p) 56 | assert example_basic == entries 57 | 58 | 59 | def test_load_bad_file(): 60 | with pytest.raises(ValueError, match="File must be a file-like object or a Path object"): 61 | rispy.load("test") # type: ignore 62 | 63 | 64 | def test_loads(example_basic): 65 | ristext = (DATA_DIR / "example_basic.ris").read_text() 66 | assert example_basic == rispy.loads(ristext) 67 | 68 | 69 | def test_load_multiline_ris(): 70 | filepath = DATA_DIR / "example_multiline.ris" 71 | expected = { 72 | "type_of_reference": "JOUR", 73 | "notes_abstract": "first line, ER then second line and at the end the last line", 74 | "notes": ["first line", "* second line", "* last line"], 75 | } 76 | with open(filepath) as f: 77 | entries = rispy.load(f) 78 | 79 | for entry in entries: 80 | assert expected == entry 81 | 82 | 83 | def test_load_example_full_ris(): 84 | filepath = DATA_DIR / "example_full.ris" 85 | expected = [ 86 | { 87 | "type_of_reference": "JOUR", 88 | "id": "12345", 89 | "primary_title": "Title of reference", 90 | "first_authors": ["Marx, Karl", "Lindgren, Astrid"], 91 | "secondary_authors": ["Glattauer, Daniel"], 92 | "publication_year": "2014//", 93 | "notes_abstract": "BACKGROUND: Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. RESULTS: Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. 
CONCLUSIONS: Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium.", # noqa: E501 94 | "keywords": ["Pippi", "Nordwind", "Piraten"], 95 | "alternate_title3": "Lorem", 96 | "alternate_title2": "lorem", 97 | "volume": "9", 98 | "number": "3", 99 | "start_page": "e0815", 100 | "place_published": "United States", 101 | "publisher": "Fun Factory", 102 | "issn": "1932-6208", 103 | "note": "1008150341", 104 | "file_attachments2": "http://example.com", 105 | "urls": ["http://example_url.com"], 106 | }, 107 | { 108 | "type_of_reference": "JOUR", 109 | "id": "12345", 110 | "primary_title": "The title of the reference", 111 | "first_authors": ["Marxus, Karlus", "Lindgren, Astrid"], 112 | "secondary_authors": ["Glattauer, Daniel"], 113 | "publication_year": "2006//", 114 | "notes_abstract": "BACKGROUND: Lorem dammed ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. RESULTS: Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. CONCLUSIONS: Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. 
Nullam dictum felis eu pede mollis pretium.", # noqa: E501 115 | "keywords": ["Pippi Langstrumpf", "Nordwind", "Piraten"], 116 | "alternate_title3": "Lorem", 117 | "alternate_title2": "lorem", 118 | "volume": "6", 119 | "number": "3", 120 | "start_page": "e0815341", 121 | "place_published": "Germany", 122 | "publisher": "Dark Factory", 123 | "issn": "1732-4208", 124 | "note": "1228150341", 125 | "file_attachments2": "http://example2.com", 126 | "urls": ["http://example_url.com"], 127 | }, 128 | ] 129 | 130 | with open(filepath) as f: 131 | entries = rispy.load(f) 132 | assert expected == entries 133 | 134 | 135 | def test_load_example_extraneous_data_ris(): 136 | filepath = DATA_DIR / "example_extraneous_data.ris" 137 | expected = [ 138 | { 139 | "type_of_reference": "JOUR", 140 | "id": "12345", 141 | "primary_title": "Title of reference", 142 | "first_authors": ["Marx, Karl", "Lindgren, Astrid"], 143 | "secondary_authors": ["Glattauer, Daniel"], 144 | "publication_year": "2014//", 145 | "notes_abstract": "BACKGROUND: Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. RESULTS: Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. CONCLUSIONS: Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. 
Nullam dictum felis eu pede mollis pretium.", # noqa: E501 146 | "keywords": ["Pippi", "Nordwind", "Piraten"], 147 | "alternate_title3": "Lorem", 148 | "alternate_title2": "lorem", 149 | "volume": "9", 150 | "number": "3", 151 | "start_page": "e0815", 152 | "place_published": "United States", 153 | "publisher": "Fun Factory", 154 | "issn": "1932-6208", 155 | "note": "1008150341", 156 | "file_attachments2": "http://example.com", 157 | "urls": ["http://example_url.com"], 158 | }, 159 | { 160 | "type_of_reference": "JOUR", 161 | "id": "12345", 162 | "primary_title": "The title of the reference", 163 | "first_authors": ["Marxus, Karlus", "Lindgren, Astrid"], 164 | "secondary_authors": ["Glattauer, Daniel"], 165 | "publication_year": "2006//", 166 | "notes_abstract": "BACKGROUND: Lorem dammed ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. RESULTS: Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. CONCLUSIONS: Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium.", # noqa: E501 167 | "keywords": ["Pippi Langstrumpf", "Nordwind", "Piraten"], 168 | "alternate_title3": "Lorem", 169 | "alternate_title2": "lorem", 170 | "volume": "6", 171 | "number": "3", 172 | "start_page": "e0815341", 173 | "place_published": "Germany", 174 | "publisher": "Dark Factory", 175 | "issn": "1732-4208", 176 | "note": "1228150341", 177 | "file_attachments2": "http://example2.com", 178 | "urls": ["http://example_url.com"], 179 | }, 180 | ] 181 | 182 | with open(filepath) as f: 183 | entries = rispy.load(f) 184 | assert expected == entries 185 | 186 | 187 | def test_load_example_full_ris_without_whitespace(): 188 | # Parse files without whitespace after ER tag. 
189 | # Resolves https://github.com/MrTango/rispy/pull/25 190 | 191 | filepath = DATA_DIR / "example_full_without_whitespace.ris" 192 | expected = [ 193 | { 194 | "type_of_reference": "JOUR", 195 | "id": "12345", 196 | "primary_title": "Title of reference", 197 | "first_authors": ["Marx, Karl", "Lindgren, Astrid"], 198 | "secondary_authors": ["Glattauer, Daniel"], 199 | "publication_year": "2014//", 200 | "notes_abstract": "BACKGROUND: Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. RESULTS: Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. CONCLUSIONS: Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium.", # noqa: E501 201 | "keywords": ["Pippi", "Nordwind", "Piraten"], 202 | "alternate_title3": "Lorem", 203 | "alternate_title2": "lorem", 204 | "volume": "9", 205 | "number": "3", 206 | "start_page": "e0815", 207 | "place_published": "United States", 208 | "publisher": "Fun Factory", 209 | "issn": "1932-6208", 210 | "note": "1008150341", 211 | "file_attachments2": "http://example.com", 212 | "urls": ["http://example_url.com"], 213 | }, 214 | { 215 | "type_of_reference": "JOUR", 216 | "id": "12345", 217 | "primary_title": "The title of the reference", 218 | "first_authors": ["Marxus, Karlus", "Lindgren, Astrid"], 219 | "secondary_authors": ["Glattauer, Daniel"], 220 | "publication_year": "2006//", 221 | "notes_abstract": "BACKGROUND: Lorem dammed ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. RESULTS: Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. 
CONCLUSIONS: Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium.", # noqa: E501 222 | "keywords": ["Pippi Langstrumpf", "Nordwind", "Piraten"], 223 | "alternate_title3": "Lorem", 224 | "alternate_title2": "lorem", 225 | "volume": "6", 226 | "number": "3", 227 | "start_page": "e0815341", 228 | "place_published": "Germany", 229 | "publisher": "Dark Factory", 230 | "issn": "1732-4208", 231 | "note": "1228150341", 232 | "file_attachments2": "http://example2.com", 233 | "urls": ["http://example_url.com"], 234 | }, 235 | ] 236 | 237 | with open(filepath) as f: 238 | entries = rispy.load(f) 239 | assert expected == entries 240 | 241 | 242 | def test_load_single_unknown_tag_ris(): 243 | filepath = DATA_DIR / "example_single_unknown_tag.ris" 244 | expected = { 245 | "type_of_reference": "JOUR", 246 | "authors": ["Shannon,Claude E."], 247 | "year": "1948/07//", 248 | "title": "A Mathematical Theory of Communication", 249 | "alternate_title3": "Bell System Technical Journal", 250 | "start_page": "379", 251 | "end_page": "423", 252 | "volume": "27", 253 | "unknown_tag": {"JP": ["CRISPR", "Direct Current"]}, 254 | } 255 | 256 | with open(filepath) as f: 257 | entries = rispy.load(f) 258 | 259 | assert expected == entries[0] 260 | 261 | 262 | def test_load_multiple_unknown_tags_ris(): 263 | filepath = DATA_DIR / "example_multi_unknown_tags.ris" 264 | expected = { 265 | "type_of_reference": "JOUR", 266 | "authors": ["Shannon,Claude E."], 267 | "year": "1948/07//", 268 | "title": "A Mathematical Theory of Communication", 269 | "alternate_title3": "Bell System Technical Journal", 270 | "end_page": "423", 271 | "volume": "27", 272 | "unknown_tag": {"JP": ["CRISPR"], "DC": ["Direct Current"]}, 273 | } 274 | with open(filepath) as f: 275 | entries = rispy.load(f) 276 | assert expected == entries[0] 277 | 278 | 279 | def test_starting_newline(): 280 | fn = 
DATA_DIR / "example_starting_newlines.ris" 281 | with open(fn) as f: 282 | entries = rispy.load(f) 283 | assert len(entries) == 1 284 | 285 | 286 | def test_strip_bom(): 287 | expected = { 288 | "type_of_reference": "JOUR", 289 | "doi": "10.1186/s40981-020-0316-0", 290 | } 291 | 292 | filepath = DATA_DIR / "example_bom.ris" 293 | 294 | # we properly decode the content of this file as UTF-8, but leave the BOM 295 | with open(filepath, encoding="utf-8-sig") as f: 296 | entries = rispy.load(f) 297 | 298 | assert expected == entries[0] 299 | 300 | 301 | def test_wos_ris(): 302 | fn = DATA_DIR / "example_wos.ris" 303 | with open(fn) as f: 304 | entries = rispy.load(f, implementation=rispy.WokParser) 305 | 306 | assert len(entries) == 2 307 | 308 | title = "Interactions stabilizing the structure of the core light-harvesting complex (LHl) of photosynthetic bacteria and its subunit (B820)" # noqa: E501 309 | assert entries[0]["document_title"] == title 310 | 311 | title = "Proximal and distal influences on ligand binding kinetics in microperoxidase and heme model compounds" # noqa: E501 312 | assert entries[1]["document_title"] == title 313 | 314 | 315 | def test_unkown_skip(): 316 | filepath = DATA_DIR / "example_multi_unknown_tags.ris" 317 | expected = { 318 | "type_of_reference": "JOUR", 319 | "authors": ["Shannon,Claude E."], 320 | "year": "1948/07//", 321 | "title": "A Mathematical Theory of Communication", 322 | "alternate_title3": "Bell System Technical Journal", 323 | "end_page": "423", 324 | "volume": "27", 325 | } 326 | 327 | with open(filepath) as f: 328 | entries = rispy.load(f, skip_unknown_tags=True) 329 | assert expected == entries[0] 330 | 331 | 332 | def test_type_conversion(): 333 | refs = [ 334 | {"type_of_reference": "JOUR", "id": "12345", "primary_title": "Title of reference"}, 335 | { 336 | "type_of_reference": "BOOK", 337 | "id": "12345", 338 | "primary_title": "The title of the reference", 339 | }, 340 | {"type_of_reference": "Journal", "id": 
"12345", "primary_title": "Title of reference"}, 341 | {"type_of_reference": "TEST", "id": "12345", "primary_title": "Title of reference"}, 342 | ] 343 | 344 | # test conversion 345 | test1 = rispy.utils.convert_reference_types(refs) 346 | test1_types = [i["type_of_reference"] for i in test1] 347 | assert test1_types == [ 348 | "Journal", 349 | "Whole book", 350 | "Journal", 351 | "TEST", 352 | ] 353 | 354 | # test reverse 355 | test2 = rispy.utils.convert_reference_types(test1, reverse=True) 356 | assert test2[0:2] == refs[0:2] 357 | assert test2[3] == refs[3] 358 | assert test2[2]["type_of_reference"] == "JOUR" 359 | 360 | # test strict 361 | with pytest.raises(KeyError): 362 | rispy.utils.convert_reference_types(refs, strict=True) 363 | refs_clean = refs[0:3] 364 | test3 = rispy.utils.convert_reference_types(refs_clean, strict=True) 365 | 366 | # test strict in reverse 367 | test4 = rispy.utils.convert_reference_types(test3, strict=True, reverse=True) 368 | assert test4[0:2] == refs_clean[0:2] 369 | assert test4[2]["type_of_reference"] == "JOUR" 370 | 371 | 372 | def test_encodings(): 373 | p = DATA_DIR / "example_utf_chars.ris" 374 | 375 | with open(p, encoding="utf-8-sig") as file: 376 | expected = rispy.load(file) 377 | 378 | with pytest.raises(UnicodeDecodeError): 379 | rispy.load(p, encoding="cp1252") 380 | 381 | entries = rispy.load(p, encoding="utf-8-sig") 382 | 383 | assert entries == expected 384 | 385 | 386 | def test_list_tag_enforcement(): 387 | filepath = DATA_DIR / "example_custom_list_tags.ris" 388 | 389 | expected = { 390 | "type_of_reference": "JOUR", 391 | "authors": ["Marx, Karl", "Marxus, Karlus"], 392 | "issn": ["12345", "ABCDEFG", "666666"], 393 | } 394 | 395 | entries = rispy.load(filepath, enforce_list_tags=False, list_tags=[]) 396 | assert expected == entries[0] 397 | 398 | 399 | def test_url_tag(): 400 | filepath = DATA_DIR / "example_urls.ris" 401 | with open(filepath) as f: 402 | entries = rispy.load(f) 403 | 404 | assert len(entries) 
== 4 405 | assert entries[0]["urls"] == ["http://example.com"] 406 | assert entries[1]["urls"] == ["http://example.com", "http://www.example.com"] 407 | assert entries[2]["urls"] == ["http://example.com", "http://www.example.com"] 408 | assert entries[3]["urls"] == ["http://example.com", "http://www.example.com"] 409 | 410 | 411 | def test_empty_tag(): 412 | filepath = DATA_DIR / "example_empty_tag.ris" 413 | with open(filepath) as f: 414 | entries = rispy.load(f) 415 | 416 | assert len(entries) == 1 417 | assert entries[0]["number"] == "9" 418 | assert entries[0]["start_page"] == "" 419 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from rispy.utils import invert_dictionary 4 | 5 | 6 | def test_invert_dictionary(): 7 | d = {"a": "b"} 8 | assert invert_dictionary(d) == {"b": "a"} 9 | 10 | 11 | def test_invert_dictionary_failure(): 12 | d = {"a": "b", "c": "b"} 13 | with pytest.raises(ValueError, match="Dictionary cannot be inverted"): 14 | invert_dictionary(d) 15 | -------------------------------------------------------------------------------- /tests/test_writer.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | from pathlib import Path 3 | from typing import ClassVar 4 | 5 | import pytest 6 | 7 | import rispy 8 | 9 | DATA_DIR = Path(__file__).parent.resolve() / "data" 10 | 11 | 12 | @pytest.fixture 13 | def ris_data(): 14 | return [ 15 | { 16 | "type_of_reference": "JOUR", 17 | "authors": ["Shannon, Claude E.", "Doe, John"], 18 | "year": "1948/07//", 19 | "title": "A Mathematical Theory of Communication", 20 | "start_page": "379", 21 | "urls": ["https://example.com", "https://example2.com"], 22 | } 23 | ] 24 | 25 | 26 | def test_dump_and_load(): 27 | # check that we can write the same file we read 28 | source_fp = DATA_DIR / 
"example_full.ris" 29 | 30 | # read text 31 | actual = source_fp.read_text() 32 | 33 | # map to RIS structure and dump 34 | entries = rispy.loads(actual) 35 | export = rispy.dumps(entries) 36 | 37 | assert actual == export 38 | 39 | 40 | def test_dumps_multiple_unknown_tags_ris(tmp_path): 41 | fp = tmp_path / "test_dump_unknown_tags.ris" 42 | 43 | results = [{"title": "my-title", "abstract": "my-abstract", "does_not_exists": "test"}] 44 | 45 | # check that we get a warning 46 | with pytest.warns(UserWarning, match="label `does_not_exists` not exported"): 47 | with open(fp, "w") as f: 48 | rispy.dump(results, f) 49 | 50 | # check that we get everything back except missing key 51 | text = Path(fp).read_text() 52 | entries = rispy.loads(text) 53 | assert entries[0] == { 54 | "type_of_reference": "JOUR", 55 | "title": "my-title", 56 | "abstract": "my-abstract", 57 | } 58 | 59 | # check file looks as expected 60 | lines = text.splitlines() 61 | assert lines[0] == "1." 62 | assert lines[1] == "TY - JOUR" 63 | assert lines[4] == "ER - " 64 | assert len(lines) == 5 65 | 66 | 67 | def test_custom_list_tags(): 68 | filepath = DATA_DIR / "example_custom_list_tags.ris" 69 | list_tags = deepcopy(rispy.LIST_TYPE_TAGS) 70 | list_tags.append("SN") 71 | 72 | expected = { 73 | "type_of_reference": "JOUR", 74 | "authors": ["Marx, Karl", "Marxus, Karlus"], 75 | "issn": ["12345", "ABCDEFG", "666666"], 76 | } 77 | 78 | actual = filepath.read_text() 79 | 80 | entries = rispy.loads(actual, list_tags=list_tags) 81 | assert expected == entries[0] 82 | 83 | export = rispy.dumps(entries, list_tags=list_tags) 84 | assert export == actual 85 | 86 | 87 | def test_skip_unknown_tags(): 88 | entries = [ 89 | { 90 | "type_of_reference": "JOUR", 91 | "authors": ["Marx, Karl", "Marxus, Karlus"], 92 | "issn": "12222", 93 | "unknown_tag": {"JP": ["CRISPR"], "DC": ["Direct Current"]}, 94 | } 95 | ] 96 | expected = [ 97 | { 98 | "type_of_reference": "JOUR", 99 | "authors": ["Marx, Karl", "Marxus, 
Karlus"], 100 | "issn": "12222", 101 | } 102 | ] 103 | 104 | export = rispy.dumps(entries, skip_unknown_tags=True) 105 | reload = rispy.loads(export) 106 | 107 | assert reload == expected 108 | 109 | 110 | def test_writing_all_list_tags(): 111 | expected = [ 112 | { 113 | "type_of_reference": "JOUR", 114 | "authors": ["Marx, Karl", "Marxus, Karlus"], 115 | "issn": ["12345", "ABCDEFG", "666666"], 116 | } 117 | ] 118 | 119 | export = rispy.dumps(expected, enforce_list_tags=False, list_tags=[]) 120 | entries = rispy.loads(export, list_tags=["AU", "SN"]) 121 | assert expected == entries 122 | 123 | 124 | def test_file_implementation_write(): 125 | class CustomParser(rispy.RisParser): 126 | DEFAULT_IGNORE: ClassVar[list[str]] = ["JF", "ID", "KW"] 127 | 128 | class CustomWriter(rispy.RisWriter): 129 | DEFAULT_IGNORE: ClassVar[list[str]] = ["JF", "ID", "KW"] 130 | 131 | list_tags = ["SN", "T1", "A1", "UR"] 132 | 133 | fn = DATA_DIR / "example_full.ris" 134 | with open(fn) as f: 135 | entries = rispy.load(f, implementation=CustomParser, list_tags=list_tags) 136 | 137 | fn_write = DATA_DIR / "example_full_write.ris" 138 | 139 | with open(fn_write, "w") as f: 140 | rispy.dump(entries, f, implementation=CustomWriter, list_tags=list_tags) 141 | 142 | with open(fn_write) as f: 143 | reload = rispy.load(f, implementation=CustomParser, list_tags=list_tags) 144 | 145 | assert reload == entries 146 | 147 | 148 | def test_write_single_unknown_tag(ris_data): 149 | ris_data[0]["unknown_tag"] = {"JP": ["CRISPR"]} 150 | text_output = rispy.dumps(ris_data) 151 | # check output is as expected 152 | lines = text_output.splitlines() 153 | assert lines[9] == "JP - CRISPR" 154 | assert len(lines) == 11 155 | 156 | 157 | def test_write_multiple_unknown_tag_same_type(ris_data): 158 | ris_data[0]["unknown_tag"] = {"JP": ["CRISPR", "PEOPLE"]} 159 | text_output = rispy.dumps(ris_data) 160 | 161 | # check output is as expected 162 | lines = text_output.splitlines() 163 | assert lines[9] == "JP - 
CRISPR" 164 | assert lines[10] == "JP - PEOPLE" 165 | assert len(lines) == 12 166 | 167 | 168 | def test_write_multiple_unknown_tag_diff_type(ris_data): 169 | ris_data[0]["unknown_tag"] = {"JP": ["CRISPR"], "ED": ["Swinburne, Ricardo"]} 170 | text_output = rispy.dumps(ris_data) 171 | 172 | # check output is as expected 173 | lines = text_output.splitlines() 174 | assert lines[9] == "JP - CRISPR" 175 | assert lines[10] == "ED - Swinburne, Ricardo" 176 | assert len(lines) == 12 177 | 178 | 179 | def test_default_dump(ris_data): 180 | text_output = rispy.dumps(ris_data) 181 | lines = text_output.splitlines() 182 | assert lines[2] == "AU - Shannon, Claude E." 183 | assert lines[3] == "AU - Doe, John" 184 | assert lines[7] == "UR - https://example.com" 185 | assert lines[8] == "UR - https://example2.com" 186 | assert len(lines) == 10 187 | 188 | 189 | def test_delimited_dump(ris_data): 190 | # remove URLs from list_tags and give it a custom delimiter 191 | text_output = rispy.dumps(ris_data, list_tags=["AU"], delimiter_tags_mapping={"UR": ","}) 192 | 193 | # check output is as expected 194 | lines = text_output.splitlines() 195 | assert lines[2] == "AU - Shannon, Claude E." 196 | assert lines[3] == "AU - Doe, John" 197 | assert lines[7] == "UR - https://example.com,https://example2.com" 198 | assert len(lines) == 9 199 | 200 | 201 | def test_dump_path(tmp_path, ris_data): 202 | # check that dump works with a Path object 203 | path = tmp_path / "file.ris" 204 | rispy.dump(ris_data, path) 205 | assert len(path.read_text()) > 0 206 | 207 | 208 | def test_bad_dump(ris_data): 209 | with pytest.raises(ValueError, match="File must be a file-like object or a Path object"): 210 | rispy.dump(ris_data, 123) # type: ignore 211 | --------------------------------------------------------------------------------