├── .github
    └── workflows
    │   ├── main.yml
    │   └── publish.yml
├── .gitignore
├── HISTORY.md
├── LICENSE
├── README.md
├── pyproject.toml
├── rispy
    ├── __init__.py
    ├── config.py
    ├── parser.py
    ├── utils.py
    └── writer.py
└── tests
    ├── data
        ├── example_basic.ris
        ├── example_bom.ris
        ├── example_custom_list_tags.ris
        ├── example_empty_tag.ris
        ├── example_extraneous_data.ris
        ├── example_full.ris
        ├── example_full_without_whitespace.ris
        ├── example_full_write.ris
        ├── example_multi_unknown_tags.ris
        ├── example_multiline.ris
        ├── example_single_unknown_tag.ris
        ├── example_starting_newlines.ris
        ├── example_urls.ris
        ├── example_utf_chars.ris
        └── example_wos.ris
    ├── test_benchmark.py
    ├── test_parser.py
    ├── test_utils.py
    └── test_writer.py


/.github/workflows/main.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on:
 4 |   pull_request:
 5 |   push:
 6 |     branches:
 7 |       - main
 8 |     tags:
 9 |       - '*'
10 |   workflow_dispatch:
11 | 
12 | jobs:
13 | 
14 |   test:
15 |     name: test
16 |     runs-on: ubuntu-latest
17 |     strategy:
18 |       max-parallel: 5
19 |       matrix:
20 |         python-version: [ "3.9", "3.10", "3.11", "3.12" , "3.13" ]
21 |     steps:
22 |     - uses: actions/checkout@v4
23 |     - uses: actions/setup-python@v5
24 |       with:
25 |         python-version: ${{ matrix.python-version }}
26 |     - name: Install uv
27 |       uses: astral-sh/setup-uv@v5
28 |       with:
29 |         enable-cache: true
30 |         cache-dependency-glob: "**/pyproject.toml"
31 |     - name: Install dependencies
32 |       run: uv sync --all-extras
33 |     - name: Test with pytest
34 |       run: uv run poe test
35 | 
36 |   coverage:
37 |     name: test + lint + coverage
38 |     runs-on: ubuntu-latest
39 |     steps:
40 |     - uses: actions/checkout@v4
41 |     - uses: actions/setup-python@v5
42 |       with:
43 |         python-version: "3.13"
44 |     - name: Install uv
45 |       uses: astral-sh/setup-uv@v5
46 |       with:
47 |         enable-cache: true
48 |         cache-dependency-glob: "**/pyproject.toml"
49 |     - name: Install dependencies
50 |       run: uv sync --all-extras
51 |     - name: Check linting
52 |       run: uv run poe lint
53 |     - name: Test with pytest
54 |       run: uv run coverage run -m pytest --benchmark-skip
55 |     - name: Generate coverage report
56 |       run: |
57 |         echo "# Coverage Report" >> $GITHUB_STEP_SUMMARY
58 |         uv run coverage report --format=markdown >> $GITHUB_STEP_SUMMARY || true
59 |         uv run coverage json -q  # will cause pipeline failure if coverage < minimum
60 | 


--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
 1 | name: Upload Python Package
 2 | 
 3 | on:
 4 |   push:
 5 |     tags:
 6 |       - 'v*'
 7 | 
 8 | jobs:
 9 |   pypi-publish:
10 |     runs-on: ubuntu-latest
11 |     permissions:
12 |       id-token: write
13 |     steps:
14 |     - uses: actions/checkout@v4
15 |     - uses: actions/setup-python@v5
16 |       with:
17 |         python-version: "3.13"
18 |     - name: Install uv
19 |       uses: astral-sh/setup-uv@v5
20 |       with:
21 |         enable-cache: true
22 |         cache-dependency-glob: "**/pyproject.toml"
23 |     - name: Install dependencies
24 |       run: uv sync --all-extras
25 |     - name: Build wheel and tar.gz
26 |       run: uv run poe build
27 |     - name: Publish Package
28 |       uses: pypa/gh-action-pypi-publish@release/v1
29 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # python/editors
 2 | *.codeintel
 3 | *.egg
 4 | *.eggs
 5 | *.egg-info/
 6 | *.mo
 7 | *.nja
 8 | *.py[co]
 9 | .benchmarks
10 | .cache
11 | .coverage
12 | .idea
13 | .mypy_cache
14 | .ruff_cache
15 | .ropeproject
16 | .tmp
17 | /.Python
18 | /.installed.cfg
19 | /.mr.developer.cfg
20 | /.project
21 | /.pydevproject
22 | build
23 | htmlcov
24 | dist
25 | venv
26 | 
27 | # created from tests
28 | export.ris
29 | 


--------------------------------------------------------------------------------
/HISTORY.md:
--------------------------------------------------------------------------------
  1 | # History
  2 | 
  3 | ## v0.10.0 (2025-05-23)
  4 | 
  5 | * Improve performance of rispy's parser and refactor parser
  6 | * Add support for Python 3.13 and remove Python 3.8
  7 | * Revert stripping of the UTF-8 BOM
  8 | * (dev) Add benchmark for rispy
  9 | * (dev) Switch from Makefile to poethepoet
 10 | * (dev) increase test coverage to 99.5%
 11 | * (dev) GitHub publish package to PyPI
 12 | 
 13 | ## v0.9.0 (2024-01-17)
 14 | 
 15 | * Improve performance to yield from file objects instead of loading into memory at once (@scott-8 #57)
 16 | * Support Python 3.12
 17 | * (dev) Remove black; use ruff format instead
 18 | * (dev) Rewrite restructured text docs to markdown
 19 | 
 20 | ## v0.8.1 (2023-07-17)
 21 | 
 22 | * Update RIS exporter to optionally write list tags or delimited single tags (@scott-8 #55)
 23 | 
 24 | ## v0.8.0 (2023-07-13)
 25 | 
 26 | Breaking changes:
 27 | 
 28 | * Update minimum python version from 3.6 to 3.8
 29 | * Improve URL parsing to be more robust and consistent with the spec; saved as a plural "urls" dictionary key instead of the singular "url" (@scott-8/shapiromatron #52)
 30 | * Throw a `rispy.parser.ParseError` instead of an IOError for invalid parsing (@shapiromatron #54)
 31 | 
 32 | Additional updates:
 33 | 
 34 | * Write RIS unknown tags (@simon-20 #50)
 35 | 
 36 | Tooling updates:
 37 | 
 38 | * Support and test python 3.8 through 3.11
 39 | * Update black
 40 | * Switch to ruff from flake8 + isort
 41 | * Switch to flit
 42 | * Add basic coverage reports to github actions
 43 | 
 44 | ## v0.7.1 (2021-06-01)
 45 | 
 46 | * README.rst formatting fixes
 47 | 
 48 | ## v0.7.0 (2021-06-01)
 49 | 
 50 | New features:
 51 | 
 52 | * Allow for subclassing of readers and writers for custom implementations and greater flexibility; these custom classes can be used in all high-level commands (load/loads/dump/dumps)  (@scott-8 #36)
 53 | * Add encoding param to rispy.load if custom file encoding is needed (@scott-8 #36)
 54 | * Add convenience method to pretty-print reference type (@scott-8 #37)
 55 | * Updated setup.py and build tooling to use setup.cfg; use wheel for testing in github actions (@KOLANICH #34)
 56 | * Relicense to MIT (@shapiromatron #43)
 57 | * Support python versions 3.6, 3.7, 3.8, and 3.9 (@shapiromatron #44)
 58 | * Changed primary branch from `master` to `main`
 59 | 
 60 | ## v0.6.0 (2020-11-04)
 61 | 
 62 | New features:
 63 | 
 64 | * Add new optional `strict=True` parameter to rispy.load/loads to allow parsing of RIS files with comments or additional metadata which aren't allowed by the spec (@ShreyRavi)
 65 | * Allow pathlib.Path objects in rispy.load in addition to file objects
 66 | * Enable multiple python environments in github test matrix (python 3.6, 3.7, and 3.8)
 67 | 
 68 | ## v0.5.1 (2020-09-29)
 69 | 
 70 | New features:
 71 | 
 72 | * Strip BOM before processing records
 73 | * Accept ER tag without trailing whitespace
 74 | 
 75 | ## v0.5 (2020-02-21)
 76 | 
 77 | New features:
 78 | 
 79 | * Rename the package from `RISpy` to `rispy` (PEP8 https://www.python.org/dev/peps/pep-0008/#package-and-module-names)
 80 | * Added the ability to write RIS files (via `dump`) in addition to read (@J535D165)
 81 | * Code formatting rules via black and flake8
 82 | * All methods by default return an evaluated list of references, not a generator (to be consistent w/ load/dump behavior)
 83 | * Github actions - code formatting check and unit-tests
 84 | 
 85 | Breaking changes:
 86 | 
 87 | * Rename package from `RISparser` to `rispy`
 88 | * Revise API for reading RIS files to mirror python APIs (like `json`, `pickle`)
 89 | * `SE` RIS key mapped to `section` instead of `version` (per [wikipedia](https://en.wikipedia.org/wiki/RIS_(file_format)))
 90 | * `NV` RIS key mapped to `number_of_volumes` instead of `number_of_Volumes`
 91 | * `N2` RIS key mapped to `notes_abstract` instead of `abstract`
 92 | * Python ≥ 3.6 required
 93 | 
 94 | ## v0.4.3 (2018-04-10)
 95 | 
 96 | * Allow for blank lines at beginning of input file [fixes #3]
 97 | 
 98 | ## v0.4.2 (2017-05-29)
 99 | 
100 | * parser saves unknown tags into an `unknown_tag` key in dict
101 | * python2/3 compatible
102 | * Notes (N1) is now a ListType
103 | * Documented testing with pytest
104 | * Remove unused dependency peppercorn
105 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 rispy authors
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # rispy - an RIS file parser/writer for Python
  2 | 
  3 | [![PyPI Version](https://badge.fury.io/py/rispy.svg)](https://pypi.org/project/rispy/)
  4 | [![PyPI Downloads](https://img.shields.io/pypi/dm/rispy)](https://pypistats.org/packages/rispy)
  5 | 
  6 | A Python reader/writer of [RIS](https://en.wikipedia.org/wiki/RIS_(file_format)) reference files.
  7 | 
  8 | *Pronunciation* - `rispee` - like "crispy", but without the c.
  9 | 
 10 | ## Usage
 11 | 
 12 | Parsing:
 13 | 
 14 | ```python
 15 | >>> import rispy
 16 | >>> filepath = 'tests/data/example_full.ris'
 17 | >>> with open(filepath, 'r') as bibliography_file:
 18 | ...     entries = rispy.load(bibliography_file)
 19 | ...     for entry in entries:
 20 | ...         print(entry['id'])
 21 | ...         print(entry['first_authors'])
 22 | 12345
 23 | ['Marx, Karl', 'Lindgren, Astrid']
 24 | 12345
 25 | ['Marxus, Karlus', 'Lindgren, Astrid']
 26 | 
 27 | ```
 28 | 
 29 | A file path can also be used to read RIS files. If an encoding is not specified in ``load``, the default system encoding
 30 | will be used.
 31 | 
 32 | ```python
 33 | >>> from pathlib import Path
 34 | >>> import rispy
 35 | >>> p = Path('tests', 'data', 'example_utf_chars.ris')
 36 | >>> entries = rispy.load(p, encoding='utf-8-sig')
 37 | >>> for entry in entries:
 38 | ...     print(entry['authors'][0])
 39 | Dobrokhotova, Yu E.
 40 | 
 41 | ```
 42 | 
 43 | Writing:
 44 | 
 45 | ```python
 46 | >>> import rispy
 47 | >>> entries = [
 48 | ... {'type_of_reference': 'JOUR',
 49 | ...  'id': '42',
 50 | ...  'primary_title': 'The title of the reference',
 51 | ...  'first_authors': ['Marxus, Karlus', 'Lindgren, Astrid']
 52 | ...  },{
 53 | ... 'type_of_reference': 'JOUR',
 54 | ...  'id': '43',
 55 | ...  'primary_title': 'Reference 43',
 56 | ...  'abstract': 'Lorem ipsum'
 57 | ...  }]
 58 | >>> filepath = 'export.ris'
 59 | >>> with open(filepath, 'w') as bibliography_file:
 60 | ...     rispy.dump(entries, bibliography_file)
 61 | 
 62 | ```
 63 | 
 64 | ## Example RIS entry
 65 | 
 66 | ```text
 67 |    1.
 68 |    TY  - JOUR
 69 |    ID  - 12345
 70 |    T1  - Title of reference
 71 |    A1  - Marx, Karl
 72 |    A1  - Lindgren, Astrid
 73 |    A2  - Glattauer, Daniel
 74 |    Y1  - 2014//
 75 |    N2  - BACKGROUND: Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus.  RESULTS: Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. CONCLUSIONS: Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium.
 76 |    KW  - Pippi
 77 |    KW  - Nordwind
 78 |    KW  - Piraten
 79 |    JF  - Lorem
 80 |    JA  - lorem
 81 |    VL  - 9
 82 |    IS  - 3
 83 |    SP  - e0815
 84 |    CY  - United States
 85 |    PB  - Fun Factory
 86 |    PB  - Fun Factory USA
 87 |    SN  - 1932-6208
 88 |    M1  - 1008150341
 89 |    L2  - http://example.com
 90 |    ER  -
 91 | ```
 92 | 
 93 | ## TAG_KEY_MAPPING
 94 | 
 95 | Most fields contain string values, but some like first_authors (A1) are parsed into lists. The default mapping is
 96 | created from specifications scattered around the web, but to our knowledge there is not one single source of RIS truth,
 97 | so these may need to be modified for specific export systems:
 98 | 
 99 | - [Wikipedia](https://en.wikipedia.org/wiki/RIS_(file_format))
100 | - [ResearcherId](https://web.archive.org/web/20170707033254/http://www.researcherid.com/resources/html/help_upload.htm)
101 | - [Refman](https://web.archive.org/web/20110930172154/http://www.refman.com/support/risformat_intro.asp)
102 | - [Refman (RIS format)](https://web.archive.org/web/20110930172154/http://www.refman.com/support/risformat_intro.asp)
103 | - [Zotero](https://github.com/zotero/translators/blob/master/RIS.js)
104 | 
105 | ### Complete list of ListType tags
106 | 
107 | ```python
108 | >>> from rispy import LIST_TYPE_TAGS
109 | >>> print(LIST_TYPE_TAGS)
110 | ['A1', 'A2', 'A3', 'A4', 'AU', 'KW', 'N1', 'UR']
111 | 
112 | ```
113 | 
114 | ### Complete default mapping
115 | 
116 | ```python
117 | >>> from rispy import TAG_KEY_MAPPING
118 | >>> from pprint import pprint
119 | >>> pprint(TAG_KEY_MAPPING)
120 | {'A1': 'first_authors',
121 |  'A2': 'secondary_authors',
122 |  'A3': 'tertiary_authors',
123 |  'A4': 'subsidiary_authors',
124 |  'AB': 'abstract',
125 |  'AD': 'author_address',
126 |  'AN': 'accession_number',
127 |  'AU': 'authors',
128 |  'C1': 'custom1',
129 |  'C2': 'custom2',
130 |  'C3': 'custom3',
131 |  'C4': 'custom4',
132 |  'C5': 'custom5',
133 |  'C6': 'custom6',
134 |  'C7': 'custom7',
135 |  'C8': 'custom8',
136 |  'CA': 'caption',
137 |  'CN': 'call_number',
138 |  'CY': 'place_published',
139 |  'DA': 'date',
140 |  'DB': 'name_of_database',
141 |  'DO': 'doi',
142 |  'DP': 'database_provider',
143 |  'EP': 'end_page',
144 |  'ER': 'end_of_reference',
145 |  'ET': 'edition',
146 |  'ID': 'id',
147 |  'IS': 'number',
148 |  'J2': 'alternate_title1',
149 |  'JA': 'alternate_title2',
150 |  'JF': 'alternate_title3',
151 |  'JO': 'journal_name',
152 |  'KW': 'keywords',
153 |  'L1': 'file_attachments1',
154 |  'L2': 'file_attachments2',
155 |  'L4': 'figure',
156 |  'LA': 'language',
157 |  'LB': 'label',
158 |  'M1': 'note',
159 |  'M3': 'type_of_work',
160 |  'N1': 'notes',
161 |  'N2': 'notes_abstract',
162 |  'NV': 'number_of_volumes',
163 |  'OP': 'original_publication',
164 |  'PB': 'publisher',
165 |  'PY': 'year',
166 |  'RI': 'reviewed_item',
167 |  'RN': 'research_notes',
168 |  'RP': 'reprint_edition',
169 |  'SE': 'section',
170 |  'SN': 'issn',
171 |  'SP': 'start_page',
172 |  'ST': 'short_title',
173 |  'T1': 'primary_title',
174 |  'T2': 'secondary_title',
175 |  'T3': 'tertiary_title',
176 |  'TA': 'translated_author',
177 |  'TI': 'title',
178 |  'TT': 'translated_title',
179 |  'TY': 'type_of_reference',
180 |  'UK': 'unknown_tag',
181 |  'UR': 'urls',
182 |  'VL': 'volume',
183 |  'Y1': 'publication_year',
184 |  'Y2': 'access_date'}
185 | 
186 | ```
187 | 
188 | ### Override key mapping
189 | 
190 | The parser uses a `TAG_KEY_MAPPING`, which one can override by calling `rispy.load()` with the `mapping` parameter.
191 | 
192 | ```python
193 | >>> from copy import deepcopy
194 | >>> import rispy
195 | >>> from pprint import pprint
196 | 
197 | >>> filepath = 'tests/data/example_full.ris'
198 | >>> mapping = deepcopy(rispy.TAG_KEY_MAPPING)
199 | >>> mapping["SP"] = "pages_this_is_my_fun"
200 | >>> with open(filepath, 'r') as bibliography_file:
201 | ...     entries = rispy.load(bibliography_file, mapping=mapping)
202 | ...     pprint(sorted(entries[0].keys()))
203 | ['alternate_title2',
204 |  'alternate_title3',
205 |  'file_attachments2',
206 |  'first_authors',
207 |  'id',
208 |  'issn',
209 |  'keywords',
210 |  'note',
211 |  'notes_abstract',
212 |  'number',
213 |  'pages_this_is_my_fun',
214 |  'place_published',
215 |  'primary_title',
216 |  'publication_year',
217 |  'publisher',
218 |  'secondary_authors',
219 |  'type_of_reference',
220 |  'urls',
221 |  'volume']
222 | 
223 | ```
224 | 
225 | List tags can be customized in the same way, by passing a list to the `list_tags` parameter.
226 | 
227 | ### Changing rispy behavior
228 | 
229 | There are a few flags that can be passed to `rispy.load()` and `rispy.dump()` that change how `rispy` deals with tags.
230 | For example, setting `skip_unknown_tags` to `True` will cause `rispy` to not read or write tags not in the tag map. More
231 | can be found in the docstrings for each class. If more customization is necessary, a custom implementation can be
232 | created (see next section).
233 | 
234 | ## Using custom implementations
235 | 
236 | Not all RIS files follow the same formatting guidelines. There is an interface for creating custom implementations for
237 | reading and writing such files. An implementation contains the methods and parameters used to work with RIS files, and
238 | should be passed to `rispy.load()` or `rispy.dump()`.
239 | 
240 | ### Customizing implementations
241 | 
242 | Creating a custom implementation involves creating a class that inherits a base class, and overriding the necessary
243 | variables and methods. One of the existing parsers can also be inherited. Inheriting an existing class is advantageous
244 | if only minor changes need to be made. The sections below document what is available to be overridden, along with a few
245 | examples.
246 | 
247 | #### Parsing
248 | 
249 | Custom parsers can inherit `RisParser` (the default parser). Various parameters and methods can be overridden when creating a new parser.
250 | 
251 | Examples:
252 | 
253 | ```python
254 | class WokParser(RisParser):
255 |       """Subclass of Base for reading Wok RIS files."""
256 | 
257 |       START_TAG = "PT"
258 |       IGNORE = ["FN", "VR", "EF"]
259 |       PATTERN = r"^[A-Z][A-Z0-9] |^ER\s?|^EF\s?"
260 |       DEFAULT_MAPPING = WOK_TAG_KEY_MAPPING
261 |       DEFAULT_LIST_TAGS = WOK_LIST_TYPE_TAGS
262 | 
263 |       def get_content(self, line):
264 |          return line[2:].strip()
265 | 
266 |       def is_header(self, line):
267 |          return True
268 | 
269 | ```
270 | 
271 | #### Writing
272 | 
273 | Writing is very similar to parsing. A custom writer class can inherit `BaseWriter` or one of its subclasses, such as
274 | `RisWriter`.
275 | 
276 | Examples:
277 | 
278 | ```python
279 | class RisWriter(BaseWriter):
280 |       """Subclass of BaseWriter for writing RIS files."""
281 | 
282 |       START_TAG = "TY"
283 |       PATTERN = "{tag}  - {value}"
284 |       DEFAULT_MAPPING = TAG_KEY_MAPPING
285 |       DEFAULT_LIST_TAGS = LIST_TYPE_TAGS
286 | 
287 |       def set_header(self, count):
288 |          return "{i}.".format(i=count)
289 | 
290 | ```
291 | 
292 | ## Other functionality
293 | 
294 | Other various utilities included in `rispy` are documented below.
295 | 
296 | ### Reference type conversion
297 | 
298 | A method is available to convert common RIS reference types into more readable terms. It takes a list of references and
299 | returns a copy of that list with modified reference types. The map for this conversion is located in ``config.py``.
300 | 
301 | ```python
302 | >>> from rispy.utils import convert_reference_types
303 | >>> refs = [{"type_of_reference": "JOUR"}]
304 | >>> print(convert_reference_types(refs))
305 | [{'type_of_reference': 'Journal'}]
306 | 
307 | ```
308 | 
309 | ## Software for other RIS-like formats
310 | 
311 | Some RIS-like formats contain rich citation data, for example lists and nested attributes, that `rispy` does not
312 | support. Software specializing in these formats includes:
313 | 
314 | * [nbib](https://pypi.org/project/nbib/) - parses the "PubMed" or "MEDLINE" format
315 | 
316 | ## Developer instructions
317 | 
318 | Install [uv](https://docs.astral.sh/uv/) and make it available and on your path. Then:
319 | 
320 | ```bash
321 | # setup environment
322 | uv venv --python=3.13
323 | source .venv/bin/activate  # On Windows: .venv\Scripts\activate
324 | uv pip install -e ".[dev]"
325 | 
326 | # list available tasks
327 | poe
328 | 
329 | # check if code format changes are required
330 | poe lint
331 | 
332 | # reformat code
333 | poe format
334 | 
335 | # run tests
336 | poe test
337 | 
338 | # run benchmark tests
339 | poe bench
340 | ```
341 | 
342 | If you'd prefer not to use `uv`, that's fine too; this is a standard Python package so feel free to use your
343 | preferred workflow.
344 | 
345 | GitHub Actions are currently enabled to run `lint` and `test` when a pull request is submitted.
346 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
  1 | [project]
  2 | name = "rispy"
  3 | license = { file = "LICENSE" }
  4 | authors = [
  5 |     { name = "Maik Derstappen", email = "md@derico.de" }
  6 | ]
  7 | maintainers = [
  8 |     { name = "Andy Shapiro", email = "shapiromatron@gmail.com" }
  9 | ]
 10 | readme = "README.md"
 11 | dynamic = ["version", "description"]
 12 | keywords = ["RIS", "parser", "bibliograph"]
 13 | classifiers = [
 14 |     "Intended Audience :: Developers",
 15 |     "License :: OSI Approved :: MIT License",
 16 |     "Programming Language :: Python :: 3",
 17 |     "Programming Language :: Python :: 3.9",
 18 |     "Programming Language :: Python :: 3.10",
 19 |     "Programming Language :: Python :: 3.11",
 20 |     "Programming Language :: Python :: 3.12",
 21 |     "Programming Language :: Python :: 3.13",
 22 | ]
 23 | requires-python = ">=3.9"
 24 | 
 25 | [project.urls]
 26 | "Source" = "https://github.com/mrtango/rispy"
 27 | "Changes" = "https://github.com/MrTango/rispy/blob/main/HISTORY.md"
 28 | "Issue Tracker" = "https://github.com/MrTango/rispy/issues"
 29 | "Download" = "https://pypi.org/project/rispy/"
 30 | 
 31 | [project.optional-dependencies]
 32 | dev = [
 33 |     "poethepoet ~= 0.34.0",
 34 |     "pytest ~=8.3.5",
 35 |     "pytest-benchmark ~= 5.1.0",
 36 |     "flit ~= 3.12.0",
 37 |     "ruff ~= 0.11.10",
 38 |     "coverage ~= 7.8.0",
 39 | ]
 40 | 
 41 | [build-system]
 42 | requires = ["flit_core >=3.2,<4"]
 43 | build-backend = "flit_core.buildapi"
 44 | 
 45 | [tool.coverage.run]
 46 | omit = [
 47 |     "tests/*",
 48 | ]
 49 | 
 50 | [tool.coverage.report]
 51 | fail_under=99.5
 52 | precision=1
 53 | exclude_also = [
 54 |   "@abstractmethod",
 55 | ]
 56 | 
 57 | [tool.flit.sdist]
 58 | exclude = [".github", "tests"]
 59 | 
 60 | [tool.ruff]
 61 | line-length = 100
 62 | target-version = "py39"
 63 | 
 64 | [tool.ruff.lint]
 65 | select = ["F", "E", "W", "I", "UP", "S", "B", "T20", "RUF"]
 66 | 
 67 | [tool.ruff.lint.per-file-ignores]
 68 | "test_*.py" = ["S101"]
 69 | 
 70 | [tool.pytest.ini_options]
 71 | addopts = "--doctest-glob='*.md'"
 72 | 
 73 | [tool.poe.tasks.lint]
 74 | help = "Check for formatting issues"
 75 | sequence = [
 76 |   {cmd = "ruff format . --check"},
 77 |   {cmd = "ruff check ."},
 78 | ]
 79 | 
 80 | [tool.poe.tasks.format]
 81 | help = "Fix formatting issues (where possible)"
 82 | sequence = [
 83 |   {cmd = "ruff format ."},
 84 |   {cmd = "ruff check . --fix --show-fixes"},
 85 | ]
 86 | 
 87 | [tool.poe.tasks.test]
 88 | help = "Run tests"
 89 | cmd = "pytest --benchmark-skip"
 90 | 
 91 | [tool.poe.tasks.bench]
 92 | help = "Run benchmark tests"
 93 | cmd = "pytest --benchmark-only"
 94 | 
 95 | [tool.poe.tasks.coverage]
 96 | help = "Generate test coverage report"
 97 | sequence = [
 98 |   {cmd = "coverage run -m pytest --benchmark-skip"},
 99 |   {cmd = "coverage html"},
100 | ]
101 | 
102 | [tool.poe.tasks.build]
103 | help = "Build wheel package"
104 | cmd = "uv build"
105 | 


--------------------------------------------------------------------------------
/rispy/__init__.py:
--------------------------------------------------------------------------------
 1 | """A Python reader/writer of RIS reference files"""
 2 | 
 3 | from .config import LIST_TYPE_TAGS, TAG_KEY_MAPPING, TYPE_OF_REFERENCE_MAPPING
 4 | from .parser import RisParser, WokParser, load, loads
 5 | from .writer import BaseWriter, RisWriter, dump, dumps
 6 | 
 7 | __version__ = "0.10.0"
 8 | 
 9 | __all__ = [
10 |     "LIST_TYPE_TAGS",
11 |     "TAG_KEY_MAPPING",
12 |     "TYPE_OF_REFERENCE_MAPPING",
13 |     "BaseWriter",
14 |     "RisParser",
15 |     "RisWriter",
16 |     "WokParser",
17 |     "__version__",
18 |     "dump",
19 |     "dumps",
20 |     "load",
21 |     "loads",
22 | ]
23 | 


--------------------------------------------------------------------------------
/rispy/config.py:
--------------------------------------------------------------------------------
  1 | """Define default mappings."""
  2 | 
  3 | LIST_TYPE_TAGS = [
  4 |     "A1",
  5 |     "A2",
  6 |     "A3",
  7 |     "A4",
  8 |     "AU",
  9 |     "KW",
 10 |     "N1",
 11 |     "UR",
 12 | ]
 13 | 
 14 | DELIMITED_TAG_MAPPING = {
 15 |     "UR": ";",
 16 | }
 17 | 
 18 | TAG_KEY_MAPPING = {
 19 |     "TY": "type_of_reference",
 20 |     "A1": "first_authors",  # ListType
 21 |     "A2": "secondary_authors",  # ListType
 22 |     "A3": "tertiary_authors",  # ListType
 23 |     "A4": "subsidiary_authors",  # ListType
 24 |     "AB": "abstract",
 25 |     "AD": "author_address",
 26 |     "AN": "accession_number",
 27 |     "AU": "authors",  # ListType
 28 |     "C1": "custom1",
 29 |     "C2": "custom2",
 30 |     "C3": "custom3",
 31 |     "C4": "custom4",
 32 |     "C5": "custom5",
 33 |     "C6": "custom6",
 34 |     "C7": "custom7",
 35 |     "C8": "custom8",
 36 |     "CA": "caption",
 37 |     "CN": "call_number",
 38 |     "CY": "place_published",
 39 |     "DA": "date",
 40 |     "DB": "name_of_database",
 41 |     "DO": "doi",
 42 |     "DP": "database_provider",
 43 |     "ET": "edition",
 44 |     "EP": "end_page",
 45 |     "ID": "id",
 46 |     "IS": "number",
 47 |     "J2": "alternate_title1",
 48 |     "JA": "alternate_title2",
 49 |     "JF": "alternate_title3",
 50 |     "JO": "journal_name",
 51 |     "KW": "keywords",  # ListType
 52 |     "L1": "file_attachments1",
 53 |     "L2": "file_attachments2",
 54 |     "L4": "figure",
 55 |     "LA": "language",
 56 |     "LB": "label",
 57 |     "M1": "note",
 58 |     "M3": "type_of_work",
 59 |     "N1": "notes",  # ListType
 60 |     "N2": "notes_abstract",
 61 |     "NV": "number_of_volumes",
 62 |     "OP": "original_publication",
 63 |     "PB": "publisher",
 64 |     "PY": "year",
 65 |     "RI": "reviewed_item",
 66 |     "RN": "research_notes",
 67 |     "RP": "reprint_edition",
 68 |     "SE": "section",
 69 |     "SN": "issn",
 70 |     "SP": "start_page",
 71 |     "ST": "short_title",
 72 |     "T1": "primary_title",
 73 |     "T2": "secondary_title",
 74 |     "T3": "tertiary_title",
 75 |     "TA": "translated_author",
 76 |     "TI": "title",
 77 |     "TT": "translated_title",
 78 |     "UR": "urls",  # ListType
 79 |     "VL": "volume",
 80 |     "Y1": "publication_year",
 81 |     "Y2": "access_date",
 82 |     "ER": "end_of_reference",
 83 |     "UK": "unknown_tag",
 84 | }
 85 | 
 86 | TYPE_OF_REFERENCE_MAPPING = {
 87 |     "ABST": "Abstract",
 88 |     "ADVS": "Audiovisual material",
 89 |     "AGGR": "Aggregated Database",
 90 |     "ANCIENT": "Ancient Text",
 91 |     "ART": "Art Work",
 92 |     "BILL": "Bill",
 93 |     "BLOG": "Blog",
 94 |     "BOOK": "Whole book",
 95 |     "CASE": "Case",
 96 |     "CHAP": "Book chapter",
 97 |     "CHART": "Chart",
 98 |     "CLSWK": "Classical Work",
 99 |     "COMP": "Computer program",
100 |     "CONF": "Conference proceeding",
101 |     "CPAPER": "Conference paper",
102 |     "CTLG": "Catalog",
103 |     "DATA": "Data file",
104 |     "DBASE": "Online Database",
105 |     "DICT": "Dictionary",
106 |     "EBOOK": "Electronic Book",
107 |     "ECHAP": "Electronic Book Section",
108 |     "EDBOOK": "Edited Book",
109 |     "EJOUR": "Electronic Article",
110 |     "ELEC": "Web Page",
111 |     "ENCYC": "Encyclopedia",
112 |     "EQUA": "Equation",
113 |     "FIGURE": "Figure",
114 |     "GEN": "Generic",
115 |     "GOVDOC": "Government Document",
116 |     "GRANT": "Grant",
117 |     "HEAR": "Hearing",
118 |     "ICOMM": "Internet Communication",
119 |     "INPR": "In Press",
120 |     "JFULL": "Journal (full)",
121 |     "JOUR": "Journal",
122 |     "LEGAL": "Legal Rule or Regulation",
123 |     "MANSCPT": "Manuscript",
124 |     "MAP": "Map",
125 |     "MGZN": "Magazine article",
126 |     "MPCT": "Motion picture",
127 |     "MULTI": "Online Multimedia",
128 |     "MUSIC": "Music score",
129 |     "NEWS": "Newspaper",
130 |     "PAMP": "Pamphlet",
131 |     "PAT": "Patent",
132 |     "PCOMM": "Personal communication",
133 |     "RPRT": "Report",
134 |     "SER": "Serial publication",
135 |     "SLIDE": "Slide",
136 |     "SOUND": "Sound recording",
137 |     "STAND": "Standard",
138 |     "STAT": "Statute",
139 |     "THES": "Thesis/Dissertation",
140 |     "UNPB": "Unpublished work",
141 |     "VIDEO": "Video recording",
142 | }
143 | 
144 | WOK_LIST_TYPE_TAGS = [
145 |     "RI",
146 |     "CR",
147 |     "AF",
148 |     "BA",
149 |     "BF",
150 |     "AU",
151 |     "CA",
152 |     "GP",
153 | ]
154 | 
# Maps each two-character Web of Knowledge tag to the key used in parsed records.
# "ListType" marks tags that are also listed in WOK_LIST_TYPE_TAGS.
WOK_TAG_KEY_MAPPING = {
    "FN": "file_name",
    "VR": "version_number",
    "PT": "publication_type",
    "AU": "authors",  # ListType
    "AF": "author_full_name",  # ListType
    "BA": "book_authors",  # ListType
    "BF": "book_authors_full_name",  # ListType
    "CA": "group_authors",  # ListType
    "GP": "book_group_authors",  # ListType
    "BE": "editors",  # NOTE(review): was marked ListType but is not in WOK_LIST_TYPE_TAGS
    "TI": "document_title",
    "SO": "publication_name",
    "SE": "book_series_title",
    "BS": "book_series_subtitle",
    "LA": "language",
    "DT": "document_type",
    "CT": "conference_title",
    "CY": "conference_date",
    "CL": "conference_location",
    "SP": "conference_sponsors",
    "HO": "conference_host",
    "DE": "author_keywords",
    "ID": "keywords_plus",
    "AB": "abstract",
    "C1": "author_address",
    "RP": "reprint_address",
    "EM": "email_address",
    "RI": "researcher_id",  # ListType
    "OI": "orcid_id",
    "FU": "funding_agency_and_grant_number",
    "FX": "funding_text",
    "CR": "cited_references",  # ListType
    "NR": "cited_reference_count",
    "TC": "wos_core_collection_cited_count",
    "Z9": "total_times_cited_count",
    "U1": "usage_count_180",
    "U2": "usage_count_2013",
    "PU": "publisher",
    "PI": "publisher_city",
    "PA": "publisher_address",
    "SN": "issn",
    "EI": "eissn",
    "BN": "isbn",
    "J9": "source_abbreviation_29c",
    "JI": "iso_source_abbreviation",
    "PD": "publication_date",
    "PY": "publication_year",
    "VL": "volume",
    "IS": "issue",
    "SI": "special_issue",
    "PN": "part_number",
    "SU": "supplement",
    "MA": "meeting_abstract",
    "BP": "beginning_page",
    "EP": "ending_page",
    "AR": "article_number",
    "DI": "doi",
    "D2": "book_doi",
    "EA": "early_access_date",
    "EY": "early_access_year",
    "PG": "page_count",
    "P2": "chapter_count",
    "WC": "wos_categories",  # NOTE(review): was marked ListType but is not in WOK_LIST_TYPE_TAGS
    "SC": "research_areas",  # NOTE(review): was marked ListType but is not in WOK_LIST_TYPE_TAGS
    "GA": "document_delivery_number",
    "PM": "pubmed_id",
    "UT": "accession_number",
    "OA": "open_access_indicator",
    "HP": "esi_hot_paper",
    "HC": "esi_highly_cited_paper",
    "DA": "date_generated",
    "ER": "end_of_record",
    "EF": "end_of_file",
}
230 | 


--------------------------------------------------------------------------------
/rispy/parser.py:
--------------------------------------------------------------------------------
  1 | """RIS Parser."""
  2 | 
  3 | from collections import defaultdict
  4 | from pathlib import Path
  5 | from typing import ClassVar, Optional, TextIO, Union
  6 | 
  7 | from .config import (
  8 |     DELIMITED_TAG_MAPPING,
  9 |     LIST_TYPE_TAGS,
 10 |     TAG_KEY_MAPPING,
 11 |     WOK_LIST_TYPE_TAGS,
 12 |     WOK_TAG_KEY_MAPPING,
 13 | )
 14 | 
 15 | __all__ = ["RisParser", "WokParser", "load", "loads"]
 16 | 
 17 | 
 18 | class NextLine(Exception):
 19 |     pass
 20 | 
 21 | 
 22 | class ParseError(Exception):
 23 |     pass
 24 | 
 25 | 
 26 | class RisParser:
 27 |     """RIS parser class
 28 | 
 29 |     When creating a new implementation class, some variables and classes need
 30 |     to be overridden. This docstring documents how to override these
 31 |     parameters when creating a subclass.
 32 | 
 33 |     Class variables:
 34 |         START_TAG (str): Start tag, required.
 35 |         END_TAG (str): End tag. Defaults to 'ER'.
 36 |         PATTERN (str): String containing a regex pattern. This pattern
 37 |                        determines if a line has a valid tag. Required.
 38 |         DEFAULT_IGNORE (list, optional): Default list of tags to ignore.
 39 |         DEFAULT_MAPPING (dict): A default mapping for the custom parser.
 40 |                                 Required.
 41 |         DEFAULT_LIST_TAGS (list): A list of tags that should be read as lists.
 42 |                                   Required.
 43 | 
 44 |     """
 45 | 
 46 |     START_TAG: str = "TY"
 47 |     END_TAG: str = "ER"
 48 |     UNKNOWN_TAG: str = "UK"
 49 |     PATTERN: str
 50 |     DEFAULT_IGNORE: ClassVar[list[str]] = []
 51 |     DEFAULT_MAPPING: dict = TAG_KEY_MAPPING
 52 |     DEFAULT_LIST_TAGS: list[str] = LIST_TYPE_TAGS
 53 |     DEFAULT_DELIMITER_MAPPING: dict = DELIMITED_TAG_MAPPING
 54 |     DEFAULT_NEWLINE: ClassVar[str] = "\n"
 55 | 
 56 |     def __init__(
 57 |         self,
 58 |         *,
 59 |         mapping: Optional[dict] = None,
 60 |         list_tags: Optional[list[str]] = None,
 61 |         delimiter_tags_mapping: Optional[dict] = None,
 62 |         ignore: Optional[list[str]] = None,
 63 |         skip_unknown_tags: bool = False,
 64 |         enforce_list_tags: bool = True,
 65 |         newline: Optional[str] = None,
 66 |     ):
 67 |         """Initialize the parser function.
 68 | 
 69 |         Args:
 70 |             mapping (dict, optional): Map tags to tag names.
 71 |             list_tags (list, optional): List of list-type tags.
 72 |             delimiter_tags_mapping (dict, optional): Map of delimiters to tags.
 73 |             ignore (list, optional): List of tags to ignore.
 74 |             skip_unknown_tags (bool, optional): Bool to skip tags that are not in
 75 |                                                 `TAG_KEY_MAPPING`. If unknown tags
 76 |                                                 are not skipped, they will be added
 77 |                                                 to the `unknown_tag` key.
 78 |                                                 Defaults to `False`.
 79 |             enforce_list_tags (bool, optional): Bool for choosing whether to
 80 |                                                 strictly enforce list type tags.
 81 |                                                 If this is `False`, tags that
 82 |                                                 occur multiple times in a reference
 83 |                                                 will be converted to a list instead
 84 |                                                 of being overridden. Values set to
 85 |                                                 be list tags will still be read as
 86 |                                                 list tags. Defaults to `True`.
 87 |             newline (str, optional): Line separator.
 88 | 
 89 |         """
 90 |         self.mapping = mapping if mapping is not None else self.DEFAULT_MAPPING
 91 |         self.list_tags = list_tags if list_tags is not None else self.DEFAULT_LIST_TAGS
 92 |         self.delimiter_map = (
 93 |             delimiter_tags_mapping
 94 |             if delimiter_tags_mapping is not None
 95 |             else self.DEFAULT_DELIMITER_MAPPING
 96 |         )
 97 |         self.ignore = ignore if ignore is not None else self.DEFAULT_IGNORE
 98 |         self.skip_unknown_tags = skip_unknown_tags
 99 |         self.enforce_list_tags = enforce_list_tags
100 |         self.newline = newline if newline is not None else self.DEFAULT_NEWLINE
101 | 
102 |     def _iter_till_start(self, lines) -> dict:
103 |         while True:
104 |             line = next(lines)
105 |             if line.startswith(self.START_TAG):
106 |                 return {self.mapping[self.START_TAG]: self.parse_line(line)[1]}
107 | 
108 |     def parse(self, text: str) -> list[dict]:
109 |         """Parse RIS string."""
110 |         line_gen = (line for line in text.split(self.newline))
111 |         return self.parse_lines(line_gen)
112 | 
113 |     def parse_lines(self, lines: Union[TextIO, list[str]]) -> list[dict]:
114 |         """Parse RIS file line by line."""
115 | 
116 |         result = []
117 |         last_tag = None
118 | 
119 |         try:
120 |             record = self._iter_till_start(lines)
121 | 
122 |             while True:
123 |                 tag, content = self.parse_line(next(lines))
124 | 
125 |                 if tag is None:
126 |                     self._add_tag(record, last_tag, content, extend_multiline=True)
127 |                     continue
128 | 
129 |                 if tag in self.ignore:
130 |                     continue
131 | 
132 |                 if tag == self.END_TAG:
133 |                     result.append(record)
134 | 
135 |                     record = self._iter_till_start(lines)
136 |                     continue
137 | 
138 |                 self._add_tag(record, tag, content)
139 |                 last_tag = tag
140 | 
141 |         except StopIteration:
142 |             return result
143 | 
144 |     def parse_line(self, line: str) -> Union[tuple[str, str], tuple[None, str]]:
145 |         """Parse line of RIS file.
146 | 
147 |         This method parses a line between the start and end tag.
148 |         It returns the tag and the content of the line. Typically,
149 |         the first 2 characters are the tag, followed by a seperator,
150 |         and the rest of the line is the content.
151 | 
152 |         Custom parsers can override this method to change the way
153 |         lines are parsed. For example, a very basic RIS parser would
154 |         return the first 2 characters as the tag and the rest of the
155 |         line as the content of the tag. `(line[0:2], line[6:].strip())`
156 | 
157 |         Parameters
158 |         ----------
159 |         line : str
160 |             Line of RIS file between start and end tag.
161 | 
162 |         Returns
163 |         -------
164 |         tuple
165 |             Tuple containing the tag and the content of the tag.
166 |         """
167 |         if line[2:5] == "  -" and line[:2].isupper() and line[0:1].isalpha():
168 |             return (line[0:2], line[6:].strip())
169 |         else:
170 |             return (None, line.strip())
171 | 
172 |     def _add_single_value(
173 |         self, record: dict, name: str, value: Union[str, list[str]], is_multi: bool = False
174 |     ) -> None:
175 |         """Process a single line.
176 | 
177 |         This method is only run on tags where repeated tags are not expected.
178 |         The output for a tag can be a list when a delimiter is specified,
179 |         even if it is not a list tag.
180 |         """
181 |         if not is_multi:
182 |             if self.enforce_list_tags or name not in record:
183 |                 ignore_this_if_has_one = value
184 |                 record.setdefault(name, ignore_this_if_has_one)
185 |             else:
186 |                 self._add_list_value(record, name, value)
187 |         else:
188 |             value_must_exist_or_is_bug = record[name]
189 |             if isinstance(value, list):
190 |                 record[name].extend(value)
191 |             else:
192 |                 record[name] = " ".join((value_must_exist_or_is_bug, value))
193 | 
194 |     def _add_list_value(self, record: dict, name: str, value: Union[str, list[str]]) -> None:
195 |         """Process tags with multiple values."""
196 |         value_list = value if isinstance(value, list) else [value]
197 |         try:
198 |             record[name].extend(value_list)
199 |         except KeyError:
200 |             record[name] = value_list
201 |         except AttributeError:
202 |             must_exist = record[name]
203 |             record[name] = [must_exist, *value_list]
204 | 
205 |     def _add_tag(
206 |         self, record: dict, tag: str, content: str, extend_multiline: bool = False
207 |     ) -> None:
208 |         try:
209 |             name = self.mapping[tag]
210 |         except KeyError:
211 |             if self.skip_unknown_tags:
212 |                 return
213 | 
214 |             # handle unknown tag
215 |             name = self.mapping[self.UNKNOWN_TAG]
216 |             if name not in record:
217 |                 record[name] = defaultdict(list)
218 |             record[name][tag].append(content)
219 | 
220 |         else:
221 |             if delimiter := self.delimiter_map.get(tag):
222 |                 content = [i.strip() for i in content.split(delimiter)]
223 | 
224 |             if tag in self.list_tags:
225 |                 self._add_list_value(record, name, content)
226 |             else:
227 |                 self._add_single_value(record, name, content, is_multi=extend_multiline)
228 | 
229 | 
class WokParser(RisParser):
    """RisParser variant for Web of Knowledge (Wok) exports."""

    START_TAG = "PT"
    DEFAULT_IGNORE: ClassVar[list[str]] = ["FN", "VR", "EF"]
    DEFAULT_MAPPING = WOK_TAG_KEY_MAPPING
    DEFAULT_LIST_TAGS = WOK_LIST_TYPE_TAGS
    DEFAULT_DELIMITER_MAPPING: ClassVar[dict] = {}

    def parse_line(self, line: str) -> Union[tuple[str, str], tuple[None, str]]:
        """Split one line into its tag and content.

        The first two characters are the tag and the content starts at
        index 3. A line whose first two characters are spaces carries no tag
        and is reported as a continuation line.

        Parameters
        ----------
        line : str
            Line of a Wok file.

        Returns
        -------
        tuple
            ``(tag, content)``, with ``tag`` set to `None` for a
            continuation line.
        """
        prefix = line[0:2]
        body = line[3:].strip()
        tag = None if prefix == "  " else prefix
        return (tag, body)
261 | 
262 | 
def load(
    file: Union[TextIO, Path],
    *,
    encoding: Optional[str] = None,
    newline: Optional[str] = None,
    implementation: type[RisParser] = RisParser,
    **kw,
) -> list[dict]:
    """Read RIS data from a path or file-like object and return its entries.

    Each entry is a dictionary keyed by tag name. For single line and singly
    occurring tags the content is a string. In the case of multiline or
    multiple key occurrences, the content is returned as a list of strings.

    Args:
        file (Union[TextIO, Path]): Path to, or open handle of, the RIS data.
        encoding(str, optional): File encoding, only used when a Path is supplied.
                                 Consistent with the python standard library,
                                 if `None` is supplied, the default system
                                 encoding is used.
        newline(str, optional): File line separator.
        implementation (RisParser): RIS implementation; RisParser by default.
        **kw: Forwarded to the `implementation` constructor.

    Returns:
        list: Returns list of RIS entries.

    Raises:
        ValueError: If `file` is neither a Path nor an object with a
                    `readline` or `read` attribute.
    """
    if isinstance(file, Path):
        # The file object handles `newline` itself, so the parser is built
        # without it.
        with file.open(mode="r", newline=newline, encoding=encoding) as handle:
            parser = implementation(**kw)
            return parser.parse_lines(handle)

    if hasattr(file, "readline"):
        parser = implementation(newline=newline, **kw)
        return parser.parse_lines(file)

    if not hasattr(file, "read"):
        raise ValueError("File must be a file-like object or a Path object")

    # Only `read` is available: parse the whole content as a string.
    return loads(file.read(), implementation=implementation, newline=newline, **kw)
299 | 
300 | 
def loads(text: str, *, implementation: type[RisParser] = RisParser, **kw) -> list[dict]:
    """Parse a string of RIS data and return its entries.

    Each entry is a dictionary keyed by tag name. For single line and singly
    occurring tags the content is a string. In the case of multiline or
    multiple key occurrences, the content is returned as a list of strings.

    Args:
        text (str): A string version of RIS data
        implementation (RisParser): RIS implementation; RisParser by default.
        **kw: Forwarded to the `implementation` constructor.

    Returns:
        list: Returns list of RIS entries.
    """
    parser = implementation(**kw)
    return parser.parse(text)
318 | 


--------------------------------------------------------------------------------
/rispy/utils.py:
--------------------------------------------------------------------------------
 1 | """Miscellaneous functions."""
 2 | 
 3 | from copy import deepcopy
 4 | 
 5 | from .config import TYPE_OF_REFERENCE_MAPPING
 6 | 
 7 | 
 8 | def invert_dictionary(mapping: dict) -> dict:
 9 |     """Invert the keys and values of a dictionary."""
10 |     remap = {v: k for k, v in mapping.items()}
11 |     if len(remap) != len(mapping):
12 |         raise ValueError("Dictionary cannot be inverted; some values were not unique")
13 |     return remap
14 | 
15 | 
def convert_reference_types(
    reference_list: list[dict],
    reverse: bool = False,
    strict: bool = False,
    type_map: dict = TYPE_OF_REFERENCE_MAPPING,
) -> list:
    """Convert RIS reference types to pretty names.

    This method takes a list of references and returns a deep copy with
    converted reference types; the input is left untouched.

    Args:
        reference_list (list[dict]): A list of references. Each reference
                                     must have a "type_of_reference" key.
        reverse (bool, optional): Convert in reverse, i.e. look types up in
                                  the inverted `type_map`.
        strict (bool, optional): Raise error if type not found. A type that
                                 is already a target value of the lookup is
                                 accepted unchanged.
        type_map (dict, optional): Dict used to map types. Default is
                                   TYPE_OF_REFERENCE_MAPPING.

    Returns:
        list: Returns list of RIS entries.

    Raises:
        KeyError: If `strict` is set and a type is neither a key nor a value
                  of the lookup.
        ValueError: If `reverse` is set, the list is non-empty, and
                    `type_map` cannot be inverted.

    """
    # Built once on the first reference rather than once per reference; kept
    # lazy so an empty input never attempts the inversion.
    lookup = None
    converted = []

    for ref in deepcopy(reference_list):
        if lookup is None:
            lookup = invert_dictionary(type_map) if reverse else type_map

        old_type = ref["type_of_reference"]
        try:
            ref["type_of_reference"] = lookup[old_type]
        except KeyError as err:
            # Unmapped types are left as they are unless `strict` is set.
            if strict and old_type not in lookup.values():
                raise KeyError(f'Type "{old_type}" not found.') from err
        converted.append(ref)

    return converted
52 | 


--------------------------------------------------------------------------------
/rispy/writer.py:
--------------------------------------------------------------------------------
  1 | """RIS Writer."""
  2 | 
  3 | import warnings
  4 | from abc import ABC, abstractmethod
  5 | from pathlib import Path
  6 | from typing import ClassVar, Optional, TextIO, Union
  7 | 
  8 | from .config import DELIMITED_TAG_MAPPING, LIST_TYPE_TAGS, TAG_KEY_MAPPING
  9 | from .utils import invert_dictionary
 10 | 
 11 | __all__ = ["BaseWriter", "RisWriter", "dump", "dumps"]
 12 | 
 13 | 
 14 | class BaseWriter(ABC):
 15 |     """Base writer class. Create a subclass to use.
 16 | 
 17 |     When creating a new implementation class, some variables and classes need
 18 |     to be overridden. This docstring documents how to override these
 19 |     parameters when creating a subclass.
 20 | 
 21 |     Class variables:
 22 |         START_TAG (str): Start tag, required.
 23 |         END_TAG (str): End tag. Defaults to 'ER'.
 24 |         IGNORE (list, optional): List of tags to ignore. Defaults to [].
 25 |         PATTERN (str): String containing a format for a line
 26 |                        (e.g. ``"{tag}  - {value}"``). Should contain `tag` and
 27 |                        `value` in curly brackets. Required.
 28 |         DEFAULT_MAPPING (list): Default mapping for this class. Required.
 29 |         DEFAULT_LIST_TAGS (list): Default list tags for this class. Required.
 30 |         DEFAULT_REFERENCE_TYPE (str): Default reference type, used if a
 31 |                                       reference does not have a type.
 32 |         SEPARATOR (str, optional): String to separate the references in the
 33 |                                   file. Defaults to newline.
 34 | 
 35 |     Class methods:
 36 |         set_header: Create a header for each reference. Has the reference
 37 |                     number as a parameter.
 38 | 
 39 |     """
 40 | 
 41 |     START_TAG: str
 42 |     END_TAG: str = "ER"
 43 |     UNKNOWN_TAG: str = "UK"
 44 |     PATTERN: str
 45 |     DEFAULT_IGNORE: ClassVar[list[str]] = []
 46 |     DEFAULT_MAPPING: dict
 47 |     DEFAULT_LIST_TAGS: list[str]
 48 |     DEFAULT_DELIMITER_MAPPING: dict
 49 |     DEFAULT_REFERENCE_TYPE: str = "JOUR"
 50 |     REFERENCE_TYPE_KEY: str = "type_of_reference"
 51 |     SEPARATOR: Optional[str] = ""
 52 |     NEWLINE: str = "\n"
 53 | 
 54 |     def __init__(
 55 |         self,
 56 |         *,
 57 |         mapping: Optional[dict] = None,
 58 |         list_tags: Optional[list[str]] = None,
 59 |         delimiter_tags_mapping: Optional[dict] = None,
 60 |         ignore: Optional[list[str]] = None,
 61 |         skip_unknown_tags: bool = False,
 62 |         enforce_list_tags: bool = True,
 63 |     ):
 64 |         """Override default tag map and list tags in instance.
 65 | 
 66 |         Args:
 67 |             mapping (dict, optional): Map tags to tag names.
 68 |             list_tags (list, optional): List of list-type tags.
 69 |             delimiter_tags_mapping (dict, optional): Map of delimiters to tags.
 70 |             ignore (list, optional): List of tags to ignore.
 71 |             skip_unknown_tags (bool, optional): Bool for whether to write unknown
 72 |                                                 tags to the file. Defaults to
 73 |                                                 `False`.
 74 |             enforce_list_tags (bool, optional): If `True` tags that are not set as
 75 |                                                 list tags will be written into one
 76 |                                                 line. Defaults to `True`.
 77 | 
 78 |         """
 79 |         self.mapping = mapping if mapping is not None else self.DEFAULT_MAPPING
 80 |         self.list_tags = list_tags if list_tags is not None else self.DEFAULT_LIST_TAGS
 81 |         self.delimiter_map = (
 82 |             delimiter_tags_mapping
 83 |             if delimiter_tags_mapping is not None
 84 |             else self.DEFAULT_DELIMITER_MAPPING
 85 |         )
 86 |         self.ignore = ignore if ignore is not None else self.DEFAULT_IGNORE
 87 |         self._rev_mapping = invert_dictionary(self.mapping)
 88 |         self.skip_unknown_tags = skip_unknown_tags
 89 |         self.enforce_list_tags = enforce_list_tags
 90 | 
 91 |     def _get_reference_type(self, ref):
 92 |         if self.REFERENCE_TYPE_KEY in ref:
 93 |             return ref[self.REFERENCE_TYPE_KEY]
 94 |         return self.DEFAULT_REFERENCE_TYPE
 95 | 
 96 |     def _format_line(self, tag, value=""):
 97 |         """Format a RIS line."""
 98 |         return self.PATTERN.format(tag=tag, value=value)
 99 | 
100 |     def _format_reference(self, ref, count, n):
101 |         if header := self.set_header(count):
102 |             yield header
103 |         yield self._format_line(self.START_TAG, self._get_reference_type(ref))
104 | 
105 |         tags_to_skip = [self.START_TAG, *self.ignore]
106 |         if self.skip_unknown_tags:
107 |             tags_to_skip.append(self.UNKNOWN_TAG)
108 | 
109 |         for label, value in ref.items():
110 |             # not available
111 |             try:
112 |                 tag = self._rev_mapping[label.lower()]
113 |             except KeyError:
114 |                 warnings.warn(UserWarning(f"label `{label}` not exported"), stacklevel=2)
115 |                 continue
116 | 
117 |             # ignore
118 |             if tag in tags_to_skip:
119 |                 continue
120 | 
121 |             # list tag
122 |             if tag in self.list_tags or (not self.enforce_list_tags and isinstance(value, list)):
123 |                 for val_i in value:
124 |                     yield self._format_line(tag, val_i)
125 | 
126 |             # unknown tag(s), which are lists held in a defaultdict
127 |             elif tag == self.UNKNOWN_TAG:
128 |                 for unknown_tag in value.keys():
129 |                     for val_i in value[unknown_tag]:
130 |                         yield self._format_line(unknown_tag, val_i)
131 | 
132 |             # write delimited tags
133 |             elif tag in self.delimiter_map:
134 |                 combined_val = self.delimiter_map[tag].join(value)
135 |                 yield self._format_line(tag, combined_val)
136 | 
137 |             # all non-list tags
138 |             else:
139 |                 yield self._format_line(tag, value)
140 | 
141 |         yield self._format_line(self.END_TAG)
142 | 
143 |         if self.SEPARATOR is not None and count < n:
144 |             yield self.SEPARATOR
145 | 
146 |     def _yield_lines(self, references, extra_line=False):
147 |         n = len(references)
148 |         for i, ref in enumerate(references):
149 |             yield from self._format_reference(ref, count=i + 1, n=n)
150 |         if extra_line:
151 |             yield ""
152 | 
153 |     def format_lines(self, file, references):
154 |         """Write references to a file."""
155 |         for line in self._yield_lines(references):
156 |             file.write(f"{line}{self.NEWLINE}")
157 | 
158 |     def formats(self, references: list[dict]) -> str:
159 |         """Format a list of references into an RIS string."""
160 |         lines = self._yield_lines(references, extra_line=True)
161 |         return self.NEWLINE.join(lines)
162 | 
163 |     @abstractmethod
164 |     def set_header(self, count: int) -> str:
165 |         """Create the header for each reference; if empty string, unused."""
166 |         ...
167 | 
168 | 
class RisWriter(BaseWriter):
    """Writer producing RIS output with the default RIS tag configuration."""

    START_TAG = "TY"
    PATTERN = "{tag}  - {value}"
    DEFAULT_MAPPING = TAG_KEY_MAPPING
    DEFAULT_LIST_TAGS = LIST_TYPE_TAGS
    DEFAULT_DELIMITER_MAPPING = DELIMITED_TAG_MAPPING

    def set_header(self, count):
        """Return the reference number followed by a period, e.g. ``"1."``."""
        return "{}.".format(count)
180 | 
181 | 
def dump(
    references: list[dict],
    file: Union[TextIO, Path],
    *,
    encoding: Optional[str] = None,
    implementation: type[BaseWriter] = RisWriter,
    **kw,
):
    """Write references as RIS data to a path or file-like object.

    Each reference is a dictionary keyed by tag name. For single line and
    singly occurring tags the content is a string; for multiline or multiple
    key occurrences it is a list of strings.

    Args:
        references (list[dict]): List of references.
        file (Union[TextIO, Path]): Path, or open handle, to write the RIS
                                    formatted data to.
        encoding (str, optional): Encoding to use when opening file; only
                                  used when a Path is supplied.
        implementation (BaseWriter): RIS implementation; RisWriter by default.
        **kw: Forwarded to the `implementation` constructor.

    Raises:
        ValueError: If `file` is neither a Path nor an object with a `write`
                    attribute.
    """
    if isinstance(file, Path):
        with file.open(mode="w", encoding=encoding) as handle:
            writer = implementation(**kw)
            writer.format_lines(handle, references)
        return

    if not hasattr(file, "write"):
        raise ValueError("File must be a file-like object or a Path object")

    writer = implementation(**kw)
    writer.format_lines(file, references)
211 | 
212 | 
def dumps(references: list[dict], *, implementation: type[BaseWriter] = RisWriter, **kw) -> str:
    """Return the references as an RIS formatted string.

    Each reference is a dictionary keyed by tag name. For single line and
    singly occurring tags the content is a string; for multiline or multiple
    key occurrences it is a list of strings.

    Args:
        references (list[dict]): List of references.
        implementation (BaseWriter): RIS implementation; RisWriter by default.
        **kw: Forwarded to the `implementation` constructor.
    """
    writer = implementation(**kw)
    return writer.formats(references)
227 | 


--------------------------------------------------------------------------------
/tests/data/example_basic.ris:
--------------------------------------------------------------------------------
 1 | TY  - JOUR
 2 | AU  - Shannon,Claude E.
 3 | PY  - 1948/07//
 4 | TI  - A Mathematical Theory of Communication
 5 | JF  - Bell System Technical Journal
 6 | SP  - 379
 7 | EP  - 423
 8 | VL  - 27
 9 | ER  - 
10 | 


--------------------------------------------------------------------------------
/tests/data/example_bom.ris:
--------------------------------------------------------------------------------
1 | ﻿TY  - JOUR
2 | DO  - 10.1186/s40981-020-0316-0
3 | ER  - 
4 | 
5 | 


--------------------------------------------------------------------------------
/tests/data/example_custom_list_tags.ris:
--------------------------------------------------------------------------------
1 | 1.
2 | TY  - JOUR
3 | AU  - Marx, Karl
4 | AU  - Marxus, Karlus
5 | SN  - 12345
6 | SN  - ABCDEFG
7 | SN  - 666666
8 | ER  - 
9 | 


--------------------------------------------------------------------------------
/tests/data/example_empty_tag.ris:
--------------------------------------------------------------------------------
 1 | TY  - JOUR
 2 | ID  - 2006713348
 3 | T1  - Outcome Measures After Shoulder Stabilization in the Athletic Population: A Systematic Review of Clinical and Patient-Reported Metrics
 4 | A1  - Fanning E.
 5 | Y1  - 2020//
 6 | N2  - Background: Athletic endeavor can require the "athletic shoulder" to tolerate significant load through supraphysiological range and often under considerable repetition.
 7 | Outcome measures are valuable when determining an athlete's safe return to sport...
 8 | KW  - *athlete
 9 | KW  - biomechanics
10 | KW  - bone remodeling
11 | JF  - Orthopaedic Journal of Sports Medicine
12 | JA  - Orthop. J. Sports Med.
13 | VL  - 8
14 | IS  - 9
15 | SP  -
16 | PB  - SAGE Publications Ltd (E-mail: info@sagepub.co.uk)
17 | SN  - 2325-9671 (electronic)
18 | DO  - http://dx.doi.org/10.1177/2325967120950040
19 | ER  -
20 | 


--------------------------------------------------------------------------------
/tests/data/example_extraneous_data.ris:
--------------------------------------------------------------------------------
 1 | Record #1 of 2
 2 | Provider: Provider
 3 | Content: text/plain; charset="UTF-8"
 4 | 1.
 5 | TY  - JOUR
 6 | ID  - 12345
 7 | T1  - Title of reference
 8 | A1  - Marx, Karl
 9 | A1  - Lindgren, Astrid
10 | A2  - Glattauer, Daniel
11 | Y1  - 2014//
12 | N2  - BACKGROUND: Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus.  RESULTS: Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. CONCLUSIONS: Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium.
13 | KW  - Pippi
14 | KW  - Nordwind
15 | KW  - Piraten
16 | JF  - Lorem
17 | JA  - lorem
18 | VL  - 9
19 | IS  - 3
20 | SP  - e0815
21 | CY  - United States
22 | PB  - Fun Factory
23 | SN  - 1932-6208
24 | M1  - 1008150341
25 | L2  - http://example.com
26 | UR  - http://example_url.com
27 | ER  - 
28 | 
29 | Record #2 of 2
30 | Provider: Provider
31 | Content: text/plain; charset="UTF-8"
32 | 2.
33 | TY  - JOUR
34 | ID  - 12345
35 | T1  - The title of the reference
36 | A1  - Marxus, Karlus
37 | A1  - Lindgren, Astrid
38 | A2  - Glattauer, Daniel
39 | Y1  - 2006//
40 | N2  - BACKGROUND: Lorem dammed ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus.  RESULTS: Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. CONCLUSIONS: Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium.
41 | KW  - Pippi Langstrumpf
42 | KW  - Nordwind
43 | KW  - Piraten
44 | JF  - Lorem
45 | JA  - lorem
46 | VL  - 6
47 | IS  - 3
48 | SP  - e0815341
49 | CY  - Germany
50 | PB  - Dark Factory
51 | SN  - 1732-4208
52 | M1  - 1228150341
53 | L2  - http://example2.com
54 | UR  - http://example_url.com
55 | ER  - 
56 | 


--------------------------------------------------------------------------------
/tests/data/example_full.ris:
--------------------------------------------------------------------------------
 1 | 1.
 2 | TY  - JOUR
 3 | ID  - 12345
 4 | T1  - Title of reference
 5 | A1  - Marx, Karl
 6 | A1  - Lindgren, Astrid
 7 | A2  - Glattauer, Daniel
 8 | Y1  - 2014//
 9 | N2  - BACKGROUND: Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus.  RESULTS: Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. CONCLUSIONS: Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium.
10 | KW  - Pippi
11 | KW  - Nordwind
12 | KW  - Piraten
13 | JF  - Lorem
14 | JA  - lorem
15 | VL  - 9
16 | IS  - 3
17 | SP  - e0815
18 | CY  - United States
19 | PB  - Fun Factory
20 | SN  - 1932-6208
21 | M1  - 1008150341
22 | L2  - http://example.com
23 | UR  - http://example_url.com
24 | ER  - 
25 | 
26 | 2.
27 | TY  - JOUR
28 | ID  - 12345
29 | T1  - The title of the reference
30 | A1  - Marxus, Karlus
31 | A1  - Lindgren, Astrid
32 | A2  - Glattauer, Daniel
33 | Y1  - 2006//
34 | N2  - BACKGROUND: Lorem dammed ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus.  RESULTS: Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. CONCLUSIONS: Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium.
35 | KW  - Pippi Langstrumpf
36 | KW  - Nordwind
37 | KW  - Piraten
38 | JF  - Lorem
39 | JA  - lorem
40 | VL  - 6
41 | IS  - 3
42 | SP  - e0815341
43 | CY  - Germany
44 | PB  - Dark Factory
45 | SN  - 1732-4208
46 | M1  - 1228150341
47 | L2  - http://example2.com
48 | UR  - http://example_url.com
49 | ER  - 
50 | 


--------------------------------------------------------------------------------
/tests/data/example_full_without_whitespace.ris:
--------------------------------------------------------------------------------
 1 | 1.
 2 | TY  - JOUR
 3 | ID  - 12345
 4 | T1  - Title of reference
 5 | A1  - Marx, Karl
 6 | A1  - Lindgren, Astrid
 7 | A2  - Glattauer, Daniel
 8 | Y1  - 2014//
 9 | N2  - BACKGROUND: Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus.  RESULTS: Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. CONCLUSIONS: Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium.
10 | KW  - Pippi
11 | KW  - Nordwind
12 | KW  - Piraten
13 | JF  - Lorem
14 | JA  - lorem
15 | VL  - 9
16 | IS  - 3
17 | SP  - e0815
18 | CY  - United States
19 | PB  - Fun Factory
20 | SN  - 1932-6208
21 | M1  - 1008150341
22 | L2  - http://example.com
23 | UR  - http://example_url.com
24 | ER  -
25 | 
26 | 2.
27 | TY  - JOUR
28 | ID  - 12345
29 | T1  - The title of the reference
30 | A1  - Marxus, Karlus
31 | A1  - Lindgren, Astrid
32 | A2  - Glattauer, Daniel
33 | Y1  - 2006//
34 | N2  - BACKGROUND: Lorem dammed ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus.  RESULTS: Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. CONCLUSIONS: Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium.
35 | KW  - Pippi Langstrumpf
36 | KW  - Nordwind
37 | KW  - Piraten
38 | JF  - Lorem
39 | JA  - lorem
40 | VL  - 6
41 | IS  - 3
42 | SP  - e0815341
43 | CY  - Germany
44 | PB  - Dark Factory
45 | SN  - 1732-4208
46 | M1  - 1228150341
47 | L2  - http://example2.com
48 | UR  - http://example_url.com
49 | ER  -
50 | 


--------------------------------------------------------------------------------
/tests/data/example_full_write.ris:
--------------------------------------------------------------------------------
 1 | 1.
 2 | TY  - JOUR
 3 | T1  - Title of reference
 4 | A1  - Marx, Karl
 5 | A1  - Lindgren, Astrid
 6 | A2  - Glattauer, Daniel
 7 | Y1  - 2014//
 8 | N2  - BACKGROUND: Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus.  RESULTS: Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. CONCLUSIONS: Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium.
 9 | JA  - lorem
10 | VL  - 9
11 | IS  - 3
12 | SP  - e0815
13 | CY  - United States
14 | PB  - Fun Factory
15 | SN  - 1932-6208
16 | M1  - 1008150341
17 | L2  - http://example.com
18 | UR  - http://example_url.com
19 | ER  - 
20 | 
21 | 2.
22 | TY  - JOUR
23 | T1  - The title of the reference
24 | A1  - Marxus, Karlus
25 | A1  - Lindgren, Astrid
26 | A2  - Glattauer, Daniel
27 | Y1  - 2006//
28 | N2  - BACKGROUND: Lorem dammed ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus.  RESULTS: Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. CONCLUSIONS: Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium.
29 | JA  - lorem
30 | VL  - 6
31 | IS  - 3
32 | SP  - e0815341
33 | CY  - Germany
34 | PB  - Dark Factory
35 | SN  - 1732-4208
36 | M1  - 1228150341
37 | L2  - http://example2.com
38 | UR  - http://example_url.com
39 | ER  - 
40 | 


--------------------------------------------------------------------------------
/tests/data/example_multi_unknown_tags.ris:
--------------------------------------------------------------------------------
 1 | TY  - JOUR
 2 | AU  - Shannon,Claude E.
 3 | PY  - 1948/07//
 4 | TI  - A Mathematical Theory of Communication
 5 | JF  - Bell System Technical Journal
 6 | EP  - 423
 7 | VL  - 27
 8 | JP  - CRISPR
 9 | DC  - Direct Current
10 | ER  - 
11 | 


--------------------------------------------------------------------------------
/tests/data/example_multiline.ris:
--------------------------------------------------------------------------------
 1 | TY  - JOUR
 2 | N2  - first line, ER then second line and at the end the last line
 3 | N1  - first line
 4 |       * second line
 5 |       * last line
 6 | ER  -
 7 | 
 8 | TY  - JOUR
 9 | N2  - first line,
10 |       ER then second line and at the end
11 |       the last line
12 | N1  - first line
13 |       * second line
14 |       * last line
15 | ER  -
16 | 
17 | TY  - JOUR
18 | N2  - first line,
19 | ER then second line and at the end
20 | the last line
21 | N1  - first line
22 |       * second line
23 |       * last line
24 | ER  -
25 | 


--------------------------------------------------------------------------------
/tests/data/example_single_unknown_tag.ris:
--------------------------------------------------------------------------------
 1 | TY  - JOUR
 2 | AU  - Shannon,Claude E.
 3 | PY  - 1948/07//
 4 | TI  - A Mathematical Theory of Communication
 5 | JF  - Bell System Technical Journal
 6 | SP  - 379
 7 | EP  - 423
 8 | VL  - 27
 9 | JP  - CRISPR
10 | JP  - Direct Current
11 | ER  - 
12 | 


--------------------------------------------------------------------------------
/tests/data/example_starting_newlines.ris:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | TY  - JOUR
 4 | AU  - Shannon,Claude E.
 5 | PY  - 1948/07//
 6 | TI  - A Mathematical Theory of Communication
 7 | JF  - Bell System Technical Journal
 8 | SP  - 379
 9 | EP  - 423
10 | VL  - 27
11 | ER  - 
12 | 


--------------------------------------------------------------------------------
/tests/data/example_urls.ris:
--------------------------------------------------------------------------------
 1 | TY  - JOUR
 2 | AU  - Shannon,Claude E.
 3 | PY  - 1948/07//
 4 | TI  - A Mathematical Theory of Communication
 5 | JF  - Bell System Technical Journal
 6 | SP  - 379
 7 | EP  - 423
 8 | VL  - 27
 9 | UR  - http://example.com
10 | ER  -
11 | 
12 | TY  - JOUR
13 | AU  - Shannon,Claude E.
14 | PY  - 1948/07//
15 | TI  - A Mathematical Theory of Communication
16 | JF  - Bell System Technical Journal
17 | SP  - 379
18 | EP  - 423
19 | VL  - 27
20 | UR  - http://example.com
21 | UR  - http://www.example.com
22 | ER  -
23 | 
24 | TY  - JOUR
25 | AU  - Shannon,Claude E.
26 | PY  - 1948/07//
27 | TI  - A Mathematical Theory of Communication
28 | JF  - Bell System Technical Journal
29 | SP  - 379
30 | EP  - 423
31 | VL  - 27
32 | UR  - http://example.com; http://www.example.com
33 | ER  -
34 | 
35 | TY  - JOUR
36 | AU  - Shannon,Claude E.
37 | PY  - 1948/07//
38 | TI  - A Mathematical Theory of Communication
39 | JF  - Bell System Technical Journal
40 | SP  - 379
41 | EP  - 423
42 | VL  - 27
43 | UR  - http://example.com;http://www.example.com
44 | ER  -
45 | 


--------------------------------------------------------------------------------
/tests/data/example_utf_chars.ris:
--------------------------------------------------------------------------------
 1 | ﻿TY  - Journal Article
 2 | AU  - Dobrokhotova, Yu E.
 3 | AU  - Yusupova, R. O.
 4 | AU  - Ozerova, R. I.
 5 | AU  - Fayzullin, L. Z.
 6 | AU  - Karnaukhov, V. N.
 7 | PY  - 2009
 8 | AB  - Состояние рецепторного аппарата эндометрия и метаболизм эстрогенов при гиперплазиях эндометрия в позднем репродуктивном периоде
 9 | The state of the receptor apparatus of the endometrium and estrogen metabolism in endometrial hyperplasia in the late reproductive period
10 | JO  - Gynecology, Obstetrics and Perinatology
11 | VO  - 8
12 | IS  - 3
13 | SP  - 52
14 | EP  - 57
15 | DA  - 2009
16 | N1  - Ä,Ö,Ü,ä,ö,ü,ß
17 | ST  - Состояние рецепторного аппарата эндометрия и метаболизм эстрогенов при гиперплазиях эндометрия в позднем репродуктивном периоде
18 | ER  -
19 | 


--------------------------------------------------------------------------------
/tests/data/example_wos.ris:
--------------------------------------------------------------------------------
  1 | FN Clarivate Analytics Web of Science
  2 | VR 1.0
  3 | PT J
  4 | AU Parkes-Loach, PS
  5 |    Majeed, AP
  6 |    Law, CJ
  7 |    Loach, PA
  8 | AF Parkes-Loach, PS
  9 |    Majeed, AP
 10 |    Law, CJ
 11 |    Loach, PA
 12 | TI Interactions stabilizing the structure of the core light-harvesting
 13 |    complex (LHl) of photosynthetic bacteria and its subunit (B820)
 14 | SO BIOCHEMISTRY
 15 | LA English
 16 | DT Article
 17 | ID CHEMICALLY SYNTHESIZED POLYPEPTIDES; IN-VITRO RECONSTITUTION;
 18 |    AMINO-ACID-SEQUENCE; RHODOSPIRILLUM-RUBRUM; RHODOBACTER-SPHAEROIDES;
 19 |    RHODOPSEUDOMONAS-VIRIDIS; BACTERIOCHLOROPHYLL-A; ALPHA-POLYPEPTIDE;
 20 |    CRYSTAL-STRUCTURE; ANTENNA COMPLEX
 21 | AB Reconstitution experiments with a chemically synthesized core light-harvesting (LH1) beta-polypeptide analogue having 3-methylhistidine instead of histidine in the position that normally donates the coordinating ligand to bacteriochlorophyll (Bchl) have provided the experimental data needed to assign to B820 one of the two possible alphabeta.2Bchl pairs that are observed in the crystal structure of LH2 from Phaeospirillum (formerly Rhodospirillum) molischianum, the one with rings III and V of Bchl overlapping. Consistent with the assigned structure, experimental evidence is provided to show that significant stabilizing interactions for both the subunit complex (B820) and LH1 occur between the N-terminal regions of the alpha- and beta-polypeptides. On the basis of the results with the chemically synthesized polypeptides used in this study, along with earlier results with protease-modified polypeptides, mutants, and chemically synthesized polypeptides, the importance of a stretch of 9-13 amino acids at the N-terminal end of the alpha- and beta-polypeptides is underscored. A progressive loss of interaction with the LH1 beta-potypeptide was found as the first three N-terminal amino acids of the LH1 alpha-polypeptide were removed. The absence of the N-terminal formylmethionine (fMet), or conversion of the sulfur in this fMet to the sulfoxide, resulted in a decrease in LH1 formation. In addition to the removal of fMet, removal of the next two amino acids also resulted in a decrease in K-assoc for B820 formation and nearly eliminated the ability to form LH1. It is suggested that the first three amino acids (fMetTrpArg) of the LH1 alpha-polypeptide of Rhodospirillum rubrum form a cluster that is most likely involved in close interaction with the side chain of His -18 (see Figure 1 for numbering of amino acids) of the beta-polypeptide. 
The results provide evidence that the folding motif of the alpha- and beta-polypeptides in the N-terminal region observed in crystal structures of LH2 is also present in LH1 and contributes significantly to stabilizing the complex.
 22 | C1 Northwestern Univ, Dept Biochem Mol Biol & Cell Biol, Evanston, IL 60208 USA.
 23 | RP Loach, PA (reprint author), Northwestern Univ, Dept Biochem Mol Biol & Cell Biol, Hogan Hall,Room 2-100, Evanston, IL 60208 USA.
 24 | EM p-loach@northwestern.edu
 25 | RI Law, Christopher/E-7174-2011
 26 | CR ALLEN JP, 1986, P NATL ACAD SCI USA, V83, P8589, DOI 10.1073/pnas.83.22.8589
 27 |    Arluison W, 2004, BIOCHEMISTRY-US, V43, P1276, DOI 10.1021/bi030205v
 28 |    BARRICK D, 1994, BIOCHEMISTRY-US, V33, P6546, DOI 10.1021/bi00187a023
 29 |    BERGER G, 1987, J LIQ CHROMATOGR, V10, P1519, DOI 10.1080/01483918708066784
 30 |    BRUNISHOLZ RA, 1984, H-S Z PHYSIOL CHEM, V365, P675, DOI 10.1515/bchm2.1984.365.2.675
 31 |    BRUNISHOLZ RA, 1981, FEBS LETT, V129, P150, DOI 10.1016/0014-5793(81)80778-8
 32 |    CHANG CH, 1986, FEBS LETT, V205, P82, DOI 10.1016/0014-5793(86)80870-5
 33 |    CHANG MC, 1990, PHOTOCHEM PHOTOBIOL, V52, P873, DOI 10.1111/j.1751-1097.1990.tb08696.x
 34 |    CHANG MC, 1990, BIOCHEMISTRY-US, V29, P421, DOI 10.1021/bi00454a017
 35 |    DAVIS CM, 1995, J BIOL CHEM, V270, P5793, DOI 10.1074/jbc.270.11.5793
 36 |    Davis CM, 1997, BIOCHEMISTRY-US, V36, P3671, DOI 10.1021/bi962386p
 37 |    DEISENHOFER J, 1984, J MOL BIOL, V180, P385, DOI 10.1016/S0022-2836(84)80011-X
 38 |    DERFELD CA, 1994, BIOCHIM BIOPHYS ACTA, V1185, P193
 39 |    Francia F, 1999, BIOCHEMISTRY-US, V38, P6834, DOI 10.1021/bi982891h
 40 |    Francia F, 2002, EUR J BIOCHEM, V269, P1877, DOI 10.1046/j.1432-1033.2002.02834.x
 41 |    Frese RN, 2000, P NATL ACAD SCI USA, V97, P5197, DOI 10.1073/pnas.090083797
 42 |    Goldsmith JO, 1996, BIOCHEMISTRY-US, V35, P2421, DOI 10.1021/bi9523365
 43 |    HELLER BA, 1990, PHOTOCHEM PHOTOBIOL, V51, P621, DOI 10.1111/j.1751-1097.1990.tb01975.x
 44 |    Hu XC, 2002, Q REV BIOPHYS, V35, P1, DOI 10.1017/S0033583501003754
 45 |    Jamieson SJ, 2002, EMBO J, V21, P3927, DOI 10.1093/emboj/cdf410
 46 |    JIRSAKOVA V, 1993, BIOCHIM BIOPHYS ACTA, V1183, P301, DOI 10.1016/0005-2728(93)90231-4
 47 |    Jungas C, 1999, EMBO J, V18, P534, DOI 10.1093/emboj/18.3.534
 48 |    KARRASCH S, 1995, EMBO J, V14, P631, DOI 10.1002/j.1460-2075.1995.tb07041.x
 49 |    Kehoe JW, 1998, BIOCHEMISTRY-US, V37, P3418, DOI 10.1021/bi9722709
 50 |    Koepke J, 1996, STRUCTURE, V4, P581, DOI 10.1016/S0969-2126(96)00063-9
 51 |    Law CJ, 2003, PHOTOSYNTH RES, V75, P193, DOI 10.1023/A:1023982327748
 52 |    LEE JK, 1989, J BACTERIOL, V171, P3391, DOI 10.1128/jb.171.6.3391-3405.1989
 53 |    LILBURN TG, 1995, J BACTERIOL, V177, P4593, DOI 10.1128/jb.177.16.4593-4600.1995
 54 |    LOACH PA, 1990, FEMS SYMP, V53, P235
 55 |    LOACH PA, 1994, PHOTOSYNTH RES, V40, P231, DOI 10.1007/BF00034773
 56 |    LOACH PA, 1995, ANOXYGENIC PHOTOSYNT, P437
 57 |    LOACH PA, 1985, MOL BIOL PHOTOSYNTHE, P197
 58 |    MCDERMOTT G, 1995, NATURE, V374, P517, DOI 10.1038/374517a0
 59 |    McGlynn P, 1996, J BIOL CHEM, V271, P3285, DOI 10.1074/jbc.271.6.3285
 60 |    Meadows KA, 1998, BIOCHEMISTRY-US, V37, P3411, DOI 10.1021/bi972269+
 61 |    MEADOWS KA, 1995, BIOCHEMISTRY-US, V34, P1559, DOI 10.1021/bi00005a012
 62 |    MECKENSTOCK RU, 1992, FEBS LETT, V311, P128, DOI 10.1016/0014-5793(92)81383-W
 63 |    MICHALSKI TJ, 1988, J AM CHEM SOC, V110, P5888, DOI 10.1021/ja00225a047
 64 |    MILLER JF, 1987, BIOCHEMISTRY-US, V26, P5055, DOI 10.1021/bi00390a026
 65 |    Papiz MZ, 1996, TRENDS PLANT SCI, V1, P198, DOI 10.1016/1360-1385(96)20005-6
 66 |    Parkes-Loach PS, 2001, BIOCHEMISTRY-US, V40, P5593, DOI 10.1021/bi002580i
 67 |    PARKESLOACH PS, 1994, PHOTOSYNTH RES, V40, P247, DOI 10.1007/BF00034774
 68 |    PARKESLOACH PS, 1988, BIOCHEMISTRY-US, V27, P2718, DOI 10.1021/bi00408a011
 69 |    Pond AE, 2000, INORG CHEM, V39, P6061, DOI 10.1021/ic0007198
 70 |    Roszak AW, 2003, SCIENCE, V302, P1969, DOI 10.1126/science.1088892
 71 |    Scheuring S, 2003, P NATL ACAD SCI USA, V100, P1690, DOI 10.1073/pnas.0437992100
 72 |    THEILER R, 1984, H-S Z PHYSIOL CHEM, V365, P703, DOI 10.1515/bchm2.1984.365.2.703
 73 |    Todd JB, 1998, BIOCHEMISTRY-US, V37, P17458, DOI 10.1021/bi981114e
 74 |    Todd JB, 1999, PHOTOSYNTH RES, V62, P85, DOI 10.1023/A:1006337827672
 75 |    TONN SJ, 1977, BIOCHEMISTRY-US, V16, P877, DOI 10.1021/bi00624a011
 76 |    VANGRONDELLE R, 1994, BBA-BIOENERGETICS, V1187, P1, DOI 10.1016/0005-2728(94)90166-X
 77 |    VANMOURIK F, 1991, BIOCHIM BIOPHYS ACTA, V1059, P111, DOI 10.1016/S0005-2728(05)80193-8
 78 |    VISSCHERS RW, 1991, BIOCHEMISTRY-US, V30, P5734, DOI 10.1021/bi00237a015
 79 |    VISSCHERS RW, 1993, BIOCHIM BIOPHYS ACTA, V1183, P369, DOI 10.1016/0005-2728(93)90241-7
 80 |    Walz T, 1997, J MOL BIOL, V265, P107, DOI 10.1006/jmbi.1996.0714
 81 |    Wang ZY, 2002, J AM CHEM SOC, V124, P1072, DOI 10.1021/ja0112994
 82 |    Wang ZY, 2001, EUR J BIOCHEM, V268, P3375, DOI 10.1046/j.1432-1327.2001.02234.x
 83 |    Westerhuis WHJ, 1998, BBA-BIOENERGETICS, V1366, P317, DOI 10.1016/S0005-2728(98)00132-7
 84 |    ZUBER H, 1995, ANOXYGENIC PHOTOSYNT, P315
 85 | NR 59
 86 | TC 23
 87 | Z9 25
 88 | U1 0
 89 | U2 5
 90 | PU AMER CHEMICAL SOC
 91 | PI WASHINGTON
 92 | PA 1155 16TH ST, NW, WASHINGTON, DC 20036 USA
 93 | SN 0006-2960
 94 | J9 BIOCHEMISTRY-US
 95 | JI Biochemistry
 96 | PD JUN 8
 97 | PY 2004
 98 | VL 43
 99 | IS 22
100 | BP 7003
101 | EP 7016
102 | DI 10.1021/bi049798f
103 | PG 14
104 | WC Biochemistry & Molecular Biology
105 | SC Biochemistry & Molecular Biology
106 | GA 826CV
107 | UT WOS:000221807500019
108 | PM 15170338
109 | DA 2019-03-18
110 | ER
111 | 
112 | PT J
113 | AU Cao, WX
114 |    Ye, X
115 |    Georgiev, GY
116 |    Berezhna, S
117 |    Sjodin, T
118 |    Demidov, AA
119 |    Wang, W
120 |    Sage, JT
121 |    Champion, PM
122 | AF Cao, WX
123 |    Ye, X
124 |    Georgiev, GY
125 |    Berezhna, S
126 |    Sjodin, T
127 |    Demidov, AA
128 |    Wang, W
129 |    Sage, JT
130 |    Champion, PM
131 | TI Proximal and distal influences on ligand binding kinetics in
132 |    microperoxidase and heme model compounds
133 | SO BIOCHEMISTRY
134 | LA English
135 | DT Article
136 | ID SPERM-WHALE MYOGLOBIN; RESONANCE RAMAN-SCATTERING; CARBON-MONOXIDE
137 |    BINDING; POCKET DOCKING SITE; T-STATE HEMOGLOBIN; CYTOCHROME-C; GEMINATE
138 |    RECOMBINATION; LOW PH; VIBRATIONAL-RELAXATION; QUATERNARY STRUCTURE
139 | AB We use laser flash photolysis and time-resolved Raman spectroscopy of CO-bound heme complexes to study proximal and distal influences on ligand rebinding kinetics. We report kinetics of CO rebinding to microperoxidase (MP) and 2-methylimidazole ligated Fe protoporphyrin IX in the 10 ns to 10 ms time window. We also report CO rebinding kinetics of MP in the 150 fs to 140 ps time window. For dilute, micelle-encapsulated (monodisperse) samples of MP, we do not observe the large amplitude geminate decay at similar to100 ps previously reported in time-resolved IR measurements on highly concentrated samples [Lim, M., Jackson, T. A., and Anfinrud, P. A. (1997) J. Biol. Inorg. Chem. 2, 531-536]. However, for high concentration aggregated samples, we do observe the large amplitude picosecond CO geminate rebinding and find that it is correlated with the absence of the iron-histidine vibrational mode in the time-resolved Raman spectrum. On the basis of these results, the energetic significance of a putative distal pocket CO docking site proposed by Lim et al. may need to be reconsidered. Finally, when high concentration samples of native myoglobin (Mb) were studied as a control, an analogous increase in the geminate rebinding kinetics was not observed. This verifies that studies of Mb under dilute conditions are applicable to the more concentrated regime found in the cellular milieu.
140 | C1 Northeastern Univ, Dept Phys, Boston, MA 02115 USA.
141 |    Northeastern Univ, Ctr Interdisciplinary Res Complex Syst, Boston, MA 02115 USA.
142 | RP Champion, PM (reprint author), Northeastern Univ, Dept Phys, Boston, MA 02115 USA.
143 | EM jtsage@neu.edu; p.champion@neu.edu
144 | FU NIGMS NIH HHS [GM-52002]; NIDDK NIH HHS [DK035090]
145 | CR Adams P. A., 1996, CYTOCHROME C MULTIDI, P635
146 |    ANFINRUD PA, 1994, P SOC PHOTO-OPT INS, V2138, P107, DOI 10.1117/12.181348
147 |    Antonini E., 1971, HEMOGLOBIN MYOGLOBIN
148 |    AUSTIN RH, 1975, BIOCHEMISTRY-US, V14, P5355, DOI 10.1021/bi00695a021
149 |    BANGCHAROENPAURPONG O, 1984, J AM CHEM SOC, V106, P5688, DOI 10.1021/ja00331a045
150 |    Barrick D, 1997, NAT STRUCT BIOL, V4, P78, DOI 10.1038/nsb0197-78
151 |    BLAUER G, 1993, BIOCHEMISTRY-US, V32, P6674, DOI 10.1021/bi00077a021
152 |    Brunori M, 2000, P NATL ACAD SCI USA, V97, P2058, DOI 10.1073/pnas.040459697
153 |    Brunori M, 2000, BIOPHYS CHEM, V86, P221, DOI 10.1016/S0301-4622(00)00142-3
154 |    CAO W, 2003, THESIS NE U BOSTON
155 |    Cao WX, 2001, BIOPHYS J, V80, p283A
156 |    Cao WX, 2001, BIOCHEMISTRY-US, V40, P5728, DOI 10.1021/bi010067e
157 |    CARRAWAY AD, 1995, J INORG BIOCHEM, V60, P267, DOI 10.1016/0162-0134(95)00026-7
158 |    CARVER TE, 1990, J BIOL CHEM, V265, P20007
159 |    CHANCE B, 1966, J MOL BIOL, V17, P525, DOI 10.1016/S0022-2836(66)80162-6
160 |    CHANG CK, 1973, J AM CHEM SOC, V95, P8477, DOI 10.1021/ja00806a062
161 |    Christian JF, 1997, BIOCHEMISTRY-US, V36, P11198, DOI 10.1021/bi9710075
162 |    Chu K, 2000, NATURE, V403, P921
163 |    DUPRAT AF, 1995, BIOCHEMISTRY-US, V34, P2634, DOI 10.1021/bi00008a030
164 |    ELBER R, 1990, J AM CHEM SOC, V112, P9161, DOI 10.1021/ja00181a020
165 |    Franzen S, 2001, BIOCHEMISTRY-US, V40, P5299, DOI 10.1021/bi0023403
166 |    GEIBEL J, 1978, J AM CHEM SOC, V100, P3575, DOI 10.1021/ja00479a047
167 |    Harvey JN, 2000, J AM CHEM SOC, V122, P12401, DOI 10.1021/ja005543n
168 |    HASINOFF BB, 1981, ARCH BIOCHEM BIOPHYS, V211, P396, DOI 10.1016/0003-9861(81)90470-7
169 |    HENRY ER, 1983, J MOL BIOL, V166, P443, DOI 10.1016/S0022-2836(83)80094-1
170 |    HORI H, 1980, J AM CHEM SOC, V102, P3608, DOI 10.1021/ja00530a049
171 |    HUANG Y, 1991, J AM CHEM SOC, V113, P9141, DOI 10.1021/ja00024a018
172 |    KINCAID J, 1979, P NATL ACAD SCI USA, V76, P549, DOI 10.1073/pnas.76.2.549
173 |    Kumazaki S, 2000, J BIOL CHEM, V275, P38378, DOI 10.1074/jbc.M005533200
174 |    Kundu S, 2002, PROTEINS, V46, P268, DOI 10.1002/prot.10048
175 |    Laberge M, 1998, J BIOMOL STRUCT DYN, V15, P1039, DOI 10.1080/07391102.1998.10508999
176 |    LI XY, 1988, J AM CHEM SOC, V110, P6024, DOI 10.1021/ja00226a017
177 |    LIM M, 1995, SCIENCE, V269, P962, DOI 10.1126/science.7638619
178 |    LIM M, 2001, ULTRAFAST INFRARED R, P191
179 |    Lim MH, 1997, J BIOL INORG CHEM, V2, P531, DOI 10.1007/s007750050167
180 |    LIM MH, 1995, J CHEM PHYS, V102, P4355, DOI 10.1063/1.469484
181 |    Lim MH, 1997, NAT STRUCT BIOL, V4, P209, DOI 10.1038/nsb0397-209
182 |    Linke W. F., 1940, SOLUBILITIES INORGAN
183 |    MAZUMDAR S, 1991, INORG CHEM, V30, P700, DOI 10.1021/ic00004a020
184 |    McMahon BH, 2000, J CHEM PHYS, V113, P6831, DOI 10.1063/1.1309524
185 |    MIERS JB, 1991, J CHEM PHYS, V94, P1825, DOI 10.1063/1.459957
186 |    NAGAI K, 1980, J MOL BIOL, V136, P271, DOI 10.1016/0022-2836(80)90374-5
187 |    Negrerie M, 2001, J BIOL CHEM, V276, P46815, DOI 10.1074/jbc.M102224200
188 |    Olson JS, 1997, J BIOL INORG CHEM, V2, P544, DOI 10.1007/s007750050169
189 |    Olson JS, 1996, J BIOL CHEM, V271, P17593, DOI 10.1074/jbc.271.30.17593
190 |    OLSON JS, 1988, NATURE, V336, P265, DOI 10.1038/336265a0
191 |    Ostermann A, 2000, NATURE, V404, P205, DOI 10.1038/35004622
192 |    OTHMAN S, 1993, BIOCHEMISTRY-US, V32, P9781, DOI 10.1021/bi00088a033
193 |    PERUTZ MF, 1970, NATURE, V228, P726, DOI 10.1038/228726a0
194 |    PERUTZ MF, 1966, J MOL BIOL, V21, P199, DOI 10.1016/0022-2836(66)90088-X
195 |    Peterson ES, 1998, BIOCHEMISTRY-US, V37, P4346, DOI 10.1021/bi9708693
196 |    PHILLIPS SEV, 1980, J MOL BIOL, V142, P531, DOI 10.1016/0022-2836(80)90262-4
197 |    QUILLIN ML, 1993, J MOL BIOL, V234, P140, DOI 10.1006/jmbi.1993.1569
198 |    RAY GB, 1994, J AM CHEM SOC, V116, P162, DOI 10.1021/ja00080a019
199 |    RINGE D, 1984, BIOCHEMISTRY-US, V23, P2, DOI 10.1021/bi00296a001
200 |    ROUSSEAU DL, 1988, RESONANCE RAMAN SPEC, P133
201 |    SAGE JT, 1991, BIOCHEMISTRY-US, V30, P1227, DOI 10.1021/bi00219a010
202 |    SAGE JT, 1991, BIOCHEMISTRY-US, V30, P1237, DOI 10.1021/bi00219a011
203 |    SAGE JT, 1996, COMPREHENSIVE SUPRAM, V5, P171
204 |    SAGE JT, 2004, ENCY SUPRAMOLECULAR
205 |    SALMEEN I, 1978, BIOCHEMISTRY-US, V17, P800, DOI 10.1021/bi00598a008
206 |    Scott EE, 1997, BIOCHEMISTRY-US, V36, P11909, DOI 10.1021/bi970719s
207 |    Scott EE, 2001, J BIOL CHEM, V276, P5177, DOI 10.1074/jbc.M008282200
208 |    SHARMA VS, 1975, BIOCHEM BIOPH RES CO, V66, P1301, DOI 10.1016/0006-291X(75)90501-X
209 |    Sigfridsson E, 2002, J INORG BIOCHEM, V91, P101, DOI 10.1016/S0162-0134(02)00426-9
210 |    Spiro TG, 2001, ACCOUNTS CHEM RES, V34, P137, DOI 10.1021/ar0001108j
211 |    SRAJER V, 1988, J AM CHEM SOC, V110, P6656, DOI 10.1021/ja00228a009
212 |    Sugimoto T, 1998, BIOPHYS J, V75, P2188, DOI 10.1016/S0006-3495(98)77662-3
213 |    TERAOKA J, 1981, J BIOL CHEM, V256, P3969
214 |    Tian WD, 1996, BIOCHEMISTRY-US, V35, P3487, DOI 10.1021/bi952474u
215 |    TRAYLOR TG, 1981, ACCOUNTS CHEM RES, V14, P102, DOI 10.1021/ar00064a002
216 |    TRAYLOR TG, 1990, J AM CHEM SOC, V112, P6875, DOI 10.1021/ja00175a022
217 |    TRAYLOR TG, 1992, J AM CHEM SOC, V114, P417, DOI 10.1021/ja00028a005
218 |    Unno M, 1998, J AM CHEM SOC, V120, P2670, DOI 10.1021/ja973293d
219 |    URRY DW, 1967, J AM CHEM SOC, V89, P5276, DOI 10.1021/ja00996a034
220 |    URRY DW, 1967, J AM CHEM SOC, V89, P4190, DOI 10.1021/ja00992a601
221 |    Vogel KM, 1999, J AM CHEM SOC, V121, P9915, DOI 10.1021/ja990042r
222 |    WANG JS, 1989, J PHYS CHEM-US, V93, P7925, DOI 10.1021/j100360a038
223 |    Wang W, 2000, J PHYS CHEM B, V104, P10789, DOI 10.1021/jp0008602
224 |    WEI YZ, 1994, J PHYS CHEM-US, V98, P6644, DOI 10.1021/j100077a034
225 |    WHITE DK, 1979, J AM CHEM SOC, V101, P2443, DOI 10.1021/ja00503a034
226 |    Yang F, 1996, J MOL BIOL, V256, P762, DOI 10.1006/jmbi.1996.0123
227 |    Ye X, 2002, J AM CHEM SOC, V124, P5914, DOI 10.1021/ja017359n
228 |    YE X, 2003, THESIS NE U BOSTON
229 |    Ye XO, 2003, J PHYS CHEM A, V107, P8156, DOI 10.1021/jp0276799
230 |    ZHU L, 1992, J MOL BIOL, V224, P207, DOI 10.1016/0022-2836(92)90584-7
231 | NR 86
232 | TC 31
233 | Z9 31
234 | U1 1
235 | U2 13
236 | PU AMER CHEMICAL SOC
237 | PI WASHINGTON
238 | PA 1155 16TH ST, NW, WASHINGTON, DC 20036 USA
239 | SN 0006-2960
240 | J9 BIOCHEMISTRY-US
241 | JI Biochemistry
242 | PD JUN 8
243 | PY 2004
244 | VL 43
245 | IS 22
246 | BP 7017
247 | EP 7027
248 | DI 10.1021/bi0497291
249 | PG 11
250 | WC Biochemistry & Molecular Biology
251 | SC Biochemistry & Molecular Biology
252 | GA 826CV
253 | UT WOS:000221807500020
254 | PM 15170339
255 | DA 2019-03-18
256 | ER
257 | 
258 | EF
259 | 


--------------------------------------------------------------------------------
/tests/test_benchmark.py:
--------------------------------------------------------------------------------
 1 | import rispy
 2 | 
 3 | EXAMPLE_RECORD = """
 4 | 42.
 5 | TY  - JOUR
 6 | ID  - 12345
 7 | T1  - The title of the reference
 8 | A1  - Marxus, Karlus
 9 | A1  - Lindgren, Astrid
10 | A2  - Glattauer, Daniel
11 | Y1  - 2006//
12 | N2  - BACKGROUND: Lorem dammed ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus.  RESULTS: Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. CONCLUSIONS: Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium.
13 | KW  - Pippi Langstrumpf
14 | KW  - Nordwind
15 | KW  - Piraten
16 | KW  - Seeräuber
17 | KW  - Kinderbuch
18 | KW  - Astrid Lindgren
19 | JF  - Lorem ipsum dolor sit amet
20 | JA  - lorem ipsum dolor sit amet
21 | VL  - 6
22 | IS  - 3
23 | SP  - e0815341
24 | CY  - Germany
25 | PB  - Dark Factory
26 | SN  - 1732-4208
27 | M1  - 1228150341
28 | L2  - http://example2.com
29 | UR  - http://example.com/1
30 | UR  - http://example.com/2
31 | UR  - http://example.com/3
32 | DO  - 10.1371/journal.pone.0081534
33 | ER  -
34 | 
35 | """  # noqa
36 | 
37 | # Same record as EXAMPLE_RECORD, except that the N2 abstract is broken across several lines.
38 | EXAMPLE_RECORD_MULTILINE = """
39 | 42.
40 | TY  - JOUR
41 | ID  - 12345
42 | T1  - The title of the reference
43 | A1  - Marxus, Karlus
44 | A1  - Lindgren, Astrid
45 | A2  - Glattauer, Daniel
46 | Y1  - 2006//
47 | N2  - BACKGROUND: Lorem dammed ipsum dolor sit amet,
48 | consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa.
49 |     - Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus
50 |     - mus.  RESULTS: Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem.
51 |       Nulla consequat massa quis enim. CONCLUSIONS: Donec pede justo, fringilla vel, aliquet
52 | nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam
53 | dictum felis eu pede mollis pretium.
54 | KW  - Pippi Langstrumpf
55 | KW  - Nordwind
56 | KW  - Piraten
57 | KW  - Seeräuber
58 | KW  - Kinderbuch
59 | KW  - Astrid Lindgren
60 | JF  - Lorem ipsum dolor sit amet
61 | JA  - lorem ipsum dolor sit amet
62 | VL  - 6
63 | IS  - 3
64 | SP  - e0815341
65 | CY  - Germany
66 | PB  - Dark Factory
67 | SN  - 1732-4208
68 | M1  - 1228150341
69 | L2  - http://example2.com
70 | UR  - http://example.com/1
71 | UR  - http://example.com/2
72 | UR  - http://example.com/3
73 | DO  - 10.1371/journal.pone.0081534
74 | ER  -
75 | 
76 | """
77 | 
78 | 
79 | def test_benchmark_rispy_large(benchmark):
80 |     benchmark_dataset = EXAMPLE_RECORD * 10000
81 | 
82 |     benchmark(rispy.loads, benchmark_dataset)
83 | 
84 | 
85 | def test_benchmark_rispy_large_multiline(benchmark):
86 |     benchmark_dataset = EXAMPLE_RECORD_MULTILINE * 10000
87 | 
88 |     benchmark(rispy.loads, benchmark_dataset)
89 | 


--------------------------------------------------------------------------------
/tests/test_parser.py:
--------------------------------------------------------------------------------
  1 | from io import StringIO
  2 | from pathlib import Path
  3 | 
  4 | import pytest
  5 | 
  6 | import rispy
  7 | 
# Absolute path of the "data" directory that sits next to this test module;
# the tests below read their .ris input files from it.
DATA_DIR = Path(__file__).parent.resolve() / "data"
  9 | 
 10 | 
 11 | @pytest.fixture
 12 | def example_basic():
 13 |     # expected output from `example_basic.ris`
 14 |     return [
 15 |         {
 16 |             "type_of_reference": "JOUR",
 17 |             "authors": ["Shannon,Claude E."],
 18 |             "year": "1948/07//",
 19 |             "title": "A Mathematical Theory of Communication",
 20 |             "alternate_title3": "Bell System Technical Journal",
 21 |             "start_page": "379",
 22 |             "end_page": "423",
 23 |             "volume": "27",
 24 |         }
 25 |     ]
 26 | 
 27 | 
 28 | def test_load_file(example_basic):
 29 |     # test with file object
 30 |     filepath = DATA_DIR / "example_basic.ris"
 31 |     with open(filepath) as f:
 32 |         entries = rispy.load(f)
 33 |     assert example_basic == entries
 34 | 
 35 | 
 36 | def test_load_file_noreadline(example_basic):
 37 |     # test with file object that has no readline
 38 | 
 39 |     class NoReadline(StringIO):
 40 |         @property
 41 |         def readline(self):  # type: ignore
 42 |             raise AttributeError("Not found")
 43 | 
 44 |     filepath = DATA_DIR / "example_basic.ris"
 45 |     f = NoReadline(filepath.read_text())
 46 |     assert not hasattr(f, "readline")
 47 |     entries = rispy.load(f)
 48 |     assert example_basic == entries
 49 | 
 50 | 
 51 | def test_load_path(example_basic):
 52 |     # test with Path object
 53 |     filepath = DATA_DIR / "example_basic.ris"
 54 |     p = Path(filepath)
 55 |     entries = rispy.load(p)
 56 |     assert example_basic == entries
 57 | 
 58 | 
 59 | def test_load_bad_file():
 60 |     with pytest.raises(ValueError, match="File must be a file-like object or a Path object"):
 61 |         rispy.load("test")  # type: ignore
 62 | 
 63 | 
 64 | def test_loads(example_basic):
 65 |     ristext = (DATA_DIR / "example_basic.ris").read_text()
 66 |     assert example_basic == rispy.loads(ristext)
 67 | 
 68 | 
 69 | def test_load_multiline_ris():
 70 |     filepath = DATA_DIR / "example_multiline.ris"
 71 |     expected = {
 72 |         "type_of_reference": "JOUR",
 73 |         "notes_abstract": "first line, ER then second line and at the end the last line",
 74 |         "notes": ["first line", "* second line", "* last line"],
 75 |     }
 76 |     with open(filepath) as f:
 77 |         entries = rispy.load(f)
 78 | 
 79 |     for entry in entries:
 80 |         assert expected == entry
 81 | 
 82 | 
 83 | def test_load_example_full_ris():
 84 |     filepath = DATA_DIR / "example_full.ris"
 85 |     expected = [
 86 |         {
 87 |             "type_of_reference": "JOUR",
 88 |             "id": "12345",
 89 |             "primary_title": "Title of reference",
 90 |             "first_authors": ["Marx, Karl", "Lindgren, Astrid"],
 91 |             "secondary_authors": ["Glattauer, Daniel"],
 92 |             "publication_year": "2014//",
 93 |             "notes_abstract": "BACKGROUND: Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus.  RESULTS: Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. CONCLUSIONS: Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium.",  # noqa: E501
 94 |             "keywords": ["Pippi", "Nordwind", "Piraten"],
 95 |             "alternate_title3": "Lorem",
 96 |             "alternate_title2": "lorem",
 97 |             "volume": "9",
 98 |             "number": "3",
 99 |             "start_page": "e0815",
100 |             "place_published": "United States",
101 |             "publisher": "Fun Factory",
102 |             "issn": "1932-6208",
103 |             "note": "1008150341",
104 |             "file_attachments2": "http://example.com",
105 |             "urls": ["http://example_url.com"],
106 |         },
107 |         {
108 |             "type_of_reference": "JOUR",
109 |             "id": "12345",
110 |             "primary_title": "The title of the reference",
111 |             "first_authors": ["Marxus, Karlus", "Lindgren, Astrid"],
112 |             "secondary_authors": ["Glattauer, Daniel"],
113 |             "publication_year": "2006//",
114 |             "notes_abstract": "BACKGROUND: Lorem dammed ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus.  RESULTS: Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. CONCLUSIONS: Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium.",  # noqa: E501
115 |             "keywords": ["Pippi Langstrumpf", "Nordwind", "Piraten"],
116 |             "alternate_title3": "Lorem",
117 |             "alternate_title2": "lorem",
118 |             "volume": "6",
119 |             "number": "3",
120 |             "start_page": "e0815341",
121 |             "place_published": "Germany",
122 |             "publisher": "Dark Factory",
123 |             "issn": "1732-4208",
124 |             "note": "1228150341",
125 |             "file_attachments2": "http://example2.com",
126 |             "urls": ["http://example_url.com"],
127 |         },
128 |     ]
129 | 
130 |     with open(filepath) as f:
131 |         entries = rispy.load(f)
132 |     assert expected == entries
133 | 
134 | 
def test_load_example_extraneous_data_ris():
    # Parse `example_extraneous_data.ris` and compare both records against the
    # expected dictionaries, field by field.
    first_record = {
        "type_of_reference": "JOUR",
        "id": "12345",
        "primary_title": "Title of reference",
        "first_authors": ["Marx, Karl", "Lindgren, Astrid"],
        "secondary_authors": ["Glattauer, Daniel"],
        "publication_year": "2014//",
        "notes_abstract": "BACKGROUND: Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus.  RESULTS: Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. CONCLUSIONS: Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium.",  # noqa: E501
        "keywords": ["Pippi", "Nordwind", "Piraten"],
        "alternate_title3": "Lorem",
        "alternate_title2": "lorem",
        "volume": "9",
        "number": "3",
        "start_page": "e0815",
        "place_published": "United States",
        "publisher": "Fun Factory",
        "issn": "1932-6208",
        "note": "1008150341",
        "file_attachments2": "http://example.com",
        "urls": ["http://example_url.com"],
    }
    second_record = {
        "type_of_reference": "JOUR",
        "id": "12345",
        "primary_title": "The title of the reference",
        "first_authors": ["Marxus, Karlus", "Lindgren, Astrid"],
        "secondary_authors": ["Glattauer, Daniel"],
        "publication_year": "2006//",
        "notes_abstract": "BACKGROUND: Lorem dammed ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus.  RESULTS: Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. CONCLUSIONS: Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium.",  # noqa: E501
        "keywords": ["Pippi Langstrumpf", "Nordwind", "Piraten"],
        "alternate_title3": "Lorem",
        "alternate_title2": "lorem",
        "volume": "6",
        "number": "3",
        "start_page": "e0815341",
        "place_published": "Germany",
        "publisher": "Dark Factory",
        "issn": "1732-4208",
        "note": "1228150341",
        "file_attachments2": "http://example2.com",
        "urls": ["http://example_url.com"],
    }

    with open(DATA_DIR / "example_extraneous_data.ris") as handle:
        parsed = rispy.load(handle)
    assert [first_record, second_record] == parsed
185 | 
186 | 
def test_load_example_full_ris_without_whitespace():
    # Parse files without whitespace after ER tag.
    # Resolves https://github.com/MrTango/rispy/pull/25
    first_record = {
        "type_of_reference": "JOUR",
        "id": "12345",
        "primary_title": "Title of reference",
        "first_authors": ["Marx, Karl", "Lindgren, Astrid"],
        "secondary_authors": ["Glattauer, Daniel"],
        "publication_year": "2014//",
        "notes_abstract": "BACKGROUND: Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus.  RESULTS: Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. CONCLUSIONS: Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium.",  # noqa: E501
        "keywords": ["Pippi", "Nordwind", "Piraten"],
        "alternate_title3": "Lorem",
        "alternate_title2": "lorem",
        "volume": "9",
        "number": "3",
        "start_page": "e0815",
        "place_published": "United States",
        "publisher": "Fun Factory",
        "issn": "1932-6208",
        "note": "1008150341",
        "file_attachments2": "http://example.com",
        "urls": ["http://example_url.com"],
    }
    second_record = {
        "type_of_reference": "JOUR",
        "id": "12345",
        "primary_title": "The title of the reference",
        "first_authors": ["Marxus, Karlus", "Lindgren, Astrid"],
        "secondary_authors": ["Glattauer, Daniel"],
        "publication_year": "2006//",
        "notes_abstract": "BACKGROUND: Lorem dammed ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus.  RESULTS: Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. CONCLUSIONS: Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium.",  # noqa: E501
        "keywords": ["Pippi Langstrumpf", "Nordwind", "Piraten"],
        "alternate_title3": "Lorem",
        "alternate_title2": "lorem",
        "volume": "6",
        "number": "3",
        "start_page": "e0815341",
        "place_published": "Germany",
        "publisher": "Dark Factory",
        "issn": "1732-4208",
        "note": "1228150341",
        "file_attachments2": "http://example2.com",
        "urls": ["http://example_url.com"],
    }

    with open(DATA_DIR / "example_full_without_whitespace.ris") as handle:
        parsed = rispy.load(handle)
    assert [first_record, second_record] == parsed
240 | 
241 | 
def test_load_single_unknown_tag_ris():
    # Repeated values of one unrecognised tag (JP) are collected in a list
    # under the "unknown_tag" key of the first entry.
    with open(DATA_DIR / "example_single_unknown_tag.ris") as handle:
        parsed = rispy.load(handle)

    first_entry = parsed[0]
    assert first_entry == dict(
        type_of_reference="JOUR",
        authors=["Shannon,Claude E."],
        year="1948/07//",
        title="A Mathematical Theory of Communication",
        alternate_title3="Bell System Technical Journal",
        start_page="379",
        end_page="423",
        volume="27",
        unknown_tag={"JP": ["CRISPR", "Direct Current"]},
    )
260 | 
261 | 
def test_load_multiple_unknown_tags_ris():
    # Two different unrecognised tags (JP and DC) each get their own list
    # under the "unknown_tag" key of the first entry.
    with open(DATA_DIR / "example_multi_unknown_tags.ris") as handle:
        parsed = rispy.load(handle)

    first_entry = parsed[0]
    assert first_entry == dict(
        type_of_reference="JOUR",
        authors=["Shannon,Claude E."],
        year="1948/07//",
        title="A Mathematical Theory of Communication",
        alternate_title3="Bell System Technical Journal",
        end_page="423",
        volume="27",
        unknown_tag={"JP": ["CRISPR"], "DC": ["Direct Current"]},
    )
277 | 
278 | 
def test_starting_newline():
    # `example_starting_newlines.ris` must still parse to exactly one entry.
    with open(DATA_DIR / "example_starting_newlines.ris") as handle:
        parsed = rispy.load(handle)
    entry_count = len(parsed)
    assert entry_count == 1
284 | 
285 | 
def test_strip_bom():
    # A byte order mark at the start of the text must not leak into the first
    # parsed tag; the first entry must match `expected` exactly.
    expected = {
        "type_of_reference": "JOUR",
        "doi": "10.1186/s40981-020-0316-0",
    }

    filepath = DATA_DIR / "example_bom.ris"

    # We properly decode the content of this file as UTF-8, but leave the BOM.
    # The plain "utf-8" codec keeps a leading BOM as U+FEFF in the decoded text,
    # whereas "utf-8-sig" would drop it during decoding and the parser would
    # never see it, so its BOM handling would go untested.
    with open(filepath, encoding="utf-8") as f:
        entries = rispy.load(f)

    assert expected == entries[0]
299 | 
300 | 
def test_wos_ris():
    # Parse `example_wos.ris` with the WokParser implementation and check the
    # number of records and each record's document title.
    expected_titles = [
        "Interactions stabilizing the structure of the core light-harvesting complex (LHl) of photosynthetic bacteria and its subunit (B820)",  # noqa: E501
        "Proximal and distal influences on ligand binding kinetics in microperoxidase and heme model compounds",  # noqa: E501
    ]

    with open(DATA_DIR / "example_wos.ris") as handle:
        records = rispy.load(handle, implementation=rispy.WokParser)

    assert len(records) == 2
    for record, expected_title in zip(records, expected_titles):
        assert record["document_title"] == expected_title
313 | 
314 | 
def test_unkown_skip():
    # With skip_unknown_tags=True the first entry carries no "unknown_tag" key.
    with open(DATA_DIR / "example_multi_unknown_tags.ris") as handle:
        parsed = rispy.load(handle, skip_unknown_tags=True)

    first_entry = parsed[0]
    assert first_entry == dict(
        type_of_reference="JOUR",
        authors=["Shannon,Claude E."],
        year="1948/07//",
        title="A Mathematical Theory of Communication",
        alternate_title3="Bell System Technical Journal",
        end_page="423",
        volume="27",
    )
330 | 
331 | 
def test_type_conversion():
    # Exercise rispy.utils.convert_reference_types forwards, in reverse, and in
    # strict mode (which raises KeyError for the unmapped "TEST" type).
    convert = rispy.utils.convert_reference_types
    references = [
        {"type_of_reference": "JOUR", "id": "12345", "primary_title": "Title of reference"},
        {
            "type_of_reference": "BOOK",
            "id": "12345",
            "primary_title": "The title of the reference",
        },
        {"type_of_reference": "Journal", "id": "12345", "primary_title": "Title of reference"},
        {"type_of_reference": "TEST", "id": "12345", "primary_title": "Title of reference"},
    ]

    # forward conversion: codes become names, unmapped values pass through
    converted = convert(references)
    converted_types = [reference["type_of_reference"] for reference in converted]
    assert converted_types == ["Journal", "Whole book", "Journal", "TEST"]

    # reverse conversion
    restored = convert(converted, reverse=True)
    assert restored[:2] == references[:2]
    assert restored[3] == references[3]
    assert restored[2]["type_of_reference"] == "JOUR"

    # strict conversion rejects the unmapped type
    with pytest.raises(KeyError):
        convert(references, strict=True)
    known_only = references[:3]
    strict_converted = convert(known_only, strict=True)

    # strict conversion in reverse
    strict_restored = convert(strict_converted, strict=True, reverse=True)
    assert strict_restored[:2] == known_only[:2]
    assert strict_restored[2]["type_of_reference"] == "JOUR"
370 | 
371 | 
def test_encodings():
    # The `encoding` argument of rispy.load is applied when a Path is given:
    # cp1252 fails to decode the file, utf-8-sig matches a manual open().
    ris_path = DATA_DIR / "example_utf_chars.ris"

    with open(ris_path, encoding="utf-8-sig") as handle:
        reference_entries = rispy.load(handle)

    with pytest.raises(UnicodeDecodeError):
        rispy.load(ris_path, encoding="cp1252")

    loaded_from_path = rispy.load(ris_path, encoding="utf-8-sig")
    assert loaded_from_path == reference_entries
384 | 
385 | 
def test_list_tag_enforcement():
    # With enforce_list_tags=False and no list tags configured, repeated tags
    # in the file are still expected to come back as lists.
    parsed = rispy.load(
        DATA_DIR / "example_custom_list_tags.ris",
        enforce_list_tags=False,
        list_tags=[],
    )

    first_entry = parsed[0]
    assert first_entry == dict(
        type_of_reference="JOUR",
        authors=["Marx, Karl", "Marxus, Karlus"],
        issn=["12345", "ABCDEFG", "666666"],
    )
397 | 
398 | 
def test_url_tag():
    # All four records in `example_urls.ris` expose their URLs as a list.
    with open(DATA_DIR / "example_urls.ris") as handle:
        parsed = rispy.load(handle)

    both_urls = ["http://example.com", "http://www.example.com"]
    expected_urls = [["http://example.com"], both_urls, both_urls, both_urls]

    assert len(parsed) == 4
    assert [entry["urls"] for entry in parsed] == expected_urls
409 | 
410 | 
def test_empty_tag():
    # A tag without a value is kept as an empty string; neighbouring tags are
    # unaffected.
    with open(DATA_DIR / "example_empty_tag.ris") as handle:
        parsed = rispy.load(handle)

    assert len(parsed) == 1
    only_entry = parsed[0]
    assert only_entry["number"] == "9"
    assert only_entry["start_page"] == ""
419 | 


--------------------------------------------------------------------------------
/tests/test_utils.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from rispy.utils import invert_dictionary
 4 | 
 5 | 
 6 | def test_invert_dictionary():
 7 |     d = {"a": "b"}
 8 |     assert invert_dictionary(d) == {"b": "a"}
 9 | 
10 | 
def test_invert_dictionary_failure():
    # Two keys that share a value make the mapping non-invertible.
    duplicated_values = {"a": "b", "c": "b"}
    expected_message = "Dictionary cannot be inverted"
    with pytest.raises(ValueError, match=expected_message):
        invert_dictionary(duplicated_values)
15 | 


--------------------------------------------------------------------------------
/tests/test_writer.py:
--------------------------------------------------------------------------------
  1 | from copy import deepcopy
  2 | from pathlib import Path
  3 | from typing import ClassVar
  4 | 
  5 | import pytest
  6 | 
  7 | import rispy
  8 | 
# Absolute path of the "data" directory that sits next to this test module;
# the tests below read their .ris input files from it.
DATA_DIR = Path(__file__).parent.resolve() / "data"
 10 | 
 11 | 
 12 | @pytest.fixture
 13 | def ris_data():
 14 |     return [
 15 |         {
 16 |             "type_of_reference": "JOUR",
 17 |             "authors": ["Shannon, Claude E.", "Doe, John"],
 18 |             "year": "1948/07//",
 19 |             "title": "A Mathematical Theory of Communication",
 20 |             "start_page": "379",
 21 |             "urls": ["https://example.com", "https://example2.com"],
 22 |         }
 23 |     ]
 24 | 
 25 | 
 26 | def test_dump_and_load():
 27 |     # check that we can write the same file we read
 28 |     source_fp = DATA_DIR / "example_full.ris"
 29 | 
 30 |     # read text
 31 |     actual = source_fp.read_text()
 32 | 
 33 |     # map to RIS structure and dump
 34 |     entries = rispy.loads(actual)
 35 |     export = rispy.dumps(entries)
 36 | 
 37 |     assert actual == export
 38 | 
 39 | 
 40 | def test_dumps_multiple_unknown_tags_ris(tmp_path):
 41 |     fp = tmp_path / "test_dump_unknown_tags.ris"
 42 | 
 43 |     results = [{"title": "my-title", "abstract": "my-abstract", "does_not_exists": "test"}]
 44 | 
 45 |     # check that we get a warning
 46 |     with pytest.warns(UserWarning, match="label `does_not_exists` not exported"):
 47 |         with open(fp, "w") as f:
 48 |             rispy.dump(results, f)
 49 | 
 50 |     # check that we get everything back except missing key
 51 |     text = Path(fp).read_text()
 52 |     entries = rispy.loads(text)
 53 |     assert entries[0] == {
 54 |         "type_of_reference": "JOUR",
 55 |         "title": "my-title",
 56 |         "abstract": "my-abstract",
 57 |     }
 58 | 
 59 |     # check file looks as expected
 60 |     lines = text.splitlines()
 61 |     assert lines[0] == "1."
 62 |     assert lines[1] == "TY  - JOUR"
 63 |     assert lines[4] == "ER  - "
 64 |     assert len(lines) == 5
 65 | 
 66 | 
 67 | def test_custom_list_tags():
 68 |     filepath = DATA_DIR / "example_custom_list_tags.ris"
 69 |     list_tags = deepcopy(rispy.LIST_TYPE_TAGS)
 70 |     list_tags.append("SN")
 71 | 
 72 |     expected = {
 73 |         "type_of_reference": "JOUR",
 74 |         "authors": ["Marx, Karl", "Marxus, Karlus"],
 75 |         "issn": ["12345", "ABCDEFG", "666666"],
 76 |     }
 77 | 
 78 |     actual = filepath.read_text()
 79 | 
 80 |     entries = rispy.loads(actual, list_tags=list_tags)
 81 |     assert expected == entries[0]
 82 | 
 83 |     export = rispy.dumps(entries, list_tags=list_tags)
 84 |     assert export == actual
 85 | 
 86 | 
 87 | def test_skip_unknown_tags():
 88 |     entries = [
 89 |         {
 90 |             "type_of_reference": "JOUR",
 91 |             "authors": ["Marx, Karl", "Marxus, Karlus"],
 92 |             "issn": "12222",
 93 |             "unknown_tag": {"JP": ["CRISPR"], "DC": ["Direct Current"]},
 94 |         }
 95 |     ]
 96 |     expected = [
 97 |         {
 98 |             "type_of_reference": "JOUR",
 99 |             "authors": ["Marx, Karl", "Marxus, Karlus"],
100 |             "issn": "12222",
101 |         }
102 |     ]
103 | 
104 |     export = rispy.dumps(entries, skip_unknown_tags=True)
105 |     reload = rispy.loads(export)
106 | 
107 |     assert reload == expected
108 | 
109 | 
def test_writing_all_list_tags():
    # List values are dumped with enforce_list_tags=False and no list tags,
    # then read back with AU and SN declared as list tags.
    original = [
        dict(
            type_of_reference="JOUR",
            authors=["Marx, Karl", "Marxus, Karlus"],
            issn=["12345", "ABCDEFG", "666666"],
        )
    ]

    dumped = rispy.dumps(original, enforce_list_tags=False, list_tags=[])
    reloaded = rispy.loads(dumped, list_tags=["AU", "SN"])
    assert original == reloaded
122 | 
123 | 
def test_file_implementation_write(tmp_path):
    # Round trip through custom parser/writer subclasses: entries loaded with
    # CustomParser, dumped with CustomWriter and loaded again must be equal.

    class CustomParser(rispy.RisParser):
        DEFAULT_IGNORE: ClassVar[list[str]] = ["JF", "ID", "KW"]

    class CustomWriter(rispy.RisWriter):
        DEFAULT_IGNORE: ClassVar[list[str]] = ["JF", "ID", "KW"]

    list_tags = ["SN", "T1", "A1", "UR"]

    fn = DATA_DIR / "example_full.ris"
    with open(fn) as f:
        entries = rispy.load(f, implementation=CustomParser, list_tags=list_tags)

    # Write into pytest's per-test temporary directory rather than the
    # checked-in data directory, so running the tests never modifies the
    # source tree and works on a read-only checkout.
    fn_write = tmp_path / "example_full_write.ris"

    with open(fn_write, "w") as f:
        rispy.dump(entries, f, implementation=CustomWriter, list_tags=list_tags)

    with open(fn_write) as f:
        reload = rispy.load(f, implementation=CustomParser, list_tags=list_tags)

    assert reload == entries
146 | 
147 | 
def test_write_single_unknown_tag(ris_data):
    # One unknown tag with one value adds a single line to the dump.
    entry = ris_data[0]
    entry["unknown_tag"] = {"JP": ["CRISPR"]}

    dumped_lines = rispy.dumps(ris_data).splitlines()

    # check output is as expected
    assert dumped_lines[9] == "JP  - CRISPR"
    assert len(dumped_lines) == 11
155 | 
156 | 
def test_write_multiple_unknown_tag_same_type(ris_data):
    # Two values under the same unknown tag are written as two lines.
    entry = ris_data[0]
    entry["unknown_tag"] = {"JP": ["CRISPR", "PEOPLE"]}

    dumped_lines = rispy.dumps(ris_data).splitlines()

    # check output is as expected
    assert dumped_lines[9] == "JP  - CRISPR"
    assert dumped_lines[10] == "JP  - PEOPLE"
    assert len(dumped_lines) == 12
166 | 
167 | 
def test_write_multiple_unknown_tag_diff_type(ris_data):
    # Two different unknown tags are each written on their own line.
    entry = ris_data[0]
    entry["unknown_tag"] = {"JP": ["CRISPR"], "ED": ["Swinburne, Ricardo"]}

    dumped_lines = rispy.dumps(ris_data).splitlines()

    # check output is as expected
    assert dumped_lines[9] == "JP  - CRISPR"
    assert dumped_lines[10] == "ED  - Swinburne, Ricardo"
    assert len(dumped_lines) == 12
177 | 
178 | 
def test_default_dump(ris_data):
    # With default settings each author and each URL gets its own line.
    dumped_lines = rispy.dumps(ris_data).splitlines()

    assert dumped_lines[2] == "AU  - Shannon, Claude E."
    assert dumped_lines[3] == "AU  - Doe, John"
    assert dumped_lines[7] == "UR  - https://example.com"
    assert dumped_lines[8] == "UR  - https://example2.com"
    assert len(dumped_lines) == 10
187 | 
188 | 
def test_delimited_dump(ris_data):
    # remove URLs from list_tags and give it a custom delimiter
    url_delimiters = {"UR": ","}
    dumped = rispy.dumps(ris_data, list_tags=["AU"], delimiter_tags_mapping=url_delimiters)
    dumped_lines = dumped.splitlines()

    # authors stay one per line, URLs are joined on a single line
    assert dumped_lines[2] == "AU  - Shannon, Claude E."
    assert dumped_lines[3] == "AU  - Doe, John"
    assert dumped_lines[7] == "UR  - https://example.com,https://example2.com"
    assert len(dumped_lines) == 9
199 | 
200 | 
def test_dump_path(tmp_path, ris_data):
    # rispy.dump accepts a Path target and writes non-empty content to it.
    target = tmp_path / "file.ris"
    rispy.dump(ris_data, target)
    written = target.read_text()
    assert len(written) > 0
206 | 
207 | 
def test_bad_dump(ris_data):
    # An integer is neither a file-like object nor a Path and must be rejected.
    expected_message = "File must be a file-like object or a Path object"
    with pytest.raises(ValueError, match=expected_message):
        rispy.dump(ris_data, 123)  # type: ignore
211 | 


--------------------------------------------------------------------------------