├── .gitignore
├── Eldev
├── LICENSE
├── README.md
├── parsebib.el
└── test
    ├── parsebib-test.el
    └── test-helper.el


/.gitignore:
--------------------------------------------------------------------------------
1 | *.elc
2 | testbib.bib
3 | /.cask/
4 | 
5 | # Added automatically by ‘eldev init’.
6 | /.eldev
7 | /Eldev-local
8 | 


--------------------------------------------------------------------------------
/Eldev:
--------------------------------------------------------------------------------
1 | ; -*- mode: emacs-lisp; lexical-binding: t -*-
2 | 
3 | ;; Uncomment some calls below as needed for your project.
4 | ;(eldev-use-package-archive 'gnu-elpa)
5 | ;(eldev-use-package-archive 'nongnu-elpa)
6 | ;(eldev-use-package-archive 'melpa)
7 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2014-2022, Joost Kremers
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 | 
 9 | 1. Redistributions of source code must retain the above copyright notice, this
10 |    list of conditions and the following disclaimer.
11 | 
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 |    this list of conditions and the following disclaimer in the documentation
14 |    and/or other materials provided with the distribution.
15 | 
16 | 3. Neither the name of the copyright holder nor the names of its
17 |    contributors may be used to endorse or promote products derived from
18 |    this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Parsebib #
  2 | 
  3 | 
  4 | (c) 2014-2022 Joost Kremers
  5 | 
  6 | `Parsebib` is an Elisp library for reading bibliographic database files. It supports both BibTeX / `biblatex` (`.bib`)  files and CSL-JSON (`.json`) files.
  7 | 
  8 | The library provides functions that parse the current buffer. They are intended to be used inside a `with-temp-buffer` combined with `insert-file-contents`, but they can also be used in a buffer visiting a bibliography file, of course. Alternatively, the library provides a function `parsebib-parse` that takes a list of files and collects all the bibliographic data in them.
  9 | 
 10 | The data in the bibliography file can be returned in two ways. The first option is for `parsebib` to return the contents of the file accurately. This means that the field values as returned by `parsebib` are literally the field values in the file. This is useful if you need the contents of the `.bib` file literally, e.g., because your application provides the user with the option of modifying the data.
 11 | 
 12 | If you just want to display the bibliographic data, however, e.g., to allow a user to select a reference, you may want to represent the data in a way that is closer to what it would look like in a bibliography. To make this possible, `parsebib` can return the field values in such a way that they are suitable for display. For BibTeX / `biblatex` files, this means that `@String` abbreviations are expanded, cross-references are resolved and TeX markup is prettified or removed. For CSL-JSON files, it means that field values that are not strings (notably name and date fields) are converted to strings in a sensible way.
 13 | 
 14 | 
 15 | ## BibTeX / `biblatex` vs. CSL-JSON ##
 16 | 
 17 | Although both are bibliography file formats, there are obviously differences between BibTeX / `biblatex` on the one hand and CSL-JSON on the other. The entry types and field names are different, and CSL-JSON does not have something similar to `@String` abbreviations or cross-references.
 18 | 
 19 | Especially relevant for the purpose of this library is that there are differences in the format of the data returned for the two types of files. For both formats, the bibliographic data is returned as a hash table. In this hash table, each entry is stored under its entry key (which is the `id` field in CSL-JSON) as an alist of `(<field> . <value>)` pairs. In BibTeX data, `<field>` is a string and field names are case-insensitive, so you may have `"Author"` or `"author"`, and both may occur in a single `.bib` file. Furthermore, `<value>` is always a string.
 20 | 
 21 | In CSL-JSON data, the format of the alist is slightly different: `<field>` is not a string but a symbol and the symbol name is case-sensitive. The CSL-JSON standard describes which field names are lower case (most are) and which are upper case (`ISBN`, `DOI`, etc.) Furthermore, the `<value>` part of the alist items may be a string, a number or a vector, though when parsing a `.json` file, you can have `parsebib` convert all values to strings.
 22 | 
 23 | You can access the data in an entry's alist with `assoc` or `alist-get`, but in order to accommodate for the differences in the type of field names, you may want to use `assoc-string` instead, which generalises over symbols and strings and has a `case-fold` argument.
 24 | 
 25 | Another thing to note is that in BibTeX data, the type and key of an entry are stored in the entry's alist under `=type=` and `=key=`, while the same information is available in CSL-JSON data under `type` and `id`, respectively. While some of the core information on an entry is available in fields with the same name in both formats (esp. author, editor and title), most fields are named differently (e.g., the `year` field in BibTeX corresponds to the `issued` field in CSL-JSON).
 26 | 
 27 | One last difference to note is that the general buffer-parsing functions, `parsebib-parse-bib-buffer` and `parsebib-parse-json-buffer` do not have the same type of return value. See the function descriptions below for details.
 28 | 
 29 | There is also a function `parsebib-parse` that takes a list of files and returns the entries in them. It has a slightly simplified interface, but it is format-agnostic: it handles both `.bib` and `.json` files.
 30 | 
 31 | 
 32 | ## BibTeX / `biblatex` ##
 33 | 
 34 | Support for `.bib` files comes in two different APIs, a higher-level one that reads all items in one go, and a lower-level one that reads one item at a time. `Parsebib` supports `@Preamble`, `@String` and `@Comment` items, and obviously actual bibliographic entries.
 35 | 
 36 | 
 37 | ### Returning entries for display ###
 38 | 
 39 | In order to return entries in a way that is suitable for display, `parsebib` can post-process field values while reading the contents of a `.bib` file. This post-processing involves a number of modifications. First, it removes the braces or double quotes around field values,  replaces newlines, tabs and form feeds with spaces, and collapses sequences of whitespace characters into a single space.
 40 | 
 41 | Furthermore, TeX markup is prettified: LaTeX commands for special characters are replaced with their (Unicode) representations (e.g., `\textdollar` is replaced with $, `\S` with §, `---` with —, etc.); LaTeX commands that have an obligatory argument are replaced with that argument; and optional arguments and braces are removed. In addition, the arguments of `\textbf`, `\textit` and `\emph` are given text properties so that they display as bold or italic, and the argument of `\textsc` is converted to caps. (Optionally, the arguments of `\text{bf|it}` and `\emph` can be converted to Org or Markdown; see below for details.) LaTeX commands that have no obligatory argument, such as `\LaTeX`, are retained.
 42 | 
 43 | Post-processing also involves expanding `@String` abbreviations: abbreviations in field values (or `@String` definitions) are replaced with their definition, so that field values are (more or less) shown the way they would appear after processing with BibTeX / `biblatex`.
 44 | 
 45 | The `file`, `url` and `doi` fields are excluded from post-processing, because they usually don't contain any TeX code or `@String` abbreviations, and because modifying them may actually be harmful (e.g., replacing multiple spaces with a single space in a file name). You can exclude more fields from post-processing by adding them to the variable `parsebib-postprocessing-excluded-fields`. (Note that double quotes or braces around the values of these fields *are* removed.)
 46 | 
 47 | In addition to this post-processing, `parsebib` can resolve cross-references. This means that if an entry has a `crossref` field, fields in the cross-referenced entry that are not already part of the cross-referencing entry are added to it. Both BibTeX's (rather simplistic) inheritance rule and `biblatex`'s more sophisticated inheritance schema are supported. It is also possible to specify a custom inheritance schema.
 48 | 
 49 | Expanding `@Strings` and resolving cross-references can also be done across files, by passing the result of parsing one file as arguments when parsing the next file. Details are discussed below.
 50 | 
 51 | Note that if you wish to resolve cross-references, it is usually also necessary to expand `@String` abbreviations, because the `crossref` field may contain such an abbreviation. Resolving such a cross-reference will not work unless the abbreviation is expanded.
 52 | 
 53 | When parsing a bibliography file for display, one may not be interested in all the data of each entry. The higher-level API functions can therefore take a list of fields to be read and included in the results. Fields not in this list are ignored, except for the `=key=` and `=type=` fields, which are always included.
 54 | 
 55 | If you use this option and also want to resolve cross-references, you need to include the `crossref` field in the list of requested fields. Without it, `parsebib` is not able to determine which entries cross-reference another entry and no cross-references will be resolved. Also note that cross-referencing may add fields to an entry that are not on the list of requested fields. For example, in `biblatex`, the `booktitle` field of an `InBook` entry is linked to the `title` field of the cross-referenced `Book` entry. In such a case, if `title` is on the list  of requested fields, the `booktitle` field is added to the cross-referencing entry, even if `booktitle` is not on the list of requested fields.
 56 | 
 57 | 
 58 | ### Higher-level API ###
 59 | 
 60 | The higher-level API consists of functions that read and return all items of a specific type in the current buffer. They do not move point. Note that the arguments in these functions (except in `parsebib-expand-xrefs`) are keyword arguments.
 61 | 
 62 | 
 63 | #### `parsebib-collect-bib-entries (&key entries strings inheritance fields)` ####
 64 | 
 65 | Collect all entries in the current buffer and return them as a hash table, where the keys correspond to the BibTeX keys and the values are alists consisting of `(<field> . <value>)` pairs of the relevant entry. In this alist, the BibTeX key and the entry type are stored under `=key=` and `=type=`, respectively. Note that both `<field>` and `<value>` are strings. 
 66 | 
 67 | The argument `entries` can be used to pass a (possibly non-empty) hash table in which the entries are stored. This can be used to combine multiple `.bib` files into a single hash table, or to update an existing hash table by rereading its `.bib` file.
 68 | 
 69 | If the argument `strings` is present, `@String` abbreviations are expanded. `strings` should be a hash table of `@String` definitions as returned by `parsebib-collect-strings`. In addition, sequences of whitespace (space, tab, newline) are collapsed into a single space, field values are unquoted (i.e., the double quotes or braces around them are removed), and TeX markup is prettified or removed. Note that @String
 70 | expansion, collapsing of whitespace and prettifying TeX markup
 71 | are not applied to fields listed in `parsebib-postprocessing-excluded-fields`, but unquoting is.
 72 | 
 73 | If the argument `inheritance` is present, cross-references among entries are resolved. It can be `t`, in which case the file-local or global value of `bibtex-dialect` is used to determine which inheritance schema is used. It can also be one of the symbols `BibTeX` or `biblatex`, or it can be a custom inheritance schema. Note that cross-references are resolved against the entries that appear in the buffer *above* the current entry, and also against the entries in the hash table `entries`.
 74 | 
 75 | The argument `fields` is a list of names of the fields that should be included in the entries returned. Fields not in this list are ignored (except `=type=` and `=key=`, which are always included). Note that the field names should be strings; comparison is case-insensitive.
 76 | 
 77 | 
 78 | #### `parsebib-collect-strings (&key strings expand-strings)` ####
 79 | 
 80 | Collect all `@string` definitions in the current buffer and return them as a hash table. The argument `strings` can be used to provide a hash table to store the definitions in. If it is `nil`, a new hash table is created.
 81 | 
 82 | The argument `expand-strings` is a boolean value. If non-nil, any abbreviations found in the string definitions are expanded against the `@string` definitions appearing earlier in the `.bib` file and against `@string` definitions in `strings`, if provided.
 83 | 
 84 | 
 85 | #### `parsebib-collect-preambles` ####
 86 | 
 87 | Collect all `@preamble` definitions in the current buffer and return them as a list.
 88 | 
 89 | 
 90 | #### `parsebib-collect-comments` ####
 91 | 
 92 | Collect all `@comment` items in the current buffer and return them as a list.
 93 | 
 94 | 
 95 | #### `parsebib-find-bibtex-dialect` ####
 96 | 
 97 | Find and return the BibTeX dialect for the current buffer. The BibTeX dialect is either `BibTeX` or `biblatex` and can be defined in a local-variable block at the end of the file.
 98 | 
 99 | 
100 | #### `parsebib-parse-bib-buffer (&key entries strings expand-strings inheritance fields replace-TeX)` ####
101 | 
102 | Collect all BibTeX data in the current buffer. Return a five-element list:
103 | 
104 |     (<entries> <strings> <preambles> <comments> <BibTeX dialect>)
105 | 
106 | The `<entries>` and `<strings>` are hash tables, `<preambles>` and `<comments>` are lists, `<BibTeX dialect>` is a symbol (either `BibTeX` or `biblatex`) or `nil`.
107 | 
108 | If the arguments `entries` and `strings` are present, they should be hash tables with `equal` as the `:test` function. They are then used to store the entries and strings, respectively.
109 | 
110 | The argument `expand-strings` functions as the same-name argument in `parsebib-collect-strings`, and the arguments `inheritance` and `fields` function as the same-name arguments in `parsebib-collect-bib-entries`.
111 | 
112 | If `replace-TeX` is set, (La)TeX markup in field values is replaced with text that is more suitable for display. The variable `parsebib-TeX-markup-replacement-alist` determines what exactly is replaced. This variable can be `let`-bound around calls to the parsing functions, but note that its value is constructed on the basis of the variables `parsebib-TeX-command-replacement-alist`, `parsebib-TeX-accent-replacement-alist` and `parsebib-TeX-literal-replacement-alist`, so you may want to customise those instead. See their doc strings and the doc string of `parsebib-TeX-markup-replacement-alist` for details.
113 | 
114 | Note that `parsebib-parse-bib-buffer` only makes one pass through the buffer. It is therefore a bit faster than calling all the `parsebib-collect-*` functions above in a row, since that would require making four passes through the buffer.
115 | 
116 | 
117 | #### `parsebib-postprocessing-excluded-fields` ####
118 | 
119 | This variable is set to a list of fields that should always be returned as-is, without any post-processing. The default value of this variable is  `("file" "url" "doi")`, though you can customise it by `let`-binding it around calls to the higher-level parse functions.
120 | 
121 | Note that double quotes or braces around field values *are* removed for fields listed in this variable.
122 | 
123 | 
124 | #### `parsebib-TeX-cleanup-target` ####
125 | 
126 | This variable controls how `parsebib-clean-TeX-markup` handles the LaTeX font commands `\text{bf|it}` and `\emph`. If the variable is set to the symbol `display`, which is the default, these commands are replaced with their arguments augmented with a font property `bold` or `italic`. If this variable is set to `org` or `markdown`, they are instead converted to the appropriate markup.
127 | 
128 | #### `parsebib-expand-xrefs (entries inheritance)` ####
129 | 
130 | Expand cross-references in `entries` according to inheritance schema `inheritance`. `entries` should be a hash table as returned by `parsebib-collect-bib-entries`. Each entry with a `crossref` field is expanded as described above. The results are stored in the hash table `entries` again; the return value of this function is always `nil`.
131 | 
132 | This function can be useful if you use the lower-level API to parse `.bib` files, because in that case, resolving cross-references cannot be done while reading entries.
133 | 
134 | 
135 | ### Lower-level API ###
136 | 
137 | The lower-level API consists of functions that do the actual reading of a BibTeX item. Unlike the higher-level API, the functions here are dependent on the position of `point`. They are meant to be used in a `while` loop in which `parsebib-find-next-item` is used to move `point` to the next item and then use one of the `parsebib-read-*` functions to read the contents of the item.
138 | 
139 | All functions here take an optional position argument, which is the position in the buffer from which they should start reading. The default value is `(point)`.
140 | 
141 | 
142 | #### `parsebib-find-next-item ()` ####
143 | 
144 | Find the first BibTeX item following point, where an item is either a BibTeX entry, or a `@Preamble`, `@String`, or `@Comment`. This function returns the item's type as a string, i.e., either `"preamble"`, `"string"`, or `"comment"`, or the entry type. Note that the `@` is *not* part of the returned string. This function moves point into the correct position to start reading the actual contents of the item, which is done by one of the following functions.
145 | 
146 | 
147 | #### `parsebib-read-entry (&optional fields strings replace-TeX)` ####
148 | #### `parsebib-read-string (&optional strings)` ####
149 | #### `parsebib-read-preamble ()` ####
150 | #### `parsebib-read-comment ()` ####
151 | 
152 | These functions do what their names suggest: read one single item of the type specified. `parsebib-read-entry` takes an optional argument `fields`, which is a list of names of the fields that should be included in the entries returned. Fields not in this list are ignored (except for `=type=` and `=key=`, which are always included). Note that the field names should be strings; comparison is case-insensitive.
153 | 
154 | `parsebib-read-entry` also takes an optional  argument `strings`, which is a hash table of `@String` definitions. When provided, abbreviations in the field values of the entry are expanded. In addition, when a `strings` argument is provided, newlines and other whitespace characters in the field value are replaced with a space, and sequences of whitespace are collapsed into a single space, and the braces or quotes around the field value are removed.
155 | 
156 | The last argument, `replace-TeX`, when non-nil, tells `parsebib-read-entry` to prettify TeX markup in the field value. Details are discussed [above](#returning-entries-for-display).
157 | 
158 | The reading functions return the contents of the item they read: `parsebib-read-preamble` and `parsebib-read-comment` return the text as a string. `parsebib-read-string` returns a cons cell of the form `(<abbrev> . <string>)`, and `parsebib-read-entry` returns the entry as an alist of `(<field> . <value>)` pairs. This alist contains entries for the entry key and the type under the keys `"=key="` and `"=type="`, respectively.
159 | 
160 | Note that all `parsebib-read*` functions move point to the end of the entry. If they cannot parse the item at point, they return an error of type `parsebib-error`, which can be captured, if necessary.
161 | 
162 | 
163 | #### `parsebib-clean-TeX-markup (string)` ####
164 | 
165 | Apply all replacements in `parsebib-TeX-markup-replacement-alist` to `string`. Note that this function ignores the value of `parsebib-postprocessing-excluded-fields`, because it just sees the string itself, not which field `string` comes from.
166 | 
167 | 
168 | ## CSL-JSON ##
169 | 
170 | The support for CSL-JSON files comprises just one function: `parsebib-parse-json-buffer`. The actual parsing of the JSON data is performed by Emacs itself, either by the native JSON parsing routines (starting with Emacs 27.1, if available), or the built-in Elisp library `json.el`. `Parsebib` makes sure that the data is returned in a format that is similar to what is returned for `.bib` files.
171 | 
172 | 
173 | ### Returning entries for display ###
174 | 
175 | When returning entries in a form that is suitable for display, the most important issue in CSL-JSON files is the fact that certain fields do not have string values. For example, name fields (`author`, `editor`, etc.) and date fields (`issued`, `submitted` etc.) are JSON arrays. Parsebib can convert these to strings if requested.
176 | 
177 | As with `.bib` files, it is possible to have `parsebib` only return specific fields when reading `.json` files. Here, too, the fields that identify an entry, i.e., `id` and `type`, are always included and do not need to be requested explicitly.
178 | 
179 | 
180 | #### `parsebib-parse-json-buffer (&key entries stringify year-only fields)` ####
181 | 
182 | Collect all CSL-JSON data in the current buffer and return the result. The return value is a hash table, where the keys correspond to the identifiers of the entries and the values are alists consisting of `(<field> . <value>)` pairs of the relevant entry. In this alist, the identifier is stored under the key `id` and the entry type is stored under `type`. `<field>` is a symbol, while `<value>` can be a string, a vector (array) or another alist.
183 | 
184 | The argument `entries` can be used to pass a (possibly non-empty) hash table in which the entries are stored. This can be used to combine multiple `.json` files into a single hash table, or to update an existing hash table by rereading its `.json` file.
185 | 
186 | Some field values in CSL-JSON are not strings. These are primarily name and date fields, which in CSL-JSON are represented as JSON objects. The argument `stringify` determines how they are returned. When `stringify` is set to `nil`, they are returned as alists; with `stringify` set to `t`, they are converted to strings.
187 | 
188 | The argument `year-only` controls the way dates are converted to strings. If it is non-`nil`, only the year part is returned. This argument only takes effect if `stringify` is set to `t`. See below for details.
189 | 
190 | The way values are converted to strings can be customised to some extent by the use of certain special variables, discussed below.
191 | 
192 | The argument `fields` is a list of names of the fields that should be included in the entries returned. Fields not in this list are ignored (except `type` and `id`, which are always included). Note that the field names should be symbols; comparison is case-sensitive.
193 | 
194 | Note that all arguments in this function are keyword arguments.
195 | 
196 | 
197 | #### `parsebib-stringify-json (entry &optional year-only)`  ####
198 | 
199 | Convert the CSL-JSON data in `entry` to string form. `entry` is an alist as stored in the hash table returned by `parsebib-parse-json-buffer`. Return value is the modified `entry`.
200 | 
201 | Note that this function modifies `entry` *in place*. If you need to keep the original entry, call this function on a copy.
202 | 
203 | If the argument `year-only` is non-`nil`, date fields are shortened to just the year.
204 | 
205 | For details on the conversion, see below.
206 | 
207 | 
208 | #### `parsebib-stringify-json-field (field &optional short)`  ####
209 | 
210 | Convert the value of `field` to string form and return the result. `field` is a cons cell of the form `(<field> . <value>)`, i.e., an element in the alist of an entry in the hash table returned by `parsebib-parse-json-buffer`.
211 | 
212 | The return value is a string representation of `<value>`. Note that unlike `parsebib-stringify-json`, this function does not modify its argument.
213 | 
214 | The following conversions are applied:
215 | 
216 | - Strings are returned unchanged.
217 | - Numbers are converted to strings using `(format "%s" number)`.
218 | - Name fields (as defined by `parsebib--json-name-fields`) are converted using `parsebib-json-name-field-template` and `parsebib-json-name-field-separator`; see below for details.
219 | - Date fields (as defined by `parsebib--json-date-fields`) are converted to a format `2021-4-22`. If only a year is present, the month and day parts are omitted. The `season` and `circa` fields are accounted for, and so are `literal` and `raw`.
220 | - Fields with an array as value (currently, in v1.0 of CSL-JSON, this only applies to the `categories` field), are converted to a string using `parsebib-json-field-separator`; see below for details.
221 | - Anything that doesn't match any of the categories above is converted to a string using `(format "%s" value)`, after which any newlines are removed and replaced with a space. This is a catch-all that shouldn't be necessary in valid CSL-JSON files.
222 | 
223 | The optional argument `short` only applies to date fields. If `short` is non-nil, the string returned for a date field contains just the year; month and day parts are ignored. If no year part can be found, the function returns the string `XXXX`. Note that with `short`, other parts of the date field are ignored.
224 | 
225 | #### `parsebib-json-name-field-template` ####
226 | 
227 | This variable holds the template used for converting name fields. Its default value is:
228 | 
229 | ```
230 | "{non-dropping-particle }{family, }{given}{ dropping-particle}{, suffix}{literal}"
231 | ```
232 | 
233 | The elements of a name field appear in the string in the order provided in the template. Elements that are not part of a particular name field are ignored, including any punctuation and white space contained inside the braces. So for example, if a name field does not have a `suffix`, the comma-space appearing inside the `{, suffix}` part of the template is not included in the final string.
234 | 
235 | It is possible to modify the form of the resulting string by `let`-binding `parsebib-json-name-field-template` before calling `parsebib-parse-json-buffer`, `parsebib-stringify-json` or `parsebib-stringify-json-field`.
236 | 
237 | #### `parsebib-json-name-field-separator` ####
238 | 
239 | If more than one name appears in a name field, they are separated by the value of this variable in the string. The default value is `" and "` (note the spaces). It is possible to `let`-bind this variable.
240 | 
241 | #### `parsebib-json-field-separator` ####
242 | 
243 | Field values that are arrays are converted to a string using the value of this variable as a separator. Currently (CSL-JSON v1.0), this only applies to the `categories` field, which is an array of strings. The default value of this variable is `", "` (note the space). It can be `let`-bound like the variables above.
244 | 
245 | 
246 | ## General API ##
247 | 
248 | ### `parsebib-parse (files &key entries strings (display t) fields)` ###
249 | 
250 | Parse a bibliography file or list of files and return the entries in them. This function can be used for both `.bib` and for `.json` files, and also for a combination thereof. It returns all entries in all files in a single hash table.
251 | 
252 | This is a high-level function meant for retrieving bibliographic entries in such a way that they can be shown to a user. It is not possible to retrieve the `@Preamble` or `@Comment`s in a `.bib` file using this function. Use `parsebib-parse-bib-buffer` or one of the other functions for that.
253 | 
254 | `parsebib-parse` basically just calls `parsebib-parse-bib-buffer` or `parsebib-parse-json-buffer` as appropriate and passes its arguments on to those functions. The argument `entries` is passed to both, as is `fields`. The field names in `fields` need to be strings, regardless of the file format, though. `parsebib-parse` converts the strings to symbols when it parses a `.json` file. The `strings` argument is only passed to `parsebib-parse-bib-buffer`, since there are obviously no `@String`s in a `.json` file.
255 | 
256 | The `display` argument controls the way in which the entry data is returned. By default, it returns the data in a way that is suitable for display. For `.bib` files, this means that `@String` abbreviations are expanded, cross-references are resolved and TeX markup in field values is removed or replaced with Unicode characters. For `.json` files, it means that fields are returned as strings and that month and day parts in date fields are ignored.
257 | 
258 | See the doc strings of `parsebib-parse`, `parsebib-parse-bib-buffer` and `parsebib-parse-json-buffer` for details on the meaning of `display`.
259 | 
260 | 


--------------------------------------------------------------------------------
/parsebib.el:
--------------------------------------------------------------------------------
   1 | ;;; parsebib.el --- A library for parsing bib files  -*- lexical-binding: t -*-
   2 | 
   3 | ;; Copyright (c) 2014-2025 Joost Kremers
   4 | ;; All rights reserved.
   5 | 
   6 | ;; Author: Joost Kremers <joostkremers@fastmail.fm>
   7 | ;; Maintainer: Joost Kremers <joostkremers@fastmail.fm>
   8 | ;; Created: 2014
   9 | ;; Version: 6.7
  10 | ;; Keywords: text bibtex
  11 | ;; URL: https://github.com/joostkremers/parsebib
  12 | ;; Package-Requires: ((emacs "25.1"))
  13 | 
  14 | ;; Redistribution and use in source and binary forms, with or without
  15 | ;; modification, are permitted provided that the following conditions
  16 | ;; are met:
  17 | ;;
  18 | ;; 1. Redistributions of source code must retain the above copyright
  19 | ;;    notice, this list of conditions and the following disclaimer.
  20 | ;; 2. Redistributions in binary form must reproduce the above copyright
  21 | ;;    notice, this list of conditions and the following disclaimer in the
  22 | ;;    documentation and/or other materials provided with the distribution.
  23 | ;; 3. The name of the author may not be used to endorse or promote products
  24 | ;;    derived from this software without specific prior written permission.
  25 | ;;
  26 | ;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  27 | ;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  28 | ;; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  29 | ;; IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  30 | ;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  31 | ;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES ; LOSS OF USE,
  32 | ;; DATA, OR PROFITS ; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  33 | ;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  34 | ;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  35 | ;; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  36 | 
  37 | ;;; Commentary:
  38 | 
  39 | ;; See the README for details.
  40 | ;;
  41 | ;; Acknowledgements:
  42 | ;;
  43 | ;; The code to clean up TeX markup in field values was contributed by Hugo
  44 | ;; Heagren <hugo.heagren@kcl.ac.uk>; additional improvements were made by
  45 | ;; <rahguzar@zohomail.eu>.
  46 | 
  47 | ;;; Code:
  48 | 
  49 | (require 'cl-lib)
  50 | (eval-and-compile (unless (fboundp 'json-parse-buffer)
  51 |                     (require 'json)))
  52 | (defvar json-object-type)
  53 | 
  54 | (declare-function json-read "json.el")
  55 | 
  56 | (defvar bibtex-dialect)
  57 | (defvar bibtex-dialect-list)
  58 | 
  59 | (defvar parsebib-hashid-fields nil
  60 |   "List of field names used to create a hash id for each entry.
  61 | If nil, no hash id is created.  Hash ids can only be created for
  62 | BibTeX/biblatex files and are stored in the special field `=hashid='.")
  63 | 
  64 | ;; Regexes describing BibTeX identifiers and keys.  Note that while $ ^ &
  65 | ;; are valid in BibTeX keys, they may nonetheless be problematic, because
  66 | ;; they are special for TeX.  Which characters are allowed in keys and
  67 | ;; identifiers differs depending on context.  The StackExchange answer at
  68 | ;; https://tex.stackexchange.com/questions/96454/using-bibtex-keys-containing-parentheses-with-biber
  69 | ;; says that Biber uses a library for parsing .bib files (btparse) that
  70 | ;; disallows the following characters in keys:
  71 | ;;
  72 | ;; " # % ' ( ) , = { }
  73 | ;;
  74 | ;; Note that parentheses are allowed by BibTeX, though, so I won't exclude
  75 | ;; them here.
  76 | ;;
  77 | ;; Also, keys and identifiers (field and @String names) are distinguished,
  78 | ;; though I'm not sure that is correct (or even desirable).  I'll keep it
  79 | ;; that way until someone complains, though.
  80 | 
  81 | (defconst parsebib--bibtex-identifier "[^\"@\\#%',={}() \t\n\f]+" "Regexp matching a valid BibTeX identifier (a field or @String name).")
  82 | (defconst parsebib--bibtex-key-regexp "[^\"#%',={} \t\n\f]+" "Regexp matching a valid BibTeX entry key.")
  83 | (defconst parsebib--bibtex-entry-start "^[ \t]*@" "Regexp matching the start of an entry: an `@', optionally indented.")
  84 | 
  85 | (defvar parsebib-postprocessing-excluded-fields '("file" "url" "doi")
  86 |   "List of fields that should not be post-processed.
  87 | Field names are compared case-insensitively.  Values of these fields
  88 | may still be unquoted, see the function `parsebib--post-process'.")
  89 | 
  90 | ;; Cleaning up TeX code is very slow, so we restrict it to those fields for
  91 | ;; which it makes sense.
  92 | (defvar parsebib-replace-TeX-fields '("author" "editor" "title")
  93 |   "List of fields in which TeX markup is cleaned up; case is ignored when matching.")
  94 | 
  95 | (defvar parsebib--biblatex-inheritances '(;; Source                        Target
  96 |                                           ("all"                           "all"
  97 |                                            (("ids"                         . none)
  98 |                                             ("crossref"                    . none)
  99 |                                             ("xref"                        . none)
 100 |                                             ("entryset"                    . none)
 101 |                                             ("entrysubtype"                . none)
 102 |                                             ("execute"                     . none)
 103 |                                             ("label"                       . none)
 104 |                                             ("options"                     . none)
 105 |                                             ("presort"                     . none)
 106 |                                             ("related"                     . none)
 107 |                                             ("relatedoptions"              . none)
 108 |                                             ("relatedstring"               . none)
 109 |                                             ("relatedtype"                 . none)
 110 |                                             ("shorthand"                   . none)
 111 |                                             ("shorthandintro"              . none)
 112 |                                             ("sortkey"                     . none)))
 113 | 
 114 |                                           ;; Source                        Target
 115 |                                           ("mvbook, book"                  "inbook, bookinbook, suppbook"
 116 |                                            (("author"                      . "author")
 117 |                                             ("author"                      . "bookauthor")))
 118 | 
 119 |                                           ;; Source                        Target
 120 |                                           ("mvbook"                        "book, inbook, bookinbook, suppbook"
 121 |                                            (("title"                       . "maintitle")
 122 |                                             ("subtitle"                    . "mainsubtitle")
 123 |                                             ("titleaddon"                  . "maintitleaddon")
 124 |                                             ("shorttitle"                  . none)
 125 |                                             ("sorttitle"                   . none)
 126 |                                             ("indextitle"                  . none)
 127 |                                             ("indexsorttitle"              . none)))
 128 | 
 129 |                                           ;; Source                        Target
 130 |                                           ("mvcollection, mvreference"     "collection, reference, incollection, inreference, suppcollection"
 131 |                                            (("title"                       . "maintitle")
 132 |                                             ("subtitle"                    . "mainsubtitle")
 133 |                                             ("titleaddon"                  . "maintitleaddon")
 134 |                                             ("shorttitle"                  . none)
 135 |                                             ("sorttitle"                   . none)
 136 |                                             ("indextitle"                  . none)
 137 |                                             ("indexsorttitle"              . none)))
 138 | 
 139 |                                           ;; Source                        Target
 140 |                                           ("mvproceedings"                 "proceedings, inproceedings"
 141 |                                            (("title"                       . "maintitle")
 142 |                                             ("subtitle"                    . "mainsubtitle")
 143 |                                             ("titleaddon"                  . "maintitleaddon")
 144 |                                             ("shorttitle"                  . none)
 145 |                                             ("sorttitle"                   . none)
 146 |                                             ("indextitle"                  . none)
 147 |                                             ("indexsorttitle"              . none)))
 148 | 
 149 |                                           ;; Source                        Target
 150 |                                           ("book"                          "inbook, bookinbook, suppbook"
 151 |                                            (("title"                       . "booktitle")
 152 |                                             ("subtitle"                    . "booksubtitle")
 153 |                                             ("titleaddon"                  . "booktitleaddon")
 154 |                                             ("shorttitle"                  . none)
 155 |                                             ("sorttitle"                   . none)
 156 |                                             ("indextitle"                  . none)
 157 |                                             ("indexsorttitle"              . none)))
 158 | 
 159 |                                           ;; Source                        Target
 160 |                                           ("collection, reference"         "incollection, inreference, suppcollection"
 161 |                                            (("title"                       . "booktitle")
 162 |                                             ("subtitle"                    . "booksubtitle")
 163 |                                             ("titleaddon"                  . "booktitleaddon")
 164 |                                             ("shorttitle"                  . none)
 165 |                                             ("sorttitle"                   . none)
 166 |                                             ("indextitle"                  . none)
 167 |                                             ("indexsorttitle"              . none)))
 168 | 
 169 |                                           ;; Source                        Target
 170 |                                           ("proceedings"                   "inproceedings"
 171 |                                            (("title"                       . "booktitle")
 172 |                                             ("subtitle"                    . "booksubtitle")
 173 |                                             ("titleaddon"                  . "booktitleaddon")
 174 |                                             ("shorttitle"                  . none)
 175 |                                             ("sorttitle"                   . none)
 176 |                                             ("indextitle"                  . none)
 177 |                                             ("indexsorttitle"              . none)))
 178 | 
 179 |                                           ;; Source                        Target
 180 |                                           ("periodical"                    "article, suppperiodical"
 181 |                                            (("title"                       . "journaltitle")
 182 |                                             ("subtitle"                    . "journalsubtitle")
 183 |                                             ("shorttitle"                  . none)
 184 |                                             ("sorttitle"                   . none)
 185 |                                             ("indextitle"                  . none)
 186 |                                             ("indexsorttitle"              . none))))
 187 | 
 188 |   "Inheritance scheme for BibLaTeX cross-referencing.
 189 | Each element is a list (SOURCE TARGET FIELDS).  SOURCE and TARGET
 190 | are strings naming entry types, where the target is the
 191 | cross-referencing entry and the source the cross-referenced
 192 | entry.  FIELDS is an alist that maps a field in the source to the
 193 | field in the target that inherits its value.
 194 | 
 195 | Inheritances valid for all entry types are defined by specifying
 196 | the entry type as \"all\".  The entry type may also be a
 197 | comma-separated list of entry types.
 198 | 
 199 | If no inheritance rule is set up for a given entry type+field
 200 | combination, the field inherits from the same-name field in the
 201 | cross-referenced entry.  If no inheritance should take place, the
 202 | target field is set to the symbol `none'.")
 203 | 
 204 | ;;;; BibTeX / biblatex parser
 205 | 
 206 | ;;; Parser primitives
 207 | ;;
 208 | ;; The parser is divided into a set of primitives, which do the actual
 209 | ;; reading, and a set of grammar rules, which describe the syntax of a
 210 | ;; BibTeX file.
 211 | ;;
 212 | ;; A few things to keep in mind:
 213 | ;;
 214 | ;; - The primitives are BibTeX-agnostic. They read specific chunks of the
 215 | ;;   source and return them.
 216 | ;;
 217 | ;; - The primitives can be parameterised; that is, the exact text that they
 218 | ;;   read may depend on arguments passed to them. The grammar rules do not
 219 | ;;   have any parameters.
 220 | ;;
 221 | ;; - The primitives are responsible for skipping whitespace.
 222 | 
 223 | (define-error 'parsebib-error "[Parsebib error]" 'error) ; Signaled by the parser with data (POSITION FORMAT-STRING ARGS...).
 224 | 
 225 | (defun parsebib--skip-whitespace ()
 226 |   "Move point past whitespace: space, newline, CR, tab, form feed, vertical tab."
 227 |   (skip-chars-forward " \n\r\t\f\v"))
 228 | 
 229 | (defun parsebib--char (chars &optional noerror)
 230 |   "Consume the character at point if it is one of CHARS.
 231 | CHARS is a list of characters.  Leading whitespace is skipped first.
 232 | On a match, move point past the character and return it.  Otherwise
 233 | return nil if NOERROR is non-nil, else signal a `parsebib-error'."
 234 |   (parsebib--skip-whitespace)
 235 |   (let ((next (char-after)))
 236 |     (cond
 237 |      ((memq next chars)
 238 |       (forward-char 1)
 239 |       next)
 240 |      ((not noerror)
 241 |       (signal 'parsebib-error
 242 |               (list (point) "Invalid character `%c'" (following-char)))))))
 243 | 
 244 | (defun parsebib--keyword (keywords &optional noerror)
 245 |   "Consume one of KEYWORDS at point.
 246 | KEYWORDS is a list of strings.  After skipping whitespace, check
 247 | whether the text at point starts with one of them, ignoring case.
 248 | If so, move point past the matched text and return that text.
 249 | Otherwise return nil if NOERROR is non-nil, else signal a
 250 | `parsebib-error'."
 251 |   (parsebib--skip-whitespace)
 252 |   (let* ((case-fold-search t)
 253 |          (found (looking-at (regexp-opt keywords))))
 254 |     (cond
 255 |      (found
 256 |       (goto-char (match-end 0))
 257 |       (match-string-no-properties 0))
 258 |      ((not noerror)
 259 |       (signal 'parsebib-error
 260 |               ;; KEYWORDS is the argument for %s; the character is extra data.
 261 |               (list (point) "Invalid keyword %s" keywords (char-after)))))))
 262 | 
 263 | (defun parsebib--symbol (regexp &optional noerror)
 264 |   "Consume a symbol matching REGEXP at point and return it.
 265 | Whitespace before the symbol is skipped.  If the text at point
 266 | does not match REGEXP, return nil if NOERROR is non-nil, else
 267 | signal a `parsebib-error'."
 268 |   (parsebib--skip-whitespace)
 269 |   (cond
 270 |    ((looking-at regexp)
 271 |     (goto-char (match-end 0))
 272 |     (match-string-no-properties 0))
 273 |    ((not noerror)
 274 |     (signal 'parsebib-error (list (point) "Illegal identifier")))))
 275 | 
 276 | (defun parsebib--seq-delim (open close esc)
 277 |   "Read a sequence delimited by the characters OPEN and CLOSE.
 278 | OPEN and CLOSE must be different characters (e.g., any kind of
 279 | parentheses).  The sequence may contain nested sequences, that
 280 | is, OPEN and CLOSE can appear inside it unescaped as long as they
 281 | are balanced.  ESC is an escape character: an OPEN or CLOSE that
 282 | directly follows ESC is not counted as a delimiter.
 283 | 
 284 | Whitespace before the sequence is skipped.  Return the sequence
 285 | as a string, including the outer delimiters, and leave point
 286 | after the closing delimiter.  Signal a `parsebib-error' if the
 287 | text at point does not start with OPEN or if the end of the
 288 | buffer is reached before the sequence is closed."
 289 |   (parsebib--skip-whitespace)
 290 |   (let ((start (point))
 291 |         (depth 1)
 292 |         (non-delims (format "^%c%c" open close)))
 293 |     (parsebib--char (list open))
 294 |     (while (not (or (<= depth 0) (eobp)))
 295 |       (skip-chars-forward non-delims)
 296 |       ;; Point is now on a delimiter or at the end of the buffer.
 297 |       (let ((next (char-after)))
 298 |         (unless (eq (char-before) esc)
 299 |           (cond ((eq next open) (setq depth (1+ depth)))
 300 |                 ((eq next close) (setq depth (1- depth))))))
 301 |       ;; Step over the delimiter, if there is one.
 302 |       (ignore-error end-of-buffer (forward-char 1)))
 303 |     (if (/= depth 0)
 304 |         (signal 'parsebib-error
 305 |                 (list start "Opening %c has no closing %c" open close))
 306 |       (buffer-substring-no-properties start (point)))))
 307 | 
 308 | (defun parsebib--string (delim esc)
 309 |   "Read a string delimited by DELIM and return it, delimiters included.
 310 | A string is a delimited sequence where the opening and closing
 311 | delimiters are identical, e.g., \"...\".  A DELIM directly preceded
 312 | by the escape character ESC does not end the string.  Signal a
 313 | `parsebib-error' if the string is not opened or never closed."
 314 |   (parsebib--skip-whitespace)
 315 |   (let ((beg (point))
 316 |         (closed nil)
 317 |         (skip-chars (format "^%c" delim)))
 318 |     (parsebib--char (list delim))
 319 |     (while (not (or closed (eobp)))
 320 |       (skip-chars-forward skip-chars)
 321 |       ;; At the end of the buffer there is no DELIM to inspect or skip;
 322 |       ;; leave CLOSED nil so an unterminated string is reported below.
 323 |       (unless (eobp)
 324 |         (setq closed (not (eq (char-before) esc)))
 325 |         (forward-char 1)))
 326 |     (if closed
 327 |         (buffer-substring-no-properties beg (point))
 328 |       (signal 'parsebib-error
 329 |               (list beg "Opening %c has no closing %c" delim delim)))))
 330 | 
 331 | (defun parsebib--comment-line ()
 332 |   "Return the text from point to the end of the line, then move to the next line."
 333 |   (prog1 (buffer-substring-no-properties (point) (pos-eol))
 334 |     (forward-line 1)))
 335 | 
 336 | (defun parsebib--match (rules &optional noerror)
 337 |   "Apply the first rule in RULES that matches at point.
 338 | RULES is a list of symbols, each naming a parsing rule.  Rules are
 339 | tried in order; a rule that signals a `parsebib-error' does not
 340 | match, and point is reset before the next one is tried.  Return the
 341 | result of the matching rule.  If no rule matches, return nil if
 342 | NOERROR is non-nil, else re-signal the error of the last rule."
 343 |   (parsebib--skip-whitespace)
 344 |   (let ((origin (point))
 345 |         (pending rules)
 346 |         matched result failure)
 347 |     (while (and pending (not matched))
 348 |       (condition-case err
 349 |           (setq result (funcall (car pending))
 350 |                 matched t)
 351 |         (parsebib-error (goto-char origin)
 352 |                         (setq failure err)))
 353 |       (setq pending (cdr pending)))
 354 |     (cond (matched result)
 355 |           ((not noerror) (signal (car failure) (cdr failure))))))
 356 | 
 357 | ;;; Parser rules
 358 | 
 359 | ;; Basic building blocks
 360 | 
 361 | (defun parsebib--text ()
 362 |   "Parse a piece of literal text and return it.
 363 | Literal text is text enclosed in curly braces or in double quotes.
 364 | The delimiters are part of the return value."
 365 |   (parsebib--match (list #'parsebib--braced-text
 366 |                          #'parsebib--quoted-text)))
 367 | 
 368 | (defun parsebib--braced-text ()
 369 |   "Parse text in curly braces; a backslash escapes a brace inside it."
 370 |   (parsebib--seq-delim ?\{ ?\} ?\\))
 371 | 
 372 | (defun parsebib--quoted-text ()
 373 |   "Parse text in double quotes; a backslash escapes a quote inside it."
 374 |   (parsebib--string ?\" ?\\))
 375 | 
 376 | (defun parsebib--key ()
 377 |   "Parse a BibTeX entry key as described by `parsebib--bibtex-key-regexp'."
 378 |   (parsebib--symbol parsebib--bibtex-key-regexp))
 379 | 
 380 | (defun parsebib--identifier ()
 381 |   "Parse a BibTeX identifier as described by `parsebib--bibtex-identifier'."
 382 |   (parsebib--symbol parsebib--bibtex-identifier))
 383 | 
 384 | (defun parsebib--value ()
 385 |   "Parse a single BibTeX value and return it as a string.
 386 | A value is one component of a composed value (see
 387 | `parsebib--composed-value'): either a piece of literal text in
 388 | braces or double quotes, or the name of a @String abbreviation."
 389 |   (let ((value (parsebib--match (list #'parsebib--text #'parsebib--identifier)
 390 |                                 :noerror)))
 391 |     (if value
 392 |         value
 393 |       (signal 'parsebib-error (list (point) "Expected {, \" or identifier")))))
 394 | 
 395 | (defun parsebib--composed-value ()
 396 |   "Parse a composed BibTeX value and return it as a list of values.
 397 | A composed value consists of one or more values joined with the
 398 | character `#'.  Composed values typically appear after an equal
 399 | sign, as field values and as expansions in @String definitions."
 400 |   (let ((first (parsebib--value)))
 401 |     (cons first
 402 |           (cl-loop while (and (parsebib--char '(?#) :noerror)
 403 |                               (not (eobp)))
 404 |                    collect (parsebib--value)))))
 405 | 
 406 | (defun parsebib--assignment ()
 407 |   "Parse a BibTeX assignment and return it as a cons (IDENTIFIER . VALUE).
 408 | An assignment is an identifier, an equal sign and a composed value;
 409 | VALUE is a list of strings.  A @String definition has exactly one
 410 | assignment, an entry can have any number of them."
 411 |   (let* ((id (parsebib--identifier))
 412 |          (equals (and id (parsebib--char '(?=))))
 413 |          (val (and equals (parsebib--composed-value))))
 414 |     (if val (cons id val)
 415 |       (signal 'parsebib-error (list (point) "Malformed key=value assignment")))))
 416 | 
 417 | (defun parsebib--fields ()
 418 |   "Parse a comma-separated set of BibTeX assignments.
 419 | Such a set makes up the body of an entry.  Return a list of the
 420 | assignments, the one read last coming first."
 421 |   (let ((acc (list (parsebib--assignment))))
 422 |     (while (and (parsebib--char '(?,) :noerror) (not (eobp)))
 423 |       ;; A comma may follow the final field of an entry.  Reading another
 424 |       ;; assignment then fails, which is not an error, so ignore it.
 425 |       (condition-case nil
 426 |           (setq acc (cons (parsebib--assignment) acc))
 427 |         (parsebib-error nil)))
 428 |     acc))
 429 | 
 430 | ;; BibTeX items
 431 | 
 432 | (defun parsebib--@comment ()
 433 |   "Parse a @Comment and return its contents as a string.
 434 | The contents is either a piece of text in braces or double quotes,
 435 | or else the text up to the end of the line."
 436 |   (parsebib--char '(?@))
 437 |   (parsebib--keyword '("comment"))
 438 |   (let ((contents (parsebib--match (list #'parsebib--text #'parsebib--comment-line)
 439 |                                    :noerror)))
 440 |     (or contents (signal 'parsebib-error (list (point) "Malformed @Comment")))))
 441 | 
 442 | (defun parsebib--@preamble ()
 443 |   "Parse a @Preamble and return its contents as a string.
 444 | The contents must be text in braces or double quotes."
 445 |   (parsebib--char '(?@))
 446 |   (parsebib--keyword '("preamble"))
 447 |   (let ((contents (parsebib--text)))
 448 |     (or contents (signal 'parsebib-error (list (point) "Malformed @Preamble")))))
 449 | 
 450 | (defun parsebib--@string ()
 451 |   "Parse a @String definition.
 452 | Return the definition as a cons cell of the abbreviation and its
 453 | expansion, the latter being a composed value in list form."
 454 |   (let* ((at (parsebib--char '(?@)))
 455 |          (keyword (and at (parsebib--keyword '("string"))))
 456 |          (open (and keyword (parsebib--char '(?\{ ?\( ))))
 457 |          (definition (and open (parsebib--assignment)))
 458 |          (close (and definition (parsebib--char (alist-get open '((?\{ ?\}) (?\( ?\))))))))
 459 |     (if close definition
 460 |       (signal 'parsebib-error (list (point) "Malformed @String definition")))))
 461 | 
 462 | (defun parsebib--@entry ()
 463 |   "Parse a BibTeX database entry.
 464 | Return the entry as an alist of (FIELD . VALUE) pairs, where FIELD
 465 | is a string and VALUE a list of strings.  The alist starts with
 466 | the pseudo-fields \"=key=\" and \"=type=\", which hold the entry
 467 | key and the entry type."
 468 |   (let* ((at (parsebib--char '(?@)))
 469 |          (type (and at (parsebib--identifier)))
 470 |          (open (and type (parsebib--char '(?\{ ?\( ))))
 471 |          (key (and open (parsebib--key)))
 472 |          (comma (and key (parsebib--char '(?,))))
 473 |          (fields (and comma (parsebib--fields)))
 474 |          (close (and fields (parsebib--char (alist-get open '((?\{ ?\}) (?\( ?\))))))))
 475 |     (unless close (signal 'parsebib-error (list (point) "Malformed entry definition")))
 476 |     `(("=key=" ,key) ("=type=" ,type) ,@fields)))
 477 | 
 478 | ;;;; Low-level BibTeX/biblatex API
 479 | 
 480 | (defun parsebib-find-next-item ()
 481 |   "Find the first (potential) BibTeX item following point.
 482 | An item starts with an @ at the start of a line, possibly preceded
 483 | by spaces or tabs, and followed by a string of characters as
 484 | defined by `parsebib--bibtex-identifier'.
 485 | 
 486 | If an item is found, move point to the start of its line and
 487 | return the name of the item as a string, either \"Comment\",
 488 | \"Preamble\" or \"String\", or the entry type (without the @).  If
 489 | no item is found, move point to the end of the buffer and return nil."
 490 |   (when (re-search-forward parsebib--bibtex-entry-start nil 0)
 491 |     (unless (looking-at (concat "\\(" parsebib--bibtex-identifier "\\)" "[[:space:]]*[\(\{]?"))
 492 |       (signal 'parsebib-error (list (point) "Search for BibTeX entry failed")))
 493 |     (let ((name (match-string-no-properties 1)))
 494 |       (goto-char (pos-bol))
 495 |       name)))
 496 | 
 497 | (defun parsebib--get-hashid-string (fields)
 498 |   "Create a string from the contents of FIELDS to compute a hash id.
 499 | FIELDS is an alist of (NAME . VALUE) pairs with string values.  The
 500 | values of the fields in `parsebib-hashid-fields' are concatenated
 501 | with leading and trailing braces removed; a missing field counts as empty."
 502 |   (mapconcat (lambda (field)
 503 |                (let ((value (cdr (assoc-string field fields 'case-fold))))
 504 |                  (if value (replace-regexp-in-string "^{\\|}$" "" value) "")))
 505 |              parsebib-hashid-fields ""))
 506 | 
 507 | (defun parsebib-read-entry (&optional fields strings replace-TeX)
 508 |   "Read a BibTeX entry starting at point.
 509 | Point should be positioned before the `@'-character that starts
 510 | the entry, with possibly whitespace intervening.  Return an alist
 511 | of (<field> . <contents>) conses.  The alist provides the entry
 512 | key in the field \"=key=\" and the entry type in the field
 513 | \"=type=\".  Signal a `parsebib-error' if no entry can be read.
 514 | 
 515 | If `parsebib-hashid-fields' is non-nil, a hash ID is added in the
 516 | field \"=hashid=\".  The hash is computed on the basis of the
 517 | contents of the fields listed in `parsebib-hashid-fields' using
 518 | the function `secure-hash' and the `sha256' algorithm.
 519 | 
 520 | FIELDS is a list of field names (as strings) to be included in the
 521 | result; other fields are ignored.  Case is ignored when comparing
 522 | fields to the list in FIELDS.  If FIELDS is nil, all fields are
 523 | returned.  If FIELDS is non-nil, it should contain the field names
 524 | \"=key=\" and \"=type=\".
 525 | 
 526 | STRINGS and REPLACE-TEX are used to post-process field values.
 527 | See the function `parsebib--post-process' for details."
 528 |   (let ((entry (parsebib--@entry)))
 529 |     (when fields
 530 |       (setq entry (seq-filter (lambda (field)
 531 |                                 (member-ignore-case (car field) fields))
 532 |                               entry)))
 533 |     (setq entry (mapcar (lambda (field)
 534 |                           (parsebib--post-process field strings replace-TeX))
 535 |                         entry))
 536 |     (when parsebib-hashid-fields
 537 |       ;; Hash the entry's field values, not FIELDS, which only holds names.
 538 |       (push (cons "=hashid=" (secure-hash 'sha256 (parsebib--get-hashid-string entry))) entry))
 539 |     entry))
 540 | 
 541 | (defun parsebib-read-string (&optional strings)
 542 |   "Read the @String definition beginning at point.
 543 | Return the definition as a cons cell (<abbrev> . <expansion>).
 544 | 
 545 | If STRINGS is non-nil, it should be a hash table with @String
 546 | abbreviations.  Abbreviations in the expansion are then expanded
 547 | and quoting is removed.  If STRINGS is nil, the parts of the
 548 | expansion are left as they are and joined with \" # \"."
 549 |   (let* ((definition (parsebib--@string))
 550 |          (parts (cdr definition)))
 551 |     (cons (car definition)
 552 |           (if strings
 553 |               (string-join (parsebib--post-process-strings parts strings t))
 554 |             (string-join parts " # ")))))
 555 | 
 556 | (defalias 'parsebib-read-preamble 'parsebib--@preamble)
 557 | (defalias 'parsebib-read-comment 'parsebib--@comment)
 558 | 
 559 | ;;;;; Post-processing stuff
 560 | 
 561 | (defun parsebib--post-process (field strings replace-TeX)
 562 |   "Post-process FIELD and return it with its value as a single string.
 563 | FIELD is a cons cell consisting of the field name and the field
 564 | value, where the field value is a list of strings.
 565 | 
 566 | If STRINGS is non-nil, it should be a hash table with @String
 567 | definitions.  @String abbreviations in the field value are then
 568 | expanded using these definitions.  In addition, the strings in
 569 | the field value are unquoted and sequences of whitespace
 570 | (including newlines) are collapsed into a single space.  Finally,
 571 | the strings are concatenated.  If STRINGS is nil, the strings
 572 | are left unchanged and joined with \" # \".
 573 | 
 574 | If REPLACE-TEX is non-nil and the field is listed in
 575 | `parsebib-replace-TeX-fields', TeX markup is cleaned up with
 576 | `parsebib-clean-TeX-markup'.
 577 | 
 578 | Fields listed in `parsebib-postprocessing-excluded-fields' are
 579 | only unquoted (provided STRINGS is non-nil) and concatenated.
 580 | 
 581 | Return value is a cons cell of field name and field value."
 582 |   (let* ((name (car field))
 583 |          (excluded (member-ignore-case name parsebib-postprocessing-excluded-fields))
 584 |          (clean-TeX (and replace-TeX
 585 |                          (not excluded)
 586 |                          (member-ignore-case name parsebib-replace-TeX-fields)))
 587 |          (parts (cdr field))
 588 |          (value (if strings
 589 |                     (string-join (parsebib--post-process-strings parts strings (not excluded)))
 590 |                   (string-join parts " # "))))
 591 |     (cons name (if clean-TeX (parsebib-clean-TeX-markup value) value))))
 592 | 
 593 | (defun parsebib--post-process-strings (strings abbrevs post-process)
 594 |   "Post-process the strings in STRINGS and return the results as a list.
 595 | ABBREVS is a hash table with @String definitions.  If POST-PROCESS
 596 | is non-nil, whitespace sequences are collapsed into a single space
 597 | and strings with an expansion in ABBREVS are replaced by it.  Other
 598 | strings enclosed in braces {} or double quotes \"\" are unquoted."
 599 |   (mapcar (lambda (str)
 600 |             (when post-process
 601 |               (setq str (replace-regexp-in-string "[[:space:]\t\n\f]+" " " str)))
 602 |             (cond
 603 |              ((and post-process (gethash str abbrevs)))
 604 |              ;; Inspect the delimiters directly: `.' in a regexp does not
 605 |              ;; match the newlines that remain when POST-PROCESS is nil.
 606 |              ((and (> (length str) 1)
 607 |                    (memq (aref str 0) '(?\" ?\{))
 608 |                    (memq (aref str (1- (length str))) '(?\" ?\})))
 609 |               (substring str 1 -1))
 610 |              (t str)))
 611 |           strings))
 612 | 
 613 | (defun parsebib-expand-xrefs (entries inheritance)
 614 |   "Expand cross-referencing items in ENTRIES.
 615 | ENTRIES is a hash table with BibTeX entries.  Each entry that has
 616 | a `crossref' field is supplemented with fields from the entry it
 617 | refers to.  ENTRIES is updated in place with the expanded
 618 | entries; the return value of this function is always nil.
 619 | 
 620 | INHERITANCE indicates the inheritance schema.  It is either the
 621 | symbol `BibTeX' or `biblatex', or an explicit inheritance schema
 622 | as described for the variable `parsebib--biblatex-inheritances'."
 623 |   (let ((quoted-key (concat "\\b[\"{]" parsebib--bibtex-key-regexp "[\"}]\\b")))
 624 |     (maphash (lambda (key fields)
 625 |                (let ((xref (cdr (assoc-string "crossref" fields))))
 626 |                  (when xref
 627 |                    (when (string-match-p quoted-key xref)
 628 |                      (setq xref (substring xref 1 -1)))
 629 |                    (let ((updated-entry (parsebib--get-xref-fields
 630 |                                          fields (gethash xref entries) inheritance)))
 631 |                      (when updated-entry
 632 |                        (puthash key updated-entry entries))))))
 633 |              entries)))
 634 | 
 635 | (defun parsebib--get-xref-fields (target-entry source-entry inheritance)
 636 |   "Return TARGET-ENTRY supplemented with fields inherited from SOURCE-ENTRY.
 637 | TARGET-ENTRY and SOURCE-ENTRY are entry alists.  Fields in
 638 | SOURCE-ENTRY for which TARGET-ENTRY has no value are added to
 639 | TARGET-ENTRY.  Return value is the modified TARGET-ENTRY.
 640 | 
 641 | INHERITANCE is an inheritance schema.  It can either be one of
 642 | the symbols `BibTeX' or `biblatex', or it can be an explicit
 643 | inheritance schema.  See the variable
 644 | `parsebib--biblatex-inheritances' for details on the structure of
 645 | such an inheritance schema."
 646 |   (when (and target-entry source-entry)
 647 |     (when (eq inheritance 'biblatex)
 648 |       (setq inheritance parsebib--biblatex-inheritances))
 649 |     (let* ((source-type (concat "\\b" (cdr (assoc-string "=type=" source-entry)) "\\b"))
 650 |            (target-type (concat "\\b" (cdr (assoc-string "=type=" target-entry)) "\\b"))
 651 |            (for-all-types (nth 2 (assoc-string "all" inheritance)))
 652 |            (inheritable-fields
 653 |             (unless (eq inheritance 'BibTeX)
 654 |               (append
 655 |                (apply #'append (mapcar #'cl-third
 656 |                                        (cl-remove-if-not
 657 |                                         (lambda (elem)
 658 |                                           (and (string-match-p source-type (nth 0 elem))
 659 |                                                (string-match-p target-type (nth 1 elem))))
 660 |                                         inheritance)))
 661 |                for-all-types)))
 662 |            (new-fields (mapcan (lambda (field)
 663 |                                  (let ((target-field (parsebib--get-target-field (car field) inheritable-fields)))
 664 |                                    (if (and target-field
 665 |                                             (not (assoc-string target-field target-entry 'case-fold)))
 666 |                                        (list (cons target-field (cdr field))))))
 667 |                                source-entry)))
 668 |       (append target-entry new-fields))))
 669 | 
 670 | (defun parsebib--get-target-field (source-field inheritances)
 671 |   "Return the target field for inheritance from SOURCE-FIELD.
 672 | Inheritance is determined by INHERITANCES, which is an alist of
 673 | source/target pairs.  If no inheritance should take place for
 674 | SOURCE-FIELD, the target in the relevant item in INHERITANCES is
 675 | the symbol `none'.  If there is no item for SOURCE-FIELD in
 676 | INHERITANCES, SOURCE-FIELD is returned.  Note that it is valid
 677 | for INHERITANCES to be nil."
 678 |   ;; Note: the argument INHERITANCES differs from the INHERITANCE argument in
 679 |   ;; the previous two functions.  It is a simple alist of (source-field
 680 |   ;; . target-field) pairs.
 681 |   (let ((target-field (cdr (assoc-string source-field inheritances 'case-fold))))
 682 |     (cond
 683 |      ((null target-field)
 684 |       source-field)
 685 |      ((eq target-field 'none)
 686 |       nil)
 687 |      (t target-field))))
 688 | 
 689 | ;;;; Clean up TeX markup
 690 | 
(defvar parsebib-TeX-cleanup-target 'display
  "Target for `parsebib-clean-TeX-markup'.
This variable affects the output of the functions that convert
LaTeX font commands \\textbf, \\textit, and \\emph.  Its value
should be one of the symbols `display', `markdown', `org' or
`plain'.  Any other value is treated as a synonym for `plain'.
See `parsebib--convert-tex-italics' and
`parsebib--convert-tex-bold' for details.")
 699 | 
 700 | (defun parsebib--convert-tex-italics (str)
 701 |   "Return STR converted to italic face.
 702 | Depending on the value of `parsebib-TeX-cleanup-target', add a
 703 | face property `italic' to STR, or return it with Markdown or Org
 704 | markup for italic text."
 705 |   (pcase parsebib-TeX-cleanup-target
 706 |     ('display (propertize str 'face 'italic))
 707 |     ('markdown (concat "*" str "*"))
 708 |     ('org (concat "/" str "/"))
 709 |     (_ str)))
 710 | 
 711 | (defun parsebib--convert-tex-bold (str)
 712 |   "Return STR converted to bold face.
 713 | Depending on the value of `parsebib-TeX-cleanup-target', add a
 714 | face property `bold' to STR, or return it with Markdown or Org
 715 | markup for bold text."
 716 |   (pcase parsebib-TeX-cleanup-target
 717 |     ('display (propertize str 'face 'bold))
 718 |     ('markdown (concat "**" str "**"))
 719 |     ('org (concat "*" str "*"))
 720 |     (_ str)))
 721 | 
 722 | (defvar parsebib-TeX-command-replacement-alist
 723 |   '(("ddag"               . "\N{DOUBLE DAGGER}")
 724 |     ("textdaggerdbl"      . "\N{DOUBLE DAGGER}")
 725 |     ("dag"                . "\N{DAGGER}")
 726 |     ("textdagger"         . "\N{DAGGER}")
 727 |     ("textpertenthousand" . "\N{PER TEN THOUSAND SIGN}")
 728 |     ("textperthousand"    . "\N{PER MILLE SIGN}")
 729 |     ("textquestiondown"   . "\N{INVERTED QUESTION MARK}")
 730 |     ("P"                  . "\N{PILCROW SIGN}")
 731 |     ("textdollar"         . "$")
 732 |     ("S"                  . "\N{SECTION SIGN}")
 733 |     ("ldots"              . "\N{HORIZONTAL ELLIPSIS}")
 734 |     ("dots"               . "\N{HORIZONTAL ELLIPSIS}")
 735 |     ("textellipsis"       . "\N{HORIZONTAL ELLIPSIS}")
 736 |     ("textemdash"         . "\N{EM DASH}")
 737 |     ("textendash"         . "\N{EN DASH}")
 738 |     ("textbar"            . "|")
 739 | 
 740 |     ;; Non-ASCII Letters (Excluding Accented Letters)
 741 |     ("AA" . "\N{LATIN CAPITAL LETTER A WITH RING ABOVE}")
 742 |     ("AE" . "\N{LATIN CAPITAL LETTER AE}")
 743 |     ("DH" . "\N{LATIN CAPITAL LETTER ETH}")
 744 |     ("DJ" . "\N{LATIN CAPITAL LETTER ETH}")
 745 |     ("L"  . "\N{LATIN CAPITAL LETTER L WITH STROKE}")
 746 |     ("SS" . "\N{LATIN CAPITAL LETTER SHARP S}")
 747 |     ("NG" . "\N{LATIN CAPITAL LETTER ENG}")
 748 |     ("OE" . "\N{LATIN CAPITAL LIGATURE OE}")
 749 |     ("O"  . "\N{LATIN CAPITAL LETTER O WITH STROKE}")
 750 |     ("TH" . "\N{LATIN CAPITAL LETTER THORN}")
 751 | 
 752 |     ("aa" . "\N{LATIN SMALL LETTER A WITH RING ABOVE}")
 753 |     ("ae" . "\N{LATIN SMALL LETTER AE}")
 754 |     ("dh" . "\N{LATIN SMALL LETTER ETH}")
 755 |     ("dj" . "\N{LATIN SMALL LETTER ETH}")
 756 |     ("l"  . "\N{LATIN SMALL LETTER L WITH STROKE}")
 757 |     ("ss" . "\N{LATIN SMALL LETTER SHARP S}")
 758 |     ("ng" . "\N{LATIN SMALL LETTER ENG}")
 759 |     ("oe" . "\N{LATIN SMALL LIGATURE OE}")
 760 |     ("o"  . "\N{LATIN SMALL LETTER O WITH STROKE}")
 761 |     ("th" . "\N{LATIN SMALL LETTER THORN}")
 762 | 
 763 |     ("ij" . "ij")
 764 |     ("i"  . "\N{LATIN SMALL LETTER DOTLESS I}")
 765 |     ("j"  . "\N{LATIN SMALL LETTER DOTLESS J}")
 766 | 
 767 |     ;; Formatting Commands
 768 |     ("textit" . parsebib--convert-tex-italics)
 769 |     ("emph"   . parsebib--convert-tex-italics)
 770 |     ("textbf" . parsebib--convert-tex-bold)
 771 |     ("textsc" . upcase))
 772 |   "An alist of <command>-<replacement> pairs for LaTeX commands.
 773 | <command> is the name of a TeX or LaTeX command (without
 774 | backslash), <replacement> is the string with which it is
 775 | replaced.
 776 | 
 777 | <replacement> can also be a function of one argument.  In this
 778 | case, <command> must take at least one obligatory argument, which
 779 | is passed as the first argument of the replacement function.  The
 780 | return value of this function is used as the replacement string
 781 | for <command>.
 782 | 
 783 | See `parsebib-TeX-markup-replacement-alist' and the function
 784 | `parsebib-clean-TeX-markup' to see how this variable is used.")
 785 | 
;; Every value below is a Unicode combining character.
;; `parsebib--TeX-replace-command-or-accent' appends it to the (cleaned)
;; argument of the accent command.
(defvar parsebib-TeX-accent-replacement-alist
  '(("\"" . "\N{COMBINING DIAERESIS}")
    ("'"  . "\N{COMBINING ACUTE ACCENT}")
    ("."  . "\N{COMBINING DOT ABOVE}")
    ("="  . "\N{COMBINING MACRON}")
    ("^"  . "\N{COMBINING CIRCUMFLEX ACCENT}")
    ("`"  . "\N{COMBINING GRAVE ACCENT}")
    ("b"  . "\N{COMBINING MACRON BELOW}")
    ("c"  . "\N{COMBINING CEDILLA}")
    ("d"  . "\N{COMBINING DOT BELOW}")
    ("H"  . "\N{COMBINING DOUBLE ACUTE ACCENT}")
    ("k"  . "\N{COMBINING OGONEK}")
    ("U"  . "\N{COMBINING DOUBLE VERTICAL LINE ABOVE}")
    ("u"  . "\N{COMBINING BREVE}")
    ("v"  . "\N{COMBINING CARON}")
    ("~"  . "\N{COMBINING TILDE}")
    ("|"  . "\N{COMBINING COMMA ABOVE}")
    ("f"  . "\N{COMBINING INVERTED BREVE}")
    ("G"  . "\N{COMBINING DOUBLE GRAVE ACCENT}")
    ("h"  . "\N{COMBINING HOOK ABOVE}")
    ("C"  . "\N{COMBINING DOUBLE GRAVE ACCENT}")
    ("r"  . "\N{COMBINING RING ABOVE}") )
  "Alist of <command>-<accent> pairs for LaTeX diacritics.
<command> is the name of a TeX or LaTeX command (without
backslash), <accent> is the Unicode combining character for the
diacritic that <command> generates.  Both <command> and <accent>
must be strings.

The replacement string for <command> is composed of its
obligatory argument (usually a single character) and the
combining diacritic.

See `parsebib-TeX-markup-replacement-alist' and the function
`parsebib-clean-TeX-markup' to see how this variable is used.")
 820 | 
;; The keys of this alist are literal strings, not regexps: they are combined
;; into a regexp with `regexp-opt' in `parsebib-TeX-markup-replacement-alist'
;; and looked up with `equal' in `parsebib--TeX-replace-literal'.
(defvar parsebib-TeX-literal-replacement-alist
  ;; LaTeX2 Escapable "Special" Characters
  `(("\\%" . "%") ("\\&" . "&") ("\\#" . "#") ("\\$" . "$")
    ;; Quotes
    ("``" . "\N{LEFT DOUBLE QUOTATION MARK}")
    ("`"  . "\N{LEFT SINGLE QUOTATION MARK}")
    ("''" . "\N{RIGHT DOUBLE QUOTATION MARK}")
    ("'"  . "\N{RIGHT SINGLE QUOTATION MARK}")
    ;; Dashes
    ("---" . "\N{EM DASH}")
    ("--"  . "\N{EN DASH}")
    ;; Remove all remaining {braces}
    ("{" . "") ("}" . ""))
  "Alist of <literal>-<replacement> pairs.  Both are strings.
This variable contains characters that are special in LaTeX and
single-character, non-ASCII LaTeX commands.

Note that adding pairs to this variable has no effect unless
`parsebib-TeX-markup-replacement-alist' is adjusted accordingly.
For example, after adding a <literal>-<replacement> pair, the
following code will ensure that <literal> gets replaced with
<replacement>.

  (cl-callf (lambda (regex) (rx (or <literal> (regexp regex))))
     (alist-get (quote parsebib--TeX-replace-literal)
                parsebib-TeX-markup-replacement-alist))

See `parsebib-TeX-markup-replacement-alist' and the function
`parsebib-clean-TeX-markup' to see how this variable is used.")
 850 | 
(defvar parsebib-TeX-markup-replacement-alist
  `((parsebib--TeX-replace-command-or-accent
     ;; This regexp matches any latex command i.e. anything that
     ;; starts with a backslash. The name of the command which
     ;; is either a string of alphabetic characters or a single
     ;; non-alphabetic character is captured by group 1. The command
     ;; can have a mandatory argument enclosed by braces which is
     ;; captured by group 2. If the command has no arguments in
     ;; brackets or braces, the first non-white space letter after
     ;; the command is captured in group 3. This is to be able to deal
     ;; with accents.
     ;; Note that the capturing of arguments in braces is imperfect,
     ;; because doing it properly requires sexp parsing. It will fail
     ;; for cases like \command{\anothercommand{an arg}some text}.
     . ,(rx "\\" (group-n 1 (or (1+ letter) nonl))
            (: (* blank) (opt (or (: (* (: "[" (* (not (any "]"))) "]"))
                                     "{" (group-n 2 (0+ (not (any "}")))) (opt "}"))
                                  (group-n 3 letter))))))
    ;; The second rule handles the literal strings listed in
    ;; `parsebib-TeX-literal-replacement-alist' as well as runs of blanks.
    (parsebib--TeX-replace-literal
     . ,(rx (or (regexp (regexp-opt (mapcar #'car parsebib-TeX-literal-replacement-alist)))
                (1+ blank)))))
  "Alist of replacements and regexps for TeX markup.
This is used in `parsebib-clean-TeX-markup' to make TeX markup more
suitable for display.  Each item in the list is a cons cell of
the form (REPLACEMENT . REGEXP).  The replacement can be a string
\(which will simply replace the match) or a function (the match
will be replaced by the result of calling the function on the
match string).  Earlier elements are evaluated before later ones,
so if one string is a subpattern of another, the second must
appear later (e.g. \"''\" is before \"'\").

For the common cases of replacing a LaTeX command or a literal
it is faster to use `parsebib-TeX-command-replacement-alist'
and `parsebib-TeX-literal-replacement-alist' respectively.")
 885 | 
 886 | (defun parsebib--TeX-replace-command-or-accent (string)
 887 |   "Return the replacement text for the command or accent matched by STRING."
 888 |   (let* ((cmd (match-string 1 string))
 889 |          ;; bar is the argument in braces.
 890 |          (bar (match-string 2 string))
 891 |          ;; If there is no argument in braces, consider the letter after
 892 |          ;; the command as the argument. Clean this argument.
 893 |          (arg (parsebib-clean-TeX-markup (or (if bar bar (match-string 3 string)) "")))
 894 |          ;; Check if the cmd is an accent that needs to be replaced
 895 |          ;; and get its replacement.
 896 |          (acc (alist-get cmd parsebib-TeX-accent-replacement-alist nil nil #'equal))
 897 |          ;; If it is not an accent, check if it is a command that needs to be replaced
 898 |          ;; and get the replacement.
 899 |          (rep (or acc (alist-get cmd parsebib-TeX-command-replacement-alist nil nil #'equal))))
 900 |     (cond
 901 |      ;; If replacement is a function call it with the argument.
 902 |      ((functionp rep) (funcall rep arg))
 903 |      ;; Otherwise combine the replacement with the argument. The order of combination
 904 |      ;; depends on whether the command is an accent or not.
 905 |      (rep (if acc (concat arg rep) (concat rep arg)))
 906 |      ;; Now we handle the fallback cases. If there is a braced argument but no
 907 |      ;; replacement for the command was found, consider the replacement to be
 908 |      ;; empty.
 909 |      ((and bar (not (equal "" bar))) bar)
 910 |      ;; Otherwise clean any optional arguments by discarding them.
 911 |      (t (replace-regexp-in-string (rx "[" (* (not (any "]"))) "]") "" string t t)))))
 912 | 
 913 | (defun parsebib--TeX-replace-literal (string)
 914 |   "Look up the replacement text for literal STRING."
 915 |   (or (alist-get string parsebib-TeX-literal-replacement-alist nil nil #'equal)
 916 |       " "))
 917 | 
 918 | (defun parsebib-clean-TeX-markup (string)
 919 |   "Return STRING without TeX markup.
 920 | Any substring matching the car of a cell in
 921 | `parsebib-TeX-markup-replace-alist' is replaced with the
 922 | corresponding cdr (if the cdr is a string), or with the result of
 923 | calling the cdr on the match (if it is a function)."
 924 |   (let ((case-fold-search nil))
 925 |     (cl-loop for (replacement . pattern) in parsebib-TeX-markup-replacement-alist
 926 |              do (setq string (replace-regexp-in-string
 927 |                               pattern replacement string
 928 |                               t t))
 929 |              finally return string)))
 930 | 
 931 | ;;;; High-level BibTeX/biblatex API
 932 | 
 933 | (defun parsebib-collect-preambles ()
 934 |   "Collect all @Preamble definitions in the current buffer.
 935 | Return a list of strings, each string a separate @Preamble."
 936 |   (save-excursion
 937 |     (goto-char (point-min))
 938 |     (let (res)
 939 |       (cl-loop for item = (parsebib-find-next-item)
 940 |                while (and item
 941 |                           (cl-equalp item "preamble"))
 942 |                do (push (parsebib--@preamble) res))
 943 |       (nreverse res))))
 944 | 
 945 | (defun parsebib-collect-comments ()
 946 |   "Collect all @Comment definitions in the current buffer.
 947 | Return a list of strings, each string a separate @Comment."
 948 |   (save-excursion
 949 |     (goto-char (point-min))
 950 |     (let (res)
 951 |       (cl-loop for item = (parsebib-find-next-item)
 952 |                while (and item
 953 |                           (cl-equalp item "comment"))
 954 |                do (push (parsebib--@comment) res))
 955 |       (nreverse (delq nil res)))))
 956 | 
 957 | (cl-defun parsebib-collect-strings (&key strings expand-strings)
 958 |   "Collect all @String definitions in the current buffer.
 959 | Return value is a hash with the abbreviations as keys and the
 960 | expansions as values.  If STRINGS is a hash table with test
 961 | function `equal', it is used to store the @String definitions.
 962 | If EXPAND-STRINGS is non-nil, @String expansions are expanded
 963 | themselves using the @String definitions already stored in
 964 | STRINGS."
 965 |   (or (and (hash-table-p strings)
 966 |            (eq 'equal (hash-table-test strings)))
 967 |       (setq strings (make-hash-table :test #'equal)))
 968 |   (save-excursion
 969 |     (goto-char (point-min))
 970 |     (cl-loop with string = nil
 971 |              for item = (parsebib-find-next-item)
 972 |              while (and item
 973 |                         (cl-equalp item "string"))
 974 |              do
 975 |              (setq string (parsebib-read-string (if expand-strings strings)))
 976 |              (puthash (car string) (cdr string) strings))
 977 |     strings))
 978 | 
 979 | (cl-defun parsebib-collect-bib-entries (&key entries strings inheritance fields)
 980 |   "Collect all BibTeX / biblatex entries in the current buffer.
 981 | Return value is a hash table containing the entries.  If ENTRIES
 982 | is a hash table with test function `equal', it is used to store
 983 | the entries collected in the buffer.  Note that ENTRIES does not
 984 | have to be empty.  It may contain entries from a previous parse.
 985 | 
 986 | If STRINGS is non-nil, it should be a hash table of string
 987 | definitions, which are used to expand abbreviations used in the
 988 | entries.  In addition, if STRINGS is set, sequences of whitespace
 989 | in field values are collapsed into a single space, field values
 990 | are unquoted (i.e., the double quotes or braces around them are
 991 | removed), and TeX markup is prettified (see
 992 | `parsebib-clean-TeX-markup' for details).  Note that @String
 993 | expansion, collapsing of whitespace and prettifying TeX markup
 994 | are not applied to fields listed in
 995 | `parsebib-postprocessing-excluded-fields', but unquoting is.
 996 | 
 997 | If INHERITANCE is non-nil, cross-references in the entries are
 998 | resolved: if the crossref field of an entry points to an entry
 999 | already in ENTRIES (which includes the entries that appear
1000 | earlier in the buffer), the fields of the latter that do not occur
1001 | in the entry are added to it.  INHERITANCE indicates the
1002 | inheritance schema used for determining which fields inherit from
1003 | which fields.  It can be a symbol `BibTeX' or `biblatex', or it
1004 | can be an explicit inheritance schema.  (See the variable
1005 | `parsebib--biblatex-inheritances' for details on the structure of
1006 | such an inheritance schema.)  It can also be the symbol t, in
1007 | which case the local variable block is checked for a
1008 | dialect (using the variable `bibtex-dialect'), or, if no such
1009 | local variable is found, the value of the variable
1010 | `bibtex-dialect'.
1011 | 
1012 | FIELDS is a list of the field names (as strings) to be read and
1013 | included in the result.  Fields not in the list are ignored,
1014 | except \"=key=\" and \"=type=\", which are always included.  Case
1015 | is ignored when comparing fields to the list in FIELDS.  If
1016 | FIELDS is nil, all fields are returned."
1017 |   (or (and (hash-table-p entries)
1018 |            (eq 'equal (hash-table-test entries)))
1019 |       (setq entries (make-hash-table :test #'equal)))
1020 |   (if (eq inheritance t)
1021 |       (setq inheritance (or (parsebib-find-bibtex-dialect)
1022 |                             (and (boundp 'bibtex-dialect) bibtex-dialect)
1023 |                             'BibTeX)))
1024 |   ;; Ensure =key= and =type= are in `fields'.
1025 |   (if fields
1026 |       (setq fields (append (list "=key=" "=type=" fields))))
1027 |   (condition-case err
1028 |       (save-excursion
1029 |         (goto-char (point-min))
1030 |         (cl-loop with entry = nil
1031 |                  for entry-type = (parsebib-find-next-item)
1032 |                  while entry-type do
1033 |                  (unless (member-ignore-case entry-type '("preamble" "string" "comment"))
1034 |                    (setq entry (parsebib-read-entry fields strings (not (null strings))))
1035 |                    (if entry
1036 |                        (puthash (cdr (assoc-string "=key=" entry)) entry entries))))
1037 |         (when inheritance
1038 |           (parsebib-expand-xrefs entries inheritance))
1039 |         entries)
1040 |     (parsebib-error
1041 |      (save-excursion
1042 |        (goto-char (cadr err))
1043 |        (signal (car err) (list (concat (apply #'format (cddr err))
1044 |                                        (format " at position (%d,%d)" (line-number-at-pos) (current-column)))))))))
1045 | 
(defun parsebib-find-bibtex-dialect ()
  "Return the BibTeX dialect set in the current buffer, or nil.
The dialect is taken from a setting of the variable
`bibtex-dialect' in a local variable block inside an @Comment in
the last 3000 characters of the buffer.  The return value is the
dialect as a symbol.  Point is not moved."
  (save-excursion
    (goto-char (point-max))
    (let ((case-fold-search t)
          ;; Fall back on the two standard dialects if `bibtex-dialect-list'
          ;; has no (non-nil) value.
          (bibtex-dialect-list (if (and (boundp 'bibtex-dialect-list)
                                        bibtex-dialect-list)
                                   bibtex-dialect-list
                                 '(BibTeX biblatex))))
      (if (re-search-backward (concat parsebib--bibtex-entry-start "comment")
                              (- (point-max) 3000)
                              t)
          (let ((comment (parsebib--@comment)))
            (and comment
                 ;; The comment must consist of a complete local variable
                 ;; block...
                 (string-match-p "\\`{[ \n\t\r]*Local Variables:" comment)
                 (string-match-p "End:[ \n\t\r]*}\\'" comment)
                 ;; ...which sets the dialect to one of the known values.
                 (string-match (concat "bibtex-dialect: "
                                       (regexp-opt (mapcar #'symbol-name bibtex-dialect-list) t))
                               comment)
                 (intern (match-string 1 comment))))))))
1064 | 
(cl-defun parsebib-parse-bib-buffer (&key entries strings expand-strings inheritance fields replace-TeX)
  "Parse the current buffer and return all BibTeX data.
The return value is a list of five elements: a hash table with
the entries, a hash table with the @String definitions, a list of
@Preamble definitions, a list of @Comments and the BibTeX
dialect.  The dialect is the one set in the file, or else the
value of the variable `bibtex-dialect', or else `BibTeX'.

ENTRIES and STRINGS, if they are hash tables with test function
`equal', are used to store the entries and the @String
definitions, respectively.  Existing entries with identical keys
are overwritten.

If EXPAND-STRINGS is non-nil, abbreviations in the entries and in
the @String definitions are expanded using the @String
definitions already in STRINGS.  In addition, sequences of
whitespace in field values are collapsed into a single space and
field values are unquoted, i.e., the double quotes or braces
around them are removed.  Note that @String expansion, collapsing
of whitespace and prettifying TeX markup are not applied to
fields listed in `parsebib-postprocessing-excluded-fields', but
unquoting is.

If INHERITANCE is non-nil, cross-references in the entries are
resolved: if the crossref field of an entry points to an entry
already in ENTRIES, the fields of the latter that do not occur in
the entry are added to it.  INHERITANCE indicates the inheritance
schema used for determining which fields inherit from which
fields.  It can be a symbol `BibTeX' or `biblatex', which means
to use the default inheritance schema for either dialect, or it
can be an explicit inheritance schema.  (See the variable
`parsebib--biblatex-inheritances' for details on the structure of
such an inheritance schema.)  It can also be the symbol t, in
which case the dialect returned as the fifth element is used.

FIELDS is a list of the field names (as strings) to be read and
included in the result.  Fields not in the list are ignored,
except \"=key=\" and \"=type=\", which are always included.  Case
is ignored when comparing fields to the list in FIELDS.  If
FIELDS is nil, all fields are returned.

REPLACE-TEX indicates whether TeX markup should be replaced with
ASCII/Unicode characters.  See the variable
`parsebib-TeX-markup-replacement-alist' for details."
  (unless (and (hash-table-p entries)
               (eq (hash-table-test entries) 'equal))
    (setq entries (make-hash-table :test #'equal)))
  (unless (and (hash-table-p strings)
               (eq (hash-table-test strings) 'equal))
    (setq strings (make-hash-table :test #'equal)))
  ;; Make sure that =key= and =type= are always read.
  (when fields
    (setq fields (append (list "=key=" "=type=") fields)))
  (condition-case err
      (let ((dialect (or (parsebib-find-bibtex-dialect)
                         (and (boundp 'bibtex-dialect) bibtex-dialect)
                         'BibTeX))
            (preambles nil)
            (comments nil)
            (item nil))
        (save-excursion
          (goto-char (point-min))
          (while (setq item (parsebib-find-next-item))
            (cond
             ;; Item types are compared with `cl-equalp', which ignores case.
             ((cl-equalp item "string")
              (let ((definition (parsebib-read-string (if expand-strings strings))))
                (when definition
                  (puthash (car definition) (cdr definition) strings))))
             ((cl-equalp item "preamble")
              (push (parsebib--@preamble) preambles))
             ((cl-equalp item "comment")
              (push (parsebib--@comment) comments))
             ;; Anything else is a bibliography entry.
             ((stringp item)
              (let ((entry (parsebib-read-entry fields
                                                (if expand-strings strings)
                                                replace-TeX)))
                (if entry
                    (puthash (cdr (assoc-string "=key=" entry)) entry entries))))))
          (if inheritance
              (parsebib-expand-xrefs entries
                                     (if (eq inheritance t)
                                         dialect
                                       inheritance)))
          (list entries strings (nreverse preambles) (nreverse comments) dialect)))
    ;; Re-signal parse errors with the buffer position converted to a
    ;; line/column pair.
    (parsebib-error
     (save-excursion
       (goto-char (cadr err))
       (signal (car err)
               (list (concat (apply #'format (cddr err))
                             (format " at position (%d,%d)" (line-number-at-pos) (current-column)))))))))
1148 | 
1149 | ;;;; CSL-JSON API
1150 | 
(cl-defun parsebib-parse-json-buffer (&key entries stringify year-only fields)
  "Parse the current buffer and return all CSL-JSON data.
The return value is a hash table containing all the elements.
The hash table's keys are the \"id\" values of the entries, the
hash table's values are alists as returned by `json-parse-buffer'
or `json-read'.

If ENTRIES is a hash table with test function `equal', it is used
to store the entries.  Any existing entries with identical keys
are overwritten.

If STRINGIFY is non-nil, JSON values that are not
strings (notably name and date fields) are converted to strings.
If additionally YEAR-ONLY is non-nil, dates are shortened to just
the year part.

FIELDS is a list of field names (as symbols) to be read and
included in the result.  Fields not in the list are ignored,
except `id' and `type', which are always included.  If FIELDS is
nil, all fields are returned.

If the buffer does not start with a JSON array, an error is
signaled.  An empty array is accepted and yields no entries.  If
a JSON object is encountered that does not have an \"id\" field,
a `parsebib-error' is raised."
  (or (and (hash-table-p entries)
           (eq (hash-table-test entries) 'equal))
      (setq entries (make-hash-table :test #'equal)))
  (when fields
    (setq fields (append '(id type) fields)))
  (let ((parse (if (and (fboundp 'json-serialize)
                        (json-serialize '((test . 1)))) ; Returns nil if native json support isn't working for some reason.
                   (lambda ()
                     (json-parse-buffer :object-type 'alist))
                 (lambda ()
                   (let ((json-object-type 'alist))
                     (json-read))))))
    ;; We do not read the entire file in one go, but instead parse each entry
    ;; separately.  Large bibliographies would otherwise be returned as one
    ;; gigantic vector, which then needs to be converted to a hash table.  If we
    ;; need to convert some of the data because `stringify' is t, the data is
    ;; held in memory twice.
    (save-excursion
      (goto-char (point-min))
      ;; JSON is pretty strict, not even comments are allowed.  CSL-JSON
      ;; requires that the file is essentially one big array, so we know that
      ;; the first non-whitespace character in the file must be an opening
      ;; bracket;
      (if (not (looking-at-p "[\n\t ]*\\["))
          (error "[Parsebib Error] Not a valid CSL-JSON file"))
      ;; An empty array is a valid bibliography without entries.  There is
      ;; no object to parse in that case, so the loop must not be entered:
      ;; the parser would run into the end of the buffer and signal an error.
      (let ((continue (not (looking-at-p "[\n\t ]*\\[[\n\t ]*\\]"))))
        (while continue
          ;; We also know that the first non-whitespace character after that
          ;; must be an opening brace:
          (skip-chars-forward "^{")
          (if-let* ((entry (funcall parse))
                    (id (alist-get 'id entry)))
              (progn
                (when fields
                  (setq entry (seq-filter (lambda (elt)
                                            (memq (car elt) fields))
                                          entry)))
                (puthash id (if stringify
                                (parsebib-stringify-json entry year-only)
                              entry)
                         entries))
            (signal 'parsebib-error (list (format "Malformed JSON entry at position (%d,%d)"
                                                  (line-number-at-pos) (current-column)))))
          ;; Parsing an entry moves point to the end of the entry.  The next
          ;; character must be a comma if there is another entry.  If we're not
          ;; seeing a comma, we've reached the end of the file:
          (if (not (looking-at-p "[\n\t ]*,"))
              (setq continue nil))))))
  entries)
1223 | 
(defun parsebib-stringify-json (entry &optional year-only)
  "Convert every non-string value in ENTRY to a string.
ENTRY is an alist representing a single CSL-JSON entry.  The
conversion is destructive: values are replaced inside ENTRY
itself, and ENTRY is also the return value.  Values that are
already strings are left untouched.  YEAR-ONLY is handed on to
`parsebib-stringify-json-field', which uses it to shorten date
fields to just the year."
  (mapc (lambda (pair)
          (let ((key (car pair)))
            ;; The key is looked up in ENTRY again (instead of using PAIR
            ;; directly), so it is always the first occurrence of KEY that
            ;; is examined and updated.
            (or (stringp (alist-get key entry))
                (setf (alist-get key entry)
                      (parsebib-stringify-json-field (assq key entry)
                                                     year-only)))))
        entry)
  entry)
1234 | 
(defvar parsebib--json-name-fields
  '(author collection-editor composer container-author director
    editor editorial-director illustrator interviewer original-author
    recipient reviewed-author translator)
  "List of CSL-JSON field names that are treated as name fields.
`parsebib-stringify-json-field' converts the value of these
fields with `parsebib--json-stringify-name-field'.")
1248 | 
(defvar parsebib--json-date-fields
  '(accessed container event-date issued original-date submitted)
  "List of CSL-JSON field names that are treated as date fields.
`parsebib-stringify-json-field' converts the value of these
fields with `parsebib--json-stringify-date-field'.")
1255 | 
;; NOTE(review): no use of this variable is visible in the surrounding code;
;; confirm where (and whether) it is consulted before changing it.
(defvar parsebib--json-number-fields
  '(chapter-number collection-number edition issue
    number number-of-pages number-of-volumes volume)
  "List of CSL-JSON field names that (presumably) hold numeric values.")
1264 | 
(defvar parsebib-json-name-field-template
  "{non-dropping-particle }{family, }{given}{ dropping-particle}{, suffix}{literal}"
  "Template for converting a single CSL-JSON name to a string.
Each braced part contains the key of a name part, optionally
surrounded by non-letter text.  When the name has a value for the
key, the braced part is replaced with that value plus the
surrounding text; otherwise it is dropped altogether.  See
`parsebib--process-template'.")

(defvar parsebib-json-name-field-separator " and "
  "String placed between the individual names of a name field.")

(defvar parsebib-json-field-separator ", "
  "String placed between the elements of an array-valued field.")
1273 | 
(defun parsebib--process-template (template items)
  "Process TEMPLATE and return a formatted string.
ITEMS is an alist whose keys are symbols.  TEMPLATE is a string
containing braced placeholders.  A placeholder consists of a key
\(letters and hyphens, starting with a letter), optionally
preceded and followed by non-letter text, e.g. \"{family, }\" or
\"{-month}\".

If the key has a non-nil value in ITEMS, the placeholder is
replaced with that value together with the surrounding text of
the placeholder.  If the key has no value, the whole placeholder
is replaced with the empty string.  A braced sequence that does
not contain a key at all (such as \"{}\") is left unchanged.

Values are inserted verbatim: their case is never adapted to the
case of the placeholder."
  (replace-regexp-in-string
   "{.*?}"
   (lambda (placeholder)
     ;; `replace-regexp-in-string' still needs its own match data after
     ;; this function returns, hence the `save-match-data'.
     (save-match-data
       (if (string-match
            "{\\([^A-Za-z]*\\)\\([A-Za-z][A-Za-z-]*\\)\\([^A-Za-z]*\\)}"
            placeholder)
           (let* ((pre (match-string 1 placeholder))
                  (key (match-string 2 placeholder))
                  (post (match-string 3 placeholder))
                  (value (alist-get (intern key) items)))
             (if value
                 (format "%s%s%s" pre value post)
               ""))
         ;; No key inside the braces: keep the text as it is rather than
         ;; working with stale match data.
         placeholder)))
   template
   ;; FIXEDCASE and LITERAL: insert the replacement exactly as computed.
   t t))
1291 | 
(defun parsebib-stringify-json-field (field &optional short)
  "Convert the value of FIELD to a string and return it.
FIELD is a cons cell (KEY . VALUE) representing one field of a
CSL-JSON entry.  A string VALUE is returned with each run of
white space collapsed into a single space; any other kind of
VALUE is turned into a string.  SHORT only affects date fields:
when non-nil, the result is just a year, or \"XXXX\" if the date
has no year part."
  (let* ((key (car field))
         (value (cdr field)))
    (cond
     ;; Plain numbers are simply printed.
     ((numberp value)
      (format "%s" value))

     ;; Strings only need their white space normalised.
     ((stringp value)
      (replace-regexp-in-string "[ \t\n\f[:space:]]+" " " value))

     ;; Dates and names have dedicated converters.
     ((memq key parsebib--json-date-fields)
      (parsebib--json-stringify-date-field value short))

     ((memq key parsebib--json-name-fields)
      (parsebib--json-stringify-name-field value))

     ;; Any remaining array: print each element and join the results with
     ;; `parsebib-json-field-separator'.
     ((arrayp value)
      (mapconcat (lambda (item) (format "%s" item))
                 value
                 parsebib-json-field-separator))

     ;; Fallback for every other kind of value: print it and replace
     ;; newlines with spaces.
     (t
      (replace-regexp-in-string "\n" " " (format "%s" value))))))
1324 | 
(defun parsebib--json-stringify-name-field (names)
  "Return the names in NAMES as a single string.
NAMES is the value of a CSL-JSON name field: a vector of alists,
one alist per name.  Every name is formatted by filling in
`parsebib-json-name-field-template' with the parts of that name;
parts for which the name has no value are left out.  The
formatted names are joined with
`parsebib-json-name-field-separator'."
  (let ((format-name (apply-partially #'parsebib--process-template
                                      parsebib-json-name-field-template)))
    (mapconcat format-name names parsebib-json-name-field-separator)))
1336 | 
(defun parsebib--json-stringify-date-field (date &optional short)
  "Convert DATE to a string.
DATE is the value of a CSL-JSON date field, an alist that may
contain the keys `date-parts', `season', `circa', `literal' and
`raw'.  DATE itself is never modified.

If SHORT is non-nil, return only the year of the first date in
`date-parts', or \"XXXX\" if there is no such year (no
`date-parts', or an empty one).

Otherwise return the full date: an optional \"ca.\" (if `circa'
is set to anything but a JSON false or null value), an optional
season (the numbers 1 to 4 are shown as season names), the start
date, an optional end date separated by a slash, and the
`literal' and `raw' values, if any."
  (let* ((date-parts (alist-get 'date-parts date))
         ;; Guard against empty arrays, which would otherwise make the
         ;; element accesses below signal an error.
         (start-date (and date-parts
                          (> (length date-parts) 0)
                          (elt date-parts 0))))
    (if short
        (let ((year (and start-date
                         (> (length start-date) 0)
                         (elt start-date 0))))
          (if year
              (format "%s" year)
            "XXXX"))

      ;; Work with a copy of the original alist.  `copy-alist' also copies
      ;; the individual associations, so the modifications of `season' and
      ;; `circa' below cannot leak into the caller's data.
      (setq date (copy-alist date))

      ;; Replace date-parts with start-date and end-date.
      (when date-parts
        (setf (alist-get 'date-parts date nil :remove) nil)
        (when start-date
          (setf (alist-get 'start-date date)
                (parsebib--json-stringify-date-part start-date)))
        (let ((end-date (and (= (length date-parts) 2)
                             (elt date-parts 1))))
          (when end-date
            (setf (alist-get 'end-date date)
                  (parsebib--json-stringify-date-part end-date)))))

      ;; Set season.  Only the integers 1-4 denote a season name; any other
      ;; value is displayed as it is.
      (let ((season (alist-get 'season date)))
        (when (and (integerp season) (<= 1 season 4))
          (setf (alist-get 'season date)
                (aref ["Spring" "Summer" "Autumn" "Winter"] (1- season)))))

      ;; Set circa.  The JSON parsers represent false/null with non-nil
      ;; keywords, which must not be mistaken for a true value.
      (let ((circa (alist-get 'circa date)))
        (when circa
          (setf (alist-get 'circa date)
                (if (memq circa '(:false :json-false :null))
                    nil
                  "ca."))))

      ;; Now convert the date.
      (parsebib--process-template "{circa }{season }{start-date}{/end-date}{literal}{raw}"
                                  date))))
1377 | 
(defun parsebib--json-stringify-date-part (date-parts)
  "Return DATE-PARTS as a string of the form \"year-month-day\".
DATE-PARTS is a sequence of at most three elements, in the order
year, month, day.  Elements that are missing at the end are
simply left out of the result."
  ;; Label the elements by position, so that they can be looked up by the
  ;; template.
  (let ((labelled (seq-mapn #'cons '(year month day) date-parts)))
    (parsebib--process-template "{year}{-month}{-day}" labelled)))
1384 | 
1385 | ;;;; Format-independent API
1386 | 
(cl-defun parsebib-parse (files &key entries strings (display t) fields)
  "Parse one or more bibliography files.
FILES is the list of files to parse.  All bibliographic entries
in FILES are collected and returned in a single hash table.
FILES can be a list of `.bib' or `.json' files, or a combination
of these.  FILES can also be a string, which should be the path
to a single bibliography file.  The type of a file is determined
by its extension, ignoring case.  A file with any other extension
causes an error, which is signalled before the file is read.

ENTRIES, if provided, should be a hash table with test function
`equal', it is used to store the entries.  Any existing entries
with identical keys are overwritten.  If provided, ENTRIES is
also the return value.  If ENTRIES is nil (or not a hash table
with test function `equal'), a new hash table is created and
returned.

STRINGS, similarly a hash table with test function `equal', is
used to store the @String definitions.  Although STRINGS is not
returned, it is modified in place and can therefore be used to
collect the @String definitions in the files being parsed.

If DISPLAY is non-nil, field values are returned in a way that is
suitable for display: in `.bib' files, @String abbreviations are
expanded, in `.json' files, values that are not strings are
converted to strings.  Furthermore, sequences of white space
characters (including newlines) are reduced to a single space.

Specifically, DISPLAY is passed on as the arguments
EXPAND-STRINGS, INHERITANCE and REPLACE-TEX of the function
`parsebib-parse-bib-buffer' and as the arguments STRINGIFY and
YEAR-ONLY of the function `parsebib-parse-json-buffer'.  See the
doc strings of these functions for the values they accept.  Note
that DISPLAY defaults to t.

FIELDS is a list of the field names to be read and included in
the result.  Fields not in the list are ignored.  Note that field
names should be strings; when parsing a `.json' file, they are
converted to symbols.  See the doc strings of
`parsebib-parse-bib-buffer' and `parsebib-parse-json-buffer' for
details.  If FIELDS is nil, all fields are returned."
  (unless (and (hash-table-p entries)
               (eq (hash-table-test entries) 'equal))
    (setq entries (make-hash-table :test #'equal)))
  (unless (and (hash-table-p strings)
               (eq (hash-table-test strings) 'equal))
    (setq strings (make-hash-table :test #'equal)))
  (when (stringp files)
    (setq files (list files)))
  (dolist (file files)
    (let ((type (downcase (file-name-extension file t))))
      ;; Reject unsupported files right away, instead of first reading
      ;; their contents into a buffer.
      (unless (member type '(".bib" ".json"))
        (error "[Parsebib] Not a bibliography file: %s" file))
      (with-temp-buffer
        (insert-file-contents file)
        (if (string= type ".bib")
            (parsebib-parse-bib-buffer :entries entries
                                       :strings strings
                                       :expand-strings display
                                       :inheritance display
                                       :fields fields
                                       :replace-TeX display)
          (parsebib-parse-json-buffer :entries entries
                                      :stringify display
                                      :year-only display
                                      :fields (mapcar #'intern fields))))))
  entries)
1453 | 
1454 | (provide 'parsebib)
1455 | 
1456 | ;;; parsebib.el ends here
1457 | 


--------------------------------------------------------------------------------
/test/parsebib-test.el:
--------------------------------------------------------------------------------
  1 | ;;; parsebib-test.el --- Tests for parsebib
  2 | 
  3 | (require 'parsebib)
  4 | 
  5 | ;; Note: tests are named with the prefix `parsebib-test-' followed by the name
  6 | ;; of the function being tested, without the `parsebib-' or `parsebib--' prefix.
  7 | 
  8 | (ert-deftest parsebib-test-json-stringify-date-part ()
  9 |   (should (string= (parsebib--json-stringify-date-part [2021 22 4]) "2021-22-4"))
 10 |   (should (string= (parsebib--json-stringify-date-part [2021 22]) "2021-22"))
 11 |   (should (string= (parsebib--json-stringify-date-part [2021]) "2021")))
 12 | 
 13 | (ert-deftest parsebib-test-json-stringify-date-field ()
 14 |   (should (string= (parsebib--json-stringify-date-field '(issued
 15 |                                                           (date-parts . [[2004]])))
 16 |                    "2004"))
 17 |   (should (string= (parsebib--json-stringify-date-field '(issued
 18 |                                                           (date-parts . [[2004 4 22]])))
 19 |                    "2004-4-22"))
 20 |   (should (string= (parsebib--json-stringify-date-field '(issued
 21 |                                                           (date-parts . [[2004 4 22] [2021 4 22]])))
 22 |                    "2004-4-22/2021-4-22"))
 23 |   (should (string= (parsebib--json-stringify-date-field '(issued
 24 |                                                           (date-parts . [[2004] [2021]])))
 25 |                    "2004/2021"))
 26 |   (should (string= (parsebib--json-stringify-date-field '(issued
 27 |                                                           (date-parts . [[2004 4]])
 28 |                                                           (circa . t)))
 29 |                    "ca. 2004-4"))
 30 |   (should (string= (parsebib--json-stringify-date-field '(issued
 31 |                                                           (date-parts . [[2004]])
 32 |                                                           (season . 1)))
 33 |                    "Spring 2004"))
 34 |   (should (string= (parsebib--json-stringify-date-field '(issued
 35 |                                                           (literal . "April 2004")))
 36 |                    "April 2004")))
 37 | 
 38 | (ert-deftest parsebib-test-json-stringify-name-field ()
 39 |   (should (string= (parsebib--json-stringify-name-field [((family . "Anderson")
 40 |                                                           (given . "John R."))
 41 |                                                          ((family . "Bothell")
 42 |                                                           (given . "Daniel"))])
 43 |                    "Anderson, John R. and Bothell, Daniel"))
 44 |   (should (string= (parsebib--json-stringify-name-field [((family . "Koning")
 45 |                                                           (given . "Willem")
 46 |                                                           (non-dropping-particle . "de"))
 47 |                                                          ((family . "Beethoven")
 48 |                                                           (dropping-particle . "van")
 49 |                                                           (given . "Ludwig"))])
 50 |                    "de Koning, Willem and Beethoven, Ludwig van"))
 51 |   (should (string= (parsebib--json-stringify-name-field [((family . "Gates")
 52 |                                                           (given . "Bill")
 53 |                                                           (suffix . "III"))])
 54 |                    "Gates, Bill, III"))
 55 |   (should (string= (parsebib--json-stringify-name-field [((literal . "Michigan Institute of Technology"))])
 56 |                    "Michigan Institute of Technology")))
 57 | 
 58 | ;; Test `parsebib-stringify-json-field' with name fields.
 59 | (ert-deftest parsebib-test-stringify-json-field--name-fields ()
 60 |   (should (string= (parsebib-stringify-json-field '(author . [((family . "Anderson")
 61 |                                                                (given . "John R."))
 62 |                                                               ((family . "Bothell")
 63 |                                                                (given . "Daniel"))]))
 64 |                    "Anderson, John R. and Bothell, Daniel"))
 65 |   (should (string= (parsebib-stringify-json-field '(editor . [((family . "Koning")
 66 |                                                                (given . "Willem")
 67 |                                                                (non-dropping-particle . "de"))
 68 |                                                               ((family . "Beethoven")
 69 |                                                                (dropping-particle . "van")
 70 |                                                                (given . "Ludwig"))]))
 71 |                    "de Koning, Willem and Beethoven, Ludwig van"))
 72 |   (should (string= (parsebib-stringify-json-field '(translator . [((family . "Gates")
 73 |                                                                    (given . "Bill")
 74 |                                                                    (suffix . "III"))]))
 75 |                    "Gates, Bill, III"))
 76 |   (should (string= (parsebib-stringify-json-field '(recipient . [((literal . "Michigan Institute of Technology"))]))
 77 |                    "Michigan Institute of Technology")))
 78 | 
 79 | 
 80 | ;; Test `parsebib-stringify-json-field' with date fields.
 81 | (ert-deftest parsebib-test-stringify-json-field--date-fields ()
 82 |   (should (string= (parsebib-stringify-json-field '(container
 83 |                                                     (date-parts . [[2004 4 22] [2021 4 22]])))
 84 |                    "2004-4-22/2021-4-22"))
 85 |   (should (string= (parsebib-stringify-json-field '(submitted
 86 |                                                     (date-parts . [[2004]])))
 87 |                    "2004"))
 88 |   (should (string= (parsebib-stringify-json-field '(issued
 89 |                                                     (date-parts . [[2004 4]])
 90 |                                                     (circa . t)))
 91 |                    "ca. 2004-4"))
 92 |   (should (string= (parsebib-stringify-json-field '(event-date
 93 |                                                     (date-parts . [[2004]])
 94 |                                                     (season . 1)))
 95 |                    "Spring 2004"))
 96 |   (should (string= (parsebib-stringify-json-field '(accessed
 97 |                                                     (literal . "April 2004")))
 98 |                    "April 2004")))
 99 | 
100 | ;; Test `parsebib-stringify-json-field' with string fields.
(ert-deftest parsebib-test-stringify-json-field--string-fields ()
  "String values without extra white space are returned unchanged."
  (dolist (sample '(((title . "The Minimalist Program") . "The Minimalist Program")
                    ((ISBN . "1-01-XXXXXX-X")           . "1-01-XXXXXX-X")))
    (should (string= (parsebib-stringify-json-field (car sample))
                     (cdr sample)))))
106 | 
107 | ;; Test `parsebib-stringify-json-field' with number fields.
(ert-deftest parsebib-test-stringify-json-field--number-fields ()
  "Numeric values are converted to their printed representation."
  (dolist (sample '(((volume . 3) . "3")
                    ((page . 155) . "155")))
    (should (string= (parsebib-stringify-json-field (car sample))
                     (cdr sample)))))
113 | 
114 | ;; Test `parsebib-stringify-json-field' with array fields.
(ert-deftest parsebib-test-stringify-json-field--array-fields ()
  "The elements of an array value are joined into a single string."
  (let ((field '(categories . ["fiction" "horror"])))
    (should (string= (parsebib-stringify-json-field field)
                     "fiction, horror"))))
118 | 
119 | ;;; Tests for `parsebib-clean-TeX-markup'
120 | 
(ert-deftest parsebib-clean-TeX-markup-dashes ()
  "Em and en dashes, written as hyphens or as commands."
  ;; Each sample is a cons of the TeX input and the expected result.
  (dolist (sample '(;; Em dash.
                    ("---"                  . "—")
                    ("\\textemdash"         . "—")
                    ("\\textemdash and"     . "—and")
                    ("\\textemdash  and"    . "—and")
                    ("\\textemdash{}"       . "—")
                    ("\\textemdash{}and"    . "—and")
                    ("\\textemdash{} and"   . "— and")
                    ("\\textemdash{}  and"  . "— and")
                    ;; En dash.
                    ("--"                   . "–")
                    ("\\textendash"         . "–")
                    ("\\textendash{}"       . "–")))
    (should (equal (parsebib-clean-TeX-markup (car sample))
                   (cdr sample)))))
134 | 
(ert-deftest parsebib-clean-TeX-markup-math-and-text-mode-commands ()
  "Symbol commands are replaced with the corresponding characters."
  ;; Each sample is a cons of the TeX input and the expected result.
  (dolist (sample '(("\\ddag{} \\textdaggerdbl" . "‡ ‡")
                    ("10\\textpertenthousand"   . "10‱")
                    ("200\\textperthousand."    . "200‰.")
                    ("\\textquestiondown"       . "¿")
                    ("\\P 3.2"                  . "¶3.2")
                    ("\\textdollar"             . "$")
                    ("\\S 5.2"                  . "§5.2")
                    ("\\ldots{} [\\dots] \\textellipsis and" . "… […] …and")))
    (should (equal (parsebib-clean-TeX-markup (car sample))
                   (cdr sample)))))
145 | 
(ert-deftest parsebib-clean-TeX-markup-nonletter-diacritics-without-braces ()
  "Diacritic commands named by a non-letter character, argument unbraced."
  ;; Each sample is a cons of the TeX input and the expected result.
  (dolist (sample '(;; No space is needed after a nonletter diacritic command.
                    ("\\\"a" . "a\N{COMBINING DIAERESIS}")
                    ("\\'a"  . "a\N{COMBINING ACUTE ACCENT}")
                    ("\\.a"  . "a\N{COMBINING DOT ABOVE}")
                    ("\\=a"  . "a\N{COMBINING MACRON}")
                    ("\\^a"  . "a\N{COMBINING CIRCUMFLEX ACCENT}")
                    ("\\`a"  . "a\N{COMBINING GRAVE ACCENT}")
                    ("\\~a"  . "a\N{COMBINING TILDE}")
                    ("\\|a"  . "a\N{COMBINING COMMA ABOVE}")
                    ;; Spaces are possible, though:
                    ("\\' a"  . "a\N{COMBINING ACUTE ACCENT}")
                    ("\\'  a" . "a\N{COMBINING ACUTE ACCENT}")))
    (should (equal (parsebib-clean-TeX-markup (car sample))
                   (cdr sample)))))
159 | 
(ert-deftest parsebib-clean-TeX-markup-letter-diacritics-without-braces ()
  "Diacritic commands named by a single letter, argument unbraced."
  ;; Each sample is a cons of the TeX input and the expected result.
  (dolist (sample '(;; Diacritic commands that consist of a single letter
                    ;; require a space.
                    ("\\b a" . "a\N{COMBINING MACRON BELOW}")
                    ("\\c c" . "c\N{COMBINING CEDILLA}")
                    ("\\d a" . "a\N{COMBINING DOT BELOW}")
                    ("\\H a" . "a\N{COMBINING DOUBLE ACUTE ACCENT}")
                    ("\\k a" . "a\N{COMBINING OGONEK}")
                    ("\\U a" . "a\N{COMBINING DOUBLE VERTICAL LINE ABOVE}")
                    ("\\u a" . "a\N{COMBINING BREVE}")
                    ("\\v a" . "a\N{COMBINING CARON}")
                    ("\\f a" . "a\N{COMBINING INVERTED BREVE}")
                    ("\\G a" . "a\N{COMBINING DOUBLE GRAVE ACCENT}")
                    ("\\h a" . "a\N{COMBINING HOOK ABOVE}")
                    ("\\C a" . "a\N{COMBINING DOUBLE GRAVE ACCENT}")
                    ("\\r a" . "a\N{COMBINING RING ABOVE}")
                    ;; More than one space should also work:
                    ("\\b  a"  . "a\N{COMBINING MACRON BELOW}")
                    ("\\b   a" . "a\N{COMBINING MACRON BELOW}")
                    ;; It shouldn't work without space.  Since something like
                    ;; "\ba after" is essentially a command without an
                    ;; (explicit) argument, it should remain unchanged.
                    ("before \\ba after" . "before \\ba after")))
    (should (equal (parsebib-clean-TeX-markup (car sample))
                   (cdr sample)))))
182 | 
(ert-deftest parsebib-clean-TeX-markup-diacritics-with-braces ()
  "Diacritic commands whose argument is enclosed in braces."
  ;; Each sample is a cons of the TeX input and the expected result.
  (dolist (sample '(;; Diacritic commands may use braces to mark the argument.
                    ("\\\"{a}" . "a\N{COMBINING DIAERESIS}")
                    ("\\'{a}"  . "a\N{COMBINING ACUTE ACCENT}")
                    ("\\.{a}"  . "a\N{COMBINING DOT ABOVE}")
                    ("\\={a}"  . "a\N{COMBINING MACRON}")
                    ("\\^{a}"  . "a\N{COMBINING CIRCUMFLEX ACCENT}")
                    ("\\`{a}"  . "a\N{COMBINING GRAVE ACCENT}")
                    ("\\b{a}"  . "a\N{COMBINING MACRON BELOW}")
                    ("\\c{c}"  . "c\N{COMBINING CEDILLA}")
                    ("\\d{a}"  . "a\N{COMBINING DOT BELOW}")
                    ("\\H{a}"  . "a\N{COMBINING DOUBLE ACUTE ACCENT}")
                    ("\\k{a}"  . "a\N{COMBINING OGONEK}")
                    ("\\U{a}"  . "a\N{COMBINING DOUBLE VERTICAL LINE ABOVE}")
                    ("\\u{a}"  . "a\N{COMBINING BREVE}")
                    ("\\v{a}"  . "a\N{COMBINING CARON}")
                    ("\\~{a}"  . "a\N{COMBINING TILDE}")
                    ("\\|{a}"  . "a\N{COMBINING COMMA ABOVE}")
                    ("\\f{a}"  . "a\N{COMBINING INVERTED BREVE}")
                    ("\\G{a}"  . "a\N{COMBINING DOUBLE GRAVE ACCENT}")
                    ("\\h{a}"  . "a\N{COMBINING HOOK ABOVE}")
                    ("\\C{a}"  . "a\N{COMBINING DOUBLE GRAVE ACCENT}")
                    ("\\r{a}"  . "a\N{COMBINING RING ABOVE}")
                    ;; There may be spaces between the command and the
                    ;; argument.
                    ("\\' {a}"  . "a\N{COMBINING ACUTE ACCENT}")
                    ("\\'  {a}" . "a\N{COMBINING ACUTE ACCENT}")))
    (should (equal (parsebib-clean-TeX-markup (car sample))
                   (cdr sample)))))
209 | 
(ert-deftest parsebib-clean-TeX-markup-escapable-characters ()
  "Backslash-escaped special characters lose their backslash."
  (dolist (sample '(("percent: \\%"   . "percent: %")
                    ("ampersand: \\&" . "ampersand: &")
                    ("hash: \\#"      . "hash: #")
                    ("dollar: \\$"    . "dollar: $")))
    (should (equal (parsebib-clean-TeX-markup (car sample))
                   (cdr sample)))))
219 | 
(ert-deftest parsebib-clean-TeX-markup-quotes ()
  "TeX-style quotes are replaced with typographic quotation marks."
  (dolist (sample '(("``double'' quotes"
                     . "\N{LEFT DOUBLE QUOTATION MARK}double\N{RIGHT DOUBLE QUOTATION MARK} quotes")
                    ("`single' quotes"
                     . "\N{LEFT SINGLE QUOTATION MARK}single\N{RIGHT SINGLE QUOTATION MARK} quotes")))
    (should (equal (parsebib-clean-TeX-markup (car sample))
                   (cdr sample)))))
223 | 
(ert-deftest parsebib-clean-TeX-markup-textit ()
  "\\textit is rendered according to `parsebib-TeX-cleanup-target'."
  (let ((input "The verb \\textit{krijgen} as an undative verb."))
    ;; With the default target, the argument gets an italic face.
    (should (equal-including-properties
             (parsebib-clean-TeX-markup input)
             #("The verb krijgen as an undative verb." 9 16
               (face italic))))
    ;; The other targets: a cons of the target and the expected result.
    (dolist (sample '((markdown . "The verb *krijgen* as an undative verb.")
                      (org      . "The verb /krijgen/ as an undative verb.")
                      (plain    . "The verb krijgen as an undative verb.")))
      (should (equal
               (let ((parsebib-TeX-cleanup-target (car sample)))
                 (parsebib-clean-TeX-markup input))
               (cdr sample))))))
241 | 
(ert-deftest parsebib-clean-TeX-markup-emph ()
  "\\emph is rendered according to `parsebib-TeX-cleanup-target'."
  (let ((input "The verb \\emph{krijgen} as an undative verb."))
    ;; With the default target, the argument gets an italic face.
    (should (equal-including-properties
             (parsebib-clean-TeX-markup input)
             #("The verb krijgen as an undative verb." 9 16
               (face italic))))
    ;; The other targets: a cons of the target and the expected result.
    (dolist (sample '((markdown . "The verb *krijgen* as an undative verb.")
                      (org      . "The verb /krijgen/ as an undative verb.")))
      (should (equal
               (let ((parsebib-TeX-cleanup-target (car sample)))
                 (parsebib-clean-TeX-markup input))
               (cdr sample))))))
255 | 
(ert-deftest parsebib-clean-TeX-markup-textbf ()
  "\\textbf is rendered according to `parsebib-TeX-cleanup-target'."
  (let ((input "The verb \\textbf{krijgen} as an undative verb."))
    ;; With the default target, the argument gets a bold face.
    (should (equal-including-properties
             (parsebib-clean-TeX-markup input)
             #("The verb krijgen as an undative verb." 9 16
               (face bold))))
    ;; The other targets: a cons of the target and the expected result.
    (dolist (sample '((markdown . "The verb **krijgen** as an undative verb.")
                      (org      . "The verb *krijgen* as an undative verb.")))
      (should (equal
               (let ((parsebib-TeX-cleanup-target (car sample)))
                 (parsebib-clean-TeX-markup input))
               (cdr sample))))))
269 | 
(ert-deftest parsebib-clean-TeX-markup-textsc ()
  "The argument of \\textsc is converted to upper case."
  (let ((input "The verb \\textsc{krijgen} as an undative verb.")
        (expected "The verb KRIJGEN as an undative verb."))
    (should (equal (parsebib-clean-TeX-markup input) expected))))
274 | 
(ert-deftest parsebib-clean-TeX-markup-nested-macros ()
  "A macro nested inside the argument of \\textit."
  ;; NOTE(review): `equal' ignores text properties, so only the characters
  ;; of the result are compared here, not the italic face in the expected
  ;; values.  Confirm whether `equal-including-properties' was intended.
  (dolist (sample '(("\\textit{\\foo{bar}}" . #("bar" 0 3 (face italic)))
                    ("\\textit{\\foo}}"     . #("\\foo" 0 4 (face italic)))))
    (should (equal (parsebib-clean-TeX-markup (car sample))
                   (cdr sample)))))
280 | 
281 | (ert-deftest parsebib-clean-TeX-markup-nonascii-letters-with-braces ()  ; letter commands followed by `{}'
282 |   ;; The empty braces should be removed and the space after them retained.
283 |   (should (equal (parsebib-clean-TeX-markup "\\AA{} and") "\N{LATIN CAPITAL LETTER A WITH RING ABOVE} and"))
284 |   (should (equal (parsebib-clean-TeX-markup "\\AE{} and") "\N{LATIN CAPITAL LETTER AE} and"))
285 |   (should (equal (parsebib-clean-TeX-markup "\\DH{} and") "\N{LATIN CAPITAL LETTER ETH} and"))
286 |   (should (equal (parsebib-clean-TeX-markup "\\DJ{} and") "\N{LATIN CAPITAL LETTER ETH} and"))
287 |   (should (equal (parsebib-clean-TeX-markup "\\L{} and")  "\N{LATIN CAPITAL LETTER L WITH STROKE} and"))
288 |   (should (equal (parsebib-clean-TeX-markup "\\SS{} and") "\N{LATIN CAPITAL LETTER SHARP S} and"))
289 |   (should (equal (parsebib-clean-TeX-markup "\\NG{} and") "\N{LATIN CAPITAL LETTER ENG} and"))
290 |   (should (equal (parsebib-clean-TeX-markup "\\OE{} and") "\N{LATIN CAPITAL LIGATURE OE} and"))
291 |   (should (equal (parsebib-clean-TeX-markup "\\O{} and")  "\N{LATIN CAPITAL LETTER O WITH STROKE} and"))
292 |   (should (equal (parsebib-clean-TeX-markup "\\TH{} and") "\N{LATIN CAPITAL LETTER THORN} and"))
293 |   (should (equal (parsebib-clean-TeX-markup "\\aa{} and") "\N{LATIN SMALL LETTER A WITH RING ABOVE} and"))
294 |   (should (equal (parsebib-clean-TeX-markup "\\ae{} and") "\N{LATIN SMALL LETTER AE} and"))
295 |   (should (equal (parsebib-clean-TeX-markup "\\dh{} and") "\N{LATIN SMALL LETTER ETH} and"))
296 |   (should (equal (parsebib-clean-TeX-markup "\\dj{} and") "\N{LATIN SMALL LETTER ETH} and"))
297 |   (should (equal (parsebib-clean-TeX-markup "\\l{} and")  "\N{LATIN SMALL LETTER L WITH STROKE} and"))
298 |   (should (equal (parsebib-clean-TeX-markup "\\ss{} and") "\N{LATIN SMALL LETTER SHARP S} and"))
299 |   (should (equal (parsebib-clean-TeX-markup "\\ng{} and") "\N{LATIN SMALL LETTER ENG} and"))
300 |   (should (equal (parsebib-clean-TeX-markup "\\oe{} and") "\N{LATIN SMALL LIGATURE OE} and"))
301 |   (should (equal (parsebib-clean-TeX-markup "\\o{} and")  "\N{LATIN SMALL LETTER O WITH STROKE} and"))
302 |   (should (equal (parsebib-clean-TeX-markup "\\th{} and") "\N{LATIN SMALL LETTER THORN} and"))
303 |   (should (equal (parsebib-clean-TeX-markup "\\ij{} and") "ij and"))
304 |   (should (equal (parsebib-clean-TeX-markup "\\i{} and")  "\N{LATIN SMALL LETTER DOTLESS I} and"))
305 |   (should (equal (parsebib-clean-TeX-markup "\\j{} and")  "\N{LATIN SMALL LETTER DOTLESS J} and"))
306 |   ;; More than one space after the braces should work as well: the result has a single space.
307 |   (should (equal (parsebib-clean-TeX-markup "\\AA{}  and")  "\N{LATIN CAPITAL LETTER A WITH RING ABOVE} and"))
308 |   (should (equal (parsebib-clean-TeX-markup "\\AA{}   and") "\N{LATIN CAPITAL LETTER A WITH RING ABOVE} and")))
309 | 
310 | (ert-deftest parsebib-clean-TeX-markup-nonascii-letters-without-braces ()  ; letter commands ended by a space
311 |   ;; The space that terminates the command should be removed.
312 |   (should (equal (parsebib-clean-TeX-markup "\\AA n") "\N{LATIN CAPITAL LETTER A WITH RING ABOVE}n"))
313 |   (should (equal (parsebib-clean-TeX-markup "\\AE n") "\N{LATIN CAPITAL LETTER AE}n"))
314 |   (should (equal (parsebib-clean-TeX-markup "\\DH n") "\N{LATIN CAPITAL LETTER ETH}n"))
315 |   (should (equal (parsebib-clean-TeX-markup "\\DJ n") "\N{LATIN CAPITAL LETTER ETH}n"))
316 |   (should (equal (parsebib-clean-TeX-markup "\\L n")  "\N{LATIN CAPITAL LETTER L WITH STROKE}n"))
317 |   (should (equal (parsebib-clean-TeX-markup "\\SS n") "\N{LATIN CAPITAL LETTER SHARP S}n"))
318 |   (should (equal (parsebib-clean-TeX-markup "\\NG n") "\N{LATIN CAPITAL LETTER ENG}n"))
319 |   (should (equal (parsebib-clean-TeX-markup "\\OE n") "\N{LATIN CAPITAL LIGATURE OE}n"))
320 |   (should (equal (parsebib-clean-TeX-markup "\\O n")  "\N{LATIN CAPITAL LETTER O WITH STROKE}n"))
321 |   (should (equal (parsebib-clean-TeX-markup "\\TH n") "\N{LATIN CAPITAL LETTER THORN}n"))
322 |   (should (equal (parsebib-clean-TeX-markup "\\aa n") "\N{LATIN SMALL LETTER A WITH RING ABOVE}n"))
323 |   (should (equal (parsebib-clean-TeX-markup "\\ae n") "\N{LATIN SMALL LETTER AE}n"))
324 |   (should (equal (parsebib-clean-TeX-markup "\\dh n") "\N{LATIN SMALL LETTER ETH}n"))
325 |   (should (equal (parsebib-clean-TeX-markup "\\dj n") "\N{LATIN SMALL LETTER ETH}n"))
326 |   (should (equal (parsebib-clean-TeX-markup "\\l n")  "\N{LATIN SMALL LETTER L WITH STROKE}n"))
327 |   (should (equal (parsebib-clean-TeX-markup "\\ss n") "\N{LATIN SMALL LETTER SHARP S}n"))
328 |   (should (equal (parsebib-clean-TeX-markup "\\ng n") "\N{LATIN SMALL LETTER ENG}n"))
329 |   (should (equal (parsebib-clean-TeX-markup "\\oe n") "\N{LATIN SMALL LIGATURE OE}n"))
330 |   (should (equal (parsebib-clean-TeX-markup "\\o n")  "\N{LATIN SMALL LETTER O WITH STROKE}n"))
331 |   (should (equal (parsebib-clean-TeX-markup "\\th n") "\N{LATIN SMALL LETTER THORN}n"))
332 |   (should (equal (parsebib-clean-TeX-markup "\\ij n") "ijn"))
333 |   (should (equal (parsebib-clean-TeX-markup "\\i n")  "\N{LATIN SMALL LETTER DOTLESS I}n"))
334 |   (should (equal (parsebib-clean-TeX-markup "\\j n")  "\N{LATIN SMALL LETTER DOTLESS J}n"))
335 |   ;; More than one space should work as well: all of them are removed.
336 |   (should (equal (parsebib-clean-TeX-markup "\\AA  n")  "\N{LATIN CAPITAL LETTER A WITH RING ABOVE}n"))
337 |   (should (equal (parsebib-clean-TeX-markup "\\AA   n") "\N{LATIN CAPITAL LETTER A WITH RING ABOVE}n"))
338 |   ;; If there is no space, treat it as an unknown command (\AAn) and leave it unchanged.
339 |   (should (equal (parsebib-clean-TeX-markup "\\AAn")  "\\AAn")))
340 | 
341 | (ert-deftest parsebib-clean-TeX-markup-other-commands ()  ; commands without a dedicated replacement
342 |   ;; Do not change commands with no arguments.
343 |   (should (equal (parsebib-clean-TeX-markup "\\LaTeX and") "\\LaTeX and"))
344 |   ;; Commands followed by an empty set of braces should remain; only the braces are removed.
345 |   (should (equal (parsebib-clean-TeX-markup "\\LaTeX{} and") "\\LaTeX and"))
346 |   ;; Obligatory arguments should replace the command.
347 |   (should (equal (parsebib-clean-TeX-markup "\\foo{bar} and") "bar and"))
348 |   ;; Optional arguments should be removed, even empty ones.
349 |   (should (equal (parsebib-clean-TeX-markup "\\foo[]{bar} and") "bar and"))
350 |   (should (equal (parsebib-clean-TeX-markup "\\foo[bar]{baz} and") "baz and"))
351 |   (should (equal (parsebib-clean-TeX-markup "\\foo[bar][baz]{boo} and") "boo and"))
352 |   (should (equal (parsebib-clean-TeX-markup "\\foo[bar][baz]{} and") "\\foo and")))  ; empty obligatory argument: command kept
353 | 
354 | (ert-deftest parsebib-clean-TeX-markup-braces ()
355 |   "Grouping braces that do not belong to a command are stripped."
356 |   (should (equal "The UN should be all-caps." (parsebib-clean-TeX-markup "The {UN} should be all-caps."))))
357 | 
358 | ;;; Test for reading the .bib file for display.
359 | 
360 | ;; Test if @String abbreviations are expanded when `:expand-strings' is non-nil.
361 | (ert-deftest parsebib-test-parse-bib-buffer-@Strings ()
362 |   (should (equal
363 |            (with-temp-buffer
364 |              (insert "@String{MGrt = {Berlin: Mouton de Gruyter}}\n"
365 |                      "\n"
366 |                      "@book{Alexiadou:Haegeman:Stavrou2007,\n"
367 |                      "	year = {2007},\n"
368 |                      "	publisher = MGrt,\n"
369 |                      "	title = {Noun Phrase in the Generative Perspective},\n"
370 |                      "	author = {Alexiadou, Artemis and Haegeman, Liliane and Stavrou, Melita},\n"
371 |                      "	timestamp = {2013-09-25 12:00:00 (CET)},\n"
372 |                      "	file = {a/Alexiadou_Haegeman_Stavrou2007.pdf}}\n")
373 |              (let ((results (parsebib-parse-bib-buffer :expand-strings t)))  ; car of the result: hash table of entries by key
374 |                (alist-get "publisher" (gethash "Alexiadou:Haegeman:Stavrou2007" (car results))
375 |                           nil nil #'equal)))
376 |            "Berlin: Mouton de Gruyter")))  ; the expansion of MGrt, without the outer braces
377 | 
378 | ;; Test if braces around the value of the `file' field are removed (with `:expand-strings' non-nil).
379 | (ert-deftest parsebib-test-parse-bib-buffer-braces-in-file-field ()
380 |   (should (equal
381 |            (with-temp-buffer
382 |              (insert "@String{MGrt = {Berlin: Mouton de Gruyter}}\n"
383 |                      "\n"
384 |                      "@book{Alexiadou:Haegeman:Stavrou2007,\n"
385 |                      "	year = {2007},\n"
386 |                      "	publisher = MGrt,\n"
387 |                      "	title = {Noun Phrase in the Generative Perspective},\n"
388 |                      "	author = {Alexiadou, Artemis and Haegeman, Liliane and Stavrou, Melita},\n"
389 |                      "	timestamp = {2013-09-25 12:00:00 (CET)},\n"
390 |                      "	file = {a/Alexiadou_Haegeman_Stavrou2007.pdf}}\n")
391 |              (let ((results (parsebib-parse-bib-buffer :expand-strings t)))
392 |                (alist-get "file" (gethash "Alexiadou:Haegeman:Stavrou2007" (car results))
393 |                           nil nil #'equal)))
394 |            "a/Alexiadou_Haegeman_Stavrou2007.pdf")))  ; no surrounding braces
395 | 
396 | ;; Test if TeX markup in field values is cleaned up when `:replace-TeX' is non-nil.
397 | (ert-deftest parsebib-test-parse-bib-buffer-TeX-markup ()
398 |   (should (equal  ; NOTE(review): `equal' does not compare text properties, so the italic face below is not checked
399 |            (with-temp-buffer
400 |              (insert "@Article{Broekhuis:Cornips2012,\n"
401 |                      "	doi = {10.1515/ling-2012-0039},\n"
402 |                      "	file = {b/Broekhuis_Cornips2012.pdf},\n"
403 |                      "	pages = {1205-1249},\n"
404 |                      "	volume = {50},\n"
405 |                      "	number = {6},\n"
406 |                      "	date = {2012},\n"
407 |                      "	journaltitle = {Linguistics},\n"
408 |                      "	title = {The Verb \\textit{krijgen} `to get' as an Undative Verb},\n"
409 |                      "	author = {Broekhuis, Hans and Cornips, Leonie},\n"
410 |                      "	timestamp = {2019-01-16 23:38:31 (CET)}}\n")
411 |              (let ((results (parsebib-parse-bib-buffer :replace-TeX t)))
412 |                (alist-get "title" (gethash "Broekhuis:Cornips2012" (car results))
413 |                           nil nil #'equal)))
414 |            #("The Verb krijgen ‘to get’ as an Undative Verb" 9 16 (face italic)))))  ; \textit dropped, `...' -> ‘...’
415 | 
416 | ;; Test if runs of white space (including a newline) in a field value are collapsed into a single space.
417 | (ert-deftest parsebib-test-parse-bib-buffer-collapse-whitespace ()
418 |   (should (equal
419 |            (with-temp-buffer
420 |              (insert "@String{MGrt = {Berlin: Mouton de Gruyter}}\n"
421 |                      "\n"
422 |                      "@book{Alexiadou:Haegeman:Stavrou2007,\n"
423 |                      "	year = {2007},\n"
424 |                      "	publisher = MGrt,\n"
425 |                      "	title = {Noun Phrase  in the \n  Generative Perspective},\n"  ; double space and an embedded line break
426 |                      "	author = {Alexiadou, Artemis and Haegeman, Liliane and Stavrou, Melita},\n"
427 |                      "	timestamp = {2013-09-25 12:00:00 (CET)},\n"
428 |                      "	file = {a/Alexiadou_Haegeman_Stavrou2007.pdf}}\n")
429 |              (let ((results (parsebib-parse-bib-buffer :expand-strings t)))
430 |                (alist-get "title" (gethash "Alexiadou:Haegeman:Stavrou2007" (car results))
431 |                           nil nil #'equal)))
432 |            "Noun Phrase in the Generative Perspective")))
433 | 
434 | ;; Test if sequences of spaces in the `file' field are retained, even if @strings are expanded.
435 | (ert-deftest parsebib-test-parse-bib-buffer-dont-collapse-whitespace-in-file-field ()
436 |   (should (equal
437 |            (with-temp-buffer
438 |              (insert
439 |               "@inproceedings{ahnIdentifyingCPUBottlenecks,\n"
440 |               "  title = {Identifying {{On-}}/{{Off-CPU Bottlenecks Together}} with {{Blocked Samples}} {\\textbar} {{USENIX}}},\n"  ; backslash doubled: a bare \t would be read as TAB
441 |               "  shorttitle = {{{BCOZ}}},\n"
442 |               "  author = {Ahn, Minwoo and Han, Jeongmin and Kwon, Youngjin and Jeong, Jinkyu},\n"
443 |               "  urldate = {2024-10-09},\n"
444 |               "  langid = {english},\n"
445 |               "  file = {/Users/xxxx/Zotero/storage/ZJVUZD8F/Ahn et al. - Identifying On-Off-CPU Bottlenecks Together with Blocked Samples  USENIX.pdf}}\n")  ; note the double space before USENIX
446 |              (let ((results (parsebib-parse-bib-buffer :expand-strings t)))
447 |                (alist-get "file" (gethash "ahnIdentifyingCPUBottlenecks" (car results))
448 |                           nil nil #'equal)))
449 |            "/Users/xxxx/Zotero/storage/ZJVUZD8F/Ahn et al. - Identifying On-Off-CPU Bottlenecks Together with Blocked Samples  USENIX.pdf")))
450 | 
451 | ;;; Test inheritance
452 | (ert-deftest parsebib-test-parse-bib-buffer-biblatex-inheritance ()
453 |   "With biblatex inheritance, `booktitle' is taken from the `title' of the crossref parent."
454 |   (should (equal
455 |            (with-temp-buffer
456 |              (insert "@InCollection{Ackema:Neeleman2002,\n"
457 |                      "pages = {219-256},\n"
458 |                      "title = {Effects of Short-Term Storage in Processing Rightward Movement},\n"
459 |                      "crossref = {Nooteboom:Weerman:Wijnen},\n"
460 |                      "author = {Ackema, Peter and Neeleman, Ad}}\n"
461 |                      "\n"
462 |                      "@Collection{Nooteboom:Weerman:Wijnen,\n"
463 |                      "  year = {2002},\n"
464 |                      "  publisher = Klw,\n"
465 |                      "  title = {Storage and Computation in the Language Faculty},\n"
466 |                      "  editor = {Nooteboom, Sieb and Weerman, Fred and Wijnen, Frank}}\n")
467 |              (let ((entries (car (parsebib-parse-bib-buffer :inheritance 'biblatex))))
468 |                (alist-get "booktitle" (gethash "Ackema:Neeleman2002" entries)
469 |                           nil nil #'equal)))
470 |            "{Storage and Computation in the Language Faculty}")))
471 | 
472 | ;;; Test the RDP
473 | (ert-deftest parsebib-test-@comment ()  ; `parsebib--@comment' is called with point at the start of the @Comment
474 |   (should (equal  ; braced comment on one line: the braces are part of the result
475 |            (with-temp-buffer
476 |              (insert "@Comment{ -*-coding: utf-8 -*- }\n")
477 |              (goto-char (point-min))
478 |              (parsebib--@comment))
479 |            "{ -*-coding: utf-8 -*- }"))
480 |   (should (equal  ; comment without braces: the text after "@Comment ", without the newline
481 |            (with-temp-buffer
482 |              (insert "@Comment -*-coding: utf-8 -*-\n")
483 |              (goto-char (point-min))
484 |              (parsebib--@comment))
485 |            "-*-coding: utf-8 -*-"))
486 |   (should (equal  ; braced comment spanning several lines: returned up to the closing brace
487 |            (with-temp-buffer
488 |              (insert "@Comment{\n"
489 |                      "    Local Variables:\n"
490 |                      "    bibtex-dialect: biblatex\n"
491 |                      "    End:\n"
492 |                      "}\n")
493 |              (goto-char (point-min))
494 |              (parsebib--@comment))
495 |            (concat "{\n"
496 |                    "    Local Variables:\n"
497 |                    "    bibtex-dialect: biblatex\n"
498 |                    "    End:\n"
499 |                    "}"))))
500 | 
501 | (ert-deftest parsebib-test-@string ()  ; expected results are lists: the abbreviation, then the raw value part(s)
502 |   ;; @String definition with curly braces (the delimiters are kept in the result).
503 |   (should (equal
504 |            (with-temp-buffer
505 |              (insert "@String{MGrt = {Berlin: Mouton de Gruyter}}")
506 |              (goto-char (point-min))
507 |              (parsebib--@string))
508 |            (list "MGrt" "{Berlin: Mouton de Gruyter}")))
509 |   ;; @String definition with double quotes (the delimiters are kept in the result).
510 |   (should (equal
511 |            (with-temp-buffer
512 |              (insert "@String{LI = \"Linguistic Inquiry\"}")
513 |              (goto-char (point-min))
514 |              (parsebib--@string))
515 |            (list "LI" "\"Linguistic Inquiry\"")))
516 |   ;; @String definition composed with `#' of a braced string and another @String abbrev: one list element per part.
517 |   (should (equal
518 |            (with-temp-buffer
519 |              (insert "@String{CUP = {Cambridge: Cambridge } # UP}")
520 |              (goto-char (point-min))
521 |              (parsebib--@string))
522 |            (list "CUP" "{Cambridge: Cambridge }" "UP")))
523 |   ;; @String definition missing closing curly brace should signal `parsebib-error'.
524 |   (should-error (with-temp-buffer
525 |                   (insert "@String{CUP = {Cambridge: Cambridge } # UP\n")
526 |                   (goto-char (point-min))
527 |                   (parsebib--@string))
528 |                 :type 'parsebib-error)
529 |   ;; @String definition with mismatched brace/parenthesis should signal `parsebib-error'.
530 |   (should-error (with-temp-buffer
531 |                   (insert "@String{CUP = {Cambridge: Cambridge } # UP)\n")
532 |                   (goto-char (point-min))
533 |                   (parsebib--@string))
534 |                 :type 'parsebib-error)
535 |   ;; @String abbreviation without expansion: `dec' is not in the (empty) hash table passed below and is returned as is.
536 |   (should (equal (with-temp-buffer
537 |                    (insert "@Article{Potapov_2016aa,\n"
538 |                            "    author = {Potapov, Denis and Sukochev, Fedor and Zanin, Dmitriy},\n"
539 |                            "    month = dec,\n"
540 |                            "    title = {{Krein's trace theorem revisited}},\n"
541 |                            "    url = {http://arxiv.org/abs/1701.00697v1},\n"
542 |                            "    year = {2016}\n"
543 |                            "}\n")
544 |                    (goto-char (point-min))
545 |                    (let ((results (parsebib-read-entry nil #s(hash-table size 10 data nil test equal))))
546 |                      (alist-get "month" results nil nil #'equal)))
547 |                  "dec")))
548 | 
549 | (ert-deftest parsebib-test-read-entry-nested-braces ()
550 |   "Braces nested inside a field value are returned verbatim."
551 |   (with-temp-buffer
552 |     (insert "@article{10.1162/coli_a_00528,\n"
553 |             "    title = {Usage-based {Grammar Induction} from {Minimal Cognitive Principles}}\n"
554 |             "}\n")
555 |     (goto-char (point-min))
556 |     (let ((title (cdr (assoc "title" (parsebib-read-entry)))))
557 |       (should (equal title
558 |                      "{Usage-based {Grammar Induction} from {Minimal Cognitive Principles}}")))))
559 | 
560 | (ert-deftest parsebib-test-read-entry-after-last-field ()  ; both variants must yield the same key and title
561 |   ;; The last field in an entry does not need a trailing comma:
562 |   (should (equal
563 |            (with-temp-buffer
564 |              (insert "@article{10.1162/coli_a_00528,\n"
565 |                      "    title = {Usage-based Grammar Induction from Minimal Cognitive Principles}\n"
566 |                      "}\n")
567 |              (goto-char (point-min))
568 |              (let ((results (parsebib-read-entry)))
569 |                (cons (alist-get "=key=" results nil nil #'equal)  ; the entry key is stored under "=key="
570 |                      (alist-get "title" results nil nil #'equal))))
571 |            (cons "10.1162/coli_a_00528"
572 |                  "{Usage-based Grammar Induction from Minimal Cognitive Principles}")))
573 |   ;; But a trailing comma after the last field is accepted as well:
574 |   (should (equal
575 |            (with-temp-buffer
576 |              (insert "@article{10.1162/coli_a_00528,\n"
577 |                      "    title = {Usage-based Grammar Induction from Minimal Cognitive Principles},\n"
578 |                      "}\n")
579 |              (goto-char (point-min))
580 |              (let ((results (parsebib-read-entry)))
581 |                (cons (alist-get "=key=" results nil nil #'equal)
582 |                      (alist-get "title" results nil nil #'equal))))
583 |            (cons "10.1162/coli_a_00528"
584 |                  "{Usage-based Grammar Induction from Minimal Cognitive Principles}"))))
585 | 
586 | ;; Test braces and parentheses as the outer delimiters of an entry:
587 | (ert-deftest parsebib-test-read-entry-parentheses ()
588 |   ;; Entry delimited by braces: @book{...}.
589 |   (should (equal
590 |            (with-temp-buffer
591 |              (insert "@book{Alexiadou:Haegeman:Stavrou2007,\n"
592 |                      "	year = {2007},\n"
593 |                      "	publisher = MGrt,\n"
594 |                      "	title = {Noun Phrase in the Generative Perspective},\n"
595 |                      "	author = {Alexiadou, Artemis and Haegeman, Liliane and Stavrou, Melita},\n"
596 |                      "	timestamp = {2013-09-25 12:00:00 (CET)},\n"
597 |                      "	file = {a/Alexiadou_Haegeman_Stavrou2007.pdf}}\n")
598 |              (goto-char (point-min))
599 |              (let ((results (parsebib-read-entry)))
600 |                (alist-get "=key=" results nil nil #'equal)))
601 |            "Alexiadou:Haegeman:Stavrou2007"))
602 |   ;; Entry delimited by parentheses: @book(...).
603 |   (should (equal
604 |            (with-temp-buffer
605 |              (insert "@book(Alexiadou:Haegeman:Stavrou2007,\n"
606 |                      "	year = {2007},\n"
607 |                      "	publisher = MGrt,\n"
608 |                      "	title = {Noun Phrase in the Generative Perspective},\n"
609 |                      "	author = {Alexiadou, Artemis and Haegeman, Liliane and Stavrou, Melita},\n"
610 |                      "	timestamp = {2013-09-25 12:00:00 (CET)},\n"
611 |                      "	file = {a/Alexiadou_Haegeman_Stavrou2007.pdf})\n")
612 |              (goto-char (point-min))
613 |              (let ((results (parsebib-read-entry)))
614 |                (alist-get "=key=" results nil nil #'equal)))
615 |            "Alexiadou:Haegeman:Stavrou2007"))
616 |   ;; Mismatched delimiters (opened with a parenthesis, closed with a brace) should signal `parsebib-error'.
617 |   (should-error (with-temp-buffer
618 |                   (insert "@book(Alexiadou:Haegeman:Stavrou2007,\n"
619 |                           "	year = {2007},\n"
620 |                           "	publisher = MGrt,\n"
621 |                           "	title = {Noun Phrase in the Generative Perspective},\n"
622 |                           "	author = {Alexiadou, Artemis and Haegeman, Liliane and Stavrou, Melita},\n"
623 |                           "	timestamp = {2013-09-25 12:00:00 (CET)},\n"
624 |                           "	file = {a/Alexiadou_Haegeman_Stavrou2007.pdf}}\n")
625 |                   (goto-char (point-min))
626 |                   (let ((results (parsebib-read-entry)))
627 |                     (alist-get "=key=" results nil nil #'equal)))
628 |                 :type 'parsebib-error))
629 | 
630 | ;; Test unbalanced parentheses (and, inside double quotes, unbalanced braces) in field values.
631 | (ert-deftest parsebib-test-unbalanced-parentheses-in-fields ()
632 |   ;; (ASCII) LEFT PARENTHESIS
633 |   (should (equal (with-temp-buffer
634 |                    (insert
635 |                     "@article{Title,\n"
636 |                     "    title = {{Title}},\n"
637 |                     "    author = {Author},\n"
638 |                     "    year = {1970},\n"
639 |                     "    journal = {Journal},\n"
640 |                     "    abstract = {(}\n"
641 |                     "}\n")
642 |                    (goto-char (point-min))
643 |                    (let ((results (parsebib-read-entry)))
644 |                      (alist-get "abstract" results nil nil #'equal)))
645 |                  "{(}"))
646 |   ;; (ASCII) RIGHT PARENTHESIS
647 |   (should (equal (with-temp-buffer
648 |                    (insert
649 |                     "@article{Title,\n"
650 |                     "    title = {{Title}},\n"
651 |                     "    author = {Author},\n"
652 |                     "    year = {1970},\n"
653 |                     "    journal = {Journal},\n"
654 |                     "    abstract = {)}\n"
655 |                     "}\n")
656 |                    (goto-char (point-min))
657 |                    (let ((results (parsebib-read-entry)))
658 |                      (alist-get "abstract" results nil nil #'equal)))
659 |                  "{)}"))
660 |   ;; (ASCII) LEFT and RIGHT CURLY BRACKET with double quotes as delimiters.
661 |   (should (equal (with-temp-buffer
662 |                    (insert
663 |                     "@article{Title,\n"
664 |                     "    title = {{Title}},\n"
665 |                     "    author = {Author},\n"
666 |                     "    year = {1970},\n"
667 |                     "    journal = \"Journal{\",\n"
668 |                     "    abstract = \"Abstract}\"\n"
669 |                     "}\n")
670 |                    (goto-char (point-min))
671 |                    (let ((results (parsebib-read-entry)))
672 |                      (cons (alist-get "journal" results nil nil #'equal)
673 |                            (alist-get "abstract" results nil nil #'equal))))
674 |                  (cons "\"Journal{\"" "\"Abstract}\"")))
675 |   ;; (CJK) FULLWIDTH LEFT PARENTHESIS
676 |   (should (equal (with-temp-buffer
677 |                    (insert
678 |                     "@article{Title,\n"
679 |                     "    title = {{Title}},\n"
680 |                     "    author = {Author},\n"
681 |                     "    year = {1970},\n"
682 |                     "    journal = {Journal},\n"
683 |                     "    abstract = {（}\n"
684 |                     "}\n")
685 |                    (goto-char (point-min))
686 |                    (let ((results (parsebib-read-entry)))
687 |                      (alist-get "abstract" results nil nil #'equal)))
688 |                  "{（}"))
689 |   ;; (CJK) FULLWIDTH RIGHT PARENTHESIS
690 |   (should (equal (with-temp-buffer
691 |                    (insert
692 |                     "@article{Title,\n"
693 |                     "    title = {{Title}},\n"
694 |                     "    author = {Author},\n"
695 |                     "    year = {1970},\n"
696 |                     "    journal = {Journal},\n"
697 |                     "    abstract = {）}\n"
698 |                     "}\n")
699 |                    (goto-char (point-min))
700 |                    (let ((results (parsebib-read-entry)))
701 |                      (alist-get "abstract" results nil nil #'equal)))
702 |                  "{）}"))
703 |   ;; (CJK) FULLWIDTH LEFT and RIGHT PARENTHESIS with double quotes as delimiters.
704 |   (should (equal (with-temp-buffer
705 |                    (insert
706 |                     "@article{Title,\n"
707 |                     "    title = {{Title}},\n"
708 |                     "    author = {Author},\n"
709 |                     "    year = {1970},\n"
710 |                     "    journal = \"Journal（\",\n"
711 |                     "    abstract = \"Abstract）\"\n"
712 |                     "}\n")
713 |                    (goto-char (point-min))
714 |                    (let ((results (parsebib-read-entry)))
715 |                      (cons (alist-get "journal" results nil nil #'equal)
716 |                            (alist-get "abstract" results nil nil #'equal))))
717 |                  (cons "\"Journal（\"" "\"Abstract）\""))))
718 | 
719 | (ert-deftest parsebib-test-@-in-key ()  ; an entry key containing `@' (and square brackets) must be read in full
720 |   (should (equal (with-temp-buffer
721 |                    (insert
722 |                     "@misc{[@mjnanakar]_2024,\n"
723 |                     "    type = {{Tweet}},\n"
724 |                     "    year = {2024},\n"
725 |                     "    month = dec,\n"
726 |                     "    journal = {Twitter},\n"
727 |                     "    urldate = {2024-12-25},\n"
728 |                     "    langid = {persian},\n"
729 |                     "    keywords = {/unread}\n"
730 |                     "}\n")
731 |                    (goto-char (point-min))
732 |                    (let ((results (parsebib-read-entry)))
733 |                      (alist-get "=key=" results nil nil #'equal)))  ; the entry key is stored under "=key="
734 |                  "[@mjnanakar]_2024")))
735 | 
736 | 
737 | ;;; parsebib-test.el ends here
738 | 


--------------------------------------------------------------------------------
/test/test-helper.el:
--------------------------------------------------------------------------------
1 | ;;; test-helper.el --- Helpers for parsebib-test.el
2 | 
3 | ;;; test-helper.el ends here
4 | 


--------------------------------------------------------------------------------