├── version.js ├── index.d.ts ├── .babelrc ├── .travis.yml ├── Tame the BeaST.pdf ├── .npmignore ├── src ├── lexer │ ├── BibBlockTypes.ts │ ├── NumericToken.ts │ ├── IdToken.ts │ ├── WhitespaceToken.ts │ ├── Token.ts │ ├── Lexer.ts │ └── deprecated_lexer_.js ├── bibfile │ ├── datatype │ │ ├── string │ │ │ ├── StringRef.ts │ │ │ ├── BibStringData.ts │ │ │ ├── QuotedString.ts │ │ │ ├── BracedString.ts │ │ │ ├── BibStringComponent.ts │ │ │ └── bib-string-utils.ts │ │ └── KeyVal.ts │ ├── bib-entry │ │ ├── BibPreamble.ts │ │ ├── BibComment.ts │ │ ├── bibliographic-entity │ │ │ ├── Authors.ts │ │ │ ├── Author.ts │ │ │ └── mandatory-and-optional-fields.ts │ │ ├── BibEntry.ts │ │ └── BibStringEntry.ts │ └── BibFile.ts ├── index.ts ├── util.ts ├── nearley.d.ts └── parser │ ├── parser.ne │ └── ts-parser.ts ├── .gitignore ├── tsconfig.json ├── tslint.json ├── LICENSE ├── webpack.config.js ├── package.json ├── README.md └── test └── test.ts /version.js: -------------------------------------------------------------------------------- 1 | exports.default = "0.2.0"; -------------------------------------------------------------------------------- /index.d.ts: -------------------------------------------------------------------------------- 1 | export * from "./ts-compiled/index"; -------------------------------------------------------------------------------- /.babelrc: -------------------------------------------------------------------------------- 1 | { 2 | "presets": [ 3 | "babel-preset-es2015" 4 | ] 5 | } 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | sudo: false 3 | node_js: 4 | - stable 5 | - 6 -------------------------------------------------------------------------------- /Tame the BeaST.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cacfd3a/bibtex-js/HEAD/Tame the BeaST.pdf -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | *.log 3 | *.tgz 4 | 5 | src/ 6 | test/ 7 | gulpfile.js 8 | testrun.ts 9 | webpack.config.js 10 | /tsconfig.json 11 | /tslint.json 12 | /typings.json 13 | typings 14 | dist/test 15 | *.pdf 16 | /try.js -------------------------------------------------------------------------------- /src/lexer/BibBlockTypes.ts: -------------------------------------------------------------------------------- 1 | export const bibTypes = { 2 | string: "@string", 3 | preamble: "@preamble", 4 | comment: "@comment", 5 | bib: "@bib" 6 | }; 7 | 8 | export type BibType = keyof typeof bibTypes; 9 | 10 | export const isBibType = function (c: string): c is BibType { 11 | return bibTypes.hasOwnProperty(c); 12 | }; -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | 6 | # Compiled binary addons (http://nodejs.org/api/addons.html) 7 | build/Release 8 | /dist/ 9 | 10 | # Dependency directories 11 | node_modules 12 | jspm_packages 13 | 14 | # Optional npm cache directory 15 | .npm 16 | /.idea/ 17 | /lib/ 18 | 19 | # try.js 20 | # /*.js 21 | #/src/**/*.js 22 | #/test/**/*.js 23 | /ts-compiled/ 24 | 25 | /index.js 26 | /index.js.map -------------------------------------------------------------------------------- /src/bibfile/datatype/string/StringRef.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * A named reference to a string, eg. 
`{string1} # stringRef # {string2}` 3 | */ 4 | export class StringRef { 5 | readonly stringref: string; 6 | readonly braceDepth: number; 7 | 8 | constructor(braceDepth: number, stringref: string) { 9 | this.braceDepth = braceDepth; 10 | this.stringref = stringref; 11 | } 12 | } 13 | 14 | export function isStringRef(stringref: any): stringref is StringRef { 15 | return typeof stringref.stringref === "string"; 16 | } -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compileOnSave": false, 3 | "compilerOptions": { 4 | "declaration": true, 5 | "jsx": "react", 6 | "module": "commonjs", 7 | "noImplicitAny": false, 8 | "preserveConstEnums": true, 9 | "removeComments": true, 10 | "sourceMap": true, 11 | "lib": ["es2015"], 12 | "target": "es5", 13 | "strictNullChecks": true, 14 | "allowJs": false, 15 | "outDir": "ts-compiled" 16 | }, 17 | "include": [ 18 | "src/**/*.ts" 19 | ], 20 | "exclude": [ 21 | "node_modules" 22 | ] 23 | } -------------------------------------------------------------------------------- /src/bibfile/datatype/string/BibStringData.ts: -------------------------------------------------------------------------------- 1 | import {StringRef} from "./StringRef"; 2 | import {BracedString, OuterBracedString} from "./BracedString"; 3 | import {OuterQuotedString, QuotedString} from "./QuotedString"; 4 | 5 | /** 6 | * A piece or whole of a string in BiBTeX 7 | */ 8 | export type BibStringDatum = ( 9 | BracedString 10 | | QuotedString 11 | | OuterQuotedString 12 | | OuterBracedString 13 | | string 14 | | number 15 | | StringRef 16 | ); 17 | 18 | 19 | export type BibStringData = BibStringDatum[]; 20 | 21 | -------------------------------------------------------------------------------- /src/lexer/NumericToken.ts: -------------------------------------------------------------------------------- 1 | // export const NUMBER = "number"; 
2 | 3 | import {TypedToken} from "./Token"; 4 | 5 | export function newNumber(string: string): NumberToken { 6 | return { 7 | type: "number", 8 | string 9 | }; 10 | } 11 | 12 | export interface NumberToken extends TypedToken { 13 | type: "number"; 14 | } 15 | 16 | export const numericChars = { 17 | "0": true, 18 | "1": true, 19 | "2": true, 20 | "3": true, 21 | "4": true, 22 | "5": true, 23 | "6": true, 24 | "7": true, 25 | "8": true, 26 | "9": true 27 | }; 28 | 29 | export type NumericChar = keyof typeof numericChars; 30 | 31 | export function isNum(c: string): c is NumericChar { 32 | return numericChars.hasOwnProperty(c); 33 | } -------------------------------------------------------------------------------- /src/lexer/IdToken.ts: -------------------------------------------------------------------------------- 1 | import {TypedToken, SpecialChar, isSpecialChar} from "./Token"; 2 | 3 | import {SingleWhitespace, isSingleWhiteSpaceCharacter} from "./WhitespaceToken"; 4 | import {isNum, NumericChar} from "./NumericToken"; 5 | 6 | export interface IdToken extends TypedToken { 7 | type: "id"; 8 | string: string; 9 | } 10 | 11 | export function newIdToken(string: string): IdToken { 12 | return { 13 | type: "id", 14 | string 15 | }; 16 | } 17 | 18 | export function isIdToken(string: any): string is IdToken { 19 | return string.type === "id" && typeof string.string === "string"; 20 | } 21 | 22 | export function isIdChar(c: string): c is IdChar { 23 | return !(isSpecialChar(c) || isNum(c) || isSingleWhiteSpaceCharacter(c)); 24 | } 25 | 26 | export type IdChar = SpecialChar | NumericChar | SingleWhitespace; 27 | -------------------------------------------------------------------------------- /src/bibfile/datatype/string/QuotedString.ts: -------------------------------------------------------------------------------- 1 | import { 2 | BibOuterStringComponent, 3 | BibStringComponent 4 | } from "./BibStringComponent"; 5 | 6 | import {BibStringData} from "./BibStringData"; 7 | 8 
| /** 9 | * thisObject = "A string between quotes" 10 | */ 11 | export class QuotedString extends BibStringComponent { 12 | constructor(braceDepth: number, data: BibStringData) { 13 | super("quotedstring", braceDepth, data); 14 | } 15 | } 16 | 17 | export class OuterQuotedString extends BibOuterStringComponent { 18 | constructor(data: BibStringData) { 19 | super("quotedstringwrapper", data); 20 | } 21 | } 22 | 23 | export function isOuterQuotedString(x: any): x is OuterQuotedString { 24 | return x.type === "quotedstringwrapper"; 25 | } 26 | 27 | export function isQuotedString(x: any): x is QuotedString { 28 | return x.type === "quotedstring"; 29 | } -------------------------------------------------------------------------------- /src/lexer/WhitespaceToken.ts: -------------------------------------------------------------------------------- 1 | import {TypedToken} from "./Token"; 2 | 3 | export const WS = "ws"; 4 | 5 | export function newWhitespace(string: string): WhitespaceToken { 6 | return { 7 | type: "ws", 8 | string 9 | }; 10 | } 11 | 12 | //noinspection JSUnusedGlobalSymbols 13 | export function isWhitespace(token: any): token is WhitespaceToken { 14 | return typeof token.string === "string" && token.type === WS; 15 | } 16 | 17 | 18 | export interface WhitespaceToken extends TypedToken { 19 | type: "ws"; 20 | } 21 | 22 | export const singleWhitespaces = { 23 | " ": true, 24 | "\t": true, 25 | "\r": true, 26 | "\n": true 27 | }; 28 | 29 | export type SingleWhitespace = keyof typeof singleWhitespaces; 30 | 31 | export function isSingleWhiteSpaceCharacter(c: string): c is SingleWhitespace { 32 | return singleWhitespaces.hasOwnProperty(c); 33 | } 34 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | export * from "./bibfile/bib-entry/bibliographic-entity/Author"; 2 | export * from "./bibfile/bib-entry/bibliographic-entity/Authors"; 3 
| export * from "./bibfile/bib-entry/bibliographic-entity/mandatory-and-optional-fields"; 4 | export * from "./bibfile/bib-entry/BibComment"; 5 | export * from "./bibfile/bib-entry/BibEntry"; 6 | export * from "./bibfile/bib-entry/BibPreamble"; 7 | export * from "./bibfile/bib-entry/BibStringEntry"; 8 | export * from "./bibfile/datatype/string/bib-string-utils"; 9 | export * from "./bibfile/datatype/string/BibStringComponent"; 10 | export * from "./bibfile/datatype/string/BibStringData"; 11 | export * from "./bibfile/datatype/string/BracedString"; 12 | export * from "./bibfile/datatype/string/QuotedString"; 13 | export * from "./bibfile/datatype/string/StringRef"; 14 | export * from "./bibfile/datatype/KeyVal"; 15 | export * from "./bibfile/BibFile"; 16 | export * from "./util"; 17 | -------------------------------------------------------------------------------- /tslint.json: -------------------------------------------------------------------------------- 1 | { 2 | "rules": { 3 | "class-name": true, 4 | "comment-format": [true, 5 | "check-space" 6 | ], 7 | "indent": [true, 8 | "spaces" 9 | ], 10 | "one-line": [true, 11 | "check-open-brace", 12 | "check-whitespace" 13 | ], 14 | "no-var-keyword": true, 15 | "quotemark": [true, 16 | "double", 17 | "avoid-escape" 18 | ], 19 | "semicolon": true, 20 | "whitespace": [false, 21 | "check-branch", 22 | "check-decl", 23 | "check-operator", 24 | "check-module", 25 | "check-separator", 26 | "check-type" 27 | ], 28 | "typedef-whitespace": [true, { 29 | "call-signature": "nospace", 30 | "index-signature": "nospace", 31 | "parameter": "nospace", 32 | "property-declaration": "nospace", 33 | "variable-declaration": "nospace" 34 | }], 35 | "no-internal-module": true, 36 | "no-trailing-whitespace": true, 37 | "no-inferrable-types": true, 38 | "no-null-keyword": true, 39 | "prefer-const": true 40 | } 41 | } -------------------------------------------------------------------------------- /src/lexer/Token.ts: 
-------------------------------------------------------------------------------- 1 | export type Token = TypedToken | string | number; 2 | 3 | export interface TypedToken { 4 | type: string; 5 | string?: string; 6 | } 7 | 8 | export function newToken(type: string, string: string): TypedToken { 9 | return { 10 | type, 11 | string 12 | }; 13 | } 14 | 15 | 16 | export const specialChars = { 17 | "@": true, 18 | "(": true, 19 | ")": true, 20 | "{": true, 21 | "}": true, 22 | "#": true, 23 | "=": true, 24 | ",": true, 25 | "\\": true, 26 | "\"": true, 27 | }; 28 | 29 | export type SpecialChar = keyof typeof specialChars; 30 | 31 | export function isSpecialChar(c: string): c is SpecialChar { 32 | return specialChars.hasOwnProperty(c); 33 | } 34 | 35 | 36 | export const escapableChars = { 37 | "\\": true, 38 | "@": true, 39 | "{": true, 40 | "}": true 41 | }; 42 | 43 | export type EscapableChar = keyof typeof escapableChars; 44 | 45 | export function isEscapableChar(c: string): c is EscapableChar { 46 | return escapableChars.hasOwnProperty(c); 47 | } 48 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Maarten Trompper 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/bibfile/bib-entry/BibPreamble.ts: -------------------------------------------------------------------------------- 1 | import {mustBeArray} from "../../util"; 2 | import {parseBibEntriesAndNonEntries} from "../BibFile"; 3 | 4 | export class Preamble { 5 | readonly type: string; 6 | readonly data: any[]; 7 | readonly string: string; 8 | 9 | // TODO 10 | constructor(data: any[]) { 11 | this.type = ("preamble"); 12 | this.data = data; 13 | this.string = data.join(""); 14 | } 15 | 16 | toString() { 17 | return this.string; 18 | } 19 | } 20 | 21 | export function isPreamble(x: any): x is Preamble { 22 | return x.type === "preamble" && !!x.data; 23 | } 24 | 25 | 26 | // function parsePreambleContents(data: any) { 27 | // if (isString(data)) return data; 28 | // if (isString(data.type) && data.type === "@bib") 29 | // return "@" + data.string; 30 | // // if (isString(data.type) && data.type === "NON_ENTRY") 31 | // // return ; 32 | // if (isString(data.string)) return data.string; 33 | // return data; 34 | // } 35 | 36 | export function newPreambleNode(data: any): Preamble { 37 | const flattened = parseBibEntriesAndNonEntries(mustBeArray(data.data)); 38 | return new Preamble(flattened); 39 | } -------------------------------------------------------------------------------- /webpack.config.js: -------------------------------------------------------------------------------- 1 | const webpack = 
require('webpack'); 2 | // const path = require('path'); 3 | 4 | const plugins = [ 5 | new webpack.LoaderOptionsPlugin({ 6 | options: { 7 | tslint: { 8 | emitErrors: true, 9 | failOnHint: true 10 | } 11 | } 12 | }) 13 | ]; 14 | 15 | // const libraryName = "bibtex-parser"; 16 | // console.log(libraryName + /*"." + VERSION + */".min.js"); 17 | 18 | const config = { 19 | entry: { 20 | umd: __dirname + '/src/index.ts' 21 | }, 22 | devtool: 'source-map', 23 | output: { 24 | filename: "index.js", 25 | path: __dirname + '/', 26 | libraryTarget: 'umd', 27 | }, 28 | module: { 29 | rules: [ 30 | { 31 | enforce: 'pre', 32 | test: /\.tsx?$/, 33 | loader: 'tslint-loader', 34 | exclude: /node_modules/ 35 | }, 36 | { 37 | test: /\.tsx?$/, 38 | loader: "awesome-typescript-loader", 39 | options: { 40 | configFileName: "tsconfig.json", 41 | useBabel: false 42 | }, 43 | exclude: /node_modules/ 44 | } 45 | ] 46 | }, 47 | resolve: { 48 | extensions: ['.js', '.ts', '.jsx', '.tsx'] 49 | }, 50 | plugins: plugins 51 | }; 52 | 53 | module.exports = config; -------------------------------------------------------------------------------- /src/bibfile/datatype/string/BracedString.ts: -------------------------------------------------------------------------------- 1 | 2 | import {BibOuterStringComponent, BibStringComponent} from "./BibStringComponent"; 3 | import {BibStringData} from "./BibStringData"; 4 | 5 | /** 6 | * thisObject = {A string between braces} 7 | */ 8 | export class BracedString extends BibStringComponent { 9 | 10 | /** 11 | * A special character is a 12 | * part of a field starting with a left brace being at brace depth 0 immediately followed with a backslash, 13 | * and ending with the corresponding right brace. 14 | * It should be noticed that anything in a special character is 15 | * considered as being at brace depth 0, even if it is placed between another pair of braces. 
16 | */ 17 | readonly isSpecialCharacter: boolean; 18 | 19 | constructor(braceDepth: number, data: BibStringData) { 20 | super("bracedstring", braceDepth, data); 21 | 22 | // TODO braced strings inside a special character is treated as if it has brace depth 0. Maybe it's a good idea to mark these nested braces? 23 | this.isSpecialCharacter = braceDepth === 0 && data[0] === "\\"; 24 | } 25 | 26 | } 27 | 28 | export class OuterBracedString extends BibOuterStringComponent { 29 | constructor(data: BibStringData) { 30 | super("bracedstringwrapper", data); 31 | } 32 | } 33 | 34 | export function isOuterBracedString(x: any): x is OuterBracedString { 35 | return x.type === "bracedstringwrapper"; 36 | } 37 | 38 | export function isBracedString(x: any): x is BracedString { 39 | return x.type === "bracedstring"; 40 | } -------------------------------------------------------------------------------- /src/bibfile/bib-entry/BibComment.ts: -------------------------------------------------------------------------------- 1 | import {flattenMyArray, isArray, isString, mustBeString} from "../../util"; 2 | 3 | export class BibComment { 4 | readonly type: string; 5 | readonly data: string[]; 6 | readonly string: string; 7 | 8 | constructor(data: string[]) { 9 | this.type = "comment"; 10 | this.data = data; 11 | this.string = data.join(""); 12 | } 13 | 14 | toString() { 15 | return this.string; 16 | } 17 | } 18 | 19 | export class CommentEntry { 20 | readonly type: string; 21 | readonly data: string[]; 22 | readonly string: string; 23 | 24 | constructor(type: string, data: string[]) { 25 | this.type = type; 26 | this.data = data; 27 | this.string = data.join(""); 28 | } 29 | 30 | toString() { 31 | return this.string; 32 | } 33 | } 34 | 35 | export function isBibComment(n: any): n is BibComment { 36 | return n.type === "comment" && isArray(n.data); 37 | } 38 | 39 | const flattenO = (wrapper: any): string => isString(wrapper) ? wrapper 40 | : typeof wrapper === "number" ? 
wrapper.toString() 41 | // : (isString(wrapper.type) && wrapper.type === "@bib" && isString(wrapper.string)) ? "@" + wrapper.string 42 | : wrapper["type"] === "@bib" ? "@" + mustBeString(wrapper.string) 43 | : wrapper["type"] === "escapedEntry" ? "\\" + flattenO(wrapper.data) 44 | : mustBeString(wrapper.string) 45 | ; 46 | 47 | export function flattenPlainText(data: any[]): string[] { 48 | return flattenMyArray(data).map(flattenO); 49 | } -------------------------------------------------------------------------------- /src/bibfile/datatype/string/BibStringComponent.ts: -------------------------------------------------------------------------------- 1 | import {BibStringData, BibStringDatum} from "./BibStringData"; 2 | import {isNumber, isString} from "../../../util"; 3 | 4 | /** 5 | * A fully formed string (between {braces} or "quotes"). 6 | * Consists of 0 or many BibStringDatum 7 | */ 8 | export class BibStringComponent { 9 | readonly data: BibStringData; 10 | readonly type: string; 11 | 12 | /** 13 | * The brace depth of an item is the number of braces surrounding it (surrounding the field with braces instead of quotes does not modify the brace depth) 14 | */ 15 | readonly braceDepth: number; 16 | 17 | constructor(type: string, braceDepth: number, data: BibStringData) { 18 | this.type = type; 19 | this.braceDepth = braceDepth; 20 | this.data = data; 21 | } 22 | 23 | 24 | stringify(): string { 25 | return this.data.map(stringifyDatum).join(""); 26 | } 27 | } 28 | 29 | function isBibStringComponent(x: any): x is BibStringComponent { 30 | return typeof x.braceDepth === "number" && typeof x.type === "string"; 31 | } 32 | 33 | export function stringifyDatum(datum: BibStringDatum): string { 34 | if (isString(datum)) return datum; 35 | if (isNumber(datum)) return datum.toString(); 36 | if (isBibStringComponent(datum)) return datum.stringify(); 37 | // if (isStringRef(datum)) throw new Error("Unexpected state"); 38 | else throw new Error("Unexpected state"); 39 | } 40 | 
41 | export class BibOuterStringComponent extends BibStringComponent { 42 | constructor(type: string, data: BibStringData) { 43 | super(type, 0, data); 44 | } 45 | } -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "bibtex", 3 | "version": "0.9.0", 4 | "description": "Library for parsing a BiBTeX file in pure Javascript / Typescript", 5 | "main": "index.js", 6 | "types": "index.d.ts", 7 | "repository": { 8 | "type": "git", 9 | "url": "git@github.com:digitalheir/bibtex-js.git" 10 | }, 11 | "author": "Maarten Trompper", 12 | "license": "MIT", 13 | "keywords": [ 14 | "bibtex", 15 | "parser" 16 | ], 17 | "dependencies": {}, 18 | "devDependencies": { 19 | "@types/chai": "^4.0.1", 20 | "@types/mocha": "^2.2.43", 21 | "assert": "^1.4.1", 22 | "awesome-typescript-loader": "^3.2.3", 23 | "babel-cli": "^6.26.0", 24 | "babel-core": "^6.26.0", 25 | "babel-loader": "^7.1.2", 26 | "babel-preset-env": "^1.5.1", 27 | "babel-preset-es2015": "^6.24.1", 28 | "chai": "^4.1.0", 29 | "latex-to-unicode-converter": "^0.5.0", 30 | "mocha": "^10.1.0", 31 | "nearley": "^2.11.0", 32 | "rimraf": "^2.5.4", 33 | "ts-node": "^3.0.4", 34 | "tslint": "^5.7.0", 35 | "tslint-loader": "^3.5.3", 36 | "typescript": "^2.5.2", 37 | "webpack": "^3.4.1" 38 | }, 39 | "scripts": { 40 | "build": "npm run clean && npm run build:min", 41 | "build:min": "webpack -p", 42 | "build:ts": "tsc", 43 | "clean": "rimraf dist", 44 | "test": "mocha --compilers ts:ts-node/register,tsx:ts-node/register", 45 | "nearleyc": "nearleyc src/parser/parser.ne -o src/parser/parser.js", 46 | "nearleytest": "nearleyc src/parser/parser.ne -o src/parser/test.ts", 47 | "nearleyrr": "nearley-railroad src/parser/parser.ne -o bibtex.html" 48 | }, 49 | "contributors": [ 50 | { 51 | "name": "Maarten Trompper", 52 | "email": "maartentrompper@freedom.nl", 53 | "url": "https://github.com/digitalheir/" 
54 | } 55 | ] 56 | } 57 | -------------------------------------------------------------------------------- /src/util.ts: -------------------------------------------------------------------------------- 1 | import {BibStringDatum} from "./bibfile/datatype/string/BibStringData"; 2 | import {isStringRef} from "./bibfile/datatype/string/StringRef"; 3 | import {isBibStringComponent} from "./bibfile/datatype/string/bib-string-utils"; 4 | 5 | export function mustBeString(str: any, o?: any): string { 6 | if (typeof str !== "string") 7 | throw new Error("Expected to be string: " + JSON.stringify(o ? o : str)); 8 | return str; 9 | } 10 | 11 | export function mustBeDefined(t?: T, o?: any): T { 12 | if (t === undefined) 13 | throw new Error("Expected to be defined: " + JSON.stringify(o ? o : t)); 14 | return t; 15 | } 16 | 17 | export function mustBeArray(str: any, o?: any): any[] { 18 | if (!isArray(str)) 19 | throw new Error("Expected to be array: " + JSON.stringify(o ? o : str)); 20 | return str; 21 | } 22 | 23 | export function isArray(data: any): data is any[] { 24 | return !!data && data.constructor === Array; 25 | } 26 | 27 | export function isNumber(data: any): data is number { 28 | return typeof data === "number"; 29 | } 30 | 31 | 32 | export function isString(data: any): data is string { 33 | return typeof data === "string"; 34 | } 35 | 36 | export const flattenMyArray = function (arr: any[], result?: any[]): any[] { 37 | if (!result) result = []; 38 | for (let i = 0, length = arr.length; i < length; i++) { 39 | const value: any = arr[i]; 40 | if (Array.isArray(value)) { 41 | for (let i = 0, length = value.length; i < length; i++) { 42 | const value2: any = value[i]; 43 | if (Array.isArray(value2)) { 44 | flattenMyArray(value2, result); 45 | } else { 46 | result.push(value2); 47 | } 48 | } 49 | } else { 50 | result.push(value); 51 | } 52 | } 53 | return result; 54 | }; 55 | -------------------------------------------------------------------------------- 
/src/nearley.d.ts: -------------------------------------------------------------------------------- 1 | declare module "nearley" { 2 | export class Parser { 3 | constructor(a: any, b: any); 4 | 5 | results: any; 6 | 7 | feed(tokens: any[]): string; 8 | } 9 | } 10 | 11 | declare module "assert" { 12 | //noinspection JSUnusedGlobalSymbols 13 | export class AssertionError implements Error { 14 | name: string; 15 | message: string; 16 | actual: any; 17 | expected: any; 18 | operator: string; 19 | generatedMessage: boolean; 20 | 21 | constructor(options?: { message?: string; actual?: any; expected?: any; operator?: string; stackStartFunction?: Function }); 22 | } 23 | 24 | export function fail(actual?: any, expected?: any, message?: string, operator?: string): void; 25 | 26 | export function ok(value: any, message?: string): void; 27 | 28 | export function equal(actual: any, expected: any, message?: string): void; 29 | 30 | export function notEqual(actual: any, expected: any, message?: string): void; 31 | 32 | export function deepEqual(actual: any, expected: any, message?: string): void; 33 | 34 | export function notDeepEqual(acutal: any, expected: any, message?: string): void; 35 | 36 | export function strictEqual(actual: any, expected: any, message?: string): void; 37 | 38 | export function notStrictEqual(actual: any, expected: any, message?: string): void; 39 | 40 | export const throws: { 41 | (block: Function, message?: string): void; 42 | (block: Function, error: Function, message?: string): void; 43 | (block: Function, error: RegExp, message?: string): void; 44 | (block: Function, error: (err: any) => boolean, message?: string): void; 45 | }; 46 | 47 | export const doesNotThrow: { 48 | (block: Function, message?: string): void; 49 | (block: Function, error: Function, message?: string): void; 50 | (block: Function, error: RegExp, message?: string): void; 51 | (block: Function, error: (err: any) => boolean, message?: string): void; 52 | }; 53 | 54 | export function 
ifError(value: any): void; 55 | } -------------------------------------------------------------------------------- /src/bibfile/bib-entry/bibliographic-entity/Authors.ts: -------------------------------------------------------------------------------- 1 | import {isArray, isNumber} from "../../../util"; 2 | import {AuthorName, parseAuthorName} from "./Author"; 3 | import {isOuterQuotedString} from "../../datatype/string/QuotedString"; 4 | import {BibOuterStringComponent} from "../../datatype/string/BibStringComponent"; 5 | import {BibStringData} from "../../datatype/string/BibStringData"; 6 | import { 7 | flattenQuotedStrings, 8 | globContiguousStrings, 9 | isContiguousSimpleString, 10 | joinContiguousSimpleStrings, 11 | splitOnAnd 12 | } from "../../datatype/string/bib-string-utils"; 13 | import {FieldValue} from "../../datatype/KeyVal"; 14 | 15 | 16 | /** 17 | * Represents a list of authors 18 | */ 19 | export class Authors extends BibOuterStringComponent { 20 | readonly authors$: AuthorName[]; 21 | 22 | constructor(fieldValue: FieldValue) { 23 | const data = isNumber(fieldValue) ? [fieldValue] : fieldValue.data; 24 | super("authors", data); 25 | 26 | const authorNames = determineAuthorNames$(fieldValue); 27 | this.authors$ = authorNames.map(name => parseAuthor(name)); 28 | } 29 | } 30 | 31 | 32 | function parseAuthor(data: BibStringData) { 33 | return parseAuthorName(data); 34 | } 35 | 36 | 37 | export function determineAuthorNames$(data: FieldValue): BibStringData[] { 38 | if (isNumber(data)) { 39 | return determineAuthorNames([data]); 40 | } else { 41 | return determineAuthorNames(data.data, isOuterQuotedString(data)); 42 | } 43 | } 44 | 45 | 46 | function determineAuthorNames(data: BibStringData, hideQuotes?: boolean): BibStringData[] { 47 | const globbed = globContiguousStrings( 48 | flattenQuotedStrings(data, hideQuotes) 49 | ); 50 | const normalizedString: BibStringData = globbed.map(e => isContiguousSimpleString(e) ? 
joinContiguousSimpleStrings(e) : e); 51 | return splitOnAnd(normalizedString); 52 | } 53 | 54 | 55 | /** 56 | * Returns `x` unchanged if it passes `isAuthors`; otherwise throws an Error describing the rejected value. 57 | */ 58 | export function mustBeAuthors(x: any): Authors { 59 | if (!isAuthors(x)) throw new Error("Expected to be Authors: " + JSON.stringify(x)); 60 | return x; 61 | } 62 | 63 | 64 | /** 65 | * Type guard: true when `x` is a truthy value with an `authors$` array and `type === "authors"`. 66 | * Nullish input yields `false` instead of raising a TypeError. 67 | */ 68 | export function isAuthors(x: any): x is Authors { 69 | return !!x && isArray(x["authors$"]) && x.type === "authors"; 70 | } 71 | -------------------------------------------------------------------------------- /src/bibfile/datatype/KeyVal.ts: -------------------------------------------------------------------------------- 1 | import {OuterBracedString} from "./string/BracedString"; 2 | import {OuterQuotedString} from "./string/QuotedString"; 3 | import {isNumber, mustBeArray} from "../../util"; 4 | import {parseStringComponent} from "./string/bib-string-utils"; 5 | import {BibOuterStringComponent} from "./string/BibStringComponent"; 6 | 7 | /** 8 | * A key to value mapping such as `field = {name}`. 9 | */ 10 | export interface KeyVal { 11 | readonly key: string; 12 | readonly value: FieldValue; 13 | } 14 | 15 | export function isKeyVal(data: any): data is KeyVal { 16 | return typeof data.key === "string" 17 | && data.value !== undefined; 18 | } 19 | 20 | export function newKeyVal(data: any): KeyVal { 21 | if (isKeyVal(data)) { 22 | return { 23 | key: data.key, 24 | value: parseFieldValue(data.value), 25 | }; 26 | } else { 27 | throw new Error("Was not a KeyVal: " + JSON.stringify(data)); 28 | } 29 | } 30 | 31 | export function parseFieldValue(value: any): FieldValue { 32 | if (isNumber(value)) { 33 | return value; 34 | } 35 | 36 | const data = mustBeArray(value.data); 37 | switch (value.type) { 38 | case "quotedstringwrapper": 39 | if (data.length === 1 && isNumber(data[0])) 40 | // A single number is in a quoted string wrapper 41 | // because the parser considered it part of a 42 | // concatenated string 43 | return data[0]; 44 | 45 | return new OuterQuotedString(data.map(e => parseStringComponent(0, e))); 46 | 47 | case "bracedstringwrapper": 
48 | return new OuterBracedString(data.map(e => parseStringComponent(0, e))); 49 | 50 | default: 51 | throw new Error("Unexpected value: " + JSON.stringify(value)); 52 | } 53 | } 54 | 55 | /** 56 | * Values (i.e. right hand sides of each assignment) can be either between curly braces or between 57 | * double quotes. The main difference is that you can write double quotes in the first case, and not 58 | * in the second case. 59 | * 60 | * For numerical values, curly braces and double quotes can be omitted. 61 | */ 62 | export type FieldValue = number | BibOuterStringComponent; 63 | 64 | export function normalizeFieldValue(field?: FieldValue): string | number | undefined { 65 | if(!field) return undefined; 66 | if(isNumber(field)) return field; 67 | else return field.stringify(); 68 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Bibtex.js 2 | 3 | [![npm version](https://badge.fury.io/js/bibtex.svg)](https://www.npmjs.com/package/bibtex) 4 | [![Build Status](https://travis-ci.org/cacfd3a/bibtex-js.svg?branch=master)](https://travis-ci.org/cacfd3a/bibtex-js) 5 | [![License](https://img.shields.io/npm/l/bibtex.svg)](https://github.com/cacfd3a/bibtex-js/blob/master/LICENSE) 6 | [![Code Climate](https://codeclimate.com/github/cacfd3a/bibtex-js/badges/gpa.svg)](https://codeclimate.com/github/cacfd3a/bibtex-js) 7 | 8 | Library for parsing BibTeX .bib files, based mostly on the excellent guide to BibTeX, [*Tame the BeaST*](http://tug.ctan.org/info/bibtex/tamethebeast/ttb_en.pdf). 9 | 10 | [Live demo in browser](https://cacfd3a.github.io/bibtex-js/) 11 | 12 | Written in Typescript, compiled to ES5 Javascript (with typings provided). 13 | 14 | This module literally just parses a BibTex file and processes it **as far as BibTeX goes**. It doesn't process TeX commands (i.e., `{\"o}` is not translated to `ö`). 
It does, however, parse author names, as this is part of the BibTeX standard (see example below). If you want to actually work with a bibliography, look into [Bibliography.js](https://github.com/cacfd3a/bibliography-js) (which is mine) or [Citation.js](https://github.com/larsgw/citation.js) or [Zotero](https://github.com/zotero/zotero). If you want to convert LaTeX to Unicode, look into my [latex-to-unicode-converter](https://github.com/cacfd3a/latex-to-unicode-converter). 15 | 16 | ## Implementation 17 | Not all internal BibTeX functions are implemented, simply because I don't need them personally and can't imagine anyone needing them. Most notably, [sorting entries is still an open issue](https://github.com/cacfd3a/bibtex-js/issues/1) because BibTeX uses a somewhat complicated algorithm which requires a function that "purifies" field values, which, for example, makes `{\ss}` equivalent to `ss` but makes `ä` come after `z`. I am unsure whether that is actually what anyone wants these days, though. A modern approach would be to sort using Unicode collation. 
18 | 19 | [Pull requests and issues are welcome.](https://github.com/cacfd3a/bibtex-js/issues) 20 | 21 | ## Usage 22 | 23 | Download standalone ES5 file ([latest](https://github.com/cacfd3a/bibtex-js/releases/latest)) or get [from npm](https://www.npmjs.com/package/bibtex): 24 | 25 | ``` 26 | npm install bibtex 27 | ``` 28 | 29 | ```js 30 | import {parseBibFile, normalizeFieldValue} from "bibtex"; 31 | 32 | const bibFile = parseBibFile(` 33 | @InProceedings{realscience, 34 | author = {Fred Trump and مهدي N\\"allen and henQ, jr, Mathize}, 35 | title = {You Won't Believe This Proof That {P} \\gtreqqless {NP} Using Super-{T}uring Computation Near Big Black Holes}, 36 | booktitle = {Book of Science}, 37 | month = {September}, 38 | year = {2001}, 39 | address = {Dordrecht}, 40 | publisher = {Willems Uitgeverij}, 41 | url = {https://github.com/cacfd3a/}, 42 | pages = {6--7} 43 | } 44 | `); 45 | 46 | const entry = bibFile 47 | .getEntry("realscience") // Keys are case-insensitive 48 | 49 | const fieldValue = entry 50 | .getField("TITLE"); // This is a complex BibTeX string 51 | 52 | console.log( 53 | // But we can normalize to a JavaScript string 54 | normalizeFieldValue(fieldValue) 55 | ); 56 | 57 | const authorField = entry 58 | .getField("author"); // This is a special object, divided into first names, vons and last names according to BibTeX spec 59 | 60 | authorField.authors$.map((author, i) => console.log("Author: " 61 | + (author.firstNames 62 | .concat(author.vons) 63 | .concat(author.lastNames) 64 | .concat(author.jrs)).join(" "))); 65 | 66 | ``` 67 | -------------------------------------------------------------------------------- /src/lexer/Lexer.ts: -------------------------------------------------------------------------------- 1 | import {TypedToken, isSpecialChar, newToken, Token} from "./Token"; 2 | import {isSingleWhiteSpaceCharacter, WhitespaceToken, SingleWhitespace, newWhitespace} from "./WhitespaceToken"; 3 | import {isNum, NumericChar, NumberToken, 
newNumber} from "./NumericToken"; 4 | import {IdToken, isIdChar, newIdToken} from "./IdToken"; 5 | import {isBibType, bibTypes} from "./BibBlockTypes"; 6 | 7 | export default class Lexer { 8 | private str: string; 9 | private len: number; 10 | private pos: number; 11 | 12 | constructor(string: string) { 13 | this.str = string; 14 | this.len = string.length; 15 | this.pos = 0; 16 | } 17 | 18 | getStringUntilNonEscapedChar(terminalRegex: RegExp | string): string { 19 | // if (typeof terminalRegex === 'string') { 20 | // } 21 | const chars: string[] = []; 22 | for (let i = this.pos; i < this.len + 1; i++) { 23 | this.pos = i; 24 | if (this.str.charAt(i) == "\\" && this.str.charAt(i + 1).match(terminalRegex)) { 25 | i++; 26 | this.pos = i; 27 | } else if (this.str.charAt(i).match(terminalRegex)) { 28 | break; 29 | } 30 | chars.push(this.str.charAt(i)); 31 | } 32 | return chars.join(""); 33 | } 34 | readTokens(): Token[] { 35 | const tokens: Token[] = []; 36 | let nextToken; 37 | while (nextToken = this.readNextToken()) 38 | tokens.push(nextToken); 39 | return tokens; 40 | } 41 | 42 | readNextToken(): Token | undefined { 43 | if (this.pos >= this.str.length) 44 | return undefined; 45 | 46 | const currentChar: string = this.str.charAt(this.pos); 47 | 48 | if (isSingleWhiteSpaceCharacter(currentChar)) 49 | return this.eatWhiteSpace(); 50 | else if (isSpecialChar(currentChar)) { 51 | return this.eatSpecialChars(currentChar); 52 | } else if (isNum(currentChar)) { 53 | return this.eatNumericString(currentChar); 54 | } else { 55 | return this.eatIdString(); 56 | } 57 | } 58 | 59 | // NOTE: not needed? delete? 
60 | // isEscapeChar(i: number): boolean { 61 | // if (this.str.charAt(i) == '\\') { 62 | // // Might be an escaped character 63 | // const nextChar = this.str.charAt(i + 1); 64 | // 65 | // // We've escaped a special character 66 | // return isEscapableChar(nextChar); 67 | // } else return false; 68 | // } 69 | 70 | private eatIdString(): IdToken { 71 | // id 72 | const chars: string[] = []; 73 | const pos2 = this.pos; 74 | for (let i = pos2; i < this.len + 1; i++) { 75 | this.pos = i; 76 | // console.log(this.pos, i); 77 | // console.log(this.pos, this.str.charAt(i)); 78 | const charAtI = this.str.charAt(i); 79 | if (!isIdChar(charAtI)) { 80 | break; 81 | // else if (charAtI == '\\' && (this.str.charAt(i + 1) == '\\' || isSpecialChar(this.str.charAt(i + 1)))) { 82 | // i++; 83 | // this.pos = i; 84 | // chars.push(this.str.charAt(i)); 85 | } else { 86 | chars.push(charAtI); 87 | } 88 | } 89 | 90 | return newIdToken(chars.join("").trim()); 91 | } 92 | 93 | private eatNumericString(startAt: NumericChar): number | NumberToken { 94 | const nums: NumericChar[] = [startAt]; 95 | 96 | const nextPos = this.pos + 1; 97 | for (let newPos = nextPos; newPos < this.len + 1; newPos++) { 98 | this.pos = newPos; 99 | 100 | const newChar = this.str.charAt(newPos); 101 | if (isNum(newChar)) 102 | nums.push(newChar); 103 | else 104 | break; 105 | } 106 | 107 | const numericString: string = nums.join(""); 108 | 109 | if (nums[0] === "0") // If it starts with 0, return as a string 110 | return newNumber(numericString); 111 | else { 112 | const number = Number.parseInt(numericString); 113 | return Number.isFinite(number) ? 
number : newNumber(numericString); 114 | } 115 | } 116 | 117 | private eatSpecialChars(startAt: string): string | TypedToken { 118 | this.pos++; 119 | if (startAt === "@") { 120 | const type = this.getStringUntilNonEscapedChar("{").trim().toLowerCase(); 121 | if (isBibType(type)) 122 | return newToken(bibTypes[type], type); 123 | else 124 | return newToken("@bib", type); 125 | } 126 | return startAt; 127 | } 128 | 129 | private eatWhiteSpace(): WhitespaceToken { 130 | const chars: SingleWhitespace[] = []; 131 | while (this.pos < this.len + 1) { 132 | const c = this.str.charAt(this.pos); 133 | // ignore whitespaces 134 | if (isSingleWhiteSpaceCharacter(c)) { 135 | chars.push(c); 136 | this.pos++; 137 | } else break; 138 | } 139 | return newWhitespace(chars.join("")); 140 | } 141 | } -------------------------------------------------------------------------------- /src/bibfile/bib-entry/BibEntry.ts: -------------------------------------------------------------------------------- 1 | import {Authors, mustBeAuthors} from "./bibliographic-entity/Authors"; 2 | import { 3 | findError, 4 | hasMandatoryFields, 5 | mandatoryFields 6 | } from "./bibliographic-entity/mandatory-and-optional-fields"; 7 | import {resolveStringReference} from "./BibStringEntry"; 8 | import {FieldValue, normalizeFieldValue, parseFieldValue} from "../datatype/KeyVal"; 9 | 10 | /** 11 | * Represents a single "@[entityName]" entity, not a special entity such as @string 12 | */ 13 | export class BibEntry { 14 | readonly type: string; 15 | readonly _id: string; 16 | 17 | readonly fields: EntryFields; 18 | // noinspection JSUnusedGlobalSymbols 19 | readonly fields$: EntryFields; 20 | 21 | /** 22 | * When sorting, BibTEX computes a string, named 23 | sort.key$, for each entry. The sort.key$ string is an (often long) string defining the order 24 | in which entries will be sorted. To avoid any ambiguity, sort.key$ should only contain alphanumeric 25 | characters. 
Classical non-alphanumeric characters, except special characters, will 26 | be removed by a BibTeX function named purify$. For special characters, purify$ removes 27 | spaces and LaTeX commands (strings beginning with a backslash), even those placed between 28 | brace pairs. Everything else is left unmodified. For instance, t\^ete, t{\^e}te and t{\^{e}}te 29 | are transformed into tete, while tête gives tête; Bib{\TeX} gives Bib and Bib\TeX becomes 30 | BibTeX. There are thirteen LaTeX commands that won’t follow the above rules: \i, \j, \oe, \OE, \ae, \AE, 31 | \aa, \AA, \o, \O, \l, \L, \ss. Those commands correspond to ı, ȷ, œ, Œ, æ, Æ, å, Å, ø, Ø, ł, Ł, 32 | ß, and purify$ transforms them (if they are in a special character) into i, j, oe, OE, ae, AE, aa, 33 | AA, o, O, l, L, ss, respectively. 34 | */ 35 | readonly sortkey$: string; 36 | 37 | /** 38 | The second transformation applied to a title is to be turned to lower case (except the first character). 39 | The function named change.case$ does this job. But it only applies to letters that are 40 | at brace depth 0, except within a special character. In a special character, brace depth is always 41 | 0, and letters are switched to lower case, except LaTeX commands, which are left unmodified. 
42 | */ 43 | readonly title$: string; 44 | 45 | constructor(type: string, id: string, fields: EntryFields) { 46 | this.type = type; 47 | this._id = id; 48 | 49 | this.fields = fields; 50 | 51 | 52 | // TODO implement; see above 53 | this.sortkey$ = ""; 54 | this.title$ = ""; 55 | } 56 | 57 | getField(key: string): FieldValue | undefined { 58 | return this.fields[key.toLowerCase()]; 59 | } 60 | 61 | getFieldAsString(key: string): string | number| undefined { 62 | const field: FieldValue | undefined = this.getField(key); 63 | return normalizeFieldValue(field); 64 | } 65 | 66 | getAuthors(): Authors | undefined { 67 | const field = this.fields["author"]; 68 | if (field === undefined) return field; 69 | return mustBeAuthors(field); 70 | } 71 | } 72 | 73 | 74 | export interface EntryFields { 75 | [k: string]: FieldValue; 76 | } 77 | 78 | export function parseEntryFields(fields: any): EntryFields { 79 | const fieldz: EntryFields = {}; 80 | Object.keys(fields).forEach(key => { 81 | switch (key) { 82 | default: 83 | fieldz[key] = parseFieldValue(fields[key]); 84 | break; 85 | } 86 | }); 87 | return fieldz; 88 | } 89 | 90 | 91 | 92 | 93 | // export function parseComplexStringOuter(obj: any): OuterQuotedString | OuterBracedString | number { 94 | // if (isString(obj)) return [obj]; 95 | // 96 | // switch (mustBeString(obj.type)) { 97 | // case "quotedstringwrapper": 98 | // case "bracedstringwrapper": 99 | // if (!isArray(obj.data)) 100 | // throw new Error("Expect array for data: " + JSON.stringify(obj)); 101 | // 102 | // return obj.data.map(parseStringy); 103 | // default: 104 | // throw new Error("Unexpected complex string type: " + obj.type); 105 | // } 106 | // } 107 | 108 | 109 | export function isBibEntry(x: any): x is BibEntry { 110 | return typeof x["type"] === "string" 111 | && typeof x["_id"] === "string" 112 | && !!x["fields"]; 113 | } 114 | 115 | export function processEntry(entry: BibEntry, strings$: { [p: string]: FieldValue }) { 116 | // TODO do something 
with this? 117 | // if (hasMandatoryFields(entry.type)) 118 | // mandatoryFields[entry.type] 119 | // .map(e => findError(entry, e)) 120 | // .forEach(e => { 121 | // if (!!e) console.warn(e.message); 122 | // }) 123 | // ; 124 | 125 | const processedFields: EntryFields = {}; 126 | 127 | const fields$ = entry.fields; 128 | 129 | Object.keys(entry.fields).forEach((key: string) => { 130 | const field$ = resolveStringReference({}, processedFields, strings$, fields$[key]); 131 | switch (key) { 132 | case "author": 133 | processedFields[key] = new Authors(field$); 134 | break; 135 | case "title": 136 | processedFields[key] = (field$); 137 | break; 138 | case "incollection": 139 | // TODO cross reference 140 | default: 141 | processedFields[key] = field$; 142 | break; 143 | } 144 | }); 145 | 146 | 147 | return new BibEntry( 148 | entry.type, 149 | entry._id, 150 | processedFields 151 | ); 152 | } 153 | -------------------------------------------------------------------------------- /src/bibfile/bib-entry/BibStringEntry.ts: -------------------------------------------------------------------------------- 1 | import {KeyVal, isKeyVal, newKeyVal, FieldValue} from "../datatype/KeyVal"; 2 | import {isStringRef, StringRef} from "../datatype/string/StringRef"; 3 | import {isOuterQuotedString, isQuotedString, OuterQuotedString, QuotedString} from "../datatype/string/QuotedString"; 4 | import {BracedString, isBracedString, isOuterBracedString, OuterBracedString} from "../datatype/string/BracedString"; 5 | import {isNumber, isString} from "../../util"; 6 | import {BibStringComponent} from "../datatype/string/BibStringComponent"; 7 | import {BibStringData} from "../datatype/string/BibStringData"; 8 | import {isBibStringComponent} from "../datatype/string/bib-string-utils"; 9 | 10 | /** 11 | * An "@string{}" entry 12 | */ 13 | export class BibStringEntry { 14 | readonly type: string; 15 | 16 | readonly key: string; 17 | readonly value: FieldValue; 18 | 19 | public constructor(key: 
string, value: FieldValue) { 20 | this.type = "string"; 21 | this.key = key; 22 | this.value = value; 23 | } 24 | } 25 | 26 | export function newStringEntry(data: any): BibStringEntry { 27 | const {key, value}: KeyVal = convertToKeyVal(data); 28 | return new BibStringEntry(key, value); 29 | } 30 | 31 | function convertToKeyVal(data: any): KeyVal { 32 | if (isKeyVal(data)) { 33 | return newKeyVal(data); 34 | } else { 35 | if (data.type !== "string") { 36 | throw new Error("Unexpected node: " + JSON.stringify(data)); 37 | } 38 | return convertToKeyVal(data.data); 39 | } 40 | } 41 | 42 | // function resolveStringDeclarations(wrapper: FieldValue, 43 | // compiledSoFar: { [key: string]: FieldValue }, 44 | // rawStrings: { [key: string]: FieldValue }) { 45 | // if (isNumber(wrapper)) 46 | // return wrapper; 47 | // 48 | // return copyWithResolvedStringReferences(wrapper, compiledSoFar, rawStrings); 49 | // 50 | // // else 51 | // // throw new Error("Unexpected object to resolve: " + JSON.stringify(wrapper)); 52 | // } 53 | 54 | export function resolveStrings(strings: { [key: string]: FieldValue }): { [key: string]: FieldValue } { 55 | const resolved: { [key: string]: FieldValue } = {}; 56 | Object.keys(strings).forEach(key => { 57 | if (!resolved[key]) 58 | resolved[key] = resolveStringReference({}, resolved, strings, strings[key]); 59 | }); 60 | return resolved; 61 | } 62 | 63 | export function resolveStringReferences(o: BibStringComponent, seenBeforeStack: { [key: string]: boolean }, 64 | alreadyResolved: { [key: string]: /*Resolved*/FieldValue }, 65 | refs: { [key: string]: FieldValue }): BibStringData { 66 | return o.data.map(datum => { 67 | if (isString(datum) || isNumber(datum)) return datum; 68 | else if (isStringRef(datum)) return resolveStringRef(seenBeforeStack, refs, datum, alreadyResolved); 69 | else if (isBibStringComponent(datum)) return copyWithResolvedStringReferences(datum, seenBeforeStack, alreadyResolved, refs); 70 | else throw new Error(); 71 | }); 72 
| } 73 | 74 | export function resolveStringReference(seenBeforeStack: { [key: string]: boolean }, 75 | alreadyResolved: { [p: string]: FieldValue }, 76 | refs: { [p: string]: FieldValue }, 77 | data: FieldValue): FieldValue { 78 | if (isNumber(data)) { 79 | return data; 80 | } else if (isOuterBracedString(data) || isOuterQuotedString(data)) { 81 | return copyOuterWithResolvedStringReferences(data, seenBeforeStack, alreadyResolved, refs); 82 | } 83 | if (isStringRef(data)) { 84 | return resolveStringRef(seenBeforeStack, refs, data, alreadyResolved); 85 | } 86 | 87 | // else if (isBibStringComponent(data)) 88 | // return data.copyWithResolvedStringReferences(alreadyResolved, refs); 89 | // else throw new Error(); 90 | return data; 91 | } 92 | 93 | function resolveStringRef(seenBeforeStack: { [key: string]: boolean }, 94 | refs: { [key: string]: FieldValue }, 95 | data: StringRef, 96 | alreadyResolved: { [key: string]: FieldValue }): FieldValue { 97 | const refName = data.stringref; 98 | if (seenBeforeStack[refName]) 99 | throw new Error("Cycle detected: " + refName); 100 | if (alreadyResolved[refName]) { 101 | return alreadyResolved[refName]; 102 | } 103 | if (!refs[refName]) 104 | throw new Error(`Unresolved reference: "${data.stringref}" (${JSON.stringify(data)})`); 105 | 106 | alreadyResolved[refName] = resolveStringReference( 107 | Object.assign({}, seenBeforeStack, {[refName]: true}), 108 | alreadyResolved, 109 | refs, 110 | refs[refName] 111 | ); 112 | return alreadyResolved[refName]; 113 | } 114 | 115 | 116 | export function copyWithResolvedStringReferences(obj: BibStringComponent, 117 | seenBeforeStack: { [key: string]: boolean }, 118 | alreadyResolved: { [key: string]: /*Resolved*/FieldValue }, 119 | refs: { [key: string]: FieldValue }): OuterQuotedString | OuterBracedString { 120 | const newData = resolveStringReferences(obj, seenBeforeStack, alreadyResolved, refs); 121 | 122 | const braceDepth: number = obj.braceDepth; 123 | if (isQuotedString(obj)) 124 | 
return new QuotedString(braceDepth, newData); 125 | if (isBracedString(obj)) 126 | return new BracedString(braceDepth, newData); 127 | if (isOuterQuotedString(obj)) 128 | return new OuterQuotedString(newData); 129 | if (isOuterBracedString(obj)) 130 | return new OuterBracedString(newData); 131 | else 132 | throw new Error(); 133 | } 134 | 135 | export function copyOuterWithResolvedStringReferences(obj: OuterQuotedString | OuterBracedString, 136 | seenBeforeStack: { [key: string]: boolean }, 137 | alreadyResolved: { [key: string]: /*Resolved*/FieldValue }, 138 | refs: { [key: string]: FieldValue }): OuterQuotedString | OuterBracedString { 139 | const copied = copyWithResolvedStringReferences( 140 | obj, 141 | seenBeforeStack, 142 | alreadyResolved, 143 | refs 144 | ); 145 | if (!isOuterBracedString(copied) && !isOuterQuotedString(copied)) throw new Error(); 146 | return copied; 147 | } -------------------------------------------------------------------------------- /src/bibfile/BibFile.ts: -------------------------------------------------------------------------------- 1 | import * as nearley from "nearley"; 2 | 3 | import {grammar} from "../parser/ts-parser"; 4 | 5 | import {isArray, mustBeString} from "../util"; 6 | import {FieldValue, isKeyVal} from "./datatype/KeyVal"; 7 | import {BibEntry, isBibEntry, parseEntryFields, processEntry} from "./bib-entry/BibEntry"; 8 | import {BibComment, CommentEntry, flattenPlainText, isBibComment} from "./bib-entry/BibComment"; 9 | import {isPreamble, Preamble, newPreambleNode} from "./bib-entry/BibPreamble"; 10 | import {newStringEntry, resolveStrings, BibStringEntry} from "./bib-entry/BibStringEntry"; 11 | import Lexer from "../lexer/Lexer"; 12 | 13 | 14 | export type NonBibComment = BibEntry | CommentEntry | BibStringEntry | Preamble; 15 | 16 | /** 17 | * A bibfile is a sequence of entries, with comments interspersed 18 | */ 19 | export class BibFilePresenter { 20 | readonly content: (NonBibComment | BibComment)[]; 21 | 
readonly comments: BibComment[]; 22 | 23 | readonly entries_raw: BibEntry[]; 24 | readonly entries$: { [key: string]: BibEntry }; 25 | 26 | /** 27 | * Anything declared in a @preamble command will be concatenated and put in a variable 28 | named preamble$, for being used in the bibliography style and, generally, inserted at the beginning of 29 | the .bbl file, just before the thebibliography environment. This is useful for defining new commands 30 | used in the bibliography. Here is a small example: 31 | 32 | \@preamble{ "\makeatletter" } 33 | \@preamble{ "\@ifundefined{url}{\def\url#1{\texttt{#1}}}{}" } 34 | \@preamble{ "\makeatother" } 35 | 36 | This way, you may safely use the \url command in your entries. If it is not defined at the beginning 37 | of the bibliography, the default command defined in the @preamble will be used. 38 | Please note that you should never define style settings in the @preamble of a bibliography database, 39 | since it would be applied to any bibliography built from this database. 
40 | */ 41 | readonly preambles_raw: Preamble[]; 42 | readonly preamble$: string; 43 | 44 | readonly strings_raw: { [k: string]: FieldValue }; 45 | /** 46 | * `strings`, but with all references resolved 47 | */ 48 | readonly strings$: { [k: string]: FieldValue }; 49 | 50 | 51 | constructor(content: (NonBibComment | BibComment)[]) { 52 | this.content = content; 53 | this.comments = content.filter(isBibComment).map(c => { 54 | if (isBibComment(c))return c; else throw new Error(); 55 | }); 56 | 57 | 58 | 59 | this.preambles_raw = content.filter(c => isPreamble(c)).map(c => { 60 | if (isPreamble(c)) return c; else throw new Error(); 61 | }); 62 | this.preamble$ = this.preambles_raw.map(p => p.toString()).join("\n"); 63 | 64 | const strings: { [k: string]: FieldValue } = {}; 65 | this.content.forEach(entry => { 66 | if (isKeyVal(entry)) { 67 | if (!!strings[entry.key]) 68 | throw new Error("String with id " + entry.key + " was defined more than once"); 69 | strings[entry.key] = entry.value; 70 | } 71 | } 72 | ); 73 | 74 | this.strings_raw = strings; 75 | this.strings$ = resolveStrings(strings); 76 | 77 | this.entries_raw = content.filter(c => isBibEntry(c)).map(c => { 78 | if (isBibEntry(c)) return c; 79 | else throw new Error(); 80 | }); 81 | 82 | const entryMap: { [k: string]: BibEntry } = {}; 83 | this.entries_raw.forEach((entry: BibEntry) => { 84 | const key = entry._id.toLowerCase(); 85 | /** 86 | * BibTEX will complain if two entries have the same internal key, even if they aren’t capitalized in the same 87 | * way. For instance, you cannot have two entries named Example and example. 88 | * In the same way, if you cite both example and Example, BibTEX will complain. 
Indeed, it would 89 | * have to include the same entry twice, which probably is not what you want 90 | */ 91 | if (!!entryMap[key]) throw new Error("Entry with id " + key + " was defined more than once"); 92 | entryMap[key] = processEntry(entry, this.strings$); 93 | }); 94 | this.entries$ = entryMap; 95 | } 96 | 97 | getEntry(id: string): BibEntry | undefined { 98 | return this.entries$[id.toLowerCase()]; 99 | } 100 | } 101 | 102 | function parseNonEntry(nonEntry: any): BibComment { 103 | if (!isArray(nonEntry.data) || nonEntry.type !== "NON_ENTRY") throw new Error(); 104 | return new BibComment(flattenPlainText(nonEntry.data)); 105 | } 106 | 107 | 108 | function parseEntry(entry: any): NonBibComment { 109 | switch (typeof entry) { 110 | case "object": 111 | const data = entry.data; 112 | if (typeof data["@type"] === "string") { 113 | return new BibEntry( 114 | data["@type"], 115 | data._id, 116 | parseEntryFields(data.fields) 117 | ); 118 | } 119 | 120 | const type = mustBeString(data.type); 121 | switch (type) { 122 | case "string": 123 | return newStringEntry(data); 124 | case "preamble": 125 | return newPreambleNode(data); 126 | // case "bracedstringwrapper": 127 | // return new BracedString(parseComplexStringOuter(data)); 128 | // case "quotedstringwrapper": 129 | // return new QuotedString(parseComplexStringOuter(data)); 130 | // case "braced": 131 | // case "quotedstring": 132 | default: 133 | throw new Error("Unexpected entry parsed: " + data.type); 134 | } 135 | default: 136 | throw new Error("Expected object as data for entry"); 137 | } 138 | } 139 | 140 | export const parseBibEntriesAndNonEntries = function (parse: any): (BibComment | NonBibComment)[] { 141 | return parse.map((entity: any) => { 142 | switch (entity.type) { 143 | case "NON_ENTRY": 144 | return (parseNonEntry(entity)); 145 | case "ENTRY": 146 | return (parseEntry(entity)); 147 | default: 148 | throw new Error("Expected ENTRY or NON_ENTRY"); 149 | } 150 | }); 151 | }; 152 | 153 | export 
function parseBibFile(input: string): BibFilePresenter { 154 | const p = new nearley.Parser(grammar.ParserRules, grammar.ParserStart); 155 | p.feed(new Lexer(input).readTokens()); 156 | const res = p.results; 157 | const parse = res[0]; 158 | 159 | return new BibFilePresenter(parseBibEntriesAndNonEntries(parse)); 160 | } 161 | -------------------------------------------------------------------------------- /src/lexer/deprecated_lexer_.js: -------------------------------------------------------------------------------- 1 | //const LITERAL = 'literal'; 2 | //// http://ftp.math.purdue.edu/mirrors/ctan.org/info/bibtex/tamethebeast/ttb_en.pdf 3 | //const modes = { 4 | // OUTSIDE_ENTRY: 'OUTSIDE_ENTRY', 5 | // ENTRY_TYPE: 'ENTRY_TYPE', 6 | // COMMENT_ENTRY: 'COMMENT_ENTRY', 7 | // BIB_ENTRY: 'BIB_ENTRY', 8 | // PREAMBULE: 'PREAMBULE', 9 | // CITATION_KEY: 'CITATION_KEY', 10 | // FIELD_KEY: 'FIELD_KEY', 11 | // FIELD_VAL: 'FIELD_VAL', 12 | //}; 13 | // 14 | //class TypedToken { 15 | // constructor(type, string) { 16 | // this.type = type; 17 | // this.string = string; 18 | // } 19 | //} 20 | // 21 | //export default class Lexer { 22 | // constructor(string) { 23 | // this.str = string; 24 | // this.len = string.length; 25 | // this.pos = 0; 26 | // this.mode = modes.OUTSIDE_ENTRY; 27 | // } 28 | // 29 | // /** 30 | // * Anything that is not a bibtex object (meaning "@TYPE{...}", is considered a comment and ignored. We suppose 31 | // * users can escape @ by prefixing a backslash 32 | // * 33 | // * A BibTeX tag is specified by its name followed by an equals-sign and the content. The tag's name is not 34 | // * case-sensitive. The content needs to be enclosed by either curly braces or quotation-marks. 
Which form 35 | // * of enclosure is used is depending on the user's taste, and both can be applied together in a single 36 | // * BibTeX entry, but there is one difference between those two methods: When quotation-marks are used, string 37 | // * concatenation using # is possible, but not when braces are used. 38 | // * 39 | // * @returns TypedToken object or null 40 | // */ 41 | // readNextToken() { 42 | // this.skipnontokens(); 43 | // if (this.pos >= this.str.length) return null; 44 | // 45 | // switch (this.mode) { 46 | // case modes.OUTSIDE_ENTRY: 47 | // if (this.str.charAt(this.pos) == '@') { 48 | // // Start entry type 49 | // this.mode = modes.ENTRY_TYPE; 50 | // this.pos++; 51 | // return new TypedToken('@', '@'); 52 | // } else { 53 | // // Still comment 54 | // const chars = []; 55 | // for (let i = this.pos; i < this.len; i++) { 56 | // if (this.str.charAt(i) == '\\' && this.str.charAt(i + 1) == '@') { 57 | // i++; 58 | // } else if (this.str.charAt(i) == '@') break; 59 | // this.pos = i; 60 | // chars.push(this.str.charAt(i)); 61 | // } 62 | // return new TypedToken(modes.OUTSIDE_ENTRY, chars.join("").trim()); 63 | // } 64 | // case modes.ENTRY_TYPE: 65 | // if (this.str.charAt(this.pos) == '{') { 66 | // // Start intra-entry 67 | // switch (this.entryType) { 68 | // case 'preambule': 69 | // this.mode = modes.PREAMBULE; 70 | // break; 71 | // case 'comment': 72 | // this.mode = modes.COMMENT_ENTRY; 73 | // break; 74 | // default: 75 | // this.mode = modes.CITATION_KEY; 76 | // break; 77 | // } 78 | // // console.log("mode is now " + this.mode); 79 | // this.pos++; 80 | // return new TypedToken('{', '{'); 81 | // } 82 | // // Anything until non-escaped { 83 | // let str = this.getStringUntilNonEscapedChar('{'); 84 | // this.entryType = str.trim().toLowerCase(); 85 | // return new TypedToken(modes.ENTRY_TYPE, this.entryType); 86 | // case modes.CITATION_KEY: 87 | // // Everything until non-escaped comma 88 | // let charAtCK = 
this.str.charAt(this.pos); 89 | // if (charAtCK == '}' || charAtCK == ',') { 90 | // this.pos++; 91 | // this.mode = modes.FIELD_KEY; 92 | // return new TypedToken(charAtCK, charAtCK); 93 | // } else return new TypedToken(modes.CITATION_KEY, this.getStringUntilNonEscapedChar(',').trim()); 94 | // case modes.FIELD_KEY: 95 | // // Should be [A-Za-z0-9] but we're lax; we parse until the first = 96 | // let charAtFK = this.str.charAt(this.pos); 97 | // if (charAtFK == '}' || charAtFK == '=') { 98 | // this.pos++; 99 | // this.mode = charAtFK == '}' ? modes.OUTSIDE_ENTRY : modes.FIELD_VAL; 100 | // return new TypedToken(charAtFK, charAtFK); 101 | // } else return new TypedToken(modes.FIELD_KEY, this.getStringUntilNonEscapedChar('=').trim()); 102 | // case modes.FIELD_VAL: 103 | // let charAtFV = this.str.charAt(this.pos); 104 | // if (charAtFV == '}' || charAtFV == ',') { 105 | // this.pos++; 106 | // this.mode = charAtFV == '}' ? modes.OUTSIDE_ENTRY : modes.FIELD_KEY; 107 | // return new TypedToken(charAtFV, charAtFV); 108 | // } else return new TypedToken(modes.FIELD_VAL, this.getStringUntilNonEscapedChar(/[,}]/).trim()); 109 | // case modes.PREAMBULE: 110 | // case modes.COMMENT_ENTRY: 111 | // // Everything until non-escaped } 112 | // if (this.str.charAt(this.pos) == '}') { 113 | // this.mode = modes.OUTSIDE_ENTRY; 114 | // this.pos++; 115 | // return new TypedToken('{', '{'); 116 | // } else { 117 | // return new TypedToken(modes.COMMENT_ENTRY, this.getStringUntilNonEscapedChar('}')); 118 | // } 119 | // case modes.BIB_ENTRY: 120 | // // Identifier 121 | // return new TypedToken(modes.COMMENT_ENTRY, ""); 122 | // default: 123 | // throw new Error(); 124 | // } 125 | // } 126 | // 127 | // getStringUntilNonEscapedChar(terminalRegex) { 128 | // if (typeof terminalRegex === 'string') { 129 | // 130 | // } 131 | // const chars = []; 132 | // for (let i = this.pos; i < this.len; i++) { 133 | // this.pos = i; 134 | // if (this.str.charAt(i) == '\\' && this.str.charAt(i 
+ 1).match(terminalRegex)) { 135 | // i++; 136 | // this.pos = i; 137 | // } else if (this.str.charAt(i).match(terminalRegex)) { 138 | // break; 139 | // } 140 | // chars.push(this.str.charAt(i)); 141 | // } 142 | // return chars.join(""); 143 | // } 144 | // 145 | // 146 | // readIdChars() { 147 | // let fromPost = this.pos; 148 | // 149 | // 150 | // } 151 | // 152 | // skipnontokens() { 153 | // while (this.pos < this.len) { 154 | // var c = this.str.charAt(this.pos); 155 | // switch (this.mode) { 156 | // case this.mode.OUTSIDE_ENTRY: 157 | // // Outside of entries, ignore everything up to the first @ 158 | // if (c == '@') return; 159 | // else this.pos++; 160 | // break; 161 | // default: 162 | // // Within entries, ignore whitespaces between tokens 163 | // switch (c) { 164 | // case ' ': 165 | // case '\t': 166 | // case '\r': 167 | // case '\n': 168 | // console.log("non-token at " + this.pos); 169 | // this.pos++; 170 | // break; 171 | // default: 172 | // return; 173 | // // throw new Error(); 174 | // } 175 | // } 176 | // } 177 | // } 178 | //} 179 | -------------------------------------------------------------------------------- /src/bibfile/bib-entry/bibliographic-entity/Author.ts: --------------------------------------------------------------------------------
import {BibStringData, BibStringDatum} from "../../datatype/string/BibStringData";
import {isString} from "../../../util";
import {isOuterQuotedString, isQuotedString} from "../../datatype/string/QuotedString";
import {isStringRef} from "../../datatype/string/StringRef";
import {isOuterBracedString} from "../../datatype/string/BracedString";
import {
    splitOnAnd,
    splitOnComma,
    splitOnPattern,
    toStringBibStringData,
    toStringBibStringDatum
} from "../../datatype/string/bib-string-utils";

/**
 * Renders a raw "word" object as plain text.
 *
 * Accepts a string, an object with `type === "braced"` (its `data` is rendered recursively),
 * an object carrying a `unicode` or `string` property, or an array of any of these.
 *
 * NOTE(review): nothing in this file calls this helper; it looks like a leftover.
 *
 * @throws Error when the value matches none of the shapes above.
 */
function word2string(obj: any): string {
    if (typeof obj === "string") return obj;
    else if (obj.type === "braced") return word2string(obj.data);
    else if (obj.unicode) return obj.unicode;
    else if (obj.string) return obj.string;
    else if (Array.isArray(obj)) return obj.map(word2string).join("");
    else throw new Error("? " + JSON.stringify(obj));
}

// Module-level global regex. splitOnPattern always scans each string to exhaustion when no
// split limit is given, which leaves lastIndex back at 0 for the next use.
const WHITESPACES = /\s+/g;

/**
 * One parsed author name, split into the four BibTeX name parts.
 *
 * The `$`-suffixed properties keep the original BibStringData words; the un-suffixed
 * properties hold the same words flattened to plain strings.
 */
export class AuthorName {
    readonly firstNames$: BibStringData[];
    /** First character of every first name ("" for an empty word). */
    readonly initials: string[];
    readonly vons$: BibStringData[];
    readonly lastNames$: BibStringData[];
    readonly jrs$: BibStringData[];

    readonly firstNames: string[];
    readonly vons: string[];
    readonly lastNames: string[];
    readonly jrs: string[];

    /** All flattened name parts (first, von, last, jr) joined with "-". */
    readonly id: string;

    /**
     * @param firstNames Array of word objects
     * @param vons Array of word objects
     * @param lastNames Array of word objects
     * @param jrs Array of word objects
     */
    constructor(firstNames: BibStringData[], vons: BibStringData[], lastNames: BibStringData[], jrs: BibStringData[]) {
        this.firstNames$ = firstNames;
        this.vons$ = vons;
        this.lastNames$ = lastNames;
        this.jrs$ = jrs;

        this.initials = firstNames.map(getFirstLetter);

        this.firstNames = firstNames.map(toStringBibStringData);
        this.vons = vons.map(toStringBibStringData);
        this.lastNames = lastNames.map(toStringBibStringData);
        this.jrs = jrs.map(toStringBibStringData);

        this.id = this.firstNames.join("-") + "-"
            + this.vons.join("-") + "-"
            + this.lastNames.join("-") + "-"
            + this.jrs.join("-");
    }
}

/** Returns the first character of the flattened word, or "" when the word is empty. */
function getFirstLetter(bsd: BibStringData): string {
    const asString = toStringBibStringData(bsd);
    return asString ? asString.charAt(0) : "";
}

/**
 * True for a comma or a whitespace character.
 *
 * NOTE(review): nothing in this file calls this helper.
 */
function isPartOfName(char: string): boolean {
    return char === "," || /\s/.test(char);
}

/** Applies startsWithLowerCase to the first datum of a word; an empty word yields false. */
function startsWithLowerCaseBSD(authorToken: BibStringData): boolean {
    if (authorToken.length > 0) return startsWithLowerCase(authorToken[0]);
    else return false;
}

/**
 * Tells whether a datum begins with a lower-case letter.
 *
 * @throws Error for string references and outer quoted/braced strings, which must not be
 *         tested this way.
 */
function startsWithLowerCase(authorToken: BibStringDatum): boolean {
    if (isString(authorToken)) {
        if (!authorToken) return false;
        const ch = authorToken.charAt(0);
        // Equal to its own lower-case form but different from its upper-case form:
        // rules out digits and punctuation, for which both forms are identical.
        return ch.toLowerCase() === ch && ch.toUpperCase() !== ch;
    }

    if (isQuotedString(authorToken)) {
        // TODO must be flattened string...?
        if (!authorToken.data || authorToken.data.length <= 0) return false;
        return startsWithLowerCase(authorToken.data[0]);
    }

    if (isStringRef(authorToken)
        || isOuterQuotedString(authorToken)
        || isOuterBracedString(authorToken)
    ) throw new Error("Should not do this test on this type");

    return false;
}

/**
 * Scans every word except the last one and reports where the lower-case ("von") words lie.
 *
 * @returns `start`: index of the first lower-case word, `end`: index just past the last
 *          lower-case word; both are -1 when no such word exists.
 */
function findVonRange(words: BibStringData[]): { start: number, end: number } {
    let start = -1;
    let end = -1;
    // The final word is never inspected, so it can never become part of the von.
    for (let i = 0; i < words.length - 1; i++) {
        if (startsWithLowerCaseBSD(words[i])) {
            if (start < 0) start = i;
            end = i + 1;
        }
    }
    return {start, end};
}

/** Parses a name written as "First von Last" (no comma). */
function firstVonLast(outer: BibStringData): AuthorName {
    const authorTokens: BibStringData[] = splitOnPattern(outer, WHITESPACES);
    const range = findVonRange(authorTokens);

    // Without a von, everything but the last word is a first name.
    const firstNameEndExclusive = range.start >= 0 ? range.start : authorTokens.length - 1;

    const von: BibStringData[] = range.start >= 0
        ? getSubStringAsArray(authorTokens, range.start, range.end)
        : [];
    const firstName: BibStringData[] = getSubStringAsArray(authorTokens, 0, firstNameEndExclusive);
    const lastName: BibStringData[] = getSubStringAsArray(
        authorTokens,
        Math.max(range.end, firstNameEndExclusive),
        authorTokens.length
    );

    return new AuthorName(firstName, von, lastName, []);
}

/**
 * Splits the "von Last" part shared by the two comma formats: the von runs from the first
 * word up to and including the last lower-case word, the remaining words are the last name.
 */
function splitVonLast(vonLastStr: BibStringData): { von: BibStringData[], last: BibStringData[] } {
    const vonLast = splitOnPattern(vonLastStr, WHITESPACES);
    const range = findVonRange(vonLast);
    return {
        von: range.start >= 0 ? getSubStringAsArray(vonLast, 0, range.end) : [],
        last: getSubStringAsArray(vonLast, Math.max(range.end, 0))
    };
}

/** Parses a name written as "von Last, First". */
function vonLastFirst(vonLastStr: BibStringData, firstStr: BibStringData) {
    const parts = splitVonLast(vonLastStr);
    const first = splitOnPattern(firstStr, WHITESPACES);

    return new AuthorName(first, parts.von, parts.last, []);
}

/**
 * Copies `tokens[startIncl .. endExcl)` into a new array.
 *
 * Bounds are clamped to the array, so a negative start or an end past the array can no
 * longer inject `undefined` entries; an empty or inverted range yields [].
 *
 * @param endExcl defaults to `tokens.length`
 */
function getSubStringAsArray<T>(tokens: T[], startIncl: number, endExcl?: number): T[] {
    const from = Math.max(startIncl, 0);
    const to = Math.min(endExcl === undefined ? tokens.length : endExcl, tokens.length);
    const arr: T[] = [];
    for (let i = from; i < to; i++) {
        arr.push(tokens[i]);
    }
    return arr;
}

/** Parses a name written as "von Last, Jr, First". */
function vonLastJrFirst(vonLastStr: BibStringData, jrStr: BibStringData, firstStr: BibStringData) {
    const parts = splitVonLast(vonLastStr);
    const first = splitOnPattern(firstStr, WHITESPACES);
    const jr = splitOnPattern(jrStr, WHITESPACES);

    return new AuthorName(first, parts.von, parts.last, jr);
}

/**
 * BibTEX must be able to distinguish between the different parts of the author field. To that
 * aim, BibTEX recognizes three possible formats:
 * • First von Last;
 * • von Last, First;
 * • von Last, Jr, First.
 *
 * The format to be considered is obtained by counting the number of commas in the name.
 *
 * @throws Error when the value splits into anything other than one, two or three parts.
 */
export function parseAuthorName(normalizedFieldValue: BibStringData): AuthorName {
    const partitions: BibStringData[] = splitOnComma(normalizedFieldValue);

    switch (partitions.length) {
        case 1:
            return firstVonLast(partitions[0]);
        case 2:
            return vonLastFirst(mdbsd(partitions[0]), mdbsd(partitions[1]));
        case 3:
            return vonLastJrFirst(mdbsd(partitions[0]), mdbsd(partitions[1]), mdbsd(partitions[2]));
        default:
            throw new Error(`Could not parse author name: partitioned as ${JSON.stringify(partitions)} in ${JSON.stringify(normalizedFieldValue)}`);
    }
}

/** Type guard that only checks the value is not `undefined`. */
function isdbsd(x: any): x is BibStringData {
    return x !== undefined;
}

/** Returns `x` unchanged, throwing when it is `undefined`. */
function mdbsd(x: any): BibStringData {
    if (isdbsd(x)) return x; else throw new Error("???????");
}

-------------------------------------------------------------------------------- /src/bibfile/datatype/string/bib-string-utils.ts: --------------------------------------------------------------------------------
import {isStringRef, StringRef} from "./StringRef";
import {isBracedString, isOuterBracedString, BracedString} from "./BracedString";
import {isOuterQuotedString, isQuotedString, QuotedString} from "./QuotedString";
import {flattenMyArray, isArray, isNumber, isString, mustBeString} from "../../../util";
import {BibStringComponent} from "./BibStringComponent";
import {BibStringData, BibStringDatum} from "./BibStringData";

/** Structural check: a numeric `braceDepth` and a string `type`. Null-safe. */
export function isBibStringComponent(x: any): x is BibStringComponent {
    return x != null && typeof x.braceDepth === "number" && typeof x.type === "string";
}

/** A run of adjacent plain strings/numbers, as grouped by globContiguousStrings. */
export interface ContiguousSimpleString {
    type: "ContiguousSimpleString";
    data: (number | string)[];
}

/** Structural check for ContiguousSimpleString. Null-safe. */
export function isContiguousSimpleString(x: any): x is ContiguousSimpleString {
    return x != null && x.type === "ContiguousSimpleString" && isArray(x.data);
}

/** Joins the pieces of a ContiguousSimpleString into one plain string. */
export function joinContiguousSimpleStrings(x: ContiguousSimpleString): string {
    return x.data.join("");
}


/**
 * Converts one raw parser object into its string-component representation.
 *
 * - numbers and strings are returned unchanged;
 * - string references become a new StringRef (constructed with 0 as first argument);
 * - objects typed "id", "ws" or "number" yield their `string` property;
 * - "braced"/"bracedstring" objects become a BracedString whose children are parsed one
 *   brace level deeper;
 * - "quotedstring" objects become a QuotedString whose children stay at the same depth.
 *
 * @throws Error when `data` of a braced/quoted object is not an array, or when the object
 *         type is not one of the above.
 */
export function parseStringComponent(braceDepth: number, obj: any): BibStringComponent | string | number | StringRef {
    if (isNumber(obj) || isString(obj))
        return obj;

    if (isStringRef(obj))
        return new StringRef(0, obj.stringref);

    switch (mustBeString(obj.type, obj)) {
        case "id":
        case "ws":
        case "number":
            return mustBeString(obj.string);
        case "bracedstring":
        case "braced":
            if (!isArray(obj.data)) {
                throw new Error("Expect array for data: " + JSON.stringify(obj));
            }
            return new BracedString(braceDepth, flattenMyArray(obj.data).map(e => parseStringComponent(braceDepth + 1, e)));
        case "quotedstring": {
            if (!isArray(obj.data)) {
                throw new Error("Expect array for data: " + JSON.stringify(obj));
            }
            const flattened = flattenMyArray(obj.data);
            return new QuotedString(braceDepth, flattened.map(e => parseStringComponent(braceDepth, e)));
        }
        default:
            throw new Error("Unexpected complex string type: " + obj.type);
    }
}

/**
 * Renders a single datum as plain text; braced and quoted strings are rendered from their
 * `data`, without any delimiters.
 *
 * @throws Error (carrying the JSON of the datum) for anything else, e.g. an unresolved
 *         string reference.
 */
export function toStringBibStringDatum(data: BibStringDatum): string {
    if (isString(data))
        return data;
    if (isNumber(data))
        return data + "";
    if (
        isBracedString(data)
        || isQuotedString(data)
        || isOuterQuotedString(data)
        || isOuterBracedString(data)
    )
        return toStringBibStringData(data.data);

    throw new Error(JSON.stringify(data));
}

/** Renders every datum with toStringBibStringDatum and concatenates the results. */
export function toStringBibStringData(data: BibStringData): string {
    return data.map(toStringBibStringDatum).join("");
}

/**
 * Flattens every datum with flattenQuotedString and splices array results into one flat list.
 *
 * @param hideQuotes when true, quoted strings are inlined without surrounding `"` entries
 */
export function flattenQuotedStrings(data: BibStringData, hideQuotes?: boolean): BibStringData {
    const result: BibStringData = [];
    for (const datum of data) {
        const flattened = flattenQuotedString(datum, hideQuotes);
        if (isArray(flattened)) {
            // Append in place rather than re-allocating the whole result for every datum.
            for (const piece of flattened) result.push(piece);
        } else {
            result.push(flattened);
        }
    }
    return result;
}

const doubleQuotes: BibStringDatum[] = ["\""];

/**
 * Flattens one datum:
 * - braced strings, plain strings and numbers are returned unchanged;
 * - a quoted string is replaced by its flattened content, wrapped in `"` entries unless
 *   `hideQuotes` is set;
 * - outer quoted / outer braced strings are replaced by their flattened content.
 *
 * @throws Error for string references (they must be resolved beforehand) and for any
 *         unrecognised datum.
 */
function flattenQuotedString(data: BibStringDatum, hideQuotes?: boolean): BibStringDatum | BibStringData {
    if (isBracedString(data))
        return data;
    if (isQuotedString(data)) {
        // flattenQuotedStrings always returns an array, so no non-array case exists here.
        const inner: BibStringData = flattenQuotedStrings(data.data, true);
        return hideQuotes
            ? inner
            : doubleQuotes.concat(inner).concat(doubleQuotes);
    }
    if (isOuterQuotedString(data))
        return flattenQuotedStrings(data.data, true);
    if (isOuterBracedString(data))
        return flattenQuotedStrings(data.data, false);
    if (isString(data) || isNumber(data))
        return data;
    if (isStringRef(data))
        throw new Error("StringRef should be resolved at this point!");
    else
        throw new Error();
}

/**
 * Groups each run of adjacent plain strings/numbers into one ContiguousSimpleString; every
 * other datum is passed through unchanged and ends the current run.
 */
export function globContiguousStrings(data: BibStringData): (BibStringDatum | ContiguousSimpleString)[] {
    const result: (BibStringDatum | ContiguousSimpleString)[] = [];
    for (const element of data) {
        if (!(isString(element) || isNumber(element))) {
            result.push(element);
            continue;
        }

        const lastElement = result.length > 0 ? result[result.length - 1] : undefined;
        if (lastElement !== undefined && isContiguousSimpleString(lastElement)) {
            lastElement.data.push(element);
        } else {
            const contiguousSimpleString: ContiguousSimpleString = {
                type: "ContiguousSimpleString",
                data: [element]
            };
            result.push(contiguousSimpleString);
        }
    }
    return result;
}

/** Splits on the word "and" surrounded by whitespace. */
export function splitOnAnd(data: BibStringData): BibStringData[] {
    return splitOnPattern(data, /\s+and\s+/g);
}

/**
 * Splits on commas (with optional surrounding whitespace).
 *
 * @param limit maximum number of splits; the default of 2 yields at most three parts
 */
export function splitOnComma(data: BibStringData, limit = 2): BibStringData[] {
    return splitOnPattern(data, /\s*,\s*/g, limit);
}

/**
 * Splits a datum list wherever `pattern` matches inside one of its plain-string entries.
 * Non-string entries are never split; they are carried into whichever part is currently
 * being built.
 *
 * A pattern without the `g` flag is matched through a global copy, and zero-width matches
 * are skipped, so neither can hang the scan. `lastIndex` of a global pattern is reset
 * before each string and once more on exit, so a regex object shared between calls neither
 * inherits nor leaks scan state.
 *
 * @param data      the datum list to split
 * @param pattern   separator pattern
 * @param stopAfter maximum number of splits; once reached, the remaining text is kept
 *                  verbatim in the last part. Omit for no limit.
 * @returns the parts in order; a trailing empty part is not emitted
 */
export function splitOnPattern(data: BibStringData, pattern: RegExp, stopAfter?: number): BibStringData[] {
    // NOTE(review): the global copy keeps only the i and m flags.
    const matcher = pattern.global
        ? pattern
        : new RegExp(pattern.source, "g" + (pattern.ignoreCase ? "i" : "") + (pattern.multiline ? "m" : ""));
    let splitsLeft = stopAfter === undefined ? Infinity : stopAfter;

    const splitted: BibStringData[] = [];
    let buffer: BibStringData = [];

    for (const datum of data) {
        if (!isString(datum) || !(splitsLeft > 0)) {
            buffer.push(datum);
            continue;
        }

        matcher.lastIndex = 0;
        let consumed = 0;
        let didSplit = false;
        let match = matcher.exec(datum);
        while (match) {
            if (match[0].length === 0) {
                // A zero-width match does not advance lastIndex by itself.
                matcher.lastIndex++;
            } else {
                buffer.push(datum.substring(consumed, match.index));
                consumed = match.index + match[0].length;
                splitted.push(buffer);
                buffer = [];
                didSplit = true;
                splitsLeft--;
            }
            match = splitsLeft > 0 ? matcher.exec(datum) : null;
        }

        if (!didSplit)
            buffer.push(datum);
        else if (consumed < datum.length)
            buffer.push(datum.substring(consumed));
    }

    matcher.lastIndex = 0;
    if (buffer.length > 0) splitted.push(buffer);
    return splitted;
}

// TODO
// /**
//  * A special character is a
// part of a field starting with a left brace being at brace depth 0 immediately followed with a backslash,
// and ending with the 
corresponding right brace. For instance, in the above example, there is no special 202 | // character, since \LaTeX is at depth 2. It should be noticed that anything in a special character is 203 | // considered as being at brace depth 0, even if it is placed between another pair of braces. 204 | // */ 205 | // export class SpecialCharacter extends BibStringComponent { 206 | // constructor(data: BibStringData) { 207 | // super("specialCharacter", 0, data); 208 | // } 209 | // 210 | // copyWithResolvedStringReferences(alreadyResolved, refs): BibStringComponent { 211 | // return new SpecialCharacter(resolveStringReferences(this,(alreadyResolved, refs)); 212 | // } 213 | // } 214 | -------------------------------------------------------------------------------- /src/bibfile/bib-entry/bibliographic-entity/mandatory-and-optional-fields.ts: -------------------------------------------------------------------------------- 1 | import {isArray, isString} from "../../../util"; 2 | import {BibEntry} from "../BibEntry"; 3 | 4 | /** 5 | * From Taming the BeaST: http://ctan.cs.uu.nl/info/bibtex/tamethebeast/ttb_en.pdf 6 | * 7 |
8 | 9 |
address
10 |
Generally the city or complete address of the publisher. 11 |
12 | 13 |
author
14 |
For author names. The input format is quite special, since BibTEX has to be 15 | able to distinguish between the first and last names. Section 11 and 18 are 16 | completely dedicated to this topic. 17 |
18 | 19 |
booktitle
20 |
For the title of a book one part of which is cited. 21 |
22 | 23 |
chapter
24 |
The number of the chapter (or any part) of a book being cited. If not a chapter, 25 | the type field might be used for precising the type of sectioning. 26 |
27 | 28 |
crossref
29 |
This one is quite peculiar. It’s used to cross-reference within the bibliography. 30 | For instance, you might cite a document, and a part of it. In that case, the 31 | second one can reference the first one, or at least inherit some of its fields from 32 | the first one. This deserves some more comments, see section 12. 33 |
34 | 35 |
edition
36 |
The edition number. Or in fact its ordinal, for instance edition = "First". 37 | This might raise problems when trying to export a bibliography into another 38 | language. 39 |
40 | 41 |
editor
42 |
The name of the editor(s) of the entry. The format is the same as for authors. 43 |
44 | 45 |
howpublished
46 |
Only used in rare cases where the document being cited is not a classical type 47 | such as a @book, an @article or an @inproceedings publication. 48 |
49 | 50 |
institution
51 |
For a technical report, the name of the institution that published it. 52 |
53 | 54 |
journal
55 |
The name of the journal in which the cited article has been published. 56 | key: Used for defining the label, in case it cannot be computed by BibTEX. It does 57 | not force the label, but defines the label when BibTEX needs one but can’t 58 | compute it. 59 | 
60 | 61 |
month
62 |
Well... The month during which the document has been published. This also 63 | raises the problem of the translation of the bibliography: It’s better having 64 | a numerical value, or an abbreviation, instead of the complete name of the 65 | month. Having the number would also allow BibTEX to sort the entries more 66 | precisely (even though, as far as I know, no bibliography style does this at the 67 | present time). 68 |
69 | 70 |
note
71 |
For any additional data you would want to add. Since classical styles were 72 | written in 1985, they don’t have a url field, and note is often used for this 73 | purpose, together with the url.sty package.
74 | 75 |
number
76 |
A number... Not whichever, but the number of a report. For volume numbers, 77 | a special volume field exists. 78 | organization: The organizing institution of a conference. 79 | 
80 | 81 |
pages
82 |
The relevant pages of the document. Useful for the reader when you cite a huge 83 | book; Note that such a precision could be added through the optional argument 84 | of \cite (see page 6), in which case it would appear in the document but not 85 | in the bibliography. 86 |
87 | 88 |
publisher
89 |
The institution that published the document. 90 |
91 | 92 |
school
93 |
For theses, the name of the school the thesis has been prepared in. 94 |
95 | 96 |
series
97 |
The name of a collection of series or books. 98 |
99 | 100 |
title
101 |
The title of the document being cited. There are some rules to be observed 102 | when entering this field, see section 10. 103 |
104 | 105 |
type
106 |
The type. Which type? It depends... The type of publication, if needed. For 107 | thesis, for instance, in order to distinguish between a masters thesis and a PhD. 111 | Or the type of section being cited (see chapter above). 112 | 
113 | 114 |
volume
115 |
The volume number in a series or collection of books. 116 |
117 | 118 |
year
119 |
The publication year.
120 | 121 |
122 | **/ 123 | export const address = "address"; 124 | export const author = "author"; 125 | export const booktitle = "booktitle"; 126 | export const chapter = "chapter"; 127 | export const edition = "edition"; 128 | export const editor = "editor"; 129 | export const howpublished = "howpublished"; 130 | export const institution = "institution"; 131 | export const journal = "journal"; 132 | export const month = "month"; 133 | export const note = "note"; 134 | export const number = "number"; 135 | export const organization = "organization"; 136 | export const pages = "pages"; 137 | export const publisher = "publisher"; 138 | export const school = "school"; 139 | export const series = "series"; 140 | export const title = "title"; 141 | export const type = "type"; 142 | export const volume = "volume"; 143 | export const year = "year"; 144 | 145 | export type KnownField = "address" | 146 | "author" | 147 | "booktitle" | 148 | "chapter" | 149 | "edition" | 150 | "editor" | 151 | "howpublished" | 152 | "institution" | 153 | "journal" | 154 | "month" | 155 | "note" | 156 | "number" | 157 | "organization" | 158 | "pages" | 159 | "publisher" | 160 | "school" | 161 | "series" | 162 | "title" | 163 | "type" | 164 | "volume" | 165 | "year"; 166 | 167 | export type MandatoryFields = KnownField | KnownField[]; 168 | export type OptionalFields = KnownField | KnownField[]; 169 | 170 | export const optionalFields: { [k: string]: (KnownField | KnownField[])[] } = { 171 | "book": [["volume", "number"], "series", "address", "edition", "month", "note"], 172 | "booklet": ["author", "howpublished", "address", "address", "month", "year", "note"], 173 | "conference": ["editor", ["volume", "number"], "series", "pages", "address", "month", "organization", "publisher", "note"], 174 | "inproceedings": ["editor", ["volume", "number"], series, "pages", "address", "month", "organization", "publisher", "note"], 175 | "inbook": ["volume", "number", "series", "type", "address", "edition", "month", 
"note"], 176 | "incollection": ["editor", ["volume", "number"], "series", "type", "chapter", "pages", "address", "edition", "month", "note"], 177 | "manual": ["author", "organization", "year", "address", "edition", "month", "note"], 178 | "mastersthesis": ["type", "address", "month", "note"], 179 | "misc": [], 180 | "phdthesis": ["type", "address", "month", "note"], 181 | "proceedings": ["editor", ["volume", "number"], "series", "address", "month", "organization", "publisher", "note"], 182 | "techreport": ["type", "address", "number", "month", "note"], 183 | "unpublished": ["month", "year"] 184 | }; 185 | 186 | export const mandatoryFields: { [k: string]: (KnownField | KnownField[])[] } = { 187 | "article": ["author", "title", "year", "journal"], 188 | "book": [["author", "editor"], "title", "publisher", "year"], 189 | "booklet": ["title"], 190 | "conference": ["author", "title", "booktitle", "year"], 191 | "inproceedings": ["author", "title", "booktitle", "year"], 192 | "inbook": [["author", "editor"], "title", ["chapter", "pages"]], 193 | "incollection": ["author", "title", "booktitle", "publisher", "year"], 194 | "manual": ["title"], 195 | "mastersthesis": ["author", "title", "school", "year"], 196 | "misc": [["author", "title", "howpublished", "year", "month", "note"]], 197 | "phdthesis": ["author", "title", "school", "year"], 198 | "proceedings": ["year", "title"], 199 | "techreport": ["author", "title", "institution", "year"], 200 | "unpublished": ["author", "title", "note"] 201 | }; 202 | 203 | export function hasOptionalFields(s: string): s is (keyof typeof optionalFields) { 204 | return optionalFields.hasOwnProperty(s); 205 | } 206 | 207 | export function hasMandatoryFields(s: string): s is (keyof typeof mandatoryFields) { 208 | return mandatoryFields.hasOwnProperty(s); 209 | } 210 | 211 | export function getMandatoryFields(s: string): MandatoryFields[] { 212 | if (hasMandatoryFields(s)) { 213 | return mandatoryFields[s]; 214 | } else { 215 | return []; 
216 | } 217 | } 218 | 219 | export function getOptionalFields(s: string): OptionalFields[] { 220 | if (hasOptionalFields(s)) { 221 | return optionalFields[s]; 222 | } else { 223 | return []; 224 | } 225 | } 226 | 227 | export const findError = (entry: BibEntry, field: MandatoryFields): (Error | undefined) => { 228 | const fields = entry.fields; 229 | if (isString(field)) { 230 | if (!fields[field]) 231 | return new Error("Warning: expected " + entry.type + " with id " + entry._id 232 | + " to have the field: " + field); 233 | } else if (isArray(field)) { 234 | const hasAllFields: boolean = field.reduce( 235 | (acc: boolean, fieldName: KnownField): boolean => { 236 | if (isString(fieldName)) { 237 | return (acc && fields.hasOwnProperty(fieldName)); 238 | } 239 | else 240 | throw new Error(); 241 | }, true 242 | ); 243 | if (!hasAllFields) { 244 | // not one of a list of options 245 | return new Error("Expected " + entry.type + " with id " + entry._id 246 | + " to have one of the following fields: " + field); 247 | } 248 | } 249 | }; -------------------------------------------------------------------------------- /src/parser/parser.ne: -------------------------------------------------------------------------------- 1 | # For more information, see: 2 | # http://ftp.math.purdue.edu/mirrors/ctan.org/info/bibtex/tamethebeast/ttb_en.pdf 3 | # http://artis.imag.fr/~Xavier.Decoret/resources/xdkbibtex/bibtex_summary.html 4 | # http://www.bibtex.org/Format/ 5 | 6 | 7 | # 8 | # See http://ftp.math.purdue.edu/mirrors/ctan.org/info/bibtex/tamethebeast/ttb_en.pdf: 9 | # 10 | # There is a special entry type named @comment. 
The main use of such an entry type is to comment a large part 11 | # of the bibliography easily, since anything outside an entry is already a comment, and commenting out one 12 | # entry may be achieved by just removing its initial @ 13 | # 14 | # ^ this suggests that opening braces within the comment MUST be closed, and the comment ends on the first ending brace 15 | # that balances with this opening brace 16 | # 17 | # 18 | # Case-independent sequence of non-whitespace, non-brace, non-commas 19 | # 20 | 21 | # 22 | # • Values (i.e. right hand sides of each assignment) can be either between curly braces or between 23 | # double quotes. The main difference is that you can write double quotes in the first case, and not 24 | # in the second case. 25 | # • For numerical values, curly braces and double quotes can be omitted. 26 | # 27 | # Text that is enclosed in braces is marked not to be touched by any formating instructions. For instance, when a style defines the title to become depicted using only lowercase, italic letters, the enclosed part will be left untouched. "An Introduction To {BibTeX}" would become ,,an introduction to the BibTeX'' when such a style is applied. Nested braces are ignored. 
28 | 29 | @{% 30 | var isNumber = function(x) {return x.constructor === Number || (typeof x === "object"&&x.type === "number")}; 31 | var tok_id = {test: function(x) {return typeof x === "object" && x.type === "id"; }} 32 | var entry_type_bib = {test: function(x) {return typeof x === "object" && x.type === "@bib"; }} 33 | var entry_type_string = {test: function(x) {return typeof x === "object" && x.type === "@string"; }} 34 | var entry_type_preamble = {test: function(x) {return typeof x === "object" && x.type === "@preamble"; }} 35 | var entry_type_comment = {test: function(x) {return typeof x === "object" && x.type === "@comment"; }} 36 | var ws = {test: function(x) {return typeof x === "object" && x.type === "ws";}} 37 | var num = {test: isNumber} 38 | var pound = {literal: "#" } 39 | var eq = {literal: "=" } 40 | var esc = {literal: "\\" } 41 | var paren_l = {literal: "(" } 42 | var paren_r = {literal: ")" } 43 | var brace_l = {literal: "{" } 44 | var brace_r = {literal: "}" } 45 | var quote_dbl = {literal: '"' } 46 | var comma = {literal: "," } 47 | 48 | 49 | function addToObj(obj, keyval){ 50 | if(keyval.type !== "keyval") throw new Error("Expected a keyval object"); 51 | var key = keyval.key.toLowerCase(); 52 | if(obj.fields[key]) { 53 | console.log("WARNING: field "+key+ " was already defined on object "+obj._id+". Ignoring this value."); 54 | return; 55 | }else{ 56 | obj.fields[key]=keyval.value; 57 | return obj; 58 | } 59 | } 60 | 61 | function joinTokens(arr){ 62 | var strs = []; 63 | for(var i=0;i non_entry:? 
(entry non_entry:?):* {% 82 | function (data, location, reject) { 83 | var topLevelObjects = []; 84 | //console.log(JSON.stringify(data)); 85 | if(data[0]) 86 | topLevelObjects.push({type: "NON_ENTRY", data: data[0]}); 87 | 88 | for(var i=0;i < data[1].length;i++){ 89 | 90 | topLevelObjects.push({type: "ENTRY", data: data[1][i][0]}); 91 | 92 | if(data[1][i][1]) 93 | topLevelObjects.push({type: "NON_ENTRY", data: data[1][i][1]}); 94 | } 95 | return topLevelObjects; 96 | } 97 | %} 98 | _ -> %ws:* 99 | parenthesized[X] -> %paren_l $X %paren_r {% function (data, location, reject) { return data[1]; } %} 100 | braced[X] -> %brace_l $X %brace_r {% function (data, location, reject) { return data[1]; } %} 101 | parenthesizedPadded[X] -> %paren_l _ $X _ %paren_r {% function (data, location, reject) { return data[2]; } %} 102 | bracedPadded[X] -> %brace_l _ $X _ %brace_r {% function (data, location, reject) { return data[2]; } %} 103 | 104 | ##################### 105 | # ENTRY 106 | ##################### 107 | entry_decl -> (%entry_type_bib | 108 | %entry_type_string | 109 | %entry_type_preamble | 110 | %entry_type_comment) {% function (data, location, reject) { return data[0][0]; } %} 111 | 112 | entry -> (bib_entry | string_entry | preamble_entry | comment_entry) 113 | {% function (data, location, reject) { return data[0][0]; }%} 114 | 115 | # 116 | # See http://ftp.math.purdue.edu/mirrors/ctan.org/info/bibtex/tamethebeast/ttb_en.pdf: 117 | # 118 | # There is a special entry type named @comment. 
The main use of such an entry type is to comment a large part 119 | # of the bibliography easily, since anything outside an entry is already a comment, and commenting out one 120 | # entry may be achieved by just removing its initial @ 121 | # 122 | # ^ this suggests that opening and closing brackets within the comment MUST be closed, and the comment ends on the first ending bracket 123 | # that balances with the opening bracket 124 | # 125 | comment -> main {% 126 | function (data, location, reject) { 127 | return data; 128 | } 129 | %} 130 | comment_liberal -> (.):* {% 131 | function (data, location, reject) { 132 | var toeknz=[]; 133 | for(var tk=0; tk < data[0].length; tk++) 134 | toeknz.push(data[0][tk][0]); 135 | return toeknz; 136 | } %} 137 | 138 | entry_body_comment -> (parenthesized[comment] | braced[comment]) 139 | {% 140 | function (data, location, reject) { 141 | return data[0][0][0]; 142 | } 143 | %} 144 | 145 | entry_body_string -> (parenthesizedPadded[keyval] | bracedPadded[keyval]) {% function (data, location, reject) { return data[0][0][0]; } %} 146 | entry_body_bib -> (parenthesizedPadded[bib_content] | bracedPadded[bib_content]) {% function (data, location, reject) { 147 | var obj = data[0][0][0]; 148 | return obj; 149 | } %} 150 | bib_content -> key_string _ %comma _ (keyval _ %comma _):* keyval (_ %comma):? 
151 | {% function (data, location, reject) { 152 | var obj = { 153 | _id: data[0], 154 | fields:[] 155 | }; 156 | var keyvals = data[4]; 157 | for(var kv=0;kv %entry_type_bib _ entry_body_bib {% function (data, location, reject) { 165 | var obj = { 166 | _id: data[2]._id, 167 | }; 168 | obj["@type"] = data[0].string; 169 | obj.fields = {}; 170 | 171 | var keyvals = data[2].fields; 172 | for(var kv=0;kv %entry_type_string _ entry_body_string {% function (data, location, reject) { return {type: "string", data: data[2]}; } %} 178 | preamble_entry -> %entry_type_preamble _ entry_body_comment {% function (data, location, reject) { return {type: "preamble", data: data[2]}; } %} 179 | comment_entry -> %entry_type_comment _ entry_body_comment {% function (data, location, reject) { return {type: "comment", data: data[2]}; } %} 180 | 181 | keyval -> key_string _ %eq _ value_string 182 | {% function (data, location, reject) {return {type: "keyval", key: data[0], value: data[4]};}%} 183 | 184 | braced_string -> %brace_l (non_brace|braced_string):* %brace_r {% function (data, location, reject) { 185 | var tkz = []; 186 | for(var i in data[1]) tkz.push(data[1][i][0]); 187 | return {type:"braced", data: tkz}; 188 | } 189 | 190 | %} 191 | quoted_string -> %quote_dbl (escaped_quote|non_quote_non_brace|braced_string):* %quote_dbl 192 | {% function (data, location, reject) { 193 | var tks = []; 194 | for(var i in data[1]) tks.push(data[1][i][0]); 195 | return {type:"quotedstring", data:tks}; 196 | } 197 | %} 198 | escaped_quote -> %esc %quote_dbl 199 | non_quote_non_brace -> (%tok_id | 200 | %entry_type_bib | 201 | %entry_type_string | 202 | %entry_type_preamble | 203 | %entry_type_comment | 204 | %ws | 205 | %num | 206 | %pound | 207 | %eq | 208 | %esc | 209 | %paren_l | 210 | %paren_r | 211 | %comma) 212 | 213 | # 214 | # Case-independent sequence of non-whitespace, non-brace, non-commas 215 | # 216 | key_string -> stringreftoken:+ {% function (data, location, reject) { return 
joinTokens(data[0]).toLowerCase(); } %} 217 | 218 | # 219 | # • Values (i.e. right hand sides of each assignment) can be either between curly braces or between 220 | # double quotes. The main difference is that you can write double quotes in the first case, and not 221 | # in the second case. 222 | # • For numerical values, curly braces and double quotes can be omitted. 223 | # 224 | value_string -> (quoted_string_or_ref (_ %pound _ quoted_string_or_ref):* | braced_string) 225 | {% function (data, location, reject) { 226 | //console.log("DATA",JSON.stringify(data)); 227 | var match = data[0]; 228 | if(match.length === 2){ 229 | // quoted string 230 | var tokenz = []; 231 | tokenz.push(match[0]); 232 | for(var i=0;i (quoted_string | string_ref | %num) {% function (data, location, reject) { 242 | //console.log(data); 243 | if (data[0][0].type === "quotedstring") return data[0][0]; 244 | else{return data[0][0];} 245 | } 246 | %} 247 | 248 | string_ref -> (stringreftoken_n_num stringreftoken:*) 249 | {% function (data, location, reject) { var str = data[0][0]+joinTokens(data[0][1]); return {stringref: str}; } %} 250 | 251 | # Non-white non-brace, non-comma 252 | stringreftoken -> (%esc | %paren_l | %paren_r | %tok_id | %num | %entry_type_bib | %entry_type_string | %entry_type_preamble | %entry_type_comment) 253 | {% 254 | function (data, location, reject) { 255 | if(typeof data[0][0] === "object") { 256 | if(!data[0][0].string) throw new Error("Expected "+data[0]+"to have a 'string' field"); 257 | return data[0][0].string; 258 | } else { 259 | if((!(typeof data[0][0] === "string"||typeof data[0][0] === "number"))) 260 | throw new Error("Expected "+data[0][0]+" to be a string");return data[0][0]; 261 | } 262 | } 263 | %} 264 | stringreftoken_n_num -> (%esc | %paren_l | %paren_r | %tok_id | %entry_type_bib | %entry_type_string | %entry_type_preamble | %entry_type_comment) 265 | {% 266 | function (data, location, reject) { 267 | if(typeof data[0][0] === "object") { 268 | 
if(!data[0][0].string) throw new Error("Expected "+data[0]+"to have a 'string' field"); 269 | return data[0][0].string; 270 | } else { 271 | if((!(typeof data[0][0] === "string"||typeof data[0][0] === "number"))) 272 | throw new Error("Expected "+data[0][0]+" to be a string");return data[0][0]; 273 | } 274 | } 275 | %} 276 | non_brace -> (%esc | %paren_l | %paren_r | %tok_id | %quote_dbl | %ws | %num | %comma | %entry_type_bib | %entry_type_string | %entry_type_preamble | %entry_type_comment | %pound | %eq) 277 | {% function (data, location, reject) { 278 | return data[0][0]; 279 | } 280 | %} 281 | non_bracket -> (%esc | %tok_id | %quote_dbl | %ws | %num | %comma | %entry_type_bib | %entry_type_string | %entry_type_preamble | %entry_type_comment | %pound | %eq) 282 | {% function (data, location, reject) { 283 | return data[0][0]; 284 | } 285 | %} 286 | 287 | ##################### 288 | # NON ENTRY 289 | ##################### 290 | non_entry -> (escaped_entry | escaped_escape | escaped_non_esc_outside_entry | non_esc_outside_entry):+ 291 | {% function (data, location, reject) { 292 | //console.log("non_entry",data); 293 | var tokens = []; 294 | for(var Ti = 0;Ti %esc %esc {% function (data, location, reject) { return '\\'; } %} 299 | escaped_entry -> %esc entry_decl 300 | {% function (data, location, reject) { return {type: "escapedEntry", data: data[1] }; } %} 301 | escaped_non_esc_outside_entry -> %esc non_esc_outside_entry 302 | {% function (data, location, reject) { return '\\' + data[1]; } %} 303 | non_esc_outside_entry -> (%tok_id | 304 | %ws | 305 | %num | 306 | %pound | 307 | %eq | 308 | %paren_l | 309 | %paren_r | 310 | %brace_l | 311 | %brace_r | 312 | %quote_dbl | 313 | %comma) 314 | {% function (data, location, reject) { 315 | //console.log("ooutside_entry",data[0][0]); 316 | return data[0][0]; 317 | } %} -------------------------------------------------------------------------------- /test/test.ts: 
-------------------------------------------------------------------------------- 1 | import "mocha"; 2 | 3 | import {expect} from "chai"; 4 | import {parseBibFile} from "../src/bibfile/BibFile"; 5 | import {isOuterQuotedString, OuterQuotedString, QuotedString} from "../src/bibfile/datatype/string/QuotedString"; 6 | import {isNumber, mustBeArray, mustBeDefined} from "../src/util"; 7 | import {BibEntry, EntryFields} from "../src/bibfile/bib-entry/BibEntry"; 8 | import {determineAuthorNames$, mustBeAuthors} from "../src/bibfile/bib-entry/bibliographic-entity/Authors"; 9 | import {BracedString} from "../src/bibfile/datatype/string/BracedString"; 10 | import Lexer from "../src/lexer/Lexer"; 11 | import {BibStringData} from "../src/bibfile/datatype/string/BibStringData"; 12 | import {splitOnComma, splitOnPattern, toStringBibStringData} from "../src/bibfile/datatype/string/bib-string-utils"; 13 | import {FieldValue} from "../src/bibfile/datatype/KeyVal"; 14 | import {parseAuthorName} from "../src/bibfile/bib-entry/bibliographic-entity/Author"; 15 | 16 | // TODO test crossref? 17 | 18 | 19 | describe("Author: von Last, First", () => { 20 | it("von Last, First", function () { 21 | const authorName = parseAuthorName(["Von De la ", "Last", ",", "firstName= ", "."]); 22 | expect(authorName.vons$[1].indexOf("De")).to.greaterThan(-1); 23 | expect(authorName.lastNames$[0].indexOf("Last")).to.greaterThan(-1); 24 | expect(authorName.jrs$).to.deep.equal([]); 25 | expect(authorName.firstNames$[0].indexOf("firstName=")).to.greaterThan(-1); 26 | }); 27 | 28 | // NOTE: This case raises an error message from BibTEX, complaining that a name ends with a comma. 
It is a common error to separate names with commas instead of “and” 29 | it("jean de la fontaine,", function () { 30 | const authorName = parseAuthorName(["jean de la fontaine,"]); 31 | expect(authorName.firstNames$.map(toStringBibStringData).join(" ")).to.eq(""); 32 | expect(authorName.vons$.map(toStringBibStringData).join(" ")).to.eq("jean de la"); 33 | expect(authorName.lastNames$.map(toStringBibStringData).join(" ")).to.eq("fontaine"); 34 | expect(authorName.jrs$.map(toStringBibStringData).join(" ")).to.eq(""); 35 | }); 36 | 37 | it("de la fontaine, Jean", function () { 38 | const authorName = parseAuthorName(["de la fontaine, Jean"]); 39 | expect(authorName.firstNames$.map(toStringBibStringData).join(" ")).to.eq("Jean"); 40 | expect(authorName.vons$.map(toStringBibStringData).join(" ")).to.eq("de la"); 41 | expect(authorName.lastNames$.map(toStringBibStringData).join(" ")).to.eq("fontaine"); 42 | expect(authorName.jrs$.map(toStringBibStringData).join(" ")).to.eq(""); 43 | }); 44 | 45 | it("De La Fontaine, Jean", function () { 46 | const authorName = parseAuthorName(["De La Fontaine, Jean"]); 47 | expect(authorName.firstNames$.map(toStringBibStringData).join(" ")).to.eq("Jean"); 48 | expect(authorName.vons$.map(toStringBibStringData).join(" ")).to.eq(""); 49 | expect(authorName.lastNames$.map(toStringBibStringData).join(" ")).to.eq("De La Fontaine"); 50 | expect(authorName.jrs$.map(toStringBibStringData).join(" ")).to.eq(""); 51 | }); 52 | 53 | it("De la Fontaine, Jean", function () { 54 | const authorName = parseAuthorName(["De la Fontaine, Jean"]); 55 | expect(authorName.firstNames$.map(toStringBibStringData).join(" ")).to.eq("Jean"); 56 | expect(authorName.vons$.map(toStringBibStringData).join(" ")).to.eq("De la"); 57 | expect(authorName.lastNames$.map(toStringBibStringData).join(" ")).to.eq("Fontaine"); 58 | expect(authorName.jrs$.map(toStringBibStringData).join(" ")).to.eq(""); 59 | }); 60 | 61 | it("de La Fontaine, Jean", function () { 62 | const 
authorName = parseAuthorName(["de La Fontaine, Jean"]); 63 | expect(authorName.firstNames$.map(toStringBibStringData).join(" ")).to.eq("Jean"); 64 | expect(authorName.vons$.map(toStringBibStringData).join(" ")).to.eq("de"); 65 | expect(authorName.lastNames$.map(toStringBibStringData).join(" ")).to.eq("La Fontaine"); 66 | expect(authorName.jrs$.map(toStringBibStringData).join(" ")).to.eq(""); 67 | }); 68 | 69 | }); 70 | describe("Author: von Last, Jr, First", () => { 71 | 72 | 73 | // NOTE: This case raises an error message from BibTEX, complaining that a name ends with a comma. It is a common error to separate names with commas instead of “and” 74 | it("jean de la fontaine,", function () { 75 | const authorName = parseAuthorName(["jean de la fontaine,,"]); 76 | expect(authorName.firstNames$.map(toStringBibStringData).join(" ")).to.eq(""); 77 | expect(authorName.vons$.map(toStringBibStringData).join(" ")).to.eq("jean de la"); 78 | expect(authorName.lastNames$.map(toStringBibStringData).join(" ")).to.eq("fontaine"); 79 | expect(authorName.jrs$.map(toStringBibStringData).join(" ")).to.eq(""); 80 | }); 81 | it("de la fontaine, Jr., Jean", function () { 82 | const authorName = parseAuthorName(["de la fontaine, Jr., Jean"]); 83 | expect(authorName.firstNames$.map(toStringBibStringData).join(" ")).to.eq("Jean"); 84 | expect(authorName.vons$.map(toStringBibStringData).join(" ")).to.eq("de la"); 85 | expect(authorName.lastNames$.map(toStringBibStringData).join(" ")).to.eq("fontaine"); 86 | expect(authorName.jrs$.map(toStringBibStringData).join(" ")).to.eq("Jr."); 87 | }); 88 | 89 | it("De La Fontaine, Jr., Jean", function () { 90 | const authorName = parseAuthorName(["De La Fontaine, Jr., Jean"]); 91 | expect(authorName.firstNames$.map(toStringBibStringData).join(" ")).to.eq("Jean"); 92 | expect(authorName.vons$.map(toStringBibStringData).join(" ")).to.eq(""); 93 | expect(authorName.lastNames$.map(toStringBibStringData).join(" ")).to.eq("De La Fontaine"); 94 | 
expect(authorName.jrs$.map(toStringBibStringData).join(" ")).to.eq("Jr."); 95 | }); 96 | 97 | it("De la Fontaine, Jr., Jean", function () { 98 | const authorName = parseAuthorName(["De la Fontaine, Jr., Jean"]); 99 | expect(authorName.firstNames$.map(toStringBibStringData).join(" ")).to.eq("Jean"); 100 | expect(authorName.vons$.map(toStringBibStringData).join(" ")).to.eq("De la"); 101 | expect(authorName.lastNames$.map(toStringBibStringData).join(" ")).to.eq("Fontaine"); 102 | expect(authorName.jrs$.map(toStringBibStringData).join(" ")).to.eq("Jr."); 103 | }); 104 | 105 | it("de La Fontaine, Jr., Jean", function () { 106 | const authorName = parseAuthorName(["de La Fontaine, Jr., Jean"]); 107 | expect(authorName.firstNames$.map(toStringBibStringData).join(" ")).to.eq("Jean"); 108 | expect(authorName.vons$.map(toStringBibStringData).join(" ")).to.eq("de"); 109 | expect(authorName.lastNames$.map(toStringBibStringData).join(" ")).to.eq("La Fontaine"); 110 | expect(authorName.jrs$.map(toStringBibStringData).join(" ")).to.eq("Jr."); 111 | }); 112 | it("von Last, Jr., First", function () { 113 | const authorName = parseAuthorName(["von ", "Last", ", Jr.", ",", "firstName, ", ".,,,etc,,"]); 114 | expect(authorName.vons$[0].indexOf("von")).to.greaterThan(-1); 115 | expect(authorName.lastNames$[0].indexOf("Last")).to.greaterThan(-1); 116 | expect(authorName.jrs$[0].indexOf("Jr.")).to.greaterThan(-1); 117 | expect(authorName.firstNames$[0].indexOf("firstName,")).to.greaterThan(-1); 118 | }); 119 | 120 | 121 | }); 122 | describe("Author: First von Last", () => { 123 | it("First von Last", function () { 124 | const authorName = parseAuthorName(["First von Last"]); 125 | expect(authorName.vons$[0].indexOf("von")).to.greaterThan(-1); 126 | expect(authorName.lastNames$[0].indexOf("Last")).to.greaterThan(-1); 127 | expect(authorName.jrs$.length).to.equal(0); 128 | expect(authorName.firstNames$[0].indexOf("First")).to.greaterThan(-1); 129 | }); 130 | 131 | it("jean de la fontaine", 
function () { 132 | const authorName = parseAuthorName(["jean de la fontaine"]); 133 | expect(authorName.firstNames$.map(toStringBibStringData).join(" ")).to.eq(""); 134 | expect(authorName.vons$.map(toStringBibStringData).join(" ")).to.eq("jean de la"); 135 | expect(authorName.lastNames$.map(toStringBibStringData).join(" ")).to.eq("fontaine"); 136 | expect(authorName.jrs$.map(toStringBibStringData).join(" ")).to.eq(""); 137 | }); 138 | 139 | it("Jean de la fontaine", function () { 140 | const authorName = parseAuthorName(["Jean de la fontaine"]); 141 | expect(authorName.firstNames$.map(toStringBibStringData).join(" ")).to.eq("Jean"); 142 | expect(authorName.vons$.map(toStringBibStringData).join(" ")).to.eq("de la"); 143 | expect(authorName.lastNames$.map(toStringBibStringData).join(" ")).to.eq("fontaine"); 144 | expect(authorName.jrs$.map(toStringBibStringData).join(" ")).to.eq(""); 145 | }); 146 | 147 | it("Jean {de} la fontaine", function () { 148 | const authorName = parseAuthorName(["Jean ", new BracedString(0, ["de"]), " la fontaine"]); 149 | expect(authorName.firstNames$.map(toStringBibStringData).join(" ")).to.eq("Jean de"); 150 | expect(authorName.vons$.map(toStringBibStringData).join(" ")).to.eq("la"); 151 | expect(authorName.lastNames$.map(toStringBibStringData).join(" ")).to.eq("fontaine"); 152 | expect(authorName.jrs$.map(toStringBibStringData).join(" ")).to.eq(""); 153 | }); 154 | 155 | it("jean {de} {la} fontaine", function () { 156 | const authorName = parseAuthorName(["jean ", new BracedString(0, ["de"]), " ", 157 | new BracedString(0, ["la"]), " fontaine"]); 158 | expect(authorName.firstNames$.map(toStringBibStringData).join(" ")).to.eq(""); 159 | expect(authorName.vons$.map(toStringBibStringData).join(" ")).to.eq("jean"); 160 | expect(authorName.lastNames$.map(toStringBibStringData).join(" ")).to.eq("de la fontaine"); 161 | expect(authorName.jrs$.map(toStringBibStringData).join(" ")).to.eq(""); 162 | }); 163 | 164 | it("Jean {de} {la} fontaine", 
function () { 165 | const authorName = parseAuthorName(["Jean ", new BracedString(0, ["de"]), " ", 166 | new BracedString(0, ["la"]), " fontaine"]); 167 | expect(authorName.firstNames$.map(toStringBibStringData).join(" ")).to.eq("Jean de la"); 168 | expect(authorName.vons$.map(toStringBibStringData).join(" ")).to.eq(""); 169 | expect(authorName.lastNames$.map(toStringBibStringData).join(" ")).to.eq("fontaine"); 170 | expect(authorName.jrs$.map(toStringBibStringData).join(" ")).to.eq(""); 171 | }); 172 | 173 | it("Jean De La Fontaine", function () { 174 | const authorName = parseAuthorName(["Jean De La Fontaine"]); 175 | expect(authorName.firstNames$.map(toStringBibStringData).join(" ")).to.eq("Jean De La"); 176 | expect(authorName.vons$.map(toStringBibStringData).join(" ")).to.eq(""); 177 | expect(authorName.lastNames$.map(toStringBibStringData).join(" ")).to.eq("Fontaine"); 178 | expect(authorName.jrs$.map(toStringBibStringData).join(" ")).to.eq(""); 179 | }); 180 | 181 | it("jean De la Fontaine", function () { 182 | const authorName = parseAuthorName(["jean De la Fontaine"]); 183 | expect(authorName.firstNames$.map(toStringBibStringData).join(" ")).to.eq(""); 184 | expect(authorName.vons$.map(toStringBibStringData).join(" ")).to.eq("jean De la"); 185 | expect(authorName.lastNames$.map(toStringBibStringData).join(" ")).to.eq("Fontaine"); 186 | expect(authorName.jrs$.map(toStringBibStringData).join(" ")).to.eq(""); 187 | }); 188 | 189 | it("Jean de La Fontaine", function () { 190 | const authorName = parseAuthorName(["Jean de La Fontaine"]); 191 | expect(authorName.firstNames$.map(toStringBibStringData).join(" ")).to.eq("Jean"); 192 | expect(authorName.vons$.map(toStringBibStringData).join(" ")).to.eq("de"); 193 | expect(authorName.lastNames$.map(toStringBibStringData).join(" ")).to.eq("La Fontaine"); 194 | expect(authorName.jrs$.map(toStringBibStringData).join(" ")).to.eq(""); 195 | }); 196 | 197 | }); 198 | describe("utils", () => { 199 | it("split on pattern", 
function () { 200 | expect( 201 | splitOnPattern( 202 | ["xx", "xx", "endfirst xxx startsecond", " xxx xxx xxx3", "xxx xxx", "midxxxEOF"], 203 | /\s*xxx\s*/g, 204 | 3 205 | ) 206 | ).to.deep.equal( 207 | [ 208 | [ 209 | "xx", 210 | "xx", 211 | "endfirst" 212 | ], 213 | [ 214 | "startsecond", 215 | "" 216 | ], 217 | [ 218 | "" 219 | ], 220 | [ 221 | "xxx3", 222 | "xxx xxx", 223 | "midxxxEOF" 224 | ], 225 | ] 226 | ); 227 | }); 228 | it("split on pattern", function () { 229 | expect( 230 | splitOnPattern(["xx", "xx", "xx"], /\s*xxx\s*/g, 3) 231 | ).to.deep.equal( 232 | [["xx", "xx", "xx"]] 233 | ); 234 | }); 235 | 236 | it("split on ,", function () { 237 | expect( 238 | splitOnComma(["von ", "Last", ", ", "name, ", "Jr.,,,etc,,"], 3) 239 | ).to.deep.equal( 240 | [ 241 | [ 242 | "von ", 243 | "Last", 244 | "" 245 | ], 246 | [ 247 | "name" 248 | ], 249 | [ 250 | "Jr." 251 | ], 252 | [ 253 | ",,etc,," 254 | ] 255 | ] 256 | ); 257 | }); 258 | }); 259 | 260 | 261 | describe("lexer", () => { 262 | it("should lex", function () { 263 | const lexer1 = new Lexer("\n\t\nthisisallacommentof{}commentswitheverythingexceptan\", whichweca123nescapewitha0123 "); 264 | expect( 265 | lexer1.readTokens() 266 | ).to.deep.equal([ 267 | {"type": "ws", "string": "\n\t\n"}, 268 | {"type": "id", "string": "thisisallacommentof"}, 269 | "{", 270 | "}", 271 | {"type": "id", "string": "commentswitheverythingexceptan"}, 272 | "\"", 273 | ",", 274 | {"type": "ws", "string": " "}, 275 | {"type": "id", "string": "whichweca"}, 276 | 123, 277 | {"type": "id", "string": "nescapewitha"}, 278 | {"type": "number", "string": "0123"}, 279 | {"type": "ws", "string": " "} 280 | ] 281 | ); 282 | }); 283 | }); 284 | 285 | 286 | describe("field values", () => { 287 | it("should handle strings of all shapes", function () { 288 | const bib = parseBibFile(` 289 | @string{ abc = "def" } 290 | @b00k{comp4nion, 291 | quoted = "Simple quoted string", 292 | quotedComplex = "Complex " # quoted #" string", 293 | braced = 
{I am a so-called "braced string09 11"}, 294 | bracedComplex = {I {{\\am}} a {so-called} {\\"b}raced string{\\"}.}, 295 | number = 911 , 296 | naughtyNumber = a911a, 297 | a911a = {a911a}, 298 | naughtyString = abc 299 | } 300 | @string{ quoted = " referenced" } 301 | @string{ a911a = {b911c} } 302 | `); 303 | 304 | expect(bib.entries$.comp4nion.getField("quoted")).to.deep.equal(new OuterQuotedString([ 305 | new QuotedString(0, [ 306 | "Simple", " ", "quoted", " ", "string" 307 | ]) 308 | ])); 309 | 310 | // TODO 311 | // expect(bib.entries$.comp4nion.getField("quotedCOMPLEX")).to.deep.equal( 312 | // { 313 | // "type": "quotedstringwrapper", 314 | // "braceDepth": 0, 315 | // "data": [{"type": "quotedstring", "braceDepth": 0, "data": ["Complex", " "]}, { 316 | // "braceDepth": 0, 317 | // "stringref": "quoted" 318 | // }, {"type": "quotedstring", "braceDepth": 0, "data": [" ", "string"]}] 319 | // } 320 | // ); 321 | // expect(bib.entries$.comp4nion.getField("braced")).to.deep.equal( 322 | // { 323 | // "type": "bracedstringwrapper", 324 | // "braceDepth": 0, 325 | // "data": [ 326 | // "I", " ", "am", " ", "a", " ", "so-called", " ", 327 | // "\"", "braced", " ", "string", "09", " ", 11, "\"" 328 | // ] 329 | // } 330 | // ); 331 | const bracedComplex: any = bib.entries$.comp4nion.getField("bracedCOMPLEX"); 332 | expect(bracedComplex.type).to.equal("bracedstringwrapper"); 333 | const bracedComplexData = bracedComplex.data; 334 | const bracedComplexDatum0: any = bracedComplexData[0]; 335 | const bracedComplexDatum2: any = bracedComplexData[2]; 336 | expect(bracedComplexDatum0).to.equal("I"); 337 | const bracedComplexDatum2Data: any = bracedComplexDatum2.data; 338 | const bracedComplexDatum2Datum0: any = bracedComplexDatum2Data[0]; 339 | expect(bracedComplexDatum2Datum0.braceDepth).to.equal(1); 340 | 341 | const numberField = bib.entries$.comp4nion.getField("number"); 342 | expect(numberField).to.equal(911); 343 | 344 | const naughtyNumber: any = 
mustBeDefined(bib.entries$.comp4nion.getField("naughtyNumber")); 345 | const t: any = naughtyNumber["type"]; 346 | /* NOTE(review): nnData is never read after this line; presumably mustBeArray(...) itself is the check - confirm */ const nnData: any[] = mustBeArray(naughtyNumber["data"]); 347 | 348 | expect(t).to.equal("quotedstringwrapper"); 349 | }); 350 | 351 | it("should tease apart author names", function () { 352 | /* Test helper: wraps data in a QuotedString constructed with 0 as its first argument. */ function qs(data: BibStringData): QuotedString { 353 | return new QuotedString(0, data); 354 | } 355 | 356 | /* Test helper: wraps data in a BracedString constructed with 0 as its first argument. NOTE(review): the declared return type is QuotedString although a BracedString is constructed - confirm the annotation is intended. */ function bs(data: BibStringData): QuotedString { 357 | return new BracedString(0, data); 358 | } 359 | 360 | expect(determineAuthorNames$(new OuterQuotedString([1]))).to.deep.equal([["1"]]); 361 | const auth2 = new OuterQuotedString( 362 | [1, qs([" a"]), "n", "d", qs([" "]), bs(["\\", "two"])] 363 | ); 364 | const authNames2 = determineAuthorNames$(auth2); 365 | expect(authNames2.length).to.deep.equal(2); 366 | expect(authNames2[0]).to.deep.equal(["1"]); 367 | expect(authNames2[1][0]["type"]).to.deep.equal("bracedstring"/*{ 368 | "braceDepth": 0, 369 | "data": [ 370 | "\\", 371 | "two" 372 | ], 373 | "isSpecialCharacter": true, 374 | "type": "bracedstring" 375 | }*/); 376 | }); 377 | 378 | it("should determine author names", function () { 379 | const bib = parseBibFile(` @ STRiNG { mittelbach = "Mittelbach, Franck" } 380 | some comment 381 | @b00k 382 | { comp4nion , 383 | auTHor = "Goossens, jr, Mich{\\\`e}l Frederik and " # mittelbach # " and "#"{ { A}}le"#"xander de La Samarin ",\n 384 | }`); 385 | 386 | const book: BibEntry = mustBeDefined(bib.getEntry("COMP4NION")); 387 | 388 | // console.log( 389 | /* NOTE(review): with the console.log wrapper commented out, this statement only evaluates authors$; this test contains no expect() */ mustBeDefined(book.getAuthors()).authors$; 390 | // ); 391 | }); 392 | 393 | it("should flatten quoted strings", function () { 394 | const bib = parseBibFile(` 395 | @string { quoted = "QUO" # "TED" } 396 | @string { braced = {"quoted"} } 397 | @a{b, 398 | bracedComplex = {I {{\\am}} {\\"a} "so-"called"" braced string{\\"}.}, 399 | quotedComplex = "and I {"} am a {"} " # quoted # " or "#braced#"string ", 400 | number = 911, 401 | }`); 402 | // stringref = 
abc 403 | const a: BibEntry = bib.entries$.b; 404 | const fields$: EntryFields = a.fields; 405 | const bracedcomplex: FieldValue = fields$.bracedcomplex; 406 | if (isNumber(bracedcomplex)) throw Error(); 407 | // console.log(flattenQuotedStrings(bracedcomplex.data, true)); 408 | 409 | const quotedComplex: FieldValue = fields$.quotedcomplex; 410 | if (isNumber(quotedComplex)) throw Error(); 411 | // console.log(flattenQuotedStrings(quotedComplex.data, true)); 412 | 413 | const nineEleven: FieldValue = fields$.number; 414 | expect(nineEleven).to.equal(911); 415 | }); 416 | /* todo implement 417 | it("should process titles correctly", function () { 418 | const bib = parseBibFile(` 419 | This won’t work, since turning it to lower case will produce 420 | The \latex companion, and LATEX won't accept this... 421 | @article{lowercased, title = "The \LaTeX Companion"} 422 | 423 | This ensures that switching to lower case will be 424 | correct. However, applying purify$ gives The 425 | Companion. Thus sorting could be wrong; 426 | @article{wrongsorting1, title = "The {\csname LaTeX\endcsname} {C}ompanion"} 427 | 428 | In this case, { \LaTeX} is not a special character, 429 | but a set of letters at depth 1. It won’t be modified by change.case$. However, purify$ will 430 | leave both spaces, and produce The LaTeX Companion, which could result in wrong sorting; 431 | @article{wrongsorting2, title = "The { \LaTeX} {C}ompanion"} 432 | 433 | 434 | @article{works1, title = "The{ \LaTeX} {C}ompanion"} 435 | @article{works2, title = "The {{\LaTeX}} {C}ompanion"} 436 | 437 | For encoding an accent in a title, say É (in upper case) as in the French word École, we’ll write 438 | {\’{E}}cole, {\’E}cole or {{\’E}}cole, depending on whether we want it to be turned to lower 439 | case (the first two solutions) or not (the last one). purify$ will give the same result in the three 440 | cases. However, it should be noticed that the third one is not a special character. 
If you ask BibTEX 441 | to extract the first character of each string using text.prefix$, you’ll get {\’{E}} in the first case, 442 | {\’E} in the second case and {{\}} in the third case. 443 | 444 | @article{ecoleLowercased1, title = "{\'{E}}cole"} 445 | @article{ecoleLowercased2, title = "{\'E}cole"} 446 | @article{ecoleUppercased, title = "{{\'E}}cole"} 447 | `);*/ 448 | /* todo implement 449 | it("should process authors correctly", function () { 450 | const bib = parseBibFile(` 451 | The first point to notice is that two authors are separated with the keyword and. The format of the 452 | names is the second important point: The last name first, then the first name, with a separating 453 | comma. In fact, BibTEX understands other formats 454 | 455 | @article{authors, author = "Goossens, Michel and Mittelbach, Franck and Samarin, Alexander"} 456 | 457 | // TODO additional cases in http://tug.ctan.org/info/bibtex/tamethebeast/ttb_en.pdf 458 | `);*/ 459 | // TODO crossref ; additional cases in http://tug.ctan.org/info/bibtex/tamethebeast/ttb_en.pdf 460 | 461 | // }); 462 | }); 463 | 464 | 465 | describe("parser", () => { 466 | it("should parse comments", function () { 467 | const bib = parseBibFile("\n\t\nthisisallacommentof{}commentswitheverythingexceptan\", whichweca123nescapewitha0123 "); 468 | // console.log(JSON.stringify(bib)); 469 | expect(bib.entries_raw.length).to.equal(0); 470 | expect(bib.comments.length).to.equal(1); 471 | expect(bib.content.length).to.equal(1); 472 | const firstComment = bib.comments[0].data; 473 | expect(firstComment[0]).to.equal("\n\t\n"); 474 | expect(firstComment[9]).to.equal("123"); 475 | expect(firstComment[11]).to.equal("0123"); 476 | }); 477 | 478 | it("should parse empty", function () { 479 | expect(parseBibFile("").content.length).to.equal(0); 480 | }); 481 | 482 | it("should throw for cyclic string entries", function () { 483 | let thrown = false; 484 | try { 485 | parseBibFile( 486 | `@string{c = "a"#b} 487 | @string{b = 
"b"#a}` 488 | ); 489 | } catch (e) { 490 | thrown = true; 491 | } 492 | expect(thrown).to.equal(true); 493 | }); 494 | it("should parse string entries", function () { 495 | const bib = parseBibFile(`leading comment 496 | @ STRiNG { mittelbach = "Mittelbach, Franck" } 497 | @string{acab= a #_# c #_#"are" #_# b} 498 | @string{c = "co"#cc} 499 | @string{a = "a"#l#l} 500 | @string{_ = {{{{{ }}}}}} 501 | @string{l = {l}} 502 | @string{cc ={mp{\\"u}ters}} 503 | @string{b = "beautifu"#l} ` 504 | ); 505 | expect(bib.content.length).to.equal(17); 506 | 507 | // expect(bib.entries[0]["data"].key).to.equal("mittelbach"); 508 | 509 | const acab = bib.strings_raw.acab; 510 | if (isOuterQuotedString(acab)) { 511 | const thirdDatum: any = acab.data[3]; 512 | expect(thirdDatum.stringref).to.equal("_"); 513 | const fourthDatum: any = acab.data[4]; 514 | expect(fourthDatum["type"]).to.equal("quotedstring"); 515 | } else 516 | expect(isOuterQuotedString(acab)).to.throw(); 517 | 518 | const acab$ = bib.strings$.acab; 519 | if (isOuterQuotedString(acab$)) { 520 | expect(acab$.stringify()).to.equal("all comp\\\"uters are beautiful"); 521 | 522 | const thirdDatum: any = acab$.data[3]; 523 | expect(thirdDatum.type).to.equal("bracedstringwrapper"); 524 | const fourthDatum: any = acab$.data[4]; 525 | expect(fourthDatum["type"]).to.equal("quotedstring"); 526 | } else expect(isOuterQuotedString(acab$)).to.throw(); 527 | }); 528 | 529 | it("should parse bib entries", function () { 530 | const bib = parseBibFile(` @ STRiNG { mittelbach = "Mittelbach, Franck" } 531 | some comment 532 | @b00k 533 | { comp4nion , 534 | auTHor = "Goossens, jr, Mich{\\\`e}l Frederik and " # mittelbach # " and "#"{ { A}}le"#"xander de La Samarin ",\n 535 | titLe = "The {{\\LaTeX}} {C}{\\"o}mp{\\"a}nion", 536 | publisher = "Addison-Wesley", 537 | yeaR=1993 , 538 | Title = {{Bib}\\TeX}, 539 | title = {{Bib}\\TeX}, 540 | Title2 = "{Bib}\\TeX", 541 | Title3 = "{Bib}" # "\\TeX" 542 | }`); 543 | 544 | 
expect(bib.content.length).to.equal(4); 545 | 546 | // console.log(JSON.stringify(bib.content)); 547 | 548 | const entry: BibEntry = mustBeDefined(bib.getEntry("Comp4nion")); 549 | 550 | const authors = mustBeAuthors(mustBeDefined(entry.getField("author"))); 551 | expect(authors).to.not.be.null; 552 | expect(authors.authors$.length).to.eq(3); 553 | 554 | // console.log(authors.authors$); 555 | }); 556 | 557 | it("should parse preamble entries", function () { 558 | const bib = parseBibFile(`@preamble{ "\\@ifundefined{url}{\\def\\url#1{\\texttt{#1}}}{}" } 559 | @preamble{ "\\makeatletter" } 560 | @preamble{ "\\makeatother" } 561 | `); 562 | /* NOTE(review): no matcher is chained after this expect(...); the template string is passed as chai's second (failure message) argument, so as written bib.preamble$ is not compared against anything. */ expect(bib.preamble$, ` "\\@ifundefined{url}{\\def\\url#1{\\texttt{#1}}}{}" 563 | "\\makeatletter" 564 | "\\makeatother" `); 565 | }); 566 | }); 567 | -------------------------------------------------------------------------------- /src/parser/ts-parser.ts: -------------------------------------------------------------------------------- 1 | /** Returns the first element of x; referenced as the "postprocess" of several grammar rules in this file. */ function id(x) { 2 | return x[0]; 3 | } 4 | 5 | /** True when x.constructor is Number, or when x is an object whose type is "number". NOTE(review): x.constructor is read before any null check, so a null or undefined x throws here. */ const isNumber = function (x) { 6 | return x.constructor === Number || (typeof x === "object" && x.type === "number"); 7 | }; 8 | /* Token matchers (tok_id, entry_type_*, ws): each exposes test(x), which is true when x is an object whose type equals the tag shown. NOTE(review): typeof null is "object", so a null x would throw on the .type read. */ const tok_id = { 9 | test: function (x) { 10 | return typeof x === "object" && x.type === "id"; 11 | } 12 | }; 13 | const entry_type_bib = { 14 | test: function (x) { 15 | return typeof x === "object" && x.type === "@bib"; 16 | } 17 | }; 18 | const entry_type_string = { 19 | test: function (x) { 20 | return typeof x === "object" && x.type === "@string"; 21 | } 22 | }; 23 | const entry_type_preamble = { 24 | test: function (x) { 25 | return typeof x === "object" && x.type === "@preamble"; 26 | } 27 | }; 28 | const entry_type_comment = { 29 | test: function (x) { 30 | return typeof x === "object" && x.type === "@comment"; 31 | } 32 | }; 33 | const ws: any = { 34 | test: function (x) { 35 | return typeof x === "object" && x.type === "ws"; 36 | } 37 | }; 38 | /* num uses isNumber as its test function. */ const num: any = {test: isNumber}; 39 | const pound: any = 
{literal: "#"}; 40 | /* eq ... comma: objects carrying a single literal character each; they appear in the "symbols" arrays of the grammar rules in this file. */ const eq: any = {literal: "="}; 41 | const esc: any = {literal: "\\"}; 42 | const paren_l = {literal: "("}; 43 | const paren_r = {literal: ")"}; 44 | const brace_l = {literal: "{"}; 45 | const brace_r = {literal: "}"}; 46 | const quote_dbl = {literal: "\""}; 47 | const comma: any = {literal: ","}; 48 | 49 | 50 | /** Stores keyval.value in obj.fields under the lower-cased keyval.key. Throws an Error when keyval.type is not "keyval". If obj.fields already holds a truthy value for that key, nothing is stored and undefined is returned (the first value is kept); otherwise the value is stored and obj is returned. */ function addToObj(obj, keyval) { 51 | if (keyval.type !== "keyval") throw new Error("Expected a keyval object"); 52 | const key = keyval.key.toLowerCase(); 53 | if (obj.fields[key]) { 54 | // TODO error? 55 | // console.log("WARNING: field '" + key + "' was already defined on " + obj["@type"] + " object with id '" + obj._id + "'. Ignoring this value."); 56 | return; 57 | } else { 58 | obj.fields[key] = keyval.value; 59 | return obj; 60 | } 61 | } 62 | 63 | /** Concatenates the elements of arr into one string: object elements contribute their .string property, string and number elements are appended as they are, and any other element throws an Error. NOTE(review): a falsy .string (for example "") is treated as missing and throws; a null element passes the typeof "object" check and then fails on the property read. */ function joinTokens(arr) { 64 | const strs: any = []; 65 | for (let i = 0; i < arr.length; i++) { 66 | if (typeof arr[i] === "object") { 67 | if (!arr[i].string) throw new Error("Expected token to have a string field called 'string' in object " + JSON.stringify(arr[i])); 68 | strs.push(arr[i].string); 69 | } else if (typeof arr[i] === "string" || typeof arr[i] === "number") { 70 | strs.push(arr[i]); 71 | } else throw new Error("Could not handle token " + JSON.stringify(arr[i]) + " in array " + JSON.stringify(arr)); 72 | } 73 | return strs.join(""); 74 | } 75 | 76 | /* Exported grammar table: Lexer is left undefined and ParserRules lists the rules as name / symbols / optional postprocess entries. */ export const grammar: any = { 77 | Lexer: undefined, 78 | ParserRules: [ 79 | {"name": "main$ebnf$1", "symbols": ["non_entry"], "postprocess": id}, 80 | { 81 | "name": "main$ebnf$1", "symbols": [], "postprocess": function () { 82 | return undefined; 83 | } 84 | }, 85 | {"name": "main$ebnf$2", "symbols": []}, 86 | {"name": "main$ebnf$2$subexpression$1$ebnf$1", "symbols": ["non_entry"], "postprocess": id}, 87 | { 88 | "name": "main$ebnf$2$subexpression$1$ebnf$1", "symbols": [], "postprocess": function () { 89 | return undefined; 90 | } 91 | }, 92 | {"name": "main$ebnf$2$subexpression$1", "symbols": ["entry", 
"main$ebnf$2$subexpression$1$ebnf$1"]}, 93 | { 94 | "name": "main$ebnf$2", 95 | "symbols": ["main$ebnf$2", "main$ebnf$2$subexpression$1"], 96 | "postprocess": function arrpush(d) { 97 | return d[0].concat([d[1]]); 98 | } 99 | }, 100 | { 101 | "name": "main", 102 | "symbols": ["main$ebnf$1", "main$ebnf$2"], 103 | "postprocess": function (data) { 104 | const topLevelObjects: any = []; 105 | // console.log(JSON.stringify(data)); 106 | if (data[0]) 107 | topLevelObjects.push({type: "NON_ENTRY", data: data[0]}); 108 | 109 | for (let i = 0; i < data[1].length; i++) { 110 | 111 | topLevelObjects.push({type: "ENTRY", data: data[1][i][0]}); 112 | 113 | if (data[1][i][1]) 114 | topLevelObjects.push({type: "NON_ENTRY", data: data[1][i][1]}); 115 | } 116 | return topLevelObjects; 117 | } 118 | }, 119 | {"name": "_$ebnf$1", "symbols": []}, 120 | { 121 | "name": "_$ebnf$1", "symbols": ["_$ebnf$1", ws], "postprocess": function arrpush(d) { 122 | return d[0].concat([d[1]]); 123 | } 124 | }, 125 | {"name": "_", "symbols": ["_$ebnf$1"]}, 126 | {"name": "entry_decl$subexpression$1", "symbols": [entry_type_bib]}, 127 | {"name": "entry_decl$subexpression$1", "symbols": [entry_type_string]}, 128 | {"name": "entry_decl$subexpression$1", "symbols": [entry_type_preamble]}, 129 | {"name": "entry_decl$subexpression$1", "symbols": [entry_type_comment]}, 130 | { 131 | "name": "entry_decl", 132 | "symbols": ["entry_decl$subexpression$1"], 133 | "postprocess": function (data) { 134 | return data[0][0]; 135 | } 136 | }, 137 | {"name": "entry$subexpression$1", "symbols": ["bib_entry"]}, 138 | {"name": "entry$subexpression$1", "symbols": ["string_entry"]}, 139 | {"name": "entry$subexpression$1", "symbols": ["preamble_entry"]}, 140 | {"name": "entry$subexpression$1", "symbols": ["comment_entry"]}, 141 | { 142 | "name": "entry", "symbols": ["entry$subexpression$1"], "postprocess": function (data) { 143 | return data[0][0]; 144 | } 145 | }, 146 | { 147 | "name": "comment", "symbols": ["main"], 
"postprocess": function (data) { 148 | return data[0]; 149 | } 150 | }, 151 | {"name": "comment_liberal$ebnf$1", "symbols": []}, 152 | {"name": "comment_liberal$ebnf$1$subexpression$1", "symbols": [/./]}, 153 | { 154 | "name": "comment_liberal$ebnf$1", 155 | "symbols": ["comment_liberal$ebnf$1", "comment_liberal$ebnf$1$subexpression$1"], 156 | "postprocess": function arrpush(d) { 157 | return d[0].concat([d[1]]); 158 | } 159 | }, 160 | { 161 | "name": "comment_liberal", 162 | "symbols": ["comment_liberal$ebnf$1"], 163 | "postprocess": function (data) { 164 | const toeknz: any = []; 165 | for (let tk = 0; tk < data[0].length; tk++) 166 | toeknz.push(data[0][tk][0]); 167 | return toeknz; 168 | } 169 | }, 170 | {"name": "entry_body_comment$subexpression$1$macrocall$2", "symbols": ["comment"]}, 171 | { 172 | "name": "entry_body_comment$subexpression$1$macrocall$1", 173 | "symbols": [paren_l, "entry_body_comment$subexpression$1$macrocall$2", paren_r], 174 | "postprocess": function (data) { 175 | return data[1]; 176 | } 177 | }, 178 | {"name": "entry_body_comment$subexpression$1", "symbols": ["entry_body_comment$subexpression$1$macrocall$1"]}, 179 | {"name": "entry_body_comment$subexpression$1$macrocall$4", "symbols": ["comment"]}, 180 | { 181 | "name": "entry_body_comment$subexpression$1$macrocall$3", 182 | "symbols": [brace_l, "entry_body_comment$subexpression$1$macrocall$4", brace_r], 183 | "postprocess": function (data) { 184 | return data[1]; 185 | } 186 | }, 187 | {"name": "entry_body_comment$subexpression$1", "symbols": ["entry_body_comment$subexpression$1$macrocall$3"]}, 188 | { 189 | "name": "entry_body_comment", 190 | "symbols": ["entry_body_comment$subexpression$1"], 191 | "postprocess": function (data) { 192 | return data[0][0][0]; 193 | } 194 | }, 195 | {"name": "entry_body_string$subexpression$1$macrocall$2", "symbols": ["keyval"]}, 196 | { 197 | "name": "entry_body_string$subexpression$1$macrocall$1", 198 | "symbols": [paren_l, "_", 
"entry_body_string$subexpression$1$macrocall$2", "_", paren_r], 199 | "postprocess": function (data) { 200 | return data[2]; 201 | } 202 | }, 203 | {"name": "entry_body_string$subexpression$1", "symbols": ["entry_body_string$subexpression$1$macrocall$1"]}, 204 | {"name": "entry_body_string$subexpression$1$macrocall$4", "symbols": ["keyval"]}, 205 | { 206 | "name": "entry_body_string$subexpression$1$macrocall$3", 207 | "symbols": [brace_l, "_", "entry_body_string$subexpression$1$macrocall$4", "_", brace_r], 208 | "postprocess": function (data) { 209 | return data[2]; 210 | } 211 | }, 212 | {"name": "entry_body_string$subexpression$1", "symbols": ["entry_body_string$subexpression$1$macrocall$3"]}, 213 | { 214 | "name": "entry_body_string", 215 | "symbols": ["entry_body_string$subexpression$1"], 216 | "postprocess": function (data) { 217 | return data[0][0][0]; 218 | } 219 | }, 220 | {"name": "entry_body_bib$subexpression$1$macrocall$2", "symbols": ["bib_content"]}, 221 | { 222 | "name": "entry_body_bib$subexpression$1$macrocall$1", 223 | "symbols": [paren_l, "_", "entry_body_bib$subexpression$1$macrocall$2", "_", paren_r], 224 | "postprocess": function (data) { 225 | return data[2]; 226 | } 227 | }, 228 | {"name": "entry_body_bib$subexpression$1", "symbols": ["entry_body_bib$subexpression$1$macrocall$1"]}, 229 | {"name": "entry_body_bib$subexpression$1$macrocall$4", "symbols": ["bib_content"]}, 230 | { 231 | "name": "entry_body_bib$subexpression$1$macrocall$3", 232 | "symbols": [brace_l, "_", "entry_body_bib$subexpression$1$macrocall$4", "_", brace_r], 233 | "postprocess": function (data) { 234 | return data[2]; 235 | } 236 | }, 237 | {"name": "entry_body_bib$subexpression$1", "symbols": ["entry_body_bib$subexpression$1$macrocall$3"]}, 238 | { 239 | "name": "entry_body_bib", 240 | "symbols": ["entry_body_bib$subexpression$1"], 241 | "postprocess": function (data) { 242 | return data[0][0][0]; 243 | } 244 | }, 245 | {"name": "bib_content$ebnf$1", "symbols": []}, 246 | 
{"name": "bib_content$ebnf$1$subexpression$1", "symbols": ["keyval", "_", comma, "_"]}, 247 | { 248 | "name": "bib_content$ebnf$1", 249 | "symbols": ["bib_content$ebnf$1", "bib_content$ebnf$1$subexpression$1"], 250 | "postprocess": function arrpush(d) { 251 | return d[0].concat([d[1]]); 252 | } 253 | }, 254 | {"name": "bib_content$ebnf$2$subexpression$1", "symbols": ["_", comma]}, 255 | {"name": "bib_content$ebnf$2", "symbols": ["bib_content$ebnf$2$subexpression$1"], "postprocess": id}, 256 | { 257 | "name": "bib_content$ebnf$2", "symbols": [], "postprocess": function () { 258 | return undefined; 259 | } 260 | }, 261 | { 262 | "name": "bib_content", 263 | "symbols": ["key_string", "_", comma, "_", "bib_content$ebnf$1", "keyval", "bib_content$ebnf$2"], 264 | "postprocess": function (data) { 265 | const obj: any = { 266 | _id: data[0], 267 | fields: [] 268 | }; 269 | const keyvals = data[4]; 270 | for (let kv = 0; kv < keyvals.length; kv++) { 271 | obj.fields.push(keyvals[kv][0]); 272 | } 273 | obj.fields.push(data[5]); 274 | return obj; 275 | } 276 | }, 277 | { 278 | "name": "bib_entry", 279 | "symbols": [entry_type_bib, "_", "entry_body_bib"], 280 | "postprocess": function (data) { 281 | const obj: any = { 282 | _id: data[2]._id 283 | }; 284 | obj["@type"] = data[0].string; 285 | obj.fields = {}; 286 | 287 | const keyvals = data[2].fields; 288 | for (let kv = 0; kv < keyvals.length; kv++) { 289 | addToObj(obj, keyvals[kv]); 290 | } 291 | return obj; 292 | } 293 | }, 294 | { 295 | "name": "string_entry", 296 | "symbols": [entry_type_string, "_", "entry_body_string"], 297 | "postprocess": function (data) { 298 | return {type: "string", data: data[2]}; 299 | } 300 | }, 301 | { 302 | "name": "preamble_entry", 303 | "symbols": [entry_type_preamble, "_", "entry_body_comment"], 304 | "postprocess": function (data) { 305 | return {type: "preamble", data: data[2]}; 306 | } 307 | }, 308 | { 309 | "name": "comment_entry", 310 | "symbols": [entry_type_comment, "_", 
"entry_body_comment"], 311 | "postprocess": function (data) { 312 | return {type: "comment", data: data[2]}; 313 | } 314 | }, 315 | { 316 | "name": "keyval", 317 | "symbols": ["key_string", "_", eq, "_", "value_string"], 318 | "postprocess": function (data) { 319 | return {type: "keyval", key: data[0], value: data[4]}; 320 | } 321 | }, 322 | {"name": "braced_string$ebnf$1", "symbols": []}, 323 | {"name": "braced_string$ebnf$1$subexpression$1", "symbols": ["non_brace"]}, 324 | {"name": "braced_string$ebnf$1$subexpression$1", "symbols": ["braced_string"]}, 325 | { 326 | "name": "braced_string$ebnf$1", 327 | "symbols": ["braced_string$ebnf$1", "braced_string$ebnf$1$subexpression$1"], 328 | "postprocess": function arrpush(d) { 329 | return d[0].concat([d[1]]); 330 | } 331 | }, 332 | { 333 | "name": "braced_string", 334 | "symbols": [brace_l, "braced_string$ebnf$1", brace_r], 335 | "postprocess": function (data) { 336 | const tkz: any = []; 337 | for (const i in data[1]) tkz.push(data[1][i][0]); 338 | return {type: "braced", data: tkz}; 339 | } 340 | 341 | }, 342 | {"name": "quoted_string$ebnf$1", "symbols": []}, 343 | {"name": "quoted_string$ebnf$1$subexpression$1", "symbols": ["escaped_quote"]}, 344 | {"name": "quoted_string$ebnf$1$subexpression$1", "symbols": ["non_quote_non_brace"]}, 345 | {"name": "quoted_string$ebnf$1$subexpression$1", "symbols": ["braced_string"]}, 346 | { 347 | "name": "quoted_string$ebnf$1", 348 | "symbols": ["quoted_string$ebnf$1", "quoted_string$ebnf$1$subexpression$1"], 349 | "postprocess": function arrpush(d) { 350 | return d[0].concat([d[1]]); 351 | } 352 | }, 353 | { 354 | "name": "quoted_string", 355 | "symbols": [quote_dbl, "quoted_string$ebnf$1", quote_dbl], 356 | "postprocess": function (data) { 357 | const tks: any = []; 358 | for (const i in data[1]) tks.push(data[1][i][0]); 359 | return {type: "quotedstring", data: tks}; 360 | } 361 | }, 362 | {"name": "escaped_quote", "symbols": [esc, quote_dbl]}, 363 | {"name": 
"non_quote_non_brace$subexpression$1", "symbols": [tok_id]}, 364 | {"name": "non_quote_non_brace$subexpression$1", "symbols": [entry_type_bib]}, 365 | {"name": "non_quote_non_brace$subexpression$1", "symbols": [entry_type_string]}, 366 | {"name": "non_quote_non_brace$subexpression$1", "symbols": [entry_type_preamble]}, 367 | {"name": "non_quote_non_brace$subexpression$1", "symbols": [entry_type_comment]}, 368 | {"name": "non_quote_non_brace$subexpression$1", "symbols": [ws]}, 369 | {"name": "non_quote_non_brace$subexpression$1", "symbols": [num]}, 370 | {"name": "non_quote_non_brace$subexpression$1", "symbols": [pound]}, 371 | {"name": "non_quote_non_brace$subexpression$1", "symbols": [eq]}, 372 | {"name": "non_quote_non_brace$subexpression$1", "symbols": [esc]}, 373 | {"name": "non_quote_non_brace$subexpression$1", "symbols": [paren_l]}, 374 | {"name": "non_quote_non_brace$subexpression$1", "symbols": [paren_r]}, 375 | {"name": "non_quote_non_brace$subexpression$1", "symbols": [comma]}, 376 | {"name": "non_quote_non_brace", "symbols": ["non_quote_non_brace$subexpression$1"]}, 377 | {"name": "key_string$ebnf$1", "symbols": ["stringreftoken"]}, 378 | { 379 | "name": "key_string$ebnf$1", 380 | "symbols": ["key_string$ebnf$1", "stringreftoken"], 381 | "postprocess": function arrpush(d) { 382 | return d[0].concat([d[1]]); 383 | } 384 | }, 385 | { 386 | "name": "key_string", "symbols": ["key_string$ebnf$1"], "postprocess": function (data) { 387 | return joinTokens(data[0]).toLowerCase(); 388 | } 389 | }, 390 | {"name": "value_string$subexpression$1$ebnf$1", "symbols": []}, 391 | { 392 | "name": "value_string$subexpression$1$ebnf$1$subexpression$1", 393 | "symbols": ["_", pound, "_", "quoted_string_or_ref"] 394 | }, 395 | { 396 | "name": "value_string$subexpression$1$ebnf$1", 397 | "symbols": ["value_string$subexpression$1$ebnf$1", "value_string$subexpression$1$ebnf$1$subexpression$1"], 398 | "postprocess": function arrpush(d) { 399 | return d[0].concat([d[1]]); 400 | } 
401 | }, 402 | { 403 | "name": "value_string$subexpression$1", 404 | "symbols": ["quoted_string_or_ref", "value_string$subexpression$1$ebnf$1"] 405 | }, 406 | {"name": "value_string$subexpression$1", "symbols": ["braced_string"]}, 407 | { 408 | "name": "value_string", 409 | "symbols": ["value_string$subexpression$1"], 410 | "postprocess": function (data) { 411 | // console.log("DATA",JSON.stringify(data)); 412 | const match = data[0]; 413 | if (match.length === 2) { 414 | // quoted string 415 | const tokenz: any = []; 416 | tokenz.push(match[0]); 417 | for (let i = 0; i < match[1].length; i++) tokenz.push(match[1][i][3]); 418 | return {type: "quotedstringwrapper", data: tokenz}; 419 | } else if (match[0].type === "braced") 420 | return {type: "bracedstringwrapper", data: match[0].data}; 421 | // else if(isNumber(match[0]) return [match[0]]; 422 | else throw new Error("Don't know how to handle value " + JSON.stringify(match[0])); 423 | } 424 | }, 425 | {"name": "quoted_string_or_ref$subexpression$1", "symbols": ["quoted_string"]}, 426 | {"name": "quoted_string_or_ref$subexpression$1", "symbols": ["string_ref"]}, 427 | {"name": "quoted_string_or_ref$subexpression$1", "symbols": [num]}, 428 | { 429 | "name": "quoted_string_or_ref", 430 | "symbols": ["quoted_string_or_ref$subexpression$1"], 431 | "postprocess": function (data) { 432 | // console.log(data); 433 | if (data[0][0].type === "quotedstring") return data[0][0]; 434 | else { 435 | return data[0][0]; 436 | } 437 | } 438 | }, 439 | {"name": "string_ref$subexpression$1$ebnf$1", "symbols": []}, 440 | { 441 | "name": "string_ref$subexpression$1$ebnf$1", 442 | "symbols": ["string_ref$subexpression$1$ebnf$1", "stringreftoken"], 443 | "postprocess": function arrpush(d) { 444 | return d[0].concat([d[1]]); 445 | } 446 | }, 447 | { 448 | "name": "string_ref$subexpression$1", 449 | "symbols": ["stringreftoken_n_num", "string_ref$subexpression$1$ebnf$1"] 450 | }, 451 | { 452 | "name": "string_ref", 453 | "symbols": 
["string_ref$subexpression$1"], 454 | "postprocess": function (data) { 455 | const str = data[0][0] + joinTokens(data[0][1]); 456 | return {stringref: str}; 457 | } 458 | }, 459 | {"name": "stringreftoken$subexpression$1", "symbols": [esc]}, 460 | {"name": "stringreftoken$subexpression$1", "symbols": [paren_l]}, 461 | {"name": "stringreftoken$subexpression$1", "symbols": [paren_r]}, 462 | {"name": "stringreftoken$subexpression$1", "symbols": [tok_id]}, 463 | {"name": "stringreftoken$subexpression$1", "symbols": [num]}, 464 | {"name": "stringreftoken$subexpression$1", "symbols": [entry_type_bib]}, 465 | {"name": "stringreftoken$subexpression$1", "symbols": [entry_type_string]}, 466 | {"name": "stringreftoken$subexpression$1", "symbols": [entry_type_preamble]}, 467 | {"name": "stringreftoken$subexpression$1", "symbols": [entry_type_comment]}, 468 | { 469 | "name": "stringreftoken", 470 | "symbols": ["stringreftoken$subexpression$1"], 471 | "postprocess": function (data) { 472 | if (typeof data[0][0] === "object") { 473 | if (!data[0][0].string) throw new Error("Expected " + data[0] + "to have a 'string' field"); 474 | return data[0][0].string; 475 | } else { 476 | if ((!(typeof data[0][0] === "string" || typeof data[0][0] === "number"))) 477 | throw new Error("Expected " + data[0][0] + " to be a string"); 478 | return data[0][0]; 479 | } 480 | } 481 | }, 482 | {"name": "stringreftoken_n_num$subexpression$1", "symbols": [esc]}, 483 | {"name": "stringreftoken_n_num$subexpression$1", "symbols": [paren_l]}, 484 | {"name": "stringreftoken_n_num$subexpression$1", "symbols": [paren_r]}, 485 | {"name": "stringreftoken_n_num$subexpression$1", "symbols": [tok_id]}, 486 | {"name": "stringreftoken_n_num$subexpression$1", "symbols": [entry_type_bib]}, 487 | {"name": "stringreftoken_n_num$subexpression$1", "symbols": [entry_type_string]}, 488 | {"name": "stringreftoken_n_num$subexpression$1", "symbols": [entry_type_preamble]}, 489 | {"name": "stringreftoken_n_num$subexpression$1", 
"symbols": [entry_type_comment]}, 490 | { 491 | "name": "stringreftoken_n_num", 492 | "symbols": ["stringreftoken_n_num$subexpression$1"], 493 | "postprocess": function (data) { 494 | if (typeof data[0][0] === "object") { 495 | if (!data[0][0].string) throw new Error("Expected " + data[0] + "to have a 'string' field"); 496 | return data[0][0].string; 497 | } else { 498 | if ((!(typeof data[0][0] === "string" || typeof data[0][0] === "number"))) 499 | throw new Error("Expected " + data[0][0] + " to be a string"); 500 | return data[0][0]; 501 | } 502 | } 503 | }, 504 | {"name": "non_brace$subexpression$1", "symbols": [esc]}, 505 | {"name": "non_brace$subexpression$1", "symbols": [paren_l]}, 506 | {"name": "non_brace$subexpression$1", "symbols": [paren_r]}, 507 | {"name": "non_brace$subexpression$1", "symbols": [tok_id]}, 508 | {"name": "non_brace$subexpression$1", "symbols": [quote_dbl]}, 509 | {"name": "non_brace$subexpression$1", "symbols": [ws]}, 510 | {"name": "non_brace$subexpression$1", "symbols": [num]}, 511 | {"name": "non_brace$subexpression$1", "symbols": [comma]}, 512 | {"name": "non_brace$subexpression$1", "symbols": [entry_type_bib]}, 513 | {"name": "non_brace$subexpression$1", "symbols": [entry_type_string]}, 514 | {"name": "non_brace$subexpression$1", "symbols": [entry_type_preamble]}, 515 | {"name": "non_brace$subexpression$1", "symbols": [entry_type_comment]}, 516 | {"name": "non_brace$subexpression$1", "symbols": [pound]}, 517 | {"name": "non_brace$subexpression$1", "symbols": [eq]}, 518 | { 519 | "name": "non_brace", 520 | "symbols": ["non_brace$subexpression$1"], 521 | "postprocess": function (data) { 522 | return data[0][0]; 523 | } 524 | }, 525 | {"name": "non_bracket$subexpression$1", "symbols": [esc]}, 526 | {"name": "non_bracket$subexpression$1", "symbols": [tok_id]}, 527 | {"name": "non_bracket$subexpression$1", "symbols": [quote_dbl]}, 528 | {"name": "non_bracket$subexpression$1", "symbols": [ws]}, 529 | {"name": 
"non_bracket$subexpression$1", "symbols": [num]}, 530 | {"name": "non_bracket$subexpression$1", "symbols": [comma]}, 531 | {"name": "non_bracket$subexpression$1", "symbols": [entry_type_bib]}, 532 | {"name": "non_bracket$subexpression$1", "symbols": [entry_type_string]}, 533 | {"name": "non_bracket$subexpression$1", "symbols": [entry_type_preamble]}, 534 | {"name": "non_bracket$subexpression$1", "symbols": [entry_type_comment]}, 535 | {"name": "non_bracket$subexpression$1", "symbols": [pound]}, 536 | {"name": "non_bracket$subexpression$1", "symbols": [eq]}, 537 | { 538 | "name": "non_bracket", 539 | "symbols": ["non_bracket$subexpression$1"], 540 | "postprocess": function (data) { 541 | return data[0][0]; 542 | } 543 | }, 544 | {"name": "non_entry$ebnf$1$subexpression$1", "symbols": ["escaped_entry"]}, 545 | {"name": "non_entry$ebnf$1$subexpression$1", "symbols": ["escaped_escape"]}, 546 | {"name": "non_entry$ebnf$1$subexpression$1", "symbols": ["escaped_non_esc_outside_entry"]}, 547 | {"name": "non_entry$ebnf$1$subexpression$1", "symbols": ["non_esc_outside_entry"]}, 548 | {"name": "non_entry$ebnf$1", "symbols": ["non_entry$ebnf$1$subexpression$1"]}, 549 | {"name": "non_entry$ebnf$1$subexpression$2", "symbols": ["escaped_entry"]}, 550 | {"name": "non_entry$ebnf$1$subexpression$2", "symbols": ["escaped_escape"]}, 551 | {"name": "non_entry$ebnf$1$subexpression$2", "symbols": ["escaped_non_esc_outside_entry"]}, 552 | {"name": "non_entry$ebnf$1$subexpression$2", "symbols": ["non_esc_outside_entry"]}, 553 | { 554 | "name": "non_entry$ebnf$1", 555 | "symbols": ["non_entry$ebnf$1", "non_entry$ebnf$1$subexpression$2"], 556 | "postprocess": function arrpush(d) { 557 | return d[0].concat([d[1]]); 558 | } 559 | }, 560 | { 561 | "name": "non_entry", "symbols": ["non_entry$ebnf$1"], "postprocess": function (data) { 562 | // console.log("non_entry",data); 563 | const tokens: any = []; 564 | for (let Ti = 0; Ti < data[0].length; Ti++) tokens.push(data[0][Ti][0]); 565 | return 
tokens; 566 | } 567 | }, 568 | { 569 | "name": "escaped_escape", "symbols": [esc, esc], "postprocess": function () { 570 | return "\\"; 571 | } 572 | }, 573 | { 574 | "name": "escaped_entry", "symbols": [esc, "entry_decl"], "postprocess": function (data) { 575 | return {type: "escapedEntry", data: data[1]}; 576 | } 577 | }, 578 | { 579 | "name": "escaped_non_esc_outside_entry", 580 | "symbols": [esc, "non_esc_outside_entry"], 581 | "postprocess": function (data) { 582 | return data; // ["\\", data[1]]; 583 | } 584 | }, 585 | {"name": "non_esc_outside_entry$subexpression$1", "symbols": [tok_id]}, 586 | {"name": "non_esc_outside_entry$subexpression$1", "symbols": [ws]}, 587 | {"name": "non_esc_outside_entry$subexpression$1", "symbols": [num]}, 588 | {"name": "non_esc_outside_entry$subexpression$1", "symbols": [pound]}, 589 | {"name": "non_esc_outside_entry$subexpression$1", "symbols": [eq]}, 590 | {"name": "non_esc_outside_entry$subexpression$1", "symbols": [paren_l]}, 591 | {"name": "non_esc_outside_entry$subexpression$1", "symbols": [paren_r]}, 592 | {"name": "non_esc_outside_entry$subexpression$1", "symbols": [brace_l]}, 593 | {"name": "non_esc_outside_entry$subexpression$1", "symbols": [brace_r]}, 594 | {"name": "non_esc_outside_entry$subexpression$1", "symbols": [quote_dbl]}, 595 | {"name": "non_esc_outside_entry$subexpression$1", "symbols": [comma]}, 596 | { 597 | "name": "non_esc_outside_entry", 598 | "symbols": ["non_esc_outside_entry$subexpression$1"], 599 | "postprocess": function (data) { 600 | // console.log("ooutside_entry",data[0][0]); 601 | return data[0][0]; 602 | } 603 | } 604 | ] 605 | , ParserStart: "main" 606 | }; --------------------------------------------------------------------------------