├── requirements.txt ├── tox.ini ├── MANIFEST.in ├── .coveragerc ├── bibtexparser ├── tests │ ├── data │ │ ├── comments_only.bib │ │ ├── wrong.bib │ │ ├── comments_only_output.bib │ │ ├── website.bib │ │ ├── book.bib │ │ ├── book_output.bib │ │ ├── book_comma_first.bib │ │ ├── string.bib │ │ ├── article_field_name_with_underscore.bib │ │ ├── article_start_with_whitespace.bib │ │ ├── article_oneline.bib │ │ ├── article.bib │ │ ├── article_output.bib │ │ ├── encoding.bib │ │ ├── article_no_braces.bib │ │ ├── article_start_with_bom.bib │ │ ├── article_with_protection_braces.bib │ │ ├── traps.bib │ │ ├── article_with_special_characters.bib │ │ ├── features.bib │ │ ├── comments_percentage.bib │ │ ├── comments_percentage_nolastcoma.bib │ │ ├── article_comma_first.bib │ │ ├── features_output.bib │ │ ├── multiline_comments.bib │ │ ├── multiple_entries_output.bib │ │ ├── multiple_entries_and_comments_output.bib │ │ ├── features2.bib │ │ ├── multiple_entries.bib │ │ ├── multiple_entries_and_comments.bib │ │ └── article_missing_coma.bib │ ├── test_bibdatabase.py │ ├── test_homogenise_fields.py │ ├── test_preambles.py │ ├── test_bibtexexpression.py │ ├── test_latexenc.py │ ├── test_bibtex_strings.py │ ├── test_bwriter.py │ ├── test_bibtexparser.py │ ├── test_customization.py │ ├── test_comments.py │ ├── test_bibtexwriter.py │ ├── test_bparser.py │ └── test_splitname.py ├── __init__.py ├── bibdatabase.py ├── bwriter.py ├── bibtexexpression.py ├── bparser.py └── customization.py ├── RELEASE ├── .gitignore ├── docs ├── source │ ├── who.rst │ ├── bibtexparser.rst │ ├── bibtex_conv.rst │ ├── index.rst │ ├── install.rst │ ├── logging.rst │ ├── conf.py │ └── tutorial.rst └── Makefile ├── CONTRIBUTORS.txt ├── .travis.yml ├── setup.py ├── README.rst ├── CHANGELOG └── COPYING /requirements.txt: -------------------------------------------------------------------------------- 1 | pyparsing>=2.0.3 2 | -------------------------------------------------------------------------------- 
/tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py27,py35 3 | [testenv] 4 | deps = nose pyparsing 5 | commands = nosetests 6 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include requirements.txt 2 | include *.md 3 | include docs/Makefile 4 | include docs/source/* 5 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | source = bibtexparser 4 | 5 | [report] 6 | exclude_lines = 7 | if __name__ == .__main__.: 8 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/comments_only.bib: -------------------------------------------------------------------------------- 1 | @comment{ignore this line!} 2 | @Comment{ignore this line too!} 3 | @COMMENT{and ignore this line too!} 4 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/wrong.bib: -------------------------------------------------------------------------------- 1 | 2 | 3 | @wrong{foo, 4 | author = {wrong} 5 | } 6 | 7 | @article{bar, 8 | author = {correct} 9 | } 10 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/comments_only_output.bib: -------------------------------------------------------------------------------- 1 | @comment{ignore this line!} 2 | 3 | @comment{ignore this line too!} 4 | 5 | @comment{and ignore this line too!} 6 | 7 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/website.bib: -------------------------------------------------------------------------------- 1 | @misc{feder2006, 2 | title = {BibTeX}, 3 | author = 
{Alexander Feder}, 4 | url = {http://bibtex.org}, 5 | year = {2006} 6 | } 7 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/book.bib: -------------------------------------------------------------------------------- 1 | @BOOK{Bird1987, 2 | title = {Dynamics of Polymeric Liquid}, 3 | publisher = {Wiley Edition}, 4 | year = {1987}, 5 | author = {Bird, R.B. and Armstrong, R.C. and Hassager, O.}, 6 | volume = {1}, 7 | edition = {2}, 8 | } 9 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/book_output.bib: -------------------------------------------------------------------------------- 1 | @book{Bird1987, 2 | author = {Bird, R.B. and Armstrong, R.C. and Hassager, O.}, 3 | edition = {2}, 4 | publisher = {Wiley Edition}, 5 | title = {Dynamics of Polymeric Liquid}, 6 | volume = {1}, 7 | year = {1987} 8 | } 9 | 10 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/book_comma_first.bib: -------------------------------------------------------------------------------- 1 | @book{Bird1987 2 | , author = {Bird, R.B. and Armstrong, R.C. 
and Hassager, O.} 3 | , edition = {2} 4 | , publisher = {Wiley Edition} 5 | , title = {Dynamics of Polymeric Liquid} 6 | , volume = {1} 7 | , year = {1987} 8 | } 9 | 10 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/string.bib: -------------------------------------------------------------------------------- 1 | @STRING{oakland = {Proceedings of the {IEEE} Symposium on Security and Privacy}} 2 | @INPROCEEDINGS{cha:oakland15, 3 | author = {Sang Kil Cha and Maverick Woo and David Brumley}, 4 | title = {{Program-Adaptive Mutational Fuzzing}}, 5 | booktitle = oakland, 6 | year = {2015}, 7 | pages = {725--741} 8 | } 9 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/article_field_name_with_underscore.bib: -------------------------------------------------------------------------------- 1 | @ARTICLE{Cesar2013, 2 | author = {Jean César}, 3 | title = {An amazing title}, 4 | year = {2013}, 5 | volume = {12}, 6 | pages = {12-23}, 7 | journal = {Nice Journal}, 8 | comments = {A comment}, 9 | keyword = {keyword1, keyword2}, 10 | strange-field-name2 = {val2}, 11 | strange_field_name = {val}, 12 | } 13 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/article_start_with_whitespace.bib: -------------------------------------------------------------------------------- 1 | @ARTICLE{Cesar2013, 2 | author = {Jean Cesar}, 3 | title = {An amazing title}, 4 | year = {2013}, 5 | volume = {12}, 6 | journal = {Nice Journal} 7 | } 8 | 9 | @ARTICLE{Cesar2014, 10 | author = {Jean Cesar}, 11 | title = {An amazing title}, 12 | year = {2014}, 13 | volume = {12}, 14 | journal = {Nice Journal} 15 | } -------------------------------------------------------------------------------- /bibtexparser/tests/data/article_oneline.bib: -------------------------------------------------------------------------------- 1 | 
@ARTICLE{Cesar2013, author = {Jean Cesar}, title = {An amazing title}, year = {2013}, volume = {12}, journal = {Nice Journal}, comments = {A comment}, keyword = {keyword1, keyword2}} 2 | 3 | @ARTICLE{ Baltazar2013,author = {Jean Baltazar},title = {An amazing title},year = {2013},volume = {12},journal = {Nice Journal},comments = {A comment},keyword = {keyword1, keyword2}} 4 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/article.bib: -------------------------------------------------------------------------------- 1 | @ARTICLE{Cesar2013, 2 | author = {Jean César}, 3 | title = {An amazing title}, 4 | year = {2013}, 5 | month = "jan", 6 | volume = {12}, 7 | pages = {12-23}, 8 | journal = {Nice Journal}, 9 | abstract = {This is an abstract. This line should be long enough to test 10 | multilines... and with a french érudit word}, 11 | comments = {A comment}, 12 | keyword = {keyword1, keyword2}, 13 | } 14 | -------------------------------------------------------------------------------- /RELEASE: -------------------------------------------------------------------------------- 1 | How to release 2 | ============== 3 | 4 | * Update CHANGELOG 5 | * Update version in __init__.py 6 | * git tag -a 'vX' 7 | * merge in branch latest 8 | * Create a tarball and upload it on the server 9 | git archive master --prefix 'bibtexparser/' | bzip2 > bibtexparser-x.y.tar.bz2 10 | * Send the package on pypi 11 | python setup.py sdist upload 12 | * tick the doc version on readthedocs 13 | * Update version in __init__.py 14 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/article_output.bib: -------------------------------------------------------------------------------- 1 | @article{Cesar2013, 2 | abstract = {This is an abstract. This line should be long enough to test 3 | multilines... 
and with a french érudit word}, 4 | author = {Jean César}, 5 | comments = {A comment}, 6 | journal = {Nice Journal}, 7 | keyword = {keyword1, keyword2}, 8 | month = {jan}, 9 | pages = {12-23}, 10 | title = {An amazing title}, 11 | volume = {12}, 12 | year = {2013} 13 | } 14 | 15 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/encoding.bib: -------------------------------------------------------------------------------- 1 | @ARTICLE{Cesar_2013, 2 | author = {Jean César}, 3 | title = {An amazing title: à}, 4 | year = {2013}, 5 | month = "jan", 6 | volume = {12}, 7 | pages = {12-23}, 8 | journal = {Elémentaire}, 9 | abstract = {This is an abstract. This line should be long enough to test 10 | multilines... and with a french érudit word}, 11 | comments = {A comment}, 12 | keywords = {keyword1, keyword2}, 13 | } 14 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/article_no_braces.bib: -------------------------------------------------------------------------------- 1 | @ARTICLE{Cesar2013, 2 | author = "Jean C{\'e}sar{\"u}", 3 | title = "An amazing title", 4 | year = "2013", 5 | month = "jan", 6 | volume = "12", 7 | pages = "12-23", 8 | journal = "Nice Journal", 9 | abstract = "This is an abstract. This line should be long enough to test 10 | multilines... and with a french érudit word", 11 | comments = "A comment", 12 | keyword = "keyword1, keyword2", 13 | } 14 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/article_start_with_bom.bib: -------------------------------------------------------------------------------- 1 | 2 | @ARTICLE{Cesar2013, 3 | author = {Jean César}, 4 | title = {An amazing title}, 5 | year = {2013}, 6 | month = "jan", 7 | volume = {12}, 8 | pages = {12-23}, 9 | journal = {Nice Journal}, 10 | abstract = {This is an abstract. 
This line should be long enough to test 11 | multilines... and with a french érudit word}, 12 | comments = {A comment}, 13 | keyword = {keyword1, keyword2}, 14 | } 15 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/article_with_protection_braces.bib: -------------------------------------------------------------------------------- 1 | @ARTICLE{Cesar2013, 2 | author = {Jean César}, 3 | title = {{An amazing title}}, 4 | year = {2013}, 5 | month = "jan", 6 | volume = {12}, 7 | pages = {12-23}, 8 | journal = {{Nice Journal}}, 9 | abstract = {This is an abstract. This line should be long enough to test 10 | multilines... and with a french érudit word}, 11 | comments = {A comment}, 12 | keyword = {keyword1, keyword2}, 13 | } 14 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/traps.bib: -------------------------------------------------------------------------------- 1 | @ARTICLE{Laide2013, 2 | author = {Jean Laid{\'e}, 3 | Ben Loaeb}, 4 | title = {{An} amazing {title}}, 5 | year = {2013}, 6 | month = "jan", 7 | volume = {n.s.~2}, 8 | pages = {12-23}, 9 | journal = {Nice Journal}, 10 | abstract = {This is an abstract. This line should be long enough to test 11 | multilines... and with a french érudit word}, 12 | comments = {A comment}, 13 | keywords = {keyword1, keyword2}, 14 | } 15 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/article_with_special_characters.bib: -------------------------------------------------------------------------------- 1 | @ARTICLE{Cesar2013, 2 | author = {Jean C{\'e}sar{\"u}}, 3 | title = {An amazing title}, 4 | year = {2013}, 5 | month = "jan", 6 | volume = {12}, 7 | pages = {12-23}, 8 | journal = {Nice Journal}, 9 | abstract = {This is an abstract. This line should be long enough to test 10 | multilines... 
and with a french érudit word}, 11 | comments = {A comment}, 12 | keyword = {keyword1, keyword2}, 13 | } 14 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/features.bib: -------------------------------------------------------------------------------- 1 | @comment{ignore this line!} 2 | @Comment{ignore this line too!} 3 | @COMMENT{and ignore this line too!} 4 | 5 | @preamble{ "\makeatletter" } 6 | @preamble{ "\@ifundefined{url}{\def\url#1{\texttt{#1}}}{}" } 7 | @preamble{ "\makeatother" } 8 | 9 | @string{mystring = "Hello"} 10 | @string{myconf = "My International Conference"} 11 | @string{myname = "Doe"} 12 | 13 | @inproceedings{mykey, 14 | author = "John", 15 | title = {Cool Stuff}, 16 | booktitle = myconf, 17 | year = 2014, 18 | } 19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist 10 | build 11 | eggs 12 | parts 13 | bin 14 | var 15 | sdist 16 | develop-eggs 17 | .installed.cfg 18 | lib 19 | lib64 20 | 21 | # Installer logs 22 | pip-log.txt 23 | 24 | # Unit test / coverage reports 25 | .coverage 26 | .tox 27 | nosetests.xml 28 | 29 | # Translations 30 | *.mo 31 | 32 | # Mr Developer 33 | .mr.developer.cfg 34 | .project 35 | .pydevproject 36 | 37 | # Pycharm 38 | .idea 39 | 40 | # Vim. 41 | *.swp 42 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/comments_percentage.bib: -------------------------------------------------------------------------------- 1 | @ARTICLE{Cesar2013, 2 | author = {Jean Cesar}, 3 | title = {An amazing title}, 4 | year = {2013}, 5 | volume = {12}, 6 | journal = {Nice Journal}, 7 | comments = {A comment}, 8 | keyword = {keyword1, keyword2}, 9 | } 10 | % comment. 
11 | @ARTICLE{Baltazar2013, 12 | author = {Jean Baltazar}, 13 | title = {An amazing title}, 14 | year = {2013}, 15 | volume = {12}, 16 | journal = {Nice Journal}, 17 | comments = {A comment}, 18 | keyword = {keyword1, keyword2}, 19 | } 20 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/comments_percentage_nolastcoma.bib: -------------------------------------------------------------------------------- 1 | @ARTICLE{Cesar2013, 2 | author = {Jean Cesar}, 3 | title = {An amazing title}, 4 | year = {2013}, 5 | volume = {12}, 6 | journal = {Nice Journal}, 7 | comments = {A comment}, 8 | keyword = {keyword1, keyword2} 9 | } 10 | % comment. 11 | @ARTICLE{Baltazar2013, 12 | author = {Jean Baltazar}, 13 | title = {An amazing title}, 14 | year = {2013}, 15 | volume = {12}, 16 | journal = {Nice Journal}, 17 | comments = {A comment}, 18 | keyword = {keyword1, keyword2} 19 | } 20 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/article_comma_first.bib: -------------------------------------------------------------------------------- 1 | @ARTICLE{Cesar2013 2 | , author = {Jean Cesar} 3 | , title = {An amazing title} 4 | , year = {2013} 5 | , volume = {12} 6 | , journal = {Nice Journal} 7 | , comments = {A comment} 8 | , keyword = {keyword1, keyword2} 9 | } 10 | 11 | @ARTICLE{ Baltazar2013 12 | , author = {Jean Baltazar} 13 | , title = {An amazing title} 14 | , year = {2013} 15 | , volume = {12} 16 | , journal = {Nice Journal} 17 | , comments = {A comment} 18 | , keyword = {keyword1, keyword2}} 19 | -------------------------------------------------------------------------------- /docs/source/who.rst: -------------------------------------------------------------------------------- 1 | Who uses BibtexParser? 2 | ====================== 3 | 4 | If your project uses BibtexParser, you can ask for the addition of a link in this list. 
5 | 6 | * http://timotheepoisot.fr/2013/11/10/shared-bibtex-file-markdown/ 7 | * https://github.com/Phyks/BMC 8 | * http://aurelien.naldi.info/research/publications.html 9 | * http://robot.kut.ac.kr/publications 10 | * https://git.atelo.org/etlapale/bibgen 11 | * https://onmenwhostareongraphs.wordpress.com/2015/06/09/graph-display-software-for-author-relationships-with-bibtex-files/ 12 | * https://github.com/vitorfs/parsifal 13 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/features_output.bib: -------------------------------------------------------------------------------- 1 | @comment{ignore this line!} 2 | 3 | @comment{ignore this line too!} 4 | 5 | @comment{and ignore this line too!} 6 | 7 | @preamble{ "\makeatletter" } 8 | 9 | @preamble{ "\@ifundefined{url}{\def\url#1{\texttt{#1}}}{}" } 10 | 11 | @preamble{ "\makeatother" } 12 | 13 | @string{mystring = "Hello"} 14 | 15 | @string{myconf = "My International Conference"} 16 | 17 | @string{myname = "Doe"} 18 | 19 | @inproceedings{mykey, 20 | author = {John}, 21 | booktitle = {My International Conference}, 22 | title = {Cool Stuff}, 23 | year = {2014} 24 | } 25 | 26 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/multiline_comments.bib: -------------------------------------------------------------------------------- 1 | @comment{Lorem ipsum dolor sit amet, 2 | consectetur adipisicing elit} 3 | 4 | @comment{ 5 | Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. 6 | Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. 7 | 8 | Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. 9 | Excepteur sint occaecat cupidatat non proident. 10 | , 11 | } 12 | 13 | @comment{ 14 | 15 | 16 | Sunt in culpa qui officia deserunt mollit anim id est laborum. 
17 | 18 | 19 | } 20 | 21 | @comment{} 22 | 23 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/multiple_entries_output.bib: -------------------------------------------------------------------------------- 1 | @book{Toto3000, 2 | author = {Toto, A and Titi, B}, 3 | title = {A title} 4 | } 5 | 6 | @article{Wigner1938, 7 | author = {Wigner, E.}, 8 | doi = {10.1039/TF9383400029}, 9 | issn = {0014-7672}, 10 | journal = {Trans. Faraday Soc.}, 11 | owner = {fr}, 12 | pages = {29--41}, 13 | publisher = {The Royal Society of Chemistry}, 14 | title = {The transition state method}, 15 | volume = {34}, 16 | year = {1938} 17 | } 18 | 19 | @book{Yablon2005, 20 | author = {Yablon, A.D.}, 21 | publisher = {Springer}, 22 | title = {Optical fiber fusion slicing}, 23 | year = {2005} 24 | } 25 | 26 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/multiple_entries_and_comments_output.bib: -------------------------------------------------------------------------------- 1 | @comment{} 2 | 3 | @comment{A comment} 4 | 5 | @book{Toto3000, 6 | author = {Toto, A and Titi, B}, 7 | title = {A title} 8 | } 9 | 10 | @article{Wigner1938, 11 | author = {Wigner, E.}, 12 | doi = {10.1039/TF9383400029}, 13 | issn = {0014-7672}, 14 | journal = {Trans. 
Faraday Soc.}, 15 | owner = {fr}, 16 | pages = {29--41}, 17 | publisher = {The Royal Society of Chemistry}, 18 | title = {The transition state method}, 19 | volume = {34}, 20 | year = {1938} 21 | } 22 | 23 | @book{Yablon2005, 24 | author = {Yablon, A.D.}, 25 | publisher = {Springer}, 26 | title = {Optical fiber fusion slicing}, 27 | year = {2005} 28 | } 29 | 30 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/features2.bib: -------------------------------------------------------------------------------- 1 | @string{CoOl = "Cool"} 2 | @string{stuff = "Stuff"} 3 | @string{myTitle = cool # " " # stuff} 4 | 5 | @string{int = "International"} 6 | @string{myconf = "My "#int#" Conference"} 7 | 8 | @string{myname = "Doe"} 9 | 10 | @String {firstname = "John"} 11 | @String {lastname = myname} 12 | @String {domain = "example"} 13 | @String {tld = "com"} 14 | 15 | @String {foo = "1--10"} 16 | @String {BaR = FOO} 17 | @String {pages = baR} 18 | 19 | @inproceedings{mykey, 20 | author = "John " # mynamE, 21 | title = mytitle, 22 | booktitle = myconf, 23 | pages = pages, 24 | year = 2014, 25 | note = "Email: " # firstname # "." # lastname # 26 | "@" # domain # "." # tld, 27 | } 28 | -------------------------------------------------------------------------------- /CONTRIBUTORS.txt: -------------------------------------------------------------------------------- 1 | - François Boulogne 2 | Project coordinator 3 | 4 | - bibserver's contributors 5 | for the parser's core and the permission to release this project under LGPLv3 and BSD 6 | 7 | - Shuen-Huei (Drake) Guan 8 | Python 2.7 porting 9 | 10 | - Sebastien Diemer 11 | Bugfix 12 | 13 | - Georg C. Brückmann 14 | Support for non-standard entry types 15 | 16 | - Uwe Schmidt 17 | String replacement 18 | 19 | - faph 20 | coma fixes, optional keys sanitising, refactoring and other improvements 21 | 22 | - Steven M. 
Bellovin 23 | Fix braces detection 24 | 25 | - Sven Goossens 26 | Support for bibtex with leading spaces 27 | 28 | - Michal Grochmal 29 | Comma first syntax support 30 | 31 | - Cschaffner 32 | New features in bwriter 33 | 34 | - Olivier Mangin 35 | Pyparsing implementation of the parser. 36 | 37 | - Blair Bonnett 38 | customization.splitname() function 39 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | matrix: 3 | include: 4 | - python: "2.7" 5 | env: TEST_SUITE=suite_2_7 6 | - python: "3.3" 7 | env: TEST_SUITE=suite_3_3 8 | - python: "3.4" 9 | env: TEST_SUITE=suite_3_4 10 | - python: "3.5" 11 | env: TEST_SUITE=suite_3_5 12 | - python: "pypy" 13 | env: TEST_SUITE=suite_pypy 14 | - python: "pypy3" 15 | env: TEST_SUITE=suite_pypy3 16 | install: 17 | - if [[ $TEST_SUITE == suite_3_5 ]]; then 18 | pip install sphinx; 19 | fi; 20 | - pip install coverage 21 | - pip install -r requirements.txt 22 | - python setup.py install 23 | script: 24 | - nosetests --with-coverage --cover-erase --cover-package=bibtexparser 25 | - if [[ $TEST_SUITE == suite_3_5 ]]; then 26 | cd docs; 27 | make html; 28 | fi; 29 | 30 | after_success: 31 | - pip install coveralls 32 | - coveralls 33 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | try: 4 | from setuptools import setup 5 | except ImportError as ex: 6 | print('[python-bibtexparser] setuptools not found. 
Falling back to distutils.core') 7 | from distutils.core import setup 8 | 9 | with open('bibtexparser/__init__.py') as fh: 10 | for line in fh: 11 | if line.startswith('__version__'): 12 | version = line.strip().split()[-1][1:-1] 13 | break 14 | 15 | setup( 16 | name = 'bibtexparser', 17 | version = version, 18 | url = "https://github.com/sciunto-org/python-bibtexparser", 19 | author = "Francois Boulogne and other contributors", 20 | license = "LGPLv3 or BSD", 21 | author_email = "devel@sciunto.org", 22 | description = "Bibtex parser for python 2.7 and 3.3 and newer", 23 | packages = ['bibtexparser'], 24 | install_requires = ['pyparsing'], 25 | ) 26 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/multiple_entries.bib: -------------------------------------------------------------------------------- 1 | @Book{Yablon2005, 2 | Title = {Optical fiber fusion slicing}, 3 | Author = {Yablon, A.D.}, 4 | Publisher = {Springer}, 5 | Year = {2005}, 6 | } 7 | 8 | @Article{Wigner1938, 9 | Title = {The transition state method}, 10 | Author = {Wigner, E.}, 11 | Journal = {Trans. Faraday Soc.}, 12 | Year = {1938}, 13 | Pages = {29--41}, 14 | Volume = {34}, 15 | Doi = {10.1039/TF9383400029}, 16 | ISSN = {0014-7672}, 17 | Owner = {fr}, 18 | Publisher = {The Royal Society of Chemistry}, 19 | } 20 | 21 | @Book{Toto3000, 22 | Title = {A title}, 23 | Author = {Toto, A and Titi, B}, 24 | } 25 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/multiple_entries_and_comments.bib: -------------------------------------------------------------------------------- 1 | @Book{Yablon2005, 2 | Title = {Optical fiber fusion slicing}, 3 | Author = {Yablon, A.D.}, 4 | Publisher = {Springer}, 5 | Year = {2005}, 6 | } 7 | 8 | @Article{Wigner1938, 9 | Title = {The transition state method}, 10 | Author = {Wigner, E.}, 11 | Journal = {Trans. 
Faraday Soc.}, 12 | Year = {1938}, 13 | Pages = {29--41}, 14 | Volume = {34}, 15 | Doi = {10.1039/TF9383400029}, 16 | ISSN = {0014-7672}, 17 | Owner = {fr}, 18 | Publisher = {The Royal Society of Chemistry}, 19 | } 20 | 21 | @Book{Toto3000, 22 | Title = {A title}, 23 | Author = {Toto, A and Titi, B}, 24 | } 25 | 26 | @Comment{} 27 | 28 | @Comment{A comment} 29 | 30 | -------------------------------------------------------------------------------- /bibtexparser/tests/test_bibdatabase.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from bibtexparser.bibdatabase import BibDatabase 3 | 4 | 5 | class TestBibDatabase(unittest.TestCase): 6 | entries = [{'ENTRYTYPE': 'book', 7 | 'year': '1987', 8 | 'edition': '2', 9 | 'publisher': 'Wiley Edition', 10 | 'ID': 'Bird1987', 11 | 'volume': '1', 12 | 'title': 'Dynamics of Polymeric Liquid', 13 | 'author': 'Bird, R.B. and Armstrong, R.C. and Hassager, O.' 14 | }] 15 | 16 | def test_entries_list_method(self): 17 | bib_db = BibDatabase() 18 | bib_db.entries = self.entries 19 | self.assertEqual(bib_db.entries, bib_db.get_entry_list()) 20 | 21 | def test_entries_dict_prop(self): 22 | bib_db = BibDatabase() 23 | bib_db.entries = self.entries 24 | self.assertEqual(bib_db.entries_dict, bib_db.get_entry_dict()) 25 | 26 | 27 | if __name__ == '__main__': 28 | unittest.main() 29 | -------------------------------------------------------------------------------- /bibtexparser/tests/data/article_missing_coma.bib: -------------------------------------------------------------------------------- 1 | @ARTICLE{Cesar2013, 2 | author = {Jean Cesar}, 3 | title = {An amazing title}, 4 | year = {2013}, 5 | volume = {12}, 6 | journal = {Nice Journal}, 7 | comments = {A comment}, 8 | keyword = {keyword1, keyword2} 9 | } 10 | 11 | @ARTICLE{Baltazar2013, 12 | author = {Jean Baltazar}, 13 | title = {An amazing title}, 14 | year = {2013}, 15 | volume = {12}, 16 | journal = {Nice Journal}, 17 | 
comments = {A comment}, 18 | keyword = {keyword1, keyword2}} 19 | 20 | @ARTICLE{Aimar2013, 21 | author = {Jean Aimar}, 22 | title = {An amazing title}, 23 | year = {2013}, 24 | volume = {12}, 25 | journal = {Nice Journal}, 26 | comments = {A comment}, 27 | keyword = {keyword1, keyword2}, 28 | month = "january" 29 | } 30 | 31 | @ARTICLE{Doute2013, 32 | author = {Jean Doute}, 33 | title = {An amazing title}, 34 | volume = {12}, 35 | journal = {Nice Journal}, 36 | comments = {A comment}, 37 | keyword = {keyword1, keyword2}, 38 | year = "2013" 39 | } 40 | -------------------------------------------------------------------------------- /bibtexparser/tests/test_homogenise_fields.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from bibtexparser.bparser import BibTexParser 3 | 4 | 5 | class TestHomogenizeFields(unittest.TestCase): 6 | def test_homogenize_default(self): 7 | with open('bibtexparser/tests/data/website.bib', 'r') as bibfile: 8 | bib = BibTexParser(bibfile.read()) 9 | entries = bib.get_entry_list() 10 | self.assertNotIn('link', entries[0]) 11 | self.assertIn('url', entries[0]) 12 | 13 | def test_homogenize_on(self): 14 | with open('bibtexparser/tests/data/website.bib', 'r') as bibfile: 15 | bib = BibTexParser(bibfile.read(), homogenize_fields=True) 16 | entries = bib.get_entry_list() 17 | self.assertIn('link', entries[0]) 18 | self.assertNotIn('url', entries[0]) 19 | 20 | def test_homogenize_off(self): 21 | with open('bibtexparser/tests/data/website.bib', 'r') as bibfile: 22 | bib = BibTexParser(bibfile.read(), homogenize_fields=False) 23 | entries = bib.get_entry_list() 24 | self.assertNotIn('link', entries[0]) 25 | self.assertIn('url', entries[0]) 26 | -------------------------------------------------------------------------------- /docs/source/bibtexparser.rst: -------------------------------------------------------------------------------- 1 | .. _bibtexparser_api: 2 | 3 | .. 
contents:: 4 | 5 | bibtexparser: API 6 | ================= 7 | 8 | :mod:`bibtexparser` --- Parsing and writing BibTeX files 9 | -------------------------------------------------------- 10 | 11 | .. automodule:: bibtexparser 12 | :members: load, loads, dumps, dump 13 | 14 | :mod:`bibtexparser.bibdatabase` --- The bibliographic database object 15 | --------------------------------------------------------------------- 16 | 17 | .. autoclass:: bibdatabase.BibDatabase 18 | :members: entries, entries_dict, comments, strings, preambles 19 | 20 | :mod:`bibtexparser.bparser` --- Tune the default parser 21 | -------------------------------------------------------- 22 | 23 | .. automodule:: bparser 24 | :members: 25 | 26 | :mod:`bibtexparser.customization` --- Functions to customize records 27 | -------------------------------------------------------------------- 28 | 29 | .. automodule:: customization 30 | :members: 31 | 32 | Exception classes 33 | ^^^^^^^^^^^^^^^^^ 34 | .. autoclass:: customization.InvalidName 35 | 36 | :mod:`bibtexparser.bwriter` --- Tune the default writer 37 | ------------------------------------------------------- 38 | 39 | .. autoclass:: bwriter.BibTexWriter 40 | :members: 41 | 42 | :mod:`bibtexparser.bibtexexpression` --- Parser's core relying on pyparsing 43 | --------------------------------------------------------------------------- 44 | 45 | .. automodule:: bibtexexpression 46 | :members: 47 | 48 | -------------------------------------------------------------------------------- /docs/source/bibtex_conv.rst: -------------------------------------------------------------------------------- 1 | =============================================== 2 | Bibtex tips, conventions and unrelated projects 3 | =============================================== 4 | 5 | This page presents various resources about bibtex in general. 
6 | 7 | Format 8 | ====== 9 | 10 | http://maverick.inria.fr/~Xavier.Decoret/resources/xdkbibtex/bibtex_summary.html 11 | 12 | * Comments 13 | * Variable 14 | * @preamble 15 | * Name convention 16 | 17 | Upper case letters in titles 18 | ---------------------------- 19 | 20 | Put the letter/word in curly braces like {this}. 21 | 22 | 23 | General references 24 | ------------------ 25 | 26 | * http://tug.ctan.org/tex-archive/info/bibtex/tamethebeast/ttb_en.pdf 27 | * http://ctan.mirrors.hoobly.com/macros/latex/contrib/biblatex/doc/biblatex.pdf 28 | 29 | IEEE citation reference 30 | ----------------------- 31 | 32 | * https://origin.www.ieee.org/documents/ieeecitationref.pdf 33 | 34 | 35 | Common Errors in Bibliographies John Owens 36 | ------------------------------------------ 37 | 38 | * http://www.ece.ucdavis.edu/~jowens/biberrors.html 39 | 40 | Common abbreviations for journals 41 | --------------------------------- 42 | 43 | * Jabref list http://jabref.sourceforge.net/resources.php#downloadlists 44 | 45 | 46 | Projects 47 | ======== 48 | 49 | Here are some interesting projects using bibtex but not necessarily this parser. 50 | 51 | Display your bibliography in html pages 52 | --------------------------------------- 53 | 54 | * http://www.monperrus.net/martin/bibtexbrowser/ 55 | 56 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. BibtexParser documentation master file, created by 2 | sphinx-quickstart on Thu Aug 1 13:30:23 2013. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to BibtexParser's documentation! 
7 | ======================================== 8 | 9 | 10 | :Author: François Boulogne and other contributors 11 | :Devel: `github.com project `_ 12 | :Mirror: `git.sciunto.org `_ 13 | :Bugs: `github.com `_ 14 | :Generated: |today| 15 | :License: LGPL v3 or BSD 16 | :Version: |release| 17 | 18 | BibtexParser is a python library to parse bibtex files. The code relies on `pyparsing `_ and is tested with unittests. 19 | 20 | If you use BibtexParser for your project, feel free to send me an email. I would be happy to hear that and to mention your project in the documentation. 21 | 22 | Contents: 23 | 24 | .. toctree:: 25 | :maxdepth: 2 26 | 27 | install.rst 28 | tutorial.rst 29 | bibtexparser.rst 30 | logging.rst 31 | bibtex_conv.rst 32 | who.rst 33 | 34 | 35 | Other projects 36 | ============== 37 | 38 | * http://pybtex.sourceforge.net/ 39 | * http://pybliographer.org/ 40 | * https://github.com/matthew-brett/babybib 41 | 42 | Indices and tables 43 | ================== 44 | 45 | * :ref:`genindex` 46 | * :ref:`modindex` 47 | * :ref:`search` 48 | 49 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | python-bibtexparser 2 | =================== 3 | 4 | Python library to parse `bibtex `_ files. 5 | 6 | 7 | IMPORTANT: the library is looking for new maintainers. Please get in touch if you are interested. 8 | 9 | .. contents:: 10 | 11 | 12 | Bibtexparser relies on `pyparsing `_ and is compatible with Python 2.7 and 3.3 or newer. 13 | 14 | Documentation 15 | ------------- 16 | 17 | Our documentation includes the installation procedure, a tutorial, the API and advice on how to report a bug. 18 | References, related projects and software based on bibtexparser are also listed. If you would like to appear on this list, feel free to open a ticket or send an email.
19 | 20 | `Documentation on readthedocs.io `_ 21 | 22 | Upgrading 23 | --------- 24 | 25 | Please read the changelog before upgrading; it lists the API modifications. 26 | Prior to version 1.0, we do not hesitate to modify the API in order to improve it based on your feedback. 27 | 28 | License 29 | ------- 30 | 31 | Dual license (at your choice): 32 | 33 | * LGPLv3. 34 | * BSD 35 | 36 | See COPYING for details. 37 | 38 | History and evolutions 39 | ---------------------- 40 | 41 | The original source code was part of bibserver from `OKFN `_. This project is released under the AGPLv3. OKFN and the original authors kindly gave permission to use a subpart of their project (i.e. the bibtex parser) under LGPLv3. Many thanks to them! 42 | 43 | The parser evolved to a new core based on pyparsing. 44 | -------------------------------------------------------------------------------- /bibtexparser/tests/test_preambles.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import bibtexparser 3 | from bibtexparser.bibdatabase import BibDatabase 4 | from collections import OrderedDict 5 | 6 | 7 | class TestPreambleParse(unittest.TestCase): 8 | def test_single_preamble_parse_count(self): 9 | bibtex_str = '@preamble{" a "}\n\n' 10 | bib_database = bibtexparser.loads(bibtex_str) 11 | self.assertEqual(len(bib_database.preambles), 1) 12 | 13 | def test_multiple_preamble_parse_count(self): 14 | bibtex_str = '@preamble{" a "}\n\n@preamble{"b"}\n\n' 15 | bib_database = bibtexparser.loads(bibtex_str) 16 | self.assertEqual(len(bib_database.preambles), 2) 17 | 18 | def test_single_preamble_parse(self): 19 | bibtex_str = '@preamble{" a "}\n\n' 20 | bib_database = bibtexparser.loads(bibtex_str) 21 | expected = [' a '] 22 | self.assertEqual(bib_database.preambles, expected) 23 | 24 | def test_multiple_preamble_parse(self): 25 | bibtex_str = '@preamble{" a "}\n\n@preamble{"b"}\n\n' 26 | bib_database = bibtexparser.loads(bibtex_str) 27 |
expected = [' a ', 'b'] 28 | self.assertEqual(bib_database.preambles, expected) 29 | 30 | 31 | class TestPreambleWrite(unittest.TestCase): 32 | def test_single_preamble_write(self): 33 | bib_database = BibDatabase() 34 | bib_database.preambles = [' a '] 35 | result = bibtexparser.dumps(bib_database) 36 | expected = '@preamble{" a "}\n\n' 37 | self.assertEqual(result, expected) 38 | 39 | def test_multiple_string_write(self): 40 | bib_database = BibDatabase() 41 | bib_database.preambles = [' a ', 'b'] 42 | result = bibtexparser.dumps(bib_database) 43 | expected = '@preamble{" a "}\n\n@preamble{"b"}\n\n' 44 | self.assertEqual(result, expected) 45 | -------------------------------------------------------------------------------- /docs/source/install.rst: -------------------------------------------------------------------------------- 1 | ======================== 2 | How to install and test? 3 | ======================== 4 | 5 | How to install? 6 | =============== 7 | 8 | Requirements 9 | ------------ 10 | 11 | * python **2.7** or python **3.3** or newer 12 | * pyparsing **2.0.3** or newer 13 | 14 | Package manager (recommended for those OS users) 15 | ------------------------------------------------ 16 | 17 | * `Archlinux `_ 18 | * `Debian `_ 19 | 20 | pip (recommended for other users) 21 | --------------------------------- 22 | 23 | To install with pip: 24 | 25 | .. code-block:: sh 26 | 27 | pip install bibtexparser 28 | 29 | 30 | Manual installation (recommended for packagers) 31 | ----------------------------------------------- 32 | 33 | Download the archive on `Pypi `_. 34 | 35 | .. code-block:: sh 36 | 37 | python setup.py install 38 | 39 | 40 | How to run the test suite? 41 | ========================== 42 | 43 | This paragraph briefly describes how to run the test suite. 44 | This is useful for contributors and packagers, but also for users who want to check their environment. 45 | 46 | 47 | Virtualenv 48 | ---------- 49 | 50 | You can make a virtualenv.
I like `pew `_ for that because the API is easier. 51 | 52 | The first time, you need to make a virtualenv: 53 | 54 | .. code-block:: sh 55 | 56 | pew mkproject bibtexparser 57 | pip install -r requirements.txt 58 | python setup.py install 59 | nosetests 60 | 61 | 62 | If you already have a virtualenv, you can use workon: 63 | 64 | .. code-block:: sh 65 | 66 | pew workon bibtexparser 67 | 68 | 69 | Tox 70 | --- 71 | 72 | The advantage of `Tox `_ is that you can build and test the code against several versions of python. 73 | Of course, you need tox to be installed on your system. 74 | The configuration file is tox.ini, in the root of the project. There, you can change the python versions. 75 | 76 | .. code-block:: sh 77 | 78 | tox # and nothing more :) 79 | -------------------------------------------------------------------------------- /bibtexparser/tests/test_bibtexexpression.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | from __future__ import unicode_literals 6 | import unittest 7 | 8 | from bibtexparser.bibtexexpression import BibtexExpression 9 | 10 | 11 | class TestBibtexExpression(unittest.TestCase): 12 | 13 | def setUp(self): 14 | self.expr = BibtexExpression() 15 | 16 | def test_minimal(self): 17 | result = self.expr.entry.parseString('@journal{key, name = 123 }') 18 | self.assertEqual(result.get('EntryType'), 'journal') 19 | self.assertEqual(result.get('Key'), 'key') 20 | self.assertEqual(result.get('Fields'), {'name': '123'}) 21 | 22 | def test_capital_type(self): 23 | result = self.expr.entry.parseString('@JOURNAL{key, name = 123 }') 24 | self.assertEqual(result.get('EntryType'), 'JOURNAL') 25 | 26 | def test_capital_key(self): 27 | result = self.expr.entry.parseString('@journal{KEY, name = 123 }') 28 | self.assertEqual(result.get('Key'), 'KEY') 29 | 30 | def test_braced(self): 31 | result = self.expr.entry.parseString('@journal{key, name = {abc} }') 32
| self.assertEqual(result.get('Fields'), {'name': 'abc'}) 33 | 34 | def test_braced_with_new_line(self): 35 | result = self.expr.entry.parseString( 36 | '@journal{key, name = {abc\ndef} }') 37 | self.assertEqual(result.get('Fields'), {'name': 'abc\ndef'}) 38 | 39 | def test_braced_unicode(self): 40 | result = self.expr.entry.parseString( 41 | '@journal{key, name = {àbcđéf} }') 42 | self.assertEqual(result.get('Fields'), {'name': 'àbcđéf'}) 43 | 44 | def test_quoted(self): 45 | result = self.expr.entry.parseString('@journal{key, name = "abc" }') 46 | self.assertEqual(result.get('Fields'), {'name': 'abc'}) 47 | 48 | def test_quoted_with_new_line(self): 49 | result = self.expr.entry.parseString( 50 | '@journal{key, name = "abc\ndef" }') 51 | self.assertEqual(result.get('Fields'), {'name': 'abc\ndef'}) 52 | 53 | def test_quoted_with_unicode(self): 54 | result = self.expr.entry.parseString( 55 | '@journal{key, name = "àbcđéf" }') 56 | self.assertEqual(result.get('Fields'), {'name': 'àbcđéf'}) 57 | -------------------------------------------------------------------------------- /bibtexparser/tests/test_latexenc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | #This program is free software: you can redistribute it and/or modify 4 | #it under the terms of the GNU General Public License as published by 5 | #the Free Software Foundation, either version 3 of the License, or 6 | #(at your option) any later version. 7 | # 8 | #This program is distributed in the hope that it will be useful, 9 | #but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | #GNU General Public License for more details. 12 | # 13 | #You should have received a copy of the GNU General Public License 14 | #along with this program. 
class TestLatexConverter(unittest.TestCase):
    """Check the unicode-to-LaTeX conversion performed by ``string_to_latex``."""

    def test_accent(self):
        """Accented letters are expected to come back as braced LaTeX accent macros."""
        string = 'à é è ö'
        result = string_to_latex(string)
        # Every backslash is escaped explicitly.  The previous literal relied
        # on the unrecognized escape "\`" being passed through unchanged,
        # which newer Python versions warn about as an invalid escape
        # sequence.  The resulting string value is identical.
        expected = "{\\`a} {\\'e} {\\`e} {\\\"o}"
        self.assertEqual(result, expected)

    def test_special_caracter(self):
        """A cedilla is expected to come back as the ``\\c`` macro."""
        string = 'ç'
        result = string_to_latex(string)
        # "\c" is not a valid Python escape either; spell the backslash out.
        expected = '{\\c c}'
        self.assertEqual(result, expected)
55 | self.assertEqual(result, expected) 56 | 57 | def test_traps(self): 58 | string = '{A, m{Agnificient, it is a {A' 59 | result = protect_uppercase(string) 60 | expected = '{A, m{Agnificient, it is a {A' 61 | self.assertEqual(result, expected) 62 | 63 | def test_traps2(self): 64 | string = 'A}, mA}gnificient, it is a A}' 65 | result = protect_uppercase(string) 66 | expected = 'A}, mA}gnificient, it is a A}' 67 | self.assertEqual(result, expected) 68 | 69 | 70 | if __name__ == '__main__': 71 | unittest.main() 72 | -------------------------------------------------------------------------------- /bibtexparser/tests/test_bibtex_strings.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import codecs 3 | import bibtexparser 4 | from bibtexparser.bibdatabase import BibDatabase 5 | from bibtexparser.bparser import BibTexParser 6 | from collections import OrderedDict 7 | 8 | 9 | class TestStringParse(unittest.TestCase): 10 | def test_single_string_parse_count(self): 11 | bibtex_str = '@string{name1 = "value1"}\n\n' 12 | bib_database = bibtexparser.loads(bibtex_str) 13 | self.assertEqual(len(bib_database.strings), 1) 14 | 15 | def test_multiple_string_parse_count(self): 16 | bibtex_str = '@string{name1 = "value1"}\n\n@string{name2 = "value2"}\n\n' 17 | bib_database = bibtexparser.loads(bibtex_str) 18 | self.assertEqual(len(bib_database.strings), 2) 19 | 20 | def test_single_string_parse(self): 21 | bibtex_str = '@string{name1 = "value1"}\n\n' 22 | bib_database = bibtexparser.loads(bibtex_str) 23 | expected = {'name1': 'value1'} 24 | self.assertEqual(bib_database.strings, expected) 25 | 26 | def test_multiple_string_parse(self): 27 | bibtex_str = '@string{name1 = "value1"}\n\n@string{name2 = "value2"}\n\n' 28 | bib_database = bibtexparser.loads(bibtex_str) 29 | expected = OrderedDict() 30 | expected['name1'] = 'value1' 31 | expected['name2'] = 'value2' 32 | self.assertEqual(bib_database.strings, expected) 33 | 34 | 
def test_string_braces(self): 35 | with codecs.open('bibtexparser/tests/data/string.bib', 'r', 'utf-8') as bibfile: 36 | bib = BibTexParser(bibfile.read()) 37 | res = bib.get_entry_list() 38 | expected = [{'author': 'Sang Kil Cha and Maverick Woo and David Brumley', 39 | 'ID': 'cha:oakland15', 40 | 'year': '2015', 41 | 'booktitle': 'Proceedings of the {IEEE} Symposium on Security and Privacy', 42 | 'title': '{Program-Adaptive Mutational Fuzzing}', 43 | 'ENTRYTYPE': 'inproceedings', 44 | 'pages': '725--741' 45 | }] 46 | self.assertEqual(res, expected) 47 | 48 | 49 | 50 | class TestStringWrite(unittest.TestCase): 51 | def test_single_string_write(self): 52 | bib_database = BibDatabase() 53 | bib_database.strings['name1'] = 'value1' 54 | result = bibtexparser.dumps(bib_database) 55 | expected = '@string{name1 = "value1"}\n\n' 56 | self.assertEqual(result, expected) 57 | 58 | def test_multiple_string_write(self): 59 | bib_database = BibDatabase() 60 | bib_database.strings['name1'] = 'value1' 61 | bib_database.strings['name2'] = 'value2' # Order is important! 62 | result = bibtexparser.dumps(bib_database) 63 | expected = '@string{name1 = "value1"}\n\n@string{name2 = "value2"}\n\n' 64 | self.assertEqual(result, expected) 65 | -------------------------------------------------------------------------------- /docs/source/logging.rst: -------------------------------------------------------------------------------- 1 | How to report a bug? 2 | ==================== 3 | 4 | Bugs can be reported on github or via private communications. 5 | 6 | Steps 7 | ----- 8 | 9 | 1. Make a minimal code, which reproduces the problem. 10 | 2. Provide the code, the bibtex (if necessary), the output. 11 | 3. For a parsing error, provide the expected output. 12 | 4. For a crash, set the logger to the debug level (see below). 13 | 14 | If you want to provide a patch (that's wonderful! thank you), please, take few minutes to write a unit test that fails without your contribution. 
15 | 16 | Logging module to understand failures 17 | ------------------------------------- 18 | 19 | The syntax of bibtex files is simple, but there are many possible variations. This library probably fails for some of them. 20 | 21 | Bibtexparser includes a large quantity of debug messages which help to understand why and where the parser fails. 22 | The example below can be used to print these messages in the console. 23 | 24 | .. code-block:: python 25 | 26 | import logging 27 | import logging.config 28 | 29 | logger = logging.getLogger(__name__) 30 | 31 | logging.config.dictConfig({ 32 | 'version': 1, 33 | 'disable_existing_loggers': False, 34 | 'formatters': { 35 | 'standard': { 36 | 'format': '%(asctime)s [%(levelname)s] %(name)s %(funcName)s:%(lineno)d: %(message)s' 37 | }, 38 | }, 39 | 'handlers': { 40 | 'default': { 41 | 'level':'DEBUG', 42 | 'formatter': 'standard', 43 | 'class':'logging.StreamHandler', 44 | }, 45 | }, 46 | 'loggers': { 47 | '': { 48 | 'handlers': ['default'], 49 | 'level': 'DEBUG', 50 | 'formatter': 'standard', 51 | 'propagate': True 52 | } 53 | } 54 | }) 55 | 56 | 57 | if __name__ == '__main__': 58 | bibtex = """@ARTICLE{Cesar2013, 59 | author = {Jean César}, 60 | title = {An amazing title}, 61 | year = {2013}, 62 | month = jan, 63 | volume = {12}, 64 | pages = {12--23}, 65 | journal = {Nice Journal}, 66 | abstract = {This is an abstract. This line should be long enough to test 67 | multilines...}, 68 | comments = {A comment}, 69 | keywords = {keyword1, keyword2}, 70 | } 71 | """ 72 | 73 | with open('/tmp/bibtex.bib', 'w') as bibfile: 74 | bibfile.write(bibtex) 75 | 76 | from bibtexparser.bparser import BibTexParser 77 | 78 | with open('/tmp/bibtex.bib', 'r') as bibfile: 79 | bp = BibTexParser(bibfile.read()) 80 | print(bp.get_entry_list()) 81 | 82 | I recommend that you include this output when you report a bug.
83 | -------------------------------------------------------------------------------- /bibtexparser/tests/test_bwriter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # Author: Francois Boulogne 4 | # License: 5 | 6 | from __future__ import unicode_literals 7 | 8 | import unittest 9 | import sys 10 | 11 | from bibtexparser.bparser import BibTexParser 12 | from bibtexparser.bwriter import BibTexWriter, to_bibtex 13 | from bibtexparser.customization import author 14 | 15 | 16 | class TestBibtexWriterList(unittest.TestCase): 17 | 18 | ########### 19 | # ARTICLE 20 | ########### 21 | def test_article(self): 22 | with open('bibtexparser/tests/data/article.bib', 'r') as bibfile: 23 | bib = BibTexParser(bibfile.read()) 24 | 25 | with open('bibtexparser/tests/data/article_output.bib', 'r') as bibfile: 26 | expected = bibfile.read() 27 | result = to_bibtex(bib) 28 | if not sys.version_info >= (3, 0): 29 | if isinstance(result, unicode): 30 | result = result.encode('utf-8') 31 | self.maxDiff = None 32 | self.assertEqual(expected, result) 33 | 34 | ########### 35 | # BOOK 36 | ########### 37 | def test_book(self): 38 | with open('bibtexparser/tests/data/book.bib', 'r') as bibfile: 39 | bib = BibTexParser(bibfile.read()) 40 | 41 | with open('bibtexparser/tests/data/book_output.bib', 'r') as bibfile: 42 | expected = bibfile.read() 43 | result = to_bibtex(bib) 44 | self.maxDiff = None 45 | self.assertEqual(expected, result) 46 | 47 | ########### 48 | # COMMA FIRST 49 | ########### 50 | def test_comma_first(self): 51 | with open('bibtexparser/tests/data/book.bib', 'r') as bibfile: 52 | bib = BibTexParser(bibfile.read()) 53 | 54 | with open('bibtexparser/tests/data/book_comma_first.bib', 'r') as bibfile: 55 | expected = bibfile.read() 56 | writer = BibTexWriter() 57 | writer.indent = ' ' 58 | writer.comma_first = True 59 | result = writer.write(bib) 60 | self.maxDiff = None 61 | 
self.assertEqual(expected, result) 62 | 63 | ########### 64 | # MULTIPLE 65 | ########### 66 | def test_multiple(self): 67 | with open('bibtexparser/tests/data/multiple_entries.bib', 'r') as bibfile: 68 | bib = BibTexParser(bibfile.read()) 69 | 70 | with open('bibtexparser/tests/data/multiple_entries_output.bib', 'r') as bibfile: 71 | expected = bibfile.read() 72 | result = to_bibtex(bib) 73 | self.maxDiff = None 74 | self.assertEqual(expected, result) 75 | 76 | ########### 77 | # Exception 78 | ########### 79 | def test_exception_typeerror(self): 80 | with open('bibtexparser/tests/data/article.bib', 'r') as bibfile: 81 | bib = BibTexParser(bibfile.read(), customization=author) 82 | self.assertRaises(TypeError, to_bibtex, bib) 83 | 84 | -------------------------------------------------------------------------------- /CHANGELOG: -------------------------------------------------------------------------------- 1 | v0.XXX 2 | ====== 3 | 4 | * ENH: we use pyparsing (#64) by Olivier Magin. 5 | * DOC: Refactoring of the tutorial 6 | * DOC: include docs/ in manifest 7 | * API: fix spelling "homogenize". Affects customization and bparser 8 | * API: BibtexParser: homogenize_fields is now False by default (#94) 9 | 10 | v0.6.2 11 | ====== 12 | 13 | * ENH: customization: handle various hyphens (#76). 14 | * ENH: writer: all values according to this maximal key width (#83). 15 | * END: writer: display_order allows to have custom ordering of the fields of 16 | each entry instead of just alphabetical (#83) by cschaffner. 17 | * FIX: bad support of braces in string (#90) by sangkilc. 18 | 19 | v0.6.1 20 | ====== 21 | 22 | * API: Previous type and id keywords which are automatically added to 23 | the dictionnary are now ENTRYTYPE and ID, respectively (#42). 24 | * ENH: comma first syntax support (#49) by Michal Grochmal. 
25 | 26 | v0.6.0 27 | ====== 28 | 29 | * DOC: clarify version number 30 | * ENH: support for bibtex with leading spaces (#34) 31 | * FIX: if title contained multiples words in braces 32 | * ENH: code refactoring (#33) 33 | * ENH: support for comment blocks (#32) 34 | * ENH: Removed comma after last key-value pair by faph (#28) 35 | * ENH: optional keys sanitising by faph (#29) 36 | * FIX: missing coma at the end of a record (#24) 37 | * DOC: clarify the usecase of to_bibtex 38 | * FIX: raise exception for TypeError in to_bibtex (#22) 39 | 40 | v0.5.5 41 | ====== 42 | 43 | * ENH: json output 44 | * ENH: Add (optional) support for non-standard entry types by Georg C. Brückmann 45 | * FIX: protect uppercase only on unprotected characters. #18 46 | * ENH: string replacement by Uwe Schmidt (#13 #20) 47 | 48 | v0.5.4 49 | ====== 50 | 51 | * ENH: json output 52 | * API: enhance the naming choice for bwriter 53 | 54 | v0.5.3 55 | ====== 56 | 57 | * ENH: add writer (#16), thanks to Lucas Verney 58 | * MAINT: Remove non-standard --BREAK-- command detection 59 | * FIX: missing strip() (#14) by Sebastien Diemer 60 | * API breakage: the parser takes data instead of a filehandler 61 | 62 | v0.5.2 63 | ====== 64 | 65 | * ENH: fix tests latex encoding 66 | * ENH: support @comment @preambule (escaped) 67 | * ENH: check that bibtype belongs to a known type 68 | 69 | v0.5.1 70 | ====== 71 | 72 | * ENH: split keywords with various separators 73 | * ENH: get_entry_dict make the dict once 74 | * ENH: add messages with logging 75 | * FIX: fix unittest related to braces detection 76 | 77 | v0.5 78 | ==== 79 | 80 | * Permission from original authors and OKFN to use LGPLv3 81 | * ENH: Python 2.7 support 82 | * FIX: issue related to accents 83 | 84 | v0.4 85 | ==== 86 | 87 | * ENH: Transformations on characters are now considered as a customization 88 | * ENH: New customization: clean latex style 89 | * FIX: issue related to name processing 90 | 91 | v0.3 92 | ==== 93 | 94 | * DOC: moved to 
readsthedoc 95 | * DOC: several improvements 96 | * MAINT: separate customizations 97 | 98 | v0.2 99 | ==== 100 | 101 | * TEST: initialized 102 | * DOC: initialized 103 | 104 | v0.1 105 | ==== 106 | 107 | * First preliminary release 108 | -------------------------------------------------------------------------------- /bibtexparser/tests/test_bibtexparser.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import bibtexparser 3 | from bibtexparser.bparser import BibTexParser 4 | from tempfile import TemporaryFile 5 | 6 | 7 | class TestBibtexParserParserMethods(unittest.TestCase): 8 | input_file_path = 'bibtexparser/tests/data/book.bib' 9 | entries_expected = [{'ENTRYTYPE': 'book', 10 | 'year': '1987', 11 | 'edition': '2', 12 | 'publisher': 'Wiley Edition', 13 | 'ID': 'Bird1987', 14 | 'volume': '1', 15 | 'title': 'Dynamics of Polymeric Liquid', 16 | 'author': 'Bird, R.B. and Armstrong, R.C. and Hassager, O.' 17 | }] 18 | 19 | def test_parse_immediately(self): 20 | with open(self.input_file_path) as bibtex_file: 21 | bibtex_str = bibtex_file.read() 22 | bibtex_database = BibTexParser(bibtex_str) 23 | self.assertEqual(bibtex_database.entries, self.entries_expected) 24 | 25 | def test_parse_str(self): 26 | parser = BibTexParser() 27 | with open(self.input_file_path) as bibtex_file: 28 | bibtex_str = bibtex_file.read() 29 | bibtex_database = parser.parse(bibtex_str) 30 | self.assertEqual(bibtex_database.entries, self.entries_expected) 31 | 32 | def test_parse_file(self): 33 | parser = BibTexParser() 34 | with open(self.input_file_path) as bibtex_file: 35 | bibtex_database = parser.parse_file(bibtex_file) 36 | self.assertEqual(bibtex_database.entries, self.entries_expected) 37 | 38 | def test_parse_str_module(self): 39 | with open(self.input_file_path) as bibtex_file: 40 | bibtex_str = bibtex_file.read() 41 | bibtex_database = bibtexparser.loads(bibtex_str) 42 | self.assertEqual(bibtex_database.entries, 
self.entries_expected) 43 | 44 | def test_parse_file_module(self): 45 | with open(self.input_file_path) as bibtex_file: 46 | bibtex_database = bibtexparser.load(bibtex_file) 47 | self.assertEqual(bibtex_database.entries, self.entries_expected) 48 | 49 | 50 | class TestBibtexparserWriteMethods(unittest.TestCase): 51 | input_file_path = 'bibtexparser/tests/data/book.bib' 52 | expected = \ 53 | """@book{Bird1987, 54 | author = {Bird, R.B. and Armstrong, R.C. and Hassager, O.}, 55 | edition = {2}, 56 | publisher = {Wiley Edition}, 57 | title = {Dynamics of Polymeric Liquid}, 58 | volume = {1}, 59 | year = {1987} 60 | } 61 | 62 | """ 63 | 64 | def test_write_str(self): 65 | with open(self.input_file_path) as bibtex_file: 66 | bibtex_database = bibtexparser.load(bibtex_file) 67 | result = bibtexparser.dumps(bibtex_database) 68 | self.assertEqual(result, self.expected) 69 | 70 | def test_write_file(self): 71 | with open(self.input_file_path) as bibtex_file: 72 | bibtex_database = bibtexparser.load(bibtex_file) 73 | 74 | with TemporaryFile(mode='w+') as bibtex_out_file: 75 | bibtexparser.dump(bibtex_database, bibtex_out_file) 76 | bibtex_out_file.seek(0) 77 | bibtex_out_str = bibtex_out_file.read() 78 | 79 | self.assertEqual(bibtex_out_str, self.expected) 80 | 81 | 82 | if __name__ == '__main__': 83 | unittest.main() 84 | -------------------------------------------------------------------------------- /bibtexparser/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | `BibTeX `_ is a bibliographic data file format. 3 | 4 | The :mod:`bibtexparser` module can parse BibTeX files and write them. The API is similar to the 5 | :mod:`json` module. The parsed data is returned as a simple :class:`BibDatabase` object with the main attribute being 6 | :attr:`entries` representing bibliographic sources such as books and journal articles. 7 | 8 | The following functions provide a quick and basic way to manipulate a BibTeX file. 
def loads(bibtex_str, parser=None):
    """
    Parse a BibTeX string into a :class:`BibDatabase` object.

    :param bibtex_str: BibTeX source text to parse
    :type bibtex_str: str or unicode
    :param parser: parser to use; a default ``BibTexParser`` is created
        when this is ``None``
    :type parser: BibTexParser
    :returns: the result of ``parser.parse(bibtex_str)``
    :rtype: BibDatabase
    """
    active_parser = bparser.BibTexParser() if parser is None else parser
    return active_parser.parse(bibtex_str)
def dump(bib_database, bibtex_file, writer=None):
    """
    Write a :class:`BibDatabase` object to a file as BibTeX text.

    :param bib_database: bibliographic database object
    :type bib_database: BibDatabase
    :param bibtex_file: object with a ``write`` method that receives the
        generated text
    :type bibtex_file: file
    :param writer: writer to use; a default ``BibTexWriter`` is created
        when this is ``None``
    :type writer: BibTexWriter

    Example::

        import bibtexparser
        with open('bibtex.bib', 'w') as bibtex_file:
            bibtexparser.dump(bibtex_database, bibtex_file)

    """
    active_writer = bwriter.BibTexWriter() if writer is None else writer
    rendered = active_writer.write(bib_database)
    bibtex_file.write(rendered)
B.', 33 | 'Bar, F.', 34 | 'de Savigny, Jean', 35 | 'la Tour, Jean', 36 | 'le Tour, Jean', 37 | 'ben Akar, Mike', 38 | #'de la Tour, Jean', 39 | #'van der Waals, Johannes Diderik', 40 | ] 41 | self.assertEqual(result, expected) 42 | 43 | @unittest.skip('Bug #9') 44 | def test_getnames_braces(self): 45 | names = ['A. {Delgado de Molina}', 'M. Vign{\\\'e}'] 46 | result = getnames(names) 47 | expected = ['Delgado de Molina, A.', 'Vigné, M.'] 48 | self.assertEqual(result, expected) 49 | 50 | ########### 51 | # page_double_hyphen 52 | ########### 53 | def test_page_double_hyphen_alreadyOK(self): 54 | record = {'pages': '12--24'} 55 | result = page_double_hyphen(record) 56 | expected = record 57 | self.assertEqual(result, expected) 58 | 59 | def test_page_double_hyphen_simple(self): 60 | record = {'pages': '12-24'} 61 | result = page_double_hyphen(record) 62 | expected = {'pages': '12--24'} 63 | self.assertEqual(result, expected) 64 | 65 | def test_page_double_hyphen_space(self): 66 | record = {'pages': '12 - 24'} 67 | result = page_double_hyphen(record) 68 | expected = {'pages': '12--24'} 69 | self.assertEqual(result, expected) 70 | 71 | def test_page_double_hyphen_nothing(self): 72 | record = {'pages': '12 24'} 73 | result = page_double_hyphen(record) 74 | expected = {'pages': '12 24'} 75 | self.assertEqual(result, expected) 76 | 77 | ########### 78 | # convert to unicode 79 | ########### 80 | def test_convert_to_unicode(self): 81 | record = {'toto': '{\`a} \`{a}'} 82 | result = convert_to_unicode(record) 83 | expected = {'toto': 'à à'} 84 | self.assertEqual(result, expected) 85 | record = {'toto': '{\\"u} \\"{u}'} 86 | result = convert_to_unicode(record) 87 | expected = {'toto': 'ü ü'} 88 | self.assertEqual(result, expected) 89 | 90 | ########### 91 | # homogenize 92 | ########### 93 | def test_homogenize(self): 94 | record = {'toto': 'à {\`a} \`{a}'} 95 | result = homogenize_latex_encoding(record) 96 | expected = {'toto': '{\`a} {\`a} {\`a}'} 97 | 
# Text type used to normalise sort keys: ``unicode`` on Python 2, ``str`` otherwise.
if sys.version_info[0] != 2:
    TEXT_TYPE = str
else:
    TEXT_TYPE = unicode  # noqa: F821 -- this branch only runs on Python 2


#: Entry types defined by standard BibTeX.
STANDARD_TYPES = {
    'article',
    'book',
    'booklet',
    'conference',
    'inbook',
    'incollection',
    'inproceedings',
    'manual',
    'mastersthesis',
    'misc',
    'phdthesis',
    'proceedings',
    'techreport',
    'unpublished',
}

#: Month abbreviations that can be preloaded with :meth:`BibDatabase.load_common_strings`.
COMMON_STRINGS = {
    'jan': 'January',
    'feb': 'February',
    'mar': 'March',
    'apr': 'April',
    'may': 'May',
    'jun': 'June',
    'jul': 'July',
    'aug': 'August',
    'sep': 'September',
    'oct': 'October',
    'nov': 'November',
    'dec': 'December',
}


class BibDatabase(object):
    """
    Bibliographic database object mirroring the structure of a BibTeX file:
    entries, comments, string definitions and preambles.
    """

    def __init__(self):
        #: List of BibTeX entries, for example `@book{...}`, `@article{...}`, etc. Each entry is a plain dict of
        #: BibTeX field-value pairs, for example `'author': 'Bird, R.B. and Armstrong, R.C. and Hassager, O.'`.
        #: Besides the regular BibTeX fields, every entry is expected to carry these keys:
        #:
        #: * `ID` (BibTeX key)
        #: * `ENTRYTYPE` (entry type in lowercase, e.g. `book`, `article` etc.)
        self.entries = []
        # ID -> entry cache behind `entries_dict`; filled lazily by get_entry_dict().
        self._entries_dict = {}
        #: List of BibTeX comment (`@comment{...}`) blocks.
        self.comments = []
        #: OrderedDict of BibTeX string definitions (`@string{...}`), kept in order of definition
        #: (order may not matter, it is preserved just in case).
        self.strings = OrderedDict()
        #: List of BibTeX preamble (`@preamble{...}`) blocks.
        self.preambles = []

    def load_common_strings(self):
        """Add the month abbreviations from ``COMMON_STRINGS`` to :attr:`strings`."""
        self.strings.update(COMMON_STRINGS)

    def get_entry_list(self):
        """Get a list of bibtex entries.

        :returns: BibTeX entries
        :rtype: list
        .. deprecated:: 0.5.6
           Use :attr:`entries` instead.
        """
        return self.entries

    @staticmethod
    def entry_sort_key(entry, fields):
        """Build a sort key for ``entry`` from the given ``fields``.

        Every value is converted to text and lower-cased, so sorting is always
        done on strings; a field missing from the entry contributes ``''``.

        :returns: tuple with one lower-cased string per field
        :rtype: tuple
        """
        return tuple(TEXT_TYPE(entry.get(name, '')).lower() for name in fields)

    def get_entry_dict(self):
        """Return a dictionary of BibTeX entries.
        The dict key is the BibTeX entry key.

        The mapping is only (re)built while the cache is empty; once it holds
        at least one entry the cached dict is returned as is.
        """
        cache = self._entries_dict
        if cache:
            return cache
        for record in self.entries:
            cache[record['ID']] = record
        return cache

    entries_dict = property(get_entry_dict)

    def expand_string(self, name):
        """Return the value of the string definition ``name``.

        :raises KeyError: if no string with that name is defined
        """
        try:
            value = self.strings[name]
        except KeyError:
            raise KeyError("Unknown string: {}.".format(name))
        return value


class BibDataString(object):
    """
    Represents a bibtex string.

    This object enables maintaining string expressions as lists of plain
    strings and BibDataString objects. The value is looked up in the owning
    BibDatabase on demand.
    """

    def __init__(self, bibdatabase, name):
        self._bibdatabase = bibdatabase
        # Names are stored lower-cased.
        self.name = name.lower()

    def __repr__(self):
        return "BibDataString({})".format(repr(self.name))

    def get_value(self):
        """
        Query value from string name.

        :returns: string
        """
        owner = self._bibdatabase
        return owner.expand_string(self.name)
logger = logging.getLogger(__name__)

__all__ = ['BibTexWriter']


def to_bibtex(parsed):
    """
    Convenience function for backwards compatibility.

    :param parsed: bibliographic database to convert
    :type parsed: BibDatabase
    :return: BibTeX-formatted string produced by a :class:`BibTexWriter` with default settings
    """
    return BibTexWriter().write(parsed)


class BibTexWriter(object):
    """
    Writer to convert a :class:`BibDatabase` object to a string or file formatted as a BibTeX file.

    Example::

        from bibtexparser.bwriter import BibTexWriter

        bib_database = ...

        writer = BibTexWriter()
        writer.contents = ['comments', 'entries']
        writer.indent = '    '
        writer.order_entries_by = ('ENTRYTYPE', 'author', 'year')
        bibtex_str = bibtexparser.dumps(bib_database, writer)

    """

    _valid_contents = ['entries', 'comments', 'preambles', 'strings']

    def __init__(self):
        #: List of BibTeX elements to write, valid values are `entries`, `comments`, `preambles`, `strings`.
        self.contents = ['comments', 'preambles', 'strings', 'entries']
        #: Character(s) for indenting BibTeX field-value pairs. Default: single space.
        self.indent = ' '
        #: Align values. Determines the maximal number of characters used in any fieldname and aligns all values
        #: according to that by filling up with single spaces. Default: False
        self.align_values = False
        #: Character(s) for separating BibTeX entries. Default: new line.
        self.entry_separator = '\n'
        #: Tuple of fields for ordering BibTeX entries. Set to `None` to disable sorting. Default: BibTeX key `('ID', )`.
        self.order_entries_by = ('ID', )
        #: Tuple of fields for display order in a single BibTeX entry. Fields not listed here will be displayed
        #: alphabetically at the end. Set to '[]' for alphabetical order. Default: '[]'
        self.display_order = []
        #: BibTeX syntax allows comma first syntax
        #: (common in functional languages), use this to enable
        #: comma first syntax as the bwriter output
        self.comma_first = False

        # Internal: width field names are padded to. Recomputed on every call
        # of _entries_to_bibtex(); stays 0 unless self.align_values is set.
        self._max_field_width = 0

    def write(self, bib_database):
        """
        Converts a bibliographic database to a BibTeX-formatted string.

        Each name in :attr:`contents` is dispatched to the method
        ``_<name>_to_bibtex``. A name without such a method is reported with a
        warning and skipped. Exceptions raised *inside* a converter (including
        ``AttributeError``) are propagated instead of being mistaken for an
        unknown content name.

        :param bib_database: bibliographic database to be converted to a BibTeX string
        :type bib_database: BibDatabase
        :return: BibTeX-formatted string
        :rtype: str or unicode
        """
        parts = []
        for content in self.contents:
            # Only the lookup decides whether the item exists; the call itself
            # happens outside of any exception handler.
            converter = getattr(self, '_' + content + '_to_bibtex', None)
            if converter is None:
                logger.warning("BibTeX item '{}' does not exist and will not be written. Valid items are {}."
                               .format(content, self._valid_contents))
                continue
            # Add each element set (entries, comments, ...)
            parts.append(converter(bib_database))
        return ''.join(parts)

    def _entries_to_bibtex(self, bib_database):
        """Serialise all entries, sorted by :attr:`order_entries_by` when it is set."""
        if self.order_entries_by:
            # TODO: allow sort field does not exist for entry
            entries = sorted(bib_database.entries, key=lambda x: BibDatabase.entry_sort_key(x, self.order_entries_by))
        else:
            entries = bib_database.entries

        if self.align_values:
            # Longest key over all entries (this includes 'ID' and 'ENTRYTYPE',
            # as before). An empty database simply yields width 0 instead of
            # failing on max() of an empty sequence.
            lengths = [len(field) for entry in entries for field in entry]
            self._max_field_width = max(lengths) if lengths else 0
        else:
            # Reset so that a width computed by an earlier, aligned write()
            # does not leak into unaligned output.
            self._max_field_width = 0

        return ''.join([self._entry_to_bibtex(entry) for entry in entries])

    def _entry_to_bibtex(self, entry):
        """Serialise a single entry dict.

        :raises TypeError: if a field value is not a string
        """
        # Write BibTeX key
        bibtex = '@' + entry['ENTRYTYPE'] + '{' + entry['ID']

        # Fields named in self.display_order come first (in that order),
        # all remaining fields follow sorted alphabetically.
        display_order = [i for i in self.display_order if i in entry]
        display_order += [i for i in sorted(entry) if i not in self.display_order]

        # Write field = value lines
        for field in display_order:
            if field in ('ENTRYTYPE', 'ID'):
                continue
            try:
                pair = "{0:<{1}}".format(field, self._max_field_width) + " = {" + entry[field] + "}"
                if self.comma_first:
                    bibtex += "\n" + self.indent + ", " + pair
                else:
                    bibtex += ",\n" + self.indent + pair
            except TypeError:
                raise TypeError(u"The field %s in entry %s must be a string"
                                % (field, entry['ID']))
        bibtex += "\n}\n" + self.entry_separator
        return bibtex

    def _comments_to_bibtex(self, bib_database):
        """Serialise every comment as an ``@comment{...}`` block."""
        return ''.join(['@comment{{{0}}}\n{1}'.format(comment, self.entry_separator)
                        for comment in bib_database.comments])

    def _preambles_to_bibtex(self, bib_database):
        """Serialise every preamble as an ``@preamble{"..."}`` block."""
        return ''.join(['@preamble{{"{0}"}}\n{1}'.format(preamble, self.entry_separator)
                        for preamble in bib_database.preambles])

    def _strings_to_bibtex(self, bib_database):
        """Serialise every string definition as an ``@string{name = "value"}`` block."""
        return ''.join(['@string{{{0} = "{1}"}}\n{2}'.format(name, value, self.entry_separator)
                        for name, value in bib_database.strings.items()])
# Makefile for Sphinx documentation
#

# You can set these variables from the command line.
SPHINXOPTS    =
SPHINXBUILD   = sphinx-build
PAPER         =
BUILDDIR      = build

# Internal variables.
PAPEROPT_a4     = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source

# Every target below is a command, not a file; texinfo and info are listed too
# so that a stray file or directory of that name cannot mask them.
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man texinfo info changes linkcheck doctest gettext

help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo "  html       to make standalone HTML files"
	@echo "  dirhtml    to make HTML files named index.html in directories"
	@echo "  singlehtml to make a single large HTML file"
	@echo "  pickle     to make pickle files"
	@echo "  json       to make JSON files"
	@echo "  htmlhelp   to make HTML files and a HTML help project"
	@echo "  qthelp     to make HTML files and a qthelp project"
	@echo "  devhelp    to make HTML files and a Devhelp project"
	@echo "  epub       to make an epub"
	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
	@echo "  text       to make text files"
	@echo "  man        to make manual pages"
	@echo "  texinfo    to make Texinfo files"
	@echo "  info       to make Texinfo files and run them through makeinfo"
	@echo "  gettext    to make PO message catalogs"
	@echo "  changes    to make an overview of all changed/added/deprecated items"
	@echo "  linkcheck  to check all external links for integrity"
	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"

clean:
	-rm -rf $(BUILDDIR)/*

html:
	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

dirhtml:
	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."

singlehtml:
	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
	@echo
	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."

pickle:
	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
	@echo
	@echo "Build finished; now you can process the pickle files."

json:
	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
	@echo
	@echo "Build finished; now you can process the JSON files."

htmlhelp:
	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
	@echo
	@echo "Build finished; now you can run HTML Help Workshop with the" \
	      ".hhp project file in $(BUILDDIR)/htmlhelp."

qthelp:
	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
	@echo
	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/BibtexParser.qhcp"
	@echo "To view the help file:"
	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/BibtexParser.qhc"

devhelp:
	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
	@echo
	@echo "Build finished."
	@echo "To view the help file:"
	@echo "# mkdir -p $$HOME/.local/share/devhelp/BibtexParser"
	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/BibtexParser"
	@echo "# devhelp"

epub:
	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
	@echo
	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."

latex:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo
	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
	@echo "Run \`make' in that directory to run these through (pdf)latex" \
	      "(use \`make latexpdf' here to do that automatically)."

latexpdf:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through pdflatex..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

text:
	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
	@echo
	@echo "Build finished. The text files are in $(BUILDDIR)/text."

man:
	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
	@echo
	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."

texinfo:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo
	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
	@echo "Run \`make' in that directory to run these through makeinfo" \
	      "(use \`make info' here to do that automatically)."

info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	# $(MAKE) rather than a literal "make", consistent with the latexpdf
	# target: the sub-make then uses the same make program and inherits
	# its command-line flags.
	$(MAKE) -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."

gettext:
	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
	@echo
	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."

changes:
	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
	@echo
	@echo "The overview file is in $(BUILDDIR)/changes."

linkcheck:
	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
	@echo
	@echo "Link check complete; look for any errors in the above output " \
	      "or in $(BUILDDIR)/linkcheck/output.txt."

doctest:
	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
	@echo "Testing of doctests in the sources finished, look at the " \
	      "results in $(BUILDDIR)/doctest/output.txt."
12 | 13 | Current issues: 14 | - a comment followed by a line starting with @smthing 15 | that is not a valid bibtex element are parsed separately, 16 | that is as two comments. 17 | - braces are either ignored or removed which is not easily 18 | predictable. 19 | """ 20 | 21 | 22 | class TestParseComment(unittest.TestCase): 23 | 24 | def test_comment_count(self): 25 | with open('bibtexparser/tests/data/features.bib') as bibfile: 26 | bib = BibTexParser(bibfile.read()) 27 | self.assertEqual(len(bib.comments), 3) 28 | 29 | def test_comment_list(self): 30 | with open('bibtexparser/tests/data/features.bib') as bibfile: 31 | bib = BibTexParser(bibfile.read()) 32 | expected = ["ignore this line!", 33 | "ignore this line too!", 34 | "and ignore this line too!"] 35 | self.assertEqual(bib.comments, expected) 36 | 37 | def test_multiline_comments(self): 38 | with open('bibtexparser/tests/data/multiline_comments.bib') as bibfile: 39 | bib = BibTexParser(bibfile.read()) 40 | expected = [ 41 | """Lorem ipsum dolor sit amet, 42 | consectetur adipisicing elit""", 43 | """ 44 | Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. 45 | Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. 46 | 47 | Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. 48 | Excepteur sint occaecat cupidatat non proident. 49 | , 50 | """, 51 | """ 52 | 53 | 54 | Sunt in culpa qui officia deserunt mollit anim id est laborum. 
55 | 56 | 57 | """, 58 | "" 59 | ] 60 | self.maxDiff = None 61 | self.assertEqual(bib.comments, expected) 62 | 63 | def test_multiple_entries(self): 64 | with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibfile: 65 | bparser = BibTexParser() 66 | bib = bparser.parse_file(bibfile) 67 | expected = ["", 68 | "A comment"] 69 | self.assertEqual(bib.comments, expected) 70 | 71 | def test_comments_percentage(self): 72 | with open('bibtexparser/tests/data/comments_percentage.bib', 'r') as bibfile: 73 | bib = BibTexParser(bibfile.read()) 74 | res = bib.get_entry_list() 75 | expected = [{'ENTRYTYPE': 'article', 76 | 'journal': 'Nice Journal', 77 | 'volume': '12', 78 | 'ID': 'Cesar2013', 79 | 'year': '2013', 80 | 'author': 'Jean Cesar', 81 | 'comments': 'A comment', 82 | 'keyword': 'keyword1, keyword2', 83 | 'title': 'An amazing title' 84 | }, 85 | {'ENTRYTYPE': 'article', 86 | 'journal': 'Nice Journal', 87 | 'volume': '12', 88 | 'ID': 'Baltazar2013', 89 | 'year': '2013', 90 | 'author': 'Jean Baltazar', 91 | 'comments': 'A comment', 92 | 'keyword': 'keyword1, keyword2', 93 | 'title': 'An amazing title' 94 | }] 95 | self.assertEqual(res, expected) 96 | 97 | def test_comments_percentage_nocoma(self): 98 | with open('bibtexparser/tests/data/comments_percentage_nolastcoma.bib', 'r') as bibfile: 99 | bib = BibTexParser(bibfile.read()) 100 | res = bib.get_entry_list() 101 | expected = [{'ENTRYTYPE': 'article', 102 | 'journal': 'Nice Journal', 103 | 'volume': '12', 104 | 'ID': 'Cesar2013', 105 | 'year': '2013', 106 | 'author': 'Jean Cesar', 107 | 'comments': 'A comment', 108 | 'keyword': 'keyword1, keyword2', 109 | 'title': 'An amazing title' 110 | }, 111 | {'ENTRYTYPE': 'article', 112 | 'journal': 'Nice Journal', 113 | 'volume': '12', 114 | 'ID': 'Baltazar2013', 115 | 'year': '2013', 116 | 'author': 'Jean Baltazar', 117 | 'comments': 'A comment', 118 | 'keyword': 'keyword1, keyword2', 119 | 'title': 'An amazing title' 120 | }] 121 | self.assertEqual(res, 
expected) 122 | 123 | def test_no_newline(self): 124 | comments = """This is a comment.""" 125 | expected = ["This is a comment."] 126 | bib = BibTexParser(comments) 127 | self.assertEqual(bib.comments, expected) 128 | 129 | def test_43(self): 130 | comment = "@STRING{foo = \"bar\"}\n" \ 131 | "This is a comment\n" \ 132 | "This is a second comment." 133 | expected = "This is a comment\nThis is a second comment." 134 | bib = BibTexParser(comment) 135 | self.assertEqual(bib.comments, [expected]) 136 | self.assertEqual(bib.strings, {'foo': 'bar'}) 137 | 138 | def test_43_bis(self): 139 | comment = "@STRING{foo = \"bar\"}\n" \ 140 | "This is a comment\n" \ 141 | "STRING{Baz = \"This should be interpreted as comment.\"}" 142 | expected = "This is a comment\n" \ 143 | "STRING{Baz = \"This should be interpreted as comment.\"}" 144 | bib = BibTexParser(comment) 145 | self.assertEqual(bib.comments, [expected]) 146 | self.assertEqual(bib.strings, {'foo': 'bar'}) 147 | 148 | 149 | class TestWriteComment(unittest.TestCase): 150 | def test_comment_write(self): 151 | with open('bibtexparser/tests/data/comments_only.bib') as bibfile: 152 | bib = BibTexParser(bibfile.read()) 153 | 154 | with open('bibtexparser/tests/data/comments_only_output.bib') as bibfile: 155 | expected = bibfile.read() 156 | result = to_bibtex(bib) 157 | self.assertEqual(result, expected) 158 | 159 | def test_multiline_comment_write(self): 160 | with open('bibtexparser/tests/data/multiline_comments.bib') as bibfile: 161 | expected = bibfile.read() 162 | 163 | bib = BibTexParser(expected) 164 | result = to_bibtex(bib) 165 | self.assertEqual(result, expected) 166 | 167 | def test_multiple_entries(self): 168 | with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibfile: 169 | bib = BibTexParser(bibfile.read()) 170 | with open('bibtexparser/tests/data/multiple_entries_and_comments_output.bib') as bibfile: 171 | expected = bibfile.read() 172 | result = to_bibtex(bib) 173 | 
self.assertEqual(result, expected) 174 | -------------------------------------------------------------------------------- /bibtexparser/tests/test_bibtexwriter.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import bibtexparser 3 | from bibtexparser.bwriter import BibTexWriter 4 | from bibtexparser.bibdatabase import BibDatabase 5 | 6 | 7 | class TestBibTexWriter(unittest.TestCase): 8 | def test_content_entries_only(self): 9 | with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibtex_file: 10 | bib_database = bibtexparser.load(bibtex_file) 11 | writer = BibTexWriter() 12 | writer.contents = ['entries'] 13 | result = bibtexparser.dumps(bib_database, writer) 14 | expected = \ 15 | """@book{Toto3000, 16 | author = {Toto, A and Titi, B}, 17 | title = {A title} 18 | } 19 | 20 | @article{Wigner1938, 21 | author = {Wigner, E.}, 22 | doi = {10.1039/TF9383400029}, 23 | issn = {0014-7672}, 24 | journal = {Trans. Faraday Soc.}, 25 | owner = {fr}, 26 | pages = {29--41}, 27 | publisher = {The Royal Society of Chemistry}, 28 | title = {The transition state method}, 29 | volume = {34}, 30 | year = {1938} 31 | } 32 | 33 | @book{Yablon2005, 34 | author = {Yablon, A.D.}, 35 | publisher = {Springer}, 36 | title = {Optical fiber fusion slicing}, 37 | year = {2005} 38 | } 39 | 40 | """ 41 | self.assertEqual(result, expected) 42 | 43 | def test_content_comment_only(self): 44 | with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibtex_file: 45 | bib_database = bibtexparser.load(bibtex_file) 46 | writer = BibTexWriter() 47 | writer.contents = ['comments'] 48 | result = bibtexparser.dumps(bib_database, writer) 49 | expected = \ 50 | """@comment{} 51 | 52 | @comment{A comment} 53 | 54 | """ 55 | self.assertEqual(result, expected) 56 | 57 | def test_indent(self): 58 | bib_database = BibDatabase() 59 | bib_database.entries = [{'ID': 'abc123', 60 | 'ENTRYTYPE': 'book', 61 | 'author': 
'test'}] 62 | writer = BibTexWriter() 63 | writer.indent = ' ' 64 | result = bibtexparser.dumps(bib_database, writer) 65 | expected = \ 66 | """@book{abc123, 67 | author = {test} 68 | } 69 | 70 | """ 71 | self.assertEqual(result, expected) 72 | 73 | def test_align(self): 74 | bib_database = BibDatabase() 75 | bib_database.entries = [{'ID': 'abc123', 76 | 'ENTRYTYPE': 'book', 77 | 'author': 'test', 78 | 'thisisaverylongkey': 'longvalue'}] 79 | writer = BibTexWriter() 80 | writer.align_values = True 81 | result = bibtexparser.dumps(bib_database, writer) 82 | expected = \ 83 | """@book{abc123, 84 | author = {test}, 85 | thisisaverylongkey = {longvalue} 86 | } 87 | 88 | """ 89 | self.assertEqual(result, expected) 90 | 91 | with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibtex_file: 92 | bib_database = bibtexparser.load(bibtex_file) 93 | writer = BibTexWriter() 94 | writer.contents = ['entries'] 95 | writer.align_values = True 96 | result = bibtexparser.dumps(bib_database, writer) 97 | expected = \ 98 | """@book{Toto3000, 99 | author = {Toto, A and Titi, B}, 100 | title = {A title} 101 | } 102 | 103 | @article{Wigner1938, 104 | author = {Wigner, E.}, 105 | doi = {10.1039/TF9383400029}, 106 | issn = {0014-7672}, 107 | journal = {Trans. 
Faraday Soc.}, 108 | owner = {fr}, 109 | pages = {29--41}, 110 | publisher = {The Royal Society of Chemistry}, 111 | title = {The transition state method}, 112 | volume = {34}, 113 | year = {1938} 114 | } 115 | 116 | @book{Yablon2005, 117 | author = {Yablon, A.D.}, 118 | publisher = {Springer}, 119 | title = {Optical fiber fusion slicing}, 120 | year = {2005} 121 | } 122 | 123 | """ 124 | self.assertEqual(result, expected) 125 | 126 | 127 | def test_entry_separator(self): 128 | bib_database = BibDatabase() 129 | bib_database.entries = [{'ID': 'abc123', 130 | 'ENTRYTYPE': 'book', 131 | 'author': 'test'}] 132 | writer = BibTexWriter() 133 | writer.entry_separator = '' 134 | result = bibtexparser.dumps(bib_database, writer) 135 | expected = \ 136 | """@book{abc123, 137 | author = {test} 138 | } 139 | """ 140 | self.assertEqual(result, expected) 141 | 142 | def test_display_order(self): 143 | with open('bibtexparser/tests/data/multiple_entries_and_comments.bib') as bibtex_file: 144 | bib_database = bibtexparser.load(bibtex_file) 145 | writer = BibTexWriter() 146 | writer.contents = ['entries'] 147 | writer.display_order = ['year', 'publisher', 'title'] 148 | result = bibtexparser.dumps(bib_database, writer) 149 | expected = \ 150 | """@book{Toto3000, 151 | title = {A title}, 152 | author = {Toto, A and Titi, B} 153 | } 154 | 155 | @article{Wigner1938, 156 | year = {1938}, 157 | publisher = {The Royal Society of Chemistry}, 158 | title = {The transition state method}, 159 | author = {Wigner, E.}, 160 | doi = {10.1039/TF9383400029}, 161 | issn = {0014-7672}, 162 | journal = {Trans. 
Faraday Soc.}, 163 | owner = {fr}, 164 | pages = {29--41}, 165 | volume = {34} 166 | } 167 | 168 | @book{Yablon2005, 169 | year = {2005}, 170 | publisher = {Springer}, 171 | title = {Optical fiber fusion slicing}, 172 | author = {Yablon, A.D.} 173 | } 174 | 175 | """ 176 | self.assertEqual(result, expected) 177 | 178 | 179 | class TestEntrySorting(unittest.TestCase): 180 | bib_database = BibDatabase() 181 | bib_database.entries = [{'ID': 'b', 182 | 'ENTRYTYPE': 'article'}, 183 | {'ID': 'c', 184 | 'ENTRYTYPE': 'book'}, 185 | {'ID': 'a', 186 | 'ENTRYTYPE': 'book'}] 187 | 188 | def test_sort_default(self): 189 | result = bibtexparser.dumps(self.bib_database) 190 | expected = "@book{a\n}\n\n@article{b\n}\n\n@book{c\n}\n\n" 191 | self.assertEqual(result, expected) 192 | 193 | def test_sort_none(self): 194 | writer = BibTexWriter() 195 | writer.order_entries_by = None 196 | result = bibtexparser.dumps(self.bib_database, writer) 197 | expected = "@article{b\n}\n\n@book{c\n}\n\n@book{a\n}\n\n" 198 | self.assertEqual(result, expected) 199 | 200 | def test_sort_id(self): 201 | writer = BibTexWriter() 202 | writer.order_entries_by = ('ID', ) 203 | result = bibtexparser.dumps(self.bib_database, writer) 204 | expected = "@book{a\n}\n\n@article{b\n}\n\n@book{c\n}\n\n" 205 | self.assertEqual(result, expected) 206 | 207 | def test_sort_type(self): 208 | writer = BibTexWriter() 209 | writer.order_entries_by = ('ENTRYTYPE', ) 210 | result = bibtexparser.dumps(self.bib_database, writer) 211 | expected = "@article{b\n}\n\n@book{c\n}\n\n@book{a\n}\n\n" 212 | self.assertEqual(result, expected) 213 | 214 | def test_sort_type_id(self): 215 | writer = BibTexWriter() 216 | writer.order_entries_by = ('ENTRYTYPE', 'ID') 217 | result = bibtexparser.dumps(self.bib_database, writer) 218 | expected = "@article{b\n}\n\n@book{a\n}\n\n@book{c\n}\n\n" 219 | self.assertEqual(result, expected) 220 | 221 | def test_sort_missing_field(self): 222 | bib_database = BibDatabase() 223 | bib_database.entries = 
[{'ID': 'b', 224 | 'ENTRYTYPE': 'article', 225 | 'year': '2000'}, 226 | {'ID': 'c', 227 | 'ENTRYTYPE': 'book', 228 | 'year': '2010'}, 229 | {'ID': 'a', 230 | 'ENTRYTYPE': 'book'}] 231 | writer = BibTexWriter() 232 | writer.order_entries_by = ('year', ) 233 | result = bibtexparser.dumps(bib_database, writer) 234 | expected = "@book{a\n}\n\n@article{b,\n year = {2000}\n}\n\n@book{c,\n year = {2010}\n}\n\n" 235 | self.assertEqual(result, expected) 236 | 237 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # BibtexParser documentation build configuration file, created by 5 | # sphinx-quickstart on Thu Aug 1 13:30:23 2013. 6 | # 7 | # This file is execfile()d with the current directory set to its containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys, os 16 | 17 | # If extensions (or modules to document with autodoc) are in another directory, 18 | # add these directories to sys.path here. If the directory is relative to the 19 | # documentation root, use os.path.abspath to make it absolute, like shown here. 20 | sys.path.insert(0, os.path.abspath('../..')) 21 | sys.path.insert(0, os.path.abspath('../../bibtexparser')) 22 | 23 | # -- General configuration ----------------------------------------------------- 24 | 25 | # If your documentation needs a minimal Sphinx version, state it here. 26 | #needs_sphinx = '1.0' 27 | 28 | # Add any Sphinx extension module names here, as strings. They can be extensions 29 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 
extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.viewcode']

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix of source filenames.
source_suffix = '.rst'

# The encoding of source files.
#source_encoding = 'utf-8-sig'

# The master toctree document.
master_doc = 'index'

# General information about the project.
project = 'BibtexParser'
copyright = '2013-2016, F. Boulogne and other contributors'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# Both values are read from the package itself; when the package cannot be
# imported, the placeholder 'latest' is used instead.
try:
    import bibtexparser as bp
    # The short X.Y version.
    version = bp.__version__
    # The full version, including alpha/beta/rc tags.
    release = bp.__version__
except ImportError:
    version = 'latest'
    release = 'latest'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#language = None

# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#today = ''
# Else, today_fmt is used as the format for a strftime call.
#today_fmt = '%B %d, %Y'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = []

# The reST default role (used for this markup: `text`) to use for all documents.
#default_role = None

# If true, '()' will be appended to :func: etc. cross-reference text.
#add_function_parentheses = True

# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
#add_module_names = True

# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
#show_authors = False

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# A list of ignored prefixes for module index sorting.
#modindex_common_prefix = []

# -- Options for HTML output ---------------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
html_theme = 'default'

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#html_theme_options = {}

# Add any paths that contain custom themes here, relative to this directory.
#html_theme_path = []

# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> documentation".
#html_title = None

# A shorter title for the navigation bar. Default is the same as html_title.
#html_short_title = None

# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
#html_logo = None

# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
#html_favicon = None

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
#html_static_path = ['_static']

# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
#html_last_updated_fmt = '%b %d, %Y'

# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
#html_use_smartypants = True

# Custom sidebar templates, maps document names to template names.
#html_sidebars = {}

# Additional templates that should be rendered to pages, maps page names to
# template names.
#html_additional_pages = {}

# If false, no module index is generated.
#html_domain_indices = True

# If false, no index is generated.
#html_use_index = True

# If true, the index is split into individual pages for each letter.
#html_split_index = False

# If true, links to the reST sources are added to the pages.
#html_show_sourcelink = True

# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
#html_show_sphinx = True

# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
#html_show_copyright = True

# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
#html_use_opensearch = ''

# This is the file name suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = None

# Output file base name for HTML help builder.
htmlhelp_basename = 'BibtexParserdoc'


# -- Options for LaTeX output --------------------------------------------------

latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#'papersize': 'letterpaper',

# The font size ('10pt', '11pt' or '12pt').
#'pointsize': '10pt',

# Additional stuff for the LaTeX preamble.
#'preamble': '',
}

# Grouping the document tree into LaTeX files.
List of tuples 190 | # (source start file, target name, title, author, documentclass [howto/manual]). 191 | latex_documents = [ 192 | ('index', 'BibtexParser.tex', 'BibtexParser Documentation', 193 | 'F. Boulogne', 'manual'), 194 | ] 195 | 196 | # The name of an image file (relative to this directory) to place at the top of 197 | # the title page. 198 | #latex_logo = None 199 | 200 | # For "manual" documents, if this is true, then toplevel headings are parts, 201 | # not chapters. 202 | #latex_use_parts = False 203 | 204 | # If true, show page references after internal links. 205 | #latex_show_pagerefs = False 206 | 207 | # If true, show URL addresses after external links. 208 | #latex_show_urls = False 209 | 210 | # Documents to append as an appendix to all manuals. 211 | #latex_appendices = [] 212 | 213 | # If false, no module index is generated. 214 | #latex_domain_indices = True 215 | 216 | 217 | # -- Options for manual page output -------------------------------------------- 218 | 219 | # One entry per manual page. List of tuples 220 | # (source start file, name, description, authors, manual section). 221 | man_pages = [ 222 | ('index', 'bibtexparser', 'BibtexParser Documentation', 223 | ['F. Boulogne'], 1) 224 | ] 225 | 226 | # If true, show URL addresses after external links. 227 | #man_show_urls = False 228 | 229 | 230 | # -- Options for Texinfo output ------------------------------------------------ 231 | 232 | # Grouping the document tree into Texinfo files. List of tuples 233 | # (source start file, target name, title, author, 234 | # dir menu entry, description, category) 235 | texinfo_documents = [ 236 | ('index', 'BibtexParser', 'BibtexParser Documentation', 237 | 'F. Boulogne', 'BibtexParser', 'One line description of project.', 238 | 'Miscellaneous'), 239 | ] 240 | 241 | # Documents to append as an appendix to all manuals. 242 | #texinfo_appendices = [] 243 | 244 | # If false, no module index is generated. 
245 | #texinfo_domain_indices = True 246 | 247 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 248 | #texinfo_show_urls = 'footnote' 249 | -------------------------------------------------------------------------------- /bibtexparser/bibtexexpression.py: --------------------------------------------------------------------------------
import pyparsing as pp


# General helpers

def strip_after_new_lines(s):
    """Removes leading whitespaces in all but first line.

    The string is split with ``splitlines()`` and re-joined with ``'\\n'``,
    so line endings are normalised and a trailing newline is dropped.
    Trailing whitespace inside a line is left untouched.

    :param s: the string to clean
    :returns: the cleaned string
    """
    lines = s.splitlines()
    if len(lines) > 1:
        lines = [lines[0]] + [l.lstrip() for l in lines[1:]]
    return '\n'.join(lines)


def add_logger_parse_action(expr, log_func):
    """Register a callback on expression parsing with the adequate message.

    :param expr: pyparsing expression; its ``resultsName`` is used in the
        logged message
    :param log_func: callable receiving the formatted message
        ``"Found <resultsName>: <tokens>"``
    """
    def action(s, l, t):
        # Logging only: nothing is returned from this parse action.
        log_func("Found {}: {}".format(expr.resultsName, t))
    expr.addParseAction(action)


# Parse action helpers
# Helpers for returning values from the parsed tokens. Shaped as pyparsing's
# parse actions. In pyparsing wording:
# s, l, t, stand for string, location, token

def first_token(s, l, t):
    """Return the single token of ``t``.

    Exactly one token is expected; this is currently only enforced by an
    ``assert`` (see TODO below).
    """
    # TODO Handle this case correctly!
    assert(len(t) == 1)
    return t[0]


def remove_trailing_newlines(s, l, t):
    """Return the first token without its trailing ``'\\n'`` characters.

    When the first token is empty (falsy) nothing is returned, i.e. the
    function implicitly returns ``None``.
    """
    if t[0]:
        return t[0].rstrip('\n')


def remove_braces(s, l, t):
    """Return the first token without its enclosing braces.

    A leading ``'{'`` and a trailing ``'}'`` are each removed only when
    present, independently of one another. An empty token gives ``''``.
    """
    if len(t[0]) < 1:
        return ''
    else:
        # Slice bounds: skip the first char only if it is an opening brace,
        # and drop the last char only if it is a closing brace.
        start = 1 if t[0][0] == '{' else 0
        end = -1 if t[0][-1] == '}' else None
        return t[0][start:end]


def field_to_pair(s, l, t):
    """
    Looks for parsed element named 'Field'.

    The value goes through :func:`strip_after_new_lines` before being
    returned.

    :returns: (name, value).
    """
    f = t.get('Field')
    # Not sure it is desirable here to strip but it is for conformance
    # to previous implementation
    return (f.get('FieldName'),
            strip_after_new_lines(f.get('Value')))


# Expressions helpers

def in_braces_or_pars(exp):
    """
    exp -> (exp)|{exp}

    The delimiters themselves are suppressed from the parse results.
    """
    return ((pp.Suppress('{') + exp + pp.Suppress('}')) |
            (pp.Suppress('(') + exp + pp.Suppress(')')))


class BibtexExpression(object):
    """Gives access to pyparsing expressions.

    Attributes are pyparsing expressions for the following elements:

    * main_expression: the bibtex file
    * string_def: a string definition
    * preamble_decl: a preamble declaration
    * explicit_comment: an explicit comment
    * entry: an entry definition
    * implicit_comment: an implicit comment

    ``ParseException`` is exposed as a class attribute (alias of
    ``pyparsing.ParseException``).

    """

    ParseException = pp.ParseException

    def __init__(self):
        """Build all the pyparsing expressions of the bibtex grammar."""

        # Bibtex keywords

        string_def_start = pp.CaselessKeyword("@string")
        preamble_start = pp.CaselessKeyword("@preamble")
        comment_line_start = pp.CaselessKeyword('@comment')

        # String names
        string_name = pp.Word(pp.alphanums + '_')('StringName')
        # Install a do-nothing default, then attach the indirection method so
        # that the effective action can be replaced later through
        # set_string_name_parse_action().
        self.set_string_name_parse_action(lambda s, l, t: None)
        string_name.addParseAction(self._string_name_parse_action)

        # Values inside bibtex fields
        # Values can be integer or string expressions. The latter may use
        # quoted or braced values.

        # Integer values
        integer = pp.Word(pp.nums)('Integer')

        # Braced values: braced values can contain nested (but balanced) braces
        braced_value_content = pp.CharsNotIn('{}')
        braced_value = pp.Forward()  # Recursive definition for nested braces
        braced_value <<= pp.originalTextFor(
            '{' + pp.ZeroOrMore(braced_value | braced_value_content) + '}'
            )('BracedValue')
        braced_value.setParseAction(remove_braces)
        # TODO add ignore for "\}" and "\{" ?
        # TODO @ are not parsed by bibtex in braces

        # Quoted values: may contain braced content with balanced braces
        brace_in_quoted = pp.nestedExpr('{', '}', ignoreExpr=None)
        text_in_quoted = pp.CharsNotIn('"{}')
        # (quotes should be escaped by braces in quoted value)
        quoted_value = pp.originalTextFor(
            '"' + pp.ZeroOrMore(text_in_quoted | brace_in_quoted) + '"'
            )('QuotedValue')
        quoted_value.addParseAction(pp.removeQuotes)

        # String expressions: one or more values joined with '#'
        string_expr = pp.delimitedList(
            (quoted_value | braced_value | string_name), delim='#'
            )('StringExpression')
        # Same indirection scheme as for string names (see above).
        self.set_string_expression_parse_action(lambda s, l, t: None)
        string_expr.addParseAction(self._string_expr_parse_action)

        value = (integer | string_expr)('Value')

        # Entries

        # @EntryType { ...
        entry_type = (pp.Suppress('@') + pp.Word(pp.alphas))('EntryType')
        entry_type.setParseAction(first_token)

        # Entry key: any character up to a ',' without leading and trailing
        # spaces.
        key = pp.SkipTo(',')('Key')  # Exclude @',\#}{~%
        key.setParseAction(lambda s, l, t: first_token(s, l, t).strip())

        # Field name: word of letters, digits, dashes, underscores and
        # parentheses
        field_name = pp.Word(pp.alphanums + '_-()')('FieldName')
        field_name.setParseAction(first_token)

        # Field: field_name = value
        field = pp.Group(field_name + pp.Suppress('=') + value)('Field')
        field.setParseAction(field_to_pair)

        # List of fields: comma separated fields, optional trailing comma
        field_list = (pp.delimitedList(field) + pp.Suppress(pp.Optional(','))
                      )('Fields')
        # Build a dict from the (name, value) pairs. Pairs are inserted in
        # reverse order, so for a duplicated field name the first occurrence
        # is the one that is kept.
        field_list.setParseAction(
            lambda s, l, t: {k: v for (k, v) in reversed(t.get('Fields'))})

        # Entry: type, key, and fields
        self.entry = (entry_type +
                      in_braces_or_pars(key + pp.Suppress(',') + field_list)
                      )('Entry')

        # Other stuff: comments, string definitions, and preamble declarations

        # Explicit comments: @comment + everything up to next valid declaration
        # starting on new line.
        not_an_implicit_comment = (pp.LineStart() + pp.Literal('@')
                                   ) | pp.stringEnd()
        self.explicit_comment = (
            pp.Suppress(comment_line_start) +
            pp.originalTextFor(pp.SkipTo(not_an_implicit_comment),
                               asString=True))('ExplicitComment')
        self.explicit_comment.addParseAction(remove_trailing_newlines)
        self.explicit_comment.addParseAction(remove_braces)
        # Previous implementation included comment until next '}'.
        # This is however not in line with bibtex behavior that is to only
        # ignore until EOL. Brace stripping is arbitrary here but avoids
        # duplication on bibtex write.

        # Empty implicit_comments lead to infinite loop of zeroOrMore
        def mustNotBeEmpty(t):
            """Reject an empty match by raising a ParseException."""
            if not t[0]:
                raise pp.ParseException("Match must not be empty.")

        # Implicit comments: not anything else
        self.implicit_comment = pp.originalTextFor(
            pp.SkipTo(not_an_implicit_comment).setParseAction(mustNotBeEmpty),
            asString=True)('ImplicitComment')
        self.implicit_comment.addParseAction(remove_trailing_newlines)

        # String definition
        self.string_def = (pp.Suppress(string_def_start) + in_braces_or_pars(
            string_name +
            pp.Suppress('=') +
            string_expr('StringValue')
            ))('StringDefinition')

        # Preamble declaration
        self.preamble_decl = (pp.Suppress(preamble_start) +
                              in_braces_or_pars(value))('PreambleDeclaration')

        # Main bibtex expression
        # Alternatives are tried in the order listed; implicit comments come
        # last and therefore catch whatever none of the others matched.

        self.main_expression = pp.ZeroOrMore(
                self.string_def |
                self.preamble_decl |
                self.explicit_comment |
                self.entry |
                self.implicit_comment)

    def add_log_function(self, log_fun):
        """Add notice to logger on entry, comment, preamble, string definitions.

        :param log_fun: logger function
        """
        for e in [self.entry,
                  self.implicit_comment,
                  self.explicit_comment,
                  self.preamble_decl,
                  self.string_def]:
            add_logger_parse_action(e, log_fun)

    def set_string_name_parse_action(self, fun):
        """Set the parseAction for string name expression.

        :param fun: callable with the parse action signature ``(s, l, t)``

        .. Note::

            For some reason pyparsing duplicates the string_name
            expression so setting its parseAction a posteriori has no effect
            in the context of a string expression. This is why this function
            should be used instead.
        """
        self._string_name_parse_action_fun = fun

    def _string_name_parse_action(self, s, l, t):
        """Forward to the function set by set_string_name_parse_action."""
        return self._string_name_parse_action_fun(s, l, t)

    def set_string_expression_parse_action(self, fun):
        """Set the parseAction for string_expression expression.

        :param fun: callable with the parse action signature ``(s, l, t)``

        .. Note::

            See set_string_name_parse_action.
        """
        self._string_expr_parse_action_fun = fun

    def _string_expr_parse_action(self, s, l, t):
        """Forward to the function set by set_string_expression_parse_action."""
        return self._string_expr_parse_action_fun(s, l, t)

    def parseFile(self, file_obj):
        """Parse ``file_obj`` with ``main_expression``.

        ``parseAll=True`` is passed, so the whole input has to match.

        :param file_obj: passed as is to pyparsing's ``parseFile``
        :returns: the result of ``main_expression.parseFile``
        """
        return self.main_expression.parseFile(file_obj, parseAll=True)
-------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | The code is distributed under a dual license (at your choice). 2 | 3 | ##################################################################### 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are 6 | met: 7 | 8 | (1) Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | (2) Redistributions in binary form must reproduce the above copyright 12 | notice, this list of conditions and the following disclaimer in 13 | the documentation and/or other materials provided with the 14 | distribution. 15 | 16 | (3)The name of the author may not be used to 17 | endorse or promote products derived from this software without 18 | specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 21 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 22 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, 24 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 25 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 28 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 29 | IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 | POSSIBILITY OF SUCH DAMAGE. 31 | 32 | ##################################################################### 33 | GNU LESSER GENERAL PUBLIC LICENSE 34 | Version 3, 29 June 2007 35 | 36 | Copyright (C) 2007 Free Software Foundation, Inc. 37 | Everyone is permitted to copy and distribute verbatim copies 38 | of this license document, but changing it is not allowed. 39 | 40 | 41 | This version of the GNU Lesser General Public License incorporates 42 | the terms and conditions of version 3 of the GNU General Public 43 | License, supplemented by the additional permissions listed below. 44 | 45 | 0. Additional Definitions. 46 | 47 | As used herein, "this License" refers to version 3 of the GNU Lesser 48 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 49 | General Public License. 50 | 51 | "The Library" refers to a covered work governed by this License, 52 | other than an Application or a Combined Work as defined below. 53 | 54 | An "Application" is any work that makes use of an interface provided 55 | by the Library, but which is not otherwise based on the Library. 56 | Defining a subclass of a class defined by the Library is deemed a mode 57 | of using an interface provided by the Library. 58 | 59 | A "Combined Work" is a work produced by combining or linking an 60 | Application with the Library. The particular version of the Library 61 | with which the Combined Work was made is also called the "Linked 62 | Version". 
63 | 64 | The "Minimal Corresponding Source" for a Combined Work means the 65 | Corresponding Source for the Combined Work, excluding any source code 66 | for portions of the Combined Work that, considered in isolation, are 67 | based on the Application, and not on the Linked Version. 68 | 69 | The "Corresponding Application Code" for a Combined Work means the 70 | object code and/or source code for the Application, including any data 71 | and utility programs needed for reproducing the Combined Work from the 72 | Application, but excluding the System Libraries of the Combined Work. 73 | 74 | 1. Exception to Section 3 of the GNU GPL. 75 | 76 | You may convey a covered work under sections 3 and 4 of this License 77 | without being bound by section 3 of the GNU GPL. 78 | 79 | 2. Conveying Modified Versions. 80 | 81 | If you modify a copy of the Library, and, in your modifications, a 82 | facility refers to a function or data to be supplied by an Application 83 | that uses the facility (other than as an argument passed when the 84 | facility is invoked), then you may convey a copy of the modified 85 | version: 86 | 87 | a) under this License, provided that you make a good faith effort to 88 | ensure that, in the event an Application does not supply the 89 | function or data, the facility still operates, and performs 90 | whatever part of its purpose remains meaningful, or 91 | 92 | b) under the GNU GPL, with none of the additional permissions of 93 | this License applicable to that copy. 94 | 95 | 3. Object Code Incorporating Material from Library Header Files. 96 | 97 | The object code form of an Application may incorporate material from 98 | a header file that is part of the Library. 
You may convey such object 99 | code under terms of your choice, provided that, if the incorporated 100 | material is not limited to numerical parameters, data structure 101 | layouts and accessors, or small macros, inline functions and templates 102 | (ten or fewer lines in length), you do both of the following: 103 | 104 | a) Give prominent notice with each copy of the object code that the 105 | Library is used in it and that the Library and its use are 106 | covered by this License. 107 | 108 | b) Accompany the object code with a copy of the GNU GPL and this license 109 | document. 110 | 111 | 4. Combined Works. 112 | 113 | You may convey a Combined Work under terms of your choice that, 114 | taken together, effectively do not restrict modification of the 115 | portions of the Library contained in the Combined Work and reverse 116 | engineering for debugging such modifications, if you also do each of 117 | the following: 118 | 119 | a) Give prominent notice with each copy of the Combined Work that 120 | the Library is used in it and that the Library and its use are 121 | covered by this License. 122 | 123 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 124 | document. 125 | 126 | c) For a Combined Work that displays copyright notices during 127 | execution, include the copyright notice for the Library among 128 | these notices, as well as a reference directing the user to the 129 | copies of the GNU GPL and this license document. 130 | 131 | d) Do one of the following: 132 | 133 | 0) Convey the Minimal Corresponding Source under the terms of this 134 | License, and the Corresponding Application Code in a form 135 | suitable for, and under terms that permit, the user to 136 | recombine or relink the Application with a modified version of 137 | the Linked Version to produce a modified Combined Work, in the 138 | manner specified by section 6 of the GNU GPL for conveying 139 | Corresponding Source. 
140 | 141 | 1) Use a suitable shared library mechanism for linking with the 142 | Library. A suitable mechanism is one that (a) uses at run time 143 | a copy of the Library already present on the user's computer 144 | system, and (b) will operate properly with a modified version 145 | of the Library that is interface-compatible with the Linked 146 | Version. 147 | 148 | e) Provide Installation Information, but only if you would otherwise 149 | be required to provide such information under section 6 of the 150 | GNU GPL, and only to the extent that such information is 151 | necessary to install and execute a modified version of the 152 | Combined Work produced by recombining or relinking the 153 | Application with a modified version of the Linked Version. (If 154 | you use option 4d0, the Installation Information must accompany 155 | the Minimal Corresponding Source and Corresponding Application 156 | Code. If you use option 4d1, you must provide the Installation 157 | Information in the manner specified by section 6 of the GNU GPL 158 | for conveying Corresponding Source.) 159 | 160 | 5. Combined Libraries. 161 | 162 | You may place library facilities that are a work based on the 163 | Library side by side in a single library together with other library 164 | facilities that are not Applications and are not covered by this 165 | License, and convey such a combined library under terms of your 166 | choice, if you do both of the following: 167 | 168 | a) Accompany the combined library with a copy of the same work based 169 | on the Library, uncombined with any other library facilities, 170 | conveyed under the terms of this License. 171 | 172 | b) Give prominent notice with the combined library that part of it 173 | is a work based on the Library, and explaining where to find the 174 | accompanying uncombined form of the same work. 175 | 176 | 6. Revised Versions of the GNU Lesser General Public License. 
177 | 178 | The Free Software Foundation may publish revised and/or new versions 179 | of the GNU Lesser General Public License from time to time. Such new 180 | versions will be similar in spirit to the present version, but may 181 | differ in detail to address new problems or concerns. 182 | 183 | Each version is given a distinguishing version number. If the 184 | Library as you received it specifies that a certain numbered version 185 | of the GNU Lesser General Public License "or any later version" 186 | applies to it, you have the option of following the terms and 187 | conditions either of that published version or of any later version 188 | published by the Free Software Foundation. If the Library as you 189 | received it does not specify a version number of the GNU Lesser 190 | General Public License, you may choose any version of the GNU Lesser 191 | General Public License ever published by the Free Software Foundation. 192 | 193 | If the Library as you received it specifies that a proxy can decide 194 | whether future versions of the GNU Lesser General Public License shall 195 | apply, that proxy's public statement of acceptance of any version is 196 | permanent authorization for you to choose that version for the 197 | Library. 198 | -------------------------------------------------------------------------------- /docs/source/tutorial.rst: -------------------------------------------------------------------------------- 1 | ======== 2 | Tutorial 3 | ======== 4 | 5 | Step 0: Vocabulary 6 | ================== 7 | 8 | * An **entry** designates for example `@book{...}`, `@article{...}`, etc. 9 | * A **comment** is written as `@comment{...}`. 10 | * A **preamble** is a `@preamble{...}` block. 11 | * A **string** is `@string{...}`. 12 | 13 | In an entry, you can find 14 | 15 | * an **entry type** like `article`, `book`, etc. 16 | * **entry keys** or **keys** such as `author`, `title`, `year`... 
17 | * and also **records**, which designates the values of those keys. 18 | 19 | 20 | Step 1: Prepare a BibTeX file 21 | ============================= 22 | 23 | First, we prepare a BibTeX sample file. This is just for the purpose of illustration: 24 | 25 | .. code-block:: python 26 | 27 | bibtex = """@ARTICLE{Cesar2013, 28 | author = {Jean César}, 29 | title = {An amazing title}, 30 | year = {2013}, 31 | month = jan, 32 | volume = {12}, 33 | pages = {12--23}, 34 | journal = {Nice Journal}, 35 | abstract = {This is an abstract. This line should be long enough to test 36 | multilines...}, 37 | comments = {A comment}, 38 | keywords = {keyword1, keyword2} 39 | } 40 | """ 41 | 42 | with open('bibtex.bib', 'w') as bibfile: 43 | bibfile.write(bibtex) 44 | 45 | Step 2: Parse it! 46 | ================= 47 | 48 | Simplest call 49 | ------------- 50 | 51 | OK. Everything is in place. Let's parse the BibTeX file. 52 | 53 | .. code-block:: python 54 | 55 | import bibtexparser 56 | 57 | with open('bibtex.bib') as bibtex_file: 58 | bib_database = bibtexparser.load(bibtex_file) 59 | 60 | print(bib_database.entries) 61 | 62 | 63 | It prints a list of dictionaries for reference entries, for example books, articles: 64 | 65 | .. code-block:: python 66 | 67 | [{'journal': 'Nice Journal', 68 | 'comments': 'A comment', 69 | 'pages': '12--23', 70 | 'month': 'jan', 71 | 'abstract': 'This is an abstract. This line should be long enough to test\nmultilines...', 72 | 'title': 'An amazing title', 73 | 'year': '2013', 74 | 'volume': '12', 75 | 'ID': 'Cesar2013', 76 | 'author': 'Jean César', 77 | 'keyword': 'keyword1, keyword2', 78 | 'ENTRYTYPE': 'article'}] 79 | 80 | Note that, by convention, uppercase keys (ID, ENTRYTYPE) are data generated by the parser, while lowercase keys come from the original bibtex file. 81 | 82 | You can also print comments, preambles and string: 83 | 84 | .. 
code-block:: python 85 | 86 | print(bib_database.comments) 87 | print(bib_database.preambles) 88 | print(bib_database.strings) 89 | 90 | 91 | Parse a string 92 | -------------- 93 | 94 | If for some reason, you prefer to parse a string, that's also possible: 95 | 96 | .. code-block:: python 97 | 98 | import bibtexparser 99 | 100 | with open('bibtex.bib') as bibtex_file: 101 | bibtex_str = bibtex_file.read() 102 | 103 | bib_database = bibtexparser.loads(bibtex_str) 104 | 105 | 106 | Tune parser's options 107 | --------------------- 108 | 109 | In the previous snippet, several default options are used. 110 | You can tweak them as you wish. 111 | 112 | .. code-block:: python 113 | 114 | import bibtexparser 115 | from bibtexparser.bparser import BibTexParser 116 | 117 | parser = BibTexParser() 118 | parser.ignore_nonstandard_types = False 119 | parser.homogenise_fields = False 120 | parser.common_strings = False 121 | 122 | bib_database = bibtexparser.loads(bibtex_str, parser) 123 | 124 | 125 | Step 3: Export 126 | ============== 127 | 128 | Once you have worked on your parsed database, you may want to export the result. This library provides some functions to help with that. However, you can write your own functions if you have specific requirements. 129 | 130 | Create a BibTeX file or string 131 | -------------------------------- 132 | 133 | The bibliographic data can be converted back into a string: 134 | 135 | .. code-block:: python 136 | 137 | import bibtexparser 138 | 139 | bibtex_str = bibtexparser.dumps(bib_database) 140 | 141 | or a BibTeX file like this: 142 | 143 | .. code-block:: python 144 | 145 | import bibtexparser 146 | 147 | with open('bibtex.bib', 'w') as bibtex_file: 148 | bibtexparser.dump(bib_database, bibtex_file) 149 | 150 | 151 | Call the writer 152 | --------------- 153 | 154 | In the first section we prepared a BibTeX sample file. We can prepare the same file using pure python and the ``BibTexWriter`` class. 155 | 156 | .. 
code-block:: python 157 | 158 | from bibtexparser.bwriter import BibTexWriter 159 | from bibtexparser.bibdatabase import BibDatabase 160 | 161 | db = BibDatabase() 162 | db.entries = [ 163 | {'journal': 'Nice Journal', 164 | 'comments': 'A comment', 165 | 'pages': '12--23', 166 | 'month': 'jan', 167 | 'abstract': 'This is an abstract. This line should be long enough to test\nmultilines...', 168 | 'title': 'An amazing title', 169 | 'year': '2013', 170 | 'volume': '12', 171 | 'ID': 'Cesar2013', 172 | 'author': 'Jean César', 173 | 'keyword': 'keyword1, keyword2', 174 | 'ENTRYTYPE': 'article'}] 175 | 176 | writer = BibTexWriter() 177 | with open('bibtex.bib', 'w') as bibfile: 178 | bibfile.write(writer.write(db)) 179 | 180 | This code generates the following file: 181 | 182 | .. code-block:: latex 183 | 184 | @article{Cesar2013, 185 | abstract = {This is an abstract. This line should be long enough to test 186 | multilines...}, 187 | author = {Jean César}, 188 | comments = {A comment}, 189 | journal = {Nice Journal}, 190 | keyword = {keyword1, keyword2}, 191 | month = {jan}, 192 | pages = {12--23}, 193 | title = {An amazing title}, 194 | volume = {12}, 195 | year = {2013} 196 | } 197 | 198 | The writer also has several flags that can be enabled to customize the output file. 199 | For example we can use ``indent`` and ``comma_first`` to customize the previous entry, first the code: 200 | 201 | .. code-block:: python 202 | 203 | from bibtexparser.bwriter import BibTexWriter 204 | from bibtexparser.bibdatabase import BibDatabase 205 | 206 | db = BibDatabase() 207 | db.entries = [ 208 | {'journal': 'Nice Journal', 209 | 'comments': 'A comment', 210 | 'pages': '12--23', 211 | 'month': 'jan', 212 | 'abstract': 'This is an abstract. 
This line should be long enough to test\nmultilines...', 213 | 'title': 'An amazing title', 214 | 'year': '2013', 215 | 'volume': '12', 216 | 'ID': 'Cesar2013', 217 | 'author': 'Jean César', 218 | 'keyword': 'keyword1, keyword2', 219 | 'ENTRYTYPE': 'article'}] 220 | 221 | writer = BibTexWriter() 222 | writer.indent = ' ' # indent entries with 4 spaces instead of one 223 | writer.comma_first = True # place the comma at the beginning of the line 224 | with open('bibtex.bib', 'w') as bibfile: 225 | bibfile.write(writer.write(db)) 226 | 227 | This code results in the following, customized, file: 228 | 229 | .. code-block:: latex 230 | 231 | @article{Cesar2013 232 | , abstract = {This is an abstract. This line should be long enough to test 233 | multilines...} 234 | , author = {Jean César} 235 | , comments = {A comment} 236 | , journal = {Nice Journal} 237 | , keyword = {keyword1, keyword2} 238 | , month = {jan} 239 | , pages = {12--23} 240 | , title = {An amazing title} 241 | , volume = {12} 242 | , year = {2013} 243 | } 244 | 245 | 246 | Flags to the writer object can modify not only how an entry is printed but how several BibTeX entries are sorted and separated. 247 | See the :ref:`bibtexparser_api` for the full list of flags. 248 | 249 | 250 | Step 4: Add salt and pepper 251 | =========================== 252 | 253 | In this section, we discuss some customizations and details. 254 | 255 | Customizations 256 | -------------- 257 | 258 | By default, the parser does not alter the content of each field and keeps it as a simple string. There are many cases 259 | where this is not desired. For example, instead of a string with multiple authors, it could be parsed as a list. 260 | 261 | To modify field values during parsing, a callback function can be supplied to the parser which can be used to modify 262 | BibTeX entries. The library includes several functions which may be used. Alternatively, you can read them to create 263 | your own functions. 264 | 265 | .. 
code-block:: python 266 | 267 | import bibtexparser 268 | from bibtexparser.bparser import BibTexParser 269 | from bibtexparser.customization import * 270 | 271 | # Let's define a function to customize our entries. 272 | # It takes a record and returns this record. 273 | def customizations(record): 274 | """Use some functions delivered by the library 275 | 276 | :param record: a record 277 | :returns: -- customized record 278 | """ 279 | record = type(record) 280 | record = author(record) 281 | record = editor(record) 282 | record = journal(record) 283 | record = keyword(record) 284 | record = link(record) 285 | record = page_double_hyphen(record) 286 | record = doi(record) 287 | return record 288 | 289 | with open('bibtex.bib') as bibtex_file: 290 | parser = BibTexParser() 291 | parser.customization = customizations 292 | bib_database = bibtexparser.load(bibtex_file, parser=parser) 293 | print(bib_database.entries) 294 | 295 | 296 | If you think that you have a customization which could be useful to others, please share it with us! 297 | 298 | 299 | Accents and weird characters 300 | ---------------------------- 301 | 302 | Your bibtex may contain accents and specific characters. 303 | They are sometimes coded like this ``\'{e}`` but this is not the correct way, ``{\'e}`` is preferred. Moreover, you may want to manipulate ``é``. There are different situations: 304 | 305 | * Case 1: you plan to use this library to work with latex and you assume that the original bibtex is clean. You have nothing to do. 306 | 307 | * Case 2: you plan to use this library to work with latex but your bibtex is not really clean. 308 | 309 | .. 
code-block:: python 310 | 311 | import bibtexparser 312 | from bibtexparser.bparser import BibTexParser 313 | from bibtexparser.customization import homogenize_latex_encoding 314 | 315 | with open('bibtex.bib') as bibtex_file: 316 | parser = BibTexParser() 317 | parser.customization = homogenize_latex_encoding 318 | bib_database = bibtexparser.load(bibtex_file, parser=parser) 319 | print(bib_database.entries) 320 | 321 | 322 | * Case 3: you plan to use this library to work with something different and your bibtex is not really clean. 323 | Then, you probably want to use unicode. 324 | 325 | .. code-block:: python 326 | 327 | import bibtexparser 328 | from bibtexparser.bparser import BibTexParser 329 | from bibtexparser.customization import convert_to_unicode 330 | 331 | with open('bibtex.bib') as bibtex_file: 332 | parser = BibTexParser() 333 | parser.customization = convert_to_unicode 334 | bib_database = bibtexparser.load(bibtex_file, parser=parser) 335 | print(bib_database.entries) 336 | 337 | 338 | .. Note:: 339 | 340 | If you want to mix different customization functions, you can write your own function. 
class BibTexParser(object):
    """
    A parser for reading BibTeX bibliographic data files.

    Example::

        from bibtexparser.bparser import BibTexParser

        bibtex_str = ...

        parser = BibTexParser()
        parser.ignore_nonstandard_types = False
        parser.homogenize_fields = False
        parser.common_strings = False
        bib_database = bibtexparser.loads(bibtex_str, parser)

    """

    def __new__(cls, data=None, **args):
        """
        To catch the old API structure in which creating the parser would
        immediately parse and return data.

        :param data: optional BibTeX data; when given, it is parsed
            immediately and the parse result is returned instead of a
            parser instance.
        :param args: keyword arguments forwarded to the module-level
            ``parse`` function when ``data`` is given.
        """

        if data is None:
            return super(BibTexParser, cls).__new__(cls)
        else:
            # For backwards compatibility: if data is given, parse
            # and return the `BibDatabase` object instead of the parser.
            return parse(data, **args)

    def __init__(self, data=None,
                 customization=None,
                 ignore_nonstandard_types=True,
                 homogenize_fields=False,
                 common_strings=False):
        """
        Creates a parser for reading BibTeX files

        :param data: not used here; it is accepted so the signature matches
            ``__new__``, which handles the legacy "parse on construction"
            call style.
        :param customization: callback applied to every parsed entry.
        :param ignore_nonstandard_types: skip entries whose type is not in
            ``STANDARD_TYPES``.
        :param homogenize_fields: map alternative field names (see
            ``alt_dict``) to a single canonical name.
        :param common_strings: preload common strings into the database.
        :return: parser
        :rtype: `BibTexParser`
        """
        self.bib_database = BibDatabase()

        #: Load common strings such as months abbreviation
        #: Default: `False`.
        self.common_strings = common_strings
        if self.common_strings:
            self.bib_database.load_common_strings()

        #: Callback function to process BibTeX entries after parsing,
        #: for example to create a list from a string with multiple values.
        #: By default all BibTeX values are treated as simple strings.
        #: Default: `None`.
        self.customization = customization

        #: Ignore non-standard BibTeX types (`book`, `article`, etc).
        #: Default: `True`.
        self.ignore_nonstandard_types = ignore_nonstandard_types

        #: Sanitize BibTeX field names, for example change `url` to `link` etc.
        #: Field names are always converted to lowercase names.
        #: Default: `False`.
        self.homogenize_fields = homogenize_fields

        # On some sample data files, the character encoding detection simply
        # hangs. We are going to default to utf8, and mandate it.
        self.encoding = 'utf8'

        # pre-defined set of key changes
        self.alt_dict = {
            'keyw': u'keyword',
            'keywords': u'keyword',
            'authors': u'author',
            'editors': u'editor',
            'url': u'link',
            'urls': u'link',
            'links': u'link',
            'subjects': u'subject'
        }

        # Setup the parser expression
        self._init_expressions()

    def parse(self, bibtex_str, partial=False):
        """Parse a BibTeX string into an object

        :param bibtex_str: BibTeX string
        :type: str or unicode
        :param partial: If True, print errors only on parsing failures.
            If False, an exception is raised.
        :type: boolean
        :return: bibliographic database
        :rtype: BibDatabase
        """
        bibtex_file_obj = self._bibtex_file_obj(bibtex_str)
        try:
            self._expr.parseFile(bibtex_file_obj)
        except self._expr.ParseException as exc:
            logger.error("Could not parse properly, starting at %s", exc.line)
            if not partial:
                # Bare raise keeps the original traceback on Python 2 too.
                raise
        return self.bib_database

    def parse_file(self, file, partial=False):
        """Parse a BibTeX file into an object

        :param file: BibTeX file or file-like object
        :type: file
        :param partial: If True, print errors only on parsing failures.
            If False, an exception is raised.
        :type: boolean
        :return: bibliographic database
        :rtype: BibDatabase
        """
        return self.parse(file.read(), partial=partial)

    def _init_expressions(self):
        """
        Defines all parser expressions used internally.
        """
        self._expr = BibtexExpression()

        # Handle string as BibDataString object
        self._expr.set_string_name_parse_action(
            lambda s, l, t:
                BibDataString(self.bib_database, t[0]))
        self._expr.set_string_expression_parse_action(
            lambda s, l, t:
                self._interpolate_string_expression(t))

        # Add notice to logger
        self._expr.add_log_function(logger.debug)

        # Set actions
        self._expr.entry.addParseAction(
            lambda s, l, t: self._add_entry(
                t.get('EntryType'), t.get('Key'), t.get('Fields'))
            )
        self._expr.implicit_comment.addParseAction(
            lambda s, l, t: self._add_comment(t[0])
            )
        self._expr.explicit_comment.addParseAction(
            lambda s, l, t: self._add_comment(t[0])
            )
        self._expr.preamble_decl.addParseAction(
            lambda s, l, t: self._add_preamble(t[0])
            )
        self._expr.string_def.addParseAction(
            lambda s, l, t: self._add_string(t['StringName'].name,
                                             t['StringValue'])
            )

    def _bibtex_file_obj(self, bibtex_str):
        """ Wrap the input in a text stream, dropping a leading BOM.

        :param bibtex_str: BibTeX data as text or as encoded bytes
        :type bibtex_str: unicode string, bytes or bytearray
        :returns: io.StringIO -- stream over the decoded text
        """
        # Decode first, so the byte-order-mark check below always operates
        # on text and a BOM in byte input is recognised as well.
        if isinstance(bibtex_str, (bytes, bytearray)):
            bibtex_str = bibtex_str.decode(self.encoding)
        # Slicing (rather than startswith) keeps raising TypeError for
        # non-string input such as an open file object.
        if bibtex_str[:1] == u'\ufeff':
            # Properly decoded byte-order mark.
            bibtex_str = bibtex_str[1:]
        elif bibtex_str[:3] == u'\xef\xbb\xbf':
            # UTF-8 BOM bytes that were mis-decoded one byte per character.
            bibtex_str = bibtex_str[3:]
        return io.StringIO(bibtex_str)

    def _clean_val(self, val):
        """ Clean instring before adding to dictionary

        :param val: a value
        :type val: string
        :returns: string -- value
        """
        if not val or val == "{}":
            return ''
        return val

    def _clean_key(self, key):
        """ Lowercase a key and return as unicode.

        :param key: a key
        :type key: string
        :returns: (unicode) string -- value
        """
        key = key.lower()
        if not isinstance(key, ustr):
            return ustr(key, 'utf-8')
        else:
            return key

    def _clean_field_key(self, key):
        """ Clean a bibtex field key and homogenize alternative forms.

        :param key: a key
        :type key: string
        :returns: string -- value
        """
        key = self._clean_key(key)
        if self.homogenize_fields:
            # Unknown keys map to themselves.
            key = self.alt_dict.get(key, key)
        return key

    def _add_entry(self, entry_type, entry_id, fields):
        """ Adds a parsed entry.
        Includes checking type and fields, cleaning, applying customizations.

        The entry is appended to ``self.bib_database.entries``; nothing is
        returned. Non-standard types are skipped (with a warning) when
        ``ignore_nonstandard_types`` is set.

        :param entry_type: the entry type
        :type entry_type: string
        :param entry_id: the entry bibid
        :type entry_id: string
        :param fields: the fields and values
        :type fields: dictionary
        """
        d = {}
        entry_type = self._clean_key(entry_type)
        if self.ignore_nonstandard_types and entry_type not in STANDARD_TYPES:
            logger.warning('Entry type %s not standard. Not considered.',
                           entry_type)
            return
        for key in fields:
            d[self._clean_field_key(key)] = self._clean_val(fields[key])
        # Set after the fields so they win over same-named parsed fields.
        d['ENTRYTYPE'] = entry_type
        d['ID'] = entry_id
        if self.customization is not None:
            # apply any customizations to the record object then return it
            logger.debug('Apply customizations and return dict')
            d = self.customization(d)
        self.bib_database.entries.append(d)

    def _add_comment(self, comment):
        """
        Stores a comment in the list of comment.

        :param comment: the parsed comment
        :type comment: string
        """
        logger.debug('Store comment in list of comments: %r', comment)
        self.bib_database.comments.append(comment)

    def _add_string(self, string_key, string):
        """
        Stores a new string in the string dictionary.

        :param string_key: the string key
        :type string_key: string
        :param string: the string value
        :type string: string
        """
        if string_key in self.bib_database.strings:
            logger.warning('Overwritting existing string for key: %s.',
                           string_key)
        # Lazy %-formatting: no work when debug logging is off, and no
        # byte/unicode mixing problems on Python 2.
        logger.debug('Store string: %s -> %s', string_key, string)
        self.bib_database.strings[string_key] = self._clean_val(string)

    def _interpolate_string_expression(self, string_expr):
        """
        Replaces bibdatastrings by their values in an expression.

        :param string_expr: the parsed string as a list
        :type string_expr: list
        :returns: string -- concatenation of the expanded parts
        """
        return ''.join([self._expand_string(s) for s in string_expr])

    def _expand_string(self, string_or_bibdatastring):
        """
        Eventually replaces a bibdatastring by its value.

        :param string_or_bibdatastring: the parsed token
        :type string_expr: string or BibDataString
        :returns: string
        """
        if isinstance(string_or_bibdatastring, BibDataString):
            return string_or_bibdatastring.get_value()
        else:
            return string_or_bibdatastring

    def _add_preamble(self, preamble):
        """
        Stores a preamble.

        :param preamble: the parsed preamble
        :type preamble: string
        """
        logger.debug('Store preamble in list of preambles')
        self.bib_database.preambles.append(preamble)
def splitname(name, strict_mode=True):
    """
    Break a name into its constituent parts: First, von, Last, and Jr.

    :param string name: a string containing a single name
    :param Boolean strict_mode: whether to use strict mode
    :returns: dictionary of constituent parts
    :raises `customization.InvalidName`: If an invalid name is given and
                                         ``strict_mode = True``.

    In BibTeX, a name can be represented in any of three forms:
        * First von Last
        * von Last, First
        * von Last, Jr, First

    This function attempts to split a given name into its four parts. The
    returned dictionary has keys of ``first``, ``last``, ``von`` and ``jr``.
    Each value is a list of the words making up that part; this may be an empty
    list. If the input has no non-whitespace characters, a blank dictionary is
    returned.

    It is capable of detecting some errors with the input name. If the
    ``strict_mode`` parameter is ``True``, which is the default, this results in
    a :class:`customization.InvalidName` exception being raised. If it is
    ``False``, the function continues, working around the error as best it can.
    The errors that can be detected are listed below along with the handling
    for non-strict mode:

        * Name finishes with a trailing comma: delete the comma
        * Too many parts (e.g., von Last, Jr, First, Error): merge extra parts
          into First
        * Unterminated opening brace: add closing brace to end of input
        * Unmatched closing brace: add opening brace at start of word

    A backslash that is the very last character of the input has nothing to
    escape; it is kept literally as part of the final word in both modes.

    """
    # Useful references:
    # http://maverick.inria.fr/~Xavier.Decoret/resources/xdkbibtex/bibtex_summary.html#names
    # http://tug.ctan.org/info/bibtex/tamethebeast/ttb_en.pdf

    # Whitespace characters that can separate words.
    whitespace = set(' ~\r\n\t')

    # We'll iterate over the input once, dividing it into a list of words for
    # each comma-separated section. We'll also calculate the case of each word
    # as we work.
    sections = [[]]      # Sections of the name.
    cases = [[]]         # 1 = uppercase, 0 = lowercase, -1 = caseless.
    word = []            # Current word.
    case = -1            # Case of the current word.
    level = 0            # Current brace level.
    bracestart = False   # Will the next character be the first within a brace?
    controlseq = True    # Are we currently processing a control sequence?
    specialchar = None   # Are we currently processing a special character?

    # Using an iterator allows us to deal with escapes in a simple manner.
    nameiter = iter(name)
    for char in nameiter:
        # An escape.
        if char == '\\':
            # A default avoids leaking StopIteration to the caller when the
            # backslash is the last character of the input.
            escaped = next(nameiter, None)

            # Dangling backslash: keep it literally; the input is exhausted.
            if escaped is None:
                word.append(char)
                break

            # BibTeX doesn't allow whitespace escaping. Copy the slash and fall
            # through to the normal case to handle the whitespace.
            if escaped in whitespace:
                word.append(char)
                char = escaped

            else:
                # Is this the first character in a brace?
                if bracestart:
                    bracestart = False
                    controlseq = escaped.isalpha()
                    specialchar = True

                # Can we use it to determine the case?
                elif (case == -1) and escaped.isalpha():
                    if escaped.isupper():
                        case = 1
                    else:
                        case = 0

                # Copy the escape to the current word and go to the next
                # character in the input.
                word.append(char)
                word.append(escaped)
                continue

        # Start of a braced expression.
        if char == '{':
            level += 1
            word.append(char)
            bracestart = True
            controlseq = False
            specialchar = False
            continue

        # All the below cases imply this (and don't test its previous value).
        bracestart = False

        # End of a braced expression.
        if char == '}':
            # Check and reduce the level.
            if level:
                level -= 1
            else:
                if strict_mode:
                    raise InvalidName("Unmatched closing brace in name {{{0}}}.".format(name))
                word.insert(0, '{')

            # Update the state, append the character, and move on.
            controlseq = False
            specialchar = False
            word.append(char)
            continue

        # Inside a braced expression.
        if level:
            # Is this the end of a control sequence?
            if controlseq:
                if not char.isalpha():
                    controlseq = False

            # If it's a special character, can we use it for a case?
            elif specialchar:
                if (case == -1) and char.isalpha():
                    if char.isupper():
                        case = 1
                    else:
                        case = 0

            # Append the character and move on.
            word.append(char)
            continue

        # End of a word.
        # NB. we know we're not in a brace here due to the previous case.
        if char == ',' or char in whitespace:
            # Don't add empty words due to repeated whitespace.
            if word:
                sections[-1].append(''.join(word))
                word = []
                cases[-1].append(case)
                case = -1
                controlseq = False
                specialchar = False

            # End of a section.
            if char == ',':
                if len(sections) < 3:
                    sections.append([])
                    cases.append([])
                elif strict_mode:
                    raise InvalidName("Too many commas in the name {{{0}}}.".format(name))
            continue

        # Regular character.
        word.append(char)
        if (case == -1) and char.isalpha():
            if char.isupper():
                case = 1
            else:
                case = 0

    # Unterminated brace?
    if level:
        if strict_mode:
            raise InvalidName("Unterminated opening brace in the name {{{0}}}.".format(name))
        while level:
            word.append('}')
            level -= 1

    # Handle the final word.
    if word:
        sections[-1].append(''.join(word))
        cases[-1].append(case)

    # Get rid of trailing sections.
    if not sections[-1]:
        # Trailing comma?
        if (len(sections) > 1) and strict_mode:
            raise InvalidName("Trailing comma at end of name {{{0}}}.".format(name))
        sections.pop(-1)
        cases.pop(-1)

    # No non-whitespace input.
    if not sections or not any(bool(section) for section in sections):
        return {}

    # Initialise the output dictionary.
    parts = {'first': [], 'last': [], 'von': [], 'jr': []}

    # Form 1: "First von Last"
    if len(sections) == 1:
        p0 = sections[0]

        # One word only: last cannot be empty.
        if len(p0) == 1:
            parts['last'] = p0

        # Two words: must be first and last.
        elif len(p0) == 2:
            parts['first'] = p0[:1]
            parts['last'] = p0[1:]

        # Need to use the cases to figure it out.
        else:
            cases = cases[0]

            # First is the longest sequence of words starting with uppercase
            # that is not the whole string. von is then the longest sequence
            # whose last word starts with lowercase that is not the whole
            # string. Last is the rest. NB., this means last cannot be empty.

            # At least one lowercase letter.
            if 0 in cases:
                # Index from end of list of first and last lowercase word.
                firstl = cases.index(0) - len(cases)
                lastl = -cases[::-1].index(0) - 1
                if lastl == -1:
                    lastl -= 1      # Cannot consume the rest of the string.

                # Pull the parts out.
                parts['first'] = p0[:firstl]
                parts['von'] = p0[firstl:lastl+1]
                parts['last'] = p0[lastl+1:]

            # No lowercase: last is the last word, first is everything else.
            else:
                parts['first'] = p0[:-1]
                parts['last'] = p0[-1:]

    # Form 2 ("von Last, First") or 3 ("von Last, jr, First")
    else:
        # As long as there is content in the first name partition, use it as-is.
        first = sections[-1]
        if first and first[0]:
            parts['first'] = first

        # And again with the jr part.
        if len(sections) == 3:
            jr = sections[-2]
            if jr and jr[0]:
                parts['jr'] = jr

        # Last name cannot be empty; if there is only one word in the first
        # partition, we have to use it for the last name.
        last = sections[0]
        if len(last) == 1:
            parts['last'] = last

        # Have to look at the cases to figure it out.
        else:
            lcases = cases[0]

            # At least one lowercase: von is the longest sequence of whitespace
            # separated words whose last word does not start with an uppercase
            # word, and last is the rest.
            if 0 in lcases:
                split = len(lcases) - lcases[::-1].index(0)
                if split == len(lcases):
                    split = 0            # Last cannot be empty.
                parts['von'] = sections[0][:split]
                parts['last'] = sections[0][split:]

            # All uppercase => all last.
            else:
                parts['last'] = sections[0]

    # Done.
    return parts
def editor(record):
    """
    Replace the editor field by a list of dicts, one per editor, each holding
    the formatted name (``name``) and an identifier (``ID``) which is that
    name with commas, blanks and dots removed.

    An empty editor field is removed from the record altogether.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.

    """
    if "editor" not in record:
        return record

    raw = record["editor"]
    if not raw:
        # Nothing usable: drop the empty field.
        del record["editor"]
        return record

    # Line breaks count as plain spaces; editors are separated by " and ".
    candidates = [chunk.strip() for chunk in raw.replace('\n', ' ').split(" and ")]

    entries = []
    for formatted in getnames(candidates):
        identifier = formatted.replace(',', '').replace(' ', '').replace('.', '')
        entries.append({"name": formatted, "ID": identifier})
    record["editor"] = entries
    return record
def keyword(record, sep=',|;'):
    """
    Turn the keyword field into a list of stripped keywords.

    Line breaks are removed from the field before it is split.

    :param record: the record.
    :type record: dict
    :param sep: pattern used for the splitting regexp.
    :type sep: string, optional
    :returns: dict -- the modified record.

    """
    if "keyword" not in record:
        return record

    flattened = record["keyword"].replace('\n', '')
    pieces = []
    for chunk in re.split(sep, flattened):
        pieces.append(chunk.strip())
    record["keyword"] = pieces
    return record
def doi(record):
    """
    Add a link entry built from the doi field.

    A DOI value starting with ``10`` is turned into an
    ``http://dx.doi.org/`` URL; any other value is used as the URL as-is.
    The entry ``{"url": ..., "anchor": "doi"}`` is appended to the ``link``
    list (created if missing), unless a DOI link is already present, so
    applying this function twice does not duplicate the link.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.

    """
    if 'doi' in record:
        if 'link' not in record:
            record['link'] = []

        link = record['doi']
        if link.startswith('10'):
            link = 'http://dx.doi.org/' + link

        nodoi = True
        for item in record['link']:
            if 'doi' in item:
                # Historical check, kept for compatibility: a 'doi' key in a
                # link dict, or 'doi' contained in a plain string item.
                nodoi = False
            elif isinstance(item, dict) and (item.get('anchor') == 'doi' or
                                             item.get('url') == link):
                # Link dicts carry the DOI marker in their values, not keys.
                nodoi = False

        if nodoi:
            record['link'].append({"url": link, "anchor": "doi"})
    return record
def homogenize_latex_encoding(record):
    """
    Homogenize the latex encoding style for bibtex.

    Every value is first passed through ``convert_to_unicode`` and then
    through ``string_to_latex``, except for the ``ID`` field. The ``title``
    field is additionally passed through ``protect_uppercase``.

    This function is experimental.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    # Start from a uniform unicode representation...
    record = convert_to_unicode(record)
    # ...then convert every field except the entry ID back to latex.
    for field in record:
        if field == 'ID':
            continue
        logger.debug('Apply string_to_latex to: %s', field)
        record[field] = string_to_latex(record[field])
        if field != 'title':
            continue
        logger.debug('Protect uppercase in title')
        logger.debug('Before: %s', record[field])
        record[field] = protect_uppercase(record[field])
        logger.debug('After: %s', record[field])
    return record
11 | 12 | 13 | def customizations_unicode(record): 14 | """Use all functions related to specific fields 15 | + converter to unicode. 16 | 17 | :param record: a record 18 | :returns: -- customized record 19 | """ 20 | 21 | record = type(record) 22 | record = author(record) 23 | record = editor(record) 24 | record = journal(record) 25 | record = keyword(record) 26 | record = link(record) 27 | record = page_double_hyphen(record) 28 | record = doi(record) 29 | record = convert_to_unicode(record) 30 | return record 31 | 32 | 33 | def customizations_latex(record): 34 | """Use all functions related to specific fields 35 | + converter to latex. 36 | 37 | :param record: a record 38 | :returns: -- customized record 39 | """ 40 | 41 | record = homogenize_latex_encoding(record) 42 | record = type(record) 43 | record = author(record) 44 | record = editor(record) 45 | record = journal(record) 46 | record = keyword(record) 47 | record = link(record) 48 | record = page_double_hyphen(record) 49 | record = doi(record) 50 | return record 51 | 52 | 53 | class TestBibtexParserList(unittest.TestCase): 54 | 55 | def test_wrong(self): 56 | """ 57 | Wrong entry type 58 | """ 59 | with open('bibtexparser/tests/data/wrong.bib', 'r') as bibfile: 60 | self.assetRaises(TypeError, BibTexParser, bibfile) 61 | 62 | ########### 63 | # ARTICLE 64 | ########### 65 | # test also that list and dict are equivalent 66 | def test_article(self): 67 | with codecs.open('bibtexparser/tests/data/article.bib', 'r', 'utf-8') as bibfile: 68 | bib = BibTexParser(bibfile.read()) 69 | res_list = bib.get_entry_list() 70 | res_dict = bib.get_entry_dict() 71 | expected_list = [{'keyword': 'keyword1, keyword2', 72 | 'ENTRYTYPE': 'article', 73 | 'abstract': 'This is an abstract. This line should be long enough to test\nmultilines... 
and with a french érudit word', 74 | 'year': '2013', 75 | 'journal': 'Nice Journal', 76 | 'ID': 'Cesar2013', 77 | 'pages': '12-23', 78 | 'title': 'An amazing title', 79 | 'comments': 'A comment', 80 | 'author': 'Jean César', 81 | 'volume': '12', 82 | 'month': 'jan' 83 | }] 84 | expected_dict = {'Cesar2013': {'keyword': 'keyword1, keyword2', 85 | 'ENTRYTYPE': 'article', 86 | 'abstract': 'This is an abstract. This line should be long enough to test\nmultilines... and with a french érudit word', 87 | 'year': '2013', 88 | 'journal': 'Nice Journal', 89 | 'ID': 'Cesar2013', 90 | 'pages': '12-23', 91 | 'title': 'An amazing title', 92 | 'comments': 'A comment', 93 | 'author': 'Jean César', 94 | 'volume': '12', 95 | 'month': 'jan' 96 | }} 97 | self.assertEqual(res_list, expected_list) 98 | self.assertEqual(res_dict, expected_dict) 99 | 100 | def test_article_start_bom(self): 101 | with codecs.open('bibtexparser/tests/data/article_start_with_bom.bib', 'r', 'utf-8') as bibfile: 102 | bib = BibTexParser(bibfile.read()) 103 | res = bib.get_entry_list() 104 | expected = [{'abstract': 'This is an abstract. This line should be long enough to test\nmultilines... and with a french érudit word', 105 | 'ENTRYTYPE': 'article', 106 | 'pages': '12-23', 107 | 'volume': '12', 108 | 'ID': 'Cesar2013', 109 | 'year': '2013', 110 | 'author': 'Jean César', 111 | 'journal': 'Nice Journal', 112 | 'comments': 'A comment', 113 | 'month': 'jan', 114 | 'keyword': 'keyword1, keyword2', 115 | 'title': 'An amazing title' 116 | }] 117 | self.assertEqual(res, expected) 118 | 119 | def test_article_cust_unicode(self): 120 | with codecs.open('bibtexparser/tests/data/article.bib', 'r', 'utf-8') as bibfile: 121 | bib = BibTexParser(bibfile.read(), customization=customizations_unicode) 122 | res = bib.get_entry_list() 123 | expected = [{'abstract': 'This is an abstract. This line should be long enough to test\nmultilines... 
and with a french érudit word', 124 | 'ENTRYTYPE': 'article', 125 | 'pages': '12--23', 126 | 'volume': '12', 127 | 'ID': 'Cesar2013', 128 | 'year': '2013', 129 | 'author': ['César, Jean'], 130 | 'journal': {'ID': 'NiceJournal', 'name': 'Nice Journal'}, 131 | 'comments': 'A comment', 132 | 'month': 'jan', 133 | 'keyword': ['keyword1', 'keyword2'], 134 | 'title': 'An amazing title' 135 | }] 136 | self.assertEqual(res, expected) 137 | 138 | def test_article_cust_latex(self): 139 | with codecs.open('bibtexparser/tests/data/article.bib', 'r', 'utf-8') as bibfile: 140 | bib = BibTexParser(bibfile.read(), customization=customizations_latex) 141 | res = bib.get_entry_list() 142 | expected = [{'abstract': 'This is an abstract. This line should be long enough to test\nmultilines... and with a french {\\\'e}rudit word', 143 | 'ENTRYTYPE': 'article', 144 | 'pages': '12--23', 145 | 'volume': '12', 146 | 'ID': 'Cesar2013', 147 | 'year': '2013', 148 | 'author': ['C{\\\'e}sar, Jean'], 149 | 'journal': {'ID': 'NiceJournal', 'name': 'Nice Journal'}, 150 | 'comments': 'A comment', 151 | 'month': 'jan', 152 | 'keyword': ['keyword1', 'keyword2'], 153 | 'title': '{A}n amazing title' 154 | }] 155 | self.assertEqual(res, expected) 156 | 157 | def test_article_cust_order(self): 158 | def cust(record): 159 | record = customization.page_double_hyphen(record) 160 | record = customization.homogenize_latex_encoding(record) 161 | record = customization.author(record) 162 | return record 163 | 164 | def cust2(record): 165 | record = customization.author(record) 166 | record = customization.page_double_hyphen(record) 167 | record = customization.homogenize_latex_encoding(record) 168 | return record 169 | 170 | with open('bibtexparser/tests/data/multiple_entries.bib', 'r') as bibfile: 171 | bib = BibTexParser(bibfile.read(), customization=cust) 172 | res = bib.get_entry_list() 173 | with open('bibtexparser/tests/data/multiple_entries.bib', 'r') as bibfile: 174 | bib2 = BibTexParser(bibfile.read(), 
customization=cust2) 175 | res2 = bib.get_entry_list() 176 | self.assertEqual(res, res2) 177 | 178 | def test_article_missing_coma(self): 179 | with open('bibtexparser/tests/data/article_missing_coma.bib', 'r') as bibfile: 180 | bib = BibTexParser(bibfile.read()) 181 | res = bib.get_entry_list() 182 | expected = [{'ENTRYTYPE': 'article', 183 | 'journal': 'Nice Journal', 184 | 'volume': '12', 185 | 'ID': 'Cesar2013', 186 | 'year': '2013', 187 | 'author': 'Jean Cesar', 188 | 'comments': 'A comment', 189 | 'keyword': 'keyword1, keyword2', 190 | 'title': 'An amazing title' 191 | }, 192 | {'ENTRYTYPE': 'article', 193 | 'journal': 'Nice Journal', 194 | 'volume': '12', 195 | 'ID': 'Baltazar2013', 196 | 'year': '2013', 197 | 'author': 'Jean Baltazar', 198 | 'comments': 'A comment', 199 | 'keyword': 'keyword1, keyword2', 200 | 'title': 'An amazing title' 201 | }, 202 | {'ENTRYTYPE': 'article', 203 | 'journal': 'Nice Journal', 204 | 'volume': '12', 205 | 'ID': 'Aimar2013', 206 | 'year': '2013', 207 | 'author': 'Jean Aimar', 208 | 'comments': 'A comment', 209 | 'keyword': 'keyword1, keyword2', 210 | 'title': 'An amazing title', 211 | 'month': 'january' 212 | }, 213 | {'ENTRYTYPE': 'article', 214 | 'journal': 'Nice Journal', 215 | 'volume': '12', 216 | 'ID': 'Doute2013', 217 | 'year': '2013', 218 | 'author': 'Jean Doute', 219 | 'comments': 'A comment', 220 | 'keyword': 'keyword1, keyword2', 221 | 'title': 'An amazing title' 222 | }] 223 | self.assertEqual(res, expected) 224 | 225 | def test_oneline(self): 226 | with open('bibtexparser/tests/data/article_oneline.bib', 'r') as bibfile: 227 | bib = BibTexParser(bibfile.read()) 228 | res = bib.get_entry_list() 229 | expected = [{'ENTRYTYPE': 'article', 230 | 'journal': 'Nice Journal', 231 | 'volume': '12', 232 | 'ID': 'Cesar2013', 233 | 'year': '2013', 234 | 'author': 'Jean Cesar', 235 | 'comments': 'A comment', 236 | 'keyword': 'keyword1, keyword2', 237 | 'title': 'An amazing title' 238 | }, 239 | {'ENTRYTYPE': 'article', 240 | 
'journal': 'Nice Journal', 241 | 'volume': '12', 242 | 'ID': 'Baltazar2013', 243 | 'year': '2013', 244 | 'author': 'Jean Baltazar', 245 | 'comments': 'A comment', 246 | 'keyword': 'keyword1, keyword2', 247 | 'title': 'An amazing title' 248 | }] 249 | self.assertEqual(res, expected) 250 | 251 | 252 | def test_article_start_with_whitespace(self): 253 | with open('bibtexparser/tests/data/article_start_with_whitespace.bib', 'r') as bibfile: 254 | bib = BibTexParser(bibfile.read()) 255 | self.assertEqual(len(bib.get_entry_list()), 2) 256 | 257 | def test_article_comma_first(self): 258 | with open('bibtexparser/tests/data/article_comma_first.bib', 'r') as bibfile: 259 | bib = BibTexParser(bibfile.read()) 260 | res = bib.get_entry_list() 261 | expected = [{'ENTRYTYPE': 'article', 262 | 'journal': 'Nice Journal', 263 | 'volume': '12', 264 | 'ID': 'Cesar2013', 265 | 'year': '2013', 266 | 'author': 'Jean Cesar', 267 | 'comments': 'A comment', 268 | 'keyword': 'keyword1, keyword2', 269 | 'title': 'An amazing title' 270 | }, 271 | {'ENTRYTYPE': 'article', 272 | 'journal': 'Nice Journal', 273 | 'volume': '12', 274 | 'ID': 'Baltazar2013', 275 | 'year': '2013', 276 | 'author': 'Jean Baltazar', 277 | 'comments': 'A comment', 278 | 'keyword': 'keyword1, keyword2', 279 | 'title': 'An amazing title' 280 | }] 281 | self.assertEqual(res, expected) 282 | 283 | def test_article_no_braces(self): 284 | with open('bibtexparser/tests/data/article_no_braces.bib', 'r') as bibfile: 285 | bib = BibTexParser(bibfile.read()) 286 | res = bib.get_entry_list() 287 | expected = [{'ENTRYTYPE': 'article', 288 | 'journal': 'Nice Journal', 289 | 'volume': '12', 290 | 'pages': '12-23', 291 | 'ID': 'Cesar2013', 292 | 'year': '2013', 293 | 'month': 'jan', 294 | 'author': 'Jean C{\\\'e}sar{\\\"u}', 295 | 'comments': 'A comment', 296 | 'keyword': 'keyword1, keyword2', 297 | 'title': 'An amazing title', 298 | 'abstract': "This is an abstract. This line should be long enough to test\nmultilines... 
and with a french érudit word", 299 | }, 300 | ] 301 | self.assertEqual(res, expected) 302 | 303 | def test_article_special_characters(self): 304 | with open('bibtexparser/tests/data/article_with_special_characters.bib', 'r') as bibfile: 305 | bib = BibTexParser(bibfile.read()) 306 | res = bib.get_entry_list() 307 | expected = [{'ENTRYTYPE': 'article', 308 | 'journal': 'Nice Journal', 309 | 'volume': '12', 310 | 'pages': '12-23', 311 | 'ID': 'Cesar2013', 312 | 'year': '2013', 313 | 'month': 'jan', 314 | 'author': 'Jean C{\\\'e}sar{\\\"u}', 315 | 'comments': 'A comment', 316 | 'keyword': 'keyword1, keyword2', 317 | 'title': 'An amazing title', 318 | 'abstract': "This is an abstract. This line should be long enough to test\nmultilines... and with a french érudit word", 319 | }, 320 | ] 321 | self.assertEqual(res, expected) 322 | 323 | def test_article_protection_braces(self): 324 | with open('bibtexparser/tests/data/article_with_protection_braces.bib', 'r') as bibfile: 325 | bib = BibTexParser(bibfile.read()) 326 | res = bib.get_entry_list() 327 | expected = [{'ENTRYTYPE': 'article', 328 | 'journal': '{Nice Journal}', 329 | 'volume': '12', 330 | 'pages': '12-23', 331 | 'ID': 'Cesar2013', 332 | 'year': '2013', 333 | 'month': 'jan', 334 | 'author': 'Jean César', 335 | 'comments': 'A comment', 336 | 'keyword': 'keyword1, keyword2', 337 | 'title': '{An amazing title}', 338 | 'abstract': "This is an abstract. This line should be long enough to test\nmultilines... 
and with a french érudit word", 339 | }, 340 | ] 341 | self.assertEqual(res, expected) 342 | 343 | 344 | ########### 345 | # BOOK 346 | ########### 347 | def test_book(self): 348 | with open('bibtexparser/tests/data/book.bib', 'r') as bibfile: 349 | bib = BibTexParser(bibfile.read()) 350 | res = bib.get_entry_list() 351 | expected = [{'ENTRYTYPE': 'book', 352 | 'year': '1987', 353 | 'edition': '2', 354 | 'publisher': 'Wiley Edition', 355 | 'ID': 'Bird1987', 356 | 'volume': '1', 357 | 'title': 'Dynamics of Polymeric Liquid', 358 | 'author': 'Bird, R.B. and Armstrong, R.C. and Hassager, O.' 359 | }] 360 | 361 | self.assertEqual(res, expected) 362 | 363 | def test_book_cust_unicode(self): 364 | with open('bibtexparser/tests/data/book.bib', 'r') as bibfile: 365 | bib = BibTexParser(bibfile.read(), customization=customizations_unicode) 366 | res = bib.get_entry_list() 367 | expected = [{'ENTRYTYPE': 'book', 368 | 'year': '1987', 369 | 'edition': '2', 370 | 'publisher': 'Wiley Edition', 371 | 'ID': 'Bird1987', 372 | 'volume': '1', 373 | 'title': 'Dynamics of Polymeric Liquid', 374 | 'author': ['Bird, R.B.', 'Armstrong, R.C.', 'Hassager, O.'] 375 | }] 376 | 377 | self.assertEqual(res, expected) 378 | 379 | def test_book_cust_latex(self): 380 | with open('bibtexparser/tests/data/book.bib', 'r') as bibfile: 381 | bib = BibTexParser(bibfile.read(), customization=customizations_latex) 382 | res = bib.get_entry_list() 383 | expected = [{'ENTRYTYPE': 'book', 384 | 'year': '1987', 385 | 'edition': '2', 386 | 'publisher': 'Wiley Edition', 387 | 'ID': 'Bird1987', 388 | 'volume': '1', 389 | 'title': '{D}ynamics of {P}olymeric {L}iquid', 390 | 'author': ['Bird, R.B.', 'Armstrong, R.C.', 'Hassager, O.'] 391 | }] 392 | 393 | self.assertEqual(res, expected) 394 | 395 | ########### 396 | # TRAPS 397 | ########### 398 | def test_traps(self): 399 | with codecs.open('bibtexparser/tests/data/traps.bib', 'r', 'utf-8') as bibfile: 400 | bib = BibTexParser(bibfile.read()) 401 | res = 
bib.get_entry_list() 402 | expected = [{'keywords': 'keyword1, keyword2', 403 | 'ENTRYTYPE': 'article', 404 | 'abstract': 'This is an abstract. This line should be long enough to test\nmultilines... and with a french érudit word', 405 | 'year': '2013', 406 | 'journal': 'Nice Journal', 407 | 'ID': 'Laide2013', 408 | 'pages': '12-23', 409 | 'title': '{An} amazing {title}', 410 | 'comments': 'A comment', 411 | 'author': 'Jean Laid{\\\'e},\nBen Loaeb', 412 | 'volume': 'n.s.~2', 413 | 'month': 'jan' 414 | }] 415 | self.assertEqual(res, expected) 416 | 417 | ########### 418 | # FEATURES 419 | ########### 420 | def test_features(self): 421 | with open('bibtexparser/tests/data/features.bib', 'r') as bibfile: 422 | bib = BibTexParser(bibfile.read()) 423 | res = bib.get_entry_list() 424 | expected = [{'ENTRYTYPE': 'inproceedings', 425 | 'year': '2014', 426 | 'title': 'Cool Stuff', 427 | 'author': 'John', 428 | 'ID': 'mykey', 429 | 'booktitle': 'My International Conference', 430 | }] 431 | self.assertEqual(res, expected) 432 | 433 | def test_features2(self): 434 | with open('bibtexparser/tests/data/features2.bib', 'r') as bibfile: 435 | bib = BibTexParser(bibfile.read()) 436 | res = bib.get_entry_list() 437 | expected = [{'ENTRYTYPE': 'inproceedings', 438 | 'year': '2014', 439 | 'title': 'Cool Stuff', 440 | 'author': 'John Doe', 441 | 'ID': 'mykey', 442 | 'booktitle': 'My International Conference', 443 | 'note': 'Email: John.Doe@example.com', 444 | 'pages': '1--10', 445 | }] 446 | self.assertEqual(res, expected) 447 | 448 | ########### 449 | # WRONG 450 | ########### 451 | def test_wrong(self): 452 | with open('bibtexparser/tests/data/wrong.bib', 'r') as bibfile: 453 | bib = BibTexParser(bibfile.read()) 454 | res = bib.get_entry_list() 455 | expected = [{'author': 'correct', 456 | 'ID': 'bar', 457 | 'ENTRYTYPE': 'article'}] 458 | self.assertEqual(res, expected) 459 | 460 | ########### 461 | # ENCODING 462 | ########### 463 | def test_encoding(self): 464 | with 
codecs.open('bibtexparser/tests/data/encoding.bib', 'r', 'utf-8') as bibfile: 465 | bib = BibTexParser(bibfile.read()) 466 | res = bib.get_entry_list() 467 | expected = [{'keywords': 'keyword1, keyword2', 468 | 'ENTRYTYPE': 'article', 469 | 'abstract': 'This is an abstract. This line should be long enough to test\nmultilines... and with a french érudit word', 470 | 'year': '2013', 471 | 'journal': 'Elémentaire', 472 | 'ID': 'Cesar_2013', 473 | 'pages': '12-23', 474 | 'title': 'An amazing title: à', 475 | 'comments': 'A comment', 476 | 'author': 'Jean César', 477 | 'volume': '12', 478 | 'month': 'jan' 479 | }] 480 | self.assertEqual(res, expected) 481 | 482 | def test_encoding_with_homogenize(self): 483 | with codecs.open('bibtexparser/tests/data/encoding.bib', 'r', 'utf-8') as bibfile: 484 | bib = BibTexParser(bibfile.read(), customization=homogenize_latex_encoding) 485 | res = bib.get_entry_list() 486 | expected = [{'keywords': 'keyword1, keyword2', 487 | 'ENTRYTYPE': 'article', 488 | 'abstract': 'This is an abstract. This line should be long enough to test\nmultilines... 
and with a french {\\\'e}rudit word', 489 | 'year': '2013', 490 | 'journal': 'El{\\\'e}mentaire', 491 | 'ID': 'Cesar_2013', 492 | 'pages': '12-23', 493 | 'title': '{A}n amazing title: {\\`a}', 494 | 'comments': 'A comment', 495 | 'author': 'Jean C{\\\'e}sar', 496 | 'volume': '12', 497 | 'month': 'jan' 498 | }] 499 | self.assertEqual(res, expected) 500 | 501 | def test_field_name_with_dash_underscore(self): 502 | with open('bibtexparser/tests/data/article_field_name_with_underscore.bib', 'r') as bibfile: 503 | bib = BibTexParser(bibfile.read()) 504 | res = bib.get_entry_list() 505 | expected = [{ 506 | 'keyword': 'keyword1, keyword2', 507 | 'ENTRYTYPE': 'article', 508 | 'year': '2013', 509 | 'journal': 'Nice Journal', 510 | 'ID': 'Cesar2013', 511 | 'pages': '12-23', 512 | 'title': 'An amazing title', 513 | 'comments': 'A comment', 514 | 'author': 'Jean César', 515 | 'volume': '12', 516 | 'strange_field_name': 'val', 517 | 'strange-field-name2': 'val2', 518 | }] 519 | self.assertEqual(res, expected) 520 | 521 | if __name__ == '__main__': 522 | unittest.main() 523 | -------------------------------------------------------------------------------- /bibtexparser/tests/test_splitname.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import unittest 5 | 6 | from bibtexparser.customization import InvalidName, splitname 7 | 8 | class TestSplitnameMethod(unittest.TestCase): 9 | def test_splitname_basic(self): 10 | """Basic tests of customization.splitname() """ 11 | # Empty input. 12 | result = splitname("") 13 | expected = {} 14 | self.assertEqual(result, expected, msg="Invalid output for empty name") 15 | 16 | # Whitespace-only names.
17 | result = splitname(" ") 18 | expected = {} 19 | self.assertEqual(result, expected, msg="Invalid output for space-only name") 20 | result = splitname(" \t~~") 21 | expected = {} 22 | self.assertEqual(result, expected, msg="Invalid output for whitespace name") 23 | 24 | # Test strict mode. 25 | with self.assertRaises(InvalidName): # Trailing comma (4 cases). 26 | splitname("BB,", strict_mode=True) 27 | with self.assertRaises(InvalidName): 28 | splitname("BB, ", strict_mode=True) 29 | with self.assertRaises(InvalidName): 30 | splitname("BB, ~\t", strict_mode=True) 31 | with self.assertRaises(InvalidName): 32 | splitname(", ~\t", strict_mode=True) 33 | with self.assertRaises(InvalidName): # Too many sections. 34 | splitname("AA, BB, CC, DD", strict_mode=True) 35 | with self.assertRaises(InvalidName): # Unterminated opening brace (x3). 36 | splitname("AA {BB CC", strict_mode=True) 37 | with self.assertRaises(InvalidName): 38 | splitname("AA {{{BB CC", strict_mode=True) 39 | with self.assertRaises(InvalidName): 40 | splitname("AA {{{BB} CC}", strict_mode=True) 41 | with self.assertRaises(InvalidName): # Unmatched closing brace (x3). 42 | splitname("AA BB CC}", strict_mode=True) 43 | with self.assertRaises(InvalidName): 44 | splitname("AA BB CC}}}", strict_mode=True) 45 | with self.assertRaises(InvalidName): 46 | splitname("{AA {BB CC}}}", strict_mode=True) 47 | 48 | # Test strict mode off for trailing comma. 
49 | expected = {'first': [], 'von': [], 'last': ["BB"], 'jr': []} 50 | result = splitname("BB,", strict_mode=False) 51 | self.assertEqual(result, expected, msg="Invalid output for trailing comma with strict mode off") 52 | result = splitname("BB, ", strict_mode=False) 53 | self.assertEqual(result, expected, msg="Invalid output for trailing comma with strict mode off") 54 | result = splitname("BB, ~\t ", strict_mode=False) 55 | self.assertEqual(result, expected, msg="Invalid output for trailing comma with strict mode off") 56 | expected = {} 57 | result = splitname(", ~\t", strict_mode=False) 58 | self.assertEqual(result, expected, msg="Invalid output for trailing comma with strict mode off") 59 | 60 | # Test strict mode off for too many sections. 61 | expected = {'first': ["CC", "DD"], 'von': [], 'last': ["AA"], 'jr': ["BB"]} 62 | result = splitname("AA, BB, CC, DD", strict_mode=False) 63 | self.assertEqual(result, expected, msg="Invalid output for too many sections with strict mode off") 64 | 65 | # Test strict mode off for an unterminated opening brace. 66 | result = splitname("AA {BB CC", strict_mode=False) 67 | expected = {'first': ["AA"], 'von': [], 'last': ["{BB CC}"], 'jr': []} 68 | self.assertEqual(result, expected, msg="Invalid output for unterminated opening brace with strict mode off") 69 | result = splitname("AA {{{BB CC", strict_mode=False) 70 | expected = {'first': ["AA"], 'von': [], 'last': ["{{{BB CC}}}"], 'jr': []} 71 | self.assertEqual(result, expected, msg="Invalid output for unterminated opening brace with strict mode off") 72 | result = splitname("AA {{{BB} CC}", strict_mode=False) 73 | expected = {'first': ["AA"], 'von': [], 'last': ["{{{BB} CC}}"], 'jr': []} 74 | self.assertEqual(result, expected, msg="Invalid output for unterminated opening brace with strict mode off") 75 | 76 | # Test strict mode off for an unmatched closing brace. 
77 | result = splitname("AA BB CC}", strict_mode=False) 78 | expected = {'first': ["AA", "BB"], 'von': [], 'last': ["{CC}"], 'jr': []} 79 | self.assertEqual(result, expected, msg="Invalid output for unmatched closing brace with strict mode off") 80 | result = splitname("AA BB CC}}}", strict_mode=False) 81 | expected = {'first': ["AA", "BB"], 'von': [], 'last': ["{{{CC}}}"], 'jr': []} 82 | self.assertEqual(result, expected, msg="Invalid output for unmatched closing brace with strict mode off") 83 | result = splitname("{AA {BB CC}}}", strict_mode=False) 84 | expected = {'first': [], 'von': [], 'last': ["{{AA {BB CC}}}"], 'jr': []} 85 | self.assertEqual(result, expected, msg="Invalid output for unmatched closing brace with strict mode off") 86 | 87 | # Test it handles commas at higher brace levels. 88 | result = splitname("CC, dd, {AA, BB}") 89 | expected = {'first': ["{AA, BB}"], 'von': [], 'last': ["CC"], 'jr': ["dd"]} 90 | self.assertEqual(result, expected, msg="Invalid output for braced commas") 91 | 92 | 93 | def test_splitname_cases(self): 94 | """Test customization.splitname() vs output from BibTeX """ 95 | for name, expected in splitname_test_cases: 96 | result = splitname(name) 97 | self.assertEqual(result, expected, msg="Input name: {0}".format(name)) 98 | 99 | 100 | splitname_test_cases = ( 101 | (r'Per Brinch Hansen', 102 | {'first': ['Per', 'Brinch'], 'von': [], 'last': ['Hansen'], 'jr': []}), 103 | 104 | (r'Brinch Hansen, Per', 105 | {'first': ['Per'], 'von': [], 'last': ['Brinch', 'Hansen'], 'jr': []}), 106 | 107 | (r'Brinch Hansen,, Per', 108 | {'first': ['Per'], 'von': [], 'last': ['Brinch', 'Hansen'], 'jr': []}), 109 | 110 | (r"Charles Louis Xavier Joseph de la Vall{\'e}e Poussin", 111 | {'first': ['Charles', 'Louis', 'Xavier', 'Joseph'], 'von': ['de', 'la'], 112 | 'last': [r'Vall{\'e}e', 'Poussin'], 'jr': []}), 113 | 114 | (r'D[onald] E. Knuth', 115 | {'first': ['D[onald]', 'E.'], 'von': [], 'last': ['Knuth'], 'jr': []}), 116 | 117 | (r'A. 
{Delgado de Molina}', 118 | {'first': ['A.'], 'von': [], 'last': ['{Delgado de Molina}'], 'jr': []}), 119 | 120 | (r"M. Vign{\'e}", 121 | {'first': ['M.'], 'von': [], 'last': [r"Vign{\'e}"], 'jr': []}), 122 | 123 | ############################################################################### 124 | # 125 | # Test cases from 126 | # http://maverick.inria.fr/~Xavier.Decoret/resources/xdkbibtex/bibtex_summary.html 127 | # 128 | ############################################################################### 129 | 130 | (r'AA BB', 131 | {'first': ['AA'], 'von': [], 'last': ['BB'], 'jr': []}), 132 | 133 | (r'AA', 134 | {'first': [], 'von': [], 'last': ['AA'], 'jr': []}), 135 | 136 | (r'AA bb', 137 | {'first': ['AA'], 'von': [], 'last': ['bb'], 'jr': []}), 138 | 139 | (r'aa', 140 | {'first': [], 'von': [], 'last': ['aa'], 'jr': []}), 141 | 142 | (r'AA bb CC', 143 | {'first': ['AA'], 'von': ['bb'], 'last': ['CC'], 'jr': []}), 144 | 145 | (r'AA bb CC dd EE', 146 | {'first': ['AA'], 'von': ['bb', 'CC', 'dd'], 'last': ['EE'], 'jr': []}), 147 | 148 | (r'AA 1B cc dd', 149 | {'first': ['AA', '1B'], 'von': ['cc'], 'last': ['dd'], 'jr': []}), 150 | 151 | (r'AA 1b cc dd', 152 | {'first': ['AA'], 'von': ['1b', 'cc'], 'last': ['dd'], 'jr': []}), 153 | 154 | (r'AA {b}B cc dd', 155 | {'first': ['AA', '{b}B'], 'von': ['cc'], 'last': ['dd'], 'jr': []}), 156 | 157 | (r'AA {b}b cc dd', 158 | {'first': ['AA'], 'von': ['{b}b', 'cc'], 'last': ['dd'], 'jr': []}), 159 | 160 | (r'AA {B}b cc dd', 161 | {'first': ['AA'], 'von': ['{B}b', 'cc'], 'last': ['dd'], 'jr': []}), 162 | 163 | (r'AA {B}B cc dd', 164 | {'first': ['AA', '{B}B'], 'von': ['cc'], 'last': ['dd'], 'jr': []}), 165 | 166 | (r'AA \BB{b} cc dd', 167 | {'first': ['AA', r'\BB{b}'], 'von': ['cc'], 'last': ['dd'], 'jr': []}), 168 | 169 | (r'AA \bb{b} cc dd', 170 | {'first': ['AA'], 'von': [r'\bb{b}', 'cc'], 'last': ['dd'], 'jr': []}), 171 | 172 | (r'AA {bb} cc DD', 173 | {'first': ['AA', '{bb}'], 'von': ['cc'], 'last': ['DD'], 'jr': []}), 
174 | 175 | (r'AA bb {cc} DD', 176 | {'first': ['AA'], 'von': ['bb'], 'last': ['{cc}', 'DD'], 'jr': []}), 177 | 178 | (r'AA {bb} CC', 179 | {'first': ['AA', '{bb}'], 'von': [], 'last': ['CC'], 'jr': []}), 180 | 181 | (r'bb CC, AA', 182 | {'first': ['AA'], 'von': ['bb'], 'last': ['CC'], 'jr': []}), 183 | 184 | (r'bb CC, aa', 185 | {'first': ['aa'], 'von': ['bb'], 'last': ['CC'], 'jr': []}), 186 | 187 | (r'bb CC dd EE, AA', 188 | {'first': ['AA'], 'von': ['bb', 'CC', 'dd'], 'last': ['EE'], 'jr': []}), 189 | 190 | (r'bb, AA', 191 | {'first': ['AA'], 'von': [], 'last': ['bb'], 'jr': []}), 192 | 193 | (r'bb CC,XX, AA', 194 | {'first': ['AA'], 'von': ['bb'], 'last': ['CC'], 'jr': ['XX']}), 195 | 196 | (r'bb CC,xx, AA', 197 | {'first': ['AA'], 'von': ['bb'], 'last': ['CC'], 'jr': ['xx']}), 198 | 199 | (r'BB,, AA', 200 | {'first': ['AA'], 'von': [], 'last': ['BB'], 'jr': []}), 201 | 202 | (r"Paul \'Emile Victor", 203 | {'first': ['Paul', r"\'Emile"], 'von': [], 'last': ['Victor'], 'jr': []}), 204 | 205 | (r"Paul {\'E}mile Victor", 206 | {'first': ['Paul', r"{\'E}mile"], 'von': [], 'last': ['Victor'], 'jr': []}), 207 | 208 | (r"Paul \'emile Victor", 209 | {'first': ['Paul'], 'von': [r"\'emile"], 'last': ['Victor'], 'jr': []}), 210 | 211 | (r"Paul {\'e}mile Victor", 212 | {'first': ['Paul'], 'von': [r"{\'e}mile"], 'last': ['Victor'], 'jr': []}), 213 | 214 | (r"Victor, Paul \'Emile", 215 | {'first': ['Paul', r"\'Emile"], 'von': [], 'last': ['Victor'], 'jr': []}), 216 | 217 | (r"Victor, Paul {\'E}mile", 218 | {'first': ['Paul', r"{\'E}mile"], 'von': [], 'last': ['Victor'], 'jr': []}), 219 | 220 | (r"Victor, Paul \'emile", 221 | {'first': ['Paul', r"\'emile"], 'von': [], 'last': ['Victor'], 'jr': []}), 222 | 223 | (r"Victor, Paul {\'e}mile", 224 | {'first': ['Paul', r"{\'e}mile"], 'von': [], 'last': ['Victor'], 'jr': []}), 225 | 226 | (r'Dominique Galouzeau de Villepin', 227 | {'first': ['Dominique', 'Galouzeau'], 'von': ['de'], 'last': ['Villepin'], 'jr': []}), 228 | 229 | 
(r'Dominique {G}alouzeau de Villepin', 230 | {'first': ['Dominique'], 'von': ['{G}alouzeau', 'de'], 231 | 'last': ['Villepin'], 'jr': []}), 232 | 233 | (r'Galouzeau de Villepin, Dominique', 234 | {'first': ['Dominique'], 'von': ['Galouzeau', 'de'], 235 | 'last': ['Villepin'], 'jr': []}), 236 | 237 | ############################################################################### 238 | # 239 | # Test cases from pybtex 240 | # See file /pybtex/tests/parse_name_test.py in the pybtex source. 241 | # 242 | ############################################################################### 243 | 244 | (r'A. E. Siegman', 245 | {'first': ['A.', 'E.'], 'von': [], 'last': ['Siegman'], 'jr': []}), 246 | 247 | (r'A. G. W. Cameron', 248 | {'first': ['A.', 'G.', 'W.'], 'von': [], 'last': ['Cameron'], 'jr': []}), 249 | 250 | (r'A. Hoenig', 251 | {'first': ['A.'], 'von': [], 'last': ['Hoenig'], 'jr': []}), 252 | 253 | (r'A. J. Van Haagen', 254 | {'first': ['A.', 'J.', 'Van'], 'von': [], 'last': ['Haagen'], 'jr': []}), 255 | 256 | (r'A. S. Berdnikov', 257 | {'first': ['A.', 'S.'], 'von': [], 'last': ['Berdnikov'], 'jr': []}), 258 | 259 | (r'A. Trevorrow', 260 | {'first': ['A.'], 'von': [], 'last': ['Trevorrow'], 'jr': []}), 261 | 262 | (r'Adam H. 
Lewenberg', 263 | {'first': ['Adam', 'H.'], 'von': [], 'last': ['Lewenberg'], 'jr': []}), 264 | 265 | (r'Addison-Wesley Publishing Company', 266 | {'first': ['Addison-Wesley', 'Publishing'], 'von': [], 267 | 'last': ['Company'], 'jr': []}), 268 | 269 | (r'Advogato (Raph Levien)', 270 | {'first': ['Advogato', '(Raph'], 'von': [], 'last': ['Levien)'], 'jr': []}), 271 | 272 | (r'Andrea de Leeuw van Weenen', 273 | {'first': ['Andrea'], 'von': ['de', 'Leeuw', 'van'], 'last': ['Weenen'], 'jr': []}), 274 | 275 | (r'Andreas Geyer-Schulz', 276 | {'first': ['Andreas'], 'von': [], 'last': ['Geyer-Schulz'], 'jr': []}), 277 | 278 | (r'Andr{\'e} Heck', 279 | {'first': [r'Andr{\'e}'], 'von': [], 'last': ['Heck'], 'jr': []}), 280 | 281 | (r'Anne Br{\"u}ggemann-Klein', 282 | {'first': ['Anne'], 'von': [], 'last': [r'Br{\"u}ggemann-Klein'], 'jr': []}), 283 | 284 | (r'Anonymous', 285 | {'first': [], 'von': [], 'last': ['Anonymous'], 'jr': []}), 286 | 287 | (r'B. Beeton', 288 | {'first': ['B.'], 'von': [], 'last': ['Beeton'], 'jr': []}), 289 | 290 | (r'B. Hamilton Kelly', 291 | {'first': ['B.', 'Hamilton'], 'von': [], 'last': ['Kelly'], 'jr': []}), 292 | 293 | (r'B. V. Venkata Krishna Sastry', 294 | {'first': ['B.', 'V.', 'Venkata', 'Krishna'], 'von': [], 295 | 'last': ['Sastry'], 'jr': []}), 296 | 297 | (r'Benedict L{\o}fstedt', 298 | {'first': ['Benedict'], 'von': [], 'last': [r'L{\o}fstedt'], 'jr': []}), 299 | 300 | (r'Bogus{\l}aw Jackowski', 301 | {'first': ['Bogus{\l}aw'], 'von': [], 'last': ['Jackowski'], 'jr': []}), 302 | 303 | (r'Christina A. L.\ Thiele', 304 | {'first': ['Christina', 'A.', 'L.\\'], 'von': [], 305 | 'last': ['Thiele'], 'jr': []}), 306 | 307 | (r"D. 
Men'shikov", 308 | {'first': ['D.'], 'von': [], 'last': ["Men'shikov"], 'jr': []}), 309 | 310 | (r'Darko \v{Z}ubrini{\'c}', 311 | {'first': ['Darko'], 'von': [], 'last': [r'\v{Z}ubrini{\'c}'], 'jr': []}), 312 | 313 | (r'Dunja Mladeni{\'c}', 314 | {'first': ['Dunja'], 'von': [], 'last': [r'Mladeni{\'c}'], 'jr': []}), 315 | 316 | (r'Edwin V. {Bell, II}', 317 | {'first': ['Edwin', 'V.'], 'von': [], 'last': ['{Bell, II}'], 'jr': []}), 318 | 319 | (r'Frank G. {Bennett, Jr.}', 320 | {'first': ['Frank', 'G.'], 'von': [], 'last': ['{Bennett, Jr.}'], 'jr': []}), 321 | 322 | (r'Fr{\'e}d{\'e}ric Boulanger', 323 | {'first': [r'Fr{\'e}d{\'e}ric'], 'von': [], 'last': ['Boulanger'], 'jr': []}), 324 | 325 | (r'Ford, Jr., Henry', 326 | {'first': ['Henry'], 'von': [], 'last': ['Ford'], 'jr': ['Jr.']}), 327 | 328 | (r'mr Ford, Jr., Henry', 329 | {'first': ['Henry'], 'von': ['mr'], 'last': ['Ford'], 'jr': ['Jr.']}), 330 | 331 | (r'Fukui Rei', 332 | {'first': ['Fukui'], 'von': [], 'last': ['Rei'], 'jr': []}), 333 | 334 | (r'G. Gr{\"a}tzer', 335 | {'first': ['G.'], 'von': [], 'last': [r'Gr{\"a}tzer'], 'jr': []}), 336 | 337 | (r'George Gr{\"a}tzer', 338 | {'first': ['George'], 'von': [], 'last': [r'Gr{\"a}tzer'], 'jr': []}), 339 | 340 | (r'Georgia K. M. 
Tobin', 341 | {'first': ['Georgia', 'K.', 'M.'], 'von': [], 'last': ['Tobin'], 'jr': []}), 342 | 343 | (r'Gilbert van den Dobbelsteen', 344 | {'first': ['Gilbert'], 'von': ['van', 'den'], 'last': ['Dobbelsteen'], 'jr': []}), 345 | 346 | (r'Gy{\"o}ngyi Bujdos{\'o}', 347 | {'first': [r'Gy{\"o}ngyi'], 'von': [], 'last': [r'Bujdos{\'o}'], 'jr': []}), 348 | 349 | (r'Helmut J{\"u}rgensen', 350 | {'first': ['Helmut'], 'von': [], 'last': [r'J{\"u}rgensen'], 'jr': []}), 351 | 352 | (r'Herbert Vo{\ss}', 353 | {'first': ['Herbert'], 'von': [], 'last': ['Vo{\ss}'], 'jr': []}), 354 | 355 | (r"H{\'a}n Th{\^e}\llap{\raise 0.5ex\hbox{\'{\relax}}} Th{\'a}nh", 356 | {'first': [r'H{\'a}n', r"Th{\^e}\llap{\raise 0.5ex\hbox{\'{\relax}}}"], 357 | 'von': [], 'last': [r"Th{\'a}nh"], 'jr': []}), 358 | 359 | (r"H{\`a}n Th\^e\llap{\raise0.5ex\hbox{\'{\relax}}} Th{\`a}nh", 360 | {'first': [r'H{\`a}n', r"Th\^e\llap{\raise0.5ex\hbox{\'{\relax}}}"], 361 | 'von': [], 'last': [r"Th{\`a}nh"], 'jr': []}), 362 | 363 | (r'J. Vesel{\'y}', 364 | {'first': ['J.'], 'von': [], 'last': [r'Vesel{\'y}'], 'jr': []}), 365 | 366 | (r'Javier Rodr\'{\i}guez Laguna', 367 | {'first': ['Javier', r'Rodr\'{\i}guez'], 'von': [], 'last': ['Laguna'], 'jr': []}), 368 | 369 | (r'Ji\v{r}\'{\i} Vesel{\'y}', 370 | {'first': [r'Ji\v{r}\'{\i}'], 'von': [], 'last': [r'Vesel{\'y}'], 'jr': []}), 371 | 372 | (r'Ji\v{r}\'{\i} Zlatu{\v{s}}ka', 373 | {'first': [r'Ji\v{r}\'{\i}'], 'von': [], 'last': [r'Zlatu{\v{s}}ka'], 'jr': []}), 374 | 375 | (r'Ji\v{r}{\'\i} Vesel{\'y}', 376 | {'first': [r'Ji\v{r}{\'\i}'], 'von': [], 'last': [r'Vesel{\'y}'], 'jr': []}), 377 | 378 | (r'Ji\v{r}{\'{\i}}Zlatu{\v{s}}ka', 379 | {'first': [], 'von': [], 'last': [r'Ji\v{r}{\'{\i}}Zlatu{\v{s}}ka'], 'jr': []}), 380 | 381 | (r'Jim Hef{}feron', 382 | {'first': ['Jim'], 'von': [], 'last': ['Hef{}feron'], 'jr': []}), 383 | 384 | (r'J{\"o}rg Knappen', 385 | {'first': [r'J{\"o}rg'], 'von': [], 'last': ['Knappen'], 'jr': []}), 386 | 387 | (r'J{\"o}rgen L. 
Pind', 388 | {'first': [r'J{\"o}rgen', 'L.'], 'von': [], 'last': ['Pind'], 'jr': []}), 389 | 390 | (r'J{\'e}r\^ome Laurens', 391 | {'first': [r'J{\'e}r\^ome'], 'von': [], 'last': ['Laurens'], 'jr': []}), 392 | 393 | (r'J{{\"o}}rg Knappen', 394 | {'first': [r'J{{\"o}}rg'], 'von': [], 'last': ['Knappen'], 'jr': []}), 395 | 396 | (r'K. Anil Kumar', 397 | {'first': ['K.', 'Anil'], 'von': [], 'last': ['Kumar'], 'jr': []}), 398 | 399 | (r'Karel Hor{\'a}k', 400 | {'first': ['Karel'], 'von': [], 'last': [r'Hor{\'a}k'], 'jr': []}), 401 | 402 | (r'Karel P\'{\i}{\v{s}}ka', 403 | {'first': ['Karel'], 'von': [], 'last': [r'P\'{\i}{\v{s}}ka'], 'jr': []}), 404 | 405 | (r'Karel P{\'\i}{\v{s}}ka', 406 | {'first': ['Karel'], 'von': [], 'last': [r'P{\'\i}{\v{s}}ka'], 'jr': []}), 407 | 408 | (r'Karel Skoup\'{y}', 409 | {'first': ['Karel'], 'von': [], 'last': [r'Skoup\'{y}'], 'jr': []}), 410 | 411 | (r'Karel Skoup{\'y}', 412 | {'first': ['Karel'], 'von': [], 'last': [r'Skoup{\'y}'], 'jr': []}), 413 | 414 | (r'Kent McPherson', 415 | {'first': ['Kent'], 'von': [], 'last': ['McPherson'], 'jr': []}), 416 | 417 | (r'Klaus H{\"o}ppner', 418 | {'first': ['Klaus'], 'von': [], 'last': [r'H{\"o}ppner'], 'jr': []}), 419 | 420 | (r'Lars Hellstr{\"o}m', 421 | {'first': ['Lars'], 'von': [], 'last': [r'Hellstr{\"o}m'], 'jr': []}), 422 | 423 | (r'Laura Elizabeth Jackson', 424 | {'first': ['Laura', 'Elizabeth'], 'von': [], 'last': ['Jackson'], 'jr': []}), 425 | 426 | (r'M. D{\'{\i}}az', 427 | {'first': ['M.'], 'von': [], 'last': [r'D{\'{\i}}az'], 'jr': []}), 428 | 429 | (r'M/iche/al /O Searc/oid', 430 | {'first': [r'M/iche/al', r'/O'], 'von': [], 'last': [r'Searc/oid'], 'jr': []}), 431 | 432 | (r'Marek Ry{\'c}ko', 433 | {'first': ['Marek'], 'von': [], 'last': [r'Ry{\'c}ko'], 'jr': []}), 434 | 435 | (r'Marina Yu. 
Nikulina', 436 | {'first': ['Marina', 'Yu.'], 'von': [], 'last': ['Nikulina'], 'jr': []}), 437 | 438 | (r'Max D{\'{\i}}az', 439 | {'first': ['Max'], 'von': [], 'last': [r'D{\'{\i}}az'], 'jr': []}), 440 | 441 | (r'Merry Obrecht Sawdey', 442 | {'first': ['Merry', 'Obrecht'], 'von': [], 'last': ['Sawdey'], 'jr': []}), 443 | 444 | (r'Miroslava Mis{\'a}kov{\'a}', 445 | {'first': ['Miroslava'], 'von': [], 'last': [r'Mis{\'a}kov{\'a}'], 'jr': []}), 446 | 447 | (r'N. A. F. M. Poppelier', 448 | {'first': ['N.', 'A.', 'F.', 'M.'], 'von': [], 'last': ['Poppelier'], 'jr': []}), 449 | 450 | (r'Nico A. F. M. Poppelier', 451 | {'first': ['Nico', 'A.', 'F.', 'M.'], 'von': [], 'last': ['Poppelier'], 'jr': []}), 452 | 453 | (r'Onofrio de Bari', 454 | {'first': ['Onofrio'], 'von': ['de'], 'last': ['Bari'], 'jr': []}), 455 | 456 | (r'Pablo Rosell-Gonz{\'a}lez', 457 | {'first': ['Pablo'], 'von': [], 'last': [r'Rosell-Gonz{\'a}lez'], 'jr': []}), 458 | 459 | (r'Paco La Bruna', 460 | {'first': ['Paco', 'La'], 'von': [], 'last': ['Bruna'], 'jr': []}), 461 | 462 | (r'Paul Franchi-Zannettacci', 463 | {'first': ['Paul'], 'von': [], 'last': ['Franchi-Zannettacci'], 'jr': []}), 464 | 465 | (r'Pavel \v{S}eve\v{c}ek', 466 | {'first': ['Pavel'], 'von': [], 'last': [r'\v{S}eve\v{c}ek'], 'jr': []}), 467 | 468 | (r'Petr Ol{\v{s}}ak', 469 | {'first': ['Petr'], 'von': [], 'last': [r'Ol{\v{s}}ak'], 'jr': []}), 470 | 471 | (r'Petr Ol{\v{s}}{\'a}k', 472 | {'first': ['Petr'], 'von': [], 'last': [r'Ol{\v{s}}{\'a}k'], 'jr': []}), 473 | 474 | (r'Primo\v{z} Peterlin', 475 | {'first': [r'Primo\v{z}'], 'von': [], 'last': ['Peterlin'], 'jr': []}), 476 | 477 | (r'Prof. 
Alban Grimm', 478 | {'first': ['Prof.', 'Alban'], 'von': [], 'last': ['Grimm'], 'jr': []}), 479 | 480 | (r'P{\'e}ter Husz{\'a}r', 481 | {'first': [r'P{\'e}ter'], 'von': [], 'last': [r'Husz{\'a}r'], 'jr': []}), 482 | 483 | (r'P{\'e}ter Szab{\'o}', 484 | {'first': [r'P{\'e}ter'], 'von': [], 'last': [r'Szab{\'o}'], 'jr': []}), 485 | 486 | (r'Rafa{\l}\.Zbikowski', 487 | {'first': [], 'von': [], 'last': [r'Rafa{\l}\.Zbikowski'], 'jr': []}), 488 | 489 | (r'Rainer Sch{\"o}pf', 490 | {'first': ['Rainer'], 'von': [], 'last': [r'Sch{\"o}pf'], 'jr': []}), 491 | 492 | (r'T. L. (Frank) Pappas', 493 | {'first': ['T.', 'L.', '(Frank)'], 'von': [], 'last': ['Pappas'], 'jr': []}), 494 | 495 | (r'TUG 2004 conference', 496 | {'first': ['TUG', '2004'], 'von': [], 'last': ['conference'], 'jr': []}), 497 | 498 | (r'TUG {\sltt DVI} Driver Standards Committee', 499 | {'first': ['TUG', '{\sltt DVI}', 'Driver', 'Standards'], 'von': [], 500 | 'last': ['Committee'], 'jr': []}), 501 | 502 | (r'TUG {\sltt xDVIx} Driver Standards Committee', 503 | {'first': ['TUG'], 'von': ['{\sltt xDVIx}'], 504 | 'last': ['Driver', 'Standards', 'Committee'], 'jr': []}), 505 | 506 | (r'University of M{\"u}nster', 507 | {'first': ['University'], 'von': ['of'], 'last': [r'M{\"u}nster'], 'jr': []}), 508 | 509 | (r'Walter van der Laan', 510 | {'first': ['Walter'], 'von': ['van', 'der'], 'last': ['Laan'], 'jr': []}), 511 | 512 | (r'Wendy G. 
McKay', 513 | {'first': ['Wendy', 'G.'], 'von': [], 'last': ['McKay'], 'jr': []}), 514 | 515 | (r'Wendy McKay', 516 | {'first': ['Wendy'], 'von': [], 'last': ['McKay'], 'jr': []}), 517 | 518 | (r'W{\l}odek Bzyl', 519 | {'first': [r'W{\l}odek'], 'von': [], 'last': ['Bzyl'], 'jr': []}), 520 | 521 | (r'\LaTeX Project Team', 522 | {'first': [r'\LaTeX', 'Project'], 'von': [], 'last': ['Team'], 'jr': []}), 523 | 524 | (r'\rlap{Lutz Birkhahn}', 525 | {'first': [], 'von': [], 'last': [r'\rlap{Lutz Birkhahn}'], 'jr': []}), 526 | 527 | (r'{Jim Hef{}feron}', 528 | {'first': [], 'von': [], 'last': ['{Jim Hef{}feron}'], 'jr': []}), 529 | 530 | (r'{Kristoffer H\o{}gsbro Rose}', 531 | {'first': [], 'von': [], 'last': ['{Kristoffer H\o{}gsbro Rose}'], 'jr': []}), 532 | 533 | (r'{TUG} {Working} {Group} on a {\TeX} {Directory} {Structure}', 534 | {'first': ['{TUG}', '{Working}', '{Group}'], 'von': ['on', 'a'], 535 | 'last': [r'{\TeX}', '{Directory}', '{Structure}'], 'jr': []}), 536 | 537 | (r'{The \TUB{} Team}', 538 | {'first': [], 'von': [], 'last': [r'{The \TUB{} Team}'], 'jr': []}), 539 | 540 | (r'{\LaTeX} project team', 541 | {'first': [r'{\LaTeX}'], 'von': ['project'], 'last': ['team'], 'jr': []}), 542 | 543 | (r'{\NTG{} \TeX{} future working group}', 544 | {'first': [], 'von': [], 'last': [r'{\NTG{} \TeX{} future working group}'], 'jr': []}), 545 | 546 | (r'{{\LaTeX\,3} Project Team}', 547 | {'first': [], 'von': [], 'last': [r'{{\LaTeX\,3} Project Team}'], 'jr': []}), 548 | 549 | (r'Johansen Kyle, Derik Mamania M.', 550 | {'first': ['Derik', 'Mamania', 'M.'], 'von': [], 'last': ['Johansen', 'Kyle'], 'jr': []}), 551 | 552 | (r"Johannes Adam Ferdinand Alois Josef Maria Marko d'Aviano Pius von und zu Liechtenstein", 553 | {'first': ['Johannes', 'Adam', 'Ferdinand', 'Alois', 'Josef', 'Maria', 'Marko'], 554 | 'von': ["d'Aviano", 'Pius', 'von', 'und', 'zu'], 'last': ['Liechtenstein'], 'jr': []}), 555 | 556 | (r"Brand\~{a}o, F", 557 | {'first': ['F'], 'von': [], 'last': ['Brand\\', 
'{a}o'], 'jr': []}), 558 | ) 559 | 560 | 561 | if __name__ == '__main__': 562 | unittest.main() 563 | --------------------------------------------------------------------------------